使用线性判别分析分类问题错误

发布于 2025-02-10 06:52:26 字数 2588 浏览 1 评论 0原文

Hi am trying to implement the LInear Discriminant Analysis module for a project but am getting stuck here is the git repo :

linear discriminant analysis:

代码：


import numpy as np

class lineardiscriminantanalysis :
  
  

  def __init__(self,training_data_X, training_data_Y) :
    def get_priorprobability():
      P_y_eq_k = []
      for x in self.class_ :
        for y in x :
          p_y_eq_k = [np.sum(self.training_data_Y == y) / len(self.training_data_Y)]
          P_y_eq_k.append(p_y_eq_k)
      return P_y_eq_k
  
    def get_classspecificmeanvector():
      count = 0
      for x in self.class_ :
        for y in x :
          id = []
          c_ = 0
          for z in self.training_data_Y :
            if z == y :
              i = 1
              c_ += 1
            else :
              i = 0
            id.append(i)
          if count == 0 :
            X_i = np.matmul( np.matrix(id) , self.training_data_X)
            s = 1/c_
            classspecificmeanvector = np.matmul( np.matrix(id).dot(s) , self.training_data_X)
            count += 1
          else :
            classspecificmeanvector = np.insert(classspecificmeanvector,1 ,  np.matmul(np.matrix(id).dot(1/c_) , self.training_data_X), axis=0)
            X_i = np.insert( X_i , 1 , np.matmul( np.matrix(id) , self.training_data_X) , axis=0)
      return classspecificmeanvector , X_i

    def get_cov():
      cov = np.matmul(np.subtract(self.X_i , self.classspecificmeanvector), np.subtract(self.X_i , self.classspecificmeanvector).transpose() ).dot(1/(len(self.training_data_Y) - len(self.prioprobability) ))
      return cov
      
    
    # Linear regression module init
    self.training_data_X = training_data_X # The training data x => features numpy_matrix
    self.training_data_Y = training_data_Y # The training data y => response numpy_matrix
    self.class_ = np.unique(self.training_data_Y, axis=0)
    self.prioprobability = get_priorprobability()
    self.classspecificmeanvector , self.X_i = get_classspecificmeanvector()
    self.cov = get_cov()
    

if __name__ == "__main__" :
  x = np.matrix([[1,3],[2,3],[2,4],[3,1],[3,2],[4,2]])
  y = np.matrix([[1],[1],[1],[2],[2],[2]])
  Lda = lineardiscriminantanalysis(x,y)
  print(Lda.prioprobability) 
  print(Lda.classspecificmeanvector)
  print(Lda.cov)

事实是：我想实现我与np.cov获得的结果：我所期望的

：得到

[[13.88888889 11.11111111]
 [11.11111111 13.88888889]]

我的期望：

[[0.3333 0.66666]
 [0.66666 0.3333]]

请帮助我解决这个问题

原文

Hi am trying to implement the LInear Discriminant Analysis module for a project but am getting stuck here is the git repo :

linear discriminant analysis:

linear discriminant analysis

the code :


import numpy as np

class lineardiscriminantanalysis :
  
  

  def __init__(self,training_data_X, training_data_Y) :
    def get_priorprobability():
      P_y_eq_k = []
      for x in self.class_ :
        for y in x :
          p_y_eq_k = [np.sum(self.training_data_Y == y) / len(self.training_data_Y)]
          P_y_eq_k.append(p_y_eq_k)
      return P_y_eq_k
  
    def get_classspecificmeanvector():
      count = 0
      for x in self.class_ :
        for y in x :
          id = []
          c_ = 0
          for z in self.training_data_Y :
            if z == y :
              i = 1
              c_ += 1
            else :
              i = 0
            id.append(i)
          if count == 0 :
            X_i = np.matmul( np.matrix(id) , self.training_data_X)
            s = 1/c_
            classspecificmeanvector = np.matmul( np.matrix(id).dot(s) , self.training_data_X)
            count += 1
          else :
            classspecificmeanvector = np.insert(classspecificmeanvector,1 ,  np.matmul(np.matrix(id).dot(1/c_) , self.training_data_X), axis=0)
            X_i = np.insert( X_i , 1 , np.matmul( np.matrix(id) , self.training_data_X) , axis=0)
      return classspecificmeanvector , X_i

    def get_cov():
      cov = np.matmul(np.subtract(self.X_i , self.classspecificmeanvector), np.subtract(self.X_i , self.classspecificmeanvector).transpose() ).dot(1/(len(self.training_data_Y) - len(self.prioprobability) ))
      return cov
      
    
    # Linear regression module init
    self.training_data_X = training_data_X # The training data x => features numpy_matrix
    self.training_data_Y = training_data_Y # The training data y => response numpy_matrix
    self.class_ = np.unique(self.training_data_Y, axis=0)
    self.prioprobability = get_priorprobability()
    self.classspecificmeanvector , self.X_i = get_classspecificmeanvector()
    self.cov = get_cov()
    

if __name__ == "__main__" :
  x = np.matrix([[1,3],[2,3],[2,4],[3,1],[3,2],[4,2]])
  y = np.matrix([[1],[1],[1],[2],[2],[2]])
  Lda = lineardiscriminantanalysis(x,y)
  print(Lda.prioprobability) 
  print(Lda.classspecificmeanvector)
  print(Lda.cov)

The fact is : i want to implement the cov matrix bat the result i get with np.cov is not what i expected :

There what i get

[[13.88888889 11.11111111]
 [11.11111111 13.88888889]]

What i expected :

[[0.3333 0.66666]
 [0.66666 0.3333]]

Please help me solve this problem

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

若无相欠,怎会相见 2025-02-17 06:52:26

我找到了正确的方法谢谢

import numpy as np
from numpy.linalg import inv

class lineardiscriminantananlysis :
  
  

  def __init__(self,training_data_X, training_data_Y) :
    def get_priorprobability():
      P_y_eq_k = []
      for x in self.class_ :
        for y in x :
          p_y_eq_k = [np.sum(self.training_data_Y == y) / len(self.training_data_Y)]
          P_y_eq_k.append(p_y_eq_k)
      return P_y_eq_k
  
    def get_classspecificmeanvector():
      count = 0
      for x in self.class_ :
          id = []
          c_ = 0
          for z in self.training_data_Y :
            if z == x[0] :
              i = 1
              c_ += 1
            else :
              i = 0
            id.append(i)
          if count == 0 :
            s = 1/c_
            classspecificmeanvector = np.matmul( np.matrix(id).dot(s) , self.training_data_X)
            Tp = (np.matrix(id).T * np.matmul( np.matrix(id).dot(s) , self.training_data_X) )
            count += 1
          else :
            classspecificmeanvector = np.insert(classspecificmeanvector,1 ,  np.matmul(np.matrix(id).dot(1/c_) , self.training_data_X), axis=0)
            Tp += np.matrix(id).T * np.matrix(id).dot(s) * self.training_data_X
            x_to_mean = (Tp - self.training_data_X)
            count += 1
      return classspecificmeanvector , x_to_mean

    def get_sigma():
      sigma = (1/(self.x_to_mean.shape[0] - self.class_.shape[0] ) )* self.x_to_mean.T * self.x_to_mean
      return sigma
      
    
    # Linear regression module init
    self.training_data_X = training_data_X # The training data x => features numpy_matrix
    self.training_data_Y = training_data_Y # The training data y => response numpy_matrix
    self.class_ = np.unique(self.training_data_Y, axis=0)
    self.prioprobability = get_priorprobability()
    self.classspecificmeanvector, self.x_to_mean = get_classspecificmeanvector()
    self.sigma = get_sigma()

  def predict(self , x):
      print(self.training_data_X.T , x.T)
      s =  ( self.classspecificmeanvector * inv(self.sigma) * x.T ) + 0.5 * ((self.classspecificmeanvector * inv(self.sigma) ) * self.classspecificmeanvector.T) + np.log(self.prioprobability)
      print( self.class_[np.argmax(np.sum(s,axis=1))][0] ) 

if __name__ == "__main__" :
  x = np.matrix([[1,3],[2,3],[2,4],[3,1],[3,2],[4,2] ])
  y = np.matrix([[1],[1],[1],[2],[2],[2] ] )
  Lda = lineardiscriminantananlysis(x,y)
  #print(Lda.class_) 
  #print(Lda.prioprobability) 
  #print(Lda.classspecificmeanvector)
  #print(Lda.sigma)
  x_ = np.matrix([ [0,5] ]  )
  Lda.predict(x_)
  x_ = np.matrix([ [3,0] ]  )
  Lda.predict(x_)

```

I Found the correct way thank you

import numpy as np
from numpy.linalg import inv

class lineardiscriminantananlysis :
  
  

  def __init__(self,training_data_X, training_data_Y) :
    def get_priorprobability():
      P_y_eq_k = []
      for x in self.class_ :
        for y in x :
          p_y_eq_k = [np.sum(self.training_data_Y == y) / len(self.training_data_Y)]
          P_y_eq_k.append(p_y_eq_k)
      return P_y_eq_k
  
    def get_classspecificmeanvector():
      count = 0
      for x in self.class_ :
          id = []
          c_ = 0
          for z in self.training_data_Y :
            if z == x[0] :
              i = 1
              c_ += 1
            else :
              i = 0
            id.append(i)
          if count == 0 :
            s = 1/c_
            classspecificmeanvector = np.matmul( np.matrix(id).dot(s) , self.training_data_X)
            Tp = (np.matrix(id).T * np.matmul( np.matrix(id).dot(s) , self.training_data_X) )
            count += 1
          else :
            classspecificmeanvector = np.insert(classspecificmeanvector,1 ,  np.matmul(np.matrix(id).dot(1/c_) , self.training_data_X), axis=0)
            Tp += np.matrix(id).T * np.matrix(id).dot(s) * self.training_data_X
            x_to_mean = (Tp - self.training_data_X)
            count += 1
      return classspecificmeanvector , x_to_mean

    def get_sigma():
      sigma = (1/(self.x_to_mean.shape[0] - self.class_.shape[0] ) )* self.x_to_mean.T * self.x_to_mean
      return sigma
      
    
    # Linear regression module init
    self.training_data_X = training_data_X # The training data x => features numpy_matrix
    self.training_data_Y = training_data_Y # The training data y => response numpy_matrix
    self.class_ = np.unique(self.training_data_Y, axis=0)
    self.prioprobability = get_priorprobability()
    self.classspecificmeanvector, self.x_to_mean = get_classspecificmeanvector()
    self.sigma = get_sigma()

  def predict(self , x):
      print(self.training_data_X.T , x.T)
      s =  ( self.classspecificmeanvector * inv(self.sigma) * x.T ) + 0.5 * ((self.classspecificmeanvector * inv(self.sigma) ) * self.classspecificmeanvector.T) + np.log(self.prioprobability)
      print( self.class_[np.argmax(np.sum(s,axis=1))][0] ) 

if __name__ == "__main__" :
  x = np.matrix([[1,3],[2,3],[2,4],[3,1],[3,2],[4,2] ])
  y = np.matrix([[1],[1],[1],[2],[2],[2] ] )
  Lda = lineardiscriminantananlysis(x,y)
  #print(Lda.class_) 
  #print(Lda.prioprobability) 
  #print(Lda.classspecificmeanvector)
  #print(Lda.sigma)
  x_ = np.matrix([ [0,5] ]  )
  Lda.predict(x_)
  x_ = np.matrix([ [3,0] ]  )
  Lda.predict(x_)

```

回复收藏 0 原文

~没有更多了~