Python 代码:基于高斯核的局部加权线性回归

    技术2023-09-13  129

    # Locally weighted linear regression (LWLR) with a Gaussian kernel.
import numpy as np


def _prepare_training_data(xArr, yArr):
    """Convert the training inputs to plain float ndarrays.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a 2-D array with one sample per row
        and ``y`` is a flat 1-D array of targets.
    """
    x = np.atleast_2d(np.asarray(xArr, dtype=float))
    y = np.asarray(yArr, dtype=float).ravel()
    return x, y


def _lwlr_predict(test_point, x, y, k):
    """Fit a weighted least-squares line around ``test_point`` and evaluate it.

    Args:
        test_point: 1-D float array, one feature vector.
        x: 2-D float array of training samples (one row per sample).
        y: 1-D float array of training targets.
        k: Bandwidth of the Gaussian kernel.

    Returns:
        The prediction as a Python float, or ``None`` (after printing a
        message) when the weighted normal matrix has a zero determinant.
    """
    # Gaussian kernel: training samples close to the test point get a weight
    # near 1, distant samples a weight near 0.
    diff = x - test_point
    weights = np.exp((diff * diff).sum(axis=1) / (-2.0 * k ** 2))

    # Scaling each row of x by its weight equals W @ x for the diagonal weight
    # matrix W, without materialising the m-by-m matrix.
    weighted_x = weights[:, np.newaxis] * x
    xtx = x.T @ weighted_x  # X^T W X
    if np.linalg.det(xtx) == 0.0:
        print("This matrix is singular,cannot do inverse")
        return None

    # Solve (X^T W X) ws = X^T W y instead of forming the explicit inverse.
    ws = np.linalg.solve(xtx, weighted_x.T @ y)
    return float(test_point @ ws)


def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict the target at ``testPoint`` with locally weighted regression.

    Args:
        testPoint: Feature vector of the query point (same layout as one row
            of ``xArr``).
        xArr: Training samples, one row per sample.
        yArr: Training targets, one per row of ``xArr``.
        k: Gaussian kernel bandwidth; smaller values make the fit more local.

    Returns:
        A 1x1 ``numpy.matrix`` holding the prediction (the return type of the
        original matrix-based implementation), or ``None`` when the weighted
        normal matrix is singular (a message is printed in that case).
    """
    x, y = _prepare_training_data(xArr, yArr)
    point = np.asarray(testPoint, dtype=float).ravel()
    prediction = _lwlr_predict(point, x, y, k)
    if prediction is None:
        return None
    return np.matrix([[prediction]])


def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run locally weighted regression for every row of ``testArr``.

    Args:
        testArr: Query points, one row per point.
        xArr: Training samples, one row per sample.
        yArr: Training targets, one per row of ``xArr``.
        k: Gaussian kernel bandwidth.

    Returns:
        A 1-D float array with one prediction per query point. Points whose
        local fit is singular are reported as ``nan``.
    """
    # Convert the training data once instead of once per query point.
    x, y = _prepare_training_data(xArr, yArr)
    points = np.asarray(testArr, dtype=float)
    yHat = np.zeros(points.shape[0])
    for i, point in enumerate(points):
        prediction = _lwlr_predict(point.ravel(), x, y, k)
        yHat[i] = np.nan if prediction is None else prediction
    return yHat


# Alternative sample data set kept from the original listing (unused):
# xArr = [[1, v] for v in (
#     0.00023, 0.009562, 0.095423, 0.100215, 0.131548, 0.162489, 0.2,
#     0.225422, 0.251548, 0.280158, 0.3, 0.335411, 0.365144, 0.4, 0.425251,
#     0.4515, 0.490548, 0.532154, 0.552154, 0.58, 0.6, 0.643251, 0.680548,
#     0.7, 0.732548, 0.762544, 0.79254, 0.8, 0.85, 0.912)]
# yArr = [3.012, 3.123, 3.232, 3.253, 3.22, 3.2, 3.15, 3.25, 3.312, 3.51,
#         3.553, 3.52, 3.51, 3.492, 3.56, 3.756, 3.85, 4, 4.15, 4.2, 3.99,
#         3.9, 4.005, 4.12, 4.25, 4.36, 4.41, 4.5, 4.653, 4.7]

# Sample data: a constant bias column plus one integer feature (732..762).
xArr = [[1, value] for value in range(732, 763)]
yArr = [
    427.32139373952896, 430.735363778539, 425.6767343525396,
    431.4185708563029, 430.99515141425684, 432.3820058846558,
    433.74955658806323, 433.5750437214359, 433.7781211647823,
    431.3961869282752, 433.9786575155382, 439.3783187858723,
    440.22278884180486, 439.7035715665656, 436.3271129747373,
    437.6662079987627, 441.00947912285056, 436.8307767906325,
    440.9128942751956, 438.4408287267856, 432.90572835049574,
    444.17917838673696, 449.64086693594845, 443.6499887657059,
    446.6215382473078, 444.3090856509079, 442.6554589980954,
    447.21599252533633, 445.5086451071811, 446.07960289412387,
    447.5937190253615,
]


def main():
    """Fit the sample data with three bandwidths and plot the results."""
    # Imported here so that lwlr/lwlrTest stay importable without matplotlib.
    import matplotlib.pyplot as plt

    # Predictions obtained for different values of k.
    bandwidths = (0.5, 1, 1.5)
    fits = [lwlrTest(xArr, xArr, yArr, k) for k in bandwidths]

    print(yArr[15])
    print(fits[0][15])

    # Plot each fitted curve in order of increasing feature value.
    feature = np.asarray(xArr, dtype=float)[:, 1]
    order = feature.argsort()

    fig = plt.figure()
    for position, (k, y_hat) in enumerate(zip(bandwidths, fits), start=1):
        ax = fig.add_subplot(1, len(bandwidths), position)
        ax.plot(feature[order], y_hat[order])
        ax.scatter(feature, yArr)
        ax.set_title(f"k={k}")
    plt.show()


if __name__ == "__main__":
    main()

    结果

    Processed: 0.009, SQL: 9