2020吴恩达 machine learning 编程作业 python实现 ex3

    技术2022-07-11  87

    # -*- coding: utf-8 -*-
    """
    Created on Wed Jul 1 17:28:18 2020

    @author: cheetah023

    Machine-learning exercise 3: regularised logistic regression trained
    one-vs-all on the digit data stored in ``ex3data1.mat``.

    Run as a script to reproduce the exercise output; importing the module
    only defines the functions and constants.
    """
    import numpy as np
    import scipy.io as sci
    import scipy.optimize as opt

    try:
        # Nothing in this script calls plt; the import is kept so the name
        # stays available, but a missing matplotlib must not stop the
        # numerical code from running.
        import matplotlib.pyplot as plt
    except ImportError:
        plt = None

    input_layer_size = 400  # 20x20 Input Images of Digits
    num_labels = 10  # 10 labels, from 1 to 10


    # Function definitions
    def sigmoid(X):
        """Return the element-wise logistic function ``1 / (1 + exp(-X))``."""
        return 1 / (1 + np.exp(-X))


    def lrcostFunction(theta, X, y, lamda):
        """Return the regularised logistic-regression cost.

        Args:
            theta: parameters, any array-like with ``X.shape[1]`` elements.
            X: design matrix of shape (m, n); the first column is treated as
                the bias column and its parameter is not regularised.
            y: 0/1 targets, any array-like with m elements.
            lamda: regularisation strength.

        Returns:
            The cost as a (1, 1) ndarray.
        """
        m, n = X.shape
        theta = np.reshape(theta, (n, 1))
        y = np.reshape(y, (m, 1))
        h = sigmoid(np.dot(X, theta))
        cost = (np.dot(-y.T, np.log(h)) - np.dot(1 - y.T, np.log(1 - h))) / m
        # theta[0] (bias) is excluded from the penalty term.
        cost = cost + np.dot(theta.T[0, 1:], theta[1:, 0]) * lamda / (2 * m)
        return cost


    def gradient(theta, X, y, lamda):
        """Return the gradient of the regularised logistic-regression cost.

        Args:
            theta: parameters, any array-like with ``X.shape[1]`` elements.
                The argument is never modified.
            X: design matrix of shape (m, n); first column is the bias column.
            y: 0/1 targets, any array-like with m elements.
            lamda: regularisation strength.

        Returns:
            The gradient as an (n, 1) ndarray.
        """
        m, n = X.shape
        theta = np.reshape(theta, (n, 1))
        # Reshape y as the cost function does; a 1-D y would otherwise
        # broadcast ``h - y`` to an (m, m) matrix.
        y = np.reshape(y, (m, 1))
        h = sigmoid(np.dot(X, theta))
        # Work on a private copy: np.reshape may hand back a view of the
        # caller's array, and zeroing the bias entry there would silently
        # change the caller's parameters.
        penalised = np.array(theta, dtype=float)
        penalised[0] = 0
        grad = np.dot(X.T, (h - y)) / m
        grad = grad + penalised * lamda / m
        return grad


    def _scalar_cost(theta, X, y, lamda):
        """Adapter for the optimiser: the cost as a plain Python float."""
        return lrcostFunction(theta, X, y, lamda).item()


    def _flat_gradient(theta, X, y, lamda):
        """Adapter for the optimiser: the gradient as a 1-D array."""
        return gradient(theta, X, y, lamda).ravel()


    def oneVsAll(X, y, num_labels, lamda):
        """Train one regularised logistic-regression classifier per label.

        Args:
            X: feature matrix of shape (m, n) without a bias column.
            y: labels in ``1..num_labels``, any array-like with m elements.
            num_labels: number of classes.
            lamda: regularisation strength.

        Returns:
            ndarray of shape (num_labels, n + 1); row ``c - 1`` holds the
            parameters of the classifier for label ``c``.
        """
        m, n = X.shape
        X = np.column_stack([np.ones([m, 1]), X])
        y = np.reshape(y, (m, 1))
        all_theta = np.zeros([num_labels, n + 1])
        # minimize() documents x0 as a 1-D array of shape (n,); recent SciPy
        # releases reject other shapes, so start from a flat vector.
        initial_theta = np.zeros(n + 1)
        for c in range(1, num_labels + 1):
            # Binary target: 1 where the sample belongs to label c, else 0.
            p = (y == c).astype(float)
            result = opt.minimize(fun=_scalar_cost,
                                  x0=initial_theta,
                                  args=(X, p, lamda),
                                  method='TNC',
                                  jac=_flat_gradient)
            # Row index 0 holds label 1, ..., row index 9 holds label 10
            # (the original author's note says this last label stands for
            # the digit 0).
            all_theta[c - 1, :] = result.x
        return all_theta


    def predictOneVsAll(all_theta, X):
        """Predict a label in ``1..num_labels`` for every row of X.

        Args:
            all_theta: parameters as returned by ``oneVsAll``.
            X: feature matrix of shape (m, n) without a bias column.

        Returns:
            ndarray of shape (m, 1) with the predicted labels.
        """
        m = X.shape[0]
        X = np.column_stack([np.ones([m, 1]), X])
        h = sigmoid(np.dot(X, all_theta.T))
        # argmax yields a 0-based row index of all_theta; labels start at 1.
        p = np.argmax(h, axis=1) + 1
        return np.reshape(p, [m, 1])


    def main():
        """Run the exercise: check cost/gradient, train, report accuracy."""
        # Part 1: Loading and Visualizing Data
        data = sci.loadmat('ex3data1.mat')  # loadmat returns a dict
        X = data['X']
        y = data['y']

        # Part 2a: Vectorize Logistic Regression
        theta_t = [[-2], [-1], [1], [2]]
        ones = np.ones([5, 1])
        # Reshape to (3, 5) and transpose so the values line up with the
        # column-major layout of the Octave test data.
        X_t = np.reshape(range(1, 16), [3, 5]) / 10
        X_t = X_t.T
        X_t = np.column_stack([ones, X_t])
        y_t = [[1], [0], [1], [0], [1]]
        lamda_t = 3
        cost = lrcostFunction(theta_t, X_t, y_t, lamda_t)
        grad = gradient(theta_t, X_t, y_t, lamda_t)
        print('Cost:', cost)
        print('Expected cost: 2.534819')
        print('Gradients:')
        print('', grad)
        print('Expected gradients:')
        print('0.146561\n -0.548558\n 0.724722\n 1.398003')

        # Part 2b: One-vs-All Training
        lamda = 0.1
        all_theta = oneVsAll(X, y, num_labels, lamda)

        # Part 3: Predict for One-Vs-All
        p = predictOneVsAll(all_theta, X)
        # Boolean matches; their mean is the fraction of correct predictions.
        temp = (p == y)
        prob = np.mean(temp)
        print('Training Set Accuracy:', prob)


    if __name__ == "__main__":
        main()

    运行结果:

    Cost: [[2.5348194]]
    Expected cost: 2.534819
    Gradients:
     [[ 0.14656137]
     [-0.54855841]
     [ 0.72472227]
     [ 1.39800296]]
    Expected gradients:
    0.146561
     -0.548558
     0.724722
     1.398003
    Training Set Accuracy: 0.9646

    参考资料:

    https://blog.csdn.net/lccflccf/category_8379707.html

    https://blog.csdn.net/Cowry5/article/details/83302646

    https://blog.csdn.net/weixin_44027820/category_9754493.html  

    总结:

    1、要注意 Octave 的数组下标从 1 开始,而 Python 的下标从 0 开始;标签 y 的取值是 1~10,所以 argmax 得到的下标要加 1 才能和 y 对齐,最后输出预测结果时要注意这一点

    2、Octave 的训练集准确率约为 94.98%,而我用 Python 跑出来是 96.46%(0.9646),差异可能是因为这里使用的 opt.minimize(TNC 方法)与 Octave 使用的优化函数不同。不过 cost 和 grad 的计算结果与期望值基本一致,所以实现应该没有问题

     

    Processed: 0.011, SQL: 9