2020 Andrew Ng Machine Learning Programming Assignment, Python Implementation: ex4


```python
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 1 22:37:49 2020

@author: cheetah023
"""
import numpy as np
import scipy.io as sci
import scipy.optimize as opt

# Function definitions
def sigmoid(X):
    return 1 / (1 + np.exp(-X))

def sigmoidGradient(X):
    g = sigmoid(X) * (1 - sigmoid(X))
    return g

def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    # Unroll the flat parameter vector back into the two weight matrices
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size, input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    m = X.shape[0]
    ones = np.ones([m, 1])
    X = np.column_stack([ones, X])
    a2 = sigmoid(np.dot(X, theta1.T))       # 5000*25
    a2 = np.column_stack([ones, a2])        # 5000*26
    a3 = sigmoid(np.dot(a2, theta2.T))      # 5000*10
    # One-hot encode the labels (labels run from 1 to 10)
    y_t = np.zeros([m, num_labels])
    for i in range(0, m):
        y_t[i, y[i]-1] = 1
    cost = np.sum(-y_t * np.log(a3) - (1-y_t) * np.log(1-a3)) / m
    # Regularization skips the bias columns
    theta1_t = theta1[:, 1:]
    theta2_t = theta2[:, 1:]
    reg = (np.sum(theta1_t ** 2) + np.sum(theta2_t ** 2)) * lamda / (2 * m)
    cost = cost + reg
    return cost

def nnGradient(nn_params, input_layer_size, hidden_layer_size,
               num_labels, X, y, lamda):
    m = X.shape[0]
    ones = np.ones([m, 1])
    X = np.column_stack([ones, X])
    theta1 = np.reshape(nn_params[0:hidden_layer_size*(input_layer_size+1)],
                        [hidden_layer_size, input_layer_size+1])
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],
                        [num_labels, hidden_layer_size+1])
    theta1_grad = np.zeros(theta1.shape)    # 25*401
    theta2_grad = np.zeros(theta2.shape)    # 10*26
    theta1_t = theta1[:, 1:]                # 25*400
    theta2_t = theta2[:, 1:]                # 10*25
    y_t = np.zeros([m, num_labels])
    for i in range(0, m):
        y_t[i, y[i]-1] = 1
    # Backpropagation, one training example at a time
    for i in range(0, m):
        z2 = np.dot(X[i, :], theta1.T)      # 1*25
        a2 = sigmoid(z2)                    # 1*25
        a2 = np.hstack((1, a2))             # 1*26
        a3 = sigmoid(np.dot(a2, theta2.T))  # 1*10
        delta3 = a3 - y_t[i, :]             # 1*10
        delta2 = np.dot(delta3, theta2_t) * sigmoidGradient(z2)  # 1*25
        delta2 = np.reshape(delta2, [1, hidden_layer_size])
        X_t = np.reshape(X[i, :], [1, input_layer_size+1])
        delta3 = np.reshape(delta3, [1, num_labels])
        a2 = np.reshape(a2, [1, hidden_layer_size+1])
        theta1_grad = theta1_grad + np.dot(delta2.T, X_t)  # 25*401
        theta2_grad = theta2_grad + np.dot(delta3.T, a2)   # 10*26
    theta1_grad[:, 0] = theta1_grad[:, 0] / m
    theta1_grad[:, 1:] = theta1_grad[:, 1:] / m + (lamda/m) * theta1_t
    theta2_grad[:, 0] = theta2_grad[:, 0] / m
    theta2_grad[:, 1:] = theta2_grad[:, 1:] / m + (lamda/m) * theta2_t
    grad = np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))
    return grad

def randInitializeWeights(L_in, L_out):
    # Random values in [-epsilon_init, epsilon_init] to break symmetry
    epsilon_init = 0.12
    W = np.random.rand(L_out, 1+L_in) * 2 * epsilon_init - epsilon_init
    return W

def predict(Theta1, Theta2, X):
    m = X.shape[0]
    ones = np.ones([m, 1])
    X = np.column_stack([ones, X])
    a2 = sigmoid(np.dot(X, Theta1.T))       # 5000*25
    a2 = np.column_stack([ones, a2])        # 5000*26
    a3 = sigmoid(np.dot(a2, Theta2.T))      # 5000*10
    p = np.argmax(a3, axis=1) + 1
    p = np.reshape(p, [m, 1])
    return p

# Setup the parameters you will use for this exercise
input_layer_size = 400   # 20x20 input images of digits
hidden_layer_size = 25   # 25 hidden units
num_labels = 10          # 10 labels, from 1 to 10

# Part 1: Loading and Visualizing Data
data1 = sci.loadmat('ex4data1.mat')
X = data1['X']
y = data1['y']
print('X:', X.shape)
print('y:', y.shape)

# Part 2: Loading Parameters
data2 = sci.loadmat('ex4weights.mat')
theta1 = data2['Theta1']
theta2 = data2['Theta2']
print('theta1:', theta1.shape)
print('theta2:', theta2.shape)

# Part 3: Compute Cost (Feedforward)
lamda = 0
nn_params = np.vstack((theta1.reshape([-1, 1]), theta2.reshape([-1, 1])))
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                      num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):', cost)
print('(this value should be about 0.287629)')

# Part 4: Implement Regularization
lamda = 1
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                      num_labels, X, y, lamda)
print('Cost at parameters (loaded from ex4weights):', cost)
print('(this value should be about 0.383770)')

# Part 5: Sigmoid Gradient
g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:')
print(g)

# Part 6: Initializing Parameters
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = np.vstack((initial_Theta1.reshape([-1, 1]),
                               initial_Theta2.reshape([-1, 1])))

# Part 7: Implement Backpropagation
# (I was short on time, so the gradient-checking function is not implemented here)

# Part 8a: Implement Regularization
lamda = 3
cost = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                      num_labels, X, y, lamda)
print('Cost at (fixed) debugging parameters (lambda = 3):', cost)
print('(for lambda = 3, this value should be about 0.576051)')

# Part 8b: Training NN
lamda = 1
# TNC reaches about 0.84-0.88 accuracy; CG reaches about 0.96
result = opt.minimize(fun=nnCostFunction,
                      x0=initial_nn_params.flatten(),  # minimize expects a 1-D x0
                      args=(input_layer_size, hidden_layer_size,
                            num_labels, X, y, lamda),
                      # method='TNC',  # truncated Newton algorithm
                      method='CG',     # conjugate gradient algorithm
                      jac=nnGradient,
                      options={'maxiter': 50})
'''
# Equivalent to opt.minimize with method='CG'
nnParam = opt.fmin_cg(f=nnCostFunction,
                      x0=initial_nn_params,
                      fprime=nnGradient,
                      args=(input_layer_size, hidden_layer_size,
                            num_labels, X, y, lamda),
                      maxiter=50,
                      disp=True)
'''
nnParam = result.x
theta1 = np.reshape(nnParam[0:hidden_layer_size*(input_layer_size+1)],
                    [hidden_layer_size, input_layer_size+1])
theta2 = np.reshape(nnParam[hidden_layer_size*(input_layer_size+1):],
                    [num_labels, hidden_layer_size+1])

# Part 9: Visualize Weights (plotting skipped)

# Part 10: Implement Predict
p = predict(theta1, theta2, X)
temp = (p == y)
prob = np.mean(temp)
print('Training Set Accuracy:', prob)
```
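Part 7's gradient check was skipped above. For completeness, here is a minimal sketch of what such a numerical check could look like, using the `nnCostFunction` and `nnGradient` defined above; the tiny layer sizes and the `checkGradient` name are my own choices, not the exercise's `checkNNGradients`:

```python
def checkGradient(lamda=0):
    # Tiny network so the O(n) cost evaluations stay fast
    il, hl, nl, m = 3, 5, 3, 5
    theta1 = randInitializeWeights(il, hl)
    theta2 = randInitializeWeights(hl, nl)
    Xc = np.random.rand(m, il)
    yc = (np.arange(1, m + 1).reshape(m, 1) % nl) + 1  # labels in 1..nl
    params = np.concatenate((theta1.flatten(), theta2.flatten()))
    grad = nnGradient(params, il, hl, nl, Xc, yc, lamda)
    # Two-sided finite differences, one parameter at a time
    numgrad = np.zeros_like(params)
    eps = 1e-4
    for i in range(params.size):
        p1, p2 = params.copy(), params.copy()
        p1[i] += eps
        p2[i] -= eps
        numgrad[i] = (nnCostFunction(p1, il, hl, nl, Xc, yc, lamda)
                      - nnCostFunction(p2, il, hl, nl, Xc, yc, lamda)) / (2 * eps)
    # Should be on the order of 1e-9 if the backprop gradient is correct
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('Relative difference:', diff)
```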

Output:

```
X: (5000, 400)
y: (5000, 1)
theta1: (25, 401)
theta2: (10, 26)
Cost at parameters (loaded from ex4weights): 0.2876291651613189
(this value should be about 0.287629)
Cost at parameters (loaded from ex4weights): 0.38376985909092365
(this value should be about 0.383770)
Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]
Cost at (fixed) debugging parameters (lambda = 3): 0.5760512469501331
(for lambda = 3, this value should be about 0.576051)
Training Set Accuracy: 0.9634
```

Summary:

1. My plotting skills are still lacking, and with time short I skipped the figures (a rough sketch of the weight visualization follows below).
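As a starting point, the skipped Part 9 visualization could be sketched with matplotlib roughly like this, assuming `theta1` holds the trained (25, 401) weight matrix from Part 8b; the grid layout is my own choice, not the exercise's `displayData`:

```python
import matplotlib.pyplot as plt

# Each row of theta1 (minus the bias weight) is a 400-pixel image showing
# what its hidden unit responds to; transpose because the MATLAB data is
# stored column-major.
fig, axes = plt.subplots(5, 5, figsize=(6, 6))
for ax, row in zip(axes.flat, theta1[:, 1:]):
    ax.imshow(row.reshape(20, 20).T, cmap='gray')
    ax.axis('off')
plt.show()
```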

2. In matrix code, most bugs turn out to be dimension mismatches, so checking shapes first saves the most time (a quick example below).
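For instance, printing the shapes around a product makes a mismatch obvious at a glance; the shapes shown assume the bias column has already been added to `X`:

```python
z2 = np.dot(X, theta1.T)
print('X:', X.shape, 'theta1.T:', theta1.T.shape, 'z2:', z2.shape)
# X: (5000, 401)  theta1.T: (401, 25)  z2: (5000, 25)
```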

3. The first time I used opt.minimize I forgot to set a maximum iteration count, so it ran for ages with nothing to show for it (a sketch of capping and monitoring the run follows).
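One way to avoid that is to cap the iterations and watch progress as it runs, a sketch using minimize's `callback` hook and the `disp` option (the `report` helper is my own, not part of SciPy):

```python
it = [0]
def report(xk):                      # called once per CG iteration
    it[0] += 1
    print('iteration', it[0])

result = opt.minimize(fun=nnCostFunction, x0=initial_nn_params.flatten(),
                      args=(input_layer_size, hidden_layer_size,
                            num_labels, X, y, lamda),
                      method='CG', jac=nnGradient, callback=report,
                      options={'maxiter': 50, 'disp': True})
```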
