# -*- coding: utf-8 -*-
"""
Created on Wed Jul 1 17:28:18 2020
@author: cheetah023
"""
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sci
import scipy.optimize as opt
# Function definitions
def sigmoid(X):
    return 1 / (1 + np.exp(-X))
def lrcostFunction(theta, X, y, lamda):
    theta = np.reshape(theta, (X.shape[1], 1))
    y = np.reshape(y, (X.shape[0], 1))
    h = sigmoid(np.dot(X, theta))
    m = X.shape[0]
    cost = (np.dot(-y.T, np.log(h)) - np.dot((1 - y).T, np.log(1 - h))) / m
    # Regularize every parameter except the bias term theta[0]
    cost = cost + np.dot(theta[1:, 0], theta[1:, 0]) * lamda / (2 * m)
    return cost
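# For reference, the cost computed above is the regularized logistic
# regression cost
#   J(theta) = (1/m) * sum(-y*log(h) - (1-y)*log(1-h))
#              + (lamda/(2*m)) * sum(theta[1:]**2)
# where h = sigmoid(X @ theta) and theta[0] is left out of the
# regularization sum.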
def gradient(theta, X, y, lamda):
    theta = np.reshape(theta, (X.shape[1], 1))
    y = np.reshape(y, (X.shape[0], 1))
    m = X.shape[0]
    h = sigmoid(np.dot(X, theta))
    # Copy before zeroing the bias entry: np.reshape can return a view,
    # so writing theta[0] = 0 directly would corrupt the optimizer's
    # parameter vector in place
    reg_theta = theta.copy()
    reg_theta[0] = 0
    grad = np.dot(X.T, (h - y)) / m
    grad = grad + reg_theta * lamda / m
    return grad
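# For reference, the gradient computed above is
#   grad = (1/m) * X.T @ (h - y) + (lamda/m) * theta
# with the bias entry theta[0] excluded from the regularization term.
# As a quick sanity check (a minimal sketch, not part of the original
# exercise), the analytic gradient can be compared against a central
# finite-difference approximation of lrcostFunction:
def check_gradient(theta, X, y, lamda, eps=1e-4):
    theta = np.asarray(theta, dtype=float).flatten()
    num_grad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = eps  # perturb one parameter at a time
        diff = (lrcostFunction(theta + e, X, y, lamda)
                - lrcostFunction(theta - e, X, y, lamda))
        num_grad[i] = np.asarray(diff).item() / (2 * eps)
    return num_grad
# e.g. check_gradient(theta_t, X_t, y_t, lamda_t) on the Part 2a test data
# below should agree with gradient(theta_t, X_t, y_t, lamda_t) to ~1e-6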
def oneVsAll(X, y, num_labels, lamda):
    (m, n) = X.shape
    all_theta = np.zeros([num_labels, n + 1])
    ones = np.ones([m, 1])
    X = np.column_stack([ones, X])
    initial_theta = np.zeros([n + 1, 1])
    for c in range(1, num_labels + 1):
        # Binary labels for classifier c: 1 where y == c, else 0
        p = np.zeros([m, 1])
        idx = np.where(y == c)
        p[idx] = 1
        result = opt.minimize(fun=lrcostFunction,
                              x0=initial_theta,
                              args=(X, p, lamda),
                              method='TNC',
                              jac=gradient)
        # Row index 0 corresponds to label 1, ..., index 8 to label 9,
        # and index 9 to label 10 (which stands for the digit 0)
        all_theta[c - 1, :] = result.x
    return all_theta
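# Note: opt.minimize flattens x0 to a 1-D array internally and calls both
# fun and jac with that 1-D theta, which is why lrcostFunction and
# gradient reshape theta back to a column vector at the top.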
def predictOneVsAll(all_theta, X):
    m = X.shape[0]
    ones = np.ones([m, 1])
    X = np.column_stack([ones, X])
    h = sigmoid(np.dot(X, all_theta.T))
    # Column index 0 corresponds to label 1, ..., index 8 to label 9,
    # and index 9 to label 10 (the digit 0)
    p = np.argmax(h, axis=1) + 1
    p = np.reshape(p, [m, 1])
    return p
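# For reference: h has shape (m, num_labels); entry (i, c-1) is the
# one-vs-all score sigmoid(x_i @ theta_c) for label c, so the argmax over
# columns plus 1 is the predicted label for each example.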
# Part 1: Loading and Visualizing Data
input_layer_size = 400  # 20x20 input images of digits
num_labels = 10         # 10 labels, from 1 to 10
data = sci.loadmat('ex3data1.mat')
#print(data) # data is a dict
X = data['X']
y = data['y']
#print('x', type(X), 'y', type(y))
# Part 2a: Vectorize Logistic Regression
theta_t = [[-2], [-1], [1], [2]]
ones = np.ones([5, 1])
# Build X_t with shape [3,5] and then transpose, so that the values line
# up with the Octave test data
X_t = np.reshape(range(1, 16), [3, 5]) / 10
X_t = X_t.T
X_t = np.column_stack([ones, X_t])
#print('X_t', X_t)
y_t = [[1], [0], [1], [0], [1]]
lamda_t = 3
cost = lrcostFunction(theta_t, X_t, y_t, lamda_t)
grad = gradient(theta_t, X_t, y_t, lamda_t)
print('Cost:', cost)
print('Expected cost: 2.534819')
print('Gradients:')
print('', grad)
print('Expected gradients:')
print('0.146561\n -0.548558\n 0.724722\n 1.398003')
# Part 2b: One-vs-All Training
lamda = 0.1
all_theta = oneVsAll(X, y, num_labels, lamda)
#print('all_theta', all_theta.shape)
# Part 3: Predict for One-Vs-All
p = predictOneVsAll(all_theta, X)
# temp holds True (=1) and False (=0) values
temp = (p == y)
prob = np.mean(temp)
print('Training Set Accuracy:', prob)
Output:

Cost: [[2.5348194]]
Expected cost: 2.534819
Gradients:
 [[ 0.14656137]
 [-0.54855841]
 [ 0.72472227]
 [ 1.39800296]]
Expected gradients:
0.146561
 -0.548558
 0.724722
 1.398003
Training Set Accuracy: 0.9646
References:
https://blog.csdn.net/lccflccf/category_8379707.html
https://blog.csdn.net/Cowry5/article/details/83302646
https://blog.csdn.net/weixin_44027820/category_9754493.html
Summary:
1. Note that the labels in the Octave version start from 1 (with label 10 standing for the digit 0), while Python indexing starts from 0, so the predicted index has to be mapped back to the matching label when producing the final output.
2. Octave reports a training accuracy of 94.98%, while my run gives 0.9646; the difference is probably because opt.minimize is not the same optimizer as the one the Octave exercise uses. Since the computed cost and gradient match the expected values almost exactly, the implementation itself should be fine.
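To make point 1 concrete, here is a minimal sketch (with made-up scores) of how the 0-based argmax index is mapped back to the 1-based labels:
import numpy as np
# One row of one-vs-all scores for a single example (illustrative numbers);
# column 0 scores label 1, ..., column 9 scores label 10 (the digit 0)
scores = np.array([[0.1, 0.2, 0.05, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]])
label = np.argmax(scores, axis=1) + 1
print(label)  # [4]: the best column index is 3, which means label 4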