2020吴恩达 machine learning 编程作业 python实现 ex8

    技术2026-03-19  10

    # -*- coding: utf-8 -*-
"""
Created on Sat Jul  4 16:49:40 2020

@author: cheetah023

Collaborative-filtering movie recommender (exercise 8).

The numerical helpers below are importable on their own; the exercise
walkthrough (Parts 1-7) lives in ``main()`` and only runs when this file is
executed as a script.
"""
import numpy as np
import scipy.io as sci
import scipy.optimize as opt


# Function definitions
def _unpack_params(params, num_users, num_movies, num_features):
    """Split the flat parameter vector into the X and Theta matrices.

    The first ``num_movies * num_features`` entries are reshaped to X
    (num_movies x num_features); the remainder is reshaped to Theta
    (num_users x num_features).
    """
    split = num_movies * num_features
    X = np.reshape(params[:split], [num_movies, num_features])
    Theta = np.reshape(params[split:], [num_users, num_features])
    return X, Theta


def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, lamda):
    """Return the regularized collaborative-filtering cost.

    Parameters
    ----------
    params : 1-D array holding X flattened, followed by Theta flattened.
    Y : (num_movies, num_users) array of ratings.
    R : (num_movies, num_users) mask; entries where R is 0 do not
        contribute to the squared-error term.
    num_users, num_movies, num_features : int
        Dimensions used to unpack ``params``.
    lamda : float
        Regularization strength applied to both X and Theta.

    Returns
    -------
    float
        Half the masked squared error plus ``lamda / 2`` times the sum of
        squared entries of X and Theta.
    """
    X, Theta = _unpack_params(params, num_users, num_movies, num_features)
    squared_error = np.sum(((X @ Theta.T - Y) ** 2) * R) / 2
    reg = (lamda / 2) * (np.sum(Theta ** 2) + np.sum(X ** 2))
    return squared_error + reg


def cofiGradient(params, Y, R, num_users, num_movies, num_features, lamda):
    """Return the gradient of ``cofiCostFunc`` as a flat vector.

    Takes the same arguments as ``cofiCostFunc``. The result is laid out
    like ``params``: the gradient w.r.t. X flattened, followed by the
    gradient w.r.t. Theta flattened.
    """
    X, Theta = _unpack_params(params, num_users, num_movies, num_features)
    # Masked prediction error, computed once and shared by both gradients.
    err = (X @ Theta.T - Y) * R
    X_grad = err @ Theta + lamda * X
    Theta_grad = err.T @ X + lamda * Theta
    return np.r_[X_grad.flatten(), Theta_grad.flatten()]


def loadMovieList(path='movie_ids.txt'):
    """Read movie titles from ``path`` and return them as a list of str.

    On every line, everything up to and including the first space is
    dropped and the remainder, stripped of surrounding whitespace (so no
    trailing newline), is kept as the title. A line without a space is kept
    whole. The file is read as Latin-1.
    """
    with open(path, 'r', encoding='Latin-1') as fid:
        return [line.split(' ', 1)[-1].strip() for line in fid]


def normalizeRatings(Y, R):
    """Subtract each row's mean rating from its rated entries.

    Parameters
    ----------
    Y : (m, n) array of ratings (one row per movie in the caller below).
    R : (m, n) mask, non-zero where a rating exists.

    Returns
    -------
    y_norm : (m, n) array, ``(Y - row mean) * R``; unrated entries are 0.
    y_mean : (m, 1) array of per-row means over rated entries. A row with
        no ratings gets mean 0 instead of the NaN that 0/0 would produce.
    """
    m = Y.shape[0]
    counts = np.sum(R, axis=1, dtype=float)
    totals = np.sum(Y * R, axis=1, dtype=float)
    # Divide only where at least one rating exists; other rows stay at 0.
    y_mean = np.divide(totals, counts, out=np.zeros(m), where=counts > 0)
    y_mean = y_mean.reshape([m, 1])
    y_norm = (Y - y_mean) * R
    return y_norm, y_mean


def main():
    """Run Parts 1-7 of the exercise, printing results along the way."""
    # Imported here so the helpers above can be used without matplotlib.
    import matplotlib.pyplot as plt

    # Part 1: Loading movie ratings dataset
    data = sci.loadmat('ex8_movies.mat')
    Y = data['Y']
    R = data['R']
    print('Y:', Y.shape)
    print('R:', R.shape)
    score1 = np.sum(Y[0, :]) / np.sum(R[0, :])
    print('Average rating for movie 1 (Toy Story):', score1)
    plt.figure(0)
    plt.imshow(Y)
    plt.xlabel('Users')
    plt.ylabel('Movies')

    # Part 2: Collaborative Filtering Cost Function
    data = sci.loadmat('ex8_movieParams.mat')
    X = data['X']
    Theta = data['Theta']
    # Shrink the problem so the check runs fast.
    num_users, num_movies, num_features = 4, 5, 3
    X = X[0:num_movies, 0:num_features]
    Theta = Theta[0:num_users, 0:num_features]
    # Separate names keep the full Y and R available for Part 7.
    Y_small = Y[0:num_movies, 0:num_users]
    R_small = R[0:num_movies, 0:num_users]
    params = np.r_[X.flatten(), Theta.flatten()]
    J = cofiCostFunc(params, Y_small, R_small, num_users, num_movies,
                     num_features, 0)
    print('Cost at loaded parameters:', J)
    print('(this value should be about 22.22)')

    # Part 3: Collaborative Filtering Gradient
    # (the gradient-checking function has not been written)

    # Part 4: Collaborative Filtering Cost Regularization
    J = cofiCostFunc(params, Y_small, R_small, num_users, num_movies,
                     num_features, 1.5)
    print('Cost at loaded parameters (lambda = 1.5):', J)
    print('(this value should be about 31.34)')

    # Part 5: Collaborative Filtering Gradient Regularization
    # (the gradient-checking function has not been written)

    # Part 6: Entering ratings for a new user
    movieList = loadMovieList()
    # 1-D vector: indexing yields scalars, so the comparison and the %d
    # formatting below never convert a 1-element array to a scalar.
    my_ratings = np.zeros(Y.shape[0])
    my_ratings[0] = 4
    my_ratings[97] = 2
    my_ratings[6] = 3
    my_ratings[11] = 5
    my_ratings[53] = 4
    my_ratings[63] = 5
    my_ratings[65] = 3
    my_ratings[68] = 5
    my_ratings[182] = 4
    my_ratings[225] = 5
    my_ratings[354] = 5
    for i in np.flatnonzero(my_ratings > 0):
        print('Rated %d for %s' % (my_ratings[i], movieList[i]))

    # Part 7: Learning Movie Ratings
    # The new user becomes column 0 of both Y and R.
    Y = np.column_stack([my_ratings, Y])
    R = np.column_stack([my_ratings != 0, R])
    Ynorm, Ymean = normalizeRatings(Y, R)
    num_movies, num_users = Y.shape
    num_features = 10
    X = np.random.random([num_movies, num_features])
    theta = np.random.random([num_users, num_features])
    initial_params = np.r_[X.flatten(), theta.flatten()]
    lamda = 10
    params = opt.fmin_cg(f=cofiCostFunc, x0=initial_params,
                         args=(Ynorm, R, num_users, num_movies,
                               num_features, lamda),
                         fprime=cofiGradient, maxiter=100, disp=True)
    X, theta = _unpack_params(params, num_users, num_movies, num_features)
    P = X @ theta.T
    # Column 0 is the new user; add the per-movie mean back.
    my_predictions = P[:, 0] + Ymean.flatten()
    print('P', P.shape)
    print('Ymean', Ymean.shape)
    print('my_predictions', my_predictions.shape)
    idx = np.argsort(my_predictions)[::-1]
    print('idx', idx.shape)
    for j in idx[:10]:
        print('Predicting rating ', my_predictions[j])
        print('for ', movieList[j])


if __name__ == '__main__':
    main()

    运行结果:

    Y: (1682, 943) R: (1682, 943) Average rating for movie 1 (Toy Story): 3.8783185840707963 Cost at loaded parameters: 22.224603725685675 (this value should be about 22.22) Cost at loaded parameters (lambda = 1.5): 31.34405624427422 (this value should be about 31.34) Rated 4 for Toy Story (1995)

    Rated 3 for Twelve Monkeys (1995)

    Rated 5 for Usual Suspects, The (1995)

    Rated 4 for Outbreak (1995)

    Rated 5 for Shawshank Redemption, The (1994)

    Rated 3 for While You Were Sleeping (1995)

    Rated 5 for Forrest Gump (1994)

    Rated 2 for Silence of the Lambs, The (1991)

    Rated 4 for Alien (1979)

    Rated 5 for Die Hard 2 (1990)

    Rated 5 for Sphere (1998)

    Warning: Maximum number of iterations has been exceeded.          Current function value: 38956.345281          Iterations: 100          Function evaluations: 153          Gradient evaluations: 153 P (1682, 944) Ymean (1682, 1) my_predictions (1682,) idx (1682,) Predicting rating  5.00000042235092 for  Marlene Dietrich: Shadow and Light (1996) 

    Predicting rating  5.000000293775594 for  Santa with Muscles (1996)

    Predicting rating  5.000000254944607 for  They Made Me a Criminal (1939)

    Predicting rating  5.000000222955008 for  Entertaining Angels: The Dorothy Day Story (1996)

    Predicting rating  5.000000130674626 for  Saint of Fort Washington, The (1993)

    Predicting rating  5.000000102308117 for  Great Day in Harlem, A (1994)

    Predicting rating  4.999999978328779 for  Someone Else's America (1995)

    Predicting rating  4.999995528723899 for  Star Kid (1997)

    Predicting rating  4.9999934984042165 for  Aiqing wansui (1994)

    Predicting rating  4.99999223680014 for  Prefontaine (1997)

    总结:

    1、检查梯度的函数checkCostFunction没有实现

    2、矩阵乘法运算符@用起来比np.dot简洁

     

     

    Processed: 0.010, SQL: 9