This post lists 10 commonly used machine learning algorithms, mostly by calling the corresponding implementations in sklearn. I have also written my own implementations of some of them, which will be appended next to the corresponding algorithms later (to be updated).

1. SVM (Support Vector Machine):
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        clf = svm.SVC(C=0.8, kernel='rbf', gamma=20, decision_function_shape='ovr')
        clf.fit(train_X, train_y)
        # print(clf.score(train_X, train_y))  # training accuracy
        preY = clf.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('SVM')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
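The C and gamma values above are hard-coded; in practice they are usually tuned. A minimal sketch of picking them with sklearn's GridSearchCV (the parameter grid here is only an illustrative assumption, not a recommended setting):

from sklearn import svm
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV

# Illustrative grid; the specific values are assumptions, not tuned recommendations.
param_grid = {'C': [0.1, 0.8, 1, 10], 'gamma': [0.1, 1, 20]}
search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=5)
search.fit(load_iris().data, load_iris().target)
print(search.best_params_, search.best_score_)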
2. LR (Logistic Regression):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        # 'l2' specifies the regularization penalty
        model = LogisticRegression(penalty='l2')
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('LR')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
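As a from-scratch counterpart, here is a minimal sketch of binary logistic regression trained by gradient descent (the lr and n_iter values are arbitrary assumptions; iris is reduced to two classes only to keep the binary model applicable):

import numpy as np
from sklearn.datasets import load_iris

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def fit_logreg(X, y, lr=0.1, n_iter=1000):
    # Gradient descent on the negative log-likelihood; w includes the bias term.
    X = np.hstack([np.ones((X.shape[0], 1)), X])
    w = np.zeros(X.shape[1])
    for _ in range(n_iter):
        grad = X.T @ (sigmoid(X @ w) - y) / X.shape[0]
        w -= lr * grad
    return w

def predict_logreg(w, X):
    X = np.hstack([np.ones((X.shape[0], 1)), X])
    return (sigmoid(X @ w) >= 0.5).astype(int)

# Keep only classes 0 and 1 so the binary model applies.
data, target = load_iris().data, load_iris().target
mask = target < 2
w = fit_logreg(data[mask], target[mask])
print('train accuracy:', np.mean(predict_logreg(w, data[mask]) == target[mask]))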
3. LDA (Linear Discriminant Analysis):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        lda = LinearDiscriminantAnalysis(n_components=2)
        lda.fit(train_X, train_y)
        X_new = lda.transform(train_X)  # 2-D projection of the training data (not used by predict below)
        preY = lda.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('LDA')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
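Because fit is given the class labels, lda also works as a supervised dimensionality reducer. A quick sketch of checking how much variance each of the two discriminant axes captures, using attributes exposed by sklearn's LinearDiscriminantAnalysis:

from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis(n_components=2)
X_new = lda.fit_transform(load_iris().data, load_iris().target)
print(X_new.shape)                    # (150, 2): samples projected onto 2 discriminant axes
print(lda.explained_variance_ratio_)  # variance share explained by each axis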
4. KNN (k-Nearest Neighbors):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from collections import Counter

# Load the iris dataset and split it into a training set
# (trainX, trainY) and a test set (testX, testY).
# KNN implementation:
#   testX is the test set
#   k is the number of nearest neighbors to consider
def KNN(trainX, trainY, testX, k=5):
    predY = []
    for x in testX:
        # Euclidean distance from the sample to every training point
        distance = [np.sqrt(np.sum(np.power(x_train - x, 2))) for x_train in trainX]
        # Indices that sort the distances in ascending order
        indexSort = np.argsort(distance)
        # Labels of the k nearest training points
        nearK_y = [trainY[i] for i in indexSort[:k]]
        # Count the label occurrences among the k neighbors
        cntY = Counter(nearK_y)
        # Predict the most common label
        y_predict = cntY.most_common(1)[0][0]
        predY.append(y_predict)
    return predY

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        preY = KNN(train_X, train_y, test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('KNN')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
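For reference, the same experiment can be run with sklearn's built-in KNeighborsClassifier, which should give comparable accuracy to the hand-written version; a minimal sketch:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=1)
knn = KNeighborsClassifier(n_neighbors=5)  # same k as the hand-written version
knn.fit(train_X, train_y)
print('Accuracy is %.2f' % knn.score(test_X, test_y))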
5. Decision Tree:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
import numpy as np
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        model = DTC()
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('Decision Tree')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
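One advantage of decision trees is interpretability: the learned split rules can be printed directly. A short sketch using sklearn's export_text (available in sklearn >= 0.21; max_depth=3 is an illustrative choice):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
tree = DecisionTreeClassifier(max_depth=3, random_state=0).fit(iris.data, iris.target)
print(export_text(tree, feature_names=list(iris.feature_names)))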
7. AdaBoost (Ensemble Learning):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier as ADA
import numpy as np
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        model = ADA()
        model.fit(train_X, train_y)
        preY = model.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('AdaBoost')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
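By default AdaBoostClassifier boosts shallow decision trees; the number of boosting rounds and the learning rate are the main knobs. A minimal sketch with cross-validation (the parameter values are illustrative assumptions):

from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

model = AdaBoostClassifier(n_estimators=100, learning_rate=0.5, random_state=0)
scores = cross_val_score(model, load_iris().data, load_iris().target, cv=5)
print('CV accuracy: %.2f' % scores.mean())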
8. Naive Bayes:

from sklearn.datasets import load_iris
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        clf = MultinomialNB()
        clf = clf.fit(train_X, train_y)
        preY = clf.predict(test_X)
        allPre.append(accuracy_score(test_y, preY))
    print('NB')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
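MultinomialNB is designed for count-style features (e.g., word counts); it runs on iris only because the features happen to be non-negative. For continuous measurements like iris, GaussianNB is the more natural variant; a minimal sketch:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=1)
clf = GaussianNB().fit(train_X, train_y)
print('Accuracy is %.2f' % clf.score(test_X, test_y))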
9. K-means (Clustering):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import cluster
import numpy as np
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_X, test_X, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        # iris has 3 classes, so use 3 clusters; K-means is unsupervised and ignores labels during fit
        model = cluster.KMeans(init='k-means++', n_clusters=3, random_state=8)
        model.fit(train_X)
        # Cluster indices are arbitrary, so map each cluster to the majority
        # true class on the training set before measuring accuracy.
        train_clusters = model.predict(train_X)
        mapping = {c: np.bincount(train_y[train_clusters == c]).argmax() for c in np.unique(train_clusters)}
        preY = [mapping[c] for c in model.predict(test_X)]
        allPre.append(accuracy_score(test_y, preY))
    print('KMeans')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
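An alternative that avoids the label-mapping step is to score the clustering with a permutation-invariant metric such as the adjusted Rand index; a minimal sketch:

from sklearn import cluster
from sklearn.datasets import load_iris
from sklearn.metrics import adjusted_rand_score

X, y = load_iris().data, load_iris().target
labels = cluster.KMeans(n_clusters=3, random_state=8).fit_predict(X)
print('ARI: %.2f' % adjusted_rand_score(y, labels))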
10. PCA (Principal Component Analysis):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

if __name__ == '__main__':
    allPre = []
    for i in range(1, 11):
        train_x, test_x, train_y, test_y = train_test_split(load_iris().data, load_iris().target, test_size=0.2, random_state=i)
        # Keep enough components to explain 95% of the variance
        model = PCA(n_components=0.95)
        # Fit the projection on the training set only, then apply it to both sets
        pca_train_x = model.fit_transform(train_x)
        pca_test_x = model.transform(test_x)
        # PCA only reduces dimensionality; a separate classifier (here KNN) does the classification
        knn = KNeighborsClassifier()
        knn.fit(pca_train_x, train_y)
        preY = knn.predict(pca_test_x)
        allPre.append(accuracy_score(test_y, preY))
    print('PCA + KNN')
    for i in range(0, 10):
        print('The test:', i + 1, ', Accuracy is %.2f' % allPre[i])
    print('The average is %.2f' % np.mean(allPre))
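To see how many components n_components=0.95 actually keeps on iris, inspect the fitted model's explained_variance_ratio_; a minimal sketch:

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

pca = PCA(n_components=0.95).fit(load_iris().data)
print(pca.n_components_)              # number of components kept
print(pca.explained_variance_ratio_)  # variance share of each kept component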