KNN分类鸢尾花数据集

    技术2022-07-11  98

    from collections import Counter

    import numpy as np


    def test(trainset, testdata, k, lables):
        """Classify one sample by majority vote among its k nearest neighbours.

        Distances are Euclidean. If ``k`` is larger than the number of
        training rows, every training row takes part in the vote.

        Args:
            trainset: 2-D array-like of training samples, one sample per row.
            testdata: 1-D array-like holding the sample to classify; it must
                broadcast against the rows of ``trainset``.
            k: Number of nearest neighbours to consult; must be >= 1.
            lables: Sequence of training labels, indexed like the rows of
                ``trainset``.

        Returns:
            The most common label among the ``k`` nearest training samples.
            On a tie in vote counts, the tied label that occurs first in
            nearest-first order is returned.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))

        train = np.asarray(trainset)
        sample = np.asarray(testdata)

        # Euclidean distance from the sample to every training row.
        distances = np.sum((train - sample) ** 2, axis=1) ** 0.5

        # Slicing (rather than indexing 0..k-1) tolerates k > number of rows.
        nearest = np.argsort(distances)[:k]

        # Index labels one by one so plain Python lists work as well as arrays;
        # feeding them nearest-first makes Counter break ties toward the
        # label seen first.
        votes = Counter(lables[i] for i in nearest)
        return votes.most_common(1)[0][0]


    # `test` is a classifier, not a test case: keep pytest from collecting it
    # when this module's names are star-imported into a test module.
    test.__test__ = False


    if __name__ == "__main__":
        # scikit-learn is only needed for this demo, so it is imported here;
        # `test` itself stays importable with NumPy alone.
        from sklearn import datasets
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.5, random_state=1
        )

        # Build an ndarray so the comparisons below are explicit element-wise
        # array operations instead of relying on list-to-array broadcasting.
        y_pred = np.array([test(X_train, sample, 10, y_train) for sample in X_test])

        total = X_test.shape[0]
        mislabeled = (y_test != y_pred).sum()
        accuracy = (y_test == y_pred).sum() / total

        # The format string ends in a literal percent sign, so the fraction
        # must be scaled to a percentage before printing.
        print(
            "IRIS:Number of mislabeled points out of a total %d points : %d, Acc: %f%%"
            % (total, mislabeled, accuracy * 100)
        )
    Processed: 0.011, SQL: 9