可视化决策树:安装Graphviz
安装完后要把 Graphviz 添加到环境变量(PATH)中。
关于决策树的数据输入
要求转换成二进制(0/1)形式,即每个属性的每个取值单独占一列。例如:

age  income  student
y    h       n
m    m       y
s    l       n

转化成:

age        income     student
y  m  s    h  m  l    n  y
1  0  0    1  0  0    1  0
0  1  0    0  1  0    0  1
0  0  1    0  0  1    1  0

生成决策树
# Train an entropy-based decision tree on a categorical CSV file and export it
# in Graphviz .dot format.
#
# Expected CSV layout (as used by the loop below): the first row holds the
# column names, the last column of every row is the class label, and columns
# 1 .. n-2 are the categorical features. Column 0 is skipped (presumably a
# row id -- confirm against the data file).
import csv
from io import StringIO  # replaces the non-existent `sklearn.externals.sz` import

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

feature = []  # one {column name: raw value} dict per sample
label = []    # class label (last column) of each sample

# On Python 3 the csv module needs a text-mode file opened with newline='';
# the `with` block also guarantees the handle is closed.
with open(r'D:\asd.csv', newline='') as allData:
    reader = csv.reader(allData)
    header = next(reader)  # first row: attribute names
    for row in reader:  # each row is one line of the file
        if not row:
            # Skip blank lines (e.g. a trailing newline) instead of crashing.
            continue
        label.append(row[-1])  # last value of the row is the label
        # Map column name -> value for every feature column.
        rowDict = {header[i]: row[i] for i in range(1, len(row) - 1)}
        feature.append(rowDict)
print(feature)

vec = DictVectorizer()
dummyX = vec.fit_transform(feature).toarray()  # one-hot encode into 0/1 columns
print("dummyX:" + str(dummyX))

# get_feature_names() was removed from recent scikit-learn releases in favour
# of get_feature_names_out(); support both and normalise to a list of str.
if hasattr(vec, "get_feature_names_out"):
    feature_names = [str(name) for name in vec.get_feature_names_out()]
else:
    feature_names = vec.get_feature_names()
print(feature_names)

lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(label)  # both X and Y are now 0/1 encoded

clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf:" + str(clf))  # print the fitted estimator

with open("allInformation.dot", 'w') as f:
    # export_graphviz writes into the open handle; nothing needs rebinding.
    tree.export_graphviz(clf, feature_names=feature_names, out_file=f)

# Prediction: fill `newrow` with ONE encoded sample (same column order and
# length as a row of dummyX). predict() expects a 2-D input, so the single
# sample is wrapped in a list; while the placeholder is empty no prediction
# is attempted and `pre` stays None.
newrow = []
pre = clf.predict([newrow]) if newrow else None

# Next step: visualize the tree (render allInformation.dot with Graphviz).
在命令行输入: dot -T pdf allInformation.dot -o output.pdf