"""Compare KNN, decision-tree and Gaussian naive Bayes classifiers.

Feature and label files for the first four subjects are used for training and
the fifth subject is held out for testing.  A classification report
(precision, recall, F1-score, support) is printed for each classifier.
"""

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer  # fills in missing values
from sklearn.metrics import classification_report  # evaluates predictions
from sklearn.model_selection import train_test_split  # noqa: F401 - retained from the original import list
from sklearn.naive_bayes import GaussianNB  # naive Bayes classifier
from sklearn.neighbors import KNeighborsClassifier  # k-nearest-neighbours
from sklearn.tree import DecisionTreeClassifier  # decision tree classifier
from sklearn.utils import shuffle

# Number of feature columns every feature file is expected to contain.
N_FEATURES = 41


def loaddataset(feature_paths, label_paths):
    """Load and stack feature and label files.

    Each feature file is read as comma-separated values without a header row;
    ``?`` marks a missing value, which is replaced by the mean of its column
    within that file.  Each label file is read as a single header-less column.

    Note on ``fit``/``transform``: ``fit`` learns the per-column means and
    ``transform`` returns a NumPy array in which the missing entries have been
    replaced by those means.  It is unrelated to pandas' group-wise
    ``transform``.

    Args:
        feature_paths: iterable of paths to feature files.
        label_paths: iterable of paths to label files.

    Returns:
        A ``(feature, label)`` tuple: ``feature`` is a 2-D array with
        ``N_FEATURES`` columns, ``label`` is a flat 1-D array.  With no input
        paths both are empty.
    """
    # Seed with empty arrays so an empty path list still yields correctly
    # shaped results and a file with the wrong column count fails loudly.
    feature_parts = [np.empty((0, N_FEATURES))]
    for path in feature_paths:
        frame = pd.read_table(path, delimiter=",", na_values="?", header=None)
        imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
        feature_parts.append(imputer.fit_transform(frame))

    label_parts = [np.empty((0, 1))]
    for path in label_paths:
        label_parts.append(pd.read_table(path, header=None).values)

    # Concatenate once instead of re-copying the accumulated array per file.
    feature = np.concatenate(feature_parts)
    # Flatten the (n, 1) label column into a 1-D vector.
    label = np.ravel(np.concatenate(label_parts))
    return feature, label


def main():
    """Train the three classifiers and print their classification reports."""
    # NOTE(review): file names reconstructed from a garbled listing - confirm
    # they match the files on disk.
    feature_paths = ["A.feature", "B.feature", "C.feature", "D.feature", "E.feature"]
    label_paths = ["A.label", "B.label", "C.label", "D.label", "E.label"]

    # The first four files form the training set, the last one the test set.
    x_train, y_train = loaddataset(feature_paths[:4], label_paths[:4])
    x_test, y_test = loaddataset(feature_paths[4:], label_paths[4:])

    # Shuffle the training samples, keeping features and labels aligned.  The
    # listing used train_test_split(test_size=0.0) for this, which current
    # scikit-learn rejects.
    x_train, y_train = shuffle(x_train, y_train)

    classifiers = [
        ("KNN", KNeighborsClassifier()),
        ("DT", DecisionTreeClassifier()),
        ("Bayes", GaussianNB()),
    ]

    answers = []
    for name, model in classifiers:
        print("Start training {} ...".format(name))
        model.fit(x_train, y_train)
        print("Training done!")
        answers.append((name, model.predict(x_test)))
        print("Prediction done!")

    # classification_report summarises precision, recall, F1-score and support.
    for name, answer in answers:
        print("\n\nThe classification report for {}:".format(name))
        print(classification_report(y_test, answer))


if __name__ == "__main__":
    main()