python機器學習——資料的分類(knn,決策樹,貝葉斯)代碼筆記__python

來源:互聯網
上載者:User
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer  # successor of the removed sklearn.preprocessing.Imputer
from sklearn.metrics import classification_report  # precision / recall / F1 / support summary
from sklearn.model_selection import train_test_split  # original import, retained; shuffling now uses sklearn.utils.shuffle
from sklearn.naive_bayes import GaussianNB  # Gaussian naive Bayes classifier
from sklearn.neighbors import KNeighborsClassifier  # k-nearest-neighbours classifier
from sklearn.tree import DecisionTreeClassifier  # decision-tree classifier
from sklearn.utils import shuffle


def loadDataSet(feature_paths, label_paths, n_features=41):
    """Load feature and label files and stack them into two NumPy arrays.

    Each feature file is read as header-less, comma-separated text in which
    ``?`` marks a missing value.  Missing values are replaced by the mean of
    their column, computed separately for every file.  Each label file is
    read as header-less text with one label per row.

    Args:
        feature_paths: Iterable of paths to the feature files.
        label_paths: Iterable of paths to the label files, in the same order
            as ``feature_paths``.
        n_features: Number of columns every feature file must have.

    Returns:
        A ``(feature, label)`` tuple.  ``feature`` is a float array of shape
        ``(total_rows, n_features)``; ``label`` is a flat float array with one
        entry per label row.  With no input paths the arrays are empty.

    Raises:
        ValueError: If a feature file does not yield ``n_features`` columns
            (raised by ``np.concatenate``).
    """
    # Seeding with an empty float array keeps the result dtype float64 and
    # makes np.concatenate enforce the expected column count.
    feature_blocks = [np.empty((0, n_features))]
    for path in feature_paths:
        frame = pd.read_csv(path, na_values='?', header=None)
        # fit() learns the per-column means of this file; transform() then
        # writes those means into the missing cells.
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        feature_blocks.append(imputer.fit_transform(frame))

    label_blocks = [np.empty((0, 1))]
    for path in label_paths:
        label_blocks.append(pd.read_table(path, header=None).values)

    # Concatenate once at the end instead of re-copying the growing array
    # after every file.
    feature = np.concatenate(feature_blocks)
    # Flatten the (rows, 1) label column into the 1-D vector sklearn expects.
    label = np.ravel(np.concatenate(label_blocks))
    return feature, label


def main():
    """Train KNN, decision-tree and naive-Bayes models and print their reports."""
    feature_paths = ['A.feature', 'B.feature', 'C.feature', 'D.feature', 'E.feature']
    label_paths = ['A.label', 'B.label', 'C.label', 'D.label', 'E.label']

    # The first four files form the training set, the fifth the test set.
    x_train, y_train = loadDataSet(feature_paths[:4], label_paths[:4])
    x_test, y_test = loadDataSet(feature_paths[4:], label_paths[4:])

    # Shuffle samples and labels in unison.  The original used
    # train_test_split(test_size=0.0) for this, which current scikit-learn
    # rejects with a ValueError.
    x_train, y_train = shuffle(x_train, y_train)

    # (name used in the training message, name used in the report, estimator)
    classifiers = [
        ('knn', 'knn', KNeighborsClassifier()),
        ('DecisionTreeClassifier', 'DT', DecisionTreeClassifier()),
        ('Bayes', 'Bayes', GaussianNB()),
    ]

    predictions = []
    for train_name, report_name, classifier in classifiers:
        print('start trainning {}……'.format(train_name))
        classifier.fit(x_train, y_train)
        print('I have already trainning the data you just gave me!')
        predictions.append((report_name, classifier.predict(x_test)))
        print('Prediction done!')

    # Reports are printed only after every model has been trained, matching
    # the original output order.
    for report_name, predicted in predictions:
        print('\n\nThe classification report for {}:'.format(report_name))
        print(classification_report(y_test, predicted))


if __name__ == '__main__':
    main()
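註:關於transform函數的使用還不是很明確。補充說明:此處的 transform 是 scikit-learn 補值器(imputer)的方法,作用是用 fit 階段算出的各欄平均值填補缺失值;它與 pandas groupby 的 transform(將函數應用到各個分組)是不同的函數。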
相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.