Performance Evaluation Reference: http://blog.csdn.net/pipisorry/article/details/52250760
from sklearn import metrics
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Train several scikit-learn classifiers on one dataset and report how long
# each takes to fit and how accurate it is on the held-out test split.
#
# The code runs on both Python 2 and Python 3: every print call passes a
# single pre-formatted string, which behaves identically on both.

import sys
import os
import time
import numpy as np

try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3: the C accelerator is used automatically

if sys.version_info[0] < 3:
    # Python 2 only: force the process-wide default string encoding to UTF-8.
    # `reload` is a builtin there; on Python 3 this branch is never executed.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')


# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Fit and return a MultinomialNB model (alpha=0.01) on the training data."""
    from sklearn.naive_bayes import MultinomialNB
    model = MultinomialNB(alpha=0.01)
    model.fit(train_x, train_y)
    return model


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Fit and return a KNeighborsClassifier with default settings."""
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    return model


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Fit and return an L2-penalised LogisticRegression model."""
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(penalty='l2')
    model.fit(train_x, train_y)
    return model


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Fit and return a RandomForestClassifier with 8 trees."""
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=8)
    model.fit(train_x, train_y)
    return model


# Decision Tree Classifier
def decision_tree_classifier(train_x, train_y):
    """Fit and return a DecisionTreeClassifier with default settings."""
    from sklearn import tree
    model = tree.DecisionTreeClassifier()
    model.fit(train_x, train_y)
    return model


# GBDT (Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Fit and return a GradientBoostingClassifier with 200 estimators."""
    from sklearn.ensemble import GradientBoostingClassifier
    model = GradientBoostingClassifier(n_estimators=200)
    model.fit(train_x, train_y)
    return model


# SVM Classifier
def svm_classifier(train_x, train_y):
    """Fit and return an RBF-kernel SVC with probability estimates enabled."""
    from sklearn.svm import SVC
    model = SVC(kernel='rbf', probability=True)
    model.fit(train_x, train_y)
    return model


# SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Grid-search C and gamma for an RBF SVC, then fit a fresh model.

    The best parameters found by the search are printed one per line, and a
    new SVC built from the best ``C`` and ``gamma`` is fitted on the full
    training data and returned.
    """
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        # Older scikit-learn releases only ship the search in grid_search.
        from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    # NOTE(review): the values after 10 were truncated in the copy of this
    # script that was recovered; 100 and 1000 continue the decade sweep.
    # Confirm against the intended search range.
    param_grid = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                  'gamma': [0.001, 0.0001]}
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)
    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        print('%s %s' % (para, val))
    model = SVC(kernel='rbf', C=best_parameters['C'],
                gamma=best_parameters['gamma'], probability=True)
    model.fit(train_x, train_y)
    return model


def read_data(data_file):
    """Load the train and test splits from a gzip-compressed pickle.

    The file must unpickle to a 3-item sequence ``(train, val, test)``; item
    0 of each split is taken as the features and item 1 as the labels. The
    validation split is read but not returned.

    Returns:
        The tuple ``(train_x, train_y, test_x, test_y)``.
    """
    import gzip
    # SECURITY: unpickling executes arbitrary code embedded in the file, so
    # only load data files that come from a trusted source.
    with gzip.open(data_file, "rb") as f:
        if sys.version_info[0] >= 3:
            # latin1 lets Python 3 read pickles that Python 2 wrote (for
            # example NumPy arrays); pickles written by Python 3 load the same.
            train, _val, test = pickle.load(f, encoding='latin1')
        else:
            train, _val, test = pickle.load(f)
    train_x = train[0]
    train_y = train[1]
    test_x = test[0]
    test_y = test[1]
    return train_x, train_y, test_x, test_y


def main():
    """Fit every selected classifier and print its timing and test metrics."""
    # Imported here, like the estimators above, so that importing this module
    # (for instance to reuse read_data) does not require scikit-learn.
    from sklearn import metrics

    data_file = os.path.join("data", "mnist.pkl.gz")
    model_save_file = None  # set to a path to pickle the fitted models
    model_save = {}

    test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM', 'GBDT']
    classifiers = {'NB': naive_bayes_classifier,
                   'KNN': knn_classifier,
                   'LR': logistic_regression_classifier,
                   'RF': random_forest_classifier,
                   'DT': decision_tree_classifier,
                   'SVM': svm_classifier,
                   'SVMCV': svm_cross_validation,
                   'GBDT': gradient_boosting_classifier,
                   }

    print('reading training and testing data...')
    train_x, train_y, test_x, test_y = read_data(data_file)
    num_train, num_feat = train_x.shape
    num_test, num_feat = test_x.shape
    # Precision and recall are only reported for two-class problems.
    is_binary_class = (len(np.unique(train_y)) == 2)
    print('******************** Data Info *********************')
    print('#training data: %d, #testing_data: %d, dimension: %d'
          % (num_train, num_test, num_feat))

    for classifier in test_classifiers:
        print('******************* %s ********************' % classifier)
        start_time = time.time()
        model = classifiers[classifier](train_x, train_y)
        print('training took %fs!' % (time.time() - start_time))
        predict = model.predict(test_x)
        if model_save_file is not None:
            model_save[classifier] = model
        if is_binary_class:
            precision = metrics.precision_score(test_y, predict)
            recall = metrics.recall_score(test_y, predict)
            print('precision: %.2f%%, recall: %.2f%%'
                  % (100 * precision, 100 * recall))
        accuracy = metrics.accuracy_score(test_y, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))

    if model_save_file is not None:
        with open(model_save_file, 'wb') as out:
            pickle.dump(model_save, out)


if __name__ == '__main__':
    main()