Chapter 3: A Tour of Machine Learning Classifiers Using Scikit-learn
3.1: Training a Perceptron via Scikit-learn
The listing below loads the Iris dataset, keeps two features (petal length and petal width), trains a perceptron on a standardized 70/30 train/test split, measures its accuracy, and plots the resulting decision regions.

from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]     # petal length and petal width
y = iris.target
np.unique(y)                 # the three class labels: array([0, 1, 2])

# randomly extract 30% of the 150 samples as test data
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# feature standardization:
# StandardScaler estimates the parameters mu (sample mean) and sigma (standard
# deviation) on the training data, then transforms each feature as (x - mu) / sigma
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# perceptron classification; eta0 is equivalent to the learning rate
from sklearn.linear_model import Perceptron
ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
y_pred = ppn.predict(X_test_std)

# y_test != y_pred yields a boolean array with True at each misclassified
# sample, e.g. array([False, False, ..., True, ..., False]), four True entries
print('Misclassified samples: %d' % (y_test != y_pred).sum())
# Misclassified samples: 4
# thus, the misclassification error on the test dataset is 4/45 = 0.089, or 8.9 percent

# the metrics module provides performance metrics
from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
# Accuracy: 0.91

# plot_decision_regions: visualize how well the classifier separates the
# different flower samples
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # set up marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'black', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface: classify every point of a grid spanning the
    # feature space, then draw the predictions as filled contours
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot all samples, one marker/color per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=cmap(idx), marker=markers[idx], label=cl)

    # highlight the test samples: leave the face color empty so each point is
    # drawn as a hollow black circle via edgecolors
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none',
                    edgecolors='black', alpha=1.0, linewidths=2,
                    marker='o', s=150, label='test set')

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std, y=y_combined,
                      classifier=ppn, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
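To make the standardization step concrete, the transform can be reproduced by hand. This is a minimal sketch, assuming the X_train and sc objects from the listing above; StandardScaler stores the fitted per-feature mean and standard deviation in its mean_ and scale_ attributes.

# sketch: verify StandardScaler against a manual (x - mu) / sigma transform
mu = X_train.mean(axis=0)      # per-feature sample mean, equals sc.mean_
sigma = X_train.std(axis=0)    # per-feature standard deviation, equals sc.scale_
manual_std = (X_train - mu) / sigma
print(np.allclose(manual_std, sc.transform(X_train)))   # True

Note that the scaler is fitted on the training data only and the same parameters are reused for the test data, so both sets end up on a comparable scale.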
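The accuracy reported by accuracy_score is simply the complement of the misclassification error. A small sketch, assuming the y_test and y_pred arrays from above, that reproduces the 0.91 figure from the 4 errors among 45 test samples:

n_errors = (y_test != y_pred).sum()          # 4
error_rate = n_errors / len(y_test)          # 4/45, about 0.089
print('accuracy: %.2f' % (1 - error_rate))   # accuracy: 0.91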
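One caveat: the listing follows the scikit-learn API the chapter was written against. In scikit-learn 0.20 and later the cross_validation module was removed in favor of model_selection, and the perceptron's n_iter epoch count was replaced by max_iter together with a tol stopping criterion. A rough equivalent of the split-and-train steps under those newer versions (a sketch, not the chapter's original code):

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)
# max_iter bounds the epochs; tol=None disables early stopping,
# approximating the old n_iter=40 behavior
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0)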