"""KNN-on-PCA baseline: fit on train.csv/trainlabels.csv, predict test.csv.

The features are reduced with a whitened PCA, a k-nearest-neighbours
classifier backed by a k-d tree is fitted on the reduced training data, and
the predictions for the test set are written to ``Prediction.csv`` as
``id,label`` rows.
"""
import pylab as pl
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
try:
    from sklearn.model_selection import (
        StratifiedKFold,
        cross_val_score,
        train_test_split,
    )
except ImportError:
    # Older scikit-learn releases only provide the legacy module, which was
    # later removed in favour of sklearn.model_selection.
    from sklearn.cross_validation import (
        StratifiedKFold,
        cross_val_score,
        train_test_split,
    )
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC
import sklearn.preprocessing as pp


def dsplit(train_init, target_init, test_size=0.1, random_state=42):
    """Split the labelled data into a training part and a hold-out part.

    Args:
        train_init: Feature matrix, one row per sample.
        target_init: Labels aligned with the rows of ``train_init``.
        test_size: Fraction of the samples placed in the hold-out part.
        random_state: Seed that makes the shuffle reproducible.

    Returns:
        The tuple ``(train, test, train_target, test_target)``.
    """
    train, test, train_target, test_target = train_test_split(
        train_init, target_init, test_size=test_size, random_state=random_state
    )
    # Report the sizes so the effect of the split is visible on the console.
    # The single-argument call form runs on both Python 2 and Python 3.
    print(len(train_init))
    print(len(train))
    print(len(train_target))
    return train, test, train_target, test_target


def dopca(train, train_target, test_init, n_components=12):
    """Project the training and test features onto whitened PCA components.

    The projection is fitted on ``train`` only and then applied unchanged to
    ``test_init`` so that both end up in the same reduced space.

    Args:
        train: Training feature matrix used to fit the PCA.
        train_target: Training labels; forwarded to ``fit_transform``.
        test_init: Feature matrix to project with the fitted PCA.
        n_components: Number of principal components to keep.

    Returns:
        The tuple ``(train, test_init)`` of projected matrices.
    """
    # A lower-case local is required: rebinding the name ``PCA`` inside the
    # function would hide the imported class and fail before it is called.
    pca = PCA(n_components=n_components, whiten=True)
    train = pca.fit_transform(train, train_target)
    test_init = pca.transform(test_init)
    return train, test_init


# Upper-case spelling of the name, kept so either form can be called.
DOPCA = dopca


def classifier(train, train_target, n_neighbors=13):
    """Fit and return the k-nearest-neighbours classifier.

    The model uses a k-d tree index, uniform neighbour weights and ``p=1``
    (Manhattan distance for the default Minkowski metric).

    Args:
        train: Training feature matrix.
        train_target: Labels aligned with the rows of ``train``.
        n_neighbors: Number of neighbours that vote on each prediction.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    kclass = KNeighborsClassifier(
        n_neighbors=n_neighbors, algorithm='kd_tree', weights='uniform', p=1
    )
    kclass.fit(train, train_target)
    # For a local estimate, split with dsplit() first and print
    # classification_report() for the predictions on both parts.
    return kclass


def main():
    """Run the full pipeline and write the submission file."""
    # Passing paths lets NumPy open and close the files itself, so no file
    # handle is left open.
    train_init = np.genfromtxt('train.csv', delimiter=',')
    # NOTE(review): confirm the exact case of this file name on
    # case-sensitive filesystems.
    target_init = np.genfromtxt('trainlabels.csv', delimiter=',')
    test_init = np.genfromtxt('test.csv', delimiter=',')

    train, test_init = dopca(train_init, target_init, test_init)
    kclass = classifier(train, target_init)
    res = kclass.predict(test_init)

    # Ids are 1-based and derived from the number of predictions instead of a
    # fixed row count, so the stack cannot fail on a shape mismatch.
    idcol = np.arange(1, len(res) + 1)
    res2 = np.column_stack((idcol, res))
    np.savetxt('Prediction.csv', res2, fmt='%d', delimiter=",")


if __name__ == "__main__":
    main()
Public score=0.92399
The classifier is k-nearest neighbours (KNN) backed by a k-d tree, applied after reducing the features with a PCA decomposition.
Experiments show that PCA improves classification accuracy: without PCA, the public score drops to 0.87221.
Functions:
Splitting the data:
train_test_split(train_init, target_init, test_size=0.1, random_state=42)
PCA:
pca = PCA(n_components=12, whiten=True)
train = pca.fit_transform(train, train_target)
test_init = pca.transform(test_init)
KNN:
kclass = KNeighborsClassifier(n_neighbors=13, algorithm='kd_tree', weights='uniform', p=1)
kclass.fit(train, train_target)
res = kclass.predict(test_init)