# coding: utf-8
"""k-nearest-neighbours (kNN) classifier with two small demo programs.

* ``classifypersion`` - interactive dating-site match prediction.
* ``handwritingclasstest`` - handwritten-digit recognition benchmark.

The code runs unchanged on Python 2 and Python 3.
"""
import operator
import os

import numpy as np

try:  # Python 2 spells the line-reading builtin ``raw_input``
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input

# Numeric class codes for the dating data set, keyed by lower-cased label.
_DATING_LABEL_CODES = {"didntlike": 1, "smalldoses": 2, "largedoses": 3}

# Digit images are square grids of '0'/'1' characters with this side length.
_IMAGE_SIDE = 32


def _safe_divisor(ranges):
    """Return *ranges* with zeros replaced by 1.0 so dividing never gives NaN."""
    return np.where(ranges == 0, 1.0, ranges)


def _digit_from_filename(name):
    """Return the integer before the first '_' of *name* (e.g. ``3_12.txt`` -> 3)."""
    return int(name.split(".")[0].split("_")[0])


def classify0(inX, dataSet, labels, k):
    """Classify *inX* by majority vote among its *k* nearest training samples.

    Args:
        inX: feature vector to classify (anything that broadcasts against a
            row of *dataSet*).
        dataSet: 2-D array with one training sample per row.
        labels: sequence holding the class label of each row of *dataSet*.
        k: number of neighbours that vote; values larger than the number of
            samples simply use every sample.

    Returns:
        The label with the most votes. Ties go to the label whose first
        voter is closest to *inX*.

    Raises:
        ValueError: if *k* is smaller than 1 or *dataSet* has no rows.
    """
    dataSet = np.asarray(dataSet)
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    if dataSet.shape[0] == 0:
        raise ValueError("dataSet must contain at least one sample")

    # Euclidean distance from inX to every row; broadcasting avoids np.tile.
    distances = (((dataSet - inX) ** 2).sum(axis=1)) ** 0.5
    nearest = distances.argsort()[:k]

    votes = {}
    for row in nearest:
        label = labels[row]
        votes[label] = votes.get(label, 0) + 1
    # max() returns the first maximal item, i.e. the nearest label on ties.
    return max(votes.items(), key=operator.itemgetter(1))[0]


def file2matric(filename):
    """Load the tab-separated dating data file *filename*.

    Every non-blank line must hold three numeric features followed by a class
    label (``didntLike``, ``smallDoses`` or ``largeDoses``; compared
    case-insensitively).

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where *returnMat* is an
        ``(n, 3)`` float array and *classLabelVector* is a list of the codes
        1 (didn't like), 2 (small doses) or 3 (large doses).

    Raises:
        ValueError: if a line has fewer than four fields, a non-numeric
            feature, or an unknown label.
    """
    with open(filename) as fr:  # closed even if parsing fails
        records = [line.strip().split("\t") for line in fr if line.strip()]

    returnMat = np.zeros((len(records), 3))
    classLabelVector = []
    for index, fields in enumerate(records):
        if len(fields) < 4:
            raise ValueError(
                "record %d of %s has %d fields, expected 4"
                % (index + 1, filename, len(fields))
            )
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        label = fields[-1]
        # An unknown label is an error; never reuse the previous row's code.
        if label.lower() not in _DATING_LABEL_CODES:
            raise ValueError(
                "unknown class label %r in record %d of %s"
                % (label, index + 1, filename)
            )
        classLabelVector.append(_DATING_LABEL_CODES[label.lower()])
    return returnMat, classLabelVector


def autonormal(dataSet):
    """Min-max scale every column of *dataSet* to the range [0, 1].

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled data, the
        per-column ``max - min`` and the per-column minimum. A constant
        column has range 0 and is scaled to all zeros instead of NaN.
    """
    # Force float so integer input is not floor-divided on Python 2.
    dataSet = np.asarray(dataSet, dtype=float)
    minVals = dataSet.min(0)
    ranges = dataSet.max(0) - minVals
    normDataSet = (dataSet - minVals) / _safe_divisor(ranges)
    return normDataSet, ranges, minVals


def classifypersion(dataset_path=os.path.join(".", "datingtestset.txt")):
    """Ask for three traits of a person and print the predicted match level.

    Args:
        dataset_path: dating data file understood by ``file2matric``;
            defaults to ``datingtestset.txt`` in the current directory.

    Raises:
        ValueError: if an answer is not a number or the data file is invalid.
    """
    resultList = ["not at all", "in small doses", "in large doses"]
    percentTats = float(
        _read_line("percentage of time spent playing video games?"))
    ffMiles = float(_read_line("frequent flier miles earned per year?"))
    iceCream = float(_read_line("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = file2matric(dataset_path)
    normMat, ranges, minVals = autonormal(datingDataMat)
    # Same feature order as the vector built by the original code.
    inArr = np.array([ffMiles, percentTats, iceCream])
    scaledArr = (inArr - minVals) / _safe_divisor(ranges)
    classifierResult = classify0(scaledArr, normMat, datingLabels, 3)
    # Class codes are 1-based, resultList is 0-based.
    print("You'll probably like this person: %s"
          % resultList[classifierResult - 1])


def img2vertor(filename):
    """Read a 32x32 text image of digits into a ``(1, 1024)`` float array.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row after row.

    Raises:
        ValueError: if a line is shorter than 32 characters or contains a
            character that is not a digit.
    """
    returnVect = np.zeros((1, _IMAGE_SIDE * _IMAGE_SIDE))
    with open(filename) as fr:  # read-only; the file is never modified
        for i in range(_IMAGE_SIDE):
            lineStr = fr.readline().rstrip("\r\n")
            if len(lineStr) < _IMAGE_SIDE:
                raise ValueError(
                    "line %d of %s has %d characters, expected at least %d"
                    % (i + 1, filename, len(lineStr), _IMAGE_SIDE)
                )
            start = _IMAGE_SIDE * i
            returnVect[0, start:start + _IMAGE_SIDE] = [
                int(ch) for ch in lineStr[:_IMAGE_SIDE]
            ]
    return returnVect


def handwritingclasstest(
        training_dir=os.path.join(".", "digits", "trainingdigits"),
        test_dir=os.path.join(".", "digits", "testdigits")):
    """Classify every test digit image with 3-NN and print the error rate.

    File names must look like ``<digit>_<anything>.<ext>``; the part before
    the first underscore is the true class.

    Args:
        training_dir: directory of training images read by ``img2vertor``.
        test_dir: directory of test images read by ``img2vertor``.
    """
    trainingFileList = os.listdir(training_dir)
    hwLabels = [_digit_from_filename(name) for name in trainingFileList]
    trainingMat = np.zeros((len(trainingFileList), _IMAGE_SIDE * _IMAGE_SIDE))
    for i, fileNameStr in enumerate(trainingFileList):
        trainingMat[i, :] = img2vertor(os.path.join(training_dir, fileNameStr))

    testFileList = os.listdir(test_dir)
    mTest = len(testFileList)
    if mTest == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        print("no test files found in %s" % test_dir)
        return

    errorCount = 0.0
    for fileNameStr in testFileList:
        realAnswer = _digit_from_filename(fileNameStr)
        vectorUnderTest = img2vertor(os.path.join(test_dir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, realAnswer))
        if classifierResult != realAnswer:
            errorCount += 1.0
    print("\nthe total number of error is %d" % errorCount)
    print("\nthe total error rate is %f" % (errorCount / float(mTest)))
The code above contains two small projects.
The first uses the kNN algorithm to improve match quality on a dating site; run it from the Python interactive interpreter.
Entering the parameters 10, 10000 and 0.5 in turn gives the result "in small doses".
The data set can be downloaded here (extraction code: UE4A).
The second small project is a handwritten digit recognition system.
It is also run from the interactive interpreter:
import KNN
KNN.handwritingclasstest()
The data set can be downloaded here (extraction code: 9QD1).
This code is based on the book "Machine Learning in Action".
Implementing kNN in Python