Under Python 3.6.3, modify the function `def classify0(inX, dataSet, labels, k)`: change `classCount.iteritems()` to `classCount.items()`. In addition, `print` is a function in Python 3, so its arguments must be enclosed in a pair of parentheses; otherwise execution raises an error. The modified Chapter 2 code is as follows.
from numpy import *  # noqa: F401,F403 -- kept from the original listing (array, tile, zeros)
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: Feature vector to classify (sequence or array with one value
            per column of ``dataSet``).
        dataSet: 2-D NumPy array holding one training sample per row.
        labels: Sequence of class labels, ``labels[i]`` belonging to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` closest rows.
    """
    data_set_size = dataSet.shape[0]
    # Repeat inX once per training row so the subtraction is row-wise.
    diff_mat = tile(inX, (data_set_size, 1)) - dataSet
    sq_distances = (diff_mat ** 2).sum(axis=1)
    distances = sq_distances ** 0.5  # Euclidean distance to every row
    sorted_dist_indices = distances.argsort()

    class_count = {}
    for i in range(k):
        vote_label = labels[sorted_dist_indices[i]]
        class_count[vote_label] = class_count.get(vote_label, 0) + 1

    # Python 3: dict.items() replaces the Python 2 dict.iteritems().
    sorted_class_count = sorted(
        class_count.items(), key=operator.itemgetter(1), reverse=True
    )
    return sorted_class_count[0][0]


def createDataSet():
    """Return a four-point toy data set and its labels.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array of points and
        ``labels`` the list ``['A', 'A', 'B', 'B']``.
    """
    # One row per sample; an extra level of brackets would add a third axis
    # and break the row-wise arithmetic in classify0.
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def file2matrix(filename):
    """Parse a tab-separated text file into a feature matrix and label list.

    The first three fields of every line become one matrix row; the last
    field is converted to ``int`` and used as the class label. Lines that are
    empty after stripping are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: an ``(n, 3)`` float array
        and a list of ``n`` integer labels.
    """
    feature_rows = []
    class_label_vector = []
    # A single pass inside a context manager: the file is read once and is
    # always closed, even if a line fails to parse.
    with open(filename) as fr:
        for raw_line in fr:
            line = raw_line.strip()
            if not line:
                continue
            list_from_line = line.split('\t')
            feature_rows.append([float(value) for value in list_from_line[0:3]])
            class_label_vector.append(int(list_from_line[-1]))

    return_mat = zeros((len(feature_rows), 3))
    for index, row in enumerate(feature_rows):
        return_mat[index, :] = row
    return return_mat, class_label_vector


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` with ``(value - min) / (max - min)``.

    Args:
        dataSet: 2-D NumPy array, one sample per row.

    Returns:
        A ``(normDataSet, ranges, minVals)`` tuple: the rescaled array, the
        per-column ``max - min`` and the per-column minimum.
    """
    min_vals = dataSet.min(0)
    max_vals = dataSet.max(0)
    ranges = max_vals - min_vals
    # Broadcasting applies the per-column vectors to every row.
    norm_data_set = (dataSet - min_vals) / ranges  # element-wise divide
    return norm_data_set, ranges, min_vals


def datingClassTest(filename='DatingTestSet2.txt', hoRatio=0.50):
    """Print the hold-out error rate of ``classify0`` (k=3) on a data file.

    The first ``int(m * hoRatio)`` normalised rows are used as test vectors
    and the remaining rows as the training set.

    Args:
        filename: Data file in the format read by ``file2matrix``.
            NOTE(review): the default is the literal from the original
            listing, whose casing may have been altered by the post's
            formatting -- pass the real path if it differs.
        hoRatio: Fraction of rows held out for testing (0.50 = half).
    """
    dating_data_mat, dating_labels = file2matrix(filename)  # load data set from file
    norm_mat, ranges, min_vals = autoNorm(dating_data_mat)
    m = norm_mat.shape[0]
    num_test_vecs = int(m * hoRatio)
    error_count = 0.0
    for i in range(num_test_vecs):
        classifier_result = classify0(
            norm_mat[i, :],
            norm_mat[num_test_vecs:m, :],
            dating_labels[num_test_vecs:m],
            3,
        )
        print("The classifier came back with:%d, the real answer is:%d"
              % (classifier_result, dating_labels[i]))
        if classifier_result != dating_labels[i]:
            error_count += 1.0
    print("The total error rate is:%f" % (error_count / float(num_test_vecs)))
    print(error_count)


def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 row vector.

    Args:
        filename: Path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A ``(1, 1024)`` array where element ``32 * i + j`` holds the digit at
        line ``i``, column ``j``.
    """
    return_vect = zeros((1, 1024))
    with open(filename) as fr:  # closed automatically when the block exits
        for i in range(32):
            line_str = fr.readline()
            for j in range(32):
                return_vect[0, 32 * i + j] = int(line_str[j])
    return return_vect


def _digit_label(file_name_str):
    """Return the integer before the first '_' of a file name, extension removed."""
    file_str = file_name_str.split('.')[0]  # take off .txt
    return int(file_str.split('_')[0])


def handwritingClassTest(trainingDir='trainingdigits', testDir='testdigits'):
    """Print the error count and error rate of ``classify0`` (k=3) on digit images.

    Every file in ``trainingDir`` becomes one training row; every file in
    ``testDir`` is classified and compared with the label taken from its
    file name (the integer before the first underscore).

    Args:
        trainingDir: Directory containing the training image files.
        testDir: Directory containing the test image files.
            NOTE(review): both defaults are the literals from the original
            listing, whose casing may have been altered by the post's
            formatting -- pass the real directories if they differ.
    """
    hw_labels = []
    training_file_list = listdir(trainingDir)  # load the training set
    m = len(training_file_list)
    training_mat = zeros((m, 1024))
    for i in range(m):
        file_name_str = training_file_list[i]
        hw_labels.append(_digit_label(file_name_str))
        training_mat[i, :] = img2vector('%s/%s' % (trainingDir, file_name_str))

    test_file_list = listdir(testDir)  # iterate through the test set
    error_count = 0.0
    m_test = len(test_file_list)
    for file_name_str in test_file_list:
        class_num = _digit_label(file_name_str)
        vector_under_test = img2vector('%s/%s' % (testDir, file_name_str))
        classifier_result = classify0(vector_under_test, training_mat, hw_labels, 3)
        print("The classifier came back with:%d, the real answer is:%d"
              % (classifier_result, class_num))
        if classifier_result != class_num:
            error_count += 1.0
    print("\nthe total number of errors is:%d" % error_count)
    print("\nthe total error rate is:%f" % (error_count / float(m_test)))
Machine Learning 2: k-Nearest Neighbor algorithm learning notes