# [Machine learning] kNN algorithm Python implementation (example: digit recognition)

The prerequisite for applying any machine learning algorithm is choosing good features.
```python
# kNN classifier, dating-data test, and handwritten-digit test.
# Note: written for Python 2 (print statements, dict.iteritems).
from numpy import *
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify the input vector inX against dataSet using k-nearest neighbors."""
    dataSetSize = dataSet.shape[0]
    # Euclidean distance from inX to every row of dataSet
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()
    # Majority vote among the k nearest neighbors
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.iteritems(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def createDataSet():
    """Return a tiny toy data set of four 2-D points and their labels."""
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def file2matrix(filename):
    """Parse a tab-separated file into a feature matrix and a label vector."""
    fr = open(filename)
    numberOfLines = len(fr.readlines())     # get the number of lines in the file
    returnMat = zeros((numberOfLines, 3))   # prepare matrix to return
    classLabelVector = []                   # prepare labels to return
    fr = open(filename)
    index = 0
    for line in fr.readlines():
        line = line.strip()
        listFromLine = line.split('\t')
        returnMat[index, :] = listFromLine[0:3]
        classLabelVector.append(int(listFromLine[-1]))
        index += 1
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Scale every feature column to the [0, 1] range."""
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    normDataSet = zeros(shape(dataSet))
    m = dataSet.shape[0]
    normDataSet = dataSet - tile(minVals, (m, 1))
    normDataSet = normDataSet / tile(ranges, (m, 1))   # element-wise divide
    return normDataSet, ranges, minVals


def datingClassTest():
    """Hold-out test of classify0 on the dating data set."""
    hoRatio = 0.50      # hold out 50% of the data for testing
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')   # load data set from file
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        print "the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i])
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print "the total error rate is: %f" % (errorCount / float(numTestVecs))
    print errorCount


def img2vector(filename):
    """Read a 32x32 text image of 0/1 characters into a 1x1024 row vector."""
    returnVect = zeros((1, 1024))
    fr = open(filename)
    for i in range(32):
        lineStr = fr.readline()
        for j in range(32):
            returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def handwritingClassTest():
    """Train on trainingDigits/ and measure the error rate on testDigits/."""
    hwLabels = []
    trainingFileList = listdir('trainingDigits')    # load the training set
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]         # take off .txt
        classNumStr = int(fileStr.split('_')[0])    # the digit label is encoded in the file name
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)
    testFileList = listdir('testDigits')            # iterate through the test set
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]         # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print "the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr)
        if classifierResult != classNumStr:
            errorCount += 1.0
    print "\nthe total number of errors is: %d" % errorCount
    print "\nthe total error rate is: %f" % (errorCount / float(mTest))
```
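For reference, here is a minimal usage sketch. It assumes the code above is saved as a module named `kNN.py` (the file name is an assumption, not stated above) and that the example data from *Machine Learning in Action* sits in the working directory: `datingTestSet2.txt` plus the `trainingDigits/` and `testDigits/` folders referenced by the functions. It targets Python 2, the same dialect as the code above.

```python
# Usage sketch (Python 2). Assumes the code above is saved as kNN.py and that
# datingTestSet2.txt, trainingDigits/, and testDigits/ are in the working directory.
import kNN

# Classify one point against the four-sample toy data set.
group, labels = kNN.createDataSet()
print kNN.classify0([0, 0], group, labels, 3)   # 'B': two of the three nearest neighbors are labeled B

# Hold-out test on the dating data; prints per-sample predictions and the error rate.
kNN.datingClassTest()

# Digit-recognition test over every image file in testDigits/.
kNN.handwritingClassTest()
```

Because `classify0` measures the distance from the query to every stored training vector, each prediction is a full pass over the training set; that brute-force scan is the main cost of plain kNN. It is also why `autoNorm` rescales every feature to [0, 1] before the dating test, so that no single feature dominates the distance.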