For background on kNN (k-nearest neighbors), I was lucky to come across a very well-written article, so I will not repeat the theory here. The code is pasted below with small changes: the original was written for Python 2, and this version has been ported to Python 3 (mainly the `print` calls).
Environment: Win7 32bit + Spyder + anaconda3.5
I. Basics
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 6 16:09:00 2016

@author: administrator
"""
# kNN (k-nearest neighbors) classification.
#
# Input:
#   newInput: data point to be classified (1xM)
#   dataSet:  known data (NxM)
#   labels:   labels of the known data, one per row of dataSet (length N)
#   k:        number of nearest data points selected
#
# Output:
#   class label of the data point being classified

from collections import Counter

import numpy as np


def createdataset():
    """Create a toy data set containing 4 samples in 2 classes.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array with one sample
        per row, and ``labels`` is the list of class labels for those rows.
    """
    # Each row of the matrix is one sample.
    group = np.array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    labels = ['a', 'a', 'b', 'b']
    return group, labels


def knnclassify(newinput, dataSet, labels, k):
    """Classify ``newinput`` by majority vote among its k nearest samples.

    Args:
        newinput: the point to classify, a vector with M components.
        dataSet: the known samples, an NxM array (one sample per row).
        labels: the class label of each row of ``dataSet``.
        k: how many nearest samples take part in the vote.

    Returns:
        The label that occurs most often among the k nearest samples.

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet`` (there would be nothing valid to vote on).
    """
    dataSet = np.asarray(dataSet)
    numsamples = dataSet.shape[0]  # row number
    if not 1 <= k <= numsamples:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (numsamples, k)
        )

    # step 1: calculate the Euclidean distance to every known sample.
    # Broadcasting subtracts newinput from each row, so no explicit tiling
    # of newinput is needed.
    diff = dataSet - np.asarray(newinput)
    distance = np.sqrt((diff ** 2).sum(axis=1))  # sum is performed by row

    # step 2: sort the distances; argsort() returns the indices that would
    # sort the array in ascending order.
    sorteddistindices = np.argsort(distance)

    # step 3 + 4: take the k closest samples and count how often each label
    # occurs among them.
    classcount = Counter(labels[i] for i in sorteddistindices[:k])

    # step 5: the label with the most votes is returned.
    return classcount.most_common(1)[0][0]


if __name__ == "__main__":
    # test
    dataSet, labels = createdataset()
    k = 3

    testx = np.array([1.2, 1.0])
    outputlabel = knnclassify(testx, dataSet, labels, k)
    print("Your input is:", testx, "and classified to class:", outputlabel)

    testx = np.array([0.1, 0.3])
    outputlabel = knnclassify(testx, dataSet, labels, k)
    print("Your input is:", testx, "and classified to class:", outputlabel)
Output:
II. Advanced
The handwritten-digit dataset used in this section can be downloaded here. The dataset itself is already described very clearly in the blog post mentioned above.
#-*-coding:utf-8-*-"" "Created on Sun Nov 6 16:09:00 2016@author:administrator" "" #Input: #newInput: data points to be measured (1xM) #dataSet : Known data (NxM) #labels: label for known data (1xM) #k: number of nearest data points selected # #Output: #待测数据点的分类标签 #from numpy Import * #classify using Knndef Knnclassify (newinput, dataSet, labels, k): numsamples = dataset.shape[0] # row number# step1:calculate Euclidean distance# Tile (A, reps): constract An array by repeating A reps Timesdiff = Tile (newinput, (numsamples, 1))-datasetsqurediff = dif f**2squredist = sum (squrediff, axis=1) # sum if performed by Rowdistance = Squredist * 0.5#step2:sort the distance# args ORT () returns the indices that would sort an array in a ascending order sorteddistindices = argsort (distance) ClassCount = {}for i in range (k): # choose the min k Distancevotelabel = labels[sorteddistindices[i]] #step4: count The Times labels OCCU r# when the key Votelabel was not in dictionary classcount, # get () would return 0classcount[votelabel] = Classcount.get (vot elabel, 0) + 1#step5:the Max vote CLASS would returnmaxcount = 0for k, v in classcount.items (): if v > maxcount:maxcount = vmaxindex = Kreturn maxindex# conve RT image to Vector def img2vector (filename): rows = cols = Imgvector = Zeros ((1, rows * COLS)) Filein = open (filename) for row in range (rows): linestr = filein.readline () for col in range (cols ): imgvector[0, Row * + col] = int (linestr[col]) return imgvector# load DataSet def loaddataset (): # # Step 1:getting Training Set Print ("---Getting training set ...") Datasetdir = ' f:\\techonolgoy\\ algorithm learning \\KN n\\ advanced \ \ ' Trainingfilelist = Os.listdir (datasetdir + ' trainingdigits ') # Load the training set numsamples = Len (t Rainingfilelist) train_x = Zeros ((numsamples, 1024x768)) train_y = [] for i in range (numsamples): fi Lename = trainingfilelist[i] # get train_x train_x[i,:] = img2vector (datasetdir + ' trainingdigits/%s ' % Filename) # get 
label from file name such as "1_18.txt" label = int (filename.split ('_') [0]) # return 1 Train_y.ap Pend (label) # # Step 2:getting Testing Set Print ("---Getting testing set ...") testingfilelist = Os.listdi R (datasetdir + ' testdigits ') # Load the testing set numsamples = Len (testingfilelist) test_x = Zeros ((numsamples , 1024x768)) test_y = [] for i in range (numsamples): filename = testingfilelist[i] # get train_x test_x[i,:] = img2vector (datasetdir + ' testdigits/%s '% Filename) # get label from file name such a s "1_18.txt" label = int (filename.split ('_') [0]) # return 1 test_y.append (label) return train_x, train_y, test_x, test_y # Test Hand writing class Def Testhandwritingclass (): # # Step 1:load data Print ("st EP 1:load data ... ") train_x, train_y, test_x, test_y = loaddataset () # # Step 2:training ... print (" step 2 : Training ... ") pasS # # Step 3:testing print ("step 3:testing ...") numtestsamples = test_x.shape[0] MatchCount = 0 For I in range (numtestsamples): predict = knnclassify (test_x[i], train_x, train_y, 3) if predict = = Te St_y[i]: MatchCount + = 1 accuracy = float (matchcount)/numtestsamples # # step 4:show The result Print ("step 4:show The result ...") print (' the classify accuracy is:%.2f%% '% (accuracy *)) testhandwritin Gclass ()
Output:
A Python 3 implementation of kNN