The k-nearest neighbors (kNN) algorithm
The algorithm selects only the k most similar samples (the nearest neighbors) from the sample dataset, where k is usually an integer no greater than 20, and assigns the new data point the class that occurs most frequently among those k samples.
Pros:
High accuracy, insensitive to outliers, makes no assumptions about the input data
No training step is required
Cons:
High computational complexity, high space complexity
Applicable data types:
Numeric and nominal values
Code:
from numpy import *  # noqa: F401,F403 - kept as a star import, matching the original listing
import operator


def createDataSet():
    """Return a tiny labelled sample set for demonstrating kNN.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array with one 2-D
        sample per row, and ``labels`` is the list of class labels for the
        corresponding rows.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: The feature vector to classify (sequence or 1-D array).
        dataSet: 2-D array-like of known samples, one sample per row.
        labels: Class label of each row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label occurring most often among the ``k`` samples closest to
        ``inX`` by Euclidean distance. On a tie, the label that was first
        encountered among the nearest neighbours wins.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of samples.
    """
    dataSet = asarray(dataSet, dtype=float)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (dataSetSize, k)
        )

    # Compute the Euclidean distance from inX to every sample.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5

    # Rank the samples by distance; a stable sort keeps equidistant samples
    # in their original order so the result is deterministic.
    sortedDistIndices = distances.argsort(kind="stable")

    # Tally the labels of the k closest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Order the tallies from most to least frequent and return the winner.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]


if __name__ == "__main__":
    data, label = createDataSet()
    print(classify0([0, 0], data, label, 3))