(Python) (Supervised) k-Nearest Neighbor (kNN) Classification Algorithm
Steps of the supervised k-nearest-neighbor (kNN) algorithm:
(1) Calculate the distance between the current point and every point in a dataset of known classes.
(2) sort by ascending distance
(3) Select k points with the minimum distance from the current point
(4) Count how often each category occurs among those k nearest points
(5) Return the most frequent category among those k points as the predicted category of the current point.
# Data example
1 2:
1 3:
1 4:
1 5: B
6 2: B
6 3: B
100 200: c
101 199: c
300 444: d
299 50: d
1000 10000: d
# Version 0: python only
# kNN
from math import sqrt
from collections import Counter


def distance(a, b):
    """Return the Euclidean distance between two numeric sequences.

    Args:
        a: Sequence of numbers (coordinates of the first point).
        b: Sequence of numbers (coordinates of the second point).

    Returns:
        The distance as a float.

    Raises:
        ValueError: If ``a`` and ``b`` do not have the same length.
    """
    if len(a) != len(b):
        raise ValueError('Error0:data length match fail')
    return sqrt(sum((ai - bi) ** 2 for ai, bi in zip(a, b)))


def distance2(a, b):
    """Return the Euclidean distance between two points given as strings.

    Each argument is a whitespace-separated list of integers, e.g. ``'1 2'``.
    """
    return distance([int(i) for i in a.split()],
                    [int(i) for i in b.split()])


def readData(file):
    """Read a ``coordinates:label`` file into a dict.

    Every non-blank line is split on ``':'``; the text before the first colon
    becomes the key and the second colon-separated field of the stripped line
    becomes the label. Blank lines are skipped.

    Args:
        file: Path of the data file.

    Returns:
        A dict mapping the coordinate string to its label string.

    Raises:
        IndexError: If a non-blank line contains no ``':'``.
    """
    data = {}
    # The context manager closes the file even if a line is malformed.
    with open(file) as handle:
        for line in handle:
            if not line.strip():
                continue
            data[line.split(':')[0]] = line.strip().split(':')[1]
    return data


def judgeSpot2(dataIn, x='1 2', num=5):
    """Classify point ``x`` by majority vote of its nearest neighbours.

    Args:
        dataIn: Dict mapping coordinate strings to labels (as built by
            ``readData``).
        x: Coordinate string of the point to classify.
        num: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``num`` nearest points.

    Raises:
        IndexError: If no neighbour is available to vote (``dataIn`` is empty
            or ``num`` is 0).
        ValueError: If a point's dimension differs from that of ``x``.
    """
    # Keep (distance, label) pairs in a list rather than a dict keyed by the
    # distance: several training points can be equally far from ``x`` and
    # each of them must keep its own vote.
    ranked = sorted(
        ((distance2(x, point), label) for point, label in dataIn.items()),
        key=lambda pair: pair[0]
    )
    votes = Counter(label for _, label in ranked[:num])
    return votes.most_common(1)[0][0]


def judgeSpot(fileIn='test0.txt', x='1 2', num=5):
    """Classify point ``x`` using the labelled points stored in ``fileIn``.

    Reads the file with ``readData`` and delegates to ``judgeSpot2``; see
    that function for the meaning of ``x`` and ``num``.
    """
    return judgeSpot2(readData(fileIn), x, num)


# Rate of Right
def rateRight(fileIn='test0.txt', num=5):
    """Return the fraction of points in ``fileIn`` that are classified correctly.

    Each point is classified against the full dataset, so the point itself
    (at distance 0) is one of its own neighbours.

    Args:
        fileIn: Path of the data file.
        num: Number of nearest neighbours used for each prediction.

    Returns:
        Correct predictions divided by the number of points read, as a float;
        ``0.0`` when the file holds no points.
    """
    data = readData(fileIn)
    if not data:
        return 0.0
    countRight = sum(1 for k in data if judgeSpot2(data, k, num) == data[k])
    # Divide by the points actually evaluated, not by the raw line count.
    return countRight / float(len(data))


if __name__ == '__main__':
    print(judgeSpot('test0.txt', '1000 10000'))
    print(rateRight())