Reference: http://blog.csdn.net/tjusxh/article/details/51052319
K-Nearest Neighbor (KNN) algorithm: simply put, it classifies a sample by measuring the distance between its feature values and those of already-labeled samples.
Three basic elements: selection of K-value, distance measurement, classification decision rule
Advantages: High precision, insensitive to outliers, no assumptions about the input data.
Disadvantages: High computational (time) complexity and high space complexity.
The general flow of the KNN algorithm:
1. Import data
2. Normalize the feature values so that every feature carries the same weight
3. Distance calculation
4. Sort the distances, take the K nearest samples, and pick the label that occurs most often among them
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

using namespace std;

/// One candidate neighbour: its distance to the query and its class label.
struct node {
    double dis = 0.0;     ///< Squared Euclidean distance to the query sample.
    double labels = 0.0;  ///< Class label of the training sample.

    node() = default;
    node(double dis, double labels) : dis(dis), labels(labels) {}
};

/// Orders neighbours by ascending distance.
bool cmp(const node& a, const node& b) {
    return a.dis < b.dis;
}

/// Step 1: import the data.
///
/// Reads whitespace-separated numbers, one sample per line. Each line becomes
/// one row; the last value of a row is treated as the label by the rest of
/// the program. Lines that contain no number are skipped so that no empty row
/// is indexed later.
///
/// @param in  Open input stream positioned at the first sample.
/// @return    One row of values per non-empty input line.
vector<vector<double>> getFile(ifstream& in) {
    vector<vector<double>> dataMat;
    string line;
    while (getline(in, line)) {
        istringstream fields(line);
        vector<double> row;
        double value;
        while (fields >> value) {
            row.push_back(value);
        }
        if (!row.empty()) {
            dataMat.push_back(std::move(row));
        }
    }
    return dataMat;
}

/// Prints the data set, one sample per line, values separated by a space.
///
/// NOTE(review): this overloads the C library name `printf`; the name is kept
/// only so existing callers keep working.
///
/// @param data  Samples to print (features followed by the label).
void printf(const vector<vector<double>>& data) {
    for (const auto& row : data) {
        for (const double value : row) {
            cout << value << " ";
        }
        cout << '\n';
    }
}

/// Per-feature minimum and value range (max - min) of a data set.
struct FeatureScale {
    vector<double> lo;
    vector<double> range;
};

/// Computes the min and range of every feature column; the last column is the
/// label and is excluded. Assumes every row has as many columns as the first.
static FeatureScale featureScale(const vector<vector<double>>& data) {
    FeatureScale scale;
    if (data.empty() || data[0].size() < 2) {
        return scale;
    }
    const size_t numFeatures = data[0].size() - 1;
    scale.lo.assign(numFeatures, 0.0);
    scale.range.assign(numFeatures, 0.0);
    for (size_t col = 0; col < numFeatures; ++col) {
        // Seed from row 0 of THIS column (not row `col`, column 0).
        double lo = data[0][col];
        double hi = lo;
        for (const auto& row : data) {
            if (col >= row.size()) {
                continue;  // tolerate a short row instead of reading past its end
            }
            lo = min(lo, row[col]);
            hi = max(hi, row[col]);
        }
        scale.lo[col] = lo;
        scale.range[col] = hi - lo;
    }
    return scale;
}

/// Maps the leading feature values of `sample` into [0, 1] using `scale`.
/// Values beyond the scaled features (e.g. a trailing label) are left as is.
static vector<double> scaleSample(vector<double> sample, const FeatureScale& scale) {
    const size_t count = min(sample.size(), scale.lo.size());
    for (size_t i = 0; i < count; ++i) {
        // A constant column has range 0; map it to 0 rather than divide by zero.
        sample[i] = scale.range[i] > 0.0 ? (sample[i] - scale.lo[i]) / scale.range[i] : 0.0;
    }
    return sample;
}

/// Step 2: normalization of values.
///
/// Rescales every feature column to [0, 1] with (x - min) / (max - min) so all
/// features carry the same weight in the distance. The last column (label) is
/// not modified. A column whose values are all equal is mapped to 0.
///
/// @param theData  Samples; each row is features followed by the label.
/// @return         A normalized copy of `theData`.
vector<vector<double>> makeOne(vector<vector<double>> theData) {
    const FeatureScale scale = featureScale(theData);
    for (auto& row : theData) {
        row = scaleSample(std::move(row), scale);
    }
    return theData;
}

/// Prompts for and reads the sample to be tested from standard input.
///
/// @param dataMat  Training data; only used to know how many features to read
///                 (columns of the first row minus the label column).
/// @return         The values read. Shorter than expected if reading failed;
///                 callers should check `cin` afterwards.
vector<double> getX(const vector<vector<double>>& dataMat) {
    const size_t numFeatures =
        (dataMat.empty() || dataMat[0].empty()) ? 0 : dataMat[0].size() - 1;
    cout << "Please enter the sample to be tested (the feature number is" << numFeatures
         << "each)" << endl;

    vector<double> inputX;
    inputX.reserve(numFeatures);
    for (size_t i = 0; i < numFeatures; ++i) {
        double value;
        if (!(cin >> value)) {
            break;  // EOF or malformed number: stop instead of storing garbage
        }
        inputX.push_back(value);
    }
    return inputX;
}

/// Step 3: distance from the input to one sample point.
///
/// The distance is the squared Euclidean distance; the square root is omitted
/// because it does not change the ordering of neighbours.
///
/// @param input    Feature values of the query.
/// @param aSample  Training row: features followed by the label (last value).
/// @return         The distance paired with the sample's label; a
///                 default-constructed node when `aSample` is empty.
node getLabel(const vector<double>& input, const vector<double>& aSample) {
    if (aSample.empty()) {
        return node();
    }
    // Never read the label column as a feature, nor past the end of either vector.
    const size_t count = min(input.size(), aSample.size() - 1);
    double dis = 0.0;
    for (size_t i = 0; i < count; ++i) {
        const double diff = input[i] - aSample[i];
        dis += diff * diff;
    }
    return node(dis, aSample.back());
}

/// Returns the most frequent label among the `k` nearest nodes.
/// `k` is clamped to the number of nodes; ties go to the smallest label;
/// 0 is returned when there is nothing to vote on.
static double majorityLabel(vector<node> nodes, size_t k) {
    k = min(k, nodes.size());
    if (k == 0) {
        return 0.0;
    }
    // Only the k closest neighbours need to be in order.
    partial_sort(nodes.begin(), nodes.begin() + static_cast<ptrdiff_t>(k), nodes.end(), cmp);

    // Labels are truncated to int, as before, but counted in a map so any
    // label value (large or negative) is handled.
    map<int, int> votes;
    for (size_t i = 0; i < k; ++i) {
        ++votes[static_cast<int>(nodes[i].labels)];
    }

    int bestLabel = 0;
    int bestVotes = 0;
    for (const auto& entry : votes) {  // scan every label seen, not just 0..k-1
        if (entry.second > bestVotes) {
            bestLabel = entry.first;
            bestVotes = entry.second;
        }
    }
    return bestLabel;
}

/// Step 4: sort the sample points, take the K nearest and return the label
/// that appears most often among them.
///
/// Prompts for K on standard output and reads it from standard input.
///
/// @param nodes  Distance/label pair for every training sample.
/// @return       The winning label, or 0 if K could not be read or is not
///               positive.
double makeComp(vector<node> nodes) {
    cout << "Please enter K:" << " ";
    int k = 0;
    if (!(cin >> k) || k <= 0) {
        return 0.0;
    }
    return majorityLabel(std::move(nodes), static_cast<size_t>(k));
}

// Define KNN_NO_MAIN to compile the functions above into a test binary that
// supplies its own main().
#ifndef KNN_NO_MAIN
int main() {
    ifstream file("DatingTestSet2.txt");
    if (!file) {
        cerr << "Cannot open DatingTestSet2.txt" << endl;
        return 1;
    }
    const vector<vector<double>> rawData = getFile(file);
    if (rawData.empty()) {
        cerr << "No samples found in DatingTestSet2.txt" << endl;
        return 1;
    }

    // Keep the scale of the raw data: the query must be normalized with the
    // same min/range as the training set, otherwise distances are meaningless.
    const FeatureScale scale = featureScale(rawData);
    const vector<vector<double>> dataMat = makeOne(rawData);

    while (true) {
        vector<double> input = getX(dataMat);  // enter the sample to be tested
        if (!cin) {
            break;  // EOF or invalid input ends the session
        }
        input = scaleSample(std::move(input), scale);

        // Distance from the input to every sample point, paired with its label.
        vector<node> disLabel;
        disLabel.reserve(dataMat.size());
        for (const auto& sample : dataMat) {
            disLabel.push_back(getLabel(input, sample));
        }

        const double classify = makeComp(disLabel);
        if (!cin) {
            break;
        }
        cout << classify << endl;
    }
    return 0;
}
#endif  // KNN_NO_MAIN
K-Nearest Neighbor algorithm learning