Import the required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# Entropy gain: the greater the entropy, the more information there is and the greater the uncertainty.
KNN: 1. Calculate the distance from the point to be predicted to every known point. 2. Sort all the distances. 3. Among the K nearest samples, take the class that occurs most often as the predicted class.
Code
# Two small hand-made clusters used by the nearest-distance demo below.
A = np.array([[1, 1], [1, 1.5], [0.5, 1.5]])
B = np.array([[3.0, 3.0], [3.0, 3.5], [2.8, 3.1]])


def knn_pre_norm(point):
    """Print the smallest Euclidean distance from *point* to clusters A and B.

    Args:
        point: 1-D array-like with two coordinates; it is broadcast against
            the rows of ``A`` and ``B``.

    Prints two lines: the minimum distance to ``A``, then the minimum
    distance to ``B``. Returns ``None``.
    """
    # axis=1 gives one norm per row, i.e. one distance per sample.
    a_len = np.linalg.norm(point - A, axis=1)
    b_len = np.linalg.norm(point - B, axis=1)
    print(a_len.min())
    print(b_len.min())


def knn_predict_rev(point):
    """Fit a 2-neighbour KNN classifier on the toy clusters and print a prediction.

    Args:
        point: query sample(s) to classify (one sample of two features, or a
            2-D array of samples). When ``None`` the fixed demo query
            ``[[1.0, 3.0]]`` is used.
    """
    # Six samples to match the six labels: the first three rows are class 0
    # (cluster A), the last three are class 1 (cluster B).
    X = np.array(
        [[1, 1], [1, 1.5], [0.5, 1.5], [3.0, 3.0], [3.0, 3.5], [2.8, 3.1]]
    )
    y = np.array([0, 0, 0, 1, 1, 1])

    knn = KNeighborsClassifier(n_neighbors=2)
    knn.fit(X, y)

    # predict() expects a 2-D array of shape (n_samples, n_features).
    query = np.array([[1.0, 3.0]]) if point is None else np.atleast_2d(point)
    print(knn.predict(query))


def iris_linear():
    """Train a 5-neighbour KNN classifier on the iris data and print a prediction.

    Prints the predicted class for the sample ``[6.3, 3, 5.2, 2.3]`` followed
    by the predicted probability of each class for that sample.
    """
    # Load the iris data.
    li = load_iris()
    # Scatter plots of the feature pairs, coloured by class:
    # plt.scatter(li.data[:, 0], li.data[:, 1], c=li.target)
    # plt.scatter(li.data[:, 2], li.data[:, 3], c=li.target)
    # plt.show()
    # Split into training and test sets; the test set is 25% of the data.
    x_train, x_test, y_train, y_test = train_test_split(
        li.data, li.target, test_size=0.25
    )
    # Create the KNN classifier; the 5 nearest neighbours vote on the class.
    knn = KNeighborsClassifier(n_neighbors=5)
    # Train on the training set.
    knn.fit(x_train, y_train)
    # Predict the test set:
    # print(knn.predict(x_test))
    sample = np.array([[6.3, 3, 5.2, 2.3]])
    # Predicted class for the sample.
    print(knn.predict(sample))
    # Probability of each class for the sample.
    print(knn.predict_proba(sample))


if __name__ == '__main__':
    # knn_predict_rev(None)
    # knn_pre_norm(np.array([2.3, 2.3]))
    iris_linear()
Machine Learning-KNN