I. Linear Logistic Regression
The code is as follows:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt
import seaborn as sns

# Read the dataset
path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['exam 1', 'exam 2', 'admitted'])

# Separate the positive and negative examples
positive = data[data['admitted'].isin([1])]
negative = data[data['admitted'].isin([0])]

'''
# View the distribution of the raw data
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive['exam 1'], positive['exam 2'], s=60, c='b', marker='o', label='admitted')
ax.scatter(negative['exam 1'], negative['exam 2'], s=50, c='r', marker='x', label='not admitted')
ax.legend()
ax.set_xlabel('exam 1 score')
ax.set_ylabel('exam 2 score')
plt.show()
'''

# Implementation of the sigmoid function
def sigmoid(h):
    return 1 / (1 + np.exp(-h))

'''
# Test the sigmoid function
nums = np.arange(-10, 11, step=1)
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(nums, sigmoid(nums), 'k')
plt.show()
'''

# Compute the value of the loss function
def cost(theta, X, y):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    part1 = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    part2 = np.multiply(1 - y, np.log(1 - sigmoid(X * theta.T)))
    return np.sum(part1 - part2) / len(X)

# Insert a column of all ones in front of the original matrix
data.insert(0, 'ones', 1)
cols = data.shape[1]
X = data.iloc[:, 0:cols - 1]
y = data.iloc[:, cols - 1:cols]
X = np.array(X.values)
y = np.array(y.values)
theta = np.zeros(3)  # a row vector here

# Return the gradient vector (one component per parameter)
def gradient(theta, X, y):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    error = sigmoid(X * theta.T) - y
    grad = (X.T * error).T / len(X)
    return np.array(grad).ravel()

# Use an advanced optimization algorithm to find the best theta
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y))
# print(cost(result[0], X, y))

# Evaluate the obtained theta: compute predictions on the original dataset
def predict(theta, X):
    theta = np.matrix(theta)
    X = np.matrix(X)
    probability = sigmoid(X * theta.T)
    return [1 if i > 0.5 else 0 for i in probability]

theta_min = result[0]
predictions = predict(theta_min, X)
correct = [1 if (a == 1 and b == 1) or (a == 0 and b == 0) else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(map(int, correct)) * 100 // len(correct)
print('accuracy = {0}%'.format(accuracy))  # accuracy on the training set is 89%

# Plot the decision boundary
theta_temp = theta_min
theta_temp = theta_temp / theta_temp[2]
x = np.arange(130, step=0.1)
y = -(theta_temp[0] + theta_temp[1] * x)

# Draw the original data points
sns.set(context='notebook', style='ticks', font_scale=1.5)
sns.lmplot('exam 1', 'exam 2', hue='admitted', data=data, size=6, fit_reg=False, scatter_kws={"s": 25})

# Draw the dividing line
plt.plot(x, y, 'gray')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()
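As a quick sanity check (not part of the original exercise code), the theta returned by fmin_tnc can be compared against scikit-learn's LogisticRegression. The following is only a sketch: it assumes scikit-learn is installed and that X (which already contains the column of ones) and y are the arrays built above, and it uses a very large C so the built-in L2 penalty is negligible and the objective roughly matches the unregularized cost defined above.

# Hedged sketch: cross-check the hand-rolled solver with scikit-learn (assumed installed).
from sklearn.linear_model import LogisticRegression

# fit_intercept=False because X already contains the bias column of ones;
# a very large C makes the L2 penalty negligible, approximating the
# unregularized cost minimized by fmin_tnc above.
clf = LogisticRegression(C=1e6, fit_intercept=False, max_iter=1000)
clf.fit(X, y.ravel())

print('sklearn theta:', clf.coef_.ravel())
print('sklearn training accuracy: {:.0f}%'.format(100 * clf.score(X, y.ravel())))

Up to optimization tolerance, the coefficients should be close to result[0], and the training accuracy should land near the 89% reported above.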
II. Non-linear Logistic Regression (Regularization)
The code is as follows:
import pandas as pd
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

path = 'ex2data2.txt'
data = pd.read_csv(path, header=None, names=['test 1', 'test 2', 'accepted'])
positive = data[data['accepted'].isin([1])]
negative = data[data['accepted'].isin([0])]

'''
# Display the distribution of the raw data
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive['test 1'], positive['test 2'], s=50, c='b', marker='o', label='accepted')
ax.scatter(negative['test 1'], negative['test 2'], s=50, c='r', marker='x', label='not accepted')
ax.legend()  # show the accepted / not accepted legend in the upper right corner
ax.set_xlabel('test 1 score')
ax.set_ylabel('test 2 score')
plt.show()
'''

degree = 5
x1 = data['test 1']
x2 = data['test 2']

# Insert a column of all ones as the third column of data
data.insert(3, 'ones', 1)

# Create polynomial features, up to degree 4
for i in range(1, degree):
    for j in range(0, i):
        data['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

# Delete the original 'test 1' and 'test 2' columns
data.drop('test 1', axis=1, inplace=True)
data.drop('test 2', axis=1, inplace=True)

# Implementation of the sigmoid function
def sigmoid(h):
    return 1 / (1 + np.exp(-h))

# Regularized loss function
def cost(theta, X, y, learnRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply(1 - y, np.log(1 - sigmoid(X * theta.T)))
    reg = (learnRate / (2 * len(X))) * np.sum(np.power(theta[:, 1:theta.shape[1]], 2))
    return np.sum(first - second) / len(X) + reg

learnRate = 1
cols = data.shape[1]
X = data.iloc[:, 1:cols]
y = data.iloc[:, 0:1]
X = np.array(X)
y = np.array(y)
theta = np.zeros(X.shape[1])

# Compute predictions on the original dataset
def predict(theta, X):
    theta = np.matrix(theta)
    X = np.matrix(X)
    probability = sigmoid(X * theta.T)
    return [1 if i > 0.5 else 0 for i in probability]

# Regularized gradient (the bias term theta_0 is not regularized)
def gradientReg(theta, X, y, learnRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    grad = (sigmoid(X * theta.T) - y).T * X / len(X) + (learnRate / len(X)) * theta
    grad[0, 0] = grad[0, 0] - (learnRate / len(X)) * theta[0, 0]
    return np.array(grad).ravel()

result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradientReg, args=(X, y, learnRate))
print(result)

theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
correct = [1 if (a == 1 and b == 1) or (a == 0 and b == 0) else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(map(int, correct)) * 100 // len(correct)
print('accuracy = {0}%'.format(accuracy))
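The Part II script above only reports the training accuracy. Purely as an illustration (not from the original exercise), the regularized decision boundary can also be visualized by evaluating theta^T f(x1, x2) = 0 on a grid, where f is the same polynomial feature mapping built above (the bias column followed by the F{i}{j} columns in insertion order). map_feature is a hypothetical helper introduced only for this sketch; it assumes the positive/negative DataFrames and result from the script above are still in scope.

# Hedged sketch: plot the regularized decision boundary theta^T f(x1, x2) = 0.
def map_feature(u1, u2, degree=5):
    feats = [np.ones_like(u1)]                 # bias term (the 'ones' column)
    for i in range(1, degree):
        for j in range(0, i):
            feats.append(np.power(u1, i - j) * np.power(u2, j))
    return np.stack(feats, axis=-1)            # same column order as the DataFrame above

theta_min = np.array(result[0])                # theta returned by fmin_tnc above
u = np.linspace(-1, 1.5, 200)                  # grid roughly covering the data range
U, V = np.meshgrid(u, u)
Z = map_feature(U.ravel(), V.ravel()).dot(theta_min).reshape(U.shape)

plt.scatter(positive['test 1'], positive['test 2'], s=50, c='b', marker='o', label='accepted')
plt.scatter(negative['test 1'], negative['test 2'], s=50, c='r', marker='x', label='not accepted')
plt.contour(U, V, Z, levels=[0], colors='green')   # theta^T f(x) = 0 is the boundary
plt.xlabel('test 1 score')
plt.ylabel('test 2 score')
plt.legend()
plt.show()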