# -*- coding: cp936 -*-
from numpy import *

def loadDataSet():
    dataMat = []; labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # prepend x0 = 1.0 for the intercept term
        labelMat.append(int(lineArr[2]))
    return dataMat, labelMat

def sigmoid(inX):
    # the logistic (sigmoid) function
    return 1.0 / (1 + exp(-inX))

# gradient ascent algorithm
def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)              # convert to NumPy matrix
    labelMat = mat(classLabels).transpose()  # convert to NumPy matrix
    m, n = shape(dataMatrix)
    alpha = 0.001                            # step size of the gradient ascent
    maxCycles = 500                          # maximum number of iterations
    weights = ones((n, 1))
    for k in range(maxCycles):               # heavy on matrix operations
        h = sigmoid(dataMatrix * weights)    # matrix mult
        error = labelMat - h                 # vector subtraction
        weights = weights + alpha * dataMatrix.transpose() * error  # matrix mult
    return weights                           # regression coefficients computed iteratively

def plotBestFit(weights):
    import matplotlib.pyplot as plt
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]  # decision boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()

# stochastic gradient ascent algorithm
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)                        # initialize to all ones
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h           # h and error are now scalars: one sample per update
        weights = weights + alpha * error * dataMatrix[i]
    return weights

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)                        # initialize to all ones
    for j in range(numIter):
        dataIndex = range(m)
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.0001  # alpha decreases with iteration, but does not go to 0 because of the constant
            randIndex = int(random.uniform(0, len(dataIndex)))
            h = sigmoid(sum(dataMatrix[randIndex] * weights))
            error = classLabels[randIndex] - h
            weights = weights + alpha * error * dataMatrix[randIndex]
            del(dataIndex[randIndex])        # a sample is removed once it has been used
    return weights

def classifyVector(inX, weights):
    prob = sigmoid(sum(inX * weights))
    if prob > 0.5: return 1.0
    else: return 0.0

# apply logistic regression to the horse colic dataset
def colicTest():
    frTrain = open('horseColicTraining.txt'); frTest = open('horseColicTest.txt')
    trainingSet = []; trainingLabels = []
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)                  # collect the sample's feature vector
        trainingLabels.append(float(currLine[21]))   # collect the sample's class label
    trainWeights = stocGradAscent1(array(trainingSet), trainingLabels, 1000)  # train to obtain the regression weights
    errorCount = 0; numTestVec = 0.0
    for line in frTest.readlines():                  # evaluate on the test samples
        numTestVec += 1.0
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]):
            errorCount += 1
    errorRate = float(errorCount) / numTestVec       # compute the error rate
    print "the error rate of this test is: %f" % errorRate
    return errorRate

def multiTest():
    numTests = 10; errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print "after %d iterations the average error rate is: %f" % (numTests, errorSum / float(numTests))
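For reference, the batch update in gradAscent implements the standard gradient-ascent step on the log-likelihood of logistic regression, w <- w + alpha * X^T (y - sigmoid(Xw)); the stochastic variants apply the same step one sample at a time. One plausible driver, appended to the module above (the __main__ block is my addition, not part of the original; testSet.txt, horseColicTraining.txt, and horseColicTest.txt are the data files the code itself reads, assumed to sit in the working directory):

if __name__ == '__main__':
    # fit on the simple 2-D test set and visualize the decision boundary
    dataArr, labelMat = loadDataSet()
    weights = gradAscent(dataArr, labelMat)
    plotBestFit(weights.getA())    # getA() unwraps the NumPy matrix into a plain array for indexing in plotBestFit
    # the stochastic versions already return plain arrays
    plotBestFit(stocGradAscent0(array(dataArr), labelMat))
    plotBestFit(stocGradAscent1(array(dataArr), labelMat))
    # ten train/test rounds on the horse colic data
    multiTest()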
Implementation results, from one run of multiTest():
the error rate of this test is: 0.358209
the error rate of this test is: 0.417910
the error rate of this test is: 0.268657
the error rate of this test is: 0.298507
the error rate of this test is: 0.358209
the error rate of this test is: 0.343284
the error rate of this test is: 0.358209
the error rate of this test is: 0.373134
the error rate of this test is: 0.358209
the error rate of this test is: 0.402985
after 10 iterations the average error rate is: 0.353731
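The ten error rates differ from run to run because stocGradAscent1 draws its training samples through random.uniform. A sketch of making a run repeatable (random here is NumPy's random module, pulled in by from numpy import *; the seed value 42 is arbitrary):

random.seed(42)   # fix NumPy's global RNG so the sampled indices, and hence the error rates, repeat
multiTest()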
Machine learning in Python: logistic regression