Multidimensional training data: https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw2%2Fhw2_train.dat
Multidimensional test data: https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw2%2Fhw2_test.dat
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'onedimension.py'
__author__ = 'Administrator'
__mtime__ = '2015/10/10'

One-dimensional decision stump h(x) = s * sign(x - theta) on noisy data.
"""
import random

import numpy as np


def data_generator(size):
    """Generate ``size`` noisy one-dimensional samples.

    Each x is drawn uniformly from [-1, 1]; its label is sign(x), negated
    with probability 0.2.

    Returns:
        A tuple ``(x, y)`` of two float arrays of length ``size``.
    """
    x = np.zeros(size)
    y = np.zeros(size)
    for i in range(size):
        # Draw order (sample first, then the noise coin) is kept stable so a
        # seeded run stays reproducible.
        value = random.uniform(-1, 1)
        label = int(np.sign(value))
        if random.uniform(0, 1) < 0.2:
            label = -label
        x[i] = value
        y[i] = label
    return (x, y)


def err_counter(x, y, sflip, theta):
    """Count the samples misclassified by the stump ``s * sign(x - theta)``.

    Args:
        x: One-dimensional sequence of sample values.
        y: Labels, same length as ``x``.
        sflip: True selects s = +1, False selects s = -1.
        theta: Threshold of the stump.

    Returns:
        The number of samples with ``h(x) * y < 0`` as a Python int.  A
        sample lying exactly on the threshold (sign == 0) is not counted.
    """
    s = 1 if sflip else -1
    values = np.asarray(x, dtype=float)
    labels = np.asarray(y, dtype=float)
    return int(np.count_nonzero(s * np.sign(values - theta) * labels < 0))


def errout_calculator(sflip, theta):
    """Return the closed-form out-of-sample error of a stump.

    Evaluates ``0.5 + 0.3 * s * (|theta| - 1)``, which is the expected error
    for data distributed as in ``data_generator`` (x uniform on [-1, 1],
    labels flipped with probability 0.2).

    Args:
        sflip: True selects s = +1, False selects s = -1.
        theta: Threshold of the stump.
    """
    s = 1 if sflip else -1
    return 0.5 + 0.3 * s * (abs(theta) - 1)


def decision_stump_1d(x, y):
    """Fit the one-dimensional stump with the lowest in-sample error.

    Candidate thresholds are the midpoints between consecutive sorted
    samples, plus the midpoints between the outermost samples and the
    interval ends -1 and +1.  For every threshold s = +1 is tried before
    s = -1, and a candidate replaces the incumbent only when it is strictly
    better, so ties keep the earliest candidate.

    Args:
        x: One-dimensional sequence of sample values.
        y: Labels, same length as ``x``.

    Returns:
        ``((sflip, theta), (errin, errout))`` where ``sflip`` is True for
        s = +1 and False for s = -1, ``errin`` is the number of misclassified
        training samples and ``errout`` comes from ``errout_calculator``.

    Raises:
        ValueError: If ``x`` is empty.
    """
    datasize = len(x)
    if datasize == 0:
        raise ValueError("decision_stump_1d needs at least one sample")

    # Pad with the interval ends so that every threshold is a plain midpoint
    # of two neighbours.
    padded = np.concatenate(([-1.0], np.sort(x), [1.0]))
    thresholds = (padded[:-1] + padded[1:]) * 0.5

    sflip = True  # True for s = +1, False for s = -1
    thetabest = 0
    errinbest = datasize
    for theta in thresholds:
        for direction in (True, False):
            errin = err_counter(x, y, direction, theta)
            if errin < errinbest:
                (sflip, thetabest, errinbest) = (direction, theta, errin)

    errout = errout_calculator(sflip, thetabest)
    return ((sflip, thetabest), (errinbest, errout))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'multidimension.py'
__author__ = 'Administrator'
__mtime__ = '2015/10/10'
"""
import random
from numpy import *
from onedimension import *
def decision_stump_md(x, y):
    """Fit a decision stump on every column of ``x`` and keep the best one.

    ``decision_stump_1d`` is run on each column; the column with the lowest
    in-sample error count wins.  Ties go to the lowest column index.

    The per-dimension results and the final choice are printed, as in the
    original script.

    Args:
        x: Two-dimensional array; ``x.shape`` is read as
            ``(datasize, datadim)`` and columns are taken with ``x[:, dim]``.
        y: Labels, passed unchanged to ``decision_stump_1d``.

    Returns:
        ``([sflip, theta, bestdim], errin)`` for the winning column.

    Raises:
        ValueError: If ``x`` has no columns.
    """
    (datasize, datadim) = x.shape
    if datadim == 0:
        raise ValueError("decision_stump_md needs at least one feature column")

    sflipbest = []
    thetabest = []
    errinbest = []
    for dim in range(datadim):
        res = decision_stump_1d(x[:, dim], y)
        sflipbest.append(res[0][0])
        thetabest.append(res[0][1])
        errinbest.append(res[1][0])

    # min() returns the first minimum, so the choice among equally good
    # dimensions is deterministic and no full sort is needed.
    bestdim = min(range(datadim), key=errinbest.__getitem__)
    theta = thetabest[bestdim]
    sflip = sflipbest[bestdim]

    # Single-argument print calls produce the same output on Python 2 and 3.
    print(sflipbest)
    print(thetabest)
    print(errinbest)
    result = ([sflip, theta, bestdim], errinbest[bestdim])
    print(result)
    return result
def errout_estimator(x, y, sflip, theta, dim):
    """Count the errors of a fixed stump on column ``dim`` of ``x``.

    Args:
        x: Two-dimensional array of samples; column ``dim`` is evaluated.
        y: Labels for the rows of ``x``.
        sflip: True selects s = +1, False selects s = -1.
        theta: Threshold of the stump.
        dim: Index of the column the stump operates on.

    Returns:
        Whatever ``err_counter`` returns for that column, i.e. an error
        count rather than an error rate.
    """
    return err_counter(x[:, dim], y, sflip, theta)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'HW 2 main.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/10'
"""
from numpy import *
from onedimension import *
from multidimension import *
import sys
import string


def data_pretreatment(path):
    """Load a whitespace-separated data file into feature and label arrays.

    Every non-blank line holds the feature values followed by one integer
    label in the last column.  The raw lines and the resulting
    ``(datanum, datadim)`` pair are printed, as in the original script.

    Args:
        path: Path of the text file to read.

    Returns:
        ``(X, Y)`` where ``X`` has shape ``(datanum, datadim)`` and ``Y``
        has length ``datanum``.

    Raises:
        ValueError: If the file contains no data lines.
    """
    # The context manager closes the file even if parsing fails later.
    with open(path) as handle:
        rawdata = handle.readlines()
    print(rawdata)

    # Skip blank lines (e.g. a trailing newline) instead of crashing on them.
    rows = [line.split() for line in rawdata if line.strip()]
    if not rows:
        raise ValueError("no data lines found in %r" % (path,))

    datanum = len(rows)
    datadim = len(rows[0]) - 1
    X = zeros([datanum, datadim])
    Y = zeros(datanum)
    print((datanum, datadim))
    for dataidx, fields in enumerate(rows):
        # int() replaces the Python 2-only string.atoi().
        Y[dataidx] = int(fields[datadim])
        X[dataidx, :] = [float(value) for value in fields[0:datadim]]
    return (X, Y)


def run_1d_experiment(size, rounds=5000):
    """Average the one-dimensional stump errors over repeated random draws.

    This is the experiment that used to sit disabled inside a string
    literal under the ``__main__`` guard.  The sample size passed to
    ``data_generator`` was lost from the original listing, so it is a
    required argument here.

    Args:
        size: Number of samples generated per round.
        rounds: Number of independent rounds to average over.

    Returns:
        ``(average_errin, average_errout)``; ``average_errin`` is the mean
        count of misclassified training samples per round.
    """
    errincnt = 0
    erroutcnt = 0.0
    for _ in range(rounds):
        X, Y = data_generator(size)
        res = decision_stump_1d(X, Y)
        errincnt = errincnt + res[1][0]
        erroutcnt = erroutcnt + res[1][1]
    # Convert before dividing: integer division truncated the average on
    # Python 2.
    average_errin = float(errincnt) / rounds
    average_errout = float(erroutcnt) / rounds
    print('Average errin: ' + str(average_errin))
    print('Average errout: ' + str(average_errout))
    return (average_errin, average_errout)


def main():
    """Train on 'train.txt' and print the stump's error count on 'test.txt'."""
    X, Y = data_pretreatment('train.txt')
    Xt, Yt = data_pretreatment('test.txt')
    res = decision_stump_md(X, Y)
    sflip = res[0][0]
    theta = res[0][1]
    dim = res[0][2]
    print(errout_estimator(Xt, Yt, sflip, theta, dim))


if __name__ == '__main__':
    main()