A little knowledge:
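Interestingly, SciPy (as used here) re-exports the NumPy namespace, meaning that every np.func can equivalently be called as sp.func. Note that these aliases have been deprecated since SciPy 1.4, so with a recent SciPy you may need to call the NumPy functions directly.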
Brief introduction:
This section works with the web traffic data of an internet company and uses it to learn the most basic machine learning application.
Package imports & path settings:
import os

import matplotlib.pyplot as plt
import scipy as sp

# __file__ gives the path of the current module, but it may be a relative
# path, in which case os.path.dirname(__file__) can return an empty string.
# To get an absolute directory, os.path.dirname(os.path.realpath(__file__))
# is required:
#   - os.path.realpath() returns the full path including the file name
#   - os.path.dirname() strips the script name, leaving only the directory
# print(os.path.realpath(__file__))
# print(os.path.dirname(os.path.realpath(__file__)))
data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "Data")
Reading & cleaning the data:
The loading code from the original book is not used here; it seems the book's author did not know that the following is more concise:
# data = sp.genfromtxt(os.path.join(data_dir, "web_traffic.tsv"), delimiter="\t")
# x = data[:, 0]
# y = data[:, 1]
# The three lines above are equivalent to the single loadtxt call below.
x, y = sp.loadtxt(os.path.join(data_dir, "web_traffic.tsv"),
                  delimiter="\t", unpack=True)

# Count the missing (NaN) samples.
invalid = sp.isnan(y)
print("Illegal data:", sp.sum(invalid))

# Boolean indexing: keep only the rows whose y value is present.  The mask is
# computed once so that x and y are filtered with exactly the same rows.
x = x[~invalid]
y = y[~invalid]
Drawing functions:
This function is quite elegant and uses several of Python's more advanced techniques, which are worth learning:
list + zip, list comprehensions, etc.
By the way, an object built by sp.poly1d() has an attribute, f.order, that gives the order of the polynomial.
# Per-model line colors and line styles; plot_models() pairs them with the
# models via zip(), so at most five models are drawn.
colors = ['g', 'k', 'b', 'm', 'r']
linestyles = ['-', '-.', '--', ':', '-']


def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
    """Draw the raw-data scatter plot plus the fitted curves and save the image.

    :param x: horizontal-axis data
    :param y: vertical-axis data
    :param models: fitted models to draw (passed as a list), or None to draw
        only the scatter plot
    :param fname: file name the image is saved under
    :param mx: x values at which the fitted curves are evaluated
    :param ymax: upper limit of the y axis
    :param xmin: lower limit of the x axis
    :return: None
    """
    plt.clf()  # clear the current figure
    # s: marker size, alpha: transparency
    plt.scatter(x, y, s=10, alpha=1, marker='.')

    # plt.title("Last month network traffic graph")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")
    # One tick per week (7*24 hours).  NOTE(review): the week count was lost
    # in the source text; 10 is assumed.
    plt.xticks([w * 7 * 24 for w in range(10)],
               ["Week %i" % w for w in range(10)])

    if models:  # only draw fitted curves when models were given
        if mx is None:
            # NOTE(review): the sample count was garbled in the source text;
            # 1000 is assumed.
            mx = sp.linspace(0, x[-1], 1000)
        # zip() walks the models together with their style and color.
        for model, style, color in zip(models, linestyles, colors):
            # The label is attached to the line itself so the legend entry
            # always belongs to the curve and never to the scatter points.
            plt.plot(mx, model(mx), linestyle=style, linewidth=2, c=color,
                     label="d=%i" % model.order)
        plt.legend(loc="upper left")

    plt.autoscale(tight=True)
    plt.ylim(bottom=0)
    # Compare against None so that an explicit limit of 0 is honored.
    if ymax is not None:
        plt.ylim(top=ymax)
    if xmin is not None:
        plt.xlim(left=xmin)
    # Grid line settings; the color is given as a grayscale value.
    plt.grid(True, linestyle='-', color='0.75')
    plt.savefig(fname)

# Full data plotting:
# Draw the raw-data scatter plot only (no models).
plot_models(x, y, None, os.path.join("..", "1400_01_01.png"))

# First-order fit; full=True also returns the residuals and diagnostics.
fp1, res, rank, sv, rcond = sp.polyfit(x, y, 1, full=True)
print("Fit parameter:%s" % fp1)
print("error value:%s" % res)

# Polynomial models of increasing order fitted on the full data set.
f1 = sp.poly1d(fp1)
f2 = sp.poly1d(sp.polyfit(x, y, 2))
f3 = sp.poly1d(sp.polyfit(x, y, 3))
f10 = sp.poly1d(sp.polyfit(x, y, 10))
f100 = sp.poly1d(sp.polyfit(x, y, 100))

plot_models(x, y, [f1], os.path.join("..", "1400_01_02.png"))
plot_models(x, y, [f1, f2], os.path.join("..", "1400_01_03.png"))
plot_models(x, y, [f1, f2, f3, f10, f100], os.path.join("..", "1400_01_04.png"))
Turning Point processing:
# Separating the data before and after the turning point:
# Turning point processing section
# The turning point lies at week 3.5, expressed in hours.  It is cast to int
# because a float cannot be used as a slice index.
inflection = int(3.5 * 7 * 24)
xa = x[:inflection]
ya = y[:inflection]
xb = x[inflection:]
yb = y[inflection:]
# Note the slice notation: no commas.
Fitting separately before and after the turning point:
# First-order fit before the turning point
fa = sp.poly1d(sp.polyfit(xa, ya, 1))
# First-order fit after the turning point
fb = sp.poly1d(sp.polyfit(xb, yb, 1))

plot_models(x, y, [fa, fb], os.path.join("..", "1400_01_05.png"))


def error(f, x, y):
    """Return the sum of squared differences between f(x) and y.

    :param f: callable model evaluated at x
    :param x: input values
    :param y: observed values
    :return: sum over all samples of (f(x) - y) ** 2
    """
    # Square each residual first, then sum; summing before squaring would let
    # positive and negative residuals cancel each other out.
    return sp.sum((f(x) - y) ** 2)


print("All Data base Error statistics:")
for f in [f1, f2, f3, f10, f100]:
    print("Error d=%i: %f" % (f.order, error(f, x, y)))

print("Error after turning point statistics:")
for f in [f1, f2, f3, f10, f100]:
    print("Error d=%i: %f" % (f.order, error(f, xb, yb)))

print("First-order stitching fitting Error Statistics: %f"
      % (error(fa, xa, ya) + error(fb, xb, yb)))
# Trend prediction section

# Six-week forecast using the models fitted on the full data set.
# NOTE(review): the linspace sample count was garbled in the source text;
# 100 is assumed here and below.
plot_models(x, y, [f1, f2, f3, f10, f100],
            os.path.join("..", "1400_01_06.png"),
            mx=sp.linspace(0, 6 * 7 * 24, 100),
            ymax=10000, xmin=0)

# Fit every model order on the data after the turning point only.
fb1 = fb
fb2 = sp.poly1d(sp.polyfit(xb, yb, 2))
fb3 = sp.poly1d(sp.polyfit(xb, yb, 3))
fb10 = sp.poly1d(sp.polyfit(xb, yb, 10))
fb100 = sp.poly1d(sp.polyfit(xb, yb, 100))

print("Error statistics after full model turning point:")
for f in [fb1, fb2, fb3, fb10, fb100]:
    print("Error d=%i: %f" % (f.order, error(f, xb, yb)))

# Six-week forecast using the models fitted after the turning point.
plot_models(x, y, [fb1, fb2, fb3, fb10, fb100],
            os.path.join("..", "1400_01_07.png"),
            mx=sp.linspace(0, 6 * 7 * 24, 100),
            ymax=10000, xmin=0)
The data after the turning point is processed separately:
sp.random.permutation(): this function returns a shuffled copy of its input.
import scipy as sp

sp.random.permutation([1, 2, 3, 4, 5])
# Out[3]:
# array([4, 5, 1, 2, 3])
30% of the data is used for testing and 70% for fitting; the data is split at random here.
# Only the data after the turning point is used; it is split into a training
# part and a test part.
frac = 0.3                          # fraction of the data held out for testing
split_idx = int(frac * len(xb))     # number of samples in the 30% test part
# All indices of xb in random order.
shuffled = sp.random.permutation(list(range(len(xb))))
# The first 30% of the shuffled indices form the test set, sorted afterwards.
test = sorted(shuffled[:split_idx])
# The remaining 70% of the shuffled indices form the training set, sorted
# afterwards.
train = sorted(shuffled[split_idx:])
Fit comparison:
# Fit every model order on the training samples only.
fbt1 = sp.poly1d(sp.polyfit(xb[train], yb[train], 1))
fbt2 = sp.poly1d(sp.polyfit(xb[train], yb[train], 2))
fbt3 = sp.poly1d(sp.polyfit(xb[train], yb[train], 3))
fbt10 = sp.poly1d(sp.polyfit(xb[train], yb[train], 10))
fbt100 = sp.poly1d(sp.polyfit(xb[train], yb[train], 100))

# Evaluate each model on the held-out test samples.
print("Test point Error:")
for f in [fbt1, fbt2, fbt3, fbt10, fbt100]:
    print("Error d=%i: %f" % (f.order, error(f, xb[test], yb[test])))

# Plot the models that were fitted on the training part.
# NOTE(review): the linspace sample count was missing in the source text;
# 100 is assumed.
plot_models(x, y, [fbt1, fbt2, fbt3, fbt10, fbt100],
            os.path.join('..', '1400_01_08.png'),
            mx=sp.linspace(0, 6 * 7 * 24, 100),
            ymax=10000, xmin=0)
Solving the equation with an optimizer:
from scipy.optimize import fsolve

# Predict the time at which traffic reaches 100,000 hits/hour.
print(fbt2)
print(fbt2 - 100000)
# Find the root of fbt2(x) - 100000, starting the search at x = 800 hours,
# and convert the result from hours to weeks.
reached_max = fsolve(fbt2 - 100000, 800) / (7 * 24)
# fsolve returns an array; take its single element for %f formatting.
print("100,000 hits/hour expected at week %f" % reached_max[0])
"Python" Machinelearning Machine Learning Introduction _ Minimal Machine learning application