# 標籤: (Tags:)
import random

import numpy as np


def SumSquareError(dataset, A):
    """Return the sum of squared errors of a polynomial against a data set.

    Args:
        dataset: Array-like of shape (n, 2); row ``i`` is ``[x_i, y_i]``.
        A: Polynomial coefficients ``[a0, a1, ..., ak]`` in ascending order
            of power, as accepted by :func:`hypfunc`.

    Returns:
        ``sum((hypfunc(x_i, A) - y_i) ** 2)`` over all rows, or ``0.0`` when
        the data set is empty.
    """
    points = np.asarray(dataset)
    if points.size == 0:
        # No points means no error; avoids indexing columns of an empty array.
        return 0.0
    # hypfunc works element-wise, so the whole x column is evaluated at once.
    residuals = hypfunc(points[:, 0], A) - points[:, 1]
    return np.sum(residuals ** 2)


def hypfunc(x, A):
    """Evaluate the polynomial ``a0 + a1*x + ... + ak*x**k`` at ``x``.

    Args:
        x: Abscissa; a scalar or a NumPy array (evaluated element-wise).
        A: Coefficients ``[a0, a1, ..., ak]`` in ascending order of power.

    Returns:
        The polynomial value(s); ``0`` when ``A`` is empty.
    """
    # Use the built-in sum(): calling np.sum() on a generator is deprecated
    # and only ever worked by deferring to the built-in.
    return sum(coeff * x ** power for power, coeff in enumerate(A))


# Least-squares idea
# ------------------
# Hypothesis:  yh(x) = a0*x^0 + a1*x^1 + a2*x^2 + ... + ak*x^k
# Error:       R2 = sum_i (y_i - yh(x_i))^2,  i = 1..n, to be minimised.
# Setting the partial derivative of R2 with respect to each a_j to zero
# gives k+1 equations:
#     d(R2)/d(a_j) = -2 * sum_i (y_i - yh(x_i)) * x_i^j = 0,  j = 0..k
# With the Vandermonde matrix V = [[1, x1, ..., x1^k], ..., [1, xn, ..., xn^k]]
# the over-determined system is V * [a0, ..., ak] = [y1, ..., yn], and the
# k+1 equations above are its normal equations (V^T V) * a = V^T * y.


def normal_equations(xs, ys, order):
    """Build the normal equations of a polynomial least-squares fit.

    Args:
        xs: Sequence of n abscissas.
        ys: Sequence of n ordinates.
        order: Polynomial degree k; the fit has ``k + 1`` coefficients.

    Returns:
        A tuple ``(matX, matY)`` where ``matX[k1, k2] = sum_i xs[i]**(k1+k2)``
        has shape ``(order + 1, order + 1)`` and
        ``matY[k] = sum_i xs[i]**k * ys[i]`` has shape ``(order + 1,)``.
    """
    xs = np.asarray(xs, dtype=float)
    ys = np.asarray(ys, dtype=float)
    # Columns are xs**0, xs**1, ..., xs**order.
    vander = np.vander(xs, order + 1, increasing=True)
    return vander.T.dot(vander), vander.T.dot(ys)


def main():
    """Fit a 9th-order polynomial to a randomly perturbed curve and plot it."""
    # Imported here so the numeric helpers above can be imported and used
    # without a plotting backend installed.
    import matplotlib.pyplot as plt

    # Sample points on the reference curve.
    x = np.arange(-1, 1, 0.02)
    t = x * x - 1
    y = (t * t * t + 0.5) * np.sin(x * 2)

    # Perturb every point: one random factor in [0.60, 1.40] per point,
    # applied to both of its coordinates.
    scale = np.array([random.randint(60, 140) / 100.0 for _ in x])
    xa = x * scale
    ya = y * scale

    order = 9  # polynomial degree k (powers 0..k)
    matX, matY = normal_equations(xa, ya, order)
    print("{} {}".format(matX.shape, matY.shape))

    A = np.linalg.solve(matX, matY)
    print(A)

    # Plot the perturbed data points and the fitted curve.
    plt.figure()
    plt.plot(xa, ya, linestyle='', marker='.')
    plt.plot(x, hypfunc(x, A), linestyle='-', marker='')
    plt.show()


if __name__ == "__main__":
    main()
# 最小二乘法 Python 實現 (Least-squares method: a Python implementation)