# Linear regression least squares
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm
import tushare as ts
from sklearn import linear_model
# Inclusive label bounds used to split the merged return series by date.
TRAIN_START = '2017-01-01'
TRAIN_END = '2018-04-30'
TEST_START = '2018-05-01'


def log_returns(close):
    """Return the log return ln(P_t / P_{t-1}) of a price Series.

    The first element has no predecessor and is therefore NaN.
    """
    return np.log(close / close.shift(1))


def to_column(series):
    """Return the values of *series* as an (n, 1) column array.

    The fit/predict calls below are given 2-D arrays with one column, so
    the 1-D series is reshaped explicitly.
    """
    values = np.asarray(series)
    return values.reshape(len(values), 1)


def pair_returns(market, stock):
    """Join two return Series on their shared index labels.

    Returns a DataFrame with column ``x`` (market) and ``y`` (stock);
    only labels present in both inputs are kept (inner join).
    """
    return pd.merge(
        market.to_frame('x'),
        stock.to_frame('y'),
        left_index=True,
        right_index=True,
    )


def load_history(code):
    """Fetch the history for *code*, sort it by index, and add log returns.

    The returns are stored in column ``re``; rows containing any missing
    value (including the first row, which has no return) are dropped.

    Raises:
        RuntimeError: if tushare returns no data for *code*.
    """
    hist = ts.get_hist_data(code)
    if hist is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError('no history returned for code %r' % (code,))
    hist = hist.sort_index()
    hist['re'] = log_returns(hist['close'])
    return hist.dropna()


def main():
    """Fit stock returns against index returns, then plot and score the fit."""
    sh = load_history('sh')      # Shanghai Composite index
    pf = load_history('600000')  # Shanghai Pudong Development Bank

    data = pair_returns(sh['re'], pf['re'])

    # Split into training and test sets by date.
    test_data = data.loc[TEST_START:]
    train_data = data.loc[TRAIN_START:TRAIN_END]
    x_train = to_column(train_data['x'])
    y_train = to_column(train_data['y'])
    x_test = to_column(test_data['x'])
    y_test = to_column(test_data['y'])

    # Build and fit the linear regression model.
    linearr = linear_model.LinearRegression()
    linearr.fit(x_train, y_train)

    # Fitted values on the training set.
    y_train_pred = linearr.predict(x_train)
    plt.figure()
    plt.scatter(x_train, y_train, color='green')  # raw training points
    plt.plot(x_train, y_train_pred, color='black', linewidth=4)  # fitted line
    plt.title('Train')
    plt.show()

    # Predictions on the held-out test set.
    y_test_pred = linearr.predict(x_test)
    plt.figure()
    plt.scatter(x_test, y_test, color='green')  # raw test points
    plt.plot(x_test, y_test_pred, color='black', linewidth=4)  # predicted line
    plt.title('Test')
    plt.show()

    print('mse=', sm.mean_squared_error(y_test, y_test_pred))  # lower is better
    print('r2=', sm.r2_score(y_test, y_test_pred))  # higher is better


if __name__ == '__main__':
    main()
# Linear regression with Python machine learning