Linear regression formula:
import pandas as pd
import matplotlib.pyplot as plt

# Column names for the auto-mpg data file. "Weight" is capitalised because
# that is the key the later statements in this file use to select the column.
columns = [
    "mpg",
    "cylinders",
    "displacement",
    "horsepower",
    "Weight",
    "acceleration",
    "model year",
    "origin",
    "car name",
]

# The raw data file contains only data rows (no header line), so `columns`
# supplies the column names through `names=`. sep=r"\s+" splits fields on
# runs of whitespace; it is the documented equivalent of
# delim_whitespace=True, which recent pandas releases deprecate.
cars = pd.read_table(
    "D:\\test\\machinelearning\\auto-mpg.data",
    sep=r"\s+",
    names=columns,
)
cars.head(2)  # preview the first two rows
    mpg  cylinders  displacement  horsepower  Weight  acceleration  model year  origin                   car name
0  18.0          8         307.0       130.0  3504.0          12.0          70       1  Chevrolet Chevelle Malibu
1  15.0          8         350.0       165.0  3693.0          11.5          70       1          Buick Skylark 320
# Scatter plot of car weight (x axis) against miles per gallon (y axis),
# drawn in the first cell of a 2-row, 1-column subplot grid.
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
cars.plot("Weight", "mpg", kind="scatter", ax=ax1)
plt.show()
import sklearn
from sklearn.linear_model import LinearRegression

lr = LinearRegression()  # create the linear regression model

# Train the model: the input is the car weight and the target is the distance
# driven per gallon (mpg). The double brackets select a one-column DataFrame
# rather than a Series, so the model receives a two-dimensional input.
lr.fit(cars[["Weight"]], cars["mpg"])

# After training, the model can be given data to see what values it predicts.
# For convenience the training data itself is reused here.
prediction = lr.predict(cars[["Weight"]])

# Compare the first five predictions with the first five true mpg values.
print(prediction[0:5])
print(cars["mpg"][0:5])
[19.41852276 17.96764345 19.94053224 19.96356207 19.84073631]
0 18.0
1 15.0
2 18.0
3 16.0
4 17.0
Name: mpg, dtype: float64
# Overlay the true mpg values (red) and the model's predictions (blue),
# both plotted against car weight, on the same axes.
plt.scatter(cars["Weight"], cars["mpg"], c="red")
plt.scatter(cars["Weight"], prediction, c="blue")
plt.show()
from sklearn.metrics import mean_squared_error

# Mean squared error between the true mpg values and the predictions.
# Bound to the lowercase name `mse`, which the following expression reads.
mse = mean_squared_error(cars["mpg"], prediction)
print(mse)
18.7809397346
mse**0.5 # then take the square root, giving the root-mean-squared error
4.3336981591509574