#encoding: Utf-8
import numpy as np
import pandas as pd
import pylab as pl
from matplotlib.pyplot import plot
from sklearn import datasets, linear_model
from sklearn.metrics import roc_curve, auc
def confusionmatrix(predicted, actual, threshold):
    """Count confusion-matrix cells for thresholded scores against binary labels.

    An example is treated as a true positive case when its label is greater
    than 0.5, and as a predicted positive when its score is strictly greater
    than ``threshold``. A negative example is counted as a true negative only
    when its score is strictly less than ``threshold``, so a score exactly
    equal to ``threshold`` is always counted as an error (a false negative
    for a positive label, a false positive for a negative label).

    Args:
        predicted: Sequence of numeric scores, one per example.
        actual: Sequence of numeric labels, aligned with ``predicted``.
        threshold: Decision threshold applied to each score.

    Returns:
        ``[tp, fn, fp, tn]`` as floats, or ``-1`` when the two sequences
        differ in length.
    """
    if len(predicted) != len(actual):
        return -1

    tp = fp = tn = fn = 0.0
    for score, label in zip(predicted, actual):
        if label > 0.5:  # positive example
            if score > threshold:
                tp += 1.0
            else:
                fn += 1.0
        elif score < threshold:  # negative example, correctly rejected
            tn += 1.0
        else:  # negative example, incorrectly accepted
            fp += 1.0
    return [tp, fn, fp, tn]
def _plot_roc(labels, predictions, title, legend_loc):
    """Print the AUC for one set of predictions and show its ROC curve."""
    # roc_curve yields the false/true positive rates; auc integrates them.
    fpr, tpr, _thresholds = roc_curve(labels, predictions)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)

    pl.clf()  # clear the current figure before drawing
    pl.plot(fpr, tpr, label='ROC curve (area=%0.2f)' % roc_auc)
    pl.plot([0, 1], [0, 1], 'k')  # diagonal reference line in black
    pl.xlim([0.0, 1.0])
    pl.ylim([0.0, 1.0])
    pl.xlabel('False Positive rate')
    pl.ylabel('True Positive rate')
    pl.title(title)
    pl.legend(loc=legend_loc)
    pl.show()


# Load the data: every line is comma-separated numeric attributes followed
# by a label in the last column.
x_list = []
labels = []
with open('sonar.all-data') as rock_data:
    for line in rock_data:
        line = line.strip()
        if not line:
            continue  # a blank line carries no example
        row = line.split(",")
        # Encode the label numerically: 'M' becomes 1.0, anything else 0.0.
        labels.append(1.0 if row.pop() == 'M' else 0.0)
        x_list.append([float(num) for num in row])
print(labels)

# Split by row index modulo 3: every third row (1/3 of the data) goes to the
# test set, the remaining rows (2/3) to the training set.
x_list_test = [row for i, row in enumerate(x_list) if i % 3 == 0]
x_list_train = [row for i, row in enumerate(x_list) if i % 3 != 0]
labels_test = [label for i, label in enumerate(labels) if i % 3 == 0]
labels_train = [label for i, label in enumerate(labels) if i % 3 != 0]

# Convert the lists to arrays.
x_train = np.array(x_list_train)
y_train = np.array(labels_train)
x_test = np.array(x_list_test)
y_test = np.array(labels_test)

# Fit the model on the training data.
rocks_v_mines_model = linear_model.LinearRegression()
rocks_v_mines_model.fit(x_train, y_train)

# Predict on the training data and show a few of the predictions.
training_predictions = rocks_v_mines_model.predict(x_train)
print("---------", training_predictions[0:5], training_predictions[-6:-1])

# Confusion matrix on the training data at a 0.5 threshold.
confusion_mat_train = confusionmatrix(training_predictions, y_train, 0.5)
print(confusion_mat_train)

# Predict on the test data, then build its confusion matrix the same way.
test_predictions = rocks_v_mines_model.predict(x_test)
confusion_mat_test = confusionmatrix(test_predictions, y_test, 0.5)
print(confusion_mat_test)

# ROC curve and AUC on the training set (a higher AUC is better).
_plot_roc(y_train, training_predictions,
          'In sample ROC rocks versus mines', "lower left")

# ROC curve and AUC on the test set.
_plot_roc(y_test, test_predictions,
          'Out-of-sample ROC rocks versus mines', "lower right")
# ROC curve on the training set
# ROC curve on the test set
# Python random classification