Python Random classification

Source: Internet
Author: User

#encoding: Utf-8
import numpy as np
import pandas as pd
import pylab as pl
from matplotlib.pyplot import plot
from sklearn import datasets, linear_model
from sklearn.metrics import roc_curve, auc

def confusionmatrix(predicted, actual, threshold):
    """Count the confusion-matrix cells for thresholded binary predictions.

    An example is an actual positive when its label is greater than 0.5,
    and a predicted positive when its prediction is strictly greater than
    ``threshold``. For an actual negative, a prediction exactly equal to
    ``threshold`` counts as a false positive, because only predictions
    strictly below the threshold are true negatives.

    Args:
        predicted: Sequence of numeric predictions, one per example.
        actual: Sequence of numeric labels, same length as ``predicted``.
        threshold: Decision threshold applied to ``predicted``.

    Returns:
        ``[tp, fn, fp, tn]`` as floats, or ``-1`` when the two sequences
        differ in length (kept as a sentinel for backward compatibility).
    """
    if len(predicted) != len(actual):
        return -1

    tp = 0.0
    fp = 0.0
    tn = 0.0
    fn = 0.0
    for prediction, label in zip(predicted, actual):
        if label > 0.5:
            # Actual positive.
            if prediction > threshold:
                tp += 1.0
            else:
                fn += 1.0
        else:
            # Actual negative.
            if prediction < threshold:
                tn += 1.0
            else:
                fp += 1.0
    return [tp, fn, fp, tn]
def _plot_roc(actual, predictions, title, legend_loc):
    """Print the ROC AUC for the predictions and show the ROC curve."""
    # roc_curve returns the false/true positive rates at each threshold;
    # a higher area under the curve indicates a better classifier.
    fpr, tpr, _thresholds = roc_curve(actual, predictions)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)

    pl.clf()  # clear the figure before drawing
    pl.plot(fpr, tpr, label='ROC curve (area=%0.2f)' % roc_auc)  # ROC curve
    pl.plot([0, 1], [0, 1], 'k')  # diagonal reference line
    pl.xlim([0.0, 1.0])  # x-axis range
    pl.ylim([0.0, 1.0])  # y-axis range
    pl.xlabel('False Positive rate')
    pl.ylabel('True Positive rate')
    pl.title(title)
    pl.legend(loc=legend_loc)
    pl.show()


# Load the data. The last column of each row is the label; it is converted
# to a number: 'M' becomes 1.0 and anything else ('R') becomes 0.0.
x_list = []
labels = []
with open('sonar.all-data') as rock_data:
    for line in rock_data:
        stripped = line.strip()
        if not stripped:
            # Skip blank lines so they do not produce empty feature rows.
            continue
        row = stripped.split(",")
        labels.append(1.0 if row[-1] == 'M' else 0.0)
        row.pop()  # drop the label, leaving only the numeric features
        x_list.append([float(num) for num in row])
print(labels)

# Split the rows into two subsets by index modulo 3: every third row
# (1/3 of the data) is the test set, the remaining 2/3 the training set.
indices = range(len(x_list))
x_list_test = [x_list[i] for i in indices if i % 3 == 0]
x_list_train = [x_list[i] for i in indices if i % 3 != 0]
labels_test = [labels[i] for i in indices if i % 3 == 0]
labels_train = [labels[i] for i in indices if i % 3 != 0]

# Convert the lists to arrays.
x_train = np.array(x_list_train)
y_train = np.array(labels_train)
x_test = np.array(x_list_test)
y_test = np.array(labels_test)

# Build the model and fit it on the training data.
rocks_v_mines_model = linear_model.LinearRegression()
rocks_v_mines_model.fit(x_train, y_train)

# Predict on the training data.
training_predictions = rocks_v_mines_model.predict(x_train)
print("---------", training_predictions[0:5], training_predictions[-6:-1])

# Confusion matrix on the training data.
confusion_mat_train = confusionmatrix(training_predictions, y_train, 0.5)
print(confusion_mat_train)

# Predict on the test data.
test_predictions = rocks_v_mines_model.predict(x_test)

# Confusion matrix on the test data.
confusion_test = confusionmatrix(test_predictions, y_test, 0.5)
print(confusion_test)

# ROC curve on the training set.
_plot_roc(y_train, training_predictions,
          'In sample ROC rocks versus mines', "lower left")

# ROC curve on the test set.
_plot_roc(y_test, test_predictions,
          'Out-of-sample ROC rocks versus mines', "lower right")


ROC curves on the training set

ROC curves on the test set

Python Random classification

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please email us, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.