Machine Learning: Wine classification

Source: Internet
Author: User
Tags: normalizer, svm

Data source: http://archive.ics.uci.edu/ml/datasets/Wine

Reference: "Machine Learning: Python Practice" (《机器学习Python实践》) by Wei Zhenyuan

Purpose of this post: review

Tool: Geany

# Import libraries

from pandas import read_csv                 # read the data
from pandas.plotting import scatter_matrix  # scatter-plot matrix
from pandas import set_option               # control printed precision

import numpy as np

import matplotlib.pyplot as plt             # plotting

from sklearn.preprocessing import Normalizer      # preprocessing: normalization (unit-norm rows)
from sklearn.preprocessing import StandardScaler  # preprocessing: standardization (zero mean, unit variance)
from sklearn.preprocessing import MinMaxScaler    # preprocessing: rescale to a fixed range

from sklearn.model_selection import train_test_split  # split the dataset
from sklearn.model_selection import cross_val_score   # estimate algorithm accuracy
from sklearn.model_selection import KFold              # cross-validation
from sklearn.model_selection import GridSearchCV       # hyperparameter tuning via grid search

from sklearn.linear_model import LinearRegression    # linear regression
from sklearn.linear_model import Lasso               # lasso regression
from sklearn.linear_model import ElasticNet          # elastic net regression
from sklearn.linear_model import LogisticRegression  # logistic regression

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis     # linear discriminant analysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis  # quadratic discriminant analysis
from sklearn.tree import DecisionTreeRegressor   # decision tree regression
from sklearn.tree import DecisionTreeClassifier  # decision tree classification

from sklearn.neighbors import KNeighborsRegressor   # KNN regression
from sklearn.neighbors import KNeighborsClassifier  # KNN classification

from sklearn.naive_bayes import GaussianNB  # Gaussian naive Bayes classifier

from sklearn.svm import SVR  # support vector machine regression
from sklearn.svm import SVC  # support vector machine classification

from sklearn.pipeline import Pipeline  # automates the whole workflow from data transformation to model evaluation

from sklearn.ensemble import RandomForestRegressor       # random forest regression
from sklearn.ensemble import RandomForestClassifier      # random forest classification
from sklearn.ensemble import GradientBoostingRegressor   # gradient boosting regression
from sklearn.ensemble import GradientBoostingClassifier  # gradient boosting classification
from sklearn.ensemble import ExtraTreesRegressor         # extra-trees regression
from sklearn.ensemble import ExtraTreesClassifier        # extra-trees classification
from sklearn.ensemble import AdaBoostRegressor           # AdaBoost regression
from sklearn.ensemble import AdaBoostClassifier          # AdaBoost classification

from sklearn.metrics import mean_squared_error     # mean squared error
from sklearn.metrics import accuracy_score         # classification accuracy
from sklearn.metrics import confusion_matrix       # confusion matrix
from sklearn.metrics import classification_report  # classification report


# Load the data
filename = 'wine.csv'
data = read_csv(filename, header=None, delimiter=',')

# Understand the data
print(data.shape)
#print(data.dtypes)
#print(data.corr(method='pearson'))
#print(data.describe())
#print(data.groupby(0).size())
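
For orientation (these figures come from the UCI page, not from running the script): the Wine data set has 178 samples, with the class label (1, 2 or 3) in column 0 and 13 chemical measurements in columns 1-13, so the checks above should report:

# Expected for the UCI Wine data:
#   data.shape             -> (178, 14)
#   data.groupby(0).size() -> 59 / 71 / 48 samples for classes 1 / 2 / 3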


# Data visualization: histograms, density plots, scatter-plot matrix, correlation matrix

# Histograms
#data.hist()
#plt.show()

# Density plots
#data.plot(kind='density', subplots=True, layout=(4, 4), sharex=False, sharey=False)
#plt.show()

# Scatter-plot matrix
#scatter_matrix(data)
#plt.show()

# Correlation matrix plot
#fig = plt.figure()
#ax = fig.add_subplot(111)
#cax = ax.matshow(data.corr(), vmin=-1, vmax=1)
#fig.colorbar(cax)
#plt.show()



# Data preprocessing: rescale, normalize, standardize (binarization is not used here)
array = data.values
X = array[:, 1:14].astype(float)
Y = array[:, 0]

# Rescale every feature to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X)
X_m = scaler.transform(X)

# Normalize every sample to unit norm
scaler = Normalizer().fit(X)
X_n = scaler.transform(X)

# Standardize every feature to zero mean and unit variance
scaler = StandardScaler().fit(X)
X_s = scaler.transform(X)
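
A quick sanity check (my addition, not part of the original script) makes the difference between the three transforms concrete; the exact values depend on the data, but these structural properties always hold:

# MinMaxScaler maps every column into [0, 1]
print(X_m.min(), X_m.max())              # -> 0.0 1.0
# Normalizer gives every row unit Euclidean norm
print(np.linalg.norm(X_n, axis=1)[:3])   # -> [1. 1. 1.]
# StandardScaler gives every column ~zero mean and unit variance
print(X_s.mean(axis=0).round(6))
print(X_s.std(axis=0).round(6))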

# Split the data sets (one split per preprocessing variant)
validation_size = 0.2
seed = 7

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=validation_size, random_state=seed)

X_m_train, X_m_test, Y_m_train, Y_m_test = train_test_split(X_m, Y, test_size=validation_size, random_state=seed)

X_n_train, X_n_test, Y_n_train, Y_n_test = train_test_split(X_n, Y, test_size=validation_size, random_state=seed)

X_s_train, X_s_test, Y_s_train, Y_s_test = train_test_split(X_s, Y, test_size=validation_size, random_state=seed)
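
One caveat worth noting: the scalers above were fit on all of X before the split, so the held-out rows influence the transform. A leak-free variant (a sketch of mine, not the book's code) puts the scaler inside the Pipeline imported earlier, so it is refit on the training folds only:

# Leak-free alternative: the scaler only ever sees training folds
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('lda', LinearDiscriminantAnalysis()),
])
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
print(cross_val_score(pipe, X_train, Y_train, scoring='accuracy', cv=kfold).mean())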

# Choose models (this example is a classification problem)
# Nonlinear: KNN, SVM, CART, GaussianNB, QDA
# Linear: LDA (the commented-out regressors below do not apply to classification)
models = {}
models['KNN'] = KNeighborsClassifier()
models['SVM'] = SVC()
models['CART'] = DecisionTreeClassifier()
models['GN'] = GaussianNB()
#models['LR'] = LinearRegression()
#models['Lasso'] = Lasso()
#models['EN'] = ElasticNet()
models['LDA'] = LinearDiscriminantAnalysis()
models['QDA'] = QuadraticDiscriminantAnalysis()

# Evaluate models
scoring = 'accuracy'
num_folds = 10
seed = 7

results = []
for key in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results = cross_val_score(models[key], X_train, Y_train, scoring=scoring, cv=kfold)
    results.append(cv_results)
    print('%s %f (%f)' % (key, cv_results.mean(), cv_results.std()))

results_m = []
for key in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results_m = cross_val_score(models[key], X_m_train, Y_m_train, scoring=scoring, cv=kfold)
    results_m.append(cv_results_m)
    print('Rescaled data: %s %f (%f)' % (key, cv_results_m.mean(), cv_results_m.std()))

results_n = []
for key in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results_n = cross_val_score(models[key], X_n_train, Y_n_train, scoring=scoring, cv=kfold)
    results_n.append(cv_results_n)
    print('Normalized data: %s %f (%f)' % (key, cv_results_n.mean(), cv_results_n.std()))

results_s = []
for key in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results_s = cross_val_score(models[key], X_s_train, Y_s_train, scoring=scoring, cv=kfold)
    results_s.append(cv_results_s)
    print('Standardized data: %s %f (%f)' % (key, cv_results_s.mean(), cv_results_s.std()))
# Box plots to compare the algorithms (see the sketch below)
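
The comment above only marks where a comparison plot would go; a minimal sketch of such a box plot (my addition, following the usual matplotlib pattern) is:

fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results_s)                   # e.g. compare scores on the standardized data
ax.set_xticklabels(list(models.keys()))  # one box per model, in insertion order
plt.show()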

# Algorithm tuning: LDA
param_grid = {'solver': ['svd', 'lsqr', 'eigen']}
model = LinearDiscriminantAnalysis()
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(X=X_train, y=Y_train)
print('Best: %s using %s' % (grid_result.best_score_, grid_result.best_params_))
cv_results = zip(grid_result.cv_results_['mean_test_score'],
                 grid_result.cv_results_['std_test_score'],
                 grid_result.cv_results_['params'])
for mean, std, params in cv_results:
    print('%f (%f) with %r' % (mean, std, params))
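
The 'lsqr' and 'eigen' solvers additionally accept a shrinkage parameter, so the search could be widened; a hedged sketch (my extension, not in the original):

param_grid = {'solver': ['lsqr', 'eigen'],
              'shrinkage': [None, 'auto', 0.1, 0.5]}  # shrinkage is only valid with 'lsqr'/'eigen'
grid = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, scoring=scoring, cv=kfold)
print(grid.fit(X_train, Y_train).best_params_)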


# Ensemble methods
# bagging: random forest, extra trees
# boosting: AdaBoost, gradient boosting
ensembles = {}
ensembles['RF'] = RandomForestClassifier()
ensembles['ET'] = ExtraTreesClassifier()
ensembles['ADA'] = AdaBoostClassifier()
ensembles['GBM'] = GradientBoostingClassifier()

results = []
for key in ensembles:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results = cross_val_score(ensembles[key], X_train, Y_train, scoring=scoring, cv=kfold)
    results.append(cv_results)
    print('%s %f (%f)' % (key, cv_results.mean(), cv_results.std()))

# Tuning the ensemble: GBM
param_grid = {'n_estimators': [10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900]}
model = GradientBoostingClassifier()
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold, scoring=scoring)
grid_result = grid.fit(X=X_train, y=Y_train)
print('Best: %s using %s' % (grid_result.best_score_, grid_result.best_params_))
cv_results = zip(grid_result.cv_results_['mean_test_score'],
                 grid_result.cv_results_['std_test_score'],
                 grid_result.cv_results_['params'])
for mean, std, params in cv_results:
    print('%f (%f) with %r' % (mean, std, params))
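
The number of trees interacts with the learning rate, so the two are often tuned jointly; a short sketch (my extension, not the original grid):

param_grid = {'n_estimators': [100, 300, 500],
              'learning_rate': [0.05, 0.1, 0.2]}  # smaller steps usually need more trees
grid = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=kfold, scoring=scoring)
print(grid.fit(X_train, Y_train).best_params_)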

# Train the final model
model = LinearDiscriminantAnalysis(solver='svd')
model.fit(X=X_train, y=Y_train)

# Evaluate the final model
predictions = model.predict(X_test)
print(accuracy_score(Y_test, predictions))
print(confusion_matrix(Y_test, predictions))
print(classification_report(Y_test, predictions))
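
Once the final model checks out on the held-out data, it can be persisted for later use; a minimal sketch, assuming joblib is available (it is a scikit-learn dependency):

from joblib import dump, load

dump(model, 'wine_lda_model.sav')    # save the trained model to disk
loaded = load('wine_lda_model.sav')  # reload it later
print(loaded.predict(X_test[:5]))    # sanity check on a few held-out rows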



