Python clustering algorithm and displaying the results as images -- Python learning notes 23

Source: Internet
Author: User

Data:

http://download.csdn.net/detail/qq_26948675/9683350

Open the link and click the blue resource name to view the resource; you can then download the data.

Code:

# -*- coding: utf-8 -*-
# Cluster consumption-behaviour feature data with the K-Means algorithm.

import pandas as pd
from sklearn.cluster import KMeans


# Parameter initialisation.
# NOTE(review): the paths are kept as they appear on the page (only the
# padding spaces inside the quotes were removed) -- confirm the directory case.
inputfile = 'Chapter5/demo/data/consumption_data.xls'  # sales and other attribute data
outputfile = 'Chapter5/demo/data_type.xls'  # file the labelled result is saved to
k = 3  # number of clusters
# NOTE(review): the value was lost when the page was scraped; 500 is assumed
# here as the maximum number of clustering iterations -- TODO confirm.
iteration = 500
data = pd.read_excel(inputfile, index_col='Id')  # load the data, indexed by the 'Id' column
data_zs = 1.0 * (data - data.mean()) / data.std()  # standardise each column (z-score)


# Split the samples into k clusters. The original call also passed n_jobs=4;
# that argument is no longer accepted by current scikit-learn releases, so it
# is omitted here.
model = KMeans(n_clusters=k, max_iter=iteration)
model.fit(data_zs)  # run the clustering


# Print a short summary.
r1 = pd.Series(model.labels_).value_counts()  # number of samples in each cluster
r2 = pd.DataFrame(model.cluster_centers_)  # cluster centres
# axis=1 joins side by side (0 would stack vertically), giving for every
# cluster centre the number of samples assigned to it.
r = pd.concat([r2, r1], axis=1)
r.columns = list(data.columns) + ['category number']  # rename the header
print(r)


# Write out the original data together with the cluster of every sample.
r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
r.columns = list(data.columns) + ['cluster class']  # rename the header
# NOTE(review): writing the legacy .xls format depends on the Excel writer
# engines installed alongside pandas -- confirm it works in your environment.
r.to_excel(outputfile)  # save the result


def density_plot(data):
    """Draw a kernel-density estimate of every column of *data*.

    Each column is drawn in its own subplot and every subplot gets the
    y-axis label ``density``.

    Args:
        data: pandas DataFrame whose columns are plotted.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can chain
        ``.savefig(...)`` or ``.show()`` on the result.
    """
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so that Chinese labels render
    plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign renders
    axes = data.plot(kind='kde', linewidth=2, subplots=True, sharex=False)
    # Label every subplot that was actually created. The original indexed
    # the axes with range(k), i.e. the global cluster count, which only
    # matches when the number of columns happens to equal k.
    for ax in axes:
        ax.set_ylabel('density')
    plt.legend()
    return plt


# Save one density figure per cluster; the cluster index is appended to the
# filename prefix. Relies on k, data, r and density_plot defined earlier in
# this script.
pic_output = 'Chapter5/demo/data'  # filename prefix of the probability-density figures
for i in range(k):
    # Select the rows of the original data assigned to cluster i.
    density_plot(data[r['cluster class'] == i]).savefig('%s%s.png' % (pic_output, i))
# -*- coding: utf-8 -*-
# Continuation of k_means.py: relies on pd, data_zs and r defined there.
from sklearn.manifold import TSNE


# Reduce the dimensionality of the standardised data, then wrap the
# embedding in a DataFrame that shares the sample index so it can be
# filtered with the cluster labels held in r.
embedding = TSNE().fit_transform(data_zs)
tsne = pd.DataFrame(embedding, index=data_zs.index)


import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so that Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # so that the minus sign renders


# Draw every cluster with its own colour and marker style:
# cluster 0 -> red dots, cluster 1 -> green circles, cluster 2 -> blue stars.
for label, style in enumerate(('r.', 'go', 'b*')):
    d = tsne[r['cluster class'] == label]
    plt.plot(d[0], d[1], style)
plt.show()

Results:



Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.