Data:
http://download.csdn.net/detail/qq_26948675/9683350
Open the link, click the blue resource name, and view the resource page; from there you can download the data.
Code:
# -*- coding: utf-8 -*-
# Cluster consumption-behaviour feature data with the K-means algorithm.
import pandas as pd
from sklearn.cluster import KMeans

# Parameter initialisation.
inputfile = 'Chapter5/demo/data/consumption_data.xls'  # sales and other attribute data
outputfile = 'Chapter5/demo/data_type.xls'  # file the labelled samples are written to
k = 3  # number of clusters
# NOTE(review): the source read "iteration = " with no value (a syntax error);
# 500 is an assumed value -- confirm against the original tutorial.
iteration = 500  # maximum number of K-means iterations

# Read the data, using the 'Id' column as the row index.
data = pd.read_excel(inputfile, index_col='Id')
# Standardise every column to zero mean and unit standard deviation.
data_zs = 1.0 * (data - data.mean()) / data.std()

# The original call also passed n_jobs=4. That keyword is no longer accepted
# by current scikit-learn releases, so it is omitted here.
model = KMeans(n_clusters=k, max_iter=iteration)
model.fit(data_zs)  # run the clustering

# Print a short summary: one row per cluster with its centre and its size.
r1 = pd.Series(model.labels_).value_counts()  # number of samples per cluster
r2 = pd.DataFrame(model.cluster_centers_)  # cluster centres
# axis=1 joins side by side (0 would stack vertically); rows align on cluster id.
r = pd.concat([r2, r1], axis=1)
r.columns = list(data.columns) + [u'category number']  # rename the header
print(r)

# Write the original data together with the cluster assigned to each sample.
r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
r.columns = list(data.columns) + [u'cluster class']  # rename the header
# NOTE(review): writing the legacy .xls format depends on the installed pandas
# and Excel engine versions -- confirm it is supported in the target environment.
r.to_excel(outputfile)  # save the result
def density_plot(data):
    """Draw one kernel-density subplot per column of *data*.

    Args:
        data: pandas DataFrame whose columns are plotted, one subplot each.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can chain
        ``.savefig(...)`` on the figure that was just drawn.
    """
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    axes = data.plot(kind='kde', linewidth=2, subplots=True, sharex=False)
    # Label every subplot that was actually created. The original indexed the
    # axes with range(k), which only works when the number of columns happens
    # to equal the global cluster count.
    for ax in axes:
        ax.set_ylabel(u'density')
    plt.legend()
    return plt
pic_output = 'Chapter5/demo/data'  # file-name prefix of the probability-density plots
# Save one density figure per cluster, named <prefix><cluster id>.png.
for i in range(k):
    plot_api = density_plot(data[r[u'cluster class'] == i])
    plot_api.savefig(u'%s%s.png' % (pic_output, i))
    # Close the figure once it is on disk so open figures do not pile up.
    plot_api.close()
# -*- coding: utf-8 -*-
# Continuation of k_means.py: relies on `pd`, `data_zs` and `r` (with its
# 'cluster class' column) created by that script.
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Reduce the standardised data with t-SNE; fit_transform returns the embedding.
embedding = TSNE().fit_transform(data_zs)
# Wrap the embedding in a DataFrame that shares the samples' index so it can
# be filtered with the cluster labels stored in `r`.
tsne = pd.DataFrame(embedding, index=data_zs.index)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

# Draw each cluster with its own colour and marker style.
for label, style in ((0, 'r.'), (1, 'go'), (2, 'b*')):
    d = tsne[r[u'cluster class'] == label]
    plt.plot(d[0], d[1], style)
plt.show()
Results: