# Cluster analysis ----
# Use a single plotting panel for the figures that follow.
par(mfrow = c(1, 1))

# Compute distances ----
# Install flexclust only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("flexclust", quietly = TRUE)) {
  install.packages("flexclust")
}

# The nutrient data set ships with flexclust.
data(nutrient, package = "flexclust")
head(nutrient, 4)
# Output recorded in the original notes:
##              Energy Protein fat Calcium iron
## Beef Braised    340      20  28       9  2.6
## Hamburger       245      21  17       9  2.7
## Beef Roast      420      15  39       7  2.0
## Beef Steak      375      19  32       9  2.6

# Pairwise distances between rows (dist() defaults to Euclidean distance).
d <- dist(nutrient)
# Show the distances among the first four foods as a plain matrix.
as.matrix(d)[1:4, 1:4]
# Output recorded in the original notes:
##              Beef Braised Hamburger beef roast Beef steak
## Beef braised      0.00000   95.6400   80.93429   35.24202
## Hamburger        95.64000    0.0000  176.49218  130.87784
## Beef Roast       80.93429  176.4922    0.00000   45.76418
## Beef Steak       35.24202  130.8778   45.76418    0.00000
# Hierarchical cluster analysis ----
# Use a single plotting panel for the dendrogram.
par(mfrow = c(1, 1))
data(nutrient, package = "flexclust")

# Lower-case the row names (they become the dendrogram leaf labels).
row.names(nutrient) <- tolower(row.names(nutrient))

# Standardise every variable so that no single variable dominates the
# distance computation because of its scale.
nutrient.scaled <- scale(nutrient)
d <- dist(nutrient.scaled)

# Average-linkage agglomerative clustering on the scaled distances.
fit.average <- hclust(d, method = "average")
# hang = -1 draws all leaf labels at the same height below the plot.
plot(fit.average, hang = -1, cex = .8, main = "average Linkage Clustering")
# Selecting the number of clusters ----
# Install NbClust only when it is missing.
if (!requireNamespace("NbClust", quietly = TRUE)) {
  install.packages("NbClust")
}
library(NbClust)

# Pause before each new plot page so every diagnostic plot can be inspected.
devAskNewPage(ask = TRUE)

# Evaluate candidate solutions with 2 to 15 clusters on the standardised
# nutrient data (nutrient.scaled is created in the hierarchical clustering
# section of this script).
nc <- NbClust(
  nutrient.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "average"
)

# The first row of Best.nc holds the number of clusters proposed by each
# criterion; tabulate how many criteria vote for each cluster count.
table(nc$Best.nc[1, ])
barplot(
  table(nc$Best.nc[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)
# Obtaining the final cluster solution ----
par(mfrow = c(1, 1))

# Cut the average-linkage tree (fit.average, built in the hierarchical
# clustering section) into five clusters and count the cluster sizes.
clusters <- cutree(fit.average, k = 5)
table(clusters)
# Output recorded in the original notes:
## clusters
##  1  2  3  4  5
##  7 16  1  2  1

# Median profile of each cluster in the original units.
aggregate(nutrient, by = list(cluster = clusters), median)
# Output recorded in the original notes:
##   Cluster energy protein fat calcium iron
## 1       1  340.0      19  29       9 2.50
## 2       2  170.0      20   8      13 1.45
## 3       3  160.0      26   5      14 5.90
## 4       4   57.5       9   1      78 5.70
## 5       5  180.0      22   9     367 2.50

# Median profile of each cluster in standardised units. scale() returns a
# matrix, so convert it to a data frame first.
aggregate(
  as.data.frame(nutrient.scaled),
  by = list(cluster = clusters),
  median
)
# Output recorded in the original notes:
##   Cluster     energy    protein        fat    calcium        iron
## 1       1  1.3101024  0.0000000  1.3785620 -0.4480464  0.08110456
## 2       2 -0.3696099  0.2352002 -0.4869384 -0.3967868 -0.63743114
## 3       3 -0.4684165  1.6464016 -0.7534384 -0.3839719  2.40779157
## 4       4 -1.4811842 -2.3520023 -1.1087718  0.4361807  2.27092763
## 5       5 -0.2708033  0.7056007 -0.3981050  4.1396825  0.08110456

# Redraw the dendrogram and outline the five-cluster solution on it.
plot(
  fit.average,
  hang = -1,
  cex = .8,
  main = "Average Linkage clustering Cluster solution"
)
rect.hclust(fit.average, k = 5)
# Partitioning cluster analysis (k-means) ----
# Install the required packages only when they are missing.
if (!requireNamespace("rattle", quietly = TRUE)) {
  install.packages("rattle")
}
# install.packages("RGtk2")
# RGtk2 is installed from a fixed archive URL; the path points at a Windows
# binary built for R 3.3.
if (!requireNamespace("RGtk2", quietly = TRUE)) {
  install.packages(
    "https://cran.r-project.org/bin/windows/contrib/3.3/RGtk2_2.20.31.zip",
    repos = NULL
  )
}
if (!requireNamespace("httr", quietly = TRUE)) {
  install.packages("httr")
}
library("rattle")
library("RGtk2")
library("httr")

# Download the wine data and parse it as a headerless CSV. The response body
# is requested explicitly as text because textConnection() needs character
# input.
a <- GET(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
)
wine <- read.csv(
  textConnection(content(a, as = "text", encoding = "UTF-8")),
  header = FALSE
)
names(wine) <- c(
  "Type", "Alcohol", "Malic acid", "ash", "alcalinity of Ash", "magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
  "Color intensity", "Hue", "od280/od315 of diluted Wines", "proline"
)
# data(wine, package = "rattle")
head(wine)

# Standardise all variables except the first column (the wine type).
df <- scale(wine[-1])

# NOTE(review): wssplot() is not defined in this part of the script; it must
# be defined or sourced before this call -- TODO confirm.
wssplot(df)

library(NbClust)
set.seed(1234)
# Pause before each new plot page so every diagnostic plot can be inspected.
devAskNewPage(ask = TRUE)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
# Tabulate how many criteria vote for each number of clusters.
table(nc$Best.nc[1, ])
barplot(
  table(nc$Best.nc[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)

# Fit k-means with three clusters; 25 random starts are tried.
set.seed(1234)
fit.km <- kmeans(df, 3, nstart = 25)
fit.km$size
fit.km$centers

# Cross-tabulate the known wine type against the k-means cluster assignment.
ct.km <- table(wine$Type, fit.km$cluster)
ct.km
# Output recorded in the original notes:
##      1  2  3
##   1 59  0  0
##   2  3 65  3
##   3  0  0 48

# Agreement between wine type and cluster membership.
library(flexclust)
randIndex(ct.km)
# Output recorded in the original notes:
##      ARI
## 0.897495
# Partitioning around medoids (PAM) ----
library(cluster)
set.seed(1234)

# Cluster on every column except the first (the wine type); stand = TRUE
# standardises the variables before the dissimilarities are computed.
fit.pam <- pam(wine[-1], k = 3, stand = TRUE)
# Inspect the medoids (the representative observation of each cluster).
fit.pam$medoids
clusplot(fit.pam, main = "bivariate Cluster Plot")

# Cross-tabulate the known wine type against the PAM cluster assignment; the
# table has to exist before randIndex() can be applied to it.
ct.pam <- table(wine$Type, fit.pam$clustering)
library(flexclust)
randIndex(ct.pam)
# Output recorded in the original notes:
##       ARI
## 0.6994957
# Partitioning around medoids (PAM), second run ----
library(cluster)
set.seed(1234)

# Cluster on every column except the first (the wine type); stand = TRUE
# standardises the variables before the dissimilarities are computed.
fit.pam <- pam(wine[-1], k = 3, stand = TRUE)
# Inspect the medoids (the representative observation of each cluster).
fit.pam$medoids
clusplot(fit.pam, main = "bivariate Cluster Plot2")

# Cross-tabulate the known wine type against the PAM cluster assignment.
ct.pam <- table(wine$Type, fit.pam$clustering)
# randIndex() comes from flexclust, so load it here as well.
library(flexclust)
randIndex(ct.pam)
# Output recorded in the original notes:
##       ARI
## 0.6994957
# Avoiding nonexistent clusters ----
# Install fMultivar only when it is missing.
if (!requireNamespace("fMultivar", quietly = TRUE)) {
  install.packages("fMultivar")
}
library(fMultivar)

# Draw 1000 observations from a bivariate normal distribution with
# correlation 0.5.
set.seed(1234)
df <- rnorm2d(1000, rho = .5)
# as.data.frame() names the two unnamed matrix columns V1 and V2.
df <- as.data.frame(df)
plot(df, main = "binariate Normal distribution with rho=0.5")

# wssplot(df)
library(NbClust)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
# Open a fresh graphics device for the summary bar plot.
dev.new()
barplot(
  table(nc$Best.nc[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)

library(ggplot2)
library(cluster)
# Force a two-cluster PAM solution and show how it splits the data.
fit <- pam(df, k = 2)
df$clustering <- factor(fit$clustering)
ggplot(
  data = df,
  aes(x = V1, y = V2, color = clustering, shape = clustering)
) +
  geom_point() +
  ggtitle("Clustering of Bivariate Normal Data ")

# Plot the fourth column of the NbClust index table (labelled CCC on the
# y axis) against the number of clusters. The plot type must be the
# lower-case "o" (points over-plotted with lines).
plot(
  nc$All.index[, 4],
  type = "o",
  ylab = "CCC",
  xlab = "number of clusters",
  col = "Blue"
)
# R Language Learning Notes (14): Cluster Analysis