R Language Learning Note (14): Cluster analysis

Source: Internet
Author: User

# Cluster analysis
# NOTE(review): the arguments of c() were lost in the source listing; a
# single-panel layout, c(1, 1), is assumed here -- confirm.
par(mfrow = c(1, 1))

# Compute distances
# Install flexclust only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("flexclust", quietly = TRUE)) {
  install.packages("flexclust")
}
data(nutrient, package = "flexclust")
head(nutrient, 4)

Energy Protein fat Calcium iron
Beef Braised 340 20 28 9 2.6
Hamburger 245 21 17 9 2.7
Beef Roast 420 15 39 7 2.0
Beef Steak 375 19 32 9 2.6

# Pairwise distances between the rows of `nutrient` (dist() defaults to the
# Euclidean metric), then the top-left 4 x 4 corner of the distance matrix.
d <- dist(nutrient)
as.matrix(d)[1:4, 1:4]

Beef Braised Hamburger beef roast Beef steak
Beef braised 0.00000 95.6400 80.93429 35.24202
Hamburger 95.64000 0.0000 176.49218 130.87784
Beef Roast 80.93429 176.4922 0.00000 45.76418
Beef Steak 35.24202 130.8778 45.76418 0.00000

# Hierarchical cluster analysis
# NOTE(review): the arguments of c() were lost in the source listing; a
# single-panel layout, c(1, 1), is assumed here -- confirm.
par(mfrow = c(1, 1))
data(nutrient, package = "flexclust")
# Lower-case the row names so the dendrogram labels are easier to read.
row.names(nutrient) <- tolower(row.names(nutrient))
# Standardise each column so that no single variable dominates the distances.
nutrient.scaled <- scale(nutrient)

d <- dist(nutrient.scaled)

# Average-linkage agglomerative clustering on the scaled distances.
fit.average <- hclust(d, method = "average")
plot(fit.average, hang = -1, cex = .8, main = "average Linkage Clustering")

# Choose the number of clusters
if (!requireNamespace("NbClust", quietly = TRUE)) {
  install.packages("NbClust")
}
library(NbClust)
# Pause between the diagnostic plots that NbClust() draws.
devAskNewPage(ask = TRUE)
nc <- NbClust(
  nutrient.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "average"
)
# The first row of Best.n holds the cluster count proposed by each criterion.
table(nc$Best.n[1, ])

barplot(
  table(nc$Best.n[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)


# Obtain the final cluster solution
# NOTE(review): the arguments of c() were lost in the source listing; a
# single-panel layout, c(1, 1), is assumed here -- confirm.
par(mfrow = c(1, 1))

# Cut the average-linkage tree into five groups and count members per group.
clusters <- cutree(fit.average, k = 5)
table(clusters)

1 2 3 4 5
7 16 1 2 1

# Median of every variable within each cluster, in the original units.
aggregate(nutrient, by = list(cluster = clusters), FUN = median)

Cluster energy protein fat calcium iron
1 1 340.0 19 29 9 2.50
2 2 170.0 20 8 13 1.45
3 3 160.0 26 5 14 5.90
4 4 57.5 9 1 78 5.70
5 5 180.0 22 9 367 2.50

# Median of every variable within each cluster, in standardised units.
# scale() returns a matrix, so it is converted to a data frame first.
aggregate(
  as.data.frame(nutrient.scaled),
  by = list(cluster = clusters),
  FUN = median
)

Cluster energy protein fat calcium iron
1 1 1.3101024 0.0000000 1.3785620 -0.4480464 0.08110456
2 2 -0.3696099 0.2352002 -0.4869384 -0.3967868 -0.63743114
3 3 -0.4684165 1.6464016 -0.7534384 -0.3839719 2.40779157
4 4 -1.4811842 -2.3520023 -1.1087718 0.4361807 2.27092763
5 5 -0.2708033 0.7056007 -0.3981050 4.1396825 0.08110456

# Redraw the dendrogram and outline the five-cluster solution on top of it.
plot(
  fit.average,
  hang = -1,
  cex = .8,
  main = "Average Linkage clustering Cluster solution"
)

rect.hclust(fit.average, k = 5)

# Partitioning cluster analysis
# Install each dependency only when it is missing.
if (!requireNamespace("rattle", quietly = TRUE)) {
  install.packages("rattle")
}
# install.packages("RGtk2")
# NOTE(review): this URL points at a Windows binary built for R 3.3; it will
# not install on other platforms or R versions.
if (!requireNamespace("RGtk2", quietly = TRUE)) {
  install.packages(
    "https://cran.r-project.org/bin/windows/contrib/3.3/RGtk2_2.20.31.zip",
    repos = NULL
  )
}
if (!requireNamespace("httr", quietly = TRUE)) {
  install.packages("httr")
}
library("rattle")
library("RGtk2")
library("httr")

# Download the wine data and parse the response body as header-less CSV.
# The body is requested explicitly as text so parsing does not depend on how
# the server labels the content type.
a <- GET("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data")
wine <- read.csv(
  text = content(a, as = "text", encoding = "UTF-8"),
  header = FALSE
)

names(wine) <- c(
  "Type", "Alcohol", "Malic acid", "ash", "alcalinity of Ash", "magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
  "Color intensity", "Hue", "od280/od315 of diluted Wines", "proline"
)
# data(wine, package = "rattle")
head(wine)
# Drop the first column (Type) and standardise the remaining variables.
df <- scale(wine[-1])

# wssplot() is not defined anywhere in this listing. A fallback is supplied
# only when the session does not already provide one.
# NOTE(review): the fallback assumes wssplot() is the usual within-groups
# sum-of-squares ("elbow") plot -- confirm against the intended helper.
if (!exists("wssplot", mode = "function")) {
  wssplot <- function(data, nc = 15, seed = 1234) {
    wss <- numeric(nc)
    # With one cluster, the within-groups SS is the total sum of squares.
    wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
    for (i in seq_len(nc)[-1]) {
      set.seed(seed)
      wss[i] <- sum(kmeans(data, centers = i)$withinss)
    }
    plot(
      seq_len(nc), wss,
      type = "b",
      xlab = "Number of Clusters",
      ylab = "Within groups sum of squares"
    )
  }
}
wssplot(df)

library(NbClust)
set.seed(1234)
# Pause between the diagnostic plots that NbClust() draws.
devAskNewPage(ask = TRUE)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
table(nc$Best.n[1, ])

barplot(
  table(nc$Best.n[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)

# Three-cluster k-means, with 25 random starts to reduce the dependence on
# the initial centres.
set.seed(1234)
fit.km <- kmeans(df, 3, nstart = 25)
fit.km$size
fit.km$centers

# Cross-tabulate the known wine type against the k-means cluster assignment.
ct.km <- table(wine$Type, fit.km$cluster)
ct.km


1 2 3
1 59 0 0
2 3 65 3
3 0 0 48

# Adjusted Rand index: agreement between wine type and the k-means clusters.
library(flexclust)
randIndex(ct.km)

ARI
0.897495


# Partitioning around medoids
library(cluster)
set.seed(1234)
# stand = TRUE standardises the variables before the dissimilarities are
# computed.
fit.pam <- pam(wine[-1], k = 3, stand = TRUE)
# A pam object has no `method` component, so the original `fit.pam$method`
# could only return NULL; the medoids are shown instead.
fit.pam$medoids

clusplot(fit.pam, main = "bivariate Cluster Plot")

library(flexclust)
# The cross-tabulation has to exist before randIndex() is called on it.
ct.pam <- table(wine$Type, fit.pam$clustering)
randIndex(ct.pam)

ARI
0.6994957


# Partitioning around medoids
library(cluster)
set.seed(1234)
# stand = TRUE standardises the variables before the dissimilarities are
# computed.
fit.pam <- pam(wine[-1], k = 3, stand = TRUE)
fit.pam$medoids
clusplot(fit.pam, main = "bivariate Cluster Plot2")


# Cross-tabulate the known wine type against the PAM cluster assignment.
ct.pam <- table(wine$Type, fit.pam$clustering)

# randIndex() comes from flexclust; it is attached here so this section does
# not depend on an earlier library() call.
library(flexclust)
randIndex(ct.pam)

ARI
0.6994957


# Avoiding nonexistent clusters
if (!requireNamespace("fMultivar", quietly = TRUE)) {
  install.packages("fMultivar")
}
library(fMultivar)
set.seed(1234)
# 1000 draws from a bivariate normal distribution with correlation 0.5.
df <- rnorm2d(1000, rho = .5)
df <- as.data.frame(df)
plot(df, main = "binariate Normal distribution with rho=0.5")


# wssplot(df)
library(NbClust)
nc <- NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans")
dev.new()
barplot(
  table(nc$Best.n[1, ]),
  xlab = "Number of Clusters",
  ylab = "number of Criteria",
  main = "number of Clusters Chosen by 26 Criteria "
)


library(ggplot2)
library(cluster)
fit <- pam(df, k = 2)
df$clustering <- factor(fit$clustering)
# NOTE(review): V1/V2 are the default names as.data.frame() gives to an
# unnamed two-column matrix; this assumes rnorm2d() returns one -- confirm.
ggplot(
  data = df,
  aes(x = V1, y = V2, color = clustering, shape = clustering)
) +
  geom_point() +
  ggtitle("Clustering of Bivariate Normal Data ")

# Column 4 of All.index is plotted under the label CCC, one value per
# candidate number of clusters.
plot(
  nc$All.index[, 4],
  type = "o",
  ylab = "CCC",
  xlab = "number of clusters",
  col = "Blue"
)

R Language Learning Note (14): Cluster analysis

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.