0 Introduction
The random forest algorithm is widely used in data mining for both classification and regression because of its many advantages; readers are encouraged to study the details on their own. I am a beginner, and I started with the classification case.
1 algorithm principle
2 Modeling
3 Simulation Results
4 Results Analysis and summary
Appendix: Program Code
Examples
## Worked examples for the randomForest package, using only data sets that
## ship with R (iris, airquality, swiss) plus simulated data.
## R is case-sensitive: function names, argument names, column names and the
## logical constants TRUE/FALSE must be spelled exactly as below.
library(randomForest)

## Classification:
## data(iris)
set.seed(71)
iris.rf <- randomForest(Species ~ ., data = iris, importance = TRUE,
                        proximity = TRUE)
print(iris.rf)

## Look at variable importance:
round(importance(iris.rf), 2)

## Do MDS on 1 - proximity:
iris.mds <- cmdscale(1 - iris.rf$proximity, eig = TRUE)
## Save the current graphics settings so they can be restored after plotting.
op <- par(pty = "s")
pairs(cbind(iris[, 1:4], iris.mds$points), cex = 0.6, gap = 0,
      col = c("red", "green", "blue")[as.numeric(iris$Species)],
      main = "Iris Data: Predictors and MDS of Proximity Based on RandomForest")
par(op)
print(iris.mds$GOF)

## The 'unsupervised' case (no response supplied):
set.seed(17)
iris.urf <- randomForest(iris[, -5])
MDSplot(iris.urf, iris$Species)

## Stratified sampling: draw 20, 30, and 20 of the species to grow each tree.
(iris.rf2 <- randomForest(iris[1:4], iris$Species,
                          sampsize = c(20, 30, 20)))

## Regression:
## data(airquality)
set.seed(131)
## na.action = na.omit drops the rows of airquality that contain NA.
ozone.rf <- randomForest(Ozone ~ ., data = airquality, mtry = 3,
                         importance = TRUE, na.action = na.omit)
print(ozone.rf)

## Show "importance" of variables: higher value means more important:
round(importance(ozone.rf), 2)

## "x" can be a matrix instead of a data frame:
set.seed(17)
x <- matrix(runif(5e2), 100)  # 100 rows x 5 columns of uniform noise
y <- gl(2, 50)                # two-level factor, 50 observations per level
(myrf <- randomForest(x, y))
(predict(myrf, x))

## "Complicated" formula:
(swiss.rf <- randomForest(sqrt(Fertility) ~ . - Catholic + I(Catholic < 50),
                          data = swiss))
(predict(swiss.rf, swiss))

## Test use of a many-level factor as a predictor:
## gl(53, 10) yields a 53-level factor of length 530, matching x2 and y.
set.seed(1)
x <- data.frame(x1 = gl(53, 10), x2 = runif(530), y = rnorm(530))
(rf1 <- randomForest(x[-3], x[[3]], ntree = 10))

## Grow no more than 4 nodes per tree:
(treesize(randomForest(Species ~ ., data = iris, maxnodes = 4, ntree = 30)))

## Test proximity in regression:
iris.rrf <- randomForest(iris[-1], iris[[1]], ntree = 101,
                         proximity = TRUE, oob.prox = FALSE)
str(iris.rrf$proximity)
Random Forest algorithm