I. Box plots
1. Basic box plot
# Read the air-pollution data and draw one box per column.
air <- read.csv("airpollution.csv")

# las = 1 draws the axis tick labels horizontally.
boxplot(air, las = 1)
2. Narrow box width
# boxwex is a scale factor applied to all boxes; a small value such as 0.2
# makes the boxes narrower (it is not an absolute width).
boxplot(air, boxwex = 0.2, las = 1)
3. Specifying the box widths
# width gives the relative widths of the boxes, here 1 and 2.
# NOTE(review): assumes `air` has exactly two columns, one width per box.
boxplot(air, width = c(1, 2))
4. Grouping by a factor
metals <- read.csv("metals.csv")

# Formula interface: Cu on the vertical axis, one box per level of Source.
boxplot(
  Cu ~ Source,
  data = metals,
  main = "Summary of copper (Cu) concentrations by site"
)

# Group by every combination of Source and Expt.
# NOTE(review): the column name's case must match the CSV header
# ("Expt" vs "expt") -- confirm against metals.csv.
boxplot(
  Cu ~ Source * Expt,
  data = metals,
  main = "Summary of copper (Cu) concentrations by site"
)
5. Box width proportional to the number of observations
# With varwidth = TRUE the box widths reflect the number of observations in
# each group.
boxplot(
  Cu ~ Source,
  data = metals,
  varwidth = TRUE,
  main = "Summary of copper concentrations by site"
)
6. Box plot with notches
# notch = TRUE draws a notch on each side of every box.
boxplot(
  Cu ~ Source,
  data = metals,
  varwidth = TRUE,
  notch = TRUE,
  main = "Summary of copper concentrations by site"
)
7. Excluding outliers
# metals[, -1] drops the first column before plotting.
# outline = FALSE suppresses the outlier points (the default is TRUE).
boxplot(
  metals[, -1],
  outline = FALSE,
  main = "Summary of metal concentrations by site (without outliers)"
)
8. Horizontal placement
# horizontal = TRUE lays the boxes out horizontally.
boxplot(
  metals[, -1],
  horizontal = TRUE,
  las = 1,
  main = "Summary of metal concentrations by site"
)
9. Change the box style
boxplot(
  metals[, -1],
  border = "white",     # box border colour
  col = "black",        # box fill colour
  boxwex = 0.3,         # box width scale factor
  medlwd = 1,           # line width of the median line
  whiskcol = "black",   # whisker colour
  staplecol = "black",  # colour of the staples (whisker end bars)
  outcol = "red",       # outlier colour
  cex = 0.3,            # outlier symbol size
  outpch = 19,          # outlier symbol (filled circle)
  main = "Summary of metal concentrations by site"
)

grid(
  nx = NA,        # no vertical grid lines
  ny = NULL,      # horizontal grid lines at the default tick positions
  col = "gray",   # grid line colour
  lty = "dashed"  # grid line style
)
10. Extending the whiskers
# range = 0 extends the whiskers to the true minimum and maximum instead of
# the default whisker limit.
boxplot(
  metals[, -1],
  range = 0,
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site (range = 0)"
)
11. Showing the number of observations
# Keep the value returned by boxplot(): its $names and $n are used to label
# the x axis below.
b <- boxplot(
  metals[, -1],
  xaxt = "n",  # suppress the default x axis; a custom one is added below
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site"
)

axis(
  side = 1,                   # axis below the plot
  at = seq_along(b$names),    # one tick per box
  # "\n" is a line break, so the count goes on a second line under the name
  labels = paste0(b$names, "\n(n=", b$n, ")"),
  mgp = c(3, 2, 0)            # push the tick labels further from the axis
)
12. Using the gplots package
# Install gplots only when it is missing, then attach it.
if (!requireNamespace("gplots", quietly = TRUE)) {
  install.packages("gplots")
}
library(gplots)

# boxplot2() draws the same plot as boxplot() and annotates each box with its
# number of observations.
# NOTE(review): as far as I recall, gplots renamed boxplot.n() to boxplot2();
# confirm against the installed version and switch back if it is older.
boxplot2(
  metals[, -1],
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site"
)
13. Splitting the data
cuts <- c(0, 40, 80)

# findInterval() returns 0..length(cuts) for each value. Using a factor with
# all of those levels keeps empty groups, so the four boxes always line up
# with the four axis labels below.
groups <- factor(findInterval(metals$Cu, cuts), levels = 0:length(cuts))
y <- split(x = metals$Cu, f = groups)

boxplot(
  y,
  xaxt = "n",  # suppress the default x axis; a custom one is added below
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of copper concentrations",
  xlab = "concentration ranges",
  las = 1
)

axis(
  1,
  at = 1:4,
  labels = c("below 0", "0 to 40", "40 to 80", "above 80"),
  lwd = 0,        # do not draw the axis line between the ticks
  lwd.ticks = 1,  # line width of the tick marks
  col = "gray"    # tick colour
)
For more on splitting data, see:
http://blog.csdn.net/zhuanzhu123/article/details/9343639
14. Wrapping it in a function
# Box plot of a numeric vector split into ranges at the given cut points.
#
# y     Numeric vector to plot.
# cuts  Numeric vector of cut points (at least one); sorted internally.
# ...   Further arguments forwarded to boxplot().
#
# Draws one box per range ("below c1", "c1 to c2", ..., "above cn") and adds a
# labelled x axis. The last expression is the axis() call, so the function
# returns whatever axis() returns.
boxplot.cuts <- function(y, cuts, ...) {
  stopifnot(is.numeric(y), is.numeric(cuts), length(cuts) >= 1L)
  cuts <- sort(cuts)  # findInterval() requires sorted cut points
  n_cuts <- length(cuts)

  # Keep every interval as a factor level so that empty ranges still get a
  # slot and the labels below stay aligned with the boxes.
  groups <- factor(findInterval(y, cuts), levels = 0:n_cuts)
  y_split <- split(y, f = groups)

  boxplot(
    y_split,
    xaxt = "n",
    border = "white",
    col = "black",
    boxwex = 0.3,
    medlwd = 1,
    whiskcol = "black",
    staplecol = "black",
    outcol = "red",
    cex = 0.3,
    outpch = 19,
    main = "Summary of copper concentrations",
    xlab = "concentration ranges",
    las = 1,
    ...
  )

  # Build the labels without a loop; the middle part is skipped when there is
  # only one cut point.
  clabels <- c(
    paste("below", cuts[1]),
    if (n_cuts > 1L) paste(cuts[-n_cuts], "to", cuts[-1]),
    paste("above", cuts[n_cuts])
  )

  axis(
    1,
    at = seq_along(clabels),
    labels = clabels,
    lwd = 0,
    lwd.ticks = 1,
    col = "gray"
  )
}

# Call the function.
boxplot.cuts(metals$Ba, c(20, 40, 80))
The benefit of wrapping the code in a function is that you do not need to repeat the earlier work: you write the code once and simply call it later. This saves a lot of effort and time.
# Alternative version: box plot of a numeric vector split into ranges with
# cut(), using the interval names produced by cut() as axis labels.
#
# y     Numeric vector to plot (NA values are ignored).
# cuts  Numeric vector of interior cut points.
#
# The last expression is the axis() call, so the function returns whatever
# axis() returns.
boxplot.cuts <- function(y, cuts) {
  stopifnot(is.numeric(y), is.numeric(cuts))

  # The outer bounds cover both the data and the cut points; unique() avoids
  # the "'breaks' are not unique" error when a cut equals the data min or max.
  bounds <- range(c(y, cuts), na.rm = TRUE)
  breaks <- sort(unique(c(bounds, cuts)))
  stopifnot(length(breaks) >= 2L)

  # include.lowest = TRUE keeps the smallest value in the first interval.
  # The argument is `ordered_result`; a misspelt name is silently ignored.
  f <- cut(y, breaks = breaks, include.lowest = TRUE, ordered_result = TRUE)
  y_split <- split(y, f = f)

  boxplot(
    y_split,
    xaxt = "n",
    border = "white",
    col = "black",
    boxwex = 0.3,
    medlwd = 1,
    whiskcol = "black",
    staplecol = "black",
    outcol = "red",
    cex = 0.3,
    outpch = 19,
    main = "Summary of copper concentrations",
    xlab = "concentration ranges",
    las = 1
  )

  clabels <- levels(f)
  axis(
    1,
    at = seq_along(clabels),
    labels = clabels,
    lwd = 0,
    lwd.ticks = 1,
    col = "gray"
  )
}

# Call the function.
boxplot.cuts(metals$Cu, c(0, 40, 80))
15. Subsetting
# subset keeps only the rows where the copper value is greater than 40.
boxplot(Cu ~ Source, data = metals, subset = Cu > 40)
II. Heat maps
1. Basic heat map
# Read the data.
sales <- read.csv("sales.csv")

# Install (only if missing) and attach RColorBrewer.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)

# Turn the data into a numeric matrix: the first column becomes the row names.
rownames(sales) <- sales[, 1]
sales <- sales[, -1]
data_matrix <- data.matrix(sales)

# Palette (palette names are case-sensitive).
pal <- brewer.pal(7, "YlOrRd")

# Colour break points; must be increasing and cover the data.
# NOTE(review): the original start/end values were lost in the source (only
# the 1500 step survived); 3000..12000 is a reconstruction -- adjust it to
# range(data_matrix).
breaks <- seq(3000, 12000, by = 1500)

# Save the graphics state so it can be restored at the end.
old_par <- par(no.readonly = TRUE)

# Two panels side by side: the heat map and a narrow legend.
# NOTE(review): widths/heights/margins were garbled in the source; the values
# below are reconstructed -- tune them for your device.
layout(
  matrix(data = c(1, 2), nrow = 1, ncol = 2),
  widths = c(8, 1),
  heights = c(1, 1)
)

# Margins for the heat map.
par(mar = c(5, 10, 4, 2), oma = c(0.2, 0.2, 0.2, 0.2), mex = 0.5)

# Heat map.
image(
  x = seq_len(nrow(data_matrix)),
  y = seq_len(ncol(data_matrix)),
  z = data_matrix,
  axes = FALSE,
  xlab = "month",
  ylab = "",
  # image() needs exactly one more break than colours
  col = pal[seq_len(length(breaks) - 1)],
  breaks = breaks,
  main = "sales heat map"
)

# X axis.
axis(
  1,
  at = seq_len(nrow(data_matrix)),
  labels = rownames(data_matrix),
  col = "white",  # axis line colour
  las = 1
)

# Y axis.
axis(
  2,
  at = seq_len(ncol(data_matrix)),
  labels = colnames(data_matrix),
  col = "white",
  las = 1
)

# White separator lines between the cells; xpd = FALSE clips them to the
# plot region.
abline(
  h = seq_len(ncol(data_matrix)) + 0.5,
  v = seq_len(nrow(data_matrix)) + 0.5,
  col = "white",
  lwd = 2,
  xpd = FALSE
)

# Legend: one cell per colour, labelled with the lower break of each bin.
breaks2 <- breaks[-length(breaks)]

# Margins for the legend panel.
par(mar = c(5, 1, 4, 7))

image(
  x = 1,
  y = 0:length(breaks2),
  # nudge the values just above each lower break so they fall inside the bin
  z = t(matrix(breaks2)) * 1.001,
  col = pal[seq_len(length(breaks) - 1)],
  axes = FALSE,
  breaks = breaks,
  xlab = "",
  ylab = "",
  xaxt = "n"
)

# Legend labels.
axis(
  4,
  at = 0:(length(breaks2) - 1),
  labels = breaks2,
  col = "white",
  las = 1
)

# White separators between the legend cells.
abline(h = seq_along(breaks2), col = "white", lwd = 2, xpd = FALSE)

# Restore the previous graphics state (this also resets the layout).
par(old_par)
2. Correlation heat map
# Read the data; this file holds the correlation coefficients between genes.
genes <- read.csv("genes.csv")

# Use the column names as row names as well.
rownames(genes) <- colnames(genes)
data_matrix <- data.matrix(genes)

# Palette and colour break points (one more break than colours).
# NOTE(review): the seq() arguments were garbled in the source; 0..1 in steps
# of 0.2 is a reconstruction and assumes non-negative correlations -- confirm.
pal <- heat.colors(5)
breaks <- seq(0, 1, by = 0.2)

# Save the graphics state so it can be restored at the end.
old_par <- par(no.readonly = TRUE)

# Two panels side by side: the heat map and a narrow legend.
# NOTE(review): layout sizes and margins were garbled in the source; the
# values below are reconstructed -- tune them for your device.
layout(
  matrix(data = c(1, 2), nrow = 1, ncol = 2),
  widths = c(8, 1),
  heights = c(1, 1)
)
par(mar = c(3, 7, 12, 2), oma = c(0.2, 0.2, 0.2, 0.2), mex = 0.5)

# Heat map.
image(
  x = seq_len(nrow(data_matrix)),
  y = seq_len(ncol(data_matrix)),
  z = data_matrix,
  xlab = "",
  ylab = "",
  breaks = breaks,
  col = pal,
  axes = FALSE
)

# Horizontal labels, drawn as rotated text above the plot.
text(
  x = seq_len(nrow(data_matrix)) + 0.75,
  y = par("usr")[4] + 1.25,
  srt = 45,  # rotation angle
  adj = 1,   # text justification
  labels = rownames(data_matrix),
  xpd = TRUE # allow drawing outside the plot region
)

# Vertical axis.
axis(
  2,
  at = seq_len(ncol(data_matrix)),
  labels = colnames(data_matrix),
  col = "white",
  las = 1
)

# White separator lines between the cells.
abline(
  h = seq_len(ncol(data_matrix)) + 0.5,
  v = seq_len(nrow(data_matrix)) + 0.5,
  col = "white",
  lwd = 2,
  xpd = FALSE
)

title("correlation between genes", line = 8, adj = 0)

# Legend: one cell per colour.
breaks2 <- breaks[-length(breaks)]

# Legend margins.
# NOTE(review): reconstructed values; if you get a "figure margins too large"
# error, enlarge the device or reduce these margins.
par(mar = c(25, 1, 25, 7))

image(
  x = 1,
  y = 0:length(breaks2),
  # nudge the values just above each lower break so they fall inside the bin
  z = t(matrix(breaks2)) * 1.001,
  col = pal[seq_len(length(breaks) - 1)],
  axes = FALSE,
  breaks = breaks,
  xlab = "",
  ylab = "",
  xaxt = "n"
)

# Legend labels: one per break, placed on the cell boundaries.
axis(
  4,
  at = 0:length(breaks2),
  labels = breaks,
  col = "white",
  las = 1
)

# White separators between the legend cells.
abline(h = seq_along(breaks2), col = "white", lwd = 2, xpd = FALSE)

# Restore the previous graphics state (this also resets the layout).
par(old_par)
3. Showing multivariate data
# brewer.pal() comes from RColorBrewer.
library(RColorBrewer)

nba <- read.csv("nba.csv")

# The first column becomes the row names; the remaining columns are scaled
# per column and then transposed.
rownames(nba) <- nba[, 1]
data_matrix <- t(scale(data.matrix(nba[, -1])))

pal <- brewer.pal(6, "Blues")

# Display names for the statistics.
# NOTE(review): assumes these 20 names match the order and number of the
# data columns in nba.csv -- confirm.
statnames <- c(
  "games played", "Minutes played", "total points",
  "field goals made", "field goals attempted", "field goal percentage",
  "free throws made", "free throws attempted", "free throw percentage",
  "three pointers made", "three pointers attempted",
  "Three point percentage", "offensive rebounds", "defensive rebounds",
  "Total rebounds", "assists", "steals", "blocks", "turnovers", "fouls"
)

# Save the graphics state so it can be restored at the end.
old_par <- par(no.readonly = TRUE)

par(mar = c(3, 14, 19, 2), oma = c(0.2, 0.2, 0.2, 0.2), mex = 0.5)

# Heat map.
image(
  x = seq_len(nrow(data_matrix)),
  y = seq_len(ncol(data_matrix)),
  z = data_matrix,
  xlab = "",
  ylab = "",
  col = pal,
  axes = FALSE
)

# X axis labels, drawn as rotated text above the plot.
text(
  seq_len(nrow(data_matrix)),
  par("usr")[4] + 1 + 1,
  srt = 45,
  adj = 0,
  labels = statnames,
  xpd = TRUE,
  cex = 0.85
)

# Y axis labels.
axis(
  side = 2,
  at = seq_len(ncol(data_matrix)),
  labels = colnames(data_matrix),
  col = "white",
  las = 1,
  cex.axis = 0.85
)

# White separating lines.
abline(
  h = seq_len(ncol(data_matrix)) + 0.5,
  v = seq_len(nrow(data_matrix)) + 0.5,
  col = "white",
  lwd = 1,
  xpd = FALSE
)

# Graph title, placed manually above the rotated labels.
text(
  par("usr")[1] + 5,
  par("usr")[4] + 12,
  "NBA per game performance of top 50 scorers",
  xpd = TRUE,
  font = 2,
  cex = 1.5
)

# Restore the previous graphics state.
par(old_par)