Week 1 course: box plots and heat maps

Source: Internet
Author: User
Tags: ranges, RColorBrewer

I. Box plots

1. Basic box plot

 
# Load the air-pollution data set and draw one box per column.
air <- read.csv("airpollution.csv")
# las = 1 draws the axis tick labels horizontally.
boxplot(air, las = 1)

2. Narrow box width

 
# boxwex is a scale factor applied to all boxes; a smaller value draws
# narrower boxes. Note that 0.2 does not mean "0.2 times the original width".
boxplot(air, boxwex = 0.2, las = 1)

3. Specify the box width

 
# width gives the relative widths of the boxes: 1 and 2 for the two boxes.
boxplot(air, width = c(1, 2))

4. Group

# Load the metal-concentration data used by the remaining box plot examples.
metals <- read.csv("metals.csv")

# One box per source: Cu on the vertical axis, Source on the horizontal axis.
boxplot(
  Cu ~ Source,
  data = metals,
  main = "Summary of copper (Cu) concentrations by site"
)

# One box for every combination of the two grouping columns.
# NOTE(review): the second column is written `expt` in the garbled source;
# R is case-sensitive, so confirm the exact column name in metals.csv.
boxplot(
  Cu ~ Source * Expt,
  data = metals,
  main = "Summary of copper (Cu) concentrations by site"
)

5. Let the number of observations determine the box width

 
# varwidth = TRUE makes each box's width depend on its number of observations.
boxplot(
  Cu ~ Source,
  data = metals,
  varwidth = TRUE,
  main = "Summary of copper concentrations by site"
)

6. Box plot with notches

 
# notch = TRUE draws a notch in each side of the boxes.
boxplot(
  Cu ~ Source,
  data = metals,
  varwidth = TRUE,
  notch = TRUE,
  main = "Summary of copper concentrations by site"
)

7. Exclude outliers

 
# outline = FALSE suppresses the outlier points (the default is TRUE).
# metals[, -1] drops the first column, leaving the concentration columns.
boxplot(
  metals[, -1],
  outline = FALSE,
  main = "Summary of metal concentrations by site (without outliers)"
)

8. Horizontal placement

 
# horizontal = TRUE lays the boxes out horizontally.
boxplot(
  metals[, -1],
  horizontal = TRUE,
  las = 1,
  main = "Summary of metal concentrations by site"
)

9. Change the box style

# Restyle the individual elements of the box plot.
boxplot(
  metals[, -1],
  border = "white",     # box border colour
  col = "black",        # box fill colour
  boxwex = 0.3,         # box width scale factor
  medlwd = 1,           # line width of the median line
  whiskcol = "black",   # whisker colour
  staplecol = "black",  # colour of the staples (the two end crossbars)
  outcol = "red",       # outlier colour
  cex = 0.3,            # symbol size
  outpch = 19,          # outlier plotting symbol
  main = "Summary of metal concentrations by site"
)

# Add horizontal grid lines only.
grid(
  nx = NA,        # do not draw vertical grid lines
  ny = NULL,      # use the default positions for horizontal grid lines
  col = "gray",   # grid line colour
  lty = "dashed"  # grid line style
)

10. Extend the whiskers

 
# range = 0 extends the whiskers to the true minimum and maximum values
# instead of stopping at the default whisker limit.
boxplot(
  metals[, -1],
  range = 0,
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site (range = 0)"
)

11. Show the number of observations

 
# Keep the value returned by boxplot(): it carries the group names and the
# number of observations per group, which are used for the axis labels.
b <- boxplot(
  metals[, -1],
  xaxt = "n",  # do not draw the x axis; a custom one is added below
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site"
)

# Add the x axis below the plot with one tick per group; "\n" puts the
# observation count on a second line under the group name.
axis(
  side = 1,
  at = seq_along(b$names),
  labels = paste0(b$names, "\n(n=", b$n, ")"),
  mgp = c(3, 2, 0)  # spacing between the axis labels and the plot
)

12. Use the gplots package

# Install gplots only when it is missing, then load it.
if (!requireNamespace("gplots", quietly = TRUE)) {
  install.packages("gplots")
}
library(gplots)

# boxplot.n() draws the same kind of plot as boxplot().
# NOTE(review): recent gplots releases may provide this function under the
# name boxplot2() instead - confirm against the installed version.
boxplot.n(
  metals[, -1],
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of metal concentrations by site"
)

13. Split data

# Split the copper concentrations into the ranges defined by `cuts`.
cuts <- c(0, 40, 80)

# findInterval() numbers the ranges 0 (below the first cut) to length(cuts)
# (at or above the last cut). Fixing the factor levels keeps a slot for every
# range, even an empty one, so the boxes always line up with the axis labels.
Y <- split(
  x = metals$Cu,
  f = factor(findInterval(metals$Cu, cuts), levels = 0:length(cuts))
)

boxplot(
  Y,
  xaxt = "n",  # the x axis is drawn separately below
  border = "white",
  col = "black",
  boxwex = 0.3,
  medlwd = 1,
  whiskcol = "black",
  staplecol = "black",
  outcol = "red",
  cex = 0.3,
  outpch = 19,
  main = "Summary of copper concentrations",
  xlab = "concentration ranges",
  las = 1
)

range_labels <- c("below 0", "0 to 40", "40 to 80", "above 80")
axis(
  1,
  at = seq_along(range_labels),
  labels = range_labels,
  lwd = 0,        # width of the axis line between the tick marks
  lwd.ticks = 1,  # width of the tick marks
  col = "gray"    # colour of the tick marks
)

For split data, see:

Http://blog.csdn.net/zhuanzhu123/article/details/9343639

14. Wrap the plot in a function

# Draw a box plot of `y` split into the ranges defined by `cuts`.
#
# Arguments:
#   y     numeric vector of observations.
#   cuts  numeric vector of cut points in increasing order (at least one).
#   ...   further arguments passed on to boxplot().
#
# The x axis is labelled "below <first cut>", "<a> to <b>" for each pair of
# neighbouring cuts, and "above <last cut>".
boxplot.cuts <- function(y, cuts, ...) {
  stopifnot(length(cuts) >= 1L)
  n_cuts <- length(cuts)

  # findInterval() yields 0..n_cuts; fixing the levels keeps one slot per
  # range even when a range has no observations, so boxes match the labels.
  groups <- factor(findInterval(y, cuts), levels = 0:n_cuts)
  y_split <- split(y, f = groups)

  boxplot(
    y_split,
    xaxt = "n",
    border = "white",
    col = "black",
    boxwex = 0.3,
    medlwd = 1,
    whiskcol = "black",
    staplecol = "black",
    outcol = "red",
    cex = 0.3,
    outpch = 19,
    main = "Summary of copper concentrations",
    xlab = "concentration ranges",
    las = 1,
    ...
  )

  # Build the "<a> to <b>" labels only when there are at least two cuts;
  # paste() on empty vectors would otherwise produce a bogus " to " label.
  inner <- if (n_cuts > 1L) {
    paste(cuts[-n_cuts], "to", cuts[-1L])
  } else {
    character(0)
  }
  clabels <- c(
    paste("below", cuts[1L]),
    inner,
    paste("above", cuts[n_cuts])
  )

  axis(
    1,
    at = seq_along(clabels),
    labels = clabels,
    lwd = 0,
    lwd.ticks = 1,
    col = "gray"
  )
}

# Call the function.
boxplot.cuts(metals$Ba, c(20, 40, 80))

The benefit of wrapping the code in a function is that you don't need to repeat the previous work: you write the code once and simply call it later. This saves a lot of work and time.

# Alternative implementation: draw a box plot of `y` split into ranges with
# cut(), labelling the x axis with the interval labels that cut() generates.
#
# Arguments:
#   y     numeric vector of observations (NA values are ignored when the
#         data range is computed).
#   cuts  numeric vector of interior cut points.
boxplot.cuts <- function(y, cuts) {
  y_range <- range(y, na.rm = TRUE)

  # sort() + unique() prevents cut() from failing with "'breaks' are not
  # unique" when a cut point coincides with the minimum or maximum of y.
  breaks <- unique(sort(c(y_range[1L], cuts, y_range[2L])))

  # include.lowest = TRUE keeps the minimum observation, which would
  # otherwise fall outside the first (left-open) interval.
  f <- cut(y, breaks, include.lowest = TRUE, ordered_result = TRUE)
  y_split <- split(y, f = f)

  boxplot(
    y_split,
    xaxt = "n",
    border = "white",
    col = "black",
    boxwex = 0.3,
    medlwd = 1,
    whiskcol = "black",
    staplecol = "black",
    outcol = "red",
    cex = 0.3,
    outpch = 19,
    main = "Summary of copper concentrations",
    xlab = "concentration ranges",
    las = 1
  )

  clabels <- levels(f)
  axis(
    1,
    at = seq_along(clabels),
    labels = clabels,
    lwd = 0,
    lwd.ticks = 1,
    col = "gray"
  )
}

# Call the function.
boxplot.cuts(metals$Cu, c(0, 40, 80))

15. Subset

 
# subset restricts the plot to rows whose copper concentration exceeds 40.
boxplot(Cu ~ Source, data = metals, subset = Cu > 40)

II. Heat maps

1. Heatmap

# Sales heat map with a colour legend.
# Steps visible in the snippet below: read sales.csv; install and load the
# colour-palette package; move the first column into the row names and
# convert the rest to a matrix; build a 7-colour palette and the colour
# break points; split the canvas into two panels with layout(); draw the
# heat map with image(); add both axes and white separator lines; then draw
# the legend in the second panel.
# NOTE(review): the whole snippet is collapsed onto one line that starts with
# "#", so R treats it entirely as a comment. Identifier case and spacing are
# garbled, and several numeric arguments are missing from seq(), from the
# widths/heights of layout() and from the first par(mar = ..., oma = ...)
# call. They cannot be recovered from this text - restore them from the
# original tutorial before running.
# Read data sales <-read.csv ("sales.csv") # Install and load the package. packages ("rcolorbrewer") Library (rcolorbrewer) # process data into a matrix rownames (sales) <-Sales [, 1] Sales <-Sales [,-1] data_matrix <-data. matrix (sales) # generate the palette pal = Brewer. pal (7, "ylorrd") # color split point breaks <-seq (1500) # The breaks sequence is constructed based on the actual data size # layout (matrix (Data = C (1, 2), nrow = 1, ncol = 2) is set on the canvas ), widths = C (0.2), heights = C (0.2) # margin settings PAR (MAR = C (0.2, 10,), Oma = C, 0.2), Mex = 0.5) # heat map image (x = 1: nrow (data_matrix), # X axis Y = 1: ncol (data_matrix), # Y axis Z = data_matrix, axes = false, xlab = "month", ylab = "", Col = pal [1 :( length (breaks)-1)], breaks = breaks, # color split point, the number of colors must be one more, and the main = "sales heat map" must be sorted in ascending order) # Add the X axis (1, at = 1: nrow (data_matrix ), labels = rownames (data_matrix), Col = "white", # horizontal axis color las = 1) # Add Y axis (2, at = 1: ncol (data_matrix ), labels = colnames (data_matrix), Col = "white", las = 1) # Add a white interval line abline (H = C (1: ncol (data_matrix) + 0.5, V = C (1: nrow (data_matrix) + 0.5, Col = "white", LWD = 2, XPD = false) # XPD if true, all plovers is clipped to the figure region # legend breaks2 <-breaks [-length (breaks)] # new sequence, auxiliary legend addition PAR (MAR = C (5, 1, 4, 7) # margin image (x = 1, y = 0: length (breaks2), Z = T (matrix (breaks2) * 1.001, Col = pal [1: length (breaks)-1], axes = false, breaks = breaks, xlab = "", ylab = "", xaxt = "N ") # Add the text information of the legend axis (4, at = 0 :( length (breaks2)-1), labels = breaks2, Col = "white", las = 1) # Add white interval abline (H = C (1: length (breaks2), Col = "white", LWD = 2, XPD = f)

2. Correlation heat map

# Heat map of the correlation coefficients between genes, with a legend.
# Steps visible in the snippet below: read genes.csv; copy the column names
# to the row names and convert to a matrix; build a 5-colour heat.colors()
# palette and the colour break points; split the canvas with layout(); draw
# the heat map with image(); add tilted labels along one side with text() and
# an axis on side 2; add white separator lines and a title; then draw the
# legend panel.
# NOTE(review): the whole snippet is collapsed onto one line that starts with
# "#", so R treats it entirely as a comment. Identifier case and spacing are
# garbled, and numeric arguments are missing from seq(), from the data,
# widths and heights of layout(), and from both par(mar = ...) calls. They
# cannot be recovered from this text - restore them from the original
# tutorial before running.
# Read data genes <-read.csv ("genes.csv") # This data stores the correlation coefficient between genes # Row name rownames (genes) <-colnames (genes) data_matrix <-data. matrix (genes) # palette pal = heat. colors (5) breaks <-seq (0.2,) # layout (matrix (Data = C (), nrow = 1, ncol = 2 ), widths = C (0.2), heights = C (0.2) PAR (MAR = C (0.2, 0.2,), Oma = C ), mex = 0.5) # heatmap image (x = 1: nrow (data_matrix), Y = 1: ncol (data_matrix), Z = data_matrix, xlab = "", ylab = "", breaks = breaks, Col = pal, axes = false) # horizontal text (x = 1: nrow (data_matrix) + 0.75, y = PAR ("USR ") [4] + 1.25, SRT = 45, # tilt angle adj = 1, # distance labels = rownames (data_matrix), XPD = true) # vertical axis (2, at = 1: ncol (data_matrix), labels = colnames (data_matrix), Col = "white", las = 1) # White interval abline (H = C (1: ncol (data_matrix) + 0.5, V = C (1: nrow (data_matrix) + 0.5, Col = "white", LWD = 2, XPD = f) title ("correlation between genes", line = 8, adj = 0) # legend breaks2 <-breaks [-length (breaks)] # color scale PAR (MAR = C (2.5, 4.7,) # legend color image (x = 1, y = 0: length (breaks2 ), z = T (matrix (breaks2) * 1.001, Col = pal [1: length (breaks)-1], axes = false, breaks = breaks, xlab = "", ylab = "", xaxt = "N") # axis (4, at = 0 :( length (breaks2), labels = breaks, Col = "white ", las = 1) # legend white interval abline (H = C (1: length (breaks2), Col = "white", LWD = 2, XPD = f)

3. Show multi-variable data

# Heat map of several per-player statistics at once.
nba <- read.csv("nba.csv")
rownames(nba) <- nba[, 1]

# Standardise each statistic with scale(), then transpose so that the
# statistics run along the x axis and the players along the y axis.
data_matrix <- t(scale(data.matrix(nba[, -1])))

pal <- RColorBrewer::brewer.pal(6, "Blues")

statnames <- c(
  "games played", "Minutes played", "total points",
  "field goals made", "field goals attempted", "field goal percentage",
  "free throws made", "free throws attempted", "free throw percentage",
  "three pointers made", "three pointers attempted",
  "Three point percentage", "offensive rebounds", "defensive rebounds",
  "Total rebounds", "assists", "steals", "blocks", "turnovers", "fouls"
)

par(mar = c(3, 14, 19, 2), oma = c(0.2, 0.2, 0.2, 0.2), mex = 0.5)

n_stats <- nrow(data_matrix)
n_players <- ncol(data_matrix)

# Heat map
image(
  x = seq_len(n_stats),
  y = seq_len(n_players),
  z = data_matrix,
  xlab = "",
  ylab = "",
  col = pal,
  axes = FALSE
)

# X axis labels: tilted text placed above the top edge of the plot region;
# xpd = TRUE allows drawing outside that region.
text(
  seq_len(n_stats),
  par("usr")[4] + 1 + 1,
  srt = 45,
  adj = 0,
  labels = statnames,
  xpd = TRUE,
  cex = 0.85
)

# Y axis labels
axis(
  side = 2,
  at = seq_len(n_players),
  labels = colnames(data_matrix),
  col = "white",
  las = 1,
  cex.axis = 0.85
)

# White separating lines
abline(
  h = seq_len(n_players) + 0.5,
  v = seq_len(n_stats) + 0.5,
  col = "white",
  lwd = 1,
  xpd = FALSE
)

# Graph title
text(
  par("usr")[1] + 5,
  par("usr")[4] + 12,
  "NBA per game performance of top 50 corers",
  xpd = TRUE,
  font = 2,
  cex = 1.5
)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please email us, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.