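# summary(): basic descriptive statistics
# sapply(x, FUN, options): applies the function FUN to each vector (column) of a data frame or matrix x
# mean: mean
# sd: standard deviation
# var: variance
# min: minimum
# max: maximum
# median: median
# length: number of elements
# range: range of values
# quantile: quantiles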
# Columns of the built-in mtcars data set that the examples below work with.
vars <- c("mpg", "hp", "wt")
head(mtcars[vars])     # first rows of the selected columns
summary(mtcars[vars])  # per-column summary statistics
# Descriptive statistics for a numeric vector.
#
# Args:
#   x:       numeric vector.
#   na.omit: if TRUE, missing values are removed from x before computing.
#
# Returns a named numeric vector with elements n (number of values used),
# mean, stdev (sample standard deviation), skew and kurtosis (the fourth
# standardized moment minus 3).
mystats <- function(x, na.omit = FALSE) {
  if (na.omit) {
    # Must overwrite x itself; otherwise the NA values still reach mean()/sd().
    x <- x[!is.na(x)]
  }
  m <- mean(x)
  n <- length(x)
  s <- sd(x)
  skew <- sum((x - m)^3 / s^3) / n      # skewness
  kurt <- sum((x - m)^4 / s^4) / n - 3  # kurtosis relative to 3
  c(n = n, mean = m, stdev = s, skew = skew, kurtosis = kurt)
}
# Apply mystats to each selected column; the result has one column per variable.
sapply(mtcars[vars], mystats)
# In the result, the distribution is skewed to the right (skew = 0.61) and
# slightly flatter than a normal distribution (kurtosis = -0.37).
# describe() in the Hmisc package returns the number of variables and
# observations, the number of missing and unique values, the mean, quantiles,
# and the five largest and smallest values.
library(Hmisc)
describe(mtcars[vars])
# The pastecs package has a stat.desc() function that computes a wide range of
# descriptive statistics.
library(pastecs)
stat.desc(mtcars[vars])
# describe() in the psych package also computes descriptive statistics.
# NOTE: when Hmisc is attached as well, the package attached last (psych here)
# masks the other's describe(); use Hmisc::describe() to reach the first one.
library(psych)
describe(mtcars[vars])
# aggregate(): descriptive statistics by group (here grouped by mtcars$am).
aggregate(mtcars[vars], by = list(am = mtcars$am), mean)
aggregate(mtcars[vars], by = list(am = mtcars$am), sd)
# by(data, INDICES, FUN): apply FUN to each subset of data defined by INDICES.
#
# dstats returns the mean and standard deviation of x.
# by() passes FUN a data-frame subset, and mean()/sd() do not accept a data
# frame, so for data-frame input the statistics are computed column by column
# (giving a matrix with rows "mean" and "sd"). A plain vector gives a named
# vector c(mean = ..., sd = ...).
dstats <- function(x) {
  stats <- function(v) c(mean = mean(v), sd = sd(v))
  if (is.data.frame(x)) {
    vapply(x, stats, numeric(2))
  } else {
    stats(x)
  }
}
# Apply dstats to the selected columns separately for each value of mtcars$am.
by(mtcars[vars], mtcars$am, dstats)
# summaryBy() in the doBy package computes descriptive statistics by group.
# The summary function is passed through the FUN argument (upper case).
library(doBy)
summaryBy(mpg + hp + wt ~ am, data = mtcars, FUN = mystats)
# describeBy() in psych (formerly describe.by()) computes descriptive
# statistics by group, but it does not let you supply an arbitrary function.
library(psych)
describeBy(mtcars[vars], mtcars$am)
# Using the reshape package
library(reshape)
# Summary function for a numeric vector: number of values, mean and
# standard deviation, returned as a named numeric vector.
dstats <- function(x) {
  c(n = length(x), mean = mean(x), sd = sd(x))
}
# id.vars: the variables to group by; measure.vars: the variables to summarize.
dfm <- melt(mtcars, measure.vars = c("mpg", "hp", "wt"), id.vars = c("am", "cyl"))
# One row per am/cyl/variable combination, summarized with dstats.
cast(dfm, am + cyl + variable ~ ., dstats)
# table(): creates an N-way contingency table from N categorical variables
# xtabs(): creates an N-way contingency table based on a formula and a matrix or data frame
# prop.table(): expresses the entries of a table as fractions
# margin.table(): computes the marginal sums of a table
# addmargins(): adds margins (sums by default) to a table
# ftable(): creates a compact, "flat" contingency table
# One-way contingency table
library(vcd)  # provides the Arthritis data set
mytable <- with(Arthritis, table(Improved))
mytable
prop.table(mytable)  # the same counts expressed as proportions
# Two-way contingency table
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
mytable
# Marginal frequencies; 1 refers to the first variable in the formula
margin.table(mytable, 1)
# Marginal proportions; 1 refers to the first variable in the formula
prop.table(mytable, 1)
margin.table(mytable, 2)
prop.table(mytable, 2)
prop.table(mytable)
# Add a Sum entry at the end of every row and every column
addmargins(mytable)
addmargins(prop.table(mytable))
# Add only the row sums
addmargins(prop.table(mytable, 1), 2)
# Add only the column sums
addmargins(prop.table(mytable, 2), 1)
# CrossTable() in the gmodels package also produces a two-way contingency table.
library(gmodels)
CrossTable(Arthritis$Treatment, Arthritis$Improved)
# Tests of independence
library(vcd)
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
chisq.test(mytable)
# The test above gives p = 0.001: there is some relationship between the two.
mytable <- xtabs(~ Improved + Sex, data = Arthritis)
chisq.test(mytable)
# The test above gives p = 0.08: not significant at the 0.05 level.
# The test below gives p = 0.001: the two variables are related.
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
fisher.test(mytable)
# The test below gives p = 0.0006; it assumes there is no three-way interaction.
mytable <- xtabs(~ Treatment + Improved + Sex, data = Arthritis)
mantelhaen.test(mytable)
# Measures of association: the larger the value, the stronger the association.
library(vcd)
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
assocstats(mytable)
# "R in Action" reading notes (VII): basic statistical analysis