# Load the salary data set and explore the Salary column.
# Assumes Salary.txt has a header row that includes a Salary column -- confirm
# against the data file.
# setwd() and attach() are kept to preserve the script's original side
# effects; the statements below use data$Salary explicitly so they do not
# depend on the search path.
setwd("d:/r/r-data/")
data <- read.table("Salary.txt", header = TRUE)
attach(data)

mean(data$Salary)    # mean salary
length(data$Salary)  # number of observations
cumsum(data$Salary)  # cumulative sum

# Split the salaries into three groups.
salary1 <- cut(data$Salary, 3)
table(salary1)

# Same grouping, with a label for each group.
salary1 <- cut(data$Salary, 3, labels = c("Low", "medium", "High"))
table(salary1)

# Group by explicit interval break points.
breakpoints <- c(0, 30, 40, 50, 60, 70)
salary2 <- cut(data$Salary, breaks = breakpoints)
table(salary2)
# Draw four diagnostic plots of a numeric vector on one 2 x 2 page:
# histogram, dot chart, box plot and normal Q-Q plot with its reference line.
#
# x: numeric vector to plot.
# Returns x invisibly; the function is called for its plotting side effect.
Pic <- function(x) {
  # Split the plotting region into four panels and put the caller's layout
  # back on exit, even if one of the plotting calls fails.
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)

  hist(x)      # histogram
  dotchart(x)  # dot chart
  boxplot(x)   # box plot
  qqnorm(x)    # normal Q-Q plot ...
  qqline(x)    # ... with its reference line added to the same panel

  invisible(x)
}
# Call the plotting helper defined above on the salary column.
Pic(data$Salary)

# Re-read the data, keeping strings as character vectors instead of factors.
data <- read.table("D:/r/r-data/salary.txt", header = TRUE,
                   stringsAsFactors = FALSE)

# Rename the columns with names(). The names carry no surrounding spaces so
# that they can be referenced as data$City, data$Salary, and so on.
names(data) <- c("City", "Work", "Price", "Salary")
names(data)

data2 <- data[1, 3]    # value in row 1, column 3
data3 <- data[-1, -3]  # drop the first row and the third column

# Kept to preserve the script's original side effect. Attaching a second copy
# masks the columns attached earlier, and attached copies do not follow later
# changes to `data`, so prefer data$column in new code.
attach(data)
# Console output printed by attach(data):
#   The following objects are masked from data (position 3):
#       City, price, salary, work
# Missing-value handling.
# Columns are referenced as data$... rather than through attach(): an attached
# copy is a snapshot and would not show the NAs introduced below.
data$Salary <- replace(data$Salary, data$Salary > 65, NA)  # salaries > 65 -> NA
is.na(data$Salary)           # flag the missing values
sum(is.na(data$Salary))      # count the missing values
complete.cases(data$Salary)  # flag the non-missing values
data$Price <- replace(data$Price, data$Price > 80, NA)     # prices > 80 -> NA

# md.pattern() from the mice package displays the missing-value pattern.
# Install only when the package is absent instead of on every run.
if (!requireNamespace("mice", quietly = TRUE)) {
  install.packages("mice")
}
library(mice)
md.pattern(data)

# aggr() from the VIM package plots the missing-data pattern.
if (!requireNamespace("VIM", quietly = TRUE)) {
  install.packages("VIM")
}
library(VIM)
aggr(data)

# Drop the samples whose salary is missing (two equivalent ways).
data1 <- data[complete.cases(data$Salary), ]
dim(data1)
data2 <- data[!is.na(data$Salary), ]  # `!` is logical negation
dim(data2)

# Impute: mean() over the non-NA salary values.
# NOTE(review): this fills every NA cell in `data`, including missing prices,
# with the salary mean -- confirm that is intended.
data[is.na(data)] <- mean(data$Salary, na.rm = TRUE)
# Append one observation with rbind() (rbind joins by row, cbind by column).
# The new row is built as a one-row data frame: a plain c() vector would
# coerce every value to character and turn the numeric columns of the result
# into character columns. The names are taken from `data` so that rbind() can
# match the columns.
a <- setNames(
  data.frame("HONGKONG", 1910, 75.0, 41.8, stringsAsFactors = FALSE),
  names(data)
)
data4 <- rbind(data, a)
tail(data4, 3)  # last three rows, including the appended one
# Build a data frame with data.frame() from three parallel vectors.
weight <- c(150, 135, 210, 140)  # numeric vector
height <- c(65, 61, 70, 65)      # numeric vector
gender <- c("F", "F", "M", "F")  # character vector
stu <- data.frame(weight, height, gender)
row.names(stu) <- c("Alice", "Bob", "Cal", "David")
# Build an index keyed by city and merge it into `data` with merge().
# seq_len(nrow(data)) replaces a hard-coded 1:15 so that the index always has
# one entry per row of `data`.
index <- list("City" = data$City, "Index" = seq_len(nrow(data)))
index$City
data.index <- merge(data, index, by = "City")
# Row selection. which() drops NA comparisons, so missing values do not show
# up as all-NA rows in the result.
data[which(data$Salary > 65), ]    # rows with a salary above 65
data[c(2, 4), ]                    # second and fourth rows
data[which(data$Price == 65.6), ]  # price equal to 65.6; note the double `==`

# Ordering and ranking.
# NOTE(review): the variable is named order.salary but is computed from
# stu$weight -- confirm which column was meant.
order.salary <- order(stu$weight)  # permutation that sorts stu$weight
order.salary
rank(data$Salary)                  # rank of each salary within the vector

t(data)                            # transpose
# Reshape a small data frame to long form and back.
x <- data.frame(a = 1:4, b = seq(1.2, 1.5, 0.1), c = rep(1, 4))
x

# stack() turns the data frame into two columns: `values` and `ind`.
x1 <- stack(x)
x1

# unstack() restores the wide layout. Its formula argument is named `form`.
x_restored <- unstack(x1, form = values ~ ind)
x_restored
# Reshaping with melt() from the reshape2 package.
library(reshape2)
melt(x)  # convert the data frame `x` to long form (variable / value)

# Long-format version of the built-in airquality data set.
data(airquality)
str(airquality)  # show the internal structure of the object, like summary()
longdata <- melt(airquality,
                 id.vars = c("Ozone", "Month", "Day"),
                 measure.vars = 2:4)
str(longdata)
# R language learning notes (data preprocessing)