# R语言_高级数据管理

```r
#数值处理函数
#数学函数
abs
sqrt
ceiling
floor
round(x,digits=n)    #舍入为指定位的小数
signif   #舍入为指定位的有效数字
log(x,base=n)
log()   #自然对数
log10()   #常用对数
exp()
#统计函数
mean(x,trim=0.05,na.rm=TRUE)   #截尾平均数
median
sd
var
quantile(x,probs)
range  #求值域 diff(range(x))
sum
diff(x,lag=n)  #滞后差分
min
max
scale(x,center=TRUE,scale=TRUE)*sd+m #为数据对象按列进行中心化或标准化

#概率函数
set.seed()
runif   #产生0-1之间服从均匀分布的伪随机数
[dpqr]distribution_abbreviation() #density/distribution/quantile/random
dnorm
pnorm
qnorm
rnorm
# Example 1
# Evaluate the standard normal density on a grid of "pretty" breakpoints
# spanning -3..3 and draw it as a line.
x <- pretty(c(-3, 3), n = 30)
y <- dnorm(x)
plot(
  x, y,
  type = "l",
  xlab = "normalDeviate",
  ylab = "density",
  yaxs = "i"
)  # standard normal curve
pnorm(1.96)  # area under the curve to the left of 1.96
qnorm(0.975, mean = 0, sd = 1)  # the 0.975 quantile
rnorm(50, mean = 50, sd = 10)  # 50 random draws with mean 50 and sd 10
# Generate multivariate normal data
library(MASS)
options(digits = 3)
set.seed(1234)  # fixed seed so the simulated sample is reproducible
# The mean vector is called `mu` (not `mean`) so that the base function
# mean() is not masked by a numeric vector in the global environment.
mu <- c(230.7, 146.7, 3.6)
# 3 x 3 covariance matrix (symmetric, so the column-wise fill order used by
# matrix() does not matter here).
sigma <- matrix(
  c(
    15360.8, 6721.2, -47.1,
    6721.2, 4700.9, -16.5,
    -47.1, -16.5, 0.3
  ),
  nrow = 3, ncol = 3
)
d <- mvrnorm(500, mu = mu, Sigma = sigma)  # 500 draws, one per row
d <- as.data.frame(d)
names(d) <- c("y", "x1", "x2")
dim(d)

#字符处理函数
x <- "abcdef"
nchar(x)  # number of characters in x
substr(x, 2, 4)  # extract characters 2 through 4
# Replacement form: only positions 2-4 are overwritten, so just the first
# three characters of the replacement string are used.
substr(x, 2, 4) <- "22222"
# Search a character vector for a pattern; fixed = TRUE matches the literal
# string instead of treating it as a regular expression.
grep("A", c("b", "a", "A"), fixed = TRUE)
sub("\\s", ".", "hello world")  # replace the first whitespace character
y <- strsplit(c("a b", "c d"), " ")  # list with one character vector per input
unlist(y)[2]  # second element after flattening the list
sapply(y, "[", 2)  # second piece of every split string
paste("x", 1:3, sep = "o")
paste("today is", date())
toupper
tolower

#其他实用函数
length
seq(1,10,2)
rep(1:3,times=2,each=2)
cut(x,n)   #将连续型变量x分割为n个水平的因子
pretty(x,n) #将一个连续型变量x分割为n个区间
cat("hello","gy","\n") #连接对象，并输出至屏幕或文件

#控制流
# for: run the body once for every element of the sequence
for (i in 1:10) {
  print("a")
  print("b")
}
i <- 10
# while: repeat the body for as long as the condition holds
while (i > 0) {
  print("hello")
  i <- i - 1
}
# if-else
# At top level the `else` must follow the closing brace of the `if` branch on
# the same line; an `else` that starts its own line is a syntax error because
# the parser has already treated the `if` statement as complete.
i <- 10
if (i == 1) {
  print("1")
} else {
  print("10")
}
# ifelse: vectorised form, returns "p" where the test is TRUE and "f" otherwise
ifelse(i == 10, "p", "f")
# switch: pick the alternative whose name matches the string in i
i <- "a"
print(switch(i,
  a = "ten",
  b = "one"
))

#用户自编函数
#例子1
# Summarise the centre and spread of a numeric vector.
#
# Arguments:
#   x          numeric vector to summarise
#   parametric TRUE  -> centre is the mean, spread is the standard deviation
#              FALSE -> centre is the median, spread is the median absolute
#                       deviation (mad)
#   print      when TRUE, the two statistics are also written to the console
#
# Returns a list with the elements `center` and `spread`.
mystats <- function(x, parametric = TRUE, print = FALSE) {
  if (parametric) {
    center <- mean(x)
    spread <- sd(x)
  } else {
    center <- median(x)
    spread <- mad(x)
  }

  # Both flags are single logicals, so the scalar operator && is used.
  if (print && parametric) {
    cat("mean=", center, "\n", "SD=", spread, "\n")
  } else if (print && !parametric) {
    cat("median=", center, "\n", "MAD=", spread, "\n")
  }

  list(center = center, spread = spread)
}
#例子2
# Format the current date.
#
# Arguments:
#   type  "long"  -> weekday, month name, day and four-digit year
#         "short" -> mm-dd-yy
#         any other string prints "<type> is not recognized" to the console,
#         and the function then returns NULL.
mydate <- function(type = "long") {
  now <- Sys.time()
  switch(type,
    long = format(now, "%A %B %d %Y"),
    short = format(now, "%m-%d-%y"),
    cat(type, "is not recognized")  # unnamed last alternative = fallback
  )
}
mydate("long")
mydate("short")
mydate()  # no argument: the default type "long" is used
mydate("s")  # matches neither name, so the fallback message is printed

#整合与重构
# Transpose
cars <- mtcars[1:5, 1:4]  # first five rows and first four columns
cars
t(cars)  # rows become columns and vice versa
# Aggregate
options(digits = 3)
# Mean of every mtcars column within each combination of cyl (G1) and
# gear (G2); na.rm = TRUE is passed through to mean().
aggregate(
  mtcars,
  by = list(G1 = mtcars$cyl, G2 = mtcars$gear),
  FUN = mean,
  na.rm = TRUE
)
# reshape package
library(reshape)
# Identifier columns are id and time; x1 and x2 are the measured variables
d <- data.frame(
  id = c(1, 1, 2, 2),
  time = c(1, 2, 1, 2),
  x1 = c(5, 3, 6, 2),
  x2 = c(6, 5, 1, 4)
)
md <- melt(d, id = c("id", "time"))  # melt, keeping id and time as identifiers
cast(md, id ~ variable, mean)  # with aggregation: mean per id and variable
cast(md,id+time~variable)  #不执行整合
```

145 篇文章47 人订阅

0 条评论