R语言_高级数据管理

#数值处理函数
#数学函数
abs
sqrt
ceiling
floor
round(x,digits=n)    #舍入为指定位的小数
signif   #舍入为指定位的有效数字
log(x,base=n)
log()   #自然对数
log10() #常用对数
exp()
#统计函数
mean(x,trim=0.05,na.rm=TRUE)   #截尾平均数
median
sd
var
mad
quantile(x,probs)
range  #求值域 diff(range(x))
sum
diff(x,lag=n)  #滞后差分
min
max
scale(x,center=TRUE,scale=TRUE)*sd+m #为数据对象按列进行中心化或标准化


#概率函数
set.seed()
runif   #产生0-1之间服从均匀分布的伪随机数
[dpqr]distribution_abbreviation() #density/distribution/quantile/random
dnorm
pnorm
qnorm
rnorm
# Example 1: draw the standard normal density curve
x <- pretty(c(-3, 3), 30)
y <- dnorm(x)
plot(
    x, y,
    type = "l",
    xlab = "normalDeviate",
    ylab = "density",
    yaxs = "i"
)
pnorm(1.96)                     # area under the curve to the left of 1.96
qnorm(0.975, mean = 0, sd = 1)  # 0.975 quantile of the standard normal
rnorm(50, mean = 50, sd = 10)   # 50 random draws with mean 50 and sd 10
# Generate multivariate normal data
library(MASS)
options(digits = 3)
set.seed(1234)
# The mean vector is named `mu` (not `mean`) so that the base function
# mean() is not masked by a numeric vector in the global environment.
mu <- c(230.7, 146.7, 3.6)
# 3 x 3 matrix passed to mvrnorm() as its Sigma argument
sigma <- matrix(
    c(15360.8, 6721.2, -47.1,
      6721.2, 4700.9, -16.5,
      -47.1, -16.5, 0.3),
    nrow = 3, ncol = 3
)
# Draw 500 observations and store them as a data frame
d <- as.data.frame(mvrnorm(500, mu, sigma))
names(d) <- c("y", "x1", "x2")
dim(d)
head(d, n = 10)


#字符处理函数
x <- "abcdef"
nchar(x)                                   # number of characters in x
substr(x, 2, 4)                            # extract characters 2 through 4
substr(x, 2, 4) <- "22222"                 # overwrite characters 2 through 4 of x
grep("A", c("b", "a", "A"), fixed = TRUE)  # search the vector for the literal "A"
sub("\\s", ".", "hello world")             # replace the first whitespace with "."
y <- strsplit(c("a b", "c d"), " ")        # split each string on a single space
unlist(y)[2]                               # second element of the flattened result
sapply(y, "[", 2)                          # second piece of every split string
paste("x", 1:3, sep = "o")
paste("today is", date())
toupper
tolower


#其他实用函数
length
seq(1,10,2)
rep(1:3,times=2,each=2)
cut(x,n)   #将连续型变量x分割为n个水平的因子
pretty(x,n) #将一个连续型变量x分割为n个区间
cat("hello","gy","\n") #连接对象,并输出至屏幕或文件


#控制流
# for loop: run the body ten times
for (i in seq_len(10)) {
    print("a")
    print("b")
}
# while loop: count i down from 10 until it reaches 0
i <- 10
while (i > 0) {
    print("hello")
    i <- i - 1
}
# if-else
i <- 10
# At top level an `if` statement is complete as soon as its branch ends, so
# an `else` on the following line is a syntax error. Bracing the branches and
# writing `} else {` keeps the whole statement parseable when sourced.
if (i == 1) {
    print("1")
} else {
    print("10")
}
# ifelse: vectorized counterpart of if-else
ifelse(i == 10, "p", "f")
# switch: choose a value by matching the string held in i
i <- "a"
print(switch(EXPR = i, a = "ten", b = "one"))


#用户自编函数
#例子1
#' Summarise the centre and spread of a vector
#'
#' @param x Vector passed to mean()/sd() or median()/mad().
#' @param parametric If TRUE use mean and standard deviation; otherwise use
#'   median and median absolute deviation.
#' @param print If TRUE, also write the two statistics to the console.
#' @return A list with elements `center` and `spread`.
mystats <- function(x, parametric = TRUE, print = FALSE) {
    if (parametric) {
        center <- mean(x)
        spread <- sd(x)
        labels <- c("mean=", "sd=")
    } else {
        center <- median(x)
        spread <- mad(x)
        labels <- c("median=", "mad=")
    }

    # `print` is a scalar flag, so a plain `if` is used instead of combining
    # it with `parametric` through the elementwise `&` operator.
    if (print) {
        cat(labels[1], center, "\n",
            labels[2], spread, "\n")
    }

    list(center = center, spread = spread)
}
#例子2
#' Format the current date in one of two layouts
#'
#' @param type "long" gives weekday, month name, day and year; "short" gives
#'   month-day-year as two-digit numbers. Any other string writes a
#'   "not recognized" message to the console.
#' @return The formatted date string for "long" or "short".
mydate <- function(type = "long") {
    now <- Sys.time()
    switch(
        type,
        long = format(now, "%A %B %d %Y"),
        short = format(now, "%m-%d-%y"),
        cat(type, "is not recognized")
    )
}
# Demo calls: both layouts, the default, and an unrecognized type
mydate("long")
mydate("short")
mydate()
mydate("s")


#整合与重构
# Transpose: first five rows and four columns of mtcars
cars <- mtcars[1:5, 1:4]
cars
t(cars)
# Aggregate: mean of every mtcars column for each cyl/gear combination
options(digits = 3)
with(
    mtcars,
    aggregate(
        mtcars,
        by = list(G1 = cyl, G2 = gear),
        FUN = mean,
        na.rm = TRUE
    )
)
# reshape package
library(reshape)
# Identifier columns are id and time; x1 and x2 are the measured variables
d <- data.frame(
    id = c(1, 1, 2, 2),
    time = c(1, 2, 1, 2),
    x1 = c(5, 3, 6, 2),
    x2 = c(6, 5, 1, 4)
)
# melt: reshape to long form, one row per id/time/variable combination
md <- melt(d, id.vars = c("id", "time"))
cast(md, id ~ variable, mean)    # cast with aggregation (mean per id)
cast(md, id + time ~ variable)   # cast without aggregation

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

扫码关注云+社区

领取腾讯云代金券