# 本文内容大部分引用自生信技能树。
##### 2.1 Creating vectors 🌟 #####
# In R Markdown on macOS, the shortcut for inserting a code chunk is
# Command + Option + I.

# (1) Combine individual values with c()
c(2, 5, 6, 2, 9)
## [1] 2 5 6 2 9
c("a", "f", "md", "b")
## [1] "a" "f" "md" "b"

# (2) Consecutive integers: use the colon operator ":"
1:5
## [1] 1 2 3 4 5

# (3) rep() for repeated values, seq() for regular sequences,
#     rnorm() for random numbers
rep("x", times = 3)
## [1] "x" "x" "x"
seq(3, 21, by = 3)
## [1] 3 6 9 12 15 18 21
rnorm(3) # random draw: the values differ on every run
## [1] 0.5710862 0.2770823 1.0682798

# (4) Combine the building blocks above to produce more complex vectors
paste0(rep("x", times = 3), 1:3)
## [1] "x1" "x2" "x3"
##### 2.2 Operations on a single vector #####
# (1) Assign the vector to a variable name
x = c(1, 3, 5, 1) # casual style: `=` works here, but `<-` is the standard
x
## [1] 1 3 5 1
x <- c(1, 3, 5, 1) # standard assignment operator (shortcut: Alt + minus)
x
## [1] 1 3 5 1
# Assign and print in one go: either chain with `;` ...
x <- c(1, 3, 5, 1); x
## [1] 1 3 5 1
# ... or wrap the assignment in parentheses
(x <- c(1, 3, 5, 1))
## [1] 1 3 5 1

# (2) Simple arithmetic (applied element by element)
x + 1
## [1] 2 4 6 2
log(x)
## [1] 0.000000 1.098612 1.609438 0.000000
sqrt(x)
## [1] 1.000000 1.732051 2.236068 1.000000

# (3) Test a condition to produce a logical vector
x > 3
## [1] FALSE FALSE TRUE FALSE
x == 3
## [1] FALSE TRUE FALSE FALSE

# (4) Basic statistics
max(x) # maximum
## [1] 5
min(x) # minimum
## [1] 1
mean(x) # mean
## [1] 2.5
median(x) # median
## [1] 2
var(x) # variance
## [1] 3.666667
sd(x) # standard deviation
## [1] 1.914854
sum(x) # sum
## [1] 10
length(x) # number of elements
## [1] 4
unique(x) # drop duplicates
## [1] 1 3 5
duplicated(x) # is each element a repeat of an earlier one?
## [1] FALSE FALSE FALSE TRUE
table(x) # count how often each value occurs
## x
## 1 3 5
## 2 1 1
sort(x) # ascending order by default
## [1] 1 1 3 5
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned
sort(x, decreasing = FALSE)
## [1] 1 1 3 5
sort(x, decreasing = TRUE)
## [1] 5 3 1 1
##### 2.3 Operations on two vectors #####
x <- c(1, 3, 5, 1)
y <- c(3, 2, 5, 6)

# (1) Comparison: yields a logical vector of the same length
x == y
## [1] FALSE FALSE TRUE FALSE
y == x
## [1] FALSE FALSE TRUE FALSE

# (2) Arithmetic
x + y
## [1] 4 5 10 7

# (3) Concatenating element by element
paste(x, y, sep = ",")
## [1] "1,3" "3,2" "5,5" "1,6"
# Difference between paste() and paste0():
# paste() separates with a space by default, paste0() uses no separator
paste(x, y)
## [1] "1 3" "3 2" "5 5" "1 6"
paste0(x, y)
## [1] "13" "32" "55" "16"
paste(x, y, sep = "") # same result as paste0(); shown only for comparison
## [1] "13" "32" "55" "16"
paste(x, y, sep = ",")
## [1] "1,3" "3,2" "5,5" "1,6"

# When the two vectors have different lengths
x <- c(1, 3, 5, 6, 2)
y <- c(3, 2, 5)
# The shorter vector is recycled. Because 5 is not a multiple of 3, R also
# emits the warning "longer object length is not a multiple of shorter
# object length".
x == y
## [1] FALSE FALSE TRUE FALSE TRUE
# Recycling rule -- see the slides
# Recycling can be used to simplify code:
paste0(rep("x", 3), 1:3)
## [1] "x1" "x2" "x3"
paste0("x", 1:3)
## [1] "x1" "x2" "x3"

# (4) Intersection, union and set difference
intersect(x, y)
## [1] 3 5 2
union(x, y)
## [1] 1 3 5 6 2
setdiff(x, y)
## [1] 1 6
setdiff(y, x)
## numeric(0)
x %in% y # is each element of x present in y?
## [1] FALSE TRUE TRUE FALSE TRUE
y %in% x # is each element of y present in x?
## [1] TRUE TRUE TRUE
##### 2.4 Subsetting a vector -- see the slides #####
x <- 8:12
# Subset with a logical vector
x[x == 10]
## [1] 10
x[x < 12]
## [1] 8 9 10 11
x[x %in% c(9, 13)]
## [1] 9
# Subset by position (a negative index drops that position)
x[4]
## [1] 11
x[2:4]
## [1] 9 10 11
x[c(1, 5)]
## [1] 8 12
x[-4]
## [1] 8 9 10 12
x[-(2:4)]
## [1] 8 12

#### 2.5 Modifying one or more elements of a vector: subset + assign
x
## [1] 8 9 10 11 12
# Change one element
x[4] <- 40
x
## [1] 8 9 10 40 12
# Change several elements at once
x[c(1, 5)] <- c(80, 20)
x
## [1] 80 9 10 40 20

# Sample data for plotting; k1 is random, so the values differ on every run
k1 <- rnorm(12)
k1
## [1] 0.1238212 0.9495584 -2.6507704 -1.7917652 2.3110265 0.6795380 -0.4338229 -1.0173876
## [9] -1.5896614 0.9601339 -2.8067981 -0.9245640
k2 <- rep(c("a", "b", "c", "d"), each = 3)
k2
## [1] "a" "a" "a" "b" "b" "b" "c" "c" "c" "d" "d" "d"
plot(k1)
# In the formula k1 ~ k2, k1 supplies the values (vertical axis) and k2 the
# groups (horizontal axis): one box per level of k2.
boxplot(k1 ~ k2)
# Exercise 2-2
# 1. Generate all even numbers between 1 and 15
x <- seq(2, 15, by = 2)
x
## [1] 2 4 6 8 10 12 14
# 2. Build the vector "student2" "student4" "student6" "student8" "student10"
#    "student12" "student14"
# Hint: paste0
x <- paste0("student", seq(2, 14, by = 2))
x
## [1] "student2" "student4" "student6" "student8" "student10" "student12" "student14"
# 3. Combine two different data types with c() and inspect the result:
#    the numbers are coerced to character strings
x <- c(2, 3, 4)
y <- c("a", "b")
z <- c(x, y)
z
## [1] "2" "3" "4" "a" "b"
# Exercise 2-4
# Note: running load("gands.Rdata") provides the prepared vectors g and s.
# If it fails, either the code is mistyped or the project was not opened
# correctly.
load("gands.Rdata")
# 1. Compute the length of vector g with a function
length(g)
## [1] 100
# 2. Select the gene names at even positions of g.
#    A logical mask over seq_along(g) also works when g has fewer than two
#    elements, where seq(2, length(g), 2) would raise an error.
g[seq_along(g) %% 2 == 0]
## [1] "CRAMP1L" "PRSS8" "CRAMP1L" "SLCO1C1" "COMMD1" "CCT4"
## [7] "RAB7A" "ZDHHC16" "MYL12B" "SNRPE" "ZNF586" "GGT7"
## [13] "RAB7A" "AFG3L2" "AC104581.1" "MPP2" "ATP2A2" "SNRPE"
## [19] "PRSS8" "ZNF461" "CECR5" "CLEC17A" "ATG10" "ATG10"
## [25] "SLC25A25" "KRTAP4-3" "SLCO1C1" "GGT7" "GSTP1" "UBAC1"
## [31] "NYNRIN" "MYL12B" "KCND1" "RGPD3" "C10orf128" "SLC30A9"
## [37] "GGT7" "TUBA4A" "KLHDC8A" "HBP1" "MARC2" "LCP1"
## [43] "OR2D3" "LIPE" "LIPE" "CANX" "ATP6V1B2" "MARC2"
## [49] "LCP1" "HOOK2"
# 3. How many elements of g are also present in s (compute the exact count
#    with a function)? Extract those elements.
# Hint: %in%
x <- g[g %in% s]
x
## [1] "GFM2" "SLCO1C1" "NYNRIN" "COMMD1" "COMMD1"
## [6] "AC017081.1" "RAB7A" "CASKIN2" "GGT7" "SNRPE"
## [11] "RGPD3" "ZNF586" "COMMD1" "GGT7" "URB1"
## [16] "RAB7A" "MPP2" "AFG3L2" "URB1" "AC104581.1"
## [21] "MPP2" "SNRPE" "ARHGAP1" "ZNF461" "OR2D3"
## [26] "CECR5" "SPDL1" "CLEC17A" "ZNF461" "ATG10"
## [31] "ATG10" "ATG10" "SLC25A25" "SLC30A9" "SLCO1C1"
## [36] "GGT7" "CASKIN2" "GSTP1" "MPP2" "NYNRIN"
## [41] "INTS12" "MPP2" "RGPD3" "RGPD3" "SLC30A9"
## [46] "C10orf128" "HBD" "SLC30A9" "GGT7" "HEPH"
## [51] "RP5-1021I20.4" "KLHDC8A" "HBD" "ZNF586" "CECR5"
## [56] "OR2D3" "LIPE" "INTS12" "LIPE" "SPDL1"
## [61] "SLCO1C1" "GGT7" "CECR5"
length(x) # the count asked for in step 3
## [1] 63
# Knit the R Markdown files in the working directory.
# The namespace operator is `::`; with a single `:` R evaluates `knitr` as a
# variable and stops with "object 'knitr' not found". knit() also takes one
# file path rather than a glob, so the matching files are listed first.
# NOTE(review): "*.rmd" is read here as "every .rmd/.Rmd file in the working
# directory" -- confirm, or replace the loop with a single explicit file name.
for (rmd_file in list.files(pattern = "\\.rmd$", ignore.case = TRUE)) {
  knitr::knit(rmd_file)
}
# Key topic: data frames
# 1. Where data frames come from
#    (1) created from scratch in code
#    (2) converted or derived from existing data
#    (3) read from a tabular file
#    (4) datasets built into R
# 2. Creating and reading data frames
df1 <- data.frame(
  gene = paste0("gene", 1:4),
  change = rep(c("up", "down"), each = 2),
  score = c(5, 3, -2, -4)
)
df1
## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
## 3 gene3 down -2
## 4 gene4 down -4
# Read a data frame from a CSV file; the recorded output below shows the same
# table as df1.
df2 <- read.csv(file = "gene.csv")
df2
## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
## 3 gene3 down -2
## 4 gene4 down -4
# 3. Data frame attributes
# Dimensions: rows and columns
dim(df1)
## [1] 4 3
nrow(df1)
## [1] 4
ncol(df1)
## [1] 3
# Row names and column names
rownames(df1)
## [1] "1" "2" "3" "4"
colnames(df1)
## [1] "gene" "change" "score"

# 4. Subsetting a data frame
# `$` extracts one column as a vector
# (tip from the original notes: delete the column name and press Tab)
df1$gene
## [1] "gene1" "gene2" "gene3" "gene4"
mean(df1$score)
## [1] 0.5
## By position: [row, column]
df1[2, 2]
## [1] "up"
df1[2, ]
## gene change score
## 2 gene2 up 3
df1[, 2]
## [1] "up" "up" "down" "down"
df1[c(1, 3), 1:2]
## gene change
## 1 gene1 up
## 3 gene3 down
## By name
df1[, "gene"]
## [1] "gene1" "gene2" "gene3" "gene4"
df1[, c("gene", "change")]
## gene change
## 1 gene1 up
## 2 gene2 up
## 3 gene3 down
## 4 gene4 down
# 5. Modifying a data frame
# Change a single cell
df1[3, 3] <- 5
df1
## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
## 3 gene3 down 5
## 4 gene4 down -4
# Replace a whole column
df1$score <- c(12, 23, 50, 2)
df1
## gene change score
## 1 gene1 up 12
## 2 gene2 up 23
## 3 gene3 down 50
## 4 gene4 down 2
# Assigning to a column name that does not exist yet appends a new column
df1$p.value <- c(0.01, 0.02, 0.07, 0.05)
df1
## gene change score p.value
## 1 gene1 up 12 0.01
## 2 gene2 up 23 0.02
## 3 gene3 down 50 0.07
## 4 gene4 down 2 0.05
# Rename all rows
rownames(df1) <- paste0("r", 1:4)
# Rename a single row/column by indexing into the names vector
colnames(df1)[2] <- "CHANGE"
# 6. Joining two data frames
test1 <- data.frame(
  name = c("jimmy", "nicker", "Damon", "Sophie"),
  blood_type = c("A", "B", "O", "AB")
)
test1
## name blood_type
## 1 jimmy A
## 2 nicker B
## 3 Damon O
## 4 Sophie AB
test2 <- data.frame(
  name = c("Damon", "jimmy", "nicker", "tony"),
  group = rep(c("group1", "group2"), each = 2),
  vision = c(4.2, 4.3, 4.9, 4.5)
)
test2
## name group vision
## 1 Damon group1 4.2
## 2 jimmy group1 4.3
## 3 nicker group2 4.9
## 4 tony group2 4.5
test3 <- data.frame(
  NAME = c("Damon", "jimmy", "nicker", "tony"),
  weight = c(140, 145, 110, 138)
)
test3
## NAME weight
## 1 Damon 140
## 2 jimmy 145
## 3 nicker 110
## 4 tony 138
# The key column has the same name in both tables: use `by`.
# Only rows whose key appears in both tables are kept.
merge(x = test1, y = test2, by = "name")
## name blood_type group vision
## 1 Damon O group1 4.2
## 2 jimmy A group1 4.3
## 3 nicker B group2 4.9
# The key column is named differently in each table: use `by.x` / `by.y`
merge(x = test1, y = test3, by.x = "name", by.y = "NAME")
## name blood_type weight
## 1 Damon O 140
## 2 jimmy A 145
## 3 nicker B 110
#### Matrices and lists
m <- matrix(1:9, nrow = 3, ncol = 3)
colnames(m) <- letters[1:3] # add column names "a", "b", "c"
m
## a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Subsetting uses [row, column], as with data frames
m[2, ]
## a b c
## 2 5 8
m[, 1]
## [1] 1 2 3
m[2, 3]
## c
## 8
m[2:3, 1:2]
## a b
## [1,] 2 5
## [2,] 3 6
m
## a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
t(m) # transpose: rows become columns
## [,1] [,2] [,3]
## a 1 2 3
## b 4 5 6
## c 7 8 9
as.data.frame(m) # convert the matrix to a data frame
## a b c
## 1 1 4 7
## 2 2 5 8
## 3 3 6 9

# Lists: the elements may differ in shape
x <- list(
  m1 = matrix(1:9, nrow = 3, ncol = 3),
  m2 = matrix(2:9, nrow = 2, ncol = 4)
)
x
## $m1
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
##
## $m2
## [,1] [,2] [,3] [,4]
## [1,] 2 4 6 8
## [2,] 3 5 7 9
x[[1]] # extract an element by position
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
x$m1 # extract an element by name
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Supplement: naming the elements of a vector
scores <- c(100, 59, 73, 95, 45)
names(scores) <- c("jimmy", "nicker", "Damon", "Sophie", "tony")
scores
## jimmy nicker Damon Sophie tony
## 100 59 73 95 45
# Named elements can be selected by name
scores["jimmy"]
## jimmy
## 100
scores[c("jimmy", "nicker")]
## jimmy nicker
## 100 59
# Names of the elements whose value is above 60
names(scores)[scores > 60]
## [1] "jimmy" "Damon" "Sophie"
### A harder topic, deferred until now
## Subsetting a data frame by condition (logical vector)
df1 <- data.frame(
  gene = paste0("gene", 1:4),
  change = rep(c("up", "down"), each = 2),
  score = c(5, 3, -2, -4)
)
k <- df1$score > 0
k
## [1] TRUE TRUE FALSE FALSE
# A logical vector in the row position keeps the rows where it is TRUE
df1[k, ]
## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
# Select the genes whose score is > 0: three equivalent ways
df1[k, 1]
## [1] "gene1" "gene2"
df1$gene[k]
## [1] "gene1" "gene2"
df1$gene[df1$score > 0]
## [1] "gene1" "gene2"
# Removing objects from the workspace
rm(x) # remove a single object
rm(df1,df2) # remove several objects in one call
rm(list = ls()) # remove every object that ls() lists in the current environment
# Package mirrors
# Set a mirror before installing packages. If you are outside mainland China
# and installs are already fast, this is not needed.
# Tsinghua mirror
#   http://mirrors.tuna.tsinghua.edu.cn/CRAN/
#   http://mirrors.tuna.tsinghua.edu.cn/bioconductor/
# USTC mirror
#   http://mirrors.ustc.edu.cn/CRAN/
#   http://mirrors.ustc.edu.cn/bioc/
options(repos = c(CRAN = "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options(BioC_mirror = "http://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
# Backup: Westlake University.
# Kept commented out: when the script is run top to bottom, an active second
# options(BioC_mirror = ...) call would silently override the Tsinghua setting
# above. Uncomment it only when the primary mirror is unavailable.
# options(BioC_mirror = "https://mirrors.westlake.edu.cn/bioconductor")
# R包安装
# 推荐操作
# 总结
# 列出一个包里有哪些函数或数据
# 一个问题
# 经典问题
# 用于导入\导出文件的包
# data.table的读写操作
# rio的读写操作(推荐读取excel文件采取这种方式)
# 总结
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。