Vector向量----一维
matrix矩阵----二维,只允许一种数据类型
data.frame数据框----二维,每列只允许一种数据类型
list列表----可装万物
x=iris
volcano
View(volcano) ##表格视图
> class(volcano) ##查看对象的类(数据结构类型);volcano是矩阵,不是数据框
[1] "matrix" "array"
heatmap(volcano) ##热图
> letters
[1] "a" "b" "c" "d" "e" "f"
[7] "g" "h" "i" "j" "k" "l"
[13] "m" "n" "o" "p" "q" "r"
[19] "s" "t" "u" "v" "w" "x"
[25] "y" "z"
> LETTERS
[1] "A" "B" "C" "D" "E" "F"
[7] "G" "H" "I" "J" "K" "L"
[13] "M" "N" "O" "P" "Q" "R"
[19] "S" "T" "U" "V" "W" "X"
[25] "Y" "Z"
> LETTERS[1:5]
[1] "A" "B" "C" "D" "E"
> which(LETTERS=="M")
[1] 13
> df1 <- data.frame(gene = paste0("gene",1:4),
change = rep(c("up","down"),each = 2),
score = c(5,3,-2,-4))
> df1
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down -2
4 gene4 down -4
> df2 <- read.csv("gene.csv")
> df2
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down -2
4 gene4 down -4
> #dimension:计算数据框的维度
> dim(df1) ###四行三列
[1] 4 3
> nrow(df1) ###四行
[1] 4
> ncol(df1) ###三列
[1] 3
> rownames(df1) ###行名
[1] "1" "2" "3" "4"
> colnames(df1) ###列名
[1] "gene" "change" "score"
> ###符号 $ 用于按名字取数据框的一列(也可用于按名字取列表的元素)
> df1$gene
[1] "gene1" "gene2" "gene3" "gene4"
> mean(df1$score)
[1] 0.5
> ## 按坐标
> df1[2,2]
[1] "up"
> df1[2,]
gene change score
2 gene2 up 3
> df1[,2]
[1] "up" "up" "down" "down"
> df1[c(1,3),1:2] ### 逗号前的c(1,3)表示选取第1、3行,逗号后的1:2表示选取第1至2列
gene change
1 gene1 up
3 gene3 down
> df1[c(2,3),1:2]
gene change
2 gene2 up
3 gene3 down
> df1[c(2,3),1:3]
gene change score
2 gene2 up 3
3 gene3 down -2
###按名字
> df1[,"gene"]
[1] "gene1" "gene2" "gene3" "gene4"
> df1[,c('gene','change')] ### $ 只能选一列,而该命令可以一次选几列
gene change
1 gene1 up
2 gene2 up
3 gene3 down
4 gene4 down
##数据框按条件/逻辑值取子集,TRUE对应的行/列留下,FALSE对应的行/列丢掉
> df1[df1$score>0,] ## 按行筛选score大于0的行
gene change score
1 gene1 up 5
2 gene2 up 3
> df1$score[df1$score>0] ## x[x>0]
[1] 5 3
## 筛选score大于0的基因
> df1[df1$score>0,1] ##逗号前是筛选行,逗号后是筛选列
[1] "gene1" "gene2"
###用于取子集的逻辑值向量与x对应即可,不必由x生成
> x=df1$gene;x
[1] "gene1" "gene2" "gene3" "gene4"
> y=df1$score;y
[1] 5 3 -2 -4
> x[y>0]
[1] "gene1" "gene2"
> ##如何取数据框的最后一列?
> df1[,3]
[1] 5 3 -2 -4
> df1[,ncol(df1)] ##ncol(df1)返回列数,这个数字正好就是最后一列的下标
[1] 5 3 -2 -4
> ##如何取数据框除了最后一列以外的其他列?
> df1[,-ncol(df1)] ## 负号(-)用于按下标排除某列;!只给逻辑值使用
gene change
1 gene1 up
2 gene2 up
3 gene3 down
4 gene4 down
> test<-read.csv("exercise.csv");test
Petal.Length Petal.Width Species
1 4.6 1.5 a
2 5.9 2.1 b
3 4.5 1.5 a
4 6.0 2.5 b
5 4.0 1.3 a
6 4.7 1.4 a
7 1.3 0.2 c
8 1.4 0.2 c
9 5.1 1.9 b
10 5.8 2.2 b
11 4.9 1.5 a
12 1.4 0.2 c
13 1.5 0.2 c
14 5.6 1.8 b
15 1.4 0.2 c
> median(test[,1])
[1] 4.6
> test[test$Species!="b",] #or
> test[test$Species=="a"|test$Species=="c",]
Petal.Length Petal.Width Species
1 4.6 1.5 a
3 4.5 1.5 a
5 4.0 1.3 a
6 4.7 1.4 a
7 1.3 0.2 c
8 1.4 0.2 c
11 4.9 1.5 a
12 1.4 0.2 c
13 1.5 0.2 c
15 1.4 0.2 c
###更好的答案:
test[test$Species%in%c("a","c"),]
> #改一个格
> df1[3,3] <- 5
> df1
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down 5
4 gene4 down -4
> #改一整列
> df1$score <- c(12,23,50,2)
> df1
gene change score
1 gene1 up 12
2 gene2 up 23
3 gene3 down 50
4 gene4 down 2
###新增一列
> df1$p.value <- c(0.01,0.02,0.07,0.05)
> df1
gene change score p.value
1 gene1 up 12 0.01
2 gene2 up 23 0.02
3 gene3 down 50 0.07
4 gene4 down 2 0.05
> #改行名和列名
> rownames(df1)<-c("r1","r2","r3","r4") ##全部修改行名
> df1
gene change score p.value
r1 gene1 up 12 0.01
r2 gene2 up 23 0.02
r3 gene3 down 50 0.07
r4 gene4 down 2 0.05
> rownames(df1)[2]<-"dd" ##修改一个行名
> df1
gene change score p.value
r1 gene1 up 12 0.01
dd gene2 up 23 0.02
r3 gene3 down 50 0.07
r4 gene4 down 2 0.05
> colnames(df1)[2] <- "CHANGE" ##修改一个列名
> df1
gene CHANGE score p.value
r1 gene1 up 12 0.01
dd gene2 up 23 0.02
r3 gene3 down 50 0.07
r4 gene4 down 2 0.05
## merge 用于连接数据框,默认只保留共同列中取值为交集的行(可用参数 all、all.x、all.y 保留非交集的行)
> test1=data.frame(name=c("jimmy","nicker","Damon","Sophie"),
bloodtype=c("A","B","O","AB"))
> test1
name bloodtype
1 jimmy A
2 nicker B
3 Damon O
4 Sophie AB
> test2 <- data.frame(name = c('Damon','jimmy','nicker','tony'),
group = c("group1","group1","group2","group2"),
vision = c(4.2,4.3,4.9,4.5))
> test2
name group vision
1 Damon group1 4.2
2 jimmy group1 4.3
3 nicker group2 4.9
4 tony group2 4.5
> merge(test1,test2,by="name")
name bloodtype group vision
1 Damon O group1 4.2
2 jimmy A group1 4.3
3 nicker B group2 4.9
> colnames(test2)[1]<-"Name"
> test2
Name group vision
1 Damon group1 4.2
2 jimmy group1 4.3
3 nicker group2 4.9
4 tony group2 4.5
### 此时test2与test1之间共同列的列名不同
### 可使用参数:merge(test1,test2,by.x="列名1",by.y="列名2")
> merge(test1,test2,by.x="name",by.y="Name")
name bloodtype group vision
1 Damon O group1 4.2
2 jimmy A group1 4.3
3 nicker B group2 4.9
> m<-matrix(1:9,nrow = 3) ## 1:9按照三行三列划分为矩阵,默认是按列填充
> m
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> m<-matrix(1:9,nrow = 3,byrow = TRUE) ## byrow=TRUE,按行填充
> m
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
> m[2,] ## 取第二行
[1] 4 5 6
> m[,2] ## 取第二列
[1] 2 5 8
> m[2:3,1:2] ##取2至3行中的1到2列
[,1] [,2]
[1,] 4 5
[2,] 7 8
> colnames(m)<-c("a","b","c")
> m
a b c
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
### 转置函数 t()
> t(m)
[,1] [,2] [,3]
a 1 4 7
b 2 5 8
c 3 6 9
> as.data.frame(m)
a b c
1 1 2 3
2 4 5 6
3 7 8 9
> class(m)
[1] "matrix" "array"
> m=as.data.frame(m) #### R语言里的修改都要赋值,没有赋值就没有发生过!!!!!
> class(m)
[1] "data.frame"
> m
a b c
1 1 2 3
2 4 5 6
3 7 8 9
> pheatmap::pheatmap(m) ###默认值存在聚类
> pheatmap::pheatmap(m,cluster_cols=F,cluster_rows=F)
列表下一级也是元素
### 生成包含两个矩阵的列表
> l<-list(m1=matrix(1:9,nrow=3),m2=matrix(2:9,nrow=2))
> l
$m1
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
$m2
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
### 取子集
> l[[2]] ### 按位置取列表中的第几个元素(这里是第2个)
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
> l$m1 ### 按元素名称取子集
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> scores=c(100,59,73,95,45)
> names(scores)=c("jimmy","nicker","Damon","Sophie","tony")
> scores ###有名字无名字不影响他是数值型向量
jimmy nicker Damon Sophie tony ##向量中每个元素的名字
100 59 73 95 45
> scores[c("jimmy","nicker")]
jimmy nicker
100 59
> names(scores)[scores>60]
[1] "jimmy" "Damon" "Sophie"
> rm(l) ###删除一个
> rm(df1,m) ###删除多个
> rm(list=ls()) ###删除全部(环境清除)
快捷键ctrl+l ###清空控制台运行记录
> table(iris$Species)
setosa versicolor virginica
50 50 50
> a<-iris[1:5,1:4]
> a
Sepal.Length Sepal.Width Petal.Length
1 5.1 3.5 1.4
2 4.9 3.0 1.4
3 4.7 3.2 1.3
4 4.6 3.1 1.5
5 5.0 3.6 1.4
Petal.Width
1 0.2
2 0.2
3 0.2
4 0.2
5 0.2
> a<-as.matrix(a)
> class(a)
[1] "matrix" "array"
> rownames(a)<-c("flower1","flower2","flower3","flower4","flower5")
> rownames(a)<-paste0("flower",1:5)
> rownames(a)<-paste0("flower",1:nrow(a)) ### nrow(a),a有多少行,那他的最后一行就是第几行
> a
Sepal.Length Sepal.Width Petal.Length
flower1 5.1 3.5 1.4
flower2 4.9 3.0 1.4
flower3 4.7 3.2 1.3
flower4 4.6 3.1 1.5
flower5 5.0 3.6 1.4
Petal.Width
flower1 0.2
flower2 0.2
flower3 0.2
flower4 0.2
flower5 0.2
> l[2]
$m2
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
> class(l[2]) ###取列表,无法绘图
[1] "list"
> l[[2]]
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
> class(l[[2]])###取矩阵,可绘图
[1] "matrix" "array"
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。