df1 <- data.frame(gene = paste0("gene",1:4),change = rep(c("up","down"),each = 2),
                  score = c(5,3,-2,-4))
df1
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down -2
4 gene4 down -4
df2 <- read.csv("gene.csv")
df2
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down -2
4 gene4 down -4
#dim维度 数据框有四行(nrow);三列(ncol)
dim(df1)
[1] 4 3
nrow(df1)
[1] 4
ncol(df1)
[1] 3
#rownames 行名
rownames(df1)
[1] "1" "2" "3" "4"
#colnames 列名
colnames(df1)
[1] "gene" "change" "score"
#用$取出一列
df1$gene
[1] "gene1" "gene2" "gene3" "gene4"
mean(df1$score)
[1] 0.5
> df1[2,2]
[1] "up"
> df1[2,] #取第二行
gene change score
2 gene2 up 3
> df1[,2] #取第二列 一般用$.
[1] "up" "up" "down" "down"
> df1[c(1,3),1:2]
gene change
1 gene1 up
3 gene3 down
> df1[,"gene"] #$只能取一列,这种方法可以取多列
[1] "gene1" "gene2" "gene3" "gene4"
> df1[,c('gene','change')] #一次取多列 [,c('','')]
gene change
1 gene1 up
2 gene2 up
3 gene3 down
4 gene4 down
> df1[df1$score>0,] #筛选score>0的行留下
gene change score
1 gene1 up 5
2 gene2 up 3
> df1[df1$score>0,1] #筛选score>0的行,对应的第一列留下
[1] "gene1" "gene2"
> df1$gene[df1$score>0] #与上面的结果一样
[1] "gene1" "gene2"
> df1[,3]
[1]  5  3 -2 -4
> df1[,ncol(df1)] #ncol( ) 取数据框的最后一列
[1]  5  3 -2 -4
> df1[,-ncol(df1)] #ncol( ) 前加减号
gene change
1 gene1 up
2 gene2 up
3 gene3 down
4 gene4 down
> df1[df1$score > 0,1]
[1] "gene1" "gene2"
> df1$gene[df1$score > 0]
[1] "gene1" "gene2"
test = read.csv("exercise.csv")
median(test$Petal.Length)
median(test[,1])
test[test$Species!="b",]
test[test$Species=="a"|test$Species=="c",] #或者用|,|前后必须为逻辑值
test[test$Species %in% c("a","c"),] #最好用这一种
> df1[3,3] <- 5
> df1
gene change score
1 gene1 up 5
2 gene2 up 3
3 gene3 down 5
4 gene4 down -4
> df1$score <- c(12,23,50,2) #修改这一列
> df1
gene change score
1 gene1 up 12
2 gene2 up 23
3 gene3 down 50
4 gene4 down 2
> df1$p.value <- c(0.01,0.02,0.07,0.05) #增加新的一列
> df1
gene change score p.value
1 gene1 up 12 0.01
2 gene2 up 23 0.02
3 gene3 down 50 0.07
4 gene4 down 2 0.05
> rownames(df1) <- c("r1","r2","r3","r4")
> colnames(df1)[2] <- "CHANGE"
> test1 <- data.frame(name = c('jimmy','nicker','Damon','Sophie'), blood_type = c("A","B","O","AB"))
> test1
name blood_type
1 jimmy A
2 nicker B
3 Damon O
4 Sophie AB
> test2 <- data.frame(name = c('Damon','jimmy','nicker','tony'), group = c("group1","group1","group2","group2"), vision = c(4.2,4.3,4.9,4.5))
> test2
name group vision
1 Damon group1 4.2
2 jimmy group1 4.3
3 nicker group2 4.9
4 tony group2 4.5
> test3 <- data.frame(NAME = c('Damon','jimmy','nicker','tony'),
+ weight = c(140,145,110,138))
> test3
NAME weight
1 Damon 140
2 jimmy 145
3 nicker 110
4 tony 138
> merge(test1,test2,by="name") #两个数据的连接 merge( )取得是交集
name blood_type group vision
1 Damon O group1 4.2
2 jimmy A group1 4.3
3 nicker B group2 4.9
> merge(test1,test3,by.x = "name",by.y = "NAME") #当列名不相同时 x是test1 y是test3
name blood_type weight
1 Damon O 140
2 jimmy A 145
3 nicker B 110
> df1[!duplicated(df1$change),] #按照change这一列去重复
gene change score
1 gene1 up 5
3 gene3 down -2
> m <- matrix(1:9, nrow = 3) #不支持加$
> colnames(m) <- c("a","b","c") #加列名
> m
     a b c
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> m[2,] # 取矩阵的第二行
a b c
2 5 8
> m[,1] #取矩阵的第一列
[1] 1 2 3
> m[2,3] #取矩阵的第二行第三列
c
8
> m[2:3,1:2] #取矩阵2、3行的1、2列
a b
[1,] 2 5
[2,] 3 6
> m
     a b c
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
> t(m) #行变列 列变行
  [,1] [,2] [,3]
a    1    2    3
b    4    5    6
c    7    8    9
####矩阵转换为数据框
> as.data.frame(m)
a b c
1 1 4 7
2 2 5 8
3 3 6 9
#生成两个包含矩阵元素的列表
#画图都是针对矩阵和数据框 没有针对列表设置的
> l <- list(m1 = matrix(1:9, nrow = 3),
m2 = matrix(2:9, nrow = 2))
#取列表里的子集也是用$
> l
$m1
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9
$m2
     [,1] [,2] [,3] [,4]
[1,]    2    4    6    8
[2,]    3    5    7    9
>l[[2]] #两个[[]] 记为一种列表取子集的固定写法
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
> l[2]
$m2
     [,1] [,2] [,3] [,4]
[1,]    2    4    6    8
[2,]    3    5    7    9
> l$m1
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9
>scores = c(100,59,73,95,45) #给对应的数字起名字,不影响它本身仍为数值型向量
> names(scores) = c("jimmy","nicker","Damon","Sophie","tony")
> scores
 jimmy nicker  Damon Sophie   tony
   100     59     73     95     45
> scores["jimmy"] #从向量中把jimmy的分数提取出来
jimmy
  100
> scores[c("jimmy","nicker")]
 jimmy nicker
   100     59
> names(scores)[scores>60] #从向量中把分数大于60的人提取出来
[1] "jimmy"  "Damon"  "Sophie"
rm(l)
rm(df1,df2)
rm(list = ls())
ctrl+L #快捷键清空控制台上的记录
#1.统计iris最后一列有哪几个取值,每个取值重复了多少次
> table(iris$Species)
setosa versicolor virginica
50 50 50
#2.提取iris的前5行,前4列,并转换为矩阵,赋值给a。
> a <- as.matrix(iris[1:5,1:4])
> a
Sepal.Length Sepal.Width Petal.Length Petal.Width
1 5.1 3.5 1.4 0.2
2 4.9 3.0 1.4 0.2
3 4.7 3.2 1.3 0.2
4 4.6 3.1 1.5 0.2
5 5.0 3.6 1.4 0.2
#3.将a的行名改为flower1,flower2...flower5。
> rownames(a) <- paste0("flower",1:nrow(a))
> rownames(a)<-paste0("flower",1:5)
#这两行的结果一样:nrow(a)返回a的行数(这里是5),所以1:nrow(a)等同于1:5
> a
Sepal.Length Sepal.Width Petal.Length Petal.Width
flower1 5.1 3.5 1.4 0.2
flower2 4.9 3.0 1.4 0.2
flower3 4.7 3.2 1.3 0.2
flower4 4.6 3.1 1.5 0.2
flower5 5.0 3.6 1.4 0.2
#4.探索列表取子集l[2]和l[[2]]的区别(提示:数据结构)
> l <- list(m1 = matrix(1:9, nrow = 3),
+ m2 = matrix(2:9, nrow = 2))
> class(l[2]) #单个[]取出的仍然是列表
[1] "list"
> class(l[[2]]) #list 需要[[]]才可以提出矩阵
[1] "matrix" "array"
> l
$m1
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
$m2
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
-----来自生信技能树----
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。