专栏首页生物信息云R绘图笔记 | 柱状图绘制

R绘图笔记 | 柱状图绘制

可先阅读文章:R绘图笔记 | R语言绘图系统与常见绘图函数及参数

1.单数据系列柱状图

# Plotting data: expression values of 4 genes in 5 samples, given as
# semicolon-separated text. The first line holds the sample names.
data <- "Sample1;Sample2;Sample3;Sample4;Sample5
gene1;2.6;2.9;2.1;4.5;2.2
gene2;20.8;9.8;7.0;3.7;19.2
gene3;10.0;11.0;9.2;12.4;9.6
gene4;9;3.3;10.3;11.1;10"
# row.names = 1 turns the leading gene ID of every data row into a row name.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
data <- read.table(
  text = data, header = TRUE, row.names = 1, sep = ";", quote = ""
)
data
# Expression of gene1 across samples: one row per sample.
# The gene is picked by name so the result does not depend on row order.
data1 <- as.data.frame(t(data)[, "gene1"])
names(data1) <- "gene1"
data1$sample <- rownames(data1)
> data1
        gene1  sample
Sample1   2.6 Sample1
Sample2   2.9 Sample2
Sample3   2.1 Sample3
Sample4   4.5 Sample4
Sample5   2.2 Sample5

绘图:geom_bar用于绘制柱状图,ylim设置纵轴值范围,theme设置主题,axis.title设置坐标轴标题的文本样式,axis.text设置坐标轴刻度标签的文本样式。

# ggplot2 has to be attached before the first ggplot() call; without this
# line the snippet fails in a fresh session.
library(ggplot2)

# Bar chart of gene1 expression per sample. stat = "identity" makes the bar
# height equal to the gene1 value instead of a row count.
ggplot(data = data1, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity",
    width = 0.8, colour = "black", size = 0.25,
    fill = "#FC4E07", alpha = 1
  ) +
  # y axis spans from 0 to the tallest bar
  ylim(0, max(data1$gene1)) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "blue"),
    axis.text = element_text(size = 12, face = "plain", color = "red")
  )

可将数据进行排序后绘图。

# Sorting method 1: sort the data frame itself, then freeze the factor levels
# in the sorted order so ggplot keeps that order on the x axis.
library(dplyr)
data1.a <- arrange(data1, desc(gene1))
data1.a$sample <- factor(data1.a$sample, levels = data1.a$sample)
ggplot(data = data1.a, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8,
    colour = "black", size = 0.25, fill = "#FC4E07", alpha = 1
  )
# Sorting method 2: leave the rows untouched and only reorder the levels.
data1.b <- data1
# order() yields the row indices in decreasing gene1 order. The result is
# stored under its own name so that base::order() is not masked.
idx_desc <- order(data1.b$gene1, decreasing = TRUE)

data1.b$sample <- factor(data1.b$sample, levels = data1.b$sample[idx_desc])
ggplot(data = data1.b, aes(x = sample, y = gene1)) +
  geom_bar(
    stat = "identity", width = 0.8,
    colour = "black", size = 0.25, fill = "black", alpha = 1
  )

将所有样本的基因表达值都绘制出来,position=position_dodge()表示柱子并排放置。也可以通过position_dodge()函数来改变数据序列间的间隔。

# melt() comes from reshape2, which has to be attached before this point.
library(reshape2)

# Long format: one row per (gene, sample) pair. melt() puts the sample name
# in `variable` and the expression value in `value`.
data2 <- data.frame(gene = rownames(data), data)
data2 <- melt(data2, id.vars = c("gene"))
# One bar per sample, placed side by side within each gene.
ggplot(data2, aes(x = gene, y = value)) +
  geom_bar(stat = "identity", position = position_dodge(), aes(fill = variable))

但是,通常我们是不这样作图的,而是取均值,加上误差线。

# Per-gene standard deviation and mean of the expression values.
# sd is computed first because `value` is overwritten by its mean afterwards.
data3 <- as.data.frame(
  dplyr::summarise(
    dplyr::group_by(data2, gene),
    sd = sd(value),
    value = mean(value)
  )
)
> data3
   gene        sd value
1 gene1 0.9710819  2.86
2 gene2 7.5491721 12.10
3 gene3 1.2837445 10.44
4 gene4 3.1325708  8.74
# Mean expression per gene with error bars of +/- one standard deviation.
ggplot(data = data3, mapping = aes(x = gene, y = value)) +
  geom_col(mapping = aes(fill = gene)) +
  geom_errorbar(
    mapping = aes(ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.75)
  ) +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black")
  )

2.双序列图的绘制

library(reshape2)
# Control vs. drug expression for three genes, built in wide format.
data4 <- data.frame(
  Gene = paste0("gene", 1:3),
  CTRL = c(7.67, 4.02, 3.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)
#colnames(data4) <- c("Gene","CTRL","Drug")
# Long format: `variable` holds the group (CTRL / Drug), `value` the expression.
data4 <- melt(data4, id.vars = "Gene")
data4
> data4
   Gene variable value
1 gene1     CTRL  7.67
2 gene2     CTRL  4.02
3 gene3     CTRL  3.95
4 gene1     Drug  5.84
5 gene2     Drug  6.45
6 gene3     Drug  6.76
# Two data series (CTRL / Drug) drawn as side-by-side bars for every gene.
ggplot(data = data4, mapping = aes(x = Gene, y = value, fill = variable)) +
  geom_col(
    position = "dodge",
    colour = "black", width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = c("#A61CE6", "#E81CA4")) +
  scale_y_continuous(limits = c(0, 10)) +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    # legend sits inside the panel, near the top-right corner
    legend.position = c(0.88, 0.88)
  )

按CTRL组排序。

# Same layout as the previous example, with different CTRL values.
data5 <- data.frame(
  Gene = paste0("gene", 1:3),
  CTRL = c(8.67, 4.02, 6.95),
  Drug = c(5.84, 6.45, 6.76),
  stringsAsFactors = FALSE
)

# Fix the gene order on the x axis: highest CTRL value first.
data5$Gene <- factor(
  data5$Gene,
  levels = data5$Gene[order(data5[["CTRL"]], decreasing = TRUE)]
)

data5 <- melt(data5, id.vars = "Gene")

ggplot(data = data5, mapping = aes(x = Gene, y = value, fill = variable)) +
  geom_col(
    colour = "black", position = "dodge",
    width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  scale_y_continuous(limits = c(0, 10)) +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.88, 0.88)
  )

3.堆积柱状图

# brewer.pal() lives in RColorBrewer, which has to be attached before the
# palette is requested below.
library(RColorBrewer)

# Expression of five genes in eight samples (wide format).
data6 <- data.frame(
  Gene = c("gene1", "gene2", "gene3", "gene4", "gene5"),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)

# Long format: sample name in `variable`, expression in `value`.
data6 <- melt(data6, id.vars = "Gene")

# Stacked bars: one bar per sample, one segment per gene.
ggplot(data = data6, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack",
    color = "black", width = 0.7, size = 0.25
  ) +
  # shades 6..2 of the 9-class YlOrRd palette, darkest first (one per gene)
  scale_fill_manual(values = brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) + xlab("Sample") + ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )
# Wide-format expression table (five genes x eight samples) used for the
# sorted stacked chart.
data7 <- data.frame(
  Gene = paste0("gene", 1:5),
  sam1 = c(150, 1200, 1300, 2800, 2000),
  sam2 = c(400, 1100, 2300, 2900, 2700),
  sam3 = c(390, 1700, 3300, 3500, 4200),
  sam4 = c(300, 900, 1900, 2800, 3300),
  sam5 = c(130, 790, 1800, 3000, 4200),
  sam6 = c(100, 1300, 1900, 1800, 2700),
  sam7 = c(100, 1200, 1700, 1600, 2100),
  sam8 = c(150, 1100, 1300, 1280, 1300),
  stringsAsFactors = FALSE
)
> data7
   Gene sam1 sam2 sam3 sam4 sam5 sam6 sam7 sam8
1 gene1  150  400  390  300  130  100  100  150
2 gene2 1200 1100 1700  900  790 1300 1200 1100
3 gene3 1300 2300 3300 1900 1800 1900 1700 1300
4 gene4 2800 2900 3500 2800 3000 1800 1600 1280
5 gene5 2000 2700 4200 3300 4200 2700 2100 1300
# Per-gene totals (row sums over the sample columns), sorted ascending.
# With index.return = TRUE the result is a list: `x` holds the sorted totals,
# `ix` the row indices in sorted order.
sum <- sort(rowSums(data7[, -1]), index.return = TRUE)
# Per-sample totals (column sums), sorted descending.
colsum <- sort(colSums(data7[, -1]), decreasing = TRUE, index.return = TRUE)

# Reorder the sample columns by descending total. The offset of 1 skips the
# leading Gene column, which stays in first position.
data7 <- data7[, c(1, 1 + colsum$ix)]
> data7
   Gene sam3 sam5 sam2 sam4 sam6 sam1 sam7 sam8
1 gene1  390  130  400  300  100  150  100  150
2 gene2 1700  790 1100  900 1300 1200 1200 1100
3 gene3 3300 1800 2300 1900 1900 1300 1700 1300
4 gene4 3500 3000 2900 2800 1800 2800 1600 1280
5 gene5 4200 4200 2700 3300 2700 2000 2100 1300
# Order the Gene levels by ascending row total. sum$ix already is the
# permutation that sorts the genes, so it indexes Gene directly. Wrapping it
# in order() would apply the inverse permutation, which gives the right
# levels only when the rows happen to be sorted already.
data7$Gene <- factor(data7$Gene, levels = data7$Gene[sum$ix])
# Long format: sample name in `variable`, expression in `value`.
data7 <- melt(data7, id.vars = "Gene")
# Stacked bars: samples in descending-total order, genes stacked by level order.
ggplot(data = data7, aes(variable, value, fill = Gene)) +
  geom_bar(
    stat = "identity", position = "stack",
    color = "black", width = 0.7, size = 0.25
  ) +
  scale_fill_manual(values = brewer.pal(9, "YlOrRd")[c(6:2)]) +
  ylim(0, 15000) + xlab("Sample") + ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.background = element_blank(),
    legend.position = c(0.85, 0.82)
  )

4.百分比堆积柱形图

scale_fill_manual用于修改填充色。

# 100% stacked bars: position "fill" rescales every bar to the same height,
# so each segment shows the gene's share within that sample.
ggplot(data = data7, mapping = aes(x = variable, y = value, fill = Gene)) +
  geom_col(
    position = "fill",
    colour = "black", width = 0.8, size = 0.25
  ) +
  scale_fill_manual(values = brewer.pal(9, "GnBu")[c(7:2)]) +
  xlab("Sample") +
  ylab("Expression values") +
  theme(
    axis.title = element_text(size = 15, face = "plain", color = "black"),
    axis.text = element_text(size = 12, face = "plain", color = "black"),
    legend.title = element_text(size = 14, face = "plain", color = "black"),
    legend.position = "right"
  )

5.不等宽柱形图

library(ggplot2)
#install.packages("Cairo")
library(Cairo)
#install.packages("showtext")
library(showtext)
# Variable-width bars: Scale is the width of each group's rectangle and
# Count its height.
data8 <- data.frame(
  Name = paste0("Group", 1:5),
  Scale = c(35, 30, 20, 25, 15),
  Count = c(56, 37, 63, 57, 59)
)
# Rectangles are laid end to end along x, so the right edge of each one is the
# running total of the widths. cumsum() replaces the index loops; the original
# `1:i-1` parsed as `(1:i) - 1` and only worked because index 0 is dropped.
# Left edge of each rectangle (right edge minus its own width).
data8$xmin <- cumsum(data8$Scale) - data8$Scale
# Right edge of each rectangle (the maximum x of the bar).
data8$xmax <- cumsum(data8$Scale)
# x position of the data label: the horizontal centre of the rectangle.
data8$label <- data8$xmax - data8$Scale / 2
data8
> data8
    Name Scale Count xmin xmax label
1 Group1    35    56    0   35  17.5
2 Group2    30    37   35   65  50.0
3 Group3    20    63   65   85  75.0
4 Group4    25    57   85  110  97.5
5 Group5    15    59  110  125 117.5
#windowsFonts(myFont = windowsFont("微软雅黑"))
# Mapping a variable to a colour is done inside aes(); setting a fixed colour
# is done outside aes().
ggplot(data = data8) +
  # one rectangle per group: width from xmin/xmax, height from Count
  geom_rect(
    mapping = aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = Count, fill = Name),
    colour = "black", size = 0.25
  ) +
  # count label just above each bar
  geom_text(
    mapping = aes(x = label, y = Count + 3, label = Count),
    size = 4, colour = "black"
  ) +
  # group name just below the baseline
  geom_text(
    mapping = aes(x = label, y = -2.5, label = Name),
    size = 4, colour = "black"
  ) +
  ylab("Count") +
  xlab("Group") +
  scale_y_continuous(limits = c(-5, 80)) +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_line(colour = "grey60", size = .25, linetype = "dotted"),
    panel.grid.minor = element_line(colour = "grey60", size = .25, linetype = "dotted"),
    text = element_text(size = 15),
    plot.title = element_text(size = 15, hjust = .5), #family="myfont",
    legend.position = "none"
  )

6.径向柱形图

# Simulated data: 10 species x 5 genes. Each value is the gene's number (1-5)
# plus Gaussian noise with sd 0.5, so values differ from run to run.
data9 <- data.frame(
  species = rep(paste0("specie", 1:10), times = 5),
  gene = rep(paste0("gene", 1:5), each = 10),
  value = rep(1:5, each = 10) + rnorm(n = 50, mean = 0, sd = .5)
)

head(data9)
> head(data9)
  species  gene     value
1 specie1 gene1 0.8178002
2 specie2 gene1 0.5365643
3 specie3 gene1 0.7836265
4 specie4 gene1 0.9158748
5 specie5 gene1 0.8929767
6 specie6 gene1 1.9134189
# Rotation angle for each of the 10 species labels, evenly spaced from
# -20 to -340 degrees.
myAng <- seq(from = -20, to = -340, length.out = 10)
# Dodged bars drawn in polar coordinates: species go around the circle and
# value is the radius. The negative lower y limit leaves a hole in the centre.
ggplot(data = data9, mapping = aes(x = species, y = value, fill = gene)) +
  geom_col(
    colour = "black", position = "dodge",
    width = 0.7, size = 0.25
  ) +
  coord_polar(theta = "x", start = 0) +
  scale_y_continuous(limits = c(-3, 6)) +
  scale_fill_brewer() +
  theme_light() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "grey80", size = .25),
    axis.text.y = element_text(size = 12, colour = "black"),
    axis.line.y = element_line(size = 0.25),
    axis.text.x = element_text(size = 13, colour = "black", angle = myAng)
  )

coord_polar将直角坐标转化为极坐标。

参考资料:

1.R语言数据可视化之美,张杰/著

本文分享自微信公众号 - MedBioInfoCloud(MedBioInfoCloud)

原文出处及转载信息见文内详细说明,如有侵权,请联系 yunjia_community@tencent.com 删除。

原始发表时间:2020-10-29

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

我来说两句

0 条评论
登录 后参与评论

相关文章

  • R绘图笔记 | 散点分布图与柱形分布图

    https://docs.qq.com/sheet/DV0dxREV1YkJ0ZmVj

    DoubleHelix
  • R绘图笔记 | 一般的散点图绘制

    plot函数中,x和y分别表示所绘图形的横坐标和纵坐标;函数中的...为附加的参数。plot函数默认的使用格式如下:

    DoubleHelix
  • R语言基础绘图教程——第4章:面积图和饼图

    DoubleHelix
  • 一篇全是代码的数据可视化案例

    实在没时间写文字步骤了,算了,就甩一篇代码给大家吧,这篇代码包含五张图,分别可以呈现放射状线路图、迁徙路径图、闭环路径图、菱形气泡图、方形气泡图。 librar...

    数据小磨坊
  • 超全的pandas数据分析常用函数总结:下篇

    基础知识在数据分析中就像是九阳神功,熟练的掌握,加以运用,就可以练就深厚的内力,成为绝顶高手自然不在话下!

    数据森麟
  • laravel Validator ajax返回错误信息的方法

    将return back()- withErrors($validator- errors())- withInput();替换为下面的代码

    砸漏
  • 超全的pandas数据分析常用函数总结:下篇

    基础知识在数据分析中就像是九阳神功,熟练的掌握,加以运用,就可以练就深厚的内力,成为绝顶高手自然不在话下!

    朱小五
  • 用Python可视化股票指标

    但是一个量化交易可以通过回测系统建立信心然后让其一如既往的运行,以达到让钱生钱的目的,并且是自动的。

    py3study
  • 数据处理 | pandas-超常用的数据提取操作方法汇总

    pandas是python数据分析必备工具,它有强大的数据清洗能力,往往能用非常少的代码实现较复杂的数据处理

    DataCharm
  • 直接扩频通信(中)Verilog 实现

    今天给大侠带来直接扩频通信,由于篇幅较长,分三篇。今天带来中篇,也是第二篇,系统的 verilog 实现 。话不多说,上货。

    FPGA技术江湖

扫码关注云+社区

领取腾讯云代金券

,,