我前面的甲基化教程主要是针对450k这样的芯片,所以champ流程就绰绰有余,很多小伙伴在咱们公众号后台咨询甲基化测序数据分析,恰好最近实习生投稿:
下面是去年实习生的分享
前阵子复现单细胞数据,发现个很有趣的富集图。通过compareCluster功能将不同细胞群的通路用Circular barplot展示出来,下面来复现下这张图:
来源:Stromal cell diversity associated with immune evasion in human triple-negative breast cancer,doi: 10.15252/embj.2019104063,IF=11.598,The EMBO Journal。数据https://github.com/sunnyzwu/stromal_subclasses
学习了单细胞的小伙伴也可以做下练手,一起交流下(还可以参加曾老师的独家单细胞分享会,只此一家,千万别错过哦)
腾讯会议
会议主题:单细胞学徒培养2022
会议时间:2022/01/02-2022/07/20 21:00-21:30(GMT+08:00) 中国标准时间 - 北京, 每天
(周三周四休息哦)
点击链接入会,或添加至会议列表:
https://meeting.tencent.com/dm/EWBbxqqpD6Fa
#腾讯会议:507-1242-3763
会议密码:2022
会议直播:
https://meeting.tencent.com/l/BDR6rygede4B
###### step1:导入数据 ######
rm(list=ls())
options(stringsAsFactors = F)
library(dplyr)
setwd("3-cell")
sce.markers <- read.csv('celltype_deg_sce.markers.csv',row.names = 1)
table(sce.markers$cluster)
#dPVL iCAFs imPVL myCAFs
# 120 22 129 46
dPVL=sce.markers[sce.markers$cluster=='dPVL',]$gene
imPVL=sce.markers[sce.markers$cluster=='imPVL',]$gene
iCAFs=sce.markers[sce.markers$cluster=='iCAFs',]$gene
myCAFs=sce.markers[sce.markers$cluster=='myCAFs',]$gene
total <- list(dPVL=dPVL,imPVL=imPVL,iCAFs=iCAFs,myCAFs=myCAFs)
# 转ID
library(org.Hs.eg.db)
i=1
for (i in 1:4) {
#i=1
## 把SYMBOL改为ENTREZID
total[[i]]=as.character(na.omit(AnnotationDbi::select(org.Hs.eg.db,
keys = total[[i]],
columns = 'ENTREZID',
keytype = 'SYMBOL')[,2]))}
lapply(total, head)
> lapply(total, head)
$dPVL
[1] "58189" "10398" "6876" "59" "11034" "64129"
$imPVL
[1] "8490" "397" "2167" "948" "2701" "221395"
$iCAFs
[1] "9452" "347" "10563" "6387" "8406" "8788"
$myCAFs
[1] "115908" "6423" "165" "151887" "1278" "10631"
可修改fun = enrichKEGG
为KEGG分析
Chapter 14 Biological theme comparison | Biomedical Knowledge Mining using GOSemSim and clusterProfiler (yulab-smu.top)
###### step2:富集分析 ######
library(clusterProfiler)
xx <- compareCluster(total, fun="enrichGO",
OrgDb = org.Hs.eg.db,
pvalueCutoff=0.99) # pvalueCutoff显著性应该为0.05
table(xx@compareClusterResult$Cluster) #每个基因集富集个数
head(as.data.frame(xx)) #查看完整结果
#dotplot(xx) #气泡图
df_go_diff <- as.data.frame(xx)
## 选择富集到两组样本以上的通路可视化
df <- as.data.frame(table(df_go_diff$Description))
# > head(df)
# Var1 Freq
# 1 actin binding 2
# 2 actin filament binding 1
# 3 actinin binding 2
# 4 alcohol binding 2
# 5 alpha-actinin binding 2
# 6 amide binding 2
df <- df[order(df$Freq,decreasing = T),][1:20,]
df_go_diff <- df_go_diff[df_go_diff$Description %in% df$Var1,]
df_go_diff <- df_go_diff[order(df_go_diff$Description,decreasing = F),]
colnames(df_go_diff)
#[1] "Cluster" "ID" "Description" "GeneRatio" "BgRatio"
# [6] "pvalue" "p.adjust" "qvalue" "geneID" "Count"
table(df_go_diff$Description)
###### step3:Circular barplot ######
## 相似通路合并(给个大分类)
df_go_diff$group <- factor(c(rep('A',10),rep('B',10),rep('C',16),rep('D',16)))
# 分割弦图需添加NA Set a number of 'empty bar'
empty_bar <- 4
# Add lines to the initial dataset
nObsType <- nlevels(as.factor(df_go_diff$Description)) #20
to_add <- data.frame(matrix(NA, empty_bar*nlevels(df_go_diff$Cluster),ncol(df_go_diff))) # 11列,16个NA
colnames(to_add) <- colnames(df_go_diff)
to_add$group <- rep(levels(df_go_diff$group),each=empty_bar)
data <- rbind(df_go_diff, to_add)
data <- data %>% arrange(group) # 根据group排序
write.csv(data,file = 'data_for_go.csv')
# 在excel中手动添加按顺序添加id
data <- read.csv('data_for_go.csv',row.names = 1)
colnames(data)
# Get the name and the y position of each label
label_data <- data %>% group_by(id, Description) %>% summarize(tot=sum(-log10(qvalue)))
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.2) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
> label_data
# A tibble: 24 x 5
# Groups: id [24]
id Description tot hjust angle
<int> <chr> <dbl> <dbl> <dbl>
1 1 actin binding 8.17 0 78
2 2 actinin binding 3.38 0 63
3 3 alcohol binding 5.26 0 48.
4 4 alpha-actinin binding 1.62 0 33
5 5 amide binding 2.78 0 18
6 6 NA NA 0 3
7 7 amyloid-beta binding 3.16 0 -12
8 8 calmodulin binding 2.02 0 -27
9 9 carboxylic acid binding 3.64 0 -42.
10 10 carboxypeptidase activity 1.70 0 -57.
# ... with 14 more rows
# prepare a data frame for base lines 添加A-D分组
empty_bar <- 4
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - 1) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# > head(base_data)
# # A tibble: 4 x 4
# # Rowwise:
# group start end title
# <chr> <int> <dbl> <dbl>
# 1 A 1 5 3 #从第一注释块到第五个画线,标记A组
# 2 B 7 11 9
# 3 C 13 17 15
# 4 D 19 23 21
library(ggplot2)
library(viridis)
p <- ggplot(data,aes(x=as.factor(id),y=-log10(qvalue),fill=Cluster))+
# Add the stacked bar
geom_bar(stat = "identity",alpha=0.5)+
scale_fill_viridis(discrete=TRUE)+
theme_minimal()+
# 旋转
coord_polar(start = 0)+
theme_minimal()+
ylim(-5,10)+
theme(
#legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank()
#plot.margin = margin(0.5, -5, 0.5, 0.5, "cm")
) + coord_polar()+
# Add labels on top of each bar
geom_text(data=label_data, aes(x=id, y=tot+0.3, label=Description, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=3, angle= label_data$angle, inherit.aes = FALSE )+
# geom_vline(aes(xintercept = 24.1),color="grey",slope=24)+
# Add base line information
geom_segment(data=base_data, aes(x = start, y = -0.5, xend = end, yend = -0.5), colour = "black", alpha=0.6, size=0.8, inherit.aes = FALSE )+
geom_text(data=base_data, aes(x = title, y = -1, label=group), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)+
# Add text showing the value of each 100/75/50/25 lines
ggplot2::annotate("text", x = c(rep(max(data$id),5),24.5), y = c(0, 2,4,6,8,9), label = c("0", "2", "4","6","8","-log10(qvalue)") , color="grey", size=2 , angle=c(rep(0,5),6), fontface="bold", hjust=1)
p
ggsave(p,filename = 'Circular barplot.pdf',units = 'cm',width = 22,height = 15)
library(yyplot2) ggpar(mar=rep(6,4)) 也不行。开头的直方图有变化,但运用到整个弦图没变化
用par设置ggplot2参数?这个可以有!(guangchuangyu.github.io)
ggpar(mar=rep(6,4))
p <- ggplot(data,aes(x=as.factor(id),y=-log10(qvalue),fill=Cluster))+
# Add the stacked bar
geom_bar(stat = "identity",alpha=0.5)+
scale_fill_viridis(discrete=TRUE)
p
ggpar(mar=rep(1,4))
p <- ggplot(data,aes(x=as.factor(id),y=-log10(qvalue),fill=Cluster))+
# Add the stacked bar
geom_bar(stat = "identity",alpha=0.5)+
scale_fill_viridis(discrete=TRUE)
p
推荐这个绘图网站很棒,有代码有美图,内容非常全面