跟小洁老师学习GEO的第三天

原创

贝诺酯

发布于 2023-03-19 18:46:55

4680

差异分析可视化

rm(list = ls()) 
load(file = "step1output.Rdata")
load(file = "step4output.Rdata")
# 火山图
library(dplyr)
library(ggplot2)
dat  = distinct(deg,symbol,.keep_all = T)

p <- ggplot(data = dat, 
            aes(x = logFC, 
                y = -log10(P.Value))) +
  geom_point(alpha=0.4, size=3.5, 
             aes(color=change)) +
  scale_color_manual(values=c("blue", "grey","red"))+
  geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",linewidth=0.8) +
  geom_hline(yintercept = -log10(p_t),lty=4,col="black",linewidth=0.8) +
  theme_bw()#去掉灰色背景
p

for_label <- dat%>% 
  filter(symbol %in% c("HADHA","LRRFIP1"))

volcano_plot <- p +
  geom_point(size = 3, shape = 1, data = for_label) +
  ggrepel::geom_label_repel(
    aes(label = symbol),
    data = for_label,
    color="black"
  )
volcano_plot

差异基因热图

load(file = 'step2output.Rdata')
# 表达矩阵行名替换
exp = exp[dat$probe_id,]
rownames(exp) = dat$symbol
if(T){
  #取前10上调和前10下调
  library(dplyr)
  dat2 = dat %>%
    filter(change!="stable") %>% 
    arrange(logFC) 
  cg = c(head(dat2$symbol,10),
         tail(dat2$symbol,10))
}else{

全部差异基因

  cg = dat$symbol[dat$change !="stable"]
  length(cg)
}
n=exp[cg,]
dim(n)

差异基因热图

library(pheatmap)
annotation_col=data.frame(group=Group)
rownames(annotation_col)=colnames(n) 
heatmap_plot <- pheatmap(n,show_colnames =F#不展示行名,
                         scale = "row",
                         #cluster_cols = F, 
                         annotation_col=annotation_col,
                         breaks = seq(-3,3,length.out = 100)
) 
heatmap_plot

拼图

library(patchwork)
volcano_plot +heatmap_plot$gtable

感兴趣基因的相关性

library(corrplot)
g = sample(deg$symbol[1:500],10) # 这里是随机取样，注意换成自己感兴趣的基因
g

M = cor(t(exp[g,]))#计算列与列之间的相关性
pheatmap(M)

library(paletteer)
my_color = rev(paletteer_d("RColorBrewer::RdYlBu"))#选出一组颜色
my_color = colorRampPalette(my_color)(10)#切成十种颜色
corrplot(M, type="upper",
         method="pie",
         order="hclust", 
         col=my_color,
         tl.col="black", 
         tl.srt=45)
library(cowplot)
cor_plot <- recordPlot()

拼图

load("pca_plot.Rdata")
plot_grid(pca_plot,cor_plot,
          volcano_plot,heatmap_plot$gtable)
dev.off()

保存

pdf("deg.pdf")
plot_grid(pca_plot,cor_plot,
          volcano_plot,heatmap_plot$gtable)
dev.off()

如何确认自己的差异分析分组反了没有

a  = deg$symbol[1]
boxplot(exp[a,]~Group)
deg$logFC[1]

富集分析数据库

KEGG数据库

GO数据库

Y叔和clusterProfiler

富集分析：衡量每个通路里的基因在差异基因里是否足够多

GO 富集分析

rm(list = ls())  
load(file = 'step4output.Rdata')
library(clusterProfiler)
library(ggthemes)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(stringr)
library(enrichplot)



#(1)输入数据
gene_up = deg$ENTREZID[deg$change == 'up'] 
gene_down = deg$ENTREZID[deg$change == 'down'] 
gene_diff = c(gene_up,gene_down)

#(2)富集
#以下步骤耗时很长，设置了存在即跳过
f = paste0(gse_number,"_GO.Rdata")
if(!file.exists(f)){
  ego <- enrichGO(gene = gene_diff,
                  OrgDb= org.Hs.eg.db,
                  ont = "ALL",
                  readable = TRUE)
  ego_BP <- enrichGO(gene = gene_diff,
                  OrgDb= org.Hs.eg.db,
                  ont = "BP",
                  readable = TRUE)
  #ont参数：One of "BP", "MF", and "CC" subontologies, or "ALL" for all three.
  save(ego,ego_BP,file = f)
}
load(f)

#(3)可视化
#条带图
barplot(ego)
barplot(ego, split = "ONTOLOGY", font.size = 10, 
        showCategory = 5) + 
  facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y") 

#气泡图
dotplot(ego)

dotplot(ego, split = "ONTOLOGY", font.size = 10, 
        showCategory = 5) + 
  facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y") 

#(3)展示top通路的共同基因，要放大看。

#gl 用于设置下图的颜色
gl = deg$logFC
names(gl)=deg$ENTREZID
#Gene-Concept Network,要放大看
cnetplot(ego,
         #layout = "star",
         color.params = list(foldChange = gl),
         showCategory = 3)

# 2.KEGG pathway analysis----
#上调、下调、差异、所有基因
#（1）输入数据
gene_up = deg[deg$change == 'up','ENTREZID'] 
gene_down = deg[deg$change == 'down','ENTREZID'] 
gene_diff = c(gene_up,gene_down)

#（2）对上调/下调/所有差异基因进行富集分析
f2 = paste0(gse_number,"_KEGG.Rdata")
if(!file.exists(f2)){
  kk.up <- enrichKEGG(gene         = gene_up,
                      organism     = 'hsa')
  kk.down <- enrichKEGG(gene         =  gene_down,
                        organism     = 'hsa')
  kk.diff <- enrichKEGG(gene         = gene_diff,
                        organism     = 'hsa')
  save(kk.diff,kk.down,kk.up,file = f2)
}
load(f2)

#(3)看看富集到了吗？https://mp.weixin.qq.com/s/NglawJgVgrMJ0QfD-YRBQg
table(kk.diff@result$p.adjust<0.05)
table(kk.up@result$p.adjust<0.05)
table(kk.down@result$p.adjust<0.05)

#(4)双向图
# 富集分析所有图表默认都是用p.adjust,富集不到可以退而求其次用p值，在文中说明即可

source("kegg_plot_function.R")
g_kegg <- kegg_plot(kk.up,kk.down)
g_kegg
#g_kegg +scale_y_continuous(labels = c(2,0,2,4,6))

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。