RNAseq下游分析--edgeR +cluserprofiler

生信编程日常

发布于 2020-04-01 15:36:46

9590

发布于 2020-04-01 15:36:46

文章被收录于专栏：生物信息学、python、R、linux生物信息学、python、R、linux

RNAseq分析可以用hisat2+samtools+stringtie得到表达矩阵，后面可以通过edgeR + clusterprofiler分析。

source("https://bioconductor.org/biocLite.R")#install
biocLite("edgeR")
library(edgeR)
library(org.Mm.eg.db)
library(clusterProfiler)
library(xlsx)

Mut_Wt为stringtie输入文件转化成的counts文件或者其他方式得到的要分析的data.frame,行为基因名，列为sample，基因表达为counts

Mut_Wt.df<-Mut_Wt#用于存储原始表达矩阵
#三个重复，1代表control，2代表treat
#1 设计分组
group_list <- factor(c(rep("1",3), rep("2",3)))
#2去掉低表达的基因
Mut_Wt <- Mut_Wt[rowSums(cpm(Mut_Wt) > 1) >= 2,]
Mut_Wt <- DGEList(counts = Mut_Wt, group = group_list)
Mut_Wt <- calcNormFactors(Mut_Wt)
#3 计算离散度
Mut_Wt <- estimateCommonDisp(Mut_Wt)
Mut_Wt <- estimateTagwiseDisp(Mut_Wt)
#4 得到差异基因，并分为显著性的上调和下调
Mut_Wt_et <- exactTest(Mut_Wt)
Mut_Wt_tTag <- topTags(Mut_Wt_et, n=nrow(Mut_Wt))
Mut_Wt_tTag <- as.data.frame(Mut_Wt_tTag)
Mut_Wt_tTag_count<-merge(Mut_Wt_tTag,Mut_Wt.df,by.x = 0,by.y = 0)
Mut_Wt_up<-subset(Mut_Wt_tTag_count,logFC>log2(1.5)&PValue<0.05)
Mut_Wt_up<-Mut_Wt_up[order(-Mut_Wt_up$logFC,-Mut_Wt_up$PValue),]
Mut_Wt_down<-subset(Mut_Wt_tTag_count,logFC< -log2(1.5)&PValue<0.05)
Mut_Wt_down<-Mut_Wt_down[order(-abs(Mut_Wt_down$logFC),-Mut_Wt_down$PValue),]

edgeR的结果可以直接做用clusterprofiler做goterm,用循环方便一些

library(xlsx)
library(ggplot2)
#go term of Mut vs WT
gotermlist<-list(Mut_Wt_up,Mut_Wt_down)
names(gotermlist)<-c('Mut_Wt_up','Mut_Wt_down')
for (i in 1:2){
  write.xlsx(gotermlist[[i]],"Mut_Wt.xlsx",sheetName = names(gotermlist)[i],append =T )
}

#############################################################################
for (i in 1:2){
  GeneSymbol<-as.character(gotermlist[[i]]$Row.names)
EN = bitr(GeneSymbol,fromType = "SYMBOL",toType = "ENTREZID",OrgDb = "org.Mm.eg.db")
#BP
goBP = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "BP", pvalueCutoff = 0.05, readable= TRUE)
goCC = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "CC", pvalueCutoff = 0.05, readable= TRUE)
goMF = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "MF", pvalueCutoff = 0.05, readable= TRUE)
#BP
png(paste0("picture/",names(gotermlist)[i],"_BP.png"),width = 800,height = 480)
print(clusterProfiler::dotplot(goBP,showCategory=15,title=paste0(names(gotermlist)[i],"_BP"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
dev.off()
#CC
png(paste0("picture/",names(gotermlist)[i],"_CC.png"),width = 800,height = 480)
print(clusterProfiler::dotplot(goCC,showCategory=15,title=paste0(names(gotermlist)[i],"_CC"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
dev.off()
#MF
png(paste0("picture/",names(gotermlist)[i],"_MF.png"),width = 800,height = 480)
print(clusterProfiler::dotplot(goMF,showCategory=15,title=paste0(names(gotermlist)[i],"_MF"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
dev.off()

#KEGG
KEGG = enrichKEGG(organism = "mmu",gene = as.vector(EN$ENTREZID))
KEGG = setReadable(KEGG, OrgDb = org.Mm.eg.db, keytype="SYMBOL")

png(paste0("picture/",names(gotermlist)[i],"_KEGG.png"),width = 800,height = 480)
print(clusterProfiler::dotplot(KEGG,showCategory=15,title=paste0(names(gotermlist)[i],"_KEGG"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
dev.off()


try({write.xlsx(goBP@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_BP"),append = T)
  write.xlsx(goCC@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_CC"),append = T)
  write.xlsx(goMF@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_MF"),append = T)
  write.xlsx(KEGG@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_KEGG"),append = T)
})
}

本文参与腾讯云自媒体分享计划，分享自作者个人站点/博客。

如有侵权请联系 cloudcommunity@tencent.com 删除

data