############################################################
############################################################
富集的统计学基础是超几何分布,简单来说根据下面的Fisher精确检验(Fisher exact test)公式,对每个GO或KEGG term计算一个p值
p=(M/K)(N-M)/(n-k)/(N/n),其中
N:所有gene总数
n:N中差异表达gene的总数
M:N中属于某个GO term的gene个数
k: n中属于某个GO term的gene个数
p:表示差异表达gene富集到这个GO term上的可信程度
得到的差异表达基因列表就可以,也就是说不需要其他的值
只能说实在是太多太多了。。。。但是用的时候要小心,因为你多用几个工具,即使设定同样的p值也会发现结果有出入,有时还差异挺大。
##########################################################
#enrichment analysis using clusterprofiler package created by yuguangchuang
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
#get the ENTREZID for the next analysis
setwd("F:/rna_seq/data/matrix")
sig.gene<-read.csv(file="DEG_treat_vs_control.csv")
head(sig.gene)
gene<-sig.gene[,1]
head(gene)
gene.df<-bitr(gene, fromType = "ENSEMBL",
toType = c("SYMBOL","ENTREZID"),
OrgDb = org.Mm.eg.db)
head(gene.df)
GO富集分析:
#Go classification
#Go enrichment
ego_cc<-enrichGO(gene = gene.df$ENSEMBL,
OrgDb = org.Mm.eg.db,
keyType = 'ENSEMBL',
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego_bp<-enrichGO(gene = gene.df$ENSEMBL,
OrgDb = org.Mm.eg.db,
keyType = 'ENSEMBL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
barplot(ego_bp,showCategory = 18,title="The GO_BP enrichment analysis of all DEGs ")+
scale_size(range=c(2, 12))+
scale_x_discrete(labels=function(ego_bp) str_wrap(ego_bp,width = 25))
gobp.jpeg
#KEGG enrichment
install.packages("stringr")
library(stringr)
kk<-enrichKEGG(gene =gene.df$ENTREZID,
organism = 'mmu',
pvalueCutoff = 0.05)
kk[1:30]
barplot(kk,showCategory = 25, title="The KEGG enrichment analysis of all DEGs")+
scale_size(range=c(2, 12))+
scale_x_discrete(labels=function(kk) str_wrap(kk,width = 25))
dotplot(kk,showCategory = 25, title="The KEGG enrichment analysis of all DEGs")+
scale_size(range=c(2, 12))+
scale_x_discrete(labels=function(kk) str_wrap(kk,width = 25))
kegg.jpeg
keggdot.jpeg
# Gene Set Enrichment Analysis(GSEA)
# 获取按照log2FC大小来排序的基因列表
genelist <- sig.gene$log2FoldChange
names(genelist) <- sig.gene[,1]
genelist <- sort(genelist, decreasing = TRUE)
# GSEA分析
gsemf <- gseGO(genelist,
OrgDb = org.Mm.eg.db,
keyType = "ENSEMBL",
ont="BP"
)
# 查看信息
head(gsemf)
# 画出GSEA图
gseaplot(gsemf, geneSetID="GO:0001819")
gsea.jpeg
参考Y叔的包说明,里面写的特别详细
还有lxmic的简书