单细胞测序数据也可以做gsea,步骤跟用RNAseq的数据差不多,主要是要用到差异基因并且根据Fold change来排序。
library(msigdbr)
library(fgsea)
library(dplyr)
library(ggplot2)
选择自己数据的物种以及要做的GSEA的数据库类型
##查看物种的数据 msigdbr_show_species()
m_df<- msigdbr(species = "Homo sapiens", category = "H")
#将m_df的基因与通路取出并改成一个通路对应相应基因的格式
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)
这里选用seurat求差异基因,并将差异基因按显著性排序
#每一个细胞类型的GSEA按显著性进行降序排序
gesa_TvsC_allgenes<-FindMarkers(seurat.combined, ident.1 = "TCF7KO", ident.2 = "NC", verbose = FALSE,test.use ="roc",logfc.threshold = 0.01,only.pos =F)
gesa_TvsC_allgenes$gene<-rownames(gesa_TvsC_allgenes)
gsea_genes<-gesa_TvsC_allgenes %>%
arrange(desc(myAUC), desc(avg_diff)) %>%
dplyr::select(gene,avg_diff)
library(tibble)
ranks <- deframe(gsea_genes)
fgseaRes <- fgsea(pathways = fgsea_sets,
stats = ranks ,
minSize=5,
maxSize=500,
nperm=10000)
对结果进行整理
library(dplyr)
fgseaResTidy <- fgseaRes %>%
as_tibble() %>%
arrange(desc(NES))
fgseaResTidy %>%
dplyr::select(-leadingEdge, -ES, -nMoreExtreme) %>%
arrange(padj) %>%
head()
ggplot(fgseaResTidy %>% filter(padj < 0.5) %>% head(n= 15), aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill= NES < 0)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
plotEnrichment(fgsea_sets[["HALLMARK_APICAL_JUNCTION"]],
ranks) + labs(title="HALLMARK_APICAL_JUNCTION")