WGCNA + ssGSEA的组合分析

DoubleHelix

发布于 2023-09-06 10:08:22

4670

发布于 2023-09-06 10:08:22

文章被收录于专栏：生物信息云生物信息云

参考文章：

加权基因共表达网络分析（WGCNA）案例代码

诚意满满的WGCNA分析，不进来看看吗？

生物信息数据分析教程视频——16-单样本基因集富集分析(ssGSEA)用于肿瘤相关免疫细胞浸润水平评估

1.案例数据：

表达谱数据：行为基因，列为样本。

load("expressionData.Rdata")# wgcnadat
head(wgcnadat)[,1:4]

表型数据：行为表型特征，列为样本。这是我是基于ssGSEA计算的17种功能状态的ssGSEA分数。

load("TCGA-BLCA-normalize_turmor_ssGSEA_Score.Rdata")# norm_ssGSEA_Score
head(norm_ssGSEA_Score)[,1:4]

分析代码：

library(dplyr)
library(tidyr)
library(RColorBrewer)
library(reshape2)
library(ggplot2)
library(tibble)
library(ggpubr)
library(pheatmap)
library(ComplexHeatmap)
library(ggrepel)
library(WGCNA)
outdirp = "results"
ifelse(dir.exists(outdirp),"文件夹已经存在",
       dir.create(outdirp,recursive = T))

load("expressionData.Rdata")# wgcnadat
head(wgcnadat)[,1:4]
wgcnadat = as.data.frame(t(wgcnadat))
dim(wgcnadat)
sampleTree = hclust(dist(wgcnadat), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);

pdf(file = paste0(outdirp,"/01-Sample clustering.pdf"),
    width = 15,height = 10)
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)
dev.off()

message(paste0("=========WGCNA========="))
net = blockwiseModules(wgcnadat, power = 6,
                       TOMType = "unsigned", minModuleSize = 30,
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "femaleMouseTOM",
                       verbose = 3)

load("TCGA-BLCA-normalize_turmor_ssGSEA_Score.Rdata")# norm_ssGSEA_Score
head(norm_ssGSEA_Score)[,1:4]
Score = as.data.frame(t(norm_ssGSEA_Score))
#id = intersect(rownames(wgcnadat),rownames(Score))
Score = Score[rownames(wgcnadat),]

traitColors = numbers2colors(Score, signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
sampleTree2 = hclust(dist(wgcnadat), method = "average")

pdf(file = paste0(outdirp,"/03-Sample dendrogram and trait heatmap.pdf"),
    width = 15,height = 9)
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(Score),
                    dendroLabels = F,
                    main = "Sample dendrogram and trait heatmap")
dev.off()


# 定义基因和样本的数量
nGenes = ncol(wgcnadat);
nSamples = nrow(wgcnadat);
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(wgcnadat, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, Score, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix =  paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
###===============热图======
# par(mar = c(6, 8.5, 3, 3));
# # Display the correlation values within a heatmap plot
# pdf(file = paste0(outdirp,"/04-Module-trait relationships.pdf"),
#     width = 10,height = 8.5)
# labeledHeatmap(Matrix = moduleTraitCor,
#                xLabels = names(Score),
#                yLabels = names(MEs),
#                ySymbols = names(MEs),
#                colorLabels = FALSE,
#                colors = greenWhiteRed(50),
#                textMatrix = textMatrix,
#                setStdMargins = FALSE,
#                cex.text = 0.7,
#                zlim = c(-1,1),
#                main = paste("Module-trait relationships"))
# dev.off()
##===========上面图太丑，重新画
# group <- names(MEs)
split = 1:length(names(MEs))#对应group分组的个数
ha <- rowAnnotation(
  empty = anno_empty(border = FALSE),
  foo = anno_block(
    gp = gpar(fill = substring(names(MEs), 3))
  )
)
color.key<-c("#3300CC","#3399FF","white","#FF3333","#CC0000")
pdf(file = paste0(outdirp,"/04-Module-trait relationships.pdf"),
    width = 14,height = 6)
#colorRampPalette(color.key)(50)
Heatmap(
  matrix = as.matrix(moduleTraitCor),
  col= blueWhiteRed(50),
  name = " ",
  #column_split = split,
  row_split = split,
  left_annotation = ha,
  cluster_rows = F,
  column_title = NULL,
  row_title = NULL,
  column_names_rot = -45,
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.text(textMatrix[i, j],
              x, y,
              gp = gpar(fontsize = 10))}
)
dev.off()
# names (colors) of the modules

##计算表达值与MEs之间的相关性
modNames = substring(names(MEs), 3)#取出颜色名称字符串
geneModuleMembership = as.data.frame(cor(wgcnadat, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership),
                                          nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
dim(geneModuleMembership)
###计算表型与表达值之间的相关性
geneTraitSignificance = as.data.frame(cor(wgcnadat, Score, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),
                                          nSamples));
names(geneTraitSignificance) = paste("GS.", names(Score), sep="");
names(GSPvalue) = paste("p.GS.", names(Score), sep="");

# chooseOneHubInEachModule(wgcnadat,moduleColors)
# chooseTopHubInEachModule(wgcnadat,moduleColors)
###绘制相关性散点图
###
Trait = colnames(moduleTraitCor)
module = rownames(moduleTraitCor)
# Recalculate topological overlap
TOM = TOMsimilarityFromExpr(wgcnadat, power = 6)
# mod = module[1]
for(mod in module){
  mod_col = substring(mod, 3)
  inModule = (moduleColors== mod_col)
  moduleGenes = rownames(geneModuleMembership)[inModule]
  
  ## 也是提取指定模块的基因名
  # Select the corresponding Topological Overlap
  modTOM = TOM[inModule, inModule];
  dimnames(modTOM) = list(moduleGenes,moduleGenes)
  ## 模块对应的基因关系矩阵,输出数据可用于Cytoscape绘图
  cyt = exportNetworkToCytoscape(
    modTOM,
    edgeFile = paste0(outdirp,"/CytoscapeInput-edges-",mod, "0.1-threshold.txt"),
    nodeFile = paste0(outdirp,"/CytoscapeInput-nodes-",mod, "0.1-threshold.txt"),
    weighted = TRUE,
    threshold = 0.1,
    nodeNames = moduleGenes,
    nodeAttr = moduleColors[inModule]
  )
  #phe = Trait[7]
  ##筛选hub基因
  for(phe in Trait){
    if(abs(moduleTraitCor[mod,phe]) > 0.8){
      gs_names = colnames(geneTraitSignificance)
      df = data.frame(MM=geneModuleMembership[moduleGenes, paste0("MM",mod_col)],
                      GS=geneTraitSignificance[moduleGenes, grep(phe,gs_names)])
      rownames(df) = moduleGenes
      
      hubgene = moduleGenes[abs(df$MM)> 0.8 & abs(df$GS)> 0.6]
      write(hubgene,file = paste0(outdirp,"/",mod,"-",phe,"-MM(0.8)-GS(0.6)-hubgene.txt"))
      
      #hubgenenet = networkScreening(Score$Angiogenesis,MEs,wgcnadat)
      pdf(file = paste0(outdirp,"/06-MM and GS",mod,"-",phe, "-relationships.pdf"),
          width = 6,height = 6.5)
      verboseScatterplot(abs(geneModuleMembership[moduleGenes, paste0("MM",mod_col)]),
                         abs(geneTraitSignificance[moduleGenes, grep(phe,gs_names)]),
                         xlab = paste("Module Membership in",substring(mod, 3), "module"),
                         ylab = paste("Gene significance for",phe),
                         main = paste("Module membership vs. gene significance\n"),
                         cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = mod_col)
      dev.off()
    }
    
  }
  
}

WGCNA + ssGSEA的组合分析

WGCNA + ssGSEA的组合分析

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐