手把手带你复现NC图表之Figure 4

生信技能树jimmy

发布于 2023-09-26 20:12:13

6170

复现文章信息：

文章题目：Single-cell analysis reveals prognostic fibroblast subpopulations linked to molecular and immunological subtypes of lung cancer 期刊：Nature Communications 日期：2023年1月31日 DOI: 10.1038/s41467-023-35832-6

复现图——Figure 4

轨迹推断识别与肺泡或外膜成纤维细胞向肌成纤维细胞转分化相关的共有基因模块

R包载入与数据准备

代码如下：

library(Seurat)
library(ggplot2)
library(WGCNA)
library(tidyverse)
library(ggpubr)
library(ggsci)
library(destiny)
library(metap)
library(pheatmap)
library(xlsx)
library(ggplot2)
library(ggfittext)
data_directory <- "H:\\文献复现\\6" 
source(paste0(data_directory, "0_NewFunctions.R"))

load(paste0(data_directory, "Trajectory Analysis results.Rdata"))
load(paste0(data_directory, "IntegratedFibs_Zenodo.Rdata"))
load(paste0(data_directory, "MxIHC_Zenodo.Rdata"))
REACTOME<-read.xlsx("41467_2023_35832_MOESM16_ESM.xlsx",
                   as.data.frame=T,
                   header=T)
#在文章中的Supplementary Data 13可以下载
Fibs.integrated <- AddMetaData(Fibs.integrated, DPT_output)
A.M_pT_res_stats2$Gene <- rownames(A.M_pT_res_stats2)
T.M_pT_res_stats2$Gene <- rownames(T.M_pT_res_stats2)

Figure 4A-C

探究从肺泡和外膜成纤维细胞亚群（在对照组织中富集）到肌成纤维细胞（在肿瘤组织中富集）的分化过程，对scRNA-seq数据集进行轨迹推断。结果表明外膜和肺泡成纤维细胞可以作为独立的祖细胞，肌成纤维细胞可以从这些祖细胞转分化

pA <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = Fibs_subpopulation)) +
  theme_pubr(base_size = 10) +
  theme(axis.text = element_blank(), legend.title = element_blank(), legend.position = "right") +
  geom_point(size = 0.1) +
  scale_color_manual(values = Fibs_col.palette) +
  guides(colour = guide_legend(override.aes = list(size = 2)))
#突出显示分配给每个扩散拟时序（DPT）分支的细胞
pB <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = factor(dpt_branch))) +
  theme_pubr(base_size = 10) +
  theme(axis.text = element_blank(), legend.position = "right") +
  geom_point(size = 0.1) +
  scale_color_brewer(name = "DPT branch") +
  guides(colour = guide_legend(override.aes = list(size = 2), title.position = "top", title.hjust =  0.5))
#对细胞进行拟时序排序，代表它们向肌成纤维细胞表型的相对进展
pC <- ggplot(DM_dim_data, aes(x = DC1, y = DC2, colour = All_pT)) +
  theme_pubr(base_size = 10) +
  theme(axis.text = element_blank(), legend.key.width = unit(5, "pt"),
        legend.position = "right", legend.key.height = unit(10, "pt")) +
  geom_point(size = 0.1) +
  scale_color_viridis_c(name = "Pseudotime") +
  guides(colour = guide_colourbar(title.position = "top", title.hjust =  0.5))

Fig_4ABC <- ggarrange(pA,NULL,pB,NULL,pC, ncol = 5, widths = c(1,0.1,1,0.1), align = "h",
                  labels = c("a", "", "b", "", "c"))

Fig_4ABC

Alveolar到Myo

names(A.M_pT_res)
datExpr_Full <- list()
for(i in names(A.M_pT_res)){
  datExpr_Full[[i]]<- t(A.M_pT_res[[i]]$pT_fit)
}
sig_genes <- as.character(na.omit(rownames(A.M_pT_res_stats2)[A.M_pT_res_stats2$meta_p_adj.BH < 1e-10]))
length(sig_genes)
datExpr_CTR.TUMOUR <- list()
for(i in names(datExpr_Full)[c(1:4,7,8)]){
  datExpr_CTR.TUMOUR[[i]] <- datExpr_Full[[i]][, sig_genes]
}
datExpr_CTR.TUMOUR_median <- 
  Reduce(pmedian, datExpr_CTR.TUMOUR)
fit <- hclust(as.dist(1-cor(datExpr_CTR.TUMOUR_median)), method="ward.D2")
plot(fit) 
groups <- cutree(fit, k=4)
pheatmap(t(datExpr_CTR.TUMOUR_median[, order(groups)]), scale = "row",
                   cluster_cols = F,
                   cluster_rows = F,
                   clustering_distance_rows = "correlation",
                   clustering_method = "ward.D2",
                   show_rownames = F,
                   annotation_row = data.frame(
                     row.names = sig_genes,
                     cluster = as.factor(groups)
                   ))
rowMatch <- match(rownames(A.M_pT_res_stats2) ,sig_genes)
A.M_pT_res_stats2$Cluster <- as.factor(groups)[rowMatch]
A.M_pT_res_stats2$Module <- factor(
  A.M_pT_res_stats2$Cluster,
  levels = c(1:4),
  labels = c("Progenitor","Differentiation","Transient","Proto.Differentiation")
)
levels(A.M_pT_res_stats2$Module)
A.M_pT_res_stats2$Module <- factor(A.M_pT_res_stats2$Module, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
gene_order <- factor(
  groups,
  levels = c(1:4),
  labels = c("Progenitor","Differentiation","Transient","Proto.Differentiation")
)
gene_order <- factor(gene_order, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
Mod_cols <- RColorBrewer::brewer.pal(5, "Set1")
names(Mod_cols) <- levels(A.M_pT_res_stats2$Module)

Figure 4D

热图显示DPT中肺泡成纤维细胞进展为肌成纤维细胞时差异表达的基因。用层次聚类法将这些基因分组到DPT表达谱定义的模块中

AM.PT_TUMOUR_Heatmap <- 
  pheatmap(t(datExpr_CTR.TUMOUR_median)[order(gene_order), ], scale = "row",
                     cluster_cols = F, cluster_rows = F,
                     breaks = seq(-3,3,length=101),
                     show_rownames = F, show_colnames = F,
                     legend = F,
                     annotation_legend = F,
                     fontsize = 7,
                     annotation_names_row = F,
                     annotation_names_col = F,
                     gaps_row = c(
                       sum(as.numeric(gene_order) == 1, na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:2), na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:3), na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:4), na.rm = T)
                     ),
                     annotation_row = data.frame(
                       row.names = A.M_pT_res_stats2$Gene[A.M_pT_res_stats2$Gene %in% sig_genes],
                       Module = A.M_pT_res_stats2$Module[A.M_pT_res_stats2$Gene %in% sig_genes]
                     ),
                     annotation_col = data.frame(
                       row.names = rownames(datExpr_CTR.TUMOUR_median),
                       Pseudotime = as.numeric(rownames(datExpr_CTR.TUMOUR_median))
                     ),
                     annotation_colors = list(
                       Pseudotime = viridis::viridis(3),
                       Module = Mod_cols
                     )
  )

AM.PT_TUMOUR_Heatmap

DC_plotting.df <-   data.frame(
  DM_dim_data,
  Fibs.integrated@meta.data
)
names(DC_plotting.df)
AM_pT_plot <-  
  DC_plotting.df %>%
  ggplot(aes(x = DC1, y = DC2, colour = A.M_pT)) +
  geom_point(size = 0.1) + 
  theme_void(base_size = 10) +
  theme(legend.position = c(0.1,1), legend.justification = c("left", "top"),
        legend.direction = "horizontal",
        legend.key.width = unit(8, "pt"), legend.key.height = unit(2, "pt")) +
  guides(colour = guide_colourbar(title.position="top", title.hjust = 0.5)) +
  scale_color_viridis_c(name = "DPT", na.value = "grey80") 
AM_pT_plot

Fig_4D <- ggarrange(
  ggarrange(NULL, AM_pT_plot, ncol = 2),
  AM.PT_TUMOUR_Heatmap$gtable,
  nrow = 2, heights = c(2.5,6.5)
)

Fig_4D

Adventitial到Myo

names(T.M_pT_res)
datExpr_Full <- list()
for(i in names(T.M_pT_res)){
  datExpr_Full[[i]]<- t(T.M_pT_res[[i]]$pT_fit)
}
sig_genes <- as.character(na.omit(rownames(T.M_pT_res_stats2)[T.M_pT_res_stats2$meta_p_adj.BH < 1e-10]))
length(sig_genes)
datExpr_CTR.TUMOUR <- list()
for(i in names(datExpr_Full)[c(1:4,7,8)]){
  datExpr_CTR.TUMOUR[[i]] <- datExpr_Full[[i]][, sig_genes]
}
datExpr_CTR.TUMOUR_median <- 
  Reduce(pmedian, datExpr_CTR.TUMOUR)
fit <- hclust(as.dist(1-cor(datExpr_CTR.TUMOUR_median)), method="ward.D2")
plot(fit) # display dendogram
groups <- cutree(fit, k=4)
pheatmap(t(datExpr_CTR.TUMOUR_median[, order(groups)]), scale = "row",
                   cluster_cols = F,
                   cluster_rows = F,
                   clustering_distance_rows = "correlation",
                   clustering_method = "ward.D2",
                   #cutree_rows = 4,
                   show_rownames = F,
                   annotation_row = data.frame(
                     row.names = sig_genes,
                     cluster = as.factor(groups)
                   ))
rowMatch <- match(rownames(T.M_pT_res_stats2) ,sig_genes)
T.M_pT_res_stats2$Cluster <- as.factor(groups)[rowMatch]
T.M_pT_res_stats2$Module <- factor(
  T.M_pT_res_stats2$Cluster,
  levels = c(1:4),
  labels = c("Proto.Differentiation","Progenitor","Transient","Differentiation")
)
levels(T.M_pT_res_stats2$Module)
T.M_pT_res_stats2$Module <- factor(T.M_pT_res_stats2$Module, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
gene_order <- factor(
  groups, levels = c(1:4),
  labels = c("Proto.Differentiation","Progenitor","Transient","Differentiation")
)
gene_order <- factor(gene_order, levels = c("Progenitor", "Transient", "Proto.Differentiation", "Differentiation"))
Mod_cols <- RColorBrewer::brewer.pal(5, "Set1")
names(Mod_cols) <- levels(T.M_pT_res_stats2$Module)
Heatmap_df <- t(datExpr_CTR.TUMOUR_median)[order(gene_order), ]
Heatmap_df <- Heatmap_df[!is.na(gene_order[order(gene_order)]), ]
dim(Heatmap_df)

Figure 4E

热图显示DPT中外膜成纤维细胞进展为肌成纤维细胞时差异表达的基因。用层次聚类法将这些基因分组到DPT表达谱定义的模块中

TM.PT_TUMOUR_Heatmap <- 
                     pheatmap(Heatmap_df, scale = "row",
                     cluster_cols = F, cluster_rows = F,
                     breaks = seq(-3,3,length=101),
                     show_rownames = F, show_colnames = F,
                     legend = F,
                     annotation_legend = F,
                     fontsize = 7,
                     annotation_names_row = F,
                     annotation_names_col = F,
                     gaps_row = c(
                       sum(as.numeric(gene_order) == 1, na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:2), na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:3), na.rm = T),
                       sum(as.numeric(gene_order) %in% c(1:4), na.rm = T)
                     ),
                     annotation_row = data.frame(
                       row.names = T.M_pT_res_stats2$Gene[T.M_pT_res_stats2$Gene %in% sig_genes],
                       Module = T.M_pT_res_stats2$Module[T.M_pT_res_stats2$Gene %in% sig_genes]
                     ),
                     annotation_col = data.frame(
                       row.names = rownames(datExpr_CTR.TUMOUR_median),
                       Pseudotime = as.numeric(rownames(datExpr_CTR.TUMOUR_median))
                     ),
                     annotation_colors = list(
                       Pseudotime = viridis::viridis(3),
                       Module = Mod_cols
                     )
  )
TM_pT_plot <-  
  DC_plotting.df %>%
  ggplot(aes(x = DC1, y = DC2, colour = T.M_pT)) +
  geom_point(size = 0.1) + 
  theme_void(base_size = 10) +
  theme(legend.position = c(0.1,1), legend.justification = c("left", "top"),
        legend.direction = "horizontal",
        legend.key.width = unit(8, "pt"), legend.key.height = unit(2, "pt")) +
  guides(colour = guide_colourbar(title.position="top", title.hjust = 0.5)) +
  scale_color_viridis_c(name = "DPT", na.value = "grey80") 
TM_pT_plot

Fig_4E <- ggarrange(
  ggarrange(NULL, TM_pT_plot, ncol = 2),
  TM.PT_TUMOUR_Heatmap$gtable,
  nrow = 2, heights = c(2.5,6.5)
)

Fig_4E

这鉴定了两条轨迹上的四个模块，代表转分化的不同阶段：祖细胞、早期激活、原分化和分化

Figure 4F

损失曲线图显示了由对照和肿瘤样本或仅对照组织样本组成的数据集中每个共有DPT模块的表达谱。共有模块由在肺泡成纤维细胞至肌成纤维细胞和外膜成纤维细胞至肌成纤维细胞轨迹中分配到相同簇的基因组成，列出了每个模块的10个代表性基因

Mod.Overlap_df <-  cbind(A.M_pT_res_stats2[,c("Gene", "Module", "meta_p_adj.BH")],
                         T.M_pT_res_stats2[,c("Gene", "Module", "meta_p_adj.BH")])
names(Mod.Overlap_df) <- c("GeneA", "Alv2M_Module", "Alv2M_adjP", "GeneB", "Adv2M_Module", "Adv2M_adjP")
Core.Mods <- Mod.Overlap_df[Mod.Overlap_df$Alv2M_Module == Mod.Overlap_df$Adv2M_Module, ]
Module.Genes_list <- list()
for(i in levels(Core.Mods$Alv2M_Module)) {
  Module.Genes_list[[i]] <- as.character(na.omit(Core.Mods$GeneA[Core.Mods$Alv2M_Module == i]))
}
names(Fibs.integrated@meta.data)
DefaultAssay(Fibs.integrated) <- "RNA"
Fibs.integrated <- 
  AddModuleScore(Fibs.integrated,
                 features = Module.Genes_list)
names(Fibs.integrated@meta.data)
names(Fibs.integrated@meta.data)[grep("^Cluster", names(Fibs.integrated@meta.data))] <-
  paste0("Core_", levels(T.M_pT_res_stats2$Module))
pT_df <- data.frame(
  Fibs.integrated@meta.data[, c("Sample.type", "All_pT", "dpt_branch", "Fibs_MajorClusters", "Dataset", "Sample.Subtype",
                                paste("Core", levels(T.M_pT_res_stats2$Module), sep = "_"))]
  
)
variables = paste("Core", levels(T.M_pT_res_stats2$Module), sep = "_")
pT_df_long <- reshape2::melt(pT_df[, c("All_pT", "Sample.type", "Dataset",variables)],
                             id.vars = c("All_pT","Sample.type", "Dataset"), na.rm = T)
levels(pT_df_long$variable)
pT_df_long$Dataset_ordered <- 
  factor(pT_df_long$Dataset, levels = levels(pT_df_long$Dataset) )
pT_df_long$Dataset_type <- 
  factor(pT_df_long$Dataset_ordered, levels = levels(pT_df_long$Dataset_ordered),
         labels = c("CTR & Tumour", "CTR Only", "CTR Only", rep("CTR & Tumour", 6)))
names(pT_df_long)
levels(pT_df_long$variable)
Mod_cols2 <- Mod_cols
names(Mod_cols2) <- levels(pT_df_long$variable)
pT_df_long$Mod.label <- factor(pT_df_long$variable,
                               labels = c("Progenitor", "Early-activation",
                                          "Proto-differentiation", "Differentiation"))
Fig_4F <- 
  pT_df_long[!is.na(pT_df_long$variable), ] %>%
  ggplot(aes(x = All_pT, y = value, colour = variable)) +
  facet_grid(Mod.label~Dataset_type, scales = "free_y") + 
  geom_smooth(method = "loess") +
  guides(colour = guide_legend(override.aes = list(size = 2))) +
  theme_bw(base_size = 15) +
  theme(legend.position = "none", panel.grid = element_blank(),
        strip.text = element_text(size = 10)) +
  xlab("Pseudotime") + ylab("Module Score") +
  scale_color_manual(values = Mod_cols2)

Fig_4F

Figure 4G

箱形图显示按组织类型和拟时序五分位数分组的细胞中共有DPT模块的表达

Fibs.integrated$All_pT_5cat <- factor(Hmisc::cut2(Fibs.integrated$All_pT, g = 5),
                                      labels = paste0("pT q", 1:5))

#Early−activation模块
test <- aggregate(x =Fibs.integrated@meta.data,
                  by =Core_Transient ~ All_pT_5cat + SampleID + Sample.type,
                  FUN = median)
Transient.boxplot <- 
  test %>%
  drop_na(All_pT_5cat) %>%
  ggplot(aes(x = Sample.type, y = Core_Transient, fill = Sample.type)) +
  theme_pubr(base_size = 15) +
  theme(axis.text.x = element_blank()) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
  facet_grid(~All_pT_5cat) +
  ylab("Early-activation Module\n(Normalised exprs)") +
  stat_compare_means(comparisons = list(c("Control", "Tumour")),
                     size = 2.5,
                     bracket.size = 0.1, tip.length = 0.01,
                     label.y = max(test$Core_Transient)+0.1,
                     #vjust = 0.5,
                     hide.ns = T)+
  expand_limits(y = 2)
#Proto−Differentation模块
test <- aggregate(x =Fibs.integrated@meta.data,
                  by =Core_Proto.Differentiation ~ All_pT_5cat + SampleID + Sample.type,
                  FUN = median)

PD.boxplot <- 
  test %>%
  drop_na(All_pT_5cat) %>%
  ggplot(aes(x = Sample.type, y = Core_Proto.Differentiation, fill = Sample.type)) +
  theme_pubr(base_size = 15) +
  theme(axis.text.x = element_blank()) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
  facet_grid(~All_pT_5cat) +
  ylab("Proto-Differentation Module\n(Normalised exprs)") +
  stat_compare_means(comparisons = list(c("Control", "Tumour")),
                     size = 2.5,
                     bracket.size = 0.1, tip.length = 0.01,
                     label.y = max(test$Core_Proto.Differentiation)+0.1,
                     #vjust = 0.5,
                     hide.ns = T)+
  expand_limits(y = 2)

#Differentation模块
test <- aggregate(x =Fibs.integrated@meta.data,
                  by =Core_Differentiation ~ All_pT_5cat + SampleID + Sample.type,
                  FUN = median)
Differentiation.boxplot <- 
  test %>%
  drop_na(All_pT_5cat) %>%
  ggplot(aes(x = Sample.type, y = Core_Differentiation, fill = Sample.type)) +
  theme_pubr(base_size = 15) +
  theme(axis.text.x = element_blank()) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 0.1) +
  facet_grid(~All_pT_5cat) +
  ylab("Differentation Module\n(Normalised exprs)") +
  stat_compare_means(comparisons = list(c("Control", "Tumour")),
                     size = 2.5,
                     bracket.size = 0.1, tip.length = 0.01,
                     label.y = max(test$Core_Differentiation)+0.1,
                     hide.ns = T)+
  expand_limits(y = 1.5)

Fig_4G <- 
  ggarrange(
    Transient.boxplot,
    PD.boxplot,
    Differentiation.boxplot,
    nrow = 3, common.legend = T, legend = "right")

Fig_4G

Figure 4H

条形图显示每个共识DPT模块的REACTOME途径富集结果

REACTOME<-REACTOME[c(5,1,18,49,59,58),]
REACTOME$"Pathway genes in Consensus Module (%)"<-(REACTOME$Coverage.pct)*100
REACTOME<-REACTOME[rev(rownames(REACTOME)),]
col<-rep(c("#984EA3","#4DAF4A","#377EB8"),each = 2)
names(col)<-REACTOME$Name
ggplot(REACTOME, aes(x=factor(Name,levels=REACTOME$Name), y=`Pathway genes in Consensus Module (%)`, 
                     fill = Name,label=paste0(REACTOME$Name," (adj.P=", signif(REACTOME$q.value.FDR.B.H, 3),") ")))+
  geom_bar(stat = "identity", position = "dodge",color = "black")+
  labs(x="REACTOME pathway", y="Pathway genes in Consensus Module (%)", fill="Module") +
  theme_bw() +
  theme(axis.text.y = element_blank())+
  coord_flip()+
  scale_fill_manual(values = col)+theme(legend.position = "none")+
  geom_bar_text(position="stack",
                place="left",
                color="white",
                min.size = 10,
                reflow = TRUE)+
  scale_y_continuous(expand = c(0,0),
                     limits = c(0,55))+ 
  scale_x_discrete(expand=c(0,0))

Figure 4I

线图显示了通过mxIHC的每个成纤维细胞亚群的配对肿瘤和相邻正常组织之间的平均HSPA1A表达水平

Fibs_HSPA1A.agg <- aggregate(
  MxIHC_Fibroblasts$HSPA1A,
  by = list(Sample = MxIHC_Fibroblasts$Slide.name,
            Subpop = MxIHC_Fibroblasts$Cell.type,
            TvN = MxIHC_Fibroblasts$Sample.Region.TvN,
            Subtype = MxIHC_Fibroblasts$Subtype,
            Inflam.Fibro = MxIHC_Fibroblasts$Control...Fibrosis.Inflammation,
            exclusion = MxIHC_Fibroblasts$Include.Normal.95pct),
  median)

Fig_4I <- 
  Fibs_HSPA1A.agg %>%
  drop_na(TvN) %>%
  filter(!Sample == "Drop-Seq Cohort 1") %>%
  ggplot(aes(x = TvN, y = x)) +
  theme_pubr(base_size = 15) +
  theme(legend.position = "none", axis.title.x = element_blank()) +
  facet_grid(~Subpop) +
  geom_point(size = 0.1, aes(colour = Subpop)) +
  geom_line(aes(group = Sample, colour = Subpop)) +
  scale_colour_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("HSPA1A\n(MxIHC Median per cell exprs)") +
  stat_compare_means(comparisons = list(c("Control", "Tumour")), paired = T,
                     size = 4,
                     method = "wilcox", label.y = 0.18, tip.length = 0.01)+
  expand_limits(y = 0.2)

Fig_4I

Figure 4

这些结果表明了两种成纤维细胞亚群(外膜和肺泡)都可以作为肌成纤维细胞的组织驻留祖细胞。这些数据还表明，无论祖细胞亚群如何，转分化过程都重要:其涉及炎症基因上调的短暂阶段，独立于肿瘤的相互作用关系;随后是涉及热休克反应信号的原始分化，通过与肿瘤的相互作用而增加;最终导致完全分化的肌成纤维细胞表型，其ECM组织和胶原形成能力增加，这在肿瘤相关刺激下显著增强。

本文参与腾讯云自媒体同步曝光计划，分享自微信公众号。

原始发表：2023-06-09 21:04，如有侵权请联系 cloudcommunity@tencent.com 删除

figure