手把手带你复现NC图表之Figure 2

生信技能树jimmy

发布于 2023-09-26 20:04:10

35100

代码可运行

文章被收录于专栏：单细胞天地

运行总次数：0

代码可运行

复现文章信息：

文章题目：Single-cell analysis reveals prognostic fibroblast subpopulations linked to molecular and immunological subtypes of lung cancer 期刊：Nature Communications 日期：2023年1月31日 DOI: 10.1038/s41467-023-35832-6

复现图——Figure 2

通过整合7个scRNA-seq数据集研究NSCLC中的成纤维细胞异质性

非小细胞肺癌中存在的成纤维细胞与非癌性肺组织中确定的三个主要亚群一致，并且可能对ECM维持/重构进行差异调节。这些数据还表明，与NSCLC肿瘤的相互作用导致这些亚群中基因表达的显著变化，除了亚群特异性表型变化外，还持续涉及间质胶原的上调。此外，与对照肺成纤维细胞相比，非小细胞肺癌的myoCAF基因特征增加，而对照肺组织的iCAF基因特征增加。

R包载入与数据准备

library(Seurat)
library(WGCNA)
library(tidyverse)
library(ggpubr)
library(ggsci)
library(msigdbr)
library(GSVA)
library(limma)
library(readxl)
library(org.Hs.eg.db)
library(hpar)
data_directory <- "H:\\文献复现\\6\\" 
source(paste0(data_directory, "0_NewFunctions.R"))

load(paste0(data_directory, "IntegratedFibs_Zenodo.Rdata"))
load(paste0(data_directory, "HCLA_annotation.Rdata"))
Fibroblast_GeneSets_v2_2021_0623 <- read_excel(paste0(data_directory, "Fibroblast GeneSets v2 2021_0623.xlsx"))
#https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-023-35832-6/MediaObjects/41467_2023_35832_MOESM21_ESM.xlsx
Figure2i <- read_excel(paste0(data_directory, "Figure 2i.xlsx"))
Figure2j <- read_excel(paste0(data_directory, "Figure 2j.xlsx"))

成纤维细胞聚类

作者使用来自NSCLC和对照组织样本的另外6个scRNA-seq数据集重复了成纤维细胞分选（不包括壁细胞）的过程。生成了一个包含9673个成纤维细胞的数据集（5183个细胞来自39个对照组样本，3440个来自46个LUAD样本，654个来自16个LUSC样本）。通过无监督聚类识别的三个主要亚群，并且鉴定的三个簇与先前在对照肺组织中描述的成纤维细胞亚群一致（外膜、肺泡和肌成纤维细胞）。


DefaultAssay(Fibs.integrated) <- "integrated"
Fibs.integrated <- ScaleData(Fibs.integrated)
Fibs.integrated <- RunPCA(Fibs.integrated)
ElbowPlot(Fibs.integrated, ndims = 50)
PCs2use <- c(1:30)
Fibs.integrated <- RunUMAP(Fibs.integrated, dims = PCs2use)
Fibs.integrated <- FindNeighbors(Fibs.integrated, dims = PCs2use, verbose = FALSE)
Fibs.integrated <- AddMetaData(Fibs.integrated, as.data.frame(Fibs.integrated@reductions$umap@cell.embeddings))
Fibs.integrated <-  FindClusters(Fibs.integrated, resolution = 0.1, verbose = FALSE)
Fibs.integrated$Fibs_MajorClusters <- factor(Fibs.integrated$seurat_clusters, levels = c(2,1,0), labels = c("Adventitial", "Alveolar", "Myo"))

在7套数据中作者为了整合和纠正数据集之间的批效应，使用典型相关分析并且使用共享最近邻模块化优化算法执行无监督聚类。为了确定最具生物学信息量的聚类解决方案，改变分辨率参数，迭代增加识别的聚类数量，并检查每个聚类识别的标记基因数量(样本水平平均log foldchange>1,adj.P<0.01，至少有50%的样本表示)。这表明，三个主要的集群被一致地确定，而更高分辨率的聚类导致鉴定集群没有或很少的标记基因符合这些标准

Figure 2B

Fig_2B <- 
  DimPlot(Fibs.integrated, group.by = "Fibs_MajorClusters", pt.size = 1.2) &
  theme_pubr(base_size = 15) &
  scale_colour_manual(values = Fibs_col.palette) &
  theme(legend.position = c(0,0),
        legend.justification = c(0,0),
        legend.background = element_blank(),
        plot.title = element_blank(),
        legend.key.size = unit(7,"pt")
  )
Fig_2B

成纤维细胞标志物分析

首先计算每个亚群(上皮细胞、肺泡细胞和肌成纤维细胞)的样品水平基因表达谱（单个细胞的平均值），然后进行差异基因表达分析

Fibs.integrated <- SetIdent(Fibs.integrated, value = "Fibs_MajorClusters")
All.Markers <- FindAllMarkers(Fibs.integrated, test.use = "bimod", assay = "RNA", only.pos = T)
All.Markers$ENSEMBL <- mapIds(org.Hs.eg.db, keys = All.Markers$gene, keytype = "SYMBOL", column="ENSEMBL")
test <- hpar::getHpa(All.Markers$ENSEMBL)
table(test$Reliability)
test <- test[!duplicated(test$Gene.name), ]
rownames(test) <- test$Gene.name
All.Markers$HPA_reliability <- test[All.Markers$gene, "Reliability"]
test <- hpar::getHpa(All.Markers$ENSEMBL, hpadata = "hpaSubcellularLoc")
test <- test[!duplicated(test$Gene.name), ]
rownames(test) <- test$Gene.name
test$membrane <- 1:nrow(test) %in% grep("membrane", test$GO.id, fixed = T)
test$cytosol <- 1:nrow(test) %in% grep("Cytosol", test$GO.id, fixed = T)
test$filaments <- 1:nrow(test) %in% grep("filaments", test$GO.id, fixed = T)
test$nuclear <- 1:nrow(test) %in% grep("Nucle", test$GO.id, fixed = T)
All.Markers$subcell.loc <- test[All.Markers$gene,
                                c("membrane","cytosol","filaments","nuclear")]
All.Markers$pct.diff <- All.Markers$pct.1 - All.Markers$pct.2
All.Markers$Detected.Intracellular <- rowSums(All.Markers$subcell.loc) > 0
#样本层面成纤维细胞标记物分析
Fibs.integrated <- SetIdent(Fibs.integrated, value = "Fibs_MajorClusters")
Fibs.integrated_samples <- AverageExpression(Fibs.integrated, assays = "RNA", return.seurat = T, 
                                             group.by = c("Fibs_MajorClusters", "SampleID2"),
                                             slot = "data")
Fibs.integrated_samples <- NormalizeData(Fibs.integrated_samples)
dim(Fibs.integrated_samples)
#从样本名称中提取元数据
Fibs.integrated_samples@meta.data <- 
  Fibs.integrated_samples@meta.data %>%
  mutate(Fibs_MajorClusters = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,1],
         Dataset = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,2],
         SampleID = str_split_fixed(colnames(Fibs.integrated_samples), "_", 3)[,3])
#整理样本和样本差异分析结果
Fibs.Sample_MetaData <- Fibs.integrated_samples@meta.data[,"SampleID", drop = F]
matchSamples <- match(Fibs.Sample_MetaData$SampleID, Sample_MetaData$SampleID)
table(is.na(matchSamples))
Fibs.Sample_MetaData <- cbind(Fibs.Sample_MetaData, Sample_MetaData[matchSamples, ])
keepCols_metaData <- c("SampleID", "nCount_RNA", "nFeature_RNA",
                       "Fibs_MajorClusters", "Dataset")
Fibs.integrated_samples@meta.data <- Fibs.integrated_samples@meta.data[, keepCols_metaData]
Fibs.integrated_samples <- AddMetaData(Fibs.integrated_samples, Fibs.Sample_MetaData)
Fibs.integrated_samples$Sample.type[Fibs.integrated_samples$Sample.type == "Tumor"] <- "Tumour"
Fibs.integrated_samples$pN <- droplevels(Fibs.integrated_samples$pN, exclude = c(NA, "NA"))
levels(Fibs.integrated_samples$pN)
Fibs.integrated_samples <- SetIdent(Fibs.integrated_samples, value = "Fibs_MajorClusters")
Sample.level_Markers <- FindAllMarkers(Fibs.integrated_samples, test.use = "wilcox", assay = "RNA",
                                       only.pos = F, return.thresh = 1, min.pct = 0.25, logfc.threshold = 0)
Marker.match <- match(paste(All.Markers$cluster, All.Markers$gene),
                      paste(Sample.level_Markers$cluster, Sample.level_Markers$gene))
All.Markers$p_val_adj.Sample <- Sample.level_Markers$p_val_adj[Marker.match]
All.Markers$log2FC.Sample <- Sample.level_Markers$avg_log2FC[Marker.match]

Figure 2C

显示每个亚群10个最显著标志物的样品水平表达（单细胞平均值）的热图

All.Markers$cluster <- factor(as.character(All.Markers$cluster), levels = c("Adventitial", "Alveolar", "Myo"))
All.Markers %>%
  group_by(cluster) %>%
  filter(avg_log2FC > 0) %>%
  top_n(w = -p_val_adj.Sample, n = 10) -> top10_markers
DefaultAssay(Fibs.integrated_samples) <- "RNA"
Fibs.integrated_samples <- ScaleData(Fibs.integrated_samples, features = top10_markers$gene, assay = "RNA")
levels(Fibs.integrated_samples$Fibs_MajorClusters)
Fibs.integrated_samples$Fibs_MajorClusters <- factor(Fibs.integrated_samples$Fibs_MajorClusters,
  levels = c("Adventitial", "Alveolar", "Myo")
)
Fig_2C <-
  DoHeatmap(Fibs.integrated_samples, features = rev(top10_markers$gene),
            assay = "RNA",
            angle = 45,
            group.by = c("Fibs_MajorClusters") ,
            group.colors = Fibs_col.palette,
            label = T, disp.max = 2) &
  theme(axis.text.y = element_text(face = "italic", size = 10),legend.position = "none") 
Fig_2C

通路分析

使用REACTOME通路进行GSVA，以及使用先前描述的成纤维细胞亚群的基因标记进行GSVA

h_gene_sets = msigdbr(species = "human", category = "C2", subcategory = "CP:REACTOME")
Reactome_pathwaysH = split(x = h_gene_sets$human_gene_symbol, f = h_gene_sets$gs_name)
REACTOME_gsva_res <- gsva(as.matrix(Fibs.integrated_samples@assays$RNA@data), gset.idx.list = Reactome_pathwaysH, method="gsva",
                          min.sz=10, max.sz=500)
#通过经验Bayes进行差异分析
f <- factor(Fibs.integrated_samples$Fibs_MajorClusters, levels = c("Myo","Alveolar", "Adventitial"))
design <- model.matrix(~0+f)
colnames(design) <- c("Myo", "Alveolar", "Adventitial")
REACTOME_gsva.fit <- lmFit(REACTOME_gsva_res, design)
REACTOME_gsva.fit <- eBayes(REACTOME_gsva.fit)
contrast.matrix <- makeContrasts(Myo.v.ALL = Myo-(Alveolar+Adventitial)/2,
                                 Alv.v.ALL = Alveolar-(Myo+Adventitial)/2,
                                 Adv.v.ALL = Adventitial-(Myo+Alveolar)/2,
                                 Myo.v.Alv = Myo - Alveolar,
                                 Myo.v.Adv = Myo-Adventitial,
                                 Alv.v.Adv = Alveolar-Adventitial,
                                 levels=design)
REACTOME_gsva.fit2 <- contrasts.fit(REACTOME_gsva.fit, contrast.matrix)
REACTOME_gsva.fit2 <- eBayes(REACTOME_gsva.fit2)
res2 <- decideTests(REACTOME_gsva.fit2, p.value=0.01, method = "global")
summary(res2)
REACTOME_tt.res <- list(
  Myo = topTable(REACTOME_gsva.fit2, coef = "Myo.v.ALL", n=1270),
  Alveolar = topTable(REACTOME_gsva.fit2, coef = "Alv.v.ALL", n=1270),
  Adventitial = topTable(REACTOME_gsva.fit2, coef = "Adv.v.ALL", n=1270)
)
REACTOME_tt.res_df <- do.call(rbind, REACTOME_tt.res)
REACTOME_tt.res_df$Cluster <- do.call(rbind, strsplit(rownames(REACTOME_tt.res_df), ".", 2))[,1]
REACTOME_tt.res_df$Pathway <- do.call(rbind, strsplit(rownames(REACTOME_tt.res_df), ".", 2))[,2]
REACTOME_tt.res_df$Pathway.label <- gsub("REACTOME_", "", REACTOME_tt.res_df$Pathway)
REACTOME_tt.res_df$Pathway.label <- gsub("_", " ", REACTOME_tt.res_df$Pathway.label)
REACTOME_tt.res_subset <- REACTOME_tt.res_df[REACTOME_tt.res_df$logFC > 0, ] %>%
  group_by(Cluster) %>%
  top_n(wt = -adj.P.Val, n = 5)
REACTOME_tt.res_subset <- REACTOME_tt.res_subset[-10, ]
REACTOME_tt.res_subset <- rbind(REACTOME_tt.res_subset, REACTOME_tt.res_df["Alveolar.REACTOME_TRP_CHANNELS", ])

Figure 2D

条形图显示每个亚群中最显著上调的REACTOME通路的log2倍数变化

Fig_2D <- 
  REACTOME_tt.res_subset %>%
  ggplot(aes(x = reorder(Pathway.label, logFC), y = logFC, label =  paste0("adj.P=", signif(adj.P.Val, 3)),
             fill = Cluster)) +
  theme_pubr(base_size = 15)+
  theme(axis.title.y = element_blank(),legend.title = element_blank(),
        axis.text.y = element_text(size = 7),
        legend.position = "top", strip.text = element_blank(), legend.key.size = unit(10, "pt"))+
  geom_bar(stat = "identity", alpha = 0.75)+
  facet_grid(Cluster~., scales = "free", space = "free") +
  geom_text(hjust = 1.1, colour = "white", size = 3) +
  scale_fill_manual(values = Fibs_col.palette) +
  coord_flip()
Fig_2D

基质体基因分析

检测每个成纤维细胞亚群是否上调了与特定基质体类别（基底膜、间质胶原、ECM糖蛋白和蛋白聚糖），结果表明肌成纤维细胞上调了每个基质体类别中的多个基因，包括大多数上调的间质胶原

CGP_gene_sets = msigdbr(species = "human", category = "C2")
CGP_pathwaysH = split(x = CGP_gene_sets$human_gene_symbol, f = CGP_gene_sets$gs_name)
All.Markers$NABA_Matrisome <- All.Markers$gene %in% CGP_pathwaysH$NABA_MATRISOME
All.Markers$NABA_BASEMENT_MEMBRANES <- All.Markers$gene %in% CGP_pathwaysH$NABA_BASEMENT_MEMBRANES
All.Markers$NABA_COLLAGENS <- All.Markers$gene %in% CGP_pathwaysH$NABA_COLLAGENS
All.Markers$NABA_ECM_GLYCOPROTEINS <- All.Markers$gene %in% CGP_pathwaysH$NABA_ECM_GLYCOPROTEINS
All.Markers$NABA_PROTEOGLYCANS <- All.Markers$gene %in% CGP_pathwaysH$NABA_PROTEOGLYCANS
names(All.Markers)
All.Markers$type <- factor(
  apply(All.Markers[,c("NABA_BASEMENT_MEMBRANES","NABA_COLLAGENS","NABA_ECM_GLYCOPROTEINS","NABA_PROTEOGLYCANS")],1, which.max),
                           levels = 1:5,
                           labels = c("Basement.Membranes", "Interstitial.Collagens",
                                      "ECM.Glycoproteins", "Proteoglycans", "Other")
)
All.Markers$type[All.Markers$type == "Basement.Membranes" &
                   All.Markers$NABA_BASEMENT_MEMBRANES == F] <- "Other"

Figure 2E

条形图显示每个亚群差异表达的不同基质体组分的比例

Fig_2E <- 
  All.Markers[!All.Markers$type == "Other" &
                All.Markers$p_val_adj.Sample < 0.01, ] %>%
  ggplot(aes(x = type, fill = cluster)) +
  theme_pubr(base_size = 15) +
  theme(legend.position = "top", legend.key.size = unit(20,"pt"), legend.title = element_blank()) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("Proportion of DEGs") +
  xlab("Matrisome Category")
Fig_2E

Figure 2F-G

#计算每个基质体类别的模块评分，并比较了成纤维细胞亚群之间的总体表达）。
Fibs.integrated_samples <- AddModuleScore(
  Fibs.integrated_samples,
  features = list(
    CGP_pathwaysH$NABA_BASEMENT_MEMBRANES,
    CGP_pathwaysH$NABA_COLLAGENS,
    CGP_pathwaysH$NABA_ECM_GLYCOPROTEINS,
    CGP_pathwaysH$NABA_PROTEOGLYCANS,
    CGP_pathwaysH$NABA_COLLAGENS[!CGP_pathwaysH$NABA_COLLAGENS %in% CGP_pathwaysH$NABA_BASEMENT_MEMBRANES]
  ),
  assay = "RNA"
)
names(Fibs.integrated_samples@meta.data)[grep("^Cluster", names(Fibs.integrated_samples@meta.data))] <-
  c("Basement.Membranes", "Collagens", "ECM.Glycoproteins", "Proteoglycans", "Interstitial.Collagens")
comparisons <- list(c("Myo", "Adventitial"), c("Myo", "Alveolar"), c("Alveolar", "Adventitial"))
#箱形图显示编码每个亚群基底膜组分的基因的样品水平表达
pA <- 
  Fibs.integrated_samples@meta.data %>%
  ggplot(aes(x = Fibs_MajorClusters, y = Basement.Membranes,
             fill = Fibs_MajorClusters)) +
  theme_pubr(base_size = 15) +
  theme(legend.position = "right", axis.title.x = element_blank()) +
  geom_boxplot(outlier.shape = NA, alpha = 0.75) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(comparisons = list(c("Myo", "Adventitial"), c("Alveolar", "Adventitial")),
                     size = 5,
                     hide.ns = T,label.y = c(0.65,0.8)) +
  scale_fill_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("Basement Membranes") +
  guides(fill = "none") +  expand_limits(y = 0.9)
#箱形图显示了每个亚群间质胶原编码基因的样品水平表达
pB <- 
  Fibs.integrated_samples@meta.data %>%
  ggplot(aes(x = Fibs_MajorClusters, y = Interstitial.Collagens,
             fill = Fibs_MajorClusters)) +
  theme_pubr(base_size = 15) +
  theme(legend.position = "right", axis.title.x = element_blank()) +
  geom_boxplot(outlier.shape = NA, alpha = 0.75) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(comparisons = list(c("Myo", "Adventitial"), c("Myo", "Alveolar")),
                     size = 5,
                     hide.ns = T, label.y = c(0.9,1.1)) +
  scale_fill_manual(values = Fibs_col.palette) +
  ylab("Interstitial Collagens") +
  rotate_x_text(angle = 45) +
  guides(fill = "none") + expand_limits(y = 1.25)
Fig_2FG <- ggarrange(pA,pB, ncol = 2, labels = c("f", "g"))
Fig_2FG
#这表明与外膜和肺泡成纤维细胞相比，肌成纤维细胞显著上调了间质胶原;而与外膜成纤维细胞相比，肺泡和肌成纤维细胞的基底膜基因表达均显著增加）。

Figure 2H

箱形图显示了按组织类型划分的每个亚群间质胶原编码基因的样本水平表达

comparisons <- list(c("Control", "Tumour"))
Fig_2H <- 
  Fibs.integrated_samples@meta.data %>%
  drop_na(Sample.type) %>%
  ggplot(aes(x = Sample.type, y = Interstitial.Collagens)) +
  theme_pubr(base_size = 15) +
  facet_wrap(~Fibs_MajorClusters) +
  geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(comparisons = comparisons,
                     size = 5,
                     label.y = 0.9) +
  scale_fill_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("Interstitial Collagens") +
  guides(fill = "none") +
  theme(axis.title.x = element_blank(),
        legend.position = "none") + expand_limits(y = 1.1)
Fig_2H

在三个亚群的肿瘤样品中间质胶原表达显著增加

Figure 2I

箱形图显示按组织类型划分的每个亚群编码myoCAF标志物的基因的样本水平表达

name<-colnames(Figure2i)
name[3]<-"myoCAF"
colnames(Figure2i)<-name
Figure2i<-cbind(Fibs.integrated_samples@meta.data,Figure2i[,c(1,3)])
comparisons <- list(c("Control", "Tumour"))
Fig_2I <- 
  Figure2i %>%
  drop_na(Sample.type) %>%
  ggplot(aes(x = Sample.type, y = myoCAF signature (relative expression))) +
  theme_pubr(base_size = 5) +
  facet_wrap(~Fibs_MajorClusters) +
  geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 1) +
  stat_compare_means(comparisons = comparisons,
                     size = 2.5,
                     label.y = 0.9) +
  scale_fill_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("myoCAF Signature") +
  guides(fill = "none") +
  theme(axis.title.x = element_blank(),
        legend.position = "none") + expand_limits(y = 1.1)
Fig_2I

myoCAF基因标签在肿瘤样品中显著上调

Figure 2J

箱形图显示了按组织类型划分的各亚群中编码iCAF标志物的基因的样本水平表达

name<-colnames(Figure2j)
name[3]<-"iCAF"
colnames(Figure2j)<-name
Figure2j<-cbind(Fibs.integrated_samples@meta.data,Figure2j[,c(1,3)])
comparisons <- list(c("Control", "Tumour"))
Fig_2J <- 
  Figure2j %>%
  drop_na(Sample.type) %>%
  ggplot(aes(x = Sample.type, y = iCAF)) +
  theme_pubr(base_size = 15) +
  facet_wrap(~Fibs_MajorClusters) +
  geom_boxplot(aes(fill = Fibs_MajorClusters), outlier.shape = NA, alpha = 0.75) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  stat_compare_means(comparisons = comparisons,
                     size = 5,
                     label.y = 0.9) +
  scale_fill_manual(values = Fibs_col.palette) +
  rotate_x_text(angle = 45) +
  ylab("Interstitial Collagens") +
  guides(fill = "none") +
  theme(axis.title.x = element_blank(),
        legend.position = "none") + expand_limits(y = 1.1)
Fig_2J