前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >文献数据复现——原发性和转移性肝细胞癌多细胞生态系统的单细胞图谱

文献数据复现——原发性和转移性肝细胞癌多细胞生态系统的单细胞图谱

作者头像
生信菜鸟团
发布2022-10-31 15:55:34
1.8K1
发布2022-10-31 15:55:34
举报
文章被收录于专栏:生信菜鸟团生信菜鸟团

这周推文是复现GSE149614数据集的文献中的图,先降维分群,然后对细胞亚群进行生物学命名后,继续看一下不同样本中免疫细胞和非免疫细胞的占比。

文献:A single-cell atlas of the multicellular ecosystem of primary and metastatic hepatocellular carcinoma.

文献背景:

文章中招募了 10 名患有原发性和/或转移性肿瘤的 HCC 患者, 不同肿瘤淋巴结转移(TNM)的代表阶段和肝炎病毒感染状态。在这些患者的四种相关组织类型中进行单细胞转录组测序,包括非肿瘤肝 (NTL)、原发性肿瘤 (PT)、门静脉癌栓 (PVTT) 和转移性淋巴结(MLN)组织。最终获得了 71,915 个单细胞的转录组数据,其中每个细胞平均检测到 1979 个基因。

数据集:GSE149614

数据集分组:

NTL non-tumor liver:非肿瘤肝脏 PT primary tumor:原发肿瘤 PVTT portalvein tumor thrombus:门静脉癌栓 MLN metastatic lymph node:转移淋巴结

要复现的图

与 NTL 相比,PT 显示出 T/NK 细胞的显著消耗和myeloid 细胞的富集,这与之前对 HCC 的研究一致。 此外,PVTT 和 MLN 组织表现出与 T 相似的主要细胞类型组成。

代码如下

step1: 导入QC后数据,降维分群

代码语言:javascript
复制
rm(list=ls())
options(stringsAsFactors = F) 
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)

dir.create("./2-harmony")
getwd()
setwd("./2-harmony")

# sce.all=readRDS("../1-QC/sce.all_qc.rds")
sce=sce.all.filt 
sce
sce <- NormalizeData(sce, 
                         normalization.method = "LogNormalize",
                         scale.factor = 1e4) 
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce, features = VariableFeatures(object = sce))

library(harmony)
seuratObj <- RunHarmony(sce, "orig.ident")
names(seuratObj@reductions)
seuratObj <- RunUMAP(seuratObj,  dims = 1:15, 
                     reduction = "harmony")
DimPlot(seuratObj,reduction = "umap",label=T ) 

sce=seuratObj
sce <- FindNeighbors(sce, reduction = "harmony",
                     dims = 1:15) 
sce.all=sce

#接下来分析,按照分辨率为0.5进行 
sel.clust = "RNA_snn_res.0.5"
sce.all <- SetIdent(sce.all, value = sel.clust)
table(sce.all@active.ident) 
saveRDS(sce.all, "sce.all_int_0.5.rds")
setwd('../')

step2:检查常见分群情况

代码语言:javascript
复制
rm(list=ls())
###### step5:检查常见分群情况  ######
getwd()
dir.create("./3-cell")
setwd("./3-cell")  
sce.all<-readRDS("../2-harmony/sce.all_int_0.5.rds")

pumap<-DimPlot(sce.all, reduction = "umap", group.by = "RNA_snn_res.0.5"
               ,label = T,label.box = T) 
ggsave('umap_by_RNA_snn_res.0.5.pdf')

#marker gene
metadata <- read.csv("../GSE146409-paper_marker.csv",
+                      header = T)
a<-metadata$gene
head(a)
#[1] "FXYD3"   "CLDN4"   "CEACAM6" "CEACAM5" "ELF"     "CLDN10" 
genes_to_check<-a
a
 #[1] "FXYD3"      "CLDN4"      "CEACAM6"    "CEACAM5"    "ELF"       
 #[6] "CLDN10"     "SLC22A10"   "FETUB"      "LBP"        "HPR"       
#[11] "LECT2"      "SERPINA10"  "CD5L"       "VCAM1"      "CETP"      
#[16] "LILRB5"     "MARCO"      "SDC3"       "TREM2"      "GPNMB"     
#[21] "CAPG"       "FCER1A"     "CD1C"       "CLEC10A"    "JAML"      
#[26] "S100A9"     "FCN1"       "S100A8"     "FGR"        "XCR1"      
#[31] "CLEC9A"     "IDO1"       "WDFY4"      "FLT3"       "CPNE3"     
#[36] "GZMA"       "CD3E"       "KLRB1"      "NKG7"       "CD7"       
#[41] "CCL5"       "IGLL5"      "FCRL5"      "TNFRSF17"   "DERL3"     
#[46] "JCHAIN"     "MZB1"       "CPE"        "SLCO2A1"    "CLEC14A"   
#[51] "TGM2"       "PODXL"      "VWA1"       "SOX18"      "PLVAP"     
#[56] "CD34"       "ICAM2"      "RELN"       "CLEC4M"     "CLEC1B"    
#[61] "CLEC4G"     "FCN2"       "OIT3"       "RERGL"      "AC006254.1"
#[66] "MYH11"      "ITGA8"      "PLN"        "ADIRF"      "OLFML2A"   
#[71] "NDUFA4L2"   "RGS5"       "TPPP3"      "PLXDC1"     "FRZB"      
#[76] "MMP11"      "CTHRC1"     "INHBA"      "HOPX"       "POSTN"     
#[81] "LTBP2"      "CXCL12"     "PTGDS"      "MASP1"      "FBLN1"     
#[86] "C7"         "HGF"        "TPX2"       "MKi67"      "UBE2C"     
#[91] "ASPM"       "TOP2A"      "RRM2"       "EPCAM"      "KRT19"     
#[96] "KRT18"      "PROM1"      "ALDH1A1"    "CD24"  
library(stringr)  
genes_to_check=str_to_upper(unique(genes_to_check))
genes_to_check
p_all_markers <- DotPlot(sce.all, features = genes_to_check,
                         assay='RNA')  + coord_flip()

p_all_markers
ggsave(plot=p_all_markers, filename="check_paper_zeng_marker_by_seurat_cluster0.5_2.pdf"
       ,width = 10,height = 14)

#拼图
library(patchwork)
p_all_markers+pumap
ggsave('markers_umap0.5.pdf',width = 18,height = 14)

step3: 细胞亚群生物学命名

代码语言:javascript
复制
  #细胞分群命名
  celltype=data.frame(ClusterID=0:20,
                      celltype= 0:20) 
  #定义细胞亚群  
  celltype[celltype$ClusterID %in% c(2,7,10,14,15,19),2]='Hepatocyte'
  celltype[celltype$ClusterID %in% c(4),2]='tumor'
  celltype[celltype$ClusterID %in% c(5),2]='cycling'
  celltype[celltype$ClusterID %in% c(0,13,16),2]='T/NK'
  
  celltype[celltype$ClusterID %in% c(1,3,11,17),2]='myeloid'
  celltype[celltype$ClusterID %in% c(9,12),2]='B cells'
  celltype[celltype$ClusterID %in% c(6,18,20),2]='Endo'
  celltype[celltype$ClusterID %in% c(8),2]='Fibo'
  
  head(celltype)
  celltype
  table(celltype$celltype)
  sce.all@meta.data$celltype = "NA"
  for(i in 1:nrow(celltype)){
    sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.0.5 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
  table(sce.all@meta.data$celltype)

DimPlot(sce.all, reduction = "umap", group.by = "celltype",
          #cols = color,
          pt.size = 1.5,
          label = T,label.box = T
  ) 
  ggsave('./celltype-umap.pdf',height = 6,width = 8) 
saveRDS(sce.all, "sce.all_int_celltype.rds")
setwd("../")

step4: 细胞比例

免疫细胞

代码语言:javascript
复制
# 1.加载R包 ----
rm(list=ls()) 
library(ggplot2) 
library(cowplot) 
library(paletteer)  
library(gplots)
library(ggpubr)    
library(ggsci) 
library(stringr)

# 2.设置工作路径 ----
#在meta信息中添加分组信息
getwd() 
dir.create("../4_group1")
setwd("../4_group1/")
sce=readRDS("../3-cell/sce_celltype_int_2.RDS")

table(sce@meta.data$group)
sce.all=sce[,sce$celltype%in%c("T/NK","B","Myeloid","cycling")]

sce=sce.all
# 4.可视化 ----
## 4.1 每种细胞类型中,分组所占比例 ----
library(tidyr)# 使用的gather & spread
library(reshape2) # 使用的函数 melt & dcast 
library(dplyr)
library(ggplot2)
tb=table(sce$celltype,
         sce$orig.ident)
balloonplot(tb)
head(tb)
bar_data <- as.data.frame(tb)
head(bar_data)

bar_per <- bar_data %>% 
  group_by(Var1) %>%
  mutate(sum(Freq)) %>%
  mutate(percent = Freq / `sum(Freq)`)
head(bar_per) 
write.csv(bar_per,file = "celltype_by_group_percent.csv")

#可视化
#celltypebygroup
bar_per$Var2

bar_per$Var2 <- factor(bar_per$Var2,levels=c('HCC01T','HCC02T','HCC03T',
                       'HCC04T','HCC05T','HCC06T','HCC07T','HCC08T','HCC09T',
                       'HCC10T','HCC03N','HCC04N','HCC05N','HCC06N',
                         'HCC07N','HCC08N','HCC09N','HCC10N',"HCC07P","HCC08P","HCC10L"),ordered = TRUE)

library(ggalluvial)
mycolor = c('#E64B3599',
            '#4DBBD599',
            '#00A08799',
            '#3C548899',
            '#F39B7F99')
ggplot(bar_per, aes(x =Var2, y= percent, fill = Var1,
                    stratum=Var1, alluvium=Var1)) + 
  # guides(color=guide_legend(title = "ABC"))+
  geom_col(width = 0.5, color='black')+
  geom_flow(width=0.5,alpha=0.4, knot.pos=0.5)+
  theme_classic() +
  labs(x='sample',y ='Ratio',title = "by group"
  )+
  coord_flip()+
  scale_fill_manual(values = mycolor)
ggsave("celltypebyorigent_percent_immune.pdf")

非免疫细胞

代码语言:javascript
复制
#同样的输入数据格式,提取非免疫细胞的子集
table(sce$celltype)
sce.all=sce[,sce$celltype%in%c("Endo","Hepatocyte","Fibo","tumor")]

sce=sce.all

#可视化
library(ggalluvial)
mycolor = c('#efb306',
            '#7db954',
            '#852f88',
            '#4e54ac',
            '#0f8096')
ggplot(bar_per, aes(x =Var2, y= percent, fill = Var1,
                    stratum=Var1, alluvium=Var1)) + 
  geom_col(width = 0.5, color='black')+
  geom_flow(width=0.5,alpha=0.4, knot.pos=0.5)+
  theme_classic() +
  labs(x='sample',y ='Ratio',title = "by group"
  )+
  coord_flip()+
  scale_fill_manual(values = mycolor)
ggsave("celltypebyorigent_percent_nonimmune.pdf")
本文参与 腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2022-10-20,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 生信菜鸟团 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档