这周推文是复现GSE149614数据集的文献中的图,先降维分群,然后对细胞亚群进行生物学命名后,继续看一下不同样本中免疫细胞和非免疫细胞的占比。
文献:A single-cell atlas of the multicellular ecosystem of primary and metastatic hepatocellular carcinoma.
文献背景:
文章中招募了 10 名患有原发性和/或转移性肿瘤的 HCC 患者, 不同肿瘤淋巴结转移(TNM)的代表阶段和肝炎病毒感染状态。在这些患者的四种相关组织类型中进行单细胞转录组测序,包括非肿瘤肝 (NTL)、原发性肿瘤 (PT)、门静脉癌栓 (PVTT) 和转移性淋巴结(MLN)组织。最终获得了 71,915 个单细胞的转录组数据,其中每个细胞平均检测到 1979 个基因。
数据集:GSE149614
数据集分组:
NTL non-tumor liver:非肿瘤肝脏 PT primary tumor:原发肿瘤 PVTT portalvein tumor thrombus:门静脉癌栓 MLN metastatic lymph node:转移淋巴结
要复现的图
与 NTL 相比,PT 显示出 T/NK 细胞的显著消耗和myeloid 细胞的富集,这与之前对 HCC 的研究一致。 此外,PVTT 和 MLN 组织表现出与 T 相似的主要细胞类型组成。
代码如下
step1: 导入QC后数据,降维分群
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
dir.create("./2-harmony")
getwd()
setwd("./2-harmony")
# sce.all=readRDS("../1-QC/sce.all_qc.rds")
sce=sce.all.filt
sce
sce <- NormalizeData(sce,
normalization.method = "LogNormalize",
scale.factor = 1e4)
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce, features = VariableFeatures(object = sce))
library(harmony)
seuratObj <- RunHarmony(sce, "orig.ident")
names(seuratObj@reductions)
seuratObj <- RunUMAP(seuratObj, dims = 1:15,
reduction = "harmony")
DimPlot(seuratObj,reduction = "umap",label=T )
sce=seuratObj
sce <- FindNeighbors(sce, reduction = "harmony",
dims = 1:15)
sce.all=sce
#接下来分析,按照分辨率为0.5进行
sel.clust = "RNA_snn_res.0.5"
sce.all <- SetIdent(sce.all, value = sel.clust)
table(sce.all@active.ident)
saveRDS(sce.all, "sce.all_int_0.5.rds")
setwd('../')
step2:检查常见分群情况
rm(list=ls())
###### step5:检查常见分群情况 ######
getwd()
dir.create("./3-cell")
setwd("./3-cell")
sce.all<-readRDS("../2-harmony/sce.all_int_0.5.rds")
pumap<-DimPlot(sce.all, reduction = "umap", group.by = "RNA_snn_res.0.5"
,label = T,label.box = T)
ggsave('umap_by_RNA_snn_res.0.5.pdf')
#marker gene
metadata <- read.csv("../GSE146409-paper_marker.csv",
+ header = T)
a<-metadata$gene
head(a)
#[1] "FXYD3" "CLDN4" "CEACAM6" "CEACAM5" "ELF" "CLDN10"
genes_to_check<-a
a
#[1] "FXYD3" "CLDN4" "CEACAM6" "CEACAM5" "ELF"
#[6] "CLDN10" "SLC22A10" "FETUB" "LBP" "HPR"
#[11] "LECT2" "SERPINA10" "CD5L" "VCAM1" "CETP"
#[16] "LILRB5" "MARCO" "SDC3" "TREM2" "GPNMB"
#[21] "CAPG" "FCER1A" "CD1C" "CLEC10A" "JAML"
#[26] "S100A9" "FCN1" "S100A8" "FGR" "XCR1"
#[31] "CLEC9A" "IDO1" "WDFY4" "FLT3" "CPNE3"
#[36] "GZMA" "CD3E" "KLRB1" "NKG7" "CD7"
#[41] "CCL5" "IGLL5" "FCRL5" "TNFRSF17" "DERL3"
#[46] "JCHAIN" "MZB1" "CPE" "SLCO2A1" "CLEC14A"
#[51] "TGM2" "PODXL" "VWA1" "SOX18" "PLVAP"
#[56] "CD34" "ICAM2" "RELN" "CLEC4M" "CLEC1B"
#[61] "CLEC4G" "FCN2" "OIT3" "RERGL" "AC006254.1"
#[66] "MYH11" "ITGA8" "PLN" "ADIRF" "OLFML2A"
#[71] "NDUFA4L2" "RGS5" "TPPP3" "PLXDC1" "FRZB"
#[76] "MMP11" "CTHRC1" "INHBA" "HOPX" "POSTN"
#[81] "LTBP2" "CXCL12" "PTGDS" "MASP1" "FBLN1"
#[86] "C7" "HGF" "TPX2" "MKi67" "UBE2C"
#[91] "ASPM" "TOP2A" "RRM2" "EPCAM" "KRT19"
#[96] "KRT18" "PROM1" "ALDH1A1" "CD24"
library(stringr)
genes_to_check=str_to_upper(unique(genes_to_check))
genes_to_check
p_all_markers <- DotPlot(sce.all, features = genes_to_check,
assay='RNA') + coord_flip()
p_all_markers
ggsave(plot=p_all_markers, filename="check_paper_zeng_marker_by_seurat_cluster0.5_2.pdf"
,width = 10,height = 14)
#拼图
library(patchwork)
p_all_markers+pumap
ggsave('markers_umap0.5.pdf',width = 18,height = 14)
step3: 细胞亚群生物学命名
#细胞分群命名
celltype=data.frame(ClusterID=0:20,
celltype= 0:20)
#定义细胞亚群
celltype[celltype$ClusterID %in% c(2,7,10,14,15,19),2]='Hepatocyte'
celltype[celltype$ClusterID %in% c(4),2]='tumor'
celltype[celltype$ClusterID %in% c(5),2]='cycling'
celltype[celltype$ClusterID %in% c(0,13,16),2]='T/NK'
celltype[celltype$ClusterID %in% c(1,3,11,17),2]='myeloid'
celltype[celltype$ClusterID %in% c(9,12),2]='B cells'
celltype[celltype$ClusterID %in% c(6,18,20),2]='Endo'
celltype[celltype$ClusterID %in% c(8),2]='Fibo'
head(celltype)
celltype
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.0.5 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)
DimPlot(sce.all, reduction = "umap", group.by = "celltype",
#cols = color,
pt.size = 1.5,
label = T,label.box = T
)
ggsave('./celltype-umap.pdf',height = 6,width = 8)
saveRDS(sce.all, "sce.all_int_celltype.rds")
setwd("../")
step4: 细胞比例
免疫细胞
# 1.加载R包 ----
rm(list=ls())
library(ggplot2)
library(cowplot)
library(paletteer)
library(gplots)
library(ggpubr)
library(ggsci)
library(stringr)
# 2.设置工作路径 ----
#在meta信息中添加分组信息
getwd()
dir.create("../4_group1")
setwd("../4_group1/")
sce=readRDS("../3-cell/sce_celltype_int_2.RDS")
table(sce@meta.data$group)
sce.all=sce[,sce$celltype%in%c("T/NK","B","Myeloid","cycling")]
sce=sce.all
# 4.可视化 ----
## 4.1 每种细胞类型中,分组所占比例 ----
library(tidyr)# 使用的gather & spread
library(reshape2) # 使用的函数 melt & dcast
library(dplyr)
library(ggplot2)
tb=table(sce$celltype,
sce$orig.ident)
balloonplot(tb)
head(tb)
bar_data <- as.data.frame(tb)
head(bar_data)
bar_per <- bar_data %>%
group_by(Var1) %>%
mutate(sum(Freq)) %>%
mutate(percent = Freq / `sum(Freq)`)
head(bar_per)
write.csv(bar_per,file = "celltype_by_group_percent.csv")
#可视化
#celltypebygroup
bar_per$Var2
bar_per$Var2 <- factor(bar_per$Var2,levels=c('HCC01T','HCC02T','HCC03T',
'HCC04T','HCC05T','HCC06T','HCC07T','HCC08T','HCC09T',
'HCC10T','HCC03N','HCC04N','HCC05N','HCC06N',
'HCC07N','HCC08N','HCC09N','HCC10N',"HCC07P","HCC08P","HCC10L"),ordered = TRUE)
library(ggalluvial)
mycolor = c('#E64B3599',
'#4DBBD599',
'#00A08799',
'#3C548899',
'#F39B7F99')
ggplot(bar_per, aes(x =Var2, y= percent, fill = Var1,
stratum=Var1, alluvium=Var1)) +
# guides(color=guide_legend(title = "ABC"))+
geom_col(width = 0.5, color='black')+
geom_flow(width=0.5,alpha=0.4, knot.pos=0.5)+
theme_classic() +
labs(x='sample',y ='Ratio',title = "by group"
)+
coord_flip()+
scale_fill_manual(values = mycolor)
ggsave("celltypebyorigent_percent_immune.pdf")
非免疫细胞
#同样的输入数据格式,提取非免疫细胞的子集
table(sce$celltype)
sce.all=sce[,sce$celltype%in%c("Endo","Hepatocyte","Fibo","tumor")]
sce=sce.all
#可视化
library(ggalluvial)
mycolor = c('#efb306',
'#7db954',
'#852f88',
'#4e54ac',
'#0f8096')
ggplot(bar_per, aes(x =Var2, y= percent, fill = Var1,
stratum=Var1, alluvium=Var1)) +
geom_col(width = 0.5, color='black')+
geom_flow(width=0.5,alpha=0.4, knot.pos=0.5)+
theme_classic() +
labs(x='sample',y ='Ratio',title = "by group"
)+
coord_flip()+
scale_fill_manual(values = mycolor)
ggsave("celltypebyorigent_percent_nonimmune.pdf")