文献复现及简介—胰腺癌的单细胞水平肿瘤异质性 https://mp.weixin.qq.com/s/gWz-Jl5baz4vRUjhLrYN7Q
文章中的细胞类型注释
Endo内皮; Mes,间质; B, B细胞; Hep,肝细胞; DC,树突状细胞; pDC,浆细胞样树突状细胞; Mac,巨噬细胞; T, T细胞; NK,自然杀伤细胞。
rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
getwd()
dir.create("3-cell")
setwd('3-cell/')
sce.all=readRDS( "../2-harmony/sce.all_int.rds")
sce.all
sce.all
sce=sce.all
library(ggplot2)
#细胞生物学命名marker
genes_to_check = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A',
'CD19', 'CD79A', 'MS4A1' ,
'IGHG1', 'MZB1', 'SDC1',
'CD68', 'CD163', 'CD14',
'TPSAB1' , 'TPSB2', # mast cells,
'RCVRN','FPR1' , 'ITGAM' ,
'C1QA', 'C1QB', # mac
'S100A9', 'S100A8', 'MMP19',# monocyte
'FCGR3A',
'LAMP3', 'IDO1','IDO2',## DC3
'CD1E','CD1C', # DC2
'KLRB1','NCR1', # NK
'FGF7','MME', 'ACTA2',"CD10", ## human fibo
'DCN', 'LUM', 'GSN' , ## mouse PDAC fibo
'MKI67' , 'TOP2A', 'PECAM1', 'VWF', ## endo
'EPCAM' , 'KRT19','KRT7', # epi
'FYXD2', 'TM4SF4', 'ANXA4',# cholangiocytes
'APOC3', 'FABP1', 'APOA1', # hepatocytes
'Serpina1c','PROM1', 'ALDH1A1',
"NKG7"#NKT
)
library(stringr)
genes_to_check=str_to_upper(genes_to_check)
genes_to_check
p <- DotPlot(sce.all, features = unique(genes_to_check),
assay='RNA' ) + coord_flip()
p
ggsave('check_last_markers.pdf',height = 11,width = 11)
#髓系
Mac=c("CD14", "CD163", "APOE", "C1QA", "C1QB", "C1QC")
pDC=c("LILRA4", "IL3RA","TCF4","TCL1A","CLEC4C")
DC1=c("CLEC9A", "XCR1", "BATF3")
DC2=c("CD1A", "FCER1A", "CD1C","CD1E","CLEC10A")
DC3=c("CCR7", "LAMP3", "FSCN1","CCL22","BIRC3")
Mono=c("VCVN", "FCN1", "S100A12", "S100A8", "S100A9",'FCGR3A')
genes_to_check =list(
Mac=Mac,
pDC=pDC,
DC1=DC1,
DC2=DC2,
DC3=DC3 ,
Mono=Mono
)
library(stringr)
genes_to_check = lapply(genes_to_check, str_to_upper)
p_all_markers=DotPlot(sce.all ,
features = genes_to_check,
scale = T,assay='RNA' )+
theme(axis.text.x=element_text(angle=45,hjust = 1))
p_all_markers
ggsave('check_myeloids_markers.pdf',height = 11,width = 11)
# 需要自行看图,定细胞亚群:
# 文章里面的 :
# 3,4,5: Mac
# 17:pDC
# 7: cDC2
# 13:Plasma
# 9:B
# 1,2,6,10,11,19,20:Epi-tumor
# 8:fibo
# 0,18:T/NK
# 12: T
# 16: Meyoid
# 15: Hep
# 14:14
# 21,22:干扰亚群
celltype=data.frame(ClusterID=0:22,
celltype= 0:22)
#定义细胞亚群
celltype[celltype$ClusterID %in% c(3,4,5 ),2]='Mac'
celltype[celltype$ClusterID %in% c( 17),2]='pDC'
celltype[celltype$ClusterID %in% c(7),2]='cDC2'
celltype[celltype$ClusterID %in% c( 13 ),2]='Plasma'
celltype[celltype$ClusterID %in% c( 9),2]='B'
celltype[celltype$ClusterID %in% c( 1,2,6,11,19,20),2]='Epi-tumor'
celltype[celltype$ClusterID %in% c( 8),2]='fibo'
celltype[celltype$ClusterID %in% c(0,18),2]='T/NK'
celltype[celltype$ClusterID %in% c(10),2]='10'
celltype[celltype$ClusterID %in% c(12),2]='T'
celltype[celltype$ClusterID %in% c( 16),2]='Meyoid'
celltype[celltype$ClusterID %in% c(15),2]='Hep'
celltype[celltype$ClusterID %in% c(14),2]='14'
celltype[celltype$ClusterID %in% c(21,22),2]='干扰亚群'
head(celltype)
celltype
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.0.8== celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)
th=theme(axis.text.x = element_text(angle = 45,
vjust = 0.5, hjust=0.5))
#可视化
library(patchwork)
p_all_markers=DotPlot(sce.all, features = genes_to_check,
assay='RNA' ,group.by = 'celltype' ) + coord_flip()+th
p_umap=DimPlot(sce.all, reduction = "umap", group.by = "celltype",label = T,label.box = T)
p_all_markers+p_umap
#ggsave('markers_umap_by_celltype_2.pdf',width = 13,height = 8)
sce.all=RunTSNE(sce.all, dims = 1:15,
reduction = "harmony")
p_tsne=DimPlot(sce.all, reduction = "tsne", group.by = "celltype",label = T)
p_all_markers+p_tsne
#ggsave('markers_tsne_by_celltype.pdf',width = 12,height = 8)
save(sce.all,file = 'sce-by-markers.Rdata')
# 去除干扰亚群
table(sce.all$celltype)
sce.all <- sce.all[, !(sce.all$celltype %in% c( '干扰亚群'))]
DimPlot(sce.all, reduction = "umap", group.by = "celltype",
label = T,label.box = T)
ggsave('last_umap_by_celltype.pdf',height = 7,width = 8)
library(ggsci)
color = c(pal_d3("category20")(20),
pal_d3("category20b")(20),
pal_d3("category20c")(20),
pal_d3("category10")(10))
p_tsne=DimPlot(sce.all, reduction = "tsne", group.by = "celltype",label = T,
label.box = T,
cols = color)
p_all_markers+p_tsne
ggsave('markers_tsne_by_celltype_end.pdf',width = 12,height = 8)
p_umap=DimPlot(sce.all, reduction = "umap", group.by = "celltype",label = T,label.box = T,
cols = color)
p_all_markers+p_umap
ggsave('markers_umap_by_celltype_end.pdf',width = 13,height = 8)
以上图来看给10,12,14亚群定义细胞分类,并没有那么明确,tsne图和umap图是有一些冲突的,「我想给10:Epi-tumor; 12:T, 14:fibo。」
如果这么给定的话,tsne图上还说的过去,但是umap图上就比较勉强,因为10的给定是关系到后续做infercnv的恶性和非恶性的划分,小伙伴们也可以自己尝试复现一下,「看看这几类细胞亚群定义成什么比较合适」~
同时也附上各个亚群的细胞数
「以T/NK细胞、内皮细胞、成纤维细胞和肝细胞为参照,显示用于分析恶性和非恶性的CNV评分(每个细胞改变的均方); 数据按非恶性(n = 15,302)和恶性(7,740)身份划分。」我们普遍是把Epi-tumor亚群定义为恶性细胞,后续就以部分继续做infercnv,尝试后续的复现~