导语
GUIDE ╲
近年来,免疫检查点阻断(ICB)在多种恶性肿瘤的治疗中取得了成功。然而,由于患者之间免疫治疗的异质性的存在,仍需要研究宿主-肿瘤的相互作用,特别是肿瘤微环境(TME)内的免疫细胞浸润,来确定可靠的精准治疗预测生物标志物。
背景介绍
新一代测序的发展使得公开可用的多组学数据快速积累。在肿瘤免疫学中,多组学的整合越来越受到重视,但也带来了计算和生物学方面的挑战!
今天小编给大家带来的是南方医科大学廖旺军教授团队曾东强博士(Y叔-余光创教授参与指导)开发的免疫研究工具--IOBR包,该R包2021年7月2日发表在Frontiers in Immunology杂志(IF=7.561)。IOBR集成了8种已发表的对肿瘤微环境 (TME) contexture进行decoding的方法:CIBERSORT、TIMER、xCell、MCPcounter、ESTIMATE、EPIC、IPS、quantTIseq。此外,IOBR收集了255个已发表的特征基因集,涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构。IOBR还采用了多种方法进行变量转换、可视化、批量生存分析、特征选择和统计分析,并且支持相应结果的批量分析和可视化。
使用文档链接:
https://iobr.github.io/IOBR/IOBR-VIGNETTE.html
IOBR 工作原理如下所述:
R包安装
## Requires R >= 3.6.3.
## Install IOBR and the packages the analyses depend on. Every install is
## guarded by requireNamespace() so that re-running the script does not
## reinstall packages that are already available.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("IOBR", quietly = TRUE)) {
  remotes::install_github("IOBR/IOBR")
}
if (!requireNamespace("maftools", quietly = TRUE)) {
  BiocManager::install("maftools")
}
if (!requireNamespace("EPIC", quietly = TRUE)) {
  # Use remotes here as well, so a single GitHub installer is needed.
  remotes::install_github("GfellerLab/EPIC", ref = "master")
}
if (!requireNamespace("estimate", quietly = TRUE)) {
  # 'estimate' is installed from the R-Forge repository given below.
  rforge <- "http://r-forge.r-project.org"
  install.packages("estimate", repos = rforge, dependencies = TRUE)
}

# Attach everything used by the snippets that follow.
library(IOBR)
library(EPIC)
library(estimate)
library(tidyverse)
library(tidyHeatmap)
library(maftools)
library(ggpubr)
library(ggplot2)
可视化展示
01
数据准备
使用UCSCXenaTools包获取TCGA的RNA-seq数据。
library(UCSCXenaTools)
## Download the TCGA stomach cancer (STAD) RNA-seq data, one step at a time:
## pick the cohort, pick the dataset, build the query, download, then load.
stad_cohort <- XenaGenerate(subset = XenaCohorts == "GDC TCGA Stomach Cancer (STAD)")
stad_dataset <- XenaFilter(stad_cohort, filterDatasets = "TCGA-STAD.htseq_counts.tsv")
stad_query <- XenaQuery(stad_dataset)
stad_files <- XenaDownload(stad_query)
eset_stad <- XenaPrepare(stad_files)
## Preview the first five rows and columns
eset_stad[1:5, 1:5]
转换成TPM矩阵:
# Strip the version suffix: keep only the first 15 characters of each ID
eset_stad$Ensembl_ID <- substr(eset_stad$Ensembl_ID, start = 1, stop = 15)
# Move the IDs out of the table and into the row names
eset_stad <- column_to_rownames(eset_stad, var = "Ensembl_ID")
# Reverse the transformation by applying 2^x - 1 to every value
# (presumably the downloaded values are log2(count + 1) -- confirm)
eset_stad <- 2^eset_stad - 1
# Convert the count matrix to TPM using Ensembl IDs for human ("hsa")
eset_stad <- count2tpm(
  countMat = eset_stad,
  idType = "Ensembl",
  org = "hsa"
)
GEO下载芯片数据GSE100935(胃癌),并进行注释
library("GEOquery")
# Download the GSE100935 series; files are written to the current directory.
# getGPL = FALSE is spelled out in full because `F` is an ordinary variable
# that can be reassigned.
eset_geo <- getGEO(
  GEO = "GSE100935",
  getGPL = FALSE,
  destdir = "./"
)
# Take the first element of the result and extract its expression matrix
eset <- eset_geo[[1]]
eset <- exprs(eset)
# Inspect the probe annotation table used below
head(anno_hug133plus2)
# Annotate the expression matrix with anno_hug133plus2, using its "probe_id"
# and "symbol" columns and method = "mean"
eset <- anno_eset(
  eset = eset,
  annotation = anno_hug133plus2,
  symbol = "symbol",
  probe = "probe_id",
  method = "mean"
)
02
特征基因集和TME反卷积模块
IOBR 整合了 255 个已发表的特征基因集,涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构等。在特征评分评估过程中采用了三种方法,包括单样本基因集富集分析(ssGSEA)、主成分分析(PCA)和Z-score。准备的输入数据是一个矩阵(log2(TPM+1) 转换),包含 98 个 TCGA-STAD 样本,行中为基因,列中为样本。对于单细胞数据,可以使用CIBERSORT、EPIC等多种工具计算免疫细胞比例
# Load the dataset named "eset_stad"
data("eset_stad")
## CIBERSORT
cibersort <- deconvo_tme(
  eset = eset_stad,
  method = "cibersort",
  arrays = FALSE,
  perm = 200
)
第一段落:第一行空两格,字体选择15px,字间距选择1(1-2),行间距1.5(1.75-2),两端缩进尺寸为1.0px(两侧边距5-20)
03
Phenotype module
IMvigor210 免疫治疗队列的数据用于表型相关特征的批量分析。
# Load both IMvigor210 datasets (signature scores and phenotype data) at once
data("imvigor210_sig", "imvigor210_pdata")
识别表型相关特征
# Keep only the patients with a known binary response. Both a real NA and
# the literal string "NA" are treated as missing; comparing against "NA"
# alone would turn real NA values into all-NA rows.
pdata_group <- imvigor210_pdata[
  !is.na(imvigor210_pdata$BOR_binary) & imvigor210_pdata$BOR_binary != "NA",
  c("ID", "BOR", "BOR_binary")
]
## Tumour signatures (tumor_signature)
# Compare signature scores between the BOR_binary groups; results are
# written under the directory given by `path`.
res <- iobr_cor_plot(
  pdata_group = pdata_group,
  id1 = "ID",
  feature_data = imvigor210_sig,
  id2 = "ID",
  target = NULL,
  group = "BOR_binary",
  is_target_continuous = FALSE,
  padj_cutoff = 1,
  index = 1,
  category = "signature",
  signature_group = sig_group[c(1, 3, 5)],
  ProjectID = "IMvigor210",
  palette_box = "paired1",
  palette_corplot = "pheatmap",
  palette_heatmap = 2,
  feature_limit = 26,
  character_limit = 30,
  show_heatmap_col_name = FALSE,
  show_col = FALSE,
  show_plot = TRUE,
  path = "1-BOR-relevant-signatures"
)
可视化表型相关特征基因
## CD_8_T_effector
# Gene-level comparison between the BOR_binary groups. Arguments are grouped
# by role; all are passed by name, so their order does not matter.
res <- iobr_cor_plot(
  # inputs and the ID columns used to match them
  pdata_group = pdata_group,
  feature_data = imvigor210_eset,
  id1 = "ID",
  id2 = "ID",
  # grouping variable (no continuous target here)
  target = NULL,
  group = "BOR_binary",
  is_target_continuous = FALSE,
  # which features to analyse
  category = "gene",
  signature_group = signature_collection[c(1:2, 4)],
  padj_cutoff = 1,
  feature_limit = 26,
  character_limit = 30,
  # output location and labelling
  ProjectID = "IMvigor210",
  index = 1,
  path = "2-BOR-relevant-genes",
  # plot appearance
  palette_box = "paired1",
  palette_corplot = "pheatmap",
  palette_heatmap = 4,
  show_heatmap_col_name = FALSE,
  show_col = FALSE,
  show_plot = TRUE
)
估计 lncRNA 相关特征
## tumor_signature
# Build the phenotype table from the expression matrix: HCP5 and LINC00657
# are genes, so they are not columns of the clinical `pdata_group` created
# earlier, and selecting them from that table fails with "undefined columns".
# Assumes imvigor210_eset is genes x samples with gene symbols as row names,
# so that its transpose has one row per sample -- confirm.
pdata_group <- rownames_to_column(
  as.data.frame(t(imvigor210_eset)),
  var = "ID"
)
pdata_group <- as.data.frame(pdata_group[, c("ID", "HCP5", "LINC00657")])
head(pdata_group)
# Relate the signature scores to the continuous target HCP5
res <- iobr_cor_plot(
  pdata_group = pdata_group,
  id1 = "ID",
  feature_data = imvigor210_sig,
  id2 = "ID",
  target = "HCP5",
  group = "group3",
  is_target_continuous = TRUE,
  padj_cutoff = 1,
  index = 1,
  category = "signature",
  signature_group = sig_group[1:3],
  ProjectID = "IMvigor210",
  palette_box = "set2",
  palette_corplot = "pheatmap",
  palette_heatmap = 2,
  feature_limit = 26,
  character_limit = 30,
  show_heatmap_col_name = FALSE,
  show_col = FALSE,
  show_plot = TRUE,
  path = "3-HCP5-relevant-signatures"
)
识别与目标特征相关的特征
# Phenotype table with the sample ID and the Pan_F_TBRs score
pdata_group <- as.data.frame(imvigor210_pdata[, c("ID", "Pan_F_TBRs")])
# Convert the score to numeric and standardise it with scale()
pdata_group$Pan_F_TBRs <- scale(as.numeric(pdata_group$Pan_F_TBRs))
head(pdata_group)
# Relate the signature scores to the continuous target Pan_F_TBRs. Arguments
# are grouped by role; all are passed by name, so order does not matter.
res <- iobr_cor_plot(
  # inputs and the ID columns used to match them
  pdata_group = pdata_group,
  feature_data = imvigor210_sig,
  id1 = "ID",
  id2 = "ID",
  # continuous target
  target = "Pan_F_TBRs",
  group = "group3",
  is_target_continuous = TRUE,
  # which features to analyse
  category = "signature",
  signature_group = sig_group[1:2],
  padj_cutoff = 1,
  feature_limit = 26,
  character_limit = 30,
  # output location and labelling
  ProjectID = "IMvigor210",
  index = 5,
  path = "5-Pan_F_TBRs-relevant-signatures",
  # plot appearance
  palette_box = "set2",
  palette_corplot = "pheatmap",
  palette_heatmap = 2,
  show_heatmap_col_name = FALSE,
  show_col = FALSE,
  show_plot = TRUE
)
评估 Pan-F-TBRs 相关的 TME 细胞浸润
# Same analysis against Pan_F_TBRs, restricted to sig_group[20:24].
# Arguments are grouped by role; all are passed by name, so order does not
# matter.
res <- iobr_cor_plot(
  # inputs and the ID columns used to match them
  pdata_group = pdata_group,
  feature_data = imvigor210_sig,
  id1 = "ID",
  id2 = "ID",
  # continuous target
  target = "Pan_F_TBRs",
  group = "group3",
  is_target_continuous = TRUE,
  # which features to analyse
  category = "signature",
  signature_group = sig_group[20:24],
  padj_cutoff = 1,
  feature_limit = 26,
  character_limit = 30,
  # output location and labelling
  ProjectID = "IMvigor210",
  index = 6,
  path = "6-Pan_F_TBRs-relevant-TME-cell",
  # plot appearance
  palette_box = "jco",
  palette_corplot = "pheatmap",
  palette_heatmap = 3,
  show_heatmap_col_name = FALSE,
  show_col = FALSE,
  show_plot = TRUE
)
04
Mutation module
# Mutation data, option 1: a MAF file. The path is relative, so the file has
# to be downloaded into the working directory before this runs.
maf_file <- "TCGA.STAD.mutect.c06465a3-50e7-46f7-b2dd-7bd654ca206b.DR-10.0.somatic.maf"
# isTCGA = TRUE is spelled out in full because `T` is an ordinary variable
# that can be reassigned.
mut_list <- make_mut_matrix(
  maf = maf_file,
  isTCGA = TRUE,
  category = "multi"
)
# Mutation data, option 2: fetch the STAD mutect2 SNV table through
# UCSCXenaTools and build the matrices from that data frame instead.
var_stad <- XenaGenerate(subset = XenaCohorts == "GDC TCGA Stomach Cancer (STAD)") %>%
  XenaFilter(filterDatasets = "TCGA-STAD.mutect2_snv.tsv") %>%
  XenaQuery() %>%
  XenaDownload() %>%
  XenaPrepare()
# The remaining arguments name the columns of var_stad to use for each field.
mut_list2 <- make_mut_matrix(
  mut_data = var_stad,
  category = "multi",
  Tumor_Sample_Barcode = "Sample_ID",
  Hugo_Symbol = "gene",
  Variant_Classification = "effect",
  Variant_Type = "Variant_Type"
)
分析特征相关的突变
# Use the `snp` element of the mutation list built from the MAF file
mut <- mut_list$snp
# Look for mutations associated with the CD_8_T_effector signature.
# Changes from the earlier version: TRUE instead of T, and the save path is a
# plain string (paste0() around a single literal added nothing).
res <- find_mutations(
  mutation_matrix = mut,
  signature_matrix = tcga_stad_sig,
  id_signature_matrix = "ID",
  signature = "CD_8_T_effector",
  min_mut_freq = 0.01,
  plot = TRUE,
  method = "Wilcoxon",
  save_path = "7-CD_8_T_effector-relevant-mutations",
  palette = "jco",
  show_plot = TRUE,
  width = 8,
  height = 4,
  oncoprint_group_by = "mean",
  oncoprint_col = "#224444",
  gene_counts = 10
)
05
Model Construction Module
IOBR 还提供特征选择、生物标志物识别和基于探索表型相关特征中预先确定的表型相关生物标志物的模型构建功能,用于预测模型构建。
data("imvigor210_sig")
data("imvigor210_pdata")
# Binary outcome: attach BOR_binary to the signature scores and drop samples
# whose response is missing (a real NA or the literal string "NA").
input <- imvigor210_pdata %>%
  dplyr::select(ID, BOR_binary) %>%
  dplyr::inner_join(imvigor210_sig, by = "ID") %>%
  dplyr::filter(!is.na(BOR_binary), BOR_binary != "NA")
# Feature engineering: Wilcoxon test of every signature between NR and R.
# The feature columns are everything except the ID and the outcome; naming
# them is safer than relying on their position (3:ncol(input)).
res <- batch_wilcoxon(
  data = as.data.frame(input),
  target = "BOR_binary",
  group_names = c("NR", "R"),
  feature = setdiff(colnames(input), c("ID", "BOR_binary"))
)
head(res)
# Keep the signatures with p < 0.05 as candidate model features
model_feas <- as.character(res[res$p.value < 0.05, ]$sig_names)
input <- as.data.frame(imvigor210_sig)
feas <- colnames(input)[colnames(input) %in% model_feas]
input <- input[, c("ID", feas)]
# Target data: same missing-response rule as above, then recode R -> 1 and
# everything else -> 0.
pdata_group <- imvigor210_pdata[
  !is.na(imvigor210_pdata$BOR_binary) & imvigor210_pdata$BOR_binary != "NA",
  c("ID", "BOR_binary")
]
pdata_group$BOR_binary <- ifelse(pdata_group$BOR_binary == "R", 1, 0)
# Feature selection / model fitting
binomial_result <- BinomialModel(
  x = input,
  y = pdata_group,
  seed = "123456",
  scale = TRUE,
  train_ratio = 0.7,
  nfold = 10,
  plot = TRUE
)
plot(binomial_result$lasso_result$model)
小编总结
IOBR 提供四个主要的分析模块,允许对肿瘤免疫学、临床、基因组学和单细胞数据进行有效和灵活的分析。作为一个整合多组学数据的软件包,IOBR的功能是十分强大的,在使用的过程中还可以同时使用一些其他的软件或者算法,对多组学数据进行分析,从而对特征评分计算以及其与临床表型相关性的系统估计、非编码RNA特征、来自多种癌症的scRNA-seq数据和基因组特征,以及多种TME反卷积算法和快速的signature构建来对肿瘤微环境进行分析和可视化。