Bertucci F, Ng CKY, Patsouris A, et al. Genomic characterization of metastatic breast cancers [published correction appears in Nature. 2019 Aug;572(7767):E7. doi: 10.1038/s41586-019-1380-3.]. Nature. 2019;569(7757):560-564. doi:10.1038/s41586-019-1056-z
文章对应的数据并没有公开,但是我们可以生成虚拟数据来复现一下,后面可以把自己的数据放进来试试看——
# === Step 1: simulate mutation data ===
# The article's data are not public, so a random gene x sample alteration
# matrix stands in for it; swap in real data later.
# NOTE(review): the seed and the number of samples were missing from this
# script. 123 and 100 are placeholders -- change them freely.
set.seed(123)
n_samples <- 100
samples <- paste0("S", seq_len(n_samples))
genes <- c(
  "TP53", "PIK3CA", "ESR1", "GATA3", "CDH1",
  "PTEN", "RB1", "NF1", "AKT1", "MAP3K1"
)
alterations <- c(
  "Hotspot", "Missense", "Amplification", "Stop_codon",
  "Splice_site", "Frameshift", "Homozygous_deletion", "Insertion_deletion"
)

# Each cell holds one alteration type, or NA for "no alteration".
# Every alteration type is drawn with weight 0.1 and NA with weight 0.2.
mutation_matrix <- matrix(
  sample(
    c(alterations, NA),
    size = length(genes) * length(samples),
    replace = TRUE,
    prob = c(rep(0.1, length(alterations)), 0.2)
  ),
  nrow = length(genes),
  dimnames = list(genes, samples)
)

# Unaltered cells must be "" rather than NA (later code tests `!= ""`).
mutation_list <- mutation_matrix
mutation_list[is.na(mutation_list)] <- ""
# === Step 2: simulate clinical annotations ===
# One row per sample, in the same order as `samples`. Every column is drawn
# uniformly at random, with replacement, from its set of category labels.
clinical <- data.frame(
  Sample = samples,
  Subtype = sample(
    c("HR+/HER2-", "HER2+", "TNBC", "UN"),
    size = length(samples), replace = TRUE
  ),
  Biopsy = sample(
    c("Breast", "Lung", "Liver", "Lymph node", "Skin", "Other"),
    size = length(samples), replace = TRUE
  ),
  ChemoLines = sample(
    c("0", "1", "2", ">3"),
    size = length(samples), replace = TRUE
  ),
  HormoneTx = sample(c("Yes", "No"), size = length(samples), replace = TRUE),
  CDK4i = sample(c("Yes", "No"), size = length(samples), replace = TRUE),
  MTORi = sample(c("Yes", "No"), size = length(samples), replace = TRUE),
  Delay = sample(
    c("0-12m", "12-24m", ">24m"),
    size = length(samples), replace = TRUE
  )
)
# === Step 3: alteration colours and drawing functions ===
# One fill colour per alteration type; the names must match the values stored
# in the mutation matrix.
col <- c(
  "Hotspot" = "#E41A1C", "Missense" = "#377EB8", "Amplification" = "#4DAF4A",
  "Stop_codon" = "#984EA3", "Splice_site" = "#FF7F00", "Frameshift" = "#FFFF33",
  "Homozygous_deletion" = "#A65628", "Insertion_deletion" = "#F781BF"
)

# `alter_fun` is a named list of cell-drawing functions taking the cell centre
# (x, y) and size (w, h): "background" paints a light-grey cell, and each
# alteration type paints a rectangle at 90% of the cell size in its own colour.
alter_fun <- c(
  list(
    background = function(x, y, w, h) {
      grid.rect(x, y, w, h, gp = gpar(fill = "#F0F0F0", col = NA))
    }
  ),
  # lapply() keeps the names of `col`; each closure captures its own colour.
  lapply(col, function(fill_col) {
    force(fill_col)
    function(x, y, w, h) {
      grid.rect(x, y, w * 0.9, h * 0.9, gp = gpar(fill = fill_col, col = NA))
    }
  })
)
# === Step 4: colour maps for the clinical annotations ===
# Named vectors mapping each category label to its display colour.
subtype_col <- c(
  "HR+/HER2-" = "#66C2A5", "HER2+" = "#FC8D62",
  "TNBC" = "#8DA0CB", "UN" = "grey60"
)
biopsy_col <- c(
  "Breast" = "orange", "Lung" = "skyblue", "Liver" = "red",
  "Lymph node" = "green", "Skin" = "purple", "Other" = "gray"
)
# === Step 5: build the top annotation ===
# First call into ComplexHeatmap in this script, so make sure the packages
# are attached (library() is harmless if they already are).
library(ComplexHeatmap)
library(grid)

# Bar plot on top: number of altered genes per sample (column).
mutation_count <- colSums(mutation_list != "")

# NOTE(review): the annotation heights and font sizes were missing from this
# script; 15 mm / 4 mm and 8 pt are placeholders -- tune to taste.
top_anno <- HeatmapAnnotation(
  MutCount = anno_barplot(
    mutation_count,
    gp = gpar(fill = subtype_col[clinical$Subtype]), # bars coloured by subtype
    border = FALSE
  ),
  Subtype = clinical$Subtype,
  Biopsy = clinical$Biopsy,
  Chemo = clinical$ChemoLines,
  HormoneTx = clinical$HormoneTx,
  CDK4i = clinical$CDK4i,
  MTORi = clinical$MTORi,
  Delay = clinical$Delay,
  col = list(
    Subtype = subtype_col,
    Biopsy = biopsy_col,
    Chemo = c("0" = "white", "1" = "lightblue", "2" = "blue", ">3" = "darkblue"),
    HormoneTx = c("Yes" = "pink", "No" = "gray"),
    CDK4i = c("Yes" = "gold", "No" = "gray"),
    MTORi = c("Yes" = "green", "No" = "gray"),
    Delay = c("0-12m" = "#FFFFB3", "12-24m" = "#FDB462", ">24m" = "#B3DE69")
  ),
  # One height per annotation: the bar plot first, then the 7 categorical tracks.
  annotation_height = unit(c(15, rep(4, 7)), "mm"),
  annotation_name_gp = gpar(fontsize = 8), # font of the annotation labels
  # Graphic parameters for the categorical tracks.
  # NOTE(review): the original comment says this sets their font -- confirm
  # that `fontsize` has a visible effect here.
  gp = gpar(fontsize = 8)
)
原文献中左侧的注释是指:行代表的基因在不同亚群中是否显著突变
# Binary matrix (TRUE/FALSE), genes x subtypes: is the gene altered in at
# least one sample of that subtype?
# NOTE(review): this rule is an arbitrary stand-in chosen for the simulated
# data; replace it with a real significance call when using real data.
subtypes <- names(subtype_col)
mut_binary_mat <- matrix(
  FALSE,
  nrow = length(genes), ncol = length(subtypes),
  dimnames = list(genes, subtypes)
)
for (subtype in subtypes) {
  # Select columns by sample name so the result does not rely on
  # `clinical` and `mutation_list` sharing the same column order.
  subtype_samples <- clinical$Sample[clinical$Subtype == subtype]
  altered <- mutation_list[genes, subtype_samples, drop = FALSE] != ""
  mut_binary_mat[, subtype] <- rowSums(altered) > 0
}

# Colour matrix with the same shape: TRUE cells take their subtype's colour,
# FALSE cells stay white.
col_mat <- matrix(
  "white",
  nrow = nrow(mut_binary_mat), ncol = ncol(mut_binary_mat),
  dimnames = dimnames(mut_binary_mat)
)
for (j in seq_len(ncol(mut_binary_mat))) {
  col_mat[mut_binary_mat[, j], j] <- subtype_col[[colnames(mut_binary_mat)[j]]]
}
⬆ 这段代码的作用是:生成一个颜色矩阵 col_mat,用于可视化每个基因在每个乳腺癌亚型中是否有突变的情况。它的逻辑非常简单直观:
• mut_binary_mat 是一个逻辑矩阵,行是基因,列是亚型,每个元素为 TRUE 或 FALSE,表示该基因是否在该亚型中至少有一个样本发生突变【!!!要注意这是虚拟数据,所以这个定义只是我随意写的!】。总结原文献的标准:必须是 MutSigCV 评估下具有统计学显著性(q < 0.1),且在至少 1% 的转移性乳腺癌样本中发生突变的基因。
• subtype_col 是一个亚型颜色对照表,例如:subtype_col <- c("HR+/HER2-" = "#66C2A5", "HER2+" = "#FC8D62", "TNBC" = "#8DA0CB", "UN" = "grey60")
col_mat <- matrix("white", nrow = nrow(mut_binary_mat), ncol = ncol(mut_binary_mat),
dimnames=dimnames(mut_binary_mat))
这一步初始化颜色矩阵 col_mat,尺寸与 mut_binary_mat 相同;初始值全部为 "white"(表示没有突变时用白色);dimnames = dimnames(mut_binary_mat) 保留了行名(基因)和列名(亚型),方便之后对照。
for (j in seq_len(ncol(mut_binary_mat))) {
col_mat[mut_binary_mat[, j], j]<- subtype_col[colnames(mut_binary_mat)[j]]
}
这段 for 循环的意思是:
• 逐列(即逐个亚型)遍历 mut_binary_mat;
• 找出该列中值为 TRUE(即有突变)的行;
• 在 col_mat 中把这些 TRUE 的位置填上该亚型对应的颜色(从 subtype_col 中取)。
举个例子,当第 j 列是 "HER2+" 时:mut_binary_mat[, "HER2+"] 是这个亚型对应的一个逻辑向量,告诉我们哪些基因有突变;col_mat[mut_binary_mat[, j], j] <- subtype_col[colnames(mut_binary_mat)[j]] 的作用就是把值为 TRUE 的行,在第 j 列中,填上 "HER2+" 对应的颜色 #FC8D62。
你得到的 col_mat 是一个颜色矩阵,形如:

| | HR+/HER2- | HER2+ | TNBC | UN |
|---|---|---|---|---|
| TP53 | #66C2A5 | white | #8DA0CB | white |
| ESR1 | white | white | white | grey60 |
| PIK3CA | white | #FC8D62 | white | white |

这个颜色矩阵可以直接用来给 Heatmap() 画颜色格子,表达“在哪些亚型中这个基因突变过”。
# Heatmap() can draw a matrix of colour names directly when every distinct
# colour is mapped to itself, i.e. col = structure(colours, names = colours).
unique_cols <- unique(as.vector(col_mat))
unique_cols <- structure(unique_cols, names = unique_cols)

# Left-hand panel: genes x subtypes colour grid, kept in the given row and
# column order, with subtype names shown and no legend.
ht_left <- Heatmap(
  col_mat,
  col = unique_cols,
  show_heatmap_legend = FALSE,
  show_column_names = TRUE,
  show_row_names = FALSE,
  cluster_columns = FALSE,
  cluster_rows = FALSE
)
在一般情况下,左侧注释可以使用 rowAnnotation 来实现。然而,由于 anno_simple 无法为每一列(即每个亚群)分别指定不同的颜色,同时也不支持单独显示列名,因此我们改用 Heatmap 来实现该注释。
# Main panel: the oncoPrint itself, with samples ordered by subtype and the
# clinical tracks on top.
# NOTE(review): the row-name font size was missing from this script; 10 pt is
# a placeholder.
ht_main <- oncoPrint(
  mutation_list,
  alter_fun = alter_fun,
  col = col,
  show_heatmap_legend = FALSE,
  top_annotation = top_anno,
  show_column_names = FALSE,
  column_order = order(clinical$Subtype),
  remove_empty_columns = TRUE,
  row_names_gp = gpar(fontsize = 10)
)

# Draw the subtype grid to the left of the oncoPrint and merge the legends.
draw(ht_left + ht_main, merge_legend = TRUE)