前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >单细胞数据复现-肺癌文章代码复现9

单细胞数据复现-肺癌文章代码复现9

作者头像
小胡子刺猬的生信学习123
修改2022-07-11 12:15:26
8570
修改2022-07-11 12:15:26
举报
文章被收录于专栏:文献分享及代码学习

单细胞数据复现-肺癌文章代码复现1https://cloud.tencent.com/developer/article/1992648

单细胞数据复现-肺癌文章代码复现2https://cloud.tencent.com/developer/article/1995619

单细胞数据复现-肺癌文章代码复现3https://cloud.tencent.com/developer/article/1996043

单细胞数据复现-肺癌文章代码复现4https://cloud.tencent.com/developer/article/2006654

单细胞数据复现-肺癌文章代码复现5https://cloud.tencent.com/developer/article/2008487

单细胞数据复现-肺癌文章代码复现6https://cloud.tencent.com/developer/article/2008704

单细胞数据复现-肺癌文章代码复现7https://cloud.tencent.com/developer/article/2019634

单细胞数据复现-肺癌文章代码复现8https://cloud.tencent.com/developer/article/2026259

细胞比例计算

代码语言:javascript
复制
###cell proportion statistics

test <- cell_counts_rel %>%
  mutate(pattern = ifelse(patient_id %in% c("p032", "p018", "p019", "p024", "p031", "p033"), "N3MC", "CP2E")) %>%
  mutate(T_CD8_exhausted = T_CD8_1 + T_CD8_2)

#for Kim et al. dataset
#test <- cell_counts_rel %>%
#  mutate(pattern = ifelse(patient_id %in% c("P0009", "P0018", "P0020", "P0028"), "CP2E", "N3MC")) %>%
#  mutate(T_CD8_exhausted = T_CD8_1 + T_CD8_2)

ggplot(test, aes(x = pattern, y = CD14_Macrophages1)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = CD14_Macrophages1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_CD14_Macrophages1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = CD14_Macrophages2)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = CD14_Macrophages2~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_CD14_Macrophages2.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myeloid_Dendritic)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = Myeloid_Dendritic~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myeloid_Dendritic.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Plasmacytoid_Dendritic)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = Plasmacytoid_Dendritic~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Plasmacytoid_Dendritic.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myofibroblast1)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = Myofibroblast1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myofibroblast1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myofibroblast2)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = Myofibroblast2~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myofibroblast2.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = T_conv1)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = T_conv1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_T_conv1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = NK_cells)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = NK_cells~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_NK_cells.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = T_CD8_exhausted)) +
  geom_boxplot() +
  ggtitle(paste0("p = ", wilcox.test(formula = T_CD8_exhausted~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_T_CD8_exhausted.pdf", path = "../results", width = 10, height = 10, units = "cm")
图片.png
图片.png

拼图的时候发现我又缺了一张图,天天丢来丢去的我,不过其他的图跟这个代码是类似的,所以基本上按照上面的代码,结果还是可以出来的。

这次的内容紧接着上次的复现7的内容。

代码语言:javascript
复制
###select only cell types that occured in at least three patients

celltype_selected <- cell_counts_rel
celltype_selected[celltype_selected == 0] <- NA
celltype_selected <- cell_counts_rel[, which(colMeans(!is.na(celltype_selected)) >= 0.3)]

cell_counts_rel_selected <- cell_counts_rel[, colnames(cell_counts_rel) %in% colnames(celltype_selected)]

cell_counts_rel_mtrx <- as.matrix(cell_counts_rel_selected[,-1])
rownames(cell_counts_rel_mtrx) <- cell_counts_rel_selected$patient_id

下面是开始以前的PCA分析。

代码语言:javascript
复制
###principal component analysis

cell_counts_pca <- as.data.frame(scores(pca(cell_counts_rel_mtrx, nPcs = 10)))
cell_counts_pca$patient_id <- rownames(cell_counts_pca)


histo_data <- unique(FetchData(epi_tumor, c("patient_id", "histo_subtype")))
cell_counts_pca <- full_join(cell_counts_pca, histo_data, by = "patient_id")

ggplot(cell_counts_pca, aes(x = PC1, y = PC2, color = histo_subtype, label = patient_id)) +
  geom_point(size = 2) +
  geom_text(aes(label = patient_id), color = "black", vjust = 1.5, size = 2) +
  scale_color_manual(values = use_colors) +
  scale_x_continuous(limits = c(-1, 1.2)) +
  scale_y_continuous(limits = c(-1.2, 0.8))
ggsave2("Fig5A.pdf", path = "../results", width = 15, height = 9, units = "cm")


###add mean tumor cell signature score

epi_pca <- AddModuleScore(epi_pca, features = list(c("SCGB3A1", "PIGR", "NAPSA", "C4BPA", "SCGB3A2", "HLA-DRA", "CD74", "ADGRF5", "C16orf89", "FOLR1", "SELENBP1", "HLA-DRB1", "ID4", "MGP", "AQP3", "CA2", "LHX9", "HLA-DPB1", "FMO5", "GKN2", "C5", "MUC1", "NPC2", "RNASE1", "PIK3C2G", "SFTA2", "SLC34A2", "HLA-DPA1", "FGFR3", "PGC")), name = "tumor_signature")
epi_pca <- AddModuleScore(epi_pca, features = list(c("DSG2", "CAMK2N1", "FAM3C", "KRT7", "IFI27", "SLC2A1", "MARCKS", "PLAU", "AHNAK2", "PERP", "S100A4", "KRT19", "COL6A1", "UACA", "COL17A1", "CDA", "TPM2", "S100A16", "KRT8", "PRSS23", "DST", "LAMC2", "S100P", "PRSS3", "LAMA3", "DSP", "ITGA3", "MDK", "FAM83A", "ITGB4")), name = "anti_tumor_signature")

epi_tumor_data <- FetchData(epi_pca, c(colnames(epi_pca@meta.data)))

epi_tumor_signature <- epi_tumor_data %>%
  group_by(patient_id) %>% 
  mutate(diff_mean = mean(tumor_signature1)) %>%
  mutate(undiff_mean = mean(anti_tumor_signature1)) %>%
  dplyr::select(patient_id, diff_mean, undiff_mean) %>%
  unique()

###heatmap of normalized immune and stromal cell counts, and mean tumor cell signatures

cell_counts_rel_heatmap <- full_join(cell_counts_rel_selected, cell_counts_pca, by = "patient_id")
cell_counts_rel_heatmap <- full_join(cell_counts_rel_heatmap, epi_tumor_signature, by = "patient_id")

cell_counts_rel_heatmap_ordered <- cell_counts_rel_heatmap[order(cell_counts_rel_heatmap$PC1),]

##R语言中的ncol()函数用于返回指定矩阵的列数
cell_counts_rel_heatmap1 <- cell_counts_rel_heatmap_ordered[,2:ncol(celltype_selected)]
cell_counts_rel_heatmap2 <- cell_counts_rel_heatmap_ordered[,c("diff_mean", "undiff_mean")]

rownames(cell_counts_rel_heatmap1) <- cell_counts_rel_heatmap_ordered$patient_id
rownames(cell_counts_rel_heatmap2) <- cell_counts_rel_heatmap_ordered$patient_id

##scale()函数标准化时使用的样本标准分的计算方法
cell_counts_rel_heatmap1 <- scale(cell_counts_rel_heatmap1)

heatmap.2(as.matrix(t(cell_counts_rel_heatmap1)), trace = "none", density.info = "none", scale = "none", symbreaks = F, col = hcl.colors(100, palette = "orrd", rev = T), Rowv = T, Colv = F, dendrogram = "row", margins = c(5,10))
ggsave("Fig5B_immune_and_stromal_cells.pdf", width = 7, height = 7)

heatmap.2(as.matrix(t(cell_counts_rel_heatmap2)), trace = "none", density.info = "none", scale = "none", col = bluered(100), Rowv = F, Colv = F, dendrogram = "none")
ggsave("Fig5B_tumor_cell_signatures.pdf", width = 7, height = 5)

相关性图绘制

代码语言:javascript
复制
###correlation plots
##可以用于后续画图的比较***
# matrix of the p-value of the correlation (from http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram)
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], method = "spearman", ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

p.mat <- cor.mtest(cell_counts_rel_mtrx)

test <- cor(cell_counts_rel_mtrx, method = "spearman")

png("SuppFig8A.png", width = 30, height = 30, units = "cm", res = 600)
corrplot(test, p.mat = p.mat, sig.level = 0.05, insig = "blank", type = "full", tl.col = "black")
dev.off()

#pdf("output/fig5/corrplot.pdf", width = 7, height = 7)
#corrplot(test, p.mat = p.mat, sig.level = 0.05, insig = "blank", type = "full", tl.col = "black")
#dev.off()

### correlation network plot
##后续可以用***
test_net <- test

for (i in 1:nrow(test_net)){
  for (j in 1:ncol(test_net)){
    if(p.mat[i,j] > 0.05){
      test_net[i,j] <- 0
    }
    if(test_net[i,j] == 1){
      test_net[i,j] <- 0
    }
  }
}

library(qgraph)

pdf("Fig5C.pdf", width = 6, height = 6)
qgraph(test_net, layout = "spring", threshold = 0.7, labels = colnames(test_net), label.scale = F, label.cex = 0.7, theme = "colorblind", vsize = 3, color = "grey", borders = F)
dev.off()


#####scatter plots

cell_counts_rel <- mutate(cell_counts_rel, T_CD8_exhausted = T_CD8_1+T_CD8_2)

###plots stromal

ggplot(cell_counts_rel, aes(x = Myofibroblast1, y = Myofibroblast2)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.8)) +
  scale_y_continuous(limits = c(0,0.8)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$Myofibroblast1, cell_counts_rel$Myofibroblast2, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$Myofibroblast1, cell_counts_rel$Myofibroblast2, method = "spearman")$estimate))
ggsave2("Fig3E.pdf", path = "../results", width = 10, height = 9, units = "cm")


###plots immune

ggplot(cell_counts_rel, aes(x = CD14_Macrophages1, y = CD14_Macrophages2)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.6)) +
  scale_y_continuous(limits = c(0,0.8)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages1, cell_counts_rel$CD14_Macrophages2, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$CD14_Macrophages1, cell_counts_rel$CD14_Macrophages2, method = "spearman")$estimate))
ggsave2("Fig4D_Macrophages1_2.pdf", path = "../results", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = CD14_Macrophages2, y = Plasmacytoid_Dendritic)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.8)) +
  scale_y_continuous(limits = c(0,0.06)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$Plasmacytoid_Dendritic, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$Plasmacytoid_Dendritic, method = "spearman")$estimate))
ggsave2("Fig4D_Macrophages2_pDC.pdf", path = "../results", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = T_reg, y = T_CD8_exhausted)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.5)) +
  scale_y_continuous(limits = c(0,0.85)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$T_reg, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$T_reg, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_T_reg_exhausted.pdf",  width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = CD14_Macrophages2, y = T_CD8_exhausted)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.8)) +
  scale_y_continuous(limits = c(0,0.85)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_Macrophages_exhausted.pdf", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = Plasmacytoid_Dendritic, y = T_CD8_exhausted)) +
  geom_point(aes(color = patient_id), size = 4) +
  scale_x_continuous(limits = c(0,0.06)) +
  scale_y_continuous(limits = c(0,0.85)) +
  scale_color_manual(values = use_colors) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$Plasmacytoid_Dendritic, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "\nr = ",cor.test(cell_counts_rel$Plasmacytoid_Dendritic, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_pDC_exhausted.pdf",  width = 10, height = 9, units = "cm")

compare cell counts with IHC quantification

代码语言:javascript
复制
###compare cell counts with IHC quantification

CD123 <- read.table("Quantification_CD123.txt")
CD123_scrnaseq <- cell_counts_rel %>% select(Plasmacytoid_Dendritic)
CD123 <- full_join(CD123, CD123_scrnaseq, by = "patient_id")
CD123 <- pivot_longer(CD123, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CD123_average <- CD123 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, Plasmacytoid_Dendritic, mean, sd)) %>% unique()
ggplot() +
  geom_jitter(data = CD123, mapping = aes(x = Plasmacytoid_Dendritic, y = count, color = patient_id), size = 0.5, alpha = 0.3) +
  geom_smooth(data = CD123_average, mapping = aes(x = Plasmacytoid_Dendritic, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black") +
  geom_errorbar(data = CD123_average, aes(x = Plasmacytoid_Dendritic, ymin=mean-sd, ymax=mean+sd, color = patient_id), size = 0.5, width = 0.002) +
  geom_point(data = CD123_average, aes(x = Plasmacytoid_Dendritic, y=mean, color = patient_id), shape=15, size = 3) +
  scale_color_manual(values = use_colors) +
  ggtitle(paste0("p = ",cor.test(CD123_average$mean, CD123_average$Plasmacytoid_Dendritic, method = "pearson")$p.value, 
                 "\nr = ",cor.test(CD123_average$mean, CD123_average$Plasmacytoid_Dendritic, method = "pearson")$estimate))
ggsave2("Fig4E_pDC_CD123.pdf", path = "../results", width = 10, height = 9, units = "cm")

CTHRC1 <- read_excel("Quantification_CTHRC1.xlsx")
CTHRC1_scrnaseq <- cell_counts_rel %>% select(Myofibroblast2)
CTHRC1 <- full_join(CTHRC1, CTHRC1_scrnaseq, by = "patient_id")
CTHRC1 <- pivot_longer(CTHRC1, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CTHRC1_average <- CTHRC1 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, Myofibroblast2, mean, sd)) %>% unique()
ggplot() +
  geom_jitter(data = CTHRC1, mapping = aes(x = Myofibroblast2, y = count, color = patient_id), size = 0.5, alpha = 0.3) +
  geom_smooth(data = CTHRC1_average, mapping = aes(x = Myofibroblast2, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black") +
  geom_errorbar(data = CTHRC1_average, aes(x = Myofibroblast2, ymin=mean-sd, ymax=mean+sd, color = patient_id), size = 0.5) +
  geom_point(data = CTHRC1_average, aes(x = Myofibroblast2, y=mean, color = patient_id), shape=15, size = 3) +
  scale_color_manual(values = use_colors) +
  ggtitle(paste0("p = ",cor.test(CTHRC1_average$mean, CTHRC1_average$Myofibroblast2, method = "pearson")$p.value, 
                 "\nr = ",cor.test(CTHRC1_average$mean, CTHRC1_average$Myofibroblast2, method = "pearson")$estimate))
ggsave2("Fig3F_Myofibroblast2_CTHRC1.pdf", path = "../results", width = 10, height = 9, units = "cm")


CXCL9 <- read_excel("Quantification_CXCL9.xlsx")
CXCL9_scrnaseq <- cell_counts_rel %>% select(CD14_Macrophages2)
CXCL9 <- full_join(CXCL9, CXCL9_scrnaseq, by = "patient_id")
CXCL9 <- pivot_longer(CXCL9, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CXCL9_average <- CXCL9 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, CD14_Macrophages2, mean, sd)) %>% unique()
ggplot() +
  geom_jitter(data = CXCL9, mapping = aes(x = CD14_Macrophages2, y = count, color = patient_id), size = 0.5, alpha = 0.3, width = 0.02) +
  geom_smooth(data = CXCL9_average, mapping = aes(x = CD14_Macrophages2, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black") +
  geom_errorbar(data = CXCL9_average, aes(x = CD14_Macrophages2, ymin=mean-sd, ymax=mean+sd, color = patient_id), size = 0.5, width = 0.03) +
  geom_point(data = CXCL9_average, aes(x = CD14_Macrophages2, y=mean, color = patient_id), shape=15, size = 3) +
  scale_color_manual(values = use_colors) +
  ggtitle(paste0("p = ",cor.test(CXCL9_average$mean, CXCL9_average$CD14_Macrophages2, method = "pearson")$p.value, 
                 "\nr = ",cor.test(CXCL9_average$mean, CXCL9_average$CD14_Macrophages2, method = "pearson")$estimate))
ggsave2("Fig4E_CD14_Macrophages_2_CXCL9.pdf", path = "../results", width = 10, height = 9, units = "cm")

就感觉原代码可视化出来的这几个图好丑,应该是要改参数,但是我们的重点不是分析可以进行吗,我就没改,如果友友觉得需要可视化的很漂亮,可以自己改参数哦

总结

下面是做的TCGA的分析,但是我做的是植物的,因此我就没做后面的结果。

因此对上面的一系列的流程结果进行总结,可以发现是首先流程性的分析,然后开始对注释后的亚群进行了精细的分析,将不同的细胞类群叶放到脚本里面进行分析。其中有很多的比较好用的函数,比如绘制热图、计算相关性的函数,还有一些比较好用的R包,可以用来表格的数据的整理,因此需要整理一下里面的基本的函数,以及这些的基础,加强后期对R语言基础的学习。

本文系转载,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文系转载前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 细胞比例计算
  • 下面是开始以前的PCA分析。
  • 相关性图绘制
  • compare cell counts with IHC quantification
  • 总结
相关产品与服务
命令行工具
腾讯云命令行工具 TCCLI 是管理腾讯云资源的统一工具。使用腾讯云命令行工具,您可以快速调用腾讯云 API 来管理您的腾讯云资源。此外,您还可以基于腾讯云的命令行工具来做自动化和脚本处理,以更多样的方式进行组合和重用。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档