前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >单细胞数据复现-肺癌文章代码复现4

单细胞数据复现-肺癌文章代码复现4

原创
作者头像
小胡子刺猬的生信学习123
发布2022-05-18 20:31:12
9010
发布2022-05-18 20:31:12
举报

单细胞数据复现-肺癌文章代码复现1https://cloud.tencent.com/developer/article/1992648

单细胞数据复现-肺癌文章代码复现2https://cloud.tencent.com/developer/article/1995619

单细胞数据复现-肺癌文章代码复现3https://cloud.tencent.com/developer/article/1996043

前面是主要对epi细胞进行的基本的分析。

选取癌症的样本进行分析

代码语言:txt
复制
##subset抓取部分的数据集的内容
epi_tumor <- subset(subset(epi_anno, subset = cluster_type == "Tumor"), subset = tissue_type == "Tumor")
epi_tumor <- ScaleData(epi_tumor)

###tumor cell marker genes

Idents(epi_tumor) <- epi_tumor@meta.data$patient_id
markers <- FindAllMarkers(epi_tumor, only.pos = T, min.pct = 0.25, min.diff.pct = 0.25)
##摘取top基因,更改wt = avg_logFC,先查看markers gene的参数
top_TC_markers <- markers %>% group_by(cluster) %>% top_n(10, wt = avg_log2FC)
##这个颜色相对于比seurat原包的颜色较为好看
DoHeatmap(epi_tumor, features = top_TC_markers$gene, group.by = "patient_id", draw.lines = F, group.colors = use_colors) +
  scale_fill_viridis()
ggsave2("HeatMap_Tumor.pdf", path = "output/fig2", width = 30, height = 30, units = "cm")
ggsave2("Fig2C.png", path = "../results", width = 30, height = 30, units = "cm")

分析有丝分裂的发生基因

代码语言:txt
复制
mitotic_activity <- FetchData(epi_tumor, c("tissue_type", "cell_type_epi", "Phase", "sample_id")) %>%
  mutate(sample_id = factor(sample_id, levels = c("p034t", "p033t", "p032t", "p031t", "p030t", "p027t", "p024t", "p023t", "p019t", "p018t")))

ggplot(mitotic_activity, aes(x = sample_id, fill = Phase)) +
  geom_bar(position = "fill", width = 0.75) +
  scale_fill_manual(values = use_colors) +
  coord_flip()
ggsave2("SuppFiqg3D.pdf", path = "../results", width = 12, height = 10, units = "cm")

progeny pathway scores

代码语言:txt
复制
#clustered heatmap progeny scores
##gather做数据清洗的

progeny_scores <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores$cell_id <- rownames(progeny_scores)
progeny_scores <- gather(progeny_scores, Pathway, Activity, -cell_id)

cells_clusters <- FetchData(epi_tumor, c("sample_id", "cluster_type")) %>% filter(str_detect(sample_id, "t"))
cells_clusters$cell_id <- rownames(cells_clusters)

##full_join并集;inner_join交集
progeny_scores <- inner_join(progeny_scores, cells_clusters)

summarized_progeny_scores <- progeny_scores %>% 
  group_by(Pathway, sample_id) %>% 
  summarise(avg = mean(Activity), std = sd(Activity)) %>%
  pivot_wider(id_cols = Pathway, names_from = sample_id, values_from = avg) %>%
  column_to_rownames("Pathway") %>%
  as.matrix()
##这里画图的时候我发现还是ggsave相对好一些,有的时候捕捉到不到图片
pdf("../results/Fig2E.pdf", width = 6, height = 8)
heatmap.2(summarized_progeny_scores, trace = "none", density.info = "none", col = bluered(100))
dev.off()

correlation mutational status ~ progeny scores

代码语言:javascript
复制
summarized_progeny_scores_mutations <- summarized_progeny_scores %>%
  t() %>%
  as.data.frame()

##mutate()R语言中的函数用于在DATAframe中添加新变量
summarized_progeny_scores_mutations$Sample <- rownames(summarized_progeny_scores_mutations)
summarized_progeny_scores_mutations <- summarized_progeny_scores_mutations %>%
  mutate(KRAS_status = ifelse(Sample %in% c("p018t", "p023t", "p030t", "p031t", "p032t", "p033t"), "mutated", "wildtype")) %>%
  mutate(TP53_status = ifelse(Sample %in% c("p023t", "p027t"), "mutated", "wildtype")) %>%
  mutate(PIK3CA_status = ifelse(Sample %in% c("p031t"), "mutated", "wildtype")) %>%
  mutate(KRAS_status = factor(TP53_status, levels = c("wildtype", "mutated"))) %>%
  mutate(TP53_status = factor(TP53_status, levels = c("wildtype", "mutated")))

ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = MAPK)) +
  geom_boxplot() +
  ggtitle(paste0(t.test(formula = MAPK~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~MAPK.pdf", path = "../results", width = 8, height = 8, units = "cm")

ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = EGFR)) +
  geom_boxplot() +
  ggtitle(paste0(t.test(formula = EGFR~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~EGFR.pdf", path = "../results", width = 8, height = 8, units = "cm")

ggplot(summarized_progeny_scores_mutations, aes(x = TP53_status, y = p53)) +
  geom_boxplot() +
  ggtitle(paste0(t.test(formula = p53~TP53_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_p53~TP53.pdf", path = "../results", width = 8, height = 8, units = "cm")

rerun PCA

代码语言:javascript
复制
// epi_pca <- epi_tumor

epi_pca <- RunPCA(epi_pca)

DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(1,2))
DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(3,4))

DimPlot(epi_pca, reduction = "pca", group.by = "histo_subtype")

DimHeatmap(epi_pca, dims = 1, cells = 1000, balanced = T, fast = F, nfeatures = 60) +
  scale_fill_viridis()
#ggsave2("DimHeatmap_epitumor_PC1.pdf", path = "output/fig2", width = 10, height = 20, units = "cm")
ggsave2("SuppFig3F.png", path = "../results", width = 10, height = 20, units = "cm")


### low-dimensional UMAPs

DimPlot(epi_pca, group.by = "patient_id")
DimPlot(epi_pca, group.by = "histo_subtype")

##可以用于后续的服务器R文件的保存
for (i in c(4, 6, 8)){
  umaptest <- RunUMAP(epi_pca, dims = 1:i, verbose = F)
  print(DimPlot(umaptest, reduction = "umap", group.by = "patient_id", cols = use_colors) + labs(title = paste0(i, " dimensions")) + coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_patient.png"), path = "../results", width = 15, height = 10, units = "cm")
  print(DimPlot(umaptest, reduction = "umap", group.by = "histo_subtype", cols = use_colors) + labs(title = paste0(i, " dimensions")) + coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_histo.png"), path = "../results", width = 15, height = 10, units = "cm")
  print(FeaturePlot(umaptest, features = "PC_1", order = T, cols = viridis(10)) + labs(title = paste0(i, " dimensions")) + coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_PC1.png"), path = "../results", width = 15, height = 10, units = "cm")
  remove(umaptest)
}


### bin cells along differentation gradients

# tumor cell signatures "alveolar/club-like" = tumor_signature, "undifferentiated" = anti_tumor_signature

epi_pca <- AddModuleScore(epi_pca, features = list(c("SCGB3A1", "PIGR", "NAPSA", "C4BPA", "SCGB3A2", "HLA-DRA", "CD74", "ADGRF5", "C16orf89", "FOLR1", "SELENBP1", "HLA-DRB1", "ID4", "MGP", "AQP3", "CA2", "LHX9", "HLA-DPB1", "FMO5", "GKN2", "C5", "MUC1", "NPC2", "RNASE1", "PIK3C2G", "SFTA2", "SLC34A2", "HLA-DPA1", "FGFR3", "PGC")), name = "tumor_signature")
epi_pca <- AddModuleScore(epi_pca, features = list(c("DSG2", "CAMK2N1", "FAM3C", "KRT7", "IFI27", "SLC2A1", "MARCKS", "PLAU", "AHNAK2", "PERP", "S100A4", "KRT19", "COL6A1", "UACA", "COL17A1", "CDA", "TPM2", "S100A16", "KRT8", "PRSS23", "DST", "LAMC2", "S100P", "PRSS3", "LAMA3", "DSP", "ITGA3", "MDK", "FAM83A", "ITGB4")), name = "anti_tumor_signature")


epi_tumor_data <- FetchData(epi_pca, c(colnames(epi_pca@meta.data), paste0("PC_", 1:10)))

progeny_scores_data <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores_data$cell_id <- rownames(progeny_scores_data)

epi_tumor_data <- full_join(epi_tumor_data, progeny_scores_data, by = "cell_id")


##rownames:接受参数(向量),类矩阵的对象
##cut_number:切割函数
progeny_names <- rownames(epi_pca@assays$progeny)

epi_tumor_data$bin <- cut_number(epi_tumor_data$PC_1, n = 10, labels = c(1:10))

ggplot(epi_tumor_data, mapping = aes(x = bin)) +
  geom_bar()

# patients along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(patient_id, levels = c("p032", "p018", "p019", "p024", "p031", "p030", "p033", "p023", "p027", "p034")))) +
  geom_bar() +
  theme(legend.title = element_blank()) +
  scale_fill_manual(values = use_colors)
ggsave2("SuppFig3G.pdf", path = "../results", width = 30, height = 30, units = "cm")

# histological subtypes along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(histo_subtype, levels = c("lepidic", "acinar", "mucinuous (papillary)", "(micro)papillary", "solid", "sarcomatoid")))) +
  geom_bar() +
  theme(legend.title = element_blank()) +
  scale_fill_manual(values = use_colors)
ggsave2("Fig2F.pdf", path = "../results", width = 30, height = 30, units = "cm")

# cell cycle phase along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = Phase)) +
  geom_bar() +
  scale_fill_manual(values = use_colors)
ggsave2("SuppFig3H.pdf", path = "../results", width = 30, height = 30, units = "cm")

# normal cell type signatures along PC1
##猖狂数据互换函数pivot_longer
epi_tumor_data %>%
  select(c(bin, VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10)) %>%
  pivot_longer(cols = c(VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10), names_to = "cell_type") %>%
  group_by(bin, cell_type) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = cell_type)) +
  geom_line(size = 1) +
  scale_color_brewer(type = "qual", palette = "Paired") +
  scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2G.pdf", path = "../results", width = 30, height = 30, units = "cm")

# progeny pathway scores along PC1

getcolors <- colorRampPalette(brewer.pal(14, "Dark2"))

epi_tumor_data %>%
  select(bin, progeny_names) %>%
  pivot_longer(cols = progeny_names, names_to = "progeny") %>%
  group_by(bin, progeny) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = progeny)) +
  geom_line(size = 1) +
  scale_color_manual(values = getcolors(14)) +
  scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2H.pdf", path = "../results", width = 30, height = 30, units = "cm")

# tumor cell signatures along PC1

epi_tumor_data %>%
  select(bin, tumor_signature1, anti_tumor_signature1) %>%
  pivot_longer(cols = c(tumor_signature1, anti_tumor_signature1), names_to = "genes") %>%
  group_by(bin, genes) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = genes)) +
  geom_line(size = 1) +
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width = 0.3) +
  scale_x_continuous(breaks = c(1:20))
ggsave2("SuppFig3I.pdf", path = "../results", width = 30, height = 30, units = "cm")

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 选取癌症的样本进行分析
  • 分析有丝分裂的发生基因
  • progeny pathway scores
  • correlation mutational status ~ progeny scores
  • rerun PCA
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档