对于只有只有部分重叠的datasets
,合并方法我们依然可以采用Seurat
、Harmony
,rliger
包,本期介绍一下Harmony
包的用法。🤩
rm(list = ls())
library(Seurat)
library(SeuratDisk)
library(SeuratWrappers)
library(patchwork)
library(harmony)
library(rliger)
library(RColorBrewer)
library(tidyverse)
library(reshape2)
library(ggsci)
library(ggstatsplot)
这里我们提供1
个3’ PBMC dataset
和1
个whole blood dataset
。🥰
umi_gz <- gzfile("./GSE149938_umi_matrix.csv.gz",'rt')
umi <- read.csv(umi_gz,check.names = F,quote = "")
matrix_3p <- Read10X_h5("./3p_pbmc10k_filt.h5",use.names = T)
创建Seurat
对象。🐶
srat_wb <- CreateSeuratObject(t(umi),project = "whole_blood")
srat_3p <- CreateSeuratObject(matrix_3p,project = "pbmc10k_3p")
rm(umi_gz)
rm(umi)
rm(matrix_3p)
srat_wb
srat_3p
为了方便后续分析,这里我们对metadata
进行一下注释修改。
colnames(srat_wb@meta.data)[1] <- "cell_type"
srat_wb@meta.data$orig.ident <- "whole_blood"
srat_wb@meta.data$orig.ident <- as.factor(srat_wb@meta.data$orig.ident)
head(srat_wb[[]])
这里我们先用merge
将2个数据集简单合并在一起。(这里我们默认做过初步过滤了哈,具体的大家可以看一下上期的教学。)😘
wb_harmony <- merge(srat_3p,srat_wb)
我们在这里做一下Normalization
,寻找高变基因等等标准操作。👀
wb_harmony <- NormalizeData(wb_harmony, verbose = F)
wb_harmony <- FindVariableFeatures(wb_harmony, selection.method = "vst", nfeatures = 2000, verbose = F)
wb_harmony <- ScaleData(wb_harmony, verbose = F)
wb_harmony <- RunPCA(wb_harmony, npcs = 30, verbose = F)
wb_harmony <- RunUMAP(wb_harmony, reduction = "pca", dims = 1:30, verbose = F)
p1 <- DimPlot(object = wb_harmony, reduction = "pca",
pt.size = .1, group.by = "orig.ident") +
scale_color_npg()+
NoLegend()
p2 <- VlnPlot(object = wb_harmony, features = "PC_1",
group.by = "orig.ident", pt.size = .1) +
scale_color_npg()+
NoLegend()
p1+p2
DimPlot(wb_harmony,reduction = "umap",
group.by = "orig.ident") +
scale_color_npg()+
plot_annotation(title = "10k 3' PBMC and whole blood, before integration")
wb_harmony <- wb_harmony %>%
RunHarmony("orig.ident", plot_convergence = T)
harmony_embeddings <- Embeddings(wb_harmony, 'harmony')
harmony_embeddings[1:5, 1:5]
harmony合并后。
p1 <- DimPlot(object = wb_harmony, reduction = "harmony", pt.size = .1, group.by = "orig.ident") +
scale_color_npg()+
NoLegend()
p2 <- VlnPlot(object = wb_harmony, features = "harmony_1", group.by = "orig.ident", pt.size = .1) +
scale_fill_npg()+
NoLegend()
p1 +p2
harmony合并后。
wb_harmony <- SetIdent(wb_harmony,value = "orig.ident")
DimPlot(wb_harmony,reduction = "umap") +
scale_color_npg()+
plot_annotation(title = "10k 3' PBMC and whole blood, after integration (Harmony)")
wb_harmony <- wb_harmony %>%
RunUMAP(reduction = "harmony", dims = 1:30, verbose = F) %>%
FindNeighbors(reduction = "harmony", k.param = 10, dims = 1:30) %>%
FindClusters() %>%
identity()
wb_harmony <- SetIdent(wb_harmony,value = "seurat_clusters")
ncluster <- length(unique(wb_harmony[[]]$seurat_clusters))
mycol <- colorRampPalette(brewer.pal(8, "Set2"))(ncluster)
DimPlot(wb_harmony,label = T,
cols = mycol, repel = T) +
NoLegend()
我们看下各个clusters
在两个datasets
各有多少细胞。
count_table <- table(wb_harmony@meta.data$seurat_clusters, wb_harmony@meta.data$orig.ident)
count_table
#### 可视化
count_table %>%
as.data.frame() %>%
ggbarstats(x = Var2,
y = Var1,
counts = Freq)+
scale_fill_npg()
最后祝大家早日不卷!~