library (Seurat)
library (dplyr)
library (ggplot2)
library (harmony)
library (patchwork)
library (SoupX)
library (DoubletFinder)
library(reticulate)
library(sceasy)
library(anndata)
library(patchwork)
library(SeuratDisk)
#================================Ambient RNA Correction===============================#
Control1 = load10X('path/to/your/cellranger/outs/folder')
Control1 = autoEstCont(Control1)
Control1 = adjustCounts(Control1)
#Do it for all samples
#================================Doublet Removal===============================#
Control1.S <- CreateSeuratObject(counts = Control1, project = "Control1", min.cells = 3, min.features = 300) # create Seurat object
Control1.S <- NormalizeData(Control1.S)
Control1.S <- FindVariableFeatures(Control1.S, selection.method = "vst", nfeatures = 2000)
Control1.S <- ScaleData(Control1.S)
Control1.S <- RunPCA(Control1.S)
Control1.S <- RunUMAP(Control1.S, dims = 1:50)
sweep.res.list_kidney <- paramSweep_v3(Control1.S, PCs = 1:30, sct = FALSE)
sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE)
bcmvn_kidney <- find.pK(sweep.stats_kidney) #determine the pk
nExp_poi <- round(0.075*nrow(Control1.S@meta.data))
Control1.S <- doubletFinder_v3(seu_kidney, PCs = 1:30, pN = 0.25, pK = #depends on the previous sterp, nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
Control1.S.Filtered <- subset(Control1.S, subset = #Doublet_related_column == "singlet")
#Do it for all samples
#============================Merge Datssets======================================================#
HK_SC_RNA = merge (Control1.S.Filtered, y = c (#All other sc filtered objects))
#============================Add percent.mt to dataset======================================================#
HK_SC_RNA[["percent.mt"]] <- PercentageFeatureSet(HK_SC_RNA, pattern = "^MT-")
#============================Pre Processing======================================================#
HK_SC_RNA <- subset(HK_SC_RNA, subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 50 & percent.rpl < 15 & percent.rps < 15)
HK_SC_RNA <- FindVariableFeatures(object = HK_SC_RNA,selection.method = "vst")
HK_SC_RNA <- NormalizeData(HK_SC_RNA)
HK_SC_RNA <- ScaleData(HK_SC_RNA, vars.to.regress = c ("nCount_RNA", "percent.mt", "percent.rpl", "percent.rps"))
HK_SC_RNA <- RunPCA(HK_SC_RNA, assay = 'RNA', npcs = 30, features = VariableFeatures(object = HK_SC_RNA),
verbose = TRUE, ndims.print = 1:5, nfeatures.print = 10)
#============= Run Harmony =============
HK_SC_RNA <- RunHarmony(HK_SC_RNA, group.by.vars = "orig.ident")
HK_SC_RNA <- RunUMAP(HK_SC_RNA, reduction = "harmony", dims = 1:30)
HK_SC_RNA <- FindNeighbors(HK_SC_RNA, reduction = "harmony", dims = 1:30) %>% FindClusters()
DimPlot(HK_SC_RNA, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
#================================== DEG List and cell type annotation==============================#
HK_SC_RNA.Markers <- FindAllMarkers(HK_SC_RNA, only.pos = TRUE, logfc.threshold = 0.25)
#================================== Annotation and store data as Idents==============================#
HK_SC_RNA <- RenameIdents(HK_SC_RNA, `0` = "PT")
#==================================Convert to adata using sceasy ==============================#
HK_SC_RNA_anndata <- convertFormat(HK_SC_RNA, from="seurat", to="anndata", main_layer="counts", drop_single_values=FALSE)
write_h5ad(HK_SC_RNA_anndata, filename = "HK_SC_RNA_anndata.h5ad")
library (Signac)
library(Seurat)
library (dplyr)
library(ggplot2)
library (harmony)
library (patchwork)
library (EnsDb.Hsapiens.v86)
library(rtracklayer)
library(reticulate)
library(sceasy)
library(anndata)
library(patchwork)
library(SeuratDisk)
#--------------------------Prepare Object---------------------------------#
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
genome(annotation) <- "hg38"
seqlevelsStyle(annotation) <- "UCSC"
counts <- Read10X_h5(filename = "/Your_Path/filtered_peak_bc_matrix.h5")
metadata <- read.csv(file = "/Your_Path/singlecell.csv", header = TRUE,row.names = 1)
fragpath <- "/Your_Path/outs/fragments.tsv.gz"
chromatin.assay <- CreateChromatinAssay(counts = counts, sep = c(":", "-"),fragments = fragpath,annotation = annotation)
Control1.S <- CreateSeuratObject(counts = chromatin.assay,assay = "peaks",meta.data = metadata)
#Annotation
hg38 <- import("/Your_Path_To_GTF_File/genes.gtf.gz")
genome(hg38) <- "hg38"
seqlevelsStyle(hg38) <- "UCSC"
hg38$gene_biotype <- hg38$gene_type
Annotation(Control1.S) <- hg38
#QC Filtering
Control1.S <- NucleosomeSignal(object = Control1.S)
Control1.S <- TSSEnrichment(Control1.S, fast = FALSE)
Control1.S$pct_reads_in_peaks <- Control1.S$peak_region_fragments / Control1.S$passed_filters * 100
Control1.S$blacklist_ratio <- Control1.S$blacklist_region_fragments / Control1.S$peak_region_fragments
Control1.S$high.tss <- ifelse(Control1.S$TSS.enrichment > 2, 'High', 'Low')
pdf("Viloin.HK.Fetal.Mito.pdf", width=6, height=4)
TSSPlot(Control1.S, group.by = 'high.tss') + NoLegend()
Control1.S$nucleosome_group <- ifelse(Control1.S$nucleosome_signal > 4, 'NS > 4', 'NS < 4')
FragmentHistogram(object = Control1.S, group.by = 'nucleosome_group')
pdf("HK2481_1.ATAC.QC.pdf", width=18, height=4)
VlnPlot(object = Control1.S, features = c('pct_reads_in_peaks', 'peak_region_fragments', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'), pt.size = 0.1, ncol = 5)
Control1.S <- subset(x = Control1.S, subset = peak_region_fragments > 3000 & peak_region_fragments < 20000 & pct_reads_in_peaks > 15 & blacklist_ratio < 0.05 & nucleosome_signal < 4 & TSS.enrichment > 2)
#Do this step for all 20 samples
#--------------------------Merge Datasets---------------------------------#
HK.SN.ATAC = merge (x = Control1.S, y = list(#list of all other objects))
#--------------------------Pre-processing---------------------------------#
HK.SN.ATAC <- RunTFIDF(HK.SN.ATAC)
HK.SN.ATAC <- FindTopFeatures(HK.SN.ATAC, min.cutoff = 'q0')
HK.SN.ATAC <- RunSVD(HK.SN.ATAC)
HK.SN.ATAC <- RunHarmony(
object = HK.SN.ATAC,
group.by.vars = 'dataset',
reduction = 'lsi',
assay.use = 'peaks',
project.dim = FALSE
)
HK.SN.ATAC <- RunUMAP(HK.SN.ATAC, dims = 2:50, reduction = 'harmony')
HK.SN.ATAC <- RunUMAP(HK.SN.ATAC, reduction = "harmony", dims = 2:50)
HK.SN.ATAC <- FindNeighbors(HK.SN.ATAC, reduction = "harmony", dims = 2:50) %>% FindClusters()
#--------------------------DAR---------------------------------#
DARs <- FindAllMarkers(object = HK.SN.ATAC, min.pct = 0.05, test.use = 'LR', latent.vars = 'peak_region_fragments')
HK_SN_ATAC_anndata <- convertFormat(HK_SN_ATAC_Filtered, from="seurat", to="anndata", main_layer="counts", drop_single_values=FALSE)
write_h5ad(HK_SN_ATAC_anndata, filename = "HK_SN_ATAC_anndata.h5ad")
library (dplyr)
library (ggplot2)
library(SeuratData)
library (harmony)
library (patchwork)
library (Seurat)
options(stringsAsFactors = F)
library(CellTrek)
library(Seurat)
library(viridis)
library(ConsensusClusterPlus)
library(SpotClean)
library(S4Vectors)
library (STUtility)
set.seed(123)
# Load 10x Visium data
Control1.ST_raw <- read10xRaw("/path/to/matrix/folder")
Control1.ST_slide_info <- read10xSlide("/path/to/tissue/csv",
"/path/to/tissue/image",
"/path/to/scale/factor")
# Visualize raw data
Control1.ST_obj <- createSlide(count_mat = Control1.ST_raw,
slide_info = Control1.ST_slide_info)
visualizeSlide(slide_obj = Control1.ST_obj)
visualizeHeatmap(Control1.ST_obj,rownames(Control1.ST_raw)[1])
# Decontaminate raw data
Control1.ST.decont_obj <- spotclean(Control1.ST_obj)
# Repeat this step for all 14 ST samples
#-----------------------------Preprocessing------------------------------#
Control1.ST <- SCTransform(Control1.ST_obj, assay = "Spatial", verbose = FALSE)
#Do it for all samples
#-----------------------------Merge All SCT spatial samples------------------------------#
HK.ST = merge (Control1.ST, y =c (#All the ST objects))
#-----------------------------Merge All SCT spatial samples------------------------------#
HK.ST <- RunPCA(HK.ST, assay = "SCT", verbose = FALSE)
HK.ST <- RunHarmony(HK.ST, group.by.vars = "orig.ident")
HK.ST <- RunUMAP(HK.ST, reduction = "harmony", dims = 1:20)
HK.ST <- FindNeighbors(HK.ST, reduction = "harmony", dims = 1:20) %>% FindClusters()
#-----------------------------Find DEGs between Clusters------------------------------#
HK.ST <- FindAllMarkers(HK.ST, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#-----------------------------Run NMF for microenvironments------------------------------#
RunNMF(
HK.ST,
features = NULL,
nfactors = 20,
rescale = TRUE,
reduction.name = "NMF")
HK.ST <- RunHarmony(HK.ST, group.by.vars = "orig.ident", reduction = "NMF")
HK.ST <- RunUMAP(HK.ST, reduction = "harmony", dims = 1:20)
HK.ST <- FindNeighbors(HK.ST, reduction = "harmony", dims = 1:20) %>% FindClusters()
#-----------------------------Find DEGs between MEs------------------------------#
HK.ST <- FindAllMarkers(HK.ST, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#-----------------------------------Mapping Back snRNA-seq, scRNA-seq, and snATAC-seq to Spatial dataset----------------------#
HK.SC.SN.ATAC <- HK.SC.SN.ATAC[, sample(colnames(HK.SN), size =20000, replace=F)] #Downsample the annotated snRNA-seq matrix
Control1.ST = Load10X_Spatial("/Your_10X-Folder/outs", filename = "filtered_feature_bc_matrix.h5", assay = "Spatial", slice = "slice1")
Control1.ST <- RenameCells(Control1.ST, new.names=make.names(Cells(Control1.ST)))
HK.SC.SN.ATAC <- RenameCells(HK.SC.SN.ATAC, new.names=make.names(Cells(HK.SC.SN.ATAC)))
Coltrol1.ST.Samples_traint <- CellTrek::traint(st_data=Control1.ST, sc_data=HK.SC.SN.ATAC, sc_assay='RNA', cell_names='seurat_clusters')
Control1.ST.HK.SN_celltrek <- CellTrek::celltrek(st_sc_int=Coltrol1.ST.Samples_traint, int_assay='traint', sc_data=HK.SC.SN.ATAC, sc_assay = 'RNA', reduction='pca', intp=T, intp_pnt=5000, intp_lin=F, nPCs=30, ntree=1000, dist_thresh=0.55, top_spot=5, spot_n=5, repel_r=20, repel_iter=20, keep_model=T)$celltrek
Control1.ST.HK.SN_celltrek$seurat_clusters <- factor(Control1.ST.HK.SN_celltrek$seurat_clusters, levels=sort(Control1.ST.HK.SN_celltrek$seurat_clusters)))
Idents = Control1.ST.HK.SN_celltrek[["Idents2"]]
Idents (Control1.ST.HK.SN_celltrek) = Idents
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc
import cell2location
import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns
# silence scanpy that prints a lot of warnings
import warnings
warnings.filterwarnings('ignore')
from google.colab import drive
drive.mount('/content/drive')
import cell2location
results_folder = '/content/drive/MyDrive/Spatial/HK2529_ST'
ref_run_name = f'{results_folder}/reference_signatures'
run_name = f'{results_folder}/cell2location_map'
adata = sc.read_visium(path ='/content/drive/MyDrive/HK2529_ST/', count_file= "filtered_feature_bc_matrix.h5", load_images=True)
adata.var_names_make_unique()
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)
sns.distplot(adata.obs["total_counts"], kde=False)
#sc.pp.filter_cells(adata, min_counts=5000)
#sc.pp.filter_cells(adata, max_counts=35000)
#adata = adata[adata.obs["pct_counts_mt"] < 50]
print(f"#cells after MT filter: {adata.n_obs}")
sc.pp.filter_genes(adata, min_cells=10)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.tl.leiden(adata, key_added="clusters")
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.tl.leiden(adata, key_added="clusters")
sc.pl.spatial(adata,color=["COL1A1", "NPHS1"], img_key=None, size=1,vmin=0, cmap='magma', vmax='p99.0', gene_symbols='SYMBOL')
adata_ref = sc.read ("/content/drive/MyDrive/adata_ref_withmodel_4.3.2023.h5ad")
# export estimated expression in each cluster
if 'means_per_cluster_mu_fg' in adata_ref.varm.keys():
inf_aver = adata_ref.varm['means_per_cluster_mu_fg'][[f'means_per_cluster_mu_fg_{i}'
for i in adata_ref.uns['mod']['factor_names']]].copy()
else:
inf_aver = adata_ref.var[[f'means_per_cluster_mu_fg_{i}'
for i in adata_ref.uns['mod']['factor_names']]].copy()
inf_aver.columns = adata_ref.uns['mod']['factor_names']
inf_aver.iloc[0:5, 0:5]
intersect = np.intersect1d(adata.var_names, inf_aver.index)
adata = adata[:, intersect].copy()
inf_aver = inf_aver.loc[intersect, :].copy()
# prepare anndata for cell2location model
cell2location.models.Cell2location.setup_anndata(adata=adata)
# can add additional slides to adata and add in a batch_key
mod = cell2location.models.Cell2location(
adata, cell_state_df=inf_aver,
# the expected average cell abundance: tissue-dependent
# hyper-prior which can be estimated from paired histology:
N_cells_per_location=30,
# hyperparameter controlling normalisation of
# within-experiment variation in RNA detection:
detection_alpha=20
)
mod.view_anndata_setup()
mod.train(max_epochs=30000,
# train using full data (batch_size=None)
batch_size=None,
# use all data points in training because
# we need to estimate cell abundance at all locations
train_size=1,
use_gpu=True)
# plot ELBO loss history during training, removing first 100 epochs from the plot
mod.plot_history(1000)
plt.legend(labels=['full data training']);
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata = mod.export_posterior(
adata, sample_kwargs={'num_samples': 1000, 'batch_size': mod.adata.n_obs, 'use_gpu': True}
)
adata.obs[adata.uns['mod']['factor_names']] = adata.obsm['q05_cell_abundance_w_sf']
# Save model
mod.save(f"{run_name}", overwrite=True)
# mod = cell2location.models.Cell2location.load(f"{run_name}", adata)
# Save anndata object with results
adata_file = f"{run_name}/sp.h5ad"
adata.write(adata_file)
adata_file
# select one slide
from cell2location.utils import select_slide
sc.pl.spatial(adata, cmap='magma',
color=['Podo',"Endo_G","PEC", "Mes", "iPT", "GS_Stromal", "PT_S1", "PT_S2", "PT_S3", "Fibroblast_1", "Fibroblast_2", "DCT1" , "M_TAL", "DTL",
"CD4T", "CD8T", "Mac", "Dedifferentiated_Tubule"],
ncols=4, size=1.3,
img_key='hires',
# limit color scale at 99.2% quantile of cell abundance
vmin=0, vmax='p99.2'
)
sc.pp.neighbors(adata, use_rep='q05_cell_abundance_w_sf',
n_neighbors = 15)
sc.tl.leiden(adata, resolution=1.1)
adata.obs["region_cluster"] = adata.obs["leiden"].astype("category")
sc.tl.umap(adata, min_dist = 0.3, spread = 1)
sc.pl.spatial(adata, color=['region_cluster'],
size=1.3, img_key='hires', alpha=0.5)
# Set the value of the "sample" column for all rows to "HK3513_ST"
adata.obs['sample'] = "HK2529_ST"
# Print the updated "obs" attribute
print(adata.obs)
# Now we use cell2location plotter that allows showing multiple cell types in one panel
from cell2location.plt import plot_spatial
# select up to 6 clusters
clust_labels = [ 'Podo', 'PEC', "GS_Stromal", "Fibroblast_1"]
clust_col = ['' + str(i) for i in clust_labels] # in case column names differ from labels
slide = select_slide(adata, 'HK2529_ST')
with mpl.rc_context({'figure.figsize': (15, 15)}):
fig = plot_spatial(
adata=slide,
# labels to show on a plot
color=clust_col, labels=clust_labels,
show_img=True,
# 'fast' (white background) or 'dark_background'
style='fast',
# limit color scale at 99.2% quantile of cell abundance
max_color_quantile=0.992,
# size of locations (adjust depending on figure size)
circle_diameter=12,
colorbar_position='right'
)
from cell2location import run_colocation
res_dict, adata = run_colocation(
adata,
model_name='CoLocatedGroupsSklearnNMF',
train_args={'n_fact': np.arange(5, 25), 'n_restarts': 3 })
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。