1、分子矩阵：gene × barcode，即最初拿到的表达矩阵；2、细胞矩阵：空间解卷积之后得到的矩阵，细胞 × barcode；3、分子 niche 矩阵：即分子生态位矩阵，主要用于研究分子微环境（包括邻域通讯等），gene × barcode；4、细胞 niche 矩阵：即细胞生态位矩阵，主要用于研究细胞的空间排布，例如侵袭性肿瘤细胞在空间上临近巨噬细胞；所有细胞的空间排布共同构成细胞 niche 矩阵。
import pandas as pd
import os
import CellNeighborEX
# Check the path of your root directory (the current working directory);
# the relative "Datasets" folder below is created inside it.
os.getcwd()
# Make a folder to save data files.
# exist_ok=True keeps this safe to re-run and removes the separate
# existence check, whose body had lost its indentation.
os.makedirs('Datasets', exist_ok=True)
# Download the data files from figshare into the Datasets/ folder.
# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so these lines
# run only inside a notebook/IPython session and require `wget` on the PATH.
# (i) pre-processed expression data (log data, cell ids, gene names, per the
#     output file names)
!wget https://figshare.com/ndownloader/files/42334083 -O Datasets/SSliver_log_data.txt
!wget https://figshare.com/ndownloader/files/42334077 -O Datasets/SSliver_cell_id.txt
!wget https://figshare.com/ndownloader/files/42334080 -O Datasets/SSliver_gene_name.txt
# (ii) data of annotated cell types and spatial coordinates
!wget https://figshare.com/ndownloader/files/42333705 -O Datasets/SSliver_RCTD.csv
## Load data
# Folder holding the downloaded data files. It is derived from the current
# working directory (where "Datasets" is created) instead of being hard-coded
# to one user's home directory. The empty last component keeps a trailing
# separator, because file names are appended to `path` by string concatenation.
path = os.path.join(os.getcwd(), 'Datasets', '')
# Annotated cell types and spatial coordinates; the first row is the header.
df_processed = pd.read_csv(path + 'SSliver_RCTD.csv', header=0)
# All categorized files (index_, matchComb_, neiCombUnique_, prop_ .csv) are
# saved in the "categorized_data" folder in the root directory.
CellNeighborEX.categorization.generate_input_files(data_type="NGS", df=df_processed, sample_size=30, min_sample_size=1)
# Directory where all the categorized data files are saved. It is resolved
# against the current working directory rather than a user-specific absolute
# path; the empty last component keeps the trailing separator.
path_categorization = os.path.join(os.getcwd(), 'categorized_data', '')
#### Get log-normalized expression data
# Read the three tab-separated, header-less text files (cell ids, gene names,
# log data) from `path` into one dataframe each, in that order.
df_cell_id, df_gene_name, df_log_data = (
    pd.read_csv(path + file_name, sep="\t", header=None)
    for file_name in (
        "SSliver_cell_id.txt",
        "SSliver_gene_name.txt",
        "SSliver_log_data.txt",
    )
)
# Argument values for CellNeighborEX.DEanalysis.analyze_data().
data_type = "NGS"  # "Image": image-based ST data; "NGS": NGS-based ST data
direction = 'up'  # 'up': up-regulated genes; 'down': down-regulated genes
# Cutoffs.
lrCutoff = 0.4  # log ratio
pCutoff = 0.01  # p-value
pCutoff2 = 0.01  # false discovery rate
# Choice of statistical test:
#   True  - the result of a normality test decides: parametric test if the
#           data is normal, non-parametric test otherwise.
#   False - the parametric test is used when the sample size (number of
#           cells/spots) is larger than 30, the non-parametric test otherwise.
normality_test = False
top_genes = 10  # The top 10 DEGs are annotated in the volcano plot.
# With save=True, all result files (DEG list: csv; heatmaps and volcano plots:
# pdf; gene expression values: txt) are saved in the "DE_results" folder in
# the root directory.
DEG_list = CellNeighborEX.DEanalysis.analyze_data(
    df_cell_id,
    df_gene_name,
    df_log_data,
    path_categorization,
    data_type,
    lrCutoff,
    pCutoff,
    pCutoff2,
    direction,
    normality_test,
    top_genes,
    save=True,
)
# Select a cell type and a DEG for spatial visualization and then load the data.
# For example, F13a1 is one of the up-regulated genes identified from the
# heterotypic spots of TumorIII+Monocyte.
# The result folder is resolved against the current working directory (the
# DE analysis saves into "DE_results" in the root directory) instead of a
# user-specific absolute path; the empty last component keeps the trailing
# separator needed by the string concatenation below.
path_selected = os.path.join(os.getcwd(), 'DE_results', 'TumorIII+Monocyte', '')
# The per-gene text files are comma-separated and have no header row.
column_names = ['barcode', 'logdata', 'zscore']
heterotypic = pd.read_csv(path_selected + "TumorIII+Monocyte_F13a1.txt", delimiter=",", names=column_names)
homotypic1 = pd.read_csv(path_selected + "TumorIII+TumorIII_F13a1.txt", delimiter=",", names=column_names)
homotypic2 = pd.read_csv(path_selected + "Monocyte+Monocyte_F13a1.txt", delimiter=",", names=column_names)
# Tag every row with the cell-type pair it came from, then stack the tables.
heterotypic['type'] = 'TumorIII+Monocyte'
homotypic1['type'] = 'TumorIII+TumorIII'
homotypic2['type'] = 'Monocyte+Monocyte'
df_exp = pd.concat([heterotypic, homotypic1, homotypic2])
# Build the plotting inputs: one background frame plus one frame per
# highlighted group (red / blue / black), each with its own bead size.
df_bg, df_red, df_blue, df_black = CellNeighborEX.visualization.set_parameters(
    df_processed,
    df_exp,
    beadsize_bg=10,
    edgecolor_bg=(0.85,0.85,0.85),
    beadcolor_bg=(0.85,0.85,0.85),
    beadsize_red=600,
    beadsize_blue=200,
    beadsize_black=200,
    type_red='TumorIII+Monocyte',
    type_blue='TumorIII+TumorIII',
    type_black='Monocyte+Monocyte',
)
# Draw the spatial map.
# zorder_red, zorder_blue and zorder_black determine the drawing order.
# With save=True, the spatial map (F13a1.pdf) is saved in the "spatialMap"
# folder in the root directory.
CellNeighborEX.visualization.get_spatialPlot(
    df_bg,
    df_red,
    df_blue,
    df_black,
    label_red='TumorIII+Monocyte',
    label_blue='TumorIII',
    label_black='Monocyte',
    label_gene='F13a1',
    zorder_red=3.0,
    zorder_blue=2.0,
    zorder_black=4.0,
    figsize=(28,28),
    save=True,
)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。