脚本更新----空间转录组社区网络

原创

追风少年i

发布于 2025-01-25 11:11:10

5700

代码可运行

运行总次数：0

代码可运行

作者，Evil Genius

雪

大雪

好大的雪

太原下了好大的雪

2025年的第一场雪

今日目标：社区网络。大家要活学活用，高低精度的示例都有；如果学得仔细的话，应该会联想到之前分享的hotspot。

首先我们来看低精度

# Import necessary libraries
import muspan as ms
import numpy as np

# Imports to make our plots look nice
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

# Global plotting defaults: render figures at 270 dpi, then apply seaborn's
# 'white' theme with the font scale doubled
mpl.rcParams.update({'figure.dpi': 270})
sns.set_theme(font_scale=2, style='white')

# Additional imports to extract the data from the relevant files
import json
import tarfile
import pandas as pd

# Folder that holds the downloaded dataset archives (edit to match your machine)
path_to_the_data = 'path/where/you/saved/the/data/Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)'

# Number of topics whose deconvolution results should be imported
number_of_topics = 16

# Full paths of the three archives used below; they share one naming pattern
# and differ only in the suffix before '.tar.gz'
spatial_data_file, cluster_data_file, deconv_data_file = (
    f'{path_to_the_data}/CytAssist_FFPE_Human_Colon_Rep1_{archive_kind}.tar.gz'
    for archive_kind in ('spatial', 'analysis', 'deconvolution')
)

# Read the spot geometry from the spatial archive.
# The archive is opened in a `with` block so its file handle is closed as soon
# as everything needed has been read into memory (it was previously left open).
with tarfile.open(spatial_data_file, "r") as spatial_tar:
    # The first member name is used as the archive's top-level folder name
    spatial_tar_folder_name = spatial_tar.getnames()[0]

    # Parse scalefactors_json.json into a dictionary
    spatial_meta_name = spatial_tar_folder_name + '/scalefactors_json.json'
    spatial_meta = spatial_tar.extractfile(spatial_meta_name)
    spatial_meta_dict = spatial_meta.read()
    spot_centre_scalefactors = json.loads(spatial_meta_dict)

    # Load the per-spot positions/metadata table; this must happen while the
    # archive is still open because the member is streamed from it
    spatial_positions_name = spatial_tar_folder_name + '/tissue_positions.csv'
    spatial_positions_file = spatial_tar.extractfile(spatial_positions_name)
    spatial_positions = pd.read_csv(spatial_positions_file)

# Scale factor later applied to the pixel coordinates, and the spot diameter,
# both taken from scalefactors_json.json
px_to_dist_scale_factor = spot_centre_scalefactors['regist_target_img_scalef']
spot_diameter = spot_centre_scalefactors['spot_diameter_fullres']

# Read the graph-based clustering results from the analysis archive.
# The `with` block closes the archive once the table has been loaded
# (it was previously left open).
with tarfile.open(cluster_data_file, "r") as cluster_tar:
    # The first member name is used as the archive's top-level folder name
    cluster_tar_folder_name = cluster_tar.getnames()[0]

    # Load clusters.csv while the archive is still open
    clustering_name = cluster_tar_folder_name + '/clustering/gene_expression_graphclust/clusters.csv'
    clustering_file = cluster_tar.extractfile(clustering_name)
    clustering_csv = pd.read_csv(clustering_file)

# Make sure the cluster IDs are numeric
clustering_csv['Cluster'] = pd.to_numeric(clustering_csv['Cluster'])

# Read the deconvolution table for the requested number of topics.
# The `with` block closes the archive once the table has been loaded
# (it was previously left open).
with tarfile.open(deconv_data_file, "r") as deconv_tar:
    deconv_folders = deconv_tar.getnames()
    # The first member name is used as the archive's top-level folder name
    deconv_folders_name = deconv_folders[0]

    # Archive member holding the per-spot values for `number_of_topics` topics
    this_deconv_name = (
        f'{deconv_folders_name}/deconvolution_k{number_of_topics}'
        f'/deconvolved_spots_k{number_of_topics}.csv'
    )

    # Fail here with a clear message: every later step reads `deconv_csv`, so
    # merely printing a warning would only postpone the failure to an
    # unrelated NameError further down the script.
    if this_deconv_name not in deconv_folders:
        raise ValueError(
            f'Error: no deconvolution results for {number_of_topics} topics are stored in the '
            f'Visium experiment data (expected archive member {this_deconv_name!r}).'
        )

    deconv_file = deconv_tar.extractfile(this_deconv_name)
    deconv_csv = pd.read_csv(deconv_file)

# Convert every column except the barcode column to numeric values
for col_name in deconv_csv.columns.values.tolist():
    if col_name != 'Barcode':
        deconv_csv[col_name] = pd.to_numeric(deconv_csv[col_name])

# Positions (row numbers) of the spots flagged as lying inside the tissue
in_tissue_flags = np.asarray(spatial_positions['in_tissue'])
spot_mask = np.flatnonzero(in_tissue_flags > 0)

# Build (column, -row) pixel coordinates for every spot, apply the scale
# factor, then keep only the in-tissue spots
pixel_xy = np.column_stack((
    spatial_positions['pxl_col_in_fullres'],
    -spatial_positions['pxl_row_in_fullres'],
))
spot_centres = (px_to_dist_scale_factor * pixel_xy)[spot_mask, :]

# Barcodes of the in-tissue spots, in the same order as spot_centres
barcode_id = spatial_positions['barcode'][spot_mask]

# Collect, for every in-tissue barcode (in barcode_id order), its cluster ID
# and its deconvolution values.
#
# Both tables are indexed by barcode once and then looked up in a single
# `.loc` call each, instead of scanning the whole table with np.where for
# every barcode (which was quadratic in the number of spots).
# drop_duplicates keeps the first row per barcode, matching the previous
# "first match wins" behaviour.
cluster_by_barcode = clustering_csv.drop_duplicates('Barcode').set_index('Barcode')['Cluster']
deconv_by_barcode = deconv_csv.drop_duplicates('Barcode').set_index('Barcode')

# A barcode missing from either table raises KeyError here
# (the per-barcode scan used to raise IndexError in that case).
in_tissue_barcodes = list(barcode_id)
deconv_rows = deconv_by_barcode.loc[in_tissue_barcodes]

# list(ndarray) keeps the elements as numpy scalars, as the indexed lookups did
cluster_id = list(cluster_by_barcode.loc[in_tissue_barcodes].to_numpy())

# One list per non-barcode column, keyed by column name in table order
deconv = {
    col_name: list(deconv_rows[col_name].to_numpy())
    for col_name in deconv_rows.columns
}

# One copy of the (shared) spot diameter per spot, to be attached as a label
spot_diameter_labels = [spot_diameter for _ in range(len(spot_centres))]

# The barcodes are needed as a plain Python list from here on
barcode_id = barcode_id.to_list()

# Build the MuSpAn domain that will hold the Visium spots
vis_domain = ms.domain('Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)')

# Register the spot centres as a point collection named 'Spots'
vis_domain.add_points(spot_centres, 'Spots')

# Attach the barcode and the cluster ID of each spot (default label type)
vis_domain.add_labels('Barcode', barcode_id, 'Spots')
vis_domain.add_labels('Spot cluster', cluster_id, 'Spots')

# Attach the spot diameter as a continuous label
vis_domain.add_labels('Spot diameter', spot_diameter_labels, 'Spots', label_type='continuous')

# Attach every deconvolution column (all columns except 'Barcode') as a
# continuous label drawn with the viridis colour map
topic_columns = [name for name in deconv_csv.columns if name != 'Barcode']
for topic_name in topic_columns:
    vis_domain.add_labels(
        topic_name,
        deconv[topic_name],
        'Spots',
        label_type='continuous',
        cmap='viridis',
    )

# Print a summary of the domain as a sanity check
print(vis_domain)

# Single-axis 10x10 figure for the cluster map
fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# Draw the spots coloured by their 'Spot cluster' label, without the domain boundary.
# NOTE(review): vmin/vmax are passed although 'Spot cluster' was not added as a
# continuous label - confirm they have any effect for this plot.
cluster_plot_options = dict(
    color_by=('label', 'Spot cluster'),
    show_boundary=False,
    marker_size=10,
    vmin=0,
    vmax=0.5,
    ax=ax,
)
ms.visualise.visualise(vis_domain, **cluster_plot_options)

# Single-axis 10x10 figure for the topic map
fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# Draw the spots coloured by the 'Topic 16' label, without the domain boundary,
# with the colour scale limited to the range 0 to 0.5
topic_plot_options = dict(
    color_by=('label', 'Topic 16'),
    show_boundary=False,
    marker_size=10,
    vmin=0,
    vmax=0.5,
    ax=ax,
)
ms.visualise.visualise(vis_domain, **topic_plot_options)

# Every spot carries the same diameter label, so read it from the first spot
# and halve it to get the radius
first_spot_diameter = vis_domain.labels['Spot diameter']['labels'][0]
spot_radius = first_spot_diameter / 2

# Build a distance-weighted Delaunay network over the domain, keeping only
# edges whose length lies between 0 and spot_radius.
# NOTE(review): the diameter comes from 'spot_diameter_fullres' while the spot
# centres were multiplied by 'regist_target_img_scalef' - confirm both are
# expressed in the same units.
spot_network_options = dict(
    network_name='Spot network',
    network_type='delaunay',
    distance_weighted=True,
    min_edge_distance=0,
    max_edge_distance=spot_radius,
)
G = ms.networks.generate_network(vis_domain, **spot_network_options)

# Single-axis 5x6 figure for the network plot
fig, ax = plt.subplots(1, 1, figsize=(5, 6))

# Draw the 'Spot network' with nodes coloured by 'Spot cluster' and no colour
# bar; small markers and thin edges are requested via marker_size / edge_width
network_plot_options = dict(
    network_name='Spot network',
    color_by=('label', 'Spot cluster'),
    add_cbar=False,
    ax=ax,
    marker_size=1,
    edge_width=0.5,
)
ms.visualise.visualise_network(vis_domain, **network_plot_options)

接下来来看高精度

# Import necessary libraries
import muspan as ms
import numpy as np

# Seed NumPy's global random number generator
np.random.seed(42)

# Load the bundled synthetic example domain
example_dataset_name = 'Synthetic-Points-Aggregation'
example_domain = ms.datasets.load_example_domain(example_dataset_name)

# Plot the domain with objects coloured by their 'Celltype' label
ms.visualise.visualise(example_domain, color_by='Celltype')

# Build a Delaunay network named 'Centroid Delaunay' on the example domain,
# with the maximum edge distance set to 70
delaunay_options = dict(
    network_name='Centroid Delaunay',
    network_type='Delaunay',
    max_edge_distance=70,
)
ms.networks.generate_network(example_domain, **delaunay_options)

# Plot that network in a 10x7 figure, nodes coloured by 'Celltype'
node_style = dict(color_by='Celltype', marker_size=10)
figure_style = dict(figsize=(10, 7))
ms.visualise.visualise_network(
    example_domain,
    network_name='Centroid Delaunay',
    visualise_kwargs=node_style,
    figure_kwargs=figure_style,
)

def _detect_and_show_communities(resolution):
    """Run Louvain community detection at *resolution* and plot the result.

    The communities are detected on the 'Centroid Delaunay' network of
    ``example_domain`` without edge weights and stored under the label
    'Communities : Res = <resolution>'. The network is then drawn in a 10x7
    figure with nodes coloured by that label.

    Returns whatever ``ms.networks.community_detection`` returns.
    """
    label_name = f'Communities : Res = {resolution}'

    communities = ms.networks.community_detection(
        example_domain,
        network_name='Centroid Delaunay',
        edge_weight_name=None,
        community_method='louvain',
        community_method_parameters=dict(resolution=resolution),
        community_label_name=label_name,
    )

    # Thin black outlines are requested for the node markers via scatter_kwargs
    ms.visualise.visualise_network(
        example_domain,
        network_name='Centroid Delaunay',
        edge_weight_name='Distance',
        visualise_kwargs=dict(
            color_by=label_name,
            marker_size=15,
            scatter_kwargs=dict(linewidth=0.1, edgecolor='black'),
        ),
        figure_kwargs=dict(figsize=(10, 7)),
    )
    return communities


# Louvain communities at resolution 1, then at resolution 0.3
communities_res_1 = _detect_and_show_communities(1)
communities_res_03 = _detect_and_show_communities(0.3)