# Import necessary libraries
import muspan as ms
import numpy as np
# Imports to make our plots look nice
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
# Plot appearance: raise the figure resolution to 270 dots per inch
mpl.rcParams.update({'figure.dpi': 270})
# Apply seaborn's 'white' theme with a font scaling factor of 2
sns.set_theme(font_scale=2, style='white')
# Additional imports to extract the data from the relevant files
import json
import tarfile
import pandas as pd
# Folder containing the downloaded dataset archives (edit to match your machine)
path_to_the_data = 'path/where/you/saved/the/data/Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)'
# Number of topics of the deconvolution table that will be imported
number_of_topics = 16
# Full paths of the three archives read below; each sits directly in the data folder
spatial_data_file = f'{path_to_the_data}/CytAssist_FFPE_Human_Colon_Rep1_spatial.tar.gz'
cluster_data_file = f'{path_to_the_data}/CytAssist_FFPE_Human_Colon_Rep1_analysis.tar.gz'
deconv_data_file = f'{path_to_the_data}/CytAssist_FFPE_Human_Colon_Rep1_deconvolution.tar.gz'
# Read everything needed from the spatial archive inside a context manager so
# the tar file handle is always closed, even if a member is missing
with tarfile.open(spatial_data_file, "r") as spatial_tar:
    # The first archive entry is used as the folder prefix for member names
    # (assumes the archive lists its top-level folder first - TODO confirm)
    spatial_tar_folder_name = spatial_tar.getnames()[0]
    # Load the scalefactors_json.json file into a dictionary
    spatial_meta_name = spatial_tar_folder_name + '/scalefactors_json.json'
    spatial_meta = spatial_tar.extractfile(spatial_meta_name)
    spatial_meta_dict = spatial_meta.read()
    spot_centre_scalefactors = json.loads(spatial_meta_dict)
    # Load the locations and metadata of the spots; the table must be parsed
    # while the archive is still open because extractfile() reads lazily
    spatial_positions_name = spatial_tar_folder_name + '/tissue_positions.csv'
    spatial_positions_file = spatial_tar.extractfile(spatial_positions_name)
    spatial_positions = pd.read_csv(spatial_positions_file)
# Get the scale factor and spot diameter from the scalefactors_json.json contents
px_to_dist_scale_factor = spot_centre_scalefactors['regist_target_img_scalef']
spot_diameter = spot_centre_scalefactors['spot_diameter_fullres']
# Read the clustering table inside a context manager so the tar file handle is
# always closed once the table has been parsed
with tarfile.open(cluster_data_file, "r") as cluster_tar:
    # The first archive entry is used as the folder prefix for member names
    # (assumes the archive lists its top-level folder first - TODO confirm)
    cluster_tar_folder_name = cluster_tar.getnames()[0]
    # Extract the graph-based gene expression clustering table
    clustering_name = cluster_tar_folder_name + '/clustering/gene_expression_graphclust/clusters.csv'
    clustering_file = cluster_tar.extractfile(clustering_name)
    clustering_csv = pd.read_csv(clustering_file)
# Make sure the cluster IDs are stored as numeric values
clustering_csv['Cluster'] = pd.to_numeric(clustering_csv['Cluster'])
# Read the deconvolution table inside a context manager so the tar file handle
# is always closed once the table has been parsed
with tarfile.open(deconv_data_file, "r") as deconv_tar:
    deconv_folders = deconv_tar.getnames()
    # The first archive entry is used as the folder prefix for member names
    # (assumes the archive lists its top-level folder first - TODO confirm)
    deconv_folders_name = deconv_folders[0]
    # Member name of the deconvolution table for the specified number of topics
    this_deconv_name = deconv_folders_name + '/deconvolution_k' + str(number_of_topics) + '/deconvolved_spots_k' + str(number_of_topics) + '.csv'
    if this_deconv_name in deconv_folders:
        deconv_file = deconv_tar.extractfile(this_deconv_name)
        deconv_csv = pd.read_csv(deconv_file)
        # Convert the deconvolution data to numeric values
        for col_name in deconv_csv.columns.values.tolist():
            if col_name != 'Barcode':
                deconv_csv[col_name] = pd.to_numeric(deconv_csv[col_name])
    else:
        print('Error: Number of topics is not stored in Visium experiment data. No deconvolution data has been added to the domain.')
        # Fall back to a table with no topic columns so the rest of the script
        # can carry on without deconvolution labels, as the message promises,
        # instead of failing later with a NameError on deconv_csv
        deconv_csv = pd.DataFrame({'Barcode': []})
# Positional indices of the spots that are flagged as being in the tissue
spot_mask = np.where(np.array(spatial_positions.in_tissue) > 0)[0]
# Convert the spot positions to the correct scale, then keep the in-tissue spots.
# The row coordinate is negated (presumably to flip the image y-axis - confirm).
spot_centres = px_to_dist_scale_factor * np.array([spatial_positions.pxl_col_in_fullres, -spatial_positions.pxl_row_in_fullres]).T
spot_centres = spot_centres[spot_mask, :]
# Barcodes of the in-tissue spots, in the same order as spot_centres.
# .iloc is used because spot_mask holds positions, not index labels.
barcode_id = spatial_positions.barcode.iloc[spot_mask].tolist()
# Look up the cluster of every in-tissue barcode through a barcode-indexed
# series rather than scanning the whole table once per spot. Only the first row
# of a repeated barcode is kept, and .loc raises a KeyError naming any barcode
# that has no cluster assigned.
cluster_lookup = clustering_csv.drop_duplicates('Barcode').set_index('Barcode')['Cluster']
cluster_id = list(cluster_lookup.loc[barcode_id].to_numpy())
# Collect the deconvolution values of every topic for the in-tissue barcodes;
# the dictionary stays empty when no deconvolution table was loaded
topic_columns = [name for name in deconv_csv.columns.values.tolist() if name != 'Barcode']
deconv = {}
if topic_columns:
    deconv_lookup = deconv_csv.drop_duplicates('Barcode').set_index('Barcode')
    matched_deconv = deconv_lookup.loc[barcode_id]
    for col_name in topic_columns:
        deconv[col_name] = list(matched_deconv[col_name].to_numpy())
# Create a list of spot diameters to use as labels (one entry per in-tissue spot)
spot_diameter_labels = [spot_diameter] * len(spot_centres)
# Build the MuSpAn domain that will hold the Visium spots
domain_name = 'Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)'
vis_domain = ms.domain(domain_name)
# Register the spot centres as point objects in the 'Spots' collection
vis_domain.add_points(spot_centres, 'Spots')
# Attach the barcode and cluster labels, both with add_labels' default label type
for label_name, label_values in (('Barcode', barcode_id), ('Spot cluster', cluster_id)):
    vis_domain.add_labels(label_name, label_values, 'Spots')
# Attach the spot diameter as a continuous label
vis_domain.add_labels('Spot diameter', spot_diameter_labels, 'Spots', label_type='continuous')
# Attach one continuous label per deconvolution column, skipping the barcode column
topic_names = [name for name in deconv_csv.columns.values.tolist() if name != 'Barcode']
for topic_name in topic_names:
    vis_domain.add_labels(
        topic_name,
        deconv[topic_name],
        'Spots',
        label_type='continuous',
        cmap='viridis'
    )
# Print a summary of the domain to check that everything was added
print(vis_domain)
# Draw the domain twice, each time on a fresh 10x10 figure: first coloured by
# the spot cluster labels, then by the 'Topic 16' deconvolution label.
# Both plots share the same marker size and colour limits.
for label_to_plot in ('Spot cluster', 'Topic 16'):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    ms.visualise.visualise(
        vis_domain,
        color_by=('label', label_to_plot),
        show_boundary=False,
        marker_size=10,
        vmin=0,
        vmax=0.5,
        ax=ax
    )
# Spot radius: half of the first entry stored under the 'Spot diameter' label
spot_diameter_values = vis_domain.labels['Spot diameter']['labels']
spot_radius = spot_diameter_values[0] / 2
# Settings for a distance-weighted Delaunay network whose edges are capped at
# the spot radius.
# NOTE(review): the diameter comes from 'spot_diameter_fullres' while the spot
# centres were multiplied by the registration scale factor - confirm that both
# are expressed in the same units before relying on this cut-off.
spot_network_options = dict(
    network_name='Spot network',
    network_type='delaunay',
    distance_weighted=True,
    min_edge_distance=0,
    max_edge_distance=spot_radius
)
G = ms.networks.generate_network(vis_domain, **spot_network_options)
# Fresh 5x6 figure for the network plot
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 6))
# Draw the spot network coloured by cluster label, with no colour bar and with
# small markers and thin edges
ms.visualise.visualise_network(
    vis_domain,
    network_name='Spot network',
    color_by=('label', 'Spot cluster'),
    add_cbar=False,
    ax=ax,
    marker_size=1,
    edge_width=0.5
)
# Import necessary libraries
import muspan as ms
import numpy as np
# Fix NumPy's global random seed so repeated runs are reproducible
np.random.seed(seed=42)
# Load the 'Synthetic-Points-Aggregation' example domain from MuSpAn's datasets
example_domain = ms.datasets.load_example_domain(
    'Synthetic-Points-Aggregation'
)
# Plot the example domain with objects coloured by their 'Celltype' label
ms.visualise.visualise(
    example_domain,
    color_by='Celltype'
)
# Name shared by the network generation and plotting calls in this step
centroid_network_name = 'Centroid Delaunay'
# Build a Delaunay network on the example domain with edges capped at a distance of 70
ms.networks.generate_network(
    example_domain,
    network_name=centroid_network_name,
    network_type='Delaunay',
    max_edge_distance=70
)
# Plot the network on a 10x7 figure with nodes coloured by 'Celltype'
celltype_plot_options = dict(color_by='Celltype', marker_size=10)
ms.visualise.visualise_network(
    example_domain,
    network_name=centroid_network_name,
    visualise_kwargs=celltype_plot_options,
    figure_kwargs=dict(figsize=(10, 7))
)
# Louvain community detection on the 'Centroid Delaunay' network at resolution 1,
# run without an edge weight; results are stored under the label
# 'Communities : Res = 1'
louvain_res_1_options = dict(
    network_name='Centroid Delaunay',
    edge_weight_name=None,
    community_method='louvain',
    community_method_parameters=dict(resolution=1),
    community_label_name='Communities : Res = 1'
)
communities_res_1 = ms.networks.community_detection(example_domain, **louvain_res_1_options)
# Plot the network on a 10x7 figure with nodes coloured by the resolution-1
# communities and outlined with a thin black edge
res_1_node_style = dict(
    color_by='Communities : Res = 1',
    marker_size=15,
    scatter_kwargs=dict(linewidth=0.1, edgecolor='black')
)
ms.visualise.visualise_network(
    example_domain,
    network_name='Centroid Delaunay',
    edge_weight_name='Distance',
    visualise_kwargs=res_1_node_style,
    figure_kwargs=dict(figsize=(10, 7))
)
# Louvain community detection on the 'Centroid Delaunay' network at resolution 0.3,
# run without an edge weight; results are stored under the label
# 'Communities : Res = 0.3'
louvain_res_03_options = dict(
    network_name='Centroid Delaunay',
    edge_weight_name=None,
    community_method='louvain',
    community_method_parameters=dict(resolution=0.3),
    community_label_name='Communities : Res = 0.3'
)
communities_res_03 = ms.networks.community_detection(example_domain, **louvain_res_03_options)
# Plot the network on a 10x7 figure with nodes coloured by the resolution-0.3
# communities and outlined with a thin black edge
res_03_node_style = dict(
    color_by='Communities : Res = 0.3',
    marker_size=15,
    scatter_kwargs=dict(linewidth=0.1, edgecolor='black')
)
ms.visualise.visualise_network(
    example_domain,
    network_name='Centroid Delaunay',
    edge_weight_name='Distance',
    visualise_kwargs=res_03_node_style,
    figure_kwargs=dict(figsize=(10, 7))
)
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。