ScaffoldGraph是一个开源的化学信息学库,基于RDKit和NetworkX构建,用于生成和分析骨架网络(scaffold network)和骨架树(scaffold tree)。
1. 特点
2. 与现有软件的比较
3. 安装
ScaffoldGraph目前仅支持Python 3
# Option 1: install with conda (register the conda-forge channel first).
conda config --add channels conda-forge
conda install -c uclcheminformatics scaffoldgraph

# Option 2: install with pip.
pip install scaffoldgraph
4. ScaffoldGraph示例:骨架网络与骨架树
导入库
import scaffoldgraph as sg
import networkx as nx
import matplotlib.pyplot as plt
from rdkit.Chem import Draw
from rdkit import Chem
import random
import os
载入数据,绘制分子
# Locate the example SDF file (200 PubChem compounds). It lives in the
# `examples` directory that sits next to the `scaffoldgraph` package directory.
# The path is assembled with os.path rather than str.replace, because replace
# would rewrite *every* 'scaffoldgraph' occurrence in the path (for example a
# checkout at .../scaffoldgraph/scaffoldgraph) and yield a bogus location.
package_dir = os.path.dirname(sg.__file__)
sdf_file = os.path.join(os.path.dirname(package_dir), 'examples', 'example.sdf')
supplier = Chem.SDMolSupplier(sdf_file)

# Preview the first `peek` molecules from the supplier as a grid image.
peek = 6
Draw.MolsToGridImage([supplier[x] for x in range(peek)])
骨架网络
生成骨架网络
# Build a scaffold network directly from the SDF file.
# NOTE(review): progress=True presumably enables a progress display - confirm
# against the ScaffoldGraph documentation.
network = sg.ScaffoldNetwork.from_sdf(sdf_file, progress=True)

# The network exposes the number of molecule nodes and scaffold nodes.
n_molecules = network.num_molecule_nodes
n_scaffolds = network.num_scaffold_nodes

# Report both counts in a single summary line.
print(
    '\nGenerated scaffold network from {} molecules with {} scaffolds\n'.format(
        n_molecules,
        n_scaffolds,
    )
)
查看并绘制骨架网络中的骨架
# Materialise the scaffold nodes as a list and print the first five of them.
scaffolds = list(network.get_scaffold_nodes())
print(scaffolds[:5])

# Visualize a few of the scaffolds: each scaffold node is parsed as a SMILES
# string and the resulting molecules are drawn as a grid image.
sample = 6
Draw.MolsToGridImage(list(map(Chem.MolFromSmiles, scaffolds[:sample])))
骨架分布
# Number of scaffolds per hierarchy level (per the original note this is a
# collections.Counter object).
counts = network.get_hierarchy_sizes()

# Sort the (hierarchy, count) pairs by hierarchy, then split them into the
# x values (hierarchy levels) and y values (scaffold counts) for plotting.
lists = sorted(counts.items())
x, y = zip(*lists)

# Plot sizes as a bar chart.
plt.figure(figsize=(8, 6))
plt.bar(x, y)
plt.title('Number of Scaffolds per Hierarchy (Network)')
plt.xlabel('Hierarchy')
plt.ylabel('Scaffold Count')
plt.show()
骨架匹配与高亮
# Use this sub-scaffold as the query; it must match a node key in the network.
query_smiles = 'c1ccncc1'
query_mol = Chem.MolFromSmiles(query_smiles)

# Keep only the direct successors of the query node that are scaffold nodes
# (successors may also include molecule nodes, which are filtered out here).
next_scaffolds = [
    succ
    for succ in network.successors(query_smiles)
    if network.nodes[succ]['type'] == 'scaffold'
]
print('Found {} scaffolds in hierarchy 2 containing {}:'.format(len(next_scaffolds), query_smiles))

# Draw up to six of the matching scaffolds, highlighting the atoms that match
# the query sub-scaffold in each one.
mols = [Chem.MolFromSmiles(x) for x in next_scaffolds[:6]]
Draw.MolsToGridImage(mols, highlightAtomLists=[mol.GetSubstructMatch(query_mol) for mol in mols])
分子匹配与高亮
# Walk everything reachable from the query scaffold (breadth-first) and keep
# only the molecule nodes.
molecules = [
    node
    for node in nx.bfs_tree(network, query_smiles, reverse=False)
    if network.nodes[node]['type'] == 'molecule'
]
print('Found {} molecules containing scaffold, {}\n'.format(len(molecules), query_smiles))

# Molecule nodes are PubChem IDs, so look up each node's SMILES in order to
# view some of the molecules.
smiles = [network.nodes[pid]['smiles'] for pid in molecules]
mols = [Chem.MolFromSmiles(smi) for smi in smiles]

# Draw at most nine molecules, labelled with their IDs, highlighting the atoms
# that match the query scaffold.
Draw.MolsToGridImage(
    mols,
    highlightAtomLists=[mol.GetSubstructMatch(query_mol) for mol in mols],
    legends=molecules,
    maxMols=9,
)
骨架树
# Build a scaffold tree from the same SDF file.
tree = sg.ScaffoldTree.from_sdf(sdf_file, progress=True)

# Read the number of molecule nodes and scaffold nodes in the graph.
n_molecules = tree.num_molecule_nodes
n_scaffolds = tree.num_scaffold_nodes
print(
    '\nGenerated scaffold tree from {} molecules with {} scaffolds\n'.format(
        n_molecules,
        n_scaffolds,
    )
)

# The output is expected to be a forest structure (multiple trees); check it.
graph_is_forest = nx.is_forest(tree)
print('Graph is a Forest:', graph_is_forest)
绘制分子骨架树
# Pick one molecule node at random and report its PubChem ID.
molecule_ids = list(tree.get_molecule_nodes())
random_pubchem_id = random.choice(molecule_ids)
print('PubChem ID:', random_pubchem_id)

# Follow edges in reverse from the molecule to collect its predecessor
# scaffolds, and validate that this scaffold set forms a tree structure.
predecessors = nx.bfs_tree(tree, random_pubchem_id, reverse=True)
print('Predecessors of {} is Tree: {}'.format(random_pubchem_id, nx.is_tree(predecessors)))

# Draw these scaffolds. Entry [0] is the PubChem ID rather than a SMILES
# string, so it is swapped for the SMILES stored on that molecule node.
predecessors_list = list(predecessors)
root_node = predecessors_list[0]
predecessors_list[0] = tree.nodes[root_node]['smiles']
Draw.MolsToGridImage(list(map(Chem.MolFromSmiles, predecessors_list)))