用法:
python split_multimol2.py multi-mol2.mol2 out_dir
注释:python 脚本文件 mol2分子库 输出目录
split_multimol2.py:
#Python2 or Python3
#AspirinCode 2018
#Script that splits a multi-mol2 file into individual mol2 files.
#python split_multimol2.py multi-mol2.mol2 out_dir
import os
import sys
def _finish_mol2_record(lines):
    """Turn the collected lines of one record into ``[molecule_id, contents]``."""
    # Only the record's final line is right-stripped, so the joined text does
    # not end in a newline (callers append their own).
    lines[-1] = lines[-1].rstrip()
    # The molecule name is the line directly after the record header.
    molecule_id = lines[1].strip() if len(lines) > 1 else ''
    return [molecule_id, "".join(lines)]


def split_multimol2(multimol2):
    """
    Splits a multi-mol2 file into its individual molecule records.

    A record starts at a line beginning with ``@<TRIPOS>MOLECULE`` and runs up
    to (but not including) the next such line, or to the end of the file.
    Lines that precede the first record header (comments, blank lines) are
    skipped.

    Parameters
    ----------
    multimol2 : str
      Path to the multi-mol2 file.

    Returns
    ----------
    A generator object for lists for every extracted mol2-file. Lists contain
    the molecule ID (the stripped line following the record header, or an
    empty string if the record has no such line) and the mol2 file contents,
    e.g., ['ID1234', '@<TRIPOS>MOLECULE...']. Trailing whitespace on the last
    line of each record is removed.
    """
    header = "@<TRIPOS>MOLECULE"
    record = []
    with open(multimol2, 'r') as mol2file:
        # Plain line iteration always advances, so a file that does not start
        # with a record header cannot stall the loop.
        for line in mol2file:
            if line.startswith(header):
                if record:
                    yield _finish_mol2_record(record)
                record = [line]
            elif record:
                record.append(line)
    # The last record is terminated by end-of-file rather than by a header.
    if record:
        yield _finish_mol2_record(record)
def write_multimol2(multimol2, out_dir):
    """
    Splits a multi-mol2 file into individual mol2 files, one per molecule.

    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.
    out_dir : str
      Output directory; created if it does not exist. New files will be named
      <molecule_id>.mol2, where the ID is the one reported by
      split_multimol2 for that record.

    Returns
    -----------
    chunks : int
      Number of molecules written. Files are opened in 'w' mode, so records
      sharing a molecule ID overwrite each other and the number of distinct
      files on disk may be smaller.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    chunks = 0
    # Use the function's own arguments rather than the script-level `args`,
    # so the function also works when imported from another module.
    for molecule_id, contents in split_multimol2(multimol2):
        out_mol2 = os.path.join(out_dir, molecule_id) + '.mol2'
        with open(out_mol2, 'w') as out_file:
            out_file.write(contents)
            # split_multimol2 strips the record's trailing newline.
            out_file.write('\n')
        chunks += 1
    return chunks
def write_multimol2_chunks(multimol2, chunk_size, out_dir):
    """
    Splits a multi-mol2 file into smaller multi-mol2 files.

    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.
    chunk_size : int
      Number of mol2 structures per chunk; must be at least 1. Every output
      file except possibly the last holds exactly this many structures.
    out_dir : str
      Output directory; created if it does not exist. New files will be named
      <stem>_1.mol2, ... <stem>_n.mol2, where <stem> is the base name of
      `multimol2` up to its first '.mol2'.

    Returns
    -----------
    chunks : int
      Number of files written (0 if the input contains no molecules).

    Raises
    -----------
    ValueError
      If `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer, got %r'
                         % (chunk_size,))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    out_file_stem = os.path.basename(multimol2).split('.mol2')[0]
    out_stem = os.path.join(out_dir, out_file_stem)

    cnt = 0        # structures written to the currently open chunk file
    chunks = 0     # chunk files opened so far
    out_file = None
    try:
        for mol2 in split_multimol2(multimol2):
            # Open a file only when there is a structure to put into it, so
            # that no empty chunk file is created and every full chunk holds
            # exactly `chunk_size` structures.
            if out_file is None or cnt == chunk_size:
                if out_file is not None:
                    out_file.close()
                chunks += 1
                cnt = 0
                out_file = open(out_stem + '_%d.mol2' % chunks, 'w')
            out_file.write(mol2[1] + '\n')
            cnt += 1
    finally:
        # Close the last chunk even if reading or writing failed part-way.
        if out_file is not None:
            out_file.close()
    return chunks
# Command-line entry point:
#   python split_multimol2.py multi-mol2.mol2 out_dir [-c CHUNKSIZE]
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Splits a multi-mol2 file into individual mol2 files',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('MOL2_FILE')
    parser.add_argument('OUT_DIR')
    parser.add_argument('-c', '--chunksize',
                        help='Number of MOL2 structures per file (1 by default)',
                        type=int)
    parser.add_argument('-v', '--version', action='version',
                        version='split_multimol2 v. 1.1')
    args = parser.parse_args()

    # Without -c (or with -c 0) every molecule is written to its own file;
    # otherwise the molecules are grouped into multi-mol2 chunk files.
    if args.chunksize:
        write_multimol2_chunks(multimol2=args.MOL2_FILE,
                               chunk_size=args.chunksize,
                               out_dir=args.OUT_DIR)
    else:
        write_multimol2(multimol2=args.MOL2_FILE, out_dir=args.OUT_DIR)
领取专属 10元无门槛券
私享最新 技术干货