首页
学习
活动
专区
工具
TVP
发布
精选内容/技术社群/优惠产品,尽在小程序
立即前往

Python脚本:将mol2分子库文件拆分为单个mol2文件

用法:

python split_multimol2.py multi-mol2.mol2 out_dir

注释:python 脚本文件 mol2分子库 输出目录

split_multimol2.py:

#Python2 or Python3

#AspirinCode 2018

#Script that splits a multi-mol2 file into individual mol2 files.

#python split_multimol2.py multi-mol2.mol2 out_dir

import os
import sys

def _pack_mol2_record(lines):
    """Turn the raw lines of one molecule record into ``[molecule_id, text]``."""
    # The molecule name is the line that directly follows the record header.
    molecule_id = lines[1].strip() if len(lines) > 1 else ''
    # Drop trailing whitespace of the final line only (removes the blank
    # line that usually separates records / ends the file).
    lines[-1] = lines[-1].rstrip()
    return [molecule_id, ''.join(lines)]


def split_multimol2(multimol2):
    """
    Splits a multi-mol2 file.

    Parameters
    ----------
    multimol2 : str
        Path to the multi-mol2 file.

    Returns
    ----------
    A generator object for lists for every extracted mol2-file. Lists contain
    the molecule ID and the mol2 file contents,
    e.g., ['ID1234', '@<TRIPOS>MOLECULE...'].

    Notes
    ----------
    Lines that precede the first record header (comments, blank lines) are
    skipped. A record that consists of a header line only gets an empty
    molecule ID.

    """
    # '@<TRIPOS>MOLECULE' is the record header of the Tripos mol2 format;
    # the bare '@MOLECULE' spelling is accepted as well for compatibility
    # with the literal used by earlier copies of this script.
    record_tags = ('@<TRIPOS>MOLECULE', '@MOLECULE')

    record = None
    with open(multimol2, 'r') as mol2file:
        # Plain line iteration: no file-offset bookkeeping is needed to
        # detect the end of the file, and every line is consumed exactly once.
        for line in mol2file:
            if line.startswith(record_tags):
                if record is not None:
                    yield _pack_mol2_record(record)
                record = [line]
            elif record is not None:
                record.append(line)

    # Flush the last record, which is terminated by end-of-file.
    if record is not None:
        yield _pack_mol2_record(record)

def write_multimol2(multimol2, out_dir):
    """
    Splits a multi-mol2 file into individual mol2 files.

    Parameters
    -----------
    multimol2 : str
        Path to the multi-mol2 file.
    out_dir : str
        Output directory; created if it does not exist. Each molecule is
        written to <out_dir>/<molecule ID>.mol2.

    Returns
    -----------
    written : int
        Number of molecules written. Molecules that share an ID are written
        to the same path, so a later one overwrites an earlier one.

    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    written = 0
    # Use the function's own parameters rather than the script-level
    # ``args`` namespace, so the function also works when imported.
    for molecule_id, content in split_multimol2(multimol2):
        out_mol2 = os.path.join(out_dir, molecule_id) + '.mol2'
        with open(out_mol2, 'w') as out_file:
            out_file.write(content)
            out_file.write('\n')
        written += 1

    return written

def write_multimol2_chunks(multimol2, chunk_size, out_dir):
    """
    Splits a multi-mol2 file into smaller multi-mol2 files.

    Parameters
    -----------
    multimol2 : str
        Path to the multi-mol2 file.
    chunk_size : int
        Number of mol2 structures per chunk file; must be at least 1.
    out_dir : str
        Output directory; created if it does not exist. New files are named
        <input stem>_1.mol2, ... <input stem>_n.mol2

    Returns
    -----------
    chunks : int
        Number of files written (0 if the input contains no molecules).

    Raises
    -----------
    ValueError
        If chunk_size is smaller than 1.

    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be >= 1, got %r' % (chunk_size,))

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    out_file_stem = os.path.basename(multimol2).split('.mol2')[0]

    chunks = 0        # number of chunk files opened so far
    in_chunk = 0      # molecules written to the currently open chunk file
    out_file = None
    try:
        for mol2 in split_multimol2(multimol2):
            # Open a chunk file only when there is a molecule to put into
            # it: every file then holds exactly chunk_size molecules (the
            # last one possibly fewer) and no empty file is ever created.
            if out_file is None or in_chunk == chunk_size:
                if out_file is not None:
                    out_file.close()
                chunks += 1
                in_chunk = 0
                out_name = '%s_%d.mol2' % (out_file_stem, chunks)
                out_file = open(os.path.join(out_dir, out_name), 'w')
            out_file.write(mol2[1] + '\n')
            in_chunk += 1
    finally:
        # Close the current chunk even if reading or writing failed.
        if out_file is not None:
            out_file.close()

    return chunks

# Command-line entry point:
#     python split_multimol2.py multi-mol2.mol2 out_dir [-c CHUNKSIZE]
if __name__ == '__main__':

    import argparse

    parser = argparse.ArgumentParser(
        description='Splits a multi-mol2 file into individual mol2 files',
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument('MOL2_FILE')
    parser.add_argument('OUT_DIR')
    parser.add_argument('-c', '--chunksize', help='Number of MOL2 structures per file (1 by default)', type=int)
    parser.add_argument('-v', '--version', action='version', version='split_multimol2 v. 1.1')

    args = parser.parse_args()

    # With a chunk size, write numbered multi-mol2 chunk files; without one
    # (or with 0), write one file per molecule named after the molecule ID.
    if args.chunksize:
        write_multimol2_chunks(multimol2=args.MOL2_FILE, chunk_size=args.chunksize, out_dir=args.OUT_DIR)
    else:
        write_multimol2(multimol2=args.MOL2_FILE, out_dir=args.OUT_DIR)

  • 发表于:
  • 原文链接https://kuaibao.qq.com/s/20180716G01B5900?refer=cp_1026
  • 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号(企鹅号)传播渠道之一,根据《腾讯内容开放平台服务协议》转载发布内容。
  • 如有侵权,请联系 cloudcommunity@tencent.com 删除。

扫码

添加站长 进交流群

领取专属 10元无门槛券

私享最新 技术干货

扫码加入开发者社群
领券