为了有更好的实践,我想重构我编写的一个大型Python方法。
我写了一个方法,用于解析FSL神经成像库的设计文件。设计文件是保存神经成像软件设置的文本文件。我最初编写的代码在遍历文件各行的单个循环中完成所有处理。
我欢迎各种各样的建议,但主要想得到设计技巧和最佳实践方面的意见。我也希望得到针对整个项目的反馈,但我知道这可能超出了本网站的范围。
代码结构:
您还可以在GitHub上结合整个项目查看这段代码。
def parse_design_file(self, fsf_lines, type):
    """
    Parse the lines of a design file into the values used by the csv methods.

    Every line is scanned once with a set of regular expressions. Which
    settings are collected, and the layout of the returned tuple, depend on
    ``type``.

    Args:
        fsf_lines: iterable of text lines read from the design file.
        type: analysis kind, compared against ``self.PRE_TYPE``,
            ``self.FIRST_TYPE``, ``self.ME_TYPE`` and ``self.FE_TYPE``.
            The name shadows the builtin; it is kept so existing callers
            (including keyword callers) keep working.

    Returns:
        * ``PRE_TYPE``: ``(analysis_name, output_path, tr, total_volumes,
          deleted, in_file, motion_correction, brain_thresh, smoothing)``
        * ``FIRST_TYPE``: ``(analysis_name, output_path, in_file,
          design_matrix, ev_names, ev_paths, ev_convolves, ev_deriv,
          ev_temp, cope_names)``
        * ``ME_TYPE``: ``(analysis_name, output_path, pvalue, zvalue,
          feat_paths, count, FE_example_dir)``
        * ``FE_TYPE``: ``(analysis_name, output_path, feat_paths, count,
          design_matrix, regressor_matrix, ev_names, cope_names,
          first_example_dir)``
        * ``None`` for any other ``type``.

        Scalar settings that never appear in the file stay ``''``. The
        dictionaries are keyed by whatever ``self.get_fsf_indice`` returns;
        the matrix code below looks them up with ``str(1)``, ``str(2)``, ...

    Raises:
        KeyError: a ``convolve`` value is not in the convolution table, or
            the EV/cope indices are not the contiguous strings "1".."n".
        ValueError: ``FE_TYPE`` and the ``npts`` value is not an integer.

    Side effects:
        For ``FIRST_TYPE``/``PRE_TYPE``, when a ``feat_files`` value contains
        ``preproc...feat`` the matched text is stored on ``self.preproc``
        and printed.
    """
    is_pre = type == self.PRE_TYPE
    is_first = type == self.FIRST_TYPE
    is_me = type == self.ME_TYPE
    is_fe = type == self.FE_TYPE

    # All accumulators are created unconditionally. The original guarded
    # them by type, but one guard (`type == FIRST_TYPE or self.PRE_TYPE`)
    # was always truthy, and a later `'ev_temp' in locals()` check relied on
    # which names happened to exist. Explicit type checks replace both.
    analysis_name = ''
    output_path = ''
    zvalue = ''
    pvalue = ''
    in_file = ''
    count = ''
    example_dir = ''  # FE_example_dir (ME) / first_example_dir (FE)
    tr = ''
    total_volumes = ''
    brain_thresh = ''
    motion_correction = ''
    smoothing = ''
    deleted = ''
    ev_convolves = {}
    ev_paths = {}
    ev_deriv = {}
    ev_temp = {}
    feat_paths = {}
    ev_names = {}
    evg_lines = []
    cope_names = {}
    cope_def_lines = []

    # Display text for the fmri(convolveN) codes; built once, not per line.
    conv_text = {
        "0": "none",
        "1": "Gaussian",
        "2": "Gamma",
        "3": "Double-Gamma HRF",
        "4": "Gamma basis functions",
        "5": "Sine basis functions",
        "6": "FIR basis functions",
    }

    for line in fsf_lines:
        # Settings looked for regardless of type.
        output_match = re.search(r"set fmri\(outputdir\)", line)
        feat_file_match = re.search(r"feat_files\(\d+\)", line)
        total_vols_match = re.search(r"fmri\(npts\)", line)
        z_match = re.search(r"set fmri\(z_thresh\)", line)
        p_match = re.search(r"set fmri\(prob_thresh\)", line)

        if output_match:
            output_path = self.get_fsf_value(line, output_match.end())
            # TODO hardcoded stripping here, make flexible
            run = None  # stays None for an unrecognised type
            if is_me:
                run = re.search(r"ME", line)
            elif is_first or is_pre:
                run = re.search(r"r\d\/", line)
            elif is_fe:
                run = re.search(r"FE\d*", line)
            if run:
                analysis_name = self.get_fsf_value(line, run.end())
                if is_me:
                    analysis_name = self.strip_cope(analysis_name)

        if total_vols_match:
            npts = self.get_fsf_value(line, total_vols_match.end())
            if is_me or is_fe:
                count = npts
            if is_pre:
                total_volumes = npts

        if z_match:
            zvalue = self.get_fsf_value(line, z_match.end())

        if p_match:
            pvalue = self.get_fsf_value(line, p_match.end())

        if feat_file_match:
            if is_first or is_pre:
                value = self.get_fsf_value(line, feat_file_match.end())
                in_file = self.strip_root(value)
                preproc_match = re.search(r"preproc.*feat", value)
                # TODO inconsistent methodology here
                if preproc_match:
                    self.preproc = preproc_match.group()
                    # print() with one argument behaves the same on Python 2 and 3.
                    print(self.preproc)
            elif is_me or is_fe:
                index = self.get_fsf_indice(feat_file_match.group())
                feat_paths[index] = self.strip_fanal(line)
                if not example_dir:
                    # Only the first usable feat_files line supplies the example dir.
                    set_match = re.search(r'set feat_files\(\d+\) "', line)
                    if set_match:
                        no_cope = line[set_match.end():]
                        no_cope = no_cope.strip('\n')
                        no_cope = no_cope.strip('"')
                        example_dir = self.strip_cope(no_cope)

        if is_pre:
            tr_match = re.search(r"fmri\(tr\)", line)
            mc_match = re.search(r"set fmri\(mc\)", line)
            smooth_match = re.search(r"set fmri\(smooth\)", line)
            removed_volumes = re.search(r"fmri\(ndelete\)", line)
            thresh_match = re.search(r"set fmri\(brain_thresh\)", line)
            if tr_match:
                tr = self.get_fsf_value(line, tr_match.end())
            if mc_match:
                value = self.get_fsf_value(line, mc_match.end())
                motion_correction = "Y" if value == "1" else "N"
            if smooth_match:
                smoothing = self.get_fsf_value(line, smooth_match.end())
            if removed_volumes:
                deleted = self.get_fsf_value(line, removed_volumes.end())
            if thresh_match:
                brain_thresh = self.get_fsf_value(line, thresh_match.end())

        if is_first:
            ev_conv_match = re.search(r"fmri\(convolve\d+\)", line)
            ev_path_match = re.search(r"fmri\(custom\d+\)", line)
            ev_deriv_match = re.search(r"fmri\(deriv_yn\d+\)", line)
            ev_temps_match = re.search(r"fmri\(tempfilt_yn\d+\)", line)
            if ev_conv_match:
                conv = self.get_fsf_value(line, ev_conv_match.end())
                index = self.get_fsf_indice(ev_conv_match.group())
                ev_convolves[index] = conv_text[conv]
            if ev_deriv_match:
                value = self.get_fsf_value(line, ev_deriv_match.end())
                index = self.get_fsf_indice(ev_deriv_match.group())
                ev_deriv[index] = "Y" if value == "1" else "N"
            if ev_temps_match:
                value = self.get_fsf_value(line, ev_temps_match.end())
                index = self.get_fsf_indice(ev_temps_match.group())
                ev_temp[index] = "Y" if value == "1" else "N"
            if ev_path_match:
                value = self.get_fsf_value(line, ev_path_match.end())
                index = self.get_fsf_indice(ev_path_match.group())
                ev_paths[index] = self.strip_root(value)

        if is_fe:
            # Raw lines are kept; they are handed to self.fill_matrix later.
            if re.search(r"fmri\(evg\d+\.\d+\)", line):
                evg_lines.append(line)

        if is_fe or is_first:
            ev_name_match = re.search(r"fmri\(evtitle\d+\)", line)
            cope_name_match = re.search(r"fmri\(conname_real\.\d+\)", line)
            cope_def_match = re.search(r"fmri\(con_real\d+\.\d+\)", line)
            if cope_name_match:
                name = self.get_fsf_value(line, cope_name_match.end())
                index = self.get_fsf_indice(cope_name_match.group())
                cope_names[index] = name
            if cope_def_match:
                cope_def_lines.append(line)
            if ev_name_match:
                name = self.get_fsf_value(line, ev_name_match.end())
                index = self.get_fsf_indice(ev_name_match.group())
                ev_names[index] = name

    if is_first or is_fe:
        # One header row plus a row per cope; two label columns plus a
        # column per EV. Cells default to the string '0'.
        design_matrix = [['0' for _ in range(len(ev_names) + 2)]
                         for _ in range(len(cope_names) + 1)]
        if is_first:
            # Starting column number of each EV: an EV flagged 'Y' advances
            # the counter by two, any other by one.
            # NOTE(review): the step is driven by the tempfilt_yn flags
            # (ev_temp), not deriv_yn (ev_deriv) - confirm this is intended.
            real_copes = []
            index_cope = 1
            for i in range(1, len(ev_temp) + 1):
                real_copes.append(str(index_cope))
                index_cope += 2 if ev_temp[str(i)] == 'Y' else 1
            design_matrix = self.fill_matrix(
                cope_def_lines, design_matrix, type, 1, real_copes)
        else:
            design_matrix = self.fill_matrix(
                cope_def_lines, design_matrix, type, 1)

        # Row labels: cope number and cope name.
        for i in range(1, len(cope_names) + 1):
            key = str(i)
            design_matrix[i][0] = key
            design_matrix[i][1] = cope_names[key]
        # Column labels: EV names, starting after the two label columns.
        for i in range(1, len(ev_names) + 1):
            design_matrix[0][i + 1] = ev_names[str(i)]
        design_matrix[0][0] = 'Cope #'
        design_matrix[0][1] = 'Cope Name'

    if is_pre:
        return (analysis_name, output_path, tr, total_volumes, deleted,
                in_file, motion_correction, brain_thresh, smoothing)
    elif is_first:
        return (analysis_name, output_path, in_file, design_matrix, ev_names,
                ev_paths, ev_convolves, ev_deriv, ev_temp, cope_names)
    elif is_me:
        return (analysis_name, output_path, pvalue, zvalue, feat_paths,
                count, example_dir)
    elif is_fe:
        n_inputs = int(count)
        regressor_matrix = [['0' for _ in range(n_inputs + 1)]
                            for _ in range(len(ev_names) + 1)]
        # NOTE(review): the literal 5 is passed where the other calls pass
        # `type`, and the return value is ignored here (presumably
        # fill_matrix mutates its argument) - confirm against fill_matrix.
        self.fill_matrix(evg_lines, regressor_matrix, 5, 0)
        for i in range(1, len(ev_names) + 1):
            regressor_matrix[i][0] = ev_names[str(i)]
        for i in range(1, n_inputs + 1):
            regressor_matrix[0][i] = str(i)
        return (analysis_name, output_path, feat_paths, count, design_matrix,
                regressor_matrix, ev_names, cope_names, example_dir)
发布于 2012-05-23 03:38:55
根据 type 参数的值,您的函数的工作方式大不相同。事实上,根据这个参数,它的作用基本上就像几个不同的函数:它根据 type 的值返回完全不同的值集。这个函数最好拆分成几个函数,每种类型一个。这样每个函数都会更简单,更容易理解。
我假设您这样做是为了尝试在不同的文件类型之间共享代码。这样做是错误的。您应该通过调用公共函数或使用公共基类来共享代码。您不应该通过函数表现出非常不同的行为来共享代码。
对于解析,我建议您真正地解析文件,而不仅仅是在文件上运行一些正则表达式。据我了解,文件基本上由如下形式的行组成:
set something somevalue
我会编写一个将文件转换为字典的 parse() 函数。所以这个:
# Threshold IC maps
set fmri(thresh_yn) 1
set fmri(mmthresh) 0.5
set fmri(ostats) 0
会变成
{'fmri(thresh_yn)' : 1, 'fmri(mmthresh)' : 0.5, 'fmri(ostats)' : 0}
然后,特定类型的函数就可以在字典中查找条目。
最后,您返回一个长元组。为什么?长元组很难使用。也许您真的应该将它们存储在具有多个属性的对象上?
https://codereview.stackexchange.com/questions/11961
复制相似问题