import nltk
from nltk.corpus import cess_esp #corpus of spanish texts
# Verbs only: keep the (word, tag) pairs from the corpus whose tag begins with 'v'.
new_tagged = [pair for pair in cess_esp.tagged_words() if pair[1][0] == 'v']

# The same pairs with the word lowercased, arranged in ascending order.
lowers = [(pair[0].lower(), pair[1]) for pair in new_tagged]
lowers.sort()

# Endings of conjugated verbs (the real list is much longer; only a sample is
# kept here). '\xc3\xa9is' spells the ending "eis" with an accented e as UTF-8
# byte escapes. The list is ordered longest first so that a longer ending is
# always tried before any shorter one.
uniends = sorted(
    ['ar', 'er', 'as', 'ad', 'ed', 'id', 'ase', 'an', 'en', 'es',
     '\xc3\xa9is', 'emos', 'o'],
    key=len,
    reverse=True,
)
# Whole conjugated forms that are mapped straight to a stem, before any
# ending is stripped (the original code calls these "irregulars that would
# disappear").
_IRREGULAR_FORMS = {
    'ir': 'ir',
    's\xc3\xa9': 'sab',
}

# Irregular stems (what is left once the ending is cut off) and the stem
# each one is normalised to.
_IRREGULAR_STEMS = {
    'pued': 'pod',
    'pud': 'pod',
    'estuv': 'est',
    'cuent': 'cont',
    'tien': 'ten',
    'tuv': 'ten',
    'hiz': 'hac',
    'hag': 'hac',
    'dij': 'dec',
    'vist': 'v',
    'jueg': 'jug',
    'sup': 'sab',
    'veng': 'ven',
    'hub': 'hab',
    'h': 'hab',
}


def lem(list): #lemmatize a list of conjugated spanish verbs
    """Collect one stem per conjugated Spanish verb into ``lems``.

    ``list`` is an iterable of indexable items whose element 0 is the
    (lowercased) verb form, e.g. the ``(word, tag)`` pairs in ``lowers``.
    The parameter name shadows the builtin but is kept so existing callers
    are unaffected.

    For every item:

    * a form listed in ``_IRREGULAR_FORMS`` contributes its mapped stem and
      is not processed any further;
    * otherwise the first ending in the module-level ``uniends`` that the
      form ends with is cut off, the remainder is normalised through
      ``_IRREGULAR_STEMS`` and appended;
    * a form that matches no ending contributes nothing.

    The accumulated list is handed back by the ``return lems`` statement
    that closes the function.
    """
    lems = [] #create an empty list for lemmas
    for t in list:
        word = t[0]

        # Whole-word irregulars: append exactly once and skip the ending
        # loop, so 'ir' is no longer dropped and no stale stem leaks in from
        # a previous word.
        if word in _IRREGULAR_FORMS:
            lems.append(_IRREGULAR_FORMS[word])
            continue

        for end in uniends:
            if word.endswith(end):
                stem = word[0:-len(end)] #cut off the ending
                lems.append(_IRREGULAR_STEMS.get(stem, stem))
                # Stop at the first match. uniends is sorted longest first,
                # so without this a word would also be stemmed again for
                # every shorter ending that happens to match.
                break
    return lems

该函数返回一个列表,即lems,但是如果在函数运行之后尝试对lems执行任何操作,就会得到一个错误:名称'lems'未定义。我以为我已经在函数lem(list)的第二行定义了它,然后通过向它追加词干来填充它。在对列表运行该函数之后,怎样才能让返回的这个列表成为我可以继续使用的列表呢?
发布于 2013-11-29 22:42:40
lems只在函数lem内部定义,而函数返回时,你没有把返回值赋给任何变量。所以一种方法是把lems声明为全局(global)变量,另一种方法是把lem的返回结果赋给一个变量。
类似于:
lems = []
def lem(list): #lemmatize a list of conjugated spanish verbs
    global lems
    ...
或
lems2 = lem(...)
我没有写成
lems = lem(...)
因为我不了解你代码的其他部分。
https://stackoverflow.com/questions/20293766
复制相似问题