伙计们,我正在尝试创建一个爬虫,但是我一直收到错误: TypeError: file_thread()接受1个位置参数,但给出了39个,这太奇怪了,因为它接受一个列表,并给出一个列表
import sys
sys.path.insert(0, "/media/user/Data/Programming/Projects")
import re , threading , bs4 , async_lib , urllib.request
file_lock=threading.Lock()
spider_lock=threading.Lock()
def file_thread(data):
file_lock.accquire()
print('IO Operation')
async_lib.WriteAsync('spider.txt',data,).start()
file_lock.release()
def Get_Links(webpage):
byte_html=urllib.request.urlopen(webpage).read()
bad_html= byte_html.decode()
html = str(bs4.BeautifulSoup(bad_html))
all_links = re.findall('href="(http.*?)"',html)
return all_links
def spider_logic(raw_links):
for link in raw_links:
try:
links = Get_Links(link)
t1 = threading.Thread(target=file_thread,args=link).start()
t1 = threading.Thread(target=file_thread,args=links)
spider_lock.accquire()
print(links)
t2 = threading.Thread(target=spider_logic, args=links).start()
spider_lock.release()
except:
pass
def main():
startup='http://www.myegy.to'
links = Get_Links(startup)
spider_logic(links)
if __name__=='__main__':
main()发布于 2018-09-11 04:01:57
在Thread()中传递给args的内容应该是可迭代的。当您传入一个字符串时,它会遍历每个字符,并将其视为一个单独的arg。相反,您应该将其作为元组传递。
t1 = threading.Thread(target=file_thread,args=(links,))https://stackoverflow.com/questions/35851191
复制相似问题