Python 环境:Python 3.x
依赖的包:requests(显示验证码图片时还会用到 PIL/Pillow 的 Image,可选)
将源码文件下载到任意文件夹后运行,并根据提示操作。
下载并显示验证码:
class ZhiHu():
    """Zhihu scraper helper; this part covers the captcha step.

    The methods in this block build the captcha URL and download/display
    the captcha image.  They rely on names that are not defined in this
    excerpt (``_session``, ``header_data``, ``_captcha_url``,
    ``_captcha_url_end``, ``Image`` and ``self.do_first``).
    """

    # NOTE(review): the methods refer to the bare name ``_session``, which
    # resolves to a module-level variable, never to this class attribute.
    # Confirm whether this attribute is still needed.
    _session = None
    favor_data = 100

    def __init__(self):
        """Run the start-up routine ``do_first`` (defined outside this excerpt)."""
        self.do_first()

    def get_captcha(self):
        """Return the captcha URL with the current millisecond timestamp inserted."""
        timestamp_ms = int(time.time() * 1000)
        return _captcha_url + str(timestamp_ms) + _captcha_url_end

    def show_or_save_captcha(self, url):
        """Download the captcha at *url* to ``code.gif`` and try to display it.

        The image is always written to ``code.gif`` in the current working
        directory.  If it cannot be opened or shown (for example because
        the imaging library is unavailable), a hint asking the user to open
        the file manually is printed instead.
        """
        # ``_session`` is only read here, so no ``global`` statement is needed.
        response = _session.get(url, headers=header_data, verify=True)
        with open("code.gif", 'wb') as f:
            f.write(response.content)
        # Show the captcha; fall back to a hint when that is not possible.
        try:
            im = Image.open("code.gif")
            im.show()
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # propagate while any display failure stays best-effort.
            print("请打开下载的验证码文件code.gif")
登录:
def input_data(self):
    """Prompt for user name, password and captcha and store them on ``self``.

    Sets ``self.username``, ``self.password`` and ``self.captcha``.  The
    captcha image is fetched and shown (via ``show_or_save_captcha``)
    after the credentials are read and before the captcha text is asked for.
    """
    # ``raw_input`` only exists on Python 2; on Python 3 the same behaviour
    # is provided by ``input``.  Resolve the right reader once so the method
    # works on both without evaluating user input on Python 2.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    self.username = read_line('请输入用户名:')
    self.password = read_line('请输入密码:')
    self.show_or_save_captcha(self.get_captcha())
    self.captcha = read_line('请输入验证码:')
下载某个问题下的高赞答案,存储在一个txt文件中:
def get_answer_text(self, url, answers=15):
    """Fetch a question page, compute the vote threshold and save the text.

    The page at *url* is downloaded, every vote count matched by the
    pattern below is converted to an integer, and the module-level
    ``favor_data`` is set to the *answers*-th highest count (``0`` when
    fewer than *answers* counts were found or *answers* is not positive).
    The response is then passed to ``self.save_text`` (defined outside
    this excerpt).

    Args:
        url: Address of the question page.
        answers: How many of the top-voted answers the threshold should
            cover.
    """
    global favor_data
    r = _session.get(url, headers=header_data, verify=True)
    # NOTE(review): this pattern looks truncated (HTML tags and backslashes
    # appear to have been lost when the snippet was published); as written
    # the lazy group always captures an empty string.  Restore it from the
    # original source before relying on the result.
    pat = re.compile('"count">[s]*?(.*?)')

    favor_list = []
    for raw_count in pat.findall(r.text):
        raw_count = raw_count.strip()
        if not raw_count:
            # Nothing captured for this match; int('') would raise.
            continue
        if 'K' in raw_count:
            # Counts given in thousands ("12K", "1.2K") become plain
            # integers; going through float keeps decimal values working.
            thousands = float(raw_count.replace('K', ''))
            favor_list.append(int(round(thousands * 1000)))
        else:
            favor_list.append(int(raw_count))

    favor_list.sort(reverse=True)
    # Guard the lower bound as well so answers <= 0 cannot index from the
    # end of the list or raise IndexError on an empty list.
    if 1 <= answers <= len(favor_list):
        favor_data = favor_list[answers - 1]
    else:
        favor_data = 0
    self.save_text(r)
下载某个问题下所有答案中的图片,并按回答者的昵称分类保存:
# Download the images found in the answers of the question page at `url`.
# The page text is fetched through the module-level `_session`, split into
# items with `item_pattern`, and for every item the author and image URLs
# are extracted.  Each https image is then written to a file under the path
# returned by `self.createPathIfNotExist`.
# NOTE(review): indentation was lost in this snippet; the comments below
# describe the statements in the order they appear.
def get_answer_img(self,url):
global _session
r=_session.get(url,headers=header_data,verify=True).text
# NOTE(review): the regex literals on the next lines (item_pattern,
# img_pattern, pattern_title, author_pattern) appear to have lost their HTML
# content when the snippet was published; as written they are not complete
# string literals.  Restore them from the original source.
item_pattern=re.compile('
')
img_pattern=re.compile('
pattern_title=re.compile('([sS]*?)')
#author_pattern=re.compile('
author_pattern=re.compile('
([Ss]*?)
')
items=re.findall(item_pattern,r)
title=re.findall(pattern_title,r)
# authors[n] and img_list[n] hold the matches found in the n-th item.
authors=[]
img_list=[]
# NOTE(review): `i` is incremented below but never read in this block.
i=0
try :
for item in items:
i+=1
authors.append(re.findall(author_pattern,item))
img_list.append(re.findall(img_pattern,item))
except :
print('查找出了一点问题')
traceback.print_exc()
try:
#print(authors)
# `j` indexes img_list and is also used in the progress message.
# NOTE(review): `j` is advanced only after the empty-list `continue`, so
# once an item without images is met `img_list[j]` seems to stay on that
# same empty entry for all later authors -- confirm against the original
# indentation.
j=0
for author in authors:
img_urls=img_list[j]
#print(len(img_urls))
if len(img_urls) == 0:
continue
title_text=title[0];
author_text=''
if len(author)>0:
author_text=author[0]
# NOTE(review): `'\'` is not a complete string literal (presumably `'\\'`
# in the original -- confirm); `author[0]` is used here rather than
# `author_text`, which would fail when `author` is empty.
path=self.createPathIfNotExist(title_text+'\'+author[0])
j+=1
# `k` counts the images saved for the current author and is part of the
# file name.
k=0
for url in img_urls:
# URLs that do not contain 'https' are skipped.
if 'https' not in url:
# (disabled debug output of the skipped URL)
continue
print(url)
# File extension = text after the last '.', e.g. 'jpg' or 'png'.
# `split` always returns at least one element, so the 'jpg' default is
# never kept.
temp=url.split('.')
suffix='jpg'
if len(temp)>0:
suffix=temp[len(temp)-1]
#print('suffix= '+suffix)
k+=1
# NOTE(review): the file name is built by plain concatenation, so `path`
# presumably ends with a separator -- verify createPathIfNotExist.
with open(path+author_text+str(k)+'.'+suffix,'bw')as f:
print('下载第'+str(j)+'个人'+'第'+str(k)+'照片')
f.write(_session.get(url,verify=True).content)
except:
print('下载图片出了一点问题')
traceback.print_exc()
源码链接:https://pan.baidu.com/s/1o9VVnqa
领取专属 10元无门槛券
私享最新 技术干货