A designer friend wanted Apple Watch face images from an overseas site, nominally to "reference" (read: copy) as material for an inspiration library. Saving them one by one by hand would take forever, so Python to the rescue. Here is the process of scraping the Apple Watch face images with Python.
Target URL: https://buddywatch.app/page/2/
Testing and observation show the site has 57 pages in total.
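Rather than hardcoding 57, you could try to read the page count from the pagination links. Below is a minimal sketch, assuming the site is a typical WordPress blog whose pagination uses a.page-numbers links; that selector is a guess about the markup, so verify it against the live page first:

import requests
from lxml import etree

def get_last_page(url='https://buddywatch.app/'):
    # assumes WordPress-style <a class="page-numbers"> pagination links --
    # an assumption about the markup, not confirmed against the site
    html = requests.get(url, timeout=8).text
    tree = etree.HTML(html)
    numbers = tree.xpath('//a[@class="page-numbers"]/text()')
    pages = [int(n) for n in numbers if n.strip().isdigit()]
    return max(pages) if pages else 1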
Because access to the site is rate-limited (the theme lazy-loads its thumbnails), the image URL shows up in two different attributes, so two XPath extractions are needed. A runnable demo follows the two variants.

Variant 1, the URL sits directly in the span's data-src attribute:
imgurls = tree.xpath('//a[@class="thumb epcl-loader"]/span/@data-src')
Variant 2, the URL is embedded in the span's inline style attribute:
spans = tree.xpath('//a[@class="thumb epcl-loader"]/span/@style')
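To see both variants side by side, here is a small self-contained sketch run against a hand-written snippet that mimics the two markup states; the sample HTML is an assumption for illustration, not copied from the site:

import re
from lxml import etree

# hypothetical markup mimicking the two states of the lazy loader
sample = '''
<a class="thumb epcl-loader"><span data-src="https://example.com/face1.jpg"></span></a>
<a class="thumb epcl-loader"><span style="background-image: url(https://example.com/face2.jpg);"></span></a>
'''
tree = etree.HTML(sample)
# variant 1: URL exposed directly in data-src
print(tree.xpath('//a[@class="thumb epcl-loader"]/span/@data-src'))
# variant 2: URL buried inside the inline style
for style in tree.xpath('//a[@class="thumb epcl-loader"]/span/@style'):
    print(re.findall(r'background-image:\s*url\((.+?)\)', style))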
Because requests time out under the throttling, every fetch goes through a retry helper:

def get_response(self, url):
    i = 0
    while i < 4:
        try:
            response = requests.get(url, headers=self.headers, timeout=8)
            return response
        except requests.exceptions.RequestException:
            i += 1
            print(f">> Request failed, retrying in 6s (attempt {i}/4)")
            time.sleep(6)
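Note that if all four attempts fail, the method falls through the while loop and implicitly returns None, so an unguarded caller crashes on response.content with an AttributeError. A minimal defensive pattern at the call site (the guard is my addition, not in the original source):

response = self.get_response(url)
if response is None:
    print(f'>> Giving up on {url}')
    return
html = response.content.decode('utf-8')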
The full source code for reference:
# -*- coding: utf-8 -*-
# buddywatch watch faces
# WeChat: huguo00289
from fake_useragent import UserAgent
from lxml import etree
import requests, time, re, os

class Bu(object):
    def __init__(self):
        self.url = "https://buddywatch.app/page/"
        self.ua = UserAgent()
        self.headers = {"User-Agent": self.ua.random}
        os.makedirs('buddywatch', exist_ok=True)  # make sure the output directory exists

    def get_list(self, page):
        """Scrape one listing page and download every face image on it."""
        url = f'{self.url}{page}/'
        response = self.get_response(url)
        if response is None:  # all retries failed; skip this page
            return
        html = response.content.decode('utf-8')
        tree = etree.HTML(html)
        # variant 1: URL exposed directly in data-src
        imgurls = tree.xpath('//a[@class="thumb epcl-loader"]/span/@data-src')
        # variant 2: URL embedded in the inline style attribute
        spans = tree.xpath('//a[@class="thumb epcl-loader"]/span/@style')
        print(spans)
        for span in spans:
            imgurl = self.get_imgurl(span)
            imgurls.insert(0, imgurl)  # prepend so style-derived URLs come first
        print(len(imgurls))
        print(imgurls)
        for imgurl in imgurls:
            self.down(imgurl)

    def get_imgurl(self, span):
        """Pull the image URL out of a 'background-image: url(...)' style."""
        imgurl = re.findall(r'background-image:\s*url\((.+?)\)', span, re.S)[0]
        imgurl = imgurl.strip('\'"')  # strip optional quotes around the URL
        print(imgurl)
        return imgurl

    def down(self, imgurl):
        """Download a single image into the buddywatch/ directory."""
        imgname = imgurl.split('/')[-1]
        r = self.get_response(imgurl)
        if r is None:
            return
        with open(f'buddywatch/{imgname}', 'wb') as f:
            f.write(r.content)
        print(f'{imgname} downloaded successfully!')

    # up to 4 retries
    def get_response(self, url):
        i = 0
        while i < 4:
            try:
                response = requests.get(url, headers=self.headers, timeout=8)
                return response
            except requests.exceptions.RequestException:
                i += 1
                print(f">> Request failed, retrying in 6s (attempt {i}/4)")
                time.sleep(6)

def main(pagenum):
    spider = Bu()
    # pages are 1-indexed; the original range(pagenum+1) also requested a nonexistent page 0
    for page in range(1, pagenum + 1):
        spider.get_list(page)

if __name__ == '__main__':
    pagenum = 57
    main(pagenum)
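Since the site throttles access, it is also worth pacing the crawl itself rather than relying on retries alone. A minimal variant of main with a politeness delay; the 2-second value is my assumption, tune it to the site's tolerance:

def main(pagenum):
    spider = Bu()
    for page in range(1, pagenum + 1):
        spider.get_list(page)
        time.sleep(2)  # polite pause between listing pages; 2 seconds is an arbitrary choice

Running the script then fills the buddywatch/ directory with one file per watch-face image.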
·················END·················
Hi, I'm Er Daye (二大爷),
a migrant worker who left an old revolutionary base area for the city,
a non-early, non-professional internet webmaster,
fond of Python, writing, reading, and English,
a third-rate programmer, dabbling in self-media and SEO . . .
This article was originally shared on the WeChat public account Python与SEO学习.