这两天学了点爬虫,很简单,给大家分享一下,顺便分享一下我自己的爬的壁纸的demo
我爬的壁纸。
我爬的壁纸网站是静态的,动态的还在仔细研究中。
下面是我的代码,与君分享。(要我下载下来的90张壁纸请qq找我)
from contextlib import closing

import requests
from bs4 import BeautifulSoup
class downloader(object):
    """Scrape 1920x1080 wallpapers from hdwallpapers.in (static pages only).

    Usage: call url_next() to collect wallpaper detail-page URLs,
    then dowland() to resolve the 1920x1080 download links,
    then img() to stream each file to ./<index>.jpg.
    """

    def __init__(self):
        self.server = 'https://www.hdwallpapers.in/'    # site root, prefixed to every relative href
        self.nextpage = 'https://www.hdwallpapers.in/'  # URL of the next listing page to fetch
        self.page = 5            # number of listing pages to crawl
        self.count = 0           # listing pages crawled so far (original was missing the 0)
        self.urls = []           # detail-page URL per wallpaper
        self.nums = 0            # wallpapers whose download link was found (original was missing the 0)
        self.dowland_url = []    # direct 1920x1080 download URLs

    def url_next(self):
        """Walk up to self.page listing pages, collecting wallpaper detail URLs into self.urls."""
        while 1:
            if self.count == self.page:
                print(self.page, "页了!")
                break
            req = requests.get(url=self.nextpage)
            html = req.text
            # Explicit parser: bare BeautifulSoup(html) warns and picks whatever parser
            # is installed, which can change the parse tree between machines.
            div_bf = BeautifulSoup(html, 'html.parser')
            # Locate the "Next" pagination link to advance self.nextpage.
            next_page = div_bf.find_all('div', class_="pagination")
            page_all = BeautifulSoup(str(next_page), 'html.parser')
            page_a = page_all.find_all('a')
            for a_ in page_a:
                if a_.text == 'Next ':
                    self.nextpage = self.server + a_.get('href')
            self.count += 1
            # Every thumbnail <a> on this listing page is one wallpaper detail page.
            div_all = div_bf.find_all('div', class_="thumb")
            a_bf = BeautifulSoup(str(div_all), 'html.parser')
            a_all = a_bf.find_all('a')
            for a in a_all:
                self.urls.append(self.server + a.get('href'))

    def dowland(self):
        """For each collected detail page, record the 'HD 1920 x 1080' download link."""
        for url in self.urls:
            req = requests.get(url=url)
            div_bf = BeautifulSoup(req.text, 'html.parser')
            div_all = div_bf.find_all('div', class_="wallpaper-resolutions")
            a_bf = BeautifulSoup(str(div_all), 'html.parser')
            a_all = a_bf.find_all('a')
            for a in a_all:
                if a.get('title') == 'HD 1920 x 1080 Wallpaper':
                    self.dowland_url.append(self.server + a.get('href'))
                    self.nums += 1
                    break  # one resolution link per wallpaper is enough
        print(self.nums)

    def img(self):
        """Stream-download every URL in self.dowland_url to ./1.jpg, ./2.jpg, ..."""
        i = 1
        print("开始下载照片!")
        for a in self.dowland_url:
            print("正在下载第", i, "张照片。")
            # NOTE(review): verify=False disables TLS certificate checking — insecure;
            # kept for behavior compatibility, should be removed once certs work.
            with closing(requests.get(url=a, stream=True, verify=False)) as r:
                # 'wb' instead of the original 'ab+': re-running the script now
                # overwrites files instead of appending and corrupting the JPEGs.
                with open('%d.jpg' % i, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()
            i += 1
if __name__ == '__main__':
    # Crawl listing pages, resolve 1920x1080 links, then download the files.
    dl = downloader()
    dl.url_next()
    dl.dowland()
    dl.img()
领取专属 10元无门槛券
私享最新 技术干货