# -*- coding: utf-8 -*-
from urllib.parse import urlencode
import json
import scrapy
import os
import re
import urllib.request


class SougouimgSpider(scrapy.Spider):
    """Scrapy spider that downloads image search results from Sogou Images.

    Workflow: ``parse`` schedules one AJAX request per result page,
    ``sougou`` extracts each image URL from the JSON response, and
    ``savve`` downloads the image into a local folder next to this file.
    """

    name = 'sougouimg'
    allowed_domains = ['pic.sogou.com']
    start_urls = ['https://pic.sogou.com/']

    # Compiled once: matches the final path segment of a URL (the filename).
    _FILENAME_RE = re.compile(r'[^/]+$')

    def parse(self, response):
        """Schedule one JSON (AJAX) request per result page for the keyword."""
        endpage = 5            # exclusive bound: pages 1..4 are fetched
        keywords = r'哆啦A梦'   # search keyword
        for page in range(1, endpage):
            yield scrapy.Request(self.geturl(keywords, page),
                                 callback=self.sougou)

    def sougou(self, response):
        """Parse the AJAX JSON response and download every listed image."""
        payload = json.loads(response.text)
        # NOTE(review): assumes the JSON body has an 'items' list whose
        # entries carry a 'pic_url' key — confirm against the live API.
        for item in payload['items']:  # 'item', not 'list': avoid shadowing the builtin
            self.savve(item['pic_url'])

    def geturl(self, keywords, page):
        """Build the Sogou image-search AJAX URL.

        :param keywords: search query string
        :param page: 1-based page number; each page holds 48 results
        :return: fully encoded request URL
        """
        param = {
            'query': keywords,
            'mode': '1',
            'start': page * 48,   # result offset: 48 images per page
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': '0'
        }
        return 'https://pic.sogou.com/pics?' + urlencode(param)

    def savve(self, img_url):
        """Download *img_url* into the '搜狗图片' folder beside this script.

        Name kept as ``savve`` (a typo of 'save') for backward
        compatibility with existing callers.
        """
        # os.path.join keeps the path cross-platform (original hard-coded '\\').
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "搜狗图片")
        os.makedirs(path, exist_ok=True)
        title = self._FILENAME_RE.findall(img_url)[0]  # filename = last URL segment
        dest = os.path.join(path, title)
        try:
            urllib.request.urlretrieve(img_url, dest)
        except Exception:
            # Best-effort download: report and continue with remaining images.
            print(title + "下载失败")
        else:
            # Only report success when the download completed — the original
            # printed this in `finally`, i.e. even after a failure.
            print(title + "下载完毕")
# by 浅枫沐雪
# 本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。
# 我来说两句