1 # !usr/bin/python3.4
2 # -*- coding:utf-8 -*-
3
4 import json
5 import grequests
6 import requests
7 import re
8 import time
9
def geturl(urls):
    """Download every URL in ``urls`` concurrently and return the responses.

    Args:
        urls: Iterable of URL strings to request with HTTP GET.

    Returns:
        The list produced by ``grequests.map``: one entry per URL, in the
        same order. An entry is ``None`` when that request failed, so
        callers must check before touching ``.content``.
    """
    # One session is shared by all requests. The ``with`` block closes it
    # once every request has finished; the original never closed it.
    # Response bodies are already loaded by then (grequests does not stream
    # by default), so the returned responses remain usable.
    with requests.Session() as sn:
        rs = [grequests.get(url, session=sn) for url in urls]
        return grequests.map(rs)
16
def get(url, timeout=10):
    """Fetch ``url`` with browser-like headers and return the response.

    Args:
        url: Address to request. The ``Referer`` and ``Host`` headers are
            fixed to cn.bing.com regardless of ``url``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` object (not decoded text); callers read
        ``.content`` from it.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    header = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0',
        'Referer': 'http://cn.bing.com',
        'Host': 'cn.bing.com',
    }

    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    return requests.get(url, headers=header, timeout=timeout)
27
# Strip characters that are not allowed in file names (Windows)
def validateTitle(title):
    """Return ``title`` with the characters / \\ : * ? " < > | removed.

    Every other character, including spaces and non-ASCII text, is kept
    unchanged.
    """
    forbidden = re.compile(r"[\/\\\:\*\?\"\<\>\|]")
    return forbidden.sub("", title)
34
if __name__ == '__main__':
    # Imported locally so this script block carries the extra standard
    # library names it needs.
    import os
    from urllib.parse import urljoin

    # --- Step 1: walk the archive one entry at a time, collecting the
    # image URL and caption of each entry. ---
    i = 0
    img = []
    imgname = []
    while True:
        url = 'http://cn.bing.com/HPImageArchive.aspx?format=js&idx=' + str(i) + '&n=1'

        # A network failure or a non-JSON body ends collection instead of
        # crashing; whatever was gathered so far is still downloaded.
        try:
            contents = get(url)
            data = json.loads(contents.content.decode('utf-8', 'ignore'))
        except (requests.RequestException, ValueError) as err:
            print(err)
            break

        added = 0
        try:
            for item in data['images']:
                # Resolve against the API URL so a site-relative path
                # becomes downloadable; absolute URLs pass through as-is.
                link = urljoin(url, item['url'])
                if link in img:
                    # Already collected: the archive has no more new entries.
                    continue
                # Build the caption before appending anything so the two
                # lists can never get out of step.
                caption = item['copyright'].replace(' ', '')
                img.append(link)
                imgname.append(caption)
                print(link)
                added += 1
        except (KeyError, TypeError, AttributeError) as err:
            # A missing or null 'images' entry is treated as end of archive.
            print(err)
            break

        # An empty or repeated result would otherwise loop forever.
        if not added:
            break
        i = i + 1

    print('已经搜集好网址...')
    print('暂停3秒后开始批量下载图片,请保持网络畅通...')
    time.sleep(3)
    print('正在下载...')
    pics = geturl(img)

    # --- Step 2: write each downloaded image to ../jpg/<caption>.jpg ---
    # Create the output directory up front; open() would fail without it.
    os.makedirs('../jpg', exist_ok=True)
    for j, (pic, name) in enumerate(zip(pics, imgname)):
        if pic is None:
            # grequests.map returns None for a request that failed.
            print('第' + str(j + 1) + '张图片下载失败,已跳过')
            continue
        filenamep = '../jpg/' + validateTitle(name + '.jpg')
        with open(filenamep, 'wb') as filess:
            filess.write(pic.content)
        print('已经写入第' + str(j + 1) + '张图片')
发现 Bing 搜索的背景图非常好看:
所以写了段代码把它们全部下载下来。总的来说,grequests 非常好用。