1.抓包分析
这次使用手机抓包分析,抓包工具Charles
听说做app测试或者接口测试都需要使用到抓包工具(没做过,不晓得)
网上抓包工具的配置教程很多,例如:https://www.jianshu.com/p/5539599c7a25
配置好之后在手机上打开斗鱼APP找到颜值频道
清空Charles里面的内容
下拉刷新当前页面,然后下翻几页
删除Charles上与douyu域名无关的信息
大概留下如图内容:
逐条查看一下找到有主播名称的内容
https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/0/20/ios?client_sys=ios
https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/20/20/ios?client_sys=ios
https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/40/20/ios?client_sys=ios
https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/60/20/ios?client_sys=ios
其中唯一的区别是/xx/20/ios?client_sys=ios中的xx(依次为0,20,40,60),合理推断xx是翻页的起始偏移量,后面的20是每页的条数,即第n页对应的xx为(n-1)*20
从json里面发现几个比较关键的内容
房间id:room_id
房间名字:room_name
主播名字:nickname
主播封面:vertical_src
主播城市:anchor_city
{
"roomRule": 0,
"msg": "",
"list": [{
"room_id": 2450462,
"room_name": "【第二萌】一日不见,如隔三秋",
"nickname": "南京第二萌",
"cate_id": 311,
"room_src": "https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/11/2450462_20171211203916_small.jpg",
"is_vertical": 0,
"vertical_src": "https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/11/2450462_20171211203916_big.jpg",
"online_num": 63,
"hn": 35690,
"show_status": 1,
"bid_id": 0,
"bidToken": "",
"rpos": 0,
"rankType": 0,
"recomType": 0,
"show_id": "81327515",
"iho": 0,
"guild_id": 0,
"topid": 0,
"chanid": 0,
"jump_url": "",
"client_sys": 1,
"is_noble_rec": 0,
"noble_rec_user_id": 0,
"noble_rec_nickname": "",
"anchor_city": "南京市",
"rmf1": 0,
"rmf2": 0,
"rmf3": 0,
"ofc": 0,
"lhl": 0,
"chgd": 0,
"has_al": 1,
"anchor_label": [{
"tag": "摸你奖杯",
"id": 92681
}, {
"tag": "大哥纹身",
"id": 79659
}, {
"tag": "大哥烫我",
"id": 4912
}, {
"tag": "王二怂",
"id": 96525
}],
"icon_url": "",
"nly": 0
}
2.代码编写
先创建两段用于拼接url的字符串,中间可以放入页数对应的偏移量
self.url_1 = 'https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/'
self.url_2 = '/20/ios?client_sys=ios'
由于抓到的是手机端的接口,为了防止出现什么问题,请求头使用手机端的User-Agent
self.HEADERS={'User-Agent':'ios/3.700 (ios 11.2.6; ; iPhone X (A1865/A1902))'}
先写一个函数,用来获取刚才分析出的那些需要的信息,其中需要用到json模块:
import json
使用json模块来对返回的json内容进行转换
函数接受页数的传入
1.进行url的拼接
2.进行json的转换,转化成为python的字典格式
3.对转换后的字典取值
def get_message(self, page):
    """Fetch one page of the room list and download every cover on it.

    Args:
        page: 1-based page number; converted to the offset the URL
            expects (20 rooms per page, so page n starts at (n-1)*20).

    Returns:
        None. Each parsed room is appended to ``self.item_lists`` and
        passed to ``self.download_pic``. Returns early when the response
        carries no ``data``.
    """
    page = (page - 1) * 20
    url = self.url_1 + str(page) + self.url_2
    # A timeout keeps a stalled connection from hanging the worker forever.
    res = requests.get(url=url, headers=self.HEADERS, timeout=10)
    message_json = json.loads(res.text)
    # .get() so a response without 'data' ends the page quietly instead of
    # raising KeyError.
    message_data = message_json.get('data')
    if not message_data:
        return
    message_lists = message_data['list']
    print('正在爬取第%s页' % int(page / 20 + 1))
    for message in message_lists:
        item = {}
        item['房间id'] = message['room_id']
        item['房间名字'] = message['room_name']
        item['主播名字'] = message['nickname']
        item['主播封面'] = message['vertical_src']
        item['主播城市'] = message['anchor_city']
        self.item_lists.append(item)
        self.download_pic(item)
将取出来的值传入下载图片的函数中
1.使用图片的url进行访问,并通过.content取得二进制内容
2.为了不让图片零零散散,新建一个/img文件夹存放图片
3.使用wb模式写入二进制内容
def download_pic(self, item):
    """Download the cover image of one room into the ``img`` directory.

    Args:
        item: dict built by ``get_message``; the keys '主播封面' (image
            URL), '房间id', '主播城市' and '主播名字' are read.
    """
    # A timeout keeps a stalled download from hanging the worker forever.
    content = requests.get(url=item['主播封面'], headers=self.HEADERS, timeout=10).content
    File_Path = os.getcwd() + '/img'
    # exist_ok=True: several pool workers may try to create the directory
    # at the same moment; a separate exists() check followed by makedirs()
    # can raise FileExistsError in that case.
    os.makedirs(File_Path, exist_ok=True)
    # City and nickname come from the API; a path separator inside them
    # would otherwise be interpreted as a sub-directory and break open().
    city = str(item['主播城市']).replace('/', '_').replace('\\', '_')
    nickname = str(item['主播名字']).replace('/', '_').replace('\\', '_')
    with open('img/房间ID:%s---来自%s的%s.jpg' % (item['房间id'], city, nickname), 'wb') as f:
        f.write(content)
按照惯例使用进程池来加快下载速度
if __name__ == '__main__':
    # One worker process per CPU core by default.
    pool = Pool()
    message = DouYuYanZhi()
    print()
    # Crawl pages 1 through 19 (the upper bound of range is exclusive).
    for i in range(1, 20):
        # NOTE: apply_async does not re-raise worker exceptions here; they
        # only surface if get() is called on the returned AsyncResult.
        pool.apply_async(message.get_message, args=(i,))
    # No more tasks will be submitted; wait for the queued ones to finish.
    pool.close()
    pool.join()
全代码如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#Author: zhongxin
from multiprocessing import Pool
import requests
import json
import os
class DouYuYanZhi():
    """Crawl the Douyu mobile room-list API and save each room's cover image.

    The room list is paged by an offset embedded in the URL path
    (0, 20, 40, ... for 20 rooms per page). For every room the id, room
    name, anchor nickname, cover URL and anchor city are collected, and
    the cover is written to the ``img`` directory under the current
    working directory.

    NOTE: when ``get_message`` is run through a ``multiprocessing`` pool,
    each worker operates on its own copy of the instance, so appends to
    ``item_lists`` made in a worker are not visible in the parent process.
    """

    def __init__(self):
        # One dict per crawled room, filled in by get_message().
        self.item_lists = []
        # The page offset is inserted between these two URL halves.
        self.url_1 = 'https://apiv2.douyucdn.cn/gv2api/rkc/roomlist/2_201/'
        self.url_2 = '/20/ios?client_sys=ios'
        # Mobile user agent, since the endpoint was captured from the iOS app.
        self.HEADERS = {'User-Agent': 'ios/3.700 (ios 11.2.6; ; iPhone X (A1865/A1902))'}

    def _page_url(self, page):
        """Return the room-list URL for the 1-based *page* (20 rooms per page)."""
        return self.url_1 + str((page - 1) * 20) + self.url_2

    @staticmethod
    def _safe_name(value):
        """Return *value* as text with path separators replaced by '_'."""
        return str(value).replace('/', '_').replace('\\', '_')

    def get_message(self, page):
        """Fetch one page of the room list and download every cover on it.

        Args:
            page: 1-based page number.

        Returns:
            None. Each parsed room is appended to ``self.item_lists`` and
            passed to ``download_pic``. Returns early when the response
            carries no ``data``.
        """
        # A timeout keeps a stalled connection from hanging the worker forever.
        res = requests.get(url=self._page_url(page), headers=self.HEADERS, timeout=10)
        message_json = json.loads(res.text)
        # .get() so a response without 'data' ends the page quietly instead
        # of raising KeyError.
        message_data = message_json.get('data')
        if not message_data:
            return
        message_lists = message_data['list']
        print('正在爬取第%s页' % page)
        for message in message_lists:
            item = {
                '房间id': message['room_id'],
                '房间名字': message['room_name'],
                '主播名字': message['nickname'],
                '主播封面': message['vertical_src'],
                '主播城市': message['anchor_city'],
            }
            self.item_lists.append(item)
            self.download_pic(item)

    def download_pic(self, item):
        """Download the cover image of one room into the ``img`` directory.

        Args:
            item: dict built by ``get_message``; the keys '主播封面' (image
                URL), '房间id', '主播城市' and '主播名字' are read.
        """
        content = requests.get(url=item['主播封面'], headers=self.HEADERS, timeout=10).content
        File_Path = os.getcwd() + '/img'
        # exist_ok=True: several pool workers may try to create the
        # directory at the same moment; a separate exists() check followed
        # by makedirs() can raise FileExistsError in that case.
        os.makedirs(File_Path, exist_ok=True)
        # City and nickname come from the API; a path separator inside them
        # would otherwise be interpreted as a sub-directory and break open().
        file_name = 'img/房间ID:%s---来自%s的%s.jpg' % (
            item['房间id'],
            self._safe_name(item['主播城市']),
            self._safe_name(item['主播名字']),
        )
        with open(file_name, 'wb') as f:
            f.write(content)
if __name__ == '__main__':
    # One worker process per CPU core by default.
    pool = Pool()
    message = DouYuYanZhi()
    print()
    # Crawl pages 1 through 19 (the upper bound of range is exclusive).
    for i in range(1, 20):
        # NOTE: apply_async does not re-raise worker exceptions here; they
        # only surface if get() is called on the returned AsyncResult.
        pool.apply_async(message.get_message, args=(i,))
    # No more tasks will be submitted; wait for the queued ones to finish.
    pool.close()
    pool.join()
运行结果: