为了能够模拟登录QQ并获取信息,对扫码登录微信的过程进行了分析,并简单地用 Django 将获取到的信息映射到页面上。(Python3 + PyCharm)
主要过程就是:
1、获取二维码
2、扫码登录(有三种状态)
3、获取联系人信息(index页面获取的是个人信息、最近联系人信息、公众号)
4、获取所有的联系人
5、发送和接收消息(接收消息打印到了后台)
创建 Django 项目、导入 jQuery(用于发送 Ajax 请求)、创建 APP、创建模板(简单地弄一下,能合理显示得到的数据就好)
from django.contrib import admin
# from django.urls import path
from django.conf.urls import url
from app01 import views
# URL routing table: maps each request path to the view that handles it.
urlpatterns = [
    # url() treats its first argument as a regular expression, so the pattern
    # is anchored with ^; unanchored, it would match any path containing "admin/".
    url(r'^admin/', admin.site.urls),
    url(r'^$', views.login),  # show the login QR code
    url(r'^polling/$', views.long_polling),  # long polling for the scan/confirm status
    url(r'^index/$', views.index),  # profile, recent contacts and official accounts
    url(r'^contact_list/$', views.contact_list),  # fetch all contacts
    url(r'^send_msg/$', views.send_msg),  # send a message
    url(r'^get_msg/$', views.get_msg),  # receive messages
]
扫码前
扫码后、尚未点击登录按钮时,页面显示的是你的头像
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Login page: shows the QR code for the uuid passed in as "code" and polls the backend for the scan status. #}
<div style="margin: 0 auto;width: 300px">
    <img id="qcode" style="width: 300px;height: 300px" src="https://login.weixin.qq.com/qrcode/{{ code }}" alt="扫码登录">
</div>
<script src="/static/jquery-1.12.4.js"></script>
<script>
    // Start polling as soon as the page is ready.
    $(function () {
        polling();
    });

    // Long-poll /polling/ and react to the status reported by the backend:
    //   408 - nothing happened yet, poll again
    //   201 - QR code scanned, show the avatar returned in arg.data and poll again
    //   200 - login confirmed, go to the index page
    function polling() {
        $.ajax({
            url: '/polling/',
            type: 'GET',
            dataType: 'json',
            success: function (arg) {
                if (arg.status === 408) {
                    polling();
                } else if (arg.status === 201) {
                    $('#qcode').attr('src', arg.data);
                    polling();
                } else if (arg.status === 200) {
                    window.location.href = '/index/';
                }
            },
            // Without this handler a single failed request would end the polling
            // chain for good; retry after a short pause instead.
            error: function () {
                setTimeout(polling, 2000);
            }
        });
    }
</script>
</body>
</html>
点击登录后的页面:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Index page: renders the init data passed in as "data" (profile, recent contacts, official accounts). #}
<h1>个人信息</h1>
<div>
    {# <img src="https://wx.qq.com{{ data.User.HeadImgUrl }}">#}
    {# "code" is the avatar captured during login polling; the attribute value is quoted so that any character in it stays inside src. #}
    <img style="width: 40px;height: 40px" src="{{ code }}">
</div>
<div>
    {{ data.User.NickName }} - {{ data.User.UserName }}
</div>
<h1>最近联系人列表</h1>
<ul>
    {% for row in data.ContactList %}
        <li>{{ row.UserName }} - {{ row.NickName }}</li>
    {% endfor %}
    <li><a href="/contact_list/">获取更多联系人</a></li>
</ul>
<h1>公众号</h1>
{# One heading per subscribed account, followed by its article list. #}
{% for row in data.MPSubscribeMsgList %}
    <div style="font-weight: bolder">{{ row.NickName }}</div>
    {% for i in row.MPArticleList %}
        <div>
            <div><a href="{{ i.Url }}">{{ i.Title }}</a></div>
            <div style="color: #dddddd">{{ i.Digest }}</div>
        </div>
    {% endfor %}
{% endfor %}
</body>
</html>
获取全部联系人并发送消息的页面(contact_list.html):
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Contact page: lists every member in obj.MemberList and offers a small form for sending a message. #}
<h1>发送消息</h1>
<div>
    <p><input id="user_id" type="text" placeholder="请输入用户唯一ID" /></p>
    <p><input id='user_msg' type="text" placeholder="请输入内容" /></p>
    <input id="sendMsg" type="button" value="提交" />
</div>
<ul>
    {% for row in obj.MemberList %}
        <li>{{ row.NickName }} - {{ row.UserName }} -{{ row.Province }}</li>
    {% endfor %}
</ul>
<script src="/static/jquery-1.12.4.js"></script>
<script>
    $(function () {
        bindSendMessage();
        fetchMessage();
    });

    // POST the recipient id and the message text to /send_msg/ when the button is clicked.
    function bindSendMessage() {
        $('#sendMsg').click(function () {
            $.ajax({
                url: '/send_msg/',
                type: 'POST',
                // Django's CSRF middleware rejects POST requests that carry no token;
                // sending it as a header keeps the request working when the middleware is enabled.
                headers: {'X-CSRFToken': '{{ csrf_token }}'},
                data: {'user_id': $('#user_id').val(), 'user_msg': $('#user_msg').val()},
                success: function () {
                }
            });
        });
    }

    // Keep asking /get_msg/ for new messages; each successful response triggers the next request.
    function fetchMessage() {
        $.ajax({
            url: '/get_msg/',
            type: 'GET',
            success: function (arg) {
                fetchMessage();
            }
        });
    }
</script>
</body>
</html>
import re
import time
import json
import requests
from bs4 import BeautifulSoup
from django.shortcuts import render, HttpResponse
# Create your views here.
# Module-level state shared by the views below.
# NOTE(review): these are process-wide globals, so the views appear to assume a
# single login session per server process - confirm before serving several users.

# Timestamp used for the QR-code request, the QR-code uuid, and the avatar picture;
# all three are filled in later by the views.
CURRENT_TIME = QCODE = PICTURE = None

# Value of the "tip" query parameter sent with the login polling request.
TIP = 1

# Cookies saved from the login polling response and from the ticket request.
LOGIN_COOLIES_DICT = dict()
TICKET_COOKIES_DICT = dict()

# Tag name -> text taken from the ticket response.
TICKET_DICT = dict()

# User data returned by the init request.
USER_INIT_DATA = dict()
def login(request):
    """Fetch a fresh login QR-code uuid and render the login page.

    The uuid and the timestamp used for the request are stored in the module
    globals ``QCODE`` and ``CURRENT_TIME`` so that ``long_polling`` can reuse them.

    :param request: the incoming Django request
    :return: the rendered ``login.html`` page with the uuid as ``code``, or a
        502 response when the upstream answer contains no uuid
    """
    global QCODE
    global CURRENT_TIME
    global TIP
    url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}'
    CURRENT_TIME = str(time.time())
    response = requests.get(url.format(CURRENT_TIME))
    # Expected body:
    # window.QRLogin.code = 200; window.QRLogin.uuid = "4c5VeLH00g==";
    # Non-greedy so the capture stops at the first closing quote.
    match = re.search(r'uuid = "(.*?)";', response.text)
    if match is None:
        # No uuid in the answer: report it instead of failing with an IndexError.
        return HttpResponse('Failed to obtain the login QR code', status=502)
    code = match.group(1)
    QCODE = code
    # long_polling switches TIP to 0 once a code has been scanned; a new QR code
    # starts over from the initial value.
    TIP = 1
    return render(request, 'login.html', {'code': code})
def long_polling(request):
    """Poll the login endpoint once and report the scan status as JSON.

    The JSON body has the form ``{"status": ..., "data": ...}`` where status is
    408 (nothing happened, the default), 201 (code scanned; ``data`` holds the
    avatar) or 200 (login confirmed; cookies and ticket values were stored in
    the module globals).

    :param request: the incoming Django request
    :return: an ``HttpResponse`` carrying the JSON status object
    """
    global TIP
    global PICTURE
    print('polling')
    # Default Ajax status is 408.
    ret = {'status': 408, 'data': None}
    # {2} carries the timestamp; the original template repeated {1}, which sent
    # TIP as "_" and left CURRENT_TIME unused.
    base_login_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip={1}&r=970980966&_={2}'
    login_url = base_login_url.format(QCODE, TIP, CURRENT_TIME)
    response = requests.get(login_url)
    if 'window.code=201' in response.text:
        TIP = 0
        avatar_match = re.search(r"userAvatar = '(.*)';", response.text)
        if avatar_match is not None:
            avatar = avatar_match.group(1)
            ret['data'] = avatar
            ret['status'] = 201
            # Kept for the index view, which shows it as the profile picture.
            PICTURE = avatar
    elif 'window.code=200' in response.text:
        # Keep the cookies issued at login for the later requests.
        LOGIN_COOLIES_DICT.update(response.cookies.get_dict())
        redirect_match = re.search(r'redirect_uri="(.*)";', response.text)
        if redirect_match is not None:
            redirect_uri = redirect_match.group(1) + '&fun=new&version=v2'
            # Fetch the ticket values needed by the other views.
            response_ticket = requests.get(redirect_uri, cookies=LOGIN_COOLIES_DICT)
            TICKET_COOKIES_DICT.update(response_ticket.cookies.get_dict())
            soup = BeautifulSoup(response_ticket.text, 'html.parser')
            root = soup.find()
            children = root.children if root is not None else ()
            for tag in children:
                # Bare text nodes have no tag name; only real tags are stored.
                if tag.name is not None:
                    TICKET_DICT[tag.name] = tag.string
            ret['status'] = 200
    return HttpResponse(json.dumps(ret))
def index(request):
    """Run the user-init request and render the index page.

    Posts the ticket values to the webwxinit endpoint, merges the decoded
    answer into ``USER_INIT_DATA`` and renders ``index.html`` with that answer
    as ``data`` and the stored avatar as ``code``.

    :param request: the incoming Django request
    :return: the rendered ``index.html`` page
    """
    init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=855409185&pass_ticket=%s' % TICKET_DICT['pass_ticket']
    base_request = {
        'DeviceID': 'e531777446530354',
        'Sid': TICKET_DICT['wxsid'],
        'Skey': TICKET_DICT['skey'],
        'Uin': TICKET_DICT['wxuin'],
    }
    payload = {'BaseRequest': base_request}

    # Send every cookie collected so far; ticket cookies win on a name clash.
    cookies = dict(LOGIN_COOLIES_DICT)
    cookies.update(TICKET_COOKIES_DICT)

    # json= makes requests serialize the payload and set the JSON content type.
    init_response = requests.post(init_url, json=payload, cookies=cookies)
    init_response.encoding = 'utf-8'
    init_data = json.loads(init_response.text)
    USER_INIT_DATA.update(init_data)

    context = {'data': init_data, 'code': PICTURE}
    return render(request, 'index.html', context)
def contact_list(request):
    """Fetch the full contact list and render it.

    Calls the webwxgetcontact endpoint with the stored ticket values and
    cookies, decodes the JSON answer and passes it to ``contact_list.html``
    as ``obj``.

    :param request: the incoming Django request
    :return: the rendered ``contact_list.html`` page
    """
    query_values = (TICKET_DICT['pass_ticket'], str(time.time()), TICKET_DICT['skey'])
    url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket=%s&r=%s&seq=0&skey=%s" % query_values

    # Login cookies first, then ticket cookies on top.
    cookies = dict(LOGIN_COOLIES_DICT)
    cookies.update(TICKET_COOKIES_DICT)

    contact_response = requests.get(url, cookies=cookies)
    contact_response.encoding = 'utf-8'
    contacts = json.loads(contact_response.text)
    return render(request, 'contact_list.html', {'obj': contacts})
def send_msg(request):
    """Send a text message to the user given in the POST data.

    Reads ``user_id`` and ``user_msg`` from ``request.POST``, builds the
    webwxsendmsg payload from the stored ticket values and posts it as UTF-8
    encoded JSON.

    :param request: the incoming Django request
    :return: ``HttpResponse('OK')`` after the message was posted, or a 400
        response when ``user_id`` or ``user_msg`` is missing
    """
    to_user_id = request.POST.get('user_id')
    msg = request.POST.get('user_msg')
    if not to_user_id or msg is None:
        return HttpResponse('user_id and user_msg are required', status=400)

    from_user_id = USER_INIT_DATA['User']['UserName']
    send_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg'
    form_data = {
        'BaseRequest': {
            'DeviceID': 'e531777446530354',
            'Sid': TICKET_DICT['wxsid'],
            'Skey': TICKET_DICT['skey'],
            'Uin': TICKET_DICT['wxuin'],
        },
        'Msg': {
            "ClientMsgId": str(time.time()),
            "Content": msg,
            "FromUserName": from_user_id,
            "LocalID": str(time.time()),
            "ToUserName": to_user_id,
            "Type": 1,
        },
        'Scene': 0,
    }
    # ensure_ascii=False keeps non-ASCII text as real characters instead of
    # \uXXXX escapes. Letting json.dumps place the message also escapes quotes,
    # backslashes and newlines, which splicing the text into the serialized
    # string with % did not (and % broke on any '%' elsewhere in the payload).
    form_data_bytes = json.dumps(form_data, ensure_ascii=False).encode('utf-8')

    all_cookie_dict = {}
    all_cookie_dict.update(LOGIN_COOLIES_DICT)
    all_cookie_dict.update(TICKET_COOKIES_DICT)
    response = requests.post(send_url, data=form_data_bytes, cookies=all_cookie_dict, headers={
        'Content-Type': 'application/json'})
    print(response.text)
    return HttpResponse('OK')
def get_msg(request):
    """Check for new messages and print any that arrived.

    Sends a synccheck request built from the stored sync key; when the answer
    contains ``selector:"2"`` it fetches the messages through webwxsync, stores
    the returned sync key for the next call and prints each message.

    :param request: the incoming Django request
    :return: ``HttpResponse('ok')``
    """
    sync_url = 'https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck'
    # The sync key is sent as "key_val" pairs joined with "|".
    sync_data_str = "|".join(
        "%s_%s" % (item['Key'], item['Val'])
        for item in USER_INIT_DATA['SyncKey']['List']
    )
    sync_dict = {
        "r": int(time.time()),
        "skey": TICKET_DICT['skey'],
        "sid": TICKET_DICT['wxsid'],
        "uin": TICKET_DICT['wxuin'],
        "deviceid": "e531777446530354",
        "synckey": sync_data_str,
    }
    all_cookie = {}
    all_cookie.update(LOGIN_COOLIES_DICT)
    all_cookie.update(TICKET_COOKIES_DICT)
    response_sync = requests.get(sync_url, params=sync_dict, cookies=all_cookie)
    print(response_sync.text)
    if 'selector:"2"' in response_sync.text:
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid=%s&skey=%s&lang=zh_CN&pass_ticket=%s" % (
            TICKET_DICT['wxsid'], TICKET_DICT['skey'], TICKET_DICT['pass_ticket'])
        form_data = {
            'BaseRequest': {
                'DeviceID': 'e531777446530354',
                'Sid': TICKET_DICT['wxsid'],
                'Skey': TICKET_DICT['skey'],
                'Uin': TICKET_DICT['wxuin'],
            },
            'SyncKey': USER_INIT_DATA['SyncKey'],
            'rr': str(time.time()),
        }
        # Send the same cookies as every other request in this module; the
        # original call built them but did not pass them here.
        response_fetch_msg = requests.post(fetch_msg_url, json=form_data, cookies=all_cookie)
        response_fetch_msg.encoding = 'utf-8'
        res_fetch_msg_dict = json.loads(response_fetch_msg.text)
        # Only adopt a sync key that has entries: the next synccheck builds its
        # "synckey" parameter from this list.
        new_sync_key = res_fetch_msg_dict.get('SyncKey')
        if new_sync_key and new_sync_key.get('List'):
            USER_INIT_DATA['SyncKey'] = new_sync_key
        for item in res_fetch_msg_dict.get('AddMsgList', []):
            print(item['Content'], ":::::", item['FromUserName'], "---->", item['ToUserName'])
    return HttpResponse('ok')
爬虫入门简单,想要深入果然很难。继续努力,加油!