import scrapy
import re
class GithubSpider(scrapy.Spider):
    """Log in to GitHub by scraping the hidden fields from the login form,
    POSTing them (plus credentials) to the session endpoint, then checking
    the landing page for the "Issues" marker."""

    name = 'github'
    allowed_domains = ['github.com']
    # Login page URL
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Collect the login form's hidden inputs and submit the login POST.

        FormRequest rejects None values in formdata, so every missing
        hidden input defaults to "" (the original only guarded ga_id,
        leaving the other fields free to break the request).
        """
        def _field(name):
            # Read one hidden <input> value; "" when the input is absent.
            value = response.xpath(
                "//input[@name='%s']/@value" % name).extract_first()
            return value if value is not None else ""

        # Build the POST parameters from the scraped form fields.
        post_data = {
            "commit": _field("commit"),
            "utf8": _field("utf8"),
            "authenticity_token": _field("authenticity_token"),
            "ga_id": _field("ga_id"),
            "login": "xxx@qq.com",
            "password": "xxx",
            "webauthn-support": _field("webauthn-support"),
            "webauthn-iuvpaa-support": _field("webauthn-iuvpaa-support"),
            # "required_field_4ed5": _field("required_field_4ed5"),
            "timestamp": _field("timestamp"),
            "timestamp_secret": _field("timestamp_secret"),
        }
        # NOTE(review): post_data contains the plaintext password — log at
        # debug level instead of print() so it stays out of normal output.
        self.logger.debug("login form data: %s", post_data)
        # Send the login POST request.
        yield scrapy.FormRequest(
            "https://github.com/session",  # login endpoint
            formdata=post_data,
            callback=self.after_login
        )

    # Runs after the login POST completes.
    def after_login(self, response):
        """Print every occurrence of "Issues" found on the landing page."""
        print(re.findall("Issues", response.body.decode()))
# -*- coding: utf-8 -*-
import scrapy
import re
class Github2Spider(scrapy.Spider):
    """Log in to GitHub by letting Scrapy locate and pre-fill the login
    <form> on the page, then scan the landing page for "Issues"."""

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['http://github.com/login']

    def parse(self, response):
        # from_response finds the form in the response and carries its
        # hidden fields over automatically; we only add the credentials.
        credentials = {"login": "xxx@qq.com", "password": "xxx"}
        login_request = scrapy.FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.after_login,
        )
        yield login_request

    # Runs after the login POST completes.
    def after_login(self, response):
        # Print every occurrence of "Issues" found on the page.
        page_text = response.body.decode()
        print(re.findall("Issues", page_text))
# -*- coding: utf-8 -*-
import scrapy
import re
class RenrenSpider(scrapy.Spider):
    """Fetch a Renren profile page using cookies captured from a logged-in
    browser session, then check the page for the expected nickname."""

    name = 'renren'
    allowed_domains = ['renren.com']
    # Personal-centre (profile) page URL
    start_urls = ['http://www.renren.com/972990680/profile']

    def start_requests(self):
        # Cookies copied from Chrome's debug tools after a manual login.
        cookiesstr = "anonymid=k3miegqc-hho317; depovince=ZGQT; _r01_=1; JSESSIONID=abcDdtGp7yEtG91r_U-6w; ick_login=d2631ff6-7b2d-4638-a2f5-c3a3f46b1595; ick=5499cd3f-c7a3-44ac-9146-60ac04440cb7; t=d1b681e8b5568a8f6140890d4f05c30f0; societyguester=d1b681e8b5568a8f6140890d4f05c30f0; id=972990680; xnsid=404266eb; XNESSESSIONID=62de8f52d318; jebecookies=4205498d-d0f7-4757-acd3-416f7aa0ae98|||||; ver=7.0; loginfrom=null; jebe_key=8800dc4d-e013-472b-a6aa-552ebfc11486%7Cb1a400326a5d6b2877f8c884e4fe9832%7C1575175011619%7C1%7C1575175011639; jebe_key=8800dc4d-e013-472b-a6aa-552ebfc11486%7Cb1a400326a5d6b2877f8c884e4fe9832%7C1575175011619%7C1%7C1575175011641; wp_fold=0"
        # Split each "name=value" pair on the FIRST '=' only: cookie values
        # may themselves contain '=' (the original unbounded split would
        # silently truncate such values).
        cookies = dict(pair.split("=", 1) for pair in cookiesstr.split("; "))
        # Request carrying the captured cookies.
        yield scrapy.Request(
            self.start_urls[0],
            callback=self.parse,
            cookies=cookies
        )

    def parse(self, response):
        # Search the profile page for the nickname "闲欢" and print matches.
        print(re.findall("闲欢", response.body.decode()))
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。