import praw
import csv
# Search Reddit for each keyword and dump the matching submissions
# (author id, title, self-text) to reddit_results.csv, echoing them to stdout.

# Reddit API credentials.
# Each value must be a plain string. The earlier version ended the client_id
# and client_secret lines with a trailing comma, which made them one-element
# tuples instead of strings; the OAuth token request then failed with
# "received 401 HTTP response".
# NOTE(review): secrets are hard-coded in source. Consider loading them from
# environment variables or a praw.ini file, and rotating these values if this
# file has been shared.
client_id = "03UcgbJEBrDsN-DBsuCU-w"  # your client ID
client_secret = "HDMkGK9ml2sf8NahSj74LRxTCPE_gA"  # your client secret
user_agent = "scraping"  # your user agent

# Initialise the Reddit API client.
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent,
)

# Keywords to search for.
keywords = ['hash']

# Create the CSV file; it must stay open for the whole scrape because rows
# are written from inside the search loop.
with open('reddit_results.csv', 'w', encoding='utf-8', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Author', 'Title', 'Content'])

    # Scrape the matching content from Reddit.
    for keyword in keywords:
        print(f"Results for keyword: {keyword}\n")

        # Query all of Reddit through the search endpoint.
        results = reddit.subreddit('all').search(
            keyword, sort='relevance', time_filter='all'
        )

        # Print and store the author, title and content of every hit.
        for submission in results:
            # submission.author is falsy when the account no longer exists.
            # NOTE(review): reading .id presumably triggers an extra API
            # request per author -- confirm, and consider storing the author
            # name instead if that is what the 'Author' column should hold.
            author_id = submission.author.id if submission.author else 'N/A'
            title = submission.title
            # Link posts have no body text of their own.
            content = submission.selftext if submission.is_self else 'Not a self-post'

            print(f"Author: {submission.author}")
            print(f"Title: {title}")
            print(f"Content: {content}")
            print("\n---\n")

            # Write the row to the CSV file.
            csv_writer.writerow([author_id, title, content])
当运行上述脚本时出现以下错误:
E:\Anaconda2\envs\my_env2\python.exe F:/GitHub/爬虫/venv/第三章:数据解析/reddit.py
Results for keyword: hash
Traceback (most recent call last):
File "F:\GitHub\爬虫\venv\第三章:数据解析\reddit.py", line 26, in <module>
for submission in results:
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\models\listing\generator.py", line 63, in __next__
self._next_batch()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\models\listing\generator.py", line 89, in _next_batch
self._listing = self._reddit.get(self.url, params=self.params)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 712, in get
return self._objectify_request(method="GET", params=params, path=path)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 517, in _objectify_request
self.request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\praw\reddit.py", line 941, in request
return self._core.request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 328, in request
return self._request_with_retries(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 254, in _request_with_retries
return self._do_retry(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 162, in _do_retry
return self._request_with_retries(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 234, in _request_with_retries
response, saved_exception = self._make_request(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 186, in _make_request
response = self._rate_limiter.call(
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\rate_limit.py", line 46, in call
kwargs["headers"] = set_header_callback()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\sessions.py", line 282, in _set_header_callback
self._authorizer.refresh()
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 378, in refresh
self._request_token(grant_type="client_credentials", **additional_kwargs)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 155, in _request_token
response = self._authenticator._post(url=url, **data)
File "E:\Anaconda2\envs\my_env2\lib\site-packages\prawcore\auth.py", line 59, in _post
raise ResponseException(response)
prawcore.exceptions.ResponseException: received 401 HTTP response
Process finished with exit code 1
想要新员工快速上手,首先得让文档不要再像一堆没人看的垃圾。架构师必须确保文档结构清晰、分类明确,别再搞那种散乱的文档体系,让人一打开就头疼。每个功能模块的文档都应该像导航图一样,让人一眼就能知道去哪里找所需的内容。千万别做成那种信息堆砌的文档,害得员工像大海捞针一样找半天。把所有内容都丢到一个页面里,既没法让人快速上手,还会彻底打击他们的信心。
接下来,文档的检索功能绝对不容忽视。如果你的文档到现在还没有搜索功能,那真的是在浪费大家的时间。员工需要能够在几秒钟内找到他们需要的内容,而不是一页一页翻到眼花缭乱。如果文档没有提供智能搜索和关键词索引,员工根本没法找到自己需要的解答。别再搞那种懒得更新的索引了,文档必须支持实时更新,否则你只会加深员工的“迷茫感”——他们花了半天时间看完一篇内容,结果发现它早已过时。
最最重要的是,你的入门指南不该只是“目录+几个傻乎乎的步骤”,而要做到像老司机带新手那样,一步一步讲清楚到底该干什么,不要让新员工自己“随便瞎摸”才能找到代码入口。最好能给出真实业务场景的例子,让新员工能立马投入实际开发,别再让他们花半天时间去理解“Hello World”那种无聊示例。如果新员工读你的文档时遇到“太多理解障碍”,你就等着他们离职吧。