我使用 Telethon 从 Telegram 群聊中抓取成员列表，然后将这些数据保存到 Google Sheets 中。此代码适用于小型群组/频道（少于 2k 名成员/订阅者）。
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty
from google.oauth2 import service_account # import for google sheets API
from googleapiclient.discovery import build
# OAuth scope passed to the service-account credentials below
# (the Google Sheets "spreadsheets" scope URL).
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
# Path to the service-account key file, resolved relative to the working directory.
SERVICE_ACCOUNT_FILE = 'key.json'
# Module-level credentials object, built once at import time from the key file.
# NOTE: this reads SERVICE_ACCOUNT_FILE from disk as an import side effect.
cred = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
class Scraper():
    """Scrape the member list of a Telegram megagroup into a Google Sheet.

    Typical use: ``connect()`` -> ``getGroups()`` -> ``saveToSheet()``.
    """

    def __init__(self, api_id=1234567, api_hash="randomrandomrandom",
                 phone="myphonenumber"):
        """Create the Telegram client (no network traffic happens here).

        Args:
            api_id: Telegram API ID.
            api_hash: Telegram API hash.
            phone: Mobile number with country code; also passed to
                ``TelegramClient`` as its first (session) argument.
        """
        self.api_id = api_id
        self.api_hash = api_hash
        self.phone = phone
        self.client = TelegramClient(self.phone, self.api_id, self.api_hash)
        # Megagroups collected by getGroups(); indexed by saveToSheet().
        self.groups = []

    def connect(self):
        """Connect to Telegram and log in interactively if needed.

        If the user is not yet authorized, a code request is sent and the
        user is prompted on stdin for the OTP code they received. After
        logging in, a *.session file is created which makes the session
        persistent.
        """
        self.client.connect()
        if not self.client.is_user_authorized():
            self.client.send_code_request(self.phone)
            self.client.sign_in(self.phone, input("Enter OTP code: "))

    def getGroups(self, limit=10):
        """Fetch recent dialogs, keep the megagroups, and print a menu.

        ``offset_date`` and ``offset_peer`` are sent empty so no filtering
        is applied; ``offset_id`` and ``limit`` are used for pagination.
        Matching chats are appended to ``self.groups`` and every entry of
        ``self.groups`` is printed as ``<index>- <title>``.

        Args:
            limit: Number of dialogs requested in the single
                ``GetDialogsRequest`` call (default 10, as before).
        """
        result = self.client(GetDialogsRequest(
            offset_date=None,
            offset_id=0,
            offset_peer=InputPeerEmpty(),
            limit=limit,
            hash=0,
        ))
        # Not every chat object has a `megagroup` attribute; getattr() skips
        # those without a bare `except:` that would also hide real errors.
        self.groups.extend(
            chat for chat in result.chats if getattr(chat, "megagroup", False)
        )
        # Menu the user picks from in saveToSheet().
        for i, g in enumerate(self.groups):
            print(str(i) + '- ' + g.title)

    def _select_group(self, raw_index):
        """Return the group from ``self.groups`` chosen by the typed index.

        Raises:
            ValueError: if ``raw_index`` is not an integer.
            IndexError: if the index is negative or past the end of the menu.
        """
        index = int(raw_index)
        # A negative index would silently pick a group from the end of the
        # list, which never matches what the printed menu offered.
        if index < 0:
            raise IndexError("group index out of range: " + str(index))
        return self.groups[index]

    @staticmethod
    def _member_rows(participants):
        """Build one ``[id, username, full name]`` row per participant.

        Missing/empty ``username``, ``first_name`` or ``last_name`` values
        become empty strings; the full name is the two name parts joined by
        a space and stripped.
        """
        rows = []
        for user in participants:
            first_name = user.first_name or ""
            last_name = user.last_name or ""
            full_name = (first_name + " " + last_name).strip()
            rows.append([user.id, user.username or "", full_name])
        return rows

    def saveToSheet(self, spreadsheet_id="<MY-SHEET-ID>",
                    sheet_range="Sheet1!A2:C"):
        """Prompt for a group, fetch its members and write them to a sheet.

        Uses the module-level ``cred`` service-account credentials.

        Args:
            spreadsheet_id: ID of the target spreadsheet.
            sheet_range: A1-notation range that receives the rows.

        Raises:
            ValueError: if the typed group index is not an integer.
            IndexError: if the typed group index is not in the menu.
        """
        g_index = input("Choose a channel to scrape members from: ")
        target_group = self._select_group(g_index)

        print("Fetching members...")
        all_participants = self.client.get_participants(target_group)

        print("Saving to sheet...")
        rows = self._member_rows(all_participants)

        service = build("sheets", "v4", credentials=cred)
        # Call the Sheets API and overwrite the target range with the rows.
        response = service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id,
            range=sheet_range,
            valueInputOption="RAW",
            body={"values": rows},
        ).execute()
        print(response)
        print("**Members scraped successfully**")
if __name__ == '__main__':
    # Script entry point: log in, list the megagroups, then export the
    # members of the group the user picks.
    scraper = Scraper()
    scraper.connect()
    scraper.getGroups()
    scraper.saveToSheet()
但是，当我尝试更大的群组（3k 到 6k 名成员）时，它只能返回约 90% 的成员；如果群组成员超过 6k，则会抛出以下错误：
Traceback (most recent call last):
File "d:\crawler\telegram\group\main2.py", line 149, in <module>
telegram.saveToSheet()
File "d:\crawler\telegram\group\main2.py", line 88, in saveToSheet
all_participants = self.client.get_participants(target_channel)
File "G:\Program Files\Python310\lib\site-packages\telethon\sync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "G:\Program Files\Python310\lib\asyncio\base_events.py", line 641, in run_until_complete
return future.result()
File "G:\Program Files\Python310\lib\site-packages\telethon\client\chats.py", line 507, in get_participants
return await self.iter_participants(*args, **kwargs).collect()
File "G:\Program Files\Python310\lib\site-packages\telethon\requestiter.py", line 113, in collect
async for message in self:
File "G:\Program Files\Python310\lib\site-packages\telethon\requestiter.py", line 74, in __anext__
if await self._load_next_chunk():
File "G:\Program Files\Python310\lib\site-packages\telethon\client\chats.py", line 224, in _load_next_chunk
participants = results[i]
TypeError: 'ChannelParticipants' object is not subscriptable
我还阅读了 get_participants 方法的文档。它有一个参数 aggressive=True，但实际使用时，代码根本无法工作（即使是小型群组），并且会抛出 FloodWaitError。
Traceback (most recent call last):
File "d:\crawler\telegram\group\main2.py", line 149, in <module>
telegram.saveToSheet()
File "d:\crawler\telegram\group\main2.py", line 88, in saveToSheet
all_participants = self.client.get_participants(target_group, aggressive = True)
File "G:\Program Files\Python310\lib\site-packages\telethon\sync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "G:\Program Files\Python310\lib\asyncio\base_events.py", line 641, in run_until_complete
return future.result()
File "G:\Program Files\Python310\lib\site-packages\telethon\client\chats.py", line 507, in get_participants
return await self.iter_participants(*args, **kwargs).collect()
File "G:\Program Files\Python310\lib\site-packages\telethon\requestiter.py", line 113, in collect
async for message in self:
File "G:\Program Files\Python310\lib\site-packages\telethon\requestiter.py", line 74, in __anext__
if await self._load_next_chunk():
File "G:\Program Files\Python310\lib\site-packages\telethon\client\chats.py", line 222, in _load_next_chunk
results = await self.client(self.requests)
File "G:\Program Files\Python310\lib\site-packages\telethon\client\users.py", line 30, in __call__
return await self._call(self._sender, request, ordered=ordered)
File "G:\Program Files\Python310\lib\site-packages\telethon\client\users.py", line 80, in _call
raise MultiError(exceptions, results, requests)
telethon.errors.common.MultiError: ([FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, FloodWaitError('A wait of
31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, None, FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)')], [None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9ECACE0>, None, None, None, None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9DE0>, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9150>, None], [<telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBBE0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBCA0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9720>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA4D0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9C90>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9F00>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA020>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA110>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9A80>])
有人能给我一些建议或替代吗?
发布于 2022-02-18 19:29:59
发布于 2022-04-02 08:56:19
all_participants = client.get_participants(target_group)
https://stackoverflow.com/questions/71070745
复制相似问题