In today's data-driven era, efficiently harvesting web data has become an essential skill for developers. Traditional synchronous crawlers struggle with large-scale collection because each request blocks until it completes, whereas Python's asyncio library paired with an async HTTP client such as aiohttp can drive request rates on the order of a thousand concurrent requests per second.
import asyncio

import aiohttp
from bs4 import BeautifulSoup


async def fetch(session, url):
    # Request the page and read the body without blocking the event loop
    async with session.get(url) as response:
        return await response.text()


async def parse(session, url):
    html = await fetch(session, url)
    soup = BeautifulSoup(html, 'lxml')
    # Data-extraction logic goes here; the page title stands in as a placeholder
    return soup.title.string


async def main(urls):
    # Share a single ClientSession across all requests rather than opening one
    # per URL; connection reuse is a large part of aiohttp's speed advantage
    async with aiohttp.ClientSession() as session:
        tasks = [parse(session, url) for url in urls]
        return await asyncio.gather(*tasks)


if __name__ == '__main__':
    urls = [...]  # list of target URLs
    results = asyncio.run(main(urls))
    print(results)
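Unbounded fan-out is rarely usable as-is: real targets throttle or block clients well before a thousand requests per second. The sketch below shows one common way to cap concurrency; it is not part of the solution above, and the fetch_bounded helper and the limit of 100 are assumptions for illustration. An asyncio.Semaphore gates how many coroutines touch the network at once, while aiohttp's TCPConnector(limit=...) caps the connection pool as a second safeguard.

import asyncio

import aiohttp


async def fetch_bounded(session, semaphore, url):
    # Hypothetical helper: acquire the semaphore before hitting the network,
    # so no more than `limit` requests are ever in flight at once
    async with semaphore:
        async with session.get(url) as response:
            return await response.text()


async def fetch_all(urls, limit=100):  # limit=100 is an arbitrary assumption
    semaphore = asyncio.Semaphore(limit)
    connector = aiohttp.TCPConnector(limit=limit)  # pool-level connection cap
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [fetch_bounded(session, semaphore, url) for url in urls]
        # return_exceptions=True keeps one failing URL from sinking the batch
        return await asyncio.gather(*tasks, return_exceptions=True)

Passing return_exceptions=True trades fail-fast behavior for partial results, which usually suits large crawls better.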
With minor modifications, this approach can be applied to: