爬虫中的 JavaScript 逆向,是指对目标网站所使用的 JavaScript 代码进行逆向工程,以还原其加载数据、生成请求参数或进行校验的逻辑。通常,网站会使用 JavaScript 来动态加载内容、执行操作或者进行验证,这可能会使得只抓取静态 HTML 的传统爬虫在获取网页内容时遇到困难。因此,进行爬虫的 JavaScript 逆向工程通常包括以下步骤:
import requests
from bs4 import BeautifulSoup
def fetch_taobao_data(keyword, timeout=10):
    """Fetch the raw HTML of a Taobao search results page.

    Args:
        keyword: Search term; sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (a failure message is printed first).
    """
    url = "https://s.taobao.com/search"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.45 Safari/537.36"
        ),
    }
    try:
        # Passing the keyword through ``params`` lets requests percent-encode
        # it, so characters such as '&', '#' or spaces cannot corrupt the
        # query string. The timeout keeps a stalled connection from blocking
        # the caller forever.
        response = requests.get(
            url,
            params={"q": keyword},
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Network-level failures follow the same "return None" contract as a
        # non-200 status, so callers only have one failure mode to handle.
        print(f"Failed to fetch data from Taobao: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to fetch data from Taobao.")
        return None
    return response.text
def _div_text(node, css_class):
    """Return the stripped text of the first matching ``div``, or ``""`` if absent."""
    found = node.find('div', class_=css_class)
    return found.text.strip() if found is not None else ""


def parse_taobao_data(html):
    """Extract title, price and location for each item in a search page.

    Args:
        html: HTML text of a search results page.

    Returns:
        A list of dicts with the keys ``'title'``, ``'price'`` and
        ``'location'``, one per matched item container. A field whose
        ``div`` is missing from an item is returned as an empty string
        instead of raising ``AttributeError``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all('div', class_='item J_MouserOnverReq')
    # Each field name doubles as the CSS class of the div that holds it.
    fields = ('title', 'price', 'location')
    return [
        {field: _div_text(item, field) for field in fields}
        for item in items
    ]
def main():
    """Prompt for a keyword, fetch the search page and print the parsed items."""
    keyword = input("请输入要搜索的商品关键词:")
    page = fetch_taobao_data(keyword)
    # Nothing to show when the fetch failed or returned an empty body.
    if not page:
        return

    records = parse_taobao_data(page)
    print("搜索结果:")
    index = 0
    for record in records:
        index += 1
        print(f"{index}. 标题: {record['title']}")
        print(f" 价格: {record['price']}")
        print(f" 发货地: {record['location']}")
        # Blank line between consecutive results.
        print()
# Run the interactive search only when executed as a script, not on import.
if __name__ == "__main__":
    main()