附上我抓取一个网站的代码。这个网站上作者的成果抓不到,只好用这种方式来抓了:
from selenium import webdriver
import time
from lxml.html import...f.read()
# Split the text held in `data` into lines and collect every line in j1.
# NOTE(review): j1 is presumably a set (it is used with .add() here and with
# the `-` operator further down) — confirm against its definition, which is
# not visible in this excerpt.
data_list = data.split('\n')
for dt in data_list:
    # Loop body re-indented: with the body at column 0 the script fails to
    # parse (IndentationError).
    j1.add(dt)
# Close the handle `f`; NOTE(review): presumably the file `data` was read
# from — its open() call is not visible here.
f.close()
print('j1= ', len(j1))
j2...= ', len(j2))
# Entries of j1 that are not in j2 (via the `-` operator).
# NOTE(review): presumably both j1 and j2 are sets — confirm.
countSet = j1 - j2
print('countset= ', len(countSet))
# Starts out empty. NOTE(review): presumably filled by the loop over countSet
# that follows — confirm.
AuthorsData = []
for dt in countSet.../div[3]/@title')[0]
temp_dict = {
'num': i,
'pro_name': pro_name,
'pro_url': pro_url,
'pro_author': pro_author...A=DVUy", "name": "白晓涓",
# "url": "https://www.scholarmate.com/P/73me22", "org": "", "项目": "6", "成果":