I am trying to scrape the text of every episode of every TV series on a web page. The whole program is nested, so it goes through three web pages before it reaches the list of links. It is showing some errors, which I have pasted below.
import requests
import bs4 as bs

urls='http://dl5.lavinmovie.net/Series/'
url=requests.get(urls).text
soup=bs.BeautifulSoup(url,'lxml')
title=soup.find_all('a')
ur=[""]
names=[""]
season=[""]
quality=[""]
for i in title:
    # names.append(i.text)
    urlss=urls+i.text+"/"
    urla=requests.get(urls).text
    soupp=bs.BeautifulSoup(urla,'lxml')
    ur=soupp.find_all('a')
    for i in ur:
        # names.append(i.text)
        urls=urls+i.text+"/"
        urla=requests.get(urls).text
        soupp=bs.BeautifulSoup(urla,'lxml')
        ur=soupp.find_all('a')
        for i in ur:
            # quality.append(i.text)
            urls=urls+i.text+"/"
            urla=requests.get(urls).text
            soupp=bs.BeautifulSoup(urla,'lxml')
            ur=soupp.find_all('a')
            for i in ur:
                print(i.text)
Traceback (most recent call last):
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 603, in urlopen
chunked=chunked)
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1321, in getresponse
response.begin()
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 296, in begin
version, status, reason = self._read_status()
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 257, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\Vedant Mamgain\AppData\Local\Programs\Python\Python37\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
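For what it is worth, the ConnectionResetError at the bottom of the traceback means the remote server dropped the connection mid-request, which commonly happens when a scraper fires many requests at a host in quick succession. A minimal sketch of one common mitigation, mounting urllib3 retries with backoff onto a shared requests.Session (the retry counts, backoff factor, and timeout below are assumed values, not taken from the original post):

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
# retry transient connection/read failures with exponential backoff (values are assumptions)
retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))

# reuse the session for every request instead of calling requests.get() each time
html = session.get('http://dl5.lavinmovie.net/Series/', timeout=10).text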
Posted on 2019-07-30 15:15:53
Try this; it works for me:
import requests
import bs4 as bs

names = list()
name_links = list()
base_url = 'http://dl5.lavinmovie.net/Series/'
final_list = list()

soup = bs.BeautifulSoup(requests.get(base_url).text, 'lxml')
title = soup.find_all('a')
for link in title[1:]:
    names.append(link.text)
    current_link = link['href']
    print(link.text)
    name_links.append(str(current_link))
    # get seasons
    soup = bs.BeautifulSoup(requests.get(base_url + current_link).text, 'lxml')
    title = soup.find_all('a')
    for link in title[1:]:
        season_link = link['href']
        # get quality of the seasons
        soup = bs.BeautifulSoup(requests.get(base_url + current_link + season_link).text, 'lxml')
        title = soup.find_all('a')
        for link in title[1:]:
            quality_link = link['href']
            # get list of episodes
            soup = bs.BeautifulSoup(requests.get(base_url + current_link + season_link + quality_link).text, 'lxml')
            title = soup.find_all('a')
            for link in title[1:]:
                episode_link = link['href']
                # the original answer was cut off at "final_list.a";
                # appending the full episode URL is the likely intent
                final_list.append(base_url + current_link + season_link + quality_link + episode_link)
See if this works for you.
Posted on 2019-07-31 06:23:09
import requests
import bs4 as bs

urls = 'http://dl5.lavinmovie.net/Series/'
url = requests.get(urls).text
soup = bs.BeautifulSoup(url, 'lxml')
title = soup.find_all('a')
for i in title:
    if(i.text != '../' and ".mp4" not in i.text):
        urll = urls+i.text
        # arr.append(i.text)
        urll1 = requests.get(urll).text
        soupp1 = bs.BeautifulSoup(urll1, 'lxml')
        season = soupp1.find_all('a')
        print(i.text)
        for j in season:
            if(j.text != '../' and ".mp4" not in j.text):
                urlla = urll+j.text
                urll2 = requests.get(urlla).text
                soupp2 = bs.BeautifulSoup(urll2, 'lxml')
                quality = soupp2.find_all('a')
                print(j.text)
                for k in quality:
                    if(k.text != '../' and ".mp4" not in k.text):
                        urllb = urlla+k.text
                        urll3 = requests.get(urllb).text
                        soupp3 = bs.BeautifulSoup(urll3, 'lxml')
                        episode = soupp3.find_all('a')
                        print(k.text)
                        for m in episode:
                            if(m.text != '../' and ".mp4" not in m.text):
                                print(m.text)
I have solved the problem myself as well. Thanks to everyone who helped me.
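As a side note, both working answers hard-code four levels of nested loops for the series/season/quality/episode pages. The same directory-listing walk can be written once as a recursive function; a minimal sketch under that assumption (the function name, depth limit, and courtesy delay below are illustrative choices, not part of either answer):

import time
import requests
import bs4 as bs

def crawl(url, depth=0, max_depth=3):
    # fetch one directory-style listing page and walk its links
    soup = bs.BeautifulSoup(requests.get(url).text, 'lxml')
    for link in soup.find_all('a'):
        name = link.text
        if name == '../':
            continue  # skip the parent-directory entry
        if '.mp4' in name or depth == max_depth:
            print(url + name)  # leaf level: an episode file
        else:
            time.sleep(0.5)  # assumed courtesy delay to avoid connection resets
            crawl(url + name, depth + 1, max_depth)

crawl('http://dl5.lavinmovie.net/Series/')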
https://stackoverflow.com/questions/57264471