import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import re
# Scrape the Godrej Properties NRI FAQ page and collect every question-like
# sentence found inside the tag type that the page uses for its questions.

# A "question" starts with one of a fixed set of interrogative words, contains
# no '.', '<', '>' or '?' in its body, and ends with '?'.  The pattern is
# compiled once because it is used both to locate text nodes and to extract
# the question strings from each candidate tag's markup.
QUESTION_RE = re.compile(
    r'\s*((?:how|How|Can|can|what|What|where|Where|describe|Describe'
    r'|Who|who|When|when|Why|why|Should|should|is|Is|I|Do|do|Are|are'
    r'|Will|will)[^.<>?]*?\s*\?)'
)

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops us from parsing an HTTP error page as the FAQ.
req = requests.get(
    'https://www.godrejproperties.com/nricorner/nri-faqs',
    timeout=30,
)
req.raise_for_status()
soup = BeautifulSoup(req.text, "html5lib")

# Parent tags of every text node that looks like a question.
list1 = []
for elem in soup(string=QUESTION_RE):
    print(elem.parent)
    list1.append(elem.parent)

# The tag type of the *second* match is taken as "the question tag", exactly
# as the original script did.
# NOTE(review): presumably the first match is not a real FAQ entry on this
# page -- confirm against the live markup.
if len(list1) < 2:
    raise IndexError(
        "expected at least two question-like text nodes, found %d"
        % len(list1)
    )
# Tag.name is the bare tag name ("h3"); slicing the text between '<' and '>'
# would also pick up attributes and make find_all() match nothing.
tag = list1[1].name
print(tag)

# For every tag of that type, pull out the question strings it contains.
# Each entry of Ques is the list of questions found in one tag.
Ques = []
for header in soup.find_all(tag):
    matches = QUESTION_RE.findall(str(header))
    if matches:
        Ques.append(matches)

# dtype=object: tags may hold different numbers of questions, and NumPy
# refuses to build a regular array from ragged nested lists.
Ques = np.array(Ques, dtype=object)
print(Ques)
类似地，我还需要从 FAQ 页面中提取答案：我需要编写一个算法，找出包含答案的标签，抓取其中的内容并保存到列表中。之后，我需要把问题和答案一一配对。
发布于 2018-02-12 21:49:59
https://stackoverflow.com/questions/48760129
复制