我试图找出这个网站获取数据所使用的 API,但没有找到。
在浏览器开发者工具的“网络”选项卡中,我可以在 XHR 响应里看到这些数据。每次切换到另一页时,数据都会随之变化。我需要把这些数据提取出来,但不知道该怎么做,也不知道网站是从哪里取得数据的。我对这方面完全陌生。您能指导我如何获取这些数据或抓取这个网站吗?我试着寻找相关的例子,但没有找到合适的。提前谢谢。
发布于 2021-09-25 09:20:57
下面的代码将为您提供您正在寻找的数据。
使用响应中的字段 recordCount 来设置需要循环的 range。
它是如何工作的
该网站通过一个 API 调用来获取 JSON 格式的数据。它使用了分页技术:把页面索引(pageIndex)和页面大小(pageSize)传给服务器,这样服务器就知道页面偏移量是多少,以及应该返回哪些数据。下面的代码模拟了这一过程:在循环中递增页面索引,从而逐页遍历数据。
import requests
import time
# Request headers copied from the browser's XHR call; the endpoint is sent a
# form-encoded POST marked as an XMLHttpRequest.
headers = {
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "en-US,en;q=0.9,el;q=0.8,he;q=0.7,de;q=0.6,fr;q=0.5,it;q=0.4,es;q=0.3",
    "cache-control": "no-cache",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "pragma": "no-cache",
    "sec-ch-ua": "\"Google Chrome\";v=\"93\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"93\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"macOS\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "x-requested-with": "XMLHttpRequest"
}

# Form fields of the search request. 'pageIndex' is overwritten on every
# iteration of the loop below; 'pageSize' is the number of records per page.
body = {'professionalType': 'General Contractor',
        'Name': '',
        'sortName': 'OverallScore',
        'sortDirection': 'desc',
        'pageIndex': 0,
        'pageSize': 10}

url = 'https://govservices.dcra.dc.gov/contractorratingsystem/BuildingProfessionals/LoadProfessionalSearchResultsWithFilters'

# Paging starts at 1, as in the original sample.
# NOTE(review): confirm whether the API's pageIndex is 0- or 1-based.
page_index = 1
# Provisional upper bound: fetch at least one page, then refine the bound from
# the 'recordCount' field of each successful response.
last_page = page_index
while page_index <= last_page:
    body['pageIndex'] = page_index
    # A timeout prevents the script from hanging forever on a stalled connection.
    r = requests.post(url, headers=headers, data=body, timeout=30)
    if r.status_code == 200:
        payload = r.json()
        print(f'{page_index} --> {payload}')
        # Ceiling division: number of pages needed to cover every record.
        # A missing 'recordCount' yields 0, which ends the loop.
        last_page = -(-payload.get('recordCount', 0) // body['pageSize'])
    else:
        print(f'status code is {r.status_code}')
    page_index += 1
    # Pause between requests to avoid hammering the server.
    time.sleep(1)
输出
/*
* 提示:该行代码过长,系统自动注释不进行高亮。一键复制会移除系统注释
* 1 --> {'buildingProfessionals': [{'buildingProfessional': 'REVOLUTION SOLAR LLC.', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410518000062', 'projectCount': 822, 'planReviewScore': 96.1732900783996, 'applicationIntakeScore': 96.2433090024331, 'inspectionScore': 100, 'overAllProjectScore': 100, 'stopWorkOrders': 0, 'planReviewScoreRating': 4.80866450391998, 'applicationIntakeScoreRating': 4.812165450121655, 'inspectionScoreRating': 5, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'mattyoungarl@gmail.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '10746 JUDY LANE COLUMBIA MD 21044', 'businessPhone': '4438655039', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'AMERICAN AUTOMATIC SPRINKLER CO', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410514000016', 'projectCount': 471, 'planReviewScore': 0.29603315571344, 'applicationIntakeScore': 0.245115452930728, 'inspectionScore': 99.7122042886194, 'overAllProjectScore': 100, 'stopWorkOrders': 12, 'planReviewScoreRating': 0.014801657785672, 'applicationIntakeScoreRating': 0.0122557726465364, 'inspectionScoreRating': 4.98561021443097, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'aasco@aasc-fp.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '3149 DRAPER DRIVE FAIRFAX VA 22031', 'businessPhone': '7038498180', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'FIRE & LIFE SAFETY AMERICA INC.', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410516000410', 
'projectCount': 348, 'planReviewScore': 0.218818380743982, 'applicationIntakeScore': 0.218818380743982, 'inspectionScore': 99.781181619256, 'overAllProjectScore': 100, 'stopWorkOrders': 2, 'planReviewScoreRating': 0.0109409190371991, 'applicationIntakeScoreRating': 0.0109409190371991, 'inspectionScoreRating': 4.9890590809628, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'bdrinkard@flsamerica.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '3017 VERNON ROAD RICHMOND VA 23228', 'businessPhone': '8042221381', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'NORTHERN FIRE PROTECTION, INC.', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410516000183', 'projectCount': 250, 'planReviewScore': 0, 'applicationIntakeScore': 0, 'inspectionScore': 98.7889273356401, 'overAllProjectScore': 100, 'stopWorkOrders': 2, 'planReviewScoreRating': 0, 'applicationIntakeScoreRating': 0, 'inspectionScoreRating': 4.939446366782005, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'nstarcarol@aol.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '21530 BLACKWOOD COURT SUITE #150 STERLING VA 20166', 'businessPhone': '7034069811', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'PHOENIX FIRE PROTECTION INC.', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410518000155', 'projectCount': 174, 'planReviewScore': 0, 'applicationIntakeScore': 0, 'inspectionScore': 100, 'overAllProjectScore': 100, 'stopWorkOrders': 4, 'planReviewScoreRating': 0, 'applicationIntakeScoreRating': 0, 
'inspectionScoreRating': 5, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': '', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '7901 PENN RANDALL PLACE UPPER MARLBORO MD 20772', 'businessPhone': '3016697066', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'JENSON FIRE PROTECTION INC', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410517000309', 'projectCount': 146, 'planReviewScore': 0.632911392405063, 'applicationIntakeScore': 0.632911392405063, 'inspectionScore': 100, 'overAllProjectScore': 100, 'stopWorkOrders': 0, 'planReviewScoreRating': 0.03164556962025315, 'applicationIntakeScoreRating': 0.03164556962025315, 'inspectionScoreRating': 5, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'sung@jensonfireprotection.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '8740 CHERRY LANE UNIT 13 LAUREL MD 20707', 'businessPhone': '', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'LIVINGSTON FIRE PROTECTION INC', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410516000203', 'projectCount': 145, 'planReviewScore': 0, 'applicationIntakeScore': 0, 'inspectionScore': 98.3734939759036, 'overAllProjectScore': 100, 'stopWorkOrders': 5, 'planReviewScoreRating': 0, 'applicationIntakeScoreRating': 0, 'inspectionScoreRating': 4.91867469879518, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'info@livfire.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '5150 LAWRENCE PLACE HYATTSVILLE 
MD 20781', 'businessPhone': '3017794466', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'RIDGEWAY CORPORATION PROFESSIONAL CORPORATION', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410518000087', 'projectCount': 145, 'planReviewScore': 0.798403193612774, 'applicationIntakeScore': 1.4251497005988, 'inspectionScore': 95.688622754491, 'overAllProjectScore': 100, 'stopWorkOrders': 4, 'planReviewScoreRating': 0.0399201596806387, 'applicationIntakeScoreRating': 0.07125748502994, 'inspectionScoreRating': 4.78443113772455, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'ridgecorpusa@outlook.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '12514 KENSINGTON LANE BOWIE MD 20715', 'businessPhone': '3014642003', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'FORTRESS PROTECTION GROUP', 'buildingProfessionalType': 'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410518000115', 'projectCount': 124, 'planReviewScore': 0, 'applicationIntakeScore': 0, 'inspectionScore': 99.4932432432432, 'overAllProjectScore': 100, 'stopWorkOrders': 5, 'planReviewScoreRating': 0, 'applicationIntakeScoreRating': 0, 'inspectionScoreRating': 4.97466216216216, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'todd.patterson@fortresspg.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '18618 BROKEN OAK RD BOYDS MD 20841', 'businessPhone': '', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}, {'buildingProfessional': 'PRIME FIRE PROTECTION LLC', 'buildingProfessionalType': 
'General-Contractor', 'permitType': None, 'businessName': None, 'contactNumber': '410517000488', 'projectCount': 120, 'planReviewScore': 0, 'applicationIntakeScore': 0, 'inspectionScore': 94.4320987654321, 'overAllProjectScore': 100, 'stopWorkOrders': 20, 'planReviewScoreRating': 0, 'applicationIntakeScoreRating': 0, 'inspectionScoreRating': 4.721604938271605, 'overAllProjectScoreRating': 5, 'useCategory': None, 'businessEmail': 'vmalca@primefireprotection.com', 'imageName': 'noimage.png', 'imageUrl': 'https://govservices.dcra.dc.gov/ProfessionalImages/noimage.png', 'businessAddress': '13549 JAMIESON PL GERMANTOWN MD 20874', 'businessPhone': '3104736189', 'flag': '', 'professionalDisplayName': 'General Contractor', 'webAddress': 'N/A', 'bbb': 'NOT ACCREDITED'}], 'pageIndex': 1, 'pageSize': 10, 'recordCount': 1113}
*/
...
发布于 2021-09-25 04:38:29
您可以使用 requests 库来访问生成该响应的 URL,然后使用 BeautifulSoup 来解析返回的内容。
https://stackoverflow.com/questions/69323010
复制相似问题