我不确定为什么我的代码不能工作(从我试图从中提取信息的网站的HTMLDoc中返回公司名称、电话号码和联系方式)。你能帮我找出我做错了什么吗(最有可能的是IHTMLElement和IHTMLElementCollection数据类型,和/或通过getElementsByTagName,getElementsByClassName等访问超文本标记语言)。谢谢你!!
Option Explicit
Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"
'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String
'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement
Dim itemprop As String
Dim itemprop2 As String
Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement
Dim RowNumber As Long
'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all
'loop for business names
For Each BusinessName In BusinessNameCollection
Cells(RowNumber, 1).Value = BusinessName
RowNumber = RowNumber + 1
Next BusinessName
'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
If itemprop = "streetAddress" Then
Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
ElseIf itemprop = "addressLocality" Then
Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "addressRegion" Then
Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "postalCode" Then
Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all
'loop for business addresses
For Each BusinessAddress In BusinessAddressCollection
BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
Cells(RowNumber, 2).Value = BusinessAddress
RowNumber = RowNumber + 1
Next BusinessAddress
'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
If itemprop2 = "telephone" Then
BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all
'loop for business phones
For Each BusinessPhone In BusinessPhoneCollection
Cells(RowNumber, 3).Value = BusinessPhone
RowNumber = RowNumber + 1
Next BusinessPhone
Range("A1").Activate
Set HTMLDoc = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "Manta.com Business Contacts"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
发布于 2018-05-03 21:19:58
这将提取信息。您没有在代码中循环所有的结果页面,也没有提到它,所以我设置这个页面是为了向您展示如何执行结果的第一页。让我知道事情进展如何。
代码:
Option Explicit
Public Sub FinalMantaSub() '<== Can't have ad blocker enabled for this site
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Dim c As Object, i As Long
Set c = HTMLDoc.querySelectorAll("div.media-body")
Do While Not c(i) Is Nothing
Debug.Print "Result #" & i + 1
Debug.Print vbNewLine
Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
Debug.Print String$(20, Chr$(61))
i = i + 1
Loop
IE.Quit
End Sub
输出的快照:
更新:
有大量的结果,但您可以有一个外部循环,如下所示。然后,您可以将上面的代码转换为名为的sub。
Dim arr() As String, pageNo As Long
arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
pageNo = 1
Do While Err.Number = 0
On Error GoTo Errhand:
Dim url As String
url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
Debug.Print url
IE.navigate url
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
pageNo = pageNo + 1
Loop
Errhand:
Debug.Print "Stopped after " & pageNo & " pages."
https://stackoverflow.com/questions/50144431
复制相似问题