我从@宏荚获得了这个VBA代码,用于从下面VBA代码的"strFolder“变量中的文件夹中提取各种Word文件中的数据,但我只能提取位于关键字前面的数据,就像"TRABALHO”(附图)一词中的那样,VBA代码从前面提取内容,但我不能从下面提取数据,例如:“CONSTATA”,我无法从下面提取文本,如果有人能帮我的话,我会很感激。就在下面,我还捕获了我必须用来提取数据的文档。
Sub GetData()
'Note: this code requires a reference to the Word object model.
'See under the VBA Editor's Tools|References.
Application.ScreenUpdating = False
Dim WkSht As Worksheet, r As Long, c As Long
Dim wdApp As New Word.Application, wdDoc As Word.Document
Dim strFile As String, strFolder As String, strOut As String, StrFnd
strFolder = "C:\Users\" & Environ("UserName") & "\Desktop\Macro VBA - Trabalhos Sequenciais\Trabalhos\"
StrFnd = Array("", "", "TRABALHO", "SEQUENCIAL", "REGISTRO", "DATA DA IMPLEMENTAÇÃO", "PRAZO PARA EFETIVAÇÃO", _
"DATA DA EFETIVAÇÃO", "RESPONSÁVEL PELA ANÁLISE", "REVISOR", "CONSTATAÇÃO")
Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
strFile = Dir(strFolder & "\*.doc", vbNormal)
While strFile <> ""
r = r + 1
Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
WkSht.Cells(r, 1).Value = Split(strFile, ".doc")(0)
With wdDoc
For c = 2 To UBound(StrFnd)
With .Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Replacement.Text = ""
.Forward = True
.Format = False
.MatchCase = False
.MatchWildcards = False
.Text = StrFnd(c)
.Wrap = wdFindContinue
.Execute
End With
If .Find.Found = True Then
.End = .Paragraphs(1).Range.End
.Start = .Start + Len(StrFnd(c))
strOut = Trim(Replace(Replace(Replace(Split(.Text, vbCr)(0), vbTab, " "), Chr(11), " "), Chr(160), " "))
Do While strOut = ""
.Collapse wdCollapseEnd
.MoveEnd wdParagraph, 1
strOut = Trim(Replace(Replace(Replace(Split(.Text, vbCr)(0), vbTab, " "), Chr(11), " "), Chr(160), " "))
Loop
WkSht.Cells(r, c).Value = strOut
End If
End With
Next
.Close SaveChanges:=False
End With
strFile = Dir()
Wend
wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub
发布于 2022-05-25 00:17:00
据我所知,您的数据位于文档前三个表中的各个单元格中。在这种情况下,您需要这样的东西:
Sub GetData()
'Note: this code requires a reference to the Word object model.
'See under the VBA Editor's Tools|References.
Application.ScreenUpdating = False
Dim WkSht As Worksheet, r As Long, c As Long
Dim wdApp As New Word.Application, wdDoc As Word.Document, wdRng As Word.Range
Dim strFile As String, strFolder As String
strFolder = "C:\Users\" & Environ("UserName") & "\Desktop\Macro VBA - Trabalhos Sequenciais\Trabalhos\"
Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
strFile = Dir(strFolder & "\*.doc", vbNormal)
While strFile <> ""
r = r + 1: c = 1
Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
WkSht.Cells(r, c).Value = Split(strFile, ".doc")(0)
With wdDoc
With .Tables(1)
Set wdRng = .Cells(1, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(1, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(1, 3).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
End With
With .Tables(2)
Set wdRng = .Cells(1, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(1, 4).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(2, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(3, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
Set wdRng = .Cells(4, 2).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
End With
With .Tables(3)
Set wdRng = .Cells(2, 1).Range: c = c + 1
WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
End With
.Close SaveChanges:=False
End With
strFile = Dir()
Wend
wdApp.Quit
Set wdRng = Nothing: Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub
发布于 2022-07-16 18:18:29
我可以回答如何从多个word文件中提取数据并将其写入文本文件。
下面是从多个文件夹和子文件夹中提取多个word文件的代码
import glob
import docx2txt as d2t
input_dir=r"D:\Doc scraping\xyz"
filepaths=list(glob.glob(input_dir+"\**\*.docx", recursive=True))
def extract_data_from_docx(path_to_file, get_text=False):
text = d2t.process(path_to_file)
if(get_text): #Defining a function to extract text from docx file
return text
data=""
for filepath in filepaths:
data = data + extract_data_from_docx(filepath, get_text=True)
print(data)
https://stackoverflow.com/questions/72363439
复制相似问题