文章/答案/技术大牛

发布

社区首页 >问答首页 >如何使用VBA从多个Word文档中提取数据，以基于关键字进行excel行？

问如何使用VBA从多个Word文档中提取数据，以基于关键字进行excel行？
EN

Stack Overflow用户

提问于 2022-05-24 13:04:49

回答 2查看 309关注 0票数 -1

我从@宏荚获得了这个VBA代码，用于从下面VBA代码的"strFolder“变量中的文件夹中提取各种Word文件中的数据，但我只能提取位于关键字前面的数据，就像"TRABALHO”(附图)一词中的那样，VBA代码从前面提取内容，但我不能从下面提取数据，例如：“CONSTATA”，我无法从下面提取文本，如果有人能帮我的话，我会很感激。就在下面，我还捕获了我必须用来提取数据的文档。

图像文字文档

Sub GetData()
'Note: this code requires a reference to the Word object model.
'See under the VBA Editor's Tools|References.
Application.ScreenUpdating = False
Dim WkSht As Worksheet, r As Long, c As Long
Dim wdApp As New Word.Application, wdDoc As Word.Document
Dim strFile As String, strFolder As String, strOut As String, StrFnd
strFolder = "C:\Users\" & Environ("UserName") & "\Desktop\Macro VBA - Trabalhos Sequenciais\Trabalhos\"
StrFnd = Array("", "", "TRABALHO", "SEQUENCIAL", "REGISTRO", "DATA DA IMPLEMENTAÇÃO", "PRAZO PARA EFETIVAÇÃO", _
"DATA DA EFETIVAÇÃO", "RESPONSÁVEL PELA ANÁLISE", "REVISOR", "CONSTATAÇÃO")
Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
strFile = Dir(strFolder & "\*.doc", vbNormal)
While strFile <> ""
  r = r + 1
  Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
  WkSht.Cells(r, 1).Value = Split(strFile, ".doc")(0)
  With wdDoc
    For c = 2 To UBound(StrFnd)
      With .Range
        With .Find
          .ClearFormatting
          .Replacement.ClearFormatting
          .Replacement.Text = ""
          .Forward = True
          .Format = False
          .MatchCase = False
          .MatchWildcards = False
          .Text = StrFnd(c)
          .Wrap = wdFindContinue
          .Execute
        End With
        If .Find.Found = True Then
          .End = .Paragraphs(1).Range.End
          .Start = .Start + Len(StrFnd(c))
          strOut = Trim(Replace(Replace(Replace(Split(.Text, vbCr)(0), vbTab, " "), Chr(11), " "), Chr(160), " "))
          Do While strOut = ""
            .Collapse wdCollapseEnd
            .MoveEnd wdParagraph, 1
            strOut = Trim(Replace(Replace(Replace(Split(.Text, vbCr)(0), vbTab, " "), Chr(11), " "), Chr(160), " "))
          Loop
          WkSht.Cells(r, c).Value = strOut
        End If
      End With
    Next
    .Close SaveChanges:=False
  End With
  strFile = Dir()
Wend
wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub

excel

vba

ms-word

回答 2

Stack Overflow用户

发布于 2022-05-25 00:17:00

据我所知，您的数据位于文档前三个表中的各个单元格中。在这种情况下，您需要这样的东西：

Sub GetData()
'Note: this code requires a reference to the Word object model.
'See under the VBA Editor's Tools|References.
Application.ScreenUpdating = False
Dim WkSht As Worksheet, r As Long, c As Long
Dim wdApp As New Word.Application, wdDoc As Word.Document, wdRng As Word.Range
Dim strFile As String, strFolder As String
strFolder = "C:\Users\" & Environ("UserName") & "\Desktop\Macro VBA - Trabalhos Sequenciais\Trabalhos\"
Set WkSht = ActiveSheet: r = WkSht.Cells(WkSht.Rows.Count, 1).End(xlUp).Row
strFile = Dir(strFolder & "\*.doc", vbNormal)
While strFile <> ""
  r = r + 1: c = 1
  Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile, AddToRecentFiles:=False, Visible:=False)
  WkSht.Cells(r, c).Value = Split(strFile, ".doc")(0)
  With wdDoc
    With .Tables(1)
      Set wdRng = .Cells(1, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(1, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(1, 3).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
    End With
    With .Tables(2)
      Set wdRng = .Cells(1, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(1, 4).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(2, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(3, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
      Set wdRng = .Cells(4, 2).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
    End With
    With .Tables(3)
      Set wdRng = .Cells(2, 1).Range: c = c + 1
      WkSht.Cells(r, c).Value = Split(Rng.Text, vbCr)(0)
    End With
    .Close SaveChanges:=False
  End With
  strFile = Dir()
Wend
wdApp.Quit
Set wdRng = Nothing: Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub

票数 0

Stack Overflow用户

发布于 2022-07-16 18:18:29

我可以回答如何从多个word文件中提取数据并将其写入文本文件。

下面是从多个文件夹和子文件夹中提取多个word文件的代码

import glob
import docx2txt as d2t


input_dir=r"D:\Doc scraping\xyz"                      
filepaths=list(glob.glob(input_dir+"\**\*.docx", recursive=True))

def extract_data_from_docx(path_to_file, get_text=False):

text = d2t.process(path_to_file) 

 if(get_text):       #Defining a function to extract text from docx file
    return text
data=""

for filepath in filepaths:

  data = data + extract_data_from_docx(filepath, get_text=True)     

print(data)

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/72363439

复制

相似问题

问如何使用VBA从多个Word文档中提取数据，以基于关键字进行excel行？
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何使用VBA从多个Word文档中提取数据，以基于关键字进行excel行？EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何使用VBA从多个Word文档中提取数据，以基于关键字进行excel行？
EN