首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >在Excel中使用正则表达式的通用UDF

在Excel中使用正则表达式的通用UDF
EN

Stack Overflow用户
提问于 2013-10-20 18:28:39
回答 2查看 1.6K关注 0票数 4

我需要每周分析、总结和批次几千行文字。Excel通配符不够灵活,我想移除粘贴到Notepad++进行处理或输入脚本的额外步骤。

这是我想出的工具。他们仍然有点慢--也许在公司笔记本电脑上每秒3000行--但它们很方便。

RXMatch --返回第一次匹配,选项返回子组。

代码语言:javascript
复制
=RXMatch("Apple","A(..)",1) -> "pp"

RXCount -比赛数

代码语言:javascript
复制
=RXCount("Apple","p") -> 2

RXPrint --将第一个匹配和/或子组嵌入到模板字符串中

代码语言:javascript
复制
=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"

RXPrintAll --将每个匹配嵌入到模板字符串中,加入结果

代码语言:javascript
复制
=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"

RXMatches --返回一个垂直的匹配数组,选项返回子组

代码语言:javascript
复制
=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}
EN

回答 2

Stack Overflow用户

回答已采纳

发布于 2013-10-20 18:28:39

RXMatch

代码语言:javascript
复制
Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns the matching text
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        If (Group > 0) Then
            retval = Matches(0).submatches(Group - 1)
        Else
            retval = Matches(0)
        End If
    Else
        retval = ""
    End If

    RXMatch = retval
End Function

RXCount

代码语言:javascript
复制
Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
    Dim retval As Integer
    ' Counts the number of matches
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True

    RE.Pattern = Pattern
    Set Matches = RE.Execute(Text)

    retval = Matches.Count

    RXCount = retval
End Function

RXPrint

代码语言:javascript
复制
Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, using the first match found
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArray(0 To MatchesTemplate.Count - 1) As String
        Dim escaped As String
        Dim plaintext As String
        For i = 0 To MatchesTemplate.Count - 1
            escaped = MatchesTemplate(i).submatches(0)
            plaintext = MatchesTemplate(i).submatches(1)
            If (Len(escaped) > 0) Then
                If (IsNumeric(escaped)) Then
                    Dim groupnum As Integer
                    groupnum = CInt(escaped)
                    If groupnum = 0 Then
                        retArray(i) = MatchesText(0)
                    ElseIf (groupnum > MatchesText(0).submatches.Count) Then
                        retArray(i) = "?"
                    Else
                        retArray(i) = MatchesText(0).submatches(groupnum - 1)
                    End If
                Else
                    retArray(i) = escaped
                End If
            Else
                retArray(i) = plaintext
            End If
        Next i
        retval = Join(retArray, "")
    Else
        retval = ""
    End If

    RXPrint = retval
End Function

RXPrintAll

代码语言:javascript
复制
Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, repeated for each match
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' Delimiter (optional) specified how the results will be joined
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Global = True
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArrays(0 To MatchesText.Count - 1)
        For j = 0 To MatchesText.Count - 1
            ReDim retArray(0 To MatchesTemplate.Count - 1) As String
            Dim escaped As String
            Dim plaintext As String
            For i = 0 To MatchesTemplate.Count - 1
                escaped = MatchesTemplate(i).submatches(0)
                plaintext = MatchesTemplate(i).submatches(1)
                If (Len(escaped) > 0) Then
                    If (IsNumeric(escaped)) Then
                        Dim groupnum As Integer
                        groupnum = CInt(escaped)
                        If groupnum = 0 Then
                            retArray(i) = MatchesText(j)
                        ElseIf (groupnum > MatchesText(j).submatches.Count) Then
                            retArray(i) = "?"
                        Else
                            retArray(i) = MatchesText(j).submatches(groupnum - 1)
                        End If
                    Else
                        retArray(i) = escaped
                    End If
                Else
                    retArray(i) = plaintext
                End If
            Next i
            retArrays(j) = Join(retArray, "")
        Next j
        retval = Join(retArrays, Delimiter)
    Else
        retval = ""
    End If
    RXPrintAll = retval
End Function

RXMatches

代码语言:javascript
复制
Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
    Dim retval() As String
    ' Takes a string and returns all matches in a vertical array
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        ReDim retval(0 To Matches.Count - 1)
        For i = 0 To Matches.Count - 1
            If (Group > 0) Then
                retval(i) = Matches(i).submatches(Group - 1)
            Else
                retval(i) = Matches(i)
            End If
        Next i
    Else
        ReDim retval(1)
        retval(0) = ""
    End If

    RXMatches = Application.Transpose(retval)
End Function
票数 3
EN

Stack Overflow用户

发布于 2020-06-26 19:14:57

在处理UDF时,缓存创建的对象至关重要。

例如:

代码语言:javascript
复制
Public Function RegexTest(ByVal vHaystack As Variant, ByVal sPattern As String, Optional ByVal sFlags As String = "") As Boolean
    'If haystack is an error then return false
    If IsError(vHaystack) Then Exit Function
    
    'Stringify haystack
    Dim sHaystack As String: sHaystack = vHaystack
    
    'Cache regular expressions, especially important for formulae
    Static lookup As Object
    If lookup Is Nothing Then Set lookup = CreateObject("Scripting.Dictionary")
    
    'If cached object doesn't exist, create it
    Dim sKey As String: sKey = sPattern & "-" & sFlags
    If Not lookup.exists(sKey) Then
        'Create regex object
        Set lookup(sKey) = CreateObject("VBScript.Regexp")
        
        'Bind flags
        For i = 1 To Len(sFlags)
            Select Case Mid(sFlags, i, 1)
                Case "i"
                    lookup(sKey).IgnoreCase = True
                Case "g"
                    lookup(sKey).Global = True
            End Select
        Next
        
        'Set pattern
        lookup(sKey).Pattern = sPattern
    End If
    
    'Use test function of regex object
    RegexTest = lookup(sKey).test(sHaystack)

End Function

将其应用到您自己的函数中,您将看到这极大地提高了大量单元格上的执行速度。

票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/19481175

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档