我正在excel上编写一个VBA代码,使用循环遍历10000+ lines。
下面是表的一个示例

这是我写的代码:
Sub Find_Matches()
Dim wb As Workbook
Dim xrow As Long
Set wb = ActiveWorkbook
wb.Worksheets("Data").Activate
tCnt = Sheets("Data").UsedRange.Rows.Count
Dim e, f, a, j, h As Range
xrow = 2
Application.ScreenUpdating = False
Application.Calculation = xlManual
For xrow = 2 To tCnt Step 1
Set e = Range("E" & xrow)
Set f = e.Offset(0, 1)
Set a = e.Offset(0, -4)
Set j = e.Offset(0, 5)
Set h = e.Offset(0, 3)
For Each Cell In Range("E2:E" & tCnt)
If Cell.Value = e.Value Then
If Cell.Offset(0, 1).Value = f.Value Then
If Cell.Offset(0, -4).Value = a.Value Then
If Cell.Offset(0, 5).Value = j.Value Then
If Cell.Offset(0, 3).Value = h.Value Then
If (e.Offset(0, 7).Value) + (Cell.Offset(0, 7).Value) = 0 Then
Cell.EntireRow.Interior.Color = vbYellow
e.EntireRow.Interior.Color = vbYellow
End If
End If
End If
End If
End If
End If
Next
Next
End Sub正如您可以想象的那样,这需要花费大量的时间来遍历10000+行,我想找到一个更快的解决方案。我认为一定有一种方法可以避免过度循环
以下是条件:
对于每一行,如果文件中的另一行具有完全相同的内容:
然后,如果金额之和。( L)这两条匹配线为0,然后将两行都涂成黄色。
请注意,额外的列是存在的,没有被比较(例如- col )。( B)但对文件仍然很重要,不能删除,以简化程序。
运行前面的代码,在我的示例中,突出显示第2行和第5行:

发布于 2018-06-13 11:26:48
这是使用嵌套字典和数组检查所有条件。
使用我的测试数据的计时器:Rows: 100,001; Dupes: 70,000 - Time: 14.217 sec
Option Explicit
Public Sub FindMatches()
Const E = 5, F = 6, A = 1, J = 10, H = 8, L = 12
Dim ur As Range, x As Variant, ub As Long, d As Object, found As Object
Set ur = ThisWorkbook.Worksheets("Data").UsedRange
x = ur
Set d = CreateObject("Scripting.Dictionary")
Set found = CreateObject("Scripting.Dictionary")
Dim r As Long, rId As String, itm As Variant, dupeRows As Object
For r = ur.Row To ur.Rows.Count
rId = x(r, E) & x(r, F) & x(r, A) & x(r, J) & x(r, H)
If Not d.Exists(rId) Then
Set dupeRows = CreateObject("Scripting.Dictionary")
dupeRows(r) = 0
Set d(rId) = dupeRows
Else
For Each itm In d(rId)
If x(r, L) + x(itm, L) = 0 Then
found(r) = 0
found(itm) = 0
End If
Next
End If
Next
Application.ScreenUpdating = False
For Each itm In found
ur.Range("A" & itm).EntireRow.Interior.Color = vbYellow
Next
Application.ScreenUpdating = True
End Sub在此之前

之后

发布于 2018-06-13 07:04:02
我建议一种完全不同的方法:向数据中添加一个临时列,其中包含行中每个单元格的连接。这样,你就有:
A|B|C|D|E
1|Mr. Smith|500|A|1Mr. Smith500A然后在临时列上使用Excel的条件格式,突出显示重复的值。这里有你的复本行。现在只需要使用过滤器来检查哪些值等于零。
您可以使用级联函数;它要求您分别指定每个单元格,并且不能使用范围,但在您的情况下(只比较一些列),这似乎是一个很好的选择。
发布于 2018-06-13 10:37:12
Maciej的答案很容易实现(如果您可以在不中断任何内容的情况下将列添加到您的数据中),如果可能的话,我建议您这样做。
不过,为了回答您的问题,我也将提供一个VBA解决方案。我在比你的小一点的数据集上测试了它,但我认为它会对你有用。请注意,您可能需要稍微调整一下它(从哪一行开始,表名等等)才能适合您的工作簿。
最值得注意的是,用"Helper列“注释的段最有可能需要调整--目前,它比较当前行的A和H之间的每个单元格,这是您可能想要的,也可能不是您想要的。
我试着在代码中加入一点评论,但并不多。主要的变化是,我使用的是数组的内存中处理,而不是迭代工作表范围(对于较大的数据集,这应该是指数级的)。
Option Base 1
Option Explicit
' Uses ref Microsoft Scripting Runtime
Sub Find_Matches()
Dim wb As Workbook, ws As Worksheet
Dim xrow As Long, tCnt As Long
Dim e As Range, f As Range, a As Range, j As Range, h As Range
Dim sheetArr() As Variant, arr() As Variant
Dim colorTheseYellow As New Dictionary, colorResults() As String, dictItem As Variant
Dim arrSize As Long, i As Long, k As Long
Dim c As Variant
Set wb = ThisWorkbook
Set ws = wb.Worksheets("Data")
ws.Activate
tCnt = ws.UsedRange.Rows.Count
xrow = 2
Application.ScreenUpdating = False
Application.Calculation = xlManual
' Read range into an array so we process in-memory
sheetArr = ws.Range("A2:H" & tCnt)
arrSize = UBound(sheetArr, 1)
' Build new arr with "helper column"
ReDim arr(1 To arrSize, 1 To 9)
For i = 1 To arrSize
For k = 1 To 8
arr(i, k) = sheetArr(i, k)
arr(i, 9) = CStr(arr(i, 9)) & CStr(arr(i, k)) ' "Helper column"
Next k
Next i
' Iterate over array & build collection to indicate yellow lines
For i = LBound(arr, 1) To UBound(arr, 1)
If Not colorTheseYellow.Exists(i) Then colorResults = Split(ReturnLines(arr(i, 9), arr), ";")
For Each c In colorResults
If Not colorTheseYellow.Exists(CLng(c)) Then colorTheseYellow.Add CLng(c), CLng(c)
Next c
Next i
' Enact row colors
For Each dictItem In colorTheseYellow
'Debug.Print "dict: "; dictItem
If dictItem <> 0 Then ws.ListObjects(1).ListRows(CLng(dictItem)).Range.Interior.Color = vbYellow
Next dictItem
End Sub
Function ReturnLines(ByVal s As String, ByRef arr() As Variant) As String
' Returns a "Index;Index" string indicating the index/indices where the second, third, etc. instance(s) of s was found
' Returns "0;0" if 1 or fewer matches
Dim i As Long
Dim j As Long
Dim tmp As String
ReturnLines = 0
j = 0
tmp = "0"
'Debug.Print "arg: " & s
For i = LBound(arr, 1) To UBound(arr, 1)
If arr(i, 9) = s Then
j = j + 1
'Debug.Print "arr: " & arr(i, 9)
'Debug.Print "ReturnLine: " & i
tmp = tmp & ";" & CStr(i)
End If
Next i
'If Left(tmp, 1) = ";" Then tmp = Mid(tmp, 2, Len(tmp) - 1)
'Debug.Print "tmp: " & tmp
If j >= 2 Then
ReturnLines = tmp
Else
ReturnLines = "0;0"
End If
End Function在我的简单数据集上,它得到了这个结果(用手工绘制的颜色指示器标记得很好):

https://stackoverflow.com/questions/50830464
复制相似问题