我有大约17k封电子邮件,其中包含订单,新闻,联系人等,可以追溯到11年前。
用户的电子邮件地址被非法加密,通过将@
更改为*@*
或'at'
来阻止爬虫和垃圾邮件。
我试图创建一个逗号分隔列表,以建立我们的用户数据库。
代码用于编写文件和循环文件夹,因为如果我将发件人的电子邮件地址写入我当前使用电子邮件正文的文件,那么它就会打印得很好。
问题是,Replace
没有将*at*
等改为@
。
Private Sub Form_Load()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objInbox As MAPIFolder
Dim objFolder As MAPIFolder
Dim fldName As String
fldName = "TEST"
' Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
' Pick up the Inbox
Set objInbox = objNameSpace.GetDefaultFolder(olFolderInbox)
'Loop through the folders under the Inbox
For Each objFolder In objInbox.Folders
RecurseFolders fldName, objFolder
Next objFolder
End Sub
Public Sub RecurseFolders(targetFolder As String, currentFolder As MAPIFolder)
If currentFolder.Name = targetFolder Then
GetEmails currentFolder
Else
Dim objFolder As MAPIFolder
If currentFolder.Folders.Count > 0 Then
For Each objFolder In currentFolder.Folders
RecurseFolders targetFolder, objFolder
Next
End If
End If
End Sub
Sub WriteToATextFile(e As String)
MyFile = "c:\" & "emailist.txt"
'set and open file for output
fnum = FreeFile()
Open MyFile For Append As fnum
Print #fnum, e; ","
Close #fnum
End Sub
Sub GetEmails(folder As MAPIFolder)
Dim objMail As MailItem
' Read through all the items
For i = 1 To folder.Items.Count
Set objMail = folder.Items(i)
GetEmail objMail.Body
Next i
End Sub
Sub GetEmail(s As String)
Dim txt = s
Do Until InStr(txt, "@") <= 0
Dim tleft As Integer
Dim tright As Integer
Dim start As Integer
Dim text As String
Dim email As String
text = Replace(text, " at ", "@", VbCompareMethod.vbTextCompare)
text = Replace(text, "'at'", "@", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "@", VbCompareMethod.vbTextCompare)
text = Replace(text, "*at*", "@", VbCompareMethod.vbTextCompare)
text = Replace(text, "<", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ">", " ", VbCompareMethod.vbTextCompare)
text = Replace(text, ":", " ", VbCompareMethod.vbTextCompare)
'one two ab@bd.com one two
tleft = InStr(text, "@") '11
WriteToATextFile Str(tleft)
WriteToATextFile Str(Len(text))
start = InStrRev(text, " ", Len(text) - tleft)
'WriteToATextFile Str(start)
'WriteToATextFile Str(Len(text))
'start = Len(text) - tleft
text = left(text, start)
'ab@bd.com one two
tright = InStr(text, " ") '9
email = left(text, tright)
WriteToATextFile email
text = right(text, Len(text) - Len(email))
GetEmail txt
Loop
End Sub
发布于 2011-12-03 06:17:27
我已经破解了这一点来提取电子邮件,例如下面的示例,它将把下面示例消息中的三个黄色电子邮件地址取出到一个csv文件中。
任何有效的电子邮件都被写入csv文件Set objTF = objFSO.createtextfile("c:\myemail.csv")
temp
的Inbox
文件夹中的所有电子邮件我删除了测试的递归部分,simplicity
strMsgBody = Replace(strMsgBody, Chr(160), Chr(32)
(不太可能,但在my testing)
"(\s+at\s+|'at'|<at>|\*at\*|at)"
< code >h 118Regex2 2转换任何“点”。"(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
.Pattern = "[<:>]"
objTF.writeline objRegM
写入csv文件。
代码如下
Public Test()
Dim objOutlook As New Outlook.Application
Dim objNameSpace As Outlook.NameSpace
Dim objFolder As MAPIFolder
Dim strfld As String
Dim objRegex As Object
Dim objRegMC As Object
Dim objRegM As Object
Dim objFSO As Object
Dim oMailItem As MailItem
Dim objTF As Object
Dim strMsgBody As String
Set objRegex = CreateObject("vbscript.regexp")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objTF = objFSO.createtextfile("c:\myemail.csv")
With objRegex
.Global = True
.MultiLine = True
.ignorecase = True
strfld = "temp"
'Get the MAPI reference
Set objNameSpace = objOutlook.GetNamespace("MAPI")
'Pick up the Inbox
Set objFolder = objNameSpace.GetDefaultFolder(olFolderInbox)
Set objFolder = objFolder.Folders(strfld)
For Each oMailItem In objFolder.Items
strMsgBody = oMailItem.Body
strMsgBody = Replace(strMsgBody, Chr(160), Chr(32))
.Pattern = "(\s+at\s+|'at'|<at>|\*at\*|at)"
strMsgBody = .Replace(strMsgBody, "@")
.Pattern = "(\s+dot\s+|'dot'|<dot>|\*dot\*|dot)"
strMsgBody = .Replace(strMsgBody, ".")
.Pattern = "[<:>]"
strMsgBody = .Replace(strMsgBody, vbNullString)
.Pattern = "[\w-\.]{1,}\@([\da-zA-Z-]{1,}\.){1,}[\da-zA-Z-]{2,3}"
If .Test(strMsgBody) Then
Set objRegMC = .Execute(strMsgBody)
For Each objRegM In objRegMC
objTF.writeline objRegM
Next
End If
Next
End With
objTF.Close
End Sub
发布于 2011-12-02 20:30:41
如何使用regex (正则表达式)?
类似于:
Public Function ReplaceAT(ByVal sInput as String)
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "( at |'at'|<at>)"
End With
ReplaceAT = RegEx.Replace(sInput, "@")
Set RegEx = Nothing
End Function
只需将regexp替换为您可能得到的所有情况。
有关更多提示和信息,请参见http://www.regular-expressions.info/。
https://stackoverflow.com/questions/8361142
复制相似问题