想要使用VBA来处理customUI.xml,必须要实现编码转换的功能。
关于编码方法的知识,建议网上找找资料看看,UTF-8与UCS2之间是有规律的,完全可以根据位移来实现编码的转换。
首先声明一些需要用到的常量:
Private Const b_1000_0000 As Byte = 128
Private Const b_1100_0000 As Byte = 192
Private Const b_1110_0000 As Byte = 224
Private Const b_1111_0000 As Byte = 240
Private Const b_0001_1100 As Byte = 28
Private Const b_0000_0111 As Byte = 7
Private Const b_0000_0011 As Byte = 3
Private Const b_0011_1111 As Byte = 63
Private Const b_0000_1111 As Byte = 15
Private Const b_0011_1100 As Byte = 60
Private Const b_0000_0010 As Byte = 2
01
UTF-8转UCS2
'// UCS-2转UTF-8
'// 1 对于不大于0x007F(即00000000 01111111)的,直接把它转成一个字节,变成ASCII
'// 2 对于不大于0x07FF(即00000111 11111111)的,转换成两个字节
'// 转换的时候把右边的11位分别放到110xxxxx 10yyyyyy里边
'// 即0000 0aaa bbbb bbbb ==> 110a aabb 10bb bbbb
'// 3 剩下的会转换成三个字节,转换的时候也是把16个位分别填写到那三个字节里面
'// 即aaaaaaaa bbbbbbbb ==> 1110 aaaa 10aa aabb 10bb bbbb
Function ToUTF8(SrcUCS2() As Byte, RetUTF8() As Byte) As String
Dim ilensrc As Long
ilensrc = UBound(SrcUCS2) + 1
If ilensrc < 2 Then
ToUTF8 = "输入的UCS2字节数组太小了!"
Exit Function
End If
Dim i As Long
Dim iStart As Long
'如果是从txt文件中读取的,可能会有BOM头
If SrcUCS2(i) = &HFF And SrcUCS2(i + 1) = &HFE Then
iStart = 2
End If
If ilensrc Mod 2 Then
ToUTF8 = "输入的UCS2字节数组不是偶数!"
Exit Function
End If
ReDim RetUTF8(ilensrc / 2 * 3 - 1) As Byte
Dim p As Long
Dim tmp As Long
Dim l1 As Long, l2 As Long
For i = iStart To ilensrc - 1 Step 2
l1 = VBA.CLng(SrcUCS2(i + 1))
l2 = VBA.CLng(SrcUCS2(i))
tmp = l1 * 2 ^ 8 Or l2
If tmp <= &H7F Then
RetUTF8(p) = VBA.CByte(tmp)
p = p + 1
ElseIf tmp <= &H7FF Then
RetUTF8(p) = b_1100_0000 Or (SrcUCS2(i + 1) * (2 ^ 2)) Or (SrcUCS2(i) \ (2 ^ 6))
p = p + 1
RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
p = p + 1
Else
RetUTF8(p) = b_1110_0000 Or (SrcUCS2(i + 1) \ (2 ^ 4))
p = p + 1
RetUTF8(p) = b_1000_0000 Or ((SrcUCS2(i + 1) And b_0000_1111) * (2 ^ 2)) Or (SrcUCS2(i) \ (2 ^ 6))
p = p + 1
RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
p = p + 1
End If
Next
ReDim Preserve RetUTF8(p - 1) As Byte
End Function
02
UCS2转UTF-8
Function FromUTF8(SrcUTF8() As Byte, RetUCS2() As Byte) As String
Dim ilensrc As Long
ilensrc = UBound(SrcUTF8) + 1
Dim i As Long
Dim iStart As Long
'如果是从txt文件中读取的,可能会有BOM头
If SrcUTF8(i) = &HEF And SrcUTF8(i + 1) = &HBB And SrcUTF8(i + 2) = &HBF Then
iStart = 3
End If
ReDim RetUCS2(ilensrc * 2 - 1) As Byte
Dim p As Long
Dim tmp As Long
Dim b1 As Byte, b2 As Byte, b3 As Byte
i = iStart
Do While i < ilensrc
b1 = SrcUTF8(i)
i = i + 1
'UCS2 只有2个字节,只能转换3字节以下的UTF8
If b1 >= b_1111_0000 Then
FromUTF8 = "UCS2 只有2个字节,只能转换3字节以下的UTF8"
Exit Function
ElseIf b1 >= b_1110_0000 Then
'// 1110 aaaa 10bb bbbb 10cc cccc ==> aaaa bbbb bbcc cccc
'// 需要再读取2个字节
b2 = SrcUTF8(i)
i = i + 1
b3 = SrcUTF8(i)
i = i + 1
b1 = ((b1 And b_0000_1111) * 2 ^ 4) Or ((b2 And b_0011_1111) \ 2 ^ 2)
b2 = ((b2 And b_0000_0011) * 2 ^ 6) Or (b3 And b_0011_1111)
ElseIf b1 >= b_1100_0000 Then
'// 110a aaaa 10bb bbbb ==> 0000 0aaa aabb bbbb
'// 需要再读取1个字节
b2 = SrcUTF8(i)
i = i + 1
b2 = ((b1 And b_0000_0011) * 2 ^ 6) Or (b2 And b_0011_1111)
b1 = (b1 And b_0011_1111) \ 2 ^ 2
Else
'// 0aaa aaaa ==> 0000 0000 0aaa aaaa
b2 = b1
b1 = 0
End If
RetUCS2(p) = b2
RetUCS2(p + 1) = b1
p = p + 2
Loop
ReDim Preserve RetUCS2(p - 1) As Byte
End Function