前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >VBA编写Ribbon Custom UI编辑器02——编码转换

VBA编写Ribbon Custom UI编辑器02——编码转换

作者头像
xyj
发布2020-08-14 11:12:45
8560
发布2020-08-14 11:12:45
举报
文章被收录于专栏:VBA 学习
在Office文件的ZIP压缩包里,解压之后,customUI.xml的编码是UTF-8,VBA中的编码是UCS2(Unicode的学名是"Universal Multiple-Octet Coded Character Set",简称为UCS,VBA中使用的UCS2就是用两个字节编码)。

想要使用VBA来处理customUI.xml,必须要实现编码转换的功能。

关于编码方法的知识,建议网上找找资料看看,UTF-8与UCS2之间是有规律的,完全可以根据位移来实现编码的转换。

首先声明一些需要用到的常量:

代码语言:javascript
复制
Private Const b_1000_0000 As Byte = 128
Private Const b_1100_0000 As Byte = 192
Private Const b_1110_0000 As Byte = 224
Private Const b_1111_0000 As Byte = 240
Private Const b_0001_1100 As Byte = 28
Private Const b_0000_0111 As Byte = 7
Private Const b_0000_0011 As Byte = 3
Private Const b_0011_1111 As Byte = 63
Private Const b_0000_1111 As Byte = 15
Private Const b_0011_1100 As Byte = 60
Private Const b_0000_0010 As Byte = 2

01

UTF-8转UCS2

代码语言:javascript
复制
'// UCS-2转UTF-8
'// 1 对于不大于0x007F(即00000000 01111111)的,直接把它转成一个字节,变成ASCII
'// 2 对于不大于0x07FF(即00000111 11111111)的,转换成两个字节
'//   转换的时候把右边的11位分别放到110xxxxx 10yyyyyy里边
'//   即0000 0aaa bbbb bbbb ==> 110a aabb   10bb bbbb
'// 3 剩下的会转换成三个字节,转换的时候也是把16个位分别填写到那三个字节里面
'//   即aaaaaaaa bbbbbbbb ==> 1110 aaaa   10aa aabb   10bb bbbb
Function ToUTF8(SrcUCS2() As Byte, RetUTF8() As Byte) As String
    Dim ilensrc As Long
    ilensrc = UBound(SrcUCS2) + 1
    
    If ilensrc < 2 Then
        ToUTF8 = "输入的UCS2字节数组太小了!"
        Exit Function
    End If
    
    Dim i As Long
    Dim iStart As Long
    '如果是从txt文件中读取的,可能会有BOM头
    If SrcUCS2(i) = &HFF And SrcUCS2(i + 1) = &HFE Then
        iStart = 2
    End If
    
    If ilensrc Mod 2 Then
        ToUTF8 = "输入的UCS2字节数组不是偶数!"
        Exit Function
    End If
    
    ReDim RetUTF8(ilensrc / 2 * 3 - 1) As Byte
    Dim p As Long
    
    Dim tmp As Long
    Dim l1 As Long, l2 As Long
    For i = iStart To ilensrc - 1 Step 2
        l1 = VBA.CLng(SrcUCS2(i + 1))
        l2 = VBA.CLng(SrcUCS2(i))
        
        tmp = l1 * 2 ^ 8 Or l2
        
        If tmp <= &H7F Then
            RetUTF8(p) = VBA.CByte(tmp)
            p = p + 1
        ElseIf tmp <= &H7FF Then
            RetUTF8(p) = b_1100_0000 Or (SrcUCS2(i + 1) * (2 ^ 2)) Or (SrcUCS2(i) \ (2 ^ 6))
            p = p + 1
            
            RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
            p = p + 1
        Else
            RetUTF8(p) = b_1110_0000 Or (SrcUCS2(i + 1) \ (2 ^ 4))
            p = p + 1
                
            RetUTF8(p) = b_1000_0000 Or ((SrcUCS2(i + 1) And b_0000_1111) * (2 ^ 2)) Or (SrcUCS2(i) \ (2 ^ 6))
            p = p + 1
            
            RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
            p = p + 1
        End If
    Next
    
    ReDim Preserve RetUTF8(p - 1) As Byte
End Function

02

UCS2转UTF-8

代码语言:javascript
复制

Function FromUTF8(SrcUTF8() As Byte, RetUCS2() As Byte) As String
    Dim ilensrc As Long
    ilensrc = UBound(SrcUTF8) + 1
    
    Dim i As Long
    Dim iStart As Long
    '如果是从txt文件中读取的,可能会有BOM头
    If SrcUTF8(i) = &HEF And SrcUTF8(i + 1) = &HBB And SrcUTF8(i + 2) = &HBF Then
        iStart = 3
    End If
    
    ReDim RetUCS2(ilensrc * 2 - 1) As Byte
    Dim p As Long
    
    Dim tmp As Long
    Dim b1 As Byte, b2 As Byte, b3 As Byte
    i = iStart
    Do While i < ilensrc
        b1 = SrcUTF8(i)
        i = i + 1
        
        'UCS2 只有2个字节,只能转换3字节以下的UTF8
        If b1 >= b_1111_0000 Then
            FromUTF8 = "UCS2 只有2个字节,只能转换3字节以下的UTF8"
            Exit Function
            
        ElseIf b1 >= b_1110_0000 Then
            '// 1110 aaaa 10bb bbbb 10cc cccc ==> aaaa bbbb  bbcc cccc
            '// 需要再读取2个字节
            b2 = SrcUTF8(i)
            i = i + 1
        
            b3 = SrcUTF8(i)
            i = i + 1
            
            b1 = ((b1 And b_0000_1111) * 2 ^ 4) Or ((b2 And b_0011_1111) \ 2 ^ 2)
            b2 = ((b2 And b_0000_0011) * 2 ^ 6) Or (b3 And b_0011_1111)
        ElseIf b1 >= b_1100_0000 Then
            '// 110a aaaa 10bb bbbb ==> 0000 0aaa  aabb bbbb
            '// 需要再读取1个字节
            b2 = SrcUTF8(i)
            i = i + 1
            
            b2 = ((b1 And b_0000_0011) * 2 ^ 6) Or (b2 And b_0011_1111)
            b1 = (b1 And b_0011_1111) \ 2 ^ 2
            
        Else
            '// 0aaa aaaa ==> 0000 0000  0aaa aaaa
            b2 = b1
            b1 = 0
        End If
        
        RetUCS2(p) = b2
        RetUCS2(p + 1) = b1
        p = p + 2
    Loop
    
    ReDim Preserve RetUCS2(p - 1) As Byte
End Function
本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2020-08-13,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 VBA 学习 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档