前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >哈夫曼编码

哈夫曼编码

作者头像
裴来凡
发布2022-05-29 09:31:58
3340
发布2022-05-29 09:31:58
举报
文章被收录于专栏:图像处理与模式识别研究所
代码语言:javascript
复制
import os
import cv2
from queue import PriorityQueue
import numpy as np
import math
import struct
import matplotlib.pyplot as plt
class HuffmanNode(object):
    """Node of a Huffman tree.

    Leaf nodes carry a ``key`` (the encoded symbol, e.g. a pixel value);
    internal nodes have ``key=None``.  ``value`` is the frequency weight
    used for priority ordering in the build queue; ``symbol`` accumulates
    the 0/1 code path while the finished tree is walked.
    """
    def __init__(self,value,key=None,symbol='',left_child=None,right_child=None):
        self.left_child=left_child
        self.right_child=right_child
        self.value=value   # frequency weight (priority-queue ordering)
        self.key=key       # leaf symbol; None for internal nodes
        # Fix: the original `assert symbol==''` rejected a documented
        # parameter value and is stripped under `python -O`; simply store it.
        self.symbol=symbol
    def __repr__(self):
        return f'HuffmanNode(value={self.value!r}, key={self.key!r})'
    # Ordering compares frequency weights only, as PriorityQueue requires.
    def __eq__(self,other):
        return self.value==other.value
    def __gt__(self,other):
        return self.value>other.value
    def __lt__(self,other):
        return self.value < other.value
def createTree(hist_dict:dict)->HuffmanNode:
    """Build a Huffman tree from a {symbol: frequency} histogram.

    Every symbol becomes a leaf; the two lowest-frequency nodes are
    repeatedly merged under a fresh internal node until a single root
    remains, which is returned.
    """
    pq=PriorityQueue()
    for symbol,freq in hist_dict.items():
        pq.put(HuffmanNode(value=freq,key=symbol))
    # Merge until only the root is left in the queue.
    while pq.qsize()>1:
        left=pq.get()
        right=pq.get()
        merged=HuffmanNode(value=left.value+right.value,
                           left_child=left,right_child=right)
        pq.put(merged)
    return pq.get()
def walkTree_VLR(root_node:HuffmanNode,symbol=''):
    """Pre-order (root, left, right) walk assigning each leaf its code.

    Accumulates the 0/1 path in ``symbol`` and records every leaf's
    finished code in the module-level ``Huffman_encode_dict``
    ({key: bit-string}).
    """
    global Huffman_encode_dict
    # Children of leaves are None: nothing to do there.
    if not isinstance(root_node,HuffmanNode):
        return
    root_node.symbol+=symbol
    if root_node.key is not None:
        # Leaf: the accumulated path is the final code word.
        Huffman_encode_dict[root_node.key]=root_node.symbol
    walkTree_VLR(root_node.left_child,symbol=root_node.symbol+'0')
    walkTree_VLR(root_node.right_child,symbol=root_node.symbol+'1')
def encodeImage(src_img: np.ndarray, encode_dict: dict):
    """Encode a flattened image with a Huffman code table.

    Parameters
    ----------
    src_img : np.ndarray
        1-D array of symbols (pixel values).
    encode_dict : dict
        Maps each symbol to its bit-string code.

    Returns
    -------
    str
        Concatenation of the per-pixel code words.

    Raises
    ------
    ValueError
        If ``src_img`` is not one-dimensional.
    """
    # Real exception instead of `assert`, which vanishes under `python -O`.
    if len(src_img.shape) != 1:
        raise ValueError('`src_img` must be a vector')
    # Fix: repeated `str +=` in a loop is quadratic; join once instead.
    return ''.join(encode_dict[pixel] for pixel in src_img)
def writeBinImage(img_encode:str,huffman_file:str):
    """Write a '0'/'1' bit-string to ``huffman_file`` as packed bytes.

    The string is consumed 8 bits at a time; a trailing chunk shorter
    than 8 bits is stored right-aligned in the final byte, which is the
    layout ``readBinImage`` expects when it strips the padding.
    """
    # Pack everything into one buffer and issue a single write instead
    # of one struct.pack + f.write call per byte.
    packed=bytearray(int(img_encode[i:i+8],2)
                     for i in range(0,len(img_encode),8))
    with open(huffman_file,'wb') as f:
        f.write(packed)
def readBinImage(huffman_file: str,img_encode_len:int):
    """Read a packed bit file back into a '0'/'1' string.

    Parameters
    ----------
    huffman_file : str
        File produced by ``writeBinImage``.
    img_encode_len : int
        Exact number of meaningful bits; the zero padding that byte
        alignment added to the final byte is stripped.

    Returns
    -------
    str
        Bit string of length ``img_encode_len``.
    """
    with open(huffman_file,'rb') as f:
        content=f.read()
    # Fix: expand each byte to 8 bits and join once — repeated `str +=`
    # was quadratic.  Iterating bytes yields ints directly; no struct needed.
    bits=''.join(bin(b)[2:].zfill(8) for b in content)
    # The final byte was stored right-aligned, so drop only the extra
    # leading zeros of that byte.
    pad=len(bits)-img_encode_len
    if pad:
        bits=bits[:-8]+bits[-(8-pad):]
    return bits
def decodeHuffman(img_encode:str,huffman_tree_root:HuffmanNode):
    img_src_val_list=[]
    root_node=huffman_tree_root
    for code in img_encode:
        if code=='0':
            root_node=root_node.left_child
        elif code=='1':
            root_node=root_node.right_child
        if root_node.key!=None:
            img_src_val_list.append(root_node.key)
            root_node=huffman_tree_root
    return np.asarray(img_src_val_list)
def decodeHuffmanByDict(img_encode:str,encode_dict:dict):
    """Decode a bit string using only the {symbol: code} table.

    Inverts ``encode_dict`` and greedily matches code words from the
    current position (safe because Huffman codes are prefix-free).

    Returns the decoded symbols as a numpy array.

    Raises
    ------
    ValueError
        If the remaining bits match no code word (corrupt input); the
        original implementation spun forever in this case.
    """
    decode_dict={code:symbol for symbol,code in encode_dict.items()}
    img_src_val_list=[]
    s=0
    n=len(img_encode)
    # Fix: the original condition `n > s + 1` silently dropped a trailing
    # one-bit code word; consume every bit instead.
    while s<n:
        for code,symbol in decode_dict.items():
            # startswith with an offset avoids building a slice per probe.
            if img_encode.startswith(code,s):
                img_src_val_list.append(symbol)
                s+=len(code)
                break
        else:
            raise ValueError(f'undecodable bit sequence at position {s}')
    return np.asarray(img_src_val_list)
def put(path):
    """Run the full Huffman round trip on the grayscale image at ``path``.

    Builds the gray-level histogram, constructs the Huffman tree and the
    global code table, encodes the image to a binary file, reads and
    decodes it back, then prints coding statistics and displays the
    original next to the reconstruction.
    """
    src_img=cv2.imread(path,cv2.IMREAD_GRAYSCALE)
    # NOTE: shape is (rows, cols); the names keep the article's (swapped)
    # w/h convention, which stays consistent with the reshape below.
    src_img_w, src_img_h=src_img.shape[:2]
    src_img_ravel=src_img.ravel()
    # Gray-level histogram {pixel_value: count}.
    hist_dict={}
    for p in src_img_ravel:
        hist_dict[p]=hist_dict.get(p,0)+1
    # Build the Huffman tree.
    huffman_root_node=createTree(hist_dict)
    # Walk the tree to fill the global code table.
    walkTree_VLR(huffman_root_node)
    global Huffman_encode_dict
    print('哈夫曼编码字典:',Huffman_encode_dict)
    img_encode=encodeImage(src_img_ravel,Huffman_encode_dict)
    writeBinImage(img_encode,'huffman_bin_img_file.bin')
    img_read_code=readBinImage('huffman_bin_img_file.bin',len(img_encode))
    # Decode the bit string back to the flattened pixel vector by
    # walking the Huffman tree.
    img_src_val_array = decodeHuffman(img_read_code, huffman_root_node)
    assert len(img_src_val_array)==src_img_w*src_img_h
    # Restore the original image shape.
    img_decode=np.reshape(img_src_val_array,[src_img_w,src_img_h])
    # Average code length, source entropy and coding efficiency.
    total_code_num=sum(hist_dict.values())
    avg_code_len=0
    I_entropy=0
    for key,count in hist_dict.items():
        code_len=len(Huffman_encode_dict[key])
        prob=count/total_code_num
        avg_code_len+=prob*code_len
        I_entropy+=-(prob*math.log2(prob))
    S_eff=I_entropy/avg_code_len
    print("平均编码长度为{:.3f}".format(avg_code_len))
    print("编码效率为{:.6f}".format(S_eff))
    # Sizes in KB (8 bits per source pixel).
    ori_size=src_img_w*src_img_h*8/(1024*8)
    comp_size=len(img_encode)/(1024*8)
    # Fix: the value is printed with a '%' label, so express it as a
    # percentage — the original printed the raw fraction.
    comp_rate=(1-comp_size/ori_size)*100
    print('原图灰度图大小',ori_size,'KB  压缩后大小',comp_size,'KB  压缩率',comp_rate,'%')
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.subplot(121),plt.imshow(src_img,plt.cm.gray),plt.title('原图灰度图像'),plt.axis('off')
    plt.subplot(122),plt.imshow(img_decode,plt.cm.gray),plt.title('解压后'),plt.axis('off')
    plt.show()
if __name__=='__main__':
    # Global Huffman code table {pixel_value: bit-string code},
    # filled in by walkTree_VLR() during put().
    Huffman_encode_dict={}
    put(r'C:/Users/xpp/Desktop/Lena.png')

哈夫曼编码字典: {103: '0000000', 227: '00000010000', 229: '000000100010', 228: '000000100011', 224: '0000001001', 225: '00000010100', 222: '00000010101', 223: '0000001011', 185: '00000011', 87: '0000010', 106: '0000011', 61: '00001', 80: '0001000', 120: '0001001', 68: '000101', 122: '0001100', 93: '0001101', 125: '0001110', 90: '0001111', 56: '00100', 78: '0010100', 89: '0010101', 132: '0010110', 123: '0010111', 127: '0011000', 184: '0011001', 129: '0011010', 134: '0011011', 119: '0011100', 215: '00111010', 219: '001110110', 218: '001110111', 76: '0011110', 133: '0011111', 164: '010000', 107: '0100010', 124: '0100011', 126: '0100100', 131: '0100101', 130: '0100110', 128: '0100111', 59: '01010', 117: '0101100', 109: '0101101', 145: '0101110', 141: '0101111', 183: '01100000', 174: '01100001', 118: '0110001', 144: '011001', 143: '0110100', 204: '011010100', 189: '011010101', 181: '01101011', 139: '0110110', 136: '0110111', 135: '0111000', 214: '01110010', 206: '01110011', 160: '011101', 110: '0111100', 163: '0111101', 226: '01111100000', 230: '0111110000100', 231: '01111100001010', 232: '011111000010110', 235: '01111100001011100', 34: '01111100001011101', 239: '011111000010111100', 238: '011111000010111101', 233: '01111100001011111', 39: '011111000011', 221: '0111110001', 211: '011111001', 177: '01111101', 173: '0111111', 175: '1000000', 161: '1000001', 142: '1000010', 146: '1000011', 74: '1000100', 170: '1000101', 208: '100011000', 191: '100011001', 212: '10001101', 116: '1000111', 162: '1001000', 182: '1001001', 138: '1001010', 179: '10010110', 207: '10010111', 72: '1001100', 147: '1001101', 159: '1001110', 190: '10011110', 43: '100111110', 201: '100111111', 66: '101000', 53: '101001', 140: '1010100', 194: '10101010', 176: '10101011', 188: '10101100', 210: '10101101', 158: '1010111', 178: '1011000', 213: '10110010', 192: '10110011', 111: '1011010', 209: '10110110', 171: '10110111', 167: '1011100', 115: '1011101', 156: '101111', 148: '1100000', 149: '1100001', 157: 
'1100010', 202: '11000110', 168: '11000111', 137: '1100100', 180: '1100101', 187: '110011000', 217: '110011001', 205: '11001101', 114: '1100111', 50: '1101000', 172: '11010010', 220: '1101001100', 216: '1101001101', 196: '110100111', 70: '1101010', 195: '11010110', 186: '11010111', 203: '11011000', 199: '11011001', 154: '1101101', 169: '11011100', 98: '11011101', 155: '1101111', 151: '111000', 112: '1110010', 46: '11100110', 193: '111001110', 198: '111001111', 64: '111010', 150: '1110110', 197: '11101110', 99: '11101111', 200: '11110000', 165: '11110001', 153: '1111001', 96: '11110100', 85: '11110101', 152: '1111011', 102: '11111000', 95: '11111001', 105: '11111010', 101: '11111011', 166: '11111100', 92: '11111101', 84: '11111110', 82: '11111111'} 平均编码长度为6.995 编码效率为0.996009 原图灰度图大小 206.640625 KB 压缩后大小 180.6787109375 KB 压缩率 0.12563799621928162 %

算法:哈夫曼编码是一种根据词频变化的变长二进制编码方式,多用于压缩算法。

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2021-09-26,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 图像处理与模式识别研究所 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档