前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >哈夫曼编码

哈夫曼编码

作者头像
裴来凡
发布2022-05-29 09:31:58
3340
发布2022-05-29 09:31:58
举报
文章被收录于专栏:图像处理与模式识别研究所
代码语言:javascript
复制
import os
import cv2
from queue import PriorityQueue
import numpy as np
import math
import struct
import matplotlib.pyplot as plt
class HuffmanNode(object):
    """Node of a Huffman tree.

    Leaf nodes carry a ``key`` (the encoded symbol, e.g. a pixel value);
    internal nodes have ``key=None``.  ``value`` is the frequency weight
    used for priority ordering in the build queue; ``symbol`` accumulates
    the 0/1 code path while the finished tree is walked.
    """
    def __init__(self,value,key=None,symbol='',left_child=None,right_child=None):
        self.left_child=left_child
        self.right_child=right_child
        self.value=value   # frequency weight (priority-queue ordering)
        self.key=key       # leaf symbol; None for internal nodes
        # Fix: the original `assert symbol==''` rejected a documented
        # parameter value and is stripped under `python -O`; simply store it.
        self.symbol=symbol
    def __repr__(self):
        return f'HuffmanNode(value={self.value!r}, key={self.key!r})'
    # Ordering compares frequency weights only, as PriorityQueue requires.
    def __eq__(self,other):
        return self.value==other.value
    def __gt__(self,other):
        return self.value>other.value
    def __lt__(self,other):
        return self.value < other.value
def createTree(hist_dict:dict)->HuffmanNode:
    """Build a Huffman tree from a {symbol: frequency} histogram.

    Every symbol becomes a leaf; the two lowest-frequency nodes are
    repeatedly merged under a fresh internal node until a single root
    remains, which is returned.
    """
    pq=PriorityQueue()
    for symbol,freq in hist_dict.items():
        pq.put(HuffmanNode(value=freq,key=symbol))
    # Merge until only the root is left in the queue.
    while pq.qsize()>1:
        left=pq.get()
        right=pq.get()
        merged=HuffmanNode(value=left.value+right.value,
                           left_child=left,right_child=right)
        pq.put(merged)
    return pq.get()
def walkTree_VLR(root_node:HuffmanNode,symbol=''):
    """Pre-order (root, left, right) walk assigning each leaf its code.

    Accumulates the 0/1 path in ``symbol`` and records every leaf's
    finished code in the module-level ``Huffman_encode_dict``
    ({key: bit-string}).
    """
    global Huffman_encode_dict
    # Children of leaves are None: nothing to do there.
    if not isinstance(root_node,HuffmanNode):
        return
    root_node.symbol+=symbol
    if root_node.key is not None:
        # Leaf: the accumulated path is the final code word.
        Huffman_encode_dict[root_node.key]=root_node.symbol
    walkTree_VLR(root_node.left_child,symbol=root_node.symbol+'0')
    walkTree_VLR(root_node.right_child,symbol=root_node.symbol+'1')
def encodeImage(src_img: np.ndarray, encode_dict: dict):
    """Encode a flattened image with a Huffman code table.

    Parameters
    ----------
    src_img : np.ndarray
        1-D array of symbols (pixel values).
    encode_dict : dict
        Maps each symbol to its bit-string code.

    Returns
    -------
    str
        Concatenation of the per-pixel code words.

    Raises
    ------
    ValueError
        If ``src_img`` is not one-dimensional.
    """
    # Real exception instead of `assert`, which vanishes under `python -O`.
    if len(src_img.shape) != 1:
        raise ValueError('`src_img` must be a vector')
    # Fix: repeated `str +=` in a loop is quadratic; join once instead.
    return ''.join(encode_dict[pixel] for pixel in src_img)
def writeBinImage(img_encode:str,huffman_file:str):
    """Write a '0'/'1' bit-string to ``huffman_file`` as packed bytes.

    The string is consumed 8 bits at a time; a trailing chunk shorter
    than 8 bits is stored right-aligned in the final byte, which is the
    layout ``readBinImage`` expects when it strips the padding.
    """
    # Pack everything into one buffer and issue a single write instead
    # of one struct.pack + f.write call per byte.
    packed=bytearray(int(img_encode[i:i+8],2)
                     for i in range(0,len(img_encode),8))
    with open(huffman_file,'wb') as f:
        f.write(packed)
def readBinImage(huffman_file: str,img_encode_len:int):
    """Read a packed bit file back into a '0'/'1' string.

    Parameters
    ----------
    huffman_file : str
        File produced by ``writeBinImage``.
    img_encode_len : int
        Exact number of meaningful bits; the zero padding that byte
        alignment added to the final byte is stripped.

    Returns
    -------
    str
        Bit string of length ``img_encode_len``.
    """
    with open(huffman_file,'rb') as f:
        content=f.read()
    # Fix: expand each byte to 8 bits and join once — repeated `str +=`
    # was quadratic.  Iterating bytes yields ints directly; no struct needed.
    bits=''.join(bin(b)[2:].zfill(8) for b in content)
    # The final byte was stored right-aligned, so drop only the extra
    # leading zeros of that byte.
    pad=len(bits)-img_encode_len
    if pad:
        bits=bits[:-8]+bits[-(8-pad):]
    return bits
def decodeHuffman(img_encode:str,huffman_tree_root:HuffmanNode):
    img_src_val_list=[]
    root_node=huffman_tree_root
    for code in img_encode:
        if code=='0':
            root_node=root_node.left_child
        elif code=='1':
            root_node=root_node.right_child
        if root_node.key!=None:
            img_src_val_list.append(root_node.key)
            root_node=huffman_tree_root
    return np.asarray(img_src_val_list)
def decodeHuffmanByDict(img_encode:str,encode_dict:dict):
    """Decode a bit string using only the {symbol: code} table.

    Inverts ``encode_dict`` and greedily matches code words from the
    current position (safe because Huffman codes are prefix-free).

    Returns the decoded symbols as a numpy array.

    Raises
    ------
    ValueError
        If the remaining bits match no code word (corrupt input); the
        original implementation spun forever in this case.
    """
    decode_dict={code:symbol for symbol,code in encode_dict.items()}
    img_src_val_list=[]
    s=0
    n=len(img_encode)
    # Fix: the original condition `n > s + 1` silently dropped a trailing
    # one-bit code word; consume every bit instead.
    while s<n:
        for code,symbol in decode_dict.items():
            # startswith with an offset avoids building a slice per probe.
            if img_encode.startswith(code,s):
                img_src_val_list.append(symbol)
                s+=len(code)
                break
        else:
            raise ValueError(f'undecodable bit sequence at position {s}')
    return np.asarray(img_src_val_list)
def put(path):
    """Run the full Huffman round trip on the grayscale image at ``path``.

    Builds the gray-level histogram, constructs the Huffman tree and the
    global code table, encodes the image to a binary file, reads and
    decodes it back, then prints coding statistics and displays the
    original next to the reconstruction.
    """
    src_img=cv2.imread(path,cv2.IMREAD_GRAYSCALE)
    # NOTE: shape is (rows, cols); the names keep the article's (swapped)
    # w/h convention, which stays consistent with the reshape below.
    src_img_w, src_img_h=src_img.shape[:2]
    src_img_ravel=src_img.ravel()
    # Gray-level histogram {pixel_value: count}.
    hist_dict={}
    for p in src_img_ravel:
        hist_dict[p]=hist_dict.get(p,0)+1
    # Build the Huffman tree.
    huffman_root_node=createTree(hist_dict)
    # Walk the tree to fill the global code table.
    walkTree_VLR(huffman_root_node)
    global Huffman_encode_dict
    print('哈夫曼编码字典:',Huffman_encode_dict)
    img_encode=encodeImage(src_img_ravel,Huffman_encode_dict)
    writeBinImage(img_encode,'huffman_bin_img_file.bin')
    img_read_code=readBinImage('huffman_bin_img_file.bin',len(img_encode))
    # Decode the bit string back to the flattened pixel vector by
    # walking the Huffman tree.
    img_src_val_array = decodeHuffman(img_read_code, huffman_root_node)
    assert len(img_src_val_array)==src_img_w*src_img_h
    # Restore the original image shape.
    img_decode=np.reshape(img_src_val_array,[src_img_w,src_img_h])
    # Average code length, source entropy and coding efficiency.
    total_code_num=sum(hist_dict.values())
    avg_code_len=0
    I_entropy=0
    for key,count in hist_dict.items():
        code_len=len(Huffman_encode_dict[key])
        prob=count/total_code_num
        avg_code_len+=prob*code_len
        I_entropy+=-(prob*math.log2(prob))
    S_eff=I_entropy/avg_code_len
    print("平均编码长度为{:.3f}".format(avg_code_len))
    print("编码效率为{:.6f}".format(S_eff))
    # Sizes in KB (8 bits per source pixel).
    ori_size=src_img_w*src_img_h*8/(1024*8)
    comp_size=len(img_encode)/(1024*8)
    # Fix: the value is printed with a '%' label, so express it as a
    # percentage — the original printed the raw fraction.
    comp_rate=(1-comp_size/ori_size)*100
    print('原图灰度图大小',ori_size,'KB  压缩后大小',comp_size,'KB  压缩率',comp_rate,'%')
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.subplot(121),plt.imshow(src_img,plt.cm.gray),plt.title('原图灰度图像'),plt.axis('off')
    plt.subplot(122),plt.imshow(img_decode,plt.cm.gray),plt.title('解压后'),plt.axis('off')
    plt.show()
if __name__=='__main__':
    # Global Huffman code table {pixel_value: bit-string code},
    # filled in by walkTree_VLR() during put().
    Huffman_encode_dict={}
    put(r'C:/Users/xpp/Desktop/Lena.png')

哈夫曼编码字典: {103: '0000000', 227: '00000010000', 229: '000000100010', 228: '000000100011', 224: '0000001001', 225: '00000010100', 222: '00000010101', 223: '0000001011', 185: '00000011', 87: '0000010', 106: '0000011', 61: '00001', 80: '0001000', 120: '0001001', 68: '000101', 122: '0001100', 93: '0001101', 125: '0001110', 90: '0001111', 56: '00100', 78: '0010100', 89: '0010101', 132: '0010110', 123: '0010111', 127: '0011000', 184: '0011001', 129: '0011010', 134: '0011011', 119: '0011100', 215: '00111010', 219: '001110110', 218: '001110111', 76: '0011110', 133: '0011111', 164: '010000', 107: '0100010', 124: '0100011', 126: '0100100', 131: '0100101', 130: '0100110', 128: '0100111', 59: '01010', 117: '0101100', 109: '0101101', 145: '0101110', 141: '0101111', 183: '01100000', 174: '01100001', 118: '0110001', 144: '011001', 143: '0110100', 204: '011010100', 189: '011010101', 181: '01101011', 139: '0110110', 136: '0110111', 135: '0111000', 214: '01110010', 206: '01110011', 160: '011101', 110: '0111100', 163: '0111101', 226: '01111100000', 230: '0111110000100', 231: '01111100001010', 232: '011111000010110', 235: '01111100001011100', 34: '01111100001011101', 239: '011111000010111100', 238: '011111000010111101', 233: '01111100001011111', 39: '011111000011', 221: '0111110001', 211: '011111001', 177: '01111101', 173: '0111111', 175: '1000000', 161: '1000001', 142: '1000010', 146: '1000011', 74: '1000100', 170: '1000101', 208: '100011000', 191: '100011001', 212: '10001101', 116: '1000111', 162: '1001000', 182: '1001001', 138: '1001010', 179: '10010110', 207: '10010111', 72: '1001100', 147: '1001101', 159: '1001110', 190: '10011110', 43: '100111110', 201: '100111111', 66: '101000', 53: '101001', 140: '1010100', 194: '10101010', 176: '10101011', 188: '10101100', 210: '10101101', 158: '1010111', 178: '1011000', 213: '10110010', 192: '10110011', 111: '1011010', 209: '10110110', 171: '10110111', 167: '1011100', 115: '1011101', 156: '101111', 148: '1100000', 149: '1100001', 157: 
'1100010', 202: '11000110', 168: '11000111', 137: '1100100', 180: '1100101', 187: '110011000', 217: '110011001', 205: '11001101', 114: '1100111', 50: '1101000', 172: '11010010', 220: '1101001100', 216: '1101001101', 196: '110100111', 70: '1101010', 195: '11010110', 186: '11010111', 203: '11011000', 199: '11011001', 154: '1101101', 169: '11011100', 98: '11011101', 155: '1101111', 151: '111000', 112: '1110010', 46: '11100110', 193: '111001110', 198: '111001111', 64: '111010', 150: '1110110', 197: '11101110', 99: '11101111', 200: '11110000', 165: '11110001', 153: '1111001', 96: '11110100', 85: '11110101', 152: '1111011', 102: '11111000', 95: '11111001', 105: '11111010', 101: '11111011', 166: '11111100', 92: '11111101', 84: '11111110', 82: '11111111'} 平均编码长度为6.995 编码效率为0.996009 原图灰度图大小 206.640625 KB 压缩后大小 180.6787109375 KB 压缩率 0.12563799621928162 %

算法:哈夫曼编码是一种根据词频变化的变长二进制编码方式,多用于压缩算法。

本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2021-09-26,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 图像处理与模式识别研究所 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档