哈夫曼编码是一种用于数据压缩的无损熵编码,根据压缩数据符号出现频率大小进行编码, 出现频率越高,编码后占bit 越少的变长编码。(其他详细介绍见参考)
刚好这两天看到,大学时信息论学完后基本忘记,顺便复习以下,并尝试C代码实现。
假设, 准备压缩的数据源, 评估得到各个符号出现的频率如下, 则其编码过程如下图
这里写图片描述
详细参考 huffman编码
static void cal_char_freq_table(char *array, uint32 len)
{
memset(freq_table, 0, sizeof(freq_table));
while (len--) {
++freq_table[*array++];
}
}
static int8 build_char_freq_list(struct hfm_node *list)
{
uint32 i = 0;
for (; i < 256; ++i) {
if (freq_table[i] > 0) {
struct hfm_node *new = new_hfm_node((char)i, freq_table[i]);
if (new != NULL) {
list_insert(list, new);
} else {
printf("New ffm_node failue!!\n");
return -1;
}
}
}
return 0;
}
static struct hfm_node *build_char_freq_tree(struct hfm_node *list)
{
struct hfm_node *new = NULL;
struct hfm_node *left, *right;
while (1) {
// 取权中最小的两个节点合并
left = list_pop(list);
if (left == NULL) {
return NULL;
}
right = list_pop(list);
if (right == NULL) {
// root
return left;
}
new = new_hfm_node(0, left->freq + right->freq);
new->left = left;
new->right = right;
list_insert(list, new);
}
}
static uint8 hfm_code = 0;
static uint8 hfm_deep = 0;
static void _build_hfm_code_table(struct hfm_node *root)
{
++hfm_deep;
if (root->left == NULL && root->right == NULL) {
hfm_code_table[root->val].code = hfm_code;
hfm_code_table[root->val].len = hfm_deep - 1;
} else {
hfm_code = (hfm_code << 1);
if (root->left != NULL) {
_build_hfm_code_table(root->left);
}
if (root->right != NULL) {
hfm_code |= 1;
_build_hfm_code_table(root->right);
}
hfm_code = (hfm_code >> 1);
}
--hfm_deep;
}
static void build_hfm_code_table(struct hfm_node *root)
{
hfm_code = 0;
hfm_deep = 0;
memset(hfm_code_table, 0, sizeof(hfm_code_table));
if (root != NULL) _build_hfm_code_table(root);
}
得到对应的编码映射, 便可以对应编码了 解码时, 也需要二叉树, 依据编码值, 寻得叶节点,得到对应的符号。