首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >C语言中的Bloom滤波器

C语言中的Bloom滤波器
EN

Code Review用户
提问于 2017-10-26 11:42:52
回答 1查看 1.9K关注 0票数 4

又一次出现了一个过滤问题。我有这个:

BloomFilter.h

代码语言:javascript
运行
复制
#ifndef NET_CODERODDE_UTIL_BLOOM_FILTER_H
#define NET_CODERODDE_UTIL_BLOOM_FILTER_H

#include <stdlib.h>

typedef unsigned char uint8_t;

typedef struct bloom_filter_t {
    uint8_t* array;
    size_t capacity;
    size_t (**hash_functions)(const void*);
    size_t hash_function_count;
} bloom_filter_t;

/***********************************************************************
* Allocates a new Bloom filter with given capacity and hash functions. *
***********************************************************************/
bloom_filter_t* bloom_filter_t_alloc(size_t capacity,
                                     size_t (**hash_functions)(const void*),
                                     size_t hash_function_count);

/********************************************************************
* Initializes a Bloom filter with given capacity and hash function. *
********************************************************************/
void bloom_filter_t_init(bloom_filter_t* bloom_filter,
                         size_t capacity,
                         size_t (**hash_functions)(const void*),
                         size_t hash_function_count);

/*********************************************
* Adds an element to the given Bloom filter. *
*********************************************/
void bloom_filter_t_add(bloom_filter_t* bloom_filter, const void* element);

/***********************************************************
* Queries whether an element is in the given Bloom filter. *
***********************************************************/
int bloom_filter_t_contains(bloom_filter_t* bloom_filter, const void* element);

/***********************************************************
* Restores the state of the Bloom filter to the empty one. *
***********************************************************/
void bloom_filter_t_clear(bloom_filter_t* bloom_filter);

/*****************************************************************
* Releases all the internal resources of the given Bloom filter. *
*****************************************************************/
void bloom_filter_t_deinit(bloom_filter_t* bloom_filter);

/************************************
* Releases the entire Bloom filter. *
************************************/
void bloom_filter_t_free(bloom_filter_t* bloom_filter);

#endif /* NET_CODERODDE_UTIL_BLOOM_FILTER_H */

BloomFilter.c

代码语言:javascript
运行
复制
#include "net/coderodde/util/BloomFilter.h"
#include <stdlib.h>

#define BITS_PER_UINT8_T 8

bloom_filter_t* bloom_filter_t_alloc(size_t capacity,
                                     size_t (**hash_functions)(const void*),
                                     size_t hash_function_count)
{
    bloom_filter_t* bloom_filter;
    size_t i;

    if (hash_function_count == 0)
    {
        abort();
    }

    for (i = 0; i != hash_function_count; ++i)
    {
        if (!hash_functions[i])
        {
            abort();
        }
    }

    bloom_filter = malloc(sizeof *bloom_filter);

    if (!bloom_filter)
    {
        abort();
    }

    bloom_filter->array = calloc(capacity, sizeof(uint64_t));

    if (!bloom_filter->array)
    {
        free(bloom_filter);
        abort();
    }

    bloom_filter->capacity = capacity;
    bloom_filter->hash_functions = hash_functions;
    bloom_filter->hash_function_count = hash_function_count;
    return bloom_filter;
}

void bloom_filter_t_init(bloom_filter_t* bloom_filter,
                         size_t capacity,
                         size_t (**hash_functions)(const void*),
                         size_t hash_function_count)
{
    size_t i;

    if (hash_function_count == 0)
    {
        abort();
    }

    for (i = 0; i != hash_function_count; ++i)
    {
        if (!hash_functions[i])
        {
            abort();
        }
    }

    if (!bloom_filter)
    {
        abort();
    }

    bloom_filter->array = calloc(capacity, sizeof(uint64_t));

    if (!bloom_filter->array)
    {
        abort();
    }

    bloom_filter->capacity = capacity;
    bloom_filter->hash_functions = hash_functions;
    bloom_filter->hash_function_count = hash_function_count;
}

static void write_bit_impl(uint64_t* word, size_t bit_index)
{
    uint64_t integer = 1;
    size_t i;

    for (i = 0; i != bit_index; ++i)
    {
        integer <<= 1;
    }

    *word |= integer;
}

static void write_bit(uint64_t* array, size_t bit_index)
{
    size_t word_index = bit_index / BITS_PER_UINT8_T;
    uint64_t* integer = &array[word_index];
    write_bit_impl(integer, bit_index % BITS_PER_UINT8_T);
}

void bloom_filter_t_add(bloom_filter_t* bloom_filter, const void* element)
{
    size_t i;
    size_t bit_index;
    size_t total_bits;

    if (!bloom_filter)
    {
        abort();
    }

    total_bits = bloom_filter->capacity * BITS_PER_UINT8_T;

    for (i = 0; i != bloom_filter->hash_function_count; ++i)
    {
        bit_index = bloom_filter->hash_functions[i](element) % total_bits;
        write_bit(bloom_filter->array, bit_index);
    }
}

static int read_bit_impl(uint64_t* word, size_t bit_index)
{
    uint64_t integer = 1;
    size_t i;

    for (i = 0; i != bit_index; ++i)
    {
        integer <<= 1;
    }

    return ((*word & integer) != 0) ? 1 : 0;
}

static int read_bit(uint64_t* array, size_t bit_index)
{
    size_t word_index = bit_index / BITS_PER_UINT8_T;
    uint64_t* integer = &array[word_index];
    return read_bit_impl(integer, bit_index % BITS_PER_UINT8_T);
}

int bloom_filter_t_contains(bloom_filter_t* bloom_filter, const void* element)
{
    size_t i;
    size_t bit_index;
    size_t total_bits;

    if (!bloom_filter)
    {
        abort();
    }

    total_bits = bloom_filter->capacity * BITS_PER_UINT8_T;

    for (i = 0; i != bloom_filter->hash_function_count; ++i)
    {
        bit_index = bloom_filter->hash_functions[i](element) % total_bits;

        if (!read_bit(bloom_filter->array, bit_index))
        {
            return 0;
        }
    }

    return 1;
}

void bloom_filter_t_clear(bloom_filter_t* bloom_filter)
{
    size_t i;

    if (!bloom_filter)
    {
        abort();
    }

    for (i = 0; i != bloom_filter->capacity; ++i)
    {
        bloom_filter->array[i] = 0;
    }
}

void bloom_filter_t_deinit(bloom_filter_t* bloom_filter)
{
    if (!bloom_filter)
    {
        abort();
    }

    free(bloom_filter->array);
    bloom_filter->array = NULL;
    bloom_filter->capacity = 0;
    bloom_filter->hash_function_count = 0;
    bloom_filter->hash_functions = NULL;
}

void bloom_filter_t_free(bloom_filter_t* bloom_filter)
{
    if (!bloom_filter)
    {
        abort();
    }

    bloom_filter_t_deinit(bloom_filter);
    free(bloom_filter);
}

main.c

代码语言:javascript
运行
复制
#include "net/coderodde/util/BloomFilter.h"
#include <stdio.h>

size_t hash_function_1(const void* arg)
{
    size_t hash = 0;
    size_t i = 2;
    char* c = (char*) arg;

    while (*c)
    {
        hash += *c * i;
        i++;
        c++;
    }

    return hash;
}

size_t hash_function_2(const void* arg)
{
    size_t hash = 0;
    size_t i = 1;
    char* c = (char*) arg;

    while (*c)
    {
        hash += i * *c;
        i += 2;
        c++;
    }

    return hash;
}

int main(int argc, const char * argv[]) {
    size_t i;
    bloom_filter_t* bloom_filter;

    char* names[] = {
        "Jack",
        "Alice",
        "Bob",
        "Richard",
        "Rolf",
        "Benjamin",
        "Ada",
        "Leif",
        "Funky",
        "DJ",
    };

    size_t (*hash_functions[])(const void*) = {
        hash_function_1,
        hash_function_2
    };

    bloom_filter = bloom_filter_t_alloc(1, hash_functions, 2);

    for (i = 0; i != 8; ++i)
    {
        bloom_filter_t_add(bloom_filter, names[i]);
    }

    for (i = 0; i != 10; ++i)
    {
        printf("%s: %d\n",
               names[i],
               bloom_filter_t_contains(bloom_filter, names[i]));
    }

    return 0;
}

评论请求

我最关心的是处理错误:我应该只做abort还是做其他的事情?

EN

回答 1

Code Review用户

回答已采纳

发布于 2017-10-29 19:31:45

Bug

在这项职能中:

静态空洞write_bit(uint64_t*数组,size_t bit_index) { size_t word_index = bit_index / BITS_PER_UINT8_T;uint64_t* integer = & array 单词_索引;write_bit_impl(integer,bit_index % BITS_PER_UINT8_T);}

您正在使用64位整数的数组来保存这些位。因此,您应该使用BITS_PER_UINT64_T,它是64,而不是BITS_PER_UINT8_T,它是8。同样的事情发生在read_bit()中。

不必要的循环

write_bit_impl()read_bit_impl()内部的循环可以被一次移位所取代。换言之,这是:

静态write_bit_impl(uint64_t* word,size_t bit_index) { uint64_t整数= 1;size_t i;for (i = 0;i != bit_index;++i) {整数<<= 1;} *word |=整数;}

可以重写如下:

代码语言:javascript
运行
复制
static void write_bit_impl(uint64_t* word, size_t bit_index)
{
    *word |= ((uint64_t) 1) << bit_index;
}
票数 3
EN
页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://codereview.stackexchange.com/questions/178843

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档