范蠡
(三)dict哈希结构3
关注作者
前往小程序,Get
更优
阅读体验!
立即前往
腾讯云
开发者社区
文档
建议反馈
控制台
登录/注册
首页
学习
活动
专区
工具
TVP
最新优惠活动
文章/答案/技术大牛
搜索
搜索
关闭
发布
首页
学习
活动
专区
工具
TVP
最新优惠活动
返回腾讯云官网
范蠡
首页
学习
活动
专区
工具
TVP
最新优惠活动
返回腾讯云官网
社区首页
>
专栏
>
(三)dict哈希结构3
(三)dict哈希结构3
范蠡
关注
发布于 2018-04-13 15:05:11
584
0
发布于 2018-04-13 15:05:11
举报
文章被收录于专栏:
高性能服务器开发
/* This function performs just a step of rehashing, and only if there are
* no safe iterators bound to our hash table. When we have iterators in the
* middle of a rehashing we can't mess with the two hash tables otherwise
* some element can be missed or duplicated.
*
* This function is called by common lookup or update operations in the
* dictionary so that the hash table automatically migrates from H1 to H2
* while it is actively used. */
/* 当没有迭代器时候,进行重定位算法 */
static
void
_dictRehashStep(dict *d) {
if
(d->iterators == 0) dictRehash(d,1);
}
/* Add an element to the target hash table */
/* 添加一个dicEntry */
int
dictAdd(dict *d,
void
*key,
void
*val)
{
dictEntry *entry = dictAddRaw(d,key);
if
(!entry)
return
DICT_ERR;
dictSetVal(d, entry, val);
return
DICT_OK;
}
/* Low level add. This function adds the entry but instead of setting
* a value returns the dictEntry structure to the user, that will make
* sure to fill the value field as he wishes.
*
* This function is also directly exposed to user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned.
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
/* 添加一个指定key值的Entry */
dictEntry *dictAddRaw(dict *d,
void
*key)
{
int
index;
dictEntry *entry;
dictht *ht;
if
(dictIsRehashing(d)) _dictRehashStep(d);
/* Get the index of the new element, or -1 if
* the element already exists. */
/* 如果指定的key已经存在,则直接返回NULL说明添加失败 */
if
((index = _dictKeyIndex(d, key)) == -1)
return
NULL;
/* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(
sizeof
(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return
entry;
}
/* Add an element, discarding the old if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
/* 替换一个子字典集,如果不存在直接添加,存在,覆盖val的值 */
int
dictReplace(dict *d,
void
*key,
void
*val)
{
dictEntry *entry, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
//不存在,这个key直接添加
if
(dictAdd(d, key, val) == DICT_OK)
return
1;
/* It already exists, get the entry */
entry = dictFind(d, key);
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
//赋值方法
auxentry = *entry;
dictSetVal(d, entry, val);
dictFreeVal(d, &auxentry);
return
0;
}
/* dictReplaceRaw() is simply a version of dictAddRaw() that always
* returns the hash entry of the specified key, even if the key already
* exists and can't be added (in that case the entry of the already
* existing key is returned.)
*
* See dictAddRaw() for more information. */
/* 添加字典,没有函数方法,如果存在,就不添加 */
dictEntry *dictReplaceRaw(dict *d,
void
*key) {
dictEntry *entry = dictFind(d,key);
return
entry ? entry : dictAddRaw(d,key);
}
/* Search and remove an element */
/* 删除给定key的结点,可控制是否调用释放方法 */
static
int
dictGenericDelete(dict *d,
const
void
*key,
int
nofree)
{
unsigned
int
h, idx;
dictEntry *he, *prevHe;
int
table;
if
(d->ht[0].size == 0)
return
DICT_ERR; /* d->ht[0].table is NULL */
if
(dictIsRehashing(d)) _dictRehashStep(d);
//计算key对应的哈希索引
h = dictHashKey(d, key);
for
(table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
//找到具体的索引对应的结点
he = d->ht[table].table[idx];
prevHe = NULL;
while
(he) {
if
(dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if
(prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if
(!nofree) {
//判断是否需要调用dict定义的free方法
dictFreeKey(d, he);
dictFreeVal(d, he);
}
zfree(he);
d->ht[table].used--;
return
DICT_OK;
}
prevHe = he;
he = he->next;
}
if
(!dictIsRehashing(d))
break
;
}
return
DICT_ERR; /* not found */
}
/* 会调用free方法的删除方法 */
int
dictDelete(dict *ht,
const
void
*key) {
return
dictGenericDelete(ht,key,0);
}
/* 不会调用free方法的删除方法 */
int
dictDeleteNoFree(dict *ht,
const
void
*key) {
return
dictGenericDelete(ht,key,1);
}
/* Destroy an entire dictionary */
/* 清空整个哈希表 */
int
_dictClear(dict *d, dictht *ht,
void
(callback)(
void
*)) {
unsigned
long
i;
/* Free all the elements */
for
(i = 0; i < ht->size && ht->used > 0; i++) {
dictEntry *he, *nextHe;
//每次情况会调用回调方法
if
(callback && (i & 65535) == 0) callback(d->privdata);
if
((he = ht->table[i]) == NULL)
continue
;
while
(he) {
//依次释放结点
nextHe = he->next;
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
ht->used--;
he = nextHe;
}
}
/* Free the table and the allocated cache structure */
zfree(ht->table);
/* Re-initialize the table */
_dictReset(ht);
return
DICT_OK; /* never fails */
}
/* Clear & Release the hash table */
/* 重置字典总类,清空2张表 */
void
dictRelease(dict *d)
{
_dictClear(d,&d->ht[0],NULL);
_dictClear(d,&d->ht[1],NULL);
zfree(d);
}
/* 根据key返回具体的字典集 */
dictEntry *dictFind(dict *d,
const
void
*key)
{
dictEntry *he;
unsigned
int
h, idx, table;
if
(d->ht[0].size == 0)
return
NULL; /* We don't have a table at all */
if
(dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for
(table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while
(he) {
if
(dictCompareKeys(d, key, he->key))
return
he;
he = he->next;
}
if
(!dictIsRehashing(d))
return
NULL;
}
return
NULL;
}
/* 获取目标字典集的方法 */
void
*dictFetchValue(dict *d,
const
void
*key) {
dictEntry *he;
he = dictFind(d,key);
/* 获取字典集的方法 */
return
he ? dictGetVal(he) : NULL;
}
/* A fingerprint is a 64 bit number that represents the state of the dictionary
* at a given time, it's just a few dict properties xored together.
* When an unsafe iterator is initialized, we get the dict fingerprint, and check
* the fingerprint again when the iterator is released.
* If the two fingerprints are different it means that the user of the iterator
* performed forbidden operations against the dictionary while iterating. */
/* 通过指纹来禁止每个不安全的哈希迭代器的非法操作,每个不安全迭代器只能有一个指纹 */
long
long
dictFingerprint(dict *d) {
long
long
integers[6], hash = 0;
int
j;
integers[0] = (
long
) d->ht[0].table;
integers[1] = d->ht[0].size;
integers[2] = d->ht[0].used;
integers[3] = (
long
) d->ht[1].table;
integers[4] = d->ht[1].size;
integers[5] = d->ht[1].used;
/* We hash N integers by summing every successive integer with the integer
* hashing of the previous sum. Basically:
*
* Result = hash(hash(hash(int1)+int2)+int3) ...
*
* This way the same set of integers in a different order will (likely) hash
* to a different number. */
for
(j = 0; j < 6; j++) {
hash += integers[j];
/* For the hashing step we use Tomas Wang's 64 bit integer hash. */
hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
hash = hash ^ (hash >> 24);
hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
hash = hash ^ (hash >> 14);
hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
hash = hash ^ (hash >> 28);
hash = hash + (hash << 31);
}
return
hash;
}
/* 获取哈希迭代器,默认不安全的 */
dictIterator *dictGetIterator(dict *d)
{
dictIterator *iter = zmalloc(
sizeof
(*iter));
iter->d = d;
iter->table = 0;
iter->index = -1;
iter->safe = 0;
iter->entry = NULL;
iter->nextEntry = NULL;
return
iter;
}
/* 获取安全哈希迭代器 */
dictIterator *dictGetSafeIterator(dict *d) {
dictIterator *i = dictGetIterator(d);
i->safe = 1;
return
i;
}
/* 迭代器获取下一个集合点 */
dictEntry *dictNext(dictIterator *iter)
{
while
(1) {
if
(iter->entry == NULL) {
dictht *ht = &iter->d->ht[iter->table];
if
(iter->index == -1 && iter->table == 0) {
//如果迭代器index下标为-1说明还没开始使用,设置迭代器的指纹或增加引用计数量
if
(iter->safe)
iter->d->iterators++;
else
iter->fingerprint = dictFingerprint(iter->d);
}
//迭代器下标递增
iter->index++;
if
(iter->index >= (
long
) ht->size) {
if
(dictIsRehashing(iter->d) && iter->table == 0) {
iter->table++;
iter->index = 0;
ht = &iter->d->ht[1];
}
else
{
break
;
}
}
//根据下标选择集合点
iter->entry = ht->table[iter->index];
}
else
{
iter->entry = iter->nextEntry;
}
if
(iter->entry) {
/* We need to save the 'next' here, the iterator user
* may delete the entry we are returning. */
iter->nextEntry = iter->entry->next;
return
iter->entry;
}
}
return
NULL;
}
/* 释放迭代器 */
void
dictReleaseIterator(dictIterator *iter)
{
if
(!(iter->index == -1 && iter->table == 0)) {
if
(iter->safe)
iter->d->iterators--;
else
//这时判断指纹是否还是之前定义的那个
assert(iter->fingerprint == dictFingerprint(iter->d));
}
zfree(iter);
}
/* Return a random entry from the hash table. Useful to
* implement randomized algorithms */
/* 随机获取一个集合点 */
dictEntry *dictGetRandomKey(dict *d)
{
dictEntry *he, *orighe;
unsigned
int
h;
int
listlen, listele;
if
(dictSize(d) == 0)
return
NULL;
if
(dictIsRehashing(d)) _dictRehashStep(d);
if
(dictIsRehashing(d)) {
do
{
//随机数向2个表格的总数求余运算
h = random() % (d->ht[0].size+d->ht[1].size);
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
}
while
(he == NULL);
}
else
{
do
{
h = random() & d->ht[0].sizemask;
he = d->ht[0].table[h];
}
while
(he == NULL);
}
/* Now we found a non empty bucket, but it is a linked
* list and we need to get a random element from the list.
* The only sane way to do so is counting the elements and
* select a random index. */
listlen = 0;
orighe = he;
while
(he) {
he = he->next;
listlen++;
}
listele = random() % listlen;
he = orighe;
while
(listele--) he = he->next;
return
he;
}
/* Function to reverse bits. Algorithm from:
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
/* 很神奇的翻转位 */
static
unsigned
long
rev(unsigned
long
v) {
unsigned
long
s = 8 *
sizeof
(v); // bit size; must be power of 2
unsigned
long
mask = ~0;
while
((s >>= 1) > 0) {
mask ^= (mask << s);
v = ((v >> s) & mask) | ((v << s) & ~mask);
}
return
v;
}
/* dictScan() is used to iterate over the elements of a dictionary.
*
* Iterating works in the following way:
*
* 1) Initially you call the function using a cursor (v) value of 0.
* 2) The function performs one step of the iteration, and returns the
* new cursor value that you must use in the next call.
* 3) When the returned cursor is 0, the iteration is complete.
*
* The function guarantees that all the elements that are present in the
* dictionary from the start to the end of the iteration are returned.
* However it is possible that some element is returned multiple time.
*
* For every element returned, the callback 'fn' passed as argument is
* called, with 'privdata' as first argument and the dictionar entry
* 'de' as second argument.
*
* HOW IT WORKS.
*
* The algorithm used in the iteration was designed by Pieter Noordhuis.
* The main idea is to increment a cursor starting from the higher order
* bits, that is, instead of incrementing the cursor normally, the bits
* of the cursor are reversed, then the cursor is incremented, and finally
* the bits are reversed again.
*
* This strategy is needed because the hash table may be resized from one
* call to the other call of the same iteration.
*
* dict.c hash tables are always power of two in size, and they
* use chaining, so the position of an element in a given table is given
* always by computing the bitwise AND between Hash(key) and SIZE-1
* (where SIZE-1 is always the mask that is equivalent to taking the rest
* of the division between the Hash of the key and SIZE).
*
* For example if the current hash table size is 16, the mask is
* (in binary) 1111. The position of a key in the hash table will be always
* the last four bits of the hash output, and so forth.
*
* WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
*
* If the hash table grows, elements can go anyway in one multiple of
* the old bucket: for example let's say that we already iterated with
* a 4 bit cursor 1100, since the mask is 1111 (hash table size = 16).
*
* If the hash table will be resized to 64 elements, and the new mask will
* be 111111, the new buckets that you obtain substituting in ??1100
* either 0 or 1, can be targeted only by keys that we already visited
* when scanning the bucket 1100 in the smaller hash table.
*
* By iterating the higher bits first, because of the inverted counter, the
* cursor does not need to restart if the table size gets bigger, and will
* just continue iterating with cursors that don't have '1100' at the end,
* nor any other combination of final 4 bits already explored.
*
* Similarly when the table size shrinks over time, for example going from
* 16 to 8, If a combination of the lower three bits (the mask for size 8
* is 111) was already completely explored, it will not be visited again
* as we are sure that, we tried for example, both 0111 and 1111 (all the
* variations of the higher bit) so we don't need to test it again.
*
* WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
*
* Yes, this is true, but we always iterate the smaller one of the tables,
* testing also all the expansions of the current cursor into the larger
* table. So for example if the current cursor is 101 and we also have a
* larger table of size 16, we also test (0)101 and (1)101 inside the larger
* table. This reduces the problem back to having only one table, where
* the larger one, if exists, is just an expansion of the smaller one.
*
* LIMITATIONS
*
* This iterator is completely stateless, and this is a huge advantage,
* including no additional memory used.
*
* The disadvantages resulting from this design are:
*
* 1) It is possible that we return duplicated elements. However this is usually
* easy to deal with in the application level.
* 2) The iterator must return multiple elements per call, as it needs to always
* return all the keys chained in a given bucket, and all the expansions, so
* we are sure we don't miss keys moving.
* 3) The reverse cursor is somewhat hard to understand at first, but this
* comment is supposed to help.
*/
/* 扫描方法 */
unsigned
long
dictScan(dict *d,
unsigned
long
v,
dictScanFunction *fn,
void
*privdata)
{
dictht *t0, *t1;
const
dictEntry *de;
unsigned
long
m0, m1;
if
(dictSize(d) == 0)
return
0;
if
(!dictIsRehashing(d)) {
t0 = &(d->ht[0]);
m0 = t0->sizemask;
/* Emit entries at cursor */
de = t0->table[v & m0];
while
(de) {
fn(privdata, de);
de = de->next;
}
}
else
{
t0 = &d->ht[0];
t1 = &d->ht[1];
/* Make sure t0 is the smaller and t1 is the bigger table */
if
(t0->size > t1->size) {
t0 = &d->ht[1];
t1 = &d->ht[0];
}
m0 = t0->sizemask;
m1 = t1->sizemask;
/* Emit entries at cursor */
de = t0->table[v & m0];
while
(de) {
fn(privdata, de);
de = de->next;
}
/* Iterate over indices in larger table that are the expansion
* of the index pointed to by the cursor in the smaller table */
do
{
/* Emit entries at cursor */
de = t1->table[v & m1];
while
(de) {
fn(privdata, de);
de = de->next;
}
/* Increment bits not covered by the smaller mask */
v = (((v | m0) + 1) & ~m0) | (v & m0);
/* Continue while bits covered by mask difference is non-zero */
}
while
(v & (m0 ^ m1));
}
/* Set unmasked bits so incrementing the reversed cursor
* operates on the masked bits of the smaller table */
v |= ~m0;
/* Increment the reverse cursor */
v = rev(v);
v++;
v = rev(v);
return
v;
}
本文参与
腾讯云自媒体同步曝光计划
,分享自微信公众号。
原始发表:2018-04-05,如有侵权请联系
cloudcommunity@tencent.com
删除
编程算法
本文分享自
高性能服务器开发
微信公众号,
前往查看
如有侵权,请联系
cloudcommunity@tencent.com
删除。
本文参与
腾讯云自媒体同步曝光计划
,欢迎热爱写作的你一起参与!
编程算法
评论
登录
后参与评论
0 条评论
热度
最新
推荐阅读
LV.
文章
0
获赞
0
领券
问题归档
专栏文章
快讯文章归档
关键词归档
开发者手册归档
开发者手册 Section 归档
0
0
0
推荐