(三)dict哈希结构2

dict.c;

/* Hash Tables Implementation. 
 * 
 * This file implements in memory hash tables with insert/del/replace/find/ 
 * get-random-element operations. Hash tables will auto resize if needed 
 * tables of power of two in size are used, collisions are handled by 
 * chaining. See the source code for more information... :) 
 * 
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com> 
 * All rights reserved. 
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met: 
 * 
 *   * Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *   * Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the 
 *     documentation and/or other materials provided with the distribution. 
 *   * Neither the name of Redis nor the names of its contributors may be used 
 *     to endorse or promote products derived from this software without 
 *     specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 * POSSIBILITY OF SUCH DAMAGE. 
 */ 
 
#include "fmacros.h" 
 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <stdarg.h> 
#include <limits.h> 
#include <sys/time.h> 
#include <ctype.h> 
 
#include "dict.h" 
#include "zmalloc.h" 
#include "redisassert.h" 
 
/* Using dictEnableResize() / dictDisableResize() we make possible to 
 * enable/disable resizing of the hash table as needed. This is very important 
 * for Redis, as we use copy-on-write and don't want to move too much memory 
 * around when there is a child performing saving operations. 
 * 
 * Note that even when dict_can_resize is set to 0, not all resizes are 
 * prevented: a hash table is still allowed to grow if the ratio between 
 * the number of elements and the buckets > dict_force_resize_ratio. */ 
/* redis用了dictEnableResize() / dictDisableResize()方法可以重新调整哈希表的长度, 
 *因为redis采用的是写时复制的算法,不会挪动太多的内存,只有当调整数量大于一定比例才可能有效 */ 
static int dict_can_resize = 1;  
static unsigned int dict_force_resize_ratio = 5;  
 
/* -------------------------- private prototypes ---------------------------- */ 
/* 私有方法 */ 
static int _dictExpandIfNeeded(dict *ht);    //字典是否需要扩展 
static unsigned long _dictNextPower(unsigned long size);  
static int _dictKeyIndex(dict *ht, const void *key);  
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);  //字典初始化方法 
 
/* -------------------------- hash functions -------------------------------- */ 
/* 哈希索引计算的方法 */ 
 
/* Thomas Wang's 32 bit Mix Function */ 
/* Thomas Wang's 32 bit Mix 的哈希算法直接输入key值,获取索引值,据说这种冲突的概率很低 */ 
unsigned int dictIntHashFunction(unsigned int key)  
{  
    key += ~(key << 15);  
    key ^=  (key >> 10);  
    key +=  (key << 3);  
    key ^=  (key >> 6);  
    key += ~(key << 11);  
    key ^=  (key >> 16);  
 return key;  
}  
 
//哈希方法种子,跟产生随机数的种子作用应该是一样的 
static uint32_t dict_hash_function_seed = 5381;  
 
/* 重设哈希种子 */ 
void dictSetHashFunctionSeed(uint32_t seed) {  
    dict_hash_function_seed = seed;  
}  
 
/* 获取哈希种子 */ 
uint32_t dictGetHashFunctionSeed(void) {  
 return dict_hash_function_seed;  
}  
 
/* MurmurHash2, by Austin Appleby 
 * Note - This code makes a few assumptions about how your machine behaves - 
 * 1. We can read a 4-byte value from any address without crashing 
 * 2. sizeof(int) == 4 
 * 
 * And it has a few limitations - 
 * 
 * 1. It will not work incrementally. 
 * 2. It will not produce the same results on little-endian and big-endian 
 *    machines. 
 */ 
/* 输入的key值,目标长度,此方法帮你计算出索引值,此方法特别表明, 
 *  不会因为机器之间高低位存储的不同而产生相同的结果 */ 
unsigned int dictGenHashFunction(const void *key, int len) {  
 /* 'm' and 'r' are mixing constants generated offline. 
     They're not really 'magic', they just happen to work well.  */ 
 //seed种子,m,r的值都将会参与到计算中 
    uint32_t seed = dict_hash_function_seed;  
 const uint32_t m = 0x5bd1e995;  
 const int r = 24;  
 
 /* Initialize the hash to a 'random' value */ 
    uint32_t h = seed ^ len;  
 
 /* Mix 4 bytes at a time into the hash */ 
 const unsigned char *data = (const unsigned char *)key;  
 
 while(len >= 4) {  
        uint32_t k = *(uint32_t*)data;  
 
        k *= m;  
        k ^= k >> r;  
        k *= m;  
 
        h *= m;  
        h ^= k;  
 
        data += 4;  
        len -= 4;  
    }  
 
 /* Handle the last few bytes of the input array  */ 
 switch(len) {  
 case 3: h ^= data[2] << 16;  
 case 2: h ^= data[1] << 8;  
 case 1: h ^= data[0]; h *= m;  
    };  
 
 /* Do a few final mixes of the hash to ensure the last few 
     * bytes are well-incorporated. */ 
    h ^= h >> 13;  
    h *= m;  
    h ^= h >> 15;  
 
 return (unsigned int)h;  
}  
 
/* And a case insensitive hash function (based on djb hash) */ 
/* 这里提供了一种比较简单的哈希算法 */ 
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {  
 //以djb hash为基础,俗称“times33”就是不断的乘33 
 //几乎所有的流行的hash map都采用了DJB hash function 
    unsigned int hash = (unsigned int)dict_hash_function_seed;  
 
 while (len--)  
        hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */ 
 return hash;  
}  
 
/* ----------------------------- API implementation ------------------------- */ 
 
/* Reset a hash table already initialized with ht_init(). 
 * NOTE: This function should only be called by ht_destroy(). */ 
/* 重置哈希表方法,只在ht_destroy时使用 */ 
static void _dictReset(dictht *ht)  
{  
 //清空相应的变量,ht->table的类型其实是dictEntry,叫table名字太有歧义了 
    ht->table = NULL;  
    ht->size = 0;  
    ht->sizemask = 0;  
    ht->used = 0;  
}  
 
/* Create a new hash table */ 
/* 创建dict操作类 */ 
dict *dictCreate(dictType *type,  
 void *privDataPtr)  
{  
    dict *d = zmalloc(sizeof(*d));  
 
 //创建好空间之后调用初始化方法 
    _dictInit(d,type,privDataPtr);  
 return d;  
}  
 
/* Initialize the hash table */ 
/* 初始化dict类中的type,ht等变量 */ 
int _dictInit(dict *d, dictType *type,  
 void *privDataPtr)  
{  
 //重置2个ht哈希表 
    _dictReset(&d->ht[0]);  
    _dictReset(&d->ht[1]);  
 //赋值dictType 
    d->type = type;  
    d->privdata = privDataPtr;  
 //-1代表还没有rehash过, 
    d->rehashidx = -1;  
 //当前使用中的迭代器为0 
    d->iterators = 0;  
 
 //返回DICT_OK,代表初始化成功 
 return DICT_OK;  
}  
 
/* Resize the table to the minimal size that contains all the elements, 
 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */ 
/* 调整哈希表,用最少的值容纳所有的字典集合 */ 
int dictResize(dict *d)  
{  
 int minimal;  
 
 //如果系统默认调整值不大于0或已经调rehash过的就提示出错,拒绝操作 
 if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;  
 
 //最少数等于哈希标准鸿正在使用的数 
    minimal = d->ht[0].used;  
 if (minimal < DICT_HT_INITIAL_SIZE)  
        minimal = DICT_HT_INITIAL_SIZE;  
 
 //调用expand扩容 
 return dictExpand(d, minimal);  
}  
 
/* Expand or create the hash table */ 
/* 哈希表扩增方法 */ 
int dictExpand(dict *d, unsigned long size)  
{  
    dictht n; /* the new hash table */ 
 //获取调整值,以2的幂次向上取 
    unsigned long realsize = _dictNextPower(size);  
 
 /* the size is invalid if it is smaller than the number of 
     * elements already inside the hash table */ 
 //再次判断数量符合不符合 
 if (dictIsRehashing(d) || d->ht[0].used > size)  
 return DICT_ERR;  
 
 /* Allocate the new hash table and initialize all pointers to NULL */ 
 //初始化大小 
    n.size = realsize;  
    n.sizemask = realsize-1;  
 //为表格申请realsize个字典集的大小 
    n.table = zcalloc(realsize*sizeof(dictEntry*));  
    n.used = 0;  
 
 /* Is this the first initialization? If so it's not really a rehashing 
     * we just set the first hash table so that it can accept keys. */ 
 if (d->ht[0].table == NULL) {  
        d->ht[0] = n;  
 return DICT_OK;  
    }  
 
 /* Prepare a second hash table for incremental rehashing */ 
 //赋值给第二张表格 
    d->ht[1] = n;  
    d->rehashidx = 0;  
 return DICT_OK;  
}  
 
/* Performs N steps of incremental rehashing. Returns 1 if there are still 
 * keys to move from the old to the new hash table, otherwise 0 is returned. 
 * Note that a rehashing step consists in moving a bucket (that may have more 
 * than one key as we use chaining) from the old to the new hash table. */ 
/* hash重定位,主要从旧的表映射到新表中 
 * 如果返回1说明旧的表中还存在key迁移到新表中,0代表没有 */ 
int dictRehash(dict *d, int n) {  
 if (!dictIsRehashing(d)) return 0;  
 
 /* 根据参数分n步多次循环操作 */ 
 while(n--) {  
        dictEntry *de, *nextde;  
 
 /* Check if we already rehashed the whole table... */ 
 if (d->ht[0].used == 0) {  
            zfree(d->ht[0].table);  
            d->ht[0] = d->ht[1];  
            _dictReset(&d->ht[1]);  
            d->rehashidx = -1;  
 return 0;  
        }  
 
 /* Note that rehashidx can't overflow as we are sure there are more 
         * elements because ht[0].used != 0 */ 
        assert(d->ht[0].size > (unsigned long)d->rehashidx);  
 while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;  
        de = d->ht[0].table[d->rehashidx];  
 /* Move all the keys in this bucket from the old to the new hash HT */ 
 /* 移动的关键操作 */ 
 while(de) {  
            unsigned int h;  
 
            nextde = de->next;  
 /* Get the index in the new hash table */ 
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;  
            de->next = d->ht[1].table[h];  
            d->ht[1].table[h] = de;  
            d->ht[0].used--;  
            d->ht[1].used++;  
            de = nextde;  
        }  
        d->ht[0].table[d->rehashidx] = NULL;  
        d->rehashidx++;  
    }  
 return 1;  
}  
 
/* 获取当前毫秒的时间 */ 
long long timeInMilliseconds(void) {  
 struct timeval tv;  
 
    gettimeofday(&tv,NULL);  
 return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);  
}  
 
/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */ 
/* 在给定时间内,循环执行哈希重定位 */ 
int dictRehashMilliseconds(dict *d, int ms) {  
 long long start = timeInMilliseconds();  
 int rehashes = 0;  
 
 while(dictRehash(d,100)) {  
 //重定位的次数累加 
        rehashes += 100;  
 //时间超出给定时间范围,则终止 
 if (timeInMilliseconds()-start > ms) break;  
    }  
 return rehashes;  
}

原文发布于微信公众号 - 高性能服务器开发(easyserverdev)

原文发表时间:2018-04-02

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

相关文章

来自专栏GIS讲堂

geotools获取给定点的DEM高程值

1、在web端绘制一条曲线; 2、获取各节点处的高程值; 3、根据高程值绘制高程堆积图。

20650
来自专栏高性能服务器开发

(五)sparkline微线图

sparkline这个单词,我第一次看的时候,也不知道这什么意思啊,以前根本没听过啊,但是这真真实实的出现在了redis的代码中了,刚刚开始以为这也是属于普通...

393120
来自专栏PPV课数据科学社区

【学习】七天搞定SAS(二):基本操作(判断、运算、基本函数)

? 今天开始注重变量操作。 SAS生成新变量 SAS支持基本的加减乘除,值得一提的是它的**代表指数,而不是^。* Modify homegarden dat...

49140
来自专栏计算机视觉与深度学习基础

Leetcode 题目列表(难度、出现频率、知识点)

不全,但好像没看到有更好的版本,刷前132题暂时凑合着用吧! 转载自:LeetCode Question Difficulty Distribution ?...

45760
来自专栏机器学习与自然语言处理

03-树3. Tree Traversals Again (25)将先序遍历和中序遍历转为后序遍历

03-树3. Tree Traversals Again (25) 题目来源:http://www.patest.cn/contests/mooc-ds/03-...

35490
来自专栏yl 成长笔记

web 上读取图片,并转化为指定格式

9710
来自专栏葡萄城控件技术团队

深入浅出OOP(三): 多态和继承(动态绑定/运行时多态)

在前面的文章中,我们介绍了编译期多态、params关键字、实例化、base关键字等。本节我们来关注另外一种多态:运行时多态, 运行时多态也叫迟绑定。 运行时多态...

20480
来自专栏码匠的流水账

聊聊storm的IEventLogger

storm-2.0.0/storm-client/src/jvm/org/apache/storm/metric/IEventLogger.java

14730
来自专栏YG小书屋

orc文件格式对常用系统的支持

24530
来自专栏拂晓风起

cocos2d-js Shader系列2:在cc.Sprite上使用Shader(黑白、灰度、造旧效果)

17040

扫码关注云+社区

领取腾讯云代金券