(三)dict哈希结构3

  1. /* This function performs just a step of rehashing, and only if there are
  2. * no safe iterators bound to our hash table. When we have iterators in the
  3. * middle of a rehashing we can't mess with the two hash tables otherwise
  4. * some element can be missed or duplicated.
  5. *
  6. * This function is called by common lookup or update operations in the
  7. * dictionary so that the hash table automatically migrates from H1 to H2
  8. * while it is actively used. */
  9. /* 当没有迭代器时候,进行重定位算法 */
  10. static void _dictRehashStep(dict *d) {
  11. if (d->iterators == 0) dictRehash(d,1);
  12. }
  13. /* Add an element to the target hash table */
  14. /* 添加一个dicEntry */
  15. int dictAdd(dict *d, void *key, void *val)
  16. {
  17. dictEntry *entry = dictAddRaw(d,key);
  18. if (!entry) return DICT_ERR;
  19. dictSetVal(d, entry, val);
  20. return DICT_OK;
  21. }
  22. /* Low level add. This function adds the entry but instead of setting
  23. * a value returns the dictEntry structure to the user, that will make
  24. * sure to fill the value field as he wishes.
  25. *
  26. * This function is also directly exposed to user API to be called
  27. * mainly in order to store non-pointers inside the hash value, example:
  28. *
  29. * entry = dictAddRaw(dict,mykey);
  30. * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
  31. *
  32. * Return values:
  33. *
  34. * If key already exists NULL is returned.
  35. * If key was added, the hash entry is returned to be manipulated by the caller.
  36. */
  37. /* 添加一个指定key值的Entry */
  38. dictEntry *dictAddRaw(dict *d, void *key)
  39. {
  40. int index;
  41. dictEntry *entry;
  42. dictht *ht;
  43. if (dictIsRehashing(d)) _dictRehashStep(d);
  44. /* Get the index of the new element, or -1 if
  45. * the element already exists. */
  46. /* 如果指定的key已经存在,则直接返回NULL说明添加失败 */
  47. if ((index = _dictKeyIndex(d, key)) == -1)
  48. return NULL;
  49. /* Allocate the memory and store the new entry */
  50. ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
  51. entry = zmalloc(sizeof(*entry));
  52. entry->next = ht->table[index];
  53. ht->table[index] = entry;
  54. ht->used++;
  55. /* Set the hash entry fields. */
  56. dictSetKey(d, entry, key);
  57. return entry;
  58. }
  59. /* Add an element, discarding the old if the key already exists.
  60. * Return 1 if the key was added from scratch, 0 if there was already an
  61. * element with such key and dictReplace() just performed a value update
  62. * operation. */
  63. /* 替换一个子字典集,如果不存在直接添加,存在,覆盖val的值 */
  64. int dictReplace(dict *d, void *key, void *val)
  65. {
  66. dictEntry *entry, auxentry;
  67. /* Try to add the element. If the key
  68. * does not exists dictAdd will suceed. */
  69. //不存在,这个key直接添加
  70. if (dictAdd(d, key, val) == DICT_OK)
  71. return 1;
  72. /* It already exists, get the entry */
  73. entry = dictFind(d, key);
  74. /* Set the new value and free the old one. Note that it is important
  75. * to do that in this order, as the value may just be exactly the same
  76. * as the previous one. In this context, think to reference counting,
  77. * you want to increment (set), and then decrement (free), and not the
  78. * reverse. */
  79. //赋值方法
  80. auxentry = *entry;
  81. dictSetVal(d, entry, val);
  82. dictFreeVal(d, &auxentry);
  83. return 0;
  84. }
  85. /* dictReplaceRaw() is simply a version of dictAddRaw() that always
  86. * returns the hash entry of the specified key, even if the key already
  87. * exists and can't be added (in that case the entry of the already
  88. * existing key is returned.)
  89. *
  90. * See dictAddRaw() for more information. */
  91. /* 添加字典,没有函数方法,如果存在,就不添加 */
  92. dictEntry *dictReplaceRaw(dict *d, void *key) {
  93. dictEntry *entry = dictFind(d,key);
  94. return entry ? entry : dictAddRaw(d,key);
  95. }
  96. /* Search and remove an element */
  97. /* 删除给定key的结点,可控制是否调用释放方法 */
  98. static int dictGenericDelete(dict *d, const void *key, int nofree)
  99. {
  100. unsigned int h, idx;
  101. dictEntry *he, *prevHe;
  102. int table;
  103. if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
  104. if (dictIsRehashing(d)) _dictRehashStep(d);
  105. //计算key对应的哈希索引
  106. h = dictHashKey(d, key);
  107. for (table = 0; table <= 1; table++) {
  108. idx = h & d->ht[table].sizemask;
  109. //找到具体的索引对应的结点
  110. he = d->ht[table].table[idx];
  111. prevHe = NULL;
  112. while(he) {
  113. if (dictCompareKeys(d, key, he->key)) {
  114. /* Unlink the element from the list */
  115. if (prevHe)
  116. prevHe->next = he->next;
  117. else
  118. d->ht[table].table[idx] = he->next;
  119. if (!nofree) {
  120. //判断是否需要调用dict定义的free方法
  121. dictFreeKey(d, he);
  122. dictFreeVal(d, he);
  123. }
  124. zfree(he);
  125. d->ht[table].used--;
  126. return DICT_OK;
  127. }
  128. prevHe = he;
  129. he = he->next;
  130. }
  131. if (!dictIsRehashing(d)) break;
  132. }
  133. return DICT_ERR; /* not found */
  134. }
  135. /* 会调用free方法的删除方法 */
  136. int dictDelete(dict *ht, const void *key) {
  137. return dictGenericDelete(ht,key,0);
  138. }
  139. /* 不会调用free方法的删除方法 */
  140. int dictDeleteNoFree(dict *ht, const void *key) {
  141. return dictGenericDelete(ht,key,1);
  142. }
  143. /* Destroy an entire dictionary */
  144. /* 清空整个哈希表 */
  145. int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
  146. unsigned long i;
  147. /* Free all the elements */
  148. for (i = 0; i < ht->size && ht->used > 0; i++) {
  149. dictEntry *he, *nextHe;
  150. //每次情况会调用回调方法
  151. if (callback && (i & 65535) == 0) callback(d->privdata);
  152. if ((he = ht->table[i]) == NULL) continue;
  153. while(he) {
  154. //依次释放结点
  155. nextHe = he->next;
  156. dictFreeKey(d, he);
  157. dictFreeVal(d, he);
  158. zfree(he);
  159. ht->used--;
  160. he = nextHe;
  161. }
  162. }
  163. /* Free the table and the allocated cache structure */
  164. zfree(ht->table);
  165. /* Re-initialize the table */
  166. _dictReset(ht);
  167. return DICT_OK; /* never fails */
  168. }
  169. /* Clear & Release the hash table */
  170. /* 重置字典总类,清空2张表 */
  171. void dictRelease(dict *d)
  172. {
  173. _dictClear(d,&d->ht[0],NULL);
  174. _dictClear(d,&d->ht[1],NULL);
  175. zfree(d);
  176. }
  177. /* 根据key返回具体的字典集 */
  178. dictEntry *dictFind(dict *d, const void *key)
  179. {
  180. dictEntry *he;
  181. unsigned int h, idx, table;
  182. if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
  183. if (dictIsRehashing(d)) _dictRehashStep(d);
  184. h = dictHashKey(d, key);
  185. for (table = 0; table <= 1; table++) {
  186. idx = h & d->ht[table].sizemask;
  187. he = d->ht[table].table[idx];
  188. while(he) {
  189. if (dictCompareKeys(d, key, he->key))
  190. return he;
  191. he = he->next;
  192. }
  193. if (!dictIsRehashing(d)) return NULL;
  194. }
  195. return NULL;
  196. }
  197. /* 获取目标字典集的方法 */
  198. void *dictFetchValue(dict *d, const void *key) {
  199. dictEntry *he;
  200. he = dictFind(d,key);
  201. /* 获取字典集的方法 */
  202. return he ? dictGetVal(he) : NULL;
  203. }
  204. /* A fingerprint is a 64 bit number that represents the state of the dictionary
  205. * at a given time, it's just a few dict properties xored together.
  206. * When an unsafe iterator is initialized, we get the dict fingerprint, and check
  207. * the fingerprint again when the iterator is released.
  208. * If the two fingerprints are different it means that the user of the iterator
  209. * performed forbidden operations against the dictionary while iterating. */
  210. /* 通过指纹来禁止每个不安全的哈希迭代器的非法操作,每个不安全迭代器只能有一个指纹 */
  211. long long dictFingerprint(dict *d) {
  212. long long integers[6], hash = 0;
  213. int j;
  214. integers[0] = (long) d->ht[0].table;
  215. integers[1] = d->ht[0].size;
  216. integers[2] = d->ht[0].used;
  217. integers[3] = (long) d->ht[1].table;
  218. integers[4] = d->ht[1].size;
  219. integers[5] = d->ht[1].used;
  220. /* We hash N integers by summing every successive integer with the integer
  221. * hashing of the previous sum. Basically:
  222. *
  223. * Result = hash(hash(hash(int1)+int2)+int3) ...
  224. *
  225. * This way the same set of integers in a different order will (likely) hash
  226. * to a different number. */
  227. for (j = 0; j < 6; j++) {
  228. hash += integers[j];
  229. /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
  230. hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
  231. hash = hash ^ (hash >> 24);
  232. hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
  233. hash = hash ^ (hash >> 14);
  234. hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
  235. hash = hash ^ (hash >> 28);
  236. hash = hash + (hash << 31);
  237. }
  238. return hash;
  239. }
  240. /* 获取哈希迭代器,默认不安全的 */
  241. dictIterator *dictGetIterator(dict *d)
  242. {
  243. dictIterator *iter = zmalloc(sizeof(*iter));
  244. iter->d = d;
  245. iter->table = 0;
  246. iter->index = -1;
  247. iter->safe = 0;
  248. iter->entry = NULL;
  249. iter->nextEntry = NULL;
  250. return iter;
  251. }
  252. /* 获取安全哈希迭代器 */
  253. dictIterator *dictGetSafeIterator(dict *d) {
  254. dictIterator *i = dictGetIterator(d);
  255. i->safe = 1;
  256. return i;
  257. }
  258. /* 迭代器获取下一个集合点 */
  259. dictEntry *dictNext(dictIterator *iter)
  260. {
  261. while (1) {
  262. if (iter->entry == NULL) {
  263. dictht *ht = &iter->d->ht[iter->table];
  264. if (iter->index == -1 && iter->table == 0) {
  265. //如果迭代器index下标为-1说明还没开始使用,设置迭代器的指纹或增加引用计数量
  266. if (iter->safe)
  267. iter->d->iterators++;
  268. else
  269. iter->fingerprint = dictFingerprint(iter->d);
  270. }
  271. //迭代器下标递增
  272. iter->index++;
  273. if (iter->index >= (long) ht->size) {
  274. if (dictIsRehashing(iter->d) && iter->table == 0) {
  275. iter->table++;
  276. iter->index = 0;
  277. ht = &iter->d->ht[1];
  278. } else {
  279. break;
  280. }
  281. }
  282. //根据下标选择集合点
  283. iter->entry = ht->table[iter->index];
  284. } else {
  285. iter->entry = iter->nextEntry;
  286. }
  287. if (iter->entry) {
  288. /* We need to save the 'next' here, the iterator user
  289. * may delete the entry we are returning. */
  290. iter->nextEntry = iter->entry->next;
  291. return iter->entry;
  292. }
  293. }
  294. return NULL;
  295. }
  296. /* 释放迭代器 */
  297. void dictReleaseIterator(dictIterator *iter)
  298. {
  299. if (!(iter->index == -1 && iter->table == 0)) {
  300. if (iter->safe)
  301. iter->d->iterators--;
  302. else
  303. //这时判断指纹是否还是之前定义的那个
  304. assert(iter->fingerprint == dictFingerprint(iter->d));
  305. }
  306. zfree(iter);
  307. }
  308. /* Return a random entry from the hash table. Useful to
  309. * implement randomized algorithms */
  310. /* 随机获取一个集合点 */
  311. dictEntry *dictGetRandomKey(dict *d)
  312. {
  313. dictEntry *he, *orighe;
  314. unsigned int h;
  315. int listlen, listele;
  316. if (dictSize(d) == 0) return NULL;
  317. if (dictIsRehashing(d)) _dictRehashStep(d);
  318. if (dictIsRehashing(d)) {
  319. do {
  320. //随机数向2个表格的总数求余运算
  321. h = random() % (d->ht[0].size+d->ht[1].size);
  322. he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
  323. d->ht[0].table[h];
  324. } while(he == NULL);
  325. } else {
  326. do {
  327. h = random() & d->ht[0].sizemask;
  328. he = d->ht[0].table[h];
  329. } while(he == NULL);
  330. }
  331. /* Now we found a non empty bucket, but it is a linked
  332. * list and we need to get a random element from the list.
  333. * The only sane way to do so is counting the elements and
  334. * select a random index. */
  335. listlen = 0;
  336. orighe = he;
  337. while(he) {
  338. he = he->next;
  339. listlen++;
  340. }
  341. listele = random() % listlen;
  342. he = orighe;
  343. while(listele--) he = he->next;
  344. return he;
  345. }
  346. /* Function to reverse bits. Algorithm from:
  347. * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
  348. /* 很神奇的翻转位 */
  349. static unsigned long rev(unsigned long v) {
  350. unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
  351. unsigned long mask = ~0;
  352. while ((s >>= 1) > 0) {
  353. mask ^= (mask << s);
  354. v = ((v >> s) & mask) | ((v << s) & ~mask);
  355. }
  356. return v;
  357. }
  358. /* dictScan() is used to iterate over the elements of a dictionary.
  359. *
  360. * Iterating works in the following way:
  361. *
  362. * 1) Initially you call the function using a cursor (v) value of 0.
  363. * 2) The function performs one step of the iteration, and returns the
  364. * new cursor value that you must use in the next call.
  365. * 3) When the returned cursor is 0, the iteration is complete.
  366. *
  367. * The function guarantees that all the elements that are present in the
  368. * dictionary from the start to the end of the iteration are returned.
  369. * However it is possible that some element is returned multiple time.
  370. *
  371. * For every element returned, the callback 'fn' passed as argument is
  372. * called, with 'privdata' as first argument and the dictionar entry
  373. * 'de' as second argument.
  374. *
  375. * HOW IT WORKS.
  376. *
  377. * The algorithm used in the iteration was designed by Pieter Noordhuis.
  378. * The main idea is to increment a cursor starting from the higher order
  379. * bits, that is, instead of incrementing the cursor normally, the bits
  380. * of the cursor are reversed, then the cursor is incremented, and finally
  381. * the bits are reversed again.
  382. *
  383. * This strategy is needed because the hash table may be resized from one
  384. * call to the other call of the same iteration.
  385. *
  386. * dict.c hash tables are always power of two in size, and they
  387. * use chaining, so the position of an element in a given table is given
  388. * always by computing the bitwise AND between Hash(key) and SIZE-1
  389. * (where SIZE-1 is always the mask that is equivalent to taking the rest
  390. * of the division between the Hash of the key and SIZE).
  391. *
  392. * For example if the current hash table size is 16, the mask is
  393. * (in binary) 1111. The position of a key in the hash table will be always
  394. * the last four bits of the hash output, and so forth.
  395. *
  396. * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
  397. *
  398. * If the hash table grows, elements can go anyway in one multiple of
  399. * the old bucket: for example let's say that we already iterated with
  400. * a 4 bit cursor 1100, since the mask is 1111 (hash table size = 16).
  401. *
  402. * If the hash table will be resized to 64 elements, and the new mask will
  403. * be 111111, the new buckets that you obtain substituting in ??1100
  404. * either 0 or 1, can be targeted only by keys that we already visited
  405. * when scanning the bucket 1100 in the smaller hash table.
  406. *
  407. * By iterating the higher bits first, because of the inverted counter, the
  408. * cursor does not need to restart if the table size gets bigger, and will
  409. * just continue iterating with cursors that don't have '1100' at the end,
  410. * nor any other combination of final 4 bits already explored.
  411. *
  412. * Similarly when the table size shrinks over time, for example going from
  413. * 16 to 8, If a combination of the lower three bits (the mask for size 8
  414. * is 111) was already completely explored, it will not be visited again
  415. * as we are sure that, we tried for example, both 0111 and 1111 (all the
  416. * variations of the higher bit) so we don't need to test it again.
  417. *
  418. * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
  419. *
  420. * Yes, this is true, but we always iterate the smaller one of the tables,
  421. * testing also all the expansions of the current cursor into the larger
  422. * table. So for example if the current cursor is 101 and we also have a
  423. * larger table of size 16, we also test (0)101 and (1)101 inside the larger
  424. * table. This reduces the problem back to having only one table, where
  425. * the larger one, if exists, is just an expansion of the smaller one.
  426. *
  427. * LIMITATIONS
  428. *
  429. * This iterator is completely stateless, and this is a huge advantage,
  430. * including no additional memory used.
  431. *
  432. * The disadvantages resulting from this design are:
  433. *
  434. * 1) It is possible that we return duplicated elements. However this is usually
  435. * easy to deal with in the application level.
  436. * 2) The iterator must return multiple elements per call, as it needs to always
  437. * return all the keys chained in a given bucket, and all the expansions, so
  438. * we are sure we don't miss keys moving.
  439. * 3) The reverse cursor is somewhat hard to understand at first, but this
  440. * comment is supposed to help.
  441. */
  442. /* 扫描方法 */
  443. unsigned long dictScan(dict *d,
  444. unsigned long v,
  445. dictScanFunction *fn,
  446. void *privdata)
  447. {
  448. dictht *t0, *t1;
  449. const dictEntry *de;
  450. unsigned long m0, m1;
  451. if (dictSize(d) == 0) return 0;
  452. if (!dictIsRehashing(d)) {
  453. t0 = &(d->ht[0]);
  454. m0 = t0->sizemask;
  455. /* Emit entries at cursor */
  456. de = t0->table[v & m0];
  457. while (de) {
  458. fn(privdata, de);
  459. de = de->next;
  460. }
  461. } else {
  462. t0 = &d->ht[0];
  463. t1 = &d->ht[1];
  464. /* Make sure t0 is the smaller and t1 is the bigger table */
  465. if (t0->size > t1->size) {
  466. t0 = &d->ht[1];
  467. t1 = &d->ht[0];
  468. }
  469. m0 = t0->sizemask;
  470. m1 = t1->sizemask;
  471. /* Emit entries at cursor */
  472. de = t0->table[v & m0];
  473. while (de) {
  474. fn(privdata, de);
  475. de = de->next;
  476. }
  477. /* Iterate over indices in larger table that are the expansion
  478. * of the index pointed to by the cursor in the smaller table */
  479. do {
  480. /* Emit entries at cursor */
  481. de = t1->table[v & m1];
  482. while (de) {
  483. fn(privdata, de);
  484. de = de->next;
  485. }
  486. /* Increment bits not covered by the smaller mask */
  487. v = (((v | m0) + 1) & ~m0) | (v & m0);
  488. /* Continue while bits covered by mask difference is non-zero */
  489. } while (v & (m0 ^ m1));
  490. }
  491. /* Set unmasked bits so incrementing the reversed cursor
  492. * operates on the masked bits of the smaller table */
  493. v |= ~m0;
  494. /* Increment the reverse cursor */
  495. v = rev(v);
  496. v++;
  497. v = rev(v);
  498. return v;
  499. }

原文发布于微信公众号 - 高性能服务器开发(easyserverdev)

原文发表时间:2018-04-05

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

相关文章

来自专栏IT笔记

SpringBoot开发案例之整合Spring-data-jpa进阶篇

前言 有人说 从 jdbc->jdbctemplate->hibernation/mybatis 再到 jpa,真当开发人员的学习时间不要钱?我觉得到 h/m ...

3267
来自专栏

Cassandra API60 Java 代码示例

文档地址 http://wiki.apache.org/cassandra/API06,实现了绝大部分示例

1974
来自专栏函数式编程语言及工具

SDP(3):ScalikeJDBC- JDBC-Engine:Fetching

  ScalikeJDBC在覆盖JDBC基本功能上是比较完整的,而且实现这些功能的方式比较简洁,运算效率方面自然会稍高一筹了。理论上用ScalikeJDBC作为...

3825
来自专栏技术之路

设计模式:装饰者模式

  装饰者模式么,在生活中我们是经常接触的。比如像我们这么快节奏的生活,好多都是早上去买煎饼。一般我们会这么说:“来一个粗粮煎饼,加两个鸡蛋,加一根肠” 或者:...

2108
来自专栏程序员同行者

python 解析json loads dumps

1412
来自专栏GIS讲堂

geotools中shp和geojson格式的相互转换

9203
来自专栏函数式编程语言及工具

SDP(9):MongoDB-Scala - data access and modeling

    MongoDB是一种文件型数据库,对数据格式没有硬性要求,所以可以实现灵活多变的数据存储和读取。MongoDB又是一种分布式数据库,与传统关系数据库不同...

3934
来自专栏跟着阿笨一起玩NET

JSON入门

JSON:JavaScript Object Notation 【JavaScript 对象表示法】

973
来自专栏葡萄城控件技术团队

深入浅出OOP(五): C#访问修饰符(Public/Private/Protected/Internal/Sealed/Constants)

访问修饰符(或者叫访问控制符)是面向对象语言的特性之一,用于对类、类成员函数、类成员变量进行访问控制。同时,访问控制符也是语法保留关键字,用于封装组件。 Pub...

3299
来自专栏小樱的经验随笔

BZOJ 1012: [JSOI2008]最大数maxnumber【线段树单点更新求最值,单调队列,多解】

1012: [JSOI2008]最大数maxnumber Time Limit: 3 Sec  Memory Limit: 162 MB Submit: 103...

3036

扫码关注云+社区

领取腾讯云代金券