前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >(三)dict哈希结构3

(三)dict哈希结构3

作者头像
范蠡
发布2018-04-13 15:05:11
5690
发布2018-04-13 15:05:11
举报
  1. /* This function performs just a step of rehashing, and only if there are
  2. * no safe iterators bound to our hash table. When we have iterators in the
  3. * middle of a rehashing we can't mess with the two hash tables otherwise
  4. * some element can be missed or duplicated.
  5. *
  6. * This function is called by common lookup or update operations in the
  7. * dictionary so that the hash table automatically migrates from H1 to H2
  8. * while it is actively used. */
  9. /* 当没有迭代器时候,进行重定位算法 */
  10. static void _dictRehashStep(dict *d) {
  11. if (d->iterators == 0) dictRehash(d,1);
  12. }
  13. /* Add an element to the target hash table */
  14. /* 添加一个dicEntry */
  15. int dictAdd(dict *d, void *key, void *val)
  16. {
  17. dictEntry *entry = dictAddRaw(d,key);
  18. if (!entry) return DICT_ERR;
  19. dictSetVal(d, entry, val);
  20. return DICT_OK;
  21. }
  22. /* Low level add. This function adds the entry but instead of setting
  23. * a value returns the dictEntry structure to the user, that will make
  24. * sure to fill the value field as he wishes.
  25. *
  26. * This function is also directly exposed to user API to be called
  27. * mainly in order to store non-pointers inside the hash value, example:
  28. *
  29. * entry = dictAddRaw(dict,mykey);
  30. * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
  31. *
  32. * Return values:
  33. *
  34. * If key already exists NULL is returned.
  35. * If key was added, the hash entry is returned to be manipulated by the caller.
  36. */
  37. /* 添加一个指定key值的Entry */
  38. dictEntry *dictAddRaw(dict *d, void *key)
  39. {
  40. int index;
  41. dictEntry *entry;
  42. dictht *ht;
  43. if (dictIsRehashing(d)) _dictRehashStep(d);
  44. /* Get the index of the new element, or -1 if
  45. * the element already exists. */
  46. /* 如果指定的key已经存在,则直接返回NULL说明添加失败 */
  47. if ((index = _dictKeyIndex(d, key)) == -1)
  48. return NULL;
  49. /* Allocate the memory and store the new entry */
  50. ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
  51. entry = zmalloc(sizeof(*entry));
  52. entry->next = ht->table[index];
  53. ht->table[index] = entry;
  54. ht->used++;
  55. /* Set the hash entry fields. */
  56. dictSetKey(d, entry, key);
  57. return entry;
  58. }
  59. /* Add an element, discarding the old if the key already exists.
  60. * Return 1 if the key was added from scratch, 0 if there was already an
  61. * element with such key and dictReplace() just performed a value update
  62. * operation. */
  63. /* 替换一个子字典集,如果不存在直接添加,存在,覆盖val的值 */
  64. int dictReplace(dict *d, void *key, void *val)
  65. {
  66. dictEntry *entry, auxentry;
  67. /* Try to add the element. If the key
  68. * does not exists dictAdd will suceed. */
  69. //不存在,这个key直接添加
  70. if (dictAdd(d, key, val) == DICT_OK)
  71. return 1;
  72. /* It already exists, get the entry */
  73. entry = dictFind(d, key);
  74. /* Set the new value and free the old one. Note that it is important
  75. * to do that in this order, as the value may just be exactly the same
  76. * as the previous one. In this context, think to reference counting,
  77. * you want to increment (set), and then decrement (free), and not the
  78. * reverse. */
  79. //赋值方法
  80. auxentry = *entry;
  81. dictSetVal(d, entry, val);
  82. dictFreeVal(d, &auxentry);
  83. return 0;
  84. }
  85. /* dictReplaceRaw() is simply a version of dictAddRaw() that always
  86. * returns the hash entry of the specified key, even if the key already
  87. * exists and can't be added (in that case the entry of the already
  88. * existing key is returned.)
  89. *
  90. * See dictAddRaw() for more information. */
  91. /* 添加字典,没有函数方法,如果存在,就不添加 */
  92. dictEntry *dictReplaceRaw(dict *d, void *key) {
  93. dictEntry *entry = dictFind(d,key);
  94. return entry ? entry : dictAddRaw(d,key);
  95. }
  96. /* Search and remove an element */
  97. /* 删除给定key的结点,可控制是否调用释放方法 */
  98. static int dictGenericDelete(dict *d, const void *key, int nofree)
  99. {
  100. unsigned int h, idx;
  101. dictEntry *he, *prevHe;
  102. int table;
  103. if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
  104. if (dictIsRehashing(d)) _dictRehashStep(d);
  105. //计算key对应的哈希索引
  106. h = dictHashKey(d, key);
  107. for (table = 0; table <= 1; table++) {
  108. idx = h & d->ht[table].sizemask;
  109. //找到具体的索引对应的结点
  110. he = d->ht[table].table[idx];
  111. prevHe = NULL;
  112. while(he) {
  113. if (dictCompareKeys(d, key, he->key)) {
  114. /* Unlink the element from the list */
  115. if (prevHe)
  116. prevHe->next = he->next;
  117. else
  118. d->ht[table].table[idx] = he->next;
  119. if (!nofree) {
  120. //判断是否需要调用dict定义的free方法
  121. dictFreeKey(d, he);
  122. dictFreeVal(d, he);
  123. }
  124. zfree(he);
  125. d->ht[table].used--;
  126. return DICT_OK;
  127. }
  128. prevHe = he;
  129. he = he->next;
  130. }
  131. if (!dictIsRehashing(d)) break;
  132. }
  133. return DICT_ERR; /* not found */
  134. }
  135. /* 会调用free方法的删除方法 */
  136. int dictDelete(dict *ht, const void *key) {
  137. return dictGenericDelete(ht,key,0);
  138. }
  139. /* 不会调用free方法的删除方法 */
  140. int dictDeleteNoFree(dict *ht, const void *key) {
  141. return dictGenericDelete(ht,key,1);
  142. }
  143. /* Destroy an entire dictionary */
  144. /* 清空整个哈希表 */
  145. int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
  146. unsigned long i;
  147. /* Free all the elements */
  148. for (i = 0; i < ht->size && ht->used > 0; i++) {
  149. dictEntry *he, *nextHe;
  150. //每次情况会调用回调方法
  151. if (callback && (i & 65535) == 0) callback(d->privdata);
  152. if ((he = ht->table[i]) == NULL) continue;
  153. while(he) {
  154. //依次释放结点
  155. nextHe = he->next;
  156. dictFreeKey(d, he);
  157. dictFreeVal(d, he);
  158. zfree(he);
  159. ht->used--;
  160. he = nextHe;
  161. }
  162. }
  163. /* Free the table and the allocated cache structure */
  164. zfree(ht->table);
  165. /* Re-initialize the table */
  166. _dictReset(ht);
  167. return DICT_OK; /* never fails */
  168. }
  169. /* Clear & Release the hash table */
  170. /* 重置字典总类,清空2张表 */
  171. void dictRelease(dict *d)
  172. {
  173. _dictClear(d,&d->ht[0],NULL);
  174. _dictClear(d,&d->ht[1],NULL);
  175. zfree(d);
  176. }
  177. /* 根据key返回具体的字典集 */
  178. dictEntry *dictFind(dict *d, const void *key)
  179. {
  180. dictEntry *he;
  181. unsigned int h, idx, table;
  182. if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
  183. if (dictIsRehashing(d)) _dictRehashStep(d);
  184. h = dictHashKey(d, key);
  185. for (table = 0; table <= 1; table++) {
  186. idx = h & d->ht[table].sizemask;
  187. he = d->ht[table].table[idx];
  188. while(he) {
  189. if (dictCompareKeys(d, key, he->key))
  190. return he;
  191. he = he->next;
  192. }
  193. if (!dictIsRehashing(d)) return NULL;
  194. }
  195. return NULL;
  196. }
  197. /* 获取目标字典集的方法 */
  198. void *dictFetchValue(dict *d, const void *key) {
  199. dictEntry *he;
  200. he = dictFind(d,key);
  201. /* 获取字典集的方法 */
  202. return he ? dictGetVal(he) : NULL;
  203. }
  204. /* A fingerprint is a 64 bit number that represents the state of the dictionary
  205. * at a given time, it's just a few dict properties xored together.
  206. * When an unsafe iterator is initialized, we get the dict fingerprint, and check
  207. * the fingerprint again when the iterator is released.
  208. * If the two fingerprints are different it means that the user of the iterator
  209. * performed forbidden operations against the dictionary while iterating. */
  210. /* 通过指纹来禁止每个不安全的哈希迭代器的非法操作,每个不安全迭代器只能有一个指纹 */
  211. long long dictFingerprint(dict *d) {
  212. long long integers[6], hash = 0;
  213. int j;
  214. integers[0] = (long) d->ht[0].table;
  215. integers[1] = d->ht[0].size;
  216. integers[2] = d->ht[0].used;
  217. integers[3] = (long) d->ht[1].table;
  218. integers[4] = d->ht[1].size;
  219. integers[5] = d->ht[1].used;
  220. /* We hash N integers by summing every successive integer with the integer
  221. * hashing of the previous sum. Basically:
  222. *
  223. * Result = hash(hash(hash(int1)+int2)+int3) ...
  224. *
  225. * This way the same set of integers in a different order will (likely) hash
  226. * to a different number. */
  227. for (j = 0; j < 6; j++) {
  228. hash += integers[j];
  229. /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
  230. hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
  231. hash = hash ^ (hash >> 24);
  232. hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
  233. hash = hash ^ (hash >> 14);
  234. hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
  235. hash = hash ^ (hash >> 28);
  236. hash = hash + (hash << 31);
  237. }
  238. return hash;
  239. }
  240. /* 获取哈希迭代器,默认不安全的 */
  241. dictIterator *dictGetIterator(dict *d)
  242. {
  243. dictIterator *iter = zmalloc(sizeof(*iter));
  244. iter->d = d;
  245. iter->table = 0;
  246. iter->index = -1;
  247. iter->safe = 0;
  248. iter->entry = NULL;
  249. iter->nextEntry = NULL;
  250. return iter;
  251. }
  252. /* 获取安全哈希迭代器 */
  253. dictIterator *dictGetSafeIterator(dict *d) {
  254. dictIterator *i = dictGetIterator(d);
  255. i->safe = 1;
  256. return i;
  257. }
  258. /* 迭代器获取下一个集合点 */
  259. dictEntry *dictNext(dictIterator *iter)
  260. {
  261. while (1) {
  262. if (iter->entry == NULL) {
  263. dictht *ht = &iter->d->ht[iter->table];
  264. if (iter->index == -1 && iter->table == 0) {
  265. //如果迭代器index下标为-1说明还没开始使用,设置迭代器的指纹或增加引用计数量
  266. if (iter->safe)
  267. iter->d->iterators++;
  268. else
  269. iter->fingerprint = dictFingerprint(iter->d);
  270. }
  271. //迭代器下标递增
  272. iter->index++;
  273. if (iter->index >= (long) ht->size) {
  274. if (dictIsRehashing(iter->d) && iter->table == 0) {
  275. iter->table++;
  276. iter->index = 0;
  277. ht = &iter->d->ht[1];
  278. } else {
  279. break;
  280. }
  281. }
  282. //根据下标选择集合点
  283. iter->entry = ht->table[iter->index];
  284. } else {
  285. iter->entry = iter->nextEntry;
  286. }
  287. if (iter->entry) {
  288. /* We need to save the 'next' here, the iterator user
  289. * may delete the entry we are returning. */
  290. iter->nextEntry = iter->entry->next;
  291. return iter->entry;
  292. }
  293. }
  294. return NULL;
  295. }
  296. /* 释放迭代器 */
  297. void dictReleaseIterator(dictIterator *iter)
  298. {
  299. if (!(iter->index == -1 && iter->table == 0)) {
  300. if (iter->safe)
  301. iter->d->iterators--;
  302. else
  303. //这时判断指纹是否还是之前定义的那个
  304. assert(iter->fingerprint == dictFingerprint(iter->d));
  305. }
  306. zfree(iter);
  307. }
  308. /* Return a random entry from the hash table. Useful to
  309. * implement randomized algorithms */
  310. /* 随机获取一个集合点 */
  311. dictEntry *dictGetRandomKey(dict *d)
  312. {
  313. dictEntry *he, *orighe;
  314. unsigned int h;
  315. int listlen, listele;
  316. if (dictSize(d) == 0) return NULL;
  317. if (dictIsRehashing(d)) _dictRehashStep(d);
  318. if (dictIsRehashing(d)) {
  319. do {
  320. //随机数向2个表格的总数求余运算
  321. h = random() % (d->ht[0].size+d->ht[1].size);
  322. he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
  323. d->ht[0].table[h];
  324. } while(he == NULL);
  325. } else {
  326. do {
  327. h = random() & d->ht[0].sizemask;
  328. he = d->ht[0].table[h];
  329. } while(he == NULL);
  330. }
  331. /* Now we found a non empty bucket, but it is a linked
  332. * list and we need to get a random element from the list.
  333. * The only sane way to do so is counting the elements and
  334. * select a random index. */
  335. listlen = 0;
  336. orighe = he;
  337. while(he) {
  338. he = he->next;
  339. listlen++;
  340. }
  341. listele = random() % listlen;
  342. he = orighe;
  343. while(listele--) he = he->next;
  344. return he;
  345. }
  346. /* Function to reverse bits. Algorithm from:
  347. * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
  348. /* 很神奇的翻转位 */
  349. static unsigned long rev(unsigned long v) {
  350. unsigned long s = 8 * sizeof(v); // bit size; must be power of 2
  351. unsigned long mask = ~0;
  352. while ((s >>= 1) > 0) {
  353. mask ^= (mask << s);
  354. v = ((v >> s) & mask) | ((v << s) & ~mask);
  355. }
  356. return v;
  357. }
  358. /* dictScan() is used to iterate over the elements of a dictionary.
  359. *
  360. * Iterating works in the following way:
  361. *
  362. * 1) Initially you call the function using a cursor (v) value of 0.
  363. * 2) The function performs one step of the iteration, and returns the
  364. * new cursor value that you must use in the next call.
  365. * 3) When the returned cursor is 0, the iteration is complete.
  366. *
  367. * The function guarantees that all the elements that are present in the
  368. * dictionary from the start to the end of the iteration are returned.
  369. * However it is possible that some element is returned multiple time.
  370. *
  371. * For every element returned, the callback 'fn' passed as argument is
  372. * called, with 'privdata' as first argument and the dictionar entry
  373. * 'de' as second argument.
  374. *
  375. * HOW IT WORKS.
  376. *
  377. * The algorithm used in the iteration was designed by Pieter Noordhuis.
  378. * The main idea is to increment a cursor starting from the higher order
  379. * bits, that is, instead of incrementing the cursor normally, the bits
  380. * of the cursor are reversed, then the cursor is incremented, and finally
  381. * the bits are reversed again.
  382. *
  383. * This strategy is needed because the hash table may be resized from one
  384. * call to the other call of the same iteration.
  385. *
  386. * dict.c hash tables are always power of two in size, and they
  387. * use chaining, so the position of an element in a given table is given
  388. * always by computing the bitwise AND between Hash(key) and SIZE-1
  389. * (where SIZE-1 is always the mask that is equivalent to taking the rest
  390. * of the division between the Hash of the key and SIZE).
  391. *
  392. * For example if the current hash table size is 16, the mask is
  393. * (in binary) 1111. The position of a key in the hash table will be always
  394. * the last four bits of the hash output, and so forth.
  395. *
  396. * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
  397. *
  398. * If the hash table grows, elements can go anyway in one multiple of
  399. * the old bucket: for example let's say that we already iterated with
  400. * a 4 bit cursor 1100, since the mask is 1111 (hash table size = 16).
  401. *
  402. * If the hash table will be resized to 64 elements, and the new mask will
  403. * be 111111, the new buckets that you obtain substituting in ??1100
  404. * either 0 or 1, can be targeted only by keys that we already visited
  405. * when scanning the bucket 1100 in the smaller hash table.
  406. *
  407. * By iterating the higher bits first, because of the inverted counter, the
  408. * cursor does not need to restart if the table size gets bigger, and will
  409. * just continue iterating with cursors that don't have '1100' at the end,
  410. * nor any other combination of final 4 bits already explored.
  411. *
  412. * Similarly when the table size shrinks over time, for example going from
  413. * 16 to 8, If a combination of the lower three bits (the mask for size 8
  414. * is 111) was already completely explored, it will not be visited again
  415. * as we are sure that, we tried for example, both 0111 and 1111 (all the
  416. * variations of the higher bit) so we don't need to test it again.
  417. *
  418. * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
  419. *
  420. * Yes, this is true, but we always iterate the smaller one of the tables,
  421. * testing also all the expansions of the current cursor into the larger
  422. * table. So for example if the current cursor is 101 and we also have a
  423. * larger table of size 16, we also test (0)101 and (1)101 inside the larger
  424. * table. This reduces the problem back to having only one table, where
  425. * the larger one, if exists, is just an expansion of the smaller one.
  426. *
  427. * LIMITATIONS
  428. *
  429. * This iterator is completely stateless, and this is a huge advantage,
  430. * including no additional memory used.
  431. *
  432. * The disadvantages resulting from this design are:
  433. *
  434. * 1) It is possible that we return duplicated elements. However this is usually
  435. * easy to deal with in the application level.
  436. * 2) The iterator must return multiple elements per call, as it needs to always
  437. * return all the keys chained in a given bucket, and all the expansions, so
  438. * we are sure we don't miss keys moving.
  439. * 3) The reverse cursor is somewhat hard to understand at first, but this
  440. * comment is supposed to help.
  441. */
  442. /* 扫描方法 */
  443. unsigned long dictScan(dict *d,
  444. unsigned long v,
  445. dictScanFunction *fn,
  446. void *privdata)
  447. {
  448. dictht *t0, *t1;
  449. const dictEntry *de;
  450. unsigned long m0, m1;
  451. if (dictSize(d) == 0) return 0;
  452. if (!dictIsRehashing(d)) {
  453. t0 = &(d->ht[0]);
  454. m0 = t0->sizemask;
  455. /* Emit entries at cursor */
  456. de = t0->table[v & m0];
  457. while (de) {
  458. fn(privdata, de);
  459. de = de->next;
  460. }
  461. } else {
  462. t0 = &d->ht[0];
  463. t1 = &d->ht[1];
  464. /* Make sure t0 is the smaller and t1 is the bigger table */
  465. if (t0->size > t1->size) {
  466. t0 = &d->ht[1];
  467. t1 = &d->ht[0];
  468. }
  469. m0 = t0->sizemask;
  470. m1 = t1->sizemask;
  471. /* Emit entries at cursor */
  472. de = t0->table[v & m0];
  473. while (de) {
  474. fn(privdata, de);
  475. de = de->next;
  476. }
  477. /* Iterate over indices in larger table that are the expansion
  478. * of the index pointed to by the cursor in the smaller table */
  479. do {
  480. /* Emit entries at cursor */
  481. de = t1->table[v & m1];
  482. while (de) {
  483. fn(privdata, de);
  484. de = de->next;
  485. }
  486. /* Increment bits not covered by the smaller mask */
  487. v = (((v | m0) + 1) & ~m0) | (v & m0);
  488. /* Continue while bits covered by mask difference is non-zero */
  489. } while (v & (m0 ^ m1));
  490. }
  491. /* Set unmasked bits so incrementing the reversed cursor
  492. * operates on the masked bits of the smaller table */
  493. v |= ~m0;
  494. /* Increment the reverse cursor */
  495. v = rev(v);
  496. v++;
  497. v = rev(v);
  498. return v;
  499. }
本文参与 腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2018-04-05,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 高性能服务器开发 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档