1024555-20161113235348670-746615111.png
/**
 * Associates {@code value} with {@code key}, returning the previous value
 * (or {@code null} if the key was absent).
 */
public V put(K key, V value) {
    // Lazily allocate the bucket array on the first insertion.
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    // Null keys are handled by a dedicated code path.
    if (key == null) {
        return putForNullKey(value);
    }
    int h = sun.misc.Hashing.singleWordWangJenkinsHash(key);
    int bucket = indexFor(h, table.length);
    // Walk the bucket's collision chain; if an entry with an equal key
    // (same hash, then == or equals) exists, replace its value in place.
    for (HashMapEntry<K,V> entry = table[bucket]; entry != null; entry = entry.next) {
        Object existingKey;
        if (entry.hash == h && ((existingKey = entry.key) == key || key.equals(existingKey))) {
            V previous = entry.value;
            entry.value = value;
            entry.recordAccess(this);
            return previous;
        }
    }
    modCount++;                       // structural modification: trips fail-fast iterators
    addEntry(h, key, value, bucket);  // no match found: insert a new entry
    return null;
}
首先,先看下inflateTable方法,这个是初始化HashMap里面的线性表的空间:
private void inflateTable(int toSize) { // Find a power of 2 >= toSize int capacity = roundUpToPowerOf2(toSize); // Android-changed: Replace usage of Math.min() here because this method is // called from the <clinit> of runtime, at which point the native libraries // needed by Float.* might not be loaded. float thresholdFloat = capacity * loadFactor; if (thresholdFloat > MAXIMUM_CAPACITY + 1) { thresholdFloat = MAXIMUM_CAPACITY + 1; } threshold = (int) thresholdFloat; table = new HashMapEntry[capacity]; }
capacity就是这个线性表的长度,而capacity是通过roundUpToPowerOf2方法计算而来:
private static int roundUpToPowerOf2(int number) { // assert number >= 0 : "number must be non-negative"; int rounded = number >= MAXIMUM_CAPACITY ? MAXIMUM_CAPACITY : (rounded = Integer.highestOneBit(number)) != 0 ? (Integer.bitCount(number) > 1) ? rounded << 1 : rounded : 1; return rounded; }
先说下roundUpToPowerOf2的计算结果:计算出大于或者等于number的最近的2次幂的值。什么意思呢?直接举个例子:比如number为16,那么roundUpToPowerOf2结果就是16;number为17,roundUpToPowerOf2结果就是32;为什么呢?首先Integer.highestOneBit(number)这个方法的作用就是计算出距离number最近的并且不大于number的2次幂的值,也就是说number如果为16,Integer.highestOneBit(number)结果为16;number为17,Integer.highestOneBit(number)结果也为16,Integer.highestOneBit(number)内部是对number进行位操作去实现的,Integer.bitCount(number)的作用是求number的二进制表示中1的个数,Integer.bitCount(number) > 1说明number本身不是2的整数次幂(二进制中1的个数多于一个),此时需要把highestOneBit的结果再左移一位才能得到不小于number的2次幂,因此roundUpToPowerOf2返回的结果一定是2的n次方,如:
number= 4 那么roundUpToPowerOf2的返回值就是2^2 = 4 number = 5 那么roundUpToPowerOf2的返回值就是2^3 = 8
Android中的HashMap的index计算方式如下,h为key的hash值,length为HashMap的基础链表的长度,就是上面计算出来的capacity(2的n次方):
static int indexFor(int h, int length) { // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2"; return h & (length-1); }
所以length - 1就等于2^n - 1,你会发现2^n - 1的二进制表示每一位都是1,比如(2^4 - 1)的二进制就是1111,这时候把2^n - 1和h做与操作,得出的结果一定会小于或者等于2^n - 1,也就是说得出的index一定不会超出基础表的下标范围(最大为length - 1),所以数组下标不会产生越界;另外一点是当数组长度为2的n次幂的时候,不同的key算得的index相同的几率较小,index分布均匀,因为和h进行与操作的时候,掩码的低位每一位都是1,与完的结果就完整保留了h对应的低位,也就是说h & (2^n - 1)的结果完全取决于h(key的hash值)的低n位,只有低位相同的hash值才会得出相同的index,而不会由于掩码本身额外丢失信息影响到index的计算,那么只要根据key计算出来的hash值足够合理就能保证index是比较均匀分布的,所以indexFor这个方法里面h & (length-1)这种实现的目的就是用最小的代价把hash值均匀地映射到合法下标上,让index的计算只取决于hash值,只要index计算结果分布均匀,那么基础表的碰撞冲突就会减少,基础表中存取的元素也会分布均匀,存取速度就提高了,而不用遍历长链表,下面继续分析HashMap的put实现原理,通过indexFor计算出HashMap中基础表的数组下标后可直接获取表中的item信息,然后对比item的hash以及key,如果相等,那么就替换value,否则就再向基础表中添加一个item,代码如下:
// Excerpt from put(): scans bucket i's collision chain. An entry matches when
// its cached hash equals the key's hash AND the keys are identical (==) or
// equal (equals); on a match the old value is swapped out and returned.
for (HashMapEntry<K,V> e = table[i]; e != null; e = e.next) { Object k; if (e.hash == hash && ((k = e.key) == key || key.equals(k))) { V oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } }
在put方法中,添加item之前执行了modCount++,这个是记录基础表被修改(增删改)的次数,在多线程操作HashMap的时候做验证用的,后面再提,添加item的方法:
/**
 * Inserts a new mapping, first doubling the table when the size threshold
 * is reached and the target bucket is already occupied. After a resize the
 * table length changes indexFor's mask, so hash and bucket are recomputed.
 */
void addEntry(int hash, K key, V value, int bucketIndex) {
    if ((size >= threshold) && (null != table[bucketIndex])) {
        resize(2 * table.length);
        hash = (null != key) ? sun.misc.Hashing.singleWordWangJenkinsHash(key) : 0;
        bucketIndex = indexFor(hash, table.length);
    }
    createEntry(hash, key, value, bucketIndex);
}

/** Prepends a new entry to the head of the bucket's collision chain. */
void createEntry(int hash, K key, V value, int bucketIndex) {
    HashMapEntry<K,V> head = table[bucketIndex];
    table[bucketIndex] = new HashMapEntry<>(hash, key, value, head);
    size++;
}
// Repeated here for discussion: addEntry doubles the table when size reaches
// the threshold and the target bucket is occupied, then recomputes hash and
// bucket against the new table length before delegating to createEntry.
void addEntry(int hash, K key, V value, int bucketIndex) { if ((size >= threshold) && (null != table[bucketIndex])) { resize(2 * table.length); hash = (null != key) ? sun.misc.Hashing.singleWordWangJenkinsHash(key) : 0; bucketIndex = indexFor(hash, table.length); } createEntry(hash, key, value, bucketIndex); }
/**
 * Grows the bucket array to {@code newCapacity} and rehashes all entries.
 * Once MAXIMUM_CAPACITY is reached the threshold is pinned to
 * Integer.MAX_VALUE so no further resizes ever trigger.
 */
void resize(int newCapacity) {
    HashMapEntry[] current = table;
    int currentCapacity = current.length;
    if (currentCapacity == MAXIMUM_CAPACITY) {
        // Cannot grow any further: disable future resizes.
        threshold = Integer.MAX_VALUE;
        return;
    }
    HashMapEntry[] grown = new HashMapEntry[newCapacity];
    transfer(grown);
    table = grown;
    threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
}
/**
 * Rehashes every entry from the current table into {@code newTable}.
 * Each chain is consumed head-first and entries are prepended to their
 * new bucket, so chain order comes out reversed.
 */
void transfer(HashMapEntry[] newTable) {
    int newCapacity = newTable.length;
    for (HashMapEntry<K,V> chain : table) {
        HashMapEntry<K,V> entry = chain;
        while (entry != null) {
            HashMapEntry<K,V> following = entry.next;
            int slot = indexFor(entry.hash, newCapacity);
            entry.next = newTable[slot];
            newTable[slot] = entry;
            entry = following;
        }
    }
}
当size >= threshold(threshold为整个基础表size的0.75,在inflateTable方法中有定义):
// Repeated here for discussion: inflateTable computes the power-of-two
// capacity and derives threshold = capacity * loadFactor (loadFactor defaults
// to 0.75), which is the size at which put() will trigger a resize.
private void inflateTable(int toSize) { // Find a power of 2 >= toSize int capacity = roundUpToPowerOf2(toSize); // Android-changed: Replace usage of Math.min() here because this method is // called from the <clinit> of runtime, at which point the native libraries // needed by Float.* might not be loaded. float thresholdFloat = capacity * loadFactor; if (thresholdFloat > MAXIMUM_CAPACITY + 1) { thresholdFloat = MAXIMUM_CAPACITY + 1; } threshold = (int) thresholdFloat; table = new HashMapEntry[capacity]; }
loadFactor为0.75,所以在put的时候发现当前已经put的item个数已经达到了整个基础表空间的0.75,HashMap就会进行扩容,就是扩大基础表的大小,扩大为原来的两倍大小,然后把现有的表中的值一个个的复制到新表中,再用新表去覆盖旧表,就完成了基础表的扩容操作,所以这里我们来计算下put的复杂度:
综上:HashMap的put操作的复杂度为O(m),m为查找相同元素的时候遍历链表的次数,其中1 <= m <= n,n为链表的长度,所以如果HashMap的碰撞越少,链表长度n就越接近1,此时put的复杂度就越低,基本就是O(1),所以HashMap可以做到高速存储
/**
 * Returns the value mapped to {@code key}, or {@code null} when absent.
 * (A null return is ambiguous: the key may also be mapped to null.)
 */
public V get(Object key) {
    // Null keys live in a dedicated slot with their own lookup path.
    if (key == null) {
        return getForNullKey();
    }
    Entry<K,V> match = getEntry(key);
    return (match == null) ? null : match.getValue();
}
/**
 * Locates the entry for {@code key}, or returns {@code null} when no
 * mapping exists. Mirrors put(): hash the key, pick the bucket with
 * indexFor, then scan that bucket's collision chain.
 */
final Entry<K,V> getEntry(Object key) {
    if (size == 0) {
        return null;
    }
    int h = (key == null) ? 0 : sun.misc.Hashing.singleWordWangJenkinsHash(key);
    for (HashMapEntry<K,V> entry = table[indexFor(h, table.length)]; entry != null; entry = entry.next) {
        Object candidate;
        // Cheap hash compare first, then identity, then equals.
        if (entry.hash == h && ((candidate = entry.key) == key || (key != null && key.equals(candidate)))) {
            return entry;
        }
    }
    return null;
}
熟悉了put操作,那么get操作就是反过来的,通过key计算出hash,再通过indexFor计算出相应的数组下标index,然后从基础表中取出这个index中的元素,对比元素中的信息就可以了,get的复杂度:O(m),其中1 <= m <= n,所以put和get的复杂度是一样的,最坏的情况就是所有key算出来的hash值都一样,那么此时基础表中只有一个位置被占用,而这个位置上将会是一个很长的链表,此时HashMap的存取就退化为这个链表的遍历操作
// Fail-fast iterator base class shared by the key/value/entry iterators.
// It snapshots modCount into expectedModCount at construction; nextEntry()
// and remove() throw ConcurrentModificationException when the map has been
// structurally modified behind the iterator's back. The constructor and
// nextEntry() both use the same idiom to advance `index` past empty buckets
// until the next non-null chain head is found. remove() delegates to the
// outer map's removeEntryForKey and then re-syncs expectedModCount, so the
// iterator's own removals do not trip the fail-fast check.
private abstract class HashIterator<E> implements Iterator<E> { HashMapEntry<K,V> next; // next entry to return int expectedModCount; // For fast-fail int index; // current slot HashMapEntry<K,V> current; // current entry HashIterator() { expectedModCount = modCount; if (size > 0) { // advance to first entry HashMapEntry[] t = table; while (index < t.length && (next = t[index++]) == null) ; } } public final boolean hasNext() { return next != null; } final Entry<K,V> nextEntry() { if (modCount != expectedModCount) throw new ConcurrentModificationException(); HashMapEntry<K,V> e = next; if (e == null) throw new NoSuchElementException(); if ((next = e.next) == null) { HashMapEntry[] t = table; while (index < t.length && (next = t[index++]) == null) ; } current = e; return e; } public void remove() { if (current == null) throw new IllegalStateException(); if (modCount != expectedModCount) throw new ConcurrentModificationException(); Object k = current.key; current = null; HashMap.this.removeEntryForKey(k); expectedModCount = modCount; } }
这里面就涉及到了modCount,这个就是记录当前基础表的结构性修改次数,只有增、删这类改变表结构的操作成功时才会加1(单纯替换已有key的value并不会),expectedModCount是在迭代器创建的时候把当时的modCount赋值给它,从字面意思理解就是预期的修改次数,当expectedModCount != modCount的时候就会抛出ConcurrentModificationException,那么什么时候才会发生这种情况呢?典型的是多线程:当线程一put完一个item之后,modCount++,然后线程一开始通过迭代器去遍历HashMap,在还没遍历结束的时候线程二又去put或者remove了HashMap中的item,此时modCount++,这时候就会导致线程一在遍历的时候expectedModCount和modCount不一致,也就是说HashMap已经被修改过了,因此会抛出ConcurrentModificationException(单线程里在迭代过程中直接调用map的put/remove而不是迭代器的remove,同样会触发),在HashMap中实现了三种迭代器,键迭代器:KeyIterator,值迭代器:ValueIterator,键值对迭代器:EntryIterator,那么如何保证HashMap的线程安全性呢?
// Collections.synchronizedMap's backing wrapper: every Map operation is
// delegated to the wrapped map `m` inside synchronized(mutex), where mutex is
// either the wrapper itself or a caller-supplied lock object. The collection
// views (keySet/entrySet/values) are wrapped with Synchronized* views sharing
// the SAME mutex, so iteration can be made safe by the caller synchronizing
// on the map. Note equals() short-circuits the self-comparison before taking
// the lock, and serialization (writeObject) is also performed under the lock.
private static class SynchronizedMap<K,V> implements Map<K,V>, Serializable { private static final long serialVersionUID = 1978198479659022715L; private final Map<K,V> m; // Backing Map final Object mutex; // Object on which to synchronize SynchronizedMap(Map<K,V> m) { this.m = Objects.requireNonNull(m); mutex = this; } SynchronizedMap(Map<K,V> m, Object mutex) { this.m = m; this.mutex = mutex; } public int size() { synchronized (mutex) {return m.size();} } public boolean isEmpty() { synchronized (mutex) {return m.isEmpty();} } public boolean containsKey(Object key) { synchronized (mutex) {return m.containsKey(key);} } public boolean containsValue(Object value) { synchronized (mutex) {return m.containsValue(value);} } public V get(Object key) { synchronized (mutex) {return m.get(key);} } public V put(K key, V value) { synchronized (mutex) {return m.put(key, value);} } public V remove(Object key) { synchronized (mutex) {return m.remove(key);} } public void putAll(Map<? extends K, ? extends V> map) { synchronized (mutex) {m.putAll(map);} } public void clear() { synchronized (mutex) {m.clear();} } private transient Set<K> keySet = null; private transient Set<Map.Entry<K,V>> entrySet = null; private transient Collection<V> values = null; public Set<K> keySet() { synchronized (mutex) { if (keySet==null) keySet = new SynchronizedSet<>(m.keySet(), mutex); return keySet; } } public Set<Map.Entry<K,V>> entrySet() { synchronized (mutex) { if (entrySet==null) entrySet = new SynchronizedSet<>(m.entrySet(), mutex); return entrySet; } } public Collection<V> values() { synchronized (mutex) { if (values==null) values = new SynchronizedCollection<>(m.values(), mutex); return values; } } public boolean equals(Object o) { if (this == o) return true; synchronized (mutex) {return m.equals(o);} } public int hashCode() { synchronized (mutex) {return m.hashCode();} } public String toString() { synchronized (mutex) {return m.toString();} } // Override default methods in Map @Override 
// (continuation of SynchronizedMap: the Java 8 default-method overrides, each
// still a one-line synchronized delegation to the backing map.)
public V getOrDefault(Object k, V defaultValue) { synchronized (mutex) {return m.getOrDefault(k, defaultValue);} } @Override public void forEach(BiConsumer<? super K, ? super V> action) { synchronized (mutex) {m.forEach(action);} } @Override public void replaceAll(BiFunction<? super K, ? super V, ? extends V> function) { synchronized (mutex) {m.replaceAll(function);} } @Override public V putIfAbsent(K key, V value) { synchronized (mutex) {return m.putIfAbsent(key, value);} } @Override public boolean remove(Object key, Object value) { synchronized (mutex) {return m.remove(key, value);} } @Override public boolean replace(K key, V oldValue, V newValue) { synchronized (mutex) {return m.replace(key, oldValue, newValue);} } @Override public V replace(K key, V value) { synchronized (mutex) {return m.replace(key, value);} } @Override public V computeIfAbsent(K key, Function<? super K, ? extends V> mappingFunction) { synchronized (mutex) {return m.computeIfAbsent(key, mappingFunction);} } @Override public V computeIfPresent(K key, BiFunction<? super K, ? super V, ? extends V> remappingFunction) { synchronized (mutex) {return m.computeIfPresent(key, remappingFunction);} } @Override public V compute(K key, BiFunction<? super K, ? super V, ? extends V> remappingFunction) { synchronized (mutex) {return m.compute(key, remappingFunction);} } @Override public V merge(K key, V value, BiFunction<? super V, ? super V, ? extends V> remappingFunction) { synchronized (mutex) {return m.merge(key, value, remappingFunction);} } private void writeObject(ObjectOutputStream s) throws IOException { synchronized (mutex) {s.defaultWriteObject();} } }
这个其实跟Hashtable类似,就是对方法级别的操作进行synchronized修饰以达到方法级的操作是线程安全的
本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。
我来说两句