说到哈希表,相信初通数据结构的人士应该耳熟能详,其相关的结构细节虽然并不繁复,但就快速查找数据而言,该结构优异的性能表现绝对可算一枝独秀,平均情况下O(1)的时间复杂度更是令人心旷神怡 :),这不,在近几天编写的一个简短程序中,我自己便遇到了需要使用哈希表的情况,由于自己惯于使用MinGW,其中的STL(SGI版本)刚好提供了一个优雅的哈希表的模板实现,名曰hashtable,并在此基础之上进一步构建起了hash_map、hash_multimap、hash_set以及hash_multiset,正好与标准模板库中的map与set容器一一对应,此番作为的确大快人心,可惜的是,作为SGI单独的扩展模块,哈希表现今仍然不在C++标准之列,这不能不令人扼腕叹息,所以即便我在MinGW中将hashtable用的生龙活虎,但只要稍稍转变一下编程环境,譬如转至MS的VS,那么等待我的大抵也就是一大堆的未定义错误,而上述的什么hashtable则更是踪迹全无……虽然有心人士早已提供了很多第三方库(如STLPort)用以解围,但这般编程境况仍然给我带来了些许不和谐之感,总觉着不是太合乎标准正道( 在此严重期待C++新一代标准的早日降临 :) ),没办法,最后想想还是决定走一走重造车轮的荆棘路,自己来实现一个简单的hashtable,当然,追求如STL库中那般的通用性并不是我的编程初衷,相反,简单够用倒是我的编写原则,既然如此,那么事不宜迟,就让我马上动手吧 :)。
既然需要编写一个ADT,那么就先让我做一个最简单的哈希表设计,首先哈希函数,以及哈希键值函数,感觉应该以模板参数提供,以此来增加灵活性,具体的当以仿函数(函数对象)的形式实现,而原程序中则应该提供针对部分常用类型的仿函数实现,以方便使用。
然后的便是冲突的处理,对于哈希值相同的元素,我本想采用简单的一次线性探测方式,但经过后来的几番实践,发现线性探测的实现方式会引发很多问题,其中对于探测失败的处理尤为恼人,建立公共溢出区或是将原哈希表增长等处理感觉都不是很清晰,所以最终还是改成了链地址法(拉链法),顺便说一句,SGI版本中的哈希实现也是用了这种方法 :)
最后就是模块应该提供的外部接口了,首先自然是插入和删除操作,接着便是查找,除了这些必要的功能之外,我想在不甚影响程序整体结构以及效率的情况下仍可以适当添加,不过本着KISS原则,这些就以后再说吧 :)
好了,废话到此结束,对于编程这个行当,有时候代码胜于千言,所以接下来便贴出我编写一点代码,总体来说颇为拙劣,希望大家不吝嘲笑,多多海涵 :)
/*
Name: heHashTable.h
Copyright: No Copyright
Author: Hugo Yu
Date: 16-11-09 16:20
Description: Provide A Very Simple Hash Table :) ( Because Of The C++ Standard ... :( )
*/
#ifndef HE_HASHTABLE_H
#define HE_HASHTABLE_H
#include <vector>
using std::vector;
#include <algorithm>
using std::lower_bound;
#include <string>
using std::string;
//prime array
static const unsigned long PrimeTable[] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
//the size of prime array
static const unsigned long PrimeTableSize = sizeof( PrimeTable );
//get the first prime that is bigger than size
unsigned long GetUpperPrime( unsigned long size )
{
const unsigned long* first = PrimeTable;
const unsigned long* last = PrimeTable + PrimeTableSize;
const unsigned long* pos = std::lower_bound( first, last, size );
if( pos != last )
return *pos;
else
return PrimeTable[PrimeTableSize-1];
}
template<typename T>
class DefaultHashKey
{
public:
unsigned long operator() ( const T& t ) const
{
return (unsigned long)t;
}
};
template<>
class DefaultHashKey<std::string>//ELFhash algorithm
{
public:
unsigned long operator() ( const std::string& str ) const
{
const char* key = str.c_str();
unsigned long h = 0;
while( *key )
{
h = (h<<4) + *key++;
unsigned long g = h & 0xF0000000L;
if( g ) h ^= g>>24;
h &= ~g;
}
return h;
}
};
class DefaultHashFunc
{
public:
unsigned long operator() ( unsigned long hashKey, unsigned long tableSize ) const
{
return hashKey % tableSize;
}
};
template<typename T, typename HashKey = DefaultHashKey<T>, typename HashFunc = DefaultHashFunc>
class heHashTable
{
private:
template<typename T1>
struct ValueList//the value list
{
T1 value;
ValueList* pNext;
ValueList():value(T1()),pNext(NULL) {};
ValueList( const T1& val, ValueList* p = NULL ):value(val),pNext(p) {};
private:
ValueList( const ValueList& vl );
ValueList& operator = ( const ValueList& vl );
};
public:
typedef ValueList<T>* Iterator;//just define simple iterator
typedef const ValueList<T>* ConstIterator;
public:
heHashTable( unsigned long size );
~heHashTable();
Iterator Insert( const T& val );//return the element's iterator that is inserted
Iterator Remove( Iterator it );
Iterator Remove( const T& val );//return the following element's iterator that is removed
ConstIterator Find( const T& val ) const;//find the element by iterator
Iterator Find( const T& val );
bool IsIn( const T& val ) const;//just for the convience
unsigned long Elements() const { return m_num_elements; }//get the elements count
private:
unsigned long getPos( const T& val ) const;//get the insert position by HashKey and HashFunc
vector< ValueList<T>* > m_buckets;
unsigned long m_num_elements;
HashKey m_hashKey;
HashFunc m_hashFunc;
};
#endif
#ifndef HE_HASHTABLE_CPP
#define HE_HASHTABLE_CPP
#include "heHashTable.h"
//should use memory pool...
template<typename T,typename HashKey,typename HashFunc>
heHashTable<T,HashKey,HashFunc>::heHashTable<T,HashKey,HashFunc>( unsigned long size )
:m_buckets( GetUpperPrime( size ), NULL ),m_num_elements(0)
{
}
template<typename T,typename HashKey,typename HashFunc>
heHashTable<T,HashKey,HashFunc>::~heHashTable<T,HashKey,HashFunc>()
{
typename std::vector< ValueList<T>* >::iterator it = m_buckets.begin();
for( ; it != m_buckets.end(); ++it )
{
if( *it != NULL )
{
ValueList<T>* pNxt = (*it)->pNext;
cout<<(*it)->value<<endl;
delete (*it);
while( pNxt )
{
pNxt = this->Remove( pNxt );
}
}
}
}
template<typename T,typename HashKey,typename HashFunc>
typename heHashTable<T,HashKey,HashFunc>::Iterator
heHashTable<T,HashKey,HashFunc>::Insert( const T& val )
{
unsigned long pos = getPos( val );
if( m_buckets[pos] == NULL )
{
m_buckets[pos] = new ValueList<T>( val );
}
else
{
ValueList<T>* pNxt = m_buckets[pos];
m_buckets[pos] = new ValueList<T>( val, pNxt );
}
++m_num_elements;
return m_buckets[pos];
}
template<typename T,typename HashKey,typename HashFunc>
typename heHashTable<T,HashKey,HashFunc>::Iterator
heHashTable<T,HashKey,HashFunc>::Remove( Iterator it )
{
if( it != NULL )
{
ValueList<T>* pNxt = it->pNext;
delete it;
--m_num_elements;
return pNxt;
}
return NULL;
}
template<typename T,typename HashKey,typename HashFunc>
typename heHashTable<T,HashKey,HashFunc>::Iterator
heHashTable<T,HashKey,HashFunc>::Remove( const T& val )
{
unsigned long pos = getPos( val );
if( m_buckets[pos]->value == val )
{
ValueList<T>* tmp = m_buckets[pos]->pNext;
delete m_buckets[pos];
m_buckets[pos] = tmp;
--m_num_elements;
return m_buckets[pos];
}
else
{
ValueList<T>* pPre = m_buckets[pos];
ValueList<T>* pNxt = m_buckets[pos]->pNext;
while( pNxt )
{
if( pNxt->value == val )
{
pPre->pNext = pNxt->pNext;
delete pNxt;
--m_num_elements;
return pPre->pNext;
}
pPre = pNxt;
pNxt = pNxt->pNext;
}
}
return NULL;
}
template<typename T,typename HashKey,typename HashFunc>
typename heHashTable<T,HashKey,HashFunc>::ConstIterator
heHashTable<T,HashKey,HashFunc>::Find( const T& val ) const
{
unsigned long pos = getPos( val );
if( m_buckets[pos] == NULL || m_buckets[pos]->value == val )//val need to support operator '==‘
return m_buckets[pos];
else
{
ValueList<T>* pNxt = m_buckets[pos]->pNext;
while( pNxt )
{
if( pNxt->value == val )
return pNxt;
pNxt = pNxt->pNext;
}
}
return NULL;
}
template<typename T,typename HashKey,typename HashFunc>
typename heHashTable<T,HashKey,HashFunc>::Iterator
heHashTable<T,HashKey,HashFunc>::Find( const T& val )
{
unsigned long pos = getPos( val );
if( m_buckets[pos]->value == val )//val need to support operator '==‘
return m_buckets[pos];
else
{
ValueList<T>* pNxt = m_buckets[pos]->pNext;
while( pNxt )
{
if( pNxt->value == val )
return pNxt;
pNxt = pNxt->pNext;
}
}
return NULL;
}
template<typename T,typename HashKey,typename HashFunc>
inline bool heHashTable<T,HashKey,HashFunc>::IsIn( const T& val ) const
{
return this->Find( val ) == NULL ? false : true;
}
template<typename T,typename HashKey,typename HashFunc>
inline unsigned long heHashTable<T,HashKey,HashFunc>::getPos( const T& val ) const
{
return m_hashFunc( m_hashKey( val ), m_buckets.size() ) ;
}
#endif