【C++】unordered_set和unordered_map的实现

羚羊角

发布于 2025-07-11 11:09:27

11900

代码可运行

文章被收录于专栏：羚羊角的特别专栏羚羊角的特别专栏

运行总次数：0

代码可运行

本篇unordered_set和unordered_map的实现基于：

自己实现的hash表【C++】哈希表的实现（链地址法）

与map和set差不多的实现手法：【C++】模拟实现map和set

1.实现出复⽤哈希表的框架，并⽀持insert

创建3个头文件和一个源文件。

这个HashTable.h文件可以用之前写好的，可直接拷贝，再进行修改。

key参数就⽤K，value参数就⽤V，哈希表中的数据类型，我们使⽤T。

//HashTable.h文件
#pragma once
#include <iostream>
#include <vector>
#include<string>
using namespace std;

inline unsigned long __stl_next_prime(unsigned long n)
{
	// Note: assumes long is at least 32 bits.
	static const int __stl_num_primes = 28;
	static const unsigned long __stl_prime_list[__stl_num_primes] =
	{
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		 1572869, 3145739, 6291469, 12582917, 25165843,
		 50331653, 100663319, 201326611, 402653189, 805306457,
		 1610612741, 3221225473, 4294967291

	};
	const unsigned long* first = __stl_prime_list;
	const unsigned long* last = __stl_prime_list + __stl_num_primes;
	const unsigned long* pos = lower_bound(first, last, n);
	return pos == last ? *(last - 1) : *pos;
}
namespace hash_bucket  // 链地址法/哈希桶
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	template<class K>
	struct HashFunc
	{
		size_t operator()(const K& key)
		{
			return (size_t)key;
		}
	};

	template<>
	struct HashFunc<string>
	{
		size_t operator()(const string& s)
		{
			size_t hashi = 0;
			for(auto ch : s)
			{ 
				hashi += ch;
				hashi *= 131;
			}
			return hashi;
		}
	};

	template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		HashTable()
			:_table(__stl_next_prime(0))
			,_n(0)
		{}

		bool Insert(const T& data)
		{
			KeyOfT kot;
			if (Find(kot(data)));
				return false;

			Hash hash;
			if (_n == _table.size()) //扩容
			{
				vector<Node*> newtable(__stl_next_prime(_table.size() + 1));//创建新表
				for (int i = 0; i < _table.size(); i++) //遍历旧表
				{
					Node* cur = _table[i];
					while (cur) //遍历当前位置挂的所有节点
					{
						Node* next = cur->_next; //记录cur的下一个节点
						size_t hash0 = hash(kot(cur->_data)) % newtable.size(); //计算在新表的位置
						//直接把节点头插到新表
						cur->_next = newtable[hash0];
						newtable[hash0] = cur;
						cur = next; //遍历这条链上的下一个节点
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable);//旧表新表互换
			}

			size_t hashi = hash(kot(data)) % _table.size(); //映射位置
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			_n++;
			return true;
		}

		Node* Find(const K& key)
		{
			KeyOfT kot;
			Hash hash;
			size_t hashi = hash(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return cur; //找到了
				}
				cur = cur->_next;
			}
			return nullptr; //没找到
		}

		bool Erase(const K& key)
		{
			KeyOfT kot;
			Hash hash;
			size_t hashi = hash(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				Node* prev = nullptr;
				if (cur->_kv.first == key) //找到了
				{
					if (prev == nullptr) //删除节点为头节点
					{
						_table[hashi] = cur->_next;
					}
					else //删除节点为中间节点或尾节点
					{
						prev->_next = cur->_next;
					}
					delete cur;
					_n--;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false; //删除失败
		}

		~HashTable()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}

	private:
		vector<Node*> _table;
		size_t _n;
	};
}

//UnorderedSet.h文件
#include "HashTable.h"

template<class K>
class Unordered_Set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	bool insert(const K& key)
	{
		return _sht.Insert(key);
	}
private:
	hash_bucket::HashTable<K, K, SetKeyOfT> _sht;
};

//UnorderMap.h文件
#include "HashTable.h"
template<class K, class V>
class Unordered_Map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	bool insert(const pair<K, V>& kv)
	{
		return _mht.Insert(kv);
	}
private:
	hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT> _mht;
};

这里的逻辑和自己实现map/set的逻辑是一样的，可参考那篇文章。

在test.cpp中测试一下，下面代码没出问题的话，就实现好了。

#include "UnorderedMap.h"
#include "UnorderedSet.h"

int main()
{
	int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3,3,15 };
	Unordered_Set<int> us;
	for (auto i : a)
	{
		us.insert(i);
	}
	return 0;
}

2.⽀持iterator的实现

先把迭代器的结构写出来。

T是数据的类型，Ref是数据的引用，Ptr是数据的指针，KeyOfT是取Key，Hash是解决Key不能取模的仿函数。

//这里需要HashTable的前置声明
template<class K, class T, class KeyOfT, class Hash> 
class HashTable;

template< class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT; //这里使用了HashTable，之前需要有他的声明
	typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

	Node* _node;
	HT* _ht;

	HTIterator(Node* node, HT* ht)
		:_node(node)
		,_ht(ht)
	{}

};

然后把几个简单的运算符重载的实现写出来。

template< class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

	Node* _node;
	HT* _ht;

	HTIterator(Node* node, HT* ht)
		:_node(node)
		,_ht(ht)
	{}

	Ref operator*()
	{
		return _node->_data;
	}

	Ptr operator->()
	{
		return &_node->_data;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}
};

2.1 operator++的实现

Self& operator++()
{
	if (_node->_next)//如果当前桶不为空
	{
		_node = _node->_next;
	}
	else //当前桶不为空
	{
		//就要找下一个桶

	}
}

这里主要是解决怎么从一个桶找到下一个桶。

可以先计算当前位置的hashi，然后对这个hashi进行+1的操作，就能找到下一个桶。

Self& operator++()
{
	if (_node->_next)//当前位置的下一个节点不为空
	{
		_node = _node->_next;
	}
	else //如果是当前桶的最后一个位置
	{
		//找下一个桶
		KeyOfT kot; //取出Key
		Hash hash;  //解决Key不能取模问题
		size_t hashi = hash(kot(_node->_data)) % _ht->_table.size();//找映射位置
		hashi++; //下一个桶

	}
}

解释一下这句代码： hash(kot(_node->_data))。我们不知道_node->_data是一个key还是一个pair，所以用kot(_node->_data)取出_data的key部分，这个key又不一定是能取模的类型，可能是string，所以用 hash(kot(_node->_data))来转换成能取模的。

下一个桶可能没有数据，我们要找下一个不为空的桶，所以还要while循环找，并且找的时候不能越界。

Self& operator++()
{
	if (_node->_next)//如果当前桶不为空
	{
		_node = _node->_next;
	}
	else //当前桶不为空
	{
		//找下一个桶
		KeyOfT kot;
		Hash hash;
		size_t hashi = hash(kot(_node->_data)) % _ht->_table.size();//找映射位置
		hashi++;
		while (hashi < _ht->_table.size() && _ht->_table[hashi] == nullptr)
		{
			hashi++;
		}

	}
	
}

hashi < _ht->_table.size() && _ht->_table[hashi] == nullptr 这句代码的顺序不可交换，否则会发生越界访问。

出while循环后，可能是走到end位置了，也可能是找到不为空的桶了，所以要分情况讨论。

Self& operator++()
{
	if (_node->_next)//如果当前桶不为空
	{
		_node = _node->_next;
	}
	else //当前桶不为空
	{
		//找下一个桶
		KeyOfT kot;
		Hash hash;
		size_t hashi = hash(kot(_node->_data)) % _ht->_table.size();//找映射位置
		hashi++;
		while (_ht->_table[hashi] == nullptr && hashi < _ht->_table.size())
		{
			hashi++;
		}

		if (hashi == _ht->_table.size()) //走到表尾了，就是end的位置
		{
			_node = nullptr;
		}
		else //找到了不为空的桶
		{
			_node = _ht->_table[hashi];
		}
	}
	return *this;
}

解决 _table 为私有成员的问题

_table是HashTable这个类的私有成员，在HashTable类外不能访问，这里的解决方法就是用友元。在HashTable类里加上下面这两句话。

template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
friend struct HTIterator;

这是类模板的友元声明，不能只写friend struct HTIterator; 模板的友元声明连模板参数也要写上。

2.2 begin 和 end

在HashTable类里public对迭代器和const迭代器重命名。

public:
	typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
	typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> Const_Iterator;

在HashTable类里public实现begin和end。

begin的实现就是找到这个表的第一个节点，也就是第一个不为空的桶。找到之后，我们返回一个迭代器，这个迭代器用一个结点的指针和HashTable的指针(this指针)构造。

Iterator Begin()
{
	for (int i = 0; i < _table.size(); i++)
	{
		Node* cur = _table[i];
		if (cur)
			return Iterator(cur, this);
	}
}

如果遍历完了都没找到不为空的桶，就返回end。但是这里还可以加一个判断，_n为0的时候，这个表就是一个数据也没有，直接返回end。

Iterator Begin()
{
	if (_n == 0)
		return End();
	for (int i = 0; i < _table.size(); i++)
	{
		Node* cur = _table[i];
		if (cur)
			return Iterator(cur, this);
	}
	return End();
}

这个end就定义为空指针，但是实现的时候不能直接写return nullptr; 因为我们要返回一个迭代器，要用nullptr去构造这个迭代器。

Iterator End()
{
	return Iterator(nullptr, this);
}

现在我们要在UnorderSet.h文件和UnorderMap.h文件里实现begin和end。

//UnorderedSet.h文件
public:
	typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::Iterator iterator;
	iterator begin()
	{
		_sht.Begin();
	}

	iterator end()
	{
		_sht.End();
	}

//UnorderedMap.h文件
public:
	typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT>::Iterator iterator;
	iterator begin()
	{
		_mht.Begin();
	}

	iterator end()
	{
		_mht.End();
	}

在test.cpp中测试一下。

#include "UnorderedMap.h"
#include "UnorderedSet.h"

int main()
{
	int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3, 55, 15 };
	Unordered_Set<int> us;
	for (auto i : a)
	{
		us.insert(i);
	}
	auto it = us.begin();
	while (it != us.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;
	return 0;
}

2.3 const迭代器

把const迭代器实现出来，和普通迭代器实现逻辑一样，返回值不同。

在HashTable.h文件中的HashTable类里添加下面代码。

Const_Iterator Begin() const
{
	if (_n == 0)
		return End();
	for (int i = 0; i < _table.size(); i++)
	{
		Node* cur = _table[i];
		if (cur)
			return Const_Iterator(cur, this);
	}
	return End();
}

Const_Iterator End() const
{
	return Const_Iterator(nullptr, this);
}

此时参数列表括号后面的那个const修饰的是this，也就意味着this是一个const的，而我们前面在定义的时候，这个this类型并不是const的。

为了解决这个问题，我们需要把上面这个地方改为const。

在UnorderedSet.h文件中的Unordered_Set类里添加下面代码。

typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::Const_Iterator const_iterator;
const_iterator begin() const
{
	return _sht.Begin();
}

const_iterator end()const
{
	return _sht.End();
}

在UnorderedMap.h文件中的Unordered_Map类里添加下面代码。

typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT>::Const_Iterator const_iterator;

const_iterator begin() const
{
	return _mht.Begin();
}

const_iterator end() const
{
	return _mht.End();
}

在test.cpp中测试一下。

先测试unordered_set的。

void Print(const Unordered_Set<int>& s) //这里会用到const迭代器
{
	Unordered_Set<int>::const_iterator cit = s.begin();
	//auto cit = s.begin();
	while (cit != s.end())//迭代器遍历
	{
		cout << *cit << " ";
		++cit;
	}
	cout << endl;

	for (auto i : s)//范围for遍历
	{
		cout << i << " ";
	}
	cout << endl;
}

int main()
{
	int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3, 55, 15 };
	Unordered_Set<int> us;
	for (auto i : a)
	{
		us.insert(i);
	}
	Print(us);
	return 0;
}

两种遍历方式都是没有问题的。

再测试一下unordered_map的。

void PrintMap(const Unordered_Map<string, string>& m)
{
	Unordered_Map<string, string>::const_iterator cit = m.begin();
	//auto cit = s.begin();
	while (cit != m.end())//迭代器遍历
	{
		cout << cit->first << ":" << cit->second << endl;
		++cit;
	}
	cout << endl;

	for (auto s : m)//范围for遍历
	{
		cout << s.first << ":" << s.second << endl;
	}
}

int main()
{
	Unordered_Map<string, string> dic;
	dic.insert({ "sort", "排序" });
	dic.insert({ "left", "左边" });
	dic.insert({ "watermelon", "西瓜" });
	PrintMap(dic);
	return 0;
}

依旧是没有问题。

2.4 Key不可修改的实现

正常情况下，对于set而言，K不可修改，对于map而言，K不可修改，V可修改。

unordered_set修改如下。

unordered_map修改如下。

我们测试一下。

int main()
{
	Unordered_Map<string, string> dic;
	dic.insert({ "sort", "排序" });
	dic.insert({ "left", "左边" });
	dic.insert({ "watermelon", "西瓜" });

	Unordered_Map<string, string>::iterator cit = dic.begin();
	while (cit != dic.end())//迭代器遍历
	{
		//cit->first += 'x'; //K不可修改
		cit->second += 'x'; //V可修改
		cout << cit->first << ":" << cit->second << endl;
		++cit;
	}
	cout << endl;
	return 0;
}

3.operator[]的实现

operator[]的实现要用到find和insert，所以我们先把find和insert的返回值稍作修改，insert是返回一个pair，find返回一个迭代器。

在HashTable.h文件中要修改的代码。

Iterator Find(const K& key)
{
	KeyOfT kot;
	Hash hash;
	size_t hashi = hash(key) % _table.size();
	Node* cur = _table[hashi];
	while (cur)
	{
		if (kot(cur->_data) == key)
		{
			return Iterator(cur, this); //找到了
		}
		cur = cur->_next;
	}
	return End(); //没找到,返回end
}

pair<Iterator, bool> Insert(const T& data)
{
	KeyOfT kot;
	Iterator it = Find(kot(data));
	if (it != End()) //插入的值已存在，插入失败
		return {it, false};//返回已存在的这个值的迭代器和false

	Hash hash;
	if (_n == _table.size()) //扩容
	{
        //...
	}

	size_t hashi = hash(kot(data)) % _table.size(); //映射位置
	Node* newnode = new Node(data);
	newnode->_next = _table[hashi];
	_table[hashi] = newnode;
	_n++;
	return {Iterator(newnode, this), true};//插入成功，返回 新插入的值的迭代器 和 true
}

在UnorderedSet.h文件中要修改的代码并补充find的封装。

iterator find(const K& key)
{
	return _sht.Find(key);
}

pair<iterator, bool> insert(const K& key)
{
	return _sht.Insert(key);
}

在UnorderedMap.h文件中要修改的代码并补充find的封装。

iterator find(const pair<K, V>& kv)
{
	return _mht.Find(kv.first);
}

pair<iterator, bool> insert(const pair<K, V>& kv)
{
	return _mht.Insert(kv);
}

operator[]的实现和之前是一样的，代码如下。

//在 UnorderedMap.h 文件
V& operator[](const K& key)
{
	pair<iterator, bool> ret = insert({ key, V() });
	return ret.first->second;
}

测试一下。

void PrintMap(const Unordered_Map<string, string>& m)
{
	Unordered_Map<string, string>::const_iterator cit = m.begin();
	while (cit != m.end())//迭代器遍历
	{
		cout << cit->first << ":" << cit->second << endl;
		++cit;
	}
}

int main()
{
	Unordered_Map<string, string> dic;
	dic.insert({ "sort", "排序" });
	dic.insert({ "left", "左边" });
	dic.insert({ "watermelon", "西瓜" });
	PrintMap(dic);
	cout << endl;

	dic["left"] = "剩余"; //left已存在，修改
	dic["right"] = "右边"; //right不存在，插入+修改
	dic["banana"]; //banana不存在，插入
	PrintMap(dic);
	return 0;
}