目录
- 前言
- 一、哈希表的模拟实现
- 1.1 哈希表的改造
- 1.1.1 模板参数列表的改造
- 1.1.2 增加迭代器操作
- 1.2 哈希表的模拟实现
- 1.2.1 哈希表中仿函数的实现
- 1.2.2 哈希表中节点类的实现
- 1.2.3 哈希表中迭代器类的实现
- 1.2.4 哈希表中构造函数、析构函数和 Clear() 函数的实现
- 1.2.5 哈希表中Swap 和 Size 函数的实现
- 1.2.6 哈希表中 begin 和 end 函数的实现
- 1.2.7 哈希表中 Find 函数的实现
- 1.2.8 哈希表中 Insert 和 Erase 函数的实现
- 1.2.9 哈希表的整体实现
- 二、unordered_set的实现及测试
- 三、unordered_map的实现及测试
- 结尾
前言
上一篇文章中讲到了哈希的概念和STL中关于哈希容器的使用,并且在后面对开散列进行了模拟实现,那么在这一篇文章中在开散列的基础上进行改造成哈希表,并且在哈希表的基础上封装 unordered_set 和 unordered_map。
一、哈希表的模拟实现
1.1 哈希表的改造
1.1.1 模板参数列表的改造
K:表示关键码类型
T:不同容器V的类型不同,如果是unordered_map,K代表一个键值对,如果是unordered_set,T 为 K。
KOfT: 因为V的类型不同,通过value取key的方式就不同,详细见
unordered_map/set的实现
HF: 哈希函数仿函数对象类型,哈希函数使用除留余数法,需要将Key转换为整形数字才能取模
template<class K, class T , class KOfT , class HF>
class hash_table;
1.1.2 增加迭代器操作
// 前置声明,否则后面的__HTIterator找不到hash_tabletemplate<class K, class T, class KOfT, class HF>class hash_table;template<class K, class T , class Ref, class Ptr, class KOfT, class HF>struct __HTIterator{typedef HashNode<T> Node;typedef __HTIterator<K, T, Ref , Ptr , KOfT, HF> self;// 成员变量Node* _node;// 由于const迭代器中const修饰this// 所以这里需要加const,否则会导致权限放大// 编译无法通过的情况const hash_table<K, T, KOfT, HF>* _pht;// size_t hashi;// 构造函数__HTIterator(Node* node, hash_table<K, T, KOfT, HF>* pht , size_t i):_node(node),_pht(pht),hashi(i){}// 构造函数__HTIterator(Node* node, const hash_table<K, T, KOfT, HF>* pht, size_t i):_node(node), _pht(pht), hashi(i){}self& operator++(){if (_node->_next){// 若当前桶其他节点没有走完// 那么继续走下一个节点_node = _node->_next;}else{// 若当前桶的所有节点都已经走完// 那么继续向下一个桶不为空的桶走hashi++;while (hashi < _pht->_table.size()){if (_pht->_table[hashi]){_node = _pht->_table[hashi];break;}hashi++;}if (hashi == _pht->_table.size())_node = nullptr;}return *this;}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const self& s){return _node != s._node;}};
1.2 哈希表的模拟实现
1.2.1 哈希表中仿函数的实现
若存储key的类型不是整形时,需要将其转化为整数,整形不需要处理,日常中我们最常用到的类型是string,那么这里就写一个string转化为整形的版本。
// 整数类型
template<class K>
struct HashFunc
{size_t operator()(const K& key){return (size_t)key;}
};// 特化
template<>
struct HashFunc<string>
{size_t operator()(const string& s){size_t sum = 0;for (size_t i = 0; i < s.size(); i++){sum = sum * 31 + s[i];}return sum;}
};
1.2.2 哈希表中节点类的实现
namespace hash_bucket
{template<class T>struct HashNode{HashNode* _next;T _data;HashNode(const T& data):_data(data), _next(nullptr){}};}
1.2.3 哈希表中迭代器类的实现
namespace hash_bucket
{// 前置声明,否则后面的__HTIterator找不到hash_tabletemplate<class K, class T, class KOfT, class HF>class hash_table;template<class K, class T, class Ref, class Ptr, class KOfT, class HF>struct __HTIterator{typedef HashNode<T> Node;typedef __HTIterator<K, T, Ref, Ptr, KOfT, HF> self;// 成员变量Node* _node;// 由于const迭代器中const修饰this// 所以这里需要加const,否则会导致权限放大// 编译无法通过的情况const hash_table<K, T, KOfT, HF>* _pht;// size_t hashi;// 构造函数__HTIterator(Node* node, hash_table<K, T, KOfT, HF>* pht, size_t i):_node(node), _pht(pht), hashi(i){}// 构造函数__HTIterator(Node* node, const hash_table<K, T, KOfT, HF>* pht, size_t i):_node(node), _pht(pht), hashi(i){}self& operator++(){if (_node->_next){// 若当前桶其他节点没有走完// 那么继续走下一个节点_node = _node->_next;}else{// 若当前桶的所有节点都已经走完// 那么继续向下一个桶不为空的桶走hashi++;while (hashi < _pht->_table.size()){if (_pht->_table[hashi]){_node = _pht->_table[hashi];break;}hashi++;}if (hashi == _pht->_table.size())_node = nullptr;}return *this;}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const self& s){return _node != s._node;}};
}
1.2.4 哈希表中构造函数、析构函数和 Clear() 函数的实现
namespace hash_bucket
{template<class K, class T, class KOfT, class HF>class hash_table{public:typedef HashNode<T> Node;public:// 构造函数hash_table(int n = 10):_table(vector<Node*>(n)), _n(0){}// 析构函数~hash_table(){Clear();}void Clear(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_table[i] = nullptr;}}private:vector<Node*> _table;int _n;};
}
1.2.5 哈希表中Swap 和 Size 函数的实现
namespace hash_bucket
{template<class K, class T, class KOfT, class HF>class hash_table{public:size_t Size(){return _n;}void Swap(hash_table<K, T, KOfT, HF>& ht){_table.swap(ht._table);swap(_n, ht._n);}private:vector<Node*> _table;int _n;};
}
1.2.6 哈希表中 begin 和 end 函数的实现
namespace hash_bucket
{template<class K, class T, class KOfT, class HF>class hash_table{public:typedef HashNode<T> Node;typedef __HTIterator<K, T, T&, T*, KOfT, HF> iterator;typedef __HTIterator<K, T, const T&, const T*, KOfT, HF> const_iterator;// 将__HTIterator设置为hash_table的友元// 使__HTIterator可以访问hash_table的私有template<class K, class T, class Ref, class Ptr, class KOfT, class HF>friend struct __HTIterator;public:// 迭代器iterator begin(){for (size_t i = 0; i < _table.size(); i++){if (_table[i])return iterator(_table[i], this, i);}return end();}iterator end(){return iterator(nullptr, this, -1);}const_iterator begin()const{for (size_t i = 0; i < _table.size(); i++){if (_table[i])return const_iterator(_table[i], this, i);}return end();}const_iterator end()const{return const_iterator(nullptr, this, -1);}private:vector<Node*> _table;int _n;};
}
1.2.7 哈希表中 Find 函数的实现
namespace hash_bucket
{template<class K, class T, class KOfT, class HF>class hash_table{public:typedef HashNode<T> Node;typedef __HTIterator<K, T, T&, T*, KOfT, HF> iterator;typedef __HTIterator<K, T, const T&, const T*, KOfT, HF> const_iterator;// 将__HTIterator设置为hash_table的友元// 使__HTIterator可以访问hash_table的私有template<class K, class T, class Ref, class Ptr, class KOfT, class HF>friend struct __HTIterator;public:// 查找iterator Find(const K& key){HF hf;KOfT koft;int hashi = hf(key) % _table.size();Node* cur = _table[hashi];while (cur){if (koft(cur->_data) == key)return iterator(cur, this, hashi);cur = cur->_next;}// 找不到返回空return end();}private:vector<Node*> _table;int _n;};
}
1.2.8 哈希表中 Insert 和 Erase 函数的实现
namespace hash_bucket
{template<class K, class T, class KOfT, class HF>class hash_table{public:typedef HashNode<T> Node;typedef __HTIterator<K, T, T&, T*, KOfT, HF> iterator;typedef __HTIterator<K, T, const T&, const T*, KOfT, HF> const_iterator;// 将__HTIterator设置为hash_table的友元// 使__HTIterator可以访问hash_table的私有template<class K, class T, class Ref, class Ptr, class KOfT, class HF>friend struct __HTIterator;public:// 插入pair<iterator, bool> Insert(const T& data){HF hf;KOfT koft;iterator tmp = Find(koft(data));// 不允许有相同的值if (tmp != end())return make_pair(tmp, false);// 扩容if (_n == _table.size()){int newcapacity = 2 * _table.size();hash_table newtable(newcapacity);for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;int hashi = hf(koft(cur->_data)) % newcapacity;cur->_next = newtable._table[hashi];newtable._table[hashi] = cur;cur = next;}_table[i] = nullptr;}_table.swap(newtable._table);}// 头插 int hashi = hf(koft(data)) % _table.size();Node* newnode = new Node(data);newnode->_next = _table[hashi];_table[hashi] = newnode;_n++;return make_pair(iterator(newnode, this, hashi), true);}// 删除bool Erase(const K& key){Node* tmp = Find(key);// 找不到则删除失败if (tmp == nullptr)return false;HF hf;KOfT koft;int hashi = hf(key) % _table.size();Node* prev = nullptr;Node* cur = _table[hashi];while (cur){Node* next = cur->_next;if (koft(cur->_data) == key){if (prev == nullptr){_table[hashi] = next;}else{prev->_next = next;}_n--;delete cur;return true;}else{prev = cur;cur = next;}}return false;}private:vector<Node*> _table;int _n;};
}
1.2.9 哈希表的整体实现
#pragma once#include<iostream>
#include<string>
#include<vector>
#include<unordered_set>
#include<set>using namespace std;// 整数类型
template<class K>
struct HashFunc
{size_t operator()(const K& key){return (size_t)key;}
};// 特化
template<>
struct HashFunc<string>
{size_t operator()(const string& s){size_t sum = 0;for (size_t i = 0; i < s.size(); i++){sum = sum * 31 + s[i];}return sum;}
};namespace hash_bucket
{template<class T>struct HashNode{HashNode* _next;T _data;HashNode(const T& data):_data(data), _next(nullptr){}};// 前置声明,否则后面的__HTIterator找不到hash_tabletemplate<class K, class T, class KOfT, class HF>class hash_table;template<class K, class T , class Ref, class Ptr, class KOfT, class HF>struct __HTIterator{typedef HashNode<T> Node;typedef __HTIterator<K, T, Ref , Ptr , KOfT, HF> self;// 成员变量Node* _node;// 由于const迭代器中const修饰this// 所以这里需要加const,否则会导致权限放大// 编译无法通过的情况const hash_table<K, T, KOfT, HF>* _pht;// size_t hashi;// 构造函数__HTIterator(Node* node, hash_table<K, T, KOfT, HF>* pht , size_t i):_node(node),_pht(pht),hashi(i){}// 构造函数__HTIterator(Node* node, const hash_table<K, T, KOfT, HF>* pht, size_t i):_node(node), _pht(pht), hashi(i){}self& operator++(){if (_node->_next){// 若当前桶其他节点没有走完// 那么继续走下一个节点_node = _node->_next;}else{// 若当前桶的所有节点都已经走完// 那么继续向下一个桶不为空的桶走hashi++;while (hashi < _pht->_table.size()){if (_pht->_table[hashi]){_node = _pht->_table[hashi];break;}hashi++;}if (hashi == _pht->_table.size())_node = nullptr;}return *this;}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const self& s){return _node != s._node;}};template<class K, class T , class KOfT , class HF>class hash_table{public:typedef HashNode<T> Node;typedef __HTIterator<K, T, T& , T* ,KOfT, HF> iterator;typedef __HTIterator<K, T, const T&, const T*, KOfT, HF> const_iterator;// 将__HTIterator设置为hash_table的友元// 使__HTIterator可以访问hash_table的私有template<class K, class T, class Ref, class Ptr, class KOfT, class HF>friend struct __HTIterator;public:// 构造函数hash_table(int n = 10):_table(vector<Node*>(n)),_n(0){}// 析构函数~hash_table(){Clear();}void Clear(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_table[i] = nullptr;}}size_t Size(){return _n;}void Swap(hash_table<K,T,KOfT,HF>& ht){_table.swap(ht._table);swap(_n, ht._n);}// 迭代器iterator begin(){for (size_t i = 0; i < _table.size(); i++){if (_table[i])return iterator(_table[i], this, i);}return end();}iterator end(){return iterator(nullptr, this, -1);}const_iterator begin()const{for (size_t i = 0; i < _table.size(); i++){if (_table[i])return const_iterator(_table[i], this, i);}return end();}const_iterator end()const{return const_iterator(nullptr, this, -1);}// 插入pair<iterator,bool> Insert(const T& data){HF hf;KOfT koft;iterator tmp = Find(koft(data));// 不允许有相同的值if (tmp != end())return make_pair(tmp,false);// 扩容if (_n == _table.size()){int newcapacity = 2 * _table.size();hash_table newtable(newcapacity);for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;int hashi = hf(koft(cur->_data)) % newcapacity;cur->_next = newtable._table[hashi];newtable._table[hashi] = cur;cur = next;}_table[i] = nullptr;}_table.swap(newtable._table);}// 头插 int hashi = hf(koft(data)) % _table.size();Node* newnode = new Node(data);newnode->_next = _table[hashi];_table[hashi] = newnode;_n++;return make_pair(iterator(newnode,this,hashi),true);}// 删除bool Erase(const K& key){Node* tmp = Find(key);// 找不到则删除失败if (tmp == nullptr)return false;HF hf;KOfT koft;int hashi = hf(key) % _table.size();Node* prev = nullptr;Node* cur = _table[hashi];while (cur){Node* next = cur->_next;if (koft(cur->_data) == key){if (prev == nullptr){_table[hashi] = next;}else{prev->_next = next;}_n--;delete cur;return true;}else{prev = cur;cur = next;}}return false;}// 查找iterator Find(const K& key){HF hf;KOfT koft;int hashi = hf(key) % _table.size();Node* cur = _table[hashi];while (cur){if (koft(cur->_data) == key)return iterator(cur,this,hashi);cur = cur->_next;}// 找不到返回空return end();}void Some(){size_t bucketSize = 0;size_t maxBucketLen = 0;size_t sum = 0;double averageBucketLen = 0;for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];if (cur){++bucketSize;}size_t bucketLen = 0;while (cur){++bucketLen;cur = cur->_next;}sum += bucketLen;if (bucketLen > maxBucketLen){maxBucketLen = bucketLen;}}averageBucketLen = (double)sum / (double)bucketSize;printf("all bucketSize:%d\n", _table.size());printf("bucketSize:%d\n", bucketSize);printf("maxBucketLen:%d\n", maxBucketLen);printf("averageBucketLen:%lf\n\n", averageBucketLen);}private:vector<Node*> _table;int _n;public:void TestHT1(){hash_table<int, int> ht;int a[] = { 4,14,24,34,5,7,1 };for (auto e : a){ht.Insert(make_pair(e, e));}ht.Insert(make_pair(3, 3));ht.Insert(make_pair(3, 3));ht.Insert(make_pair(-3, -3));ht.Some();}void TestHT2(){string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };//HashTable<string, int, HashFuncString> ht;hash_table<string, int> ht;for (auto& e : arr){//auto ret = ht.Find(e);HashNode<string, int>* ret = ht.Find(e);if (ret){ret->_kv.second++;}else{ht.Insert(make_pair(e, 1));}}ht.Insert(make_pair("apple", 1));ht.Insert(make_pair("sort", 1));ht.Insert(make_pair("abc", 1));ht.Insert(make_pair("acb", 1));ht.Insert(make_pair("aad", 1));}void TestHT3(){const size_t N = 1000;unordered_set<int> us;set<int> s;hash_table<int, int> ht;vector<int> v;v.reserve(N);srand(time(0));for (size_t i = 0; i < N; ++i){//v.push_back(rand()); // N比较大时,重复值比较多v.push_back(rand() + i); // 重复值相对少//v.push_back(i); // 没有重复,有序}size_t begin1 = clock();for (auto e : v){s.insert(e);}size_t end1 = clock();cout << "set insert:" << end1 - begin1 << endl;size_t begin2 = clock();for (auto e : v){us.insert(e);}size_t end2 = clock();cout << "unordered_set insert:" << end2 - begin2 << endl;size_t begin3 = clock();for (auto e : v){ht.Insert(make_pair(e, e));}size_t end3 = clock();cout << "hash_table insert:" << end3 - begin3 << endl << endl;size_t begin4 = clock();for (auto e : v){s.find(e);}size_t end4 = clock();cout << "set find:" << end4 - begin4 << endl;size_t begin5 = clock();for (auto e : v){us.find(e);}size_t end5 = clock();cout << "unordered_set find:" << end5 - begin5 << endl;size_t begin6 = clock();for (auto e : v){ht.Find(e);}size_t end6 = clock();cout << "hash_table find:" << end6 - begin6 << endl << endl;cout << "插入数据个数:" << us.size() << endl << endl;ht.Some();size_t begin7 = clock();for (auto e : v){s.erase(e);}size_t end7 = clock();cout << "set erase:" << end7 - begin7 << endl;size_t begin8 = clock();for (auto e : v){us.erase(e);}size_t end8 = clock();cout << "unordered_set erase:" << end8 - begin8 << endl;size_t begin9 = clock();for (auto e : v){ht.Erase(e);}size_t end9 = clock();cout << "hash_table Erase:" << end9 - begin9 << endl << endl;}};}
二、unordered_set的实现及测试
#pragma once#include"HashTable.h"namespace aj
{template<class K>class unordered_set{public:struct SetKeyOfT{const K& operator()(const K& k){return k;}};public:// 因为unordered_set中K是不能被改变// 所以普通迭代器也无法改变K的值// 所以让普通迭代器也是const迭代器typedef typename hash_bucket::hash_table<const K, K, SetKeyOfT, HashFunc<K>>::const_iterator iterator;typedef typename hash_bucket::hash_table<const K, K, SetKeyOfT, HashFunc<K>>::const_iterator const_iterator;// 由于_ht.Insert(key)返回的普通迭代器// 而pair<iterator, bool>中的迭代器// 看起来是普通迭代器,但实际上是const迭代器// 所以这里不能直接 return _ht.Insert(key)pair<iterator, bool> insert(const K& key){auto tmp = _ht.Insert(key);return make_pair(iterator(tmp.first._node, tmp.first._pht, tmp.first.hashi), tmp.second);}// 由于这里普通迭代器也是const迭代器// 那么这里只保留const迭代器版本/*iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}*/const_iterator begin()const{return _ht.begin();}const_iterator end()const{return _ht.end();}bool erase(const K& key){return _ht.Erase(key);}iterator find(const K& key){return _ht.Find(key);}void clear(){_ht.Clear();}size_t size(){return _ht.Size();}void swap(unordered_set<K>& us){_ht.Swap(us._ht);}private:// unordered_set中K是不能被改变的hash_bucket::hash_table <const K, K , SetKeyOfT , HashFunc<K>> _ht;};void test_unordered_set(){// 17:05/*unordered_set<int> us;us.insert(5);us.insert(15);us.insert(52);us.insert(3);*///unordered_set<int>::iterator it = us.begin();//while (it != us.end())//{// // *it += 5;// cout << *it << " ";// ++it;//}//cout << endl;unordered_set<int> us1;us1.insert(1);us1.insert(2);us1.insert(3);us1.insert(4);unordered_set<int> us2;us2.insert(10);us2.insert(20);us2.insert(30);us2.insert(40);for (auto e : us1){cout << e << " ";}cout << endl;for (auto e : us2){cout << e << " ";}cout << endl;us1.swap(us2);for (auto e : us1){cout << e << " ";}cout << endl;for (auto e : us2){cout << e << " ";}cout << endl;}
}
三、unordered_map的实现及测试
#pragma once#include"HashTable.h"namespace aj
{template<class K , class V>class unordered_map{public:struct MapKeyOfT{const K& operator()(const pair<K, V>& kv){return kv.first;}};public:typedef typename hash_bucket::hash_table<K, pair<const K, V>, MapKeyOfT, HashFunc<K>>::iterator iterator;typedef typename hash_bucket::hash_table<K, pair<const K, V>, MapKeyOfT, HashFunc<K>>::const_iterator const_iterator;pair<iterator,bool> insert(const pair<K, V>& kv){return _ht.Insert(kv);}iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin()const{return _ht.begin();}const_iterator end()const{return _ht.end();}V& operator[](const K& key){auto tmp = _ht.Insert(make_pair(key,V()));return tmp.first->second;}const V& operator[](const K& key)const{auto tmp = _ht.Insert(make_pair(key, V()));return tmp.first->second;}bool erase(const K& key){return _ht.Erase(key);}iterator find(const K& key){return _ht.Find(key);}void clear(){_ht.Clear();}size_t size(){return _ht.Size();}void swap(unordered_map<K,V>& um){_ht.Swap(um._ht);}private:// unordered_map中K是不能被改变的hash_bucket::hash_table <K, pair<const K, V>, MapKeyOfT , HashFunc<K>> _ht;};void test_unordered_map1(){unordered_map<int, int> um;um.insert(make_pair(1, 1));um.insert(make_pair(2, 2));um.insert(make_pair(3, 3));um.insert(make_pair(4, 4));unordered_map<int, int>::iterator it = um.begin();while (it != um.end()){cout << it->first << ' ' << it->second << endl;++it;}}void test_unordered_map2(){unordered_map<string, string> dict;dict.insert(make_pair("sort", "排序"));dict.insert(make_pair("insert", "插入"));dict.insert(make_pair("string", "字符串"));for (auto& kv : dict){// kv.first += 'x';kv.second += 'x';cout << kv.first << ":" << kv.second << endl;}cout << endl;string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };unordered_map<string, int> count_map;for (auto& e : arr){count_map[e]++;}for (auto& kv : count_map){cout << kv.first << ":" << kv.second << endl;}cout << endl;}
}
结尾
到目前已经将哈希的大部分内容讲述完了,那么下一篇文章将继续完善哈希,讲述位图和布隆过滤器。
如果有什么建议和疑问,或是有什么错误,大家可以在评论区中提出。
希望大家以后也能和我一起进步!!🌹🌹
如果这篇文章对你有用的话,希望大家给一个三连支持一下!!🌹🌹