一、源码及框架分析
SGI-STL30版本源代码中没有unordered_map和unordered_set,SGI-STL30版本是C++11之前的STL版本,这两个容器是C++11之后才更新的。但是SGI-STL30实现了哈希表,只容器的名字是hash_map和hash_set,他是作为非标准的容器出现的,非标准是指非C++标准规定必须实现的,源代码在hash_map /hash_set / stl_hash_map / stl_hash_set / stl_hashtable.h 中,hash_map和hash_set的实现结构框架核心部分截取出来如下:
// stl_hash_set
template <class Value, class HashFcn = hash<Value>,class EqualKey = equal_to<Value>,class Alloc = alloc>
class hash_set
{
private:typedef hashtable<Value, Value, HashFcn, identity<Value>, EqualKey, Alloc> ht;ht rep;
public:typedef typename ht::key_type key_type;typedef typename ht::value_type value_type;typedef typename ht::hasher hasher;typedef typename ht::key_equal key_equal;typedef typename ht::const_iterator iterator;typedef typename ht::const_iterator const_iterator;hasher hash_funct() const { return rep.hash_funct(); }key_equal key_eq() const { return rep.key_eq(); }
};
// stl_hash_map
template <class Key, class T, class HashFcn = hash<Key>,class EqualKey = equal_to<Key>, class Alloc = alloc>
class hash_map
{
private:typedef hashtable<pair<const Key, T>, Key, HashFcn,select1st<pair<const Key, T> >, EqualKey, Alloc> ht;ht rep;
public:typedef typename ht::key_type key_type;typedef T data_type;typedef T mapped_type;typedef typename ht::value_type value_type;typedef typename ht::hasher hasher;typedef typename ht::key_equal key_equal;typedef typename ht::iterator iterator;typedef typename ht::const_iterator const_iterator;
};// stl_hashtable.h
template <class Value, class Key, class HashFcn, class ExtractKey, class EqualKey, class Alloc>
class hashtable {
public:typedef Key key_type;typedef Value value_type;typedef HashFcn hasher;typedef EqualKey key_equal;
private:hasher hash;key_equal equals;ExtractKey get_key;typedef __hashtable_node<Value> node;vector<node*,Alloc> buckets;size_type num_elements;
public:typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> iterator;pair<iterator, bool> insert_unique(const value_type& obj);const_iterator find(const key_type& key) const;
};
template <class Value>
struct __hashtable_node
{__hashtable_node* next;Value val;
};
• 这里我们就不再画图分析了,通过源码可以看到,结构上hash_map和hash_set跟map和set的完全类似,复用同一个hashtable实现key和key/value结构,hash_set传给hash_table的是两个key,hash_map传给hash_table的是pair<const key, value>。
• 需要注意的是源码里面跟map/set源码类似,命名风格比较乱,这里比map和set还乱,下面我们模拟一份自己的出来,就按自己的风格走了。
二、模拟实现unordered_set 和unordered_map
2.1 实现出符合要求的哈希表
2.1.1 iterator的实现
iterator实现思路分析
• iterator实现的大框架跟list的iterator思路是一致的,用一个类型封装结点的指针,再通过重载运算符实现,迭代器像指针一样访问的行为,要注意的是哈希表的迭代器是单向迭代器。
• 这里的难点是operator++的实现。iterator中有一个指向结点的指针,如果当前桶下面还有结点,则结点的指针指向下一个结点即可。如果当前桶走完了,则需要想办法计算找到下一个桶。这里的难点是反而是结构设计的问题,我们想到iterator中除了有结点的指针,还有哈希表对象的指针,这样当前桶走完了,要计算下一个桶就相对容易多了,用key值计算出当前桶位置,依次往后找下一个不为空的桶即可。
• begin()返回第一个桶中第一个节点指针构造的迭代器,这里end()返回迭代器可以用空表示。
• unordered_set的iterator也不支持修改,我们把unordered_set的第二个模板参数改成const K即可, HashTable<K, const K, SetKeyOfT, Hash> _ht;
• unordered_map的iterator不支持修改key但是可以修改value,我们把unordered_map的第二个模板参数pair的第一个参数改成const K即可, HashTable<K, pair<const K, V>,MapKeyOfT, Hash> _ht;
2.2.2 改造后的哈希表代码
#pragma once#include<iostream>
#include<vector>
#include<cassert>
#include<algorithm>
using namespace std;//状态
enum State
{Empty,Delete,Exit
};//哈希表中的数据
template<class T>
struct HashNode
{T _t;HashNode* next;HashNode(const T& t) :_t(t), next(nullptr) {}
};template<class K, class T, class Ref,class Ptr,class KeyOfT, class Hash>
struct HashIterator
{typedef HashNode<T> Node;typedef HashIterator self;Node* _node;vector<Node*> _ht;HashIterator(Node* node,const vector<Node*>&ht):_node(node),_ht(ht){}self& operator++(){//有下一个节点if (_node->next){_node = _node->next;}else{size_t hashi = Hash()(KeyOfT()(_node->_t)) % _ht.size();hashi++;while (hashi < _ht.size()){if (_ht[hashi]) break;hashi++;}if (hashi == _ht.size())_node = nullptr;else_node = _ht[hashi];}return *this;}self operator++(int){self temp(_node);++_node;return temp;}Ptr operator->(){return &_node->_t;}Ref operator*(){return _node->_t;}bool operator!=(const HashIterator& ht){return _node != ht._node;}bool operator==(const HashIterator& ht){return _node == ht._node;}
};template<class K, class T,class KeyOfT, class Hash>
class HashTable
{typedef HashNode<T> Node;
public://迭代器typedef HashIterator<K, T, T&,T*,KeyOfT, Hash> iterator;typedef HashIterator<K, T, const T&,const T*,KeyOfT, Hash> const_iterator;iterator begin(){if(_n==0)return end();size_t i = 0;while ( i<_ht.size()){if(_ht[i])return iterator(_ht[i], _ht);i++;}return end();}const_iterator begin()const{if (_n == 0)return end();size_t i = 0;while (i < _ht.size()){if (_ht[i])return iterator(_ht[i], _ht);i++;}return end();}iterator end(){return iterator(nullptr, _ht);}const_iterator end()const{return iterator(nullptr, _ht);}HashTable(){_ht.resize(10);_n = 0;}//查找iterator find(const K& key){size_t hashi = Hash()(key) % _ht.size();//定位Node* cur = _ht[hashi];while (cur){if (KeyOfT()(cur->_t) == key)return iterator(cur,_ht);cur = cur->next;}return iterator(nullptr, _ht);}pair<iterator,bool> insert(const T& t){iterator ret = find(KeyOfT()(t));if (ret._node)return {ret,false };//扩容if (_n == _ht.size()){vector<Node*> newht(_ht.size() * 2,nullptr);for (int i = 0; i < _ht.size(); i++){Node* cur = _ht[i];while (cur){Node* next = cur->next;size_t hashi = Hash()(KeyOfT()(_ht[i]->_t)) % newht.size();if (newht[i] == nullptr)newht[i] = cur;else{cur->next = newht[i];newht[i] = cur;}_ht[i] = next;}}_ht.swap(newht);}size_t hashi = Hash()(KeyOfT()(t)) % _ht.size();//定位Node* newnode = new Node(t);newnode->next = _ht[hashi];_ht[hashi] = newnode;_n++;return { {newnode,_ht},true };}iterator erase(const K& key){size_t hashi = Hash()(key) % _ht.size();//定位Node* prev = nullptr;Node* cur = _ht[hashi];Node* ret = nullptr;while (cur){if (KeyOfT()(cur->_t) == key){Node* temp = cur;ret = ++temp;if (prev == nullptr){ _ht[hashi] = nullptr;}else{prev->next = cur->next;}delete cur;cur = nullptr;_n--;return{ ret, _ht };}prev = cur;cur = cur->next;}return { nullptr,_ht };}
private:vector<Node*> _ht;size_t _n;
};
2.2 复用哈希表实现unordered_set
#pragma once#include"HashBucket.h"
//对于int 、double、size_t 、int* 等类型
template<class K>
struct HashFunc_set
{size_t operator()(const K& key){return size_t(key);}
};//对于string 的特化处理
template<>
struct HashFunc_set<string>
{size_t operator()(const string& key){size_t ret = 0;for (const auto& e : key)ret = ret * 31 + e;return ret;}
};template<class K,class Hash = HashFunc_set<K>>
class unordered_set
{typedef K T;//和map相称struct KeyOfT{const K& operator()(const T& t){return t;}};typedef typename HashTable<K, T, KeyOfT, Hash>::iterator iterator;typedef typename HashTable<K, T, KeyOfT, Hash>::const_iterator const_iterator;
public:iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin()const{return _ht.begin();}const_iterator end()const{return _ht.end();}iterator find(const K& key){return _ht.find(key);}pair<iterator, bool> insert(const T& t){return _ht.insert(t);}iterator erase(const K& key){return _ht.erase(key);}
private:HashTable<K, T, KeyOfT, Hash> _ht;
};
2.3 复用哈希表实现unordered_map
#pragma once#include"HashBucket.h"
//对于int 、double、size_t 、int* 等类型
template<class K>
struct HashFunc_map
{size_t operator()(const K& key){return size_t(key);}
};//对于string 的特化处理
template<>
struct HashFunc_map<string>
{size_t operator()(const string& key){size_t ret = 0;for (const auto& e : key)ret = ret * 31 + e;return ret;}
};template<class K,class V,class Hash= HashFunc_map<K>>
class unordered_map
{typedef pair<K, V> T;//和map相称struct KeyOfT{const K& operator()(const T& t){return t.first;}};typedef typename HashTable<K, T,KeyOfT, Hash>::iterator iterator;typedef typename HashTable<K, T,KeyOfT, Hash>::const_iterator const_iterator;
public:iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin()const{return _ht.begin();}const_iterator end()const{return _ht.end();}iterator find(const K& key){return _ht.find(key);}pair<iterator, bool> insert(const T& t){return _ht.insert(t);}iterator erase(const K& key){return _ht.erase(key);}V& operator[](const K& key){pair<iterator, bool> ret = insert({ key,V() });return ret.first->second;}
private:HashTable<K, T,KeyOfT,Hash> _ht;
};