当前位置：首页 > news >正文

用哈希表封装Myunordered_map和Myunordered_set

news 2025/7/28 6:17:19

set/map和unordered_set/map的区别

1.源码及框架分析

SGI-STL30版本源代码中没有unordered_map和unordered_set，SGI-STL30版本是C++11之前的STL版本，这两个容器是C++11之后才更新的。但是SGI-STL30实现了哈希表，只是容器的名字是hash_map和hash_set，它们是作为非标准的容器出现的，非标准是指非C++标准规定必须实现的，源代码在hash_map/hash_set/stl_hash_map/stl_hash_set/stl_hashtable.h中。

hash_map和hash_set的实现结构框架核心部分截取出来如下：

// stl_hash_set
// Excerpt of the SGI-STL hash_set container (quoted, not complete).
// hash_set is a thin wrapper around a single hashtable member `rep`.
// The hashtable is instantiated with Value as BOTH its value type and its key
// type, and identity<Value> as the key extractor.
template <class Value, class HashFcn = hash<Value>,
class EqualKey = equal_to<Value>,
class Alloc = alloc>
class hash_set
{
private:
// Underlying table type: the stored element and the key are the same type.
typedef hashtable<Value, Value, HashFcn, identity<Value>,
EqualKey, Alloc> ht;
ht rep;
public:
// Public type names are forwarded from the underlying hashtable.
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
// Both iterator and const_iterator alias the table's const_iterator.
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
// Accessors that forward to the same-named members of the table.
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
};// stl_hash_map (label for the hash_map excerpt that follows)
// Excerpt of the SGI-STL hash_map container (quoted, not complete).
// hash_map wraps a single hashtable member `rep` whose stored value type is
// pair<const Key, T>, whose key type is Key, and whose key extractor is
// select1st<pair<const Key, T> >.
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
class hash_map
{
private:
// Underlying table type: elements are pair<const Key, T>, looked up by Key.
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
ht rep;
public:
// Public type names; all but data_type/mapped_type are forwarded from the table.
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
// Unlike hash_set above, iterator here is the table's non-const iterator.
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
};
// stl_hashtable.h
// Excerpt of the SGI-STL hashtable class template (quoted, not complete).
// Template parameters: Value is the stored element type, Key the lookup key
// type, HashFcn the hash functor, ExtractKey the functor type of `get_key`,
// EqualKey the key-equality functor, Alloc the allocator passed to the
// bucket vector.
// NOTE(review): size_type and const_iterator are used below but not declared
// in this excerpt; they are presumably declared in the omitted parts.
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey,
class Alloc>
class hashtable {
public:
typedef Key key_type;
typedef Value value_type;
typedef HashFcn hasher;
typedef EqualKey key_equal;
private:
// One instance of each functor type is stored as a member.
hasher hash;
key_equal equals;
ExtractKey get_key;
typedef __hashtable_node<Value> node;
// Bucket array: a vector of node pointers.
vector<node*,Alloc> buckets;
size_type num_elements;
public:
typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
Alloc> iterator;
// Declarations only; the definitions are not part of this excerpt.
pair<iterator, bool> insert_unique(const value_type& obj);
const_iterator find(const key_type& key) const;
};
// Node type used by the hashtable excerpt above: holds one Value plus a
// pointer to another node of the same type (`next`).
template <class Value>
struct __hashtable_node
{
__hashtable_node* next;
Value val;
};

通过源码可以看到，结构上hash_map和hash_set跟map和set完全类似，复用同一个hashtable实现key和key/value结构，hash_set传给hashtable的是两个key，hash_map传给hashtable的是pair<const key, value>。

2.模拟实现unordered_map和unordered_set

实现步骤：

1.实现哈希表

2.封装unordered_set和unordered_map的框架解决KeyOfT

3.实现iterator和const_iterator

4.key不支持修改的问题

5.operator[]的实现

2.1 实现出复用哈希表的框架，并支持insert

我们这里相比源码调整一下，key参数就用K，value参数就用V，哈希表中数据类型使用T。

其次，跟map和set相比，unordered_map和unordered_set的模拟实现类结构更复杂一点，但是大框架和思路是完全类似的。因为HashTable实现了泛型，不知道T参数到底是K，还是pair<K,V>，而insert内部进行插入时需要把K对象转换成整形取模，并用K比较相等。pair的value不参与取模计算，而且pair默认支持的是key和value一起比较相等，而我们任何时候都只需要比较K对象。所以我们在unordered_map和unordered_set层分别实现一个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT，然后HashTable中通过KeyOfT仿函数取出T类型中的K对象，再转换成整形取模和用K比较相等。

unordered_map

#include "hashtable.h"namespace wxw
{template<class K, class V, class Hash = HashFunc<K>>struct unordered_map{struct MapKeyofT{const K& operator()(const pair<K, V>& kv){return kv.first;}};public:bool insert(const pair<K, V>& kv){return _ht.Insert(kv);}private:hash_bucket::HashTable<K, pair<K, V>, MapKeyofT, Hash> _ht;};
}

unordered_set

#include "hashtable.h"namespace wxw
{template<class K, class Hash = HashFunc<K>>class unordered_set{struct SetKeyofT{const K& operator()(const K& key){return key;}};public:bool insert(const K& key){return _ht.Insert(key);}private:hash_bucket::HashTable<K, K, SetKeyofT, Hash> _ht;};
}

HashTable

namespace hash_bucket
{template<class T>struct HashNode{T _data;HashNode<T>* _next;HashNode(const T & data):_data(data),_next(nullptr){}};template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>class HashTable{typedef HashNode<T> Node;public:HashTable():_tables(__stl_next_prime(0)),_n(0){}~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}_n = 0;}bool Insert(const T& data){KeyOfT kot;Hash hash;if (Find(kot(data)))return false;if (_n == _tables.size())//扩容{vector<Node*> newht(__stl_next_prime(_tables.size() + 1));for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;size_t hashi = hash(kot(cur->_data)) % newht.size();cur->_next = newht[hashi];newht[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newht);}size_t hashi = hash(kot(data)) % _tables.size();Node* newnode = new Node(data);newnode->_next = _tables[hashi];_tables[hashi] = newnode;_n++;return true;}Node* Find(const K& key){KeyOfT kot;Hash hash;size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){return cur;}cur = cur->_next;}return nullptr;}bool Erase(const K& key){Hash hash;KeyOfT kot;size_t hashi = hash(key) % _tables.size();Node* prev = nullptr;//前一个节点Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){if (prev == nullptr)//头结点{_tables[hashi] = cur->_next;}else//中间节点{prev->_next = cur->_next;}delete cur;--_n;return true;}else{prev = cur;cur = cur->_next;}}return false;}private:vector<Node*> _tables;size_t _n = 0;};
}

2.2 iterator的实现

iterator实现思路

iterator实现的大框架跟list的iterator思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。
这里的难点是operator++的实现。iterator中有一个指向节点的指针，如果当前桶下面还有节点，则节点的指针指向下一个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题，参考上面的源码，我们可以看到iterator中除了有节点的指针，还有哈希表对象的指针，这样当桶走完了，要计算下一个桶就相对容易多了，用key值计算出当前桶位置，依次往后找下一个不为空的桶即可。
begin()返回第一个桶第一个节点指针构造的迭代器，end()返回迭代器可以用空表示。
unordered_set的iterator也不支持修改，我们把unordered_set的第二个模版参数改成const K即可，HashTable<K, const K, SetKeyOfT, Hash> _ht。
unordered_map的iterator也不支持修改key但是可以修改value，我们把unordered_map的第二个模版参数pair的第一个参数改为const K即可， HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht。

封装HTIterator

//HashTable 和 HTIterator互相调用，所以要前置声明一下，前置声明不需要加缺省值template<class K, class T, class KeyOfT, class Hash>class HashTable;template<class K, class T, class Ref, class Ptr,class KeyOfT, class Hash>struct HTIterator{typedef HashNode<T> Node;typedef HashTable<K, T, KeyOfT, Hash> HT;typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;Node* _node;HT* _ht;HTIterator(Node* node, HT* ht):_node(node),_ht(ht){}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const Self& s){return _node != s._node;}Self& operator++(){if (_node->_next){//当前桶还有数据，找下一个不为空的桶_node = _node->_next;}else{//当前桶都走完了，找下一个不为空的桶KeyOfT kot;Hash hash;size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();++hashi;while (hashi < _ht->_tables.size()){_node = _ht->_tables[hashi];if (_node)break;else++hashi;}if (hashi == _ht->_tables.size())_node = nullptr;}return *this;}};

在HTIterator中访问不到_tables，这里需要加友元声明

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>class HashTable{//友元声明template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>friend struct HTIterator;typedef HashNode<T> Node;public:typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;Iterator Begin(){if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return Iterator(cur, this);}}return End();}Iterator End(){return Iterator(nullptr, this);}ConstIterator Begin() const{if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return ConstIterator(cur, this);}}return End();}ConstIterator End() const{return ConstIterator(nullptr, this);}HashTable():_tables(__stl_next_prime(0)),_n(0){}~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}_n = 0;}
}

unordered_set

// Iterator types are taken from the underlying table. The table is
// instantiated with `const K` as its element type, so elements cannot be
// modified through either iterator.
using iterator = typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator;
using const_iterator = typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator;

// Range access simply forwards to the table.
iterator begin()
{
    return _ht.Begin();
}

iterator end()
{
    return _ht.End();
}

const_iterator begin() const
{
    return _ht.Begin();
}

const_iterator end() const
{
    return _ht.End();
}

// Forwards to HashTable::Insert and returns its result as bool.
bool insert(const K& key)
{
    return _ht.Insert(key);
}

private:
    hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;

unordered_map

typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIetartor const_iterator;bool insert(const pair<K, V>& kv){return _ht.Insert(kv);}iterator begin(){return _ht.Begin();}iterator end(){return _ht.End();}const_iterator begin() const{return _ht.Begin();}const_iterator end() const{return _ht.End();}private:hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;

2.3 map支持[]

unordered_map要支持[]主要修改insert返回值，修改HashTable中的insert返回值为pair<Iterator, bool> Insert(const T& data)。

pair<Iterator, bool> Insert(const T& data){KeyOfT kot;Hash hash;Iterator it = Find(kot(data));if (it != End())return { it, false };if (_n == _tables.size())//扩容{vector<Node*> newht(__stl_next_prime(_tables.size() + 1));for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;size_t hashi = hash(kot(cur->_data)) % newht.size();cur->_next = newht[hashi];newht[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newht);}size_t hashi = hash(kot(data)) % _tables.size();Node* newnode = new Node(data);newnode->_next = _tables[hashi];_tables[hashi] = newnode;_n++;return { Iterator(newnode, this), true };}Iterator Find(const K& key){KeyOfT kot;Hash hash;size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){return Iterator(cur, this);}cur = cur->_next;}return End();}

// Returns a reference to the value mapped to `key`. insert() is called with
// {key, V()}, and the reference is taken from the iterator it returns.
V& operator[](const K& key)
{
    auto inserted = insert({ key, V() });
    iterator where = inserted.first;
    return where->second;
}

ret.first是pair<iterator,bool>中的iterator，然后iterator->second，因为上面重载了->，就直接取出了HTIterator中_node->_data的地址，second就是_data的第二个元素。

3.unordered_set

#pragma once
#include "hashtable.h"
#include<iostream>
using namespace std;
namespace wxw
{template<class K, class Hash = HashFunc<K>>class unordered_set{struct SetKeyOfT{const K& operator()(const K& key){return key;}};public:typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;iterator begin(){return _ht.Begin();}iterator end(){return _ht.End();}const_iterator begin() const{return _ht.Begin();}const_iterator end() const{return _ht.End();}pair<iterator, bool> insert(const K& key){return _ht.Insert(key);}iterator find(const K& key){return _ht.Find(key);}bool Erase(const K& key){return _ht.Erase(key);}private:hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;};void print(const unordered_set<int>& s){unordered_set<int>::const_iterator it = s.begin();while (it != s.end()){cout << *it << " ";++it;}cout << endl;}void test_set(){int a[] = { 3, 11, 86, 7, 88, 1, 881, 6, 4, 66 };unordered_set<int> s;for (auto e : a){s.insert(e);}/*unordered_set<int>::iterator it = s.begin();while (it != s.end()){cout << *it << " ";++it;}cout << endl;for (auto e : s){cout << e << " ";}cout << endl;*/print(s);}
}

4.unordered_map

#pragma once
#include "hashtable.h"namespace wxw
{template<class K, class V, class Hash = HashFunc<K>>class unordered_map{struct MapKeyOfT{const K& operator()(const pair<K, V>& kv){return kv.first;}};public:typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;iterator begin(){return _ht.Begin();}iterator end(){return _ht.End();}const_iterator begin() const{return _ht.Begin();}const_iterator end() const{return _ht.End();}V& operator[](const K& key){pair<iterator, bool> ret = insert({ key, V() });return ret.first->second;}pair<iterator, bool> insert(const pair<K, V>& kv){return _ht.Insert(kv);}iterator find(const K& key){return _ht.Find(key);}bool Erase(const K& key){return _ht.Erase(key);}private:hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;};void test_map(){unordered_map<string, string> dict;dict.insert({ "sort", "排序" });dict.insert({ "string", "字符串" });dict.insert({ "left", "左" });dict.insert({ "right", "右边" });dict["left"] = "左边，剩余";dict["insert"] = "插入";dict["string"];unordered_map<string, string>::iterator it = dict.begin();while (it != dict.end()){// 不能修改first，可以修改second//it->first += 'x';it->second += 'x';cout << it->first << ":" << it->second << endl;++it;}cout << endl;for (auto& e : dict){cout << e.first << ":" << e.second << endl;}}
}

5.hashtable

namespace hash_bucket
{template<class T>struct HashNode{T _data;HashNode<T>* _next;HashNode(const T & data):_data(data),_next(nullptr){}};//HashTable 和 HTIterator互相调用，所以要前置声明一下，前置声明不需要加缺省值template<class K, class T, class KeyOfT, class Hash>class HashTable;template<class K, class T, class Ref, class Ptr,class KeyOfT, class Hash>struct HTIterator{typedef HashNode<T> Node;typedef HashTable<K, T, KeyOfT, Hash> HT;typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;Node* _node;const HT* _ht;//如果不加const 在用const迭代器时会导致权限放大，但是权限不可以放大，所以要加constHTIterator(Node* node, const HT* ht):_node(node),_ht(ht){}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const Self& s){return _node != s._node;}Self& operator++(){if (_node->_next){//当前桶还有数据，找下一个不为空的桶_node = _node->_next;}else{//当前桶都走完了，找下一个不为空的桶KeyOfT kot;Hash hash;size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();++hashi;while (hashi < _ht->_tables.size()){_node = _ht->_tables[hashi];if (_node)break;else++hashi;}if (hashi == _ht->_tables.size())_node = nullptr;}return *this;}};template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>class HashTable{//友元声明template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>friend struct HTIterator;typedef HashNode<T> Node;public:typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;Iterator Begin(){if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return Iterator(cur, this);}}return End();}Iterator End(){return Iterator(nullptr, this);}ConstIterator Begin() const{if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return ConstIterator(cur, this);}}return End();}ConstIterator End() const{return ConstIterator(nullptr, this);}HashTable():_tables(__stl_next_prime(0)),_n(0){}~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while 
(cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}_n = 0;}pair<Iterator, bool> Insert(const T& data){KeyOfT kot;Hash hash;Iterator it = Find(kot(data));if (it != End())return { it, false };if (_n == _tables.size())//扩容{vector<Node*> newht(__stl_next_prime(_tables.size() + 1));for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;size_t hashi = hash(kot(cur->_data)) % newht.size();cur->_next = newht[hashi];newht[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newht);}size_t hashi = hash(kot(data)) % _tables.size();Node* newnode = new Node(data);newnode->_next = _tables[hashi];_tables[hashi] = newnode;_n++;return { Iterator(newnode, this), true };}Iterator Find(const K& key){KeyOfT kot;Hash hash;size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){return Iterator(cur, this);}cur = cur->_next;}return End();}bool Erase(const K& key){Hash hash;KeyOfT kot;size_t hashi = hash(key) % _tables.size();Node* prev = nullptr;//前一个节点Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){if (prev == nullptr)//头结点{_tables[hashi] = cur->_next;}else//中间节点{prev->_next = cur->_next;}delete cur;--_n;return true;}else{prev = cur;cur = cur->_next;}}return false;}private:vector<Node*> _tables;size_t _n = 0;};
}

查看全文

http://www.xdnf.cn/news/1196713.html