123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- // Copyright (c) 2013 Baidu.com, Inc. All Rights Reserved
- //
- // This closed addressing hash-map puts first linked node in bucket array
- // directly to save an extra memory indirection. As a result, this map yields
- // close performance to raw array on nearly all operations, probably being the
- // fastest hashmap for small-sized key/value ever.
- //
- // Performance comparisons between several maps:
- // [ value = 8 bytes ]
- // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/108/55/58
- // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/123/55/37
- // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/92/55/54
- // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/67/51/35
- // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/100/66/54
- // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/72/55/35
- // [ value = 32 bytes ]
- // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/108/56/56
- // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/77/53/38
- // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/94/54/53
- // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/66/49/36
- // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 13/106/62/54
- // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/69/53/36
- // [ value = 128 bytes ]
- // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 31/182/96/96
- // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/117/51/44
- // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 29/191/100/97
- // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/100/49/44
- // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 30/184/113/114
- // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/99/52/43
- // [ value = 8 bytes ]
- // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/171/108/60
- // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/158/126/37
- // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/159/117/54
- // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/153/135/36
- // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 12/223/180/55
- // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/237/210/48
- // [ value = 32 bytes ]
- // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 16/179/108/57
- // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/157/120/38
- // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 15/168/127/54
- // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/164/135/39
- // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 19/241/201/56
- // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/235/218/54
- // [ value = 128 bytes ]
- // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/242/154/97
- // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/185/119/56
- // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/262/182/99
- // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/215/157/66
- // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 44/330/278/114
- // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/307/242/90
- // [ value = 8 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/51/52/13
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/98/82/14
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/175/170/14
- // [ value = 32 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/52/52/14
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/84/82/13
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/164/156/14
- // [ value = 128 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/54/53/14
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/90/13
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/178/185/14
- // [ value = 8 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/51/49/14
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/86/94/14
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/171/14
- // [ value = 32 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/51/53/14
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/98/82/13
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/163/156/14
- // [ value = 128 bytes ]
- // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/55/53/14
- // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/89/13
- // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/185/14
- //
- // Author: Ge,Jun (gejun@baidu.com)
- // Date: Wed Nov 27 12:59:20 CST 2013
- #ifndef BASE_FLAT_MAP_H
- #define BASE_FLAT_MAP_H
- #include <stdint.h>
- #include <functional>
- #include <iostream> // std::ostream
- #include "base/type_traits.h"
- #include "base/logging.h"
- #include "base/find_cstr.h"
- #include "base/single_threaded_pool.h" // SingleThreadedPool
- #include "base/containers/hash_tables.h" // hash<>
- #include "base/bit_array.h" // bit_array_*
- #include "base/strings/string_piece.h" // StringPiece
- namespace base {
- template <typename _Map, typename _Element> class FlatMapIterator;
- template <typename _Map, typename _Element> class SparseFlatMapIterator;
- template <typename K, typename T> struct FlatMapElement;
- struct FlatMapVoid {}; // Replace void which is not constructible.
- template <typename K> struct DefaultHasher;
- template <typename K> struct DefaultEqualTo;
- struct BucketInfo {
- size_t longest_length;
- double average_length;
- };
- // NOTE: Objects stored in FlatMap MUST be copyable.
- template <typename _K, typename _T,
- // Compute hash code from key.
- // Use public/murmurhash3 to make better distributions.
- typename _Hash = DefaultHasher<_K>,
- // Test equivalence between stored-key and passed-key.
- // stored-key is always on LHS, passed-key is always on RHS.
- typename _Equal = DefaultEqualTo<_K>,
- bool _Sparse = false>
- class FlatMap {
- public:
- typedef _K key_type;
- typedef _T mapped_type;
- typedef FlatMapElement<_K, _T> Element;
- typedef typename Element::value_type value_type;
- typedef typename conditional<
- _Sparse, SparseFlatMapIterator<FlatMap, value_type>,
- FlatMapIterator<FlatMap, value_type> >::type iterator;
- typedef typename conditional<
- _Sparse, SparseFlatMapIterator<FlatMap, const value_type>,
- FlatMapIterator<FlatMap, const value_type> >::type const_iterator;
- typedef _Hash hasher;
- typedef _Equal key_equal;
- struct PositionHint {
- size_t nbucket;
- size_t offset;
- bool at_entry;
- key_type key;
- };
-
- FlatMap(const hasher& hashfn = hasher(), const key_equal& eql = key_equal());
- ~FlatMap();
- FlatMap(const FlatMap& rhs);
- void operator=(const FlatMap& rhs);
- void swap(FlatMap & rhs);
- // Must be called to initialize this map, otherwise insert/operator[]
- // crashes, and seek/erase fails.
- // `nbucket' is the initial number of buckets. `load_factor' is the
- // maximum value of size()*100/nbucket, if the value is reached, nbucket
- // will be doubled and all items stored will be rehashed which is costly.
- // Choosing proper values for these 2 parameters reduces costs.
- int init(size_t nbucket, u_int load_factor = 80);
-
- // Insert a pair of |key| and |value|. If size()*100/bucket_count() is
- // more than load_factor(), a resize() will be done.
- // Returns address of the inserted value, NULL on error.
- mapped_type* insert(const key_type& key, const mapped_type& value);
- // Remove |key| and the associated value
- // Returns: 1 on erased, 0 otherwise.
- template <typename K2> size_t erase(const K2& key);
-
- // Remove all items. Allocated spaces are NOT returned by system.
- void clear();
- // Remove all items and return all allocated spaces to system.
- void clear_and_reset_pool();
-
- // Search for the value associated with |key|
- // Returns: address of the value
- template <typename K2> mapped_type* seek(const K2& key) const;
- // Get the value associated with |key|. If |key| does not exist,
- // insert with a default-constructed value. If size()*100/bucket_count()
- // is more than load_factor, a resize will be done.
- // Returns reference of the value
- mapped_type& operator[](const key_type& key);
- // Resize this map. This is optional because resizing will be triggered by
- // insert() or operator[] if there're too many items.
- // Returns successful or not.
- bool resize(size_t nbucket);
-
- // Iterators
- iterator begin();
- iterator end();
- const_iterator begin() const;
- const_iterator end() const;
- // Iterate FlatMap inconsistently in more-than-one passes. This is used
- // in multi-threaded environment to divide the critical sections of
- // iterating big maps into smaller ones. "inconsistently" means that:
- // * elements added during iteration may be missed.
- // * some elements may be iterated more than once.
- // * iteration is restarted at beginning when the map is resized.
- // Example: (copying all keys in multi-threaded environment)
- // LOCK;
- // size_t n = 0;
- // for (Map::const_iterator it = map.begin(); it != map.end(); ++it) {
- // if (++n >= 256/*max iterated one pass*/) {
- // Map::PositionHint hint;
- // map.save_iterator(it, &hint);
- // n = 0;
- // UNLOCK;
- // LOCK;
- // it = map.restore_iterator(hint);
- // if (it == map.begin()) { // resized
- // keys->clear();
- // }
- // if (it == map.end()) {
- // break;
- // }
- // }
- // keys->push_back(it->first);
- // }
- // UNLOCK;
- void save_iterator(const const_iterator&, PositionHint*) const;
- const_iterator restore_iterator(const PositionHint&) const;
- // True if init() was successfully called.
- bool initialized() const { return _buckets != NULL; }
- bool empty() const { return _size == 0; }
- size_t size() const { return _size; }
- size_t bucket_count() const { return _nbucket; }
- u_int load_factor () const { return _load_factor; }
- // Returns #nodes of longest bucket in this map. This scans all buckets.
- BucketInfo bucket_info() const;
- struct Bucket {
- explicit Bucket(const _K& k) : next(NULL)
- { new (element_spaces) Element(k); }
- Bucket(const Bucket& other) : next(NULL)
- { new (element_spaces) Element(other.element()); }
- bool is_valid() const { return next != (const Bucket*)-1UL; }
- void set_invalid() { next = (Bucket*)-1UL; }
- // NOTE: Only be called when in_valid() is true.
- Element& element() {
- void* spaces = element_spaces; // Suppress strict-aliasing
- return *reinterpret_cast<Element*>(spaces);
- }
- const Element& element() const {
- const void* spaces = element_spaces;
- return *reinterpret_cast<const Element*>(spaces);
- }
- Bucket* next;
- char element_spaces[sizeof(Element)];
- };
- private:
- template <typename _Map, typename _Element> friend class FlatMapIterator;
- template <typename _Map, typename _Element> friend class FlatMapSparseIterator;
- // True if buckets need to be resized before holding `size' elements.
- inline bool is_too_crowded(size_t size) const
- { return size * 100 >= _nbucket * _load_factor; }
-
- size_t _size;
- size_t _nbucket;
- Bucket* _buckets;
- uint64_t* _thumbnail;
- u_int _load_factor;
- hasher _hashfn;
- key_equal _eql;
- SingleThreadedPool<sizeof(Bucket), 1024, 16> _pool;
- };
- template <typename _K,
- typename _Hash = DefaultHasher<_K>,
- typename _Equal = DefaultEqualTo<_K>,
- bool _Sparse = false>
- class FlatSet {
- public:
- typedef FlatMap<_K, FlatMapVoid, _Hash, _Equal, _Sparse> Map;
- typedef typename Map::key_type key_type;
- typedef typename Map::value_type value_type;
- typedef typename Map::Bucket Bucket;
- typedef typename Map::iterator iterator;
- typedef typename Map::const_iterator const_iterator;
- typedef typename Map::hasher hasher;
- typedef typename Map::key_equal key_equal;
-
- FlatSet(const hasher& hashfn = hasher(), const key_equal& eql = key_equal())
- : _map(hashfn, eql) {}
- void swap(FlatSet & rhs) { _map.swap(rhs._map); }
- int init(size_t nbucket, u_int load_factor = 80)
- { return _map.init(nbucket, load_factor); }
- const void* insert(const key_type& key)
- { return _map.insert(key, FlatMapVoid()); }
- template <typename K2>
- size_t erase(const K2& key) { return _map.erase(key); }
- void clear() { return _map.clear(); }
- void clear_and_reset_pool() { return _map.clear_and_reset_pool(); }
- template <typename K2>
- const void* seek(const K2& key) const { return _map.seek(key); }
- bool resize(size_t nbucket) { return _map.resize(nbucket); }
-
- iterator begin() { return _map.begin(); }
- iterator end() { return _map.end(); }
- const_iterator begin() const { return _map.begin(); }
- const_iterator end() const { return _map.end(); }
- bool initialized() const { return _map.initialized(); }
- bool empty() const { return _map.empty(); }
- size_t size() const { return _map.size(); }
- size_t bucket_count() const { return _map.bucket_count(); }
- u_int load_factor () const { return _map.load_factor(); }
- BucketInfo bucket_info() const { return _map.bucket_info(); }
- private:
- Map _map;
- };
- template <typename _K, typename _T,
- typename _Hash = DefaultHasher<_K>,
- typename _Equal = DefaultEqualTo<_K> >
- class SparseFlatMap : public FlatMap<_K, _T, _Hash, _Equal, true> {
- };
- template <typename _K,
- typename _Hash = DefaultHasher<_K>,
- typename _Equal = DefaultEqualTo<_K> >
- class SparseFlatSet : public FlatSet<_K, _Hash, _Equal, true> {
- };
- // Implement FlatMapElement
- template <typename K, typename T>
- class FlatMapElement {
- public:
- typedef std::pair<const K, T> value_type;
- // NOTE: Have to initialize _value in this way which is treated by GCC
- // specially that _value is zeroized(POD) or constructed(non-POD). Other
- // methods do not work. For example, if we put _value into the std::pair
- // and do initialization by calling _pair(k, T()), _value will be copy
- // constructed from the defaultly constructed instance(not zeroized for
- // POD) which is wrong generally.
- explicit FlatMapElement(const K& k) : _key(k), _value(T()) {}
- // ^^^^^^^^^^^
- const K& first_ref() const { return _key; }
- T& second_ref() { return _value; }
- value_type& value_ref() { return *reinterpret_cast<value_type*>(this); }
- inline static const K& first_ref_from_value(const value_type& v)
- { return v.first; }
- inline static const T& second_ref_from_value(const value_type& v)
- { return v.second; }
- private:
- const K _key;
- T _value;
- };
- template <typename K>
- class FlatMapElement<K, FlatMapVoid> {
- public:
- typedef const K value_type;
- explicit FlatMapElement(const K& k) : _key(k) {}
- const K& first_ref() const { return _key; }
- FlatMapVoid& second_ref() { return second_ref_from_value(_key); }
- value_type& value_ref() { return _key; }
- inline static const K& first_ref_from_value(value_type& v) { return v; }
- inline static FlatMapVoid& second_ref_from_value(value_type&) {
- static FlatMapVoid dummy;
- return dummy;
- }
- private:
- K _key;
- };
- // Implement DefaultHasher and DefaultEqualTo
- template <typename K>
- struct DefaultHasher : public BASE_HASH_NAMESPACE::hash<K> {
- };
- template <>
- struct DefaultHasher<std::string> {
- std::size_t operator()(const base::StringPiece& s) const {
- std::size_t result = 0;
- for (base::StringPiece::const_iterator
- i = s.begin(); i != s.end(); ++i) {
- result = result * 101 + *i;
- }
- return result;
- }
- std::size_t operator()(const char* s) const {
- std::size_t result = 0;
- for (; *s; ++s) {
- result = result * 101 + *s;
- }
- return result;
- }
- std::size_t operator()(const std::string& s) const {
- std::size_t result = 0;
- for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) {
- result = result * 101 + *i;
- }
- return result;
- }
- };
- template <typename K>
- struct DefaultEqualTo : public std::equal_to<K> {
- };
- template <>
- struct DefaultEqualTo<std::string> {
- bool operator()(const std::string& s1, const std::string& s2) const
- { return s1 == s2; }
- bool operator()(const std::string& s1, const base::StringPiece& s2) const
- { return s1 == s2; }
- bool operator()(const std::string& s1, const char* s2) const
- { return s1 == s2; }
- };
- // find_cstr and find_lowered_cstr
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key) {
- return m.seek(key);
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- _T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key) {
- return m.seek(key);
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key, size_t length) {
- return m.seek(base::StringPiece(key, length));
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- _T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key, size_t length) {
- return m.seek(base::StringPiece(key, length));
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- const _T* find_lowered_cstr(
- const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key) {
- return m.seek(*tls_stringmap_temp.get_lowered_string(key));
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- _T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key) {
- return m.seek(*tls_stringmap_temp.get_lowered_string(key));
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- const _T* find_lowered_cstr(
- const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key, size_t length) {
- return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
- }
- template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
- _T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
- const char* key, size_t length) {
- return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
- }
- } // namespace baidu
- #include "base/containers/flat_map_inl.h"
- #endif //BASE_FLAT_MAP_H
|