flat_map.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. // Copyright (c) 2013 Baidu.com, Inc. All Rights Reserved
  2. //
  3. // This closed addressing hash-map puts first linked node in bucket array
  4. // directly to save an extra memory indirection. As a result, this map yields
  5. // close performance to raw array on nearly all operations, probably being the
  6. // fastest hashmap for small-sized key/value ever.
  7. //
  8. // Performance comparisons between several maps:
  9. // [ value = 8 bytes ]
  10. // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/108/55/58
  11. // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/123/55/37
  12. // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/92/55/54
  13. // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/67/51/35
  14. // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/100/66/54
  15. // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/72/55/35
  16. // [ value = 32 bytes ]
  17. // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/108/56/56
  18. // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/77/53/38
  19. // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/94/54/53
  20. // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/66/49/36
  21. // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 13/106/62/54
  22. // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/69/53/36
  23. // [ value = 128 bytes ]
  24. // Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 31/182/96/96
  25. // Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/117/51/44
  26. // Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 29/191/100/97
  27. // Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/100/49/44
  28. // Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 30/184/113/114
  29. // Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/99/52/43
  30. // [ value = 8 bytes ]
  31. // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/171/108/60
  32. // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/158/126/37
  33. // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/159/117/54
  34. // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/153/135/36
  35. // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 12/223/180/55
  36. // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/237/210/48
  37. // [ value = 32 bytes ]
  38. // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 16/179/108/57
  39. // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/157/120/38
  40. // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 15/168/127/54
  41. // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/164/135/39
  42. // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 19/241/201/56
  43. // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/235/218/54
  44. // [ value = 128 bytes ]
  45. // Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/242/154/97
  46. // Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/185/119/56
  47. // Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/262/182/99
  48. // Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/215/157/66
  49. // Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 44/330/278/114
  50. // Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/307/242/90
  51. // [ value = 8 bytes ]
  52. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/51/52/13
  53. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/98/82/14
  54. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/175/170/14
  55. // [ value = 32 bytes ]
  56. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/52/52/14
  57. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/84/82/13
  58. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/164/156/14
  59. // [ value = 128 bytes ]
  60. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/54/53/14
  61. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/90/13
  62. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/178/185/14
  63. // [ value = 8 bytes ]
  64. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/51/49/14
  65. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/86/94/14
  66. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/171/14
  67. // [ value = 32 bytes ]
  68. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/51/53/14
  69. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/98/82/13
  70. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/163/156/14
  71. // [ value = 128 bytes ]
  72. // Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/55/53/14
  73. // Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/89/13
  74. // Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/185/14
  75. //
  76. // Author: Ge,Jun (gejun@baidu.com)
  77. // Date: Wed Nov 27 12:59:20 CST 2013
  78. #ifndef BASE_FLAT_MAP_H
  79. #define BASE_FLAT_MAP_H
  80. #include <stdint.h>
  81. #include <functional>
  82. #include <iostream> // std::ostream
  83. #include "base/type_traits.h"
  84. #include "base/logging.h"
  85. #include "base/find_cstr.h"
  86. #include "base/single_threaded_pool.h" // SingleThreadedPool
  87. #include "base/containers/hash_tables.h" // hash<>
  88. #include "base/bit_array.h" // bit_array_*
  89. #include "base/strings/string_piece.h" // StringPiece
  90. namespace base {
  91. template <typename _Map, typename _Element> class FlatMapIterator;
  92. template <typename _Map, typename _Element> class SparseFlatMapIterator;
  93. template <typename K, typename T> struct FlatMapElement;
  94. struct FlatMapVoid {}; // Replace void which is not constructible.
  95. template <typename K> struct DefaultHasher;
  96. template <typename K> struct DefaultEqualTo;
  97. struct BucketInfo {
  98. size_t longest_length;
  99. double average_length;
  100. };
  101. // NOTE: Objects stored in FlatMap MUST be copyable.
  102. template <typename _K, typename _T,
  103. // Compute hash code from key.
  104. // Use public/murmurhash3 to make better distributions.
  105. typename _Hash = DefaultHasher<_K>,
  106. // Test equivalence between stored-key and passed-key.
  107. // stored-key is always on LHS, passed-key is always on RHS.
  108. typename _Equal = DefaultEqualTo<_K>,
  109. bool _Sparse = false>
  110. class FlatMap {
  111. public:
  112. typedef _K key_type;
  113. typedef _T mapped_type;
  114. typedef FlatMapElement<_K, _T> Element;
  115. typedef typename Element::value_type value_type;
  116. typedef typename conditional<
  117. _Sparse, SparseFlatMapIterator<FlatMap, value_type>,
  118. FlatMapIterator<FlatMap, value_type> >::type iterator;
  119. typedef typename conditional<
  120. _Sparse, SparseFlatMapIterator<FlatMap, const value_type>,
  121. FlatMapIterator<FlatMap, const value_type> >::type const_iterator;
  122. typedef _Hash hasher;
  123. typedef _Equal key_equal;
  124. struct PositionHint {
  125. size_t nbucket;
  126. size_t offset;
  127. bool at_entry;
  128. key_type key;
  129. };
  130. FlatMap(const hasher& hashfn = hasher(), const key_equal& eql = key_equal());
  131. ~FlatMap();
  132. FlatMap(const FlatMap& rhs);
  133. void operator=(const FlatMap& rhs);
  134. void swap(FlatMap & rhs);
  135. // Must be called to initialize this map, otherwise insert/operator[]
  136. // crashes, and seek/erase fails.
  137. // `nbucket' is the initial number of buckets. `load_factor' is the
  138. // maximum value of size()*100/nbucket, if the value is reached, nbucket
  139. // will be doubled and all items stored will be rehashed which is costly.
  140. // Choosing proper values for these 2 parameters reduces costs.
  141. int init(size_t nbucket, u_int load_factor = 80);
  142. // Insert a pair of |key| and |value|. If size()*100/bucket_count() is
  143. // more than load_factor(), a resize() will be done.
  144. // Returns address of the inserted value, NULL on error.
  145. mapped_type* insert(const key_type& key, const mapped_type& value);
  146. // Remove |key| and the associated value
  147. // Returns: 1 on erased, 0 otherwise.
  148. template <typename K2> size_t erase(const K2& key);
  149. // Remove all items. Allocated spaces are NOT returned by system.
  150. void clear();
  151. // Remove all items and return all allocated spaces to system.
  152. void clear_and_reset_pool();
  153. // Search for the value associated with |key|
  154. // Returns: address of the value
  155. template <typename K2> mapped_type* seek(const K2& key) const;
  156. // Get the value associated with |key|. If |key| does not exist,
  157. // insert with a default-constructed value. If size()*100/bucket_count()
  158. // is more than load_factor, a resize will be done.
  159. // Returns reference of the value
  160. mapped_type& operator[](const key_type& key);
  161. // Resize this map. This is optional because resizing will be triggered by
  162. // insert() or operator[] if there're too many items.
  163. // Returns successful or not.
  164. bool resize(size_t nbucket);
  165. // Iterators
  166. iterator begin();
  167. iterator end();
  168. const_iterator begin() const;
  169. const_iterator end() const;
  170. // Iterate FlatMap inconsistently in more-than-one passes. This is used
  171. // in multi-threaded environment to divide the critical sections of
  172. // iterating big maps into smaller ones. "inconsistently" means that:
  173. // * elements added during iteration may be missed.
  174. // * some elements may be iterated more than once.
  175. // * iteration is restarted at beginning when the map is resized.
  176. // Example: (copying all keys in multi-threaded environment)
  177. // LOCK;
  178. // size_t n = 0;
  179. // for (Map::const_iterator it = map.begin(); it != map.end(); ++it) {
  180. // if (++n >= 256/*max iterated one pass*/) {
  181. // Map::PositionHint hint;
  182. // map.save_iterator(it, &hint);
  183. // n = 0;
  184. // UNLOCK;
  185. // LOCK;
  186. // it = map.restore_iterator(hint);
  187. // if (it == map.begin()) { // resized
  188. // keys->clear();
  189. // }
  190. // if (it == map.end()) {
  191. // break;
  192. // }
  193. // }
  194. // keys->push_back(it->first);
  195. // }
  196. // UNLOCK;
  197. void save_iterator(const const_iterator&, PositionHint*) const;
  198. const_iterator restore_iterator(const PositionHint&) const;
  199. // True if init() was successfully called.
  200. bool initialized() const { return _buckets != NULL; }
  201. bool empty() const { return _size == 0; }
  202. size_t size() const { return _size; }
  203. size_t bucket_count() const { return _nbucket; }
  204. u_int load_factor () const { return _load_factor; }
  205. // Returns #nodes of longest bucket in this map. This scans all buckets.
  206. BucketInfo bucket_info() const;
  207. struct Bucket {
  208. explicit Bucket(const _K& k) : next(NULL)
  209. { new (element_spaces) Element(k); }
  210. Bucket(const Bucket& other) : next(NULL)
  211. { new (element_spaces) Element(other.element()); }
  212. bool is_valid() const { return next != (const Bucket*)-1UL; }
  213. void set_invalid() { next = (Bucket*)-1UL; }
  214. // NOTE: Only be called when in_valid() is true.
  215. Element& element() {
  216. void* spaces = element_spaces; // Suppress strict-aliasing
  217. return *reinterpret_cast<Element*>(spaces);
  218. }
  219. const Element& element() const {
  220. const void* spaces = element_spaces;
  221. return *reinterpret_cast<const Element*>(spaces);
  222. }
  223. Bucket* next;
  224. char element_spaces[sizeof(Element)];
  225. };
  226. private:
  227. template <typename _Map, typename _Element> friend class FlatMapIterator;
  228. template <typename _Map, typename _Element> friend class FlatMapSparseIterator;
  229. // True if buckets need to be resized before holding `size' elements.
  230. inline bool is_too_crowded(size_t size) const
  231. { return size * 100 >= _nbucket * _load_factor; }
  232. size_t _size;
  233. size_t _nbucket;
  234. Bucket* _buckets;
  235. uint64_t* _thumbnail;
  236. u_int _load_factor;
  237. hasher _hashfn;
  238. key_equal _eql;
  239. SingleThreadedPool<sizeof(Bucket), 1024, 16> _pool;
  240. };
  241. template <typename _K,
  242. typename _Hash = DefaultHasher<_K>,
  243. typename _Equal = DefaultEqualTo<_K>,
  244. bool _Sparse = false>
  245. class FlatSet {
  246. public:
  247. typedef FlatMap<_K, FlatMapVoid, _Hash, _Equal, _Sparse> Map;
  248. typedef typename Map::key_type key_type;
  249. typedef typename Map::value_type value_type;
  250. typedef typename Map::Bucket Bucket;
  251. typedef typename Map::iterator iterator;
  252. typedef typename Map::const_iterator const_iterator;
  253. typedef typename Map::hasher hasher;
  254. typedef typename Map::key_equal key_equal;
  255. FlatSet(const hasher& hashfn = hasher(), const key_equal& eql = key_equal())
  256. : _map(hashfn, eql) {}
  257. void swap(FlatSet & rhs) { _map.swap(rhs._map); }
  258. int init(size_t nbucket, u_int load_factor = 80)
  259. { return _map.init(nbucket, load_factor); }
  260. const void* insert(const key_type& key)
  261. { return _map.insert(key, FlatMapVoid()); }
  262. template <typename K2>
  263. size_t erase(const K2& key) { return _map.erase(key); }
  264. void clear() { return _map.clear(); }
  265. void clear_and_reset_pool() { return _map.clear_and_reset_pool(); }
  266. template <typename K2>
  267. const void* seek(const K2& key) const { return _map.seek(key); }
  268. bool resize(size_t nbucket) { return _map.resize(nbucket); }
  269. iterator begin() { return _map.begin(); }
  270. iterator end() { return _map.end(); }
  271. const_iterator begin() const { return _map.begin(); }
  272. const_iterator end() const { return _map.end(); }
  273. bool initialized() const { return _map.initialized(); }
  274. bool empty() const { return _map.empty(); }
  275. size_t size() const { return _map.size(); }
  276. size_t bucket_count() const { return _map.bucket_count(); }
  277. u_int load_factor () const { return _map.load_factor(); }
  278. BucketInfo bucket_info() const { return _map.bucket_info(); }
  279. private:
  280. Map _map;
  281. };
  282. template <typename _K, typename _T,
  283. typename _Hash = DefaultHasher<_K>,
  284. typename _Equal = DefaultEqualTo<_K> >
  285. class SparseFlatMap : public FlatMap<_K, _T, _Hash, _Equal, true> {
  286. };
  287. template <typename _K,
  288. typename _Hash = DefaultHasher<_K>,
  289. typename _Equal = DefaultEqualTo<_K> >
  290. class SparseFlatSet : public FlatSet<_K, _Hash, _Equal, true> {
  291. };
  292. // Implement FlatMapElement
  293. template <typename K, typename T>
  294. class FlatMapElement {
  295. public:
  296. typedef std::pair<const K, T> value_type;
  297. // NOTE: Have to initialize _value in this way which is treated by GCC
  298. // specially that _value is zeroized(POD) or constructed(non-POD). Other
  299. // methods do not work. For example, if we put _value into the std::pair
  300. // and do initialization by calling _pair(k, T()), _value will be copy
  301. // constructed from the defaultly constructed instance(not zeroized for
  302. // POD) which is wrong generally.
  303. explicit FlatMapElement(const K& k) : _key(k), _value(T()) {}
  304. // ^^^^^^^^^^^
  305. const K& first_ref() const { return _key; }
  306. T& second_ref() { return _value; }
  307. value_type& value_ref() { return *reinterpret_cast<value_type*>(this); }
  308. inline static const K& first_ref_from_value(const value_type& v)
  309. { return v.first; }
  310. inline static const T& second_ref_from_value(const value_type& v)
  311. { return v.second; }
  312. private:
  313. const K _key;
  314. T _value;
  315. };
  316. template <typename K>
  317. class FlatMapElement<K, FlatMapVoid> {
  318. public:
  319. typedef const K value_type;
  320. explicit FlatMapElement(const K& k) : _key(k) {}
  321. const K& first_ref() const { return _key; }
  322. FlatMapVoid& second_ref() { return second_ref_from_value(_key); }
  323. value_type& value_ref() { return _key; }
  324. inline static const K& first_ref_from_value(value_type& v) { return v; }
  325. inline static FlatMapVoid& second_ref_from_value(value_type&) {
  326. static FlatMapVoid dummy;
  327. return dummy;
  328. }
  329. private:
  330. K _key;
  331. };
  332. // Implement DefaultHasher and DefaultEqualTo
  333. template <typename K>
  334. struct DefaultHasher : public BASE_HASH_NAMESPACE::hash<K> {
  335. };
  336. template <>
  337. struct DefaultHasher<std::string> {
  338. std::size_t operator()(const base::StringPiece& s) const {
  339. std::size_t result = 0;
  340. for (base::StringPiece::const_iterator
  341. i = s.begin(); i != s.end(); ++i) {
  342. result = result * 101 + *i;
  343. }
  344. return result;
  345. }
  346. std::size_t operator()(const char* s) const {
  347. std::size_t result = 0;
  348. for (; *s; ++s) {
  349. result = result * 101 + *s;
  350. }
  351. return result;
  352. }
  353. std::size_t operator()(const std::string& s) const {
  354. std::size_t result = 0;
  355. for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) {
  356. result = result * 101 + *i;
  357. }
  358. return result;
  359. }
  360. };
  361. template <typename K>
  362. struct DefaultEqualTo : public std::equal_to<K> {
  363. };
  364. template <>
  365. struct DefaultEqualTo<std::string> {
  366. bool operator()(const std::string& s1, const std::string& s2) const
  367. { return s1 == s2; }
  368. bool operator()(const std::string& s1, const base::StringPiece& s2) const
  369. { return s1 == s2; }
  370. bool operator()(const std::string& s1, const char* s2) const
  371. { return s1 == s2; }
  372. };
  373. // find_cstr and find_lowered_cstr
  374. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  375. const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  376. const char* key) {
  377. return m.seek(key);
  378. }
  379. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  380. _T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  381. const char* key) {
  382. return m.seek(key);
  383. }
  384. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  385. const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  386. const char* key, size_t length) {
  387. return m.seek(base::StringPiece(key, length));
  388. }
  389. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  390. _T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  391. const char* key, size_t length) {
  392. return m.seek(base::StringPiece(key, length));
  393. }
  394. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  395. const _T* find_lowered_cstr(
  396. const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  397. const char* key) {
  398. return m.seek(*tls_stringmap_temp.get_lowered_string(key));
  399. }
  400. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  401. _T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  402. const char* key) {
  403. return m.seek(*tls_stringmap_temp.get_lowered_string(key));
  404. }
  405. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  406. const _T* find_lowered_cstr(
  407. const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  408. const char* key, size_t length) {
  409. return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
  410. }
  411. template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
  412. _T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
  413. const char* key, size_t length) {
  414. return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
  415. }
  416. } // namespace baidu
  417. #include "base/containers/flat_map_inl.h"
  418. #endif //BASE_FLAT_MAP_H