SynonymsDict.h 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* ***** BEGIN LICENSE BLOCK *****
  3. * Version: GPL 2.0
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License. You should have
  7. * received a copy of the GPL license along with this program; if you
  8. * did not, you can find it at http://www.gnu.org/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * The Original Code is Coreseek.com code.
  16. *
  17. * Copyright (C) 2007-2008. All Rights Reserved.
  18. *
  19. * Author:
  20. * Li monan <li.monan@gmail.com>
  21. *
  22. * ***** END LICENSE BLOCK ***** */
  23. #ifndef css_SynonymsDict_h
  24. #define css_SynonymsDict_h
  25. #include <string>
  26. #include <map>
  27. #include "darts.h"
  28. #include "csr_mmap.h"
  29. namespace css {
  30. typedef struct _csr_sybarray_trie_tag {
  31. i4 base;
  32. u4 check;
  33. // u4 flag; //used to tell how may features. only low-4bit used now.
  34. size_t offset; // the base offset.
  35. } _csr_sybarray_trie;
  36. /**
  37. * How to find item fast is a real problem here.
  38. * @return the string(utf-8,encoded) of the id.
  39. */
  40. class SynonymsDict {
  41. public:
  42. typedef Darts::DoubleArray::result_pair_type result_pair_type;
  43. typedef struct _tag_result_pair_type {
  44. i4 value;
  45. u1 length;
  46. i4 dict_id;
  47. } Result;
  48. public:
  49. SynonymsDict() : m_file(NULL), array_(NULL) { string_pool = NULL; };
  50. virtual ~SynonymsDict() {
  51. if (m_file) {
  52. csr_munmap_file(m_file);
  53. }
  54. }
  55. virtual int load(const char* filename);
  56. virtual int import(const char* filename);
  57. virtual int save(const char* filename);
  58. virtual const char* exactMatch(const char* key, int len = 0);
  59. virtual const char* maxMatch(const char* key, int& len);
  60. protected:
  61. _csr_mmap_t* m_file;
  62. Darts::DoubleArray m_da;
  63. std::map<std::string, size_t> rKeys;
  64. // std::set<std::string, size_t> rKeys;
  65. std::map<std::string, size_t> lKeys;
  66. size_t m_string_pool_size;
  67. _csr_sybarray_trie* array_;
  68. const char* string_pool;
  69. typedef i4 array_type_;
  70. typedef u4 array_u_type_;
  71. typedef u1 node_u_type_;
  72. inline void set_result(Result& x, i4 r, u1 l) {
  73. x.value = r;
  74. x.length = l;
  75. x.dict_id = 0;
  76. }
  77. inline void set_result(Result& x, i4 r, u1 l, i4 id) {
  78. x.value = r;
  79. x.length = l;
  80. x.dict_id = id;
  81. }
  82. protected:
  83. int exactMatchID(const char* key);
  84. };
  85. } /* End of namespace css */
  86. #endif