// mmseg_wrapper.h
  1. #pragma once
#include <stdint.h>

#include <atomic>
#include <cassert>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <vector>

#ifdef WIN32
#include "bsd_getopt_win.h"
#else
#include "bsd_getopt.h"
#endif
#include "Segmenter.h"
#include "SegmenterManager.h"
#include "SynonymsDict.h"
#include "ThesaurusDict.h"
#include "UnigramCorpusReader.h"
#include "UnigramDict.h"
#include "csr_utils.h"
  19. namespace mmseg {
  /// Abstract interface for a lock that is selected by a 32-bit hash value.
  ///
  /// How a hash is mapped onto an underlying lock is left entirely to the
  /// implementation. Note the historical spelling: `Lock` is capitalised,
  /// `unlock` is not.
  class HashLockSupplier {
   public:
    /// Virtual so implementations can be destroyed through a base pointer.
    virtual ~HashLockSupplier() = default;

    /// Acquires the lock associated with `hash`.
    virtual void Lock(uint32_t hash) = 0;

    /// Releases the lock associated with `hash`.
    virtual void unlock(uint32_t hash) = 0;
  };
  26. class ThreadHashLock : public HashLockSupplier {
  27. private:
  28. std::mutex *locks_;
  29. uint32_t locks_num_;
  30. public:
  31. ThreadHashLock(uint32_t locks_num) {
  32. this->locks_num_ = locks_num;
  33. this->locks_ = new std::mutex[locks_num_];
  34. }
  35. virtual ~ThreadHashLock() {
  36. if (nullptr != locks_) {
  37. delete locks_;
  38. locks_ = nullptr;
  39. }
  40. }
  41. virtual void Lock(uint32_t hash) { locks_[hash % locks_num_].lock(); }
  42. virtual void unlock(uint32_t hash) { locks_[hash % locks_num_].unlock(); }
  43. private:
  44. };
  45. class MMSEGWrapper {
  46. private:
  47. std::vector<css::SegmenterManager *> managers_;
  48. size_t manager_num_;
  49. HashLockSupplier *hash_lock_;
  50. std::atomic<std::uint64_t> seq_;
  51. public:
  52. MMSEGWrapper() : hash_lock_(NULL) {}
  53. virtual ~MMSEGWrapper() {
  54. for (auto seg : managers_) delete seg;
  55. if (hash_lock_) delete hash_lock_;
  56. }
  57. // Note:
  58. // file: dict_path/uni.lib must exist
  59. // file: dict_path/mmseg.ini must exist
  60. void Init(const char *dict_path, size_t segmenter_num = 5000) {
  61. hash_lock_ = new ThreadHashLock(segmenter_num);
  62. manager_num_ = segmenter_num;
  63. assert(manager_num_ > 0);
  64. for (size_t i = 0; i < manager_num_; i++) {
  65. css::SegmenterManager *seg = new css::SegmenterManager();
  66. assert(0 == seg->init(dict_path));
  67. managers_.push_back(seg);
  68. }
  69. }
  70. static MMSEGWrapper *Instance() {
  71. static MMSEGWrapper segment;
  72. return &segment;
  73. }
  74. virtual bool BuildTerms(const char *buffer, size_t buffer_size,
  75. std::set<std::string> &terms);
  76. private:
  77. };
  78. } // namespace mmseg