UnigramDict.h 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* ***** BEGIN LICENSE BLOCK *****
  3. * Version: GPL 2.0
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License. You should have
  7. * received a copy of the GPL license along with this program; if you
  8. * did not, you can find it at http://www.gnu.org/
  9. *
  10. * Software distributed under the License is distributed on an "AS IS" basis,
  11. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12. * for the specific language governing rights and limitations under the
  13. * License.
  14. *
  15. * The Original Code is Coreseek.com code.
  16. *
  17. * Copyright (C) 2007-2008. All Rights Reserved.
  18. *
  19. * Author:
  20. * Li monan <li.monan@gmail.com>
  21. *
  22. * ***** END LICENSE BLOCK ***** */
  23. #ifndef css_UnigramDict_h
  24. #define css_UnigramDict_h
  25. #include <string>
  26. #include "darts.h"
  27. namespace css {
  28. class UnigramCorpusReader;
  29. } /* End of namespace css */
  30. namespace css {
  31. /**
  32. * How to find item fast is a real problem here.
  33. * @return the string(utf-8,encoded) of the id.
  34. */
  35. class UnigramDict {
  36. public:
  37. typedef Darts::DoubleArray::result_pair_type result_pair_type;
  38. UnigramDict(){};
  39. virtual ~UnigramDict(){};
  40. public:
  41. virtual int load(const char* filename);
  42. virtual int isLoad();
  43. /**
  44. * This function should be used only, in Debug mode.
  45. */
  46. virtual std::string getString(int id);
  47. /**
  48. * Find all word item in UnigramDict, which buf as a prefix
  49. * @return total items found
  50. */
  51. virtual int findHits(const char* buf, result_pair_type* result = NULL,
  52. size_t result_len = 0, int keylen = 0);
  53. virtual int import(UnigramCorpusReader& ur);
  54. virtual int save(const char* filename);
  55. virtual int exactMatch(const char* key, int* id = NULL);
  56. protected:
  57. Darts::DoubleArray m_da;
  58. };
  59. } /* End of namespace css */
  60. #endif