utf8_suffixbuilder.h 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #pragma once
  19. #include "header.h"
  20. namespace wwsearch {
  // Iterator-style helper over a caller-supplied character buffer.
  //
  // The constructor only records the buffer pointer and size (nothing is
  // copied), so the buffer presumably has to stay alive for as long as the
  // builder is used -- confirm against the member definitions, which are not
  // in this header.
  class UTF8SuffixBuilder {
   private:
    const char *buffer_;       // Start of the caller's buffer (stored, not copied).
    char *it;                  // Non-const cursor, initialised to the buffer start.
    size_t buffer_size_;       // Buffer size exactly as passed to the constructor.
    size_t pos_;               // Initialised to 0; meaning is defined by Next()/Reset().
    bool error;                // Initialised to false.
    uint32_t min_suffix_len_;  // Requested minimum length, clamped to buffer_size_.

   public:
    // The caller must make sure `buffer` holds a valid UTF-8 string; nothing is
    // validated here.
    //
    // buffer          Start of the text. Only the pointer is kept.
    // buffer_size     Size of `buffer`.
    // min_suffix_len  Requested minimum suffix length. Values larger than
    //                 `buffer_size` are clamped down to `buffer_size`.
    UTF8SuffixBuilder(const char *buffer, size_t buffer_size,
                      uint32_t min_suffix_len)
        : buffer_(buffer),
          // The cursor member is a non-const pointer; spell the const removal
          // out explicitly instead of hiding it in a C-style cast.
          it(const_cast<char *>(buffer)),
          buffer_size_(buffer_size),
          pos_(0),
          error(false),
          min_suffix_len_(min_suffix_len) {
      // Fix up the minimum length. The narrowing cast cannot lose data: this
      // branch is only taken when buffer_size_ < min_suffix_len, so the value
      // already fits in uint32_t.
      if (min_suffix_len > buffer_size_) {
        min_suffix_len_ = static_cast<uint32_t>(buffer_size_);
      }
    }

    virtual ~UTF8SuffixBuilder() {}

    // Returns false if the end has been reached.
    bool Next();

    // Accessors for the current term; defined out of line.
    const char *Term();
    size_t TermSize();

    // Defined out of line. Deliberately NOT declared `inline`: an inline
    // function must be defined in every translation unit that uses it, and
    // this header provides no definition.
    void Reset();
  };
  52. } // namespace wwsearch