doclist_compression.h 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #pragma once
  19. #include "codec_doclist.h"
  20. #include "coding.h"
  21. #include "document.h"
  22. #include "header.h"
  23. namespace wwsearch {
  // Selects the encoding used for a serialized doc list.
  // Kept as an unscoped enum so the enumerators convert implicitly to the
  // integer version field they are stored in.
  enum DocListCompressionType {
    // NOTE(review): presumably the fixed-width, uncompressed layout; confirm
    // against the codec implementation.
    DocListCompressionFixType = 0,

    // Variable-length block layout:
    //   [header][delete flag block][doc list block]
    DocListCompressionVarLenBlockType = 1,
  };
  // Packed flag value attached to a variable-length doc list block.
  //
  // Only the low 3 bits are stored: the bit-field truncates any value passed
  // to the converting constructor to the range 0..7.
  struct DocListCompressionVarLenBlockFlag_t {
    uint8_t flag_ : 3;

    // Starts with no flag set.
    DocListCompressionVarLenBlockFlag_t() : flag_(0) {}

    // Wraps a raw flag value (truncated to 3 bits). Deliberately left
    // non-explicit so existing implicit conversions from uint8_t still compile.
    DocListCompressionVarLenBlockFlag_t(uint8_t flag) : flag_(flag) {}

    // Overwrites the stored value with 1; it assigns rather than OR-ing a bit
    // in, so any other bits previously held are cleared.
    void SetHasDelete() { this->flag_ = 1; }

    // Returns true when the stored value is non-zero.
    // const-qualified so it can be called on const objects and references.
    bool HasDelete() const { return flag_ != 0; }

    // Returns the stored 3-bit value.
    uint8_t Value() const { return this->flag_; }
  } __attribute__((packed));
  typedef struct DocListCompressionVarLenBlockFlag_t
      DocListCompressionVarLenBlockFlag;
  // Buffer of delete positions that can be written out to a string.
  // NOTE(review): the name suggests positions are delta-encoded on
  // serialization; the encoding lives in the out-of-line definitions, so
  // confirm there.
  class DocListDelDeltaBuffer {
   public:
    // Both counters start at zero; the position list starts empty.
    DocListDelDeltaBuffer() : del_num_(0), last_pos_(0) {}
    virtual ~DocListDelDeltaBuffer() {}

    // Records a delete position. Returns a success flag; the conditions under
    // which it fails are decided by the out-of-line implementation.
    bool AddDeletePos(size_t pos);

    // Writes the buffered state into `buffer`. Returns a success flag.
    virtual bool SerializeToString(std::string &buffer);

   private:
    uint32_t del_num_;
    uint32_t last_pos_;
    std::vector<uint32_t> del_position_;
  };
  55. class DocListDeltaBuffer {
  56. private:
  57. DocumentID last_docid_;
  58. std::vector<DocumentID> doc_list_;
  59. public:
  60. DocListDeltaBuffer() {}
  61. virtual ~DocListDeltaBuffer() {}
  62. bool AddDoc(DocumentID doc_id);
  63. virtual bool SerializeToString(std::string &buffer);
  64. private:
  65. };
  66. class DocListCompressionEncoder {
  67. private:
  68. DocListHeader header;
  69. DocListDelDeltaBuffer del_buffer;
  70. DocListDeltaBuffer delta_buffer;
  71. size_t pos_;
  72. bool has_del_;
  73. public:
  74. DocListCompressionEncoder() {
  75. header.version = DocListCompressionVarLenBlockType;
  76. pos_ = 0;
  77. has_del_ = false;
  78. }
  79. virtual ~DocListCompressionEncoder() {}
  80. // Must keep doc_id in order,otherwise will return false;
  81. virtual bool AddDoc(DocumentID doc_id, bool is_del = false);
  82. virtual bool SerializeToString(std::string &buffer);
  83. private:
  84. };
  // Decodes a serialized doc list into `fix_doclist`.
  // NOTE(review): the output parameter name suggests the result is the
  // fixed-width (DocListCompressionFixType) representation; confirm against
  // the out-of-line implementation.
  class DocListCompressionDecoder {
   public:
    // Virtual because the class is polymorphic: without it, deleting a derived
    // decoder through a base pointer is undefined behaviour. The other classes
    // in this header that declare virtual functions declare one too.
    virtual ~DocListCompressionDecoder() {}

    // Decodes the bytes held in `buffer`. Returns a success flag.
    virtual bool Decode(const std::string &buffer, std::string &fix_doclist);

    // Decodes `len` bytes starting at `ptr`. Returns a success flag.
    virtual bool Decode(const char *ptr, size_t len, std::string &fix_doclist);
  };
  92. } // namespace wwsearch