document.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #pragma once
  19. #include "index_field.h"
  20. #include "search_status.h"
  21. #include "serialize.h"
  22. #include "storage_type.h"
  23. namespace wwsearch {
  // Packed record of four 64-bit counters. The packed attribute removes any
  // padding, so the object is exactly 4 * sizeof(uint64_t) bytes.
  struct DocumentMeta {
    uint64_t total_documents;
    uint64_t delete_documents;
    uint64_t increase_seq;
    uint64_t terms_count;

    // Resets every counter to zero.
    void Clear() {
      total_documents = 0;
      delete_documents = 0;
      increase_seq = 0;
      terms_count = 0;
    }
  } __attribute__((__packed__));
  typedef struct DocumentMeta DocumentMeta;
  34. class Document /*: public SerializeAble*/ {
  35. private:
  36. std::vector<IndexField *> fields_;
  37. DocumentID document_id_;
  38. // Only use for post_score in collector_top.cpp
  39. int match_field_id_;
  40. DocumentScore document_score_;
  41. public:
  42. Document();
  43. ~Document();
  44. Document(const Document &) = delete;
  45. Document &operator=(const Document &) = delete;
  46. // Do not add same field with same field id.
  47. IndexField *AddField();
  48. void ClearField();
  49. inline IndexField *FindField(FieldID field_id) {
  50. for (size_t i = 0; i < fields_.size(); i++) {
  51. if (field_id == fields_[i]->ID()) {
  52. return fields_[i];
  53. }
  54. }
  55. return nullptr;
  56. }
  57. std::vector<IndexField *> &Fields();
  58. void SetID(DocumentID document_id);
  59. const DocumentID &ID() const;
  60. void SetMatchFieldID(int match_field_id) { match_field_id_ = match_field_id; }
  61. int MatchFieldId() { return match_field_id_; }
  62. void SetScore(DocumentScore document_score);
  63. void AddScore(DocumentScore document_score);
  64. const DocumentScore &Score() const;
  65. // if flag = 0,all will be srialized.
  66. // if flag = 1,only doc value field will be serialized.
  67. bool SerializeToBytes(std::string &buffer, int flag, bool &have_field);
  68. bool SerializeToBytes(std::string &buffer, int flag) {
  69. bool have_field = false;
  70. return SerializeToBytes(buffer, flag, have_field);
  71. }
  72. bool DeSerializeFromByte(const char *buffer, uint32_t buffer_len);
  73. void BuildDocValue(Document &document);
  74. void PrintToReadStr(std::string &str) {
  75. char buffer[20];
  76. snprintf(buffer, sizeof(buffer), "Document[%llu]\n", document_id_);
  77. str.append(buffer);
  78. for (auto field : fields_) {
  79. field->PrintToString(str);
  80. }
  81. }
  82. bool EncodeToPBDocument(lsmsearch::StoreDocument *document) {
  83. document->set_document_id(this->document_id_);
  84. for (auto field : this->fields_) {
  85. if (!field->EncodeToStoreField(document->add_fields())) return false;
  86. }
  87. return true;
  88. }
  89. private:
  90. };
  class IndexWriter;

  // Operation kinds stored in a DocumentUpdater. Each enumerator carries an
  // explicit numeric value.
  enum kDocumentUpdaterType {
    kDocumentAddType            = 1,
    kDocumentUpdateType         = 2,
    kDocumentAddOrUpdateType    = 3,
    kDocumentDeleteType         = 4,
    kDocumentReplaceType        = 5,
    // Just add into index but not read
    kDocumentAddWithoutReadType = 6
  };
  100. class DocumentUpdater {
  101. friend class IndexWriter;
  102. private:
  103. SearchStatus status_;
  104. Document old_document_;
  105. Document new_document_;
  106. Document old_docvalue_document_;
  107. Document new_docvalue_document_;
  108. kDocumentUpdaterType update_type_; // inner use
  109. public:
  110. DocumentUpdater();
  111. ~DocumentUpdater();
  112. DocumentUpdater(const DocumentUpdater &) = delete;
  113. DocumentUpdater &operator=(const DocumentUpdater &) = delete;
  114. Document &Old();
  115. Document &New();
  116. Document &OldDocValue();
  117. Document &NewDocValue();
  118. SearchStatus &Status();
  119. bool Delete() { return update_type_ == kDocumentDeleteType; }
  120. bool NeedMergeDocValue() {
  121. return (kDocumentUpdateType == update_type_) ||
  122. (kDocumentAddOrUpdateType == update_type_);
  123. }
  124. kDocumentUpdaterType UpdateType() { return this->update_type_; }
  125. private:
  126. void SetUpdateType(kDocumentUpdaterType type) { this->update_type_ = type; }
  127. };
  128. class InvertIndexItem {
  129. private:
  130. FieldID field_id_;
  131. std::string term_;
  132. std::set<DocumentID, std::greater<DocumentID>>
  133. doc_list_; // in decrease order.
  134. public:
  135. InvertIndexItem() {}
  136. virtual ~InvertIndexItem() {}
  137. FieldID GetFieldID() const { return this->field_id_; }
  138. const std::string &GetTerm() const { return this->term_; }
  139. void Set(FieldID field_id, const std::string &term);
  140. void ClearDocList();
  141. void AddDocID(DocumentID doc_id);
  142. inline const std::set<DocumentID, std::greater<DocumentID>> &DocList() const {
  143. return this->doc_list_;
  144. }
  145. private:
  146. };
  147. class InvertIndexItemList {
  148. private:
  149. std::list<InvertIndexItem *> items_;
  150. public:
  151. InvertIndexItemList() {}
  152. virtual ~InvertIndexItemList();
  153. InvertIndexItem *AddItem();
  154. std::list<InvertIndexItem *> List() { return this->items_; }
  155. private:
  156. };
  157. } // namespace wwsearch