index_writer.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #pragma once
  19. #include "document.h"
  20. #include "document_writer.h"
  21. #include "index_config.h"
  22. #include "index_writer.h"
  23. #include "storage_type.h"
  24. #include "tracer.h"
  25. namespace wwsearch {
  /* Notice : Index document user interface.
   * Input  : std::vector<DocumentUpdater *>
   *          (each DocumentUpdater holds the new document the user wants to put.)
   * Output : success or not
   * 1. Check the status of each DocumentUpdater's new document id in the db.
   * 2. Merge with the old document if needed and if it exists.
   * 3. Build forward table kv.
   * 4. Build inverted table kv; note that this step needs word segmentation.
   * 5. Build docvalue table kv.
   * 6. Flush to db.
   * The kv formats used in steps 3-6 are defined in `codec_impl.h`.
   */
  38. class IndexWriter {
  39. private:
  40. IndexConfig *config_;
  41. DocumentWriter document_writer_;
  42. public:
  43. IndexWriter();
  44. ~IndexWriter();
  45. bool Open(IndexConfig *config);
  46. // Return config instance. Need set config before IndexWriter is useable.
  47. const IndexConfig &Config();
  48. // Add document while the old document is not exist in database.
  49. // DO NOT have same DocumentID in documents.Each DocumentUpdater have its own
  50. // status code return. NOTE,if store_buffer not null,record will flush to this
  51. // buffer but not flush to db.
  52. bool AddDocuments(const TableID &table,
  53. std::vector<DocumentUpdater *> &documents,
  54. std::string *store_buffer = nullptr,
  55. SearchTracer *tracer = nullptr);
  56. // Update document while the old document is exist in database.
  57. bool UpdateDocuments(const TableID &table,
  58. std::vector<DocumentUpdater *> &documents,
  59. std::string *store_buffer = nullptr,
  60. SearchTracer *tracer = nullptr);
  61. // Update document no matter whether the old document exist in database or
  62. // not.
  63. bool AddOrUpdateDocuments(const TableID &table,
  64. std::vector<DocumentUpdater *> &documents,
  65. std::string *store_buffer = nullptr,
  66. SearchTracer *tracer = nullptr);
  67. // replace with new documents nomatter whether the old document exist or not.
  68. bool ReplaceDocuments(const TableID &table,
  69. std::vector<DocumentUpdater *> &documents,
  70. std::string *store_buffer = nullptr,
  71. SearchTracer *tracer = nullptr);
  72. // Just delete all match document in database.
  73. bool DeleteDocuments(const TableID &table,
  74. std::vector<DocumentUpdater *> &documents,
  75. std::string *store_buffer = nullptr,
  76. SearchTracer *tracer = nullptr);
  77. // WARNING: Document will be insert into index without read from disk.Add
  78. // twice will make DocumentID duplicate in doc list.After merge,Only single
  79. // same DocumentID will be left.
  80. bool AddDocumentsWithoutRead(const TableID &table,
  81. std::vector<DocumentUpdater *> &documents,
  82. std::string *store_buffer = nullptr,
  83. SearchTracer *tracer = nullptr);
  84. // Ingest InvertIndex only write inverted index but not write other data.
  85. bool IngestInvertIndex(const TableID &table, InvertIndexItemList &indices,
  86. std::string *store_buffer,
  87. SearchTracer *tracer = nullptr);
  88. // Read current max sequence from db.
  89. bool AcquireCurrentSequence(
  90. const TableID &table, uint64_t &current_max,
  91. std::vector<DocumentWriter::AllocSequence> &sequence_list,
  92. SearchTracer *tracer = nullptr);
  93. // Read current max sequence,If some not found,then alloc empty id to sequence
  94. // that start from current max.
  95. bool AcquireNewSequence(
  96. const TableID &table,
  97. std::vector<DocumentWriter::AllocSequence> &sequence_list,
  98. std::string *store_buffer, SearchTracer *tracer = nullptr);
  99. bool DropTable(const TableID &table, std::string *store_buffer,
  100. SearchTracer *tracer = nullptr);
  101. // For certain
  102. // delete data
  103. SearchStatus DeleteTableData(TableID &table,
  104. wwsearch::StorageColumnType column,
  105. std::string &start_key, size_t max_len);
  106. private:
  107. // mode:
  108. // * 1->add
  109. // * 2->update
  110. // * 3->add or update
  111. bool InnerWriteDocuments(const TableID &table,
  112. std::vector<DocumentUpdater *> &documents,
  113. std::string *store_buffer, kDocumentUpdaterType mode,
  114. SearchTracer *tracer = nullptr);
  115. };
  116. } // namespace wwsearch