// indexwriter_unit.cpp
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #include <gtest/gtest.h>
  19. #include "include/index_wrapper.h"
  20. #include "include/search_util.h"
  21. #include "unittest_util.h"
  22. extern bool g_debug;
  23. extern bool g_use_rocksdb;
  24. extern bool g_use_compression;
  25. namespace wwsearch {
  26. class IndexWriterTest : public ::testing::Test {
  27. public:
  28. static DefaultIndexWrapper *index;
  29. static uint64_t document_id;
  30. static uint64_t numeric_value;
  31. wwsearch::TableID table;
  32. std::vector<DocumentUpdater *> documents;
  33. std::list<DocumentID> match_documentsid;
  34. public:
  35. IndexWriterTest() {
  36. table.business_type = 1;
  37. table.partition_set = 1;
  38. }
  39. static void SetUpTestCase() {
  40. index = new DefaultIndexWrapper();
  41. index->DBParams().path =
  42. std::string("/tmp/unit_") + std::string("indexwriter");
  43. index->Config().SetLogLevel(g_debug ? wwsearch::kSearchLogLevelDebug
  44. : wwsearch::kSearchLogLevelError);
  45. auto status = index->Open(g_use_rocksdb, g_use_compression);
  46. ASSERT_TRUE(status.GetCode() == 0);
  47. }
  48. static void TearDownTestCase() {
  49. if (index != nullptr) {
  50. index->vdb_->DropDB();
  51. delete index;
  52. index = nullptr;
  53. }
  54. }
  55. virtual void SetUp() override {
  56. table.partition_set++;
  57. match_documentsid.clear();
  58. }
  59. virtual void TearDown() override {
  60. for (auto du : documents) {
  61. delete du;
  62. }
  63. documents.clear();
  64. match_documentsid.clear();
  65. }
  66. void Clear() {
  67. for (auto du : documents) {
  68. delete du;
  69. }
  70. documents.clear();
  71. }
  72. uint64_t GetDocumentID() { return document_id++; }
  73. uint64_t GetNumeric(uint64_t alloc_len = 1000) {
  74. auto temp = numeric_value;
  75. numeric_value += alloc_len;
  76. return temp;
  77. }
  78. private:
  79. };
  80. DefaultIndexWrapper *IndexWriterTest::index = nullptr;
  81. DocumentID IndexWriterTest::document_id = 1;
  82. DocumentID IndexWriterTest::numeric_value = 1;
  83. TEST_F(IndexWriterTest, AddDocumentsEmtpy) {
  84. this->table.business_type = 1;
  85. SearchTracer tracer;
  86. // std::string field_str{"hello1,我是中文字符PAXOSSEARCHpaxossearch1929"};
  87. std::string field_str{"hello123456789"};
  88. uint64_t doc_id = GetDocumentID();
  89. this->table.partition_set = 1000;
  90. auto base = GetNumeric(10000);
  91. documents.push_back(
  92. TestUtil::NewDocument(doc_id, field_str, base, base + 100, base + 69));
  93. bool ret =
  94. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  95. EXPECT_TRUE(ret);
  96. { // store field
  97. std::string key;
  98. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  99. SearchLogDebug(
  100. "table.business_type = %d, table.partition_set = %d, doc_id = %d",
  101. this->table.business_type, this->table.partition_set, doc_id);
  102. std::string value;
  103. index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  104. Document document;
  105. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  106. EXPECT_TRUE(document.ID() == doc_id);
  107. for (IndexField *field : document.Fields()) {
  108. if (field->ID() == 1) {
  109. SearchLogDebug("old_field_str[%d, %s] get_field_str[%d, %s]",
  110. field_str.size(), field_str.c_str(),
  111. field->StringValue().size(),
  112. field->StringValue().c_str());
  113. EXPECT_TRUE(field->StringValue().compare(field_str) == 0);
  114. }
  115. }
  116. }
  117. { // docvalue field
  118. std::string key;
  119. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  120. std::string value;
  121. index->vdb_->Get(kDocValueColumn, key, value, nullptr);
  122. Document document;
  123. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  124. EXPECT_TRUE(document.ID() == doc_id);
  125. for (IndexField *field : document.Fields()) {
  126. if (field->ID() == 1) {
  127. SearchLogDebug("old_field_str[%d, %s] get_field_str[%d, %s]",
  128. field_str.size(), field_str.c_str(),
  129. field->StringValue().size(),
  130. field->StringValue().c_str());
  131. EXPECT_TRUE(field->StringValue().compare(field_str) == 0);
  132. }
  133. }
  134. }
  135. }
  136. TEST_F(IndexWriterTest, AddDocumentsDuplicate) {
  137. this->table.business_type = 1;
  138. SearchTracer tracer;
  139. std::string field_str{"hello1,我是中文字符PAXOSSEARCHpaxossearch1929"};
  140. uint64_t doc_id = GetDocumentID();
  141. this->table.partition_set = 1000;
  142. auto base = GetNumeric(10000);
  143. DocumentUpdater *doc_updater =
  144. TestUtil::NewDocument(doc_id, field_str, base, base + 100, base + 69);
  145. documents.push_back(doc_updater);
  146. bool ret =
  147. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  148. EXPECT_TRUE(ret);
  149. ret = index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  150. EXPECT_FALSE(ret);
  151. Clear();
  152. }
  153. TEST_F(IndexWriterTest, UpdateDocumentsAndCheck) {
  154. this->table.business_type = 1;
  155. SearchTracer tracer;
  156. std::string field_str{"hello1,我是中文字符PAXOSSEARCHpaxossearch1929"};
  157. uint64_t doc_id = GetDocumentID();
  158. this->table.partition_set = 1000;
  159. auto base = GetNumeric(10000);
  160. DocumentUpdater *doc_updater =
  161. TestUtil::NewDocument(doc_id, field_str, base, base + 100, base + 69);
  162. documents.push_back(doc_updater);
  163. bool ret =
  164. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  165. EXPECT_TRUE(ret);
  166. std::string field_str2{"world!!!"};
  167. Clear();
  168. this->table.partition_set = 1000;
  169. base = GetNumeric(10000);
  170. doc_updater =
  171. TestUtil::NewDocument(doc_id, field_str2, base, base + 100, base + 69);
  172. documents.push_back(doc_updater);
  173. ret =
  174. index->index_writer_->UpdateDocuments(table, documents, nullptr, &tracer);
  175. EXPECT_TRUE(ret);
  176. { // store field
  177. std::string key;
  178. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  179. std::string value;
  180. index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  181. Document document;
  182. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  183. EXPECT_TRUE(document.ID() == doc_id);
  184. for (IndexField *field : document.Fields()) {
  185. if (field->ID() == 1) {
  186. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  187. }
  188. }
  189. }
  190. { // docvalue field
  191. std::string key;
  192. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  193. std::string value;
  194. index->vdb_->Get(kDocValueColumn, key, value, nullptr);
  195. Document document;
  196. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  197. EXPECT_TRUE(document.ID() == doc_id);
  198. for (IndexField *field : document.Fields()) {
  199. if (field->ID() == 1) {
  200. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  201. }
  202. }
  203. }
  204. Clear();
  205. }
  206. TEST_F(IndexWriterTest, AddOrUpdateDocumentsAndCheck) {
  207. this->table.business_type = 1;
  208. SearchTracer tracer;
  209. std::string field_str{"hello1,我是中文字符PAXOSSEARCHpaxossearch1929"};
  210. uint64_t doc_id = GetDocumentID();
  211. this->table.partition_set = 1000;
  212. auto base = GetNumeric(10000);
  213. DocumentUpdater *doc_updater =
  214. TestUtil::NewDocument(doc_id, field_str, base, base + 100, base + 69);
  215. documents.push_back(doc_updater);
  216. bool ret =
  217. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  218. EXPECT_TRUE(ret);
  219. std::string field_str2{"world!!!"};
  220. Clear();
  221. this->table.partition_set = 1000;
  222. base = GetNumeric(10000);
  223. doc_updater =
  224. TestUtil::NewDocument(doc_id, field_str2, base, base + 100, base + 69);
  225. documents.push_back(doc_updater);
  226. ret = index->index_writer_->AddOrUpdateDocuments(table, documents, nullptr,
  227. &tracer);
  228. EXPECT_TRUE(ret);
  229. { // store field
  230. std::string key;
  231. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  232. std::string value;
  233. index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  234. Document document;
  235. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  236. EXPECT_TRUE(document.ID() == doc_id);
  237. for (IndexField *field : document.Fields()) {
  238. if (field->ID() == 1) {
  239. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  240. }
  241. }
  242. }
  243. { // docvalue field
  244. std::string key;
  245. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  246. std::string value;
  247. index->vdb_->Get(kDocValueColumn, key, value, nullptr);
  248. Document document;
  249. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  250. EXPECT_TRUE(document.ID() == doc_id);
  251. for (IndexField *field : document.Fields()) {
  252. if (field->ID() == 1) {
  253. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  254. }
  255. }
  256. }
  257. Clear();
  258. }
  259. TEST_F(IndexWriterTest, UpdateFieldAndCheck) {
  260. this->table.business_type = 1;
  261. SearchTracer tracer;
  262. std::string field_str{"hello1,我是中文字符PAXOSSEARCHpaxossearch1929"};
  263. uint64_t doc_id = GetDocumentID();
  264. this->table.partition_set = 1000;
  265. auto base = GetNumeric(10000);
  266. DocumentUpdater *doc_updater =
  267. TestUtil::NewDocument(doc_id, field_str, base, base + 100, base + 69);
  268. documents.push_back(doc_updater);
  269. bool ret =
  270. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  271. EXPECT_TRUE(ret);
  272. std::string field_str2{"world!!!"};
  273. Clear();
  274. this->table.partition_set = 1000;
  275. base = GetNumeric(10000);
  276. doc_updater =
  277. TestUtil::NewDocument(doc_id, field_str2, base, base + 100, base + 69, 5);
  278. documents.push_back(doc_updater);
  279. ret = index->index_writer_->AddOrUpdateDocuments(table, documents, nullptr,
  280. &tracer);
  281. EXPECT_TRUE(ret);
  282. { // store field
  283. std::string key;
  284. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  285. std::string value;
  286. index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  287. Document document;
  288. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  289. EXPECT_TRUE(document.ID() == doc_id);
  290. for (IndexField *field : document.Fields()) {
  291. if (field->ID() == 1) {
  292. EXPECT_TRUE(field->StringValue().compare(field_str) == 0);
  293. }
  294. }
  295. }
  296. { // docvalue field
  297. std::string key;
  298. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  299. std::string value;
  300. index->vdb_->Get(kDocValueColumn, key, value, nullptr);
  301. Document document;
  302. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  303. EXPECT_TRUE(document.ID() == doc_id);
  304. for (IndexField *field : document.Fields()) {
  305. if (field->ID() == 1) {
  306. EXPECT_TRUE(field->StringValue().compare(field_str) == 0);
  307. }
  308. }
  309. }
  310. { // store field
  311. std::string key;
  312. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  313. std::string value;
  314. index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  315. Document document;
  316. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  317. EXPECT_TRUE(document.ID() == doc_id);
  318. for (IndexField *field : document.Fields()) {
  319. if (field->ID() == 5) {
  320. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  321. }
  322. }
  323. }
  324. { // docvalue field
  325. std::string key;
  326. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  327. std::string value;
  328. index->vdb_->Get(kDocValueColumn, key, value, nullptr);
  329. Document document;
  330. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  331. EXPECT_TRUE(document.ID() == doc_id);
  332. for (IndexField *field : document.Fields()) {
  333. if (field->ID() == 5) {
  334. EXPECT_TRUE(field->StringValue().compare(field_str2) == 0);
  335. }
  336. }
  337. }
  338. Clear();
  339. }
  340. TEST_F(IndexWriterTest, DeleteTableData) {
  341. // 20 business type * 3 document
  342. this->table.business_type = 1;
  343. SearchTracer tracer;
  344. {
  345. this->table.partition_set = 1000;
  346. auto base = GetNumeric(10000);
  347. documents.push_back(TestUtil::NewDocument(
  348. GetDocumentID(), "hello1,我是中文字符PAXOSSEARCHpaxossearch1929", base,
  349. base + 100, base + 69));
  350. }
  351. bool ret = index->index_writer_->AddOrUpdateDocuments(table, documents,
  352. nullptr, &tracer);
  353. EXPECT_TRUE(ret);
  354. for (size_t column = 0; column < kMaxColumn; column++) {
  355. // delete
  356. std::string start_key;
  357. do {
  358. auto status = index->index_writer_->DeleteTableData(
  359. table, (StorageColumnType)column, start_key, 10);
  360. ASSERT_TRUE(status.OK());
  361. } while (!start_key.empty());
  362. SearchLogDebug("");
  363. // db is empty now.
  364. VirtualDBReadOption options;
  365. auto iterator =
  366. index->vdb_->NewIterator((StorageColumnType)column, &options);
  367. iterator->SeekToFirst();
  368. EXPECT_FALSE(iterator->Valid());
  369. delete iterator;
  370. }
  371. Clear();
  372. }
  373. TEST_F(IndexWriterTest, AcquireSequence) {
  374. // 20 business type * 3 document
  375. this->table.business_type = 1;
  376. SearchTracer tracer;
  377. this->table.partition_set = 1000;
  378. bool ret;
  379. uint64_t current_max;
  380. std::vector<wwsearch::DocumentWriter::AllocSequence> sequence;
  381. // GetCurrent
  382. ASSERT_TRUE(index->index_writer_->AcquireCurrentSequence(table, current_max,
  383. sequence, &tracer));
  384. ASSERT_TRUE(0 == current_max);
  385. wwsearch::DocumentWriter::AllocSequence seq1, seq2;
  386. seq1.user_id.assign("sdfdksfksk");
  387. seq2.user_id.assign("sdfiweiii");
  388. sequence.push_back(seq1);
  389. sequence.push_back(seq2);
  390. // Add Mapping twice
  391. // Return same info
  392. for (int i = 0; i < 2; i++) {
  393. ASSERT_TRUE(index->index_writer_->AcquireNewSequence(table, sequence,
  394. nullptr, &tracer));
  395. for (int j = 0; j < 2; j++) {
  396. ASSERT_TRUE(sequence[j].status.OK());
  397. ASSERT_TRUE(sequence[j].sequence == j + 1);
  398. }
  399. }
  400. // GetCurrent Again
  401. ASSERT_TRUE(index->index_writer_->AcquireCurrentSequence(table, current_max,
  402. sequence, &tracer));
  403. ASSERT_TRUE(2 == current_max);
  404. }
  405. TEST_F(IndexWriterTest, DropTable) {
  406. SearchTracer tracer;
  407. this->table.business_type = 10;
  408. this->table.partition_set = 999;
  409. uint64_t doc_id = GetDocumentID();
  410. for (int i = 0; i < 100; ++i) {
  411. std::string field_str = std::string{"hello123456789"} + std::to_string(i);
  412. auto base = GetNumeric(10000);
  413. documents.clear();
  414. documents.push_back(TestUtil::NewDocument(doc_id + i, field_str, base,
  415. base + 100, base + 69));
  416. bool ret =
  417. index->index_writer_->AddDocuments(table, documents, nullptr, &tracer);
  418. EXPECT_TRUE(ret);
  419. }
  420. {
  421. std::string field_str = std::string{"hello123456789"};
  422. auto base = GetNumeric(10000);
  423. wwsearch::DocumentUpdater *doc_updater = TestUtil::NewDocument(
  424. doc_id + 1000, field_str, base, base + 100, base + 69);
  425. InitStringField(doc_updater->New().AddField(), UINT8_MAX,
  426. std::string{"123"});
  427. std::vector<wwsearch::DocumentUpdater *> doc_updater_list{doc_updater};
  428. bool ret = index->index_writer_->AddDocuments(table, doc_updater_list,
  429. nullptr, &tracer);
  430. EXPECT_TRUE(ret);
  431. }
  432. {
  433. std::string key;
  434. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  435. std::string value;
  436. SearchStatus s = index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  437. EXPECT_TRUE(s.OK());
  438. Document document;
  439. EXPECT_TRUE(document.DeSerializeFromByte(value.c_str(), value.size()));
  440. EXPECT_TRUE(document.ID() == doc_id);
  441. }
  442. {
  443. // read inverted index
  444. std::string key;
  445. index->codec_->EncodeInvertedKey(table, UINT8_MAX, std::string{"123"}, key);
  446. std::string value;
  447. SearchStatus s =
  448. index->vdb_->Get(kInvertedIndexColumn, key, value, nullptr);
  449. EXPECT_TRUE(s.OK());
  450. }
  451. bool ret = index->index_writer_->DropTable(this->table, nullptr, nullptr);
  452. EXPECT_TRUE(ret == true);
  453. {
  454. std::string key;
  455. index->codec_->EncodeStoredFieldKey(table, doc_id, key);
  456. std::string value;
  457. SearchStatus s = index->vdb_->Get(kStoredFieldColumn, key, value, nullptr);
  458. EXPECT_TRUE(s.DocumentNotExist());
  459. }
  460. {
  461. // read inverted index
  462. std::string key;
  463. index->codec_->EncodeInvertedKey(table, UINT8_MAX, std::string{"123"}, key);
  464. std::string value;
  465. SearchStatus s =
  466. index->vdb_->Get(kInvertedIndexColumn, key, value, nullptr);
  467. EXPECT_TRUE(s.DocumentNotExist());
  468. }
  469. }
  470. } // namespace wwsearch