searcher_unit.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. * Tencent is pleased to support the open source community by making wwsearch
  3. * available.
  4. *
  5. * Copyright (C) 2018-present Tencent. All Rights Reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  8. * use this file except in compliance with the License. You may obtain a copy of
  9. * the License at
  10. *
  11. * https://opensource.org/licenses/Apache-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. * WARRANTIES OF ANY KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations under the License.
  17. */
  18. #include <gtest/gtest.h>
  19. #include "include/codec_doclist.h"
  20. #include "include/codec_doclist_impl.h"
  21. #include "include/codec_impl.h"
  22. #include "include/index_wrapper.h"
  23. #include "include/search_util.h"
  24. #include "unittest_util.h"
  25. extern bool g_debug;
  26. extern bool g_use_rocksdb;
  27. extern bool g_use_compression;
  28. namespace wwsearch {
  29. class SearcherTest : public ::testing::Test {
  30. public:
  31. static DefaultIndexWrapper *index;
  32. static uint64_t document_id;
  33. static uint64_t numeric_value;
  34. wwsearch::TableID table;
  35. std::vector<DocumentUpdater *> documents;
  36. std::list<DocumentID> match_documentsid;
  37. public:
  38. SearcherTest() {
  39. table.business_type = 1;
  40. table.partition_set = 1;
  41. }
  42. static void SetUpTestCase() {
  43. index = new DefaultIndexWrapper();
  44. index->DBParams().path =
  45. std::string("/tmp/unit_") + std::string("searcher");
  46. index->Config().SetLogLevel(g_debug ? wwsearch::kSearchLogLevelDebug
  47. : wwsearch::kSearchLogLevelError);
  48. auto status = index->Open(g_use_rocksdb, g_use_compression);
  49. ASSERT_TRUE(status.GetCode() == 0);
  50. }
  51. static void TearDownTestCase() {
  52. if (index != nullptr) {
  53. index->vdb_->DropDB();
  54. delete index;
  55. index = nullptr;
  56. }
  57. }
  58. virtual void SetUp() override {
  59. table.partition_set++;
  60. match_documentsid.clear();
  61. }
  62. virtual void TearDown() override {
  63. for (auto du : documents) {
  64. delete du;
  65. }
  66. documents.clear();
  67. match_documentsid.clear();
  68. }
  69. uint64_t GetDocumentID() { return document_id++; }
  70. uint64_t GetNumeric(uint64_t alloc_len = 1000) {
  71. auto temp = numeric_value;
  72. numeric_value += alloc_len;
  73. return temp;
  74. }
  75. private:
  76. };
  77. DefaultIndexWrapper *SearcherTest::index = nullptr;
  78. DocumentID SearcherTest::document_id = 1;
  79. DocumentID SearcherTest::numeric_value = 1;
  80. /*
  81. TEST_F(SearcherTest, ScanBusinessType) {
  82. // 30 partition sets (one business type) * 3 documents each
  83. VirtualDBSnapshot *snapshots[1];
  84. snapshots[0] = nullptr;
  85. this->table.business_type = 1;
  86. for (size_t base_set = 0; base_set < 30; base_set++) {
  87. {
  88. this->table.partition_set = base_set;
  89. auto base = GetNumeric(10000);
  90. documents.push_back(TestUtil::NewDocument(GetDocumentID(), "hello1", base,
  91. base + 100, base + 69));
  92. documents.push_back(TestUtil::NewDocument(
  93. GetDocumentID(), "girl1", base + 1, base + 101, base + 69));
  94. documents.push_back(TestUtil::NewDocument(
  95. GetDocumentID(), "hello1", base + 2, base + 102, base + 69));
  96. }
  97. bool ret = index->index_writer_->AddOrUpdateDocuments(table, documents,
  98. nullptr, nullptr);
  99. EXPECT_TRUE(ret);
  100. if (g_debug) {
  101. for (const auto &du : documents) {
  102. EXPECT_EQ(0, du->Status().GetCode());
  103. wwsearch::Document &document = du->New();
  104. std::string debug_str;
  105. document.PrintToReadStr(debug_str);
  106. SearchLogDebug("%s\n", debug_str.c_str());
  107. }
  108. }
  109. for (auto du : documents) {
  110. delete du;
  111. }
  112. documents.clear();
  113. if (base_set == 19) {
  114. snapshots[0] = index->vdb_->NewSnapshot();
  115. }
  116. }
  117. // the snapshot taken at base_set == 19 covers 20 partition sets
  118. for (auto item : snapshots) {
  119. ASSERT_TRUE(item != nullptr);
  120. wwsearch::Searcher searcher(&index->Config());
  121. uint8_t business_type = table.business_type;
  122. uint64_t start_partition_set = 0;
  123. std::vector<uint64_t> sets;
  124. VirtualDBSnapshot *snapshot = item;
  125. uint64_t base = 0;
  126. {
  127. sets.clear();
  128. start_partition_set = 0;
  129. do {
  130. auto status = searcher.ScanBusinessType(
  131. business_type, start_partition_set, 3, sets, snapshot);
  132. ASSERT_TRUE(status.OK());
  133. } while (start_partition_set != 0);
  134. }
  135. {
  136. sets.clear();
  137. start_partition_set = 0;
  138. auto status = searcher.ScanBusinessType(
  139. business_type, start_partition_set, 100, sets, snapshot);
  140. ASSERT_TRUE(status.OK());
  141. ASSERT_EQ(sets.size(), 20);
  142. }
  143. {
  144. sets.clear();
  145. start_partition_set = 0;
  146. auto status = searcher.ScanBusinessType(
  147. business_type, start_partition_set, 10, sets, snapshot);
  148. ASSERT_TRUE(status.OK());
  149. ASSERT_EQ(sets.size(), 10);
  150. for (auto set : sets) {
  151. EXPECT_TRUE(set >= 0 && set < 10);
  152. }
  153. EXPECT_TRUE(start_partition_set == 9);
  154. sets.clear();
  155. start_partition_set = 10;
  156. status = searcher.ScanBusinessType(business_type, start_partition_set, 10,
  157. sets, snapshot);
  158. ASSERT_TRUE(status.OK());
  159. ASSERT_EQ(sets.size(), 10);
  160. for (auto set : sets) {
  161. EXPECT_TRUE(set >= 10 && set < 20);
  162. }
  163. }
  164. if (snapshot != nullptr) {
  165. index->vdb_->ReleaseSnapshot(snapshot);
  166. }
  167. }
  168. }
  169. */
  170. TEST_F(SearcherTest, ScanTableData) {
  171. // 20 business type * 3 document
  172. this->table.business_type = 1;
  173. SearchTracer tracer;
  174. auto base = GetNumeric(10000);
  175. const uint32_t doc_num = 2019;
  176. for (int i = 0; i < doc_num; i++) {
  177. this->table.partition_set = 1000;
  178. documents.push_back(TestUtil::NewDocument(GetDocumentID(), "hello", base++,
  179. base++, base++));
  180. }
  181. bool ret = index->index_writer_->AddOrUpdateDocuments(table, documents,
  182. nullptr, &tracer);
  183. EXPECT_TRUE(ret);
  184. /*
  185. kStoredFieldColumn = 0, // store document
  186. kInvertedIndexColumn = 1, // store invert doc list of match term
  187. kDocValueColumn = 2, // store table doc value of every document
  188. kMetaColumn = 3, // store user'id mapping currently
  189. kDictionaryColumn = 4, // store nothing
  190. */
  191. {
  192. wwsearch::Searcher searcher(&index->Config());
  193. uint8_t business_type = table.business_type;
  194. uint64_t start_partition_set = 0;
  195. std::vector<uint64_t> sets;
  196. VirtualDBSnapshot *snapshot = index->vdb_->NewSnapshot();
  197. uint64_t base = 0;
  198. start_partition_set = 0;
  199. wwsearch::StorageColumnType columns[] = {
  200. kStoredFieldColumn,
  201. kInvertedIndexColumn, // store invert doc list of match term
  202. kDocValueColumn, // store table doc value of every document
  203. kMetaColumn, // store user'id mapping currently
  204. kDictionaryColumn // store nothing
  205. };
  206. int columns_expect_keys_delta[] = {1, 3, 1, 0, 0};
  207. int columns_expect_keys_constant[] = {0, 1, 0, 0, 0};
  208. for (size_t i = 0; i < sizeof(columns) / sizeof(StorageColumnType); i++) {
  209. std::string write_batch;
  210. std::string start_key;
  211. uint64_t count = 0;
  212. int total_key_count = 0;
  213. do {
  214. wwsearch::SearchStatus status;
  215. status = searcher.ScanTableData(table, columns[i], start_key, 10,
  216. write_batch, snapshot);
  217. ASSERT_TRUE(status.OK());
  218. if (!start_key.empty()) {
  219. count++;
  220. EXPECT_TRUE(write_batch.size() != 0);
  221. }
  222. WriteBuffer *write_buffer = index->vdb_->NewWriteBuffer(&write_batch);
  223. total_key_count += write_buffer->KvCount();
  224. index->vdb_->ReleaseWriteBuffer(write_buffer);
  225. // rocksdb::WriteBatch batch(write_batch);
  226. // total_key_count += batch.Count();
  227. write_batch.clear();
  228. } while (!start_key.empty());
  229. int expected_count = doc_num * columns_expect_keys_delta[i] +
  230. columns_expect_keys_constant[i];
  231. SearchLogDebug("expect:%d,real:%d,count:%d\n", expected_count,
  232. total_key_count, count);
  233. EXPECT_EQ(total_key_count, expected_count);
  234. if (i < 3) {
  235. EXPECT_TRUE(count > 0);
  236. }
  237. }
  238. index->vdb_->ReleaseSnapshot(snapshot);
  239. }
  240. }
  241. TEST_F(SearcherTest, DocListOrderWriterCodecImplDebug) {
  242. std::unique_ptr<wwsearch::Codec> codec(new wwsearch::CodecImpl);
  243. std::string data;
  244. wwsearch::DocListWriterCodec *doc_list_order_writer_codec =
  245. codec->NewOrderDocListWriterCodec();
  246. // must keep decrease order
  247. doc_list_order_writer_codec->AddDocID(456, 2);
  248. doc_list_order_writer_codec->AddDocID(123, 1);
  249. // doc_list_order_writer_codec->AddDocID(456, 2);
  250. SearchLogDebug("DocListWriterCodec debug : %s\n",
  251. doc_list_order_writer_codec->DebugString().c_str());
  252. codec->ReleaseOrderDocListWriterCodec(doc_list_order_writer_codec);
  253. }
  254. TEST_F(SearcherTest, DocListReaderCodecImplTest1) {
  255. std::unique_ptr<wwsearch::Codec> codec(new wwsearch::CodecImpl);
  256. std::string data;
  257. {
  258. wwsearch::DocListWriterCodec *doc_list_order_writer_codec =
  259. codec->NewOrderDocListWriterCodec();
  260. doc_list_order_writer_codec->AddDocID(10, 1);
  261. doc_list_order_writer_codec->AddDocID(8, 0);
  262. doc_list_order_writer_codec->AddDocID(5, 0);
  263. doc_list_order_writer_codec->AddDocID(3, 0);
  264. SearchLogDebug("DocListWriterCodec debug : %s\n",
  265. doc_list_order_writer_codec->DebugString().c_str());
  266. doc_list_order_writer_codec->SerializeToBytes(data, 0);
  267. codec->ReleaseOrderDocListWriterCodec(doc_list_order_writer_codec);
  268. }
  269. {
  270. // small all
  271. DocumentID target = 2;
  272. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  273. EXPECT_EQ(wwsearch::DocIdSetIterator::NO_MORE_DOCS, reader.Advance(target));
  274. }
  275. {
  276. // bigger all
  277. DocumentID target = 13;
  278. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  279. EXPECT_EQ(10, reader.Advance(target));
  280. }
  281. {
  282. // not include search
  283. DocumentID target = 6;
  284. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  285. EXPECT_EQ(5, reader.Advance(target));
  286. }
  287. {
  288. // not include search
  289. DocumentID target = 4;
  290. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  291. EXPECT_EQ(3, reader.Advance(target));
  292. }
  293. {
  294. // include search
  295. DocumentID target = 5;
  296. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  297. EXPECT_EQ(5, reader.Advance(target));
  298. }
  299. {
  300. // include search
  301. DocumentID target = 8;
  302. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  303. EXPECT_EQ(8, reader.Advance(target));
  304. }
  305. {
  306. DocumentID target = 3;
  307. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  308. EXPECT_EQ(3, reader.Advance(target));
  309. }
  310. {
  311. DocumentID target = 10;
  312. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  313. EXPECT_EQ(10, reader.Advance(target));
  314. }
  315. }
  316. TEST_F(SearcherTest, DocListReaderCodecImplTest2) {
  317. std::unique_ptr<wwsearch::Codec> codec(new wwsearch::CodecImpl);
  318. std::string data;
  319. {
  320. wwsearch::DocListWriterCodec *doc_list_order_writer_codec =
  321. codec->NewOrderDocListWriterCodec();
  322. doc_list_order_writer_codec->AddDocID(18, 1);
  323. doc_list_order_writer_codec->AddDocID(13, 0);
  324. doc_list_order_writer_codec->AddDocID(8, 0);
  325. doc_list_order_writer_codec->AddDocID(6, 0);
  326. doc_list_order_writer_codec->AddDocID(3, 0);
  327. SearchLogDebug("DocListWriterCodec debug : %s\n",
  328. doc_list_order_writer_codec->DebugString().c_str());
  329. doc_list_order_writer_codec->SerializeToBytes(data, 0);
  330. codec->ReleaseOrderDocListWriterCodec(doc_list_order_writer_codec);
  331. }
  332. {
  333. // small all
  334. DocumentID target = 2;
  335. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  336. EXPECT_EQ(wwsearch::DocIdSetIterator::NO_MORE_DOCS, reader.Advance(target));
  337. }
  338. {
  339. // bigger all
  340. DocumentID target = 23;
  341. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  342. EXPECT_EQ(18, reader.Advance(target));
  343. }
  344. {
  345. // not include search
  346. DocumentID target = 7;
  347. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  348. EXPECT_EQ(6, reader.Advance(target));
  349. }
  350. {
  351. // not include search
  352. DocumentID target = 9;
  353. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  354. EXPECT_EQ(8, reader.Advance(target));
  355. }
  356. {
  357. // include search
  358. DocumentID target = 6;
  359. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  360. EXPECT_EQ(6, reader.Advance(target));
  361. }
  362. {
  363. // include search
  364. DocumentID target = 8;
  365. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  366. EXPECT_EQ(8, reader.Advance(target));
  367. }
  368. {
  369. DocumentID target = 18;
  370. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  371. EXPECT_EQ(18, reader.Advance(target));
  372. }
  373. {
  374. DocumentID target = 3;
  375. wwsearch::DocListReaderCodecImpl reader(data.c_str(), data.size());
  376. EXPECT_EQ(3, reader.Advance(target));
  377. }
  378. }
  379. } // namespace wwsearch