string_split_unittest.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "butil/strings/string_split.h"
  5. #include "butil/strings/utf_string_conversions.h"
  6. #include <gtest/gtest.h>
  7. namespace butil {
  8. namespace {
  9. void AssertElements(std::vector<std::string>& result,
  10. const char* const expected_data[],
  11. size_t data_size) {
  12. ASSERT_EQ(data_size, result.size());
  13. for (size_t i = 0; i < data_size; ++i) {
  14. ASSERT_STREQ(expected_data[i], result[i].c_str());
  15. }
  16. }
  17. #if !defined(WCHAR_T_IS_UTF16)
  18. // Overload SplitString with a wide-char version to make it easier to
  19. // test the string16 version with wide character literals.
  20. void SplitString(const std::wstring& str,
  21. wchar_t c,
  22. std::vector<std::wstring>* result) {
  23. std::vector<string16> result16;
  24. SplitString(WideToUTF16(str), c, &result16);
  25. for (size_t i = 0; i < result16.size(); ++i)
  26. result->push_back(UTF16ToWide(result16[i]));
  27. }
  28. #endif
  29. } // anonymous namespace
  30. class SplitStringIntoKeyValuePairsTest : public testing::Test {
  31. protected:
  32. std::vector<std::pair<std::string, std::string> > kv_pairs;
  33. };
  34. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyString) {
  35. EXPECT_TRUE(SplitStringIntoKeyValuePairs(std::string(),
  36. ':', // Key-value delimiter
  37. ',', // Key-value pair delimiter
  38. &kv_pairs));
  39. EXPECT_TRUE(kv_pairs.empty());
  40. }
  41. TEST_F(SplitStringIntoKeyValuePairsTest, MissingKeyValueDelimiter) {
  42. EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1,key2:value2",
  43. ':', // Key-value delimiter
  44. ',', // Key-value pair delimiter
  45. &kv_pairs));
  46. ASSERT_EQ(2U, kv_pairs.size());
  47. EXPECT_TRUE(kv_pairs[0].first.empty());
  48. EXPECT_TRUE(kv_pairs[0].second.empty());
  49. EXPECT_EQ("key2", kv_pairs[1].first);
  50. EXPECT_EQ("value2", kv_pairs[1].second);
  51. }
  52. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyKeyWithKeyValueDelimiter) {
  53. EXPECT_TRUE(SplitStringIntoKeyValuePairs(":value1,key2:value2",
  54. ':', // Key-value delimiter
  55. ',', // Key-value pair delimiter
  56. &kv_pairs));
  57. ASSERT_EQ(2U, kv_pairs.size());
  58. EXPECT_TRUE(kv_pairs[0].first.empty());
  59. EXPECT_EQ("value1", kv_pairs[0].second);
  60. EXPECT_EQ("key2", kv_pairs[1].first);
  61. EXPECT_EQ("value2", kv_pairs[1].second);
  62. }
  63. TEST_F(SplitStringIntoKeyValuePairsTest, TrailingAndLeadingPairDelimiter) {
  64. EXPECT_TRUE(SplitStringIntoKeyValuePairs(",key1:value1,key2:value2,",
  65. ':', // Key-value delimiter
  66. ',', // Key-value pair delimiter
  67. &kv_pairs));
  68. ASSERT_EQ(2U, kv_pairs.size());
  69. EXPECT_EQ("key1", kv_pairs[0].first);
  70. EXPECT_EQ("value1", kv_pairs[0].second);
  71. EXPECT_EQ("key2", kv_pairs[1].first);
  72. EXPECT_EQ("value2", kv_pairs[1].second);
  73. }
  74. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyPair) {
  75. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1,,key3:value3",
  76. ':', // Key-value delimiter
  77. ',', // Key-value pair delimiter
  78. &kv_pairs));
  79. ASSERT_EQ(2U, kv_pairs.size());
  80. EXPECT_EQ("key1", kv_pairs[0].first);
  81. EXPECT_EQ("value1", kv_pairs[0].second);
  82. EXPECT_EQ("key3", kv_pairs[1].first);
  83. EXPECT_EQ("value3", kv_pairs[1].second);
  84. }
  85. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyValue) {
  86. EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1:,key2:value2",
  87. ':', // Key-value delimiter
  88. ',', // Key-value pair delimiter
  89. &kv_pairs));
  90. ASSERT_EQ(2U, kv_pairs.size());
  91. EXPECT_EQ("key1", kv_pairs[0].first);
  92. EXPECT_EQ("", kv_pairs[0].second);
  93. EXPECT_EQ("key2", kv_pairs[1].first);
  94. EXPECT_EQ("value2", kv_pairs[1].second);
  95. }
  96. TEST_F(SplitStringIntoKeyValuePairsTest, UntrimmedWhitespace) {
  97. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1 : value1",
  98. ':', // Key-value delimiter
  99. ',', // Key-value pair delimiter
  100. &kv_pairs));
  101. ASSERT_EQ(1U, kv_pairs.size());
  102. EXPECT_EQ("key1 ", kv_pairs[0].first);
  103. EXPECT_EQ(" value1", kv_pairs[0].second);
  104. }
  105. TEST_F(SplitStringIntoKeyValuePairsTest, TrimmedWhitespace) {
  106. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1 , key2:value2",
  107. ':', // Key-value delimiter
  108. ',', // Key-value pair delimiter
  109. &kv_pairs));
  110. ASSERT_EQ(2U, kv_pairs.size());
  111. EXPECT_EQ("key1", kv_pairs[0].first);
  112. EXPECT_EQ("value1", kv_pairs[0].second);
  113. EXPECT_EQ("key2", kv_pairs[1].first);
  114. EXPECT_EQ("value2", kv_pairs[1].second);
  115. }
  116. TEST_F(SplitStringIntoKeyValuePairsTest, MultipleKeyValueDelimiters) {
  117. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:::value1,key2:value2",
  118. ':', // Key-value delimiter
  119. ',', // Key-value pair delimiter
  120. &kv_pairs));
  121. ASSERT_EQ(2U, kv_pairs.size());
  122. EXPECT_EQ("key1", kv_pairs[0].first);
  123. EXPECT_EQ("value1", kv_pairs[0].second);
  124. EXPECT_EQ("key2", kv_pairs[1].first);
  125. EXPECT_EQ("value2", kv_pairs[1].second);
  126. }
  127. TEST_F(SplitStringIntoKeyValuePairsTest, OnlySplitAtGivenSeparator) {
  128. std::string a("a ?!@#$%^&*()_+:/{}\\\t\nb");
  129. EXPECT_TRUE(SplitStringIntoKeyValuePairs(a + "X" + a + "Y" + a + "X" + a,
  130. 'X', // Key-value delimiter
  131. 'Y', // Key-value pair delimiter
  132. &kv_pairs));
  133. ASSERT_EQ(2U, kv_pairs.size());
  134. EXPECT_EQ(a, kv_pairs[0].first);
  135. EXPECT_EQ(a, kv_pairs[0].second);
  136. EXPECT_EQ(a, kv_pairs[1].first);
  137. EXPECT_EQ(a, kv_pairs[1].second);
  138. }
  139. TEST_F(SplitStringIntoKeyValuePairsTest, DelimiterInValue) {
  140. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:va:ue1,key2:value2",
  141. ':', // Key-value delimiter
  142. ',', // Key-value pair delimiter
  143. &kv_pairs));
  144. ASSERT_EQ(2U, kv_pairs.size());
  145. EXPECT_EQ("key1", kv_pairs[0].first);
  146. EXPECT_EQ("va:ue1", kv_pairs[0].second);
  147. EXPECT_EQ("key2", kv_pairs[1].first);
  148. EXPECT_EQ("value2", kv_pairs[1].second);
  149. }
  150. TEST(SplitStringUsingSubstrTest, EmptyString) {
  151. std::vector<std::string> results;
  152. SplitStringUsingSubstr(std::string(), "DELIMITER", &results);
  153. const char* const expected[] = { "" };
  154. AssertElements(results, expected, arraysize(expected));
  155. }
  156. TEST(StringUtilTest, SplitString) {
  157. std::vector<std::wstring> r;
  158. SplitString(std::wstring(), L',', &r);
  159. EXPECT_EQ(0U, r.size());
  160. r.clear();
  161. SplitString(L"a,b,c", L',', &r);
  162. ASSERT_EQ(3U, r.size());
  163. EXPECT_EQ(r[0], L"a");
  164. EXPECT_EQ(r[1], L"b");
  165. EXPECT_EQ(r[2], L"c");
  166. r.clear();
  167. SplitString(L"a, b, c", L',', &r);
  168. ASSERT_EQ(3U, r.size());
  169. EXPECT_EQ(r[0], L"a");
  170. EXPECT_EQ(r[1], L"b");
  171. EXPECT_EQ(r[2], L"c");
  172. r.clear();
  173. SplitString(L"a,,c", L',', &r);
  174. ASSERT_EQ(3U, r.size());
  175. EXPECT_EQ(r[0], L"a");
  176. EXPECT_EQ(r[1], L"");
  177. EXPECT_EQ(r[2], L"c");
  178. r.clear();
  179. SplitString(L" ", L'*', &r);
  180. EXPECT_EQ(0U, r.size());
  181. r.clear();
  182. SplitString(L"foo", L'*', &r);
  183. ASSERT_EQ(1U, r.size());
  184. EXPECT_EQ(r[0], L"foo");
  185. r.clear();
  186. SplitString(L"foo ,", L',', &r);
  187. ASSERT_EQ(2U, r.size());
  188. EXPECT_EQ(r[0], L"foo");
  189. EXPECT_EQ(r[1], L"");
  190. r.clear();
  191. SplitString(L",", L',', &r);
  192. ASSERT_EQ(2U, r.size());
  193. EXPECT_EQ(r[0], L"");
  194. EXPECT_EQ(r[1], L"");
  195. r.clear();
  196. SplitString(L"\t\ta\t", L'\t', &r);
  197. ASSERT_EQ(4U, r.size());
  198. EXPECT_EQ(r[0], L"");
  199. EXPECT_EQ(r[1], L"");
  200. EXPECT_EQ(r[2], L"a");
  201. EXPECT_EQ(r[3], L"");
  202. r.clear();
  203. SplitString(L"\ta\t\nb\tcc", L'\n', &r);
  204. ASSERT_EQ(2U, r.size());
  205. EXPECT_EQ(r[0], L"a");
  206. EXPECT_EQ(r[1], L"b\tcc");
  207. r.clear();
  208. }
  209. TEST(StringUtilTest, SplitStringStringPiece) {
  210. std::vector<butil::StringPiece> r;
  211. SplitString(butil::StringPiece(), ',', &r);
  212. EXPECT_EQ(0U, r.size());
  213. r.clear();
  214. SplitString(butil::StringPiece("a,b,c"), ',', &r);
  215. ASSERT_EQ(3U, r.size());
  216. EXPECT_EQ(r[0], "a");
  217. EXPECT_EQ(r[1], "b");
  218. EXPECT_EQ(r[2], "c");
  219. r.clear();
  220. SplitString(butil::StringPiece("a, b, c"), ',', &r);
  221. ASSERT_EQ(3U, r.size());
  222. EXPECT_EQ(r[0], "a");
  223. EXPECT_EQ(r[1], "b");
  224. EXPECT_EQ(r[2], "c");
  225. r.clear();
  226. SplitString(butil::StringPiece("a,,c"), ',', &r);
  227. ASSERT_EQ(3U, r.size());
  228. EXPECT_EQ(r[0], "a");
  229. EXPECT_EQ(r[1], "");
  230. EXPECT_EQ(r[2], "c");
  231. r.clear();
  232. SplitString(butil::StringPiece(" "), '*', &r);
  233. EXPECT_EQ(0U, r.size());
  234. r.clear();
  235. SplitString(butil::StringPiece("foo"), '*', &r);
  236. ASSERT_EQ(1U, r.size());
  237. EXPECT_EQ(r[0], "foo");
  238. r.clear();
  239. SplitString(butil::StringPiece("foo ,"), ',', &r);
  240. ASSERT_EQ(2U, r.size());
  241. EXPECT_EQ(r[0], "foo");
  242. EXPECT_EQ(r[1], "");
  243. r.clear();
  244. SplitString(butil::StringPiece(","), ',', &r);
  245. ASSERT_EQ(2U, r.size());
  246. EXPECT_EQ(r[0], "");
  247. EXPECT_EQ(r[1], "");
  248. r.clear();
  249. SplitString(butil::StringPiece("\t\ta\t"), '\t', &r);
  250. ASSERT_EQ(4U, r.size());
  251. EXPECT_EQ(r[0], "");
  252. EXPECT_EQ(r[1], "");
  253. EXPECT_EQ(r[2], "a");
  254. EXPECT_EQ(r[3], "");
  255. r.clear();
  256. SplitString(butil::StringPiece("\ta\t\nb\tcc"), '\n', &r);
  257. ASSERT_EQ(2U, r.size());
  258. EXPECT_EQ(r[0], "a");
  259. EXPECT_EQ(r[1], "b\tcc");
  260. r.clear();
  261. }
  262. TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
  263. std::vector<std::string> results;
  264. SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
  265. const char* const expected[] = { "alongwordwithnodelimiter" };
  266. AssertElements(results, expected, arraysize(expected));
  267. }
  268. TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
  269. std::vector<std::string> results;
  270. SplitStringUsingSubstr(
  271. "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree",
  272. "DELIMITER",
  273. &results);
  274. const char* const expected[] = { "", "", "", "one", "two", "three" };
  275. AssertElements(results, expected, arraysize(expected));
  276. }
  277. TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
  278. std::vector<std::string> results;
  279. SplitStringUsingSubstr(
  280. "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
  281. "DELIMITER",
  282. &results);
  283. const char* const expected[] = { "uno", "", "", "dos", "tres", "", "cuatro" };
  284. AssertElements(results, expected, arraysize(expected));
  285. }
  286. TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
  287. std::vector<std::string> results;
  288. SplitStringUsingSubstr(
  289. "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
  290. "DELIMITER",
  291. &results);
  292. const char* const expected[] = { "un", "deux", "trois", "quatre", "", "", "" };
  293. AssertElements(results, expected, arraysize(expected));
  294. }
  295. TEST(StringSplitTest, StringSplitDontTrim) {
  296. std::vector<std::string> r;
  297. SplitStringDontTrim(" ", '*', &r);
  298. ASSERT_EQ(1U, r.size());
  299. EXPECT_EQ(r[0], " ");
  300. SplitStringDontTrim("\t \ta\t ", '\t', &r);
  301. ASSERT_EQ(4U, r.size());
  302. EXPECT_EQ(r[0], "");
  303. EXPECT_EQ(r[1], " ");
  304. EXPECT_EQ(r[2], "a");
  305. EXPECT_EQ(r[3], " ");
  306. SplitStringDontTrim("\ta\t\nb\tcc", '\n', &r);
  307. ASSERT_EQ(2U, r.size());
  308. EXPECT_EQ(r[0], "\ta\t");
  309. EXPECT_EQ(r[1], "b\tcc");
  310. }
  311. TEST(StringSplitTest, SplitStringAlongWhitespace) {
  312. struct TestData {
  313. const char* input;
  314. const size_t expected_result_count;
  315. const char* output1;
  316. const char* output2;
  317. } data[] = {
  318. { "a", 1, "a", "" },
  319. { " ", 0, "", "" },
  320. { " a", 1, "a", "" },
  321. { " ab ", 1, "ab", "" },
  322. { " ab c", 2, "ab", "c" },
  323. { " ab c ", 2, "ab", "c" },
  324. { " ab cd", 2, "ab", "cd" },
  325. { " ab cd ", 2, "ab", "cd" },
  326. { " \ta\t", 1, "a", "" },
  327. { " b\ta\t", 2, "b", "a" },
  328. { " b\tat", 2, "b", "at" },
  329. { "b\tat", 2, "b", "at" },
  330. { "b\t at", 2, "b", "at" },
  331. };
  332. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
  333. std::vector<std::string> results;
  334. SplitStringAlongWhitespace(data[i].input, &results);
  335. ASSERT_EQ(data[i].expected_result_count, results.size());
  336. if (data[i].expected_result_count > 0) {
  337. ASSERT_EQ(data[i].output1, results[0]);
  338. }
  339. if (data[i].expected_result_count > 1) {
  340. ASSERT_EQ(data[i].output2, results[1]);
  341. }
  342. }
  343. }
  344. } // namespace butil