string_split_unittest.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "butil/strings/string_split.h"
  5. #include "butil/strings/utf_string_conversions.h"
  6. #include <gtest/gtest.h>
  7. namespace butil {
  8. namespace {
  9. void AssertElements(std::vector<std::string>& result,
  10. const char* const expected_data[],
  11. size_t data_size) {
  12. ASSERT_EQ(data_size, result.size());
  13. for (size_t i = 0; i < data_size; ++i) {
  14. ASSERT_STREQ(expected_data[i], result[i].c_str());
  15. }
  16. }
  17. #if !defined(WCHAR_T_IS_UTF16)
  18. // Overload SplitString with a wide-char version to make it easier to
  19. // test the string16 version with wide character literals.
  20. void SplitString(const std::wstring& str,
  21. wchar_t c,
  22. std::vector<std::wstring>* result) {
  23. std::vector<string16> result16;
  24. SplitString(WideToUTF16(str), c, &result16);
  25. for (size_t i = 0; i < result16.size(); ++i)
  26. result->push_back(UTF16ToWide(result16[i]));
  27. }
  28. #endif
  29. } // anonymous namespace
  30. class SplitStringIntoKeyValuePairsTest : public testing::Test {
  31. protected:
  32. std::vector<std::pair<std::string, std::string> > kv_pairs;
  33. };
  34. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyString) {
  35. EXPECT_TRUE(SplitStringIntoKeyValuePairs(std::string(),
  36. ':', // Key-value delimiter
  37. ',', // Key-value pair delimiter
  38. &kv_pairs));
  39. EXPECT_TRUE(kv_pairs.empty());
  40. }
  41. TEST_F(SplitStringIntoKeyValuePairsTest, MissingKeyValueDelimiter) {
  42. EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1,key2:value2",
  43. ':', // Key-value delimiter
  44. ',', // Key-value pair delimiter
  45. &kv_pairs));
  46. ASSERT_EQ(2U, kv_pairs.size());
  47. EXPECT_TRUE(kv_pairs[0].first.empty());
  48. EXPECT_TRUE(kv_pairs[0].second.empty());
  49. EXPECT_EQ("key2", kv_pairs[1].first);
  50. EXPECT_EQ("value2", kv_pairs[1].second);
  51. }
  52. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyKeyWithKeyValueDelimiter) {
  53. EXPECT_TRUE(SplitStringIntoKeyValuePairs(":value1,key2:value2",
  54. ':', // Key-value delimiter
  55. ',', // Key-value pair delimiter
  56. &kv_pairs));
  57. ASSERT_EQ(2U, kv_pairs.size());
  58. EXPECT_TRUE(kv_pairs[0].first.empty());
  59. EXPECT_EQ("value1", kv_pairs[0].second);
  60. EXPECT_EQ("key2", kv_pairs[1].first);
  61. EXPECT_EQ("value2", kv_pairs[1].second);
  62. }
  63. TEST_F(SplitStringIntoKeyValuePairsTest, TrailingAndLeadingPairDelimiter) {
  64. EXPECT_TRUE(SplitStringIntoKeyValuePairs(",key1:value1,key2:value2,",
  65. ':', // Key-value delimiter
  66. ',', // Key-value pair delimiter
  67. &kv_pairs));
  68. ASSERT_EQ(2U, kv_pairs.size());
  69. EXPECT_EQ("key1", kv_pairs[0].first);
  70. EXPECT_EQ("value1", kv_pairs[0].second);
  71. EXPECT_EQ("key2", kv_pairs[1].first);
  72. EXPECT_EQ("value2", kv_pairs[1].second);
  73. }
  74. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyPair) {
  75. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1,,key3:value3",
  76. ':', // Key-value delimiter
  77. ',', // Key-value pair delimiter
  78. &kv_pairs));
  79. ASSERT_EQ(2U, kv_pairs.size());
  80. EXPECT_EQ("key1", kv_pairs[0].first);
  81. EXPECT_EQ("value1", kv_pairs[0].second);
  82. EXPECT_EQ("key3", kv_pairs[1].first);
  83. EXPECT_EQ("value3", kv_pairs[1].second);
  84. }
  85. TEST_F(SplitStringIntoKeyValuePairsTest, EmptyValue) {
  86. EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1:,key2:value2",
  87. ':', // Key-value delimiter
  88. ',', // Key-value pair delimiter
  89. &kv_pairs));
  90. ASSERT_EQ(2U, kv_pairs.size());
  91. EXPECT_EQ("key1", kv_pairs[0].first);
  92. EXPECT_EQ("", kv_pairs[0].second);
  93. EXPECT_EQ("key2", kv_pairs[1].first);
  94. EXPECT_EQ("value2", kv_pairs[1].second);
  95. }
  96. TEST_F(SplitStringIntoKeyValuePairsTest, UntrimmedWhitespace) {
  97. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1 : value1",
  98. ':', // Key-value delimiter
  99. ',', // Key-value pair delimiter
  100. &kv_pairs));
  101. ASSERT_EQ(1U, kv_pairs.size());
  102. EXPECT_EQ("key1 ", kv_pairs[0].first);
  103. EXPECT_EQ(" value1", kv_pairs[0].second);
  104. }
  105. TEST_F(SplitStringIntoKeyValuePairsTest, TrimmedWhitespace) {
  106. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1 , key2:value2",
  107. ':', // Key-value delimiter
  108. ',', // Key-value pair delimiter
  109. &kv_pairs));
  110. ASSERT_EQ(2U, kv_pairs.size());
  111. EXPECT_EQ("key1", kv_pairs[0].first);
  112. EXPECT_EQ("value1", kv_pairs[0].second);
  113. EXPECT_EQ("key2", kv_pairs[1].first);
  114. EXPECT_EQ("value2", kv_pairs[1].second);
  115. }
  116. TEST_F(SplitStringIntoKeyValuePairsTest, MultipleKeyValueDelimiters) {
  117. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:::value1,key2:value2",
  118. ':', // Key-value delimiter
  119. ',', // Key-value pair delimiter
  120. &kv_pairs));
  121. ASSERT_EQ(2U, kv_pairs.size());
  122. EXPECT_EQ("key1", kv_pairs[0].first);
  123. EXPECT_EQ("value1", kv_pairs[0].second);
  124. EXPECT_EQ("key2", kv_pairs[1].first);
  125. EXPECT_EQ("value2", kv_pairs[1].second);
  126. }
  127. TEST_F(SplitStringIntoKeyValuePairsTest, OnlySplitAtGivenSeparator) {
  128. std::string a("a ?!@#$%^&*()_+:/{}\\\t\nb");
  129. EXPECT_TRUE(SplitStringIntoKeyValuePairs(a + "X" + a + "Y" + a + "X" + a,
  130. 'X', // Key-value delimiter
  131. 'Y', // Key-value pair delimiter
  132. &kv_pairs));
  133. ASSERT_EQ(2U, kv_pairs.size());
  134. EXPECT_EQ(a, kv_pairs[0].first);
  135. EXPECT_EQ(a, kv_pairs[0].second);
  136. EXPECT_EQ(a, kv_pairs[1].first);
  137. EXPECT_EQ(a, kv_pairs[1].second);
  138. }
  139. TEST_F(SplitStringIntoKeyValuePairsTest, DelimiterInValue) {
  140. EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:va:ue1,key2:value2",
  141. ':', // Key-value delimiter
  142. ',', // Key-value pair delimiter
  143. &kv_pairs));
  144. ASSERT_EQ(2U, kv_pairs.size());
  145. EXPECT_EQ("key1", kv_pairs[0].first);
  146. EXPECT_EQ("va:ue1", kv_pairs[0].second);
  147. EXPECT_EQ("key2", kv_pairs[1].first);
  148. EXPECT_EQ("value2", kv_pairs[1].second);
  149. }
  150. TEST(SplitStringUsingSubstrTest, EmptyString) {
  151. std::vector<std::string> results;
  152. SplitStringUsingSubstr(std::string(), "DELIMITER", &results);
  153. const char* const expected[] = { "" };
  154. AssertElements(results, expected, arraysize(expected));
  155. }
  156. TEST(StringUtilTest, SplitString) {
  157. std::vector<std::wstring> r;
  158. SplitString(std::wstring(), L',', &r);
  159. EXPECT_EQ(0U, r.size());
  160. r.clear();
  161. SplitString(L"a,b,c", L',', &r);
  162. ASSERT_EQ(3U, r.size());
  163. EXPECT_EQ(r[0], L"a");
  164. EXPECT_EQ(r[1], L"b");
  165. EXPECT_EQ(r[2], L"c");
  166. r.clear();
  167. SplitString(L"a, b, c", L',', &r);
  168. ASSERT_EQ(3U, r.size());
  169. EXPECT_EQ(r[0], L"a");
  170. EXPECT_EQ(r[1], L"b");
  171. EXPECT_EQ(r[2], L"c");
  172. r.clear();
  173. SplitString(L"a,,c", L',', &r);
  174. ASSERT_EQ(3U, r.size());
  175. EXPECT_EQ(r[0], L"a");
  176. EXPECT_EQ(r[1], L"");
  177. EXPECT_EQ(r[2], L"c");
  178. r.clear();
  179. SplitString(L" ", L'*', &r);
  180. EXPECT_EQ(0U, r.size());
  181. r.clear();
  182. SplitString(L"foo", L'*', &r);
  183. ASSERT_EQ(1U, r.size());
  184. EXPECT_EQ(r[0], L"foo");
  185. r.clear();
  186. SplitString(L"foo ,", L',', &r);
  187. ASSERT_EQ(2U, r.size());
  188. EXPECT_EQ(r[0], L"foo");
  189. EXPECT_EQ(r[1], L"");
  190. r.clear();
  191. SplitString(L",", L',', &r);
  192. ASSERT_EQ(2U, r.size());
  193. EXPECT_EQ(r[0], L"");
  194. EXPECT_EQ(r[1], L"");
  195. r.clear();
  196. SplitString(L"\t\ta\t", L'\t', &r);
  197. ASSERT_EQ(4U, r.size());
  198. EXPECT_EQ(r[0], L"");
  199. EXPECT_EQ(r[1], L"");
  200. EXPECT_EQ(r[2], L"a");
  201. EXPECT_EQ(r[3], L"");
  202. r.clear();
  203. SplitString(L"\ta\t\nb\tcc", L'\n', &r);
  204. ASSERT_EQ(2U, r.size());
  205. EXPECT_EQ(r[0], L"a");
  206. EXPECT_EQ(r[1], L"b\tcc");
  207. r.clear();
  208. }
  209. TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
  210. std::vector<std::string> results;
  211. SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
  212. const char* const expected[] = { "alongwordwithnodelimiter" };
  213. AssertElements(results, expected, arraysize(expected));
  214. }
  215. TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
  216. std::vector<std::string> results;
  217. SplitStringUsingSubstr(
  218. "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree",
  219. "DELIMITER",
  220. &results);
  221. const char* const expected[] = { "", "", "", "one", "two", "three" };
  222. AssertElements(results, expected, arraysize(expected));
  223. }
  224. TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
  225. std::vector<std::string> results;
  226. SplitStringUsingSubstr(
  227. "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
  228. "DELIMITER",
  229. &results);
  230. const char* const expected[] = { "uno", "", "", "dos", "tres", "", "cuatro" };
  231. AssertElements(results, expected, arraysize(expected));
  232. }
  233. TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
  234. std::vector<std::string> results;
  235. SplitStringUsingSubstr(
  236. "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
  237. "DELIMITER",
  238. &results);
  239. const char* const expected[] = { "un", "deux", "trois", "quatre", "", "", "" };
  240. AssertElements(results, expected, arraysize(expected));
  241. }
  242. TEST(StringSplitTest, StringSplitDontTrim) {
  243. std::vector<std::string> r;
  244. SplitStringDontTrim(" ", '*', &r);
  245. ASSERT_EQ(1U, r.size());
  246. EXPECT_EQ(r[0], " ");
  247. SplitStringDontTrim("\t \ta\t ", '\t', &r);
  248. ASSERT_EQ(4U, r.size());
  249. EXPECT_EQ(r[0], "");
  250. EXPECT_EQ(r[1], " ");
  251. EXPECT_EQ(r[2], "a");
  252. EXPECT_EQ(r[3], " ");
  253. SplitStringDontTrim("\ta\t\nb\tcc", '\n', &r);
  254. ASSERT_EQ(2U, r.size());
  255. EXPECT_EQ(r[0], "\ta\t");
  256. EXPECT_EQ(r[1], "b\tcc");
  257. }
  258. TEST(StringSplitTest, SplitStringAlongWhitespace) {
  259. struct TestData {
  260. const char* input;
  261. const size_t expected_result_count;
  262. const char* output1;
  263. const char* output2;
  264. } data[] = {
  265. { "a", 1, "a", "" },
  266. { " ", 0, "", "" },
  267. { " a", 1, "a", "" },
  268. { " ab ", 1, "ab", "" },
  269. { " ab c", 2, "ab", "c" },
  270. { " ab c ", 2, "ab", "c" },
  271. { " ab cd", 2, "ab", "cd" },
  272. { " ab cd ", 2, "ab", "cd" },
  273. { " \ta\t", 1, "a", "" },
  274. { " b\ta\t", 2, "b", "a" },
  275. { " b\tat", 2, "b", "at" },
  276. { "b\tat", 2, "b", "at" },
  277. { "b\t at", 2, "b", "at" },
  278. };
  279. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
  280. std::vector<std::string> results;
  281. SplitStringAlongWhitespace(data[i].input, &results);
  282. ASSERT_EQ(data[i].expected_result_count, results.size());
  283. if (data[i].expected_result_count > 0)
  284. ASSERT_EQ(data[i].output1, results[0]);
  285. if (data[i].expected_result_count > 1)
  286. ASSERT_EQ(data[i].output2, results[1]);
  287. }
  288. }
  289. } // namespace butil