string_splitter_unittest.cpp 10 KB


  1. // Licensed to the Apache Software Foundation (ASF) under one
  2. // or more contributor license agreements. See the NOTICE file
  3. // distributed with this work for additional information
  4. // regarding copyright ownership. The ASF licenses this file
  5. // to you under the Apache License, Version 2.0 (the
  6. // "License"); you may not use this file except in compliance
  7. // with the License. You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing,
  12. // software distributed under the License is distributed on an
  13. // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. // KIND, either express or implied. See the License for the
  15. // specific language governing permissions and limitations
  16. // under the License.
#include <gtest/gtest.h>
#include "butil/string_splitter.h"
#include <stdlib.h>
#include <string.h>
  20. namespace {
  21. class StringSplitterTest : public ::testing::Test{
  22. protected:
  23. StringSplitterTest(){};
  24. virtual ~StringSplitterTest(){};
  25. virtual void SetUp() {
  26. srand (time(0));
  27. };
  28. virtual void TearDown() {
  29. };
  30. };
  31. TEST_F(StringSplitterTest, sanity) {
  32. const char* str = "hello there! man ";
  33. butil::StringSplitter ss(str, ' ');
  34. // "hello"
  35. ASSERT_TRUE(ss != NULL);
  36. ASSERT_EQ(5ul, ss.length());
  37. ASSERT_EQ(ss.field(), str);
  38. // "there!"
  39. ++ss;
  40. ASSERT_NE(ss, (void*)NULL);
  41. ASSERT_EQ(6ul, ss.length());
  42. ASSERT_EQ(ss.field(), str+6);
  43. // "man"
  44. ++ss;
  45. ASSERT_TRUE(ss);
  46. ASSERT_EQ(3ul, ss.length());
  47. ASSERT_EQ(ss.field(), str+15);
  48. ++ss;
  49. ASSERT_FALSE(ss);
  50. ASSERT_EQ(0ul, ss.length());
  51. ASSERT_EQ(ss.field(), str + strlen(str));
  52. // consecutive separators are treated as zero-length field inside
  53. butil::StringSplitter ss2(str, ' ', butil::ALLOW_EMPTY_FIELD);
  54. // "hello"
  55. ASSERT_TRUE(ss2);
  56. ASSERT_EQ(5ul, ss2.length());
  57. ASSERT_FALSE(strncmp(ss2.field(), "hello", ss2.length()));
  58. // "there!"
  59. ++ss2;
  60. ASSERT_TRUE(ss2);
  61. ASSERT_EQ(6ul, ss2.length());
  62. ASSERT_FALSE(strncmp(ss2.field(), "there!", ss2.length()));
  63. // ""
  64. ++ss2;
  65. ASSERT_TRUE(ss2);
  66. ASSERT_EQ(0ul, ss2.length());
  67. ASSERT_EQ(ss2.field(), str+13);
  68. // ""
  69. ++ss2;
  70. ASSERT_TRUE(ss2);
  71. ASSERT_EQ(0ul, ss2.length());
  72. ASSERT_EQ(ss2.field(), str+14);
  73. // "man"
  74. ++ss2;
  75. ASSERT_TRUE(ss2);
  76. ASSERT_EQ(3ul, ss2.length());
  77. ASSERT_EQ(ss2.field(), str+15);
  78. ++ss2;
  79. ASSERT_FALSE(ss2);
  80. ASSERT_EQ(0ul, ss2.length());
  81. ASSERT_EQ(ss2.field(), str+19);
  82. }
  83. TEST_F(StringSplitterTest, single_word)
  84. {
  85. const char* str = "apple";
  86. butil::StringSplitter ss(str, ' ');
  87. // "apple"
  88. ASSERT_TRUE(ss);
  89. ASSERT_EQ(5ul, ss.length());
  90. ASSERT_EQ(ss.field(), str);
  91. ++ss;
  92. ASSERT_FALSE(ss);
  93. ASSERT_EQ(0ul, ss.length());
  94. ASSERT_EQ(ss.field(), str+5);
  95. }
  96. TEST_F(StringSplitterTest, starting_with_separator) {
  97. const char* str = " apple";
  98. butil::StringSplitter ss(str, ' ');
  99. // "apple"
  100. ASSERT_TRUE(ss);
  101. ASSERT_EQ(5ul, ss.length());
  102. ASSERT_FALSE(strncmp(ss.field(), "apple", ss.length()));
  103. ++ss;
  104. ASSERT_FALSE(ss);
  105. ASSERT_EQ(0ul, ss.length());
  106. ASSERT_EQ(ss.field(), str + strlen(str));
  107. butil::StringSplitter ss2(str, ' ', butil::ALLOW_EMPTY_FIELD);
  108. // ""
  109. ASSERT_TRUE(ss2);
  110. ASSERT_EQ(0ul, ss2.length());
  111. ASSERT_EQ(ss2.field(), str);
  112. // ""
  113. ++ss2;
  114. ASSERT_TRUE(ss2);
  115. ASSERT_EQ(0ul, ss2.length());
  116. ASSERT_EQ(ss2.field(), str+1);
  117. // "apple"
  118. ++ss2;
  119. ASSERT_TRUE(ss2);
  120. ASSERT_EQ(5ul, ss2.length());
  121. ASSERT_FALSE(strncmp(ss2.field(), "apple", ss2.length()));
  122. ++ss2;
  123. ASSERT_FALSE(ss2);
  124. ASSERT_EQ(0ul, ss2.length());
  125. ASSERT_EQ(ss2.field(), str + strlen(str));
  126. }
  127. TEST_F(StringSplitterTest, site_id_as_example) {
  128. const char* str = "|123|12||1|21|4321";
  129. butil::StringSplitter ss(str, '|');
  130. ASSERT_TRUE(ss);
  131. ASSERT_EQ(3ul, ss.length());
  132. ASSERT_FALSE(strncmp(ss.field(), "123", ss.length()));
  133. ss++;
  134. ASSERT_TRUE(ss);
  135. ASSERT_EQ(2ul, ss.length());
  136. ASSERT_FALSE(strncmp(ss.field(), "12", ss.length()));
  137. ss++;
  138. ASSERT_TRUE(ss);
  139. ASSERT_EQ(1ul, ss.length());
  140. ASSERT_FALSE(strncmp(ss.field(), "1", ss.length()));
  141. ss++;
  142. ASSERT_TRUE(ss);
  143. ASSERT_EQ(2ul, ss.length());
  144. ASSERT_FALSE(strncmp(ss.field(), "21", ss.length()));
  145. ss++;
  146. ASSERT_TRUE(ss);
  147. ASSERT_EQ(4ul, ss.length());
  148. ASSERT_FALSE(strncmp(ss.field(), "4321", ss.length()));
  149. ++ss;
  150. ASSERT_FALSE(ss);
  151. ASSERT_EQ(0ul, ss.length());
  152. ASSERT_EQ(ss.field(), str + strlen(str));
  153. }
  154. TEST_F(StringSplitterTest, number_list) {
  155. const char* str = " 123,,12,1, 21 4321\00056";
  156. butil::StringMultiSplitter ss(str, ", ");
  157. ASSERT_TRUE(ss);
  158. ASSERT_EQ(3ul, ss.length());
  159. ASSERT_FALSE(strncmp(ss.field(), "123", ss.length()));
  160. ss++;
  161. ASSERT_TRUE(ss);
  162. ASSERT_EQ(2ul, ss.length());
  163. ASSERT_FALSE(strncmp(ss.field(), "12", ss.length()));
  164. ss++;
  165. ASSERT_TRUE(ss);
  166. ASSERT_EQ(1ul, ss.length());
  167. ASSERT_FALSE(strncmp(ss.field(), "1", ss.length()));
  168. ss++;
  169. ASSERT_TRUE(ss);
  170. ASSERT_EQ(2ul, ss.length());
  171. ASSERT_FALSE(strncmp(ss.field(), "21", ss.length()));
  172. ss++;
  173. ASSERT_TRUE(ss);
  174. ASSERT_EQ(4ul, ss.length());
  175. ASSERT_FALSE(strncmp(ss.field(), "4321", ss.length()));
  176. ++ss;
  177. ASSERT_FALSE(ss);
  178. ASSERT_EQ(0ul, ss.length());
  179. ASSERT_EQ(ss.field(), str + strlen(str));
  180. // contains embedded '\0'
  181. const size_t str_len = 23;
  182. butil::StringMultiSplitter ss2(str, str + str_len, ", ");
  183. ASSERT_TRUE(ss2);
  184. ASSERT_EQ(3ul, ss2.length());
  185. ASSERT_FALSE(strncmp(ss2.field(), "123", ss2.length()));
  186. ss2++;
  187. ASSERT_TRUE(ss2);
  188. ASSERT_EQ(2ul, ss2.length());
  189. ASSERT_FALSE(strncmp(ss2.field(), "12", ss2.length()));
  190. ss2++;
  191. ASSERT_TRUE(ss2);
  192. ASSERT_EQ(1ul, ss2.length());
  193. ASSERT_FALSE(strncmp(ss2.field(), "1", ss2.length()));
  194. ss2++;
  195. ASSERT_TRUE(ss2);
  196. ASSERT_EQ(2ul, ss2.length());
  197. ASSERT_FALSE(strncmp(ss2.field(), "21", ss2.length()));
  198. ss2++;
  199. ASSERT_TRUE(ss2);
  200. ASSERT_EQ(7ul, ss2.length());
  201. ASSERT_FALSE(strncmp(ss2.field(), "4321\00056", ss2.length()));
  202. ++ss2;
  203. ASSERT_FALSE(ss2);
  204. ASSERT_EQ(0ul, ss2.length());
  205. ASSERT_EQ(ss2.field(), str + str_len);
  206. }
  207. TEST_F(StringSplitterTest, cast_type) {
  208. const char* str = "-1\t123\t111\t1\t10\t11\t1.3\t3.1415926\t127\t128\t256";
  209. int i = 0;
  210. unsigned int u = 0;
  211. long l = 0;
  212. unsigned long ul = 0;
  213. long long ll = 0;
  214. unsigned long long ull = 0;
  215. float f = 0.0;
  216. double d = 0.0;
  217. butil::StringSplitter ss(str, '\t');
  218. ASSERT_TRUE(ss);
  219. ASSERT_EQ(0, ss.to_int(&i));
  220. ASSERT_EQ(-1, i);
  221. ASSERT_TRUE(++ss);
  222. ASSERT_EQ(0, ss.to_uint(&u));
  223. ASSERT_EQ(123u, u);
  224. ASSERT_TRUE(++ss);
  225. ASSERT_EQ(0, ss.to_long(&l));
  226. ASSERT_EQ(111, l);
  227. ASSERT_TRUE(++ss);
  228. ASSERT_EQ(0, ss.to_ulong(&ul));
  229. ASSERT_EQ(1ul, ul);
  230. ASSERT_TRUE(++ss);
  231. ASSERT_EQ(0, ss.to_longlong(&ll));
  232. ASSERT_EQ(10, ll);
  233. ASSERT_TRUE(++ss);
  234. ASSERT_EQ(0, ss.to_ulonglong(&ull));
  235. ASSERT_EQ(11ull, ull);
  236. ASSERT_TRUE(++ss);
  237. ASSERT_EQ(0, ss.to_float(&f));
  238. ASSERT_FLOAT_EQ(1.3, f);
  239. ASSERT_TRUE(++ss);
  240. ASSERT_EQ(0, ss.to_double(&d));
  241. ASSERT_DOUBLE_EQ(3.1415926, d);
  242. ASSERT_TRUE(++ss);
  243. int8_t c = 0;
  244. ASSERT_EQ(0, ss.to_int8(&c));
  245. ASSERT_EQ(127, c);
  246. ASSERT_TRUE(++ss);
  247. uint8_t uc = 0;
  248. ASSERT_EQ(0, ss.to_uint8(&uc));
  249. ASSERT_EQ(128U, uc);
  250. ASSERT_TRUE(++ss);
  251. ASSERT_EQ(-1, ss.to_uint8(&uc));
  252. }
  253. TEST_F(StringSplitterTest, split_limit_len) {
  254. const char* str = "1\t1\0003\t111\t1\t10\t11\t1.3\t3.1415926";
  255. butil::StringSplitter ss(str, str + 5, '\t');
  256. ASSERT_TRUE(ss);
  257. ASSERT_EQ(1ul, ss.length());
  258. ASSERT_FALSE(strncmp(ss.field(), "1", ss.length()));
  259. ++ss;
  260. ASSERT_TRUE(ss);
  261. ASSERT_EQ(3ul, ss.length());
  262. ASSERT_FALSE(strncmp(ss.field(), "1\0003", ss.length()));
  263. ++ss;
  264. ASSERT_FALSE(ss);
  265. // Allows using '\0' as separator
  266. butil::StringSplitter ss2(str, str + 5, '\0');
  267. ASSERT_TRUE(ss2);
  268. ASSERT_EQ(3ul, ss2.length());
  269. ASSERT_FALSE(strncmp(ss2.field(), "1\t1", ss2.length()));
  270. ++ss2;
  271. ASSERT_TRUE(ss2);
  272. ASSERT_EQ(1ul, ss2.length());
  273. ASSERT_FALSE(strncmp(ss2.field(), "3", ss2.length()));
  274. ++ss2;
  275. ASSERT_FALSE(ss2);
  276. butil::StringPiece sp(str, 5);
  277. // Allows using '\0' as separator
  278. butil::StringSplitter ss3(sp, '\0');
  279. ASSERT_TRUE(ss3);
  280. ASSERT_EQ(3ul, ss3.length());
  281. ASSERT_FALSE(strncmp(ss3.field(), "1\t1", ss3.length()));
  282. ++ss3;
  283. ASSERT_TRUE(ss3);
  284. ASSERT_EQ(1ul, ss3.length());
  285. ASSERT_FALSE(strncmp(ss3.field(), "3", ss3.length()));
  286. ++ss3;
  287. ASSERT_FALSE(ss3);
  288. }
  289. TEST_F(StringSplitterTest, key_value_pairs_splitter_sanity) {
  290. std::string kvstr = "key1=value1&&&key2=value2&key3=value3&===&key4=&=&=value5";
  291. for (int i = 0 ; i < 3; ++i) {
  292. // Test three constructors
  293. butil::KeyValuePairsSplitter* psplitter = NULL;
  294. if (i == 0) {
  295. psplitter = new butil::KeyValuePairsSplitter(kvstr, '&', '=');
  296. } else if (i == 1) {
  297. psplitter = new butil::KeyValuePairsSplitter(
  298. kvstr.data(), kvstr.data() + kvstr.size(), '&', '=');
  299. } else if (i == 2) {
  300. psplitter = new butil::KeyValuePairsSplitter(kvstr.c_str(), '&', '=');
  301. }
  302. butil::KeyValuePairsSplitter& splitter = *psplitter;
  303. ASSERT_TRUE(splitter);
  304. ASSERT_EQ(splitter.key(), "key1");
  305. ASSERT_EQ(splitter.value(), "value1");
  306. ++splitter;
  307. ASSERT_TRUE(splitter);
  308. ASSERT_EQ(splitter.key(), "key2");
  309. ASSERT_EQ(splitter.value(), "value2");
  310. ++splitter;
  311. ASSERT_TRUE(splitter);
  312. ASSERT_EQ(splitter.key(), "key3");
  313. ASSERT_EQ(splitter.value(), "value3");
  314. ++splitter;
  315. ASSERT_TRUE(splitter);
  316. ASSERT_EQ(splitter.key(), "");
  317. ASSERT_EQ(splitter.value(), "==");
  318. ++splitter;
  319. ASSERT_TRUE(splitter);
  320. ASSERT_EQ(splitter.key(), "key4");
  321. ASSERT_EQ(splitter.value(), "");
  322. ++splitter;
  323. ASSERT_TRUE(splitter);
  324. ASSERT_EQ(splitter.key(), "");
  325. ASSERT_EQ(splitter.value(), "");
  326. ++splitter;
  327. ASSERT_TRUE(splitter);
  328. ASSERT_EQ(splitter.key(), "");
  329. ASSERT_EQ(splitter.value(), "value5");
  330. ++splitter;
  331. ASSERT_FALSE(splitter);
  332. delete psplitter;
  333. }
  334. }
  335. }