// string_util_unittest.cc
  1. // Copyright 2013 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "butil/strings/string_util.h"
  5. #include <math.h>
  6. #include <stdarg.h>
  7. #include <algorithm>
  8. #include "butil/basictypes.h"
  9. #include "butil/strings/string16.h"
  10. #include "butil/strings/utf_string_conversions.h"
  11. #include <gtest/gtest.h>
  12. namespace butil {
  13. static const struct trim_case {
  14. const wchar_t* input;
  15. const TrimPositions positions;
  16. const wchar_t* output;
  17. const TrimPositions return_value;
  18. } trim_cases[] = {
  19. {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  20. {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  21. {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  22. {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  23. {L"", TRIM_ALL, L"", TRIM_NONE},
  24. {L" ", TRIM_LEADING, L"", TRIM_LEADING},
  25. {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
  26. {L" ", TRIM_ALL, L"", TRIM_ALL},
  27. {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  28. {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  29. };
  30. static const struct trim_case_ascii {
  31. const char* input;
  32. const TrimPositions positions;
  33. const char* output;
  34. const TrimPositions return_value;
  35. } trim_cases_ascii[] = {
  36. {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  37. {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  38. {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  39. {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  40. {"", TRIM_ALL, "", TRIM_NONE},
  41. {" ", TRIM_LEADING, "", TRIM_LEADING},
  42. {" ", TRIM_TRAILING, "", TRIM_TRAILING},
  43. {" ", TRIM_ALL, "", TRIM_ALL},
  44. {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  45. };
  46. namespace {
  47. // Helper used to test TruncateUTF8ToByteSize.
  48. bool Truncated(const std::string& input, const size_t byte_size,
  49. std::string* output) {
  50. size_t prev = input.length();
  51. TruncateUTF8ToByteSize(input, byte_size, output);
  52. return prev != output->length();
  53. }
  54. } // namespace
  55. TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  56. std::string output;
  57. // Empty strings and invalid byte_size arguments
  58. EXPECT_FALSE(Truncated(std::string(), 0, &output));
  59. EXPECT_EQ(output, "");
  60. EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  61. EXPECT_EQ(output, "");
  62. EXPECT_FALSE(Truncated("\xe1\x80\xbf", (size_t)-1, &output));
  63. EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  64. // Testing the truncation of valid UTF8 correctly
  65. EXPECT_TRUE(Truncated("abc", 2, &output));
  66. EXPECT_EQ(output, "ab");
  67. EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  68. EXPECT_EQ(output.compare("\xc2\x81"), 0);
  69. EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  70. EXPECT_EQ(output.compare("\xc2\x81"), 0);
  71. EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  72. EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  73. {
  74. const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  75. const std::string array_string(array, arraysize(array));
  76. EXPECT_TRUE(Truncated(array_string, 4, &output));
  77. EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  78. }
  79. {
  80. const char array[] = "\x00\xc2\x81\xc2\x81";
  81. const std::string array_string(array, arraysize(array));
  82. EXPECT_TRUE(Truncated(array_string, 4, &output));
  83. EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  84. }
  85. // Testing invalid UTF8
  86. EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  87. EXPECT_EQ(output.compare(""), 0);
  88. EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  89. EXPECT_EQ(output.compare(""), 0);
  90. EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  91. EXPECT_EQ(output.compare(""), 0);
  92. // Testing invalid UTF8 mixed with valid UTF8
  93. EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  94. EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  95. EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  96. EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  97. EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
  98. 10, &output));
  99. EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  100. EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
  101. 10, &output));
  102. EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  103. EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  104. EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
  105. // Overlong sequences
  106. EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  107. EXPECT_EQ(output.compare(""), 0);
  108. EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  109. EXPECT_EQ(output.compare(""), 0);
  110. EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  111. EXPECT_EQ(output.compare(""), 0);
  112. EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  113. EXPECT_EQ(output.compare(""), 0);
  114. EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  115. EXPECT_EQ(output.compare(""), 0);
  116. EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  117. EXPECT_EQ(output.compare(""), 0);
  118. EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  119. EXPECT_EQ(output.compare(""), 0);
  120. EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  121. EXPECT_EQ(output.compare(""), 0);
  122. EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  123. EXPECT_EQ(output.compare(""), 0);
  124. EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  125. EXPECT_EQ(output.compare(""), 0);
  126. EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  127. EXPECT_EQ(output.compare(""), 0);
  128. // Beyond U+10FFFF (the upper limit of Unicode codespace)
  129. EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  130. EXPECT_EQ(output.compare(""), 0);
  131. EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  132. EXPECT_EQ(output.compare(""), 0);
  133. EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  134. EXPECT_EQ(output.compare(""), 0);
  135. // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  136. EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  137. EXPECT_EQ(output.compare(""), 0);
  138. EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  139. EXPECT_EQ(output.compare(""), 0);
  140. {
  141. const char array[] = "\x00\x00\xfe\xff";
  142. const std::string array_string(array, arraysize(array));
  143. EXPECT_TRUE(Truncated(array_string, 4, &output));
  144. EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  145. }
  146. // Variants on the previous test
  147. {
  148. const char array[] = "\xff\xfe\x00\x00";
  149. const std::string array_string(array, 4);
  150. EXPECT_FALSE(Truncated(array_string, 4, &output));
  151. EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  152. }
  153. {
  154. const char array[] = "\xff\x00\x00\xfe";
  155. const std::string array_string(array, arraysize(array));
  156. EXPECT_TRUE(Truncated(array_string, 4, &output));
  157. EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  158. }
  159. // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  160. EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  161. EXPECT_EQ(output.compare(""), 0);
  162. EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  163. EXPECT_EQ(output.compare(""), 0);
  164. EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  165. EXPECT_EQ(output.compare(""), 0);
  166. EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  167. EXPECT_EQ(output.compare(""), 0);
  168. EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  169. EXPECT_EQ(output.compare(""), 0);
  170. // Strings in legacy encodings that are valid in UTF-8, but
  171. // are invalid as UTF-8 in real data.
  172. EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  173. EXPECT_EQ(output.compare("caf"), 0);
  174. EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  175. EXPECT_EQ(output.compare(""), 0);
  176. EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  177. EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  178. EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
  179. &output));
  180. EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  181. // Testing using the same string as input and output.
  182. EXPECT_FALSE(Truncated(output, 4, &output));
  183. EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  184. EXPECT_TRUE(Truncated(output, 3, &output));
  185. EXPECT_EQ(output.compare("\xa7\x41"), 0);
  186. // "abc" with U+201[CD] in windows-125[0-8]
  187. EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  188. EXPECT_EQ(output.compare("\x93" "abc"), 0);
  189. // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  190. EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  191. EXPECT_EQ(output.compare(""), 0);
  192. // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  193. EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  194. EXPECT_EQ(output.compare(""), 0);
  195. }
  196. TEST(StringUtilTest, TrimWhitespace) {
  197. string16 output; // Allow contents to carry over to next testcase
  198. for (size_t i = 0; i < arraysize(trim_cases); ++i) {
  199. const trim_case& value = trim_cases[i];
  200. EXPECT_EQ(value.return_value,
  201. TrimWhitespace(WideToUTF16(value.input), value.positions,
  202. &output));
  203. EXPECT_EQ(WideToUTF16(value.output), output);
  204. }
  205. // Test that TrimWhitespace() can take the same string for input and output
  206. output = ASCIIToUTF16(" This is a test \r\n");
  207. EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
  208. EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
  209. // Once more, but with a string of whitespace
  210. output = ASCIIToUTF16(" \r\n");
  211. EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
  212. EXPECT_EQ(string16(), output);
  213. std::string output_ascii;
  214. for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
  215. const trim_case_ascii& value = trim_cases_ascii[i];
  216. EXPECT_EQ(value.return_value,
  217. TrimWhitespace(value.input, value.positions, &output_ascii));
  218. EXPECT_EQ(value.output, output_ascii);
  219. }
  220. }
  // One CollapseWhitespace() expectation for UTF-16 text, written as wide
  // literals that the test converts with WideToUTF16().
  struct collapse_case {
    const wchar_t* input;   // Text handed to CollapseWhitespace().
    const bool trim;        // Second argument handed to CollapseWhitespace().
    const wchar_t* output;  // Text the call is expected to return.
  };

  static const collapse_case collapse_cases[] = {
      // Cases run with trim == false.
      {L" Google Video ", false, L"Google Video"},
      {L"Google Video", false, L"Google Video"},
      {L"", false, L""},
      {L" ", false, L""},
      {L"\t\rTest String\n", false, L"Test String"},
      {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
      {L" Test \n \t String ", false, L"Test String"},
      {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
      {L" Test String", false, L"Test String"},
      {L"Test String ", false, L"Test String"},
      {L"Test String", false, L"Test String"},
      // Cases run with trim == true.
      {L"", true, L""},
      {L"\n", true, L""},
      {L" \r ", true, L""},
      {L"\nFoo", true, L"Foo"},
      {L"\r Foo ", true, L"Foo"},
      {L" Foo bar ", true, L"Foo bar"},
      {L" \tFoo bar \n", true, L"Foo bar"},
      {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
  };
  246. TEST(StringUtilTest, CollapseWhitespace) {
  247. for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
  248. const collapse_case& value = collapse_cases[i];
  249. EXPECT_EQ(WideToUTF16(value.output),
  250. CollapseWhitespace(WideToUTF16(value.input), value.trim));
  251. }
  252. }
  // One CollapseWhitespaceASCII() expectation for narrow (char) strings.
  struct collapse_case_ascii {
    const char* input;   // Text handed to CollapseWhitespaceASCII().
    const bool trim;     // Second argument handed to CollapseWhitespaceASCII().
    const char* output;  // Text the call is expected to return.
  };

  static const collapse_case_ascii collapse_cases_ascii[] = {
      // Cases run with trim == false.
      {" Google Video ", false, "Google Video"},
      {"Google Video", false, "Google Video"},
      {"", false, ""},
      {" ", false, ""},
      {"\t\rTest String\n", false, "Test String"},
      {" Test \n \t String ", false, "Test String"},
      {" Test String", false, "Test String"},
      {"Test String ", false, "Test String"},
      {"Test String", false, "Test String"},
      // Cases run with trim == true.
      {"", true, ""},
      {"\n", true, ""},
      {" \r ", true, ""},
      {"\nFoo", true, "Foo"},
      {"\r Foo ", true, "Foo"},
      {" Foo bar ", true, "Foo bar"},
      {" \tFoo bar \n", true, "Foo bar"},
      {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
  };
  276. TEST(StringUtilTest, CollapseWhitespaceASCII) {
  277. for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
  278. const collapse_case_ascii& value = collapse_cases_ascii[i];
  279. EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
  280. }
  281. }
  282. TEST(StringUtilTest, IsStringUTF8) {
  283. EXPECT_TRUE(IsStringUTF8("abc"));
  284. EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
  285. EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
  286. EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
  287. EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
  288. EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
  289. // surrogate code points
  290. EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
  291. EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
  292. EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
  293. // overlong sequences
  294. EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
  295. EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
  296. EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
  297. EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
  298. EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
  299. EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
  300. EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
  301. EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
  302. EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
  303. EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
  304. EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
  305. // Beyond U+10FFFF (the upper limit of Unicode codespace)
  306. EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
  307. EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
  308. EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
  309. // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  310. EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
  311. EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
  312. EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
  313. EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
  314. // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  315. EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
  316. EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
  317. EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
  318. EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
  319. EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
  320. // Strings in legacy encodings. We can certainly make up strings
  321. // in a legacy encoding that are valid in UTF-8, but in real data,
  322. // most of them are invalid as UTF-8.
  323. EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
  324. EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
  325. EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
  326. // "abc" with U+201[CD] in windows-125[0-8]
  327. EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
  328. // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  329. EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
  330. // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  331. EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
  332. // Check that we support Embedded Nulls. The first uses the canonical UTF-8
  333. // representation, and the second uses a 2-byte sequence. The second version
  334. // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
  335. // given codepoint must be used.
  336. static const char kEmbeddedNull[] = "embedded\0null";
  337. EXPECT_TRUE(IsStringUTF8(
  338. std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
  339. EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
  340. }
  341. TEST(StringUtilTest, ConvertASCII) {
  342. static const char* char_cases[] = {
  343. "Google Video",
  344. "Hello, world\n",
  345. "0123ABCDwxyz \a\b\t\r\n!+,.~"
  346. };
  347. static const wchar_t* const wchar_cases[] = {
  348. L"Google Video",
  349. L"Hello, world\n",
  350. L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  351. };
  352. for (size_t i = 0; i < arraysize(char_cases); ++i) {
  353. EXPECT_TRUE(IsStringASCII(char_cases[i]));
  354. string16 utf16 = ASCIIToUTF16(char_cases[i]);
  355. EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
  356. std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
  357. EXPECT_EQ(char_cases[i], ascii);
  358. }
  359. EXPECT_FALSE(IsStringASCII("Google \x80Video"));
  360. // Convert empty strings.
  361. string16 empty16;
  362. std::string empty;
  363. EXPECT_EQ(empty, UTF16ToASCII(empty16));
  364. EXPECT_EQ(empty16, ASCIIToUTF16(empty));
  365. // Convert strings with an embedded NUL character.
  366. const char chars_with_nul[] = "test\0string";
  367. const int length_with_nul = arraysize(chars_with_nul) - 1;
  368. std::string string_with_nul(chars_with_nul, length_with_nul);
  369. std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
  370. EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
  371. wide_with_nul.length());
  372. std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
  373. EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
  374. narrow_with_nul.length());
  375. EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
  376. }
  377. TEST(StringUtilTest, ToUpperASCII) {
  378. EXPECT_EQ('C', ToUpperASCII('C'));
  379. EXPECT_EQ('C', ToUpperASCII('c'));
  380. EXPECT_EQ('2', ToUpperASCII('2'));
  381. EXPECT_EQ(L'C', ToUpperASCII(L'C'));
  382. EXPECT_EQ(L'C', ToUpperASCII(L'c'));
  383. EXPECT_EQ(L'2', ToUpperASCII(L'2'));
  384. std::string in_place_a("Cc2");
  385. StringToUpperASCII(&in_place_a);
  386. EXPECT_EQ("CC2", in_place_a);
  387. std::wstring in_place_w(L"Cc2");
  388. StringToUpperASCII(&in_place_w);
  389. EXPECT_EQ(L"CC2", in_place_w);
  390. std::string original_a("Cc2");
  391. std::string upper_a = StringToUpperASCII(original_a);
  392. EXPECT_EQ("CC2", upper_a);
  393. std::wstring original_w(L"Cc2");
  394. std::wstring upper_w = StringToUpperASCII(original_w);
  395. EXPECT_EQ(L"CC2", upper_w);
  396. }
  397. TEST(StringUtilTest, LowerCaseEqualsASCII) {
  398. static const struct {
  399. const char* src_a;
  400. const char* dst;
  401. } lowercase_cases[] = {
  402. { "FoO", "foo" },
  403. { "foo", "foo" },
  404. { "FOO", "foo" },
  405. };
  406. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
  407. EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
  408. lowercase_cases[i].dst));
  409. EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
  410. lowercase_cases[i].dst));
  411. }
  412. }
  413. TEST(StringUtilTest, FormatBytesUnlocalized) {
  414. static const struct {
  415. int64_t bytes;
  416. const char* expected;
  417. } cases[] = {
  418. // Expected behavior: we show one post-decimal digit when we have
  419. // under two pre-decimal digits, except in cases where it makes no
  420. // sense (zero or bytes).
  421. // Since we switch units once we cross the 1000 mark, this keeps
  422. // the display of file sizes or bytes consistently around three
  423. // digits.
  424. {0, "0 B"},
  425. {512, "512 B"},
  426. {1024*1024, "1.0 MB"},
  427. {1024*1024*1024, "1.0 GB"},
  428. {10LL*1024*1024*1024, "10.0 GB"},
  429. {99LL*1024*1024*1024, "99.0 GB"},
  430. {105LL*1024*1024*1024, "105 GB"},
  431. {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
  432. {~(1LL<<63), "8192 PB"},
  433. {99*1024 + 103, "99.1 kB"},
  434. {1024*1024 + 103, "1.0 MB"},
  435. {1024*1024 + 205 * 1024, "1.2 MB"},
  436. {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
  437. {10LL*1024*1024*1024, "10.0 GB"},
  438. {100LL*1024*1024*1024, "100 GB"},
  439. };
  440. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
  441. EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
  442. FormatBytesUnlocalized(cases[i].bytes));
  443. }
  444. }
  445. TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  446. static const struct {
  447. const char* str;
  448. string16::size_type start_offset;
  449. const char* find_this;
  450. const char* replace_with;
  451. const char* expected;
  452. } cases[] = {
  453. {"aaa", 0, "a", "b", "bbb"},
  454. {"abb", 0, "ab", "a", "ab"},
  455. {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
  456. {"Not found", 0, "x", "0", "Not found"},
  457. {"Not found again", 5, "x", "0", "Not found again"},
  458. {" Making it much longer ", 0, " ", "Four score and seven years ago",
  459. "Four score and seven years agoMakingFour score and seven years agoit"
  460. "Four score and seven years agomuchFour score and seven years agolonger"
  461. "Four score and seven years ago"},
  462. {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
  463. {"Replace me only me once", 9, "me ", "", "Replace me only once"},
  464. {"abababab", 2, "ab", "c", "abccc"},
  465. };
  466. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
  467. string16 str = ASCIIToUTF16(cases[i].str);
  468. ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
  469. ASCIIToUTF16(cases[i].find_this),
  470. ASCIIToUTF16(cases[i].replace_with));
  471. EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
  472. }
  473. }
  474. TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  475. static const struct {
  476. const char* str;
  477. string16::size_type start_offset;
  478. const char* find_this;
  479. const char* replace_with;
  480. const char* expected;
  481. } cases[] = {
  482. {"aaa", 0, "a", "b", "baa"},
  483. {"abb", 0, "ab", "a", "ab"},
  484. {"Removing some substrings inging", 0, "ing", "",
  485. "Remov some substrings inging"},
  486. {"Not found", 0, "x", "0", "Not found"},
  487. {"Not found again", 5, "x", "0", "Not found again"},
  488. {" Making it much longer ", 0, " ", "Four score and seven years ago",
  489. "Four score and seven years agoMaking it much longer "},
  490. {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
  491. {"Replace me only me once", 4, "me ", "", "Replace only me once"},
  492. {"abababab", 2, "ab", "c", "abcabab"},
  493. };
  494. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
  495. string16 str = ASCIIToUTF16(cases[i].str);
  496. ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
  497. ASCIIToUTF16(cases[i].find_this),
  498. ASCIIToUTF16(cases[i].replace_with));
  499. EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
  500. }
  501. }
  502. TEST(StringUtilTest, HexDigitToInt) {
  503. EXPECT_EQ(0, HexDigitToInt('0'));
  504. EXPECT_EQ(1, HexDigitToInt('1'));
  505. EXPECT_EQ(2, HexDigitToInt('2'));
  506. EXPECT_EQ(3, HexDigitToInt('3'));
  507. EXPECT_EQ(4, HexDigitToInt('4'));
  508. EXPECT_EQ(5, HexDigitToInt('5'));
  509. EXPECT_EQ(6, HexDigitToInt('6'));
  510. EXPECT_EQ(7, HexDigitToInt('7'));
  511. EXPECT_EQ(8, HexDigitToInt('8'));
  512. EXPECT_EQ(9, HexDigitToInt('9'));
  513. EXPECT_EQ(10, HexDigitToInt('A'));
  514. EXPECT_EQ(11, HexDigitToInt('B'));
  515. EXPECT_EQ(12, HexDigitToInt('C'));
  516. EXPECT_EQ(13, HexDigitToInt('D'));
  517. EXPECT_EQ(14, HexDigitToInt('E'));
  518. EXPECT_EQ(15, HexDigitToInt('F'));
  519. // Verify the lower case as well.
  520. EXPECT_EQ(10, HexDigitToInt('a'));
  521. EXPECT_EQ(11, HexDigitToInt('b'));
  522. EXPECT_EQ(12, HexDigitToInt('c'));
  523. EXPECT_EQ(13, HexDigitToInt('d'));
  524. EXPECT_EQ(14, HexDigitToInt('e'));
  525. EXPECT_EQ(15, HexDigitToInt('f'));
  526. }
  527. // This checks where we can use the assignment operator for a va_list. We need
  528. // a way to do this since Visual C doesn't support va_copy, but assignment on
  529. // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
  530. // capability.
  531. static void VariableArgsFunc(const char* format, ...) {
  532. va_list org;
  533. va_start(org, format);
  534. va_list dup;
  535. GG_VA_COPY(dup, org);
  536. int i1 = va_arg(org, int);
  537. int j1 = va_arg(org, int);
  538. char* s1 = va_arg(org, char*);
  539. double d1 = va_arg(org, double);
  540. va_end(org);
  541. int i2 = va_arg(dup, int);
  542. int j2 = va_arg(dup, int);
  543. char* s2 = va_arg(dup, char*);
  544. double d2 = va_arg(dup, double);
  545. EXPECT_EQ(i1, i2);
  546. EXPECT_EQ(j1, j2);
  547. EXPECT_STREQ(s1, s2);
  548. EXPECT_EQ(d1, d2);
  549. va_end(dup);
  550. }
  551. TEST(StringUtilTest, VAList) {
  552. VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
  553. }
  554. // Test for Tokenize
  555. template <typename STR>
  556. void TokenizeTest() {
  557. std::vector<STR> r;
  558. size_t size;
  559. size = Tokenize(STR("This is a string"), STR(" "), &r);
  560. EXPECT_EQ(4U, size);
  561. ASSERT_EQ(4U, r.size());
  562. EXPECT_EQ(r[0], STR("This"));
  563. EXPECT_EQ(r[1], STR("is"));
  564. EXPECT_EQ(r[2], STR("a"));
  565. EXPECT_EQ(r[3], STR("string"));
  566. r.clear();
  567. size = Tokenize(STR("one,two,three"), STR(","), &r);
  568. EXPECT_EQ(3U, size);
  569. ASSERT_EQ(3U, r.size());
  570. EXPECT_EQ(r[0], STR("one"));
  571. EXPECT_EQ(r[1], STR("two"));
  572. EXPECT_EQ(r[2], STR("three"));
  573. r.clear();
  574. size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
  575. EXPECT_EQ(3U, size);
  576. ASSERT_EQ(3U, r.size());
  577. EXPECT_EQ(r[0], STR("one"));
  578. EXPECT_EQ(r[1], STR("two"));
  579. EXPECT_EQ(r[2], STR("three;four"));
  580. r.clear();
  581. size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
  582. EXPECT_EQ(4U, size);
  583. ASSERT_EQ(4U, r.size());
  584. EXPECT_EQ(r[0], STR("one"));
  585. EXPECT_EQ(r[1], STR("two"));
  586. EXPECT_EQ(r[2], STR("three"));
  587. EXPECT_EQ(r[3], STR("four"));
  588. r.clear();
  589. size = Tokenize(STR("one, two, three"), STR(","), &r);
  590. EXPECT_EQ(3U, size);
  591. ASSERT_EQ(3U, r.size());
  592. EXPECT_EQ(r[0], STR("one"));
  593. EXPECT_EQ(r[1], STR(" two"));
  594. EXPECT_EQ(r[2], STR(" three"));
  595. r.clear();
  596. size = Tokenize(STR("one, two, three, "), STR(","), &r);
  597. EXPECT_EQ(4U, size);
  598. ASSERT_EQ(4U, r.size());
  599. EXPECT_EQ(r[0], STR("one"));
  600. EXPECT_EQ(r[1], STR(" two"));
  601. EXPECT_EQ(r[2], STR(" three"));
  602. EXPECT_EQ(r[3], STR(" "));
  603. r.clear();
  604. size = Tokenize(STR("one, two, three,"), STR(","), &r);
  605. EXPECT_EQ(3U, size);
  606. ASSERT_EQ(3U, r.size());
  607. EXPECT_EQ(r[0], STR("one"));
  608. EXPECT_EQ(r[1], STR(" two"));
  609. EXPECT_EQ(r[2], STR(" three"));
  610. r.clear();
  611. size = Tokenize(STR(), STR(","), &r);
  612. EXPECT_EQ(0U, size);
  613. ASSERT_EQ(0U, r.size());
  614. r.clear();
  615. size = Tokenize(STR(","), STR(","), &r);
  616. EXPECT_EQ(0U, size);
  617. ASSERT_EQ(0U, r.size());
  618. r.clear();
  619. size = Tokenize(STR(",;:."), STR(".:;,"), &r);
  620. EXPECT_EQ(0U, size);
  621. ASSERT_EQ(0U, r.size());
  622. r.clear();
  623. size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
  624. EXPECT_EQ(1U, size);
  625. ASSERT_EQ(1U, r.size());
  626. EXPECT_EQ(r[0], STR("a"));
  627. r.clear();
  628. size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
  629. EXPECT_EQ(2U, size);
  630. ASSERT_EQ(2U, r.size());
  631. EXPECT_EQ(r[0], STR("\ta\t"));
  632. EXPECT_EQ(r[1], STR("b\tcc"));
  633. r.clear();
  634. }
  635. TEST(StringUtilTest, TokenizeStdString) {
  636. TokenizeTest<std::string>();
  637. }
  638. TEST(StringUtilTest, TokenizeStringPiece) {
  639. TokenizeTest<butil::StringPiece>();
  640. }
  641. // Test for JoinString
  642. TEST(StringUtilTest, JoinString) {
  643. std::vector<std::string> in;
  644. EXPECT_EQ("", JoinString(in, ','));
  645. in.push_back("a");
  646. EXPECT_EQ("a", JoinString(in, ','));
  647. in.push_back("b");
  648. in.push_back("c");
  649. EXPECT_EQ("a,b,c", JoinString(in, ','));
  650. in.push_back(std::string());
  651. EXPECT_EQ("a,b,c,", JoinString(in, ','));
  652. in.push_back(" ");
  653. EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
  654. }
  655. // Test for JoinString overloaded with std::string separator
  656. TEST(StringUtilTest, JoinStringWithString) {
  657. std::string separator(", ");
  658. std::vector<std::string> parts;
  659. EXPECT_EQ(std::string(), JoinString(parts, separator));
  660. parts.push_back("a");
  661. EXPECT_EQ("a", JoinString(parts, separator));
  662. parts.push_back("b");
  663. parts.push_back("c");
  664. EXPECT_EQ("a, b, c", JoinString(parts, separator));
  665. parts.push_back(std::string());
  666. EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
  667. parts.push_back(" ");
  668. EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
  669. }
  670. // Test for JoinString overloaded with string16 separator
  671. TEST(StringUtilTest, JoinStringWithString16) {
  672. string16 separator = ASCIIToUTF16(", ");
  673. std::vector<string16> parts;
  674. EXPECT_EQ(string16(), JoinString(parts, separator));
  675. parts.push_back(ASCIIToUTF16("a"));
  676. EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
  677. parts.push_back(ASCIIToUTF16("b"));
  678. parts.push_back(ASCIIToUTF16("c"));
  679. EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
  680. parts.push_back(ASCIIToUTF16(""));
  681. EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
  682. parts.push_back(ASCIIToUTF16(" "));
  683. EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
  684. }
  685. TEST(StringUtilTest, StartsWith) {
  686. EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
  687. EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
  688. EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
  689. EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
  690. EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
  691. EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
  692. EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
  693. EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
  694. EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
  695. EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
  696. EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
  697. ASCIIToUTF16("javascript"), true));
  698. EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
  699. ASCIIToUTF16("javascript"), true));
  700. EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
  701. ASCIIToUTF16("javascript"), false));
  702. EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
  703. ASCIIToUTF16("javascript"), false));
  704. EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
  705. ASCIIToUTF16("javascript"), true));
  706. EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
  707. ASCIIToUTF16("javascript"), false));
  708. EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
  709. EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
  710. EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
  711. EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
  712. }
  713. TEST(StringUtilTest, EndsWith) {
  714. EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
  715. ASCIIToUTF16(".plugin"), true));
  716. EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
  717. ASCIIToUTF16(".plugin"), true));
  718. EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
  719. ASCIIToUTF16(".plugin"), false));
  720. EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
  721. ASCIIToUTF16(".plugin"), false));
  722. EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
  723. EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
  724. EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
  725. ASCIIToUTF16(".plugin"), true));
  726. EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
  727. ASCIIToUTF16(".plugin"), false));
  728. EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
  729. EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
  730. EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
  731. EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
  732. EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
  733. ASCIIToUTF16(".plugin"), false));
  734. EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
  735. EXPECT_TRUE(EndsWith(string16(), string16(), false));
  736. EXPECT_TRUE(EndsWith(string16(), string16(), true));
  737. }
  738. TEST(StringUtilTest, GetStringFWithOffsets) {
  739. std::vector<string16> subst;
  740. subst.push_back(ASCIIToUTF16("1"));
  741. subst.push_back(ASCIIToUTF16("2"));
  742. std::vector<size_t> offsets;
  743. ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
  744. subst,
  745. &offsets);
  746. EXPECT_EQ(2U, offsets.size());
  747. EXPECT_EQ(7U, offsets[0]);
  748. EXPECT_EQ(25U, offsets[1]);
  749. offsets.clear();
  750. ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
  751. subst,
  752. &offsets);
  753. EXPECT_EQ(2U, offsets.size());
  754. EXPECT_EQ(25U, offsets[0]);
  755. EXPECT_EQ(7U, offsets[1]);
  756. offsets.clear();
  757. }
  758. TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  759. // Test whether replacestringplaceholders works as expected when there
  760. // are fewer inputs than outputs.
  761. std::vector<string16> subst;
  762. subst.push_back(ASCIIToUTF16("9a"));
  763. subst.push_back(ASCIIToUTF16("8b"));
  764. subst.push_back(ASCIIToUTF16("7c"));
  765. string16 formatted =
  766. ReplaceStringPlaceholders(
  767. ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
  768. EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
  769. }
  770. TEST(StringUtilTest, ReplaceStringPlaceholders) {
  771. std::vector<string16> subst;
  772. subst.push_back(ASCIIToUTF16("9a"));
  773. subst.push_back(ASCIIToUTF16("8b"));
  774. subst.push_back(ASCIIToUTF16("7c"));
  775. subst.push_back(ASCIIToUTF16("6d"));
  776. subst.push_back(ASCIIToUTF16("5e"));
  777. subst.push_back(ASCIIToUTF16("4f"));
  778. subst.push_back(ASCIIToUTF16("3g"));
  779. subst.push_back(ASCIIToUTF16("2h"));
  780. subst.push_back(ASCIIToUTF16("1i"));
  781. string16 formatted =
  782. ReplaceStringPlaceholders(
  783. ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
  784. EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
  785. }
  786. TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
  787. std::vector<string16> subst;
  788. subst.push_back(ASCIIToUTF16("9a"));
  789. subst.push_back(ASCIIToUTF16("8b"));
  790. subst.push_back(ASCIIToUTF16("7c"));
  791. subst.push_back(ASCIIToUTF16("6d"));
  792. subst.push_back(ASCIIToUTF16("5e"));
  793. subst.push_back(ASCIIToUTF16("4f"));
  794. subst.push_back(ASCIIToUTF16("3g"));
  795. subst.push_back(ASCIIToUTF16("2h"));
  796. subst.push_back(ASCIIToUTF16("1i"));
  797. subst.push_back(ASCIIToUTF16("0j"));
  798. subst.push_back(ASCIIToUTF16("-1k"));
  799. subst.push_back(ASCIIToUTF16("-2l"));
  800. subst.push_back(ASCIIToUTF16("-3m"));
  801. subst.push_back(ASCIIToUTF16("-4n"));
  802. string16 formatted =
  803. ReplaceStringPlaceholders(
  804. ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
  805. "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
  806. EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
  807. "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
  808. }
  809. TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  810. std::vector<std::string> subst;
  811. subst.push_back("9a");
  812. subst.push_back("8b");
  813. subst.push_back("7c");
  814. subst.push_back("6d");
  815. subst.push_back("5e");
  816. subst.push_back("4f");
  817. subst.push_back("3g");
  818. subst.push_back("2h");
  819. subst.push_back("1i");
  820. std::string formatted =
  821. ReplaceStringPlaceholders(
  822. "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
  823. EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
  824. }
  825. TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  826. std::vector<std::string> subst;
  827. subst.push_back("a");
  828. subst.push_back("b");
  829. subst.push_back("c");
  830. EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
  831. "$1 $$2 $$$3");
  832. }
  833. TEST(StringUtilTest, MatchPatternTest) {
  834. EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
  835. EXPECT_TRUE(MatchPattern("www.google.com", "*"));
  836. EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
  837. EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
  838. EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
  839. EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
  840. EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
  841. EXPECT_FALSE(MatchPattern("", "*.*"));
  842. EXPECT_TRUE(MatchPattern("", "*"));
  843. EXPECT_TRUE(MatchPattern("", "?"));
  844. EXPECT_TRUE(MatchPattern("", ""));
  845. EXPECT_FALSE(MatchPattern("Hello", ""));
  846. EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
  847. // Stop after a certain recursion depth.
  848. EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
  849. // Test UTF8 matching.
  850. EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
  851. EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
  852. EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
  853. // Invalid sequences should be handled as a single invalid character.
  854. EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
  855. // If the pattern has invalid characters, it shouldn't match anything.
  856. EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
  857. // Test UTF16 character matching.
  858. EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
  859. UTF8ToUTF16("*.com")));
  860. EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
  861. UTF8ToUTF16("He??o\\*1*")));
  862. // This test verifies that consecutive wild cards are collapsed into 1
  863. // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
  864. // recursion depth).
  865. EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
  866. UTF8ToUTF16("He********************************o")));
  867. }
  868. TEST(StringUtilTest, LcpyTest) {
  869. // Test the normal case where we fit in our buffer.
  870. {
  871. char dst[10];
  872. wchar_t wdst[10];
  873. EXPECT_EQ(7U, butil::strlcpy(dst, "abcdefg", arraysize(dst)));
  874. EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
  875. EXPECT_EQ(7U, butil::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
  876. EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
  877. }
  878. // Test dst_size == 0, nothing should be written to |dst| and we should
  879. // have the equivalent of strlen(src).
  880. {
  881. char dst[2] = {1, 2};
  882. wchar_t wdst[2] = {1, 2};
  883. EXPECT_EQ(7U, butil::strlcpy(dst, "abcdefg", 0));
  884. EXPECT_EQ(1, dst[0]);
  885. EXPECT_EQ(2, dst[1]);
  886. EXPECT_EQ(7U, butil::wcslcpy(wdst, L"abcdefg", 0));
  887. EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
  888. EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
  889. }
  890. // Test the case were we _just_ competely fit including the null.
  891. {
  892. char dst[8];
  893. wchar_t wdst[8];
  894. EXPECT_EQ(7U, butil::strlcpy(dst, "abcdefg", arraysize(dst)));
  895. EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
  896. EXPECT_EQ(7U, butil::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
  897. EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
  898. }
  899. // Test the case were we we are one smaller, so we can't fit the null.
  900. {
  901. char dst[7];
  902. wchar_t wdst[7];
  903. EXPECT_EQ(7U, butil::strlcpy(dst, "abcdefg", arraysize(dst)));
  904. EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
  905. EXPECT_EQ(7U, butil::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
  906. EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
  907. }
  908. // Test the case were we are just too small.
  909. {
  910. char dst[3];
  911. wchar_t wdst[3];
  912. EXPECT_EQ(7U, butil::strlcpy(dst, "abcdefg", arraysize(dst)));
  913. EXPECT_EQ(0, memcmp(dst, "ab", 3));
  914. EXPECT_EQ(7U, butil::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
  915. EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
  916. }
  917. }
  918. TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  919. static const struct {
  920. const wchar_t* input;
  921. bool portable;
  922. } cases[] = {
  923. { L"%ls", true },
  924. { L"%s", false },
  925. { L"%S", false },
  926. { L"%lS", false },
  927. { L"Hello, %s", false },
  928. { L"%lc", true },
  929. { L"%c", false },
  930. { L"%C", false },
  931. { L"%lC", false },
  932. { L"%ls %s", false },
  933. { L"%s %ls", false },
  934. { L"%s %ls %s", false },
  935. { L"%f", true },
  936. { L"%f %F", false },
  937. { L"%d %D", false },
  938. { L"%o %O", false },
  939. { L"%u %U", false },
  940. { L"%f %d %o %u", true },
  941. { L"%-8d (%02.1f%)", true },
  942. { L"% 10s", false },
  943. { L"% 10ls", true }
  944. };
  945. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
  946. EXPECT_EQ(cases[i].portable, butil::IsWprintfFormatPortable(cases[i].input));
  947. }
  948. TEST(StringUtilTest, RemoveChars) {
  949. const char* kRemoveChars = "-/+*";
  950. std::string input = "A-+bc/d!*";
  951. EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
  952. EXPECT_EQ("Abcd!", input);
  953. // No characters match kRemoveChars.
  954. EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
  955. EXPECT_EQ("Abcd!", input);
  956. // Empty string.
  957. input.clear();
  958. EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
  959. EXPECT_EQ(std::string(), input);
  960. }
  961. TEST(StringUtilTest, ReplaceChars) {
  962. struct TestData {
  963. const char* input;
  964. const char* replace_chars;
  965. const char* replace_with;
  966. const char* output;
  967. bool result;
  968. } cases[] = {
  969. { "", "", "", "", false },
  970. { "test", "", "", "test", false },
  971. { "test", "", "!", "test", false },
  972. { "test", "z", "!", "test", false },
  973. { "test", "e", "!", "t!st", true },
  974. { "test", "e", "!?", "t!?st", true },
  975. { "test", "ez", "!", "t!st", true },
  976. { "test", "zed", "!?", "t!?st", true },
  977. { "test", "t", "!?", "!?es!?", true },
  978. { "test", "et", "!>", "!>!>s!>", true },
  979. { "test", "zest", "!", "!!!!", true },
  980. { "test", "szt", "!", "!e!!", true },
  981. { "test", "t", "test", "testestest", true },
  982. };
  983. for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
  984. std::string output;
  985. bool result = ReplaceChars(cases[i].input,
  986. cases[i].replace_chars,
  987. cases[i].replace_with,
  988. &output);
  989. EXPECT_EQ(cases[i].result, result);
  990. EXPECT_EQ(cases[i].output, output);
  991. }
  992. }
  993. TEST(StringUtilTest, ContainsOnlyChars) {
  994. // Providing an empty list of characters should return false but for the empty
  995. // string.
  996. EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
  997. EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
  998. EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
  999. EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  1000. EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  1001. EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  1002. EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
  1003. EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
  1004. EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  1005. EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  1006. EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));
  1007. EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  1008. EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));
  1009. EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
  1010. EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
  1011. EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
  1012. EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16));
  1013. EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
  1014. EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
  1015. kWhitespaceUTF16));
  1016. }
  1017. class WriteIntoTest : public testing::Test {
  1018. protected:
  1019. static void WritesCorrectly(size_t num_chars) {
  1020. std::string buffer;
  1021. char kOriginal[] = "supercali";
  1022. strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
  1023. // Using std::string(buffer.c_str()) instead of |buffer| truncates the
  1024. // string at the first \0.
  1025. EXPECT_EQ(std::string(kOriginal,
  1026. std::min(num_chars, arraysize(kOriginal) - 1)),
  1027. std::string(buffer.c_str()));
  1028. EXPECT_EQ(num_chars, buffer.size());
  1029. }
  1030. };
  1031. TEST_F(WriteIntoTest, WriteInto) {
  1032. // Validate that WriteInto reserves enough space and
  1033. // sizes a string correctly.
  1034. WritesCorrectly(1);
  1035. WritesCorrectly(2);
  1036. WritesCorrectly(5000);
  1037. // Validate that WriteInto doesn't modify other strings
  1038. // when using a Copy-on-Write implementation.
  1039. const char kLive[] = "live";
  1040. const char kDead[] = "dead";
  1041. const std::string live = kLive;
  1042. std::string dead = live;
  1043. strncpy(WriteInto(&dead, 5), kDead, 4);
  1044. EXPECT_EQ(kDead, dead);
  1045. EXPECT_EQ(4u, dead.size());
  1046. EXPECT_EQ(kLive, live);
  1047. EXPECT_EQ(4u, live.size());
  1048. }
  1049. } // namespace butil