Переглянути джерело

make butil::BasicStringPiece<T> support string split functions-family

lrita 3 роки тому
батько
коміт
101526466e

+ 7 - 0
src/butil/strings/string_piece.h

@@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
       : ptr_(str.data()), length_(str.size()) {}
   BasicStringPiece(const value_type* offset, size_type len)
       : ptr_(offset), length_(len) {}
+  // Constructs a view over the sub-range of |str| that starts at |pos| and
+  // spans at most |len| characters (through the end of |str| by default).
+  // |pos| is clamped to str.length(): without the clamp, a |pos| past the end
+  // makes str.length() - pos wrap around (size_type is presumably unsigned,
+  // as the npos default suggests), pairing an out-of-range pointer with a
+  // huge length. An out-of-range |pos| now yields an empty piece positioned
+  // at the end of |str|; in-range arguments behave exactly as before.
+  BasicStringPiece(const BasicStringPiece& str, size_type pos, size_type len = npos)
+      : ptr_(str.data() + std::min(pos, str.length())),
+        length_(std::min(len, str.length() - std::min(pos, str.length()))) {}
   BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
                     const typename STRING_TYPE::const_iterator& end)
       : ptr_((end > begin) ? &(*begin) : NULL),
@@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
     ptr_ = NULL;
     length_ = 0;
   }
+  // Re-points this piece at the sub-range of |str| that starts at |pos| and
+  // spans at most |len| characters (through the end of |str| by default).
+  // Returns *this so calls can be chained.
+  // |pos| is clamped to str.length() first: otherwise a |pos| past the end
+  // makes str.length() - pos wrap around and leaves this piece with an
+  // out-of-range pointer and a huge length. In-range arguments behave
+  // exactly as before.
+  BasicStringPiece& assign(const BasicStringPiece& str, size_type pos, size_type len = npos) {
+    // Computed before touching members so that assign(*this, ...) still reads
+    // the original length.
+    const size_type start = std::min(pos, str.length());
+    const size_type count = std::min(len, str.length() - start);
+    ptr_ = str.data() + start;
+    length_ = count;
+    return *this;
+  }
   void set(const value_type* data, size_type len) {
     ptr_ = data;
     length_ = len;

+ 89 - 14
src/butil/strings/string_split.cc

@@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
   }
 }
 
-bool SplitStringIntoKeyValue(const std::string& line,
-                             char key_value_delimiter,
-                             std::string* key,
-                             std::string* value) {
+template <typename STR>
+bool SplitStringIntoKeyValueT(const STR& line,
+                             typename STR::value_type key_value_delimiter,
+                             STR* key,
+                             STR* value) {
   key->clear();
   value->clear();
 
   // Find the delimiter.
   size_t end_key_pos = line.find_first_of(key_value_delimiter);
-  if (end_key_pos == std::string::npos) {
+  if (end_key_pos == STR::npos) {
     DVLOG(1) << "cannot find delimiter in: " << line;
     return false;    // no delimiter
   }
   key->assign(line, 0, end_key_pos);
 
   // Find the value string.
-  std::string remains(line, end_key_pos, line.size() - end_key_pos);
+  STR remains(line, end_key_pos, line.size() - end_key_pos);
   size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
-  if (begin_value_pos == std::string::npos) {
+  if (begin_value_pos == STR::npos) {
     DVLOG(1) << "cannot parse value from line: " << line;
     return false;   // no value
   }
@@ -134,6 +135,13 @@ void SplitString(const string16& str,
   SplitStringT(str, c, true, r);
 }
 
+// StringPiece16 overload of SplitString: splits |str| on the code unit |c|
+// and stores the pieces in |r|, forwarding to SplitStringT with its flag set
+// to true (the trimming variant; SplitStringDontTrim passes false).
+// |c| must satisfy CBU16_IS_SINGLE, which is only checked in DCHECK builds.
+// The pieces are StringPieces, i.e. pointer/length views that do not own
+// their characters.
+void SplitString(const butil::StringPiece16& str,
+                 char16 c,
+                 std::vector<butil::StringPiece16>* r) {
+  DCHECK(CBU16_IS_SINGLE(c));
+  SplitStringT(str, c, true, r);
+}
+
 void SplitString(const std::string& str,
                  char c,
                  std::vector<std::string>* r) {
@@ -144,13 +152,24 @@ void SplitString(const std::string& str,
   SplitStringT(str, c, true, r);
 }
 
-bool SplitStringIntoKeyValuePairs(const std::string& line,
+// StringPiece overload of SplitString: splits |str| on |c| and stores the
+// pieces in |r|, forwarding to SplitStringT with its flag set to true (the
+// trimming variant; SplitStringDontTrim passes false).
+// |c| must lie in [0, 0x7F); this is only checked in DCHECK builds.
+// The pieces are StringPieces, i.e. pointer/length views that do not own
+// their characters.
+void SplitString(const StringPiece& str,
+                 char c,
+                 std::vector<StringPiece>* r) {
+  // The lower-bound check is only meaningful when plain char is signed.
+#if CHAR_MIN < 0
+  DCHECK(c >= 0);
+#endif
+  DCHECK(c < 0x7F);
+  SplitStringT(str, c, true, r);
+}
+
+template<typename STR>
+bool SplitStringIntoKeyValuePairsT(const STR& line,
                                   char key_value_delimiter,
                                   char key_value_pair_delimiter,
-                                  StringPairs* key_value_pairs) {
+                                  std::vector<std::pair<STR, STR> >* key_value_pairs) {
   key_value_pairs->clear();
 
-  std::vector<std::string> pairs;
+  std::vector<STR> pairs;
   SplitString(line, key_value_pair_delimiter, &pairs);
 
   bool success = true;
@@ -159,30 +178,58 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
     if (pairs[i].empty())
       continue;
 
-    std::string key;
-    std::string value;
-    if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
+    STR key;
+    STR value;
+    if (!SplitStringIntoKeyValueT(pairs[i], key_value_delimiter, &key, &value)) {
       // Don't return here, to allow for pairs without associated
       // value or key; just record that the split failed.
       success = false;
     }
-    key_value_pairs->push_back(make_pair(key, value));
+    key_value_pairs->push_back(std::make_pair(key, value));
   }
   return success;
 }
 
+// std::string entry point: forwards unchanged to the shared template
+// SplitStringIntoKeyValuePairsT and returns its result. That template
+// reports false when at least one pair could not be split into a key and a
+// value.
+bool SplitStringIntoKeyValuePairs(const std::string& line,
+                                  char key_value_delimiter,
+                                  char key_value_pair_delimiter,
+                                  StringPairs* key_value_pairs) {
+  return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
+                                       key_value_pair_delimiter, key_value_pairs);
+}
+
+// StringPiece entry point: forwards unchanged to the shared template
+// SplitStringIntoKeyValuePairsT and returns its result. That template
+// reports false when at least one pair could not be split into a key and a
+// value.
+// Keys and values are StringPieces, i.e. pointer/length views that do not
+// own their characters.
+bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
+                                  char key_value_delimiter,
+                                  char key_value_pair_delimiter,
+                                  StringPiecePairs* key_value_pairs) {
+  return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
+                                       key_value_pair_delimiter, key_value_pairs);
+}
+
 void SplitStringUsingSubstr(const string16& str,
                             const string16& s,
                             std::vector<string16>* r) {
   SplitStringUsingSubstrT(str, s, r);
 }
 
+// StringPiece16 overload: splits |str| using the substring |s| as the
+// delimiter and stores the pieces in |r|. All work is done by
+// SplitStringUsingSubstrT.
+void SplitStringUsingSubstr(const butil::StringPiece16& str,
+                            const butil::StringPiece16& s,
+                            std::vector<butil::StringPiece16>* r) {
+  SplitStringUsingSubstrT(str, s, r);
+}
+
 void SplitStringUsingSubstr(const std::string& str,
                             const std::string& s,
                             std::vector<std::string>* r) {
   SplitStringUsingSubstrT(str, s, r);
 }
 
+// StringPiece overload: splits |str| using the substring |s| as the
+// delimiter and stores the pieces in |r|. All work is done by
+// SplitStringUsingSubstrT.
+void SplitStringUsingSubstr(const butil::StringPiece& str,
+                            const butil::StringPiece& s,
+                            std::vector<butil::StringPiece>* r) {
+  SplitStringUsingSubstrT(str, s, r);
+}
+
 void SplitStringDontTrim(const string16& str,
                          char16 c,
                          std::vector<string16>* r) {
@@ -190,6 +237,13 @@ void SplitStringDontTrim(const string16& str,
   SplitStringT(str, c, false, r);
 }
 
+// StringPiece16 overload of SplitStringDontTrim: splits |str| on the code
+// unit |c| and stores the pieces in |r|, forwarding to SplitStringT with its
+// flag set to false (the non-trimming variant).
+// |c| must satisfy CBU16_IS_SINGLE, which is only checked in DCHECK builds.
+void SplitStringDontTrim(const butil::StringPiece16& str,
+                         char16 c,
+                         std::vector<butil::StringPiece16>* r) {
+  DCHECK(CBU16_IS_SINGLE(c));
+  SplitStringT(str, c, false, r);
+}
+
 void SplitStringDontTrim(const std::string& str,
                          char c,
                          std::vector<std::string>* r) {
@@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
   SplitStringT(str, c, false, r);
 }
 
+// StringPiece overload of SplitStringDontTrim: splits |str| on |c| and
+// stores the pieces in |r|, forwarding to SplitStringT with its flag set to
+// false (the non-trimming variant).
+// In DCHECK builds this verifies that |str| passes IsStringUTF8 and that
+// |c| lies in [0, 0x7F).
+void SplitStringDontTrim(const butil::StringPiece& str,
+                         char c,
+                         std::vector<butil::StringPiece>* r) {
+  DCHECK(IsStringUTF8(str));
+  // The lower-bound check is only meaningful when plain char is signed.
+#if CHAR_MIN < 0
+  DCHECK(c >= 0);
+#endif
+  DCHECK(c < 0x7F);
+  SplitStringT(str, c, false, r);
+}
+
 void SplitStringAlongWhitespace(const string16& str,
                                 std::vector<string16>* result) {
   SplitStringAlongWhitespaceT(str, result);
 }
 
+// StringPiece16 overload: forwards to SplitStringAlongWhitespaceT, which
+// fills |result| with the pieces of |str|.
+void SplitStringAlongWhitespace(const butil::StringPiece16& str,
+                                std::vector<butil::StringPiece16>* result) {
+  SplitStringAlongWhitespaceT(str, result);
+}
+
 void SplitStringAlongWhitespace(const std::string& str,
                                 std::vector<std::string>* result) {
   SplitStringAlongWhitespaceT(str, result);
 }
 
+// StringPiece overload: forwards to SplitStringAlongWhitespaceT, which
+// fills |result| with the pieces of |str|.
+void SplitStringAlongWhitespace(const butil::StringPiece& str,
+                                std::vector<butil::StringPiece>* result) {
+  SplitStringAlongWhitespaceT(str, result);
+}
+
 }  // namespace butil

+ 28 - 0
src/butil/strings/string_split.h

@@ -11,6 +11,7 @@
 
 #include "butil/base_export.h"
 #include "butil/strings/string16.h"
+#include "butil/strings/string_piece.h"
 
 namespace butil {
 
@@ -23,6 +24,9 @@ namespace butil {
 BUTIL_EXPORT void SplitString(const string16& str,
                              char16 c,
                              std::vector<string16>* r);
+BUTIL_EXPORT void SplitString(const butil::StringPiece16& str,
+                             char16 c,
+                             std::vector<butil::StringPiece16>* r);
 
 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
 // the trailing byte of a multi-byte character can be in the ASCII range.
@@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
 BUTIL_EXPORT void SplitString(const std::string& str,
                              char c,
                              std::vector<std::string>* r);
+BUTIL_EXPORT void SplitString(const butil::StringPiece& str,
+                             char c,
+                             std::vector<butil::StringPiece>* r);
 
 typedef std::vector<std::pair<std::string, std::string> > StringPairs;
+typedef std::vector<std::pair<butil::StringPiece, butil::StringPiece> > StringPiecePairs;
 
 // Splits |line| into key value pairs according to the given delimiters and
 // removes whitespace leading each key and trailing each value. Returns true
@@ -42,20 +50,33 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
                                               char key_value_delimiter,
                                               char key_value_pair_delimiter,
                                               StringPairs* key_value_pairs);
+BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
+                                              char key_value_delimiter,
+                                              char key_value_pair_delimiter,
+                                              StringPiecePairs* key_value_pairs);
 
 // The same as SplitString, but use a substring delimiter instead of a char.
 BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str,
                                         const string16& s,
                                         std::vector<string16>* r);
+BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece16& str,
+                                        const butil::StringPiece16& s,
+                                        std::vector<butil::StringPiece16>* r);
 BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str,
                                         const std::string& s,
                                         std::vector<std::string>* r);
+BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece& str,
+                                        const butil::StringPiece& s,
+                                        std::vector<butil::StringPiece>* r);
 
 // The same as SplitString, but don't trim white space.
 // NOTE: |c| must be in BMP (Basic Multilingual Plane)
 BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
                                      char16 c,
                                      std::vector<string16>* r);
+BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece16& str,
+                                     char16 c,
+                                     std::vector<butil::StringPiece16>* r);
 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
 // the trailing byte of a multi-byte character can be in the ASCII range.
 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
@@ -63,6 +84,9 @@ BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
 BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
                                      char c,
                                      std::vector<std::string>* r);
+BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece& str,
+                                     char c,
+                                     std::vector<butil::StringPiece>* r);
 
 // WARNING: this uses whitespace as defined by the HTML5 spec. If you need
 // a function similar to this but want to trim all types of whitespace, then
@@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
 // characters is added to result.
 BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str,
                                             std::vector<string16>* result);
+BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece16& str,
+                                            std::vector<butil::StringPiece16>* result);
 BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str,
                                             std::vector<std::string>* result);
+BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece& str,
+                                            std::vector<butil::StringPiece>* result);
 
 }  // namespace butil
 

+ 24 - 1
src/butil/strings/string_util.cc

@@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
                      output);
 }
 
+// StringPiece16 overload: trims the characters listed in kWhitespaceUTF16
+// from |input| at the ends selected by |positions|, writing the trimmed
+// piece to |output|. Returns whatever TrimStringT reports.
+TrimPositions TrimWhitespace(const butil::StringPiece16& input,
+                             TrimPositions positions,
+                             butil::StringPiece16* output) {
+  return TrimStringT(input, butil::StringPiece16(kWhitespaceUTF16), positions,
+                     output);
+}
+
 TrimPositions TrimWhitespaceASCII(const std::string& input,
                                   TrimPositions positions,
                                   std::string* output) {
   return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
 }
 
+// StringPiece overload: trims the characters listed in kWhitespaceASCII
+// from |input| at the ends selected by |positions|, writing the trimmed
+// piece to |output|. Returns whatever TrimStringT reports.
+TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
+                                  TrimPositions positions,
+                                  butil::StringPiece* output) {
+  return TrimStringT(input, butil::StringPiece(kWhitespaceASCII), positions, output);
+}
+
 // This function is only for backward-compatibility.
 // To be removed when all callers are updated.
 TrimPositions TrimWhitespace(const std::string& input,
@@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
   return TrimWhitespaceASCII(input, positions, output);
 }
 
+// StringPiece overload of TrimWhitespace: simply forwards to
+// TrimWhitespaceASCII and returns its result.
+TrimPositions TrimWhitespace(const butil::StringPiece& input,
+                             TrimPositions positions,
+                             butil::StringPiece* output) {
+  return TrimWhitespaceASCII(input, positions, output);
+}
+
 template<typename STR>
 STR CollapseWhitespaceT(const STR& text,
                         bool trim_sequences_with_line_breaks) {
@@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
   return DoIsStringASCII(str);
 }
 
-bool IsStringUTF8(const std::string& str) {
+bool IsStringUTF8(const StringPiece& str) {
   const char *src = str.data();
   int32_t src_len = static_cast<int32_t>(str.length());
   int32_t char_index = 0;
@@ -354,6 +373,10 @@ bool IsStringUTF8(const std::string& str) {
   return true;
 }
 
+// std::string overload: wraps |str| in a StringPiece and defers to the
+// StringPiece overload, which performs the actual validation.
+// NOTE(review): with both a std::string and a StringPiece single-argument
+// overload declared, a call such as IsStringUTF8("literal") looks ambiguous,
+// assuming StringPiece is implicitly constructible from const char* — confirm
+// against callers, or keep only the StringPiece overload.
+bool IsStringUTF8(const std::string& str) {
+  return IsStringUTF8(StringPiece(str));
+}
+
 }  // namespace butil
 
 template<typename Iter>

+ 10 - 0
src/butil/strings/string_util.h

@@ -202,15 +202,24 @@ enum TrimPositions {
 BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input,
                                          TrimPositions positions,
                                          butil::string16* output);
+BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece16& input,
+                                         TrimPositions positions,
+                                         butil::StringPiece16* output);
 BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
                                               TrimPositions positions,
                                               std::string* output);
+BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
+                                              TrimPositions positions,
+                                              butil::StringPiece* output);
 
 // Deprecated. This function is only for backward compatibility and calls
 // TrimWhitespaceASCII().
 BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input,
                                          TrimPositions positions,
                                          std::string* output);
+BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece& input,
+                                         TrimPositions positions,
+                                         butil::StringPiece* output);
 
 // Searches  for CR or LF characters.  Removes all contiguous whitespace
 // strings that contain them.  This is useful when trying to deal with text
@@ -246,6 +255,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
 // there's a use case for just checking the structural validity, we have to
 // add a new function for that.
 BUTIL_EXPORT bool IsStringUTF8(const std::string& str);
+BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str);
 BUTIL_EXPORT bool IsStringASCII(const StringPiece& str);
 BUTIL_EXPORT bool IsStringASCII(const string16& str);
 

+ 64 - 0
test/string_split_unittest.cc

@@ -239,6 +239,70 @@ TEST(StringUtilTest, SplitString) {
   r.clear();
 }
 
+// Exercises the butil::StringPiece overload of SplitString over a set of
+// empty, whitespace-padded, and delimiter-edge-case inputs.
+TEST(StringUtilTest, SplitStringStringPiece) {
+  std::vector<butil::StringPiece> r;
+
+  // A default-constructed (empty) piece yields no results.
+  SplitString(butil::StringPiece(), ',', &r);
+  EXPECT_EQ(0U, r.size());
+  r.clear();
+
+  // Plain comma-separated input.
+  SplitString(butil::StringPiece("a,b,c"), ',', &r);
+  ASSERT_EQ(3U, r.size());
+  EXPECT_EQ(r[0], "a");
+  EXPECT_EQ(r[1], "b");
+  EXPECT_EQ(r[2], "c");
+  r.clear();
+
+  // Whitespace around each piece is trimmed.
+  SplitString(butil::StringPiece("a, b, c"), ',', &r);
+  ASSERT_EQ(3U, r.size());
+  EXPECT_EQ(r[0], "a");
+  EXPECT_EQ(r[1], "b");
+  EXPECT_EQ(r[2], "c");
+  r.clear();
+
+  // Adjacent delimiters produce an empty piece between them.
+  SplitString(butil::StringPiece("a,,c"), ',', &r);
+  ASSERT_EQ(3U, r.size());
+  EXPECT_EQ(r[0], "a");
+  EXPECT_EQ(r[1], "");
+  EXPECT_EQ(r[2], "c");
+  r.clear();
+
+  // Whitespace-only input with no delimiter yields no results.
+  SplitString(butil::StringPiece("   "), '*', &r);
+  EXPECT_EQ(0U, r.size());
+  r.clear();
+
+  // Input without the delimiter comes back as a single piece.
+  SplitString(butil::StringPiece("foo"), '*', &r);
+  ASSERT_EQ(1U, r.size());
+  EXPECT_EQ(r[0], "foo");
+  r.clear();
+
+  // A trailing delimiter yields a trailing empty piece.
+  SplitString(butil::StringPiece("foo ,"), ',', &r);
+  ASSERT_EQ(2U, r.size());
+  EXPECT_EQ(r[0], "foo");
+  EXPECT_EQ(r[1], "");
+  r.clear();
+
+  // A lone delimiter yields two empty pieces.
+  SplitString(butil::StringPiece(","), ',', &r);
+  ASSERT_EQ(2U, r.size());
+  EXPECT_EQ(r[0], "");
+  EXPECT_EQ(r[1], "");
+  r.clear();
+
+  // A whitespace character used as the delimiter still splits at every
+  // occurrence.
+  SplitString(butil::StringPiece("\t\ta\t"), '\t', &r);
+  ASSERT_EQ(4U, r.size());
+  EXPECT_EQ(r[0], "");
+  EXPECT_EQ(r[1], "");
+  EXPECT_EQ(r[2], "a");
+  EXPECT_EQ(r[3], "");
+  r.clear();
+
+  // Leading/trailing whitespace of each piece is trimmed; interior
+  // whitespace is kept.
+  SplitString(butil::StringPiece("\ta\t\nb\tcc"), '\n', &r);
+  ASSERT_EQ(2U, r.size());
+  EXPECT_EQ(r[0], "a");
+  EXPECT_EQ(r[1], "b\tcc");
+  r.clear();
+}
+
 TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
   std::vector<std::string> results;
   SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);