stringpiece.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // A StringPiece points to part or all of a string, Cord, double-quoted string
  31. // literal, or other string-like object. A StringPiece does *not* own the
  32. // string to which it points. A StringPiece is not null-terminated.
  33. //
  34. // You can use StringPiece as a function or method parameter. A StringPiece
  35. // parameter can receive a double-quoted string literal argument, a "const
  36. // char*" argument, a string argument, or a StringPiece argument with no data
  37. // copying. Systematic use of StringPiece for arguments reduces data
  38. // copies and strlen() calls.
  39. //
  40. // Prefer passing StringPieces by value:
  41. // void MyFunction(StringPiece arg);
  42. // If circumstances require, you may also pass by const reference:
  43. // void MyFunction(const StringPiece& arg); // not preferred
  44. // Both of these have the same lifetime semantics. Passing by value
  45. // generates slightly smaller code. For more discussion, see the thread
  46. // go/stringpiecebyvalue on c-users.
  47. //
  48. // StringPiece is also suitable for local variables if you know that
  49. // the lifetime of the underlying object is longer than the lifetime
  50. // of your StringPiece variable.
  51. //
  52. // Beware of binding a StringPiece to a temporary:
  53. // StringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem
  54. //
  55. // This code is okay:
  56. // string str = obj.MethodReturningString(); // str owns its contents
  57. // StringPiece sp(str); // GOOD, because str outlives sp
  58. //
  59. // StringPiece is sometimes a poor choice for a return value and usually a poor
  60. // choice for a data member. If you do use a StringPiece this way, it is your
  61. // responsibility to ensure that the object pointed to by the StringPiece
  62. // outlives the StringPiece.
  63. //
  64. // A StringPiece may represent just part of a string; thus the name "Piece".
  65. // For example, when splitting a string, vector<StringPiece> is a natural data
  66. // type for the output. For another example, a Cord is a non-contiguous,
  67. // potentially very long string-like object. The Cord class has an interface
  68. // that iteratively provides StringPiece objects that point to the
  69. // successive pieces of a Cord object.
  70. //
  71. // A StringPiece is not null-terminated. If you write code that scans a
  72. // StringPiece, you must check its length before reading any characters.
  73. // Common idioms that work on null-terminated strings do not work on
  74. // StringPiece objects.
  75. //
  76. // There are several ways to create a null StringPiece:
  77. // StringPiece()
  78. // StringPiece(NULL)
  79. // StringPiece(NULL, 0)
  80. // For all of the above, sp.data() == NULL, sp.length() == 0,
  81. // and sp.empty() == true. Also, if you create a StringPiece with
  82. // a non-NULL pointer then sp.data() != NULL. Once created,
  83. // sp.data() will stay either NULL or not-NULL, except if you call
  84. // sp.clear() or sp.set().
  85. //
  86. // Thus, you can use StringPiece(NULL) to signal an out-of-band value
  87. // that is different from other StringPiece values. This is similar
  88. // to the way that const char* p1 = NULL; is different from
  89. // const char* p2 = "";.
  90. //
  91. // There are many ways to create an empty StringPiece:
  92. // StringPiece()
  93. // StringPiece(NULL)
  94. // StringPiece(NULL, 0)
  95. // StringPiece("")
  96. // StringPiece("", 0)
  97. // StringPiece("abcdef", 0)
  98. // StringPiece("abcdef"+6, 0)
  99. // For all of the above, sp.length() will be 0 and sp.empty() will be true.
  100. // For some empty StringPiece values, sp.data() will be NULL.
  101. // For some empty StringPiece values, sp.data() will not be NULL.
  102. //
  103. // Be careful not to confuse: null StringPiece and empty StringPiece.
  104. // The set of empty StringPieces properly includes the set of null StringPieces.
  105. // That is, every null StringPiece is an empty StringPiece,
  106. // but some non-null StringPieces are empty StringPieces too.
  107. //
  108. // All empty StringPiece values compare equal to each other.
  109. // Even a null StringPiece compares equal to a non-null empty StringPiece:
  110. // StringPiece() == StringPiece("", 0)
  111. // StringPiece(NULL) == StringPiece("abc", 0)
  112. // StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
  113. //
  114. // Look carefully at this example:
  115. // StringPiece("") == NULL
  116. // True or false? TRUE, because StringPiece::operator== converts
  117. // the right-hand side from NULL to StringPiece(NULL),
  118. // and then compares two zero-length spans of characters.
  119. // However, we are working to make this example produce a compile error.
  120. //
  121. // Suppose you want to write:
  122. // bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD
  123. // Do not do that. Write one of these instead:
  124. // bool TestNull(StringPiece sp) { return sp.data() == NULL; }
  125. // bool TestEmpty(StringPiece sp) { return sp.empty(); }
  126. // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty?
  127. // Right now, TestWhat? behaves like TestEmpty.
  128. // We are working to make TestWhat? produce a compile error.
  129. // TestNull is good to test for an out-of-band signal.
  130. // TestEmpty is good to test for an empty StringPiece.
  131. //
  132. // Caveats (again):
  133. // (1) The lifetime of the pointed-to string (or piece of a string)
  134. // must be longer than the lifetime of the StringPiece.
  135. // (2) There may or may not be a '\0' character after the end of
  136. // StringPiece data.
  137. // (3) A null StringPiece is empty.
  138. // An empty StringPiece may or may not be a null StringPiece.
  139. #ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
  140. #define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
  141. #include <assert.h>
  142. #include <stddef.h>
  143. #include <string.h>
  144. #include <iosfwd>
  145. #include <limits>
  146. #include <string>
  147. #include <google/protobuf/stubs/common.h>
  148. #include <google/protobuf/stubs/hash.h>
  149. namespace google {
  150. namespace protobuf {
  151. // StringPiece has *two* size types.
  152. // StringPiece::size_type
  153. // is unsigned
  154. // is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
  155. // no future changes intended
  156. // stringpiece_ssize_type
  157. // is signed
  158. // is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
  159. // future changes intended: http://go/64BitStringPiece
  160. //
  161. typedef string::difference_type stringpiece_ssize_type;
  162. // STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows.
  163. // TODO(mec): delete this after stringpiece_ssize_type goes 64 bit.
  164. #if !defined(NDEBUG)
  165. #define STRINGPIECE_CHECK_SIZE 1
  166. #elif defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0
  167. #define STRINGPIECE_CHECK_SIZE 1
  168. #else
  169. #define STRINGPIECE_CHECK_SIZE 0
  170. #endif
  171. class LIBPROTOBUF_EXPORT StringPiece {
  172. private:
  173. const char* ptr_;
  174. stringpiece_ssize_type length_;
  175. // Prevent overflow in debug mode or fortified mode.
  176. // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t).
  177. static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) {
  178. #if STRINGPIECE_CHECK_SIZE > 0
  179. #ifdef max
  180. #undef max
  181. #endif
  182. if (size > static_cast<size_t>(
  183. std::numeric_limits<stringpiece_ssize_type>::max())) {
  184. // Some people grep for this message in logs
  185. // so take care if you ever change it.
  186. LogFatalSizeTooBig(size, "size_t to int conversion");
  187. }
  188. #endif
  189. return static_cast<stringpiece_ssize_type>(size);
  190. }
  191. // Out-of-line error path.
  192. static void LogFatalSizeTooBig(size_t size, const char* details);
  193. public:
  194. // We provide non-explicit singleton constructors so users can pass
  195. // in a "const char*" or a "string" wherever a "StringPiece" is
  196. // expected.
  197. //
  198. // Style guide exception granted:
  199. // http://goto/style-guide-exception-20978288
  200. StringPiece() : ptr_(NULL), length_(0) {}
  201. StringPiece(const char* str) // NOLINT(runtime/explicit)
  202. : ptr_(str), length_(0) {
  203. if (str != NULL) {
  204. length_ = CheckedSsizeTFromSizeT(strlen(str));
  205. }
  206. }
  207. template <class Allocator>
  208. StringPiece( // NOLINT(runtime/explicit)
  209. const std::basic_string<char, std::char_traits<char>, Allocator>& str)
  210. : ptr_(str.data()), length_(0) {
  211. length_ = CheckedSsizeTFromSizeT(str.size());
  212. }
  213. StringPiece(const char* offset, stringpiece_ssize_type len)
  214. : ptr_(offset), length_(len) {
  215. assert(len >= 0);
  216. }
  217. // Substring of another StringPiece.
  218. // pos must be non-negative and <= x.length().
  219. StringPiece(StringPiece x, stringpiece_ssize_type pos);
  220. // Substring of another StringPiece.
  221. // pos must be non-negative and <= x.length().
  222. // len must be non-negative and will be pinned to at most x.length() - pos.
  223. StringPiece(StringPiece x,
  224. stringpiece_ssize_type pos,
  225. stringpiece_ssize_type len);
  226. // data() may return a pointer to a buffer with embedded NULs, and the
  227. // returned buffer may or may not be null terminated. Therefore it is
  228. // typically a mistake to pass data() to a routine that expects a NUL
  229. // terminated string.
  230. const char* data() const { return ptr_; }
  231. stringpiece_ssize_type size() const { return length_; }
  232. stringpiece_ssize_type length() const { return length_; }
  233. bool empty() const { return length_ == 0; }
  234. void clear() {
  235. ptr_ = NULL;
  236. length_ = 0;
  237. }
  238. void set(const char* data, stringpiece_ssize_type len) {
  239. assert(len >= 0);
  240. ptr_ = data;
  241. length_ = len;
  242. }
  243. void set(const char* str) {
  244. ptr_ = str;
  245. if (str != NULL)
  246. length_ = CheckedSsizeTFromSizeT(strlen(str));
  247. else
  248. length_ = 0;
  249. }
  250. void set(const void* data, stringpiece_ssize_type len) {
  251. ptr_ = reinterpret_cast<const char*>(data);
  252. length_ = len;
  253. }
  254. char operator[](stringpiece_ssize_type i) const {
  255. assert(0 <= i);
  256. assert(i < length_);
  257. return ptr_[i];
  258. }
  259. void remove_prefix(stringpiece_ssize_type n) {
  260. assert(length_ >= n);
  261. ptr_ += n;
  262. length_ -= n;
  263. }
  264. void remove_suffix(stringpiece_ssize_type n) {
  265. assert(length_ >= n);
  266. length_ -= n;
  267. }
  268. // returns {-1, 0, 1}
  269. int compare(StringPiece x) const {
  270. const stringpiece_ssize_type min_size =
  271. length_ < x.length_ ? length_ : x.length_;
  272. int r = memcmp(ptr_, x.ptr_, static_cast<size_t>(min_size));
  273. if (r < 0) return -1;
  274. if (r > 0) return 1;
  275. if (length_ < x.length_) return -1;
  276. if (length_ > x.length_) return 1;
  277. return 0;
  278. }
  279. string as_string() const {
  280. return ToString();
  281. }
  282. // We also define ToString() here, since many other string-like
  283. // interfaces name the routine that converts to a C++ string
  284. // "ToString", and it's confusing to have the method that does that
  285. // for a StringPiece be called "as_string()". We also leave the
  286. // "as_string()" method defined here for existing code.
  287. string ToString() const {
  288. if (ptr_ == NULL) return string();
  289. return string(data(), static_cast<size_type>(size()));
  290. }
  291. operator string() const {
  292. return ToString();
  293. }
  294. void CopyToString(string* target) const;
  295. void AppendToString(string* target) const;
  296. bool starts_with(StringPiece x) const {
  297. return (length_ >= x.length_) &&
  298. (memcmp(ptr_, x.ptr_, static_cast<size_t>(x.length_)) == 0);
  299. }
  300. bool ends_with(StringPiece x) const {
  301. return ((length_ >= x.length_) &&
  302. (memcmp(ptr_ + (length_-x.length_), x.ptr_,
  303. static_cast<size_t>(x.length_)) == 0));
  304. }
  305. // Checks whether StringPiece starts with x and if so advances the beginning
  306. // of it to past the match. It's basically a shortcut for starts_with
  307. // followed by remove_prefix.
  308. bool Consume(StringPiece x);
  309. // Like above but for the end of the string.
  310. bool ConsumeFromEnd(StringPiece x);
  311. // standard STL container boilerplate
  312. typedef char value_type;
  313. typedef const char* pointer;
  314. typedef const char& reference;
  315. typedef const char& const_reference;
  316. typedef size_t size_type;
  317. typedef ptrdiff_t difference_type;
  318. static const size_type npos;
  319. typedef const char* const_iterator;
  320. typedef const char* iterator;
  321. typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  322. typedef std::reverse_iterator<iterator> reverse_iterator;
  323. iterator begin() const { return ptr_; }
  324. iterator end() const { return ptr_ + length_; }
  325. const_reverse_iterator rbegin() const {
  326. return const_reverse_iterator(ptr_ + length_);
  327. }
  328. const_reverse_iterator rend() const {
  329. return const_reverse_iterator(ptr_);
  330. }
  331. stringpiece_ssize_type max_size() const { return length_; }
  332. stringpiece_ssize_type capacity() const { return length_; }
  333. // cpplint.py emits a false positive [build/include_what_you_use]
  334. stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT
  335. bool contains(StringPiece s) const;
  336. stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const;
  337. stringpiece_ssize_type find(char c, size_type pos = 0) const;
  338. stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const;
  339. stringpiece_ssize_type rfind(char c, size_type pos = npos) const;
  340. stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const;
  341. stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const {
  342. return find(c, pos);
  343. }
  344. stringpiece_ssize_type find_first_not_of(StringPiece s,
  345. size_type pos = 0) const;
  346. stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const;
  347. stringpiece_ssize_type find_last_of(StringPiece s,
  348. size_type pos = npos) const;
  349. stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const {
  350. return rfind(c, pos);
  351. }
  352. stringpiece_ssize_type find_last_not_of(StringPiece s,
  353. size_type pos = npos) const;
  354. stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const;
  355. StringPiece substr(size_type pos, size_type n = npos) const;
  356. };
  357. // This large function is defined inline so that in a fairly common case where
  358. // one of the arguments is a literal, the compiler can elide a lot of the
  359. // following comparisons.
  360. inline bool operator==(StringPiece x, StringPiece y) {
  361. stringpiece_ssize_type len = x.size();
  362. if (len != y.size()) {
  363. return false;
  364. }
  365. return x.data() == y.data() || len <= 0 ||
  366. memcmp(x.data(), y.data(), static_cast<size_t>(len)) == 0;
  367. }
  368. inline bool operator!=(StringPiece x, StringPiece y) {
  369. return !(x == y);
  370. }
  371. inline bool operator<(StringPiece x, StringPiece y) {
  372. const stringpiece_ssize_type min_size =
  373. x.size() < y.size() ? x.size() : y.size();
  374. const int r = memcmp(x.data(), y.data(), static_cast<size_t>(min_size));
  375. return (r < 0) || (r == 0 && x.size() < y.size());
  376. }
  377. inline bool operator>(StringPiece x, StringPiece y) {
  378. return y < x;
  379. }
  380. inline bool operator<=(StringPiece x, StringPiece y) {
  381. return !(x > y);
  382. }
  383. inline bool operator>=(StringPiece x, StringPiece y) {
  384. return !(x < y);
  385. }
  386. // allow StringPiece to be logged
  387. extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
  388. namespace internal {
  389. // StringPiece is not a POD and can not be used in an union (pre C++11). We
  390. // need a POD version of it.
  391. struct StringPiecePod {
  392. // Create from a StringPiece.
  393. static StringPiecePod CreateFromStringPiece(StringPiece str) {
  394. StringPiecePod pod;
  395. pod.data_ = str.data();
  396. pod.size_ = str.size();
  397. return pod;
  398. }
  399. // Cast to StringPiece.
  400. operator StringPiece() const { return StringPiece(data_, size_); }
  401. bool operator==(const char* value) const {
  402. return StringPiece(data_, size_) == StringPiece(value);
  403. }
  404. char operator[](stringpiece_ssize_type i) const {
  405. assert(0 <= i);
  406. assert(i < size_);
  407. return data_[i];
  408. }
  409. const char* data() const { return data_; }
  410. stringpiece_ssize_type size() const {
  411. return size_;
  412. }
  413. std::string ToString() const {
  414. return std::string(data_, static_cast<size_t>(size_));
  415. }
  416. private:
  417. const char* data_;
  418. stringpiece_ssize_type size_;
  419. };
  420. } // namespace internal
  421. } // namespace protobuf
  422. } // namespace google
  423. GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START
  424. template<> struct hash<StringPiece> {
  425. size_t operator()(const StringPiece& s) const {
  426. size_t result = 0;
  427. for (const char *str = s.data(), *end = str + s.size(); str < end; str++) {
  428. result = 5 * result + static_cast<size_t>(*str);
  429. }
  430. return result;
  431. }
  432. };
  433. GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END
  434. #endif // GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_