coded_stream.h 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: kenton@google.com (Kenton Varda)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. //
  34. // This file contains the CodedInputStream and CodedOutputStream classes,
  35. // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
  36. // and allow you to read or write individual pieces of data in various
  37. // formats. In particular, these implement the varint encoding for
  38. // integers, a simple variable-length encoding in which smaller numbers
  39. // take fewer bytes.
  40. //
  41. // Typically these classes will only be used internally by the protocol
  42. // buffer library in order to encode and decode protocol buffers. Clients
  43. // of the library only need to know about this class if they wish to write
  44. // custom message parsing or serialization procedures.
  45. //
  46. // CodedOutputStream example:
  47. // // Write some data to "myfile". First we write a 4-byte "magic number"
  48. // // to identify the file type, then write a length-delimited string. The
  49. // // string is composed of a varint giving the length followed by the raw
  50. // // bytes.
  51. // int fd = open("myfile", O_CREAT | O_WRONLY, 0644);
  52. // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
  53. // CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
  54. //
  55. // int magic_number = 1234;
  56. // char text[] = "Hello world!";
  57. // coded_output->WriteLittleEndian32(magic_number);
  58. // coded_output->WriteVarint32(strlen(text));
  59. // coded_output->WriteRaw(text, strlen(text));
  60. //
  61. // delete coded_output;
  62. // delete raw_output;
  63. // close(fd);
  64. //
  65. // CodedInputStream example:
  66. // // Read a file created by the above code.
  67. // int fd = open("myfile", O_RDONLY);
  68. // ZeroCopyInputStream* raw_input = new FileInputStream(fd);
  69. // CodedInputStream* coded_input = new CodedInputStream(raw_input);
  70. //
  71. // coded_input->ReadLittleEndian32(&magic_number);
  72. // if (magic_number != 1234) {
  73. // cerr << "File not in expected format." << endl;
  74. // return;
  75. // }
  76. //
  77. // uint32 size;
  78. // coded_input->ReadVarint32(&size);
  79. //
  80. // char* text = new char[size + 1];
  81. // coded_input->ReadRaw(text, size);
  82. // text[size] = '\0';
  83. //
  84. // delete coded_input;
  85. // delete raw_input;
  86. // close(fd);
  87. //
  88. // cout << "Text is: " << text << endl;
  89. // delete [] text;
  90. //
  91. // For those who are interested, varint encoding is defined as follows:
  92. //
  93. // The encoding operates on unsigned integers of up to 64 bits in length.
  94. // Each byte of the encoded value has the format:
  95. // * bits 0-6: Seven bits of the number being encoded.
  96. // * bit 7: Zero if this is the last byte in the encoding (in which
  97. // case all remaining bits of the number are zero) or 1 if
  98. // more bytes follow.
  99. // The first byte contains the least-significant 7 bits of the number, the
  100. // second byte (if present) contains the next-least-significant 7 bits,
  101. // and so on. So, the binary number 1011000101011 would be encoded in two
  102. // bytes as "10101011 00101100".
  103. //
  104. // In theory, varint could be used to encode integers of any length.
  105. // However, for practicality we set a limit at 64 bits. The maximum encoded
  106. // length of a number is thus 10 bytes.
  107. #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
  108. #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
  109. #include <assert.h>
  110. #include <atomic>
  111. #include <climits>
  112. #include <string>
  113. #include <utility>
  114. #ifdef _MSC_VER
  115. // Assuming windows is always little-endian.
  116. #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
  117. #define PROTOBUF_LITTLE_ENDIAN 1
  118. #endif
  119. #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
  120. // If MSVC has "/RTCc" set, it will complain about truncating casts at
  121. // runtime. This file contains some intentional truncating casts.
  122. #pragma runtime_checks("c", off)
  123. #endif
  124. #else
  125. #include <sys/param.h> // __BYTE_ORDER
  126. #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
  127. (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
  128. !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
  129. #define PROTOBUF_LITTLE_ENDIAN 1
  130. #endif
  131. #endif
  132. #include <google/protobuf/stubs/common.h>
  133. #include <google/protobuf/stubs/port.h>
  134. #include <google/protobuf/stubs/port.h>
  135. namespace google {
  136. namespace protobuf {
  137. class DescriptorPool;
  138. class MessageFactory;
  139. namespace internal { void MapTestForceDeterministic(); }
  140. namespace io {
  141. // Defined in this file.
  142. class CodedInputStream;
  143. class CodedOutputStream;
  144. // Defined in other files.
  145. class ZeroCopyInputStream; // zero_copy_stream.h
  146. class ZeroCopyOutputStream; // zero_copy_stream.h
  147. // Class which reads and decodes binary data which is composed of varint-
  148. // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
  149. // Most users will not need to deal with CodedInputStream.
  150. //
  151. // Most methods of CodedInputStream that return a bool return false if an
  152. // underlying I/O error occurs or if the data is malformed. Once such a
  153. // failure occurs, the CodedInputStream is broken and is no longer useful.
  154. class LIBPROTOBUF_EXPORT CodedInputStream {
  155. public:
  156. // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
  157. explicit CodedInputStream(ZeroCopyInputStream* input);
  158. // Create a CodedInputStream that reads from the given flat array. This is
  159. // faster than using an ArrayInputStream. PushLimit(size) is implied by
  160. // this constructor.
  161. explicit CodedInputStream(const uint8* buffer, int size);
  162. // Destroy the CodedInputStream and position the underlying
  163. // ZeroCopyInputStream at the first unread byte. If an error occurred while
  164. // reading (causing a method to return false), then the exact position of
  165. // the input stream may be anywhere between the last value that was read
  166. // successfully and the stream's byte limit.
  167. ~CodedInputStream();
  168. // Return true if this CodedInputStream reads from a flat array instead of
  169. // a ZeroCopyInputStream.
  170. inline bool IsFlat() const;
  171. // Skips a number of bytes. Returns false if an underlying read error
  172. // occurs.
  173. inline bool Skip(int count);
  174. // Sets *data to point directly at the unread part of the CodedInputStream's
  175. // underlying buffer, and *size to the size of that buffer, but does not
  176. // advance the stream's current position. This will always either produce
  177. // a non-empty buffer or return false. If the caller consumes any of
  178. // this data, it should then call Skip() to skip over the consumed bytes.
  179. // This may be useful for implementing external fast parsing routines for
  180. // types of data not covered by the CodedInputStream interface.
  181. bool GetDirectBufferPointer(const void** data, int* size);
  182. // Like GetDirectBufferPointer, but this method is inlined, and does not
  183. // attempt to Refresh() if the buffer is currently empty.
  184. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  185. void GetDirectBufferPointerInline(const void** data, int* size);
  186. // Read raw bytes, copying them into the given buffer.
  187. bool ReadRaw(void* buffer, int size);
  188. // Like the above, with inlined optimizations. This should only be used
  189. // by the protobuf implementation.
  190. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  191. bool InternalReadRawInline(void* buffer, int size);
  192. // Like ReadRaw, but reads into a string.
  193. bool ReadString(string* buffer, int size);
  194. // Like the above, with inlined optimizations. This should only be used
  195. // by the protobuf implementation.
  196. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  197. bool InternalReadStringInline(string* buffer, int size);
  198. // Read a 32-bit little-endian integer.
  199. bool ReadLittleEndian32(uint32* value);
  200. // Read a 64-bit little-endian integer.
  201. bool ReadLittleEndian64(uint64* value);
  202. // These methods read from an externally provided buffer. The caller is
  203. // responsible for ensuring that the buffer has sufficient space.
  204. // Read a 32-bit little-endian integer.
  205. static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
  206. uint32* value);
  207. // Read a 64-bit little-endian integer.
  208. static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
  209. uint64* value);
  210. // Read an unsigned integer with Varint encoding, truncating to 32 bits.
  211. // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
  212. // it to uint32, but may be more efficient.
  213. bool ReadVarint32(uint32* value);
  214. // Read an unsigned integer with Varint encoding.
  215. bool ReadVarint64(uint64* value);
  216. // Reads a varint off the wire into an "int". This should be used for reading
  217. // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
  218. //
  219. // The value from the wire is interpreted as unsigned. If its value exceeds
  220. // the representable value of an integer on this platform, instead of
  221. // truncating we return false. Truncating (as performed by ReadVarint32()
  222. // above) is an acceptable approach for fields representing an integer, but
  223. // when we are parsing a size from the wire, truncating the value would result
  224. // in us misparsing the payload.
  225. bool ReadVarintSizeAsInt(int* value);
  226. // Read a tag. This calls ReadVarint32() and returns the result, or returns
  227. // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag
  228. // (but not ReadTagNoLastTag) updates the last tag value, which can be checked
  229. // with LastTagWas().
  230. //
  231. // Always inline because this is only called in one place per parse loop
  232. // but it is called for every iteration of said loop, so it should be fast.
  233. // GCC doesn't want to inline this by default.
  234. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag() {  // Reads a tag and records it in last_tag_.
  235. return last_tag_ = ReadTagNoLastTag();  // store the result in last_tag_, then return that same value
  236. }
  237. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTagNoLastTag();
  238. // This is usually a faster alternative to ReadTag() when cutoff is a manifest
  239. // constant. It does particularly well for cutoff >= 127. The first part
  240. // of the return value is the tag that was read, though it can also be 0 in
  241. // the cases where ReadTag() would return 0. If the second part is true
  242. // then the tag is known to be in [0, cutoff]. If not, the tag either is
  243. // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
  244. // because that can arise in several ways, and for best performance we want
  245. // to avoid an extra "is tag == 0?" check here.)
  246. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  247. std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) {  // Returns {tag, flag} and records the tag in last_tag_.
  248. std::pair<uint32, bool> result = ReadTagWithCutoffNoLastTag(cutoff);  // delegate to the NoLastTag variant
  249. last_tag_ = result.first;  // only the tag half is remembered; the bool is not stored
  250. return result;  // hand the pair back to the caller unchanged
  251. }
  252. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  253. std::pair<uint32, bool> ReadTagWithCutoffNoLastTag(uint32 cutoff);
  254. // Usually returns true if calling ReadVarint32() now would produce the given
  255. // value. Will always return false if ReadVarint32() would not return the
  256. // given value. If ExpectTag() returns true, it also advances past
  257. // the varint. For best performance, use a compile-time constant as the
  258. // parameter.
  259. // Always inline because this collapses to a small number of instructions
  260. // when given a constant parameter, but GCC doesn't want to inline by default.
  261. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
  262. // Like above, except this reads from the specified buffer. The caller is
  263. // responsible for ensuring that the buffer is large enough to read a varint
  264. // of the expected size. For best performance, use a compile-time constant as
  265. // the expected tag parameter.
  266. //
  267. // Returns a pointer beyond the expected tag if it was found, or NULL if it
  268. // was not.
  269. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  270. static const uint8* ExpectTagFromArray(const uint8* buffer, uint32 expected);
  271. // Usually returns true if no more bytes can be read. Always returns false
  272. // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
  273. // call to LastTagWas() will act as if ReadTag() had been called and returned
  274. // zero, and ConsumedEntireMessage() will return true.
  275. bool ExpectAtEnd();
  276. // If the last call to ReadTag() or ReadTagWithCutoff() returned the given
  277. // value, returns true. Otherwise, returns false.
  278. // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last
  279. // returned value.
  280. //
  281. // This is needed because parsers for some types of embedded messages
  282. // (with field type TYPE_GROUP) don't actually know that they've reached the
  283. // end of a message until they see an ENDGROUP tag, which was actually part
  284. // of the enclosing message. The enclosing message would like to check that
  285. // tag to make sure it had the right number, so it calls LastTagWas() on
  286. // return from the embedded parser to check.
  287. bool LastTagWas(uint32 expected);
  288. void SetLastTag(uint32 tag) { last_tag_ = tag; }  // Overwrites last_tag_, the value otherwise recorded by ReadTag()/ReadTagWithCutoff().
  289. // When parsing a message (but NOT a group), this method must be called
  290. // immediately after MergeFromCodedStream() returns (if it returns true)
  291. // to further verify that the message ended in a legitimate way. For
  292. // example, this verifies that parsing did not end on an end-group tag.
  293. // It also checks for some cases where, due to optimizations,
  294. // MergeFromCodedStream() can incorrectly return true.
  295. bool ConsumedEntireMessage();
  296. // Limits ----------------------------------------------------------
  297. // Limits are used when parsing length-delimited embedded messages.
  298. // After the message's length is read, PushLimit() is used to prevent
  299. // the CodedInputStream from reading beyond that length. Once the
  300. // embedded message has been parsed, PopLimit() is called to undo the
  301. // limit.
  302. // Opaque type used with PushLimit() and PopLimit(). Do not modify
  303. // values of this type yourself. The only reason that this isn't a
  304. // struct with private internals is for efficiency.
  305. typedef int Limit;
  306. // Places a limit on the number of bytes that the stream may read,
  307. // starting from the current position. Once the stream hits this limit,
  308. // it will act like the end of the input has been reached until PopLimit()
  309. // is called.
  310. //
  311. // As the names imply, the stream conceptually has a stack of limits. The
  312. // shortest limit on the stack is always enforced, even if it is not the
  313. // top limit.
  314. //
  315. // The value returned by PushLimit() is opaque to the caller, and must
  316. // be passed unchanged to the corresponding call to PopLimit().
  317. Limit PushLimit(int byte_limit);
  318. // Pops the last limit pushed by PushLimit(). The input must be the value
  319. // returned by that call to PushLimit().
  320. void PopLimit(Limit limit);
  321. // Returns the number of bytes left until the nearest limit on the
  322. // stack is hit, or -1 if no limits are in place.
  323. int BytesUntilLimit() const;
  324. // Returns current position relative to the beginning of the input stream.
  325. int CurrentPosition() const;
  326. // Total Bytes Limit -----------------------------------------------
  327. // To prevent malicious users from sending excessively large messages
  328. // and causing memory exhaustion, CodedInputStream imposes a hard limit on
  329. // the total number of bytes it will read.
  330. // Sets the maximum number of bytes that this CodedInputStream will read
  331. // before refusing to continue. To prevent servers from allocating enormous
  332. // amounts of memory to hold parsed messages, the maximum message length
  333. // should be limited to the shortest length that will not harm usability.
  334. // The default limit is INT_MAX (~2GB) and apps should set shorter limits
  335. // if possible. An error will always be printed to stderr if the limit is
  336. // reached.
  337. //
  338. // Note: setting a limit less than the current read position is interpreted
  339. // as a limit on the current position.
  340. //
  341. // This is unrelated to PushLimit()/PopLimit().
  342. void SetTotalBytesLimit(int total_bytes_limit);
  343. PROTOBUF_RUNTIME_DEPRECATED(
  344. "Please use the single parameter version of SetTotalBytesLimit(). The "
  345. "second parameter is ignored.")
  346. void SetTotalBytesLimit(int total_bytes_limit, int) {  // Deprecated two-argument overload; the second int is unnamed and unused.
  347. SetTotalBytesLimit(total_bytes_limit);  // forward to the single-parameter overload
  348. }
  349. // The Total Bytes Limit minus the Current Position, or -1 if the total bytes
  350. // limit is INT_MAX.
  351. int BytesUntilTotalBytesLimit() const;
  352. // Recursion Limit -------------------------------------------------
  353. // To prevent corrupt or malicious messages from causing stack overflows,
  354. // we must keep track of the depth of recursion when parsing embedded
  355. // messages and groups. CodedInputStream keeps track of this because it
  356. // is the only object that is passed down the stack during parsing.
  357. // Sets the maximum recursion depth. The default is 100.
  358. void SetRecursionLimit(int limit);
  359. // Increments the current recursion depth. Returns true if the depth is
  360. // under the limit, false if it has gone over.
  361. bool IncrementRecursionDepth();
  362. // Decrements the recursion depth if possible.
  363. void DecrementRecursionDepth();
  364. // Decrements the recursion depth blindly. This is faster than
  365. // DecrementRecursionDepth(). It should be used only if all previous
  366. // increments to recursion depth were successful.
  367. void UnsafeDecrementRecursionDepth();
  368. // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
  369. // Using this can reduce code size and complexity in some cases. The caller
  370. // is expected to check that the second part of the result is non-negative (to
  371. // bail out if the depth of recursion is too high) and, if all is well, to
  372. // later pass the first part of the result to PopLimit() or similar.
  373. std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
  374. int byte_limit);
  375. // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
  376. Limit ReadLengthAndPushLimit();
  377. // Helper that is equivalent to: {
  378. // bool result = ConsumedEntireMessage();
  379. // PopLimit(limit);
  380. // UnsafeDecrementRecursionDepth();
  381. // return result; }
  382. // Using this can reduce code size and complexity in some cases.
  383. // Do not use unless the current recursion depth is greater than zero.
  384. bool DecrementRecursionDepthAndPopLimit(Limit limit);
  385. // Helper that is equivalent to: {
  386. // bool result = ConsumedEntireMessage();
  387. // PopLimit(limit);
  388. // return result; }
  389. // Using this can reduce code size and complexity in some cases.
  390. bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
  391. // Extension Registry ----------------------------------------------
  392. // ADVANCED USAGE: 99.9% of people can ignore this section.
  393. //
  394. // By default, when parsing extensions, the parser looks for extension
  395. // definitions in the pool which owns the outer message's Descriptor.
  396. // However, you may call SetExtensionRegistry() to provide an alternative
  397. // pool instead. This makes it possible, for example, to parse a message
  398. // using a generated class, but represent some extensions using
  399. // DynamicMessage.
  400. // Set the pool used to look up extensions. Most users do not need to call
  401. // this as the correct pool will be chosen automatically.
  402. //
  403. // WARNING: It is very easy to misuse this. Carefully read the requirements
  404. // below. Do not use this unless you are sure you need it. Almost no one
  405. // does.
  406. //
  407. // Let's say you are parsing a message into message object m, and you want
  408. // to take advantage of SetExtensionRegistry(). You must follow these
  409. // requirements:
  410. //
  411. // The given DescriptorPool must contain m->GetDescriptor(). It is not
  412. // sufficient for it to simply contain a descriptor that has the same name
  413. // and content -- it must be the *exact object*. In other words:
  414. // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
  415. // m->GetDescriptor());
  416. // There are two ways to satisfy this requirement:
  417. // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
  418. // because this is the pool that would be used anyway if you didn't call
  419. // SetExtensionRegistry() at all.
  420. // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
  421. // "underlay". Read the documentation for DescriptorPool for more
  422. // information about underlays.
  423. //
  424. // You must also provide a MessageFactory. This factory will be used to
  425. // construct Message objects representing extensions. The factory's
  426. // GetPrototype() MUST return non-NULL for any Descriptor which can be found
  427. // through the provided pool.
  428. //
  429. // If the provided factory might return instances of protocol-compiler-
  430. // generated (i.e. compiled-in) types, or if the outer message object m is
  431. // a generated type, then the given factory MUST have this property: If
  432. // GetPrototype() is given a Descriptor which resides in
  433. // DescriptorPool::generated_pool(), the factory MUST return the same
  434. // prototype which MessageFactory::generated_factory() would return. That
  435. // is, given a descriptor for a generated type, the factory must return an
  436. // instance of the generated class (NOT DynamicMessage). However, when
  437. // given a descriptor for a type that is NOT in generated_pool, the factory
  438. // is free to return any implementation.
  439. //
  440. // The reason for this requirement is that generated sub-objects may be
  441. // accessed via the standard (non-reflection) extension accessor methods,
  442. // and these methods will down-cast the object to the generated class type.
  443. // If the object is not actually of that type, the results would be undefined.
  444. // On the other hand, if an extension is not compiled in, then there is no
  445. // way the code could end up accessing it via the standard accessors -- the
  446. // only way to access the extension is via reflection. When using reflection,
  447. // DynamicMessage and generated messages are indistinguishable, so it's fine
  448. // if these objects are represented using DynamicMessage.
  449. //
  450. // Using DynamicMessageFactory on which you have called
  451. // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
  452. // above requirement.
  453. //
  454. // If either pool or factory is NULL, both must be NULL.
  455. //
  456. // Note that this feature is ignored when parsing "lite" messages as they do
  457. // not have descriptors.
  458. void SetExtensionRegistry(const DescriptorPool* pool,
  459. MessageFactory* factory);
  460. // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
  461. // has been provided.
  462. const DescriptorPool* GetExtensionPool();
  463. // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
  464. // factory has been provided.
  465. MessageFactory* GetExtensionFactory();
  466. private:
  467. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
  468. const uint8* buffer_;
  469. const uint8* buffer_end_; // pointer to the end of the buffer.
  470. ZeroCopyInputStream* input_;
  471. int total_bytes_read_; // total bytes read from input_, including
  472. // the current buffer
  473. // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
  474. // so that we can BackUp() on destruction.
  475. int overflow_bytes_;
  476. // LastTagWas() stuff.
  477. uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
  478. // This is set to true by ReadTag{Fallback/Slow}() if it is called when exactly
  479. // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
  480. // reach the end of a message and attempt to read another tag.
  481. bool legitimate_message_end_;
  482. // See EnableAliasing().
  483. bool aliasing_enabled_;
  484. // Limits
  485. Limit current_limit_; // if position = -1, no limit is applied
  486. // For simplicity, if the current buffer crosses a limit (either a normal
  487. // limit created by PushLimit() or the total bytes limit), buffer_size_
  488. // only tracks the number of bytes before that limit. This field
  489. // contains the number of bytes after it. Note that this implies that if
  490. // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
  491. // hit a limit. However, if both are zero, it doesn't necessarily mean
  492. // we aren't at a limit -- the buffer may have ended exactly at the limit.
  493. int buffer_size_after_limit_;
  494. // Maximum number of bytes to read, period. This is unrelated to
  495. // current_limit_. Set using SetTotalBytesLimit().
  496. int total_bytes_limit_;
  497. // Current recursion budget, controlled by IncrementRecursionDepth() and
  498. // similar. Starts at recursion_limit_ and goes down: if this reaches
  499. // -1 we are over budget.
  500. int recursion_budget_;
  501. // Recursion depth limit, set by SetRecursionLimit().
  502. int recursion_limit_;
  503. // See SetExtensionRegistry().
  504. const DescriptorPool* extension_pool_;
  505. MessageFactory* extension_factory_;
  506. // Private member functions.
  507. // Fallback when Skip() goes past the end of the current buffer.
  508. bool SkipFallback(int count, int original_buffer_size);
  509. // Advance the buffer by a given number of bytes.
  510. void Advance(int amount);
  511. // Back up input_ to the current buffer position.
  512. void BackUpInputToCurrentPosition();
  513. // Recomputes the value of buffer_size_after_limit_. Must be called after
  514. // current_limit_ or total_bytes_limit_ changes.
  515. void RecomputeBufferLimits();
  516. // Writes an error message saying that we hit total_bytes_limit_.
  517. void PrintTotalBytesLimitError();
  518. // Called when the buffer runs out to request more data. Implies an
  519. // Advance(BufferSize()).
  520. bool Refresh();
  521. // When parsing varints, we optimize for the common case of small values, and
  522. // then optimize for the case when the varint fits within the current buffer
  523. // piece. The Fallback method is used when we can't use the one-byte
  524. // optimization. The Slow method is yet another fallback when the buffer is
  525. // not large enough. Making the slow path out-of-line speeds up the common
  526. // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
  527. // message crosses multiple buffers. Note: ReadVarint32Fallback() and
  528. // ReadVarint64Fallback() are called frequently and generally not inlined, so
  529. // they have been optimized to avoid "out" parameters. The former returns -1
  530. // if it fails and the uint32 it read otherwise. The latter has a bool
  531. // indicating success or failure as part of its return type.
  532. int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
  533. int ReadVarintSizeAsIntFallback();
  534. std::pair<uint64, bool> ReadVarint64Fallback();
  535. bool ReadVarint32Slow(uint32* value);
  536. bool ReadVarint64Slow(uint64* value);
  537. int ReadVarintSizeAsIntSlow();
  538. bool ReadLittleEndian32Fallback(uint32* value);
  539. bool ReadLittleEndian64Fallback(uint64* value);
  540. // Fallback/slow methods for reading tags. These do not update last_tag_,
  541. // but will set legitimate_message_end_ if we are at the end of the input
  542. // stream.
  543. uint32 ReadTagFallback(uint32 first_byte_or_zero);
  544. uint32 ReadTagSlow();
  545. bool ReadStringFallback(string* buffer, int size);
  546. // Return the size of the buffer.
  547. int BufferSize() const;
  548. static const int kDefaultTotalBytesLimit = INT_MAX;
  549. static int default_recursion_limit_; // 100 by default.
  550. };
  551. // Class which encodes and writes binary data which is composed of varint-
  552. // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
  553. // Most users will not need to deal with CodedOutputStream.
  554. //
  555. // Most methods of CodedOutputStream which return a bool return false if an
  556. // underlying I/O error occurs. Once such a failure occurs, the
  557. // CodedOutputStream is broken and is no longer useful. The Write* methods do
  558. // not return the stream status, but will invalidate the stream if an error
  559. // occurs. The client can probe HadError() to determine the status.
  560. //
  561. // Note that every method of CodedOutputStream which writes some data has
  562. // a corresponding static "ToArray" version. These versions write directly
  563. // to the provided buffer, returning a pointer past the last written byte.
  564. // They require that the buffer has sufficient capacity for the encoded data.
  565. // This allows an optimization where we check if an output stream has enough
  566. // space for an entire message before we start writing and, if there is, we
  567. // call only the ToArray methods to avoid doing bound checks for each
  568. // individual value.
  569. // i.e., in the example above:
  570. //
  571. // CodedOutputStream coded_output = new CodedOutputStream(raw_output);
  572. // int magic_number = 1234;
  573. // char text[] = "Hello world!";
  574. //
  575. // int coded_size = sizeof(magic_number) +
  576. // CodedOutputStream::VarintSize32(strlen(text)) +
  577. // strlen(text);
  578. //
  579. // uint8* buffer =
  580. // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
  581. // if (buffer != NULL) {
  582. // // The output stream has enough space in the buffer: write directly to
  583. // // the array.
  584. // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
  585. // buffer);
  586. // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
  587. // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
  588. // } else {
  589. // // Make bound-checked writes, which will ask the underlying stream for
  590. // // more space as needed.
  591. // coded_output->WriteLittleEndian32(magic_number);
  592. // coded_output->WriteVarint32(strlen(text));
  593. // coded_output->WriteRaw(text, strlen(text));
  594. // }
  595. //
  596. // delete coded_output;
  597. class LIBPROTOBUF_EXPORT CodedOutputStream {
  598. public:
  599. // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
  600. explicit CodedOutputStream(ZeroCopyOutputStream* output);
  601. CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
  602. // Destroy the CodedOutputStream and position the underlying
  603. // ZeroCopyOutputStream immediately after the last byte written.
  604. ~CodedOutputStream();
  605. // Trims any unused space in the underlying buffer so that its size matches
  606. // the number of bytes written by this stream. The underlying buffer will
  607. // automatically be trimmed when this stream is destroyed; this call is only
  608. // necessary if the underlying buffer is accessed *before* the stream is
  609. // destroyed.
  610. void Trim();
  611. // Skips a number of bytes, leaving the bytes unmodified in the underlying
  612. // buffer. Returns false if an underlying write error occurs. This is
  613. // mainly useful with GetDirectBufferPointer().
  614. bool Skip(int count);
  615. // Sets *data to point directly at the unwritten part of the
  616. // CodedOutputStream's underlying buffer, and *size to the size of that
  617. // buffer, but does not advance the stream's current position. This will
  618. // always either produce a non-empty buffer or return false. If the caller
  619. // writes any data to this buffer, it should then call Skip() to skip over
  620. // the consumed bytes. This may be useful for implementing external fast
  621. // serialization routines for types of data not covered by the
  622. // CodedOutputStream interface.
  623. bool GetDirectBufferPointer(void** data, int* size);
  624. // If there are at least "size" bytes available in the current buffer,
  625. // returns a pointer directly into the buffer and advances over these bytes.
  626. // The caller may then write directly into this buffer (e.g. using the
  627. // *ToArray static methods) rather than go through CodedOutputStream. If
  628. // there are not enough bytes available, returns NULL. The return pointer is
  629. // invalidated as soon as any other non-const method of CodedOutputStream
  630. // is called.
  631. inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
  632. // Write raw bytes, copying them from the given buffer.
  633. void WriteRaw(const void* buffer, int size);
  634. // Like WriteRaw() but will try to write aliased data if aliasing is
  635. // turned on.
  636. void WriteRawMaybeAliased(const void* data, int size);
  637. // Like WriteRaw() but writing directly to the target array.
  638. // This is _not_ inlined, as the compiler often optimizes memcpy into inline
  639. // copy loops. Since this gets called by every field with string or bytes
  640. // type, inlining may lead to a significant amount of code bloat, with only a
  641. // minor performance gain.
  642. static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
  643. // Equivalent to WriteRaw(str.data(), str.size()).
  644. void WriteString(const string& str);
  645. // Like WriteString() but writing directly to the target array.
  646. static uint8* WriteStringToArray(const string& str, uint8* target);
  647. // Write the varint-encoded size of str followed by str.
  648. static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
  649. // Instructs the CodedOutputStream to allow the underlying
  650. // ZeroCopyOutputStream to hold pointers to the original structure instead of
  651. // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
  652. // underlying stream does not support aliasing, then enabling it has no
  653. // affect. For now, this only affects the behavior of
  654. // WriteRawMaybeAliased().
  655. //
  656. // NOTE: It is caller's responsibility to ensure that the chunk of memory
  657. // remains live until all of the data has been consumed from the stream.
  658. void EnableAliasing(bool enabled);
  659. // Write a 32-bit little-endian integer.
  660. void WriteLittleEndian32(uint32 value);
  661. // Like WriteLittleEndian32() but writing directly to the target array.
  662. static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
  663. // Write a 64-bit little-endian integer.
  664. void WriteLittleEndian64(uint64 value);
  665. // Like WriteLittleEndian64() but writing directly to the target array.
  666. static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
  667. // Write an unsigned integer with Varint encoding. Writing a 32-bit value
  668. // is equivalent to casting it to uint64 and writing it as a 64-bit value,
  669. // but may be more efficient.
  670. void WriteVarint32(uint32 value);
  671. // Like WriteVarint32() but writing directly to the target array.
  672. static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
  673. // Write an unsigned integer with Varint encoding.
  674. void WriteVarint64(uint64 value);
  675. // Like WriteVarint64() but writing directly to the target array.
  676. static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
  677. // Equivalent to WriteVarint32() except when the value is negative,
  678. // in which case it must be sign-extended to a full 10 bytes.
  679. void WriteVarint32SignExtended(int32 value);
  680. // Like WriteVarint32SignExtended() but writing directly to the target array.
  681. static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
  682. // This is identical to WriteVarint32(), but optimized for writing tags.
  683. // In particular, if the input is a compile-time constant, this method
  684. // compiles down to a couple instructions.
  685. // Always inline because otherwise the aformentioned optimization can't work,
  686. // but GCC by default doesn't want to inline this.
  687. void WriteTag(uint32 value);
  688. // Like WriteTag() but writing directly to the target array.
  689. GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
  690. static uint8* WriteTagToArray(uint32 value, uint8* target);
  691. // Returns the number of bytes needed to encode the given value as a varint.
  692. static size_t VarintSize32(uint32 value);
  693. // Returns the number of bytes needed to encode the given value as a varint.
  694. static size_t VarintSize64(uint64 value);
  695. // If negative, 10 bytes. Otheriwse, same as VarintSize32().
  696. static size_t VarintSize32SignExtended(int32 value);
  697. // Compile-time equivalent of VarintSize32().
  698. template <uint32 Value>
  699. struct StaticVarintSize32 {
  700. static const size_t value =
  701. (Value < (1 << 7))
  702. ? 1
  703. : (Value < (1 << 14))
  704. ? 2
  705. : (Value < (1 << 21))
  706. ? 3
  707. : (Value < (1 << 28))
  708. ? 4
  709. : 5;
  710. };
  711. // Returns the total number of bytes written since this object was created.
  712. inline int ByteCount() const;
  713. // Returns true if there was an underlying I/O error since this object was
  714. // created.
  715. bool HadError() const { return had_error_; }
  716. // Deterministic serialization, if requested, guarantees that for a given
  717. // binary, equal messages will always be serialized to the same bytes. This
  718. // implies:
  719. // . repeated serialization of a message will return the same bytes
  720. // . different processes of the same binary (which may be executing on
  721. // different machines) will serialize equal messages to the same bytes.
  722. //
  723. // Note the deterministic serialization is NOT canonical across languages; it
  724. // is also unstable across different builds with schema changes due to unknown
  725. // fields. Users who need canonical serialization, e.g., persistent storage in
  726. // a canonical form, fingerprinting, etc., should define their own
  727. // canonicalization specification and implement the serializer using
  728. // reflection APIs rather than relying on this API.
  729. //
  730. // If deterministic serialization is requested, the serializer will
  731. // sort map entries by keys in lexicographical order or numerical order.
  732. // (This is an implementation detail and may subject to change.)
  733. //
  734. // There are two ways to determine whether serialization should be
  735. // deterministic for this CodedOutputStream. If SetSerializationDeterministic
  736. // has not yet been called, then the default comes from the global default,
  737. // which is false, until SetDefaultSerializationDeterministic has been called.
  738. // Otherwise, SetSerializationDeterministic has been called, and the last
  739. // value passed to it is all that matters.
  740. void SetSerializationDeterministic(bool value) {
  741. is_serialization_deterministic_ = value;
  742. }
  743. // See above. Also, note that users of this CodedOutputStream may need to
  744. // call IsSerializationDeterministic() to serialize in the intended way. This
  745. // CodedOutputStream cannot enforce a desire for deterministic serialization
  746. // by itself.
  747. bool IsSerializationDeterministic() const {
  748. return is_serialization_deterministic_;
  749. }
  750. static bool IsDefaultSerializationDeterministic() {
  751. return default_serialization_deterministic_.load(std::memory_order_relaxed) != 0;
  752. }
  753. private:
  754. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
  755. ZeroCopyOutputStream* output_;
  756. uint8* buffer_;
  757. int buffer_size_;
  758. int total_bytes_; // Sum of sizes of all buffers seen so far.
  759. bool had_error_; // Whether an error occurred during output.
  760. bool aliasing_enabled_; // See EnableAliasing().
  761. bool is_serialization_deterministic_;
  762. static std::atomic<bool> default_serialization_deterministic_;
  763. // Advance the buffer by a given number of bytes.
  764. void Advance(int amount);
  765. // Called when the buffer runs out to request more data. Implies an
  766. // Advance(buffer_size_).
  767. bool Refresh();
  768. // Like WriteRaw() but may avoid copying if the underlying
  769. // ZeroCopyOutputStream supports it.
  770. void WriteAliasedRaw(const void* buffer, int size);
  771. // If this write might cross the end of the buffer, we compose the bytes first
  772. // then use WriteRaw().
  773. void WriteVarint32SlowPath(uint32 value);
  774. void WriteVarint64SlowPath(uint64 value);
  775. // See above. Other projects may use "friend" to allow them to call this.
  776. // After SetDefaultSerializationDeterministic() completes, all protocol
  777. // buffer serializations will be deterministic by default. Thread safe.
  778. // However, the meaning of "after" is subtle here: to be safe, each thread
  779. // that wants deterministic serialization by default needs to call
  780. // SetDefaultSerializationDeterministic() or ensure on its own that another
  781. // thread has done so.
  782. friend void ::google::protobuf::internal::MapTestForceDeterministic();
  783. static void SetDefaultSerializationDeterministic() {
  784. default_serialization_deterministic_.store(true, std::memory_order_relaxed);
  785. }
  786. };
  787. // inline methods ====================================================
  788. // The vast majority of varints are only one byte. These inline
  789. // methods optimize for that case.
  790. inline bool CodedInputStream::ReadVarint32(uint32* value) {
  791. uint32 v = 0;
  792. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
  793. v = *buffer_;
  794. if (v < 0x80) {
  795. *value = v;
  796. Advance(1);
  797. return true;
  798. }
  799. }
  800. int64 result = ReadVarint32Fallback(v);
  801. *value = static_cast<uint32>(result);
  802. return result >= 0;
  803. }
  804. inline bool CodedInputStream::ReadVarint64(uint64* value) {
  805. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
  806. *value = *buffer_;
  807. Advance(1);
  808. return true;
  809. }
  810. std::pair<uint64, bool> p = ReadVarint64Fallback();
  811. *value = p.first;
  812. return p.second;
  813. }
  814. inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
  815. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
  816. int v = *buffer_;
  817. if (v < 0x80) {
  818. *value = v;
  819. Advance(1);
  820. return true;
  821. }
  822. }
  823. *value = ReadVarintSizeAsIntFallback();
  824. return *value >= 0;
  825. }
  826. // static
  827. inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
  828. const uint8* buffer,
  829. uint32* value) {
  830. #if defined(PROTOBUF_LITTLE_ENDIAN)
  831. memcpy(value, buffer, sizeof(*value));
  832. return buffer + sizeof(*value);
  833. #else
  834. *value = (static_cast<uint32>(buffer[0]) ) |
  835. (static_cast<uint32>(buffer[1]) << 8) |
  836. (static_cast<uint32>(buffer[2]) << 16) |
  837. (static_cast<uint32>(buffer[3]) << 24);
  838. return buffer + sizeof(*value);
  839. #endif
  840. }
  841. // static
  842. inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
  843. const uint8* buffer,
  844. uint64* value) {
  845. #if defined(PROTOBUF_LITTLE_ENDIAN)
  846. memcpy(value, buffer, sizeof(*value));
  847. return buffer + sizeof(*value);
  848. #else
  849. uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
  850. (static_cast<uint32>(buffer[1]) << 8) |
  851. (static_cast<uint32>(buffer[2]) << 16) |
  852. (static_cast<uint32>(buffer[3]) << 24);
  853. uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
  854. (static_cast<uint32>(buffer[5]) << 8) |
  855. (static_cast<uint32>(buffer[6]) << 16) |
  856. (static_cast<uint32>(buffer[7]) << 24);
  857. *value = static_cast<uint64>(part0) |
  858. (static_cast<uint64>(part1) << 32);
  859. return buffer + sizeof(*value);
  860. #endif
  861. }
  862. inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
  863. #if defined(PROTOBUF_LITTLE_ENDIAN)
  864. if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
  865. buffer_ = ReadLittleEndian32FromArray(buffer_, value);
  866. return true;
  867. } else {
  868. return ReadLittleEndian32Fallback(value);
  869. }
  870. #else
  871. return ReadLittleEndian32Fallback(value);
  872. #endif
  873. }
  874. inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
  875. #if defined(PROTOBUF_LITTLE_ENDIAN)
  876. if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
  877. buffer_ = ReadLittleEndian64FromArray(buffer_, value);
  878. return true;
  879. } else {
  880. return ReadLittleEndian64Fallback(value);
  881. }
  882. #else
  883. return ReadLittleEndian64Fallback(value);
  884. #endif
  885. }
  886. inline uint32 CodedInputStream::ReadTagNoLastTag() {
  887. uint32 v = 0;
  888. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
  889. v = *buffer_;
  890. if (v < 0x80) {
  891. Advance(1);
  892. return v;
  893. }
  894. }
  895. v = ReadTagFallback(v);
  896. return v;
  897. }
  898. inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoffNoLastTag(
  899. uint32 cutoff) {
  900. // In performance-sensitive code we can expect cutoff to be a compile-time
  901. // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
  902. // compile time.
  903. uint32 first_byte_or_zero = 0;
  904. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
  905. // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
  906. // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
  907. // is large enough then is it better to check for the two-byte case first?
  908. first_byte_or_zero = buffer_[0];
  909. if (static_cast<int8>(buffer_[0]) > 0) {
  910. const uint32 kMax1ByteVarint = 0x7f;
  911. uint32 tag = buffer_[0];
  912. Advance(1);
  913. return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
  914. }
  915. // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
  916. // and tag is two bytes. The latter is tested by bitwise-and-not of the
  917. // first byte and the second byte.
  918. if (cutoff >= 0x80 && GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
  919. GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
  920. const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
  921. uint32 tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
  922. Advance(2);
  923. // It might make sense to test for tag == 0 now, but it is so rare that
  924. // that we don't bother. A varint-encoded 0 should be one byte unless
  925. // the encoder lost its mind. The second part of the return value of
  926. // this function is allowed to be either true or false if the tag is 0,
  927. // so we don't have to check for tag == 0. We may need to check whether
  928. // it exceeds cutoff.
  929. bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
  930. return std::make_pair(tag, at_or_below_cutoff);
  931. }
  932. }
  933. // Slow path
  934. const uint32 tag = ReadTagFallback(first_byte_or_zero);
  935. return std::make_pair(tag, static_cast<uint32>(tag - 1) < cutoff);
  936. }
  937. inline bool CodedInputStream::LastTagWas(uint32 expected) {
  938. return last_tag_ == expected;
  939. }
  940. inline bool CodedInputStream::ConsumedEntireMessage() {
  941. return legitimate_message_end_;
  942. }
  943. inline bool CodedInputStream::ExpectTag(uint32 expected) {
  944. if (expected < (1 << 7)) {
  945. if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
  946. Advance(1);
  947. return true;
  948. } else {
  949. return false;
  950. }
  951. } else if (expected < (1 << 14)) {
  952. if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
  953. buffer_[0] == static_cast<uint8>(expected | 0x80) &&
  954. buffer_[1] == static_cast<uint8>(expected >> 7)) {
  955. Advance(2);
  956. return true;
  957. } else {
  958. return false;
  959. }
  960. } else {
  961. // Don't bother optimizing for larger values.
  962. return false;
  963. }
  964. }
  965. inline const uint8* CodedInputStream::ExpectTagFromArray(
  966. const uint8* buffer, uint32 expected) {
  967. if (expected < (1 << 7)) {
  968. if (buffer[0] == expected) {
  969. return buffer + 1;
  970. }
  971. } else if (expected < (1 << 14)) {
  972. if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
  973. buffer[1] == static_cast<uint8>(expected >> 7)) {
  974. return buffer + 2;
  975. }
  976. }
  977. return NULL;
  978. }
  979. inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
  980. int* size) {
  981. *data = buffer_;
  982. *size = static_cast<int>(buffer_end_ - buffer_);
  983. }
  984. inline bool CodedInputStream::ExpectAtEnd() {
  985. // If we are at a limit we know no more bytes can be read. Otherwise, it's
  986. // hard to say without calling Refresh(), and we'd rather not do that.
  987. if (buffer_ == buffer_end_ &&
  988. ((buffer_size_after_limit_ != 0) ||
  989. (total_bytes_read_ == current_limit_))) {
  990. last_tag_ = 0; // Pretend we called ReadTag()...
  991. legitimate_message_end_ = true; // ... and it hit EOF.
  992. return true;
  993. } else {
  994. return false;
  995. }
  996. }
  997. inline int CodedInputStream::CurrentPosition() const {
  998. return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
  999. }
  1000. inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
  1001. if (buffer_size_ < size) {
  1002. return NULL;
  1003. } else {
  1004. uint8* result = buffer_;
  1005. Advance(size);
  1006. return result;
  1007. }
  1008. }
  1009. inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
  1010. uint8* target) {
  1011. while (value >= 0x80) {
  1012. *target = static_cast<uint8>(value | 0x80);
  1013. value >>= 7;
  1014. ++target;
  1015. }
  1016. *target = static_cast<uint8>(value);
  1017. return target + 1;
  1018. }
  1019. inline uint8* CodedOutputStream::WriteVarint64ToArray(uint64 value,
  1020. uint8* target) {
  1021. while (value >= 0x80) {
  1022. *target = static_cast<uint8>(value | 0x80);
  1023. value >>= 7;
  1024. ++target;
  1025. }
  1026. *target = static_cast<uint8>(value);
  1027. return target + 1;
  1028. }
  1029. inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
  1030. WriteVarint64(static_cast<uint64>(value));
  1031. }
  1032. inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
  1033. int32 value, uint8* target) {
  1034. return WriteVarint64ToArray(static_cast<uint64>(value), target);
  1035. }
  1036. inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
  1037. uint8* target) {
  1038. #if defined(PROTOBUF_LITTLE_ENDIAN)
  1039. memcpy(target, &value, sizeof(value));
  1040. #else
  1041. target[0] = static_cast<uint8>(value);
  1042. target[1] = static_cast<uint8>(value >> 8);
  1043. target[2] = static_cast<uint8>(value >> 16);
  1044. target[3] = static_cast<uint8>(value >> 24);
  1045. #endif
  1046. return target + sizeof(value);
  1047. }
  1048. inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
  1049. uint8* target) {
  1050. #if defined(PROTOBUF_LITTLE_ENDIAN)
  1051. memcpy(target, &value, sizeof(value));
  1052. #else
  1053. uint32 part0 = static_cast<uint32>(value);
  1054. uint32 part1 = static_cast<uint32>(value >> 32);
  1055. target[0] = static_cast<uint8>(part0);
  1056. target[1] = static_cast<uint8>(part0 >> 8);
  1057. target[2] = static_cast<uint8>(part0 >> 16);
  1058. target[3] = static_cast<uint8>(part0 >> 24);
  1059. target[4] = static_cast<uint8>(part1);
  1060. target[5] = static_cast<uint8>(part1 >> 8);
  1061. target[6] = static_cast<uint8>(part1 >> 16);
  1062. target[7] = static_cast<uint8>(part1 >> 24);
  1063. #endif
  1064. return target + sizeof(value);
  1065. }
  1066. inline void CodedOutputStream::WriteVarint32(uint32 value) {
  1067. if (buffer_size_ >= 5) {
  1068. // Fast path: We have enough bytes left in the buffer to guarantee that
  1069. // this write won't cross the end, so we can skip the checks.
  1070. uint8* target = buffer_;
  1071. uint8* end = WriteVarint32ToArray(value, target);
  1072. int size = static_cast<int>(end - target);
  1073. Advance(size);
  1074. } else {
  1075. WriteVarint32SlowPath(value);
  1076. }
  1077. }
  1078. inline void CodedOutputStream::WriteVarint64(uint64 value) {
  1079. if (buffer_size_ >= 10) {
  1080. // Fast path: We have enough bytes left in the buffer to guarantee that
  1081. // this write won't cross the end, so we can skip the checks.
  1082. uint8* target = buffer_;
  1083. uint8* end = WriteVarint64ToArray(value, target);
  1084. int size = static_cast<int>(end - target);
  1085. Advance(size);
  1086. } else {
  1087. WriteVarint64SlowPath(value);
  1088. }
  1089. }
  1090. inline void CodedOutputStream::WriteTag(uint32 value) {
  1091. WriteVarint32(value);
  1092. }
  1093. inline uint8* CodedOutputStream::WriteTagToArray(
  1094. uint32 value, uint8* target) {
  1095. return WriteVarint32ToArray(value, target);
  1096. }
  1097. inline size_t CodedOutputStream::VarintSize32(uint32 value) {
  1098. // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
  1099. // Use an explicit multiplication to implement the divide of
  1100. // a number in the 1..31 range.
  1101. // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
  1102. // undefined.
  1103. uint32 log2value = Bits::Log2FloorNonZero(value | 0x1);
  1104. return static_cast<size_t>((log2value * 9 + 73) / 64);
  1105. }
  1106. inline size_t CodedOutputStream::VarintSize64(uint64 value) {
  1107. // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
  1108. // Use an explicit multiplication to implement the divide of
  1109. // a number in the 1..63 range.
  1110. // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
  1111. // undefined.
  1112. uint32 log2value = Bits::Log2FloorNonZero64(value | 0x1);
  1113. return static_cast<size_t>((log2value * 9 + 73) / 64);
  1114. }
  1115. inline size_t CodedOutputStream::VarintSize32SignExtended(int32 value) {
  1116. if (value < 0) {
  1117. return 10; // TODO(kenton): Make this a symbolic constant.
  1118. } else {
  1119. return VarintSize32(static_cast<uint32>(value));
  1120. }
  1121. }
  1122. inline void CodedOutputStream::WriteString(const string& str) {
  1123. WriteRaw(str.data(), static_cast<int>(str.size()));
  1124. }
  1125. inline void CodedOutputStream::WriteRawMaybeAliased(
  1126. const void* data, int size) {
  1127. if (aliasing_enabled_) {
  1128. WriteAliasedRaw(data, size);
  1129. } else {
  1130. WriteRaw(data, size);
  1131. }
  1132. }
  1133. inline uint8* CodedOutputStream::WriteStringToArray(
  1134. const string& str, uint8* target) {
  1135. return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
  1136. }
  1137. inline int CodedOutputStream::ByteCount() const {
  1138. return total_bytes_ - buffer_size_;
  1139. }
  1140. inline void CodedInputStream::Advance(int amount) {
  1141. buffer_ += amount;
  1142. }
  1143. inline void CodedOutputStream::Advance(int amount) {
  1144. buffer_ += amount;
  1145. buffer_size_ -= amount;
  1146. }
  1147. inline void CodedInputStream::SetRecursionLimit(int limit) {
  1148. recursion_budget_ += limit - recursion_limit_;
  1149. recursion_limit_ = limit;
  1150. }
  1151. inline bool CodedInputStream::IncrementRecursionDepth() {
  1152. --recursion_budget_;
  1153. return recursion_budget_ >= 0;
  1154. }
  1155. inline void CodedInputStream::DecrementRecursionDepth() {
  1156. if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
  1157. }
  1158. inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
  1159. assert(recursion_budget_ < recursion_limit_);
  1160. ++recursion_budget_;
  1161. }
  1162. inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
  1163. MessageFactory* factory) {
  1164. extension_pool_ = pool;
  1165. extension_factory_ = factory;
  1166. }
  1167. inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
  1168. return extension_pool_;
  1169. }
  1170. inline MessageFactory* CodedInputStream::GetExtensionFactory() {
  1171. return extension_factory_;
  1172. }
  1173. inline int CodedInputStream::BufferSize() const {
  1174. return static_cast<int>(buffer_end_ - buffer_);
  1175. }
  1176. inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
  1177. : buffer_(NULL),
  1178. buffer_end_(NULL),
  1179. input_(input),
  1180. total_bytes_read_(0),
  1181. overflow_bytes_(0),
  1182. last_tag_(0),
  1183. legitimate_message_end_(false),
  1184. aliasing_enabled_(false),
  1185. current_limit_(kint32max),
  1186. buffer_size_after_limit_(0),
  1187. total_bytes_limit_(kDefaultTotalBytesLimit),
  1188. recursion_budget_(default_recursion_limit_),
  1189. recursion_limit_(default_recursion_limit_),
  1190. extension_pool_(NULL),
  1191. extension_factory_(NULL) {
  1192. // Eagerly Refresh() so buffer space is immediately available.
  1193. Refresh();
  1194. }
  1195. inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
  1196. : buffer_(buffer),
  1197. buffer_end_(buffer + size),
  1198. input_(NULL),
  1199. total_bytes_read_(size),
  1200. overflow_bytes_(0),
  1201. last_tag_(0),
  1202. legitimate_message_end_(false),
  1203. aliasing_enabled_(false),
  1204. current_limit_(size),
  1205. buffer_size_after_limit_(0),
  1206. total_bytes_limit_(kDefaultTotalBytesLimit),
  1207. recursion_budget_(default_recursion_limit_),
  1208. recursion_limit_(default_recursion_limit_),
  1209. extension_pool_(NULL),
  1210. extension_factory_(NULL) {
  1211. // Note that setting current_limit_ == size is important to prevent some
  1212. // code paths from trying to access input_ and segfaulting.
  1213. }
  1214. inline bool CodedInputStream::IsFlat() const {
  1215. return input_ == NULL;
  1216. }
  1217. inline bool CodedInputStream::Skip(int count) {
  1218. if (count < 0) return false; // security: count is often user-supplied
  1219. const int original_buffer_size = BufferSize();
  1220. if (count <= original_buffer_size) {
  1221. // Just skipping within the current buffer. Easy.
  1222. Advance(count);
  1223. return true;
  1224. }
  1225. return SkipFallback(count, original_buffer_size);
  1226. }
  1227. } // namespace io
  1228. } // namespace protobuf
  1229. #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
  1230. #pragma runtime_checks("c", restore)
  1231. #endif // _MSC_VER && !defined(__INTEL_COMPILER)
  1232. } // namespace google
  1233. #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__