json_stream_parser.h 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
  31. #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
  32. #include <stack>
  33. #include <string>
  34. #include <google/protobuf/stubs/common.h>
  35. #include <google/protobuf/stubs/stringpiece.h>
  36. #include <google/protobuf/stubs/status.h>
  37. namespace google {
  // Forward declaration of google::util::Status.
  // NOTE(review): this header also includes stubs/status.h, and the class
  // below spells the type as util::Status from inside google::protobuf::util,
  // so it is not obvious from this header alone that this declaration is the
  // one being used - confirm whether it is still needed.
  38. namespace util {
  39. class Status;
  40. } // namespace util
  41. namespace protobuf {
  42. namespace util {
  43. namespace converter {
  // Forward declaration only: this header uses ObjectWriter solely through
  // pointers (the JsonStreamParser constructor argument and its ow_ member),
  // so the complete type is not required here.
  44. class ObjectWriter;
  45. // A JSON parser that can parse a stream of JSON chunks rather than needing the
  46. // entire JSON string up front. It is a modified version of the parser in
  47. // //net/proto/json/json-parser.h that has been changed in the following ways:
  48. // - Changed from recursion to an explicit stack to allow resumption
  49. // - Added support for int64 and uint64 numbers
  50. // - Removed support for octal and decimal escapes
  51. // - Removed support for numeric keys
  52. // - Removed support for functions (javascript)
  53. // - Removed some lax-comma support (but kept trailing comma support)
  54. // - Writes directly to an ObjectWriter rather than using subclassing
  55. //
  56. // Here is an example usage, feeding the input in two chunks:
  57. // JsonStreamParser parser(ow_.get());
  58. // util::Status result = parser.Parse(chunk1);
  59. // result.Update(parser.Parse(chunk2));
  60. // result.Update(parser.FinishParse());
  61. // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
  62. //
  63. // This parser is thread-compatible as long as only one thread is calling a
  64. // Parse() method at a time.
  65. class LIBPROTOBUF_EXPORT JsonStreamParser {
  66. public:
  67. // Creates a JsonStreamParser that will write to the given ObjectWriter.
  // The pointer is kept in ow_.
  // NOTE(review): presumably the parser does not take ownership of ow and ow
  // must outlive the parser - confirm against the implementation.
  68. explicit JsonStreamParser(ObjectWriter* ow);
  69. virtual ~JsonStreamParser();
  70. // Parses a UTF-8 encoded JSON string from a StringPiece.
  // As the example usage above shows, this may be called several times with
  // successive chunks of the same JSON text.
  71. util::Status Parse(StringPiece json);
  72. // Finishes parsing the JSON string. Call it after the last Parse() chunk.
  // finishing_ is true for the duration of this call, so input that would
  // otherwise be deferred to a later chunk (for example an unterminated
  // string) is treated as an error.
  73. util::Status FinishParse();
  74. private:
  // Classification of the next token at p_, as returned by
  // GetNextTokenType(). The trailing comment on each enumerator shows the
  // input that introduces that token.
  75. enum TokenType {
  76. BEGIN_STRING, // " or '
  77. BEGIN_NUMBER, // - or digit
  78. BEGIN_TRUE, // true
  79. BEGIN_FALSE, // false
  80. BEGIN_NULL, // null
  81. BEGIN_OBJECT, // {
  82. END_OBJECT, // }
  83. BEGIN_ARRAY, // [
  84. END_ARRAY, // ]
  85. ENTRY_SEPARATOR, // :
  86. VALUE_SEPARATOR, // ,
  87. BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit
  88. UNKNOWN // Unknown token or we ran out of the stream.
  89. };
  // The kinds of pending parse work stored on stack_. Each enumerator's
  // comment lists what the parser expects to see next in that state.
  90. enum ParseType {
  91. VALUE, // Expects a {, [, true, false, null, string or number
  92. OBJ_MID, // Expects a ',' or }
  93. ENTRY, // Expects a key or }
  94. ENTRY_MID, // Expects a :
  95. ARRAY_VALUE, // Expects a value or ]
  96. ARRAY_MID // Expects a ',' or ]
  97. };
  98. // Holds the result of parsing a number.
  // NOTE(review): `type` presumably records which member of the anonymous
  // union was written (DOUBLE -> double_val, INT -> int_val,
  // UINT -> uint_val) - confirm against ParseNumberHelper().
  99. struct NumberResult {
  100. enum Type { DOUBLE, INT, UINT };
  101. Type type;
  102. union {
  103. double double_val;
  104. int64 int_val;
  105. uint64 uint_val;
  106. };
  107. };
  108. // Parses a single chunk of JSON, returning an error if the JSON was invalid.
  109. util::Status ParseChunk(StringPiece json);
  110. // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
  111. // out of data. If we unexpectedly run out of p_ we push the latest back onto
  112. // the stack and return.
  113. util::Status RunParser();
  114. // Parses a value from p_ and writes it to ow_.
  115. // A value may be an object, array, true, false, null, string or number.
  116. util::Status ParseValue(TokenType type);
  117. // Parses a string and writes it out to the ow_.
  118. util::Status ParseString();
  119. // Parses a string, storing the result in parsed_.
  120. util::Status ParseStringHelper();
  121. // This function parses unicode escape sequences in strings. It returns an
  122. // error when there's a parsing error, either the size is not the expected
  123. // size or a character is not a hex digit.
  124. // NOTE(review): the original comment said "str will contain what has been
  // successfully parsed so far", but this declaration has no `str`
  // parameter; presumably the partial result is kept in parsed_storage_ -
  // confirm against the implementation.
  125. util::Status ParseUnicodeEscape();
  126. // Expects p_ to point to a JSON number, writes the number to the writer using
  127. // the appropriate Render method based on the type of number.
  128. util::Status ParseNumber();
  129. // Parse a number into a NumberResult, reporting an error if no number could
  130. // be parsed. This method will try to parse into a uint64, int64, or double
  131. // based on whether the number was positive or negative or had a decimal
  132. // component.
  133. util::Status ParseNumberHelper(NumberResult* result);
  134. // Parse a number as double into a NumberResult.
  135. util::Status ParseDoubleHelper(const string& number, NumberResult* result);
  136. // Handles a { during parsing of a value.
  137. util::Status HandleBeginObject();
  138. // Parses from the ENTRY state.
  139. util::Status ParseEntry(TokenType type);
  140. // Parses from the ENTRY_MID state.
  141. util::Status ParseEntryMid(TokenType type);
  142. // Parses from the OBJ_MID state.
  143. util::Status ParseObjectMid(TokenType type);
  144. // Handles a [ during parsing of a value.
  145. util::Status HandleBeginArray();
  146. // Parses from the ARRAY_VALUE state.
  147. util::Status ParseArrayValue(TokenType type);
  148. // Parses from the ARRAY_MID state.
  149. util::Status ParseArrayMid(TokenType type);
  150. // Each of the following expects p_ to point to an unquoted literal.
  151. util::Status ParseTrue();
  152. util::Status ParseFalse();
  153. util::Status ParseNull();
  // NOTE(review): presumably emits a null for an empty value when
  // allow_empty_null_ is set (see IsEmptyNullAllowed()) - confirm against the
  // implementation.
  154. util::Status ParseEmptyNull();
  155. // Whether an empty-null is allowed in the current state, given the type of
  // the next token.
  156. bool IsEmptyNullAllowed(TokenType type);
  157. // Report a failure as a util::Status.
  158. util::Status ReportFailure(StringPiece message);
  159. // Report a failure due to an UNKNOWN token type. We check if we hit the
  160. // end of the stream and if we're finishing or not to detect what type of
  161. // status to return in this case.
  162. util::Status ReportUnknown(StringPiece message);
  163. // Advance p_ past all whitespace or until the end of the string.
  164. void SkipWhitespace();
  165. // Advance p_ by one UTF-8 character.
  166. void Advance();
  167. // Expects p_ to point to the beginning of a key.
  168. util::Status ParseKey();
  169. // Return the type of the next token at p_.
  170. TokenType GetNextTokenType();
  171. // The object writer to write parse events to.
  172. ObjectWriter* ow_;
  173. // The stack of parsing we still need to do. When the stack runs empty we will
  174. // have parsed a single value from the root (e.g. an object or list).
  175. std::stack<ParseType> stack_;
  176. // Contains any leftover text from a previous chunk that we weren't able to
  177. // fully parse, for example the start of a key or number.
  178. string leftover_;
  179. // The current chunk of JSON being parsed. Primarily used for providing
  180. // context during error reporting.
  181. StringPiece json_;
  182. // A StringPiece positioned within the current JSON being parsed, used to
  // track location.
  183. StringPiece p_;
  184. // Stores the last key read, as we separate parsing of keys and values.
  185. StringPiece key_;
  186. // Storage for key_ if we need to keep ownership, for example between chunks
  187. // or if the key was unescaped from a JSON string.
  188. string key_storage_;
  189. // True during the FinishParse() call, so we know that any errors are fatal.
  190. // For example an unterminated string will normally result in cancelling and
  191. // retrying with the next chunk, but during FinishParse() it is an error.
  192. bool finishing_;
  193. // String we parsed during a call to ParseStringHelper().
  194. StringPiece parsed_;
  195. // Storage for the string we parsed. This may be empty if the string was able
  196. // to be parsed directly from the input.
  197. string parsed_storage_;
  198. // The character that opened the string, either ' or ".
  199. // A value of 0 indicates that string parsing is not in progress.
  200. char string_open_;
  201. // Storage for the chunk that is being parsed in ParseChunk().
  202. string chunk_storage_;
  203. // Whether to allow non UTF-8 encoded input and replace invalid code points.
  204. bool coerce_to_utf8_;
  205. // Whether to allow a null, represented by an empty string, as an array value
  206. // or object entry value.
  207. bool allow_empty_null_;
  208. // Whether to allow out-of-range floating point numbers or reject them.
  209. bool loose_float_number_conversion_;
  210. GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser);
  211. };
  212. } // namespace converter
  213. } // namespace util
  214. } // namespace protobuf
  215. } // namespace google
  216. #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__