// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <google/protobuf/util/internal/json_stream_parser.h>

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>

#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/util/internal/object_writer.h>
#include <google/protobuf/util/internal/json_escaping.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/mathlimits.h>

- namespace google {
- namespace protobuf {
- namespace util {
- // Allow these symbols to be referenced as util::Status, util::error::* in
- // this file.
- using util::Status;
- namespace error {
- using util::error::CANCELLED;
- using util::error::INTERNAL;
- using util::error::INVALID_ARGUMENT;
- } // namespace error
- namespace converter {
// Number of characters in an escaped UTF-16 code unit ('\\' 'u' X X X X).
static const int kUnicodeEscapedLength = 6;

// Lengths of the true, false, and null literals, excluding the terminating
// NUL. They are computed with sizeof instead of strlen() so that they are
// constant-initialized: a strlen() call makes the initialization dynamic,
// which leaves the values at zero for any code that runs before this
// translation unit's dynamic initializers.
static const int true_len = static_cast<int>(sizeof("true") - 1);
static const int false_len = static_cast<int>(sizeof("false") - 1);
static const int null_len = static_cast<int>(sizeof("null") - 1);

// Returns true if |c| is an ASCII letter, an underscore, or a dollar sign.
inline bool IsLetter(char c) {
  if (c == '_' || c == '$') return true;
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}

- inline bool IsAlphanumeric(char c) {
- return IsLetter(c) || ('0' <= c && c <= '9');
- }
- static bool ConsumeKey(StringPiece* input, StringPiece* key) {
- if (input->empty() || !IsLetter((*input)[0])) return false;
- int len = 1;
- for (; len < input->size(); ++len) {
- if (!IsAlphanumeric((*input)[len])) {
- break;
- }
- }
- *key = StringPiece(input->data(), len);
- *input = StringPiece(input->data() + len, input->size() - len);
- return true;
- }
- static bool MatchKey(StringPiece input) {
- return !input.empty() && IsLetter(input[0]);
- }
- JsonStreamParser::JsonStreamParser(ObjectWriter* ow)
- : ow_(ow),
- stack_(),
- leftover_(),
- json_(),
- p_(),
- key_(),
- key_storage_(),
- finishing_(false),
- parsed_(),
- parsed_storage_(),
- string_open_(0),
- chunk_storage_(),
- coerce_to_utf8_(false),
- allow_empty_null_(false),
- loose_float_number_conversion_(false) {
- // Initialize the stack with a single value to be parsed.
- stack_.push(VALUE);
- }
- JsonStreamParser::~JsonStreamParser() {}
- util::Status JsonStreamParser::Parse(StringPiece json) {
- StringPiece chunk = json;
- // If we have leftovers from a previous chunk, append the new chunk to it
- // and create a new StringPiece pointing at the string's data. This could
- // be large but we rely on the chunks to be small, assuming they are
- // fragments of a Cord.
- if (!leftover_.empty()) {
- // Don't point chunk to leftover_ because leftover_ will be updated in
- // ParseChunk(chunk).
- chunk_storage_.swap(leftover_);
- StrAppend(&chunk_storage_, json);
- chunk = StringPiece(chunk_storage_);
- }
- // Find the structurally valid UTF8 prefix and parse only that.
- int n = internal::UTF8SpnStructurallyValid(chunk);
- if (n > 0) {
- util::Status status = ParseChunk(chunk.substr(0, n));
- // Any leftover characters are stashed in leftover_ for later parsing when
- // there is more data available.
- StrAppend(&leftover_, chunk.substr(n));
- return status;
- } else {
- leftover_.assign(chunk.data(), chunk.size());
- return util::Status();
- }
- }
- util::Status JsonStreamParser::FinishParse() {
- // If we do not expect anything and there is nothing left to parse we're all
- // done.
- if (stack_.empty() && leftover_.empty()) {
- return util::Status();
- }
- // Storage for UTF8-coerced string.
- std::unique_ptr<char[]> utf8;
- if (coerce_to_utf8_) {
- utf8.reset(new char[leftover_.size()]);
- char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' ');
- p_ = json_ = StringPiece(coerced, leftover_.size());
- } else {
- p_ = json_ = leftover_;
- if (!internal::IsStructurallyValidUTF8(leftover_)) {
- return ReportFailure("Encountered non UTF-8 code points.");
- }
- }
- // Parse the remainder in finishing mode, which reports errors for things like
- // unterminated strings or unknown tokens that would normally be retried.
- finishing_ = true;
- util::Status result = RunParser();
- if (result.ok()) {
- SkipWhitespace();
- if (!p_.empty()) {
- result = ReportFailure("Parsing terminated before end of input.");
- }
- }
- return result;
- }
- util::Status JsonStreamParser::ParseChunk(StringPiece chunk) {
- // Do not do any work if the chunk is empty.
- if (chunk.empty()) return util::Status();
- p_ = json_ = chunk;
- finishing_ = false;
- util::Status result = RunParser();
- if (!result.ok()) return result;
- SkipWhitespace();
- if (p_.empty()) {
- // If we parsed everything we had, clear the leftover.
- leftover_.clear();
- } else {
- // If we do not expect anything i.e. stack is empty, and we have non-empty
- // string left to parse, we report an error.
- if (stack_.empty()) {
- return ReportFailure("Parsing terminated before end of input.");
- }
- // If we expect future data i.e. stack is non-empty, and we have some
- // unparsed data left, we save it for later parse.
- leftover_ = p_.ToString();
- }
- return util::Status();
- }
- util::Status JsonStreamParser::RunParser() {
- while (!stack_.empty()) {
- ParseType type = stack_.top();
- TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING;
- stack_.pop();
- util::Status result;
- switch (type) {
- case VALUE:
- result = ParseValue(t);
- break;
- case OBJ_MID:
- result = ParseObjectMid(t);
- break;
- case ENTRY:
- result = ParseEntry(t);
- break;
- case ENTRY_MID:
- result = ParseEntryMid(t);
- break;
- case ARRAY_VALUE:
- result = ParseArrayValue(t);
- break;
- case ARRAY_MID:
- result = ParseArrayMid(t);
- break;
- default:
- result = util::Status(util::error::INTERNAL,
- StrCat("Unknown parse type: ", type));
- break;
- }
- if (!result.ok()) {
- // If we were cancelled, save our state and try again later.
- if (!finishing_ && result == util::Status(error::CANCELLED, "")) {
- stack_.push(type);
- // If we have a key we still need to render, make sure to save off the
- // contents in our own storage.
- if (!key_.empty() && key_storage_.empty()) {
- StrAppend(&key_storage_, key_);
- key_ = StringPiece(key_storage_);
- }
- result = util::Status();
- }
- return result;
- }
- }
- return util::Status();
- }
- util::Status JsonStreamParser::ParseValue(TokenType type) {
- switch (type) {
- case BEGIN_OBJECT:
- return HandleBeginObject();
- case BEGIN_ARRAY:
- return HandleBeginArray();
- case BEGIN_STRING:
- return ParseString();
- case BEGIN_NUMBER:
- return ParseNumber();
- case BEGIN_TRUE:
- return ParseTrue();
- case BEGIN_FALSE:
- return ParseFalse();
- case BEGIN_NULL:
- return ParseNull();
- case UNKNOWN:
- return ReportUnknown("Expected a value.");
- default: {
- if (allow_empty_null_ && IsEmptyNullAllowed(type)) {
- return ParseEmptyNull();
- }
- // Special case for having been cut off while parsing, wait for more data.
- // This handles things like 'fals' being at the end of the string, we
- // don't know if the next char would be e, completing it, or something
- // else, making it invalid.
- if (!finishing_ && p_.length() < false_len) {
- return util::Status(error::CANCELLED, "");
- }
- return ReportFailure("Unexpected token.");
- }
- }
- }
- util::Status JsonStreamParser::ParseString() {
- util::Status result = ParseStringHelper();
- if (result.ok()) {
- ow_->RenderString(key_, parsed_);
- key_ = StringPiece();
- parsed_ = StringPiece();
- parsed_storage_.clear();
- }
- return result;
- }
- util::Status JsonStreamParser::ParseStringHelper() {
- // If we haven't seen the start quote, grab it and remember it for later.
- if (string_open_ == 0) {
- string_open_ = *p_.data();
- GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\'');
- Advance();
- }
- // Track where we last copied data from so we can minimize copying.
- const char* last = p_.data();
- while (!p_.empty()) {
- const char* data = p_.data();
- if (*data == '\\') {
- // We're about to handle an escape, copy all bytes from last to data.
- if (last < data) {
- parsed_storage_.append(last, data - last);
- }
- // If we ran out of string after the \, cancel or report an error
- // depending on if we expect more data later.
- if (p_.length() == 1) {
- if (!finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- return ReportFailure("Closing quote expected in string.");
- }
- // Parse a unicode escape if we found \u in the string.
- if (data[1] == 'u') {
- util::Status result = ParseUnicodeEscape();
- if (!result.ok()) {
- return result;
- }
- // Move last pointer past the unicode escape and continue.
- last = p_.data();
- continue;
- }
- // Handle the standard set of backslash-escaped characters.
- switch (data[1]) {
- case 'b':
- parsed_storage_.push_back('\b');
- break;
- case 'f':
- parsed_storage_.push_back('\f');
- break;
- case 'n':
- parsed_storage_.push_back('\n');
- break;
- case 'r':
- parsed_storage_.push_back('\r');
- break;
- case 't':
- parsed_storage_.push_back('\t');
- break;
- case 'v':
- parsed_storage_.push_back('\v');
- break;
- default:
- parsed_storage_.push_back(data[1]);
- }
- // We handled two characters, so advance past them and continue.
- p_.remove_prefix(2);
- last = p_.data();
- continue;
- }
- // If we found the closing quote note it, advance past it, and return.
- if (*data == string_open_) {
- // If we didn't copy anything, reuse the input buffer.
- if (parsed_storage_.empty()) {
- parsed_ = StringPiece(last, data - last);
- } else {
- if (last < data) {
- parsed_storage_.append(last, data - last);
- }
- parsed_ = StringPiece(parsed_storage_);
- }
- // Clear the quote char so next time we try to parse a string we'll
- // start fresh.
- string_open_ = 0;
- Advance();
- return util::Status();
- }
- // Normal character, just advance past it.
- Advance();
- }
- // If we ran out of characters, copy over what we have so far.
- if (last < p_.data()) {
- parsed_storage_.append(last, p_.data() - last);
- }
- // If we didn't find the closing quote but we expect more data, cancel for now
- if (!finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- // End of string reached without a closing quote, report an error.
- string_open_ = 0;
- return ReportFailure("Closing quote expected in string.");
- }
- // Converts a unicode escaped character to a decimal value stored in a char32
- // for use in UTF8 encoding utility. We assume that str begins with \uhhhh and
- // convert that from the hex number to a decimal value.
- //
- // There are some security exploits with UTF-8 that we should be careful of:
- // - http://www.unicode.org/reports/tr36/#UTF-8_Exploit
- // - http://sites/intl-eng/design-guide/core-application
- util::Status JsonStreamParser::ParseUnicodeEscape() {
- if (p_.length() < kUnicodeEscapedLength) {
- if (!finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- return ReportFailure("Illegal hex string.");
- }
- GOOGLE_DCHECK_EQ('\\', p_.data()[0]);
- GOOGLE_DCHECK_EQ('u', p_.data()[1]);
- uint32 code = 0;
- for (int i = 2; i < kUnicodeEscapedLength; ++i) {
- if (!isxdigit(p_.data()[i])) {
- return ReportFailure("Invalid escape sequence.");
- }
- code = (code << 4) + hex_digit_to_int(p_.data()[i]);
- }
- if (code >= JsonEscaping::kMinHighSurrogate &&
- code <= JsonEscaping::kMaxHighSurrogate) {
- if (p_.length() < 2 * kUnicodeEscapedLength) {
- if (!finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- if (!coerce_to_utf8_) {
- return ReportFailure("Missing low surrogate.");
- }
- } else if (p_.data()[kUnicodeEscapedLength] == '\\' &&
- p_.data()[kUnicodeEscapedLength + 1] == 'u') {
- uint32 low_code = 0;
- for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength;
- ++i) {
- if (!isxdigit(p_.data()[i])) {
- return ReportFailure("Invalid escape sequence.");
- }
- low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]);
- }
- if (low_code >= JsonEscaping::kMinLowSurrogate &&
- low_code <= JsonEscaping::kMaxLowSurrogate) {
- // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint.
- code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) +
- JsonEscaping::kMinSupplementaryCodePoint;
- // Advance past the first code unit escape.
- p_.remove_prefix(kUnicodeEscapedLength);
- } else if (!coerce_to_utf8_) {
- return ReportFailure("Invalid low surrogate.");
- }
- } else if (!coerce_to_utf8_) {
- return ReportFailure("Missing low surrogate.");
- }
- }
- if (!coerce_to_utf8_ && !IsValidCodePoint(code)) {
- return ReportFailure("Invalid unicode code point.");
- }
- char buf[UTFmax];
- int len = EncodeAsUTF8Char(code, buf);
- // Advance past the [final] code unit escape.
- p_.remove_prefix(kUnicodeEscapedLength);
- parsed_storage_.append(buf, len);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseNumber() {
- NumberResult number;
- util::Status result = ParseNumberHelper(&number);
- if (result.ok()) {
- switch (number.type) {
- case NumberResult::DOUBLE:
- ow_->RenderDouble(key_, number.double_val);
- key_ = StringPiece();
- break;
- case NumberResult::INT:
- ow_->RenderInt64(key_, number.int_val);
- key_ = StringPiece();
- break;
- case NumberResult::UINT:
- ow_->RenderUint64(key_, number.uint_val);
- key_ = StringPiece();
- break;
- default:
- return ReportFailure("Unable to parse number.");
- }
- }
- return result;
- }
- util::Status JsonStreamParser::ParseDoubleHelper(
- const string& number, NumberResult* result) {
- if (!safe_strtod(number, &result->double_val)) {
- return ReportFailure("Unable to parse number.");
- }
- if (!loose_float_number_conversion_ &&
- !MathLimits<double>::IsFinite(result->double_val)) {
- return ReportFailure("Number exceeds the range of double.");
- }
- result->type = NumberResult::DOUBLE;
- return util::Status();
- }
- util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) {
- const char* data = p_.data();
- int length = p_.length();
- // Look for the first non-numeric character, or the end of the string.
- int index = 0;
- bool floating = false;
- bool negative = data[index] == '-';
- // Find the first character that cannot be part of the number. Along the way
- // detect if the number needs to be parsed as a double.
- // Note that this restricts numbers to the JSON specification, so for example
- // we do not support hex or octal notations.
- for (; index < length; ++index) {
- char c = data[index];
- if (isdigit(c)) continue;
- if (c == '.' || c == 'e' || c == 'E') {
- floating = true;
- continue;
- }
- if (c == '+' || c == '-' || c == 'x') continue;
- // Not a valid number character, break out.
- break;
- }
- // If the entire input is a valid number, and we may have more content in the
- // future, we abort for now and resume when we know more.
- if (index == length && !finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- // Create a string containing just the number, so we can use safe_strtoX
- string number = p_.substr(0, index).ToString();
- // Floating point number, parse as a double.
- if (floating) {
- util::Status status = ParseDoubleHelper(number, result);
- if (status.ok()) {
- p_.remove_prefix(index);
- }
- return status;
- }
- // Positive non-floating point number, parse as a uint64.
- if (!negative) {
- // Octal/Hex numbers are not valid JSON values.
- if (number.length() >= 2 && number[0] == '0') {
- return ReportFailure("Octal/hex numbers are not valid JSON values.");
- }
- if (safe_strtou64(number, &result->uint_val)) {
- result->type = NumberResult::UINT;
- p_.remove_prefix(index);
- return util::Status();
- } else {
- // If the value is too large, parse it as double.
- util::Status status = ParseDoubleHelper(number, result);
- if (status.ok()) {
- p_.remove_prefix(index);
- }
- return status;
- }
- }
- // Octal/Hex numbers are not valid JSON values.
- if (number.length() >= 3 && number[1] == '0') {
- return ReportFailure("Octal/hex numbers are not valid JSON values.");
- }
- // Negative non-floating point number, parse as an int64.
- if (safe_strto64(number, &result->int_val)) {
- result->type = NumberResult::INT;
- p_.remove_prefix(index);
- return util::Status();
- } else {
- // If the value is too large, parse it as double.
- util::Status status = ParseDoubleHelper(number, result);
- if (status.ok()) {
- p_.remove_prefix(index);
- }
- return status;
- }
- }
- util::Status JsonStreamParser::HandleBeginObject() {
- GOOGLE_DCHECK_EQ('{', *p_.data());
- Advance();
- ow_->StartObject(key_);
- key_ = StringPiece();
- stack_.push(ENTRY);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseObjectMid(TokenType type) {
- if (type == UNKNOWN) {
- return ReportUnknown("Expected , or } after key:value pair.");
- }
- // Object is complete, advance past the comma and render the EndObject.
- if (type == END_OBJECT) {
- Advance();
- ow_->EndObject();
- return util::Status();
- }
- // Found a comma, advance past it and get ready for an entry.
- if (type == VALUE_SEPARATOR) {
- Advance();
- stack_.push(ENTRY);
- return util::Status();
- }
- // Illegal token after key:value pair.
- return ReportFailure("Expected , or } after key:value pair.");
- }
- util::Status JsonStreamParser::ParseEntry(TokenType type) {
- if (type == UNKNOWN) {
- return ReportUnknown("Expected an object key or }.");
- }
- // Close the object and return. This allows for trailing commas.
- if (type == END_OBJECT) {
- ow_->EndObject();
- Advance();
- return util::Status();
- }
- util::Status result;
- if (type == BEGIN_STRING) {
- // Key is a string (standard JSON), parse it and store the string.
- result = ParseStringHelper();
- if (result.ok()) {
- key_storage_.clear();
- if (!parsed_storage_.empty()) {
- parsed_storage_.swap(key_storage_);
- key_ = StringPiece(key_storage_);
- } else {
- key_ = parsed_;
- }
- parsed_ = StringPiece();
- }
- } else if (type == BEGIN_KEY) {
- // Key is a bare key (back compat), create a StringPiece pointing to it.
- result = ParseKey();
- } else {
- // Unknown key type, report an error.
- result = ReportFailure("Expected an object key or }.");
- }
- // On success we next expect an entry mid ':' then an object mid ',' or '}'
- if (result.ok()) {
- stack_.push(OBJ_MID);
- stack_.push(ENTRY_MID);
- }
- return result;
- }
- util::Status JsonStreamParser::ParseEntryMid(TokenType type) {
- if (type == UNKNOWN) {
- return ReportUnknown("Expected : between key:value pair.");
- }
- if (type == ENTRY_SEPARATOR) {
- Advance();
- stack_.push(VALUE);
- return util::Status();
- }
- return ReportFailure("Expected : between key:value pair.");
- }
- util::Status JsonStreamParser::HandleBeginArray() {
- GOOGLE_DCHECK_EQ('[', *p_.data());
- Advance();
- ow_->StartList(key_);
- key_ = StringPiece();
- stack_.push(ARRAY_VALUE);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseArrayValue(TokenType type) {
- if (type == UNKNOWN) {
- return ReportUnknown("Expected a value or ] within an array.");
- }
- if (type == END_ARRAY) {
- ow_->EndList();
- Advance();
- return util::Status();
- }
- // The ParseValue call may push something onto the stack so we need to make
- // sure an ARRAY_MID is after it, so we push it on now. Also, the parsing of
- // empty-null array value is relying on this ARRAY_MID token.
- stack_.push(ARRAY_MID);
- util::Status result = ParseValue(type);
- if (result == util::Status(error::CANCELLED, "")) {
- // If we were cancelled, pop back off the ARRAY_MID so we don't try to
- // push it on again when we try over.
- stack_.pop();
- }
- return result;
- }
- util::Status JsonStreamParser::ParseArrayMid(TokenType type) {
- if (type == UNKNOWN) {
- return ReportUnknown("Expected , or ] after array value.");
- }
- if (type == END_ARRAY) {
- ow_->EndList();
- Advance();
- return util::Status();
- }
- // Found a comma, advance past it and expect an array value next.
- if (type == VALUE_SEPARATOR) {
- Advance();
- stack_.push(ARRAY_VALUE);
- return util::Status();
- }
- // Illegal token after array value.
- return ReportFailure("Expected , or ] after array value.");
- }
- util::Status JsonStreamParser::ParseTrue() {
- ow_->RenderBool(key_, true);
- key_ = StringPiece();
- p_.remove_prefix(true_len);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseFalse() {
- ow_->RenderBool(key_, false);
- key_ = StringPiece();
- p_.remove_prefix(false_len);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseNull() {
- ow_->RenderNull(key_);
- key_ = StringPiece();
- p_.remove_prefix(null_len);
- return util::Status();
- }
- util::Status JsonStreamParser::ParseEmptyNull() {
- ow_->RenderNull(key_);
- key_ = StringPiece();
- return util::Status();
- }
- bool JsonStreamParser::IsEmptyNullAllowed(TokenType type) {
- if (stack_.empty()) return false;
- return (stack_.top() == ARRAY_MID && type == VALUE_SEPARATOR) ||
- stack_.top() == OBJ_MID;
- }
- util::Status JsonStreamParser::ReportFailure(StringPiece message) {
- static const int kContextLength = 20;
- const char* p_start = p_.data();
- const char* json_start = json_.data();
- const char* begin = std::max(p_start - kContextLength, json_start);
- const char* end =
- std::min(p_start + kContextLength, json_start + json_.size());
- StringPiece segment(begin, end - begin);
- string location(p_start - begin, ' ');
- location.push_back('^');
- return util::Status(util::error::INVALID_ARGUMENT,
- StrCat(message, "\n", segment, "\n", location));
- }
- util::Status JsonStreamParser::ReportUnknown(StringPiece message) {
- // If we aren't finishing the parse, cancel parsing and try later.
- if (!finishing_) {
- return util::Status(error::CANCELLED, "");
- }
- if (p_.empty()) {
- return ReportFailure(StrCat("Unexpected end of string. ", message));
- }
- return ReportFailure(message);
- }
- void JsonStreamParser::SkipWhitespace() {
- while (!p_.empty() && ascii_isspace(*p_.data())) {
- Advance();
- }
- }
- void JsonStreamParser::Advance() {
- // Advance by moving one UTF8 character while making sure we don't go beyond
- // the length of StringPiece.
- p_.remove_prefix(std::min<int>(
- p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length())));
- }
- util::Status JsonStreamParser::ParseKey() {
- StringPiece original = p_;
- if (!ConsumeKey(&p_, &key_)) {
- return ReportFailure("Invalid key or variable name.");
- }
- // If we consumed everything but expect more data, reset p_ and cancel since
- // we can't know if the key was complete or not.
- if (!finishing_ && p_.empty()) {
- p_ = original;
- return util::Status(error::CANCELLED, "");
- }
- // Since we aren't using the key storage, clear it out.
- key_storage_.clear();
- return util::Status();
- }
- JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() {
- SkipWhitespace();
- int size = p_.size();
- if (size == 0) {
- // If we ran out of data, report unknown and we'll place the previous parse
- // type onto the stack and try again when we have more data.
- return UNKNOWN;
- }
- // TODO(sven): Split this method based on context since different contexts
- // support different tokens. Would slightly speed up processing?
- const char* data = p_.data();
- if (*data == '\"' || *data == '\'') return BEGIN_STRING;
- if (*data == '-' || ('0' <= *data && *data <= '9')) {
- return BEGIN_NUMBER;
- }
- if (size >= true_len && !strncmp(data, "true", true_len)) {
- return BEGIN_TRUE;
- }
- if (size >= false_len && !strncmp(data, "false", false_len)) {
- return BEGIN_FALSE;
- }
- if (size >= null_len && !strncmp(data, "null", null_len)) {
- return BEGIN_NULL;
- }
- if (*data == '{') return BEGIN_OBJECT;
- if (*data == '}') return END_OBJECT;
- if (*data == '[') return BEGIN_ARRAY;
- if (*data == ']') return END_ARRAY;
- if (*data == ':') return ENTRY_SEPARATOR;
- if (*data == ',') return VALUE_SEPARATOR;
- if (MatchKey(p_)) {
- return BEGIN_KEY;
- }
- // We don't know that we necessarily have an invalid token here, just that we
- // can't parse what we have so far. So we don't report an error and just
- // return UNKNOWN so we can try again later when we have more data, or if we
- // finish and we have leftovers.
- return UNKNOWN;
- }
}  // namespace converter
}  // namespace util
}  // namespace protobuf
}  // namespace google