base/json/json_parser.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/json/json_parser.h"
   6
   7 #include <cmath>
   8
   9 #include "base/logging.h"
  10 #include "base/memory/scoped_ptr.h"
  11 #include "base/strings/string_number_conversions.h"
  12 #include "base/strings/string_piece.h"
  13 #include "base/strings/string_util.h"
  14 #include "base/strings/stringprintf.h"
  15 #include "base/strings/utf_string_conversion_utils.h"
  16 #include "base/strings/utf_string_conversions.h"
  17 #include "base/third_party/icu/icu_utf.h"
  18 #include "base/values.h"
  19
  20 namespace base {
  21 namespace internal {
  22
  23 namespace {
  24
// Nesting depth at which StackMarker::IsTooDeep() starts reporting true.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. Smaller values are appended to string
// tokens as a single byte; larger ones are UTF-8 encoded first.
const int32 kExtendedASCIIStart = 0x80;
  28
  29 // This and the class below are used to own the JSON input string for when
  30 // string tokens are stored as StringPiece instead of std::string. This
  31 // optimization avoids about 2/3rds of string memory copies. The constructor
  32 // takes ownership of the input string. The real root value is Swap()ed into
  33 // the new instance.
  34 class DictionaryHiddenRootValue : public DictionaryValue {
  35  public:
  36   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
  37     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
  38     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
  39   }
  40
  41   void Swap(DictionaryValue* other) override {
  42     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
  43
  44     // First deep copy to convert JSONStringValue to std::string and swap that
  45     // copy with |other|, which contains the new contents of |this|.
  46     scoped_ptr<DictionaryValue> copy(DeepCopy());
  47     copy->Swap(other);
  48
  49     // Then erase the contents of the current dictionary and swap in the
  50     // new contents, originally from |other|.
  51     Clear();
  52     json_.reset();
  53     DictionaryValue::Swap(copy.get());
  54   }
  55
  56   // Not overriding DictionaryValue::Remove because it just calls through to
  57   // the method below.
  58
  59   bool RemoveWithoutPathExpansion(const std::string& key,
  60                                   scoped_ptr<Value>* out) override {
  61     // If the caller won't take ownership of the removed value, just call up.
  62     if (!out)
  63       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
  64
  65     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
  66
  67     // Otherwise, remove the value while its still "owned" by this and copy it
  68     // to convert any JSONStringValues to std::string.
  69     scoped_ptr<Value> out_owned;
  70     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
  71       return false;
  72
  73     out->reset(out_owned->DeepCopy());
  74
  75     return true;
  76   }
  77
  78  private:
  79   scoped_ptr<std::string> json_;
  80
  81   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
  82 };
  83
  84 class ListHiddenRootValue : public ListValue {
  85  public:
  86   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
  87     DCHECK(root->IsType(Value::TYPE_LIST));
  88     ListValue::Swap(static_cast<ListValue*>(root));
  89   }
  90
  91   void Swap(ListValue* other) override {
  92     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
  93
  94     // First deep copy to convert JSONStringValue to std::string and swap that
  95     // copy with |other|, which contains the new contents of |this|.
  96     scoped_ptr<ListValue> copy(DeepCopy());
  97     copy->Swap(other);
  98
  99     // Then erase the contents of the current list and swap in the new contents,
 100     // originally from |other|.
 101     Clear();
 102     json_.reset();
 103     ListValue::Swap(copy.get());
 104   }
 105
 106   bool Remove(size_t index, scoped_ptr<Value>* out) override {
 107     // If the caller won't take ownership of the removed value, just call up.
 108     if (!out)
 109       return ListValue::Remove(index, out);
 110
 111     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
 112
 113     // Otherwise, remove the value while its still "owned" by this and copy it
 114     // to convert any JSONStringValues to std::string.
 115     scoped_ptr<Value> out_owned;
 116     if (!ListValue::Remove(index, &out_owned))
 117       return false;
 118
 119     out->reset(out_owned->DeepCopy());
 120
 121     return true;
 122   }
 123
 124  private:
 125   scoped_ptr<std::string> json_;
 126
 127   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
 128 };
 129
 130 // A variant on StringValue that uses StringPiece instead of copying the string
 131 // into the Value. This can only be stored in a child of hidden root (above),
 132 // otherwise the referenced string will not be guaranteed to outlive it.
 133 class JSONStringValue : public Value {
 134  public:
 135   explicit JSONStringValue(const StringPiece& piece)
 136       : Value(TYPE_STRING),
 137         string_piece_(piece) {
 138   }
 139
 140   // Overridden from Value:
 141   bool GetAsString(std::string* out_value) const override {
 142     string_piece_.CopyToString(out_value);
 143     return true;
 144   }
 145   bool GetAsString(string16* out_value) const override {
 146     *out_value = UTF8ToUTF16(string_piece_);
 147     return true;
 148   }
 149   Value* DeepCopy() const override {
 150     return new StringValue(string_piece_.as_string());
 151   }
 152   bool Equals(const Value* other) const override {
 153     std::string other_string;
 154     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
 155         StringPiece(other_string) == string_piece_;
 156   }
 157
 158  private:
 159   // The location in the original input stream.
 160   StringPiece string_piece_;
 161
 162   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
 163 };
 164
 165 // Simple class that checks for maximum recursion/"stack overflow."
 166 class StackMarker {
 167  public:
 168   explicit StackMarker(int* depth) : depth_(depth) {
 169     ++(*depth_);
 170     DCHECK_LE(*depth_, kStackMaxDepth);
 171   }
 172   ~StackMarker() {
 173     --(*depth_);
 174   }
 175
 176   bool IsTooDeep() const {
 177     return *depth_ >= kStackMaxDepth;
 178   }
 179
 180  private:
 181   int* const depth_;
 182
 183   DISALLOW_COPY_AND_ASSIGN(StackMarker);
 184 };
 185
 186 }  // namespace
 187
 188 JSONParser::JSONParser(int options)
 189     : options_(options),
 190       start_pos_(NULL),
 191       pos_(NULL),
 192       end_pos_(NULL),
 193       index_(0),
 194       stack_depth_(0),
 195       line_number_(0),
 196       index_last_line_(0),
 197       error_code_(JSONReader::JSON_NO_ERROR),
 198       error_line_(0),
 199       error_column_(0) {
 200 }
 201
 202 JSONParser::~JSONParser() {
 203 }
 204
 205 Value* JSONParser::Parse(const StringPiece& input) {
 206   scoped_ptr<std::string> input_copy;
 207   // If the children of a JSON root can be detached, then hidden roots cannot
 208   // be used, so do not bother copying the input because StringPiece will not
 209   // be used anywhere.
 210   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
 211     input_copy.reset(new std::string(input.as_string()));
 212     start_pos_ = input_copy->data();
 213   } else {
 214     start_pos_ = input.data();
 215   }
 216   pos_ = start_pos_;
 217   end_pos_ = start_pos_ + input.length();
 218   index_ = 0;
 219   line_number_ = 1;
 220   index_last_line_ = 0;
 221
 222   error_code_ = JSONReader::JSON_NO_ERROR;
 223   error_line_ = 0;
 224   error_column_ = 0;
 225
 226   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
 227   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
 228   // ParseNextToken function mis-treating a Unicode BOM as an invalid
 229   // character and returning NULL.
 230   if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
 231       static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
 232       static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
 233     NextNChars(3);
 234   }
 235
 236   // Parse the first and any nested tokens.
 237   scoped_ptr<Value> root(ParseNextToken());
 238   if (!root.get())
 239     return NULL;
 240
 241   // Make sure the input stream is at an end.
 242   if (GetNextToken() != T_END_OF_INPUT) {
 243     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
 244       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
 245       return NULL;
 246     }
 247   }
 248
 249   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
 250   // hidden root.
 251   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
 252     if (root->IsType(Value::TYPE_DICTIONARY)) {
 253       return new DictionaryHiddenRootValue(input_copy.release(), root.get());
 254     } else if (root->IsType(Value::TYPE_LIST)) {
 255       return new ListHiddenRootValue(input_copy.release(), root.get());
 256     } else if (root->IsType(Value::TYPE_STRING)) {
 257       // A string type could be a JSONStringValue, but because there's no
 258       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
 259       // preserve it.
 260       return root->DeepCopy();
 261     }
 262   }
 263
 264   // All other values can be returned directly.
 265   return root.release();
 266 }
 267
 268 JSONReader::JsonParseError JSONParser::error_code() const {
 269   return error_code_;
 270 }
 271
 272 std::string JSONParser::GetErrorMessage() const {
 273   return FormatErrorMessage(error_line_, error_column_,
 274       JSONReader::ErrorCodeToString(error_code_));
 275 }
 276
 277 // StringBuilder ///////////////////////////////////////////////////////////////
 278
 279 JSONParser::StringBuilder::StringBuilder()
 280     : pos_(NULL),
 281       length_(0),
 282       string_(NULL) {
 283 }
 284
 285 JSONParser::StringBuilder::StringBuilder(const char* pos)
 286     : pos_(pos),
 287       length_(0),
 288       string_(NULL) {
 289 }
 290
 291 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
 292   std::swap(other->string_, string_);
 293   std::swap(other->pos_, pos_);
 294   std::swap(other->length_, length_);
 295 }
 296
 297 JSONParser::StringBuilder::~StringBuilder() {
 298   delete string_;
 299 }
 300
 301 void JSONParser::StringBuilder::Append(const char& c) {
 302   DCHECK_GE(c, 0);
 303   DCHECK_LT(c, 128);
 304
 305   if (string_)
 306     string_->push_back(c);
 307   else
 308     ++length_;
 309 }
 310
 311 void JSONParser::StringBuilder::AppendString(const std::string& str) {
 312   DCHECK(string_);
 313   string_->append(str);
 314 }
 315
 316 void JSONParser::StringBuilder::Convert() {
 317   if (string_)
 318     return;
 319   string_  = new std::string(pos_, length_);
 320 }
 321
 322 bool JSONParser::StringBuilder::CanBeStringPiece() const {
 323   return !string_;
 324 }
 325
 326 StringPiece JSONParser::StringBuilder::AsStringPiece() {
 327   if (string_)
 328     return StringPiece();
 329   return StringPiece(pos_, length_);
 330 }
 331
 332 const std::string& JSONParser::StringBuilder::AsString() {
 333   if (!string_)
 334     Convert();
 335   return *string_;
 336 }
 337
 338 // JSONParser private //////////////////////////////////////////////////////////
 339
 340 inline bool JSONParser::CanConsume(int length) {
 341   return pos_ + length <= end_pos_;
 342 }
 343
 344 const char* JSONParser::NextChar() {
 345   DCHECK(CanConsume(1));
 346   ++index_;
 347   ++pos_;
 348   return pos_;
 349 }
 350
 351 void JSONParser::NextNChars(int n) {
 352   DCHECK(CanConsume(n));
 353   index_ += n;
 354   pos_ += n;
 355 }
 356
 357 JSONParser::Token JSONParser::GetNextToken() {
 358   EatWhitespaceAndComments();
 359   if (!CanConsume(1))
 360     return T_END_OF_INPUT;
 361
 362   switch (*pos_) {
 363     case '{':
 364       return T_OBJECT_BEGIN;
 365     case '}':
 366       return T_OBJECT_END;
 367     case '[':
 368       return T_ARRAY_BEGIN;
 369     case ']':
 370       return T_ARRAY_END;
 371     case '"':
 372       return T_STRING;
 373     case '0':
 374     case '1':
 375     case '2':
 376     case '3':
 377     case '4':
 378     case '5':
 379     case '6':
 380     case '7':
 381     case '8':
 382     case '9':
 383     case '-':
 384       return T_NUMBER;
 385     case 't':
 386       return T_BOOL_TRUE;
 387     case 'f':
 388       return T_BOOL_FALSE;
 389     case 'n':
 390       return T_NULL;
 391     case ',':
 392       return T_LIST_SEPARATOR;
 393     case ':':
 394       return T_OBJECT_PAIR_SEPARATOR;
 395     default:
 396       return T_INVALID_TOKEN;
 397   }
 398 }
 399
 400 void JSONParser::EatWhitespaceAndComments() {
 401   while (pos_ < end_pos_) {
 402     switch (*pos_) {
 403       case '\r':
 404       case '\n':
 405         index_last_line_ = index_;
 406         // Don't increment line_number_ twice for "\r\n".
 407         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
 408           ++line_number_;
 409         // Fall through.
 410       case ' ':
 411       case '\t':
 412         NextChar();
 413         break;
 414       case '/':
 415         if (!EatComment())
 416           return;
 417         break;
 418       default:
 419         return;
 420     }
 421   }
 422 }
 423
 424 bool JSONParser::EatComment() {
 425   if (*pos_ != '/' || !CanConsume(1))
 426     return false;
 427
 428   char next_char = *NextChar();
 429   if (next_char == '/') {
 430     // Single line comment, read to newline.
 431     while (CanConsume(1)) {
 432       next_char = *NextChar();
 433       if (next_char == '\n' || next_char == '\r')
 434         return true;
 435     }
 436   } else if (next_char == '*') {
 437     char previous_char = '\0';
 438     // Block comment, read until end marker.
 439     while (CanConsume(1)) {
 440       next_char = *NextChar();
 441       if (previous_char == '*' && next_char == '/') {
 442         // EatWhitespaceAndComments will inspect pos_, which will still be on
 443         // the last / of the comment, so advance once more (which may also be
 444         // end of input).
 445         NextChar();
 446         return true;
 447       }
 448       previous_char = next_char;
 449     }
 450
 451     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
 452   }
 453
 454   return false;
 455 }
 456
 457 Value* JSONParser::ParseNextToken() {
 458   return ParseToken(GetNextToken());
 459 }
 460
 461 Value* JSONParser::ParseToken(Token token) {
 462   switch (token) {
 463     case T_OBJECT_BEGIN:
 464       return ConsumeDictionary();
 465     case T_ARRAY_BEGIN:
 466       return ConsumeList();
 467     case T_STRING:
 468       return ConsumeString();
 469     case T_NUMBER:
 470       return ConsumeNumber();
 471     case T_BOOL_TRUE:
 472     case T_BOOL_FALSE:
 473     case T_NULL:
 474       return ConsumeLiteral();
 475     default:
 476       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
 477       return NULL;
 478   }
 479 }
 480
 481 Value* JSONParser::ConsumeDictionary() {
 482   if (*pos_ != '{') {
 483     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
 484     return NULL;
 485   }
 486
 487   StackMarker depth_check(&stack_depth_);
 488   if (depth_check.IsTooDeep()) {
 489     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
 490     return NULL;
 491   }
 492
 493   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
 494
 495   NextChar();
 496   Token token = GetNextToken();
 497   while (token != T_OBJECT_END) {
 498     if (token != T_STRING) {
 499       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
 500       return NULL;
 501     }
 502
 503     // First consume the key.
 504     StringBuilder key;
 505     if (!ConsumeStringRaw(&key)) {
 506       return NULL;
 507     }
 508
 509     // Read the separator.
 510     NextChar();
 511     token = GetNextToken();
 512     if (token != T_OBJECT_PAIR_SEPARATOR) {
 513       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 514       return NULL;
 515     }
 516
 517     // The next token is the value. Ownership transfers to |dict|.
 518     NextChar();
 519     Value* value = ParseNextToken();
 520     if (!value) {
 521       // ReportError from deeper level.
 522       return NULL;
 523     }
 524
 525     dict->SetWithoutPathExpansion(key.AsString(), value);
 526
 527     NextChar();
 528     token = GetNextToken();
 529     if (token == T_LIST_SEPARATOR) {
 530       NextChar();
 531       token = GetNextToken();
 532       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
 533         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
 534         return NULL;
 535       }
 536     } else if (token != T_OBJECT_END) {
 537       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
 538       return NULL;
 539     }
 540   }
 541
 542   return dict.release();
 543 }
 544
 545 Value* JSONParser::ConsumeList() {
 546   if (*pos_ != '[') {
 547     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
 548     return NULL;
 549   }
 550
 551   StackMarker depth_check(&stack_depth_);
 552   if (depth_check.IsTooDeep()) {
 553     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
 554     return NULL;
 555   }
 556
 557   scoped_ptr<ListValue> list(new ListValue);
 558
 559   NextChar();
 560   Token token = GetNextToken();
 561   while (token != T_ARRAY_END) {
 562     Value* item = ParseToken(token);
 563     if (!item) {
 564       // ReportError from deeper level.
 565       return NULL;
 566     }
 567
 568     list->Append(item);
 569
 570     NextChar();
 571     token = GetNextToken();
 572     if (token == T_LIST_SEPARATOR) {
 573       NextChar();
 574       token = GetNextToken();
 575       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
 576         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
 577         return NULL;
 578       }
 579     } else if (token != T_ARRAY_END) {
 580       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 581       return NULL;
 582     }
 583   }
 584
 585   return list.release();
 586 }
 587
 588 Value* JSONParser::ConsumeString() {
 589   StringBuilder string;
 590   if (!ConsumeStringRaw(&string))
 591     return NULL;
 592
 593   // Create the Value representation, using a hidden root, if configured
 594   // to do so, and if the string can be represented by StringPiece.
 595   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
 596     return new JSONStringValue(string.AsStringPiece());
 597   } else {
 598     if (string.CanBeStringPiece())
 599       string.Convert();
 600     return new StringValue(string.AsString());
 601   }
 602 }
 603
 604 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
 605   if (*pos_ != '"') {
 606     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
 607     return false;
 608   }
 609
 610   // StringBuilder will internally build a StringPiece unless a UTF-16
 611   // conversion occurs, at which point it will perform a copy into a
 612   // std::string.
 613   StringBuilder string(NextChar());
 614
 615   int length = end_pos_ - start_pos_;
 616   int32 next_char = 0;
 617
 618   while (CanConsume(1)) {
 619     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
 620     CBU8_NEXT(start_pos_, index_, length, next_char);
 621     if (next_char < 0 || !IsValidCharacter(next_char)) {
 622       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
 623       return false;
 624     }
 625
 626     // If this character is an escape sequence...
 627     if (next_char == '\\') {
 628       // The input string will be adjusted (either by combining the two
 629       // characters of an encoded escape sequence, or with a UTF conversion),
 630       // so using StringPiece isn't possible -- force a conversion.
 631       string.Convert();
 632
 633       if (!CanConsume(1)) {
 634         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
 635         return false;
 636       }
 637
 638       switch (*NextChar()) {
 639         // Allowed esape sequences:
 640         case 'x': {  // UTF-8 sequence.
 641           // UTF-8 \x escape sequences are not allowed in the spec, but they
 642           // are supported here for backwards-compatiblity with the old parser.
 643           if (!CanConsume(2)) {
 644             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
 645             return false;
 646           }
 647
 648           int hex_digit = 0;
 649           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
 650             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
 651             return false;
 652           }
 653           NextChar();
 654
 655           if (hex_digit < kExtendedASCIIStart)
 656             string.Append(static_cast<char>(hex_digit));
 657           else
 658             DecodeUTF8(hex_digit, &string);
 659           break;
 660         }
 661         case 'u': {  // UTF-16 sequence.
 662           // UTF units are of the form \uXXXX.
 663           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
 664             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
 665             return false;
 666           }
 667
 668           // Skip the 'u'.
 669           NextChar();
 670
 671           std::string utf8_units;
 672           if (!DecodeUTF16(&utf8_units)) {
 673             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
 674             return false;
 675           }
 676
 677           string.AppendString(utf8_units);
 678           break;
 679         }
 680         case '"':
 681           string.Append('"');
 682           break;
 683         case '\\':
 684           string.Append('\\');
 685           break;
 686         case '/':
 687           string.Append('/');
 688           break;
 689         case 'b':
 690           string.Append('\b');
 691           break;
 692         case 'f':
 693           string.Append('\f');
 694           break;
 695         case 'n':
 696           string.Append('\n');
 697           break;
 698         case 'r':
 699           string.Append('\r');
 700           break;
 701         case 't':
 702           string.Append('\t');
 703           break;
 704         case 'v':  // Not listed as valid escape sequence in the RFC.
 705           string.Append('\v');
 706           break;
 707         // All other escape squences are illegal.
 708         default:
 709           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
 710           return false;
 711       }
 712     } else if (next_char == '"') {
 713       --index_;  // Rewind by one because of CBU8_NEXT.
 714       out->Swap(&string);
 715       return true;
 716     } else {
 717       if (next_char < kExtendedASCIIStart)
 718         string.Append(static_cast<char>(next_char));
 719       else
 720         DecodeUTF8(next_char, &string);
 721     }
 722   }
 723
 724   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
 725   return false;
 726 }
 727
 728 // Entry is at the first X in \uXXXX.
 729 bool JSONParser::DecodeUTF16(std::string* dest_string) {
 730   if (!CanConsume(4))
 731     return false;
 732
 733   // This is a 32-bit field because the shift operations in the
 734   // conversion process below cause MSVC to error about "data loss."
 735   // This only stores UTF-16 code units, though.
 736   // Consume the UTF-16 code unit, which may be a high surrogate.
 737   int code_unit16_high = 0;
 738   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
 739     return false;
 740
 741   // Only add 3, not 4, because at the end of this iteration, the parser has
 742   // finished working with the last digit of the UTF sequence, meaning that
 743   // the next iteration will advance to the next byte.
 744   NextNChars(3);
 745
 746   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
 747   // code unit sequence.
 748   char code_unit8[8] = { 0 };
 749   size_t offset = 0;
 750
 751   // If this is a high surrogate, consume the next code unit to get the
 752   // low surrogate.
 753   if (CBU16_IS_SURROGATE(code_unit16_high)) {
 754     // Make sure this is the high surrogate. If not, it's an encoding
 755     // error.
 756     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
 757       return false;
 758
 759     // Make sure that the token has more characters to consume the
 760     // lower surrogate.
 761     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
 762       return false;
 763     if (*NextChar() != '\\' || *NextChar() != 'u')
 764       return false;
 765
 766     NextChar();  // Read past 'u'.
 767     int code_unit16_low = 0;
 768     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
 769       return false;
 770
 771     NextNChars(3);
 772
 773     if (!CBU16_IS_TRAIL(code_unit16_low)) {
 774       return false;
 775     }
 776
 777     uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
 778                                                 code_unit16_low);
 779     if (!IsValidCharacter(code_point))
 780       return false;
 781
 782     offset = 0;
 783     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
 784   } else {
 785     // Not a surrogate.
 786     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
 787     if (!IsValidCharacter(code_unit16_high))
 788       return false;
 789
 790     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
 791   }
 792
 793   dest_string->append(code_unit8);
 794   return true;
 795 }
 796
 797 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
 798   DCHECK(IsValidCharacter(point));
 799
 800   // Anything outside of the basic ASCII plane will need to be decoded from
 801   // int32 to a multi-byte sequence.
 802   if (point < kExtendedASCIIStart) {
 803     dest->Append(static_cast<char>(point));
 804   } else {
 805     char utf8_units[4] = { 0 };
 806     int offset = 0;
 807     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
 808     dest->Convert();
 809     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
 810     // zero terminated at this point.  |offset| contains the correct length.
 811     dest->AppendString(std::string(utf8_units, offset));
 812   }
 813 }
 814
 815 Value* JSONParser::ConsumeNumber() {
 816   const char* num_start = pos_;
 817   const int start_index = index_;
 818   int end_index = start_index;
 819
 820   if (*pos_ == '-')
 821     NextChar();
 822
 823   if (!ReadInt(false)) {
 824     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 825     return NULL;
 826   }
 827   end_index = index_;
 828
 829   // The optional fraction part.
 830   if (*pos_ == '.') {
 831     if (!CanConsume(1)) {
 832       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 833       return NULL;
 834     }
 835     NextChar();
 836     if (!ReadInt(true)) {
 837       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 838       return NULL;
 839     }
 840     end_index = index_;
 841   }
 842
 843   // Optional exponent part.
 844   if (*pos_ == 'e' || *pos_ == 'E') {
 845     NextChar();
 846     if (*pos_ == '-' || *pos_ == '+')
 847       NextChar();
 848     if (!ReadInt(true)) {
 849       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 850       return NULL;
 851     }
 852     end_index = index_;
 853   }
 854
 855   // ReadInt is greedy because numbers have no easily detectable sentinel,
 856   // so save off where the parser should be on exit (see Consume invariant at
 857   // the top of the header), then make sure the next token is one which is
 858   // valid.
 859   const char* exit_pos = pos_ - 1;
 860   int exit_index = index_ - 1;
 861
 862   switch (GetNextToken()) {
 863     case T_OBJECT_END:
 864     case T_ARRAY_END:
 865     case T_LIST_SEPARATOR:
 866     case T_END_OF_INPUT:
 867       break;
 868     default:
 869       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 870       return NULL;
 871   }
 872
 873   pos_ = exit_pos;
 874   index_ = exit_index;
 875
 876   StringPiece num_string(num_start, end_index - start_index);
 877
 878   int num_int;
 879   if (StringToInt(num_string, &num_int))
 880     return new FundamentalValue(num_int);
 881
 882   double num_double;
 883   if (StringToDouble(num_string.as_string(), &num_double) &&
 884       std::isfinite(num_double)) {
 885     return new FundamentalValue(num_double);
 886   }
 887
 888   return NULL;
 889 }
 890
 891 bool JSONParser::ReadInt(bool allow_leading_zeros) {
 892   char first = *pos_;
 893   int len = 0;
 894
 895   char c = first;
 896   while (CanConsume(1) && IsAsciiDigit(c)) {
 897     c = *NextChar();
 898     ++len;
 899   }
 900
 901   if (len == 0)
 902     return false;
 903
 904   if (!allow_leading_zeros && len > 1 && first == '0')
 905     return false;
 906
 907   return true;
 908 }
 909
 910 Value* JSONParser::ConsumeLiteral() {
 911   switch (*pos_) {
 912     case 't': {
 913       const char kTrueLiteral[] = "true";
 914       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
 915       if (!CanConsume(kTrueLen - 1) ||
 916           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
 917         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 918         return NULL;
 919       }
 920       NextNChars(kTrueLen - 1);
 921       return new FundamentalValue(true);
 922     }
 923     case 'f': {
 924       const char kFalseLiteral[] = "false";
 925       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
 926       if (!CanConsume(kFalseLen - 1) ||
 927           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
 928         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 929         return NULL;
 930       }
 931       NextNChars(kFalseLen - 1);
 932       return new FundamentalValue(false);
 933     }
 934     case 'n': {
 935       const char kNullLiteral[] = "null";
 936       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
 937       if (!CanConsume(kNullLen - 1) ||
 938           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
 939         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
 940         return NULL;
 941       }
 942       NextNChars(kNullLen - 1);
 943       return Value::CreateNullValue().release();
 944     }
 945     default:
 946       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
 947       return NULL;
 948   }
 949 }
 950
 951 // static
 952 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
 953   return strncmp(one, two, len) == 0;
 954 }
 955
 956 void JSONParser::ReportError(JSONReader::JsonParseError code,
 957                              int column_adjust) {
 958   error_code_ = code;
 959   error_line_ = line_number_;
 960   error_column_ = index_ - index_last_line_ + column_adjust;
 961 }
 962
 963 // static
 964 std::string JSONParser::FormatErrorMessage(int line, int column,
 965                                            const std::string& description) {
 966   if (line || column) {
 967     return StringPrintf("Line: %i, column: %i, %s",
 968         line, column, description.c_str());
 969   }
 970   return description;
 971 }
 972
 973 }  // namespace internal
 974 }  // namespace base