interface/modules/zend_modules/library/Zend/Json/Decoder.php

   1 <?php
   2 /**
   3  * Zend Framework (http://framework.zend.com/)
   4  *
   5  * @link      http://github.com/zendframework/zf2 for the canonical source repository
   6  * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com)
   7  * @license   http://framework.zend.com/license/new-bsd New BSD License
   8  */
   9
  10 namespace Zend\Json;
  11
  12 use stdClass;
  13 use Zend\Json\Exception\InvalidArgumentException;
  14 use Zend\Json\Exception\RuntimeException;
  15
  16 /**
  17  * Decode JSON encoded string to PHP variable constructs
  18  */
  19 class Decoder
  20 {
  21
  22     /**
  23      * Parse tokens used to decode the JSON object. These are not
  24      * for public consumption, they are just used internally to the
  25      * class.
  26      */
  27     const EOF       = 0;
  28     const DATUM     = 1;
  29     const LBRACE    = 2;
  30     const LBRACKET  = 3;
  31     const RBRACE    = 4;
  32     const RBRACKET  = 5;
  33     const COMMA     = 6;
  34     const COLON     = 7;
  35
  36     /**
  37      * Use to maintain a "pointer" to the source being decoded
  38      *
  39      * @var string
  40      */
  41     protected $source;
  42
  43     /**
  44      * Caches the source length
  45      *
  46      * @var int
  47      */
  48     protected $sourceLength;
  49
  50     /**
  51      * The offset within the source being decoded
  52      *
  53      * @var int
  54      *
  55      */
  56     protected $offset;
  57
  58     /**
  59      * The current token being considered in the parser cycle
  60      *
  61      * @var int
  62      */
  63     protected $token;
  64
  65     /**
  66      * Flag indicating how objects should be decoded
  67      *
  68      * @var int
  69      * @access protected
  70      */
  71     protected $decodeType;
  72
  73     /**
  74      * @var $_tokenValue
  75      */
  76     protected $tokenValue;
  77
  78     /**
  79      * Constructor
  80      *
  81      * @param string $source     String source to decode
  82      * @param int    $decodeType How objects should be decoded -- see
  83      * {@link Zend\Json\Json::TYPE_ARRAY} and {@link Zend\Json\Json::TYPE_OBJECT} for
  84      * valid values
  85      * @throws InvalidArgumentException
  86      */
  87     protected function __construct($source, $decodeType)
  88     {
  89         // Set defaults
  90         $this->source       = self::decodeUnicodeString($source);
  91         $this->sourceLength = strlen($this->source);
  92         $this->token        = self::EOF;
  93         $this->offset       = 0;
  94
  95         switch ($decodeType) {
  96             case Json::TYPE_ARRAY:
  97             case Json::TYPE_OBJECT:
  98                 $this->decodeType = $decodeType;
  99                 break;
 100             default:
 101                 throw new InvalidArgumentException("Unknown decode type '{$decodeType}', please use one of the constants Json::TYPE_*");
 102         }
 103
 104         // Set pointer at first token
 105         $this->_getNextToken();
 106     }
 107
 108     /**
 109      * Decode a JSON source string
 110      *
 111      * Decodes a JSON encoded string. The value returned will be one of the
 112      * following:
 113      *        - integer
 114      *        - float
 115      *        - boolean
 116      *        - null
 117      *      - stdClass
 118      *      - array
 119      *         - array of one or more of the above types
 120      *
 121      * By default, decoded objects will be returned as associative arrays; to
 122      * return a stdClass object instead, pass {@link Zend\Json\Json::TYPE_OBJECT} to
 123      * the $objectDecodeType parameter.
 124      *
 125      * @static
 126      * @access public
 127      * @param string $source String to be decoded
 128      * @param int $objectDecodeType How objects should be decoded; should be
 129      * either or {@link Zend\Json\Json::TYPE_ARRAY} or
 130      * {@link Zend\Json\Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
 131      * @return mixed
 132      */
 133     public static function decode($source, $objectDecodeType = Json::TYPE_OBJECT)
 134     {
 135         $decoder = new static($source, $objectDecodeType);
 136         return $decoder->_decodeValue();
 137     }
 138
 139     /**
 140      * Recursive driving routine for supported toplevel tops
 141      *
 142      * @return mixed
 143      */
 144     protected function _decodeValue()
 145     {
 146         switch ($this->token) {
 147             case self::DATUM:
 148                 $result  = $this->tokenValue;
 149                 $this->_getNextToken();
 150                 return($result);
 151                 break;
 152             case self::LBRACE:
 153                 return($this->_decodeObject());
 154                 break;
 155             case self::LBRACKET:
 156                 return($this->_decodeArray());
 157                 break;
 158             default:
 159                 return null;
 160                 break;
 161         }
 162     }
 163
 164     /**
 165      * Decodes an object of the form:
 166      *  { "attribute: value, "attribute2" : value,...}
 167      *
 168      * If Zend\Json\Encoder was used to encode the original object then
 169      * a special attribute called __className which specifies a class
 170      * name that should wrap the data contained within the encoded source.
 171      *
 172      * Decodes to either an array or stdClass object, based on the value of
 173      * {@link $decodeType}. If invalid $decodeType present, returns as an
 174      * array.
 175      *
 176      * @return array|stdClass
 177      * @throws RuntimeException
 178      */
 179     protected function _decodeObject()
 180     {
 181         $members = array();
 182         $tok = $this->_getNextToken();
 183
 184         while ($tok && $tok != self::RBRACE) {
 185             if ($tok != self::DATUM || ! is_string($this->tokenValue)) {
 186                 throw new RuntimeException('Missing key in object encoding: ' . $this->source);
 187             }
 188
 189             $key = $this->tokenValue;
 190             $tok = $this->_getNextToken();
 191
 192             if ($tok != self::COLON) {
 193                 throw new RuntimeException('Missing ":" in object encoding: ' . $this->source);
 194             }
 195
 196             $tok = $this->_getNextToken();
 197             $members[$key] = $this->_decodeValue();
 198             $tok = $this->token;
 199
 200             if ($tok == self::RBRACE) {
 201                 break;
 202             }
 203
 204             if ($tok != self::COMMA) {
 205                 throw new RuntimeException('Missing "," in object encoding: ' . $this->source);
 206             }
 207
 208             $tok = $this->_getNextToken();
 209         }
 210
 211         switch ($this->decodeType) {
 212             case Json::TYPE_OBJECT:
 213                 // Create new stdClass and populate with $members
 214                 $result = new stdClass();
 215                 foreach ($members as $key => $value) {
 216                     if ($key === '') {
 217                         $key = '_empty_';
 218                     }
 219                     $result->$key = $value;
 220                 }
 221                 break;
 222             case Json::TYPE_ARRAY:
 223             default:
 224                 $result = $members;
 225                 break;
 226         }
 227
 228         $this->_getNextToken();
 229         return $result;
 230     }
 231
 232     /**
 233      * Decodes a JSON array format:
 234      *    [element, element2,...,elementN]
 235      *
 236      * @return array
 237      * @throws RuntimeException
 238      */
 239     protected function _decodeArray()
 240     {
 241         $result = array();
 242         $tok = $this->_getNextToken(); // Move past the '['
 243         $index  = 0;
 244
 245         while ($tok && $tok != self::RBRACKET) {
 246             $result[$index++] = $this->_decodeValue();
 247
 248             $tok = $this->token;
 249
 250             if ($tok == self::RBRACKET || !$tok) {
 251                 break;
 252             }
 253
 254             if ($tok != self::COMMA) {
 255                 throw new RuntimeException('Missing "," in array encoding: ' . $this->source);
 256             }
 257
 258             $tok = $this->_getNextToken();
 259         }
 260
 261         $this->_getNextToken();
 262         return $result;
 263     }
 264
 265
 266     /**
 267      * Removes whitespace characters from the source input
 268      */
 269     protected function _eatWhitespace()
 270     {
 271         if (preg_match(
 272                 '/([\t\b\f\n\r ])*/s',
 273                 $this->source,
 274                 $matches,
 275                 PREG_OFFSET_CAPTURE,
 276                 $this->offset)
 277             && $matches[0][1] == $this->offset)
 278         {
 279             $this->offset += strlen($matches[0][0]);
 280         }
 281     }
 282
 283
 284     /**
 285      * Retrieves the next token from the source stream
 286      *
 287      * @return int Token constant value specified in class definition
 288      * @throws RuntimeException
 289      */
 290     protected function _getNextToken()
 291     {
 292         $this->token      = self::EOF;
 293         $this->tokenValue = null;
 294         $this->_eatWhitespace();
 295
 296         if ($this->offset >= $this->sourceLength) {
 297             return(self::EOF);
 298         }
 299
 300         $str       = $this->source;
 301         $strLength = $this->sourceLength;
 302         $i         = $this->offset;
 303         $start     = $i;
 304
 305         switch ($str{$i}) {
 306             case '{':
 307                $this->token = self::LBRACE;
 308                break;
 309             case '}':
 310                 $this->token = self::RBRACE;
 311                 break;
 312             case '[':
 313                 $this->token = self::LBRACKET;
 314                 break;
 315             case ']':
 316                 $this->token = self::RBRACKET;
 317                 break;
 318             case ',':
 319                 $this->token = self::COMMA;
 320                 break;
 321             case ':':
 322                 $this->token = self::COLON;
 323                 break;
 324             case  '"':
 325                 $result = '';
 326                 do {
 327                     $i++;
 328                     if ($i >= $strLength) {
 329                         break;
 330                     }
 331
 332                     $chr = $str{$i};
 333
 334                     if ($chr == '\\') {
 335                         $i++;
 336                         if ($i >= $strLength) {
 337                             break;
 338                         }
 339                         $chr = $str{$i};
 340                         switch ($chr) {
 341                             case '"' :
 342                                 $result .= '"';
 343                                 break;
 344                             case '\\':
 345                                 $result .= '\\';
 346                                 break;
 347                             case '/' :
 348                                 $result .= '/';
 349                                 break;
 350                             case 'b' :
 351                                 $result .= "\x08";
 352                                 break;
 353                             case 'f' :
 354                                 $result .= "\x0c";
 355                                 break;
 356                             case 'n' :
 357                                 $result .= "\x0a";
 358                                 break;
 359                             case 'r' :
 360                                 $result .= "\x0d";
 361                                 break;
 362                             case 't' :
 363                                 $result .= "\x09";
 364                                 break;
 365                             case '\'' :
 366                                 $result .= '\'';
 367                                 break;
 368                             default:
 369                                 throw new RuntimeException("Illegal escape sequence '{$chr}'");
 370                         }
 371                     } elseif ($chr == '"') {
 372                         break;
 373                     } else {
 374                         $result .= $chr;
 375                     }
 376                 } while ($i < $strLength);
 377
 378                 $this->token = self::DATUM;
 379                 //$this->tokenValue = substr($str, $start + 1, $i - $start - 1);
 380                 $this->tokenValue = $result;
 381                 break;
 382             case 't':
 383                 if (($i+ 3) < $strLength && substr($str, $start, 4) == "true") {
 384                     $this->token = self::DATUM;
 385                 }
 386                 $this->tokenValue = true;
 387                 $i += 3;
 388                 break;
 389             case 'f':
 390                 if (($i+ 4) < $strLength && substr($str, $start, 5) == "false") {
 391                     $this->token = self::DATUM;
 392                 }
 393                 $this->tokenValue = false;
 394                 $i += 4;
 395                 break;
 396             case 'n':
 397                 if (($i+ 3) < $strLength && substr($str, $start, 4) == "null") {
 398                     $this->token = self::DATUM;
 399                 }
 400                 $this->tokenValue = NULL;
 401                 $i += 3;
 402                 break;
 403         }
 404
 405         if ($this->token != self::EOF) {
 406             $this->offset = $i + 1; // Consume the last token character
 407             return($this->token);
 408         }
 409
 410         $chr = $str{$i};
 411         if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
 412             if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
 413                 $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
 414
 415                 $datum = $matches[0][0];
 416
 417                 if (is_numeric($datum)) {
 418                     if (preg_match('/^0\d+$/', $datum)) {
 419                         throw new RuntimeException("Octal notation not supported by JSON (value: {$datum})");
 420                     } else {
 421                         $val  = intval($datum);
 422                         $fVal = floatval($datum);
 423                         $this->tokenValue = ($val == $fVal ? $val : $fVal);
 424                     }
 425                 } else {
 426                     throw new RuntimeException("Illegal number format: {$datum}");
 427                 }
 428
 429                 $this->token = self::DATUM;
 430                 $this->offset = $start + strlen($datum);
 431             }
 432         } else {
 433             throw new RuntimeException('Illegal Token');
 434         }
 435
 436         return $this->token;
 437     }
 438
 439     /**
 440      * Decode Unicode Characters from \u0000 ASCII syntax.
 441      *
 442      * This algorithm was originally developed for the
 443      * Solar Framework by Paul M. Jones
 444      *
 445      * @link   http://solarphp.com/
 446      * @link   http://svn.solarphp.com/core/trunk/Solar/Json.php
 447      * @param  string $chrs
 448      * @return string
 449      */
 450     public static function decodeUnicodeString($chrs)
 451     {
 452         $chrs       = (string) $chrs;
 453         $utf8       = '';
 454         $strlenChrs = strlen($chrs);
 455
 456         for ($i = 0; $i < $strlenChrs; $i++) {
 457             $ordChrsC = ord($chrs[$i]);
 458
 459             switch (true) {
 460                 case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
 461                     // single, escaped unicode character
 462                     $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
 463                            . chr(hexdec(substr($chrs, ($i + 4), 2)));
 464                     $utf8char = self::_utf162utf8($utf16);
 465                     $search  = array('\\', "\n", "\t", "\r", chr(0x08), chr(0x0C), '"', '\'', '/');
 466                     if (in_array($utf8char, $search)) {
 467                         $replace = array('\\\\', '\\n', '\\t', '\\r', '\\b', '\\f', '\\"', '\\\'', '\\/');
 468                         $utf8char  = str_replace($search, $replace, $utf8char);
 469                     }
 470                     $utf8 .= $utf8char;
 471                     $i += 5;
 472                     break;
 473                 case ($ordChrsC >= 0x20) && ($ordChrsC <= 0x7F):
 474                     $utf8 .= $chrs{$i};
 475                     break;
 476                 case ($ordChrsC & 0xE0) == 0xC0:
 477                     // characters U-00000080 - U-000007FF, mask 110XXXXX
 478                     //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 479                     $utf8 .= substr($chrs, $i, 2);
 480                     ++$i;
 481                     break;
 482                 case ($ordChrsC & 0xF0) == 0xE0:
 483                     // characters U-00000800 - U-0000FFFF, mask 1110XXXX
 484                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 485                     $utf8 .= substr($chrs, $i, 3);
 486                     $i += 2;
 487                     break;
 488                 case ($ordChrsC & 0xF8) == 0xF0:
 489                     // characters U-00010000 - U-001FFFFF, mask 11110XXX
 490                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 491                     $utf8 .= substr($chrs, $i, 4);
 492                     $i += 3;
 493                     break;
 494                 case ($ordChrsC & 0xFC) == 0xF8:
 495                     // characters U-00200000 - U-03FFFFFF, mask 111110XX
 496                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 497                     $utf8 .= substr($chrs, $i, 5);
 498                     $i += 4;
 499                     break;
 500                 case ($ordChrsC & 0xFE) == 0xFC:
 501                     // characters U-04000000 - U-7FFFFFFF, mask 1111110X
 502                     // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 503                     $utf8 .= substr($chrs, $i, 6);
 504                     $i += 5;
 505                     break;
 506             }
 507         }
 508
 509         return $utf8;
 510     }
 511
 512     /**
 513      * Convert a string from one UTF-16 char to one UTF-8 char.
 514      *
 515      * Normally should be handled by mb_convert_encoding, but
 516      * provides a slower PHP-only method for installations
 517      * that lack the multibyte string extension.
 518      *
 519      * This method is from the Solar Framework by Paul M. Jones
 520      *
 521      * @link   http://solarphp.com
 522      * @param  string $utf16 UTF-16 character
 523      * @return string UTF-8 character
 524      */
 525     protected static function _utf162utf8($utf16)
 526     {
 527         // Check for mb extension otherwise do by hand.
 528         if (function_exists('mb_convert_encoding')) {
 529             return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
 530         }
 531
 532         $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
 533
 534         switch (true) {
 535             case ((0x7F & $bytes) == $bytes):
 536                 // this case should never be reached, because we are in ASCII range
 537                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 538                 return chr(0x7F & $bytes);
 539
 540             case (0x07FF & $bytes) == $bytes:
 541                 // return a 2-byte UTF-8 character
 542                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 543                 return chr(0xC0 | (($bytes >> 6) & 0x1F))
 544                      . chr(0x80 | ($bytes & 0x3F));
 545
 546             case (0xFFFF & $bytes) == $bytes:
 547                 // return a 3-byte UTF-8 character
 548                 // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
 549                 return chr(0xE0 | (($bytes >> 12) & 0x0F))
 550                      . chr(0x80 | (($bytes >> 6) & 0x3F))
 551                      . chr(0x80 | ($bytes & 0x3F));
 552         }
 553
 554         // ignoring UTF-32 for now, sorry
 555         return '';
 556     }
 557 }