Hotfix Release 2017-02-19c "Frusterick Manners"
[dokuwiki.git] / inc / JSON.php
blobe01488e14690e9bdfff46df0fed71225f2cf7af9
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * Converts to and from JSON format.
 *
 * JSON (JavaScript Object Notation) is a lightweight data-interchange
 * format. It is easy for humans to read and write. It is easy for machines
 * to parse and generate. It is based on a subset of the JavaScript
 * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
 * This feature can also be found in Python. JSON is a text format that is
 * completely language independent but uses conventions that are familiar
 * to programmers of the C-family of languages, including C, C++, C#, Java,
 * JavaScript, Perl, TCL, and many others. These properties make JSON an
 * ideal data-interchange language.
 *
 * This package provides a simple encoder and decoder for JSON notation. It
 * is intended for use with client-side Javascript applications that make
 * use of HTTPRequest to perform server communication functions - data can
 * be encoded into JSON notation for use in a client-side javascript, or
 * decoded from incoming Javascript requests. JSON format is native to
 * Javascript, and can be directly eval()'ed with no further parsing
 * overhead.
 *
 * All strings should be in ASCII or UTF-8 format!
 *
 * PHP versions 4 and 5
 *
 * LICENSE: Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met: Redistributions of source code must retain the
 * above copyright notice, this list of conditions and the following
 * disclaimer. Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * @author      Michal Migurski <mike-json@teczno.com>
 * @author      Matt Knapp <mdknapp[at]gmail[dot]com>
 * @author      Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 * @copyright   2005 Michal Migurski
 * @license     http://www.freebsd.org/copyright/freebsd-license.html
 * @link        http://pear.php.net/pepr/pepr-proposal-show.php?id=198
 */
// for DokuWiki: abort immediately unless the including script has already
// defined the DOKU_INC constant
if(!defined('DOKU_INC')) die('meh.');
/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the stack entry marks the start of a comma separated slice of the
 * container that is currently being parsed
 */
define('JSON_SLICE',   1);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a quoted string
 */
define('JSON_IN_STR',  2);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a "[...]" array
 */
define('JSON_IN_ARR',  4);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a "{...}" object
 */
define('JSON_IN_OBJ',  8);

/**
 * Marker constant for JSON::decode(), used to flag stack state:
 * the parser is inside a slash-star comment
 */
define('JSON_IN_CMT', 16);

/**
 * Behavior switch for JSON::decode():
 * "{...}" syntax is decoded into associative arrays
 */
define('JSON_LOOSE_TYPE', 10);

/**
 * Behavior switch for JSON::decode():
 * "{...}" syntax is decoded into stdClass objects (the constructor default)
 */
define('JSON_STRICT_TYPE', 11);
/**
 * Converts to and from JSON format.
 *
 * PHP's native json_encode()/json_decode() are used whenever they exist and
 * $skipnative is false. Otherwise the pure PHP encoder and decoder below are
 * used. The PHP decoder is more lenient than strict JSON: it also accepts
 * single quoted strings, unquoted object keys and comments.
 *
 * The PHP encoder/decoder call utf8_to_utf16be() and utf16be_to_utf8(), which
 * are not defined in this file and have to be provided by the including code.
 */
class JSON {

    /**
     * Disables the use of PHP5's native json_decode()
     *
     * You shouldn't change this usually because the native function is much
     * faster. However, this non-native will also parse slightly broken JSON
     * which might be handy when talking to a non-conform endpoint
     */
    public $skipnative = false;

    /**
     * Object behavior selected in the constructor, either JSON_STRICT_TYPE
     * or JSON_LOOSE_TYPE
     *
     * Declared explicitly (it used to be created dynamically by the
     * constructor) and kept public so existing readers keep working.
     *
     * @var int
     */
    public $use;

    /**
     * constructs a new JSON instance
     *
     * @param int $use object behavior: when encoding or decoding,
     *                 be loose or strict about object/array usage
     *
     *                 possible values:
     *                 JSON_STRICT_TYPE - strict typing, default
     *                                    "{...}" syntax creates objects in decode.
     *                 JSON_LOOSE_TYPE  - loose typing
     *                                    "{...}" syntax creates associative arrays in decode.
     */
    public function __construct($use=JSON_STRICT_TYPE) {
        $this->use = $use;
    }

    /**
     * encodes an arbitrary variable into JSON format
     * If available the native PHP JSON implementation is used.
     *
     * @param mixed $var any number, boolean, string, array, or object to be encoded.
     *                   if var is a string, note that encode() always expects it
     *                   to be in ASCII or UTF-8 format!
     *
     * @return string JSON string representation of input var; the PHP
     *                implementation returns '' for types it cannot encode
     * @access public
     */
    public function encode($var) {
        if (!$this->skipnative && function_exists('json_encode')){
            return json_encode($var);
        }

        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return sprintf('%d', $var);

            case 'double':
            case 'float':
                // %F instead of %f: %f honours the current locale and may
                // print a decimal comma, which is not valid JSON
                return sprintf('%F', $var);

            case 'string':
                return $this->encode_string($var);

            case 'array':
                /*
                 * As per JSON spec if any array key is not an integer
                 * we must treat the the whole array as an object. We
                 * also try to catch a sparsely populated associative
                 * array with numeric keys here because some JS engines
                 * will create an array with empty indexes up to
                 * max_index which can cause memory issues and because
                 * the keys, which may be relevant, will be remapped
                 * otherwise.
                 *
                 * As per the ECMA and JSON specification an object may
                 * have any string as a property. Unfortunately due to
                 * a hole in the ECMA specification if the key is a
                 * ECMA reserved word or starts with a digit the
                 * parameter is only accessible using ECMAScript's
                 * bracket notation.
                 */

                // treat as a JSON object
                if (count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
                                                               array_keys($var),
                                                               array_values($var))));
                }

                // treat it like a regular array
                return sprintf('[%s]', join(',', array_map(array($this, 'encode'), $var)));

            case 'object':
                $vars = get_object_vars($var);
                return sprintf('{%s}', join(',', array_map(array($this, 'name_value'),
                                                           array_keys($vars),
                                                           array_values($vars))));

            default:
                return '';
        }
    }

    /**
     * encodes an arbitrary variable into JSON format, alias for encode()
     *
     * @param mixed $var value to encode
     * @return string JSON string representation of input var
     */
    public function enc($var) {
        return $this->encode($var);
    }

    /**
     * array-walking function for use in generating JSON-formatted name-value pairs
     *
     * @param string $name  name of key to use
     * @param mixed  $value array element or property value to be encoded
     *
     * @return string JSON-formatted name-value pair, like '"name":value'
     * @access private
     */
    public function name_value($name, $value) {
        return (sprintf("%s:%s", $this->encode(strval($name)), $this->encode($value)));
    }

    /**
     * reduce a string by removing leading and trailing comments and whitespace
     *
     * @param string $str string value to strip of comments and whitespace
     *
     * @return string string value stripped of comments and whitespace
     * @access private
     */
    public function reduce_string($str) {
        $str = preg_replace(array(

                // eliminate single line comments in '// ...' form
                '#^\s*//(.+)$#m',

                // eliminate multi-line comments in '/* ... */' form, at start of string
                '#^\s*/\*(.+)\*/#Us',

                // eliminate multi-line comments in '/* ... */' form, at end of string
                '#/\*(.+)\*/\s*$#Us'

            ), '', $str);

        // eliminate extraneous space
        return trim($str);
    }

    /**
     * decodes a JSON string into appropriate variable
     * If available the native PHP JSON implementation is used.
     *
     * @param string $str JSON-formatted string
     *
     * @return mixed number, boolean, string, array, or object
     *               corresponding to given JSON input string.
     *               See the constructor's $use argument for object-output behavior.
     *               Note that decode() always returns strings
     *               in ASCII or UTF-8 format!
     *               The PHP implementation returns null for input it does
     *               not recognise.
     * @access public
     */
    public function decode($str) {
        if (!$this->skipnative && function_exists('json_decode')){
            return json_decode($str,($this->use == JSON_LOOSE_TYPE));
        }

        $str = $this->reduce_string($str);

        // literals are matched case-insensitively
        $literal = strtolower($str);
        if ($literal === 'true') {
            return true;
        }
        if ($literal === 'false') {
            return false;
        }
        if ($literal === 'null') {
            return null;
        }

        if (is_numeric($str)) {
            // Return float or int, as appropriate
            return ((float)$str == (int)$str)
                ? (int)$str
                : (float)$str;
        }

        // quoted string; ".*" (not ".+") so that the empty string "" is
        // recognised as a string instead of falling through to null
        if (preg_match('/^("|\').*("|\')$/s', $str, $m) && $m[1] == $m[2]) {
            return $this->decode_string($str);
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            return $this->decode_container($str);
        }

        return null;
    }

    /**
     * decodes a JSON string into appropriate variable; alias for decode()
     *
     * @param string $var JSON-formatted string
     * @return mixed see decode()
     */
    public function dec($var) {
        return $this->decode($var);
    }

    /**
     * Length in bytes of the UTF-8 sequence introduced by the given lead byte
     *
     * @see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
     *
     * @param int $ord value (0-255) of the first byte of the sequence
     * @return int 2 to 6 for a multi-byte lead byte, 0 for any other byte
     */
    private function utf8_sequence_length($ord) {
        if (($ord & 0xE0) == 0xC0) return 2; // U-00000080 - U-000007FF, mask 110XXXXX
        if (($ord & 0xF0) == 0xE0) return 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
        if (($ord & 0xF8) == 0xF0) return 4; // U-00010000 - U-001FFFFF, mask 11110XXX
        if (($ord & 0xFC) == 0xF8) return 5; // U-00200000 - U-03FFFFFF, mask 111110XX
        if (($ord & 0xFE) == 0xFC) return 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
        return 0;
    }

    /**
     * Encodes a PHP string as a double quoted JSON string (PHP implementation)
     *
     * STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
     *
     * @param string $var string to encode
     * @return string the JSON string literal including the surrounding quotes
     */
    private function encode_string($var) {
        // characters that have a two character escape; the values are
        // single quoted, so each one is a literal backslash plus one character
        static $escapes = array(
            0x08 => '\b',
            0x09 => '\t',
            0x0A => '\n',
            0x0C => '\f',
            0x0D => '\r',
            0x22 => '\"',   // double quote
            0x2F => '\/',   // slash
            0x5C => '\\\\', // slosh
        );

        $ascii = '';
        $strlen_var = strlen($var);

        /*
         * Iterate over every byte in the string,
         * escaping with a slash or encoding to \uXXXX where necessary
         */
        for ($c = 0; $c < $strlen_var; ++$c) {
            $ord_var_c = ord($var[$c]);

            if (isset($escapes[$ord_var_c])) {
                $ascii .= $escapes[$ord_var_c];

            } elseif ($ord_var_c < 0x20) {
                // remaining control characters have to be escaped in JSON
                // (they used to be dropped, NUL was passed through raw)
                $ascii .= sprintf('\u%04x', $ord_var_c);

            } elseif ($ord_var_c <= 0x7F) {
                // characters U-00000020 - U-0000007F (same as ASCII)
                $ascii .= $var[$c];

            } else {
                $len = $this->utf8_sequence_length($ord_var_c);
                if ($len === 0) {
                    // stray continuation byte or invalid lead byte: skipped
                    continue;
                }

                // substr() never reads past the end of a truncated sequence
                $char = substr($var, $c, $len);
                $c += $len - 1;
                //$utf16 = mb_convert_encoding($char, 'UTF-16', 'UTF-8');
                $utf16 = utf8_to_utf16be($char);
                $ascii .= sprintf('\u%04s', bin2hex($utf16));
            }
        }

        return '"'.$ascii.'"';
    }

    /**
     * Decodes a single or double quoted JSON string (PHP implementation)
     *
     * STRINGS RETURNED IN UTF-8 FORMAT
     *
     * @param string $str the reduced input, including both delimiters
     * @return string the unescaped string
     */
    private function decode_string($str) {
        $delim = substr($str, 0, 1);
        $chrs = (string) substr($str, 1, -1);
        $utf8 = '';
        $strlen_chrs = strlen($chrs);

        for ($c = 0; $c < $strlen_chrs; ++$c) {

            $substr_chrs_c_2 = substr($chrs, $c, 2);
            $ord_chrs_c = ord($chrs[$c]);

            switch ($substr_chrs_c_2) {
                case '\b':
                    $utf8 .= chr(0x08);
                    $c+=1;
                    break;
                case '\t':
                    $utf8 .= chr(0x09);
                    $c+=1;
                    break;
                case '\n':
                    $utf8 .= chr(0x0A);
                    $c+=1;
                    break;
                case '\f':
                    $utf8 .= chr(0x0C);
                    $c+=1;
                    break;
                case '\r':
                    $utf8 .= chr(0x0D);
                    $c+=1;
                    break;

                case '\\"':
                case '\\\'':
                case '\\\\':
                case '\\/':
                    // an escaped quote of the other kind is not unescaped
                    // here: only its backslash is skipped
                    if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
                        ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
                        $utf8 .= $chrs[++$c];
                    }
                    break;

                default:
                    if (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                        // single, escaped unicode character
                        $utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
                               . chr(hexdec(substr($chrs, ($c+4), 2)));
                        //$utf8 .= mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
                        $utf8 .= utf16be_to_utf8($utf16);
                        $c+=5;

                    } elseif(($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F)) {
                        $utf8 .= $chrs[$c];

                    } elseif(($len = $this->utf8_sequence_length($ord_chrs_c)) > 0) {
                        // multi-byte UTF-8 sequence, copied through unchanged
                        $utf8 .= substr($chrs, $c, $len);
                        $c += $len - 1;
                    }
                    // any other byte is skipped
                    break;
            }
        }

        return $utf8;
    }

    /**
     * Decodes array "[...]" or object "{...}" notation (PHP implementation)
     *
     * The content between the outer brackets is scanned once. A stack keeps
     * track of nested strings, arrays, objects and comments so that only
     * commas on the outermost level split the content into slices. Each
     * slice is decoded recursively through decode().
     *
     * @param string $str the reduced input, including the outer brackets
     * @return array|stdClass list for "[...]"; associative array or stdClass
     *                        for "{...}" depending on $this->use
     */
    private function decode_container($str) {
        $in_array = ($str[0] == '[');

        if ($in_array || $this->use == JSON_LOOSE_TYPE) {
            $result = array();
        } else {
            $result = new stdClass();
        }

        $stk = array(array('what'  => JSON_SLICE,
                           'where' => 0,
                           'delim' => false));

        $chrs = substr($str, 1, -1);
        $chrs = $this->reduce_string($chrs);

        if ($chrs == '') {
            return $result;
        }

        $strlen_chrs = strlen($chrs);
        $containers  = array(JSON_SLICE, JSON_IN_ARR, JSON_IN_OBJ);

        // runs one position past the end so the last slice is flushed, too
        for ($c = 0; $c <= $strlen_chrs; ++$c) {

            $top = end($stk);
            $substr_chrs_c_2 = substr($chrs, $c, 2);

            if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == JSON_SLICE))) {
                // found a comma that is not inside a string, array, etc.,
                // OR we've reached the end of the character list
                $slice = substr($chrs, $top['where'], ($c - $top['where']));
                array_push($stk, array('what' => JSON_SLICE, 'where' => ($c + 1), 'delim' => false));

                if ($in_array) {
                    // we are in an array, so just append the element
                    $result[] = $this->decode($slice);

                } elseif (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // "name":value pair
                    $key = $this->decode($parts[1]);
                    $val = $this->decode($parts[2]);

                    if ($this->use == JSON_LOOSE_TYPE) {
                        $result[$key] = $val;
                    } else {
                        $result->$key = $val;
                    }

                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // name:value pair, where name is unquoted
                    $key = $parts[1];
                    $val = $this->decode($parts[2]);

                    if ($this->use == JSON_LOOSE_TYPE) {
                        $result[$key] = $val;
                    } else {
                        $result->$key = $val;
                    }
                }

            } elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != JSON_IN_STR)) {
                // found a quote, and we are not inside a string
                array_push($stk, array('what' => JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]));

            } elseif (($chrs[$c] == $top['delim']) &&
                     ($top['what'] == JSON_IN_STR) &&
                     ((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
                // found a quote, we're in a string, and it's not escaped
                // we know that it's not escaped because there is _not_ an
                // odd number of backslashes at the end of the string so far
                array_pop($stk);

            } elseif (($chrs[$c] == '[') && in_array($top['what'], $containers, true)) {
                // found a left-bracket, and we are in an array, object, or slice
                array_push($stk, array('what' => JSON_IN_ARR, 'where' => $c, 'delim' => false));

            } elseif (($chrs[$c] == ']') && ($top['what'] == JSON_IN_ARR)) {
                // found a right-bracket, and we're in an array
                array_pop($stk);

            } elseif (($chrs[$c] == '{') && in_array($top['what'], $containers, true)) {
                // found a left-brace, and we are in an array, object, or slice
                array_push($stk, array('what' => JSON_IN_OBJ, 'where' => $c, 'delim' => false));

            } elseif (($chrs[$c] == '}') && ($top['what'] == JSON_IN_OBJ)) {
                // found a right-brace, and we're in an object
                array_pop($stk);

            } elseif (($substr_chrs_c_2 == '/*') && in_array($top['what'], $containers, true)) {
                // found a comment start, and we are in an array, object, or slice
                array_push($stk, array('what' => JSON_IN_CMT, 'where' => $c, 'delim' => false));
                $c++;

            } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == JSON_IN_CMT)) {
                // found a comment end, and we're in one now
                array_pop($stk);
                $c++;

                // blank out the whole comment so it can not end up in a slice
                $len  = $c - $top['where'] + 1;
                $chrs = substr_replace($chrs, str_repeat(' ', $len), $top['where'], $len);
            }
        }

        return $result;
    }
}