6 Copyright (c) 2005 JSON.org
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice shall be included in all
16 copies or substantial portions of the Software.
18 The Software shall be used for Good, not Evil.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 #include "hphp/runtime/ext/json/JSON_parser.h"
32 #include "hphp/runtime/base/complex-types.h"
33 #include "hphp/runtime/base/type-conversions.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/utf8-decode.h"
36 #include "hphp/system/systemlib.h"
37 #include "hphp/runtime/base/thread-init-fini.h"
38 #include "hphp/runtime/ext/json/ext_json.h"
39 #include "hphp/runtime/ext/ext_collections.h"
/* Length bound for integer literals: json_create_zval compares the buffered
   literal's length against MAX_LENGTH_OF_LONG - 1 to decide whether it may
   be too large to store as a 64-bit integer. */
41 #define MAX_LENGTH_OF_LONG 20
/* Decimal digits of 2^63, the magnitude of the smallest 64-bit signed
   integer. json_create_zval strcmp()s a literal's digits (sign skipped)
   against this string when its length equals MAX_LENGTH_OF_LONG - 1. */
42 static const char long_min_digits
[] = "9223372036854775808";
58 Characters are mapped into these 32 symbol classes. This allows for
59 significant reductions in the size of the state transition table.
68 /* other whitespace */
155 /* everything else */
160 This table maps the 128 ASCII characters into the 32 character classes.
161 The remaining Unicode characters should be mapped to S_ETC.
163 static const int ascii_class
[128] = {
164 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
165 S_ERR
, S_WSP
, S_WSP
, S_ERR
, S_ERR
, S_WSP
, S_ERR
, S_ERR
,
166 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
167 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
169 S_SPA
, S_ETC
, S_QUO
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
170 S_ETC
, S_ETC
, S_ETC
, S_PLU
, S_COM
, S_MIN
, S_DOT
, S_SLA
,
171 S_ZER
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
,
172 S_DIG
, S_DIG
, S_COL
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
174 S_ETC
, S_A_F
, S_A_F
, S_A_F
, S_A_F
, S_E
, S_A_F
, S_ETC
,
175 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
176 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
177 S_ETC
, S_ETC
, S_ETC
, S_LBT
, S_BAC
, S_RBT
, S_ETC
, S_ETC
,
179 S_ETC
, S__A_
, S__B_
, S__C_
, S__D_
, S__E_
, S__F_
, S_ETC
,
180 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S__L_
, S_ETC
, S__N_
, S_ETC
,
181 S_ETC
, S_ETC
, S__R_
, S__S_
, S__T_
, S__U_
, S_ETC
, S_ETC
,
182 S_ETC
, S_ETC
, S_ETC
, S_LBE
, S_ETC
, S_RBE
, S_ETC
, S_ETC
186 static const int loose_ascii_class
[128] = {
187 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
188 S_ERR
, S_WSP
, S_WSP
, S_ERR
, S_ERR
, S_WSP
, S_ERR
, S_ERR
,
189 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
190 S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
, S_ERR
,
192 S_SPA
, S_ETC
, S_QUO
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_QUO
,
193 S_ETC
, S_ETC
, S_ETC
, S_PLU
, S_COM
, S_MIN
, S_DOT
, S_SLA
,
194 S_ZER
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
, S_DIG
,
195 S_DIG
, S_DIG
, S_COL
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
197 S_ETC
, S_A_F
, S_A_F
, S_A_F
, S_A_F
, S_E
, S_A_F
, S_ETC
,
198 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
199 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
, S_ETC
,
200 S_ETC
, S_ETC
, S_ETC
, S_LBT
, S_BAC
, S_RBT
, S_ETC
, S_ETC
,
202 S_ETC
, S__A_
, S__B_
, S__C_
, S__D_
, S__E_
, S__F_
, S_ETC
,
203 S_ETC
, S_ETC
, S_ETC
, S_ETC
, S__L_
, S_ETC
, S__N_
, S_ETC
,
204 S_ETC
, S_ETC
, S__R_
, S__S_
, S__T_
, S__U_
, S_ETC
, S_ETC
,
205 S_ETC
, S_ETC
, S_ETC
, S_LBE
, S_ETC
, S_RBE
, S_ETC
, S_ETC
212 The state transition table takes the current state and the current symbol,
213 and returns either a new state or an action. A new state is a number between
214 0 and 29. An action is a negative number between -1 and -9. A JSON text is
215 accepted if the end of the text is in state 9 and mode is MODE_DONE.
217 static const int state_transition_table
[30][31] = {
218 /* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
219 /* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
220 /* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
221 /* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
222 /* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
223 /* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
224 /* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
225 /* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
226 /* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
227 /* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
228 /*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
229 /*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
230 /*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
231 /*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
232 /*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
233 /*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
234 /*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
235 /*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
236 /*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
237 /*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
238 /*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
239 /*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
240 /*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
241 /*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
242 /*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
243 /*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
244 /*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
245 /*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
246 /*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
247 /*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
252 Alternate "loose" transition table to support unquoted keys.
254 static const int loose_state_transition_table
[31][31] = {
255 /* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
256 /* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
257 /* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
258 /* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
259 /* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
260 /* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
261 /* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
262 /* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
263 /* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
264 /* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
265 /*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
266 /*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
267 /*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
268 /*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
269 /*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
270 /*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
271 /*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
272 /*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
273 /*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
274 /*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
275 /*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
276 /*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
277 /*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
278 /*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
279 /*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
280 /*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
281 /*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
282 /*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
283 /*28*/ {28,28,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
284 /*29*/ {29,29,-1,-7,-1,-1,-1,-7, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
285 /*30*/ {30,-1,30,30,30,30,-10,30,-4,4,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30}
290 #define JSON_PARSER_DEFAULT_DEPTH 512
293 * A stack maintains the states of nested structures.
296 std::vector
<int> the_stack
;
297 std::vector
<Variant
> the_zstack
;
298 std::vector
<String
> the_kstack
;
300 int the_mark
; // the watermark
302 json_error_codes error_code
;
303 json_parser() : the_stack(JSON_PARSER_DEFAULT_DEPTH
),
304 the_zstack(JSON_PARSER_DEFAULT_DEPTH
),
305 the_kstack(JSON_PARSER_DEFAULT_DEPTH
) {};
309 IMPLEMENT_THREAD_LOCAL(json_parser
, s_json_parser
);
311 // In Zend, the json_parser struct is publicly
312 // accessible. Thus the fields could be accessed
313 // directly. Just using setter/accessor functions
314 // to get around that.
315 json_error_codes
json_get_last_error_code() {
316 return s_json_parser
->error_code
;
318 void json_set_last_error_code(json_error_codes ec
) {
319 s_json_parser
->error_code
= ec
;
322 const char *json_get_last_error_msg() {
323 switch (s_json_parser
->error_code
) {
324 case JSON_ERROR_NONE
:
326 case JSON_ERROR_DEPTH
:
327 return "Maximum stack depth exceeded";
328 case JSON_ERROR_STATE_MISMATCH
:
329 return "State mismatch (invalid or malformed JSON)";
330 case JSON_ERROR_CTRL_CHAR
:
331 return "Control character error, possibly incorrectly encoded";
332 case JSON_ERROR_SYNTAX
:
333 return "Syntax error";
334 case JSON_ERROR_UTF8
:
335 return "Malformed UTF-8 characters, possibly incorrectly encoded";
337 return "Unknown error";
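341 // For each request, make sure we start with the default error code.
342 // An inline lambda performs that reset. NOTE(review): it is registered with When::ThreadInit, which suggests it runs once per thread rather than once per request -- confirm intent.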
343 static InitFiniNode
init(
344 []{ s_json_parser
->error_code
= JSON_ERROR_NONE
; },
345 InitFiniNode::When::ThreadInit
348 class JsonParserCleaner
{
350 explicit JsonParserCleaner(json_parser
*json
) : m_json(json
) {}
351 ~JsonParserCleaner() {
352 for (int i
= 0; i
<= m_json
->the_mark
; i
++) {
353 m_json
->the_zstack
[i
].unset();
354 m_json
->the_kstack
[i
].reset();
362 * These modes can be pushed on the PDA stack.
366 #define MODE_OBJECT 3
370 * Push a mode onto the stack. Return false if there is overflow.
372 static int push(json_parser
*json
, int mode
) {
373 if (json
->the_top
+ 1 >= json
->depth
) {
377 json
->the_stack
[json
->the_top
] = mode
;
378 if (json
->the_top
> json
->the_mark
) {
379 json
->the_mark
= json
->the_top
;
386 * Pop the stack, assuring that the current mode matches the expectation.
387 * Return false if there is underflow or if the modes mismatch.
389 static int pop(json_parser
*json
, int mode
) {
390 if (json
->the_top
< 0 || json
->the_stack
[json
->the_top
] != mode
) {
393 json
->the_stack
[json
->the_top
] = 0;
/*
 * Convert one hexadecimal digit character to its numeric value:
 * '0'-'9' yield 0-9, and 'A'-'F' or 'a'-'f' yield 10-15.
 * The parser shifts and sums four of these results (<< 12, << 8, << 4, << 0)
 * to assemble the UTF-16 code unit of a \uXXXX escape.
 * NOTE(review): the return path for a non-hex character is not visible in
 * this excerpt -- confirm against the full file.
 */
398 static int dehexchar(char c
) {
399 if (c
>= '0' && c
<= '9') return c
- '0';
400 if (c
>= 'A' && c
<= 'F') return c
- ('A' - 10);
401 if (c
>= 'a' && c
<= 'f') return c
- ('a' - 10);
405 static void json_create_zval(Variant
&z
, StringBuffer
&buf
, int type
,
411 const char *p
= buf
.data();
418 bool neg
= *buf
.data() == '-';
420 int len
= buf
.size();
422 if (len
>= MAX_LENGTH_OF_LONG
- 1) {
423 if (len
== MAX_LENGTH_OF_LONG
- 1) {
424 int cmp
= strcmp(p
+ (neg
? 1 : 0), long_min_digits
);
425 if (!(cmp
< 0 || (cmp
== 0 && neg
))) {
434 if (options
& k_JSON_BIGINT_AS_STRING
) {
440 z
= int64_t(strtoll(buf
.data(), nullptr, 10));
445 z
= buf
.data() ? strtod(buf
.data(), NULL
) : 0.0;
451 z
= (buf
.data() && (*buf
.data() == 't'));
459 void utf16_to_utf8(StringBuffer
&buf
, unsigned short utf16
) {
461 buf
.append((char)utf16
);
462 } else if (utf16
< 0x800) {
463 buf
.append((char)(0xc0 | (utf16
>> 6)));
464 buf
.append((char)(0x80 | (utf16
& 0x3f)));
465 } else if ((utf16
& 0xfc00) == 0xdc00
467 && ((unsigned char)buf
.data()[buf
.size() - 3]) == 0xed
468 && ((unsigned char)buf
.data()[buf
.size() - 2] & 0xf0) == 0xa0
469 && ((unsigned char)buf
.data()[buf
.size() - 1] & 0xc0) == 0x80) {
470 /* found surrogate pair */
473 utf32
= (((buf
.data()[buf
.size() - 2] & 0xf) << 16)
474 | ((buf
.data()[buf
.size() - 1] & 0x3f) << 10)
475 | (utf16
& 0x3ff)) + 0x10000;
476 buf
.resize(buf
.size() - 3);
478 buf
.append((char)(0xf0 | (utf32
>> 18)));
479 buf
.append((char)(0x80 | ((utf32
>> 12) & 0x3f)));
480 buf
.append((char)(0x80 | ((utf32
>> 6) & 0x3f)));
481 buf
.append((char)(0x80 | (utf32
& 0x3f)));
483 buf
.append((char)(0xe0 | (utf16
>> 12)));
484 buf
.append((char)(0x80 | ((utf16
>> 6) & 0x3f)));
485 buf
.append((char)(0x80 | (utf16
& 0x3f)));
489 StaticString
s__empty_("_empty_");
491 static void object_set(Variant
&var
,
493 const Variant
& value
,
497 // We know it is stdClass, and everything is public (and dynamic).
499 var
.getObjectData()->o_set(s__empty_
, value
);
501 var
.getObjectData()->o_set(key
, value
);
505 auto keyTV
= make_tv
<KindOfString
>(key
.get());
506 collectionSet(var
.getObjectData(), &keyTV
, cvarToCell(&value
));
508 forceToArray(var
).set(key
, value
);
513 static void attach_zval(json_parser
*json
,
517 if (json
->the_top
< 1) {
521 Variant
&root
= json
->the_zstack
[json
->the_top
- 1];
522 Variant
&child
= json
->the_zstack
[json
->the_top
];
523 int up_mode
= json
->the_stack
[json
->the_top
- 1];
525 if (up_mode
== MODE_ARRAY
) {
527 collectionAppend(root
.getObjectData(), child
.asCell());
529 root
.toArrRef().append(child
);
531 } else if (up_mode
== MODE_OBJECT
) {
532 object_set(root
, key
, child
, assoc
, collections
);
536 #define SWAP_BUFFERS(from, to) do { \
537 StringBuffer *tmp = from; \
541 #define JSON_RESET_TYPE() do { type = -1; } while(0);
544 * The JSON_parser takes a UTF-8 encoded string and determines if it is a
545 * syntactically correct JSON text. Along the way, it creates a PHP variable.
547 * It is implemented as a Pushdown Automaton; that means it is a finite state
548 * machine with a stack.
550 bool JSON_parser(Variant
&z
, const char *p
, int length
, bool const assoc
,
551 int depth
, int64_t options
) {
552 int b
; /* the next character */
553 int c
; /* the next character class */
554 int s
; /* the next state */
555 json_parser
*the_json
= s_json_parser
.get(); /* the parser state */
556 JsonParserCleaner
cleaner(the_json
);
560 bool const loose
= options
& k_JSON_FB_LOOSE
;
561 bool const stable_maps
= options
& k_JSON_FB_STABLE_MAPS
;
562 bool const collections
= stable_maps
|| (options
& k_JSON_FB_COLLECTIONS
);
564 int const *byte_class
;
566 byte_class
= loose_ascii_class
;
568 byte_class
= ascii_class
;
572 StringBuffer
sb_buf(127), sb_key(127);
573 StringBuffer
*buf
= &sb_buf
;
574 StringBuffer
*key
= &sb_key
;
577 unsigned short utf16
= 0;
579 the_json
->depth
= depth
;
580 // Since the stack is maintained on a per request basis, for performance
581 // reasons, it only makes sense to expand if necessary and cycles are wasted
582 // contracting. Calls with a depth other than default should be rare.
583 if (depth
> the_json
->the_stack
.size()) {
584 the_json
->the_stack
.resize(depth
);
585 the_json
->the_zstack
.resize(depth
);
586 the_json
->the_kstack
.resize(depth
);
589 the_json
->the_mark
= the_json
->the_top
= -1;
590 push(the_json
, MODE_DONE
);
592 UTF8To16Decoder
decoder(p
, length
, loose
);
594 b
= decoder
.decode();
595 if (b
== UTF8_END
) break; // UTF-8 decoding finishes successfully.
596 if (b
== UTF8_ERROR
) {
597 s_json_parser
->error_code
= JSON_ERROR_UTF8
;
602 if ((b
& 127) == b
) {
607 s_json_parser
->error_code
= JSON_ERROR_CTRL_CHAR
;
614 Get the next state from the transition table.
619 s
= loose_state_transition_table
[the_state
][c
];
621 s
= state_transition_table
[the_state
][c
];
635 Perform one of the predefined actions.
642 attach_zval(the_json
, the_json
->the_kstack
[the_json
->the_top
], assoc
,
645 if (!pop(the_json
, MODE_KEY
)) {
654 if (!push(the_json
, MODE_KEY
)) {
655 s_json_parser
->error_code
= JSON_ERROR_DEPTH
;
660 if (the_json
->the_top
> 0) {
661 Variant
&top
= the_json
->the_zstack
[the_json
->the_top
];
662 if (the_json
->the_top
== 1) {
669 // stable_maps is meaningless
670 top
= NEWOBJ(c_Map
)();
674 top
= SystemLib::AllocStdClassObject();
676 top
= Array::Create();
681 the_json
->the_kstack
[the_json
->the_top
] = key
->detach();
689 /*** BEGIN Facebook: json_utf8_loose ***/
691 If this is a trailing comma in an object definition,
692 we're in MODE_KEY. In that case, throw that off the
693 stack and restore MODE_OBJECT so that we pretend the
694 trailing comma just didn't happen.
697 if (pop(the_json
, MODE_KEY
)) {
698 push(the_json
, MODE_OBJECT
);
701 /*** END Facebook: json_utf8_loose ***/
704 the_json
->the_stack
[the_json
->the_top
] == MODE_OBJECT
) {
706 json_create_zval(mval
, *buf
, type
, options
);
707 Variant
&top
= the_json
->the_zstack
[the_json
->the_top
];
708 object_set(top
, key
->detach(), mval
, assoc
, collections
);
713 attach_zval(the_json
, the_json
->the_kstack
[the_json
->the_top
],
716 if (!pop(the_json
, MODE_OBJECT
)) {
717 s_json_parser
->error_code
= JSON_ERROR_STATE_MISMATCH
;
726 if (!push(the_json
, MODE_ARRAY
)) {
727 s_json_parser
->error_code
= JSON_ERROR_DEPTH
;
732 if (the_json
->the_top
> 0) {
733 Variant
&top
= the_json
->the_zstack
[the_json
->the_top
];
734 if (the_json
->the_top
== 1) {
741 top
= NEWOBJ(c_Vector
)();
743 top
= Array::Create();
746 the_json
->the_kstack
[the_json
->the_top
] = key
->detach();
756 the_json
->the_stack
[the_json
->the_top
] == MODE_ARRAY
) {
758 json_create_zval(mval
, *buf
, type
, options
);
759 auto& top
= the_json
->the_zstack
[the_json
->the_top
];
761 collectionAppend(top
.getObjectData(), mval
.asCell());
763 top
.toArrRef().append(mval
);
769 attach_zval(the_json
, the_json
->the_kstack
[the_json
->the_top
],
772 if (!pop(the_json
, MODE_ARRAY
)) {
773 s_json_parser
->error_code
= JSON_ERROR_STATE_MISMATCH
;
783 switch (the_json
->the_stack
[the_json
->the_top
]) {
786 SWAP_BUFFERS(buf
, key
);
794 if (type
== KindOfString
) {
799 /* fall through if not KindOfString */
801 s_json_parser
->error_code
= JSON_ERROR_SYNTAX
;
812 (the_json
->the_stack
[the_json
->the_top
] == MODE_OBJECT
||
813 the_json
->the_stack
[the_json
->the_top
] == MODE_ARRAY
)) {
814 json_create_zval(mval
, *buf
, type
, options
);
817 switch (the_json
->the_stack
[the_json
->the_top
]) {
819 if (pop(the_json
, MODE_OBJECT
) &&
820 push(the_json
, MODE_KEY
)) {
822 Variant
&top
= the_json
->the_zstack
[the_json
->the_top
];
823 object_set(top
, key
->detach(), mval
, assoc
, collections
);
830 auto& top
= the_json
->the_zstack
[the_json
->the_top
];
832 collectionAppend(top
.getObjectData(), mval
.asCell());
834 top
.toArrRef().append(mval
);
840 s_json_parser
->error_code
= JSON_ERROR_SYNTAX
;
850 : (after unquoted string)
853 if (the_json
->the_stack
[the_json
->the_top
] == MODE_KEY
) {
855 SWAP_BUFFERS(buf
, key
);
868 if (pop(the_json
, MODE_KEY
) && push(the_json
, MODE_OBJECT
)) {
876 s_json_parser
->error_code
= JSON_ERROR_SYNTAX
;
881 Change the state and iterate.
883 if (type
== KindOfString
) {
884 if (/*<fb>*/(/*</fb>*/s
== 3/*<fb>*/ || s
== 30)/*</fb>*/ &&
886 if (the_state
!= 4) {
887 utf16_to_utf8(*buf
, b
);
890 case 'b': buf
->append('\b'); break;
891 case 't': buf
->append('\t'); break;
892 case 'n': buf
->append('\n'); break;
893 case 'f': buf
->append('\f'); break;
894 case 'r': buf
->append('\r'); break;
896 utf16_to_utf8(*buf
, b
);
901 utf16
= dehexchar(b
) << 12;
903 utf16
+= dehexchar(b
) << 8;
905 utf16
+= dehexchar(b
) << 4;
906 } else if (s
== 3 && the_state
== 8) {
907 utf16
+= dehexchar(b
);
908 utf16_to_utf8(*buf
, utf16
);
910 } else if ((type
< 0 || type
== KindOfNull
) &&
911 (c
== S_DIG
|| c
== S_ZER
)) {
913 buf
->append((char)b
);
914 } else if (type
== KindOfInt64
&& s
== 24) {
916 buf
->append((char)b
);
917 } else if ((type
< 0 || type
== KindOfNull
|| type
== KindOfInt64
) &&
920 buf
->append((char)b
);
921 } else if (type
!= KindOfString
&& c
== S_QUO
) {
923 /*<fb>*/qchr
= b
;/*</fb>*/
924 } else if ((type
< 0 || type
== KindOfNull
|| type
== KindOfInt64
||
925 type
== KindOfDouble
) &&
926 ((the_state
== 12 && s
== 9) ||
927 (the_state
== 16 && s
== 9))) {
928 type
= KindOfBoolean
;
929 } else if (type
< 0 && the_state
== 19 && s
== 9) {
931 } else if (type
!= KindOfString
&& c
> S_WSP
) {
932 utf16_to_utf8(*buf
, b
);
939 if (the_state
== 9 && pop(the_json
, MODE_DONE
)) {
940 s_json_parser
->error_code
= JSON_ERROR_NONE
;
944 s_json_parser
->error_code
= JSON_ERROR_SYNTAX
;