Remove Variant's append and appendRef member functions
[hiphop-php.git] / hphp / runtime / ext / json / JSON_parser.cpp
blob3cf6aed430a0f466395cfd41b3c603c1d0a9cc40
1 /* JSON_parser.c */
3 /* 2005-12-30 */
5 /*
6 Copyright (c) 2005 JSON.org
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice shall be included in all
16 copies or substantial portions of the Software.
18 The Software shall be used for Good, not Evil.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 SOFTWARE.
30 #include "hphp/runtime/ext/json/JSON_parser.h"
31 #include <vector>
32 #include "hphp/runtime/base/complex-types.h"
33 #include "hphp/runtime/base/type-conversions.h"
34 #include "hphp/runtime/base/builtin-functions.h"
35 #include "hphp/runtime/base/utf8-decode.h"
36 #include "hphp/system/systemlib.h"
37 #include "hphp/runtime/base/thread-init-fini.h"
38 #include "hphp/runtime/ext/json/ext_json.h"
39 #include "hphp/runtime/ext/ext_collections.h"
/* Digit-count threshold used by json_create_zval(): an integer literal with
   MAX_LENGTH_OF_LONG - 1 (19) or more digits, sign excluded, may not fit in
   an int64_t and gets the extra range check. */
41 #define MAX_LENGTH_OF_LONG 20
/* The 19 decimal digits of 2^63. json_create_zval() strcmp()s a 19-digit
   literal against this string: smaller values fit in an int64_t, and an equal
   value fits only when the literal is negative. */
42 static const char long_min_digits[] = "9223372036854775808";
44 namespace HPHP {
/* Discard any earlier macro definitions of true/false and make both expand to
   the integer literals 1 and 0 for the rest of this translation unit.
   NOTE(review): ISO C++ does not allow macros named after keywords once
   standard headers are included -- confirm whether this legacy shim from the
   C version of the parser is still needed. */
46 #ifdef true
47 # undef true
48 #endif
50 #ifdef false
51 # undef false
52 #endif
54 #define true 1
55 #define false 0
/*
  Characters are mapped into the 31 symbol classes below (numbered 0..30),
  plus S_ERR for bytes the parser rejects outright. This allows for
  significant reductions in the size of the state transition table, whose
  columns are indexed by these class numbers.
*/

/* error */
#define S_ERR -1

/* space */
#define S_SPA 0

/* other whitespace */
#define S_WSP 1

/* { */
#define S_LBE 2

/* } */
#define S_RBE 3

/* [ */
#define S_LBT 4

/* ] */
#define S_RBT 5

/* : */
#define S_COL 6

/* , */
#define S_COM 7

/* " */
#define S_QUO 8

/* \ */
#define S_BAC 9

/* / */
#define S_SLA 10

/* + */
#define S_PLU 11

/* - */
#define S_MIN 12

/* . */
#define S_DOT 13

/* 0 */
#define S_ZER 14

/* 123456789 */
#define S_DIG 15

/* a */
#define S__A_ 16

/* b */
#define S__B_ 17

/* c */
#define S__C_ 18

/* d */
#define S__D_ 19

/* e */
#define S__E_ 20

/* f */
#define S__F_ 21

/* l */
#define S__L_ 22

/* n */
#define S__N_ 23

/* r */
#define S__R_ 24

/* s */
#define S__S_ 25

/* t */
#define S__T_ 26

/* u */
#define S__U_ 27

/* ABCDF */
#define S_A_F 28

/* E */
#define S_E 29

/* everything else */
#define S_ETC 30

/*
  This table maps the 128 ASCII characters into the character classes above.
  The remaining Unicode characters should be mapped to S_ETC.
*/
static const int ascii_class[128] = {
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
  S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR,
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,

  S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA,
  S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG,
  S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,

  S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E  , S_A_F, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC,

  S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC,
  S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC
};

/*<fb>*/
/*
  Class table used in "loose" mode. It is identical to ascii_class except
  that the apostrophe (') is classified as S_QUO, so single-quoted strings
  are accepted.
*/
static const int loose_ascii_class[128] = {
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
  S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR,
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR,

  S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_QUO,
  S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA,
  S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG,
  S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,

  S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E  , S_A_F, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC,

  S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC,
  S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC,
  S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC,
  S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC
};
/*</fb>*/
/*
  The state transition table takes the current state and the current symbol,
  and returns either a new state or an action. A new state is a number between
  0 and 29 (the loose table adds state 30). An action is a negative number
  between -1 and -9 (the loose table adds -10). A JSON text is accepted if the
  end of the text is in state 9 and mode is MODE_DONE.

  Rows are indexed by state, columns by symbol class (S_SPA .. S_ETC).
*/
static const int state_transition_table[30][31] = {
/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
/*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
};

/*<fb>*/
/*
  Alternate "loose" transition table to support unquoted keys.
  State 30 is the extra state in which an unquoted key is being read, and
  action -10 is taken on ':' while in that state.
*/
static const int loose_state_transition_table[31][31] = {
/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
/*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*28*/ {28,28,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/*29*/ {29,29,-1,-7,-1,-1,-1,-7, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
/*30*/ {30,-1,30,30,30,30,-10,30,-4,4,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30}
};
/*</fb>*/
290 #define JSON_PARSER_DEFAULT_DEPTH 512
293 * A stack maintains the states of nested structures.
295 struct json_parser {
296 std::vector<int> the_stack;
297 std::vector<Variant> the_zstack;
298 std::vector<String> the_kstack;
299 int the_top;
300 int the_mark; // the watermark
301 int depth;
302 json_error_codes error_code;
303 json_parser() : the_stack(JSON_PARSER_DEFAULT_DEPTH),
304 the_zstack(JSON_PARSER_DEFAULT_DEPTH),
305 the_kstack(JSON_PARSER_DEFAULT_DEPTH) {};
309 IMPLEMENT_THREAD_LOCAL(json_parser, s_json_parser);
311 // In Zend, the json_parser struct is publicly
312 // accessible. Thus the fields could be accessed
313 // directly. Just using setter/accessor functions
314 // to get around that.
315 json_error_codes json_get_last_error_code() {
316 return s_json_parser->error_code;
318 void json_set_last_error_code(json_error_codes ec) {
319 s_json_parser->error_code = ec;
322 const char *json_get_last_error_msg() {
323 switch (s_json_parser->error_code) {
324 case JSON_ERROR_NONE:
325 return "No error";
326 case JSON_ERROR_DEPTH:
327 return "Maximum stack depth exceeded";
328 case JSON_ERROR_STATE_MISMATCH:
329 return "State mismatch (invalid or malformed JSON)";
330 case JSON_ERROR_CTRL_CHAR:
331 return "Control character error, possibly incorrectly encoded";
332 case JSON_ERROR_SYNTAX:
333 return "Syntax error";
334 case JSON_ERROR_UTF8:
335 return "Malformed UTF-8 characters, possibly incorrectly encoded";
336 default:
337 return "Unknown error";
341 // For each request, make sure we start with the default error code.
342 // Inline the function to do that reset.
343 static InitFiniNode init(
344 []{ s_json_parser->error_code = JSON_ERROR_NONE; },
345 InitFiniNode::When::ThreadInit
348 class JsonParserCleaner {
349 public:
350 explicit JsonParserCleaner(json_parser *json) : m_json(json) {}
351 ~JsonParserCleaner() {
352 for (int i = 0; i <= m_json->the_mark; i++) {
353 m_json->the_zstack[i].unset();
354 m_json->the_kstack[i].reset();
357 private:
358 json_parser *m_json;
/**
 * These modes can be pushed on the PDA stack.
 */
#define MODE_DONE 1    /* bottom of the stack: a complete top-level value */
#define MODE_KEY 2     /* inside an object, expecting a key */
#define MODE_OBJECT 3  /* inside an object, expecting a value */
#define MODE_ARRAY 4   /* inside an array */
370 * Push a mode onto the stack. Return false if there is overflow.
372 static int push(json_parser *json, int mode) {
373 if (json->the_top + 1 >= json->depth) {
374 return false;
376 json->the_top += 1;
377 json->the_stack[json->the_top] = mode;
378 if (json->the_top > json->the_mark) {
379 json->the_mark = json->the_top;
381 return true;
386 * Pop the stack, assuring that the current mode matches the expectation.
387 * Return false if there is underflow or if the modes mismatch.
389 static int pop(json_parser *json, int mode) {
390 if (json->the_top < 0 || json->the_stack[json->the_top] != mode) {
391 return false;
393 json->the_stack[json->the_top] = 0;
394 json->the_top -= 1;
395 return true;
/**
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any other
 * character.
 */
static int dehexchar(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'A' && c <= 'F') return c - ('A' - 10);
  if (c >= 'a' && c <= 'f') return c - ('a' - 10);
  return -1;
}
405 static void json_create_zval(Variant &z, StringBuffer &buf, int type,
406 int64_t options) {
407 switch (type) {
408 case KindOfInt64:
410 bool bigint = false;
411 const char *p = buf.data();
412 assert(p);
413 if (p == NULL) {
414 z = int64_t(0);
415 return;
418 bool neg = *buf.data() == '-';
420 int len = buf.size();
421 if (neg) len--;
422 if (len >= MAX_LENGTH_OF_LONG - 1) {
423 if (len == MAX_LENGTH_OF_LONG - 1) {
424 int cmp = strcmp(p + (neg ? 1 : 0), long_min_digits);
425 if (!(cmp < 0 || (cmp == 0 && neg))) {
426 bigint = true;
428 } else {
429 bigint = true;
433 if (bigint) {
434 if (options & k_JSON_BIGINT_AS_STRING) {
435 z = buf.detach();
436 } else {
437 z = strtod(p, NULL);
439 } else {
440 z = int64_t(strtoll(buf.data(), nullptr, 10));
443 break;
444 case KindOfDouble:
445 z = buf.data() ? strtod(buf.data(), NULL) : 0.0;
446 break;
447 case KindOfString:
448 z = buf.detach();
449 break;
450 case KindOfBoolean:
451 z = (buf.data() && (*buf.data() == 't'));
452 break;
453 default:
454 z = uninit_null();
455 break;
459 void utf16_to_utf8(StringBuffer &buf, unsigned short utf16) {
460 if (utf16 < 0x80) {
461 buf.append((char)utf16);
462 } else if (utf16 < 0x800) {
463 buf.append((char)(0xc0 | (utf16 >> 6)));
464 buf.append((char)(0x80 | (utf16 & 0x3f)));
465 } else if ((utf16 & 0xfc00) == 0xdc00
466 && buf.size() >= 3
467 && ((unsigned char)buf.data()[buf.size() - 3]) == 0xed
468 && ((unsigned char)buf.data()[buf.size() - 2] & 0xf0) == 0xa0
469 && ((unsigned char)buf.data()[buf.size() - 1] & 0xc0) == 0x80) {
470 /* found surrogate pair */
471 unsigned long utf32;
473 utf32 = (((buf.data()[buf.size() - 2] & 0xf) << 16)
474 | ((buf.data()[buf.size() - 1] & 0x3f) << 10)
475 | (utf16 & 0x3ff)) + 0x10000;
476 buf.resize(buf.size() - 3);
478 buf.append((char)(0xf0 | (utf32 >> 18)));
479 buf.append((char)(0x80 | ((utf32 >> 12) & 0x3f)));
480 buf.append((char)(0x80 | ((utf32 >> 6) & 0x3f)));
481 buf.append((char)(0x80 | (utf32 & 0x3f)));
482 } else {
483 buf.append((char)(0xe0 | (utf16 >> 12)));
484 buf.append((char)(0x80 | ((utf16 >> 6) & 0x3f)));
485 buf.append((char)(0x80 | (utf16 & 0x3f)));
489 StaticString s__empty_("_empty_");
491 static void object_set(Variant &var,
492 const String& key,
493 const Variant& value,
494 int assoc,
495 bool collections) {
496 if (!assoc) {
497 // We know it is stdClass, and everything is public (and dynamic).
498 if (key.empty()) {
499 var.getObjectData()->o_set(s__empty_, value);
500 } else {
501 var.getObjectData()->o_set(key, value);
503 } else {
504 if (collections) {
505 auto keyTV = make_tv<KindOfString>(key.get());
506 collectionSet(var.getObjectData(), &keyTV, cvarToCell(&value));
507 } else {
508 forceToArray(var).set(key, value);
513 static void attach_zval(json_parser *json,
514 const String& key,
515 int assoc,
516 bool collections) {
517 if (json->the_top < 1) {
518 return;
521 Variant &root = json->the_zstack[json->the_top - 1];
522 Variant &child = json->the_zstack[json->the_top];
523 int up_mode = json->the_stack[json->the_top - 1];
525 if (up_mode == MODE_ARRAY) {
526 if (collections) {
527 collectionAppend(root.getObjectData(), child.asCell());
528 } else {
529 root.toArrRef().append(child);
531 } else if (up_mode == MODE_OBJECT) {
532 object_set(root, key, child, assoc, collections);
/*
 * Exchange two StringBuffer* variables.
 *
 * Both macros are wrapped in do { ... } while (0) WITHOUT a trailing
 * semicolon, so that `MACRO(...);` is exactly one statement and can be used
 * as the unbraced body of an if/else.
 */
#define SWAP_BUFFERS(from, to) do { \
    StringBuffer *tmp = (from); \
    (from) = (to); \
    (to) = tmp; \
  } while (0)

/* Mark the local variable `type` as "no pending scalar value" (-1). */
#define JSON_RESET_TYPE() do { type = -1; } while (0)
544 * The JSON_parser takes a UTF-8 encoded string and determines if it is a
545 * syntactically correct JSON text. Along the way, it creates a PHP variable.
547 * It is implemented as a Pushdown Automaton; that means it is a finite state
548 * machine with a stack.
550 bool JSON_parser(Variant &z, const char *p, int length, bool const assoc,
551 int depth, int64_t options) {
552 int b; /* the next character */
553 int c; /* the next character class */
554 int s; /* the next state */
555 json_parser *the_json = s_json_parser.get(); /* the parser state */
556 JsonParserCleaner cleaner(the_json);
557 int the_state = 0;
559 /*<fb>*/
560 bool const loose = options & k_JSON_FB_LOOSE;
561 bool const stable_maps = options & k_JSON_FB_STABLE_MAPS;
562 bool const collections = stable_maps || (options & k_JSON_FB_COLLECTIONS);
563 int qchr = 0;
564 int const *byte_class;
565 if (loose) {
566 byte_class = loose_ascii_class;
567 } else {
568 byte_class = ascii_class;
570 /*</fb>*/
572 StringBuffer sb_buf(127), sb_key(127);
573 StringBuffer *buf = &sb_buf;
574 StringBuffer *key = &sb_key;
576 int type = -1;
577 unsigned short utf16 = 0;
579 the_json->depth = depth;
580 // Since the stack is maintainined on a per request basis, for performance
581 // reasons, it only makes sense to expand if necessary and cycles are wasted
582 // contracting. Calls with a depth other than default should be rare.
583 if (depth > the_json->the_stack.size()) {
584 the_json->the_stack.resize(depth);
585 the_json->the_zstack.resize(depth);
586 the_json->the_kstack.resize(depth);
589 the_json->the_mark = the_json->the_top = -1;
590 push(the_json, MODE_DONE);
592 UTF8To16Decoder decoder(p, length, loose);
593 for (;;) {
594 b = decoder.decode();
595 if (b == UTF8_END) break; // UTF-8 decoding finishes successfully.
596 if (b == UTF8_ERROR) {
597 s_json_parser->error_code = JSON_ERROR_UTF8;
598 return false;
600 assert(b >= 0);
602 if ((b & 127) == b) {
603 /*<fb>*/
604 c = byte_class[b];
605 /*</fb>*/
606 if (c <= S_ERR) {
607 s_json_parser->error_code = JSON_ERROR_CTRL_CHAR;
608 return false;
610 } else {
611 c = S_ETC;
614 Get the next state from the transition table.
617 /*<fb>*/
618 if (loose) {
619 s = loose_state_transition_table[the_state][c];
620 } else {
621 s = state_transition_table[the_state][c];
624 if (s == -4) {
625 if (b != qchr) {
626 s = 3;
627 } else {
628 qchr = 0;
631 /*</fb>*/
633 if (s < 0) {
635 Perform one of the predefined actions.
637 switch (s) {
639 empty }
641 case -9:
642 attach_zval(the_json, the_json->the_kstack[the_json->the_top], assoc,
643 collections);
645 if (!pop(the_json, MODE_KEY)) {
646 return false;
648 the_state = 9;
649 break;
653 case -8:
654 if (!push(the_json, MODE_KEY)) {
655 s_json_parser->error_code = JSON_ERROR_DEPTH;
656 return false;
659 the_state = 1;
660 if (the_json->the_top > 0) {
661 Variant &top = the_json->the_zstack[the_json->the_top];
662 if (the_json->the_top == 1) {
663 top.assignRef(z);
664 } else {
665 top.unset();
667 /*<fb>*/
668 if (collections) {
669 // stable_maps is meaningless
670 top = NEWOBJ(c_Map)();
671 } else {
672 /*</fb>*/
673 if (!assoc) {
674 top = SystemLib::AllocStdClassObject();
675 } else {
676 top = Array::Create();
678 /*<fb>*/
680 /*</fb>*/
681 the_json->the_kstack[the_json->the_top] = key->detach();
682 JSON_RESET_TYPE();
684 break;
688 case -7:
689 /*** BEGIN Facebook: json_utf8_loose ***/
691 If this is a trailing comma in an object definition,
692 we're in MODE_KEY. In that case, throw that off the
693 stack and restore MODE_OBJECT so that we pretend the
694 trailing comma just didn't happen.
696 if (loose) {
697 if (pop(the_json, MODE_KEY)) {
698 push(the_json, MODE_OBJECT);
701 /*** END Facebook: json_utf8_loose ***/
703 if (type != -1 &&
704 the_json->the_stack[the_json->the_top] == MODE_OBJECT) {
705 Variant mval;
706 json_create_zval(mval, *buf, type, options);
707 Variant &top = the_json->the_zstack[the_json->the_top];
708 object_set(top, key->detach(), mval, assoc, collections);
709 buf->clear();
710 JSON_RESET_TYPE();
713 attach_zval(the_json, the_json->the_kstack[the_json->the_top],
714 assoc, collections);
716 if (!pop(the_json, MODE_OBJECT)) {
717 s_json_parser->error_code = JSON_ERROR_STATE_MISMATCH;
718 return false;
720 the_state = 9;
721 break;
725 case -6:
726 if (!push(the_json, MODE_ARRAY)) {
727 s_json_parser->error_code = JSON_ERROR_DEPTH;
728 return false;
730 the_state = 2;
732 if (the_json->the_top > 0) {
733 Variant &top = the_json->the_zstack[the_json->the_top];
734 if (the_json->the_top == 1) {
735 top.assignRef(z);
736 } else {
737 top.unset();
739 /*<fb>*/
740 if (collections) {
741 top = NEWOBJ(c_Vector)();
742 } else {
743 top = Array::Create();
745 /*</fb>*/
746 the_json->the_kstack[the_json->the_top] = key->detach();
747 JSON_RESET_TYPE();
749 break;
753 case -5:
755 if (type != -1 &&
756 the_json->the_stack[the_json->the_top] == MODE_ARRAY) {
757 Variant mval;
758 json_create_zval(mval, *buf, type, options);
759 auto& top = the_json->the_zstack[the_json->the_top];
760 if (collections) {
761 collectionAppend(top.getObjectData(), mval.asCell());
762 } else {
763 top.toArrRef().append(mval);
765 buf->clear();
766 JSON_RESET_TYPE();
769 attach_zval(the_json, the_json->the_kstack[the_json->the_top],
770 assoc, collections);
772 if (!pop(the_json, MODE_ARRAY)) {
773 s_json_parser->error_code = JSON_ERROR_STATE_MISMATCH;
774 return false;
776 the_state = 9;
778 break;
782 case -4:
783 switch (the_json->the_stack[the_json->the_top]) {
784 case MODE_KEY:
785 the_state = 27;
786 SWAP_BUFFERS(buf, key);
787 JSON_RESET_TYPE();
788 break;
789 case MODE_ARRAY:
790 case MODE_OBJECT:
791 the_state = 9;
792 break;
793 case MODE_DONE:
794 if (type == KindOfString) {
795 z = buf->detach();
796 the_state = 9;
797 break;
799 /* fall through if not KindOfString */
800 default:
801 s_json_parser->error_code = JSON_ERROR_SYNTAX;
802 return false;
804 break;
808 case -3:
810 Variant mval;
811 if (type != -1 &&
812 (the_json->the_stack[the_json->the_top] == MODE_OBJECT ||
813 the_json->the_stack[the_json->the_top] == MODE_ARRAY)) {
814 json_create_zval(mval, *buf, type, options);
817 switch (the_json->the_stack[the_json->the_top]) {
818 case MODE_OBJECT:
819 if (pop(the_json, MODE_OBJECT) &&
820 push(the_json, MODE_KEY)) {
821 if (type != -1) {
822 Variant &top = the_json->the_zstack[the_json->the_top];
823 object_set(top, key->detach(), mval, assoc, collections);
825 the_state = 29;
827 break;
828 case MODE_ARRAY:
829 if (type != -1) {
830 auto& top = the_json->the_zstack[the_json->the_top];
831 if (collections) {
832 collectionAppend(top.getObjectData(), mval.asCell());
833 } else {
834 top.toArrRef().append(mval);
837 the_state = 28;
838 break;
839 default:
840 s_json_parser->error_code = JSON_ERROR_SYNTAX;
841 return false;
843 buf->clear();
844 JSON_RESET_TYPE();
846 break;
848 /*<fb>*/
850 : (after unquoted string)
852 case -10:
853 if (the_json->the_stack[the_json->the_top] == MODE_KEY) {
854 the_state = 27;
855 SWAP_BUFFERS(buf, key);
856 JSON_RESET_TYPE();
857 s = -2;
858 } else {
859 s = 3;
860 break;
862 /*</fb>*/
867 case -2:
868 if (pop(the_json, MODE_KEY) && push(the_json, MODE_OBJECT)) {
869 the_state = 28;
870 break;
873 syntax error
875 case -1:
876 s_json_parser->error_code = JSON_ERROR_SYNTAX;
877 return false;
879 } else {
881 Change the state and iterate.
883 if (type == KindOfString) {
884 if (/*<fb>*/(/*</fb>*/s == 3/*<fb>*/ || s == 30)/*</fb>*/ &&
885 the_state != 8) {
886 if (the_state != 4) {
887 utf16_to_utf8(*buf, b);
888 } else {
889 switch (b) {
890 case 'b': buf->append('\b'); break;
891 case 't': buf->append('\t'); break;
892 case 'n': buf->append('\n'); break;
893 case 'f': buf->append('\f'); break;
894 case 'r': buf->append('\r'); break;
895 default:
896 utf16_to_utf8(*buf, b);
897 break;
900 } else if (s == 6) {
901 utf16 = dehexchar(b) << 12;
902 } else if (s == 7) {
903 utf16 += dehexchar(b) << 8;
904 } else if (s == 8) {
905 utf16 += dehexchar(b) << 4;
906 } else if (s == 3 && the_state == 8) {
907 utf16 += dehexchar(b);
908 utf16_to_utf8(*buf, utf16);
910 } else if ((type < 0 || type == KindOfNull) &&
911 (c == S_DIG || c == S_ZER)) {
912 type = KindOfInt64;
913 buf->append((char)b);
914 } else if (type == KindOfInt64 && s == 24) {
915 type = KindOfDouble;
916 buf->append((char)b);
917 } else if ((type < 0 || type == KindOfNull || type == KindOfInt64) &&
918 c == S_DOT) {
919 type = KindOfDouble;
920 buf->append((char)b);
921 } else if (type != KindOfString && c == S_QUO) {
922 type = KindOfString;
923 /*<fb>*/qchr = b;/*</fb>*/
924 } else if ((type < 0 || type == KindOfNull || type == KindOfInt64 ||
925 type == KindOfDouble) &&
926 ((the_state == 12 && s == 9) ||
927 (the_state == 16 && s == 9))) {
928 type = KindOfBoolean;
929 } else if (type < 0 && the_state == 19 && s == 9) {
930 type = KindOfNull;
931 } else if (type != KindOfString && c > S_WSP) {
932 utf16_to_utf8(*buf, b);
935 the_state = s;
939 if (the_state == 9 && pop(the_json, MODE_DONE)) {
940 s_json_parser->error_code = JSON_ERROR_NONE;
941 return true;
944 s_json_parser->error_code = JSON_ERROR_SYNTAX;
945 return false;