make #includes consistent
[hiphop-php.git] / hphp / runtime / ext / JSON_parser.cpp
blob9970d4da36ad025ba4c1dd329122ab58249adb18
1 /* JSON_parser.c */
3 /* 2005-12-30 */
5 /*
6 Copyright (c) 2005 JSON.org
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice shall be included in all
16 copies or substantial portions of the Software.
18 The Software shall be used for Good, not Evil.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 SOFTWARE.
30 #include "hphp/runtime/ext/JSON_parser.h"
31 #include "hphp/runtime/base/util/string_buffer.h"
32 #include "hphp/runtime/base/complex_types.h"
33 #include "hphp/runtime/base/type_conversions.h"
34 #include "hphp/runtime/base/builtin_functions.h"
35 #include "hphp/runtime/base/zend/utf8_decode.h"
37 #include "hphp/system/lib/systemlib.h"
/*
 * Decimal digits of 2^63, i.e. the magnitude of the most negative 64-bit
 * integer. json_create_zval() compares 19-digit literals against this string
 * to decide whether they still fit in an int64_t.
 */
static const char long_min_digits[] = "9223372036854775808";

/*
 * json_create_zval() uses MAX_LENGTH_OF_LONG - 1 (19) as the digit count at
 * which an integer literal has to be range-checked against long_min_digits.
 */
#define MAX_LENGTH_OF_LONG 20
42 using namespace HPHP;
/*
 * Map true/false to the plain integers 1 and 0; push() and pop() below
 * return them as int. #undef of a name that is not currently a macro is a
 * no-op, so no #ifdef guard is required before redefining.
 */
#undef true
#undef false

#define true 1
#define false 0
/*
  Characters are mapped into these symbol classes: 31 classes numbered 0-30,
  plus the S_ERR sentinel for characters that are never legal. This allows for
  significant reductions in the size of the state transition table, whose
  columns are indexed by these values.
*/

/* error */
#define S_ERR -1

/* space */
#define S_SPA 0

/* other whitespace */
#define S_WSP 1

/* { */
#define S_LBE 2

/* } */
#define S_RBE 3

/* [ */
#define S_LBT 4

/* ] */
#define S_RBT 5

/* : */
#define S_COL 6

/* , */
#define S_COM 7

/* " */
#define S_QUO 8

/* \ (backslash) */
#define S_BAC 9

/* / */
#define S_SLA 10

/* + */
#define S_PLU 11

/* - */
#define S_MIN 12

/* . */
#define S_DOT 13

/* 0 */
#define S_ZER 14

/* 123456789 */
#define S_DIG 15

/* a */
#define S__A_ 16

/* b */
#define S__B_ 17

/* c */
#define S__C_ 18

/* d */
#define S__D_ 19

/* e */
#define S__E_ 20

/* f */
#define S__F_ 21

/* l */
#define S__L_ 22

/* n */
#define S__N_ 23

/* r */
#define S__R_ 24

/* s */
#define S__S_ 25

/* t */
#define S__T_ 26

/* u */
#define S__U_ 27

/* ABCDF (upper-case hex digits; E has its own class below) */
#define S_A_F 28

/* E */
#define S_E 29

/* everything else */
#define S_ETC 30
/*
  This table maps the 128 ASCII characters into the character classes
  (S_* above). The remaining Unicode characters should be mapped to S_ETC;
  JSON_parser() does that itself for every code unit above 127.
  Each row covers eight consecutive code points. S_ERR marks characters that
  are rejected outright (control characters other than TAB, LF and CR).
*/
static const int ascii_class[128] = {
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x00-0x07 */
  S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR, /* 0x08-0x0f: TAB, LF, CR are whitespace */
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x10-0x17 */
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x18-0x1f */

  S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* space ! " # $ % & ' */
  S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA, /* ( ) * + , - . / */
  S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, /* 0 1 2 3 4 5 6 7 */
  S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* 8 9 : ; < = > ? */

  S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E  , S_A_F, S_ETC, /* @ A B C D E F G */
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* H I J K L M N O */
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* P Q R S T U V W */
  S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC, /* X Y Z [ backslash ] ^ _ */

  S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC, /* ` a b c d e f g */
  S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC, /* h i j k l m n o */
  S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC, /* p q r s t u v w */
  S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC  /* x y z { | } ~ DEL */
};
/*<fb>*/
/*
  Character-class table selected by JSON_parser() when `loose` is set.
  It matches ascii_class except that the apostrophe (0x27) is classified as
  S_QUO, so single-quoted strings are recognised as well.
*/
static const int loose_ascii_class[128] = {
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x00-0x07 */
  S_ERR, S_WSP, S_WSP, S_ERR, S_ERR, S_WSP, S_ERR, S_ERR, /* 0x08-0x0f: TAB, LF, CR are whitespace */
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x10-0x17 */
  S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, /* 0x18-0x1f */

  S_SPA, S_ETC, S_QUO, S_ETC, S_ETC, S_ETC, S_ETC, S_QUO, /* space ! " # $ % & ' (apostrophe is a quote here) */
  S_ETC, S_ETC, S_ETC, S_PLU, S_COM, S_MIN, S_DOT, S_SLA, /* ( ) * + , - . / */
  S_ZER, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, S_DIG, /* 0 1 2 3 4 5 6 7 */
  S_DIG, S_DIG, S_COL, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* 8 9 : ; < = > ? */

  S_ETC, S_A_F, S_A_F, S_A_F, S_A_F, S_E  , S_A_F, S_ETC, /* @ A B C D E F G */
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* H I J K L M N O */
  S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, /* P Q R S T U V W */
  S_ETC, S_ETC, S_ETC, S_LBT, S_BAC, S_RBT, S_ETC, S_ETC, /* X Y Z [ backslash ] ^ _ */

  S_ETC, S__A_, S__B_, S__C_, S__D_, S__E_, S__F_, S_ETC, /* ` a b c d e f g */
  S_ETC, S_ETC, S_ETC, S_ETC, S__L_, S_ETC, S__N_, S_ETC, /* h i j k l m n o */
  S_ETC, S_ETC, S__R_, S__S_, S__T_, S__U_, S_ETC, S_ETC, /* p q r s t u v w */
  S_ETC, S_ETC, S_ETC, S_LBE, S_ETC, S_RBE, S_ETC, S_ETC  /* x y z { | } ~ DEL */
};
/*</fb>*/
/*
  The state transition table takes the current state and the current symbol,
  and returns either a new state or an action. A new state is a number between
  0 and 29. An action is a negative number between -1 and -9. A JSON text is
  accepted if the end of the text is in state 9 and mode is MODE_DONE.

  Rows are states, columns are symbol classes in S_* order:
    0 space   1 other-ws  2 {   3 }   4 [   5 ]   6 :   7 ,   8 quote
    9 backslash  10 /  11 +  12 -  13 .  14 zero  15 digit 1-9
    16 a  17 b  18 c  19 d  20 e  21 f  22 l  23 n  24 r  25 s  26 t  27 u
    28 ABCDF  29 E  30 everything else

  Actions (handled by the switch in JSON_parser()):
    -1 syntax error   -2 colon   -3 comma   -4 closing quote   -5 ]
    -6 [   -7 }   -8 {   -9 } right after {

  States, as they follow from the transitions below:
     0 start                         1 after {, expecting key or }
     2 after [, expecting value/]    3 inside a string
     4 after a backslash             5-8 the four hex digits of a u-escape
     9 after a complete value       10-12 t-r-u-e    13-16 f-a-l-s-e
    17-19 n-u-l-l                   20 after a minus sign
    21 after a leading zero         22 integer digits
    23 fraction                     24 after e/E
    25 after the exponent sign      26 exponent digits
    27 after a key, expecting :     28 expecting a value (after : or ,)
    29 expecting a key (after , in an object)
*/
static const int state_transition_table[30][31] = {
/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
/*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*28*/ {28,28,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/*29*/ {29,29,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
};
/*<fb>*/
/*
  Alternate "loose" transition table to support unquoted keys.

  Same layout as state_transition_table (rows are states, columns are S_*
  symbol classes) with one extra state and one extra action:
    state 30   inside an unquoted key
    action -10 colon terminating an unquoted key

  Rows that differ from the strict table:
     1, 29  classes 14-30 (digits, letters, everything else) start an
            unquoted key (state 30)
    28      ] is accepted (-5), i.e. a trailing comma in an array
    29      } is accepted (-7), i.e. a trailing comma in an object
    30      only present here

  NOTE(review): row 29 also maps , (column 7) to -7, the same action as }.
  That looks like it lets a doubled comma close the object - confirm that
  this is intended.
*/
static const int loose_state_transition_table[31][31] = {
/* 0*/ { 0, 0,-8,-1,-6,-1,-1,-1, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/* 1*/ { 1, 1,-1,-9,-1,-1,-1,-1, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
/* 2*/ { 2, 2,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/* 3*/ { 3,-1, 3, 3, 3, 3, 3, 3,-4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
/* 4*/ {-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3,-1,-1,-1, 3,-1, 3, 3,-1, 3, 5,-1,-1,-1},
/* 5*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 6, 6, 6, 6, 6, 6, 6, 6,-1,-1,-1,-1,-1,-1, 6, 6,-1},
/* 6*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 7, 7, 7, 7, 7, 7, 7, 7,-1,-1,-1,-1,-1,-1, 7, 7,-1},
/* 7*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 8, 8, 8, 8, 8, 8, 8, 8,-1,-1,-1,-1,-1,-1, 8, 8,-1},
/* 8*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 3, 3, 3, 3, 3, 3, 3, 3,-1,-1,-1,-1,-1,-1, 3, 3,-1},
/* 9*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*10*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,11,-1,-1,-1,-1,-1,-1},
/*11*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,12,-1,-1,-1},
/*12*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*13*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,14,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*14*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,15,-1,-1,-1,-1,-1,-1,-1,-1},
/*15*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,16,-1,-1,-1,-1,-1},
/*16*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*17*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,18,-1,-1,-1},
/*18*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,19,-1,-1,-1,-1,-1,-1,-1,-1},
/*19*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 9,-1,-1,-1,-1,-1,-1,-1,-1},
/*20*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,21,22,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*21*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*22*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,23,22,22,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*23*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,23,23,-1,-1,-1,-1,24,-1,-1,-1,-1,-1,-1,-1,-1,24,-1},
/*24*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,25,25,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*25*/ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*26*/ { 9, 9,-1,-7,-1,-5,-1,-3,-1,-1,-1,-1,-1,-1,26,26,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*27*/ {27,27,-1,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1},
/*28*/ {28,28,-8,-1,-6,-5,-1,-1, 3,-1,-1,-1,20,-1,21,22,-1,-1,-1,-1,-1,13,-1,17,-1,-1,10,-1,-1,-1,-1},
/*29*/ {29,29,-1,-7,-1,-1,-1,-7, 3,-1,-1,-1,-1,-1,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30},
/*30*/ {30,-1,30,30,30,30,-10,30,-4,4,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30}
};
/*</fb>*/
/* Capacity of each parser stack; push() fails once this depth is reached. */
#define JSON_PARSER_MAX_DEPTH 512

/**
 * A stack maintains the states of nested structures.
 *
 * The three arrays are parallel: entry i describes nesting level i.
 */
struct json_parser {
  int the_stack[JSON_PARSER_MAX_DEPTH];      // MODE_* of each level
  Variant the_zstack[JSON_PARSER_MAX_DEPTH]; // container being built at each level
  String the_kstack[JSON_PARSER_MAX_DEPTH];  // key under which that container is
                                             // attached to its parent object
  int the_top;   // index of the current level; JSON_parser() starts at -1
  int the_mark;  // the watermark: highest the_top reached, so the cleaner
                 // knows how many slots to release
};

/* One parser state per thread, obtained via s_json_parser.get(). */
IMPLEMENT_THREAD_LOCAL(json_parser, s_json_parser);
303 class JsonParserCleaner {
304 public:
305 explicit JsonParserCleaner(json_parser *json) : m_json(json) {}
306 ~JsonParserCleaner() {
307 for (int i = 0; i <= m_json->the_mark; i++) {
308 m_json->the_zstack[i].unset();
309 m_json->the_kstack[i].reset();
312 private:
313 json_parser *m_json;
/**
 * These modes can be pushed on the PDA stack.
 *
 * pop() overwrites a released slot with 0, so 0 is deliberately not a mode.
 */
#define MODE_DONE 1    /* bottom of the stack; pushed once by JSON_parser() */
#define MODE_KEY 2     /* inside an object, expecting a key */
#define MODE_OBJECT 3  /* inside an object, expecting or holding a value */
#define MODE_ARRAY 4   /* inside an array */
325 * Push a mode onto the stack. Return false if there is overflow.
327 static int push(json_parser *json, int mode) {
328 json->the_top += 1;
329 if (json->the_top >= JSON_PARSER_MAX_DEPTH) {
330 return false;
332 json->the_stack[json->the_top] = mode;
333 if (json->the_top > json->the_mark) {
334 json->the_mark = json->the_top;
336 return true;
341 * Pop the stack, assuring that the current mode matches the expectation.
342 * Return false if there is underflow or if the modes mismatch.
344 static int pop(json_parser *json, int mode) {
345 if (json->the_top < 0 || json->the_stack[json->the_top] != mode) {
346 return false;
348 json->the_stack[json->the_top] = 0;
349 json->the_top -= 1;
350 return true;
/**
 * Convert one hexadecimal digit to its numeric value.
 *
 * @param c  character to convert
 * @return 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other character
 */
static int dehexchar(char c) {
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  if ('a' <= c && c <= 'f') {
    return 10 + (c - 'a');
  }
  if ('A' <= c && c <= 'F') {
    return 10 + (c - 'A');
  }
  return -1;
}
360 static void json_create_zval(Variant &z, StringBuffer &buf, int type) {
361 switch (type) {
362 case KindOfInt64:
364 const char *p = buf.data();
365 assert(p);
366 if (p == NULL) {
367 z = int64_t(0);
368 return;
371 bool neg = (buf.charAt(0) == '-');
373 int len = buf.size();
374 if (neg) len--;
375 if (len >= MAX_LENGTH_OF_LONG - 1) {
376 if (len == MAX_LENGTH_OF_LONG - 1) {
377 int cmp = strcmp(p + (neg ? 1 : 0), long_min_digits);
378 if (!(cmp < 0 || (cmp == 0 && neg))) {
379 z = strtod(p, NULL);
380 return;
382 } else {
383 z = strtod(p, NULL);
384 return;
387 z = int64_t(strtoll(buf.data(), NULL, 10));
389 break;
390 case KindOfDouble:
391 z = buf.data() ? strtod(buf.data(), NULL) : 0.0;
392 break;
393 case KindOfString:
394 z = buf.detach();
395 break;
396 case KindOfBoolean:
397 z = (buf.data() && (*buf.data() == 't'));
398 break;
399 default:
400 z = uninit_null();
401 break;
405 static void utf16_to_utf8(StringBuffer &buf, unsigned short utf16) {
406 if (utf16 < 0x80) {
407 buf += (char)utf16;
408 } else if (utf16 < 0x800) {
409 buf += (char)(0xc0 | (utf16 >> 6));
410 buf += (char)(0x80 | (utf16 & 0x3f));
411 } else if ((utf16 & 0xfc00) == 0xdc00
412 && buf.size() >= 3
413 && ((unsigned char)buf.charAt(buf.size() - 3)) == 0xed
414 && ((unsigned char)buf.charAt(buf.size() - 2) & 0xf0) == 0xa0
415 && ((unsigned char)buf.charAt(buf.size() - 1) & 0xc0) == 0x80) {
416 /* found surrogate pair */
417 unsigned long utf32;
419 utf32 = (((buf.charAt(buf.size() - 2) & 0xf) << 16)
420 | ((buf.charAt(buf.size() - 1) & 0x3f) << 10)
421 | (utf16 & 0x3ff)) + 0x10000;
422 buf.resize(buf.size() - 3);
424 buf += (char)(0xf0 | (utf32 >> 18));
425 buf += (char)(0x80 | ((utf32 >> 12) & 0x3f));
426 buf += (char)(0x80 | ((utf32 >> 6) & 0x3f));
427 buf += (char)(0x80 | (utf32 & 0x3f));
428 } else {
429 buf += (char)(0xe0 | (utf16 >> 12));
430 buf += (char)(0x80 | ((utf16 >> 6) & 0x3f));
431 buf += (char)(0x80 | (utf16 & 0x3f));
435 static void object_set(Variant &var, CStrRef key, CVarRef value,
436 int assoc) {
437 if (!assoc) {
438 // We know it is stdClass, and everything is public (and dynamic).
439 if (key.empty()) {
440 var.getObjectData()->o_set("_empty_", value);
441 } else {
442 var.getObjectData()->o_set(key, value);
444 } else {
445 var.set(key, value);
449 static void attach_zval(json_parser *json, CStrRef key,
450 int assoc) {
451 Variant &root = json->the_zstack[json->the_top - 1];
452 Variant &child = json->the_zstack[json->the_top];
453 int up_mode = json->the_stack[json->the_top - 1];
455 if (up_mode == MODE_ARRAY) {
456 root.append(child);
457 } else if (up_mode == MODE_OBJECT) {
458 object_set(root, key, child, assoc);
/*
 * Helper macros for JSON_parser(). They rely on names local to that function
 * (`type`, `the_json`).
 *
 * The statement-like macros expand to do { ... } while (0) WITHOUT a trailing
 * semicolon, so that `MACRO(...);` is exactly one statement and can be used
 * safely as the body of an if/else.
 */

/* Exchange two StringBuffer pointers. */
#define SWAP_BUFFERS(from, to) do { \
    StringBuffer *tmp = from; \
    from = to; \
    to = tmp; \
  } while (0)

/* Mark that no scalar token is currently being collected. */
#define JSON_RESET_TYPE() do { type = -1; } while (0)

/* Shorthand for a member of the parser state. */
#define JSON(x) the_json->x
471 * The JSON_parser takes a UTF-8 encoded string and determines if it is a
472 * syntactically correct JSON text. Along the way, it creates a PHP variable.
474 * It is implemented as a Pushdown Automaton; that means it is a finite state
475 * machine with a stack.
477 bool JSON_parser(Variant &z, const char *p, int length, bool assoc/*<fb>*/,
478 bool loose/*</fb>*/) {
479 int b; /* the next character */
480 int c; /* the next character class */
481 int s; /* the next state */
482 json_parser *the_json = s_json_parser.get(); /* the parser state */
483 JsonParserCleaner cleaner(the_json);
484 int the_state = 0;
486 /*<fb>*/
487 int qchr = 0;
488 int const *byte_class;
489 if (loose) {
490 byte_class = loose_ascii_class;
491 } else {
492 byte_class = ascii_class;
494 /*</fb>*/
496 StringBuffer sb_buf(127), sb_key(127);
497 StringBuffer *buf = &sb_buf;
498 StringBuffer *key = &sb_key;
500 int type = -1;
501 unsigned short utf16 = 0;
503 JSON(the_mark) = JSON(the_top) = -1;
504 push(the_json, MODE_DONE);
506 UTF8To16Decoder decoder(p, length, loose);
507 for (;;) {
508 b = decoder.decode();
509 if (b == UTF8_END) break; // UTF-8 decoding finishes successfully.
510 if (b == UTF8_ERROR) {
511 return false;
513 assert(b >= 0);
515 if ((b & 127) == b) {
516 /*<fb>*/
517 c = byte_class[b];
518 /*</fb>*/
519 if (c <= S_ERR) {
520 return false;
522 } else {
523 c = S_ETC;
526 Get the next state from the transition table.
529 /*<fb>*/
530 if (loose) {
531 s = loose_state_transition_table[the_state][c];
532 } else {
533 s = state_transition_table[the_state][c];
536 if (s == -4) {
537 if (b != qchr) {
538 s = 3;
539 } else {
540 qchr = 0;
543 /*</fb>*/
545 if (s < 0) {
547 Perform one of the predefined actions.
549 switch (s) {
551 empty }
553 case -9:
554 if (!pop(the_json, MODE_KEY)) {
555 return false;
557 the_state = 9;
558 break;
562 case -8:
563 if (!push(the_json, MODE_KEY)) {
564 return false;
567 the_state = 1;
568 if (JSON(the_top) > 0) {
569 Variant &top = JSON(the_zstack)[JSON(the_top)];
570 if (JSON(the_top) == 1) {
571 top.assignRef(z);
572 } else {
573 top.unset();
575 if (!assoc) {
576 top = SystemLib::AllocStdClassObject();
577 } else {
578 top = Array::Create();
580 JSON(the_kstack)[JSON(the_top)] = key->detach();
581 JSON_RESET_TYPE();
583 break;
587 case -7:
588 /*** BEGIN Facebook: json_utf8_loose ***/
590 If this is a trailing comma in an object definition,
591 we're in MODE_KEY. In that case, throw that off the
592 stack and restore MODE_OBJECT so that we pretend the
593 trailing comma just didn't happen.
595 if (loose) {
596 if (pop(the_json, MODE_KEY)) {
597 push(the_json, MODE_OBJECT);
600 /*** END Facebook: json_utf8_loose ***/
602 if (type != -1 &&
603 JSON(the_stack)[JSON(the_top)] == MODE_OBJECT) {
604 Variant mval;
605 json_create_zval(mval, *buf, type);
606 Variant &top = JSON(the_zstack)[JSON(the_top)];
607 object_set(top, key->detach(), mval, assoc);
608 buf->reset();
609 JSON_RESET_TYPE();
612 attach_zval(the_json, JSON(the_kstack)[JSON(the_top)], assoc);
614 if (!pop(the_json, MODE_OBJECT)) {
615 return false;
617 the_state = 9;
618 break;
622 case -6:
623 if (!push(the_json, MODE_ARRAY)) {
624 return false;
626 the_state = 2;
628 if (JSON(the_top) > 0) {
629 if (JSON(the_top) == 1) {
630 JSON(the_zstack)[JSON(the_top)].assignRef(z);
631 } else {
632 JSON(the_zstack)[JSON(the_top)].unset();
634 JSON(the_zstack)[JSON(the_top)] = Array::Create();
635 JSON(the_kstack)[JSON(the_top)] = key->detach();
636 JSON_RESET_TYPE();
638 break;
642 case -5:
644 if (type != -1 &&
645 JSON(the_stack)[JSON(the_top)] == MODE_ARRAY) {
646 Variant mval;
647 json_create_zval(mval, *buf, type);
648 JSON(the_zstack)[JSON(the_top)].append(mval);
649 buf->reset();
650 JSON_RESET_TYPE();
653 attach_zval(the_json, JSON(the_kstack[JSON(the_top)]), assoc);
655 if (!pop(the_json, MODE_ARRAY)) {
656 return false;
658 the_state = 9;
660 break;
664 case -4:
665 switch (JSON(the_stack)[JSON(the_top)]) {
666 case MODE_KEY:
667 the_state = 27;
668 SWAP_BUFFERS(buf, key);
669 JSON_RESET_TYPE();
670 break;
671 case MODE_ARRAY:
672 case MODE_OBJECT:
673 the_state = 9;
674 break;
675 case MODE_DONE:
676 if (type == KindOfString) {
677 z = buf->detach();
678 the_state = 9;
679 break;
681 /* fall through if not KindOfString */
682 default:
683 return false;
685 break;
689 case -3:
691 Variant mval;
692 if (type != -1 &&
693 (JSON(the_stack)[JSON(the_top)] == MODE_OBJECT ||
694 JSON(the_stack)[JSON(the_top)] == MODE_ARRAY)) {
695 json_create_zval(mval, *buf, type);
698 switch (JSON(the_stack)[JSON(the_top)]) {
699 case MODE_OBJECT:
700 if (pop(the_json, MODE_OBJECT) &&
701 push(the_json, MODE_KEY)) {
702 if (type != -1) {
703 Variant &top = JSON(the_zstack)[JSON(the_top)];
704 object_set(top, key->detach(), mval, assoc);
706 the_state = 29;
708 break;
709 case MODE_ARRAY:
710 if (type != -1) {
711 JSON(the_zstack)[JSON(the_top)].append(mval);
713 the_state = 28;
714 break;
715 default:
716 return false;
718 buf->reset();
719 JSON_RESET_TYPE();
721 break;
723 /*<fb>*/
725 : (after unquoted string)
727 case -10:
728 if (JSON(the_stack)[JSON(the_top)] == MODE_KEY) {
729 the_state = 27;
730 SWAP_BUFFERS(buf, key);
731 JSON_RESET_TYPE();
732 s = -2;
733 } else {
734 s = 3;
735 break;
737 /*</fb>*/
742 case -2:
743 if (pop(the_json, MODE_KEY) && push(the_json, MODE_OBJECT)) {
744 the_state = 28;
745 break;
748 syntax error
750 case -1:
751 return false;
753 } else {
755 Change the state and iterate.
757 if (type == KindOfString) {
758 if (/*<fb>*/(/*</fb>*/s == 3/*<fb>*/ || s == 30)/*</fb>*/ &&
759 the_state != 8) {
760 if (the_state != 4) {
761 utf16_to_utf8(*buf, b);
762 } else {
763 switch (b) {
764 case 'b': buf->append('\b'); break;
765 case 't': buf->append('\t'); break;
766 case 'n': buf->append('\n'); break;
767 case 'f': buf->append('\f'); break;
768 case 'r': buf->append('\r'); break;
769 default:
770 utf16_to_utf8(*buf, b);
771 break;
774 } else if (s == 6) {
775 utf16 = dehexchar(b) << 12;
776 } else if (s == 7) {
777 utf16 += dehexchar(b) << 8;
778 } else if (s == 8) {
779 utf16 += dehexchar(b) << 4;
780 } else if (s == 3 && the_state == 8) {
781 utf16 += dehexchar(b);
782 utf16_to_utf8(*buf, utf16);
784 } else if ((type < 0 || type == KindOfNull) &&
785 (c == S_DIG || c == S_ZER)) {
786 type = KindOfInt64;
787 buf->append((char)b);
788 } else if (type == KindOfInt64 && s == 24) {
789 type = KindOfDouble;
790 buf->append((char)b);
791 } else if ((type < 0 || type == KindOfNull || type == KindOfInt64) &&
792 c == S_DOT) {
793 type = KindOfDouble;
794 buf->append((char)b);
795 } else if (type != KindOfString && c == S_QUO) {
796 type = KindOfString;
797 /*<fb>*/qchr = b;/*</fb>*/
798 } else if ((type < 0 || type == KindOfNull || type == KindOfInt64 ||
799 type == KindOfDouble) &&
800 ((the_state == 12 && s == 9) ||
801 (the_state == 16 && s == 9))) {
802 type = KindOfBoolean;
803 } else if (type < 0 && the_state == 19 && s == 9) {
804 type = KindOfNull;
805 } else if (type != KindOfString && c > S_WSP) {
806 utf16_to_utf8(*buf, b);
809 the_state = s;
813 return the_state == 9 && pop(the_json, MODE_DONE);