Add more boundary checking to sse3/4 parsing
[qemu/ar7.git] / json-parser.c
blob70b9b6f967963214266770068e119e6bcc6d4f15
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
14 #include <stdarg.h>
16 #include "qemu-common.h"
17 #include "qstring.h"
18 #include "qint.h"
19 #include "qdict.h"
20 #include "qlist.h"
21 #include "qfloat.h"
22 #include "qbool.h"
23 #include "json-parser.h"
24 #include "json-lexer.h"
26 typedef struct JSONParserContext
28 } JSONParserContext;
30 #define BUG_ON(cond) assert(!(cond))
32 /**
33 * TODO
35 * 0) make errors meaningful again
36 * 1) add geometry information to tokens
37 * 3) should we return a parsed size?
38 * 4) deal with premature EOI
41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These routines make
 * working with those objects a bit easier.
 */
50 static const char *token_get_value(QObject *obj)
52 return qdict_get_str(qobject_to_qdict(obj), "token");
55 static JSONTokenType token_get_type(QObject *obj)
57 return qdict_get_int(qobject_to_qdict(obj), "type");
60 static int token_is_operator(QObject *obj, char op)
62 const char *val;
64 if (token_get_type(obj) != JSON_OPERATOR) {
65 return 0;
68 val = token_get_value(obj);
70 return (val[0] == op) && (val[1] == 0);
73 static int token_is_keyword(QObject *obj, const char *value)
75 if (token_get_type(obj) != JSON_KEYWORD) {
76 return 0;
79 return strcmp(token_get_value(obj), value) == 0;
82 static int token_is_escape(QObject *obj, const char *value)
84 if (token_get_type(obj) != JSON_ESCAPE) {
85 return 0;
88 return (strcmp(token_get_value(obj), value) == 0);
91 /**
92 * Error handler
94 static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
96 va_list ap;
97 va_start(ap, msg);
98 fprintf(stderr, "parse error: ");
99 vfprintf(stderr, msg, ap);
100 fprintf(stderr, "\n");
101 va_end(ap);
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
/*
 * Encode the 16-bit value @wchar as UTF-8 into @buffer and NUL-terminate it.
 *
 * One, two or three payload bytes are written depending on the magnitude of
 * @wchar; @buffer_length must leave room for them plus the terminator, which
 * is enforced with BUG_ON().
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar > 0x07FF) {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        buffer[3] = 0;
        return;
    }

    if (wchar > 0x007F) {
        /* two bytes: 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
        return;
    }

    /* single byte: 0xxxxxxx */
    BUG_ON(buffer_length < 2);

    buffer[0] = wchar & 0x7F;
    buffer[1] = 0;
}
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Both letter cases are accepted; any other character yields -1.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = (ch - 'a') + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = (ch - 'A') + 10;
    }

    return digit;
}
146 * parse_string(): Parse a json string and return a QObject
148 * string
149 * ""
150 * " chars "
151 * chars
152 * char
153 * char chars
154 * char
155 * any-Unicode-character-
156 * except-"-or-\-or-
157 * control-character
158 * \"
159 * \\
160 * \/
161 * \b
162 * \f
163 * \n
164 * \r
165 * \t
166 * \u four-hex-digits
168 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170 const char *ptr = token_get_value(token);
171 QString *str;
172 int double_quote = 1;
174 if (*ptr == '"') {
175 double_quote = 1;
176 } else {
177 double_quote = 0;
179 ptr++;
181 str = qstring_new();
182 while (*ptr &&
183 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
184 if (*ptr == '\\') {
185 ptr++;
187 switch (*ptr) {
188 case '"':
189 qstring_append(str, "\"");
190 ptr++;
191 break;
192 case '\'':
193 qstring_append(str, "'");
194 ptr++;
195 break;
196 case '\\':
197 qstring_append(str, "\\");
198 ptr++;
199 break;
200 case '/':
201 qstring_append(str, "/");
202 ptr++;
203 break;
204 case 'b':
205 qstring_append(str, "\b");
206 ptr++;
207 break;
208 case 'f':
209 qstring_append(str, "\f");
210 ptr++;
211 break;
212 case 'n':
213 qstring_append(str, "\n");
214 ptr++;
215 break;
216 case 'r':
217 qstring_append(str, "\r");
218 ptr++;
219 break;
220 case 't':
221 qstring_append(str, "\t");
222 ptr++;
223 break;
224 case 'u': {
225 uint16_t unicode_char = 0;
226 char utf8_char[4];
227 int i = 0;
229 ptr++;
231 for (i = 0; i < 4; i++) {
232 if (qemu_isxdigit(*ptr)) {
233 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
234 } else {
235 parse_error(ctxt, token,
236 "invalid hex escape sequence in string");
237 goto out;
239 ptr++;
242 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
243 qstring_append(str, utf8_char);
244 } break;
245 default:
246 parse_error(ctxt, token, "invalid escape sequence in string");
247 goto out;
249 } else {
250 char dummy[2];
252 dummy[0] = *ptr++;
253 dummy[1] = 0;
255 qstring_append(str, dummy);
259 return str;
261 out:
262 QDECREF(str);
263 return NULL;
/**
 * Parsing rules
 */
269 static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
271 QObject *key, *token = NULL, *value, *peek;
272 QList *working = qlist_copy(*tokens);
274 peek = qlist_peek(working);
275 key = parse_value(ctxt, &working, ap);
276 if (!key || qobject_type(key) != QTYPE_QSTRING) {
277 parse_error(ctxt, peek, "key is not a string in object");
278 goto out;
281 token = qlist_pop(working);
282 if (!token_is_operator(token, ':')) {
283 parse_error(ctxt, token, "missing : in object pair");
284 goto out;
287 value = parse_value(ctxt, &working, ap);
288 if (value == NULL) {
289 parse_error(ctxt, token, "Missing value in dict");
290 goto out;
293 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
295 qobject_decref(token);
296 qobject_decref(key);
297 QDECREF(*tokens);
298 *tokens = working;
300 return 0;
302 out:
303 qobject_decref(token);
304 qobject_decref(key);
305 QDECREF(working);
307 return -1;
310 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
312 QDict *dict = NULL;
313 QObject *token, *peek;
314 QList *working = qlist_copy(*tokens);
316 token = qlist_pop(working);
317 if (!token_is_operator(token, '{')) {
318 goto out;
320 qobject_decref(token);
321 token = NULL;
323 dict = qdict_new();
325 peek = qlist_peek(working);
326 if (!token_is_operator(peek, '}')) {
327 if (parse_pair(ctxt, dict, &working, ap) == -1) {
328 goto out;
331 token = qlist_pop(working);
332 while (!token_is_operator(token, '}')) {
333 if (!token_is_operator(token, ',')) {
334 parse_error(ctxt, token, "expected separator in dict");
335 goto out;
337 qobject_decref(token);
338 token = NULL;
340 if (parse_pair(ctxt, dict, &working, ap) == -1) {
341 goto out;
344 token = qlist_pop(working);
346 qobject_decref(token);
347 token = NULL;
348 } else {
349 token = qlist_pop(working);
350 qobject_decref(token);
351 token = NULL;
354 QDECREF(*tokens);
355 *tokens = working;
357 return QOBJECT(dict);
359 out:
360 qobject_decref(token);
361 QDECREF(working);
362 QDECREF(dict);
363 return NULL;
366 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
368 QList *list = NULL;
369 QObject *token, *peek;
370 QList *working = qlist_copy(*tokens);
372 token = qlist_pop(working);
373 if (!token_is_operator(token, '[')) {
374 goto out;
376 qobject_decref(token);
377 token = NULL;
379 list = qlist_new();
381 peek = qlist_peek(working);
382 if (!token_is_operator(peek, ']')) {
383 QObject *obj;
385 obj = parse_value(ctxt, &working, ap);
386 if (obj == NULL) {
387 parse_error(ctxt, token, "expecting value");
388 goto out;
391 qlist_append_obj(list, obj);
393 token = qlist_pop(working);
394 while (!token_is_operator(token, ']')) {
395 if (!token_is_operator(token, ',')) {
396 parse_error(ctxt, token, "expected separator in list");
397 goto out;
400 qobject_decref(token);
401 token = NULL;
403 obj = parse_value(ctxt, &working, ap);
404 if (obj == NULL) {
405 parse_error(ctxt, token, "expecting value");
406 goto out;
409 qlist_append_obj(list, obj);
411 token = qlist_pop(working);
414 qobject_decref(token);
415 token = NULL;
416 } else {
417 token = qlist_pop(working);
418 qobject_decref(token);
419 token = NULL;
422 QDECREF(*tokens);
423 *tokens = working;
425 return QOBJECT(list);
427 out:
428 qobject_decref(token);
429 QDECREF(working);
430 QDECREF(list);
431 return NULL;
434 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
436 QObject *token, *ret;
437 QList *working = qlist_copy(*tokens);
439 token = qlist_pop(working);
441 if (token_get_type(token) != JSON_KEYWORD) {
442 goto out;
445 if (token_is_keyword(token, "true")) {
446 ret = QOBJECT(qbool_from_int(true));
447 } else if (token_is_keyword(token, "false")) {
448 ret = QOBJECT(qbool_from_int(false));
449 } else {
450 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
451 goto out;
454 qobject_decref(token);
455 QDECREF(*tokens);
456 *tokens = working;
458 return ret;
460 out:
461 qobject_decref(token);
462 QDECREF(working);
464 return NULL;
467 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
469 QObject *token = NULL, *obj;
470 QList *working = qlist_copy(*tokens);
472 if (ap == NULL) {
473 goto out;
476 token = qlist_pop(working);
478 if (token_is_escape(token, "%p")) {
479 obj = va_arg(*ap, QObject *);
480 } else if (token_is_escape(token, "%i")) {
481 obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
482 } else if (token_is_escape(token, "%d")) {
483 obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
484 } else if (token_is_escape(token, "%ld")) {
485 obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
486 } else if (token_is_escape(token, "%lld") ||
487 token_is_escape(token, "%I64d")) {
488 obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
489 } else if (token_is_escape(token, "%s")) {
490 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
491 } else if (token_is_escape(token, "%f")) {
492 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
493 } else {
494 goto out;
497 qobject_decref(token);
498 QDECREF(*tokens);
499 *tokens = working;
501 return obj;
503 out:
504 qobject_decref(token);
505 QDECREF(working);
507 return NULL;
510 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
512 QObject *token, *obj;
513 QList *working = qlist_copy(*tokens);
515 token = qlist_pop(working);
516 switch (token_get_type(token)) {
517 case JSON_STRING:
518 obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
519 break;
520 case JSON_INTEGER:
521 obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
522 break;
523 case JSON_FLOAT:
524 /* FIXME dependent on locale */
525 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
526 break;
527 default:
528 goto out;
531 qobject_decref(token);
532 QDECREF(*tokens);
533 *tokens = working;
535 return obj;
537 out:
538 qobject_decref(token);
539 QDECREF(working);
541 return NULL;
544 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
546 QObject *obj;
548 obj = parse_object(ctxt, tokens, ap);
549 if (obj == NULL) {
550 obj = parse_array(ctxt, tokens, ap);
552 if (obj == NULL) {
553 obj = parse_escape(ctxt, tokens, ap);
555 if (obj == NULL) {
556 obj = parse_keyword(ctxt, tokens);
558 if (obj == NULL) {
559 obj = parse_literal(ctxt, tokens);
562 return obj;
565 QObject *json_parser_parse(QList *tokens, va_list *ap)
567 JSONParserContext ctxt = {};
568 QList *working = qlist_copy(tokens);
569 QObject *result;
571 result = parse_value(&ctxt, &working, ap);
573 QDECREF(working);
575 return result;