qjson: Give each of the six structural chars its own token type
[qemu/ar7.git] / qobject / json-parser.c
blob020c6e141e57d3b11286345cbe52e3446ac8f6d6
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
14 #include <stdarg.h>
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
26 typedef struct JSONParserContext
28 Error *err;
29 struct {
30 QObject **buf;
31 size_t pos;
32 size_t count;
33 } tokens;
34 } JSONParserContext;
/* Abort via assert() when @cond, which must never be true, holds. */
#define BUG_ON(cond) assert(!(cond))
/**
 * TODO
 *
 * 0) make errors meaningful again
 * 1) add geometry information to tokens
 * 3) should we return a parsed size?
 * 4) deal with premature EOI
 */
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
/**
 * Token manipulators
 *
 * Tokens are dictionaries that contain a type, a string value, and geometry
 * information about a token identified by the lexer.  These are routines that
 * make working with these objects a bit easier.
 */
56 static const char *token_get_value(QObject *obj)
58 return qdict_get_str(qobject_to_qdict(obj), "token");
61 static JSONTokenType token_get_type(QObject *obj)
63 return qdict_get_int(qobject_to_qdict(obj), "type");
66 static int token_is_keyword(QObject *obj, const char *value)
68 if (token_get_type(obj) != JSON_KEYWORD) {
69 return 0;
72 return strcmp(token_get_value(obj), value) == 0;
75 static int token_is_escape(QObject *obj, const char *value)
77 if (token_get_type(obj) != JSON_ESCAPE) {
78 return 0;
81 return (strcmp(token_get_value(obj), value) == 0);
84 /**
85 * Error handler
87 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
88 QObject *token, const char *msg, ...)
90 va_list ap;
91 char message[1024];
92 va_start(ap, msg);
93 vsnprintf(message, sizeof(message), msg, ap);
94 va_end(ap);
95 if (ctxt->err) {
96 error_free(ctxt->err);
97 ctxt->err = NULL;
99 error_setg(&ctxt->err, "JSON parse error, %s", message);
/**
 * String helpers
 *
 * These helpers are used to unescape strings.
 */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.  One, two or three bytes are produced depending on the
 * value, plus the terminator; @buffer_length must be large enough for the
 * chosen form, otherwise BUG_ON() fires.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    if (wchar > 0x07FF) {
        /* Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        buffer[3] = 0;
        return;
    }

    if (wchar > 0x007F) {
        /* Two-byte form: 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        buffer[2] = 0;
        return;
    }

    /* Single byte: 0xxxxxxx */
    BUG_ON(buffer_length < 2);

    buffer[0] = wchar & 0x7F;
    buffer[1] = 0;
}
/*
 * Convert one hexadecimal digit character (either case) to its numeric
 * value 0..15.  Returns -1 when @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if (ch >= 'A' && ch <= 'F') {
        value = (ch - 'A') + 10;
    } else if (ch >= 'a' && ch <= 'f') {
        value = (ch - 'a') + 10;
    } else if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    }

    return value;
}
144 * parse_string(): Parse a json string and return a QObject
146 * string
147 * ""
148 * " chars "
149 * chars
150 * char
151 * char chars
152 * char
153 * any-Unicode-character-
154 * except-"-or-\-or-
155 * control-character
156 * \"
157 * \\
158 * \/
159 * \b
160 * \f
161 * \n
162 * \r
163 * \t
164 * \u four-hex-digits
166 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
168 const char *ptr = token_get_value(token);
169 QString *str;
170 int double_quote = 1;
172 if (*ptr == '"') {
173 double_quote = 1;
174 } else {
175 double_quote = 0;
177 ptr++;
179 str = qstring_new();
180 while (*ptr &&
181 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
182 if (*ptr == '\\') {
183 ptr++;
185 switch (*ptr) {
186 case '"':
187 qstring_append(str, "\"");
188 ptr++;
189 break;
190 case '\'':
191 qstring_append(str, "'");
192 ptr++;
193 break;
194 case '\\':
195 qstring_append(str, "\\");
196 ptr++;
197 break;
198 case '/':
199 qstring_append(str, "/");
200 ptr++;
201 break;
202 case 'b':
203 qstring_append(str, "\b");
204 ptr++;
205 break;
206 case 'f':
207 qstring_append(str, "\f");
208 ptr++;
209 break;
210 case 'n':
211 qstring_append(str, "\n");
212 ptr++;
213 break;
214 case 'r':
215 qstring_append(str, "\r");
216 ptr++;
217 break;
218 case 't':
219 qstring_append(str, "\t");
220 ptr++;
221 break;
222 case 'u': {
223 uint16_t unicode_char = 0;
224 char utf8_char[4];
225 int i = 0;
227 ptr++;
229 for (i = 0; i < 4; i++) {
230 if (qemu_isxdigit(*ptr)) {
231 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
232 } else {
233 parse_error(ctxt, token,
234 "invalid hex escape sequence in string");
235 goto out;
237 ptr++;
240 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
241 qstring_append(str, utf8_char);
242 } break;
243 default:
244 parse_error(ctxt, token, "invalid escape sequence in string");
245 goto out;
247 } else {
248 char dummy[2];
250 dummy[0] = *ptr++;
251 dummy[1] = 0;
253 qstring_append(str, dummy);
257 return str;
259 out:
260 QDECREF(str);
261 return NULL;
264 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
266 QObject *token;
267 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
268 token = ctxt->tokens.buf[ctxt->tokens.pos];
269 ctxt->tokens.pos++;
270 return token;
/* Note: parser_context_{peek|pop}_token do not increment the
 * token object's refcount. In both cases the references will continue
 * to be tracked and cleaned up in parser_context_free(), so do not
 * attempt to free the token object.
 */
278 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
280 QObject *token;
281 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
282 token = ctxt->tokens.buf[ctxt->tokens.pos];
283 return token;
286 static JSONParserContext parser_context_save(JSONParserContext *ctxt)
288 JSONParserContext saved_ctxt = {0};
289 saved_ctxt.tokens.pos = ctxt->tokens.pos;
290 saved_ctxt.tokens.count = ctxt->tokens.count;
291 saved_ctxt.tokens.buf = ctxt->tokens.buf;
292 return saved_ctxt;
295 static void parser_context_restore(JSONParserContext *ctxt,
296 JSONParserContext saved_ctxt)
298 ctxt->tokens.pos = saved_ctxt.tokens.pos;
299 ctxt->tokens.count = saved_ctxt.tokens.count;
300 ctxt->tokens.buf = saved_ctxt.tokens.buf;
303 static void tokens_append_from_iter(QObject *obj, void *opaque)
305 JSONParserContext *ctxt = opaque;
306 g_assert(ctxt->tokens.pos < ctxt->tokens.count);
307 ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
308 qobject_incref(obj);
311 static JSONParserContext *parser_context_new(QList *tokens)
313 JSONParserContext *ctxt;
314 size_t count;
316 if (!tokens) {
317 return NULL;
320 count = qlist_size(tokens);
321 if (count == 0) {
322 return NULL;
325 ctxt = g_malloc0(sizeof(JSONParserContext));
326 ctxt->tokens.pos = 0;
327 ctxt->tokens.count = count;
328 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
329 qlist_iter(tokens, tokens_append_from_iter, ctxt);
330 ctxt->tokens.pos = 0;
332 return ctxt;
335 /* to support error propagation, ctxt->err must be freed separately */
336 static void parser_context_free(JSONParserContext *ctxt)
338 int i;
339 if (ctxt) {
340 for (i = 0; i < ctxt->tokens.count; i++) {
341 qobject_decref(ctxt->tokens.buf[i]);
343 g_free(ctxt->tokens.buf);
344 g_free(ctxt);
/**
 * Parsing rules
 */
351 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
353 QObject *key = NULL, *token = NULL, *value, *peek;
354 JSONParserContext saved_ctxt = parser_context_save(ctxt);
356 peek = parser_context_peek_token(ctxt);
357 if (peek == NULL) {
358 parse_error(ctxt, NULL, "premature EOI");
359 goto out;
362 key = parse_value(ctxt, ap);
363 if (!key || qobject_type(key) != QTYPE_QSTRING) {
364 parse_error(ctxt, peek, "key is not a string in object");
365 goto out;
368 token = parser_context_pop_token(ctxt);
369 if (token == NULL) {
370 parse_error(ctxt, NULL, "premature EOI");
371 goto out;
374 if (token_get_type(token) != JSON_COLON) {
375 parse_error(ctxt, token, "missing : in object pair");
376 goto out;
379 value = parse_value(ctxt, ap);
380 if (value == NULL) {
381 parse_error(ctxt, token, "Missing value in dict");
382 goto out;
385 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
387 qobject_decref(key);
389 return 0;
391 out:
392 parser_context_restore(ctxt, saved_ctxt);
393 qobject_decref(key);
395 return -1;
398 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
400 QDict *dict = NULL;
401 QObject *token, *peek;
402 JSONParserContext saved_ctxt = parser_context_save(ctxt);
404 token = parser_context_pop_token(ctxt);
405 if (token == NULL) {
406 goto out;
409 if (token_get_type(token) != JSON_LCURLY) {
410 goto out;
413 dict = qdict_new();
415 peek = parser_context_peek_token(ctxt);
416 if (peek == NULL) {
417 parse_error(ctxt, NULL, "premature EOI");
418 goto out;
421 if (token_get_type(peek) != JSON_RCURLY) {
422 if (parse_pair(ctxt, dict, ap) == -1) {
423 goto out;
426 token = parser_context_pop_token(ctxt);
427 if (token == NULL) {
428 parse_error(ctxt, NULL, "premature EOI");
429 goto out;
432 while (token_get_type(token) != JSON_RCURLY) {
433 if (token_get_type(token) != JSON_COMMA) {
434 parse_error(ctxt, token, "expected separator in dict");
435 goto out;
438 if (parse_pair(ctxt, dict, ap) == -1) {
439 goto out;
442 token = parser_context_pop_token(ctxt);
443 if (token == NULL) {
444 parse_error(ctxt, NULL, "premature EOI");
445 goto out;
448 } else {
449 (void)parser_context_pop_token(ctxt);
452 return QOBJECT(dict);
454 out:
455 parser_context_restore(ctxt, saved_ctxt);
456 QDECREF(dict);
457 return NULL;
460 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
462 QList *list = NULL;
463 QObject *token, *peek;
464 JSONParserContext saved_ctxt = parser_context_save(ctxt);
466 token = parser_context_pop_token(ctxt);
467 if (token == NULL) {
468 goto out;
471 if (token_get_type(token) != JSON_LSQUARE) {
472 goto out;
475 list = qlist_new();
477 peek = parser_context_peek_token(ctxt);
478 if (peek == NULL) {
479 parse_error(ctxt, NULL, "premature EOI");
480 goto out;
483 if (token_get_type(peek) != JSON_RSQUARE) {
484 QObject *obj;
486 obj = parse_value(ctxt, ap);
487 if (obj == NULL) {
488 parse_error(ctxt, token, "expecting value");
489 goto out;
492 qlist_append_obj(list, obj);
494 token = parser_context_pop_token(ctxt);
495 if (token == NULL) {
496 parse_error(ctxt, NULL, "premature EOI");
497 goto out;
500 while (token_get_type(token) != JSON_RSQUARE) {
501 if (token_get_type(token) != JSON_COMMA) {
502 parse_error(ctxt, token, "expected separator in list");
503 goto out;
506 obj = parse_value(ctxt, ap);
507 if (obj == NULL) {
508 parse_error(ctxt, token, "expecting value");
509 goto out;
512 qlist_append_obj(list, obj);
514 token = parser_context_pop_token(ctxt);
515 if (token == NULL) {
516 parse_error(ctxt, NULL, "premature EOI");
517 goto out;
520 } else {
521 (void)parser_context_pop_token(ctxt);
524 return QOBJECT(list);
526 out:
527 parser_context_restore(ctxt, saved_ctxt);
528 QDECREF(list);
529 return NULL;
532 static QObject *parse_keyword(JSONParserContext *ctxt)
534 QObject *token, *ret;
535 JSONParserContext saved_ctxt = parser_context_save(ctxt);
537 token = parser_context_pop_token(ctxt);
538 if (token == NULL) {
539 goto out;
542 if (token_get_type(token) != JSON_KEYWORD) {
543 goto out;
546 if (token_is_keyword(token, "true")) {
547 ret = QOBJECT(qbool_from_bool(true));
548 } else if (token_is_keyword(token, "false")) {
549 ret = QOBJECT(qbool_from_bool(false));
550 } else if (token_is_keyword(token, "null")) {
551 ret = qnull();
552 } else {
553 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
554 goto out;
557 return ret;
559 out:
560 parser_context_restore(ctxt, saved_ctxt);
562 return NULL;
565 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
567 QObject *token = NULL, *obj;
568 JSONParserContext saved_ctxt = parser_context_save(ctxt);
570 if (ap == NULL) {
571 goto out;
574 token = parser_context_pop_token(ctxt);
575 if (token == NULL) {
576 goto out;
579 if (token_is_escape(token, "%p")) {
580 obj = va_arg(*ap, QObject *);
581 } else if (token_is_escape(token, "%i")) {
582 obj = QOBJECT(qbool_from_bool(va_arg(*ap, int)));
583 } else if (token_is_escape(token, "%d")) {
584 obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
585 } else if (token_is_escape(token, "%ld")) {
586 obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
587 } else if (token_is_escape(token, "%lld") ||
588 token_is_escape(token, "%I64d")) {
589 obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
590 } else if (token_is_escape(token, "%s")) {
591 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
592 } else if (token_is_escape(token, "%f")) {
593 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
594 } else {
595 goto out;
598 return obj;
600 out:
601 parser_context_restore(ctxt, saved_ctxt);
603 return NULL;
606 static QObject *parse_literal(JSONParserContext *ctxt)
608 QObject *token, *obj;
609 JSONParserContext saved_ctxt = parser_context_save(ctxt);
611 token = parser_context_pop_token(ctxt);
612 if (token == NULL) {
613 goto out;
616 switch (token_get_type(token)) {
617 case JSON_STRING:
618 obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
619 break;
620 case JSON_INTEGER: {
621 /* A possibility exists that this is a whole-valued float where the
622 * fractional part was left out due to being 0 (.0). It's not a big
623 * deal to treat these as ints in the parser, so long as users of the
624 * resulting QObject know to expect a QInt in place of a QFloat in
625 * cases like these.
627 * However, in some cases these values will overflow/underflow a
628 * QInt/int64 container, thus we should assume these are to be handled
629 * as QFloats/doubles rather than silently changing their values.
631 * strtoll() indicates these instances by setting errno to ERANGE
633 int64_t value;
635 errno = 0; /* strtoll doesn't set errno on success */
636 value = strtoll(token_get_value(token), NULL, 10);
637 if (errno != ERANGE) {
638 obj = QOBJECT(qint_from_int(value));
639 break;
641 /* fall through to JSON_FLOAT */
643 case JSON_FLOAT:
644 /* FIXME dependent on locale */
645 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
646 break;
647 default:
648 goto out;
651 return obj;
653 out:
654 parser_context_restore(ctxt, saved_ctxt);
656 return NULL;
659 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
661 QObject *obj;
663 obj = parse_object(ctxt, ap);
664 if (obj == NULL) {
665 obj = parse_array(ctxt, ap);
667 if (obj == NULL) {
668 obj = parse_escape(ctxt, ap);
670 if (obj == NULL) {
671 obj = parse_keyword(ctxt);
673 if (obj == NULL) {
674 obj = parse_literal(ctxt);
677 return obj;
680 QObject *json_parser_parse(QList *tokens, va_list *ap)
682 return json_parser_parse_err(tokens, ap, NULL);
685 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
687 JSONParserContext *ctxt = parser_context_new(tokens);
688 QObject *result;
690 if (!ctxt) {
691 return NULL;
694 result = parse_value(ctxt, ap);
696 error_propagate(errp, ctxt->err);
698 parser_context_free(ctxt);
700 return result;