4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
14 #include "qemu/osdep.h"
15 #include "qemu-common.h"
16 #include "qapi/qmp/json-lexer.h"
18 #define MAX_TOKEN_SIZE (64ULL << 20)
21 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
22 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
23 * 0|([1-9][0-9]*(\.[0-9]+)?([eE]([-+])?[0-9]+)?)
29 enum json_lexer_state
{
30 IN_ERROR
= 0, /* must really be 0, see json_lexer[] */
50 IN_NEG_NONZERO_NUMBER
,
/* Build-time check relating the token-type range to the lexer-state range:
 * json_lexer[] stores both IN_* states and JSON_* token types in the same
 * cells.
 * NOTE(review): QEMU_BUILD_BUG_ON presumably breaks the build when its
 * condition is true, i.e. this requires JSON_MIN > IN_START; confirm against
 * the macro's definition. */
62 QEMU_BUILD_BUG_ON((int)JSON_MIN
<= (int)IN_START
);
/* Row initializer for json_lexer[]: maps every byte 0..0x7F to STATE using a
 * GNU C range designator.  Entries listed after it in the same row (for
 * example ['0' ... '9']) override that default for those bytes. */
64 #define TERMINAL(state) [0 ... 0x7F] = (state)
66 /* Return whether TERMINAL is a terminal state and the transition to it
67 from OLD_STATE required lookahead. This happens whenever the table
68 below uses the TERMINAL macro. */
69 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
70 (json_lexer[(old_state)][0] == (terminal))
72 static const uint8_t json_lexer
[][256] = {
73 /* Relies on default initialization to IN_ERROR! */
75 /* double quote string */
77 ['0' ... '9'] = IN_DQ_STRING
,
78 ['a' ... 'f'] = IN_DQ_STRING
,
79 ['A' ... 'F'] = IN_DQ_STRING
,
82 ['0' ... '9'] = IN_DQ_UCODE3
,
83 ['a' ... 'f'] = IN_DQ_UCODE3
,
84 ['A' ... 'F'] = IN_DQ_UCODE3
,
87 ['0' ... '9'] = IN_DQ_UCODE2
,
88 ['a' ... 'f'] = IN_DQ_UCODE2
,
89 ['A' ... 'F'] = IN_DQ_UCODE2
,
92 ['0' ... '9'] = IN_DQ_UCODE1
,
93 ['a' ... 'f'] = IN_DQ_UCODE1
,
94 ['A' ... 'F'] = IN_DQ_UCODE1
,
96 [IN_DQ_STRING_ESCAPE
] = {
100 ['r'] = IN_DQ_STRING
,
101 ['t'] = IN_DQ_STRING
,
102 ['/'] = IN_DQ_STRING
,
103 ['\\'] = IN_DQ_STRING
,
104 ['\''] = IN_DQ_STRING
,
105 ['\"'] = IN_DQ_STRING
,
106 ['u'] = IN_DQ_UCODE0
,
109 [1 ... 0xBF] = IN_DQ_STRING
,
110 [0xC2 ... 0xF4] = IN_DQ_STRING
,
111 ['\\'] = IN_DQ_STRING_ESCAPE
,
115 /* single quote string */
117 ['0' ... '9'] = IN_SQ_STRING
,
118 ['a' ... 'f'] = IN_SQ_STRING
,
119 ['A' ... 'F'] = IN_SQ_STRING
,
122 ['0' ... '9'] = IN_SQ_UCODE3
,
123 ['a' ... 'f'] = IN_SQ_UCODE3
,
124 ['A' ... 'F'] = IN_SQ_UCODE3
,
127 ['0' ... '9'] = IN_SQ_UCODE2
,
128 ['a' ... 'f'] = IN_SQ_UCODE2
,
129 ['A' ... 'F'] = IN_SQ_UCODE2
,
132 ['0' ... '9'] = IN_SQ_UCODE1
,
133 ['a' ... 'f'] = IN_SQ_UCODE1
,
134 ['A' ... 'F'] = IN_SQ_UCODE1
,
136 [IN_SQ_STRING_ESCAPE
] = {
137 ['b'] = IN_SQ_STRING
,
138 ['f'] = IN_SQ_STRING
,
139 ['n'] = IN_SQ_STRING
,
140 ['r'] = IN_SQ_STRING
,
141 ['t'] = IN_SQ_STRING
,
142 ['/'] = IN_SQ_STRING
,
143 ['\\'] = IN_SQ_STRING
,
144 ['\''] = IN_SQ_STRING
,
145 ['\"'] = IN_SQ_STRING
,
146 ['u'] = IN_SQ_UCODE0
,
149 [1 ... 0xBF] = IN_SQ_STRING
,
150 [0xC2 ... 0xF4] = IN_SQ_STRING
,
151 ['\\'] = IN_SQ_STRING_ESCAPE
,
152 ['\''] = JSON_STRING
,
157 TERMINAL(JSON_INTEGER
),
158 ['0' ... '9'] = IN_ERROR
,
164 TERMINAL(JSON_FLOAT
),
165 ['0' ... '9'] = IN_DIGITS
,
169 ['0' ... '9'] = IN_DIGITS
,
175 ['0' ... '9'] = IN_DIGITS
,
178 [IN_MANTISSA_DIGITS
] = {
179 TERMINAL(JSON_FLOAT
),
180 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
186 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
190 [IN_NONZERO_NUMBER
] = {
191 TERMINAL(JSON_INTEGER
),
192 ['0' ... '9'] = IN_NONZERO_NUMBER
,
198 [IN_NEG_NONZERO_NUMBER
] = {
200 ['1' ... '9'] = IN_NONZERO_NUMBER
,
205 TERMINAL(JSON_KEYWORD
),
206 ['a' ... 'z'] = IN_KEYWORD
,
212 [' '] = IN_WHITESPACE
,
213 ['\t'] = IN_WHITESPACE
,
214 ['\r'] = IN_WHITESPACE
,
215 ['\n'] = IN_WHITESPACE
,
225 ['l'] = IN_ESCAPE_LL
,
233 ['4'] = IN_ESCAPE_I64
,
237 ['6'] = IN_ESCAPE_I6
,
252 ['"'] = IN_DQ_STRING
,
253 ['\''] = IN_SQ_STRING
,
255 ['1' ... '9'] = IN_NONZERO_NUMBER
,
256 ['-'] = IN_NEG_NONZERO_NUMBER
,
259 ['['] = JSON_LSQUARE
,
260 [']'] = JSON_RSQUARE
,
263 ['a' ... 'z'] = IN_KEYWORD
,
265 [' '] = IN_WHITESPACE
,
266 ['\t'] = IN_WHITESPACE
,
267 ['\r'] = IN_WHITESPACE
,
268 ['\n'] = IN_WHITESPACE
,
/* Put LEXER into its initial condition: state IN_START, a fresh token buffer,
 * and both position counters (x, y) at zero.
 * NOTE(review): FUNC is not used by any statement visible in this excerpt;
 * presumably it is stored as the lexer->emit callback that
 * json_lexer_feed_char() invokes -- confirm against the full file. */
272 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
275 lexer
->state
= IN_START
;
/* Token buffer starts with room for 3 bytes preallocated. */
276 lexer
->token
= g_string_sized_new(3);
277 lexer
->x
= lexer
->y
= 0;
280 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
, bool flush
)
282 int char_consumed
, new_state
;
291 assert(lexer
->state
<= ARRAY_SIZE(json_lexer
));
292 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
293 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
295 g_string_append_c(lexer
->token
, ch
);
310 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
313 g_string_truncate(lexer
->token
, 0);
314 new_state
= IN_START
;
317 /* XXX: To avoid having previous bad input leaving the parser in an
318 * unresponsive state where we consume unpredictable amounts of
319 * subsequent "good" input, percolate this error state up to the
320 * tokenizer/parser by forcing a NULL object to be emitted, then reset state.
323 * Also note that this handling is required for reliable channel
324 * negotiation between QMP and the guest agent, since chr(0xFF)
325 * is placed at the beginning of certain events to ensure proper
326 * delivery when the channel is in an unknown state. chr(0xFF) is
327 * never a valid ASCII/UTF-8 sequence, so this should reliably
328 * induce an error/flush state.
330 lexer
->emit(lexer
, lexer
->token
, JSON_ERROR
, lexer
->x
, lexer
->y
);
331 g_string_truncate(lexer
->token
, 0);
332 new_state
= IN_START
;
333 lexer
->state
= new_state
;
338 lexer
->state
= new_state
;
339 } while (!char_consumed
&& !flush
);
341 /* Do not let a single token grow to an arbitrarily large size,
342 * this is a security consideration.
344 if (lexer
->token
->len
> MAX_TOKEN_SIZE
) {
345 lexer
->emit(lexer
, lexer
->token
, lexer
->state
, lexer
->x
, lexer
->y
);
346 g_string_truncate(lexer
->token
, 0);
347 lexer
->state
= IN_START
;
353 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
357 for (i
= 0; i
< size
; i
++) {
360 err
= json_lexer_feed_char(lexer
, buffer
[i
], false);
369 int json_lexer_flush(JSONLexer
*lexer
)
371 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0, true);
374 void json_lexer_destroy(JSONLexer
*lexer
)
376 g_string_free(lexer
->token
, true);