4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
14 #include "qapi/qmp/qstring.h"
15 #include "qapi/qmp/qlist.h"
16 #include "qapi/qmp/qdict.h"
17 #include "qapi/qmp/qint.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/json-lexer.h"
21 #define MAX_TOKEN_SIZE (64ULL << 20)
24 * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*\"
25 * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*'
26 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
32 enum json_lexer_state
{
33 IN_ERROR
= 0, /* must really be 0, see json_lexer[] */
53 IN_NEG_NONZERO_NUMBER
,
65 QEMU_BUILD_BUG_ON((int)JSON_MIN
<= (int)IN_START
);
/*
 * Designated-initializer fragment for one row of json_lexer[]: every input
 * byte in the range 0..0x7F maps to TOKEN.
 */
#define TERMINAL(token) [0 ... 0x7F] = (token)

/*
 * True when row FROM_STATE of json_lexer[] yields TOKEN for byte 0, which is
 * the case whenever that row was filled with TERMINAL(TOKEN).  A transition
 * of that kind is taken on lookahead.
 */
#define TERMINAL_NEEDED_LOOKAHEAD(from_state, token) \
    ((token) == json_lexer[(from_state)][0])
75 static const uint8_t json_lexer
[][256] = {
76 /* Relies on default initialization to IN_ERROR! */
78 /* double quote string */
80 ['0' ... '9'] = IN_DQ_STRING
,
81 ['a' ... 'f'] = IN_DQ_STRING
,
82 ['A' ... 'F'] = IN_DQ_STRING
,
85 ['0' ... '9'] = IN_DQ_UCODE3
,
86 ['a' ... 'f'] = IN_DQ_UCODE3
,
87 ['A' ... 'F'] = IN_DQ_UCODE3
,
90 ['0' ... '9'] = IN_DQ_UCODE2
,
91 ['a' ... 'f'] = IN_DQ_UCODE2
,
92 ['A' ... 'F'] = IN_DQ_UCODE2
,
95 ['0' ... '9'] = IN_DQ_UCODE1
,
96 ['a' ... 'f'] = IN_DQ_UCODE1
,
97 ['A' ... 'F'] = IN_DQ_UCODE1
,
99 [IN_DQ_STRING_ESCAPE
] = {
100 ['b'] = IN_DQ_STRING
,
101 ['f'] = IN_DQ_STRING
,
102 ['n'] = IN_DQ_STRING
,
103 ['r'] = IN_DQ_STRING
,
104 ['t'] = IN_DQ_STRING
,
105 ['/'] = IN_DQ_STRING
,
106 ['\\'] = IN_DQ_STRING
,
107 ['\''] = IN_DQ_STRING
,
108 ['\"'] = IN_DQ_STRING
,
109 ['u'] = IN_DQ_UCODE0
,
112 [1 ... 0xBF] = IN_DQ_STRING
,
113 [0xC2 ... 0xF4] = IN_DQ_STRING
,
114 ['\\'] = IN_DQ_STRING_ESCAPE
,
118 /* single quote string */
120 ['0' ... '9'] = IN_SQ_STRING
,
121 ['a' ... 'f'] = IN_SQ_STRING
,
122 ['A' ... 'F'] = IN_SQ_STRING
,
125 ['0' ... '9'] = IN_SQ_UCODE3
,
126 ['a' ... 'f'] = IN_SQ_UCODE3
,
127 ['A' ... 'F'] = IN_SQ_UCODE3
,
130 ['0' ... '9'] = IN_SQ_UCODE2
,
131 ['a' ... 'f'] = IN_SQ_UCODE2
,
132 ['A' ... 'F'] = IN_SQ_UCODE2
,
135 ['0' ... '9'] = IN_SQ_UCODE1
,
136 ['a' ... 'f'] = IN_SQ_UCODE1
,
137 ['A' ... 'F'] = IN_SQ_UCODE1
,
139 [IN_SQ_STRING_ESCAPE
] = {
140 ['b'] = IN_SQ_STRING
,
141 ['f'] = IN_SQ_STRING
,
142 ['n'] = IN_SQ_STRING
,
143 ['r'] = IN_SQ_STRING
,
144 ['t'] = IN_SQ_STRING
,
145 ['/'] = IN_SQ_STRING
,
146 ['\\'] = IN_SQ_STRING
,
147 ['\''] = IN_SQ_STRING
,
148 ['\"'] = IN_SQ_STRING
,
149 ['u'] = IN_SQ_UCODE0
,
152 [1 ... 0xBF] = IN_SQ_STRING
,
153 [0xC2 ... 0xF4] = IN_SQ_STRING
,
154 ['\\'] = IN_SQ_STRING_ESCAPE
,
155 ['\''] = JSON_STRING
,
160 TERMINAL(JSON_INTEGER
),
161 ['0' ... '9'] = IN_ERROR
,
167 TERMINAL(JSON_FLOAT
),
168 ['0' ... '9'] = IN_DIGITS
,
172 ['0' ... '9'] = IN_DIGITS
,
178 ['0' ... '9'] = IN_DIGITS
,
181 [IN_MANTISSA_DIGITS
] = {
182 TERMINAL(JSON_FLOAT
),
183 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
189 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
193 [IN_NONZERO_NUMBER
] = {
194 TERMINAL(JSON_INTEGER
),
195 ['0' ... '9'] = IN_NONZERO_NUMBER
,
201 [IN_NEG_NONZERO_NUMBER
] = {
203 ['1' ... '9'] = IN_NONZERO_NUMBER
,
208 TERMINAL(JSON_KEYWORD
),
209 ['a' ... 'z'] = IN_KEYWORD
,
215 [' '] = IN_WHITESPACE
,
216 ['\t'] = IN_WHITESPACE
,
217 ['\r'] = IN_WHITESPACE
,
218 ['\n'] = IN_WHITESPACE
,
228 ['l'] = IN_ESCAPE_LL
,
236 ['4'] = IN_ESCAPE_I64
,
240 ['6'] = IN_ESCAPE_I6
,
255 ['"'] = IN_DQ_STRING
,
256 ['\''] = IN_SQ_STRING
,
258 ['1' ... '9'] = IN_NONZERO_NUMBER
,
259 ['-'] = IN_NEG_NONZERO_NUMBER
,
260 ['{'] = JSON_OPERATOR
,
261 ['}'] = JSON_OPERATOR
,
262 ['['] = JSON_OPERATOR
,
263 [']'] = JSON_OPERATOR
,
264 [','] = JSON_OPERATOR
,
265 [':'] = JSON_OPERATOR
,
266 ['a' ... 'z'] = IN_KEYWORD
,
268 [' '] = IN_WHITESPACE
,
269 ['\t'] = IN_WHITESPACE
,
270 ['\r'] = IN_WHITESPACE
,
271 ['\n'] = IN_WHITESPACE
,
275 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
278 lexer
->state
= IN_START
;
279 lexer
->token
= qstring_new();
280 lexer
->x
= lexer
->y
= 0;
283 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
, bool flush
)
285 int char_consumed
, new_state
;
294 assert(lexer
->state
<= ARRAY_SIZE(json_lexer
));
295 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
296 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
298 qstring_append_chr(lexer
->token
, ch
);
308 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
311 QDECREF(lexer
->token
);
312 lexer
->token
= qstring_new();
313 new_state
= IN_START
;
316 /* XXX: To avoid having previous bad input leaving the parser in an
317 * unresponsive state where we consume unpredictable amounts of
318 * subsequent "good" input, percolate this error state up to the
319 * tokenizer/parser by forcing a NULL object to be emitted, then
322 * Also note that this handling is required for reliable channel
323 * negotiation between QMP and the guest agent, since chr(0xFF)
324 * is placed at the beginning of certain events to ensure proper
325 * delivery when the channel is in an unknown state. chr(0xFF) is
326 * never a valid ASCII/UTF-8 sequence, so this should reliably
327 * induce an error/flush state.
329 lexer
->emit(lexer
, lexer
->token
, JSON_ERROR
, lexer
->x
, lexer
->y
);
330 QDECREF(lexer
->token
);
331 lexer
->token
= qstring_new();
332 new_state
= IN_START
;
333 lexer
->state
= new_state
;
338 lexer
->state
= new_state
;
339 } while (!char_consumed
&& !flush
);
341 /* Do not let a single token grow to an arbitrarily large size,
342 * this is a security consideration.
344 if (lexer
->token
->length
> MAX_TOKEN_SIZE
) {
345 lexer
->emit(lexer
, lexer
->token
, lexer
->state
, lexer
->x
, lexer
->y
);
346 QDECREF(lexer
->token
);
347 lexer
->token
= qstring_new();
348 lexer
->state
= IN_START
;
354 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
358 for (i
= 0; i
< size
; i
++) {
361 err
= json_lexer_feed_char(lexer
, buffer
[i
], false);
370 int json_lexer_flush(JSONLexer
*lexer
)
372 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0, true);
375 void json_lexer_destroy(JSONLexer
*lexer
)
377 QDECREF(lexer
->token
);