4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "json-lexer.h"
21 #define MAX_TOKEN_SIZE (64ULL << 20)
24 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
25 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
26 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
32 enum json_lexer_state
{
53 IN_NEG_NONZERO_NUMBER
,
/* Row-initialiser helper: set the entries for characters 0x00..0x7F of a
   transition-table row to STATE.  This relies on the GNU C range
   designator extension; designators that follow it in the same row
   (e.g. ['0' ... '9']) override the entries it set. */
65 #define TERMINAL(state) [0 ... 0x7F] = (state)
67 /* Return whether TERMINAL is a terminal state and the transition to it
68 from OLD_STATE required lookahead. This happens whenever the table
69 below uses the TERMINAL macro. */
70 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
71 (json_lexer[(old_state)][0] == (terminal))
73 static const uint8_t json_lexer
[][256] = {
74 /* double quote string */
76 ['0' ... '9'] = IN_DQ_STRING
,
77 ['a' ... 'f'] = IN_DQ_STRING
,
78 ['A' ... 'F'] = IN_DQ_STRING
,
81 ['0' ... '9'] = IN_DQ_UCODE3
,
82 ['a' ... 'f'] = IN_DQ_UCODE3
,
83 ['A' ... 'F'] = IN_DQ_UCODE3
,
86 ['0' ... '9'] = IN_DQ_UCODE2
,
87 ['a' ... 'f'] = IN_DQ_UCODE2
,
88 ['A' ... 'F'] = IN_DQ_UCODE2
,
91 ['0' ... '9'] = IN_DQ_UCODE1
,
92 ['a' ... 'f'] = IN_DQ_UCODE1
,
93 ['A' ... 'F'] = IN_DQ_UCODE1
,
95 [IN_DQ_STRING_ESCAPE
] = {
100 ['t'] = IN_DQ_STRING
,
101 ['/'] = IN_DQ_STRING
,
102 ['\\'] = IN_DQ_STRING
,
103 ['\''] = IN_DQ_STRING
,
104 ['\"'] = IN_DQ_STRING
,
105 ['u'] = IN_DQ_UCODE0
,
108 [1 ... 0xFF] = IN_DQ_STRING
,
109 ['\\'] = IN_DQ_STRING_ESCAPE
,
113 /* single quote string */
115 ['0' ... '9'] = IN_SQ_STRING
,
116 ['a' ... 'f'] = IN_SQ_STRING
,
117 ['A' ... 'F'] = IN_SQ_STRING
,
120 ['0' ... '9'] = IN_SQ_UCODE3
,
121 ['a' ... 'f'] = IN_SQ_UCODE3
,
122 ['A' ... 'F'] = IN_SQ_UCODE3
,
125 ['0' ... '9'] = IN_SQ_UCODE2
,
126 ['a' ... 'f'] = IN_SQ_UCODE2
,
127 ['A' ... 'F'] = IN_SQ_UCODE2
,
130 ['0' ... '9'] = IN_SQ_UCODE1
,
131 ['a' ... 'f'] = IN_SQ_UCODE1
,
132 ['A' ... 'F'] = IN_SQ_UCODE1
,
134 [IN_SQ_STRING_ESCAPE
] = {
135 ['b'] = IN_SQ_STRING
,
136 ['f'] = IN_SQ_STRING
,
137 ['n'] = IN_SQ_STRING
,
138 ['r'] = IN_SQ_STRING
,
139 ['t'] = IN_SQ_STRING
,
140 ['/'] = IN_DQ_STRING
,
141 ['\\'] = IN_DQ_STRING
,
142 ['\''] = IN_SQ_STRING
,
143 ['\"'] = IN_SQ_STRING
,
144 ['u'] = IN_SQ_UCODE0
,
147 [1 ... 0xFF] = IN_SQ_STRING
,
148 ['\\'] = IN_SQ_STRING_ESCAPE
,
149 ['\''] = JSON_STRING
,
154 TERMINAL(JSON_INTEGER
),
155 ['0' ... '9'] = IN_ERROR
,
161 TERMINAL(JSON_FLOAT
),
162 ['0' ... '9'] = IN_DIGITS
,
166 ['0' ... '9'] = IN_DIGITS
,
172 ['0' ... '9'] = IN_DIGITS
,
175 [IN_MANTISSA_DIGITS
] = {
176 TERMINAL(JSON_FLOAT
),
177 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
183 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
187 [IN_NONZERO_NUMBER
] = {
188 TERMINAL(JSON_INTEGER
),
189 ['0' ... '9'] = IN_NONZERO_NUMBER
,
195 [IN_NEG_NONZERO_NUMBER
] = {
197 ['1' ... '9'] = IN_NONZERO_NUMBER
,
202 TERMINAL(JSON_KEYWORD
),
203 ['a' ... 'z'] = IN_KEYWORD
,
209 [' '] = IN_WHITESPACE
,
210 ['\t'] = IN_WHITESPACE
,
211 ['\r'] = IN_WHITESPACE
,
212 ['\n'] = IN_WHITESPACE
,
222 ['l'] = IN_ESCAPE_LL
,
230 ['4'] = IN_ESCAPE_I64
,
234 ['6'] = IN_ESCAPE_I6
,
249 ['"'] = IN_DQ_STRING
,
250 ['\''] = IN_SQ_STRING
,
252 ['1' ... '9'] = IN_NONZERO_NUMBER
,
253 ['-'] = IN_NEG_NONZERO_NUMBER
,
254 ['{'] = JSON_OPERATOR
,
255 ['}'] = JSON_OPERATOR
,
256 ['['] = JSON_OPERATOR
,
257 [']'] = JSON_OPERATOR
,
258 [','] = JSON_OPERATOR
,
259 [':'] = JSON_OPERATOR
,
260 ['a' ... 'z'] = IN_KEYWORD
,
262 [' '] = IN_WHITESPACE
,
263 ['\t'] = IN_WHITESPACE
,
264 ['\r'] = IN_WHITESPACE
,
265 ['\n'] = IN_WHITESPACE
,
/*
 * Put LEXER into its initial condition: state IN_START, a fresh QString
 * from qstring_new() as the pending token, and the x and y fields zeroed.
 *
 * NOTE(review): no statement visible in this excerpt stores FUNC, yet
 * json_lexer_feed_char() calls lexer->emit(); confirm that the emitter
 * assignment is present in the full source.
 */
269 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
272 lexer
->state
= IN_START
;
273 lexer
->token
= qstring_new();
274 lexer
->x
= lexer
->y
= 0;
/*
 * Feed one character, CH, to the lexer state machine.
 *
 * The next state is read from json_lexer[lexer->state][(uint8_t)ch].
 * char_consumed is cleared when TERMINAL_NEEDED_LOOKAHEAD() reports that
 * the transition needed lookahead; the do/while loop then runs again for
 * the same character unless FLUSH is set.
 *
 * Statements visible in the loop: CH is appended to lexer->token; the
 * token is passed to lexer->emit() together with the new state and
 * lexer->x / lexer->y; the token reference is dropped and replaced by a
 * fresh QString while the state goes back to IN_START.  After the loop,
 * a token whose length exceeds MAX_TOKEN_SIZE is emitted with the current
 * state and the lexer is reset in the same way.
 *
 * NOTE(review): the conditions that select between these steps and the
 * function's return statements are not visible in this excerpt; confirm
 * against the full source before relying on this summary.
 */
277 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
, bool flush
)
279 int char_consumed
, new_state
;
288 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
289 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
291 qstring_append_chr(lexer
->token
, ch
);
301 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
303 QDECREF(lexer
->token
);
304 lexer
->token
= qstring_new();
305 new_state
= IN_START
;
308 QDECREF(lexer
->token
);
309 lexer
->token
= qstring_new();
310 new_state
= IN_START
;
315 lexer
->state
= new_state
;
316 } while (!char_consumed
&& !flush
);
318 /* Do not let a single token grow to an arbitrarily large size,
319 * this is a security consideration.
321 if (lexer
->token
->length
> MAX_TOKEN_SIZE
) {
322 lexer
->emit(lexer
, lexer
->token
, lexer
->state
, lexer
->x
, lexer
->y
);
323 QDECREF(lexer
->token
);
324 lexer
->token
= qstring_new();
325 lexer
->state
= IN_START
;
/*
 * Feed SIZE bytes from BUFFER to LEXER, one character at a time, by
 * calling json_lexer_feed_char() with flush disabled.
 *
 * NOTE(review): the declarations of i and err, the handling of err and
 * the return value are not visible in this excerpt.
 */
331 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
335 for (i
= 0; i
< size
; i
++) {
338 err
= json_lexer_feed_char(lexer
, buffer
[i
], false);
347 int json_lexer_flush(JSONLexer
*lexer
)
349 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0);
352 void json_lexer_destroy(JSONLexer
*lexer
)
354 QDECREF(lexer
->token
);