dmg: fix reading of uncompressed chunks
[qemu/aliguori-queue.git] / json-lexer.c
blob9d649205a7f5e586875d04632e29cb68e1d08365
/*
 * JSON lexer
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
14 #include "qstring.h"
15 #include "qlist.h"
16 #include "qdict.h"
17 #include "qint.h"
18 #include "qemu-common.h"
19 #include "json-lexer.h"
/*
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
 * [{}\[\],:]
 * [a-z]+
 *
 */
/*
 * States of the lexer's transition table.
 *
 * ERROR must remain 0: table cells that are not explicitly initialised
 * are zero, so any transition that is not listed lands in ERROR.
 * The order of the remaining enumerators only determines their numeric
 * values; the table addresses rows by name.
 */
enum json_lexer_state {
    ERROR = 0,

    /* closing quote seen; the next character completes the string token */
    IN_DONE_STRING,

    /* double-quoted string; UCODE0..UCODE3 each await one hex digit of \uXXXX */
    IN_DQ_UCODE3,
    IN_DQ_UCODE2,
    IN_DQ_UCODE1,
    IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string; same layout as the double-quoted states */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* [a-z]+ keywords */
    IN_KEYWORD,

    /* '%' escapes: %d %i %p %s %f %ld %lld %I64d */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_I,
    IN_ESCAPE_I6,
    IN_ESCAPE_I64,
    IN_ESCAPE_DONE,

    IN_WHITESPACE,
    IN_OPERATOR_DONE,

    /* initial state, and the state each new token is started from */
    IN_START,
};
66 #define TERMINAL(state) [0 ... 0x7F] = (state)
68 static const uint8_t json_lexer[][256] = {
69 [IN_DONE_STRING] = {
70 TERMINAL(JSON_STRING),
73 /* double quote string */
74 [IN_DQ_UCODE3] = {
75 ['0' ... '9'] = IN_DQ_STRING,
76 ['a' ... 'f'] = IN_DQ_STRING,
77 ['A' ... 'F'] = IN_DQ_STRING,
79 [IN_DQ_UCODE2] = {
80 ['0' ... '9'] = IN_DQ_UCODE3,
81 ['a' ... 'f'] = IN_DQ_UCODE3,
82 ['A' ... 'F'] = IN_DQ_UCODE3,
84 [IN_DQ_UCODE1] = {
85 ['0' ... '9'] = IN_DQ_UCODE2,
86 ['a' ... 'f'] = IN_DQ_UCODE2,
87 ['A' ... 'F'] = IN_DQ_UCODE2,
89 [IN_DQ_UCODE0] = {
90 ['0' ... '9'] = IN_DQ_UCODE1,
91 ['a' ... 'f'] = IN_DQ_UCODE1,
92 ['A' ... 'F'] = IN_DQ_UCODE1,
94 [IN_DQ_STRING_ESCAPE] = {
95 ['b'] = IN_DQ_STRING,
96 ['f'] = IN_DQ_STRING,
97 ['n'] = IN_DQ_STRING,
98 ['r'] = IN_DQ_STRING,
99 ['t'] = IN_DQ_STRING,
100 ['\''] = IN_DQ_STRING,
101 ['\"'] = IN_DQ_STRING,
102 ['u'] = IN_DQ_UCODE0,
104 [IN_DQ_STRING] = {
105 [1 ... 0xFF] = IN_DQ_STRING,
106 ['\\'] = IN_DQ_STRING_ESCAPE,
107 ['"'] = IN_DONE_STRING,
110 /* single quote string */
111 [IN_SQ_UCODE3] = {
112 ['0' ... '9'] = IN_SQ_STRING,
113 ['a' ... 'f'] = IN_SQ_STRING,
114 ['A' ... 'F'] = IN_SQ_STRING,
116 [IN_SQ_UCODE2] = {
117 ['0' ... '9'] = IN_SQ_UCODE3,
118 ['a' ... 'f'] = IN_SQ_UCODE3,
119 ['A' ... 'F'] = IN_SQ_UCODE3,
121 [IN_SQ_UCODE1] = {
122 ['0' ... '9'] = IN_SQ_UCODE2,
123 ['a' ... 'f'] = IN_SQ_UCODE2,
124 ['A' ... 'F'] = IN_SQ_UCODE2,
126 [IN_SQ_UCODE0] = {
127 ['0' ... '9'] = IN_SQ_UCODE1,
128 ['a' ... 'f'] = IN_SQ_UCODE1,
129 ['A' ... 'F'] = IN_SQ_UCODE1,
131 [IN_SQ_STRING_ESCAPE] = {
132 ['b'] = IN_SQ_STRING,
133 ['f'] = IN_SQ_STRING,
134 ['n'] = IN_SQ_STRING,
135 ['r'] = IN_SQ_STRING,
136 ['t'] = IN_SQ_STRING,
137 ['\''] = IN_SQ_STRING,
138 ['\"'] = IN_SQ_STRING,
139 ['u'] = IN_SQ_UCODE0,
141 [IN_SQ_STRING] = {
142 [1 ... 0xFF] = IN_SQ_STRING,
143 ['\\'] = IN_SQ_STRING_ESCAPE,
144 ['\''] = IN_DONE_STRING,
147 /* Zero */
148 [IN_ZERO] = {
149 TERMINAL(JSON_INTEGER),
150 ['0' ... '9'] = ERROR,
151 ['.'] = IN_MANTISSA,
154 /* Float */
155 [IN_DIGITS] = {
156 TERMINAL(JSON_FLOAT),
157 ['0' ... '9'] = IN_DIGITS,
160 [IN_DIGIT] = {
161 ['0' ... '9'] = IN_DIGITS,
164 [IN_EXP_E] = {
165 ['-'] = IN_DIGIT,
166 ['+'] = IN_DIGIT,
167 ['0' ... '9'] = IN_DIGITS,
170 [IN_MANTISSA_DIGITS] = {
171 TERMINAL(JSON_FLOAT),
172 ['0' ... '9'] = IN_MANTISSA_DIGITS,
173 ['e'] = IN_EXP_E,
174 ['E'] = IN_EXP_E,
177 [IN_MANTISSA] = {
178 ['0' ... '9'] = IN_MANTISSA_DIGITS,
181 /* Number */
182 [IN_NONZERO_NUMBER] = {
183 TERMINAL(JSON_INTEGER),
184 ['0' ... '9'] = IN_NONZERO_NUMBER,
185 ['e'] = IN_EXP_E,
186 ['E'] = IN_EXP_E,
187 ['.'] = IN_MANTISSA,
190 [IN_NEG_NONZERO_NUMBER] = {
191 ['0'] = IN_ZERO,
192 ['1' ... '9'] = IN_NONZERO_NUMBER,
195 /* keywords */
196 [IN_KEYWORD] = {
197 TERMINAL(JSON_KEYWORD),
198 ['a' ... 'z'] = IN_KEYWORD,
201 /* whitespace */
202 [IN_WHITESPACE] = {
203 TERMINAL(JSON_SKIP),
204 [' '] = IN_WHITESPACE,
205 ['\t'] = IN_WHITESPACE,
206 ['\r'] = IN_WHITESPACE,
207 ['\n'] = IN_WHITESPACE,
210 /* operator */
211 [IN_OPERATOR_DONE] = {
212 TERMINAL(JSON_OPERATOR),
215 /* escape */
216 [IN_ESCAPE_DONE] = {
217 TERMINAL(JSON_ESCAPE),
220 [IN_ESCAPE_LL] = {
221 ['d'] = IN_ESCAPE_DONE,
224 [IN_ESCAPE_L] = {
225 ['d'] = IN_ESCAPE_DONE,
226 ['l'] = IN_ESCAPE_LL,
229 [IN_ESCAPE_I64] = {
230 ['d'] = IN_ESCAPE_DONE,
233 [IN_ESCAPE_I6] = {
234 ['4'] = IN_ESCAPE_I64,
237 [IN_ESCAPE_I] = {
238 ['6'] = IN_ESCAPE_I6,
241 [IN_ESCAPE] = {
242 ['d'] = IN_ESCAPE_DONE,
243 ['i'] = IN_ESCAPE_DONE,
244 ['p'] = IN_ESCAPE_DONE,
245 ['s'] = IN_ESCAPE_DONE,
246 ['f'] = IN_ESCAPE_DONE,
247 ['l'] = IN_ESCAPE_L,
248 ['I'] = IN_ESCAPE_I,
251 /* top level rule */
252 [IN_START] = {
253 ['"'] = IN_DQ_STRING,
254 ['\''] = IN_SQ_STRING,
255 ['0'] = IN_ZERO,
256 ['1' ... '9'] = IN_NONZERO_NUMBER,
257 ['-'] = IN_NEG_NONZERO_NUMBER,
258 ['{'] = IN_OPERATOR_DONE,
259 ['}'] = IN_OPERATOR_DONE,
260 ['['] = IN_OPERATOR_DONE,
261 [']'] = IN_OPERATOR_DONE,
262 [','] = IN_OPERATOR_DONE,
263 [':'] = IN_OPERATOR_DONE,
264 ['a' ... 'z'] = IN_KEYWORD,
265 ['%'] = IN_ESCAPE,
266 [' '] = IN_WHITESPACE,
267 ['\t'] = IN_WHITESPACE,
268 ['\r'] = IN_WHITESPACE,
269 ['\n'] = IN_WHITESPACE,
273 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
275 lexer->emit = func;
276 lexer->state = IN_START;
277 lexer->token = qstring_new();
280 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
282 char buf[2];
284 lexer->x++;
285 if (ch == '\n') {
286 lexer->x = 0;
287 lexer->y++;
290 lexer->state = json_lexer[lexer->state][(uint8_t)ch];
292 switch (lexer->state) {
293 case JSON_OPERATOR:
294 case JSON_ESCAPE:
295 case JSON_INTEGER:
296 case JSON_FLOAT:
297 case JSON_KEYWORD:
298 case JSON_STRING:
299 lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
300 case JSON_SKIP:
301 lexer->state = json_lexer[IN_START][(uint8_t)ch];
302 QDECREF(lexer->token);
303 lexer->token = qstring_new();
304 break;
305 case ERROR:
306 return -EINVAL;
307 default:
308 break;
311 buf[0] = ch;
312 buf[1] = 0;
314 qstring_append(lexer->token, buf);
316 return 0;
319 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
321 size_t i;
323 for (i = 0; i < size; i++) {
324 int err;
326 err = json_lexer_feed_char(lexer, buffer[i]);
327 if (err < 0) {
328 return err;
332 return 0;
335 int json_lexer_flush(JSONLexer *lexer)
337 return json_lexer_feed_char(lexer, 0);
340 void json_lexer_destroy(JSONLexer *lexer)
342 QDECREF(lexer->token);