4 * Copyright IBM, Corp. 2009
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
18 #include "qemu-common.h"
19 #include "json-lexer.h"
22 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
23 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
24 * 0|([1-9][0-9]*(\.[0-9]+)?([eE]([-+])?[0-9]+)?)
30 enum json_lexer_state
{
51 IN_NEG_NONZERO_NUMBER
,
63 #define TERMINAL(state) [0 ... 0x7F] = (state)
65 /* Return whether TERMINAL is a terminal state and the transition to it
66 from OLD_STATE required lookahead. This happens whenever the table
67 below uses the TERMINAL macro. */
68 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
69 (json_lexer[(old_state)][0] == (terminal))
71 static const uint8_t json_lexer
[][256] = {
72 /* double quote string */
74 ['0' ... '9'] = IN_DQ_STRING
,
75 ['a' ... 'f'] = IN_DQ_STRING
,
76 ['A' ... 'F'] = IN_DQ_STRING
,
79 ['0' ... '9'] = IN_DQ_UCODE3
,
80 ['a' ... 'f'] = IN_DQ_UCODE3
,
81 ['A' ... 'F'] = IN_DQ_UCODE3
,
84 ['0' ... '9'] = IN_DQ_UCODE2
,
85 ['a' ... 'f'] = IN_DQ_UCODE2
,
86 ['A' ... 'F'] = IN_DQ_UCODE2
,
89 ['0' ... '9'] = IN_DQ_UCODE1
,
90 ['a' ... 'f'] = IN_DQ_UCODE1
,
91 ['A' ... 'F'] = IN_DQ_UCODE1
,
93 [IN_DQ_STRING_ESCAPE
] = {
100 ['\\'] = IN_DQ_STRING
,
101 ['\''] = IN_DQ_STRING
,
102 ['\"'] = IN_DQ_STRING
,
103 ['u'] = IN_DQ_UCODE0
,
106 [1 ... 0xFF] = IN_DQ_STRING
,
107 ['\\'] = IN_DQ_STRING_ESCAPE
,
111 /* single quote string */
113 ['0' ... '9'] = IN_SQ_STRING
,
114 ['a' ... 'f'] = IN_SQ_STRING
,
115 ['A' ... 'F'] = IN_SQ_STRING
,
118 ['0' ... '9'] = IN_SQ_UCODE3
,
119 ['a' ... 'f'] = IN_SQ_UCODE3
,
120 ['A' ... 'F'] = IN_SQ_UCODE3
,
123 ['0' ... '9'] = IN_SQ_UCODE2
,
124 ['a' ... 'f'] = IN_SQ_UCODE2
,
125 ['A' ... 'F'] = IN_SQ_UCODE2
,
128 ['0' ... '9'] = IN_SQ_UCODE1
,
129 ['a' ... 'f'] = IN_SQ_UCODE1
,
130 ['A' ... 'F'] = IN_SQ_UCODE1
,
132 [IN_SQ_STRING_ESCAPE
] = {
133 ['b'] = IN_SQ_STRING
,
134 ['f'] = IN_SQ_STRING
,
135 ['n'] = IN_SQ_STRING
,
136 ['r'] = IN_SQ_STRING
,
137 ['t'] = IN_SQ_STRING
,
138 ['/'] = IN_DQ_STRING
,
139 ['\\'] = IN_DQ_STRING
,
140 ['\''] = IN_SQ_STRING
,
141 ['\"'] = IN_SQ_STRING
,
142 ['u'] = IN_SQ_UCODE0
,
145 [1 ... 0xFF] = IN_SQ_STRING
,
146 ['\\'] = IN_SQ_STRING_ESCAPE
,
147 ['\''] = JSON_STRING
,
152 TERMINAL(JSON_INTEGER
),
153 ['0' ... '9'] = IN_ERROR
,
159 TERMINAL(JSON_FLOAT
),
160 ['0' ... '9'] = IN_DIGITS
,
164 ['0' ... '9'] = IN_DIGITS
,
170 ['0' ... '9'] = IN_DIGITS
,
173 [IN_MANTISSA_DIGITS
] = {
174 TERMINAL(JSON_FLOAT
),
175 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
181 ['0' ... '9'] = IN_MANTISSA_DIGITS
,
185 [IN_NONZERO_NUMBER
] = {
186 TERMINAL(JSON_INTEGER
),
187 ['0' ... '9'] = IN_NONZERO_NUMBER
,
193 [IN_NEG_NONZERO_NUMBER
] = {
195 ['1' ... '9'] = IN_NONZERO_NUMBER
,
200 TERMINAL(JSON_KEYWORD
),
201 ['a' ... 'z'] = IN_KEYWORD
,
207 [' '] = IN_WHITESPACE
,
208 ['\t'] = IN_WHITESPACE
,
209 ['\r'] = IN_WHITESPACE
,
210 ['\n'] = IN_WHITESPACE
,
220 ['l'] = IN_ESCAPE_LL
,
228 ['4'] = IN_ESCAPE_I64
,
232 ['6'] = IN_ESCAPE_I6
,
247 ['"'] = IN_DQ_STRING
,
248 ['\''] = IN_SQ_STRING
,
250 ['1' ... '9'] = IN_NONZERO_NUMBER
,
251 ['-'] = IN_NEG_NONZERO_NUMBER
,
252 ['{'] = JSON_OPERATOR
,
253 ['}'] = JSON_OPERATOR
,
254 ['['] = JSON_OPERATOR
,
255 [']'] = JSON_OPERATOR
,
256 [','] = JSON_OPERATOR
,
257 [':'] = JSON_OPERATOR
,
258 ['a' ... 'z'] = IN_KEYWORD
,
260 [' '] = IN_WHITESPACE
,
261 ['\t'] = IN_WHITESPACE
,
262 ['\r'] = IN_WHITESPACE
,
263 ['\n'] = IN_WHITESPACE
,
267 void json_lexer_init(JSONLexer
*lexer
, JSONLexerEmitter func
)
270 lexer
->state
= IN_START
;
271 lexer
->token
= qstring_new();
272 lexer
->x
= lexer
->y
= 0;
275 static int json_lexer_feed_char(JSONLexer
*lexer
, char ch
)
277 int char_consumed
, new_state
;
286 new_state
= json_lexer
[lexer
->state
][(uint8_t)ch
];
287 char_consumed
= !TERMINAL_NEEDED_LOOKAHEAD(lexer
->state
, new_state
);
289 qstring_append_chr(lexer
->token
, ch
);
299 lexer
->emit(lexer
, lexer
->token
, new_state
, lexer
->x
, lexer
->y
);
301 QDECREF(lexer
->token
);
302 lexer
->token
= qstring_new();
303 new_state
= IN_START
;
310 lexer
->state
= new_state
;
311 } while (!char_consumed
);
315 int json_lexer_feed(JSONLexer
*lexer
, const char *buffer
, size_t size
)
319 for (i
= 0; i
< size
; i
++) {
322 err
= json_lexer_feed_char(lexer
, buffer
[i
]);
331 int json_lexer_flush(JSONLexer
*lexer
)
333 return lexer
->state
== IN_START
? 0 : json_lexer_feed_char(lexer
, 0);
336 void json_lexer_destroy(JSONLexer
*lexer
)
338 QDECREF(lexer
->token
);