3 * The following is a full-featured parser for configuration files using
4 * the basic format "key = value".
6 * Well, it's big, but it can properly manage spaces, empty lines,
7 * single and double-quoted strings, hex numbers, comments, semicolons
8 * and more. It also happens to be much more robust than the original one.
/*
 * Parser states, followed by the ACTION_* flags. A state-table entry is
 * "next state | action": the state is read back from the low byte
 * (mask 0x00ff) and the flags live in bits 0x0f00 and 0xf000.
 */
19 STATE_BEGIN
, /**< initial state */
20 STATE_COMMENT
, /**< currently in a comment */
21 STATE_KEY
, /**< (key) currently in a key */
22 STATE_KEYBS
, /**< (key) just after a backslash */
23 STATE_KEYBSX1
, /**< (key) expecting the first hex digit of a \\x escape */
24 STATE_KEYBSX2
, /**< (key) expecting the second hex digit of a \\x escape */
25 STATE_KEYSQ
, /**< (key) currently in a single-quoted key */
26 STATE_KEYDQ
, /**< (key) currently in a double-quoted key */
27 STATE_KEYDQBS
, /**< (key) just after a backslash while in double quotes */
28 STATE_KEYDQBSX1
, /**< (key) expecting the first hex digit of \\x in double quotes */
29 STATE_KEYDQBSX2
, /**< (key) expecting the second hex digit of \\x in double quotes */
30 STATE_BEQ
, /**< before '=' between key and value */
31 STATE_AEQ
, /**< after '=' between key and value */
32 STATE_VALUE
, /**< (value) currently in a value; the states below mirror the (key) ones */
33 STATE_VALBS
, /**< (value) just after a backslash */
34 STATE_VALBSX1
, /**< (value) expecting the first hex digit of a \\x escape */
35 STATE_VALBSX2
, /**< (value) expecting the second hex digit of a \\x escape */
36 STATE_VALSQ
, /**< (value) currently in a single-quoted value */
37 STATE_VALDQ
, /**< (value) currently in a double-quoted value */
38 STATE_VALDQBS
, /**< (value) just after a backslash while in double quotes */
39 STATE_VALDQBSX1
, /**< (value) expecting the first hex digit of \\x in double quotes */
40 STATE_VALDQBSX2
, /**< (value) expecting the second hex digit of \\x in double quotes */
41 STATE_VALEND
, /**< end of a value, ready to take a new key */
42 ACTION_KEY
= 0x0100, /**< key complete (ckvp->state is set to CKVP_OUT_KEY) */
43 ACTION_VALUE
= 0x0200, /**< value complete (ckvp->state is set to CKVP_OUT_VALUE) */
44 ACTION_ERROR
= 0x0400, /**< caught an error (ckvp->state is set to CKVP_ERROR) */
45 ACTION_STORE
= 0x1000, /**< character must be stored as is */
46 ACTION_STORE_MOD
= 0x2000, /**< store filtered character */
47 ACTION_STORE_HEX1
= 0x4000, /**< keep first hex digit as the high nibble in bits 20-23 of ckvp->internal */
48 ACTION_STORE_HEX2
= 0x8000 /**< combine second hex digit with the saved high nibble into one byte */
/*
 * Designated-initializer helper: expands to one "[c] = (st)" entry for every
 * hexadecimal digit character (0-9, a-f, A-F). Meant to be used inside the
 * initializer of an array indexed by character code, such as a state-table
 * row.
 */
51 #define HEX_INDICES(st) \
52 ['0'] = (st), ['1'] = (st), ['2'] = (st), ['3'] = (st), \
53 ['4'] = (st), ['5'] = (st), ['6'] = (st), ['7'] = (st), \
54 ['8'] = (st), ['9'] = (st), ['a'] = (st), ['b'] = (st), \
55 ['c'] = (st), ['d'] = (st), ['e'] = (st), ['f'] = (st), \
56 ['A'] = (st), ['B'] = (st), ['C'] = (st), ['D'] = (st), \
57 ['E'] = (st), ['F'] = (st)
60 * ckvp_parse() takes the current state (ckvp), a buffer in[size] and returns
61 * the number of characters processed.
63 * Each time ckvp_parse() returns, ckvp->state must be checked. If no error
64 * occurred, ckvp_parse() must be called again with the remaining characters,
65 * if any, or otherwise with the next input buffer.
67 * At the end of input, ckvp_parse() must be called with a zero size.
69 * This function doesn't allocate anything.
71 * @param[in,out] ckvp Current state.
72 * @param size Number of characters in buffer "in".
73 * @param in Input buffer to parse.
74 * @return Number of characters processed.
76 size_t ckvp_parse(ckvp_t
*ckvp
, size_t size
, const char in
[])
79 * State machine definition:
81 * st[current_state][current_character] = next state | action
83 * Special indices for current_character are:
85 * - 0x100 for action on characters not in the list
86 * - 0x101 for action when encountering end of input while in the
87 * current state (often ACTION_ERROR)
89 static const unsigned int st
[][0x102] = {
91 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
92 [0x101] = ACTION_ERROR
100 ['\v'] = STATE_BEGIN
,
101 [';'] = (STATE_ERROR
| ACTION_ERROR
),
102 ['#'] = STATE_COMMENT
,
103 ['\''] = STATE_KEYSQ
,
105 ['\\'] = STATE_KEYBS
,
106 ['='] = (STATE_ERROR
| ACTION_ERROR
),
107 [0x100] = (STATE_KEY
| ACTION_STORE
),
111 ['\n'] = STATE_BEGIN
,
112 [0x100] = STATE_COMMENT
,
116 [' '] = (STATE_BEQ
| ACTION_KEY
),
117 ['\f'] = (STATE_BEQ
| ACTION_KEY
),
118 ['\n'] = (STATE_BEQ
| ACTION_KEY
),
119 ['\r'] = (STATE_BEQ
| ACTION_KEY
),
120 ['\t'] = (STATE_BEQ
| ACTION_KEY
),
121 ['\v'] = (STATE_BEQ
| ACTION_KEY
),
122 ['\''] = STATE_KEYSQ
,
123 ['\"'] = STATE_KEYDQ
,
124 [';'] = (STATE_ERROR
| ACTION_ERROR
),
125 ['='] = (STATE_AEQ
| ACTION_KEY
),
126 ['#'] = (STATE_ERROR
| ACTION_ERROR
),
127 ['\\'] = STATE_KEYBS
,
128 [0x100] = (STATE_KEY
| ACTION_STORE
),
129 [0x101] = ACTION_ERROR
132 ['f'] = (STATE_KEY
| ACTION_STORE_MOD
),
133 ['n'] = (STATE_KEY
| ACTION_STORE_MOD
),
134 ['r'] = (STATE_KEY
| ACTION_STORE_MOD
),
135 ['t'] = (STATE_KEY
| ACTION_STORE_MOD
),
136 ['v'] = (STATE_KEY
| ACTION_STORE_MOD
),
137 ['x'] = STATE_KEYBSX1
,
139 [0x100] = (STATE_KEY
| ACTION_STORE
),
140 [0x101] = ACTION_ERROR
143 HEX_INDICES(STATE_KEYBSX2
| ACTION_STORE_HEX1
),
144 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
145 [0x101] = ACTION_ERROR
148 HEX_INDICES(STATE_KEY
| ACTION_STORE_HEX2
),
149 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
150 [0x101] = ACTION_ERROR
154 [0x100] = (STATE_KEYSQ
| ACTION_STORE
),
155 [0x101] = ACTION_ERROR
159 ['\\'] = STATE_KEYDQBS
,
160 [0x100] = (STATE_KEYDQ
| ACTION_STORE
),
161 [0x101] = ACTION_ERROR
164 ['f'] = (STATE_KEYDQ
| ACTION_STORE_MOD
),
165 ['n'] = (STATE_KEYDQ
| ACTION_STORE_MOD
),
166 ['r'] = (STATE_KEYDQ
| ACTION_STORE_MOD
),
167 ['t'] = (STATE_KEYDQ
| ACTION_STORE_MOD
),
168 ['v'] = (STATE_KEYDQ
| ACTION_STORE_MOD
),
169 ['x'] = STATE_KEYDQBSX1
,
170 ['\n'] = STATE_KEYDQ
,
171 [0x100] = (STATE_KEYDQ
| ACTION_STORE
),
172 [0x101] = ACTION_ERROR
174 [STATE_KEYDQBSX1
] = {
175 HEX_INDICES(STATE_KEYDQBSX2
| ACTION_STORE_HEX1
),
176 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
177 [0x101] = ACTION_ERROR
179 [STATE_KEYDQBSX2
] = {
180 HEX_INDICES(STATE_KEYDQ
| ACTION_STORE_HEX2
),
181 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
182 [0x101] = ACTION_ERROR
192 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
193 [0x101] = ACTION_ERROR
202 ['\''] = STATE_VALSQ
,
203 ['\"'] = STATE_VALDQ
,
204 ['\\'] = STATE_VALBS
,
205 ['='] = (STATE_ERROR
| ACTION_ERROR
),
206 ['#'] = (STATE_COMMENT
| ACTION_VALUE
),
207 [';'] = (STATE_BEGIN
| ACTION_VALUE
),
208 [0x100] = (STATE_VALUE
| ACTION_STORE
),
209 [0x101] = ACTION_VALUE
212 [' '] = (STATE_VALEND
| ACTION_VALUE
),
213 ['\f'] = (STATE_VALEND
| ACTION_VALUE
),
214 ['\n'] = (STATE_BEGIN
| ACTION_VALUE
),
215 ['\r'] = (STATE_VALEND
| ACTION_VALUE
),
216 ['\t'] = (STATE_VALEND
| ACTION_VALUE
),
217 ['\v'] = (STATE_VALEND
| ACTION_VALUE
),
218 ['\''] = STATE_VALSQ
,
219 ['\"'] = STATE_VALDQ
,
220 [';'] = (STATE_BEGIN
| ACTION_VALUE
),
221 ['='] = (STATE_ERROR
| ACTION_ERROR
),
222 ['#'] = (STATE_COMMENT
| ACTION_VALUE
),
223 ['\\'] = STATE_VALBS
,
224 [0x100] = (STATE_VALUE
| ACTION_STORE
),
225 [0x101] = ACTION_VALUE
228 ['f'] = (STATE_VALUE
| ACTION_STORE_MOD
),
229 ['n'] = (STATE_VALUE
| ACTION_STORE_MOD
),
230 ['r'] = (STATE_VALUE
| ACTION_STORE_MOD
),
231 ['t'] = (STATE_VALUE
| ACTION_STORE_MOD
),
232 ['v'] = (STATE_VALUE
| ACTION_STORE_MOD
),
233 ['x'] = STATE_VALBSX1
,
234 ['\n'] = STATE_VALUE
,
235 [0x100] = (STATE_VALUE
| ACTION_STORE
),
236 [0x101] = ACTION_ERROR
239 HEX_INDICES(STATE_VALBSX2
| ACTION_STORE_HEX1
),
240 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
241 [0x101] = ACTION_ERROR
244 HEX_INDICES(STATE_VALUE
| ACTION_STORE_HEX2
),
245 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
246 [0x101] = ACTION_ERROR
249 ['\''] = STATE_VALUE
,
250 [0x100] = (STATE_VALSQ
| ACTION_STORE
),
251 [0x101] = ACTION_ERROR
255 ['\\'] = STATE_VALDQBS
,
256 [0x100] = (STATE_VALDQ
| ACTION_STORE
),
257 [0x101] = ACTION_ERROR
260 ['f'] = (STATE_VALDQ
| ACTION_STORE_MOD
),
261 ['n'] = (STATE_VALDQ
| ACTION_STORE_MOD
),
262 ['r'] = (STATE_VALDQ
| ACTION_STORE_MOD
),
263 ['t'] = (STATE_VALDQ
| ACTION_STORE_MOD
),
264 ['v'] = (STATE_VALDQ
| ACTION_STORE_MOD
),
265 ['x'] = STATE_VALDQBSX1
,
266 ['\n'] = STATE_VALDQ
,
267 [0x100] = (STATE_VALDQ
| ACTION_STORE
),
268 [0x101] = ACTION_ERROR
270 [STATE_VALDQBSX1
] = {
271 HEX_INDICES(STATE_VALDQBSX2
| ACTION_STORE_HEX1
),
272 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
273 [0x101] = ACTION_ERROR
275 [STATE_VALDQBSX2
] = {
276 HEX_INDICES(STATE_VALDQ
| ACTION_STORE_HEX2
),
277 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
278 [0x101] = ACTION_ERROR
281 [' '] = STATE_VALEND
,
282 ['\f'] = STATE_VALEND
,
283 ['\n'] = STATE_BEGIN
,
284 ['\r'] = STATE_VALEND
,
285 ['\t'] = STATE_VALEND
,
286 ['\v'] = STATE_VALEND
,
288 ['#'] = STATE_COMMENT
,
289 [0x100] = (STATE_ERROR
| ACTION_ERROR
),
/*
 * Maps an escape letter (f, n, r, t, v) to the control character it names.
 * NOTE(review): presumably applied to the input character when
 * ACTION_STORE_MOD is set; the use site is not visible here, confirm.
 */
293 static const unsigned char cv
[] = {
294 ['f'] = '\f', ['n'] = '\n', ['r'] = '\r',
295 ['t'] = '\t', ['v'] = '\v'
/*
 * Maps a hexadecimal digit character (0-9, a-f, A-F) to its numeric value
 * 0x0-0xf. Indexed with the current input character while handling
 * ACTION_STORE_HEX1 and ACTION_STORE_HEX2. The array only extends up to
 * index 'f', so it must not be indexed with arbitrary bytes.
 */
297 static const unsigned char hb
[] = {
298 ['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3,
299 ['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
300 ['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb,
301 ['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf,
302 ['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd,
303 ['E'] = 0xe, ['F'] = 0xf
307 assert(sizeof(unsigned int) >= 4);
308 assert(ckvp
!= NULL
);
310 if (ckvp
->state
!= CKVP_NONE
) {
312 ckvp
->state
= CKVP_NONE
;
314 if (ckvp
->internal
& 0x00010000) {
318 else if (ckvp
->internal
& 0x00020000)
320 ckvp
->internal
&= ~(0x00030000);
322 assert((ckvp
->internal
& 0x00ff) != 0x00);
323 assert((ckvp
->internal
& 0x00ff) <= STATE_VALEND
);
324 if (st
[(ckvp
->internal
& 0x00ff)][0x101] & ACTION_ERROR
)
325 ckvp
->state
= CKVP_ERROR
;
326 else if (st
[(ckvp
->internal
& 0x00ff)][0x101] & ACTION_VALUE
)
327 ckvp
->state
= CKVP_OUT_VALUE
;
330 for (i
= 0; (i
< size
); ++i
) {
331 unsigned char c
= in
[i
];
334 assert((ckvp
->internal
& 0x00ff) != 0x00);
335 assert((ckvp
->internal
& 0x00ff) <= STATE_VALEND
);
336 if ((newst
= st
[(ckvp
->internal
& 0x00ff)][(c
& 0xff)]) == 0)
337 newst
= st
[(ckvp
->internal
& 0x00ff)][0x100];
338 ckvp
->internal
= ((ckvp
->internal
& 0xffff0000) | newst
);
340 if (newst
& 0x0f00) {
341 if (newst
& ACTION_ERROR
)
342 ckvp
->state
= CKVP_ERROR
;
343 else if (newst
& ACTION_KEY
)
344 ckvp
->state
= CKVP_OUT_KEY
;
345 else if (newst
& ACTION_VALUE
)
346 ckvp
->state
= CKVP_OUT_VALUE
;
349 if (newst
& 0xf000) {
350 if (newst
& ACTION_STORE_HEX1
) {
351 ckvp
->internal
&= ~(0x00f00000);
352 ckvp
->internal
|= (hb
[c
] << 20);
355 else if (newst
& ACTION_STORE_HEX2
)
356 c
= (((ckvp
->internal
>> 16) & 0xf0) | hb
[c
]);
357 else if (newst
& ACTION_STORE_MOD
)
359 if (ckvp
->out_size
== CKVP_OUT_SIZE
) {
364 ckvp
->out
[((ckvp
->out_size
)++)] = c
;
365 if (ckvp
->out_size
== CKVP_OUT_SIZE
) {
366 ckvp
->state
= CKVP_OUT_FULL
;
379 ckvp
->internal
|= 0x00010000;
381 ckvp
->internal
|= 0x00020000;