Patches no longer seem to be needed
[debian-dgen.git] / ckvp.c
blob7263dd734b84068de562101b8fd98443824c6267
/**
 * @file
 * The following is a full-featured parser for configuration files using
 * the basic format "key = value".
 *
 * Well, it's big, but it can properly manage spaces, empty lines,
 * single and double-quoted strings, hex numbers, comments, semicolons
 * and more. It also happens to be much more robust than the original one.
 *
 * @author zamaz
 */
13 #include <stddef.h>
14 #include <assert.h>
15 #include "ckvp.h"
/**
 * Parser states and transition actions.
 *
 * A state-machine entry is "next state | actions": states occupy the low
 * byte (1..STATE_VALEND, 0 meaning "no entry"), action flags the bits above.
 */
enum {
	/* General states. */
	STATE_ERROR = 1,
	STATE_BEGIN,       /**< start of input or of a new entry */
	STATE_COMMENT,     /**< inside a comment */

	/* Key states. */
	STATE_KEY,         /**< (key) inside an unquoted key */
	STATE_KEYBS,       /**< (key) just after a backslash */
	STATE_KEYBSX1,     /**< (key) expecting first hex digit of \\x */
	STATE_KEYBSX2,     /**< (key) expecting second hex digit of \\x */
	STATE_KEYSQ,       /**< (key) inside single quotes */
	STATE_KEYDQ,       /**< (key) inside double quotes */
	STATE_KEYDQBS,     /**< (key) backslash inside double quotes */
	STATE_KEYDQBSX1,   /**< (key) first hex digit of \\x, double quoted */
	STATE_KEYDQBSX2,   /**< (key) second hex digit of \\x, double quoted */

	/* Separator states. */
	STATE_BEQ,         /**< key finished, '=' not seen yet */
	STATE_AEQ,         /**< '=' seen, value not started yet */

	/* Value states, mirroring the key states above. */
	STATE_VALUE,       /**< (value) inside an unquoted value */
	STATE_VALBS,       /**< (value) just after a backslash */
	STATE_VALBSX1,     /**< (value) expecting first hex digit of \\x */
	STATE_VALBSX2,     /**< (value) expecting second hex digit of \\x */
	STATE_VALSQ,       /**< (value) inside single quotes */
	STATE_VALDQ,       /**< (value) inside double quotes */
	STATE_VALDQBS,     /**< (value) backslash inside double quotes */
	STATE_VALDQBSX1,   /**< (value) first hex digit of \\x, double quoted */
	STATE_VALDQBSX2,   /**< (value) second hex digit of \\x, double quoted */
	STATE_VALEND,      /**< value finished, a new key may follow */

	/* Actions reported to the caller (mask 0x0f00). */
	ACTION_KEY        = (1 << 8),   /**< a key is complete */
	ACTION_VALUE      = (1 << 9),   /**< a value is complete */
	ACTION_ERROR      = (1 << 10),  /**< an error was detected */

	/* Actions storing a character (mask 0xf000). */
	ACTION_STORE      = (1 << 12),  /**< store the character unchanged */
	ACTION_STORE_MOD  = (1 << 13),  /**< store the filtered character */
	ACTION_STORE_HEX1 = (1 << 14),  /**< record the first hex digit */
	ACTION_STORE_HEX2 = (1 << 15)   /**< record the second hex digit */
};
/**
 * Expands to a list of designated array initializers assigning (st) to the
 * index of every hexadecimal digit character: '0'-'9', 'a'-'f' and 'A'-'F'.
 * Meant to be used inside the braces of a character-indexed table.
 */
#define HEX_INDICES(st) \
	['0'] = (st), ['1'] = (st), ['2'] = (st), ['3'] = (st), ['4'] = (st), \
	['5'] = (st), ['6'] = (st), ['7'] = (st), ['8'] = (st), ['9'] = (st), \
	['a'] = (st), ['b'] = (st), ['c'] = (st), \
	['d'] = (st), ['e'] = (st), ['f'] = (st), \
	['A'] = (st), ['B'] = (st), ['C'] = (st), \
	['D'] = (st), ['E'] = (st), ['F'] = (st)
59 /**
60 * ckvp_parse() takes the current state (ckvp), a buffer in[size] and returns
61 * the number of characters processed.
63 * Each time ckvp_parse() returns, ckvp->state must be checked. If no error
64 * occured, ckvp_parse() must be called again with the remaining characters
65 * if any, otherwise the next input buffer.
67 * At the end of input, ckvp_parse() must be called with a zero size.
69 * This function doesn't allocate anything.
71 * @param[in,out] ckvp Current state.
72 * @param size Number of characters in buffer "in".
73 * @param in Input buffer to parse.
74 * @return Number of characters processed.
76 size_t ckvp_parse(ckvp_t *ckvp, size_t size, const char in[])
78 /**
79 * State machine definition:
81 * st[current_state][current_character] = next state | action
83 * Special indices for current_character are:
85 * - 0x100 for action on characters not in the list
86 * - 0x101 for action when encountering end of input while in the
87 * current state (often ACTION_ERROR)
89 static const unsigned int st[][0x102] = {
90 [STATE_ERROR] = {
91 [0x100] = (STATE_ERROR | ACTION_ERROR),
92 [0x101] = ACTION_ERROR
94 [STATE_BEGIN] = {
95 [' '] = STATE_BEGIN,
96 ['\f'] = STATE_BEGIN,
97 ['\n'] = STATE_BEGIN,
98 ['\r'] = STATE_BEGIN,
99 ['\t'] = STATE_BEGIN,
100 ['\v'] = STATE_BEGIN,
101 [';'] = (STATE_ERROR | ACTION_ERROR),
102 ['#'] = STATE_COMMENT,
103 ['\''] = STATE_KEYSQ,
104 ['"'] = STATE_KEYDQ,
105 ['\\'] = STATE_KEYBS,
106 ['='] = (STATE_ERROR | ACTION_ERROR),
107 [0x100] = (STATE_KEY | ACTION_STORE),
108 [0x101] = 0
110 [STATE_COMMENT] = {
111 ['\n'] = STATE_BEGIN,
112 [0x100] = STATE_COMMENT,
113 [0x101] = 0
115 [STATE_KEY] = {
116 [' '] = (STATE_BEQ | ACTION_KEY),
117 ['\f'] = (STATE_BEQ | ACTION_KEY),
118 ['\n'] = (STATE_BEQ | ACTION_KEY),
119 ['\r'] = (STATE_BEQ | ACTION_KEY),
120 ['\t'] = (STATE_BEQ | ACTION_KEY),
121 ['\v'] = (STATE_BEQ | ACTION_KEY),
122 ['\''] = STATE_KEYSQ,
123 ['\"'] = STATE_KEYDQ,
124 [';'] = (STATE_ERROR | ACTION_ERROR),
125 ['='] = (STATE_AEQ | ACTION_KEY),
126 ['#'] = (STATE_ERROR | ACTION_ERROR),
127 ['\\'] = STATE_KEYBS,
128 [0x100] = (STATE_KEY | ACTION_STORE),
129 [0x101] = ACTION_ERROR
131 [STATE_KEYBS] = {
132 ['f'] = (STATE_KEY | ACTION_STORE_MOD),
133 ['n'] = (STATE_KEY | ACTION_STORE_MOD),
134 ['r'] = (STATE_KEY | ACTION_STORE_MOD),
135 ['t'] = (STATE_KEY | ACTION_STORE_MOD),
136 ['v'] = (STATE_KEY | ACTION_STORE_MOD),
137 ['x'] = STATE_KEYBSX1,
138 ['\n'] = STATE_KEY,
139 [0x100] = (STATE_KEY | ACTION_STORE),
140 [0x101] = ACTION_ERROR
142 [STATE_KEYBSX1] = {
143 HEX_INDICES(STATE_KEYBSX2 | ACTION_STORE_HEX1),
144 [0x100] = (STATE_ERROR | ACTION_ERROR),
145 [0x101] = ACTION_ERROR
147 [STATE_KEYBSX2] = {
148 HEX_INDICES(STATE_KEY | ACTION_STORE_HEX2),
149 [0x100] = (STATE_ERROR | ACTION_ERROR),
150 [0x101] = ACTION_ERROR
152 [STATE_KEYSQ] = {
153 ['\''] = STATE_KEY,
154 [0x100] = (STATE_KEYSQ | ACTION_STORE),
155 [0x101] = ACTION_ERROR
157 [STATE_KEYDQ] = {
158 ['"'] = STATE_KEY,
159 ['\\'] = STATE_KEYDQBS,
160 [0x100] = (STATE_KEYDQ | ACTION_STORE),
161 [0x101] = ACTION_ERROR
163 [STATE_KEYDQBS] = {
164 ['f'] = (STATE_KEYDQ | ACTION_STORE_MOD),
165 ['n'] = (STATE_KEYDQ | ACTION_STORE_MOD),
166 ['r'] = (STATE_KEYDQ | ACTION_STORE_MOD),
167 ['t'] = (STATE_KEYDQ | ACTION_STORE_MOD),
168 ['v'] = (STATE_KEYDQ | ACTION_STORE_MOD),
169 ['x'] = STATE_KEYDQBSX1,
170 ['\n'] = STATE_KEYDQ,
171 [0x100] = (STATE_KEYDQ | ACTION_STORE),
172 [0x101] = ACTION_ERROR
174 [STATE_KEYDQBSX1] = {
175 HEX_INDICES(STATE_KEYDQBSX2 | ACTION_STORE_HEX1),
176 [0x100] = (STATE_ERROR | ACTION_ERROR),
177 [0x101] = ACTION_ERROR
179 [STATE_KEYDQBSX2] = {
180 HEX_INDICES(STATE_KEYDQ | ACTION_STORE_HEX2),
181 [0x100] = (STATE_ERROR | ACTION_ERROR),
182 [0x101] = ACTION_ERROR
184 [STATE_BEQ] = {
185 [' '] = STATE_BEQ,
186 ['\f'] = STATE_BEQ,
187 ['\n'] = STATE_BEQ,
188 ['\r'] = STATE_BEQ,
189 ['\t'] = STATE_BEQ,
190 ['\v'] = STATE_BEQ,
191 ['='] = STATE_AEQ,
192 [0x100] = (STATE_ERROR | ACTION_ERROR),
193 [0x101] = ACTION_ERROR
195 [STATE_AEQ] = {
196 [' '] = STATE_AEQ,
197 ['\f'] = STATE_AEQ,
198 ['\n'] = STATE_AEQ,
199 ['\r'] = STATE_AEQ,
200 ['\t'] = STATE_AEQ,
201 ['\v'] = STATE_AEQ,
202 ['\''] = STATE_VALSQ,
203 ['\"'] = STATE_VALDQ,
204 ['\\'] = STATE_VALBS,
205 ['='] = (STATE_ERROR | ACTION_ERROR),
206 ['#'] = (STATE_COMMENT | ACTION_VALUE),
207 [';'] = (STATE_BEGIN | ACTION_VALUE),
208 [0x100] = (STATE_VALUE | ACTION_STORE),
209 [0x101] = ACTION_VALUE
211 [STATE_VALUE] = {
212 [' '] = (STATE_VALEND | ACTION_VALUE),
213 ['\f'] = (STATE_VALEND | ACTION_VALUE),
214 ['\n'] = (STATE_BEGIN | ACTION_VALUE),
215 ['\r'] = (STATE_VALEND | ACTION_VALUE),
216 ['\t'] = (STATE_VALEND | ACTION_VALUE),
217 ['\v'] = (STATE_VALEND | ACTION_VALUE),
218 ['\''] = STATE_VALSQ,
219 ['\"'] = STATE_VALDQ,
220 [';'] = (STATE_BEGIN | ACTION_VALUE),
221 ['='] = (STATE_ERROR | ACTION_ERROR),
222 ['#'] = (STATE_COMMENT | ACTION_VALUE),
223 ['\\'] = STATE_VALBS,
224 [0x100] = (STATE_VALUE | ACTION_STORE),
225 [0x101] = ACTION_VALUE
227 [STATE_VALBS] = {
228 ['f'] = (STATE_VALUE | ACTION_STORE_MOD),
229 ['n'] = (STATE_VALUE | ACTION_STORE_MOD),
230 ['r'] = (STATE_VALUE | ACTION_STORE_MOD),
231 ['t'] = (STATE_VALUE | ACTION_STORE_MOD),
232 ['v'] = (STATE_VALUE | ACTION_STORE_MOD),
233 ['x'] = STATE_VALBSX1,
234 ['\n'] = STATE_VALUE,
235 [0x100] = (STATE_VALUE | ACTION_STORE),
236 [0x101] = ACTION_ERROR
238 [STATE_VALBSX1] = {
239 HEX_INDICES(STATE_VALBSX2 | ACTION_STORE_HEX1),
240 [0x100] = (STATE_ERROR | ACTION_ERROR),
241 [0x101] = ACTION_ERROR
243 [STATE_VALBSX2] = {
244 HEX_INDICES(STATE_VALUE | ACTION_STORE_HEX2),
245 [0x100] = (STATE_ERROR | ACTION_ERROR),
246 [0x101] = ACTION_ERROR
248 [STATE_VALSQ] = {
249 ['\''] = STATE_VALUE,
250 [0x100] = (STATE_VALSQ | ACTION_STORE),
251 [0x101] = ACTION_ERROR
253 [STATE_VALDQ] = {
254 ['"'] = STATE_VALUE,
255 ['\\'] = STATE_VALDQBS,
256 [0x100] = (STATE_VALDQ | ACTION_STORE),
257 [0x101] = ACTION_ERROR
259 [STATE_VALDQBS] = {
260 ['f'] = (STATE_VALDQ | ACTION_STORE_MOD),
261 ['n'] = (STATE_VALDQ | ACTION_STORE_MOD),
262 ['r'] = (STATE_VALDQ | ACTION_STORE_MOD),
263 ['t'] = (STATE_VALDQ | ACTION_STORE_MOD),
264 ['v'] = (STATE_VALDQ | ACTION_STORE_MOD),
265 ['x'] = STATE_VALDQBSX1,
266 ['\n'] = STATE_VALDQ,
267 [0x100] = (STATE_VALDQ | ACTION_STORE),
268 [0x101] = ACTION_ERROR
270 [STATE_VALDQBSX1] = {
271 HEX_INDICES(STATE_VALDQBSX2 | ACTION_STORE_HEX1),
272 [0x100] = (STATE_ERROR | ACTION_ERROR),
273 [0x101] = ACTION_ERROR
275 [STATE_VALDQBSX2] = {
276 HEX_INDICES(STATE_VALDQ | ACTION_STORE_HEX2),
277 [0x100] = (STATE_ERROR | ACTION_ERROR),
278 [0x101] = ACTION_ERROR
280 [STATE_VALEND] = {
281 [' '] = STATE_VALEND,
282 ['\f'] = STATE_VALEND,
283 ['\n'] = STATE_BEGIN,
284 ['\r'] = STATE_VALEND,
285 ['\t'] = STATE_VALEND,
286 ['\v'] = STATE_VALEND,
287 [';'] = STATE_BEGIN,
288 ['#'] = STATE_COMMENT,
289 [0x100] = (STATE_ERROR | ACTION_ERROR),
290 [0x101] = 0
293 static const unsigned char cv[] = {
294 ['f'] = '\f', ['n'] = '\n', ['r'] = '\r',
295 ['t'] = '\t', ['v'] = '\v'
297 static const unsigned char hb[] = {
298 ['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3,
299 ['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
300 ['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb,
301 ['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf,
302 ['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd,
303 ['E'] = 0xe, ['F'] = 0xf
305 size_t i;
307 assert(sizeof(unsigned int) >= 4);
308 assert(ckvp != NULL);
309 assert(in != NULL);
310 if (ckvp->state != CKVP_NONE) {
311 ckvp->out_size = 0;
312 ckvp->state = CKVP_NONE;
314 if (ckvp->internal & 0x00010000) {
315 ++(ckvp->line);
316 ckvp->column = 1;
318 else if (ckvp->internal & 0x00020000)
319 ++(ckvp->column);
320 ckvp->internal &= ~(0x00030000);
321 if (size == 0) {
322 assert((ckvp->internal & 0x00ff) != 0x00);
323 assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
324 if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_ERROR)
325 ckvp->state = CKVP_ERROR;
326 else if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_VALUE)
327 ckvp->state = CKVP_OUT_VALUE;
328 return 0;
330 for (i = 0; (i < size); ++i) {
331 unsigned char c = in[i];
332 unsigned int newst;
334 assert((ckvp->internal & 0x00ff) != 0x00);
335 assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
336 if ((newst = st[(ckvp->internal & 0x00ff)][(c & 0xff)]) == 0)
337 newst = st[(ckvp->internal & 0x00ff)][0x100];
338 ckvp->internal = ((ckvp->internal & 0xffff0000) | newst);
339 assert(newst != 0);
340 if (newst & 0x0f00) {
341 if (newst & ACTION_ERROR)
342 ckvp->state = CKVP_ERROR;
343 else if (newst & ACTION_KEY)
344 ckvp->state = CKVP_OUT_KEY;
345 else if (newst & ACTION_VALUE)
346 ckvp->state = CKVP_OUT_VALUE;
347 goto endnl;
349 if (newst & 0xf000) {
350 if (newst & ACTION_STORE_HEX1) {
351 ckvp->internal &= ~(0x00f00000);
352 ckvp->internal |= (hb[c] << 20);
353 continue;
355 else if (newst & ACTION_STORE_HEX2)
356 c = (((ckvp->internal >> 16) & 0xf0) | hb[c]);
357 else if (newst & ACTION_STORE_MOD)
358 c = cv[c];
359 if (ckvp->out_size == CKVP_OUT_SIZE) {
360 ckvp->out[0] = c;
361 ckvp->out_size = 1;
363 else
364 ckvp->out[((ckvp->out_size)++)] = c;
365 if (ckvp->out_size == CKVP_OUT_SIZE) {
366 ckvp->state = CKVP_OUT_FULL;
367 goto endnl;
370 if (c == '\n') {
371 ++(ckvp->line);
372 ckvp->column = 1;
374 else
375 ++(ckvp->column);
376 continue;
377 endnl:
378 if (c == '\n')
379 ckvp->internal |= 0x00010000;
380 else
381 ckvp->internal |= 0x00020000;
382 return ++i;
384 return size;