Bumping manifests a=b2g-bump
[gecko.git] / modules / libpref / prefread.cpp
blob01612ba9402f4b84b0bdbacbd7046a8ebb8a50a6
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include <stdlib.h>
6 #include <string.h>
7 #include <ctype.h>
8 #include "prefread.h"
9 #include "nsString.h"
10 #include "nsUTF8Utils.h"
12 #ifdef TEST_PREFREAD
13 #include <stdio.h>
14 #define NS_WARNING(_s) printf(">>> " _s "!\n")
15 #define NS_NOTREACHED(_s) NS_WARNING(_s)
16 #else
17 #include "nsDebug.h" // for NS_WARNING
18 #endif
20 /* pref parser states */
enum {
    PREF_PARSE_INIT,                    /* between statements; looks for '/', '#', 'u' or 'p' */
    PREF_PARSE_MATCH_STRING,            /* matching the rest of a keyword held in ps->smatch */
    PREF_PARSE_UNTIL_NAME,              /* after '(': waiting for the opening quote of the name */
    PREF_PARSE_QUOTED_STRING,           /* inside a quoted string, copying into the line buffer */
    PREF_PARSE_UNTIL_COMMA,             /* after the name: waiting for ',' */
    PREF_PARSE_UNTIL_VALUE,             /* after ',': first value character decides the type */
    PREF_PARSE_INT_VALUE,               /* collecting the digits of an integer value */
    PREF_PARSE_COMMENT_MAYBE_START,     /* saw '/', expecting '*' or '/' */
    PREF_PARSE_COMMENT_BLOCK,           /* inside a block comment */
    PREF_PARSE_COMMENT_BLOCK_MAYBE_END, /* saw '*' inside a block comment */
    PREF_PARSE_ESC_SEQUENCE,            /* saw '\\' inside a quoted string */
    PREF_PARSE_HEX_ESCAPE,              /* reading the hex digits of \xHH or \uHHHH */
    PREF_PARSE_UTF16_LOW_SURROGATE,     /* high surrogate read; expecting a following \u */
    PREF_PARSE_UNTIL_OPEN_PAREN,        /* after the keyword: waiting for '(' */
    PREF_PARSE_UNTIL_CLOSE_PAREN,       /* after the value: waiting for ')' */
    PREF_PARSE_UNTIL_SEMICOLON,         /* after ')': waiting for ';' */
    PREF_PARSE_UNTIL_EOL                /* skipping the rest of a line comment */
};
/* Number of hex digits expected after the 'u' of a \uHHHH escape. */
41 #define UTF16_ESC_NUM_DIGITS 4
/* Number of hex digits expected after the 'x' of a \xHH escape. */
42 #define HEX_ESC_NUM_DIGITS 2
/* Shift applied to the accumulated code unit for each hex digit read. */
43 #define BITS_PER_HEX_DIGIT 4
/* Keywords matched by the parser. The parser compares pointers against
 * kPref and kTrue by address (not by content), so these must remain the
 * single definitions used throughout this file. */
45 static const char kUserPref[] = "user_pref";
46 static const char kPref[] = "pref";
47 static const char kTrue[] = "true";
48 static const char kFalse[] = "false";
50 /**
51 * pref_GrowBuf
53 * this function will increase the size of the buffer owned
54 * by the given pref parse state. We currently use a simple
55 * doubling algorithm, but the only hard requirement is that
56 * it increase the buffer by at least the size of the ps->esctmp
57 * buffer used for escape processing (currently 6 bytes).
59 * this buffer is used to store partial pref lines. it is
60 * freed when the parse state is destroyed.
62 * @param ps
63 * parse state instance
65 * this function updates all pointers that reference an
66 * address within lb since realloc may relocate the buffer.
68 * @return false if insufficient memory.
70 static bool
71 pref_GrowBuf(PrefParseState *ps)
73 int bufLen, curPos, valPos;
75 bufLen = ps->lbend - ps->lb;
76 curPos = ps->lbcur - ps->lb;
77 valPos = ps->vb - ps->lb;
79 if (bufLen == 0)
80 bufLen = 128; /* default buffer size */
81 else
82 bufLen <<= 1; /* double buffer size */
84 #ifdef TEST_PREFREAD
85 fprintf(stderr, ">>> realloc(%d)\n", bufLen);
86 #endif
88 ps->lb = (char*) realloc(ps->lb, bufLen);
89 if (!ps->lb)
90 return false;
92 ps->lbcur = ps->lb + curPos;
93 ps->lbend = ps->lb + bufLen;
94 ps->vb = ps->lb + valPos;
96 return true;
99 /**
100 * pref_DoCallback
102 * this function is called when a complete pref name-value pair has
103 * been extracted from the input data.
105 * @param ps
106 * parse state instance
108 * @return false to indicate a fatal error.
110 static bool
111 pref_DoCallback(PrefParseState *ps)
113 PrefValue value;
115 switch (ps->vtype) {
116 case PREF_STRING:
117 value.stringVal = ps->vb;
118 break;
119 case PREF_INT:
120 if ((ps->vb[0] == '-' || ps->vb[0] == '+') && ps->vb[1] == '\0') {
121 NS_WARNING("malformed integer value");
122 return false;
124 value.intVal = atoi(ps->vb);
125 break;
126 case PREF_BOOL:
127 value.boolVal = (ps->vb == kTrue);
128 break;
129 default:
130 break;
132 (*ps->reader)(ps->closure, ps->lb, value, ps->vtype, ps->fdefault);
133 return true;
136 void
137 PREF_InitParseState(PrefParseState *ps, PrefReader reader, void *closure)
139 memset(ps, 0, sizeof(*ps));
140 ps->reader = reader;
141 ps->closure = closure;
144 void
145 PREF_FinalizeParseState(PrefParseState *ps)
147 if (ps->lb)
148 free(ps->lb);
152 * Pseudo-BNF
153 * ----------
154 * function = LJUNK function-name JUNK function-args
155 * function-name = "user_pref" | "pref"
156 * function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";"
157 * pref-name = quoted-string
158 * pref-value = quoted-string | "true" | "false" | integer-value
159 * JUNK = *(WS | comment-block | comment-line)
160 * LJUNK = *(WS | comment-block | comment-line | bcomment-line)
161 * WS = SP | HT | LF | VT | FF | CR
162 * SP = <US-ASCII SP, space (32)>
163 * HT = <US-ASCII HT, horizontal-tab (9)>
164 * LF = <US-ASCII LF, linefeed (10)>
165 * VT = <US-ASCII HT, vertical-tab (11)>
166 * FF = <US-ASCII FF, form-feed (12)>
167 * CR = <US-ASCII CR, carriage return (13)>
168 * comment-block = <C/C++ style comment block>
169 * comment-line = <C++ style comment line>
170 * bcomment-line = <bourne-shell style comment line>
172 bool
173 PREF_ParseBuf(PrefParseState *ps, const char *buf, int bufLen)
175 const char *end;
176 char c;
177 char udigit;
178 int state;
180 state = ps->state;
181 for (end = buf + bufLen; buf != end; ++buf) {
182 c = *buf;
183 switch (state) {
184 /* initial state */
185 case PREF_PARSE_INIT:
186 if (ps->lbcur != ps->lb) { /* reset state */
187 ps->lbcur = ps->lb;
188 ps->vb = nullptr;
189 ps->vtype = PREF_INVALID;
190 ps->fdefault = false;
192 switch (c) {
193 case '/': /* begin comment block or line? */
194 state = PREF_PARSE_COMMENT_MAYBE_START;
195 break;
196 case '#': /* accept shell style comments */
197 state = PREF_PARSE_UNTIL_EOL;
198 break;
199 case 'u': /* indicating user_pref */
200 case 'p': /* indicating pref */
201 ps->smatch = (c == 'u' ? kUserPref : kPref);
202 ps->sindex = 1;
203 ps->nextstate = PREF_PARSE_UNTIL_OPEN_PAREN;
204 state = PREF_PARSE_MATCH_STRING;
205 break;
206 /* else skip char */
208 break;
210 /* string matching */
211 case PREF_PARSE_MATCH_STRING:
212 if (c == ps->smatch[ps->sindex++]) {
213 /* if we've matched all characters, then move to next state. */
214 if (ps->smatch[ps->sindex] == '\0') {
215 state = ps->nextstate;
216 ps->nextstate = PREF_PARSE_INIT; /* reset next state */
218 /* else wait for next char */
220 else {
221 NS_WARNING("malformed pref file");
222 return false;
224 break;
226 /* quoted string parsing */
227 case PREF_PARSE_QUOTED_STRING:
228 /* we assume that the initial quote has already been consumed */
229 if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
230 return false; /* out of memory */
231 if (c == '\\')
232 state = PREF_PARSE_ESC_SEQUENCE;
233 else if (c == ps->quotechar) {
234 *ps->lbcur++ = '\0';
235 state = ps->nextstate;
236 ps->nextstate = PREF_PARSE_INIT; /* reset next state */
238 else
239 *ps->lbcur++ = c;
240 break;
242 /* name parsing */
243 case PREF_PARSE_UNTIL_NAME:
244 if (c == '\"' || c == '\'') {
245 ps->fdefault = (ps->smatch == kPref);
246 ps->quotechar = c;
247 ps->nextstate = PREF_PARSE_UNTIL_COMMA; /* return here when done */
248 state = PREF_PARSE_QUOTED_STRING;
250 else if (c == '/') { /* allow embedded comment */
251 ps->nextstate = state; /* return here when done with comment */
252 state = PREF_PARSE_COMMENT_MAYBE_START;
254 else if (!isspace(c)) {
255 NS_WARNING("malformed pref file");
256 return false;
258 break;
260 /* parse until we find a comma separating name and value */
261 case PREF_PARSE_UNTIL_COMMA:
262 if (c == ',') {
263 ps->vb = ps->lbcur;
264 state = PREF_PARSE_UNTIL_VALUE;
266 else if (c == '/') { /* allow embedded comment */
267 ps->nextstate = state; /* return here when done with comment */
268 state = PREF_PARSE_COMMENT_MAYBE_START;
270 else if (!isspace(c)) {
271 NS_WARNING("malformed pref file");
272 return false;
274 break;
276 /* value parsing */
277 case PREF_PARSE_UNTIL_VALUE:
278 /* the pref value type is unknown. so, we scan for the first
279 * character of the value, and determine the type from that. */
280 if (c == '\"' || c == '\'') {
281 ps->vtype = PREF_STRING;
282 ps->quotechar = c;
283 ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
284 state = PREF_PARSE_QUOTED_STRING;
286 else if (c == 't' || c == 'f') {
287 ps->vb = (char *) (c == 't' ? kTrue : kFalse);
288 ps->vtype = PREF_BOOL;
289 ps->smatch = ps->vb;
290 ps->sindex = 1;
291 ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
292 state = PREF_PARSE_MATCH_STRING;
294 else if (isdigit(c) || (c == '-') || (c == '+')) {
295 ps->vtype = PREF_INT;
296 /* write c to line buffer... */
297 if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
298 return false; /* out of memory */
299 *ps->lbcur++ = c;
300 state = PREF_PARSE_INT_VALUE;
302 else if (c == '/') { /* allow embedded comment */
303 ps->nextstate = state; /* return here when done with comment */
304 state = PREF_PARSE_COMMENT_MAYBE_START;
306 else if (!isspace(c)) {
307 NS_WARNING("malformed pref file");
308 return false;
310 break;
311 case PREF_PARSE_INT_VALUE:
312 /* grow line buffer if necessary... */
313 if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
314 return false; /* out of memory */
315 if (isdigit(c))
316 *ps->lbcur++ = c;
317 else {
318 *ps->lbcur++ = '\0'; /* stomp null terminator; we are done. */
319 if (c == ')')
320 state = PREF_PARSE_UNTIL_SEMICOLON;
321 else if (c == '/') { /* allow embedded comment */
322 ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
323 state = PREF_PARSE_COMMENT_MAYBE_START;
325 else if (isspace(c))
326 state = PREF_PARSE_UNTIL_CLOSE_PAREN;
327 else {
328 NS_WARNING("malformed pref file");
329 return false;
332 break;
334 /* comment parsing */
335 case PREF_PARSE_COMMENT_MAYBE_START:
336 switch (c) {
337 case '*': /* comment block */
338 state = PREF_PARSE_COMMENT_BLOCK;
339 break;
340 case '/': /* comment line */
341 state = PREF_PARSE_UNTIL_EOL;
342 break;
343 default:
344 /* pref file is malformed */
345 NS_WARNING("malformed pref file");
346 return false;
348 break;
349 case PREF_PARSE_COMMENT_BLOCK:
350 if (c == '*')
351 state = PREF_PARSE_COMMENT_BLOCK_MAYBE_END;
352 break;
353 case PREF_PARSE_COMMENT_BLOCK_MAYBE_END:
354 switch (c) {
355 case '/':
356 state = ps->nextstate;
357 ps->nextstate = PREF_PARSE_INIT;
358 break;
359 case '*': /* stay in this state */
360 break;
361 default:
362 state = PREF_PARSE_COMMENT_BLOCK;
364 break;
366 /* string escape sequence parsing */
367 case PREF_PARSE_ESC_SEQUENCE:
368 /* not necessary to resize buffer here since we should be writing
369 * only one character and the resize check would have been done
370 * for us in the previous state */
371 switch (c) {
372 case '\"':
373 case '\'':
374 case '\\':
375 break;
376 case 'r':
377 c = '\r';
378 break;
379 case 'n':
380 c = '\n';
381 break;
382 case 'x': /* hex escape -- always interpreted as Latin-1 */
383 case 'u': /* UTF16 escape */
384 ps->esctmp[0] = c;
385 ps->esclen = 1;
386 ps->utf16[0] = ps->utf16[1] = 0;
387 ps->sindex = (c == 'x' ) ?
388 HEX_ESC_NUM_DIGITS :
389 UTF16_ESC_NUM_DIGITS;
390 state = PREF_PARSE_HEX_ESCAPE;
391 continue;
392 default:
393 NS_WARNING("preserving unexpected JS escape sequence");
394 /* Invalid escape sequence so we do have to write more than
395 * one character. Grow line buffer if necessary... */
396 if ((ps->lbcur+1) == ps->lbend && !pref_GrowBuf(ps))
397 return false; /* out of memory */
398 *ps->lbcur++ = '\\'; /* preserve the escape sequence */
399 break;
401 *ps->lbcur++ = c;
402 state = PREF_PARSE_QUOTED_STRING;
403 break;
405 /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */
406 case PREF_PARSE_HEX_ESCAPE:
407 if ( c >= '0' && c <= '9' )
408 udigit = (c - '0');
409 else if ( c >= 'A' && c <= 'F' )
410 udigit = (c - 'A') + 10;
411 else if ( c >= 'a' && c <= 'f' )
412 udigit = (c - 'a') + 10;
413 else {
414 /* bad escape sequence found, write out broken escape as-is */
415 NS_WARNING("preserving invalid or incomplete hex escape");
416 *ps->lbcur++ = '\\'; /* original escape slash */
417 if ((ps->lbcur + ps->esclen) >= ps->lbend && !pref_GrowBuf(ps))
418 return false;
419 for (int i = 0; i < ps->esclen; ++i)
420 *ps->lbcur++ = ps->esctmp[i];
422 /* push the non-hex character back for re-parsing. */
423 /* (++buf at the top of the loop keeps this safe) */
424 --buf;
425 state = PREF_PARSE_QUOTED_STRING;
426 continue;
429 /* have a digit */
430 ps->esctmp[ps->esclen++] = c; /* preserve it */
431 ps->utf16[1] <<= BITS_PER_HEX_DIGIT;
432 ps->utf16[1] |= udigit;
433 ps->sindex--;
434 if (ps->sindex == 0) {
435 /* have the full escape. Convert to UTF8 */
436 int utf16len = 0;
437 if (ps->utf16[0]) {
438 /* already have a high surrogate, this is a two char seq */
439 utf16len = 2;
441 else if (0xD800 == (0xFC00 & ps->utf16[1])) {
442 /* a high surrogate, can't convert until we have the low */
443 ps->utf16[0] = ps->utf16[1];
444 ps->utf16[1] = 0;
445 state = PREF_PARSE_UTF16_LOW_SURROGATE;
446 break;
448 else {
449 /* a single utf16 character */
450 ps->utf16[0] = ps->utf16[1];
451 utf16len = 1;
454 /* actual conversion */
455 /* make sure there's room, 6 bytes is max utf8 len (in */
456 /* theory; 4 bytes covers the actual utf16 range) */
457 if (ps->lbcur+6 >= ps->lbend && !pref_GrowBuf(ps))
458 return false;
460 ConvertUTF16toUTF8 converter(ps->lbcur);
461 converter.write(ps->utf16, utf16len);
462 ps->lbcur += converter.Size();
463 state = PREF_PARSE_QUOTED_STRING;
465 break;
467 /* looking for beginning of utf16 low surrogate */
468 case PREF_PARSE_UTF16_LOW_SURROGATE:
469 if (ps->sindex == 0 && c == '\\') {
470 ++ps->sindex;
472 else if (ps->sindex == 1 && c == 'u') {
473 /* escape sequence is correct, now parse hex */
474 ps->sindex = UTF16_ESC_NUM_DIGITS;
475 ps->esctmp[0] = 'u';
476 ps->esclen = 1;
477 state = PREF_PARSE_HEX_ESCAPE;
479 else {
480 /* didn't find expected low surrogate. Ignore high surrogate
481 * (it would just get converted to nothing anyway) and start
482 * over with this character */
483 --buf;
484 if (ps->sindex == 1)
485 state = PREF_PARSE_ESC_SEQUENCE;
486 else
487 state = PREF_PARSE_QUOTED_STRING;
488 continue;
490 break;
492 /* function open and close parsing */
493 case PREF_PARSE_UNTIL_OPEN_PAREN:
494 /* tolerate only whitespace and embedded comments */
495 if (c == '(')
496 state = PREF_PARSE_UNTIL_NAME;
497 else if (c == '/') {
498 ps->nextstate = state; /* return here when done with comment */
499 state = PREF_PARSE_COMMENT_MAYBE_START;
501 else if (!isspace(c)) {
502 NS_WARNING("malformed pref file");
503 return false;
505 break;
506 case PREF_PARSE_UNTIL_CLOSE_PAREN:
507 /* tolerate only whitespace and embedded comments */
508 if (c == ')')
509 state = PREF_PARSE_UNTIL_SEMICOLON;
510 else if (c == '/') {
511 ps->nextstate = state; /* return here when done with comment */
512 state = PREF_PARSE_COMMENT_MAYBE_START;
514 else if (!isspace(c)) {
515 NS_WARNING("malformed pref file");
516 return false;
518 break;
520 /* function terminator ';' parsing */
521 case PREF_PARSE_UNTIL_SEMICOLON:
522 /* tolerate only whitespace and embedded comments */
523 if (c == ';') {
524 if (!pref_DoCallback(ps))
525 return false;
526 state = PREF_PARSE_INIT;
528 else if (c == '/') {
529 ps->nextstate = state; /* return here when done with comment */
530 state = PREF_PARSE_COMMENT_MAYBE_START;
532 else if (!isspace(c)) {
533 NS_WARNING("malformed pref file");
534 return false;
536 break;
538 /* eol parsing */
539 case PREF_PARSE_UNTIL_EOL:
540 /* need to handle mac, unix, or dos line endings.
541 * PREF_PARSE_INIT will eat the next \n in case
542 * we have \r\n. */
543 if (c == '\r' || c == '\n' || c == 0x1A) {
544 state = ps->nextstate;
545 ps->nextstate = PREF_PARSE_INIT; /* reset next state */
547 break;
550 ps->state = state;
551 return true;
554 #ifdef TEST_PREFREAD
556 static void
557 pref_reader(void *closure,
558 const char *pref,
559 PrefValue val,
560 PrefType type,
561 bool defPref)
563 printf("%spref(\"%s\", ", defPref ? "" : "user_", pref);
564 switch (type) {
565 case PREF_STRING:
566 printf("\"%s\");\n", val.stringVal);
567 break;
568 case PREF_INT:
569 printf("%i);\n", val.intVal);
570 break;
571 case PREF_BOOL:
572 printf("%s);\n", val.boolVal == false ? "false" : "true");
573 break;
578 main(int argc, char **argv)
580 PrefParseState ps;
581 char buf[4096]; /* i/o buffer */
582 FILE *fp;
583 int n;
585 if (argc == 1) {
586 printf("usage: prefread file.js\n");
587 return -1;
590 fp = fopen(argv[1], "r");
591 if (!fp) {
592 printf("failed to open file\n");
593 return -1;
596 PREF_InitParseState(&ps, pref_reader, nullptr);
598 while ((n = fread(buf, 1, sizeof(buf), fp)) > 0)
599 PREF_ParseBuf(&ps, buf, n);
601 PREF_FinalizeParseState(&ps);
603 fclose(fp);
604 return 0;
607 #endif /* TEST_PREFREAD */