1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
10 #include "nsUTF8Utils.h"
14 #define NS_WARNING(_s) printf(">>> " _s "!\n")
15 #define NS_NOTREACHED(_s) NS_WARNING(_s)
17 #include "nsDebug.h" // for NS_WARNING
/*
 * pref parser states
 *
 * PREF_PARSE_INIT must remain the first enumerator (value 0):
 * PREF_InitParseState zero-fills the parse state, and the parser treats a
 * zeroed ps->nextstate as "return to PREF_PARSE_INIT" (e.g. after a
 * top-level comment).
 */
enum {
    PREF_PARSE_INIT,                    /* between statements */
    PREF_PARSE_MATCH_STRING,            /* matching ps->smatch char by char */
    PREF_PARSE_UNTIL_NAME,              /* expecting the quoted pref name */
    PREF_PARSE_QUOTED_STRING,           /* inside a quoted string */
    PREF_PARSE_UNTIL_COMMA,             /* expecting ',' after the name */
    PREF_PARSE_UNTIL_VALUE,             /* expecting first char of the value */
    PREF_PARSE_INT_VALUE,               /* collecting an integer value */
    PREF_PARSE_COMMENT_MAYBE_START,     /* saw '/', expecting '*' or '/' */
    PREF_PARSE_COMMENT_BLOCK,           /* inside a comment block */
    PREF_PARSE_COMMENT_BLOCK_MAYBE_END, /* saw '*' inside a comment block */
    PREF_PARSE_ESC_SEQUENCE,            /* saw '\\' inside a quoted string */
    PREF_PARSE_HEX_ESCAPE,              /* reading digits of \xHH or \uHHHH */
    PREF_PARSE_UTF16_LOW_SURROGATE,     /* expecting "\u" of a low surrogate */
    PREF_PARSE_UNTIL_OPEN_PAREN,        /* expecting '(' */
    PREF_PARSE_UNTIL_CLOSE_PAREN,       /* expecting ')' */
    PREF_PARSE_UNTIL_SEMICOLON,         /* expecting ';' */
    PREF_PARSE_UNTIL_EOL                /* skipping to end of line */
};
/* Sizes used while decoding \uHHHH and \xHH escapes in quoted strings:
 * digits expected after the escape letter, and bits added per digit. */
#define UTF16_ESC_NUM_DIGITS    4
#define HEX_ESC_NUM_DIGITS      2
#define BITS_PER_HEX_DIGIT      4

/* Keywords recognized by the parser.  These must stay distinct array
 * objects: the parser compares ps->smatch and ps->vb against them by
 * address (e.g. ps->smatch == kPref, ps->vb == kTrue), not by content. */
static const char kUserPref[] = "user_pref";
static const char kPref[]     = "pref";
static const char kTrue[]     = "true";
static const char kFalse[]    = "false";
53 * this function will increase the size of the buffer owned
54 * by the given pref parse state. We currently use a simple
55 * doubling algorithm, but the only hard requirement is that
56 * it increase the buffer by at least the size of the ps->esctmp
57 * buffer used for escape processing (currently 6 bytes).
59 * this buffer is used to store partial pref lines. it is
60 * freed when the parse state is destroyed.
63 * parse state instance
65 * this function updates all pointers that reference an
66 * address within lb since realloc may relocate the buffer.
68 * @return false if insufficient memory.
71 pref_GrowBuf(PrefParseState
*ps
)
73 int bufLen
, curPos
, valPos
;
75 bufLen
= ps
->lbend
- ps
->lb
;
76 curPos
= ps
->lbcur
- ps
->lb
;
77 valPos
= ps
->vb
- ps
->lb
;
80 bufLen
= 128; /* default buffer size */
82 bufLen
<<= 1; /* double buffer size */
85 fprintf(stderr
, ">>> realloc(%d)\n", bufLen
);
88 ps
->lb
= (char*) realloc(ps
->lb
, bufLen
);
92 ps
->lbcur
= ps
->lb
+ curPos
;
93 ps
->lbend
= ps
->lb
+ bufLen
;
94 ps
->vb
= ps
->lb
+ valPos
;
102 * this function is called when a complete pref name-value pair has
103 * been extracted from the input data.
106 * parse state instance
108 * @return false to indicate a fatal error.
111 pref_DoCallback(PrefParseState
*ps
)
117 value
.stringVal
= ps
->vb
;
120 if ((ps
->vb
[0] == '-' || ps
->vb
[0] == '+') && ps
->vb
[1] == '\0') {
121 NS_WARNING("malformed integer value");
124 value
.intVal
= atoi(ps
->vb
);
127 value
.boolVal
= (ps
->vb
== kTrue
);
132 (*ps
->reader
)(ps
->closure
, ps
->lb
, value
, ps
->vtype
, ps
->fdefault
);
137 PREF_InitParseState(PrefParseState
*ps
, PrefReader reader
, void *closure
)
139 memset(ps
, 0, sizeof(*ps
));
141 ps
->closure
= closure
;
145 PREF_FinalizeParseState(PrefParseState
*ps
)
/**
 * Grammar accepted by the parser (pseudo-BNF):
 *
 * function      = LJUNK function-name JUNK function-args
 * function-name = "user_pref" | "pref"
 * function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";"
 * pref-name     = quoted-string
 * pref-value    = quoted-string | "true" | "false" | integer-value
 * JUNK          = *(WS | comment-block | comment-line)
 * LJUNK         = *(WS | comment-block | comment-line | bcomment-line)
 * WS            = SP | HT | LF | VT | FF | CR
 * SP            = <US-ASCII SP, space (32)>
 * HT            = <US-ASCII HT, horizontal-tab (9)>
 * LF            = <US-ASCII LF, linefeed (10)>
 * VT            = <US-ASCII VT, vertical-tab (11)>
 * FF            = <US-ASCII FF, form-feed (12)>
 * CR            = <US-ASCII CR, carriage return (13)>
 * comment-block = <C/C++ style comment block>
 * comment-line  = <C++ style comment line>
 * bcomment-line = <bourne-shell style comment line>
 */
173 PREF_ParseBuf(PrefParseState
*ps
, const char *buf
, int bufLen
)
181 for (end
= buf
+ bufLen
; buf
!= end
; ++buf
) {
185 case PREF_PARSE_INIT
:
186 if (ps
->lbcur
!= ps
->lb
) { /* reset state */
189 ps
->vtype
= PREF_INVALID
;
190 ps
->fdefault
= false;
193 case '/': /* begin comment block or line? */
194 state
= PREF_PARSE_COMMENT_MAYBE_START
;
196 case '#': /* accept shell style comments */
197 state
= PREF_PARSE_UNTIL_EOL
;
199 case 'u': /* indicating user_pref */
200 case 'p': /* indicating pref */
201 ps
->smatch
= (c
== 'u' ? kUserPref
: kPref
);
203 ps
->nextstate
= PREF_PARSE_UNTIL_OPEN_PAREN
;
204 state
= PREF_PARSE_MATCH_STRING
;
210 /* string matching */
211 case PREF_PARSE_MATCH_STRING
:
212 if (c
== ps
->smatch
[ps
->sindex
++]) {
213 /* if we've matched all characters, then move to next state. */
214 if (ps
->smatch
[ps
->sindex
] == '\0') {
215 state
= ps
->nextstate
;
216 ps
->nextstate
= PREF_PARSE_INIT
; /* reset next state */
218 /* else wait for next char */
221 NS_WARNING("malformed pref file");
226 /* quoted string parsing */
227 case PREF_PARSE_QUOTED_STRING
:
228 /* we assume that the initial quote has already been consumed */
229 if (ps
->lbcur
== ps
->lbend
&& !pref_GrowBuf(ps
))
230 return false; /* out of memory */
232 state
= PREF_PARSE_ESC_SEQUENCE
;
233 else if (c
== ps
->quotechar
) {
235 state
= ps
->nextstate
;
236 ps
->nextstate
= PREF_PARSE_INIT
; /* reset next state */
243 case PREF_PARSE_UNTIL_NAME
:
244 if (c
== '\"' || c
== '\'') {
245 ps
->fdefault
= (ps
->smatch
== kPref
);
247 ps
->nextstate
= PREF_PARSE_UNTIL_COMMA
; /* return here when done */
248 state
= PREF_PARSE_QUOTED_STRING
;
250 else if (c
== '/') { /* allow embedded comment */
251 ps
->nextstate
= state
; /* return here when done with comment */
252 state
= PREF_PARSE_COMMENT_MAYBE_START
;
254 else if (!isspace(c
)) {
255 NS_WARNING("malformed pref file");
260 /* parse until we find a comma separating name and value */
261 case PREF_PARSE_UNTIL_COMMA
:
264 state
= PREF_PARSE_UNTIL_VALUE
;
266 else if (c
== '/') { /* allow embedded comment */
267 ps
->nextstate
= state
; /* return here when done with comment */
268 state
= PREF_PARSE_COMMENT_MAYBE_START
;
270 else if (!isspace(c
)) {
271 NS_WARNING("malformed pref file");
277 case PREF_PARSE_UNTIL_VALUE
:
278 /* the pref value type is unknown. so, we scan for the first
279 * character of the value, and determine the type from that. */
280 if (c
== '\"' || c
== '\'') {
281 ps
->vtype
= PREF_STRING
;
283 ps
->nextstate
= PREF_PARSE_UNTIL_CLOSE_PAREN
;
284 state
= PREF_PARSE_QUOTED_STRING
;
286 else if (c
== 't' || c
== 'f') {
287 ps
->vb
= (char *) (c
== 't' ? kTrue
: kFalse
);
288 ps
->vtype
= PREF_BOOL
;
291 ps
->nextstate
= PREF_PARSE_UNTIL_CLOSE_PAREN
;
292 state
= PREF_PARSE_MATCH_STRING
;
294 else if (isdigit(c
) || (c
== '-') || (c
== '+')) {
295 ps
->vtype
= PREF_INT
;
296 /* write c to line buffer... */
297 if (ps
->lbcur
== ps
->lbend
&& !pref_GrowBuf(ps
))
298 return false; /* out of memory */
300 state
= PREF_PARSE_INT_VALUE
;
302 else if (c
== '/') { /* allow embedded comment */
303 ps
->nextstate
= state
; /* return here when done with comment */
304 state
= PREF_PARSE_COMMENT_MAYBE_START
;
306 else if (!isspace(c
)) {
307 NS_WARNING("malformed pref file");
311 case PREF_PARSE_INT_VALUE
:
312 /* grow line buffer if necessary... */
313 if (ps
->lbcur
== ps
->lbend
&& !pref_GrowBuf(ps
))
314 return false; /* out of memory */
318 *ps
->lbcur
++ = '\0'; /* stomp null terminator; we are done. */
320 state
= PREF_PARSE_UNTIL_SEMICOLON
;
321 else if (c
== '/') { /* allow embedded comment */
322 ps
->nextstate
= PREF_PARSE_UNTIL_CLOSE_PAREN
;
323 state
= PREF_PARSE_COMMENT_MAYBE_START
;
326 state
= PREF_PARSE_UNTIL_CLOSE_PAREN
;
328 NS_WARNING("malformed pref file");
334 /* comment parsing */
335 case PREF_PARSE_COMMENT_MAYBE_START
:
337 case '*': /* comment block */
338 state
= PREF_PARSE_COMMENT_BLOCK
;
340 case '/': /* comment line */
341 state
= PREF_PARSE_UNTIL_EOL
;
344 /* pref file is malformed */
345 NS_WARNING("malformed pref file");
349 case PREF_PARSE_COMMENT_BLOCK
:
351 state
= PREF_PARSE_COMMENT_BLOCK_MAYBE_END
;
353 case PREF_PARSE_COMMENT_BLOCK_MAYBE_END
:
356 state
= ps
->nextstate
;
357 ps
->nextstate
= PREF_PARSE_INIT
;
359 case '*': /* stay in this state */
362 state
= PREF_PARSE_COMMENT_BLOCK
;
366 /* string escape sequence parsing */
367 case PREF_PARSE_ESC_SEQUENCE
:
368 /* not necessary to resize buffer here since we should be writing
369 * only one character and the resize check would have been done
370 * for us in the previous state */
382 case 'x': /* hex escape -- always interpreted as Latin-1 */
383 case 'u': /* UTF16 escape */
386 ps
->utf16
[0] = ps
->utf16
[1] = 0;
387 ps
->sindex
= (c
== 'x' ) ?
389 UTF16_ESC_NUM_DIGITS
;
390 state
= PREF_PARSE_HEX_ESCAPE
;
393 NS_WARNING("preserving unexpected JS escape sequence");
394 /* Invalid escape sequence so we do have to write more than
395 * one character. Grow line buffer if necessary... */
396 if ((ps
->lbcur
+1) == ps
->lbend
&& !pref_GrowBuf(ps
))
397 return false; /* out of memory */
398 *ps
->lbcur
++ = '\\'; /* preserve the escape sequence */
402 state
= PREF_PARSE_QUOTED_STRING
;
405 /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */
406 case PREF_PARSE_HEX_ESCAPE
:
407 if ( c
>= '0' && c
<= '9' )
409 else if ( c
>= 'A' && c
<= 'F' )
410 udigit
= (c
- 'A') + 10;
411 else if ( c
>= 'a' && c
<= 'f' )
412 udigit
= (c
- 'a') + 10;
414 /* bad escape sequence found, write out broken escape as-is */
415 NS_WARNING("preserving invalid or incomplete hex escape");
416 *ps
->lbcur
++ = '\\'; /* original escape slash */
417 if ((ps
->lbcur
+ ps
->esclen
) >= ps
->lbend
&& !pref_GrowBuf(ps
))
419 for (int i
= 0; i
< ps
->esclen
; ++i
)
420 *ps
->lbcur
++ = ps
->esctmp
[i
];
422 /* push the non-hex character back for re-parsing. */
423 /* (++buf at the top of the loop keeps this safe) */
425 state
= PREF_PARSE_QUOTED_STRING
;
430 ps
->esctmp
[ps
->esclen
++] = c
; /* preserve it */
431 ps
->utf16
[1] <<= BITS_PER_HEX_DIGIT
;
432 ps
->utf16
[1] |= udigit
;
434 if (ps
->sindex
== 0) {
435 /* have the full escape. Convert to UTF8 */
438 /* already have a high surrogate, this is a two char seq */
441 else if (0xD800 == (0xFC00 & ps
->utf16
[1])) {
442 /* a high surrogate, can't convert until we have the low */
443 ps
->utf16
[0] = ps
->utf16
[1];
445 state
= PREF_PARSE_UTF16_LOW_SURROGATE
;
449 /* a single utf16 character */
450 ps
->utf16
[0] = ps
->utf16
[1];
454 /* actual conversion */
455 /* make sure there's room, 6 bytes is max utf8 len (in */
456 /* theory; 4 bytes covers the actual utf16 range) */
457 if (ps
->lbcur
+6 >= ps
->lbend
&& !pref_GrowBuf(ps
))
460 ConvertUTF16toUTF8
converter(ps
->lbcur
);
461 converter
.write(ps
->utf16
, utf16len
);
462 ps
->lbcur
+= converter
.Size();
463 state
= PREF_PARSE_QUOTED_STRING
;
467 /* looking for beginning of utf16 low surrogate */
468 case PREF_PARSE_UTF16_LOW_SURROGATE
:
469 if (ps
->sindex
== 0 && c
== '\\') {
472 else if (ps
->sindex
== 1 && c
== 'u') {
473 /* escape sequence is correct, now parse hex */
474 ps
->sindex
= UTF16_ESC_NUM_DIGITS
;
477 state
= PREF_PARSE_HEX_ESCAPE
;
480 /* didn't find expected low surrogate. Ignore high surrogate
481 * (it would just get converted to nothing anyway) and start
482 * over with this character */
485 state
= PREF_PARSE_ESC_SEQUENCE
;
487 state
= PREF_PARSE_QUOTED_STRING
;
492 /* function open and close parsing */
493 case PREF_PARSE_UNTIL_OPEN_PAREN
:
494 /* tolerate only whitespace and embedded comments */
496 state
= PREF_PARSE_UNTIL_NAME
;
498 ps
->nextstate
= state
; /* return here when done with comment */
499 state
= PREF_PARSE_COMMENT_MAYBE_START
;
501 else if (!isspace(c
)) {
502 NS_WARNING("malformed pref file");
506 case PREF_PARSE_UNTIL_CLOSE_PAREN
:
507 /* tolerate only whitespace and embedded comments */
509 state
= PREF_PARSE_UNTIL_SEMICOLON
;
511 ps
->nextstate
= state
; /* return here when done with comment */
512 state
= PREF_PARSE_COMMENT_MAYBE_START
;
514 else if (!isspace(c
)) {
515 NS_WARNING("malformed pref file");
520 /* function terminator ';' parsing */
521 case PREF_PARSE_UNTIL_SEMICOLON
:
522 /* tolerate only whitespace and embedded comments */
524 if (!pref_DoCallback(ps
))
526 state
= PREF_PARSE_INIT
;
529 ps
->nextstate
= state
; /* return here when done with comment */
530 state
= PREF_PARSE_COMMENT_MAYBE_START
;
532 else if (!isspace(c
)) {
533 NS_WARNING("malformed pref file");
539 case PREF_PARSE_UNTIL_EOL
:
540 /* need to handle mac, unix, or dos line endings.
541 * PREF_PARSE_INIT will eat the next \n in case
543 if (c
== '\r' || c
== '\n' || c
== 0x1A) {
544 state
= ps
->nextstate
;
545 ps
->nextstate
= PREF_PARSE_INIT
; /* reset next state */
557 pref_reader(void *closure
,
563 printf("%spref(\"%s\", ", defPref
? "" : "user_", pref
);
566 printf("\"%s\");\n", val
.stringVal
);
569 printf("%i);\n", val
.intVal
);
572 printf("%s);\n", val
.boolVal
== false ? "false" : "true");
578 main(int argc
, char **argv
)
581 char buf
[4096]; /* i/o buffer */
586 printf("usage: prefread file.js\n");
590 fp
= fopen(argv
[1], "r");
592 printf("failed to open file\n");
596 PREF_InitParseState(&ps
, pref_reader
, nullptr);
598 while ((n
= fread(buf
, 1, sizeof(buf
), fp
)) > 0)
599 PREF_ParseBuf(&ps
, buf
, n
);
601 PREF_FinalizeParseState(&ps
);
607 #endif /* TEST_PREFREAD */