2 * Copyright (C) 2004, 2006 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.c,v 1.66.2.6.2.10 2006/01/04 23:50:21 marka Exp $ */
26 #include <isc/buffer.h>
31 #include <isc/parseint.h>
32 #include <isc/print.h>
33 #include <isc/stdio.h>
34 #include <isc/string.h>
37 typedef struct inputsource
{
39 isc_boolean_t is_file
;
40 isc_boolean_t need_close
;
42 isc_buffer_t
* pushback
;
47 unsigned long saved_line
;
48 ISC_LINK(struct inputsource
) link
;
51 #define LEX_MAGIC ISC_MAGIC('L', 'e', 'x', '!')
52 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
60 unsigned int comments
;
61 isc_boolean_t comment_ok
;
62 isc_boolean_t last_was_eol
;
63 unsigned int paren_count
;
64 unsigned int saved_paren_count
;
65 isc_lexspecials_t specials
;
66 LIST(struct inputsource
) sources
;
69 static inline isc_result_t
70 grow_data(isc_lex_t
*lex
, size_t *remainingp
, char **currp
, char **prevp
) {
73 new = isc_mem_get(lex
->mctx
, lex
->max_token
* 2 + 1);
75 return (ISC_R_NOMEMORY
);
76 memcpy(new, lex
->data
, lex
->max_token
+ 1);
77 *currp
= new + (*currp
- lex
->data
);
79 *prevp
= new + (*prevp
- lex
->data
);
80 isc_mem_put(lex
->mctx
, lex
->data
, lex
->max_token
+ 1);
82 *remainingp
+= lex
->max_token
;
84 return (ISC_R_SUCCESS
);
88 isc_lex_create(isc_mem_t
*mctx
, size_t max_token
, isc_lex_t
**lexp
) {
95 REQUIRE(lexp
!= NULL
&& *lexp
== NULL
);
96 REQUIRE(max_token
> 0U);
98 lex
= isc_mem_get(mctx
, sizeof(*lex
));
100 return (ISC_R_NOMEMORY
);
101 lex
->data
= isc_mem_get(mctx
, max_token
+ 1);
102 if (lex
->data
== NULL
) {
103 isc_mem_put(mctx
, lex
, sizeof(*lex
));
104 return (ISC_R_NOMEMORY
);
107 lex
->max_token
= max_token
;
109 lex
->comment_ok
= ISC_TRUE
;
110 lex
->last_was_eol
= ISC_TRUE
;
111 lex
->paren_count
= 0;
112 lex
->saved_paren_count
= 0;
113 memset(lex
->specials
, 0, 256);
114 INIT_LIST(lex
->sources
);
115 lex
->magic
= LEX_MAGIC
;
119 return (ISC_R_SUCCESS
);
123 isc_lex_destroy(isc_lex_t
**lexp
) {
130 REQUIRE(lexp
!= NULL
);
132 REQUIRE(VALID_LEX(lex
));
134 while (!EMPTY(lex
->sources
))
135 RUNTIME_CHECK(isc_lex_close(lex
) == ISC_R_SUCCESS
);
136 if (lex
->data
!= NULL
)
137 isc_mem_put(lex
->mctx
, lex
->data
, lex
->max_token
+ 1);
139 isc_mem_put(lex
->mctx
, lex
, sizeof(*lex
));
145 isc_lex_getcomments(isc_lex_t
*lex
) {
147 * Return the current lexer commenting styles.
150 REQUIRE(VALID_LEX(lex
));
152 return (lex
->comments
);
156 isc_lex_setcomments(isc_lex_t
*lex
, unsigned int comments
) {
158 * Set allowed lexer commenting styles.
161 REQUIRE(VALID_LEX(lex
));
163 lex
->comments
= comments
;
167 isc_lex_getspecials(isc_lex_t
*lex
, isc_lexspecials_t specials
) {
169 * Put the current list of specials into 'specials'.
172 REQUIRE(VALID_LEX(lex
));
174 memcpy(specials
, lex
->specials
, 256);
178 isc_lex_setspecials(isc_lex_t
*lex
, isc_lexspecials_t specials
) {
180 * The characters in 'specials' are returned as tokens. Along with
181 * whitespace, they delimit strings and numbers.
184 REQUIRE(VALID_LEX(lex
));
186 memcpy(lex
->specials
, specials
, 256);
189 static inline isc_result_t
190 new_source(isc_lex_t
*lex
, isc_boolean_t is_file
, isc_boolean_t need_close
,
191 void *input
, const char *name
)
196 source
= isc_mem_get(lex
->mctx
, sizeof(*source
));
198 return (ISC_R_NOMEMORY
);
199 source
->result
= ISC_R_SUCCESS
;
200 source
->is_file
= is_file
;
201 source
->need_close
= need_close
;
202 source
->at_eof
= ISC_FALSE
;
203 source
->input
= input
;
204 source
->name
= isc_mem_strdup(lex
->mctx
, name
);
205 if (source
->name
== NULL
) {
206 isc_mem_put(lex
->mctx
, source
, sizeof(*source
));
207 return (ISC_R_NOMEMORY
);
209 source
->pushback
= NULL
;
210 result
= isc_buffer_allocate(lex
->mctx
, &source
->pushback
,
212 if (result
!= ISC_R_SUCCESS
) {
213 isc_mem_free(lex
->mctx
, source
->name
);
214 isc_mem_put(lex
->mctx
, source
, sizeof(*source
));
219 ISC_LIST_INITANDPREPEND(lex
->sources
, source
, link
);
221 return (ISC_R_SUCCESS
);
225 isc_lex_openfile(isc_lex_t
*lex
, const char *filename
) {
230 * Open 'filename' and make it the current input source for 'lex'.
233 REQUIRE(VALID_LEX(lex
));
235 result
= isc_stdio_open(filename
, "r", &stream
);
236 if (result
!= ISC_R_SUCCESS
)
239 result
= new_source(lex
, ISC_TRUE
, ISC_TRUE
, stream
, filename
);
240 if (result
!= ISC_R_SUCCESS
)
241 (void)fclose(stream
);
246 isc_lex_openstream(isc_lex_t
*lex
, FILE *stream
) {
250 * Make 'stream' the current input source for 'lex'.
253 REQUIRE(VALID_LEX(lex
));
255 snprintf(name
, sizeof(name
), "stream-%p", stream
);
257 return (new_source(lex
, ISC_TRUE
, ISC_FALSE
, stream
, name
));
261 isc_lex_openbuffer(isc_lex_t
*lex
, isc_buffer_t
*buffer
) {
265 * Make 'buffer' the current input source for 'lex'.
268 REQUIRE(VALID_LEX(lex
));
270 snprintf(name
, sizeof(name
), "buffer-%p", buffer
);
272 return (new_source(lex
, ISC_FALSE
, ISC_FALSE
, buffer
, name
));
276 isc_lex_close(isc_lex_t
*lex
) {
280 * Close the most recently opened object (i.e. file or buffer).
283 REQUIRE(VALID_LEX(lex
));
285 source
= HEAD(lex
->sources
);
287 return (ISC_R_NOMORE
);
289 ISC_LIST_UNLINK(lex
->sources
, source
, link
);
290 if (source
->is_file
) {
291 if (source
->need_close
)
292 (void)fclose((FILE *)(source
->input
));
294 isc_mem_free(lex
->mctx
, source
->name
);
295 isc_buffer_free(&source
->pushback
);
296 isc_mem_put(lex
->mctx
, source
, sizeof(*source
));
298 return (ISC_R_SUCCESS
);
306 lexstate_maybecomment
,
308 lexstate_ccommentend
,
313 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
316 pushback(inputsource
*source
, int c
) {
317 REQUIRE(source
->pushback
->current
> 0);
319 source
->at_eof
= ISC_FALSE
;
322 source
->pushback
->current
--;
328 pushandgrow(isc_lex_t
*lex
, inputsource
*source
, int c
) {
329 if (isc_buffer_availablelength(source
->pushback
) == 0) {
330 isc_buffer_t
*tbuf
= NULL
;
335 oldlen
= isc_buffer_length(source
->pushback
);
336 result
= isc_buffer_allocate(lex
->mctx
, &tbuf
, oldlen
* 2);
337 if (result
!= ISC_R_SUCCESS
)
339 isc_buffer_usedregion(source
->pushback
, &used
);
340 result
= isc_buffer_copyregion(tbuf
, &used
);
341 INSIST(result
== ISC_R_SUCCESS
);
342 tbuf
->current
= source
->pushback
->current
;
343 isc_buffer_free(&source
->pushback
);
344 source
->pushback
= tbuf
;
346 isc_buffer_putuint8(source
->pushback
, (isc_uint8_t
)c
);
347 return (ISC_R_SUCCESS
);
351 isc_lex_gettoken(isc_lex_t
*lex
, unsigned int options
, isc_token_t
*tokenp
) {
354 isc_boolean_t done
= ISC_FALSE
;
355 isc_boolean_t no_comments
= ISC_FALSE
;
356 isc_boolean_t escaped
= ISC_FALSE
;
357 lexstate state
= lexstate_start
;
358 lexstate saved_state
= lexstate_start
;
359 isc_buffer_t
*buffer
;
363 isc_uint32_t as_ulong
;
364 unsigned int saved_options
;
368 * Get the next token.
371 REQUIRE(VALID_LEX(lex
));
372 source
= HEAD(lex
->sources
);
373 REQUIRE(tokenp
!= NULL
);
375 if (source
== NULL
) {
376 if ((options
& ISC_LEXOPT_NOMORE
) != 0) {
377 tokenp
->type
= isc_tokentype_nomore
;
378 return (ISC_R_SUCCESS
);
380 return (ISC_R_NOMORE
);
383 if (source
->result
!= ISC_R_SUCCESS
)
384 return (source
->result
);
386 lex
->saved_paren_count
= lex
->paren_count
;
387 source
->saved_line
= source
->line
;
389 if (isc_buffer_remaininglength(source
->pushback
) == 0 &&
392 if ((options
& ISC_LEXOPT_DNSMULTILINE
) != 0 &&
393 lex
->paren_count
!= 0) {
394 lex
->paren_count
= 0;
395 return (ISC_R_UNBALANCED
);
397 if ((options
& ISC_LEXOPT_EOF
) != 0) {
398 tokenp
->type
= isc_tokentype_eof
;
399 return (ISC_R_SUCCESS
);
404 isc_buffer_compact(source
->pushback
);
406 saved_options
= options
;
407 if ((options
& ISC_LEXOPT_DNSMULTILINE
) != 0 && lex
->paren_count
> 0)
414 remaining
= lex
->max_token
;
416 #ifdef HAVE_FLOCKFILE
418 flockfile(source
->input
);
422 if (isc_buffer_remaininglength(source
->pushback
) == 0) {
423 if (source
->is_file
) {
424 stream
= source
->input
;
426 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETCUNLOCKED)
427 c
= getc_unlocked(stream
);
432 if (ferror(stream
)) {
433 source
->result
= ISC_R_IOERROR
;
434 result
= source
->result
;
437 source
->at_eof
= ISC_TRUE
;
440 buffer
= source
->input
;
442 if (buffer
->current
== buffer
->used
) {
444 source
->at_eof
= ISC_TRUE
;
446 c
= *((char *)buffer
->base
+
452 source
->result
= pushandgrow(lex
, source
, c
);
453 if (source
->result
!= ISC_R_SUCCESS
) {
454 result
= source
->result
;
460 if (!source
->at_eof
) {
461 if (state
== lexstate_start
)
462 /* Token has not started yet. */
464 isc_buffer_consumedlength(source
->pushback
);
465 c
= isc_buffer_getuint8(source
->pushback
);
473 if (lex
->comment_ok
&& !no_comments
) {
474 if (!escaped
&& c
== ';' &&
475 ((lex
->comments
& ISC_LEXCOMMENT_DNSMASTERFILE
)
478 state
= lexstate_eatline
;
479 no_comments
= ISC_TRUE
;
481 } else if (c
== '/' &&
484 ISC_LEXCOMMENT_CPLUSPLUS
)) != 0) {
486 state
= lexstate_maybecomment
;
487 no_comments
= ISC_TRUE
;
489 } else if (c
== '#' &&
490 ((lex
->comments
& ISC_LEXCOMMENT_SHELL
)
493 state
= lexstate_eatline
;
494 no_comments
= ISC_TRUE
;
500 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */
504 lex
->last_was_eol
= ISC_FALSE
;
505 if ((options
& ISC_LEXOPT_DNSMULTILINE
) != 0 &&
506 lex
->paren_count
!= 0) {
507 lex
->paren_count
= 0;
508 result
= ISC_R_UNBALANCED
;
511 if ((options
& ISC_LEXOPT_EOF
) == 0) {
515 tokenp
->type
= isc_tokentype_eof
;
517 } else if (c
== ' ' || c
== '\t') {
518 if (lex
->last_was_eol
&&
519 (options
& ISC_LEXOPT_INITIALWS
)
521 lex
->last_was_eol
= ISC_FALSE
;
522 tokenp
->type
= isc_tokentype_initialws
;
523 tokenp
->value
.as_char
= c
;
526 } else if (c
== '\n') {
527 if ((options
& ISC_LEXOPT_EOL
) != 0) {
528 tokenp
->type
= isc_tokentype_eol
;
531 lex
->last_was_eol
= ISC_TRUE
;
532 } else if (c
== '\r') {
533 if ((options
& ISC_LEXOPT_EOL
) != 0)
534 state
= lexstate_crlf
;
535 } else if (c
== '"' &&
536 (options
& ISC_LEXOPT_QSTRING
) != 0) {
537 lex
->last_was_eol
= ISC_FALSE
;
538 no_comments
= ISC_TRUE
;
539 state
= lexstate_qstring
;
540 } else if (lex
->specials
[c
]) {
541 lex
->last_was_eol
= ISC_FALSE
;
542 if ((c
== '(' || c
== ')') &&
543 (options
& ISC_LEXOPT_DNSMULTILINE
) != 0) {
545 if (lex
->paren_count
== 0)
549 if (lex
->paren_count
== 0) {
550 result
= ISC_R_UNBALANCED
;
554 if (lex
->paren_count
== 0)
560 tokenp
->type
= isc_tokentype_special
;
561 tokenp
->value
.as_char
= c
;
563 } else if (isdigit((unsigned char)c
) &&
564 (options
& ISC_LEXOPT_NUMBER
) != 0) {
565 lex
->last_was_eol
= ISC_FALSE
;
566 state
= lexstate_number
;
569 lex
->last_was_eol
= ISC_FALSE
;
570 state
= lexstate_string
;
577 tokenp
->type
= isc_tokentype_eol
;
579 lex
->last_was_eol
= ISC_TRUE
;
581 case lexstate_number
:
582 if (c
== EOF
|| !isdigit((unsigned char)c
)) {
583 if (c
== ' ' || c
== '\t' || c
== '\r' ||
584 c
== '\n' || c
== EOF
||
587 if ((options
& ISC_LEXOPT_CNUMBER
) != 0)
593 result
= isc_parse_uint32(&as_ulong
,
596 if (result
== ISC_R_SUCCESS
) {
598 isc_tokentype_number
;
599 tokenp
->value
.as_ulong
=
601 } else if (result
== ISC_R_BADNUMBER
) {
605 isc_tokentype_string
;
606 v
= &(tokenp
->value
);
607 v
->as_textregion
.base
=
609 v
->as_textregion
.length
=
616 } else if (!(options
& ISC_LEXOPT_CNUMBER
) ||
617 ((c
!= 'x' && c
!= 'X') ||
618 (curr
!= &lex
->data
[1]) ||
619 (lex
->data
[0] != '0'))) {
620 /* Above test supports hex numbers */
621 state
= lexstate_string
;
624 if (remaining
== 0U) {
625 result
= grow_data(lex
, &remaining
,
627 if (result
!= ISC_R_SUCCESS
)
630 INSIST(remaining
> 0U);
635 case lexstate_string
:
637 * EOF needs to be checked before lex->specials[c]
638 * as lex->specials[EOF] is not a good idea.
640 if (c
== '\r' || c
== '\n' || c
== EOF
||
642 (c
== ' ' || c
== '\t' || lex
->specials
[c
]))) {
644 if (source
->result
!= ISC_R_SUCCESS
) {
645 result
= source
->result
;
648 tokenp
->type
= isc_tokentype_string
;
649 tokenp
->value
.as_textregion
.base
= lex
->data
;
650 tokenp
->value
.as_textregion
.length
=
651 lex
->max_token
- remaining
;
655 if ((options
& ISC_LEXOPT_ESCAPE
) != 0)
656 escaped
= (!escaped
&& c
== '\\') ?
657 ISC_TRUE
: ISC_FALSE
;
658 if (remaining
== 0U) {
659 result
= grow_data(lex
, &remaining
,
661 if (result
!= ISC_R_SUCCESS
)
664 INSIST(remaining
> 0U);
669 case lexstate_maybecomment
:
671 (lex
->comments
& ISC_LEXCOMMENT_C
) != 0) {
672 state
= lexstate_ccomment
;
674 } else if (c
== '/' &&
675 (lex
->comments
& ISC_LEXCOMMENT_CPLUSPLUS
) != 0) {
676 state
= lexstate_eatline
;
681 no_comments
= ISC_FALSE
;
684 case lexstate_ccomment
:
686 result
= ISC_R_UNEXPECTEDEND
;
690 state
= lexstate_ccommentend
;
692 case lexstate_ccommentend
:
694 result
= ISC_R_UNEXPECTEDEND
;
699 * C-style comments become a single space.
700 * We do this to ensure that a comment will
701 * act as a delimiter for strings and
705 no_comments
= ISC_FALSE
;
709 state
= lexstate_ccomment
;
711 case lexstate_eatline
:
713 result
= ISC_R_UNEXPECTEDEND
;
717 no_comments
= ISC_FALSE
;
722 case lexstate_qstring
:
724 result
= ISC_R_UNEXPECTEDEND
;
731 * Overwrite the preceding backslash.
733 INSIST(prev
!= NULL
);
736 tokenp
->type
= isc_tokentype_qstring
;
737 tokenp
->value
.as_textregion
.base
=
739 tokenp
->value
.as_textregion
.length
=
740 lex
->max_token
- remaining
;
741 no_comments
= ISC_FALSE
;
745 if (c
== '\n' && !escaped
&&
746 (options
& ISC_LEXOPT_QSTRINGMULTILINE
) == 0) {
748 result
= ISC_R_UNBALANCEDQUOTES
;
751 if (c
== '\\' && !escaped
)
755 if (remaining
== 0U) {
756 result
= grow_data(lex
, &remaining
,
758 if (result
!= ISC_R_SUCCESS
)
761 INSIST(remaining
> 0U);
769 FATAL_ERROR(__FILE__
, __LINE__
,
770 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_LEX
,
771 ISC_MSG_UNEXPECTEDSTATE
,
772 "Unexpected state %d"),
774 /* Does not return. */
779 result
= ISC_R_SUCCESS
;
781 #ifdef HAVE_FLOCKFILE
783 funlockfile(source
->input
);
789 isc_lex_getmastertoken(isc_lex_t
*lex
, isc_token_t
*token
,
790 isc_tokentype_t expect
, isc_boolean_t eol
)
792 unsigned int options
= ISC_LEXOPT_EOL
| ISC_LEXOPT_EOF
|
793 ISC_LEXOPT_DNSMULTILINE
| ISC_LEXOPT_ESCAPE
;
796 if (expect
== isc_tokentype_qstring
)
797 options
|= ISC_LEXOPT_QSTRING
;
798 else if (expect
== isc_tokentype_number
)
799 options
|= ISC_LEXOPT_NUMBER
;
800 result
= isc_lex_gettoken(lex
, options
, token
);
801 if (result
== ISC_R_RANGE
)
802 isc_lex_ungettoken(lex
, token
);
803 if (result
!= ISC_R_SUCCESS
)
806 if (eol
&& ((token
->type
== isc_tokentype_eol
) ||
807 (token
->type
== isc_tokentype_eof
)))
808 return (ISC_R_SUCCESS
);
809 if (token
->type
== isc_tokentype_string
&&
810 expect
== isc_tokentype_qstring
)
811 return (ISC_R_SUCCESS
);
812 if (token
->type
!= expect
) {
813 isc_lex_ungettoken(lex
, token
);
814 if (token
->type
== isc_tokentype_eol
||
815 token
->type
== isc_tokentype_eof
)
816 return (ISC_R_UNEXPECTEDEND
);
817 if (expect
== isc_tokentype_number
)
818 return (ISC_R_BADNUMBER
);
819 return (ISC_R_UNEXPECTEDTOKEN
);
821 return (ISC_R_SUCCESS
);
825 isc_lex_ungettoken(isc_lex_t
*lex
, isc_token_t
*tokenp
) {
828 * Unget the current token.
831 REQUIRE(VALID_LEX(lex
));
832 source
= HEAD(lex
->sources
);
833 REQUIRE(source
!= NULL
);
834 REQUIRE(tokenp
!= NULL
);
835 REQUIRE(isc_buffer_consumedlength(source
->pushback
) != 0 ||
836 tokenp
->type
== isc_tokentype_eof
);
840 isc_buffer_first(source
->pushback
);
841 lex
->paren_count
= lex
->saved_paren_count
;
842 source
->line
= source
->saved_line
;
843 source
->at_eof
= ISC_FALSE
;
847 isc_lex_getlasttokentext(isc_lex_t
*lex
, isc_token_t
*tokenp
, isc_region_t
*r
)
851 REQUIRE(VALID_LEX(lex
));
852 source
= HEAD(lex
->sources
);
853 REQUIRE(source
!= NULL
);
854 REQUIRE(tokenp
!= NULL
);
855 REQUIRE(isc_buffer_consumedlength(source
->pushback
) != 0 ||
856 tokenp
->type
== isc_tokentype_eof
);
860 INSIST(source
->ignored
<= isc_buffer_consumedlength(source
->pushback
));
861 r
->base
= (unsigned char *)isc_buffer_base(source
->pushback
) +
863 r
->length
= isc_buffer_consumedlength(source
->pushback
) -
869 isc_lex_getsourcename(isc_lex_t
*lex
) {
872 REQUIRE(VALID_LEX(lex
));
873 source
= HEAD(lex
->sources
);
878 return (source
->name
);
882 isc_lex_getsourceline(isc_lex_t
*lex
) {
885 REQUIRE(VALID_LEX(lex
));
886 source
= HEAD(lex
->sources
);
891 return (source
->line
);
896 isc_lex_setsourcename(isc_lex_t
*lex
, const char *name
) {
900 REQUIRE(VALID_LEX(lex
));
901 source
= HEAD(lex
->sources
);
904 return(ISC_R_NOTFOUND
);
905 newname
= isc_mem_strdup(lex
->mctx
, name
);
907 return (ISC_R_NOMEMORY
);
908 isc_mem_free(lex
->mctx
, source
->name
);
909 source
->name
= newname
;
910 return (ISC_R_SUCCESS
);
914 isc_lex_isfile(isc_lex_t
*lex
) {
917 REQUIRE(VALID_LEX(lex
));
919 source
= HEAD(lex
->sources
);
924 return (source
->is_file
);