1 /* GNU m4 -- A simple macro processor
2 Copyright (C) 1989-1994, 2002, 2004, 2006-2010, 2013-2014, 2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU M4.
7 GNU M4 is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU M4 is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "m4private.h"
25 /* Define this to see runtime debug info. Implied by DEBUG. */
26 /*#define DEBUG_SYNTAX */
30 /* The input is read character by character and grouped together
31 according to a syntax table. The character groups are (definitions
32 are all in m4module.h, those marked with a * are not yet in use):
34 Basic (all characters fall in one of these mutually exclusive bins)
35 M4_SYNTAX_IGNORE *Character to be deleted from input as if not present
36 M4_SYNTAX_OTHER Any character with no special meaning to m4
37 M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments)
38 M4_SYNTAX_OPEN Open list of macro arguments
39 M4_SYNTAX_CLOSE Close list of macro arguments
40 M4_SYNTAX_COMMA Separates macro arguments
41 M4_SYNTAX_ACTIVE This character is a macro name by itself
42 M4_SYNTAX_ESCAPE Use this character to prefix all macro names
44 M4_SYNTAX_ALPHA Alphabetic characters (can start macro names)
45 M4_SYNTAX_NUM Numeric characters (can form macro names)
47 M4_SYNTAX_LQUOTE A single character left quote
48 M4_SYNTAX_BCOMM A single character begin comment delimiter
50 Attribute (these are context sensitive, and exist in addition to basic)
51 M4_SYNTAX_RQUOTE A single character right quote
52 M4_SYNTAX_ECOMM A single character end comment delimiter
53 M4_SYNTAX_DOLLAR Indicates macro argument in user macros
54 M4_SYNTAX_LBRACE *Indicates start of extended macro argument
55 M4_SYNTAX_RBRACE *Indicates end of extended macro argument
57 Besides adding new facilities, the use of a syntax table will reduce
58 the number of calls to next_token (). Now groups of OTHER, NUM and
59 SPACE characters can be returned as a single token, since next_token
60 () knows they have no special syntactical meaning to m4. This is,
61 however, only possible if only single character quotes and
62 comments are used, because otherwise the quote and comment characters
63 will not show up in the syntax-table.
65 Having a syntax table allows new facilities. The new builtin
66 "changesyntax" allows the user to change the category of any character.
69 By default, '\n' is both ECOMM and SPACE, depending on the context.
70 Hence we have basic categories (mutually exclusive, can introduce a
71 context, and can be empty sets), and attribute categories
72 (additive, only recognized in context, and will never be empty).
74 The precedence as implemented by next_token () is:
76 M4_SYNTAX_IGNORE *Filtered out below next_token ()
77 M4_SYNTAX_ESCAPE Reads macro name iff set, else next character
78 M4_SYNTAX_ALPHA Reads M4_SYNTAX_ALPHA and M4_SYNTAX_NUM as macro name
79 M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE
80 M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
82 M4_SYNTAX_OTHER, M4_SYNTAX_NUM Reads all M4_SYNTAX_OTHER and M4_SYNTAX_NUM characters as one group
85 M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE, depending on buffering
86 M4_SYNTAX_ACTIVE Returns a single char as a macro name
88 M4_SYNTAX_OPEN } Returned as a single char
92 M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM are context-sensitive, and
93 close out M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM, respectively.
94 Also, M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, and M4_SYNTAX_RBRACE are
95 context-sensitive, only mattering when expanding macro definitions.
97 There are several optimizations that can be performed depending on
98 known states of the syntax table. For example, when searching for
99 quotes, if there is only a single start quote and end quote
100 delimiter, we can use memchr2 and search a word at a time, instead
101 of performing a table lookup a byte at a time. The is_single_*
102 flags track whether quotes and comments have a single delimiter
103 (always the case if changequote/changecom were used, and
104 potentially the case after changesyntax). Since we frequently need
105 to access quotes, we store the oldest valid quote outside the
106 lookup table; the suspect flag tracks whether a cleanup pass is
107 needed to restore our invariants. On the other hand, coalescing
108 multiple M4_SYNTAX_OTHER bytes could form a delimiter, so many
109 optimizations must be disabled if a multi-byte delimiter exists;
110 this is handled by m4__safe_quotes. Meanwhile, quotes and comments
111 can be disabled if the leading delimiter is length 0. */
/* Forward declarations of the file-local helpers that are defined
   further down in this file.  */
113 static int add_syntax_attribute (m4_syntax_table
*, char, int);
114 static int remove_syntax_attribute (m4_syntax_table
*, char, int);
115 static void set_quote_age (m4_syntax_table
*, bool, bool);
/* Create a syntax table.  The table is allocated with xzalloc, then
   its ORIG array is filled for every byte value, walking CH from
   UCHAR_MAX down to 0, with that byte's default category: bytes for
   which isalpha holds, and '_', become M4_SYNTAX_ALPHA; bytes for
   which isdigit holds become M4_SYNTAX_NUM.  NOTE(review): the tests
   that select M4_SYNTAX_OPEN, CLOSE, COMMA, LQUOTE, BCOMM, SPACE and
   the final M4_SYNTAX_OTHER assignment are not visible in this
   listing.  Afterwards m4_reset_syntax is called on the new table,
   and cached_simple is pointed at the cached_lquote and cached_rquote
   members with both lengths set to 1.  */
118 m4_syntax_create (void)
120 m4_syntax_table
*syntax
= (m4_syntax_table
*) xzalloc (sizeof *syntax
);
123 /* Set up default table. This table never changes during operation,
124 and contains no context attributes. */
125 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
129 syntax
->orig
[ch
] = M4_SYNTAX_OPEN
;
132 syntax
->orig
[ch
] = M4_SYNTAX_CLOSE
;
135 syntax
->orig
[ch
] = M4_SYNTAX_COMMA
;
138 syntax
->orig
[ch
] = M4_SYNTAX_LQUOTE
;
141 syntax
->orig
[ch
] = M4_SYNTAX_BCOMM
;
145 syntax
->orig
[ch
] = M4_SYNTAX_SPACE
;
146 else if (isalpha (ch
) || ch
== '_')
147 syntax
->orig
[ch
] = M4_SYNTAX_ALPHA
;
148 else if (isdigit (ch
))
149 syntax
->orig
[ch
] = M4_SYNTAX_NUM
;
151 syntax
->orig
[ch
] = M4_SYNTAX_OTHER
;
154 /* Set up current table to match default. */
155 m4_reset_syntax (syntax
);
156 syntax
->cached_simple
.str1
= syntax
->cached_lquote
;
157 syntax
->cached_simple
.len1
= 1;
158 syntax
->cached_simple
.str2
= syntax
->cached_rquote
;
159 syntax
->cached_simple
.len2
= 1;
/* Release the storage owned by SYNTAX.  The visible statements free
   the four delimiter strings: the begin and end quote (quote.str1,
   quote.str2) and the begin and end comment (comm.str1, comm.str2).
   NOTE(review): whether the table object itself is released is not
   visible in this listing.  */
164 m4_syntax_delete (m4_syntax_table
*syntax
)
168 free (syntax
->quote
.str1
);
169 free (syntax
->quote
.str2
);
170 free (syntax
->comm
.str1
);
171 free (syntax
->comm
.str2
);
/* Translate the one-character category key CH into the matching
   M4_SYNTAX_* value, stored in CODE.  Letter keys are accepted in
   either case: I (IGNORE), W (ALPHA), L (LQUOTE), B (BCOMM),
   A (ACTIVE), D (NUM), S (SPACE), O (OTHER), R (RQUOTE), E (ECOMM).
   Punctuation keys stand for themselves: '@' (ESCAPE), '(' (OPEN),
   ')' (CLOSE), ',' (COMMA), '$' (DOLLAR), '{' (LBRACE), '}' (RBRACE).
   Any other key sets CODE to -1.  */
176 m4_syntax_code (char ch
)
182 /* Sorted according to the order of M4_SYNTAX_* in m4module.h. */
183 /* FIXME - revisit the ignore syntax attribute. */
184 case 'I': case 'i': code
= M4_SYNTAX_IGNORE
; break;
185 /* Basic categories. */
186 case '@': code
= M4_SYNTAX_ESCAPE
; break;
187 case 'W': case 'w': code
= M4_SYNTAX_ALPHA
; break;
188 case 'L': case 'l': code
= M4_SYNTAX_LQUOTE
; break;
189 case 'B': case 'b': code
= M4_SYNTAX_BCOMM
; break;
190 case 'A': case 'a': code
= M4_SYNTAX_ACTIVE
; break;
191 case 'D': case 'd': code
= M4_SYNTAX_NUM
; break;
192 case 'S': case 's': code
= M4_SYNTAX_SPACE
; break;
193 case '(': code
= M4_SYNTAX_OPEN
; break;
194 case ')': code
= M4_SYNTAX_CLOSE
; break;
195 case ',': code
= M4_SYNTAX_COMMA
; break;
196 case 'O': case 'o': code
= M4_SYNTAX_OTHER
; break;
197 /* Context categories. */
198 case '$': code
= M4_SYNTAX_DOLLAR
; break;
199 case '{': code
= M4_SYNTAX_LBRACE
; break;
200 case '}': code
= M4_SYNTAX_RBRACE
; break;
201 case 'R': case 'r': code
= M4_SYNTAX_RQUOTE
; break;
202 case 'E': case 'e': code
= M4_SYNTAX_ECOMM
; break;
204 default: code
= -1; break;
212 /* Functions to manipulate the syntax table. */
/* Give character CH the syntax category or attribute CODE in the
   current table of SYNTAX, and return the resulting table entry.  CH
   is converted with to_uchar before it is used as an index.  When
   CODE has bits in M4_SYNTAX_MASKS it is OR-ed into the entry and the
   table is marked suspect.  The other visible statements mark the
   table suspect if either CODE or the character's current entry
   matches M4_SYNTAX_SUSPECT, and then replace the entry with CODE
   while keeping its M4_SYNTAX_MASKS bits.  NOTE(review): the `else'
   separating the two paths, and any conditional-compilation guard
   around the xfprintf trace to stderr, are presumably on lines not
   visible in this listing.  */
214 add_syntax_attribute (m4_syntax_table
*syntax
, char ch
, int code
)
216 int c
= to_uchar (ch
);
217 if (code
& M4_SYNTAX_MASKS
)
219 syntax
->table
[c
] |= code
;
220 syntax
->suspect
= true;
224 if ((code
& (M4_SYNTAX_SUSPECT
)) != 0
225 || m4_has_syntax (syntax
, c
, M4_SYNTAX_SUSPECT
))
226 syntax
->suspect
= true;
227 syntax
->table
[c
] = ((syntax
->table
[c
] & M4_SYNTAX_MASKS
) | code
);
231 xfprintf(stderr
, "Set syntax %o %c = %04X\n", c
, isprint(c
) ? c
: '-',
235 return syntax
->table
[c
];
/* Clear the attribute bits CODE from the entry for character CH in
   the current table of SYNTAX, and return the resulting entry.  CODE
   must have bits in M4_SYNTAX_MASKS (this is asserted).  The table is
   always marked suspect afterwards.  NOTE(review): any
   conditional-compilation guard around the xfprintf trace to stderr
   is not visible in this listing.  */
239 remove_syntax_attribute (m4_syntax_table
*syntax
, char ch
, int code
)
241 int c
= to_uchar (ch
);
242 assert (code
& M4_SYNTAX_MASKS
);
243 syntax
->table
[c
] &= ~code
;
244 syntax
->suspect
= true;
247 xfprintf(stderr
, "Unset syntax %o %c = %04X\n", c
, isprint(c
) ? c
: '-',
251 return syntax
->table
[c
];
254 /* Add the set CHARS of length LEN to syntax category CODE, removing
255 them from whatever category they used to be in. */
257 add_syntax_set (m4_syntax_table
*syntax
, const char *chars
, size_t len
,
261 add_syntax_attribute (syntax
, *chars
++, code
);
264 /* Remove the set CHARS of length LEN from syntax category CODE,
265 adding them to category M4_SYNTAX_OTHER instead. */
267 subtract_syntax_set (m4_syntax_table
*syntax
, const char *chars
, size_t len
,
273 if ((code
& M4_SYNTAX_MASKS
) != 0)
274 remove_syntax_attribute (syntax
, ch
, code
);
275 else if (m4_has_syntax (syntax
, ch
, code
))
276 add_syntax_attribute (syntax
, ch
, M4_SYNTAX_OTHER
);
280 /* Make the set CHARS of length LEN become syntax category CODE,
281 removing CHARS from any other categories, and sending all bytes in
282 the category but not in CHARS to category M4_SYNTAX_OTHER instead. */
285 set_syntax_set (m4_syntax_table
*syntax
, const char *chars
, size_t len
,
289 /* Explicit set of characters to install with this category; all
290 other characters that used to have the category get reset to M4_SYNTAX_OTHER. */
292 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
294 if ((code
& M4_SYNTAX_MASKS
) != 0)
295 remove_syntax_attribute (syntax
, ch
, code
);
296 else if (m4_has_syntax (syntax
, ch
, code
))
297 add_syntax_attribute (syntax
, ch
, M4_SYNTAX_OTHER
);
302 add_syntax_attribute (syntax
, ch
, code
);
306 /* Reset syntax category CODE to its default state, sending all other
307 characters in the category back to their default state. */
309 reset_syntax_set (m4_syntax_table
*syntax
, int code
)
312 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
314 /* Reset the category back to its default state. All other
315 characters that used to have this category get reset to
316 their default state as well. */
317 if (code
== M4_SYNTAX_RQUOTE
)
320 add_syntax_attribute (syntax
, ch
, code
);
322 remove_syntax_attribute (syntax
, ch
, code
);
324 else if (code
== M4_SYNTAX_ECOMM
)
327 add_syntax_attribute (syntax
, ch
, code
);
329 remove_syntax_attribute (syntax
, ch
, code
);
331 else if (code
== M4_SYNTAX_DOLLAR
)
334 add_syntax_attribute (syntax
, ch
, code
);
336 remove_syntax_attribute (syntax
, ch
, code
);
338 else if (code
== M4_SYNTAX_LBRACE
)
341 add_syntax_attribute (syntax
, ch
, code
);
343 remove_syntax_attribute (syntax
, ch
, code
);
345 else if (code
== M4_SYNTAX_RBRACE
)
348 add_syntax_attribute (syntax
, ch
, code
);
350 remove_syntax_attribute (syntax
, ch
, code
);
352 else if (syntax
->orig
[ch
] == code
|| m4_has_syntax (syntax
, ch
, code
))
353 add_syntax_attribute (syntax
, ch
, syntax
->orig
[ch
]);
357 /* Reset the syntax table to its default state. */
359 m4_reset_syntax (m4_syntax_table
*syntax
)
361 /* Restore the default syntax, which has known quote and comment delimiters. */
363 memcpy (syntax
->table
, syntax
->orig
, sizeof syntax
->orig
);
365 free (syntax
->quote
.str1
);
366 free (syntax
->quote
.str2
);
367 free (syntax
->comm
.str1
);
368 free (syntax
->comm
.str2
);
370 /* The use of xmemdup0 is exploited by input.c. */
371 syntax
->quote
.str1
= xmemdup0 (DEF_LQUOTE
, 1);
372 syntax
->quote
.len1
= 1;
373 syntax
->quote
.str2
= xmemdup0 (DEF_RQUOTE
, 1);
374 syntax
->quote
.len2
= 1;
375 syntax
->comm
.str1
= xmemdup0 (DEF_BCOMM
, 1);
376 syntax
->comm
.len1
= 1;
377 syntax
->comm
.str2
= xmemdup0 (DEF_ECOMM
, 1);
378 syntax
->comm
.len2
= 1;
379 syntax
->dollar
= '$';
381 add_syntax_attribute (syntax
, syntax
->quote
.str2
[0], M4_SYNTAX_RQUOTE
);
382 add_syntax_attribute (syntax
, syntax
->comm
.str2
[0], M4_SYNTAX_ECOMM
);
383 add_syntax_attribute (syntax
, '$', M4_SYNTAX_DOLLAR
);
384 add_syntax_attribute (syntax
, '{', M4_SYNTAX_LBRACE
);
385 add_syntax_attribute (syntax
, '}', M4_SYNTAX_RBRACE
);
387 syntax
->is_single_quotes
= true;
388 syntax
->is_single_comments
= true;
389 syntax
->is_single_dollar
= true;
390 syntax
->is_macro_escaped
= false;
391 set_quote_age (syntax
, true, false);
394 /* Alter the syntax for category KEY, according to ACTION: '+' to add,
395 '-' to subtract, '=' to set, or '\0' to reset. The array CHARS of
396 length LEN describes the characters to modify; it is ignored if
397 ACTION is '\0'. Return -1 if KEY is invalid, otherwise return the
398 syntax category matching KEY. */
400 m4_set_syntax (m4_syntax_table
*syntax
, char key
, char action
,
401 const char *chars
, size_t len
)
405 assert (syntax
&& chars
);
406 code
= m4_syntax_code (key
);
411 syntax
->suspect
= false;
415 add_syntax_set (syntax
, chars
, len
, code
);
418 subtract_syntax_set (syntax
, chars
, len
, code
);
421 set_syntax_set (syntax
, chars
, len
, code
);
425 reset_syntax_set (syntax
, code
);
431 /* Check for any cleanup needed. */
439 bool single_quote_possible
= true;
440 bool single_comm_possible
= true;
442 if (m4_has_syntax (syntax
, syntax
->quote
.str1
[0], M4_SYNTAX_LQUOTE
))
444 assert (syntax
->quote
.len1
== 1);
445 lquote
= to_uchar (syntax
->quote
.str1
[0]);
447 if (m4_has_syntax (syntax
, syntax
->quote
.str2
[0], M4_SYNTAX_RQUOTE
))
449 assert (syntax
->quote
.len2
== 1);
450 rquote
= to_uchar (syntax
->quote
.str2
[0]);
452 if (m4_has_syntax (syntax
, syntax
->comm
.str1
[0], M4_SYNTAX_BCOMM
))
454 assert (syntax
->comm
.len1
== 1);
455 bcomm
= to_uchar (syntax
->comm
.str1
[0]);
457 if (m4_has_syntax (syntax
, syntax
->comm
.str2
[0], M4_SYNTAX_ECOMM
))
459 assert (syntax
->comm
.len2
== 1);
460 ecomm
= to_uchar (syntax
->comm
.str2
[0]);
462 syntax
->is_single_dollar
= false;
463 syntax
->is_macro_escaped
= false;
464 /* Find candidates for each category. */
465 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
467 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_LQUOTE
))
471 else if (lquote
!= ch
)
472 single_quote_possible
= false;
474 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_RQUOTE
))
478 else if (rquote
!= ch
)
479 single_quote_possible
= false;
481 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_BCOMM
))
485 else if (bcomm
!= ch
)
486 single_comm_possible
= false;
488 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_ECOMM
))
492 else if (ecomm
!= ch
)
493 single_comm_possible
= false;
495 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_DOLLAR
))
499 syntax
->dollar
= dollar
= ch
;
500 syntax
->is_single_dollar
= true;
503 syntax
->is_single_dollar
= false;
505 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_ESCAPE
))
506 syntax
->is_macro_escaped
= true;
508 /* Disable multi-character delimiters if we discovered delimiters in the syntax table. */
510 if (!single_quote_possible
)
511 syntax
->is_single_quotes
= false;
512 if (!single_comm_possible
)
513 syntax
->is_single_comments
= false;
514 if ((1 < syntax
->quote
.len1
|| 1 < syntax
->quote
.len2
)
515 && (!syntax
->is_single_quotes
|| lquote
!= -1 || rquote
!= -1))
517 if (syntax
->quote
.len1
)
519 syntax
->quote
.len1
= lquote
== to_uchar (syntax
->quote
.str1
[0]);
520 syntax
->quote
.str1
[syntax
->quote
.len1
] = '\0';
522 if (syntax
->quote
.len2
)
524 syntax
->quote
.len2
= rquote
== to_uchar (syntax
->quote
.str2
[0]);
525 syntax
->quote
.str2
[syntax
->quote
.len2
] = '\0';
528 if ((1 < syntax
->comm
.len1
|| 1 < syntax
->comm
.len2
)
529 && (!syntax
->is_single_comments
|| bcomm
!= -1 || ecomm
!= -1))
531 if (syntax
->comm
.len1
)
533 syntax
->comm
.len1
= bcomm
== to_uchar (syntax
->comm
.str1
[0]);
534 syntax
->comm
.str1
[syntax
->comm
.len1
] = '\0';
536 if (syntax
->comm
.len2
)
538 syntax
->comm
.len2
= ecomm
== to_uchar (syntax
->comm
.str2
[0]);
539 syntax
->comm
.str2
[syntax
->comm
.len2
] = '\0';
542 /* Update the strings. */
545 if (single_quote_possible
)
546 syntax
->is_single_quotes
= true;
547 if (syntax
->quote
.len1
)
548 assert (syntax
->quote
.len1
== 1);
551 free (syntax
->quote
.str1
);
552 syntax
->quote
.str1
= xcharalloc (2);
553 syntax
->quote
.str1
[1] = '\0';
554 syntax
->quote
.len1
= 1;
556 syntax
->quote
.str1
[0] = lquote
;
560 add_syntax_attribute (syntax
, rquote
, M4_SYNTAX_RQUOTE
);
562 if (!syntax
->quote
.len2
)
564 free (syntax
->quote
.str2
);
565 syntax
->quote
.str2
= xcharalloc (2);
567 syntax
->quote
.str2
[0] = rquote
;
568 syntax
->quote
.str2
[1] = '\0';
569 syntax
->quote
.len2
= 1;
573 if (single_comm_possible
)
574 syntax
->is_single_comments
= true;
575 if (syntax
->comm
.len1
)
576 assert (syntax
->comm
.len1
== 1);
579 free (syntax
->comm
.str1
);
580 syntax
->comm
.str1
= xcharalloc (2);
581 syntax
->comm
.str1
[1] = '\0';
582 syntax
->comm
.len1
= 1;
584 syntax
->comm
.str1
[0] = bcomm
;
588 add_syntax_attribute (syntax
, ecomm
, M4_SYNTAX_ECOMM
);
590 if (!syntax
->comm
.len2
)
592 free (syntax
->comm
.str2
);
593 syntax
->comm
.str2
= xcharalloc (2);
595 syntax
->comm
.str2
[0] = ecomm
;
596 syntax
->comm
.str2
[1] = '\0';
597 syntax
->comm
.len2
= 1;
600 set_quote_age (syntax
, false, true);
601 m4__quote_uncache (syntax
);
606 /* Functions for setting quotes and comment delimiters. Used by
607 m4_changecom () and m4_changequote (). Both functions override the
608 syntax table to maintain compatibility. */
610 /* Set the quote delimiters to LQ and RQ, with respective lengths
611 LQ_LEN and RQ_LEN. Pass NULL if the argument was not present, to
612 distinguish from an explicit empty string. */
614 m4_set_quotes (m4_syntax_table
*syntax
, const char *lq
, size_t lq_len
,
615 const char *rq
, size_t rq_len
)
621 /* POSIX states that with 0 arguments, the default quotes are used.
622 POSIX XCU ERN 112 states that behavior is implementation-defined
623 if there was only one argument, or if there is an empty string in
624 either position when there are two arguments. We allow an empty
625 left quote to disable quoting, but a non-empty left quote will
626 always create a non-empty right quote. See the texinfo for what
627 some other implementations do. */
635 else if (!rq
|| (lq_len
&& !rq_len
))
641 if (syntax
->quote
.len1
== lq_len
&& syntax
->quote
.len2
== rq_len
642 && memcmp (syntax
->quote
.str1
, lq
, lq_len
) == 0
643 && memcmp (syntax
->quote
.str2
, rq
, rq_len
) == 0)
646 free (syntax
->quote
.str1
);
647 free (syntax
->quote
.str2
);
648 /* The use of xmemdup0 is exploited by input.c. */
649 syntax
->quote
.str1
= xmemdup0 (lq
, lq_len
);
650 syntax
->quote
.len1
= lq_len
;
651 syntax
->quote
.str2
= xmemdup0 (rq
, rq_len
);
652 syntax
->quote
.len2
= rq_len
;
654 /* changequote overrides syntax_table, but be careful when it is
655 used to select a start-quote sequence that is effectively disabled. */
657 syntax
->is_single_quotes
= true;
658 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
660 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_LQUOTE
))
661 add_syntax_attribute (syntax
, ch
,
662 (syntax
->orig
[ch
] == M4_SYNTAX_LQUOTE
663 ? M4_SYNTAX_OTHER
: syntax
->orig
[ch
]));
664 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_RQUOTE
))
665 remove_syntax_attribute (syntax
, ch
, M4_SYNTAX_RQUOTE
);
668 if (!m4_has_syntax (syntax
, *syntax
->quote
.str1
,
669 (M4_SYNTAX_IGNORE
| M4_SYNTAX_ESCAPE
| M4_SYNTAX_ALPHA
672 if (syntax
->quote
.len1
== 1)
673 add_syntax_attribute (syntax
, syntax
->quote
.str1
[0], M4_SYNTAX_LQUOTE
);
674 if (syntax
->quote
.len2
== 1)
675 add_syntax_attribute (syntax
, syntax
->quote
.str2
[0], M4_SYNTAX_RQUOTE
);
677 set_quote_age (syntax
, false, false);
680 /* Set the comment delimiters to BC and EC, with respective lengths
681 BC_LEN and EC_LEN. Pass NULL if the argument was not present, to
682 distinguish from an explicit empty string. */
684 m4_set_comment (m4_syntax_table
*syntax
, const char *bc
, size_t bc_len
,
685 const char *ec
, size_t ec_len
)
691 /* POSIX requires no arguments to disable comments, and that one
692 argument use newline as the close-comment. POSIX XCU ERN 131
693 states that empty arguments invoke implementation-defined
694 behavior. We allow an empty begin comment to disable comments,
695 and a non-empty begin comment will always create a non-empty end
696 comment. See the texinfo for what some other implementations do. */
703 else if (!ec
|| (bc_len
&& !ec_len
))
709 if (syntax
->comm
.len1
== bc_len
&& syntax
->comm
.len2
== ec_len
710 && memcmp (syntax
->comm
.str1
, bc
, bc_len
) == 0
711 && memcmp (syntax
->comm
.str2
, ec
, ec_len
) == 0)
714 free (syntax
->comm
.str1
);
715 free (syntax
->comm
.str2
);
716 /* The use of xmemdup0 is exploited by input.c. */
717 syntax
->comm
.str1
= xmemdup0 (bc
, bc_len
);
718 syntax
->comm
.len1
= bc_len
;
719 syntax
->comm
.str2
= xmemdup0 (ec
, ec_len
);
720 syntax
->comm
.len2
= ec_len
;
722 /* changecom overrides syntax_table, but be careful when it is used
723 to select a start-comment sequence that is effectively disabled. */
725 syntax
->is_single_comments
= true;
726 for (ch
= UCHAR_MAX
+ 1; --ch
>= 0; )
728 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_BCOMM
))
729 add_syntax_attribute (syntax
, ch
,
730 (syntax
->orig
[ch
] == M4_SYNTAX_BCOMM
731 ? M4_SYNTAX_OTHER
: syntax
->orig
[ch
]));
732 if (m4_has_syntax (syntax
, ch
, M4_SYNTAX_ECOMM
))
733 remove_syntax_attribute (syntax
, ch
, M4_SYNTAX_ECOMM
);
735 if (!m4_has_syntax (syntax
, *syntax
->comm
.str1
,
736 (M4_SYNTAX_IGNORE
| M4_SYNTAX_ESCAPE
| M4_SYNTAX_ALPHA
737 | M4_SYNTAX_NUM
| M4_SYNTAX_LQUOTE
)))
739 if (syntax
->comm
.len1
== 1)
740 add_syntax_attribute (syntax
, syntax
->comm
.str1
[0], M4_SYNTAX_BCOMM
);
741 if (syntax
->comm
.len2
== 1)
742 add_syntax_attribute (syntax
, syntax
->comm
.str2
[0], M4_SYNTAX_ECOMM
);
744 set_quote_age (syntax
, false, false);
747 /* Call this when changing anything that might impact the quote age,
748 so that m4__quote_age and m4__safe_quotes will reflect the change.
749 If RESET, changesyntax was reset to its default state; if CHANGE,
750 arbitrary syntax has changed; otherwise, just quotes or comment
751 delimiters have changed. */
753 set_quote_age (m4_syntax_table
*syntax
, bool reset
, bool change
)
755 /* Multi-character quotes are inherently unsafe, since concatenation
756 of individual characters can result in a quote delimiter, consider:
759 define(echo,``$1'')define(a,A)changequote(<[,]>)echo(<[]]><[>a]>)
762 Also, unquoted close delimiters are unsafe, consider:
764 define(echo,``$1'')define(a,A)echo(`a''`a')
767 Duplicated start and end quote delimiters, as well as comment
768 delimiters that overlap with quote delimiters or active characters,
769 also present a problem, consider:
771 define(echo,$*)echo(a,a,a`'define(a,A)changecom(`,',`,'))
774 The impact of arbitrary changesyntax is difficult to characterize.
775 So if things are in their default state, we use 0 for the upper 16
776 bits of quote_age; otherwise we increment syntax_age for each
777 changesyntax, but saturate it at 0xffff rather than wrapping
778 around. Perhaps a cache of other frequently used states is
779 warranted, if changesyntax becomes more popular.
781 Perhaps someday we will fix $@ expansion to use the current
782 settings of the comma category, or even allow multi-character
783 argument separators via changesyntax. Until then, we use a literal
784 `,' in $@ expansion, therefore we must insist that `,' be an
785 argument separator for quote_age to be non-zero.
787 Rather than check every token for an unquoted delimiter, we merely
788 encode current_quote_age to 0 when things are unsafe, and non-zero
789 when safe (namely, the syntax_age in the upper 16 bits, coupled
790 with the 16-bit value composed of the single-character start and
791 end quote delimiters). There may be other situations which are
792 safe even when this algorithm sets the quote_age to zero, but at
793 least a quote_age of zero always produces correct results (although
794 it may take more time in doing so). */
796 unsigned short local_syntax_age
;
798 local_syntax_age
= 0;
799 else if (change
&& syntax
->syntax_age
< 0xffff)
800 local_syntax_age
= ++syntax
->syntax_age
;
802 local_syntax_age
= syntax
->syntax_age
;
803 if (local_syntax_age
< 0xffff && syntax
->is_single_quotes
804 && syntax
->quote
.len1
== 1 && syntax
->quote
.len2
== 1
805 && !m4_has_syntax (syntax
, *syntax
->quote
.str1
,
806 (M4_SYNTAX_ALPHA
| M4_SYNTAX_NUM
| M4_SYNTAX_OPEN
807 | M4_SYNTAX_COMMA
| M4_SYNTAX_CLOSE
809 && !m4_has_syntax (syntax
, *syntax
->quote
.str2
,
810 (M4_SYNTAX_ALPHA
| M4_SYNTAX_NUM
| M4_SYNTAX_OPEN
811 | M4_SYNTAX_COMMA
| M4_SYNTAX_CLOSE
813 && *syntax
->quote
.str1
!= *syntax
->quote
.str2
814 && (!syntax
->comm
.len1
815 || (*syntax
->comm
.str1
!= *syntax
->quote
.str2
816 && !m4_has_syntax (syntax
, *syntax
->comm
.str1
,
817 (M4_SYNTAX_OPEN
| M4_SYNTAX_COMMA
818 | M4_SYNTAX_CLOSE
))))
819 && m4_has_syntax (syntax
, ',', M4_SYNTAX_COMMA
))
821 syntax
->quote_age
= ((local_syntax_age
<< 16)
822 | ((*syntax
->quote
.str1
& 0xff) << 8)
823 | (*syntax
->quote
.str2
& 0xff));
826 syntax
->quote_age
= 0;
829 /* Interface for caching frequently used quote pairs, independently of
830 the current quote delimiters (for example, consider a text macro
831 expansion that includes several copies of $@), and using AGE for
832 optimization. If QUOTES is NULL, don't use quoting. If OBS is
833 non-NULL, AGE should be the current quote age, and QUOTES should be
834 m4_get_syntax_quotes; the return value will be a cached quote pair,
835 where the pointer is valid at least as long as OBS is not reset,
836 but whose contents are only guaranteed until the next changequote
837 or quote_cache. Otherwise, OBS is NULL, AGE should be the same as
838 before, and QUOTES should be a previously returned cache value;
839 used to refresh the contents of the result. */
840 const m4_string_pair
*
841 m4__quote_cache (m4_syntax_table
*syntax
, m4_obstack
*obs
, unsigned int age
,
842 const m4_string_pair
*quotes
)
844 /* Implementation - if AGE is non-zero, then the implementation of
845 set_quote_age guarantees that we can recreate the return value on
846 the fly; so we use static storage, and the contents must be used
847 immediately. If AGE is zero, then we must copy QUOTES onto OBS,
848 but we might as well cache that copy. */
853 *syntax
->cached_lquote
= (age
>> 8) & 0xff;
854 *syntax
->cached_rquote
= age
& 0xff;
855 return &syntax
->cached_simple
;
859 assert (quotes
== &syntax
->quote
);
860 if (!syntax
->cached_quote
)
862 assert (obstack_object_size (obs
) == 0);
863 syntax
->cached_quote
= (m4_string_pair
*) obstack_copy (obs
, quotes
,
865 syntax
->cached_quote
->str1
= (char *) obstack_copy0 (obs
, quotes
->str1
,
867 syntax
->cached_quote
->str2
= (char *) obstack_copy0 (obs
, quotes
->str2
,
870 return syntax
->cached_quote
;
874 /* Define these functions at the end, so that calls in the file use the
875 faster macro version from m4module.h. */
/* Function form of m4_get_syntax_lquote, defined after the #undef of
   the same name: return the begin-quote delimiter string of SYNTAX
   (quote.str1).  */
876 #undef m4_get_syntax_lquote
878 m4_get_syntax_lquote (m4_syntax_table
*syntax
)
881 return syntax
->quote
.str1
;
/* Function form of m4_get_syntax_rquote, defined after the #undef of
   the same name: return the end-quote delimiter string of SYNTAX
   (quote.str2).  */
884 #undef m4_get_syntax_rquote
886 m4_get_syntax_rquote (m4_syntax_table
*syntax
)
889 return syntax
->quote
.str2
;
/* Function form of m4_get_syntax_quotes, defined after the #undef of
   the same name: return the address of the quote delimiter pair
   stored inside SYNTAX.  The result points into SYNTAX; it is not a
   copy.  */
892 #undef m4_get_syntax_quotes
893 const m4_string_pair
*
894 m4_get_syntax_quotes (m4_syntax_table
*syntax
)
897 return &syntax
->quote
;
/* Function form of m4_is_syntax_single_quotes, defined after the
   #undef of the same name: return the is_single_quotes flag of
   SYNTAX.  */
900 #undef m4_is_syntax_single_quotes
902 m4_is_syntax_single_quotes (m4_syntax_table
*syntax
)
905 return syntax
->is_single_quotes
;
/* Function form of m4_get_syntax_bcomm, defined after the #undef of
   the same name: return the begin-comment delimiter string of SYNTAX
   (comm.str1).  */
908 #undef m4_get_syntax_bcomm
910 m4_get_syntax_bcomm (m4_syntax_table
*syntax
)
913 return syntax
->comm
.str1
;
/* Function form of m4_get_syntax_ecomm, defined after the #undef of
   the same name: return the end-comment delimiter string of SYNTAX
   (comm.str2).  */
916 #undef m4_get_syntax_ecomm
918 m4_get_syntax_ecomm (m4_syntax_table
*syntax
)
921 return syntax
->comm
.str2
;
/* Function form of m4_get_syntax_comments, defined after the #undef
   of the same name: return the address of the comment delimiter pair
   stored inside SYNTAX.  The result points into SYNTAX; it is not a
   copy.  */
924 #undef m4_get_syntax_comments
925 const m4_string_pair
*
926 m4_get_syntax_comments (m4_syntax_table
*syntax
)
929 return &syntax
->comm
;
/* Function form of m4_is_syntax_single_comments, defined after the
   #undef of the same name: return the is_single_comments flag of
   SYNTAX.  */
932 #undef m4_is_syntax_single_comments
934 m4_is_syntax_single_comments (m4_syntax_table
*syntax
)
937 return syntax
->is_single_comments
;
/* Function form of m4_is_syntax_single_dollar, defined after the
   #undef of the same name: return the is_single_dollar flag of
   SYNTAX.  */
940 #undef m4_is_syntax_single_dollar
942 m4_is_syntax_single_dollar (m4_syntax_table
*syntax
)
945 return syntax
->is_single_dollar
;
/* Function form of m4_is_syntax_macro_escaped, defined after the
   #undef of the same name: return the is_macro_escaped flag of
   SYNTAX.  */
948 #undef m4_is_syntax_macro_escaped
950 m4_is_syntax_macro_escaped (m4_syntax_table
*syntax
)
953 return syntax
->is_macro_escaped
;