From fc8c3407fa619dddff995f32b662680f9919892b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 13 Jul 2006 13:38:19 +0000 Subject: [PATCH] * src/builtin.c (numeric_arg): Treat empty string as 0, with a warning. Detect quoted leading space and overflow as warnings. (m4_eval): Treat empty radix as 10, and allow output in radix 1. Treat width as minimum number of digits, as required by POSIX. (m4_ifdef, m4_divert, m4_m4exit, m4_translit): Ignore extra arguments. (m4_substr): Likewise. Silently treat empty start as 0. (m4_undivert): Treat ` 1a' as file, not diversion 1. * src/eval.c (eval_lex): Parse radix 1 numbers. * doc/m4.texinfo (Invoking m4): Fix wording; there is more than one type of warning. (Manual): Document behavior of numeric parsing of empty string. (Divert, Incr): Document error handling. (Eval): Document radices better. (Incompatibilities): Document translit incompatibility. * NEWS: Document these changes. --- ChangeLog | 17 +++++++++ NEWS | 8 +++++ doc/m4.texinfo | 77 ++++++++++++++++++++++++++++++---------- src/builtin.c | 109 +++++++++++++++++++++++++++++++++++++++------------------ src/eval.c | 15 ++++++-- 5 files changed, 170 insertions(+), 56 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3be72da2..263c07be 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,22 @@ 2006-07-13 Eric Blake + * src/builtin.c (numeric_arg): Treat empty string as 0, with a + warning. Detect quoted leading space and overflow as warnings. + (m4_eval): Treat empty radix as 10, and allow output in radix 1. + Treat width as minimum number of digits, as required by POSIX. + (m4_ifdef, m4_divert, m4_m4exit, m4_translit): Ignore extra + arguments. + (m4_substr): Likewise. Silently treat empty start as 0. + (m4_undivert): Treat ` 1a' as file, not diversion 1. + * src/eval.c (eval_lex): Parse radix 1 numbers. + * doc/m4.texinfo (Invoking m4): Fix wording; there is more than + one type of warning. + (Manual): Document behavior of numeric parsing of empty string. + (Divert, Incr): Document error handling. + (Eval): Document radices better. + (Incompatibilities): Document translit incompatibility. + * NEWS: Document these changes. + * Makefile.maint (fetch): Get gendocs from gnulib, not texinfo. (web-manual): Simplify. * m4/gnulib-cache.m4: Augment with gnulib-tool --import gendocs. diff --git a/NEWS b/NEWS index d3973cf4..71133652 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,14 @@ Version 1.4.5 - ?? 2006, by ??? (CVS version 1.4.4c) under FDL 1.2, rather than a stricter verbatim-only license. * Raise the -L (--nesting-limit) command line option limit from 250 to 1024. +* The decr, incr, divert, m4exit, and substr macros treat an empty number + as 0, issue a warning, and expand as normal; rather than issuing an error + and expanding to the empty string. +* The eval macro now treats an empty radix argument as 10, handles radix 1, + and treats the width argument as number of digits excluding the sign, + for compatibility with other m4 implementations. +* The ifdef, divert, m4exit, substr, and translit macros now correctly + ignore extra arguments. Version 1.4.4b - 17 June 2006, by Eric Blake (CVS version 1.4.4a) diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 7a1984c6..848f1fd2 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -418,7 +418,8 @@ also specified. @item -Q @itemx --quiet @itemx --silent -Suppress warnings about missing or superfluous arguments in macro calls. +Suppress warnings, such as missing or superfluous arguments in macro +calls, or treating the empty string as zero. @item -W @var{REGEXP} @itemx --word-regexp=@var{REGEXP} @@ -553,7 +554,7 @@ Finally, there are several options for aiding in debugging @code{m4} scripts. @table @code -@item -d@var{FLAGS} +@item -d[@var{FLAGS}] @itemx --debug[=@var{FLAGS}] Set the debug-level according to the flags @var{FLAGS}. The debug-level controls the format and amount of information presented by the debugging @@ -644,6 +645,7 @@ Example of input line @error{}and an error message @end example +The sequence @samp{^D} in an example indicates the end of the input file. The majority of these examples are self-contained, and you can run them with similar results by invoking @kbd{m4 -d}. In fact, the testsuite that is bundled in the GNU M4 package consists of the examples in this @@ -658,8 +660,15 @@ arguments, e.g., regexp(@var{string}, @var{regexp}, opt @var{replacement}) @end example -All macro arguments in @code{m4} are strings, but some are given special -interpretation, e.g., as numbers, file names, regular expressions, etc. +All macro arguments in @code{m4} are strings, but some are given +special interpretation, e.g., as numbers, file names, regular +expressions, etc. The documentation for each macro will state how the +parameters are interpreted, and what happens if the argument cannot be +parsed according to the desired interpretation. Unless specified +otherwise, a parameter specified to be a number is parsed as a decimal, +even if the argument has leading zeros; and parsing the empty string as +a number results in 0 rather than an error, although a warning will be +issued. The @samp{opt} before the third argument shows that this argument is optional---if it is left out, it is taken to be the empty string. An @@ -1638,6 +1647,9 @@ define(`foo', `') @result{} ifdef(`foo', ``foo' is defined', ``foo' is not defined') @result{}foo is defined +ifdef(`no_such_macro', `yes', `no', `extra argument') +@error{}stdin:4: m4: Warning: excess arguments to builtin `ifdef' ignored +@result{}no @end example The macro @code{ifdef} is recognized only with parameters. @@ -2649,7 +2661,8 @@ divert(opt @var{number}) @noindent where @var{number} is the diversion to be used. If @var{number} is left -out, it is assumed to be zero. +out or empty, it is assumed to be zero. If @var{number} cannot be +parsed, the diversion is unchanged. The expansion of @code{divert} is void. @@ -3007,7 +3020,8 @@ substr(@var{string}, @var{from}, opt @var{length}) which expands to the substring of @var{string}, which starts at index @var{from}, and extends for @var{length} characters, or to the end of @var{string}, if @var{length} is omitted. The starting index of a string -is always 0. +is always 0. The expansion is empty if there is an error parsing +@var{from} or @var{length}. @example substr(`gnus, gnats, and armadillos', `6') @@ -3233,13 +3247,20 @@ decr(@var{number}) @noindent which expand to the numerical value of @var{number}, incremented, -or decremented, respectively, by one. +or decremented, respectively, by one. Except for the empty string, the +expansion is empty if @var{number} could not be parsed. @example incr(`4') @result{}5 decr(`7') @result{}6 +incr() +@error{}stdin:3: m4: empty string treated as 0 in builtin `incr' +@result{}1 +decr() +@error{}stdin:4: m4: empty string treated as 0 in builtin `decr' +@result{}-1 @end example The builtin macros @code{incr} and @code{decr} are recognized only when @@ -3260,7 +3281,8 @@ eval(@var{expression}, opt @var{radix}, opt @var{width}) @end example @noindent -which expands to the value of @var{expression}. +which expands to the value of @var{expression}. The expansion is empty +if an error is encountered while parsing the arguments. Expressions can contain the following operators, listed in order of decreasing precedence. @@ -3305,12 +3327,15 @@ implementations. This behavior is likely to change in a future version to match @acronym{POSIX}, so use parentheses to force the desired precedence. -Numbers without special prefix are given decimal. A simple @samp{0} +Within @var{expression}, (but not @var{radix} or @var{width}), +numbers without a special prefix are decimal. A simple @samp{0} prefix introduces an octal number. @samp{0x} introduces a hexadecimal number. @samp{0b} introduces a binary number. @samp{0r} introduces a number expressed in any radix between 1 and 36: the prefix should be immediately followed by the decimal expression of the radix, a colon, -then the digits making the number. For any radix, the digits are +then the digits making the number. For radix 1, leading zeros are +ignored and all remaining digits must be @samp{1}; for all other +radices, the digits are @samp{0}, @samp{1}, @samp{2}, @dots{}. Beyond @samp{9}, the digits are @samp{a}, @samp{b} @dots{} up to @samp{z}. Lower and upper case letters can be used interchangeably in numbers prefixes and as number digits. @@ -3326,6 +3351,8 @@ eval(`-3 * 5') @result{}-15 eval(index(`Hello world', `llo') >= 0) @result{}1 +eval(`0r1:0111 + 0b100 + 0r3:12') +@result{}12 define(`square', `eval(`('$1`)**2')') @result{} square(`9') @@ -3335,7 +3362,7 @@ square(square(`5')`+1') define(`foo', `666') @result{} eval(`foo/6') -@error{}stdin:7: m4: bad expression in eval: foo/6 +@error{}stdin:8: m4: bad expression in eval: foo/6 @result{} eval(foo/6) @result{}111 @@ -3365,10 +3392,15 @@ eval(-4 >> 33) @end example If @var{radix} is specified, it specifies the radix to be used in the -expansion. The default radix is 10. The result of @code{eval} is -always taken to be signed. The @var{width} argument specifies a minimum -output width. The result is zero-padded to extend the expansion to the -requested width. +expansion. The default radix is 10; this is also the case if +@var{radix} is the empty string. It is an error if the radix is outside +the range of 1 through 36, inclusive. The result of @code{eval} is +always taken to be signed. No radix prefix is output, and for radices +greater than 10, the digits are lower case. The @var{width} argument +specifies the minimum output width, excluding any negative sign. The +result is zero-padded to extend the expansion to the requested width. +It is an error if the width is negative. On error, the expansion of +@code{eval} is empty. @example eval(`666', `10') @@ -3380,11 +3412,15 @@ eval(`666', `6') eval(`666', `6', `10') @result{}0000003030 eval(`-666', `6', `10') -@result{}-000003030 +@result{}-0000003030 +eval(`10', `', `0') +@result{}10 +`0r1:'eval(`10', `1', `11') +@result{}0r1:01111111111 +eval(`10', `16') +@result{}a @end example -Take note that @var{radix} cannot be larger than 36. - The builtin macro @code{eval} is recognized only when given arguments. @node Shell commands @@ -4209,6 +4245,11 @@ to ensure proper precedence. As extensions to @acronym{POSIX}, GNU @code{m4} treats the shift operators @samp{<<} and @samp{>>} as well-defined on signed integers (even though they are not in C), and adds the exponentiation operator @samp{**}. + +@item +@acronym{POSIX} requires @code{translit} (@pxref{Translit}) to treat +each character of the second and third arguments literally, but GNU +@code{m4} treats @samp{-} as a range operator. @end itemize @node Other Incompatibilities diff --git a/src/builtin.c b/src/builtin.c index d34eb88e..59eefa32 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -250,16 +250,16 @@ builtin_init (void) for (bp = &builtin_tab[0]; bp->name != NULL; bp++) if (!no_gnu_extensions || !bp->gnu_extension) { - if (prefix_all_builtins) - { - string = (char *) xmalloc (strlen (bp->name) + 4); - strcpy (string, "m4_"); - strcat (string, bp->name); - define_builtin (string, bp, SYMBOL_INSERT); - free (string); - } - else - define_builtin (bp->name, bp, SYMBOL_INSERT); + if (prefix_all_builtins) + { + string = (char *) xmalloc (strlen (bp->name) + 4); + strcpy (string, "m4_"); + strcat (string, bp->name); + define_builtin (string, bp, SYMBOL_INSERT); + free (string); + } + else + define_builtin (bp->name, bp, SYMBOL_INSERT); } for (pp = &predefined_tab[0]; pp->func != NULL; pp++) @@ -315,12 +315,32 @@ numeric_arg (token_data *macro, const char *arg, int *valuep) { char *endp; - if (*arg == 0 || (*valuep = strtol (arg, &endp, 10), *endp != 0)) + if (*arg == '\0') { + *valuep = 0; M4ERROR ((warning_status, 0, - "non-numeric argument to builtin `%s'", + "empty string treated as 0 in builtin `%s'", TOKEN_DATA_TEXT (macro))); - return FALSE; + } + else + { + errno = 0; + *valuep = strtol (arg, &endp, 10); + if (*endp != '\0') + { + M4ERROR ((warning_status, 0, + "non-numeric argument to builtin `%s'", + TOKEN_DATA_TEXT (macro))); + return FALSE; + } + if (isspace (to_uchar (*arg))) + M4ERROR ((warning_status, 0, + "leading whitespace ignored in builtin `%s'", + TOKEN_DATA_TEXT (macro))); + else if (errno == ERANGE) + M4ERROR ((warning_status, 0, + "numeric overflow detected in builtin `%s'", + TOKEN_DATA_TEXT (macro))); } return TRUE; } @@ -508,7 +528,7 @@ m4_ifdef (struct obstack *obs, int argc, token_data **argv) if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID) result = ARG (2); - else if (argc == 4) + else if (argc >= 4) result = ARG (3); else result = NULL; @@ -750,10 +770,10 @@ m4_defn (struct obstack *obs, int argc, token_data **argv) case TOKEN_FUNC: b = SYMBOL_FUNC (s); if (b == m4_placeholder) - M4ERROR ((warning_status, 0, "\ + M4ERROR ((warning_status, 0, "\ builtin `%s' requested by frozen file is not supported", ARG (1))); else - push_macro (b); + push_macro (b); break; case TOKEN_VOID: @@ -873,7 +893,7 @@ m4_sysval (struct obstack *obs, int argc, token_data **argv) static void m4_eval (struct obstack *obs, int argc, token_data **argv) { - eval_t value; + eval_t value = 0; int radix = 10; int min = 1; const char *s; @@ -881,35 +901,53 @@ m4_eval (struct obstack *obs, int argc, token_data **argv) if (bad_argc (argv[0], argc, 2, 4)) return; - if (argc >= 3 && !numeric_arg (argv[0], ARG (2), &radix)) + if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix)) return; - if (radix <= 1 || radix > (int) strlen (digits)) + if (radix < 1 || radix > (int) strlen (digits)) { M4ERROR ((warning_status, 0, "radix in builtin `%s' out of range (radix = %d)", - ARG (0), radix)); + ARG (0), radix)); return; } if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &min)) return; - if (min <= 0) + if (min < 0) { M4ERROR ((warning_status, 0, "negative width to builtin `%s'", ARG (0))); return; } - if (evaluate (ARG (1), &value)) + if (!*ARG (1)) + M4ERROR ((warning_status, 0, + "empty string treated as 0 in builtin `%s'", ARG (0))); + else if (evaluate (ARG (1), &value)) return; + if (radix == 1) + { + if (value < 0) + { + obstack_1grow (obs, '-'); + value = -value; + } + /* This assumes 2's-complement for correctly handling INT_MIN. */ + while (min-- - value > 0) + obstack_1grow (obs, '0'); + while (value-- != 0) + obstack_1grow (obs, '1'); + obstack_1grow (obs, '\0'); + return; + } + s = ntoa (value, radix); if (*s == '-') { obstack_1grow (obs, '-'); - min--; s++; } for (min -= strlen (s); --min >= 0;) @@ -962,7 +1000,7 @@ m4_divert (struct obstack *obs, int argc, token_data **argv) if (bad_argc (argv[0], argc, 1, 2)) return; - if (argc == 2 && !numeric_arg (argv[0], ARG (1), &i)) + if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i)) return; make_diversion (i); @@ -992,16 +1030,16 @@ m4_undivert (struct obstack *obs, int argc, token_data **argv) { int i, file; FILE *fp; + char *endp; if (argc == 1) undivert_all (); else for (i = 1; i < argc; i++) { - if (sscanf (ARG (i), "%d", &file) == 1) + file = strtol (ARG (i), &endp, 10); + if (*endp == '\0' && !isspace (to_uchar (*ARG (i)))) insert_diversion (file); - else if (!*ARG (i)) - /* Ignore empty string. */; else if (no_gnu_extensions) M4ERROR ((warning_status, 0, "non-numeric argument to builtin `%s'", ARG (0))); @@ -1165,7 +1203,7 @@ m4_maketemp (struct obstack *obs, int argc, token_data **argv) if ((fd = mkstemp (ARG (1))) < 0) { M4ERROR ((warning_status, errno, "cannot create tempfile `%s'", - ARG (1))); + ARG (1))); return; } close(fd); @@ -1219,7 +1257,7 @@ m4_m4exit (struct obstack *obs, int argc, token_data **argv) if (bad_argc (argv[0], argc, 1, 2)) return; - if (argc == 2 && !numeric_arg (argv[0], ARG (1), &exit_code)) + if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &exit_code)) exit_code = 0; exit (exit_code); @@ -1424,7 +1462,8 @@ m4_index (struct obstack *obs, int argc, token_data **argv) static void m4_substr (struct obstack *obs, int argc, token_data **argv) { - int start, length, avail; + int start = 0; + int length, avail; if (bad_argc (argv[0], argc, 3, 4)) return; @@ -1433,7 +1472,7 @@ m4_substr (struct obstack *obs, int argc, token_data **argv) if (!numeric_arg (argv[0], ARG (2), &start)) return; - if (argc == 4 && !numeric_arg (argv[0], ARG (3), &length)) + if (argc >= 4 && !numeric_arg (argv[0], ARG (3), &length)) return; if (start < 0 || length <= 0 || start >= avail) @@ -1467,9 +1506,9 @@ expand_ranges (const char *s, struct obstack *obs) to = *++s; if (to == '\0') { - /* trailing dash */ - obstack_1grow (obs, '-'); - break; + /* trailing dash */ + obstack_1grow (obs, '-'); + break; } else if (from <= to) { @@ -1515,7 +1554,7 @@ m4_translit (struct obstack *obs, int argc, token_data **argv) return; } - if (argc == 4) + if (argc >= 4) { to = ARG (3); if (strchr (to, '-') != NULL) diff --git a/src/eval.c b/src/eval.c index 93def844..dc723f92 100644 --- a/src/eval.c +++ b/src/eval.c @@ -161,10 +161,19 @@ eval_lex (eval_t *val) else break; - if (digit >= base) + if (base == 1) + { + if (digit == 1) + (*val)++; + else if (digit == 0 && !*val) + continue; + else + break; + } + else if (digit >= base) break; - - (*val) = (*val) * base + digit; + else + (*val) = (*val) * base + digit; } return NUMBER; } -- 2.11.4.GIT