maint: use mbszero
[coreutils.git] / src / printf.c
blob16ea1c17f1955f9b55375a28832a6f0af935eb84
1 /* printf - format and print data
2 Copyright (C) 1990-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Usage: printf format [argument...]
19 A front end to the printf function that lets it be used from the shell.
21 Backslash escapes:
23 \" = double quote
24 \\ = backslash
25 \a = alert (bell)
26 \b = backspace
27 \c = produce no further output
28 \e = escape
29 \f = form feed
30 \n = new line
31 \r = carriage return
32 \t = horizontal tab
33 \v = vertical tab
34 \ooo = octal number (ooo is 1 to 3 digits)
35 \xhh = hexadecimal number (hh is 1 to 2 digits)
36 \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37 \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
39 Additional directive:
41 %b = print an argument string, interpreting backslash escapes,
42 except that octal escapes are of the form \0 or \0ooo.
44 %q = print an argument string in a format that can be
45 reused as shell input. Escaped characters use the proposed
46 POSIX $'' syntax supported by most shells.
48 The 'format' argument is re-used as many times as necessary
49 to convert all of the given arguments.
51 David MacKenzie <djm@gnu.ai.mit.edu> */
53 #include <config.h>
54 #include <stdio.h>
55 #include <sys/types.h>
56 #include <wchar.h>
58 #include "system.h"
59 #include "cl-strtod.h"
60 #include "quote.h"
61 #include "unicodeio.h"
62 #include "xprintf.h"
64 /* The official name of this program (e.g., no 'g' prefix). */
65 #define PROGRAM_NAME "printf"
67 #define AUTHORS proper_name ("David MacKenzie")
/* Character-classification and digit-value helpers used when parsing
   backslash escapes.  Each is a plain macro, so its argument may be
   evaluated more than once; pass only side-effect-free expressions.  */

/* Nonzero if C is an octal digit, '0' through '7'.  */
#define isodigit(c) ('0' <= (c) && (c) <= '7')

/* Numeric value of the octal (or decimal) digit C.  */
#define octtobin(c) ((c) - '0')

/* Numeric value of the hexadecimal digit C.  Letters of either case map
   to 10..15; any other character is treated as a decimal digit.  */
#define hextobin(c)                                     \
  ('A' <= (c) && (c) <= 'F' ? (c) - 'A' + 10            \
   : 'a' <= (c) && (c) <= 'f' ? (c) - 'a' + 10          \
   : (c) - '0')
74 /* The value to return to the calling program. */
/* Reset to EXIT_SUCCESS at the start of main and set to EXIT_FAILURE by
   verify_numeric whenever a numeric argument is diagnosed. */
75 static int exit_status;
77 /* True if the POSIXLY_CORRECT environment variable is set. */
/* Assigned once in main from getenv; when true, the STRTOX converters
   do not warn about characters following a character constant. */
78 static bool posixly_correct;
80 /* This message appears in N_() here rather than just in _() below because
81 the sole use would have been in a #define. */
/* Used as the format of that warning; it takes the ignored trailing
   text as its single %s argument and is passed through _() at the
   point of use. */
82 static char const *const cfcc_msg =
83 N_("warning: %s: character(s) following character constant have been ignored");
/* Report how to invoke the program, then terminate with exit status STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis, the option descriptions, the list of
   interpreted escape sequences and conversion directives, the builtin
   warning and the ancillary information are written to standard output.
   This function does not return: it always ends in exit (STATUS). */
85 void
86 usage (int status)
88 if (status != EXIT_SUCCESS)
89 emit_try_help ();
90 else
92 printf (_("\
93 Usage: %s FORMAT [ARGUMENT]...\n\
94 or: %s OPTION\n\
95 "),
96 program_name, program_name);
97 fputs (_("\
98 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
99 \n\
100 "), stdout);
101 fputs (HELP_OPTION_DESCRIPTION, stdout);
102 fputs (VERSION_OPTION_DESCRIPTION, stdout);
103 fputs (_("\
105 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
107 \\\" double quote\n\
108 "), stdout);
109 fputs (_("\
110 \\\\ backslash\n\
111 \\a alert (BEL)\n\
112 \\b backspace\n\
113 \\c produce no further output\n\
114 \\e escape\n\
115 \\f form feed\n\
116 \\n new line\n\
117 \\r carriage return\n\
118 \\t horizontal tab\n\
119 \\v vertical tab\n\
120 "), stdout);
121 fputs (_("\
122 \\NNN byte with octal value NNN (1 to 3 digits)\n\
123 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
124 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
125 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
126 "), stdout);
127 fputs (_("\
128 %% a single %\n\
129 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
130 except that octal escapes are of the form \\0 or \\0NNN\n\
131 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
132 escaping non-printable characters with the proposed POSIX $'' syntax.\
133 \n\n\
134 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
135 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
136 "), stdout);
137 printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
138 emit_ancillary_info (PROGRAM_NAME);
140 exit (status);
143 static void
144 verify_numeric (char const *s, char const *end)
146 if (errno)
148 error (0, errno, "%s", quote (s));
149 exit_status = EXIT_FAILURE;
151 else if (*end)
153 if (s == end)
154 error (0, 0, _("%s: expected a numeric value"), quote (s));
155 else
156 error (0, 0, _("%s: value not completely converted"), quote (s));
157 exit_status = EXIT_FAILURE;
161 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
162 static TYPE \
163 FUNC_NAME (char const *s) \
165 char *end; \
166 TYPE val; \
168 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
170 unsigned char ch = *++s; \
171 val = ch; \
173 if (MB_CUR_MAX > 1 && *(s + 1)) \
175 mbstate_t mbstate; mbszero (&mbstate); \
176 wchar_t wc; \
177 size_t slen = strlen (s); \
178 ssize_t bytes; \
179 bytes = mbrtowc (&wc, s, slen, &mbstate); \
180 if (0 < bytes) \
182 val = wc; \
183 s += bytes - 1; \
187 /* If POSIXLY_CORRECT is not set, then give a warning that there \
188 are characters following the character constant and that GNU \
189 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
190 set, then don't give the warning. */ \
191 if (*++s != 0 && !posixly_correct) \
192 error (0, 0, _(cfcc_msg), s); \
194 else \
196 errno = 0; \
197 val = (LIB_FUNC_EXPR); \
198 verify_numeric (s, end); \
200 return val; \
203 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
204 STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
205 STRTOX (long double, vstrtold, cl_strtold (s, &end))
/* Act on the single-character backslash escape whose letter is C.
   'c' terminates the program successfully without writing anything;
   the letters a, b, e, f, n, r, t and v write the corresponding control
   character; any other C is written unchanged.  */

static void
print_esc_char (char c)
{
  /* \c: cancel the rest of the output.  */
  if (c == 'c')
    exit (EXIT_SUCCESS);

  int out;
  switch (c)
    {
    case 'a': out = '\a'; break;        /* Alert.  */
    case 'b': out = '\b'; break;        /* Backspace.  */
    case 'e': out = '\x1B'; break;      /* Escape.  */
    case 'f': out = '\f'; break;        /* Form feed.  */
    case 'n': out = '\n'; break;        /* New line.  */
    case 'r': out = '\r'; break;        /* Carriage return.  */
    case 't': out = '\t'; break;        /* Horizontal tab.  */
    case 'v': out = '\v'; break;        /* Vertical tab.  */
    default:  out = c; break;           /* Anything else stands for itself.  */
    }
  putchar (out);
}
/* Interpret and print the backslash escape that begins at ESCSTART
   (which points at the backslash itself).  Return how many characters
   of the escape follow the backslash, so the caller can skip them.
   When OCTAL_0 is true an octal escape is written \0ooo; otherwise it
   is written \ooo.  An unrecognized escape is printed literally.  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *p = escstart + 1;

  if (*p == 'x')
    {
      /* \xhh: one or two hexadecimal digits are required.  */
      int value = 0;
      int ndigits = 0;
      ++p;
      while (ndigits < 2 && isxdigit (to_uchar (*p)))
        {
          value = value * 16 + hextobin (*p);
          ++ndigits;
          ++p;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (value);
    }
  else if (isodigit (*p))
    {
      /* \0ooo when OCTAL_0 and the first digit is '0', else \ooo.
         Accepting \ooo under OCTAL_0 when the first digit is not '0'
         is an undocumented extension to POSIX that is compatible with
         Bash 2.05b.  */
      int value = 0;
      if (octal_0 && *p == '0')
        ++p;
      for (int ndigits = 0; ndigits < 3 && isodigit (*p); ++ndigits, ++p)
        value = value * 8 + octtobin (*p);
      putchar (value);
    }
  else if (*p && strchr ("\"\\abcefnrtv", *p))
    print_esc_char (*p++);
  else if (*p == 'u' || *p == 'U')
    {
      /* \uhhhh or \Uhhhhhhhh: exactly 4 or 8 hexadecimal digits.  */
      char esc_char = *p;
      int ndigits = (esc_char == 'u' ? 4 : 8);
      unsigned int code = 0;

      ++p;
      for (int i = 0; i < ndigits; ++i, ++p)
        {
          if (! isxdigit (to_uchar (*p)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          code = code * 16 + hextobin (*p);
        }

      /* Reject the invalid code points 0000D800 through 0000DFFF here;
         print_unicode_char () would print the literal \u.. for them.  */
      if (0xd800 <= code && code <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, ndigits, code);

      print_unicode_char (stdout, code, 0);
    }
  else
    {
      /* Unknown escape: emit the backslash and the following character,
         if there is one, unchanged.  */
      putchar ('\\');
      if (*p)
        {
          putchar (*p);
          ++p;
        }
    }

  return p - escstart - 1;
}
/* Write the string STR to standard output, interpreting backslash
   escapes in the %b style (octal escapes of the form \0ooo).  */

static void
print_esc_string (char const *str)
{
  while (*str != '\0')
    {
      if (*str == '\\')
        /* Skip the escape's characters; the backslash itself is
           stepped over by the increment below.  */
        str += print_esc (str, true);
      else
        putchar (*str);
      ++str;
    }
}
/* Format ARGUMENT according to one conversion specification and print it.
   START points at the directive and LENGTH is the number of leading
   characters to take from it; LENGTH covers neither a length modifier
   nor the conversion character.  CONVERSION is the conversion
   character.  FIELD_WIDTH and PRECISION carry the values given for '*'
   and are consulted only when HAVE_FIELD_WIDTH and HAVE_PRECISION,
   respectively, are true.  */

static void
print_direc (char const *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Pick the length modifier that matches the type the argument is
     converted to: intmax_t/uintmax_t for integer conversions and
     long double for floating-point ones.  Others need no modifier.  */
  char const *modifier = "";
  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      break;

    default:
      break;
    }
  size_t modifier_len = strlen (modifier);

  /* Build a null-terminated copy of the directive:
     leading text, modifier, conversion character, terminator.  */
  char *spec = xmalloc (length + modifier_len + 2);
  memcpy (spec, start, length);
  memcpy (spec + length, modifier, modifier_len);
  spec[length + modifier_len] = conversion;
  spec[length + modifier_len + 1] = '\0';

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t arg = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t arg = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double arg = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'c':
      /* Only the first byte of ARGUMENT is printed; a '*' precision is
         never consulted for this conversion.  */
      if (have_field_width)
        xprintf (spec, field_width, *argument);
      else
        xprintf (spec, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (spec, field_width, precision, argument);
      else if (have_field_width)
        xprintf (spec, field_width, argument);
      else if (have_precision)
        xprintf (spec, precision, argument);
      else
        xprintf (spec, argument);
      break;
    }

  free (spec);
}
482 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
483 arguments to any '%' directives.
484 Return the number of elements of ARGV used. */
486 static int
487 print_formatted (char const *format, int argc, char **argv)
489 int save_argc = argc; /* Preserve original value. */
490 char const *f; /* Pointer into 'format'. */
491 char const *direc_start; /* Start of % directive. */
492 size_t direc_length; /* Length of % directive. */
493 bool have_field_width; /* True if FIELD_WIDTH is valid. */
494 int field_width = 0; /* Arg to first '*'. */
495 bool have_precision; /* True if PRECISION is valid. */
496 int precision = 0; /* Arg to second '*'. */
497 char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
499 for (f = format; *f; ++f)
501 switch (*f)
503 case '%':
504 direc_start = f++;
505 direc_length = 1;
506 have_field_width = have_precision = false;
507 if (*f == '%')
509 putchar ('%');
510 break;
512 if (*f == 'b')
514 /* FIXME: Field width and precision are not supported
515 for %b, even though POSIX requires it. */
516 if (argc > 0)
518 print_esc_string (*argv);
519 ++argv;
520 --argc;
522 break;
525 if (*f == 'q')
527 if (argc > 0)
529 fputs (quotearg_style (shell_escape_quoting_style, *argv),
530 stdout);
531 ++argv;
532 --argc;
534 break;
537 memset (ok, 0, sizeof ok);
538 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
539 ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
540 ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
542 for (;; f++, direc_length++)
543 switch (*f)
545 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
546 case 'I':
547 #endif
548 case '\'':
549 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
550 ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
551 break;
552 case '-': case '+': case ' ':
553 break;
554 case '#':
555 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
556 break;
557 case '0':
558 ok['c'] = ok['s'] = 0;
559 break;
560 default:
561 goto no_more_flag_characters;
563 no_more_flag_characters:
565 if (*f == '*')
567 ++f;
568 ++direc_length;
569 if (argc > 0)
571 intmax_t width = vstrtoimax (*argv);
572 if (INT_MIN <= width && width <= INT_MAX)
573 field_width = width;
574 else
575 error (EXIT_FAILURE, 0, _("invalid field width: %s"),
576 quote (*argv));
577 ++argv;
578 --argc;
580 else
581 field_width = 0;
582 have_field_width = true;
584 else
585 while (ISDIGIT (*f))
587 ++f;
588 ++direc_length;
590 if (*f == '.')
592 ++f;
593 ++direc_length;
594 ok['c'] = 0;
595 if (*f == '*')
597 ++f;
598 ++direc_length;
599 if (argc > 0)
601 intmax_t prec = vstrtoimax (*argv);
602 if (prec < 0)
604 /* A negative precision is taken as if the
605 precision were omitted, so -1 is safe
606 here even if prec < INT_MIN. */
607 precision = -1;
609 else if (INT_MAX < prec)
610 error (EXIT_FAILURE, 0, _("invalid precision: %s"),
611 quote (*argv));
612 else
613 precision = prec;
614 ++argv;
615 --argc;
617 else
618 precision = 0;
619 have_precision = true;
621 else
622 while (ISDIGIT (*f))
624 ++f;
625 ++direc_length;
629 while (*f == 'l' || *f == 'L' || *f == 'h'
630 || *f == 'j' || *f == 't' || *f == 'z')
631 ++f;
634 unsigned char conversion = *f;
635 int speclen = MIN (f + 1 - direc_start, INT_MAX);
636 if (! ok[conversion])
637 error (EXIT_FAILURE, 0,
638 _("%.*s: invalid conversion specification"),
639 speclen, direc_start);
642 print_direc (direc_start, direc_length, *f,
643 have_field_width, field_width,
644 have_precision, precision,
645 (argc <= 0 ? "" : (argc--, *argv++)));
646 break;
648 case '\\':
649 f += print_esc (f, false);
650 break;
652 default:
653 putchar (*f);
657 return save_argc - argc;
661 main (int argc, char **argv)
663 char *format;
664 int args_used;
666 initialize_main (&argc, &argv);
667 set_program_name (argv[0]);
668 setlocale (LC_ALL, "");
669 bindtextdomain (PACKAGE, LOCALEDIR);
670 textdomain (PACKAGE);
672 atexit (close_stdout);
674 exit_status = EXIT_SUCCESS;
676 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
678 /* We directly parse options, rather than use parse_long_options, in
679 order to avoid accepting abbreviations. */
680 if (argc == 2)
682 if (STREQ (argv[1], "--help"))
683 usage (EXIT_SUCCESS);
685 if (STREQ (argv[1], "--version"))
687 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
688 (char *) nullptr);
689 return EXIT_SUCCESS;
693 /* The above handles --help and --version.
694 Since there is no other invocation of getopt, handle '--' here. */
695 if (1 < argc && STREQ (argv[1], "--"))
697 --argc;
698 ++argv;
701 if (argc <= 1)
703 error (0, 0, _("missing operand"));
704 usage (EXIT_FAILURE);
707 format = argv[1];
708 argc -= 2;
709 argv += 2;
713 args_used = print_formatted (format, argc, argv);
714 argc -= args_used;
715 argv += args_used;
717 while (args_used > 0 && argc > 0);
719 if (argc > 0)
720 error (0, 0,
721 _("warning: ignoring excess arguments, starting with %s"),
722 quote (argv[0]));
724 return exit_status;