doc: sort: give example for sorting on the last field
[coreutils.git] / src / printf.c
blob9be4f2337be67281614ec78745c0f08a6194fc4c
1 /* printf - format and print data
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Usage: printf format [argument...]
19 A front end to the printf function that lets it be used from the shell.
21 Backslash escapes:
23 \" = double quote
24 \\ = backslash
25 \a = alert (bell)
26 \b = backspace
27 \c = produce no further output
28 \e = escape
29 \f = form feed
30 \n = new line
31 \r = carriage return
32 \t = horizontal tab
33 \v = vertical tab
34 \ooo = octal number (ooo is 1 to 3 digits)
35 \xhh = hexadecimal number (hh is 1 to 2 digits)
36 \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37 \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
39 Additional directive:
41 %b = print an argument string, interpreting backslash escapes,
42 except that octal escapes are of the form \0 or \0ooo.
44 %q = print an argument string in a format that can be
45 reused as shell input. Escaped characters use the proposed
46 POSIX $'' syntax supported by most shells.
48 The 'format' argument is re-used as many times as necessary
49 to convert all of the given arguments.
51 David MacKenzie <djm@gnu.ai.mit.edu> */
53 #include <config.h>
54 #include <stdio.h>
55 #include <sys/types.h>
56 #include <wchar.h>
58 #include "system.h"
59 #include "c-ctype.h"
60 #include "cl-strtod.h"
61 #include "quote.h"
62 #include "unicodeio.h"
63 #include "xprintf.h"
65 /* The official name of this program (e.g., no 'g' prefix). */
66 #define PROGRAM_NAME "printf"
68 #define AUTHORS proper_name ("David MacKenzie")
/* Nonzero if C is an octal digit character, '0' through '7'.
   C is evaluated twice.  */
70 #define isodigit(c) ((c) >= '0' && (c) <= '7')
/* Numeric value of the hexadecimal digit character C.  Letters in
   either case map to 10..15; every other character is treated as a
   decimal digit with no validation, so callers must check C first.
   C is evaluated more than once.  */
71 #define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \
72 (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0')
/* Numeric value of the digit character C, with no validation.  */
73 #define octtobin(c) ((c) - '0')
75 /* The value to return to the calling program. */
76 static int exit_status;
78 /* True if the POSIXLY_CORRECT environment variable is set. */
79 static bool posixly_correct;
81 /* This message appears in N_() here rather than just in _() below because
82 the sole use would have been in a #define. */
83 static char const *const cfcc_msg =
84 N_("warning: %s: character(s) following character constant have been ignored");
/* Emit usage information and terminate the program with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis, the list of interpreted escape sequences and
   the list of supported directives are written to standard output,
   followed by the built-in warning and the ancillary information.
   The function finishes by calling exit (STATUS).  */
86 void
87 usage (int status)
89 if (status != EXIT_SUCCESS)
90 emit_try_help ();
91 else
93 printf (_("\
94 Usage: %s FORMAT [ARGUMENT]...\n\
95 or: %s OPTION\n\
96 "),
97 program_name, program_name);
98 fputs (_("\
99 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
101 "), stdout);
102 fputs (HELP_OPTION_DESCRIPTION, stdout);
103 fputs (VERSION_OPTION_DESCRIPTION, stdout);
104 fputs (_("\
106 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
108 \\\" double quote\n\
109 "), stdout);
110 fputs (_("\
111 \\\\ backslash\n\
112 \\a alert (BEL)\n\
113 \\b backspace\n\
114 \\c produce no further output\n\
115 \\e escape\n\
116 \\f form feed\n\
117 \\n new line\n\
118 \\r carriage return\n\
119 \\t horizontal tab\n\
120 \\v vertical tab\n\
121 "), stdout);
122 fputs (_("\
123 \\NNN byte with octal value NNN (1 to 3 digits)\n\
124 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
125 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
126 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
127 "), stdout);
128 fputs (_("\
129 %% a single %\n\
130 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
131 except that octal escapes are of the form \\0 or \\0NNN\n\
132 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
133 escaping non-printable characters with the proposed POSIX $'' syntax.\
134 \n\n\
135 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
136 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
137 "), stdout);
138 printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
139 emit_ancillary_info (PROGRAM_NAME);
/* Reached on both the brief and the full-help path.  */
141 exit (status);
144 static void
145 verify_numeric (char const *s, char const *end)
147 if (errno)
149 error (0, errno, "%s", quote (s));
150 exit_status = EXIT_FAILURE;
152 else if (*end)
154 if (s == end)
155 error (0, 0, _("%s: expected a numeric value"), quote (s));
156 else
157 error (0, 0, _("%s: value not completely converted"), quote (s));
158 exit_status = EXIT_FAILURE;
/* STRTOX (TYPE, FUNC_NAME, LIB_FUNC_EXPR) defines a static function
   'TYPE FUNC_NAME (char const *s)' that converts the argument string S
   to a value of type TYPE.

   If S starts with a single or double quote that is followed by at
   least one more byte, the result is the numeric value of the character
   after the quote: the byte itself (as unsigned char), or, when
   MB_CUR_MAX > 1 and more bytes follow, the wide character decoded by
   mbrtowc if that decoding succeeds.  Anything left after that
   character triggers the cfcc_msg warning unless posixly_correct is
   true.

   Otherwise errno is cleared, LIB_FUNC_EXPR is evaluated (it may refer
   to the local names 's' and 'end'), and verify_numeric reports any
   conversion problem.

   The three expansions that follow the macro create vstrtoimax,
   vstrtoumax and vstrtold.  */
162 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
163 static TYPE \
164 FUNC_NAME (char const *s) \
166 char *end; \
167 TYPE val; \
169 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
171 unsigned char ch = *++s; \
172 val = ch; \
174 if (MB_CUR_MAX > 1 && *(s + 1)) \
176 mbstate_t mbstate; mbszero (&mbstate); \
177 wchar_t wc; \
178 size_t slen = strlen (s); \
179 ssize_t bytes; \
180 /* Use mbrtowc not mbrtoc32, as per POSIX. */ \
181 bytes = mbrtowc (&wc, s, slen, &mbstate); \
182 if (0 < bytes) \
184 val = wc; \
185 s += bytes - 1; \
189 /* If POSIXLY_CORRECT is not set, then give a warning that there \
190 are characters following the character constant and that GNU \
191 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
192 set, then don't give the warning. */ \
193 if (*++s != 0 && !posixly_correct) \
194 error (0, 0, _(cfcc_msg), s); \
196 else \
198 errno = 0; \
199 val = (LIB_FUNC_EXPR); \
200 verify_numeric (s, end); \
202 return val; \
205 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
206 STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
207 STRTOX (long double, vstrtold, cl_strtold (s, &end))
209 /* Output a single-character \ escape. */
211 static void
212 print_esc_char (char c)
214 switch (c)
216 case 'a': /* Alert. */
217 putchar ('\a');
218 break;
219 case 'b': /* Backspace. */
220 putchar ('\b');
221 break;
222 case 'c': /* Cancel the rest of the output. */
223 exit (EXIT_SUCCESS);
224 break;
225 case 'e': /* Escape. */
226 putchar ('\x1B');
227 break;
228 case 'f': /* Form feed. */
229 putchar ('\f');
230 break;
231 case 'n': /* New line. */
232 putchar ('\n');
233 break;
234 case 'r': /* Carriage return. */
235 putchar ('\r');
236 break;
237 case 't': /* Horizontal tab. */
238 putchar ('\t');
239 break;
240 case 'v': /* Vertical tab. */
241 putchar ('\v');
242 break;
243 default:
244 putchar (c);
245 break;
/* Interpret and print the backslash escape that begins at ESCSTART
   (which points at the backslash itself).
   Return how many characters of the escape follow the backslash, so
   that the caller can step over them.
   OCTAL_0 selects the octal syntax: when true, octal escapes are
   written \0ooo; when false they are written \ooo.
   A \x escape without digits, a \u or \U escape with too few digits,
   and a code point in the surrogate range are fatal errors.
   An unrecognized escape is printed unchanged, backslash included.  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *cur = escstart + 1;

  if (*cur == 'x')
    {
      /* \xhh: one or two hexadecimal digits are required.  */
      int value = 0;
      int ndigits = 0;

      cur++;
      while (ndigits < 2 && c_isxdigit (to_uchar (*cur)))
        {
          value = value * 16 + hextobin (*cur);
          ndigits++;
          cur++;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (value);
    }
  else if (isodigit (*cur))
    {
      /* \0ooo when OCTAL_0 is set and a '0' leads, \ooo otherwise.
         Accepting \ooo even when OCTAL_0 is set is an undocumented
         extension to POSIX that is compatible with Bash 2.05b.  */
      int value = 0;
      int ndigits = 0;

      if (octal_0 && *cur == '0')
        cur++;
      while (ndigits < 3 && isodigit (*cur))
        {
          value = value * 8 + octtobin (*cur);
          ndigits++;
          cur++;
        }
      putchar (value);
    }
  else if (*cur && strchr ("\"\\abcefnrtv", *cur))
    {
      char simple = *cur;
      cur++;
      print_esc_char (simple);
    }
  else if (*cur == 'u' || *cur == 'U')
    {
      char const kind = *cur;
      int const wanted = (kind == 'u' ? 4 : 8);
      unsigned int code_point = 0;

      cur++;
      for (int remaining = wanted; 0 < remaining; remaining--, cur++)
        {
          if (! c_isxdigit (to_uchar (*cur)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          code_point = code_point * 16 + hextobin (*cur);
        }

      /* Code points 0000D800 through 0000DFFF inclusive are invalid.
         print_unicode_char () would print the literal \u.. for them,
         so reject them here instead.  */
      if (0xd800 <= code_point && code_point <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               kind, wanted, code_point);

      print_unicode_char (stdout, code_point, 0);
    }
  else
    {
      /* Unknown escape: emit the backslash and, unless the string ends
         here, the character after it.  */
      putchar ('\\');
      if (*cur != '\0')
        {
          putchar (*cur);
          cur++;
        }
    }

  return cur - escstart - 1;
}
/* Write STR to standard output, expanding each backslash escape on the
   way.  Escapes are interpreted in the %b flavour, i.e. with octal
   escapes of the form \0ooo.  */

static void
print_esc_string (char const *str)
{
  char const *cur = str;

  while (*cur != '\0')
    {
      if (*cur != '\\')
        {
          putchar (*cur);
          cur++;
          continue;
        }

      /* Step over the backslash plus the characters print_esc used.  */
      int consumed = print_esc (cur, true);
      cur += consumed;
      cur++;
    }
}
/* Format and print ARGUMENT according to one conversion specification.
   START points at the '%' that opens the directive and LENGTH is the
   number of bytes of it to reuse; LENGTH covers neither a length
   modifier nor the conversion specifier.  CONVERSION is the conversion
   specifier character.  FIELD_WIDTH is passed for a '*' width when
   HAVE_FIELD_WIDTH is true, and PRECISION for a '*' precision when
   HAVE_PRECISION is true.  ARGUMENT is the operand text, which is
   converted to the type that CONVERSION requires before printing.  */

static void
print_direc (char const *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Choose the length modifier that matches the type the operand is
     converted to: intmax_t/uintmax_t for the integer conversions and
     long double for the floating-point ones.  It replaces whatever
     modifier the user wrote.  */
  char const *modifier = "";
  size_t modifier_len = 0;

  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";
      modifier_len = 1;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      modifier_len = 1;
      break;

    default:
      break;
    }

  /* Assemble a NUL-terminated format: the directive text, then the
     modifier, then the conversion character.  */
  char *fmt = xmalloc (length + modifier_len + 2);
  char *tail = mempcpy (fmt, start, length);
  tail = mempcpy (tail, modifier, modifier_len);
  tail[0] = conversion;
  tail[1] = '\0';

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t value = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t value = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double value = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (fmt, field_width, precision, value);
        else if (have_field_width)
          xprintf (fmt, field_width, value);
        else if (have_precision)
          xprintf (fmt, precision, value);
        else
          xprintf (fmt, value);
      }
      break;

    case 'c':
      /* Only the first byte of the operand is printed, and a '*'
         precision is never passed for this conversion.  */
      if (have_field_width)
        xprintf (fmt, field_width, *argument);
      else
        xprintf (fmt, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (fmt, field_width, precision, argument);
      else if (have_field_width)
        xprintf (fmt, field_width, argument);
      else if (have_precision)
        xprintf (fmt, precision, argument);
      else
        xprintf (fmt, argument);
      break;
    }

  free (fmt);
}
484 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
485 arguments to any '%' directives.
486 Return the number of elements of ARGV used. */
/* Ordinary characters are copied to standard output and backslash
   escapes go through print_esc in the \ooo flavour.  The directives
   %%, %b and %q are handled directly in this function; every other
   directive is parsed (flags, width, precision, length modifiers,
   conversion) and then passed to print_direc.  When the operands run
   out, a '*' width or precision counts as 0, %b and %q print nothing,
   and any other conversion receives the empty string.  An invalid
   conversion specification, or a '*' width or precision that does not
   fit, is a fatal error.  */
488 static int
489 print_formatted (char const *format, int argc, char **argv)
491 int save_argc = argc; /* Preserve original value. */
492 char const *f; /* Pointer into 'format'. */
493 char const *direc_start; /* Start of % directive. */
494 size_t direc_length; /* Length of % directive. */
495 bool have_field_width; /* True if FIELD_WIDTH is valid. */
496 int field_width = 0; /* Arg to first '*'. */
497 bool have_precision; /* True if PRECISION is valid. */
498 int precision = 0; /* Arg to second '*'. */
499 char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
501 for (f = format; *f; ++f)
503 switch (*f)
505 case '%':
506 direc_start = f++;
507 direc_length = 1;
508 have_field_width = have_precision = false;
509 if (*f == '%')
511 putchar ('%');
512 break;
514 if (*f == 'b')
516 /* FIXME: Field width and precision are not supported
517 for %b, even though POSIX requires it. */
518 if (argc > 0)
520 print_esc_string (*argv);
521 ++argv;
522 --argc;
524 break;
527 if (*f == 'q')
529 if (argc > 0)
531 fputs (quotearg_style (shell_escape_quoting_style, *argv),
532 stdout);
533 ++argv;
534 --argc;
536 break;
/* Begin with every supported conversion allowed; the flags and the
   precision parsed below clear the entries they cannot be combined
   with.  */
539 memset (ok, 0, sizeof ok);
540 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
541 ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
542 ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
/* Consume the flag characters, counting each one in DIREC_LENGTH.  */
544 for (;; f++, direc_length++)
545 switch (*f)
547 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
548 case 'I':
549 #endif
550 case '\'':
551 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
552 ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
553 break;
554 case '-': case '+': case ' ':
555 break;
556 case '#':
557 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
558 break;
559 case '0':
560 ok['c'] = ok['s'] = 0;
561 break;
562 default:
563 goto no_more_flag_characters;
565 no_more_flag_characters:
/* Field width: either '*', whose value comes from the next operand
   (0 if none is left) and must fit in an int, or a run of digits that
   stays part of the directive text.  */
567 if (*f == '*')
569 ++f;
570 ++direc_length;
571 if (argc > 0)
573 intmax_t width = vstrtoimax (*argv);
574 if (INT_MIN <= width && width <= INT_MAX)
575 field_width = width;
576 else
577 error (EXIT_FAILURE, 0, _("invalid field width: %s"),
578 quote (*argv));
579 ++argv;
580 --argc;
582 else
583 field_width = 0;
584 have_field_width = true;
586 else
587 while (ISDIGIT (*f))
589 ++f;
590 ++direc_length;
/* Optional precision, parsed like the width.  A '.' rules out %c.  */
592 if (*f == '.')
594 ++f;
595 ++direc_length;
596 ok['c'] = 0;
597 if (*f == '*')
599 ++f;
600 ++direc_length;
601 if (argc > 0)
603 intmax_t prec = vstrtoimax (*argv);
604 if (prec < 0)
606 /* A negative precision is taken as if the
607 precision were omitted, so -1 is safe
608 here even if prec < INT_MIN. */
609 precision = -1;
611 else if (INT_MAX < prec)
612 error (EXIT_FAILURE, 0, _("invalid precision: %s"),
613 quote (*argv));
614 else
615 precision = prec;
616 ++argv;
617 --argc;
619 else
620 precision = 0;
621 have_precision = true;
623 else
624 while (ISDIGIT (*f))
626 ++f;
627 ++direc_length;
/* Skip any length modifiers without counting them in DIREC_LENGTH, so
   they are not part of the text handed to print_direc.  */
631 while (*f == 'l' || *f == 'L' || *f == 'h'
632 || *f == 'j' || *f == 't' || *f == 'z')
633 ++f;
636 unsigned char conversion = *f;
637 int speclen = MIN (f + 1 - direc_start, INT_MAX);
638 if (! ok[conversion])
639 error (EXIT_FAILURE, 0,
640 _("%.*s: invalid conversion specification"),
641 speclen, direc_start);
/* With no operand left, the conversion is applied to the empty
   string.  */
644 print_direc (direc_start, direc_length, *f,
645 have_field_width, field_width,
646 have_precision, precision,
647 (argc <= 0 ? "" : (argc--, *argv++)));
648 break;
650 case '\\':
651 f += print_esc (f, false);
652 break;
654 default:
655 putchar (*f);
659 return save_argc - argc;
663 main (int argc, char **argv)
665 char *format;
666 int args_used;
668 initialize_main (&argc, &argv);
669 set_program_name (argv[0]);
670 setlocale (LC_ALL, "");
671 bindtextdomain (PACKAGE, LOCALEDIR);
672 textdomain (PACKAGE);
674 atexit (close_stdout);
676 exit_status = EXIT_SUCCESS;
678 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
680 /* We directly parse options, rather than use parse_long_options, in
681 order to avoid accepting abbreviations. */
682 if (argc == 2)
684 if (STREQ (argv[1], "--help"))
685 usage (EXIT_SUCCESS);
687 if (STREQ (argv[1], "--version"))
689 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
690 (char *) nullptr);
691 return EXIT_SUCCESS;
695 /* The above handles --help and --version.
696 Since there is no other invocation of getopt, handle '--' here. */
697 if (1 < argc && STREQ (argv[1], "--"))
699 --argc;
700 ++argv;
703 if (argc <= 1)
705 error (0, 0, _("missing operand"));
706 usage (EXIT_FAILURE);
709 format = argv[1];
710 argc -= 2;
711 argv += 2;
715 args_used = print_formatted (format, argc, argv);
716 argc -= args_used;
717 argv += args_used;
719 while (args_used > 0 && argc > 0);
721 if (argc > 0)
722 error (0, 0,
723 _("warning: ignoring excess arguments, starting with %s"),
724 quote (argv[0]));
726 return exit_status;