wc: increase I/O size from 16 KiB to 256KiB
[coreutils.git] / src / numfmt.c
bloba9f9e81c80fe19fc2c98d96b783b5a5fc5efef2d
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <ctype.h>
19 #include <float.h>
20 #include <getopt.h>
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <langinfo.h>
25 #include "argmatch.h"
26 #include "c-ctype.h"
27 #include "mbswidth.h"
28 #include "quote.h"
29 #include "skipchars.h"
30 #include "system.h"
31 #include "xstrtol.h"
33 #include "set-fields.h"
35 #if HAVE_FPSETPREC
36 # include <ieeefp.h>
37 #endif
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "numfmt"
/* Author credit; the name is wrapped in proper_name.  */
42 #define AUTHORS proper_name ("Assaf Gordon")
44 /* Exit code when some numbers fail to convert.
   This is also the initial value of conv_exit_code.  */
45 enum { EXIT_CONVERSION_WARNINGS = 2 };
/* Identifiers for the long options listed in 'longopts'.
   They start at CHAR_MAX + 1, so none of them can equal a single-character
   option code such as 'd' or 'z', which 'longopts' also uses.  */
47 enum
49 FROM_OPTION = CHAR_MAX + 1,
50 FROM_UNIT_OPTION,
51 TO_OPTION,
52 TO_UNIT_OPTION,
53 ROUND_OPTION,
54 SUFFIX_OPTION,
55 GROUPING_OPTION,
56 PADDING_OPTION,
57 FIELD_OPTION,
58 DEBUG_OPTION,
59 DEV_DEBUG_OPTION,
60 HEADER_OPTION,
61 FORMAT_OPTION,
62 INVALID_OPTION
/* Scaling modes.  default_scale_base maps the IEC variants to base 1024
   and every other mode to base 1000.  */
65 enum scale_type
67 scale_none, /* the default: no scaling. */
68 scale_auto, /* --from only. */
69 scale_SI,
70 scale_IEC,
71 scale_IEC_I /* 'i' suffix is required. */
/* Argument names and the matching scale_type values, kept in the same
   order.  The 'from' pair includes "auto"; the 'to' pair does not.
   NOTE(review): presumably consumed through the argmatch interface;
   confirm at the call sites.  */
74 static char const *const scale_from_args[] =
76 "none", "auto", "si", "iec", "iec-i", nullptr
79 static enum scale_type const scale_from_types[] =
81 scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
84 static char const *const scale_to_args[] =
86 "none", "si", "iec", "iec-i", nullptr
89 static enum scale_type const scale_to_types[] =
91 scale_none, scale_SI, scale_IEC, scale_IEC_I
/* Rounding methods; simple_round dispatches on these.  */
95 enum round_type
97 round_ceiling,
98 round_floor,
99 round_from_zero,
100 round_to_zero,
101 round_nearest,
/* Argument names and the matching round_type values, in the same order.  */
104 static char const *const round_args[] =
106 "up", "down", "from-zero", "towards-zero", "nearest", nullptr
109 static enum round_type const round_types[] =
111 round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
/* How conversion errors are handled.  The code in this file suppresses
   its diagnostics when the active mode is inval_ignore.  */
115 enum inval_type
117 inval_abort,
118 inval_fail,
119 inval_warn,
120 inval_ignore
/* Argument names and the matching inval_type values, in the same order.  */
123 static char const *const inval_args[] =
125 "abort", "fail", "warn", "ignore", nullptr
128 static enum inval_type const inval_types[] =
130 inval_abort, inval_fail, inval_warn, inval_ignore
/* Long option table in getopt_long's 'struct option' layout.
   --delimiter and --zero-terminated reuse the short option codes 'd' and
   'z'; the rest use the *_OPTION identifiers.  --header takes an optional
   argument.  The all-null entry terminates the table.  */
133 static struct option const longopts[] =
135 {"from", required_argument, nullptr, FROM_OPTION},
136 {"from-unit", required_argument, nullptr, FROM_UNIT_OPTION},
137 {"to", required_argument, nullptr, TO_OPTION},
138 {"to-unit", required_argument, nullptr, TO_UNIT_OPTION},
139 {"round", required_argument, nullptr, ROUND_OPTION},
140 {"padding", required_argument, nullptr, PADDING_OPTION},
141 {"suffix", required_argument, nullptr, SUFFIX_OPTION},
142 {"grouping", no_argument, nullptr, GROUPING_OPTION},
143 {"delimiter", required_argument, nullptr, 'd'},
144 {"field", required_argument, nullptr, FIELD_OPTION},
145 {"debug", no_argument, nullptr, DEBUG_OPTION},
146 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
147 {"header", optional_argument, nullptr, HEADER_OPTION},
148 {"format", required_argument, nullptr, FORMAT_OPTION},
149 {"invalid", required_argument, nullptr, INVALID_OPTION},
150 {"zero-terminated", no_argument, nullptr, 'z'},
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {nullptr, 0, nullptr, 0}
156 /* If delimiter has this value, blanks separate fields. */
157 enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
159 /* Maximum number of digits we can safely handle
160 without precision loss, if scaling is 'none'. */
161 enum { MAX_UNSCALED_DIGITS = LDBL_DIG };
163 /* Maximum number of digits we can work with.
164 This is equivalent to 999Q.
165 NOTE: 'long double' can handle more than that, but there's
166 no official suffix assigned beyond Quetta (1000^10). */
167 enum { MAX_ACCEPTABLE_DIGITS = 33 };
/* Conversion settings.  The initializers below are the defaults.  */
169 static enum scale_type scale_from = scale_none;
170 static enum scale_type scale_to = scale_none;
171 static enum round_type round_style = round_from_zero;
172 static enum inval_type inval_style = inval_abort;
173 static char const *suffix = nullptr;
174 static uintmax_t from_unit_size = 1;
175 static uintmax_t to_unit_size = 1;
176 static int grouping = 0;
/* Buffer that prepare_padded_number fills with the formatted number
   (growing it as needed) and print_padded_number writes out.  */
177 static char *padding_buffer = nullptr;
178 static idx_t padding_buffer_size = 0;
/* Requested field width: a negative value is treated as left alignment
   by prepare_padded_number, and 0 means no padding.  */
179 static intmax_t padding_width = 0;
/* Width for zero padding, set by parse_format_string; 0 means none.  */
180 static int zero_padding_width = 0;
/* Precision taken from the format string; -1 means none was given.  */
181 static long int user_precision = -1;
182 static char const *format_str = nullptr;
/* Text before and after the % directive of the format string, as split
   by parse_format_string; null when that part is empty.  */
183 static char *format_str_prefix = nullptr;
184 static char *format_str_suffix = nullptr;
186 /* By default, any conversion error will terminate the program. */
187 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
190 /* auto-pad each line based on skipped whitespace. */
191 static int auto_padding = 0;
193 /* field delimiter */
194 static int delimiter = DELIMITER_DEFAULT;
196 /* line delimiter. */
197 static unsigned char line_delim = '\n';
199 /* if non-zero, the first 'header' lines from STDIN are skipped. */
200 static uintmax_t header = 0;
202 /* Debug for users: print warnings to STDERR about possible
203 error (similar to sort's debug). */
204 static bool debug;
206 /* will be set according to the current locale. */
207 static char const *decimal_point;
208 static int decimal_point_length;
210 /* debugging for developers. Enables devmsg(). */
211 static bool dev_debug = false;
214 static inline int
215 default_scale_base (enum scale_type scale)
217 switch (scale)
219 case scale_IEC:
220 case scale_IEC_I:
221 return 1024;
223 case scale_none:
224 case scale_auto:
225 case scale_SI:
226 default:
227 return 1000;
/* Scale letters recognised on input.  VALID_SUFFIXES skips the leading
   '0'; unit_to_umax is the only code here that passes the full string,
   '0' included, on to xstrtoumax.  */
static char const zero_and_valid_suffixes[] = "0KkMGTPEZYRQ";
static char const *valid_suffixes = 1 + zero_and_valid_suffixes;

/* Return true if SUF is one of the letters in VALID_SUFFIXES.
   NUL is rejected explicitly: strchr treats the terminating null byte
   as part of the string, so looking up '\0' would otherwise succeed and
   a caller that then steps over the "suffix" would run past the end of
   its input.  */
static inline bool
valid_suffix (const char suf)
{
  return suf != '\0' && strchr (valid_suffixes, suf);
}
/* Return the power that the scale letter SUF stands for: 1 for 'k' or
   'K', 2 for 'M', and so on up to 10 for 'Q'.  Any other character,
   NUL included, yields 0.  */
static inline int
suffix_power (const char suf)
{
  /* Position in this ladder, plus one, is the power.  */
  static char const ladder[] = "KMGTPEZYRQ";

  if (suf == 'k')
    return 1;                   /* lower-case kilo.  */
  if (suf == '\0')
    return 0;                   /* strchr would match the terminator.  */

  char const *hit = strchr (ladder, suf);
  return hit ? (int) (hit - ladder) + 1 : 0;
}
/* Return the upper-case scale letter for POWER as a string: "" for 0,
   "K" for 1, ... "Q" for 10.  Any other POWER yields "(error)".  */
static inline char const *
suffix_power_char (int power)
{
  static char const *const letters[] =
    {
      "", "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q"
    };
  int const n_letters = sizeof letters / sizeof letters[0];

  if (power < 0 || n_letters <= power)
    return "(error)";
  return letters[power];
}
/* Return BASE raised to the integer power X.
   Similar to 'powl(3)' but without requiring 'libm'.

   X == 0 yields 1; this case is reached, e.g. when double_to_human is
   asked for zero digits of precision.  A negative X yields the
   reciprocal power, computed by repeated division, so the loops always
   terminate instead of counting X down through INT_MIN.  */
static long double
powerld (long double base, int x)
{
  long double result = 1;

  /* TODO: check for overflow, inf?  */
  for (; 0 < x; x--)
    result *= base;
  for (; x < 0; x++)
    result /= base;

  return result;
}
/* Return the magnitude of VAL.
   Similar to 'fabs(3)' but without requiring 'libm'.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;
  return val;
}
347 /* Scale down 'val', returns 'updated val' and 'x', such that
348 val*base^X = original val
349 Similar to "frexpl(3)" but without requiring 'libm',
350 allowing only integer scale, limited functionality and error checking. */
351 static long double
352 expld (long double val, int base, int /*output */ *x)
354 int power = 0;
356 if (val >= -LDBL_MAX && val <= LDBL_MAX)
358 while (absld (val) >= base)
360 ++power;
361 val /= base;
364 if (x)
365 *x = power;
366 return val;
/* EXTREMELY limited 'ceil' - without 'libm'.
   Truncate VAL to an integer, then step up by one if truncation moved
   the value downwards.  Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  intmax_t truncated = val;
  return truncated + (truncated < val);
}
/* EXTREMELY limited 'floor' - without 'libm'.
   Implemented as the negated ceiling of the negated value.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_floor (long double val)
{
  intmax_t mirrored = simple_round_ceiling (-val);
  return -mirrored;
}
/* EXTREMELY limited 'round away from zero': negative values are
   floored, all others are rounded up.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  if (val < 0)
    return simple_round_floor (val);
  return simple_round_ceiling (val);
}
/* EXTREMELY limited 'round toward zero': relies on the truncation done
   by the floating-point to integer conversion.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  intmax_t truncated = val;
  return truncated;
}
/* EXTREMELY limited 'round' - without 'libm'.
   Push VAL half a unit away from zero, then let the conversion to
   integer truncate.  Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  long double nudged = val;
  if (val < 0)
    nudged -= 0.5;
  else
    nudged += 0.5;
  return nudged;
}
412 ATTRIBUTE_CONST
413 static inline long double
414 simple_round (long double val, enum round_type t)
416 intmax_t rval;
417 intmax_t intmax_mul = val / INTMAX_MAX;
418 val -= (long double) INTMAX_MAX * intmax_mul;
420 switch (t)
422 case round_ceiling:
423 rval = simple_round_ceiling (val);
424 break;
426 case round_floor:
427 rval = simple_round_floor (val);
428 break;
430 case round_from_zero:
431 rval = simple_round_from_zero (val);
432 break;
434 case round_to_zero:
435 rval = simple_round_to_zero (val);
436 break;
438 case round_nearest:
439 rval = simple_round_nearest (val);
440 break;
442 default:
443 /* to silence the compiler - this should never happen. */
444 return 0;
447 return (long double) INTMAX_MAX * intmax_mul + rval;
/* Result codes of the simple_strtod_* parsers.  Callers in this file
   treat SSE_OK and SSE_OK_PRECISION_LOSS as success and every other
   value as a failure.  */
450 enum simple_strtod_error
452 SSE_OK = 0,
453 SSE_OK_PRECISION_LOSS,
454 SSE_OVERFLOW,
455 SSE_INVALID_NUMBER,
457 /* the following are returned by 'simple_strtod_human'. */
458 SSE_VALID_BUT_FORBIDDEN_SUFFIX,
459 SSE_INVALID_SUFFIX,
460 SSE_MISSING_I_SUFFIX
463 /* Read an *integer* INPUT_STR,
464 but return the integer value in a 'long double' VALUE
465 hence, no UINTMAX_MAX limitation.
466 NEGATIVE is updated, and is stored separately from the VALUE
467 so that signbit() isn't required to determine the sign of -0..
468 ENDPTR is required (unlike strtod) and is used to store a pointer
469 to the character after the last character used in the conversion.
471 Note locale'd grouping is not supported,
472 nor is skipping of white-space supported.
474 Returns:
475 SSE_OK - valid number.
476 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
477 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
478 SSE_INVALID_NUMBER - if no digits were found. */
479 static enum simple_strtod_error
480 simple_strtod_int (char const *input_str,
481 char **endptr, long double *value, bool *negative)
483 enum simple_strtod_error e = SSE_OK;
485 long double val = 0;
486 int digits = 0;
487 bool found_digit = false;
489 if (*input_str == '-')
491 input_str++;
492 *negative = true;
494 else
495 *negative = false;
497 *endptr = (char *) input_str;
498 while (c_isdigit (**endptr))
500 int digit = (**endptr) - '0';
502 found_digit = true;
504 if (val || digit)
505 digits++;
507 if (digits > MAX_UNSCALED_DIGITS)
508 e = SSE_OK_PRECISION_LOSS;
510 if (digits > MAX_ACCEPTABLE_DIGITS)
511 return SSE_OVERFLOW;
513 val *= 10;
514 val += digit;
516 ++(*endptr);
518 if (! found_digit
519 && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length))
520 return SSE_INVALID_NUMBER;
521 if (*negative)
522 val = -val;
524 if (value)
525 *value = val;
527 return e;
530 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
531 and return the value in a 'long double' VALUE.
532 ENDPTR is required (unlike strtod) and is used to store a pointer
533 to the character after the last character used in the conversion.
534 PRECISION is optional and used to indicate fractions are present.
536 Note locale'd grouping is not supported,
537 nor is skipping of white-space supported.
539 Returns:
540 SSE_OK - valid number.
541 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
542 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
543 SSE_INVALID_NUMBER - if no digits were found. */
544 static enum simple_strtod_error
545 simple_strtod_float (char const *input_str,
546 char **endptr,
547 long double *value,
548 size_t *precision)
550 bool negative;
551 enum simple_strtod_error e = SSE_OK;
553 if (precision)
554 *precision = 0;
556 /* TODO: accept locale'd grouped values for the integral part. */
557 e = simple_strtod_int (input_str, endptr, value, &negative);
558 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
559 return e;
561 /* optional decimal point + fraction. */
562 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
564 char *ptr2;
565 long double val_frac = 0;
566 bool neg_frac;
568 (*endptr) += decimal_point_length;
569 enum simple_strtod_error e2 =
570 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
571 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
572 return e2;
573 if (e2 == SSE_OK_PRECISION_LOSS)
574 e = e2; /* propagate warning. */
575 if (neg_frac)
576 return SSE_INVALID_NUMBER;
578 /* number of digits in the fractions. */
579 size_t exponent = ptr2 - *endptr;
581 val_frac = ((long double) val_frac) / powerld (10, exponent);
583 /* TODO: detect loss of precision (only really 18 digits
584 of precision across all digits (before and after '.')). */
585 if (value)
587 if (negative)
588 *value -= val_frac;
589 else
590 *value += val_frac;
593 if (precision)
594 *precision = exponent;
596 *endptr = ptr2;
598 return e;
601 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
602 and return the value in a 'long double' VALUE,
603 with the precision of the input returned in PRECISION.
604 ENDPTR is required (unlike strtod) and is used to store a pointer
605 to the character after the last character used in the conversion.
606 ALLOWED_SCALING determines the scaling supported.
608 TODO:
609 support locale'd grouping
610 accept scientific and hex floats (probably use strtold directly)
612 Returns:
613 SSE_OK - valid number.
614 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
615 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
616 SSE_INVALID_NUMBER - if no digits were found.
617 SSE_VALID_BUT_FORBIDDEN_SUFFIX
618 SSE_INVALID_SUFFIX
619 SSE_MISSING_I_SUFFIX */
620 static enum simple_strtod_error
621 simple_strtod_human (char const *input_str,
622 char **endptr, long double *value, size_t *precision,
623 enum scale_type allowed_scaling)
625 int power = 0;
626 /* 'scale_auto' is checked below. */
627 int scale_base = default_scale_base (allowed_scaling);
629 devmsg ("simple_strtod_human:\n input string: %s\n"
630 " locale decimal-point: %s\n"
631 " MAX_UNSCALED_DIGITS: %d\n",
632 quote_n (0, input_str),
633 quote_n (1, decimal_point),
634 MAX_UNSCALED_DIGITS);
636 enum simple_strtod_error e =
637 simple_strtod_float (input_str, endptr, value, precision);
638 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
639 return e;
641 devmsg (" parsed numeric value: %Lf\n"
642 " input precision = %d\n", *value, (int)*precision);
644 if (**endptr != '\0')
646 /* process suffix. */
648 /* Skip any blanks between the number and suffix. */
649 while (isblank (to_uchar (**endptr)))
650 (*endptr)++;
652 if (!valid_suffix (**endptr))
653 return SSE_INVALID_SUFFIX;
655 if (allowed_scaling == scale_none)
656 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
658 power = suffix_power (**endptr);
659 (*endptr)++; /* skip first suffix character. */
661 if (allowed_scaling == scale_auto && **endptr == 'i')
663 /* auto-scaling enabled, and the first suffix character
664 is followed by an 'i' (e.g. Ki, Mi, Gi). */
665 scale_base = 1024;
666 (*endptr)++; /* skip second ('i') suffix character. */
667 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
668 scale_base);
671 *precision = 0; /* Reset, to select precision based on scale. */
674 if (allowed_scaling == scale_IEC_I)
676 if (**endptr == 'i')
677 (*endptr)++;
678 else
679 return SSE_MISSING_I_SUFFIX;
682 long double multiplier = powerld (scale_base, power);
684 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
686 /* TODO: detect loss of precision and overflows. */
687 (*value) = (*value) * multiplier;
689 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
691 return e;
695 static void
696 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
698 char const *msgid = nullptr;
700 switch (err)
702 case SSE_OK_PRECISION_LOSS:
703 case SSE_OK:
704 /* should never happen - this function isn't called when OK. */
705 unreachable ();
707 case SSE_OVERFLOW:
708 msgid = N_("value too large to be converted: %s");
709 break;
711 case SSE_INVALID_NUMBER:
712 msgid = N_("invalid number: %s");
713 break;
715 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
716 msgid = N_("rejecting suffix in input: %s (consider using --from)");
717 break;
719 case SSE_INVALID_SUFFIX:
720 msgid = N_("invalid suffix in input: %s");
721 break;
723 case SSE_MISSING_I_SUFFIX:
724 msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
725 break;
729 if (inval_style != inval_ignore)
730 error (conv_exit_code, 0, gettext (msgid), quote (input_str));
733 /* Convert VAL to a human format string using PRECISION in BUF of size
734 BUF_SIZE. Use SCALE, GROUP, and ROUND to format. Return
735 the number of bytes needed to represent VAL. If this number is not
736 less than BUF_SIZE, the buffer is too small; if it is negative, the
737 formatting failed for some reason. */
738 static int
739 double_to_human (long double val, int precision,
740 char *buf, idx_t buf_size,
741 enum scale_type scale, int group, enum round_type round)
743 char fmt[sizeof "%'0.*Lfi%s%s%s" + INT_STRLEN_BOUND (zero_padding_width)];
744 char *pfmt = fmt;
745 *pfmt++ = '%';
747 if (group)
748 *pfmt++ = '\'';
750 if (zero_padding_width)
751 pfmt += sprintf (pfmt, "0%d", zero_padding_width);
753 devmsg ("double_to_human:\n");
755 if (scale == scale_none)
757 val *= powerld (10, precision);
758 val = simple_round (val, round);
759 val /= powerld (10, precision);
761 devmsg ((group) ?
762 " no scaling, returning (grouped) value: %'.*Lf\n" :
763 " no scaling, returning value: %.*Lf\n", precision, val);
765 strcpy (pfmt, ".*Lf%s");
767 return snprintf (buf, buf_size, fmt, precision, val,
768 suffix ? suffix : "");
771 /* Scaling requested by user. */
772 double scale_base = default_scale_base (scale);
774 /* Normalize val to scale. */
775 int power = 0;
776 val = expld (val, scale_base, &power);
777 devmsg (" scaled value to %Lf * %0.f ^ %d\n", val, scale_base, power);
779 /* Perform rounding. */
780 int power_adjust = 0;
781 if (user_precision != -1)
782 power_adjust = MIN (power * 3, user_precision);
783 else if (absld (val) < 10)
785 /* for values less than 10, we allow one decimal-point digit,
786 so adjust before rounding. */
787 power_adjust = 1;
790 val *= powerld (10, power_adjust);
791 val = simple_round (val, round);
792 val /= powerld (10, power_adjust);
794 /* two special cases after rounding:
795 1. a "999.99" can turn into 1000 - so scale down
796 2. a "9.99" can turn into 10 - so don't display decimal-point. */
797 if (absld (val) >= scale_base)
799 val /= scale_base;
800 power++;
803 /* should "7.0" be printed as "7" ?
804 if removing the ".0" is preferred, enable the fourth condition. */
805 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
806 /* && (absld (val) > simple_round_floor (val))) */
808 devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power);
810 strcpy (pfmt, ".*Lf%s%s%s");
812 int prec = user_precision == -1 ? show_decimal_point : user_precision;
814 return snprintf (buf, buf_size, fmt, prec, val,
815 power == 1 && scale == scale_SI
816 ? "k" : suffix_power_char (power),
817 &"i"[! (scale == scale_IEC_I && 0 < power)],
818 suffix ? suffix : "");
821 /* Convert a string of decimal digits, N_STRING, with an optional suffix
822 to an integral value. Suffixes are handled as with --from=auto.
823 Upon successful conversion, return that value.
824 If it cannot be converted, give a diagnostic and exit. */
825 static uintmax_t
826 unit_to_umax (char const *n_string)
828 strtol_error s_err;
829 char const *c_string = n_string;
830 char *t_string = nullptr;
831 size_t n_len = strlen (n_string);
832 char *end = nullptr;
833 uintmax_t n;
834 char const *suffixes = valid_suffixes;
836 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
837 if (n_len && ! c_isdigit (n_string[n_len - 1]))
839 t_string = xmalloc (n_len + 2);
840 end = t_string + n_len - 1;
841 memcpy (t_string, n_string, n_len);
843 if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
844 *end = '\0';
845 else
847 *++end = 'B';
848 *++end = '\0';
849 suffixes = zero_and_valid_suffixes;
852 c_string = t_string;
855 s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
857 if (s_err != LONGINT_OK || *end || n == 0)
859 free (t_string);
860 error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
863 free (t_string);
865 return n;
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help is called;
   otherwise the full help text (options, UNIT, FIELDS and FORMAT notes,
   exit status and examples) is written to stdout.  */
868 void
869 usage (int status)
871 if (status != EXIT_SUCCESS)
872 emit_try_help ();
873 else
875 printf (_("\
876 Usage: %s [OPTION]... [NUMBER]...\n\
877 "), program_name);
878 fputs (_("\
879 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
880 "), stdout);
881 emit_mandatory_arg_note ();
882 fputs (_("\
883 --debug print warnings about invalid input\n\
884 "), stdout);
885 fputs (_("\
886 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
887 "), stdout);
888 fputs (_("\
889 --field=FIELDS replace the numbers in these input fields (default=1);\n\
890 see FIELDS below\n\
891 "), stdout);
892 fputs (_("\
893 --format=FORMAT use printf style floating-point FORMAT;\n\
894 see FORMAT below for details\n\
895 "), stdout);
896 fputs (_("\
897 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
898 see UNIT below\n\
899 "), stdout);
900 fputs (_("\
901 --from-unit=N specify the input unit size (instead of the default 1)\n\
902 "), stdout);
903 fputs (_("\
904 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
905 (which means it has no effect in the C/POSIX locale)\n\
906 "), stdout);
907 fputs (_("\
908 --header[=N] print (without converting) the first N header lines;\n\
909 N defaults to 1 if not specified\n\
910 "), stdout);
911 fputs (_("\
912 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
913 abort (default), fail, warn, ignore\n\
914 "), stdout);
915 fputs (_("\
916 --padding=N pad the output to N characters; positive N will\n\
917 right-align; negative N will left-align;\n\
918 padding is ignored if the output is wider than N;\n\
919 the default is to automatically pad if a whitespace\n\
920 is found\n\
921 "), stdout);
922 fputs (_("\
923 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
924 up, down, from-zero (default), towards-zero, nearest\n\
925 "), stdout);
926 fputs (_("\
927 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
928 SUFFIX in input numbers\n\
929 "), stdout);
930 fputs (_("\
931 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
932 "), stdout);
933 fputs (_("\
934 --to-unit=N the output unit size (instead of the default 1)\n\
935 "), stdout);
936 fputs (_("\
937 -z, --zero-terminated line delimiter is NUL, not newline\n\
938 "), stdout);
939 fputs (HELP_OPTION_DESCRIPTION, stdout);
940 fputs (VERSION_OPTION_DESCRIPTION, stdout);
942 fputs (_("\
944 UNIT options:\n"), stdout);
945 fputs (_("\
946 none no auto-scaling is done; suffixes will trigger an error\n\
947 "), stdout);
948 fputs (_("\
949 auto accept optional single/two letter suffix:\n\
950 1K = 1000, 1k = 1000,\n\
951 1Ki = 1024,\n\
952 1M = 1000000,\n\
953 1Mi = 1048576,\n"), stdout);
954 fputs (_("\
955 si accept optional single letter suffix:\n\
956 1k = 1000, 1K = 1000,\n\
957 1M = 1000000,\n\
958 ...\n"), stdout);
959 fputs (_("\
960 iec accept optional single letter suffix:\n\
961 1K = 1024, 1k = 1024,\n\
962 1M = 1048576,\n\
963 ...\n"), stdout);
964 fputs (_("\
965 iec-i accept optional two-letter suffix:\n\
966 1Ki = 1024, 1ki = 1024,\n\
967 1Mi = 1048576,\n\
968 ...\n"), stdout);
970 fputs (_("\n\
971 FIELDS supports cut(1) style field ranges:\n\
972 N N'th field, counted from 1\n\
973 N- from N'th field, to end of line\n\
974 N-M from N'th to M'th field (inclusive)\n\
975 -M from first to M'th field (inclusive)\n\
976 - all fields\n\
977 Multiple fields/ranges can be separated with commas\n\
978 "), stdout);
980 fputs (_("\n\
981 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
982 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
983 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
984 will zero pad the number. Optional negative values (%-10f) will left align.\n\
985 Optional precision (%.1f) will override the input determined precision.\n\
986 "), stdout);
988 printf (_("\n\
989 Exit status is 0 if all input numbers were successfully converted.\n\
990 By default, %s will stop at the first conversion error with exit status 2.\n\
991 With --invalid='fail' a warning is printed for each conversion error\n\
992 and the exit status is 2. With --invalid='warn' each conversion error is\n\
993 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
994 errors are not diagnosed and the exit status is 0.\n\
995 "), program_name);
997 printf (_("\n\
998 Examples:\n\
999 $ %s --to=si 1000\n\
1000 -> \"1.0k\"\n\
1001 $ %s --to=iec 2048\n\
1002 -> \"2.0K\"\n\
1003 $ %s --to=iec-i 4096\n\
1004 -> \"4.0Ki\"\n\
1005 $ echo 1K | %s --from=si\n\
1006 -> \"1000\"\n\
1007 $ echo 1K | %s --from=iec\n\
1008 -> \"1024\"\n\
1009 $ df -B1 | %s --header --field 2-4 --to=si\n\
1010 $ ls -l | %s --header --field 5 --to=iec\n\
1011 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1012 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1013 program_name, program_name, program_name,
1014 program_name, program_name, program_name,
1015 program_name, program_name, program_name);
1016 emit_ancillary_info (PROGRAM_NAME);
1018 exit (status);
1021 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1022 1. padding (e.g. %20f)
1023 2. alignment (e.g. %-20f)
1024 3. grouping (e.g. %'f)
1026 Only a limited subset of printf(3) syntax is supported.
1028 TODO:
1029 support %e %g etc. rather than just %f
1031 NOTES:
1032 1. This function sets the global variables:
1033 padding_width, grouping,
1034 format_str_prefix, format_str_suffix
1035 2. The function aborts on any errors. */
1036 static void
1037 parse_format_string (char const *fmt)
1039 size_t i;
1040 size_t prefix_len = 0;
1041 size_t suffix_pos;
1042 char *endptr = nullptr;
1043 bool zero_padding = false;
1045 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
1047 if (!fmt[i])
1048 error (EXIT_FAILURE, 0,
1049 _("format %s has no %% directive"), quote (fmt));
1050 prefix_len++;
1053 i++;
1054 while (true)
1056 size_t skip = strspn (fmt + i, " ");
1057 i += skip;
1058 if (fmt[i] == '\'')
1060 grouping = 1;
1061 i++;
1063 else if (fmt[i] == '0')
1065 zero_padding = true;
1066 i++;
1068 else if (! skip)
1069 break;
1072 intmax_t pad = strtoimax (fmt + i, &endptr, 10);
1074 if (pad != 0)
1076 if (debug && padding_width && !(zero_padding && pad > 0))
1077 error (0, 0, _("--format padding overriding --padding"));
1079 /* Set padding width and alignment. On overflow, set widths to
1080 large values that cause later code to avoid undefined behavior
1081 and fail at a reasonable point. */
1082 if (pad < 0)
1083 padding_width = pad;
1084 else
1086 if (zero_padding)
1087 zero_padding_width = MIN (pad, INT_MAX);
1088 else
1089 padding_width = pad;
1092 i = endptr - fmt;
1094 if (fmt[i] == '\0')
1095 error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
1097 if (fmt[i] == '.')
1099 i++;
1100 errno = 0;
1101 user_precision = strtol (fmt + i, &endptr, 10);
1102 if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
1103 || isblank (fmt[i]) || fmt[i] == '+')
1105 /* Note we disallow negative user_precision to be
1106 consistent with printf(1). POSIX states that
1107 negative precision is only supported (and ignored)
1108 when used with '.*f'. glibc at least will malform
1109 output when passed a direct negative precision. */
1110 error (EXIT_FAILURE, 0,
1111 _("invalid precision in format %s"), quote (fmt));
1113 i = endptr - fmt;
1116 if (fmt[i] != 'f')
1117 error (EXIT_FAILURE, 0, _("invalid format %s,"
1118 " directive must be %%[0]['][-][N][.][N]f"),
1119 quote (fmt));
1120 i++;
1121 suffix_pos = i;
1123 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
1124 if (fmt[i] == '%' && fmt[i + 1] != '%')
1125 error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
1126 quote (fmt));
1128 if (prefix_len)
1129 format_str_prefix = ximemdup0 (fmt, prefix_len);
1130 if (fmt[suffix_pos] != '\0')
1131 format_str_suffix = xstrdup (fmt + suffix_pos);
1133 devmsg ("format String:\n input: %s\n grouping: %s\n"
1134 " padding width: %jd\n"
1135 " prefix: %s\n suffix: %s\n",
1136 quote_n (0, fmt), (grouping) ? "yes" : "no",
1137 padding_width,
1138 quote_n (1, format_str_prefix ? format_str_prefix : ""),
1139 quote_n (2, format_str_suffix ? format_str_suffix : ""));
1142 /* Parse a numeric value (with optional suffix) from a string.
1143 Returns a long double value, with input precision.
1145 If there's an error converting the string to value - exits with
1146 an error.
1148 If there are any trailing characters after the number
1149 (besides a valid suffix) - exits with an error. */
1150 static enum simple_strtod_error
1151 parse_human_number (char const *str, long double /*output */ *value,
1152 size_t *precision)
1154 char *ptr = nullptr;
1156 enum simple_strtod_error e =
1157 simple_strtod_human (str, &ptr, value, precision, scale_from);
1158 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1160 simple_strtod_fatal (e, str);
1161 return e;
1164 if (ptr && *ptr != '\0')
1166 if (inval_style != inval_ignore)
1167 error (conv_exit_code, 0, _("invalid suffix in input %s: %s"),
1168 quote_n (0, str), quote_n (1, ptr));
1169 e = SSE_INVALID_SUFFIX;
1171 return e;
1175 /* Print the given VAL, using the requested representation.
1176 The number is printed to STDOUT, with padding and alignment. */
1177 static bool
1178 prepare_padded_number (const long double val, size_t precision,
1179 intmax_t *padding)
1181 /* Generate Output. */
1182 size_t precision_used = user_precision == -1 ? precision : user_precision;
1184 /* Can't reliably print too-large values without auto-scaling. */
1185 int x;
1186 expld (val, 10, &x);
1188 if (scale_to == scale_none
1189 && x + precision_used > MAX_UNSCALED_DIGITS)
1191 if (inval_style != inval_ignore)
1193 if (precision_used)
1194 error (conv_exit_code, 0,
1195 _("value/precision too large to be printed: '%Lg/%zu'"
1196 " (consider using --to)"), val, precision_used);
1197 else
1198 error (conv_exit_code, 0,
1199 _("value too large to be printed: '%Lg'"
1200 " (consider using --to)"), val);
1202 return false;
1205 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1207 if (inval_style != inval_ignore)
1208 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1209 " (cannot handle values > 999Q)"), val);
1210 return false;
1213 while (true)
1215 int numlen = double_to_human (val, precision_used,
1216 padding_buffer, padding_buffer_size,
1217 scale_to, grouping, round_style);
1218 ptrdiff_t growth;
1219 if (numlen < 0 || ckd_sub (&growth, numlen, padding_buffer_size - 1))
1220 error (EXIT_FAILURE, 0,
1221 _("failed to prepare value '%Lf' for printing"), val);
1222 if (growth <= 0)
1223 break;
1224 padding_buffer = xpalloc (padding_buffer, &padding_buffer_size,
1225 growth, -1, 1);
1228 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1229 val, quote (padding_buffer));
1231 intmax_t pad = 0;
1232 if (padding_width)
1234 int buf_width = mbswidth (padding_buffer,
1235 MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE);
1236 if (0 <= buf_width)
1238 if (padding_width < 0)
1240 if (padding_width < -buf_width)
1241 pad = padding_width + buf_width;
1243 else
1245 if (buf_width < padding_width)
1246 pad = padding_width - buf_width;
1251 *padding = pad;
1252 return true;
1255 static void
1256 print_padded_number (intmax_t padding)
1258 if (format_str_prefix)
1259 fputs (format_str_prefix, stdout);
1261 for (intmax_t p = padding; 0 < p; p--)
1262 putchar (' ');
1264 fputs (padding_buffer, stdout);
1266 for (intmax_t p = padding; p < 0; p++)
1267 putchar (' ');
1269 if (format_str_suffix)
1270 fputs (format_str_suffix, stdout);
1273 /* Converts the TEXT number string to the requested representation,
1274 and handles automatic suffix addition. */
1275 static int
1276 process_suffixed_number (char *text, long double *result,
1277 size_t *precision, long int field)
1279 if (suffix && strlen (text) > strlen (suffix))
1281 char *possible_suffix = text + strlen (text) - strlen (suffix);
1283 if (STREQ (suffix, possible_suffix))
1285 /* trim suffix, ONLY if it's at the end of the text. */
1286 *possible_suffix = '\0';
1287 devmsg ("trimming suffix %s\n", quote (suffix));
1289 else
1290 devmsg ("no valid suffix found\n");
1293 /* Skip white space - always. */
1294 char *p = text;
1295 while (*p && isblank (to_uchar (*p)))
1296 ++p;
1298 /* setup auto-padding. */
1299 if (auto_padding)
1301 padding_width = text < p || 1 < field ? strlen (text) : 0;
1302 devmsg ("setting Auto-Padding to %jd characters\n", padding_width);
1305 long double val = 0;
1306 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1307 if (e == SSE_OK_PRECISION_LOSS && debug)
1308 error (0, 0, _("large input value %s: possible precision loss"),
1309 quote (p));
1311 if (from_unit_size != 1 || to_unit_size != 1)
1312 val = (val * from_unit_size) / to_unit_size;
1314 *result = val;
1316 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1319 static bool
1320 newline_or_blank (mcel_t g)
1322 return g.ch == '\n' || c32isblank (g.ch);
1325 /* Return a pointer to the beginning of the next field in line.
1326 The line pointer is moved to the end of the next field. */
1327 static char*
1328 next_field (char **line)
1330 char *field_start = *line;
1331 char *field_end = field_start;
1333 if (delimiter != DELIMITER_DEFAULT)
1335 if (*field_start != delimiter)
1337 while (*field_end && *field_end != delimiter)
1338 ++field_end;
1340 /* else empty field */
1342 else
1344 /* keep any space prefix in the returned field */
1345 field_end = skip_str_matching (field_end, newline_or_blank, true);
1346 field_end = skip_str_matching (field_end, newline_or_blank, false);
1349 *line = field_end;
1350 return field_start;
1353 ATTRIBUTE_PURE
1354 static bool
1355 include_field (uintmax_t field)
1357 struct field_range_pair *p = frp;
1358 if (!p)
1359 return field == 1;
1361 while (p->lo != UINTMAX_MAX)
1363 if (p->lo <= field && p->hi >= field)
1364 return true;
1365 ++p;
1367 return false;
/* Output field number FIELD, whose text is TEXT.  A field that is not
   selected for conversion is written unchanged.  A selected field is
   converted and written padded; if conversion or formatting fails, TEXT
   is written instead, as left by process_suffixed_number.

   Return false only if a selected field could not be converted.  */
static bool
process_field (char *text, uintmax_t field)
{
  if (! include_field (field))
    {
      fputs (text, stdout);
      return true;
    }

  long double val = 0;
  size_t precision = 0;
  intmax_t padding = 0;

  bool converted = (process_suffixed_number (text, &val, &precision, field)
                    && prepare_padded_number (val, precision, &padding));

  if (converted)
    print_padded_number (padding);
  else
    fputs (text, stdout);

  return converted;
}
1399 /* Convert number in a given line of text.
1400 NEWLINE specifies whether to output a '\n' for this "line". */
1401 static int
1402 process_line (char *line, bool newline)
1404 char *next;
1405 uintmax_t field = 0;
1406 bool valid_number = true;
1408 while (true) {
1409 ++field;
1410 next = next_field (&line);
1412 if (*line != '\0')
1414 /* nul terminate the current field string and process */
1415 *line = '\0';
1417 if (! process_field (next, field))
1418 valid_number = false;
1420 fputc ((delimiter == DELIMITER_DEFAULT) ?
1421 ' ' : delimiter, stdout);
1422 ++line;
1424 else
1426 /* end of the line, process the last field and finish */
1427 if (! process_field (next, field))
1428 valid_number = false;
1430 break;
1434 if (newline)
1435 putchar (line_delim);
1437 return valid_number;
1441 main (int argc, char **argv)
1443 int valid_numbers = 1;
1444 bool locale_ok;
1446 initialize_main (&argc, &argv);
1447 set_program_name (argv[0]);
1448 locale_ok = !!setlocale (LC_ALL, "");
1449 bindtextdomain (PACKAGE, LOCALEDIR);
1450 textdomain (PACKAGE);
1452 #if HAVE_FPSETPREC
1453 /* Enabled extended precision if needed. */
1454 fpsetprec (FP_PE);
1455 #endif
1457 decimal_point = nl_langinfo (RADIXCHAR);
1458 if (decimal_point == nullptr || strlen (decimal_point) == 0)
1459 decimal_point = ".";
1460 decimal_point_length = strlen (decimal_point);
1462 atexit (close_stdout);
1464 while (true)
1466 int c = getopt_long (argc, argv, "d:z", longopts, nullptr);
1468 if (c == -1)
1469 break;
1471 switch (c)
1473 case FROM_OPTION:
1474 scale_from = XARGMATCH ("--from", optarg,
1475 scale_from_args, scale_from_types);
1476 break;
1478 case FROM_UNIT_OPTION:
1479 from_unit_size = unit_to_umax (optarg);
1480 break;
1482 case TO_OPTION:
1483 scale_to =
1484 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1485 break;
1487 case TO_UNIT_OPTION:
1488 to_unit_size = unit_to_umax (optarg);
1489 break;
1491 case ROUND_OPTION:
1492 round_style = XARGMATCH ("--round", optarg, round_args, round_types);
1493 break;
1495 case GROUPING_OPTION:
1496 grouping = 1;
1497 break;
1499 case PADDING_OPTION:
1500 if (((xstrtoimax (optarg, nullptr, 10, &padding_width, "")
1501 & ~LONGINT_OVERFLOW)
1502 != LONGINT_OK)
1503 || padding_width == 0)
1504 error (EXIT_FAILURE, 0, _("invalid padding value %s"),
1505 quote (optarg));
1506 /* TODO: We probably want to apply a specific --padding
1507 to --header lines too. */
1508 break;
1510 case FIELD_OPTION:
1511 if (n_frp)
1512 error (EXIT_FAILURE, 0, _("multiple field specifications"));
1513 set_fields (optarg, SETFLD_ALLOW_DASH);
1514 break;
1516 case 'd':
1517 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1518 if (optarg[0] != '\0' && optarg[1] != '\0')
1519 error (EXIT_FAILURE, 0,
1520 _("the delimiter must be a single character"));
1521 delimiter = optarg[0];
1522 break;
1524 case 'z':
1525 line_delim = '\0';
1526 break;
1528 case SUFFIX_OPTION:
1529 suffix = optarg;
1530 break;
1532 case DEBUG_OPTION:
1533 debug = true;
1534 break;
1536 case DEV_DEBUG_OPTION:
1537 dev_debug = true;
1538 debug = true;
1539 break;
1541 case HEADER_OPTION:
1542 if (optarg)
1544 if (xstrtoumax (optarg, nullptr, 10, &header, "") != LONGINT_OK
1545 || header == 0)
1546 error (EXIT_FAILURE, 0, _("invalid header value %s"),
1547 quote (optarg));
1549 else
1551 header = 1;
1553 break;
1555 case FORMAT_OPTION:
1556 format_str = optarg;
1557 break;
1559 case INVALID_OPTION:
1560 inval_style = XARGMATCH ("--invalid", optarg,
1561 inval_args, inval_types);
1562 break;
1564 case_GETOPT_HELP_CHAR;
1565 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1567 default:
1568 usage (EXIT_FAILURE);
1572 if (format_str != nullptr && grouping)
1573 error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1575 if (debug && ! locale_ok)
1576 error (0, 0, _("failed to set locale"));
1578 /* Warn about no-op. */
1579 if (debug && scale_from == scale_none && scale_to == scale_none
1580 && !grouping && (padding_width == 0) && (format_str == nullptr))
1581 error (0, 0, _("no conversion option specified"));
1583 if (format_str)
1584 parse_format_string (format_str);
1586 if (grouping)
1588 if (scale_to != scale_none)
1589 error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1590 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1591 error (0, 0, _("grouping has no effect in this locale"));
1594 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1596 if (inval_style != inval_abort)
1597 conv_exit_code = 0;
1599 if (argc > optind)
1601 if (debug && header)
1602 error (0, 0, _("--header ignored with command-line input"));
1604 for (; optind < argc; optind++)
1605 valid_numbers &= process_line (argv[optind], true);
1607 else
1609 char *line = nullptr;
1610 size_t line_allocated = 0;
1611 ssize_t len;
1613 while (header-- && getdelim (&line, &line_allocated,
1614 line_delim, stdin) > 0)
1615 fputs (line, stdout);
1617 while ((len = getdelim (&line, &line_allocated,
1618 line_delim, stdin)) > 0)
1620 bool newline = line[len - 1] == line_delim;
1621 if (newline)
1622 line[len - 1] = '\0';
1623 valid_numbers &= process_line (line, newline);
1626 if (ferror (stdin))
1627 error (EXIT_FAILURE, errno, _("error reading input"));
1630 if (debug && !valid_numbers)
1631 error (0, 0, _("failed to convert some of the input numbers"));
1633 int exit_status = EXIT_SUCCESS;
1634 if (!valid_numbers
1635 && inval_style != inval_warn && inval_style != inval_ignore)
1636 exit_status = EXIT_CONVERSION_WARNINGS;
1638 main_exit (exit_status);