numfmt: prefer signed types
[coreutils.git] / src / numfmt.c
blob51f69b0213963642def974657d4d88e5aec6d0d4
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <float.h>
19 #include <getopt.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <langinfo.h>
24 #include "mbsalign.h"
25 #include "argmatch.h"
26 #include "c-ctype.h"
27 #include "quote.h"
28 #include "system.h"
29 #include "xstrtol.h"
31 #include "set-fields.h"
33 #if HAVE_FPSETPREC
34 # include <ieeefp.h>
35 #endif
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "numfmt"
40 #define AUTHORS proper_name ("Assaf Gordon")
42 /* Exit code when some numbers fail to convert. */
43 enum { EXIT_CONVERSION_WARNINGS = 2 };
/* Codes for the long-only options listed in 'longopts'.
   Numbering starts just above CHAR_MAX, so none of these values can
   coincide with a single-character option code such as 'd' or 'z'. */
45 enum
47 FROM_OPTION = CHAR_MAX + 1,
48 FROM_UNIT_OPTION,
49 TO_OPTION,
50 TO_UNIT_OPTION,
51 ROUND_OPTION,
52 SUFFIX_OPTION,
53 GROUPING_OPTION,
54 PADDING_OPTION,
55 FIELD_OPTION,
56 DEBUG_OPTION,
57 DEV_DEBUG_OPTION,
58 HEADER_OPTION,
59 FORMAT_OPTION,
60 INVALID_OPTION
/* Scaling modes.  The same enumeration describes both the input scale
   ('scale_from') and the output scale ('scale_to'). */
63 enum scale_type
65 scale_none, /* the default: no scaling. */
66 scale_auto, /* --from only. */
67 scale_SI,
68 scale_IEC,
69 scale_IEC_I /* 'i' suffix is required. */
/* Input-scale names; entry N corresponds to scale_from_types[N].
   NOTE(review): presumably matched against the --from argument; the
   lookup itself is outside this part of the file. */
72 static char const *const scale_from_args[] =
74 "none", "auto", "si", "iec", "iec-i", nullptr
77 static enum scale_type const scale_from_types[] =
79 scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
/* Output-scale names; entry N corresponds to scale_to_types[N].
   Unlike the input table there is no "auto" entry. */
82 static char const *const scale_to_args[] =
84 "none", "si", "iec", "iec-i", nullptr
87 static enum scale_type const scale_to_types[] =
89 scale_none, scale_SI, scale_IEC, scale_IEC_I
/* Rounding methods understood by simple_round(). */
93 enum round_type
95 round_ceiling,
96 round_floor,
97 round_from_zero,
98 round_to_zero,
99 round_nearest,
/* Rounding method names; entry N corresponds to round_types[N]. */
102 static char const *const round_args[] =
104 "up", "down", "from-zero", "towards-zero", "nearest", nullptr
107 static enum round_type const round_types[] =
109 round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
/* Policies for input that fails to convert (see 'inval_style'). */
113 enum inval_type
115 inval_abort,
116 inval_fail,
117 inval_warn,
118 inval_ignore
/* Policy names; entry N corresponds to inval_types[N]. */
121 static char const *const inval_args[] =
123 "abort", "fail", "warn", "ignore", nullptr
126 static enum inval_type const inval_types[] =
128 inval_abort, inval_fail, inval_warn, inval_ignore
/* Long option table.  Options that have a short form ('d', 'z') use
   that character as their code; the others use the *_OPTION enum values.
   The all-null entry terminates the table. */
131 static struct option const longopts[] =
133 {"from", required_argument, nullptr, FROM_OPTION},
134 {"from-unit", required_argument, nullptr, FROM_UNIT_OPTION},
135 {"to", required_argument, nullptr, TO_OPTION},
136 {"to-unit", required_argument, nullptr, TO_UNIT_OPTION},
137 {"round", required_argument, nullptr, ROUND_OPTION},
138 {"padding", required_argument, nullptr, PADDING_OPTION},
139 {"suffix", required_argument, nullptr, SUFFIX_OPTION},
140 {"grouping", no_argument, nullptr, GROUPING_OPTION},
141 {"delimiter", required_argument, nullptr, 'd'},
142 {"field", required_argument, nullptr, FIELD_OPTION},
143 {"debug", no_argument, nullptr, DEBUG_OPTION},
144 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
145 {"header", optional_argument, nullptr, HEADER_OPTION},
146 {"format", required_argument, nullptr, FORMAT_OPTION},
147 {"invalid", required_argument, nullptr, INVALID_OPTION},
148 {"zero-terminated", no_argument, nullptr, 'z'},
149 {GETOPT_HELP_OPTION_DECL},
150 {GETOPT_VERSION_OPTION_DECL},
151 {nullptr, 0, nullptr, 0}
154 /* If delimiter has this value, blanks separate fields. */
155 enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
157 /* Maximum number of digits we can safely handle
158 without precision loss, if scaling is 'none'. */
159 enum { MAX_UNSCALED_DIGITS = LDBL_DIG };
161 /* Maximum number of digits we can work with.
162 This is equivalent to 999Q.
163 NOTE: 'long double' can handle more than that, but there's
164 no official suffix assigned beyond Quetta (1000^10). */
165 enum { MAX_ACCEPTABLE_DIGITS = 33 };
167 static enum scale_type scale_from = scale_none;
168 static enum scale_type scale_to = scale_none;
169 static enum round_type round_style = round_from_zero;
170 static enum inval_type inval_style = inval_abort;
171 static char const *suffix = nullptr;
172 static uintmax_t from_unit_size = 1;
173 static uintmax_t to_unit_size = 1;
174 static int grouping = 0;
175 static char *padding_buffer = nullptr;
176 static size_t padding_buffer_size = 0;
177 static long int padding_width = 0;
178 static long int zero_padding_width = 0;
179 static long int user_precision = -1;
180 static char const *format_str = nullptr;
181 static char *format_str_prefix = nullptr;
182 static char *format_str_suffix = nullptr;
184 /* By default, any conversion error will terminate the program. */
185 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
188 /* auto-pad each line based on skipped whitespace. */
189 static int auto_padding = 0;
190 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
192 /* field delimiter */
193 static int delimiter = DELIMITER_DEFAULT;
195 /* line delimiter. */
196 static unsigned char line_delim = '\n';
198 /* if non-zero, the first 'header' lines from STDIN are skipped. */
199 static uintmax_t header = 0;
201 /* Debug for users: print warnings to STDERR about possible
202 error (similar to sort's debug). */
203 static bool debug;
205 /* will be set according to the current locale. */
206 static char const *decimal_point;
207 static int decimal_point_length;
209 /* debugging for developers. Enables devmsg(). */
210 static bool dev_debug = false;
213 static inline int
214 default_scale_base (enum scale_type scale)
216 switch (scale)
218 case scale_IEC:
219 case scale_IEC_I:
220 return 1024;
222 case scale_none:
223 case scale_auto:
224 case scale_SI:
225 default:
226 return 1000;
/* Suffix letters in increasing order of magnitude.  The variant with the
   leading '0' is what unit_to_umax passes to xstrtoumax.  */
static char const zero_and_valid_suffixes[] = "0KMGTPEZYRQ";
static char const *valid_suffixes = 1 + zero_and_valid_suffixes;

/* Return true if SUF is one of the scale suffix letters (K, M, ..., Q).

   The NUL byte is rejected explicitly: strchr treats the string
   terminator as part of the string, so without this check '\0' would be
   reported as a valid suffix and callers would advance past the end of
   their input.  */
static inline bool
valid_suffix (const char suf)
{
  return suf != '\0' && strchr (valid_suffixes, suf);
}
/* Map a suffix letter to its power of the scale base:
   'K' -> 1, 'M' -> 2, ... 'Q' -> 10.
   Any other character (including NUL) yields 0.  */
static inline int
suffix_power (const char suf)
{
  /* Position in this table, plus one, is the power.  */
  static char const letters[] = "KMGTPEZYRQ";

  for (int i = 0; letters[i] != '\0'; i++)
    if (letters[i] == suf)
      return i + 1;

  return 0;
}
/* Inverse of suffix_power: return the suffix string for POWER
   ("" for 0, "K" for 1, ... "Q" for 10).
   Out-of-range powers yield the literal string "(error)".  */
static inline char const *
suffix_power_char (int power)
{
  static char const *const names[] =
    {
      "", "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q"
    };

  if (power < 0 || 10 < power)
    return "(error)";

  return names[power];
}
/* Similar to 'powl(3)' but without requiring 'libm'.
   Return BASE raised to the integer power X by repeated multiplication.

   X == 0 yields 1.  A negative X yields the reciprocal of BASE**|X|;
   previously a negative X made the countdown loop run until the counter
   overflowed.  Results for X >= 0 are unchanged.

   TODO: check for overflow, inf?  */
static long double
powerld (long double base, int x)
{
  long double result = 1;
  int negative = x < 0;

  /* Step X toward zero from either side; stepping a negative X upward
     avoids negating INT_MIN.  */
  for (; x != 0; x += negative ? 1 : -1)
    result *= base;

  return negative ? 1 / result : result;
}
/* Absolute value of VAL; a 'fabs(3)' stand-in that avoids 'libm'.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;
  return val;
}
/* Repeatedly divide VAL by BASE until its magnitude drops below BASE.
   Return the reduced value and, if X is non-null, store the number of
   divisions there, so that  result * BASE^X == original VAL.
   Values outside [-LDBL_MAX, LDBL_MAX] are returned untouched with a
   power of 0.
   Similar to "frexpl(3)" but without requiring 'libm', allowing only
   integer scale, limited functionality and error checking.  */
static long double
expld (long double val, int base, int /*output */ *x)
{
  int divisions = 0;

  if (-LDBL_MAX <= val && val <= LDBL_MAX)
    for (; (val < 0 ? -val : val) >= base; divisions++)
      val /= base;

  if (x)
    *x = divisions;
  return val;
}
/* EXTREMELY limited 'ceil' - without 'libm'.
   Truncate VAL to an integer and bump it by one when truncation
   moved it below VAL.  Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  intmax_t truncated = val;
  return truncated + (truncated < val);
}
/* EXTREMELY limited 'floor' - without 'libm'.
   Implemented as the negated ceiling of -VAL.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_floor (long double val)
{
  intmax_t ceiling_of_negation = simple_round_ceiling (-val);
  return -ceiling_of_negation;
}
/* EXTREMELY limited 'round away from zero':
   negative values round down, all others round up.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  if (val < 0)
    return simple_round_floor (val);
  return simple_round_ceiling (val);
}
/* EXTREMELY limited 'round toward zero':
   relies on the truncation performed by the floating-to-integer
   conversion.  Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  intmax_t truncated = val;
  return truncated;
}
/* EXTREMELY limited 'round' - without 'libm'.
   Shift VAL half a unit away from zero, then truncate.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  long double shifted;

  if (val < 0)
    shifted = val - 0.5;
  else
    shifted = val + 0.5;

  return shifted;
}
410 ATTRIBUTE_CONST
411 static inline long double
412 simple_round (long double val, enum round_type t)
414 intmax_t rval;
415 intmax_t intmax_mul = val / INTMAX_MAX;
416 val -= (long double) INTMAX_MAX * intmax_mul;
418 switch (t)
420 case round_ceiling:
421 rval = simple_round_ceiling (val);
422 break;
424 case round_floor:
425 rval = simple_round_floor (val);
426 break;
428 case round_from_zero:
429 rval = simple_round_from_zero (val);
430 break;
432 case round_to_zero:
433 rval = simple_round_to_zero (val);
434 break;
436 case round_nearest:
437 rval = simple_round_nearest (val);
438 break;
440 default:
441 /* to silence the compiler - this should never happen. */
442 return 0;
445 return (long double) INTMAX_MAX * intmax_mul + rval;
/* Result codes of the simple_strtod_* parsers.  Callers in this file
   treat SSE_OK and SSE_OK_PRECISION_LOSS as success and every other
   value as a failure. */
448 enum simple_strtod_error
450 SSE_OK = 0,
451 SSE_OK_PRECISION_LOSS,
452 SSE_OVERFLOW,
453 SSE_INVALID_NUMBER,
455 /* the following are returned by 'simple_strtod_human'. */
456 SSE_VALID_BUT_FORBIDDEN_SUFFIX,
457 SSE_INVALID_SUFFIX,
458 SSE_MISSING_I_SUFFIX
461 /* Read an *integer* INPUT_STR,
462 but return the integer value in a 'long double' VALUE
463 hence, no UINTMAX_MAX limitation.
464 NEGATIVE is updated, and is stored separately from the VALUE
465 so that signbit() isn't required to determine the sign of -0..
466 ENDPTR is required (unlike strtod) and is used to store a pointer
467 to the character after the last character used in the conversion.
469 Note locale'd grouping is not supported,
470 nor is skipping of white-space supported.
472 Returns:
473 SSE_OK - valid number.
474 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
475 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
476 SSE_INVALID_NUMBER - if no digits were found. */
477 static enum simple_strtod_error
478 simple_strtod_int (char const *input_str,
479 char **endptr, long double *value, bool *negative)
481 enum simple_strtod_error e = SSE_OK;
483 long double val = 0;
484 int digits = 0;
485 bool found_digit = false;
487 if (*input_str == '-')
489 input_str++;
490 *negative = true;
492 else
493 *negative = false;
495 *endptr = (char *) input_str;
496 while (c_isdigit (**endptr))
498 int digit = (**endptr) - '0';
500 found_digit = true;
502 if (val || digit)
503 digits++;
505 if (digits > MAX_UNSCALED_DIGITS)
506 e = SSE_OK_PRECISION_LOSS;
508 if (digits > MAX_ACCEPTABLE_DIGITS)
509 return SSE_OVERFLOW;
511 val *= 10;
512 val += digit;
514 ++(*endptr);
516 if (! found_digit
517 && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length))
518 return SSE_INVALID_NUMBER;
519 if (*negative)
520 val = -val;
522 if (value)
523 *value = val;
525 return e;
528 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
529 and return the value in a 'long double' VALUE.
530 ENDPTR is required (unlike strtod) and is used to store a pointer
531 to the character after the last character used in the conversion.
532 PRECISION is optional and used to indicate fractions are present.
534 Note locale'd grouping is not supported,
535 nor is skipping of white-space supported.
537 Returns:
538 SSE_OK - valid number.
539 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
540 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
541 SSE_INVALID_NUMBER - if no digits were found. */
542 static enum simple_strtod_error
543 simple_strtod_float (char const *input_str,
544 char **endptr,
545 long double *value,
546 size_t *precision)
548 bool negative;
549 enum simple_strtod_error e = SSE_OK;
551 if (precision)
552 *precision = 0;
554 /* TODO: accept locale'd grouped values for the integral part. */
555 e = simple_strtod_int (input_str, endptr, value, &negative);
556 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
557 return e;
559 /* optional decimal point + fraction. */
560 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
562 char *ptr2;
563 long double val_frac = 0;
564 bool neg_frac;
566 (*endptr) += decimal_point_length;
567 enum simple_strtod_error e2 =
568 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
569 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
570 return e2;
571 if (e2 == SSE_OK_PRECISION_LOSS)
572 e = e2; /* propagate warning. */
573 if (neg_frac)
574 return SSE_INVALID_NUMBER;
576 /* number of digits in the fractions. */
577 size_t exponent = ptr2 - *endptr;
579 val_frac = ((long double) val_frac) / powerld (10, exponent);
581 /* TODO: detect loss of precision (only really 18 digits
582 of precision across all digits (before and after '.')). */
583 if (value)
585 if (negative)
586 *value -= val_frac;
587 else
588 *value += val_frac;
591 if (precision)
592 *precision = exponent;
594 *endptr = ptr2;
596 return e;
599 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
600 and return the value in a 'long double' VALUE,
601 with the precision of the input returned in PRECISION.
602 ENDPTR is required (unlike strtod) and is used to store a pointer
603 to the character after the last character used in the conversion.
604 ALLOWED_SCALING determines the scaling supported.
606 TODO:
607 support locale'd grouping
608 accept scentific and hex floats (probably use strtold directly)
610 Returns:
611 SSE_OK - valid number.
612 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
613 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
614 SSE_INVALID_NUMBER - if no digits were found.
615 SSE_VALID_BUT_FORBIDDEN_SUFFIX
616 SSE_INVALID_SUFFIX
617 SSE_MISSING_I_SUFFIX */
618 static enum simple_strtod_error
619 simple_strtod_human (char const *input_str,
620 char **endptr, long double *value, size_t *precision,
621 enum scale_type allowed_scaling)
623 int power = 0;
624 /* 'scale_auto' is checked below. */
625 int scale_base = default_scale_base (allowed_scaling);
627 devmsg ("simple_strtod_human:\n input string: %s\n"
628 " locale decimal-point: %s\n"
629 " MAX_UNSCALED_DIGITS: %d\n",
630 quote_n (0, input_str),
631 quote_n (1, decimal_point),
632 MAX_UNSCALED_DIGITS);
634 enum simple_strtod_error e =
635 simple_strtod_float (input_str, endptr, value, precision);
636 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
637 return e;
639 devmsg (" parsed numeric value: %Lf\n"
640 " input precision = %d\n", *value, (int)*precision);
642 if (**endptr != '\0')
644 /* process suffix. */
646 /* Skip any blanks between the number and suffix. */
647 while (isblank (to_uchar (**endptr)))
648 (*endptr)++;
650 if (!valid_suffix (**endptr))
651 return SSE_INVALID_SUFFIX;
653 if (allowed_scaling == scale_none)
654 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
656 power = suffix_power (**endptr);
657 (*endptr)++; /* skip first suffix character. */
659 if (allowed_scaling == scale_auto && **endptr == 'i')
661 /* auto-scaling enabled, and the first suffix character
662 is followed by an 'i' (e.g. Ki, Mi, Gi). */
663 scale_base = 1024;
664 (*endptr)++; /* skip second ('i') suffix character. */
665 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
666 scale_base);
669 *precision = 0; /* Reset, to select precision based on scale. */
672 if (allowed_scaling == scale_IEC_I)
674 if (**endptr == 'i')
675 (*endptr)++;
676 else
677 return SSE_MISSING_I_SUFFIX;
680 long double multiplier = powerld (scale_base, power);
682 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
684 /* TODO: detect loss of precision and overflows. */
685 (*value) = (*value) * multiplier;
687 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
689 return e;
693 static void
694 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
696 char const *msgid = nullptr;
698 switch (err)
700 case SSE_OK_PRECISION_LOSS:
701 case SSE_OK:
702 /* should never happen - this function isn't called when OK. */
703 unreachable ();
705 case SSE_OVERFLOW:
706 msgid = N_("value too large to be converted: %s");
707 break;
709 case SSE_INVALID_NUMBER:
710 msgid = N_("invalid number: %s");
711 break;
713 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
714 msgid = N_("rejecting suffix in input: %s (consider using --from)");
715 break;
717 case SSE_INVALID_SUFFIX:
718 msgid = N_("invalid suffix in input: %s");
719 break;
721 case SSE_MISSING_I_SUFFIX:
722 msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
723 break;
727 if (inval_style != inval_ignore)
728 error (conv_exit_code, 0, gettext (msgid), quote (input_str));
731 /* Convert VAL to a human format string in BUF. */
732 static void
733 double_to_human (long double val, int precision,
734 char *buf, size_t buf_size,
735 enum scale_type scale, int group, enum round_type round)
737 int num_size;
738 char fmt[64];
739 static_assert ((INT_BUFSIZE_BOUND (zero_padding_width)
740 + INT_BUFSIZE_BOUND (precision)
741 + 10 /* for %.Lf etc. */)
742 < sizeof fmt);
744 char *pfmt = fmt;
745 *pfmt++ = '%';
747 if (group)
748 *pfmt++ = '\'';
750 if (zero_padding_width)
751 pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width);
753 devmsg ("double_to_human:\n");
755 if (scale == scale_none)
757 val *= powerld (10, precision);
758 val = simple_round (val, round);
759 val /= powerld (10, precision);
761 devmsg ((group) ?
762 " no scaling, returning (grouped) value: %'.*Lf\n" :
763 " no scaling, returning value: %.*Lf\n", precision, val);
765 stpcpy (pfmt, ".*Lf");
767 num_size = snprintf (buf, buf_size, fmt, precision, val);
768 if (num_size < 0 || num_size >= (int) buf_size)
769 error (EXIT_FAILURE, 0,
770 _("failed to prepare value '%Lf' for printing"), val);
771 return;
774 /* Scaling requested by user. */
775 double scale_base = default_scale_base (scale);
777 /* Normalize val to scale. */
778 int power = 0;
779 val = expld (val, scale_base, &power);
780 devmsg (" scaled value to %Lf * %0.f ^ %d\n", val, scale_base, power);
782 /* Perform rounding. */
783 int power_adjust = 0;
784 if (user_precision != -1)
785 power_adjust = MIN (power * 3, user_precision);
786 else if (absld (val) < 10)
788 /* for values less than 10, we allow one decimal-point digit,
789 so adjust before rounding. */
790 power_adjust = 1;
793 val *= powerld (10, power_adjust);
794 val = simple_round (val, round);
795 val /= powerld (10, power_adjust);
797 /* two special cases after rounding:
798 1. a "999.99" can turn into 1000 - so scale down
799 2. a "9.99" can turn into 10 - so don't display decimal-point. */
800 if (absld (val) >= scale_base)
802 val /= scale_base;
803 power++;
806 /* should "7.0" be printed as "7" ?
807 if removing the ".0" is preferred, enable the fourth condition. */
808 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
809 /* && (absld (val) > simple_round_floor (val))) */
811 devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power);
813 stpcpy (pfmt, ".*Lf%s");
815 int prec = user_precision == -1 ? show_decimal_point : user_precision;
817 /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */
818 num_size = snprintf (buf, buf_size - 1, fmt, prec, val,
819 suffix_power_char (power));
820 if (num_size < 0 || num_size >= (int) buf_size - 1)
821 error (EXIT_FAILURE, 0,
822 _("failed to prepare value '%Lf' for printing"), val);
824 if (scale == scale_IEC_I && power > 0)
825 strncat (buf, "i", buf_size - num_size - 1);
827 devmsg (" returning value: %s\n", quote (buf));
829 return;
832 /* Convert a string of decimal digits, N_STRING, with an optional suffix
833 to an integral value. Suffixes are handled as with --from=auto.
834 Upon successful conversion, return that value.
835 If it cannot be converted, give a diagnostic and exit. */
836 static uintmax_t
837 unit_to_umax (char const *n_string)
839 strtol_error s_err;
840 char const *c_string = n_string;
841 char *t_string = nullptr;
842 size_t n_len = strlen (n_string);
843 char *end = nullptr;
844 uintmax_t n;
845 char const *suffixes = valid_suffixes;
847 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
848 if (n_len && ! c_isdigit (n_string[n_len - 1]))
850 t_string = xmalloc (n_len + 2);
851 end = t_string + n_len - 1;
852 memcpy (t_string, n_string, n_len);
854 if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
855 *end = '\0';
856 else
858 *++end = 'B';
859 *++end = '\0';
860 suffixes = zero_and_valid_suffixes;
863 c_string = t_string;
866 s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
868 if (s_err != LONGINT_OK || *end || n == 0)
870 free (t_string);
871 error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
874 free (t_string);
876 return n;
880 static void
881 setup_padding_buffer (size_t min_size)
883 if (padding_buffer_size > min_size)
884 return;
886 padding_buffer_size = min_size + 1;
887 padding_buffer = xrealloc (padding_buffer, padding_buffer_size);
/* Print usage information and exit with STATUS.
   For a STATUS other than EXIT_SUCCESS only the short "try --help" hint
   is emitted (via emit_try_help).  For EXIT_SUCCESS the full help text,
   covering the options, UNIT names, FIELDS syntax, FORMAT syntax, exit
   status and examples, is written to stdout.
   This function does not return. */
890 void
891 usage (int status)
893 if (status != EXIT_SUCCESS)
894 emit_try_help ();
895 else
897 printf (_("\
898 Usage: %s [OPTION]... [NUMBER]...\n\
899 "), program_name);
900 fputs (_("\
901 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
902 "), stdout);
903 emit_mandatory_arg_note ();
904 fputs (_("\
905 --debug print warnings about invalid input\n\
906 "), stdout);
907 fputs (_("\
908 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
909 "), stdout);
910 fputs (_("\
911 --field=FIELDS replace the numbers in these input fields (default=1);\n\
912 see FIELDS below\n\
913 "), stdout);
914 fputs (_("\
915 --format=FORMAT use printf style floating-point FORMAT;\n\
916 see FORMAT below for details\n\
917 "), stdout);
918 fputs (_("\
919 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
920 see UNIT below\n\
921 "), stdout);
922 fputs (_("\
923 --from-unit=N specify the input unit size (instead of the default 1)\n\
924 "), stdout);
925 fputs (_("\
926 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
927 (which means it has no effect in the C/POSIX locale)\n\
928 "), stdout);
929 fputs (_("\
930 --header[=N] print (without converting) the first N header lines;\n\
931 N defaults to 1 if not specified\n\
932 "), stdout);
933 fputs (_("\
934 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
935 abort (default), fail, warn, ignore\n\
936 "), stdout);
937 fputs (_("\
938 --padding=N pad the output to N characters; positive N will\n\
939 right-align; negative N will left-align;\n\
940 padding is ignored if the output is wider than N;\n\
941 the default is to automatically pad if a whitespace\n\
942 is found\n\
943 "), stdout);
944 fputs (_("\
945 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
946 up, down, from-zero (default), towards-zero, nearest\n\
947 "), stdout);
948 fputs (_("\
949 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
950 SUFFIX in input numbers\n\
951 "), stdout);
952 fputs (_("\
953 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
954 "), stdout);
955 fputs (_("\
956 --to-unit=N the output unit size (instead of the default 1)\n\
957 "), stdout);
958 fputs (_("\
959 -z, --zero-terminated line delimiter is NUL, not newline\n\
960 "), stdout);
961 fputs (HELP_OPTION_DESCRIPTION, stdout);
962 fputs (VERSION_OPTION_DESCRIPTION, stdout);
964 fputs (_("\
966 UNIT options:\n"), stdout);
967 fputs (_("\
968 none no auto-scaling is done; suffixes will trigger an error\n\
969 "), stdout);
970 fputs (_("\
971 auto accept optional single/two letter suffix:\n\
972 1K = 1000,\n\
973 1Ki = 1024,\n\
974 1M = 1000000,\n\
975 1Mi = 1048576,\n"), stdout);
976 fputs (_("\
977 si accept optional single letter suffix:\n\
978 1K = 1000,\n\
979 1M = 1000000,\n\
980 ...\n"), stdout);
981 fputs (_("\
982 iec accept optional single letter suffix:\n\
983 1K = 1024,\n\
984 1M = 1048576,\n\
985 ...\n"), stdout);
986 fputs (_("\
987 iec-i accept optional two-letter suffix:\n\
988 1Ki = 1024,\n\
989 1Mi = 1048576,\n\
990 ...\n"), stdout);
992 fputs (_("\n\
993 FIELDS supports cut(1) style field ranges:\n\
994 N N'th field, counted from 1\n\
995 N- from N'th field, to end of line\n\
996 N-M from N'th to M'th field (inclusive)\n\
997 -M from first to M'th field (inclusive)\n\
998 - all fields\n\
999 Multiple fields/ranges can be separated with commas\n\
1000 "), stdout);
1002 fputs (_("\n\
1003 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
1004 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
1005 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
1006 will zero pad the number. Optional negative values (%-10f) will left align.\n\
1007 Optional precision (%.1f) will override the input determined precision.\n\
1008 "), stdout);
1010 printf (_("\n\
1011 Exit status is 0 if all input numbers were successfully converted.\n\
1012 By default, %s will stop at the first conversion error with exit status 2.\n\
1013 With --invalid='fail' a warning is printed for each conversion error\n\
1014 and the exit status is 2. With --invalid='warn' each conversion error is\n\
1015 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
1016 errors are not diagnosed and the exit status is 0.\n\
1017 "), program_name);
1019 printf (_("\n\
1020 Examples:\n\
1021 $ %s --to=si 1000\n\
1022 -> \"1.0K\"\n\
1023 $ %s --to=iec 2048\n\
1024 -> \"2.0K\"\n\
1025 $ %s --to=iec-i 4096\n\
1026 -> \"4.0Ki\"\n\
1027 $ echo 1K | %s --from=si\n\
1028 -> \"1000\"\n\
1029 $ echo 1K | %s --from=iec\n\
1030 -> \"1024\"\n\
1031 $ df -B1 | %s --header --field 2-4 --to=si\n\
1032 $ ls -l | %s --header --field 5 --to=iec\n\
1033 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1034 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1035 program_name, program_name, program_name,
1036 program_name, program_name, program_name,
1037 program_name, program_name, program_name);
1038 emit_ancillary_info (PROGRAM_NAME);
1040 exit (status);
1043 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1044 1. padding (e.g. %20f)
1045 2. alignment (e.g. %-20f)
1046 3. grouping (e.g. %'f)
1048 Only a limited subset of printf(3) syntax is supported.
1050 TODO:
1051 support %e %g etc. rather than just %f
1053 NOTES:
1054 1. This function sets the global variables:
1055 padding_width, padding_alignment, grouping,
1056 format_str_prefix, format_str_suffix
1057 2. The function aborts on any errors. */
1058 static void
1059 parse_format_string (char const *fmt)
1061 size_t i;
1062 size_t prefix_len = 0;
1063 size_t suffix_pos;
1064 long int pad = 0;
1065 char *endptr = nullptr;
1066 bool zero_padding = false;
1068 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
1070 if (!fmt[i])
1071 error (EXIT_FAILURE, 0,
1072 _("format %s has no %% directive"), quote (fmt));
1073 prefix_len++;
1076 i++;
1077 while (true)
1079 size_t skip = strspn (fmt + i, " ");
1080 i += skip;
1081 if (fmt[i] == '\'')
1083 grouping = 1;
1084 i++;
1086 else if (fmt[i] == '0')
1088 zero_padding = true;
1089 i++;
1091 else if (! skip)
1092 break;
1095 errno = 0;
1096 pad = strtol (fmt + i, &endptr, 10);
1097 if (errno == ERANGE || pad < -LONG_MAX)
1098 error (EXIT_FAILURE, 0,
1099 _("invalid format %s (width overflow)"), quote (fmt));
1101 if (endptr != (fmt + i) && pad != 0)
1103 if (debug && padding_width && !(zero_padding && pad > 0))
1104 error (0, 0, _("--format padding overriding --padding"));
1106 if (pad < 0)
1108 padding_alignment = MBS_ALIGN_LEFT;
1109 padding_width = -pad;
1111 else
1113 if (zero_padding)
1114 zero_padding_width = pad;
1115 else
1116 padding_width = pad;
1120 i = endptr - fmt;
1122 if (fmt[i] == '\0')
1123 error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
1125 if (fmt[i] == '.')
1127 i++;
1128 errno = 0;
1129 user_precision = strtol (fmt + i, &endptr, 10);
1130 if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
1131 || isblank (fmt[i]) || fmt[i] == '+')
1133 /* Note we disallow negative user_precision to be
1134 consistent with printf(1). POSIX states that
1135 negative precision is only supported (and ignored)
1136 when used with '.*f'. glibc at least will malform
1137 output when passed a direct negative precision. */
1138 error (EXIT_FAILURE, 0,
1139 _("invalid precision in format %s"), quote (fmt));
1141 i = endptr - fmt;
1144 if (fmt[i] != 'f')
1145 error (EXIT_FAILURE, 0, _("invalid format %s,"
1146 " directive must be %%[0]['][-][N][.][N]f"),
1147 quote (fmt));
1148 i++;
1149 suffix_pos = i;
1151 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
1152 if (fmt[i] == '%' && fmt[i + 1] != '%')
1153 error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
1154 quote (fmt));
1156 if (prefix_len)
1157 format_str_prefix = ximemdup0 (fmt, prefix_len);
1158 if (fmt[suffix_pos] != '\0')
1159 format_str_suffix = xstrdup (fmt + suffix_pos);
1161 devmsg ("format String:\n input: %s\n grouping: %s\n"
1162 " padding width: %ld\n alignment: %s\n"
1163 " prefix: %s\n suffix: %s\n",
1164 quote_n (0, fmt), (grouping) ? "yes" : "no",
1165 padding_width,
1166 (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
1167 quote_n (1, format_str_prefix ? format_str_prefix : ""),
1168 quote_n (2, format_str_suffix ? format_str_suffix : ""));
1171 /* Parse a numeric value (with optional suffix) from a string.
1172 Returns a long double value, with input precision.
1174 If there's an error converting the string to value - exits with
1175 an error.
1177 If there are any trailing characters after the number
1178 (besides a valid suffix) - exits with an error. */
1179 static enum simple_strtod_error
1180 parse_human_number (char const *str, long double /*output */ *value,
1181 size_t *precision)
1183 char *ptr = nullptr;
1185 enum simple_strtod_error e =
1186 simple_strtod_human (str, &ptr, value, precision, scale_from);
1187 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1189 simple_strtod_fatal (e, str);
1190 return e;
1193 if (ptr && *ptr != '\0')
1195 if (inval_style != inval_ignore)
1196 error (conv_exit_code, 0, _("invalid suffix in input %s: %s"),
1197 quote_n (0, str), quote_n (1, ptr));
1198 e = SSE_INVALID_SUFFIX;
1200 return e;
1204 /* Print the given VAL, using the requested representation.
1205 The number is printed to STDOUT, with padding and alignment. */
1206 static int
1207 prepare_padded_number (const long double val, size_t precision)
1209 /* Generate Output. */
1210 char buf[128];
1212 size_t precision_used = user_precision == -1 ? precision : user_precision;
1214 /* Can't reliably print too-large values without auto-scaling. */
1215 int x;
1216 expld (val, 10, &x);
1218 if (scale_to == scale_none
1219 && x + precision_used > MAX_UNSCALED_DIGITS)
1221 if (inval_style != inval_ignore)
1223 if (precision_used)
1224 error (conv_exit_code, 0,
1225 _("value/precision too large to be printed: '%Lg/%"PRIuMAX"'"
1226 " (consider using --to)"), val, (uintmax_t)precision_used);
1227 else
1228 error (conv_exit_code, 0,
1229 _("value too large to be printed: '%Lg'"
1230 " (consider using --to)"), val);
1232 return 0;
1235 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1237 if (inval_style != inval_ignore)
1238 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1239 " (cannot handle values > 999Q)"), val);
1240 return 0;
1243 double_to_human (val, precision_used, buf, sizeof (buf),
1244 scale_to, grouping, round_style);
1245 if (suffix)
1246 strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
1248 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1249 val, quote (buf));
1251 if (padding_width && strlen (buf) < padding_width)
1253 size_t w = padding_width;
1254 mbsalign (buf, padding_buffer, padding_buffer_size, &w,
1255 padding_alignment, MBA_UNIBYTE_ONLY);
1257 devmsg (" After padding: %s\n", quote (padding_buffer));
1259 else
1261 setup_padding_buffer (strlen (buf) + 1);
1262 strcpy (padding_buffer, buf);
1265 return 1;
1268 static void
1269 print_padded_number (void)
1271 if (format_str_prefix)
1272 fputs (format_str_prefix, stdout);
1274 fputs (padding_buffer, stdout);
1276 if (format_str_suffix)
1277 fputs (format_str_suffix, stdout);
1280 /* Converts the TEXT number string to the requested representation,
1281 and handles automatic suffix addition. */
1282 static int
1283 process_suffixed_number (char *text, long double *result,
1284 size_t *precision, long int field)
1286 if (suffix && strlen (text) > strlen (suffix))
1288 char *possible_suffix = text + strlen (text) - strlen (suffix);
1290 if (STREQ (suffix, possible_suffix))
1292 /* trim suffix, ONLY if it's at the end of the text. */
1293 *possible_suffix = '\0';
1294 devmsg ("trimming suffix %s\n", quote (suffix));
1296 else
1297 devmsg ("no valid suffix found\n");
1300 /* Skip white space - always. */
1301 char *p = text;
1302 while (*p && isblank (to_uchar (*p)))
1303 ++p;
1305 /* setup auto-padding. */
1306 if (auto_padding)
1308 if (text < p || field > 1)
1310 padding_width = strlen (text);
1311 setup_padding_buffer (padding_width);
1313 else
1315 padding_width = 0;
1317 devmsg ("setting Auto-Padding to %ld characters\n", padding_width);
1320 long double val = 0;
1321 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1322 if (e == SSE_OK_PRECISION_LOSS && debug)
1323 error (0, 0, _("large input value %s: possible precision loss"),
1324 quote (p));
1326 if (from_unit_size != 1 || to_unit_size != 1)
1327 val = (val * from_unit_size) / to_unit_size;
1329 *result = val;
1331 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1334 /* Return a pointer to the beginning of the next field in line.
1335 The line pointer is moved to the end of the next field. */
1336 static char*
1337 next_field (char **line)
1339 char *field_start = *line;
1340 char *field_end = field_start;
1342 if (delimiter != DELIMITER_DEFAULT)
1344 if (*field_start != delimiter)
1346 while (*field_end && *field_end != delimiter)
1347 ++field_end;
1349 /* else empty field */
1351 else
1353 /* keep any space prefix in the returned field */
1354 while (*field_end && field_sep (*field_end))
1355 ++field_end;
1357 while (*field_end && ! field_sep (*field_end))
1358 ++field_end;
1361 *line = field_end;
1362 return field_start;
1365 ATTRIBUTE_PURE
1366 static bool
1367 include_field (uintmax_t field)
1369 struct field_range_pair *p = frp;
1370 if (!p)
1371 return field == 1;
1373 while (p->lo != UINTMAX_MAX)
1375 if (p->lo <= field && p->hi >= field)
1376 return true;
1377 ++p;
1379 return false;
/* Write field number FIELD, whose text is TEXT, to stdout.

   A field selected for conversion is parsed and printed in the requested
   representation; if it cannot be converted, or if the field is not
   selected at all, TEXT is written unchanged.

   Return false only when a selected field failed to convert.  */
static bool
process_field (char *text, uintmax_t field)
{
  if (! include_field (field))
    {
      fputs (text, stdout);
      return true;
    }

  long double val = 0;
  size_t precision = 0;

  /* Formatting is attempted only if parsing succeeded.  */
  bool valid_number
    = (process_suffixed_number (text, &val, &precision, field)
       && prepare_padded_number (val, precision));

  if (valid_number)
    print_padded_number ();
  else
    fputs (text, stdout);

  return valid_number;
}
1410 /* Convert number in a given line of text.
1411 NEWLINE specifies whether to output a '\n' for this "line". */
1412 static int
1413 process_line (char *line, bool newline)
1415 char *next;
1416 uintmax_t field = 0;
1417 bool valid_number = true;
1419 while (true) {
1420 ++field;
1421 next = next_field (&line);
1423 if (*line != '\0')
1425 /* nul terminate the current field string and process */
1426 *line = '\0';
1428 if (! process_field (next, field))
1429 valid_number = false;
1431 fputc ((delimiter == DELIMITER_DEFAULT) ?
1432 ' ' : delimiter, stdout);
1433 ++line;
1435 else
1437 /* end of the line, process the last field and finish */
1438 if (! process_field (next, field))
1439 valid_number = false;
1441 break;
1445 if (newline)
1446 putchar (line_delim);
1448 return valid_number;
1452 main (int argc, char **argv)
1454 int valid_numbers = 1;
1455 bool locale_ok;
1457 initialize_main (&argc, &argv);
1458 set_program_name (argv[0]);
1459 locale_ok = !!setlocale (LC_ALL, "");
1460 bindtextdomain (PACKAGE, LOCALEDIR);
1461 textdomain (PACKAGE);
1463 #if HAVE_FPSETPREC
1464 /* Enabled extended precision if needed. */
1465 fpsetprec (FP_PE);
1466 #endif
1468 decimal_point = nl_langinfo (RADIXCHAR);
1469 if (decimal_point == nullptr || strlen (decimal_point) == 0)
1470 decimal_point = ".";
1471 decimal_point_length = strlen (decimal_point);
1473 atexit (close_stdout);
1475 while (true)
1477 int c = getopt_long (argc, argv, "d:z", longopts, nullptr);
1479 if (c == -1)
1480 break;
1482 switch (c)
1484 case FROM_OPTION:
1485 scale_from = XARGMATCH ("--from", optarg,
1486 scale_from_args, scale_from_types);
1487 break;
1489 case FROM_UNIT_OPTION:
1490 from_unit_size = unit_to_umax (optarg);
1491 break;
1493 case TO_OPTION:
1494 scale_to =
1495 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1496 break;
1498 case TO_UNIT_OPTION:
1499 to_unit_size = unit_to_umax (optarg);
1500 break;
1502 case ROUND_OPTION:
1503 round_style = XARGMATCH ("--round", optarg, round_args, round_types);
1504 break;
1506 case GROUPING_OPTION:
1507 grouping = 1;
1508 break;
1510 case PADDING_OPTION:
1511 if (xstrtol (optarg, nullptr, 10, &padding_width, "") != LONGINT_OK
1512 || padding_width == 0 || padding_width < -LONG_MAX)
1513 error (EXIT_FAILURE, 0, _("invalid padding value %s"),
1514 quote (optarg));
1515 if (padding_width < 0)
1517 padding_alignment = MBS_ALIGN_LEFT;
1518 padding_width = -padding_width;
1520 /* TODO: We probably want to apply a specific --padding
1521 to --header lines too. */
1522 break;
1524 case FIELD_OPTION:
1525 if (n_frp)
1526 error (EXIT_FAILURE, 0, _("multiple field specifications"));
1527 set_fields (optarg, SETFLD_ALLOW_DASH);
1528 break;
1530 case 'd':
1531 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1532 if (optarg[0] != '\0' && optarg[1] != '\0')
1533 error (EXIT_FAILURE, 0,
1534 _("the delimiter must be a single character"));
1535 delimiter = optarg[0];
1536 break;
1538 case 'z':
1539 line_delim = '\0';
1540 break;
1542 case SUFFIX_OPTION:
1543 suffix = optarg;
1544 break;
1546 case DEBUG_OPTION:
1547 debug = true;
1548 break;
1550 case DEV_DEBUG_OPTION:
1551 dev_debug = true;
1552 debug = true;
1553 break;
1555 case HEADER_OPTION:
1556 if (optarg)
1558 if (xstrtoumax (optarg, nullptr, 10, &header, "") != LONGINT_OK
1559 || header == 0)
1560 error (EXIT_FAILURE, 0, _("invalid header value %s"),
1561 quote (optarg));
1563 else
1565 header = 1;
1567 break;
1569 case FORMAT_OPTION:
1570 format_str = optarg;
1571 break;
1573 case INVALID_OPTION:
1574 inval_style = XARGMATCH ("--invalid", optarg,
1575 inval_args, inval_types);
1576 break;
1578 case_GETOPT_HELP_CHAR;
1579 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1581 default:
1582 usage (EXIT_FAILURE);
1586 if (format_str != nullptr && grouping)
1587 error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1589 if (debug && ! locale_ok)
1590 error (0, 0, _("failed to set locale"));
1592 /* Warn about no-op. */
1593 if (debug && scale_from == scale_none && scale_to == scale_none
1594 && !grouping && (padding_width == 0) && (format_str == nullptr))
1595 error (0, 0, _("no conversion option specified"));
1597 if (format_str)
1598 parse_format_string (format_str);
1600 if (grouping)
1602 if (scale_to != scale_none)
1603 error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1604 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1605 error (0, 0, _("grouping has no effect in this locale"));
1609 setup_padding_buffer (padding_width);
1610 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1612 if (inval_style != inval_abort)
1613 conv_exit_code = 0;
1615 if (argc > optind)
1617 if (debug && header)
1618 error (0, 0, _("--header ignored with command-line input"));
1620 for (; optind < argc; optind++)
1621 valid_numbers &= process_line (argv[optind], true);
1623 else
1625 char *line = nullptr;
1626 size_t line_allocated = 0;
1627 ssize_t len;
1629 while (header-- && getdelim (&line, &line_allocated,
1630 line_delim, stdin) > 0)
1631 fputs (line, stdout);
1633 while ((len = getdelim (&line, &line_allocated,
1634 line_delim, stdin)) > 0)
1636 bool newline = line[len - 1] == line_delim;
1637 if (newline)
1638 line[len - 1] = '\0';
1639 valid_numbers &= process_line (line, newline);
1642 if (ferror (stdin))
1643 error (EXIT_FAILURE, errno, _("error reading input"));
1646 if (debug && !valid_numbers)
1647 error (0, 0, _("failed to convert some of the input numbers"));
1649 int exit_status = EXIT_SUCCESS;
1650 if (!valid_numbers
1651 && inval_style != inval_warn && inval_style != inval_ignore)
1652 exit_status = EXIT_CONVERSION_WARNINGS;
1654 main_exit (exit_status);