split: port ‘split -n N /dev/null’ better to macOS
[coreutils.git] / src / numfmt.c
blob2067ca164fc3d3de037904ad1f78cbc03ff55191
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <float.h>
19 #include <getopt.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <langinfo.h>
24 #include "mbsalign.h"
25 #include "argmatch.h"
26 #include "c-ctype.h"
27 #include "die.h"
28 #include "error.h"
29 #include "quote.h"
30 #include "system.h"
31 #include "xstrtol.h"
33 #include "set-fields.h"
35 #if HAVE_FPSETPREC
36 # include <ieeefp.h>
37 #endif
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "numfmt"
42 #define AUTHORS proper_name ("Assaf Gordon")
/* Exit statuses specific to this program.  EXIT_CONVERSION_WARNINGS is
   the status used when some numbers fail to convert.
   NOTE(review): TIMEOUT_FAILURE is not referenced in the part of the
   file reviewed here - confirm whether it is still needed.  */
enum
{
  TIMEOUT_FAILURE = 1,
  EXIT_CONVERSION_WARNINGS = 2
};
/* Codes for options that only have a long form.  Numbering starts just
   above CHAR_MAX, so none of them can equal a short option character.  */
enum
{
  FROM_OPTION = CHAR_MAX + 1,
  FROM_UNIT_OPTION,
  TO_OPTION,
  TO_UNIT_OPTION,
  ROUND_OPTION,
  SUFFIX_OPTION,
  GROUPING_OPTION,
  PADDING_OPTION,
  FIELD_OPTION,
  DEBUG_OPTION,
  DEV_DEBUG_OPTION,
  HEADER_OPTION,
  FORMAT_OPTION,
  INVALID_OPTION
};
/* Unit systems that can be selected for input and output scaling.  */
enum scale_type
{
  scale_none,                   /* The default: no scaling.  */
  scale_auto,                   /* --from only.  */
  scale_SI,
  scale_IEC,
  scale_IEC_I                   /* The 'i' suffix is required.  */
};

/* Keywords for the input scale, and the matching enum values listed
   in the same order.  */
static char const *const scale_from_args[] =
{
  "none", "auto", "si", "iec", "iec-i", NULL
};

static enum scale_type const scale_from_types[] =
{
  scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
};

/* Keywords for the output scale ("auto" is not offered here), and the
   matching enum values listed in the same order.  */
static char const *const scale_to_args[] =
{
  "none", "si", "iec", "iec-i", NULL
};

static enum scale_type const scale_to_types[] =
{
  scale_none, scale_SI, scale_IEC, scale_IEC_I
};
/* Rounding methods.  */
enum round_type
{
  round_ceiling,
  round_floor,
  round_from_zero,
  round_to_zero,
  round_nearest,
};

/* Keywords naming the rounding methods, and the matching enum values
   listed in the same order.  */
static char const *const round_args[] =
{
  "up", "down", "from-zero", "towards-zero", "nearest", NULL
};

static enum round_type const round_types[] =
{
  round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
};
/* Ways of reacting to input that cannot be converted.  */
enum inval_type
{
  inval_abort,
  inval_fail,
  inval_warn,
  inval_ignore
};

/* Keywords naming the failure modes, and the matching enum values
   listed in the same order.  */
static char const *const inval_args[] =
{
  "abort", "fail", "warn", "ignore", NULL
};

static enum inval_type const inval_types[] =
{
  inval_abort, inval_fail, inval_warn, inval_ignore
};
133 static struct option const longopts[] =
135 {"from", required_argument, NULL, FROM_OPTION},
136 {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
137 {"to", required_argument, NULL, TO_OPTION},
138 {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
139 {"round", required_argument, NULL, ROUND_OPTION},
140 {"padding", required_argument, NULL, PADDING_OPTION},
141 {"suffix", required_argument, NULL, SUFFIX_OPTION},
142 {"grouping", no_argument, NULL, GROUPING_OPTION},
143 {"delimiter", required_argument, NULL, 'd'},
144 {"field", required_argument, NULL, FIELD_OPTION},
145 {"debug", no_argument, NULL, DEBUG_OPTION},
146 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
147 {"header", optional_argument, NULL, HEADER_OPTION},
148 {"format", required_argument, NULL, FORMAT_OPTION},
149 {"invalid", required_argument, NULL, INVALID_OPTION},
150 {"zero-terminated", no_argument, NULL, 'z'},
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {NULL, 0, NULL, 0}
/* Value of 'delimiter' meaning that blanks separate the fields.
   It lies outside the range of any single char.  */
enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };

/* Largest number of digits that can be handled safely, without
   precision loss, when scaling is 'none'.  */
enum { MAX_UNSCALED_DIGITS = LDBL_DIG };

/* Largest number of digits that is accepted at all.
   This is equivalent to 999Q.
   NOTE: 'long double' can handle more than that, but there's
   no official suffix assigned beyond Quetta (1000^10).  */
enum { MAX_ACCEPTABLE_DIGITS = 33 };
169 static enum scale_type scale_from = scale_none;
170 static enum scale_type scale_to = scale_none;
171 static enum round_type round_style = round_from_zero;
172 static enum inval_type inval_style = inval_abort;
173 static char const *suffix = NULL;
174 static uintmax_t from_unit_size = 1;
175 static uintmax_t to_unit_size = 1;
176 static int grouping = 0;
177 static char *padding_buffer = NULL;
178 static size_t padding_buffer_size = 0;
179 static long int padding_width = 0;
180 static long int zero_padding_width = 0;
181 static long int user_precision = -1;
182 static char const *format_str = NULL;
183 static char *format_str_prefix = NULL;
184 static char *format_str_suffix = NULL;
186 /* By default, any conversion error will terminate the program. */
187 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
190 /* auto-pad each line based on skipped whitespace. */
191 static int auto_padding = 0;
192 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
194 /* field delimiter */
195 static int delimiter = DELIMITER_DEFAULT;
197 /* line delimiter. */
198 static unsigned char line_delim = '\n';
200 /* if non-zero, the first 'header' lines from STDIN are skipped. */
201 static uintmax_t header = 0;
203 /* Debug for users: print warnings to STDERR about possible
204 error (similar to sort's debug). */
205 static bool debug;
207 /* will be set according to the current locale. */
208 static char const *decimal_point;
209 static int decimal_point_length;
211 /* debugging for developers. Enables devmsg(). */
212 static bool dev_debug = false;
215 static inline int
216 default_scale_base (enum scale_type scale)
218 switch (scale)
220 case scale_IEC:
221 case scale_IEC_I:
222 return 1024;
224 case scale_none:
225 case scale_auto:
226 case scale_SI:
227 default:
228 return 1000;
/* Recognized suffix letters, in ascending order of magnitude.
   VALID_SUFFIXES skips the leading '0'; unit_to_umax is the user of
   the full array.  */
static char const zero_and_valid_suffixes[] = "0KMGTPEZYRQ";
static char const *valid_suffixes = 1 + zero_and_valid_suffixes;

/* Return true if SUF is one of the letters in VALID_SUFFIXES.

   NUL is rejected explicitly: strchr treats the terminating null byte
   as part of the string, so without the check a NUL would be reported
   as a valid suffix and a caller that then steps over "the suffix
   character" would walk past the end of its input.  */
static inline bool
valid_suffix (const char suf)
{
  return suf != '\0' && strchr (valid_suffixes, suf) != NULL;
}
/* Return the power (1 for 'K' ... 10 for 'Q') denoted by the suffix
   letter SUF, where each letter stands for both the decimal and the
   binary prefix (e.g. kilo or kibi).  Any other character yields 0.  */
static inline int
suffix_power (const char suf)
{
  static char const letters[] = "KMGTPEZYRQ";

  /* strchr would match the terminator for NUL, so rule that out first.  */
  char const *hit = suf ? strchr (letters, suf) : NULL;

  return hit ? (int) (hit - letters) + 1 : 0;
}
/* Return the suffix string for POWER: "" for 0, "K" for 1, and so on
   up to "Q" for 10.  Larger values yield the string "(error)".  */
static inline char const *
suffix_power_char (unsigned int power)
{
  static char const *const names[] =
    {
      "", "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q"
    };

  if (power < sizeof names / sizeof names[0])
    return names[power];

  return "(error)";
}
/* Return BASE raised to the power X, by repeated multiplication.
   Similar to 'powl(3)' but without requiring 'libm'.
   No overflow or infinity check is made.  */
static long double
powerld (long double base, unsigned int x)
{
  if (x == 0)
    return 1;

  long double product = base;
  for (unsigned int done = 1; done < x; done++)
    product *= base;

  return product;
}
/* Return the magnitude of VAL.
   Similar to 'fabs(3)' but without requiring 'libm'.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;

  return val;
}
/* Divide VAL by BASE until its magnitude is below BASE, and return the
   reduced value.  If X is non-null, *X receives the number of divisions
   performed, so that  result * BASE^*X = original VAL.
   Values outside [-LDBL_MAX, LDBL_MAX] are returned unchanged with a
   power of 0.
   Similar to "frexpl(3)" but without requiring 'libm',
   allowing only integer scale, limited functionality and error checking.  */
static long double
expld (long double val, unsigned int base, unsigned int /*output */ *x)
{
  unsigned int divisions = 0;

  if (-LDBL_MAX <= val && val <= LDBL_MAX)
    {
      for (; (val < 0 ? -val : val) >= base; divisions++)
        val /= base;
    }

  if (x)
    *x = divisions;

  return val;
}
/* Return the smallest integer that is not less than VAL.
   EXTREMELY limited 'ceil' - without 'libm'.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  /* The conversion truncates toward zero; bump the result when that
     landed below VAL.  */
  intmax_t truncated = val;

  return truncated < val ? truncated + 1 : truncated;
}
/* Return the largest integer that is not greater than VAL, computed as
   the negated ceiling of -VAL.
   EXTREMELY limited 'floor' - without 'libm'.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_floor (long double val)
{
  long double negated = -val;
  intmax_t ceiling = negated;

  if (ceiling < negated)
    ceiling++;

  return -ceiling;
}
/* Round VAL away from zero: negative values are rounded down,
   all others are rounded up.
   EXTREMELY limited; assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  if (val < 0)
    return simple_round_floor (val);

  return simple_round_ceiling (val);
}
/* Round VAL toward zero, relying on the truncation performed by the
   floating-point to integer conversion.
   EXTREMELY limited; assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  intmax_t truncated = val;

  return truncated;
}
/* Round VAL to the nearest integer, with halves going away from zero:
   one half is added (or subtracted, for negative VAL) and the result
   is truncated by the conversion to intmax_t.
   EXTREMELY limited 'round' - without 'libm'.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  long double shifted;

  if (val < 0)
    shifted = val - 0.5;
  else
    shifted = val + 0.5;

  return shifted;
}
412 ATTRIBUTE_CONST
413 static inline long double
414 simple_round (long double val, enum round_type t)
416 intmax_t rval;
417 intmax_t intmax_mul = val / INTMAX_MAX;
418 val -= (long double) INTMAX_MAX * intmax_mul;
420 switch (t)
422 case round_ceiling:
423 rval = simple_round_ceiling (val);
424 break;
426 case round_floor:
427 rval = simple_round_floor (val);
428 break;
430 case round_from_zero:
431 rval = simple_round_from_zero (val);
432 break;
434 case round_to_zero:
435 rval = simple_round_to_zero (val);
436 break;
438 case round_nearest:
439 rval = simple_round_nearest (val);
440 break;
442 default:
443 /* to silence the compiler - this should never happen. */
444 return 0;
447 return (long double) INTMAX_MAX * intmax_mul + rval;
/* Result codes of the simple_strtod_* parsers.  */
enum simple_strtod_error
{
  SSE_OK = 0,
  SSE_OK_PRECISION_LOSS,
  SSE_OVERFLOW,
  SSE_INVALID_NUMBER,

  /* The following are returned by 'simple_strtod_human' only.  */
  SSE_VALID_BUT_FORBIDDEN_SUFFIX,
  SSE_INVALID_SUFFIX,
  SSE_MISSING_I_SUFFIX
};
463 /* Read an *integer* INPUT_STR,
464 but return the integer value in a 'long double' VALUE
465 hence, no UINTMAX_MAX limitation.
466 NEGATIVE is updated, and is stored separately from the VALUE
467 so that signbit() isn't required to determine the sign of -0..
468 ENDPTR is required (unlike strtod) and is used to store a pointer
469 to the character after the last character used in the conversion.
471 Note locale'd grouping is not supported,
472 nor is skipping of white-space supported.
474 Returns:
475 SSE_OK - valid number.
476 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
477 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
478 SSE_INVALID_NUMBER - if no digits were found. */
479 static enum simple_strtod_error
480 simple_strtod_int (char const *input_str,
481 char **endptr, long double *value, bool *negative)
483 enum simple_strtod_error e = SSE_OK;
485 long double val = 0;
486 unsigned int digits = 0;
487 bool found_digit = false;
489 if (*input_str == '-')
491 input_str++;
492 *negative = true;
494 else
495 *negative = false;
497 *endptr = (char *) input_str;
498 while (c_isdigit (**endptr))
500 int digit = (**endptr) - '0';
502 found_digit = true;
504 if (val || digit)
505 digits++;
507 if (digits > MAX_UNSCALED_DIGITS)
508 e = SSE_OK_PRECISION_LOSS;
510 if (digits > MAX_ACCEPTABLE_DIGITS)
511 return SSE_OVERFLOW;
513 val *= 10;
514 val += digit;
516 ++(*endptr);
518 if (! found_digit
519 && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length))
520 return SSE_INVALID_NUMBER;
521 if (*negative)
522 val = -val;
524 if (value)
525 *value = val;
527 return e;
530 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
531 and return the value in a 'long double' VALUE.
532 ENDPTR is required (unlike strtod) and is used to store a pointer
533 to the character after the last character used in the conversion.
534 PRECISION is optional and used to indicate fractions are present.
536 Note locale'd grouping is not supported,
537 nor is skipping of white-space supported.
539 Returns:
540 SSE_OK - valid number.
541 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
542 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
543 SSE_INVALID_NUMBER - if no digits were found. */
544 static enum simple_strtod_error
545 simple_strtod_float (char const *input_str,
546 char **endptr,
547 long double *value,
548 size_t *precision)
550 bool negative;
551 enum simple_strtod_error e = SSE_OK;
553 if (precision)
554 *precision = 0;
556 /* TODO: accept locale'd grouped values for the integral part. */
557 e = simple_strtod_int (input_str, endptr, value, &negative);
558 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
559 return e;
561 /* optional decimal point + fraction. */
562 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
564 char *ptr2;
565 long double val_frac = 0;
566 bool neg_frac;
568 (*endptr) += decimal_point_length;
569 enum simple_strtod_error e2 =
570 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
571 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
572 return e2;
573 if (e2 == SSE_OK_PRECISION_LOSS)
574 e = e2; /* propagate warning. */
575 if (neg_frac)
576 return SSE_INVALID_NUMBER;
578 /* number of digits in the fractions. */
579 size_t exponent = ptr2 - *endptr;
581 val_frac = ((long double) val_frac) / powerld (10, exponent);
583 /* TODO: detect loss of precision (only really 18 digits
584 of precision across all digits (before and after '.')). */
585 if (value)
587 if (negative)
588 *value -= val_frac;
589 else
590 *value += val_frac;
593 if (precision)
594 *precision = exponent;
596 *endptr = ptr2;
598 return e;
601 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
602 and return the value in a 'long double' VALUE,
603 with the precision of the input returned in PRECISION.
604 ENDPTR is required (unlike strtod) and is used to store a pointer
605 to the character after the last character used in the conversion.
606 ALLOWED_SCALING determines the scaling supported.
608 TODO:
609 support locale'd grouping
610 accept scentific and hex floats (probably use strtold directly)
612 Returns:
613 SSE_OK - valid number.
614 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
615 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
616 SSE_INVALID_NUMBER - if no digits were found.
617 SSE_VALID_BUT_FORBIDDEN_SUFFIX
618 SSE_INVALID_SUFFIX
619 SSE_MISSING_I_SUFFIX */
620 static enum simple_strtod_error
621 simple_strtod_human (char const *input_str,
622 char **endptr, long double *value, size_t *precision,
623 enum scale_type allowed_scaling)
625 int power = 0;
626 /* 'scale_auto' is checked below. */
627 int scale_base = default_scale_base (allowed_scaling);
629 devmsg ("simple_strtod_human:\n input string: %s\n"
630 " locale decimal-point: %s\n"
631 " MAX_UNSCALED_DIGITS: %d\n",
632 quote_n (0, input_str),
633 quote_n (1, decimal_point),
634 MAX_UNSCALED_DIGITS);
636 enum simple_strtod_error e =
637 simple_strtod_float (input_str, endptr, value, precision);
638 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
639 return e;
641 devmsg (" parsed numeric value: %Lf\n"
642 " input precision = %d\n", *value, (int)*precision);
644 if (**endptr != '\0')
646 /* process suffix. */
648 /* Skip any blanks between the number and suffix. */
649 while (isblank (to_uchar (**endptr)))
650 (*endptr)++;
652 if (!valid_suffix (**endptr))
653 return SSE_INVALID_SUFFIX;
655 if (allowed_scaling == scale_none)
656 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
658 power = suffix_power (**endptr);
659 (*endptr)++; /* skip first suffix character. */
661 if (allowed_scaling == scale_auto && **endptr == 'i')
663 /* auto-scaling enabled, and the first suffix character
664 is followed by an 'i' (e.g. Ki, Mi, Gi). */
665 scale_base = 1024;
666 (*endptr)++; /* skip second ('i') suffix character. */
667 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
668 scale_base);
671 *precision = 0; /* Reset, to select precision based on scale. */
674 if (allowed_scaling == scale_IEC_I)
676 if (**endptr == 'i')
677 (*endptr)++;
678 else
679 return SSE_MISSING_I_SUFFIX;
682 long double multiplier = powerld (scale_base, power);
684 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
686 /* TODO: detect loss of precision and overflows. */
687 (*value) = (*value) * multiplier;
689 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
691 return e;
695 static void
696 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
698 char const *msgid = NULL;
700 switch (err)
702 case SSE_OK_PRECISION_LOSS:
703 case SSE_OK:
704 /* should never happen - this function isn't called when OK. */
705 abort ();
707 case SSE_OVERFLOW:
708 msgid = N_("value too large to be converted: %s");
709 break;
711 case SSE_INVALID_NUMBER:
712 msgid = N_("invalid number: %s");
713 break;
715 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
716 msgid = N_("rejecting suffix in input: %s (consider using --from)");
717 break;
719 case SSE_INVALID_SUFFIX:
720 msgid = N_("invalid suffix in input: %s");
721 break;
723 case SSE_MISSING_I_SUFFIX:
724 msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
725 break;
729 if (inval_style != inval_ignore)
730 error (conv_exit_code, 0, gettext (msgid), quote (input_str));
733 /* Convert VAL to a human format string in BUF. */
734 static void
735 double_to_human (long double val, int precision,
736 char *buf, size_t buf_size,
737 enum scale_type scale, int group, enum round_type round)
739 int num_size;
740 char fmt[64];
741 static_assert ((INT_BUFSIZE_BOUND (zero_padding_width)
742 + INT_BUFSIZE_BOUND (precision)
743 + 10 /* for %.Lf etc. */)
744 < sizeof fmt);
746 char *pfmt = fmt;
747 *pfmt++ = '%';
749 if (group)
750 *pfmt++ = '\'';
752 if (zero_padding_width)
753 pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width);
755 devmsg ("double_to_human:\n");
757 if (scale == scale_none)
759 val *= powerld (10, precision);
760 val = simple_round (val, round);
761 val /= powerld (10, precision);
763 devmsg ((group) ?
764 " no scaling, returning (grouped) value: %'.*Lf\n" :
765 " no scaling, returning value: %.*Lf\n", precision, val);
767 stpcpy (pfmt, ".*Lf");
769 num_size = snprintf (buf, buf_size, fmt, precision, val);
770 if (num_size < 0 || num_size >= (int) buf_size)
771 die (EXIT_FAILURE, 0,
772 _("failed to prepare value '%Lf' for printing"), val);
773 return;
776 /* Scaling requested by user. */
777 double scale_base = default_scale_base (scale);
779 /* Normalize val to scale. */
780 unsigned int power = 0;
781 val = expld (val, scale_base, &power);
782 devmsg (" scaled value to %Lf * %0.f ^ %u\n", val, scale_base, power);
784 /* Perform rounding. */
785 unsigned int power_adjust = 0;
786 if (user_precision != -1)
787 power_adjust = MIN (power * 3, user_precision);
788 else if (absld (val) < 10)
790 /* for values less than 10, we allow one decimal-point digit,
791 so adjust before rounding. */
792 power_adjust = 1;
795 val *= powerld (10, power_adjust);
796 val = simple_round (val, round);
797 val /= powerld (10, power_adjust);
799 /* two special cases after rounding:
800 1. a "999.99" can turn into 1000 - so scale down
801 2. a "9.99" can turn into 10 - so don't display decimal-point. */
802 if (absld (val) >= scale_base)
804 val /= scale_base;
805 power++;
808 /* should "7.0" be printed as "7" ?
809 if removing the ".0" is preferred, enable the fourth condition. */
810 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
811 /* && (absld (val) > simple_round_floor (val))) */
813 devmsg (" after rounding, value=%Lf * %0.f ^ %u\n", val, scale_base, power);
815 stpcpy (pfmt, ".*Lf%s");
817 int prec = user_precision == -1 ? show_decimal_point : user_precision;
819 /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */
820 num_size = snprintf (buf, buf_size - 1, fmt, prec, val,
821 suffix_power_char (power));
822 if (num_size < 0 || num_size >= (int) buf_size - 1)
823 die (EXIT_FAILURE, 0,
824 _("failed to prepare value '%Lf' for printing"), val);
826 if (scale == scale_IEC_I && power > 0)
827 strncat (buf, "i", buf_size - num_size - 1);
829 devmsg (" returning value: %s\n", quote (buf));
831 return;
834 /* Convert a string of decimal digits, N_STRING, with an optional suffix
835 to an integral value. Suffixes are handled as with --from=auto.
836 Upon successful conversion, return that value.
837 If it cannot be converted, give a diagnostic and exit. */
838 static uintmax_t
839 unit_to_umax (char const *n_string)
841 strtol_error s_err;
842 char const *c_string = n_string;
843 char *t_string = NULL;
844 size_t n_len = strlen (n_string);
845 char *end = NULL;
846 uintmax_t n;
847 char const *suffixes = valid_suffixes;
849 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
850 if (n_len && ! c_isdigit (n_string[n_len - 1]))
852 t_string = xmalloc (n_len + 2);
853 end = t_string + n_len - 1;
854 memcpy (t_string, n_string, n_len);
856 if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
857 *end = '\0';
858 else
860 *++end = 'B';
861 *++end = '\0';
862 suffixes = zero_and_valid_suffixes;
865 c_string = t_string;
868 s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
870 if (s_err != LONGINT_OK || *end || n == 0)
872 free (t_string);
873 die (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
876 free (t_string);
878 return n;
882 static void
883 setup_padding_buffer (size_t min_size)
885 if (padding_buffer_size > min_size)
886 return;
888 padding_buffer_size = min_size + 1;
889 padding_buffer = xrealloc (padding_buffer, padding_buffer_size);
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the full help text (synopsis, options, UNIT / FIELDS /
   FORMAT descriptions, exit status and examples) is written to
   standard output.  */
892 void
893 usage (int status)
895 if (status != EXIT_SUCCESS)
896 emit_try_help ();
897 else
899 printf (_("\
900 Usage: %s [OPTION]... [NUMBER]...\n\
901 "), program_name);
902 fputs (_("\
903 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
904 "), stdout);
905 emit_mandatory_arg_note ();
906 fputs (_("\
907 --debug print warnings about invalid input\n\
908 "), stdout);
909 fputs (_("\
910 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
911 "), stdout);
912 fputs (_("\
913 --field=FIELDS replace the numbers in these input fields (default=1);\n\
914 see FIELDS below\n\
915 "), stdout);
916 fputs (_("\
917 --format=FORMAT use printf style floating-point FORMAT;\n\
918 see FORMAT below for details\n\
919 "), stdout);
920 fputs (_("\
921 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
922 see UNIT below\n\
923 "), stdout);
924 fputs (_("\
925 --from-unit=N specify the input unit size (instead of the default 1)\n\
926 "), stdout);
927 fputs (_("\
928 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
929 (which means it has no effect in the C/POSIX locale)\n\
930 "), stdout);
931 fputs (_("\
932 --header[=N] print (without converting) the first N header lines;\n\
933 N defaults to 1 if not specified\n\
934 "), stdout);
935 fputs (_("\
936 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
937 abort (default), fail, warn, ignore\n\
938 "), stdout);
939 fputs (_("\
940 --padding=N pad the output to N characters; positive N will\n\
941 right-align; negative N will left-align;\n\
942 padding is ignored if the output is wider than N;\n\
943 the default is to automatically pad if a whitespace\n\
944 is found\n\
945 "), stdout);
946 fputs (_("\
947 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
948 up, down, from-zero (default), towards-zero, nearest\n\
949 "), stdout);
950 fputs (_("\
951 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
952 SUFFIX in input numbers\n\
953 "), stdout);
954 fputs (_("\
955 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
956 "), stdout);
957 fputs (_("\
958 --to-unit=N the output unit size (instead of the default 1)\n\
959 "), stdout);
960 fputs (_("\
961 -z, --zero-terminated line delimiter is NUL, not newline\n\
962 "), stdout);
963 fputs (HELP_OPTION_DESCRIPTION, stdout);
964 fputs (VERSION_OPTION_DESCRIPTION, stdout);
966 fputs (_("\
968 UNIT options:\n"), stdout);
969 fputs (_("\
970 none no auto-scaling is done; suffixes will trigger an error\n\
971 "), stdout);
972 fputs (_("\
973 auto accept optional single/two letter suffix:\n\
974 1K = 1000,\n\
975 1Ki = 1024,\n\
976 1M = 1000000,\n\
977 1Mi = 1048576,\n"), stdout);
978 fputs (_("\
979 si accept optional single letter suffix:\n\
980 1K = 1000,\n\
981 1M = 1000000,\n\
982 ...\n"), stdout);
983 fputs (_("\
984 iec accept optional single letter suffix:\n\
985 1K = 1024,\n\
986 1M = 1048576,\n\
987 ...\n"), stdout);
988 fputs (_("\
989 iec-i accept optional two-letter suffix:\n\
990 1Ki = 1024,\n\
991 1Mi = 1048576,\n\
992 ...\n"), stdout);
994 fputs (_("\n\
995 FIELDS supports cut(1) style field ranges:\n\
996 N N'th field, counted from 1\n\
997 N- from N'th field, to end of line\n\
998 N-M from N'th to M'th field (inclusive)\n\
999 -M from first to M'th field (inclusive)\n\
1000 - all fields\n\
1001 Multiple fields/ranges can be separated with commas\n\
1002 "), stdout);
1004 fputs (_("\n\
1005 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
1006 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
1007 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
1008 will zero pad the number. Optional negative values (%-10f) will left align.\n\
1009 Optional precision (%.1f) will override the input determined precision.\n\
1010 "), stdout);
1012 printf (_("\n\
1013 Exit status is 0 if all input numbers were successfully converted.\n\
1014 By default, %s will stop at the first conversion error with exit status 2.\n\
1015 With --invalid='fail' a warning is printed for each conversion error\n\
1016 and the exit status is 2. With --invalid='warn' each conversion error is\n\
1017 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
1018 errors are not diagnosed and the exit status is 0.\n\
1019 "), program_name);
1021 printf (_("\n\
1022 Examples:\n\
1023 $ %s --to=si 1000\n\
1024 -> \"1.0K\"\n\
1025 $ %s --to=iec 2048\n\
1026 -> \"2.0K\"\n\
1027 $ %s --to=iec-i 4096\n\
1028 -> \"4.0Ki\"\n\
1029 $ echo 1K | %s --from=si\n\
1030 -> \"1000\"\n\
1031 $ echo 1K | %s --from=iec\n\
1032 -> \"1024\"\n\
1033 $ df -B1 | %s --header --field 2-4 --to=si\n\
1034 $ ls -l | %s --header --field 5 --to=iec\n\
1035 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1036 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1037 program_name, program_name, program_name,
1038 program_name, program_name, program_name,
1039 program_name, program_name, program_name);
1040 emit_ancillary_info (PROGRAM_NAME);
1042 exit (status);
1045 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1046 1. padding (e.g. %20f)
1047 2. alignment (e.g. %-20f)
1048 3. grouping (e.g. %'f)
1050 Only a limited subset of printf(3) syntax is supported.
1052 TODO:
1053 support %e %g etc. rather than just %f
1055 NOTES:
1056 1. This function sets the global variables:
1057 padding_width, padding_alignment, grouping,
1058 format_str_prefix, format_str_suffix
1059 2. The function aborts on any errors. */
1060 static void
1061 parse_format_string (char const *fmt)
1063 size_t i;
1064 size_t prefix_len = 0;
1065 size_t suffix_pos;
1066 long int pad = 0;
1067 char *endptr = NULL;
1068 bool zero_padding = false;
1070 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
1072 if (!fmt[i])
1073 die (EXIT_FAILURE, 0,
1074 _("format %s has no %% directive"), quote (fmt));
1075 prefix_len++;
1078 i++;
1079 while (true)
1081 size_t skip = strspn (fmt + i, " ");
1082 i += skip;
1083 if (fmt[i] == '\'')
1085 grouping = 1;
1086 i++;
1088 else if (fmt[i] == '0')
1090 zero_padding = true;
1091 i++;
1093 else if (! skip)
1094 break;
1097 errno = 0;
1098 pad = strtol (fmt + i, &endptr, 10);
1099 if (errno == ERANGE || pad < -LONG_MAX)
1100 die (EXIT_FAILURE, 0,
1101 _("invalid format %s (width overflow)"), quote (fmt));
1103 if (endptr != (fmt + i) && pad != 0)
1105 if (debug && padding_width && !(zero_padding && pad > 0))
1106 error (0, 0, _("--format padding overriding --padding"));
1108 if (pad < 0)
1110 padding_alignment = MBS_ALIGN_LEFT;
1111 padding_width = -pad;
1113 else
1115 if (zero_padding)
1116 zero_padding_width = pad;
1117 else
1118 padding_width = pad;
1122 i = endptr - fmt;
1124 if (fmt[i] == '\0')
1125 die (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
1127 if (fmt[i] == '.')
1129 i++;
1130 errno = 0;
1131 user_precision = strtol (fmt + i, &endptr, 10);
1132 if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
1133 || isblank (fmt[i]) || fmt[i] == '+')
1135 /* Note we disallow negative user_precision to be
1136 consistent with printf(1). POSIX states that
1137 negative precision is only supported (and ignored)
1138 when used with '.*f'. glibc at least will malform
1139 output when passed a direct negative precision. */
1140 die (EXIT_FAILURE, 0,
1141 _("invalid precision in format %s"), quote (fmt));
1143 i = endptr - fmt;
1146 if (fmt[i] != 'f')
1147 die (EXIT_FAILURE, 0, _("invalid format %s,"
1148 " directive must be %%[0]['][-][N][.][N]f"),
1149 quote (fmt));
1150 i++;
1151 suffix_pos = i;
1153 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
1154 if (fmt[i] == '%' && fmt[i + 1] != '%')
1155 die (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
1156 quote (fmt));
1158 if (prefix_len)
1159 format_str_prefix = ximemdup0 (fmt, prefix_len);
1160 if (fmt[suffix_pos] != '\0')
1161 format_str_suffix = xstrdup (fmt + suffix_pos);
1163 devmsg ("format String:\n input: %s\n grouping: %s\n"
1164 " padding width: %ld\n alignment: %s\n"
1165 " prefix: %s\n suffix: %s\n",
1166 quote_n (0, fmt), (grouping) ? "yes" : "no",
1167 padding_width,
1168 (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
1169 quote_n (1, format_str_prefix ? format_str_prefix : ""),
1170 quote_n (2, format_str_suffix ? format_str_suffix : ""));
1173 /* Parse a numeric value (with optional suffix) from a string.
1174 Returns a long double value, with input precision.
1176 If there's an error converting the string to value - exits with
1177 an error.
1179 If there are any trailing characters after the number
1180 (besides a valid suffix) - exits with an error. */
1181 static enum simple_strtod_error
1182 parse_human_number (char const *str, long double /*output */ *value,
1183 size_t *precision)
1185 char *ptr = NULL;
1187 enum simple_strtod_error e =
1188 simple_strtod_human (str, &ptr, value, precision, scale_from);
1189 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1191 simple_strtod_fatal (e, str);
1192 return e;
1195 if (ptr && *ptr != '\0')
1197 if (inval_style != inval_ignore)
1198 error (conv_exit_code, 0, _("invalid suffix in input %s: %s"),
1199 quote_n (0, str), quote_n (1, ptr));
1200 e = SSE_INVALID_SUFFIX;
1202 return e;
1206 /* Print the given VAL, using the requested representation.
1207 The number is printed to STDOUT, with padding and alignment. */
1208 static int
1209 prepare_padded_number (const long double val, size_t precision)
1211 /* Generate Output. */
1212 char buf[128];
1214 size_t precision_used = user_precision == -1 ? precision : user_precision;
1216 /* Can't reliably print too-large values without auto-scaling. */
1217 unsigned int x;
1218 expld (val, 10, &x);
1220 if (scale_to == scale_none
1221 && x + precision_used > MAX_UNSCALED_DIGITS)
1223 if (inval_style != inval_ignore)
1225 if (precision_used)
1226 error (conv_exit_code, 0,
1227 _("value/precision too large to be printed: '%Lg/%"PRIuMAX"'"
1228 " (consider using --to)"), val, (uintmax_t)precision_used);
1229 else
1230 error (conv_exit_code, 0,
1231 _("value too large to be printed: '%Lg'"
1232 " (consider using --to)"), val);
1234 return 0;
1237 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1239 if (inval_style != inval_ignore)
1240 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1241 " (cannot handle values > 999Q)"), val);
1242 return 0;
1245 double_to_human (val, precision_used, buf, sizeof (buf),
1246 scale_to, grouping, round_style);
1247 if (suffix)
1248 strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
1250 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1251 val, quote (buf));
1253 if (padding_width && strlen (buf) < padding_width)
1255 size_t w = padding_width;
1256 mbsalign (buf, padding_buffer, padding_buffer_size, &w,
1257 padding_alignment, MBA_UNIBYTE_ONLY);
1259 devmsg (" After padding: %s\n", quote (padding_buffer));
1261 else
1263 setup_padding_buffer (strlen (buf) + 1);
1264 strcpy (padding_buffer, buf);
1267 return 1;
1270 static void
1271 print_padded_number (void)
1273 if (format_str_prefix)
1274 fputs (format_str_prefix, stdout);
1276 fputs (padding_buffer, stdout);
1278 if (format_str_suffix)
1279 fputs (format_str_suffix, stdout);
1282 /* Converts the TEXT number string to the requested representation,
1283 and handles automatic suffix addition. */
1284 static int
1285 process_suffixed_number (char *text, long double *result,
1286 size_t *precision, long int field)
1288 if (suffix && strlen (text) > strlen (suffix))
1290 char *possible_suffix = text + strlen (text) - strlen (suffix);
1292 if (STREQ (suffix, possible_suffix))
1294 /* trim suffix, ONLY if it's at the end of the text. */
1295 *possible_suffix = '\0';
1296 devmsg ("trimming suffix %s\n", quote (suffix));
1298 else
1299 devmsg ("no valid suffix found\n");
1302 /* Skip white space - always. */
1303 char *p = text;
1304 while (*p && isblank (to_uchar (*p)))
1305 ++p;
1306 const unsigned int skip_count = text - p;
1308 /* setup auto-padding. */
1309 if (auto_padding)
1311 if (skip_count > 0 || field > 1)
1313 padding_width = strlen (text);
1314 setup_padding_buffer (padding_width);
1316 else
1318 padding_width = 0;
1320 devmsg ("setting Auto-Padding to %ld characters\n", padding_width);
1323 long double val = 0;
1324 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1325 if (e == SSE_OK_PRECISION_LOSS && debug)
1326 error (0, 0, _("large input value %s: possible precision loss"),
1327 quote (p));
1329 if (from_unit_size != 1 || to_unit_size != 1)
1330 val = (val * from_unit_size) / to_unit_size;
1332 *result = val;
1334 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1337 /* Return a pointer to the beginning of the next field in line.
1338 The line pointer is moved to the end of the next field. */
1339 static char*
1340 next_field (char **line)
1342 char *field_start = *line;
1343 char *field_end = field_start;
1345 if (delimiter != DELIMITER_DEFAULT)
1347 if (*field_start != delimiter)
1349 while (*field_end && *field_end != delimiter)
1350 ++field_end;
1352 /* else empty field */
1354 else
1356 /* keep any space prefix in the returned field */
1357 while (*field_end && field_sep (*field_end))
1358 ++field_end;
1360 while (*field_end && ! field_sep (*field_end))
1361 ++field_end;
1364 *line = field_end;
1365 return field_start;
1368 ATTRIBUTE_PURE
1369 static bool
1370 include_field (uintmax_t field)
1372 struct field_range_pair *p = frp;
1373 if (!p)
1374 return field == 1;
1376 while (p->lo != UINTMAX_MAX)
1378 if (p->lo <= field && p->hi >= field)
1379 return true;
1380 ++p;
1382 return false;
1385 /* Convert and output the given field. If it is not included in the set
1386 of fields to process just output the original */
1387 static bool
1388 process_field (char *text, uintmax_t field)
1390 long double val = 0;
1391 size_t precision = 0;
1392 bool valid_number = true;
1394 if (include_field (field))
1396 valid_number =
1397 process_suffixed_number (text, &val, &precision, field);
1399 if (valid_number)
1400 valid_number = prepare_padded_number (val, precision);
1402 if (valid_number)
1403 print_padded_number ();
1404 else
1405 fputs (text, stdout);
1407 else
1408 fputs (text, stdout);
1410 return valid_number;
1413 /* Convert number in a given line of text.
1414 NEWLINE specifies whether to output a '\n' for this "line". */
1415 static int
1416 process_line (char *line, bool newline)
1418 char *next;
1419 uintmax_t field = 0;
1420 bool valid_number = true;
1422 while (true) {
1423 ++field;
1424 next = next_field (&line);
1426 if (*line != '\0')
1428 /* nul terminate the current field string and process */
1429 *line = '\0';
1431 if (! process_field (next, field))
1432 valid_number = false;
1434 fputc ((delimiter == DELIMITER_DEFAULT) ?
1435 ' ' : delimiter, stdout);
1436 ++line;
1438 else
1440 /* end of the line, process the last field and finish */
1441 if (! process_field (next, field))
1442 valid_number = false;
1444 break;
1448 if (newline)
1449 putchar (line_delim);
1451 return valid_number;
1455 main (int argc, char **argv)
1457 int valid_numbers = 1;
1458 bool locale_ok;
1460 initialize_main (&argc, &argv);
1461 set_program_name (argv[0]);
1462 locale_ok = !!setlocale (LC_ALL, "");
1463 bindtextdomain (PACKAGE, LOCALEDIR);
1464 textdomain (PACKAGE);
1466 #if HAVE_FPSETPREC
1467 /* Enabled extended precision if needed. */
1468 fpsetprec (FP_PE);
1469 #endif
1471 decimal_point = nl_langinfo (RADIXCHAR);
1472 if (decimal_point == NULL || strlen (decimal_point) == 0)
1473 decimal_point = ".";
1474 decimal_point_length = strlen (decimal_point);
1476 initialize_exit_failure (TIMEOUT_FAILURE);
1477 atexit (close_stdout);
1479 while (true)
1481 int c = getopt_long (argc, argv, "d:z", longopts, NULL);
1483 if (c == -1)
1484 break;
1486 switch (c)
1488 case FROM_OPTION:
1489 scale_from = XARGMATCH ("--from", optarg,
1490 scale_from_args, scale_from_types);
1491 break;
1493 case FROM_UNIT_OPTION:
1494 from_unit_size = unit_to_umax (optarg);
1495 break;
1497 case TO_OPTION:
1498 scale_to =
1499 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1500 break;
1502 case TO_UNIT_OPTION:
1503 to_unit_size = unit_to_umax (optarg);
1504 break;
1506 case ROUND_OPTION:
1507 round_style = XARGMATCH ("--round", optarg, round_args, round_types);
1508 break;
1510 case GROUPING_OPTION:
1511 grouping = 1;
1512 break;
1514 case PADDING_OPTION:
1515 if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
1516 || padding_width == 0 || padding_width < -LONG_MAX)
1517 die (EXIT_FAILURE, 0, _("invalid padding value %s"),
1518 quote (optarg));
1519 if (padding_width < 0)
1521 padding_alignment = MBS_ALIGN_LEFT;
1522 padding_width = -padding_width;
1524 /* TODO: We probably want to apply a specific --padding
1525 to --header lines too. */
1526 break;
1528 case FIELD_OPTION:
1529 if (n_frp)
1530 die (EXIT_FAILURE, 0, _("multiple field specifications"));
1531 set_fields (optarg, SETFLD_ALLOW_DASH);
1532 break;
1534 case 'd':
1535 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1536 if (optarg[0] != '\0' && optarg[1] != '\0')
1537 die (EXIT_FAILURE, 0,
1538 _("the delimiter must be a single character"));
1539 delimiter = optarg[0];
1540 break;
1542 case 'z':
1543 line_delim = '\0';
1544 break;
1546 case SUFFIX_OPTION:
1547 suffix = optarg;
1548 break;
1550 case DEBUG_OPTION:
1551 debug = true;
1552 break;
1554 case DEV_DEBUG_OPTION:
1555 dev_debug = true;
1556 debug = true;
1557 break;
1559 case HEADER_OPTION:
1560 if (optarg)
1562 if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
1563 || header == 0)
1564 die (EXIT_FAILURE, 0, _("invalid header value %s"),
1565 quote (optarg));
1567 else
1569 header = 1;
1571 break;
1573 case FORMAT_OPTION:
1574 format_str = optarg;
1575 break;
1577 case INVALID_OPTION:
1578 inval_style = XARGMATCH ("--invalid", optarg,
1579 inval_args, inval_types);
1580 break;
1582 case_GETOPT_HELP_CHAR;
1583 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1585 default:
1586 usage (EXIT_FAILURE);
1590 if (format_str != NULL && grouping)
1591 die (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1593 if (debug && ! locale_ok)
1594 error (0, 0, _("failed to set locale"));
1596 /* Warn about no-op. */
1597 if (debug && scale_from == scale_none && scale_to == scale_none
1598 && !grouping && (padding_width == 0) && (format_str == NULL))
1599 error (0, 0, _("no conversion option specified"));
1601 if (format_str)
1602 parse_format_string (format_str);
1604 if (grouping)
1606 if (scale_to != scale_none)
1607 die (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1608 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1609 error (0, 0, _("grouping has no effect in this locale"));
1613 setup_padding_buffer (padding_width);
1614 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1616 if (inval_style != inval_abort)
1617 conv_exit_code = 0;
1619 if (argc > optind)
1621 if (debug && header)
1622 error (0, 0, _("--header ignored with command-line input"));
1624 for (; optind < argc; optind++)
1625 valid_numbers &= process_line (argv[optind], true);
1627 else
1629 char *line = NULL;
1630 size_t line_allocated = 0;
1631 ssize_t len;
1633 while (header-- && getdelim (&line, &line_allocated,
1634 line_delim, stdin) > 0)
1635 fputs (line, stdout);
1637 while ((len = getdelim (&line, &line_allocated,
1638 line_delim, stdin)) > 0)
1640 bool newline = line[len - 1] == line_delim;
1641 if (newline)
1642 line[len - 1] = '\0';
1643 valid_numbers &= process_line (line, newline);
1646 if (ferror (stdin))
1647 error (0, errno, _("error reading input"));
1650 if (debug && !valid_numbers)
1651 error (0, 0, _("failed to convert some of the input numbers"));
1653 int exit_status = EXIT_SUCCESS;
1654 if (!valid_numbers
1655 && inval_style != inval_warn && inval_style != inval_ignore)
1656 exit_status = EXIT_CONVERSION_WARNINGS;
1658 main_exit (exit_status);