1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include <sys/types.h>
31 #include "set-fields.h"
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "numfmt"
40 #define AUTHORS proper_name ("Assaf Gordon")
42 /* Exit code when some numbers fail to convert. */
43 enum { EXIT_CONVERSION_WARNINGS
= 2 };
47 FROM_OPTION
= CHAR_MAX
+ 1,
65 scale_none
, /* the default: no scaling. */
66 scale_auto
, /* --from only. */
69 scale_IEC_I
/* 'i' suffix is required. */
72 static char const *const scale_from_args
[] =
74 "none", "auto", "si", "iec", "iec-i", nullptr
77 static enum scale_type
const scale_from_types
[] =
79 scale_none
, scale_auto
, scale_SI
, scale_IEC
, scale_IEC_I
82 static char const *const scale_to_args
[] =
84 "none", "si", "iec", "iec-i", nullptr
87 static enum scale_type
const scale_to_types
[] =
89 scale_none
, scale_SI
, scale_IEC
, scale_IEC_I
102 static char const *const round_args
[] =
104 "up", "down", "from-zero", "towards-zero", "nearest", nullptr
107 static enum round_type
const round_types
[] =
109 round_ceiling
, round_floor
, round_from_zero
, round_to_zero
, round_nearest
121 static char const *const inval_args
[] =
123 "abort", "fail", "warn", "ignore", nullptr
126 static enum inval_type
const inval_types
[] =
128 inval_abort
, inval_fail
, inval_warn
, inval_ignore
131 static struct option
const longopts
[] =
133 {"from", required_argument
, nullptr, FROM_OPTION
},
134 {"from-unit", required_argument
, nullptr, FROM_UNIT_OPTION
},
135 {"to", required_argument
, nullptr, TO_OPTION
},
136 {"to-unit", required_argument
, nullptr, TO_UNIT_OPTION
},
137 {"round", required_argument
, nullptr, ROUND_OPTION
},
138 {"padding", required_argument
, nullptr, PADDING_OPTION
},
139 {"suffix", required_argument
, nullptr, SUFFIX_OPTION
},
140 {"grouping", no_argument
, nullptr, GROUPING_OPTION
},
141 {"delimiter", required_argument
, nullptr, 'd'},
142 {"field", required_argument
, nullptr, FIELD_OPTION
},
143 {"debug", no_argument
, nullptr, DEBUG_OPTION
},
144 {"-debug", no_argument
, nullptr, DEV_DEBUG_OPTION
},
145 {"header", optional_argument
, nullptr, HEADER_OPTION
},
146 {"format", required_argument
, nullptr, FORMAT_OPTION
},
147 {"invalid", required_argument
, nullptr, INVALID_OPTION
},
148 {"zero-terminated", no_argument
, nullptr, 'z'},
149 {GETOPT_HELP_OPTION_DECL
},
150 {GETOPT_VERSION_OPTION_DECL
},
151 {nullptr, 0, nullptr, 0}
154 /* If delimiter has this value, blanks separate fields. */
155 enum { DELIMITER_DEFAULT
= CHAR_MAX
+ 1 };
157 /* Maximum number of digits we can safely handle
158 without precision loss, if scaling is 'none'. */
159 enum { MAX_UNSCALED_DIGITS
= LDBL_DIG
};
161 /* Maximum number of digits we can work with.
162 This is equivalent to 999Q.
163 NOTE: 'long double' can handle more than that, but there's
164 no official suffix assigned beyond Quetta (1000^10). */
165 enum { MAX_ACCEPTABLE_DIGITS
= 33 };
167 static enum scale_type scale_from
= scale_none
;
168 static enum scale_type scale_to
= scale_none
;
169 static enum round_type round_style
= round_from_zero
;
170 static enum inval_type inval_style
= inval_abort
;
171 static char const *suffix
= nullptr;
172 static uintmax_t from_unit_size
= 1;
173 static uintmax_t to_unit_size
= 1;
174 static int grouping
= 0;
175 static char *padding_buffer
= nullptr;
176 static size_t padding_buffer_size
= 0;
177 static long int padding_width
= 0;
178 static long int zero_padding_width
= 0;
179 static long int user_precision
= -1;
180 static char const *format_str
= nullptr;
181 static char *format_str_prefix
= nullptr;
182 static char *format_str_suffix
= nullptr;
184 /* By default, any conversion error will terminate the program. */
185 static int conv_exit_code
= EXIT_CONVERSION_WARNINGS
;
188 /* auto-pad each line based on skipped whitespace. */
189 static int auto_padding
= 0;
190 static mbs_align_t padding_alignment
= MBS_ALIGN_RIGHT
;
192 /* field delimiter */
193 static int delimiter
= DELIMITER_DEFAULT
;
195 /* line delimiter. */
196 static unsigned char line_delim
= '\n';
198 /* if non-zero, the first 'header' lines from STDIN are skipped. */
199 static uintmax_t header
= 0;
201 /* Debug for users: print warnings to STDERR about possible
202 error (similar to sort's debug). */
205 /* will be set according to the current locale. */
206 static char const *decimal_point
;
207 static int decimal_point_length
;
209 /* debugging for developers. Enables devmsg(). */
210 static bool dev_debug
= false;
214 default_scale_base (enum scale_type scale
)
230 static char const zero_and_valid_suffixes
[] = "0KMGTPEZYRQ";
231 static char const *valid_suffixes
= 1 + zero_and_valid_suffixes
;
234 valid_suffix (const char suf
)
236 return strchr (valid_suffixes
, suf
) != nullptr;
240 suffix_power (const char suf
)
244 case 'K': /* kilo or kibi. */
247 case 'M': /* mega or mebi. */
250 case 'G': /* giga or gibi. */
253 case 'T': /* tera or tebi. */
256 case 'P': /* peta or pebi. */
259 case 'E': /* exa or exbi. */
262 case 'Z': /* zetta or 2**70. */
265 case 'Y': /* yotta or 2**80. */
268 case 'R': /* ronna or 2**90. */
271 case 'Q': /* quetta or 2**100. */
274 default: /* should never happen. assert? */
279 static inline char const *
280 suffix_power_char (int power
)
322 /* Similar to 'powl(3)' but without requiring 'libm'. */
324 powerld (long double base
, int x
)
326 long double result
= base
;
328 return 1; /* note for test coverage: this is never
329 reached, as 'powerld' won't be called if
330 there's no suffix, hence, no "power". */
332 /* TODO: check for overflow, inf? */
/* Return the absolute value of VAL.
   Similar to 'fabs(3)' but without requiring 'libm'.  */
static inline long double
absld (long double val)
{
  return val < 0 ? -val : val;
}
345 /* Scale down 'val', returns 'updated val' and 'x', such that
346 val*base^X = original val
347 Similar to "frexpl(3)" but without requiring 'libm',
348 allowing only integer scale, limited functionality and error checking. */
350 expld (long double val
, int base
, int /*output */ *x
)
354 if (val
>= -LDBL_MAX
&& val
<= LDBL_MAX
)
356 while (absld (val
) >= base
)
367 /* EXTREMELY limited 'ceil' - without 'libm'.
368 Assumes values that fit in intmax_t. */
369 static inline intmax_t
370 simple_round_ceiling (long double val
)
372 intmax_t intval
= val
;
/* EXTREMELY limited 'floor' - without 'libm'.
   Computed as the negation of simple_round_ceiling (-VAL).
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_floor (long double val)
{
  return -simple_round_ceiling (-val);
}
/* EXTREMELY limited 'round away from zero'.
   Negative VAL is rounded with simple_round_floor, anything else
   with simple_round_ceiling.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  return val < 0 ? simple_round_floor (val) : simple_round_ceiling (val);
}
394 /* EXTREMELY limited 'round towards zero'.
395 Assumes values that fit in intmax_t. */
396 static inline intmax_t
397 simple_round_to_zero (long double val
)
/* EXTREMELY limited 'round' - without 'libm'.
   Adds 0.5 (or subtracts it for negative VAL) and lets the conversion
   to intmax_t drop the fraction, so halfway cases move away from zero.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  return val < 0 ? val - 0.5 : val + 0.5;
}
411 static inline long double
412 simple_round (long double val
, enum round_type t
)
415 intmax_t intmax_mul
= val
/ INTMAX_MAX
;
416 val
-= (long double) INTMAX_MAX
* intmax_mul
;
421 rval
= simple_round_ceiling (val
);
425 rval
= simple_round_floor (val
);
428 case round_from_zero
:
429 rval
= simple_round_from_zero (val
);
433 rval
= simple_round_to_zero (val
);
437 rval
= simple_round_nearest (val
);
441 /* to silence the compiler - this should never happen. */
445 return (long double) INTMAX_MAX
* intmax_mul
+ rval
;
448 enum simple_strtod_error
451 SSE_OK_PRECISION_LOSS
,
455 /* the following are returned by 'simple_strtod_human'. */
456 SSE_VALID_BUT_FORBIDDEN_SUFFIX
,
461 /* Read an *integer* INPUT_STR,
462 but return the integer value in a 'long double' VALUE
463 hence, no UINTMAX_MAX limitation.
464 NEGATIVE is updated, and is stored separately from the VALUE
465 so that signbit() isn't required to determine the sign of -0..
466 ENDPTR is required (unlike strtod) and is used to store a pointer
467 to the character after the last character used in the conversion.
469 Note locale'd grouping is not supported,
470 nor is skipping of white-space supported.
473 SSE_OK - valid number.
474 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
475 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
476 SSE_INVALID_NUMBER - if no digits were found. */
477 static enum simple_strtod_error
478 simple_strtod_int (char const *input_str
,
479 char **endptr
, long double *value
, bool *negative
)
481 enum simple_strtod_error e
= SSE_OK
;
485 bool found_digit
= false;
487 if (*input_str
== '-')
495 *endptr
= (char *) input_str
;
496 while (c_isdigit (**endptr
))
498 int digit
= (**endptr
) - '0';
505 if (digits
> MAX_UNSCALED_DIGITS
)
506 e
= SSE_OK_PRECISION_LOSS
;
508 if (digits
> MAX_ACCEPTABLE_DIGITS
)
517 && ! STREQ_LEN (*endptr
, decimal_point
, decimal_point_length
))
518 return SSE_INVALID_NUMBER
;
528 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
529 and return the value in a 'long double' VALUE.
530 ENDPTR is required (unlike strtod) and is used to store a pointer
531 to the character after the last character used in the conversion.
532 PRECISION is optional and used to indicate fractions are present.
534 Note locale'd grouping is not supported,
535 nor is skipping of white-space supported.
538 SSE_OK - valid number.
539 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
540 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
541 SSE_INVALID_NUMBER - if no digits were found. */
542 static enum simple_strtod_error
543 simple_strtod_float (char const *input_str
,
549 enum simple_strtod_error e
= SSE_OK
;
554 /* TODO: accept locale'd grouped values for the integral part. */
555 e
= simple_strtod_int (input_str
, endptr
, value
, &negative
);
556 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
559 /* optional decimal point + fraction. */
560 if (STREQ_LEN (*endptr
, decimal_point
, decimal_point_length
))
563 long double val_frac
= 0;
566 (*endptr
) += decimal_point_length
;
567 enum simple_strtod_error e2
=
568 simple_strtod_int (*endptr
, &ptr2
, &val_frac
, &neg_frac
);
569 if (e2
!= SSE_OK
&& e2
!= SSE_OK_PRECISION_LOSS
)
571 if (e2
== SSE_OK_PRECISION_LOSS
)
572 e
= e2
; /* propagate warning. */
574 return SSE_INVALID_NUMBER
;
576 /* number of digits in the fractions. */
577 size_t exponent
= ptr2
- *endptr
;
579 val_frac
= ((long double) val_frac
) / powerld (10, exponent
);
581 /* TODO: detect loss of precision (only really 18 digits
582 of precision across all digits (before and after '.')). */
592 *precision
= exponent
;
599 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
600 and return the value in a 'long double' VALUE,
601 with the precision of the input returned in PRECISION.
602 ENDPTR is required (unlike strtod) and is used to store a pointer
603 to the character after the last character used in the conversion.
604 ALLOWED_SCALING determines the scaling supported.
607 support locale'd grouping
608 accept scientific and hex floats (probably use strtold directly)
611 SSE_OK - valid number.
612 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
613 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
614 SSE_INVALID_NUMBER - if no digits were found.
615 SSE_VALID_BUT_FORBIDDEN_SUFFIX
617 SSE_MISSING_I_SUFFIX */
618 static enum simple_strtod_error
619 simple_strtod_human (char const *input_str
,
620 char **endptr
, long double *value
, size_t *precision
,
621 enum scale_type allowed_scaling
)
624 /* 'scale_auto' is checked below. */
625 int scale_base
= default_scale_base (allowed_scaling
);
627 devmsg ("simple_strtod_human:\n input string: %s\n"
628 " locale decimal-point: %s\n"
629 " MAX_UNSCALED_DIGITS: %d\n",
630 quote_n (0, input_str
),
631 quote_n (1, decimal_point
),
632 MAX_UNSCALED_DIGITS
);
634 enum simple_strtod_error e
=
635 simple_strtod_float (input_str
, endptr
, value
, precision
);
636 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
639 devmsg (" parsed numeric value: %Lf\n"
640 " input precision = %d\n", *value
, (int)*precision
);
642 if (**endptr
!= '\0')
644 /* process suffix. */
646 /* Skip any blanks between the number and suffix. */
647 while (isblank (to_uchar (**endptr
)))
650 if (!valid_suffix (**endptr
))
651 return SSE_INVALID_SUFFIX
;
653 if (allowed_scaling
== scale_none
)
654 return SSE_VALID_BUT_FORBIDDEN_SUFFIX
;
656 power
= suffix_power (**endptr
);
657 (*endptr
)++; /* skip first suffix character. */
659 if (allowed_scaling
== scale_auto
&& **endptr
== 'i')
661 /* auto-scaling enabled, and the first suffix character
662 is followed by an 'i' (e.g. Ki, Mi, Gi). */
664 (*endptr
)++; /* skip second ('i') suffix character. */
665 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
669 *precision
= 0; /* Reset, to select precision based on scale. */
672 if (allowed_scaling
== scale_IEC_I
)
677 return SSE_MISSING_I_SUFFIX
;
680 long double multiplier
= powerld (scale_base
, power
);
682 devmsg (" suffix power=%d^%d = %Lf\n", scale_base
, power
, multiplier
);
684 /* TODO: detect loss of precision and overflows. */
685 (*value
) = (*value
) * multiplier
;
687 devmsg (" returning value: %Lf (%LG)\n", *value
, *value
);
694 simple_strtod_fatal (enum simple_strtod_error err
, char const *input_str
)
696 char const *msgid
= nullptr;
700 case SSE_OK_PRECISION_LOSS
:
702 /* should never happen - this function isn't called when OK. */
706 msgid
= N_("value too large to be converted: %s");
709 case SSE_INVALID_NUMBER
:
710 msgid
= N_("invalid number: %s");
713 case SSE_VALID_BUT_FORBIDDEN_SUFFIX
:
714 msgid
= N_("rejecting suffix in input: %s (consider using --from)");
717 case SSE_INVALID_SUFFIX
:
718 msgid
= N_("invalid suffix in input: %s");
721 case SSE_MISSING_I_SUFFIX
:
722 msgid
= N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
727 if (inval_style
!= inval_ignore
)
728 error (conv_exit_code
, 0, gettext (msgid
), quote (input_str
));
731 /* Convert VAL to a human format string in BUF. */
733 double_to_human (long double val
, int precision
,
734 char *buf
, size_t buf_size
,
735 enum scale_type scale
, int group
, enum round_type round
)
739 static_assert ((INT_BUFSIZE_BOUND (zero_padding_width
)
740 + INT_BUFSIZE_BOUND (precision
)
741 + 10 /* for %.Lf etc. */)
750 if (zero_padding_width
)
751 pfmt
+= snprintf (pfmt
, sizeof (fmt
) - 2, "0%ld", zero_padding_width
);
753 devmsg ("double_to_human:\n");
755 if (scale
== scale_none
)
757 val
*= powerld (10, precision
);
758 val
= simple_round (val
, round
);
759 val
/= powerld (10, precision
);
762 " no scaling, returning (grouped) value: %'.*Lf\n" :
763 " no scaling, returning value: %.*Lf\n", precision
, val
);
765 stpcpy (pfmt
, ".*Lf");
767 num_size
= snprintf (buf
, buf_size
, fmt
, precision
, val
);
768 if (num_size
< 0 || num_size
>= (int) buf_size
)
769 error (EXIT_FAILURE
, 0,
770 _("failed to prepare value '%Lf' for printing"), val
);
774 /* Scaling requested by user. */
775 double scale_base
= default_scale_base (scale
);
777 /* Normalize val to scale. */
779 val
= expld (val
, scale_base
, &power
);
780 devmsg (" scaled value to %Lf * %0.f ^ %d\n", val
, scale_base
, power
);
782 /* Perform rounding. */
783 int power_adjust
= 0;
784 if (user_precision
!= -1)
785 power_adjust
= MIN (power
* 3, user_precision
);
786 else if (absld (val
) < 10)
788 /* for values less than 10, we allow one decimal-point digit,
789 so adjust before rounding. */
793 val
*= powerld (10, power_adjust
);
794 val
= simple_round (val
, round
);
795 val
/= powerld (10, power_adjust
);
797 /* two special cases after rounding:
798 1. a "999.99" can turn into 1000 - so scale down
799 2. a "9.99" can turn into 10 - so don't display decimal-point. */
800 if (absld (val
) >= scale_base
)
806 /* should "7.0" be printed as "7" ?
807 if removing the ".0" is preferred, enable the fourth condition. */
808 int show_decimal_point
= (val
!= 0) && (absld (val
) < 10) && (power
> 0);
809 /* && (absld (val) > simple_round_floor (val))) */
811 devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val
, scale_base
, power
);
813 stpcpy (pfmt
, ".*Lf%s");
815 int prec
= user_precision
== -1 ? show_decimal_point
: user_precision
;
817 /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */
818 num_size
= snprintf (buf
, buf_size
- 1, fmt
, prec
, val
,
819 suffix_power_char (power
));
820 if (num_size
< 0 || num_size
>= (int) buf_size
- 1)
821 error (EXIT_FAILURE
, 0,
822 _("failed to prepare value '%Lf' for printing"), val
);
824 if (scale
== scale_IEC_I
&& power
> 0)
825 strncat (buf
, "i", buf_size
- num_size
- 1);
827 devmsg (" returning value: %s\n", quote (buf
));
832 /* Convert a string of decimal digits, N_STRING, with an optional suffix
833 to an integral value. Suffixes are handled as with --from=auto.
834 Upon successful conversion, return that value.
835 If it cannot be converted, give a diagnostic and exit. */
837 unit_to_umax (char const *n_string
)
840 char const *c_string
= n_string
;
841 char *t_string
= nullptr;
842 size_t n_len
= strlen (n_string
);
845 char const *suffixes
= valid_suffixes
;
847 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
848 if (n_len
&& ! c_isdigit (n_string
[n_len
- 1]))
850 t_string
= xmalloc (n_len
+ 2);
851 end
= t_string
+ n_len
- 1;
852 memcpy (t_string
, n_string
, n_len
);
854 if (*end
== 'i' && 2 <= n_len
&& ! c_isdigit (*(end
- 1)))
860 suffixes
= zero_and_valid_suffixes
;
866 s_err
= xstrtoumax (c_string
, &end
, 10, &n
, suffixes
);
868 if (s_err
!= LONGINT_OK
|| *end
|| n
== 0)
871 error (EXIT_FAILURE
, 0, _("invalid unit size: %s"), quote (n_string
));
881 setup_padding_buffer (size_t min_size
)
883 if (padding_buffer_size
> min_size
)
886 padding_buffer_size
= min_size
+ 1;
887 padding_buffer
= xrealloc (padding_buffer
, padding_buffer_size
);
893 if (status
!= EXIT_SUCCESS
)
898 Usage: %s [OPTION]... [NUMBER]...\n\
901 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
903 emit_mandatory_arg_note ();
905 --debug print warnings about invalid input\n\
908 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
911 --field=FIELDS replace the numbers in these input fields (default=1);\n\
915 --format=FORMAT use printf style floating-point FORMAT;\n\
916 see FORMAT below for details\n\
919 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
923 --from-unit=N specify the input unit size (instead of the default 1)\n\
926 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
927 (which means it has no effect in the C/POSIX locale)\n\
930 --header[=N] print (without converting) the first N header lines;\n\
931 N defaults to 1 if not specified\n\
934 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
935 abort (default), fail, warn, ignore\n\
938 --padding=N pad the output to N characters; positive N will\n\
939 right-align; negative N will left-align;\n\
940 padding is ignored if the output is wider than N;\n\
941 the default is to automatically pad if a whitespace\n\
945 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
946 up, down, from-zero (default), towards-zero, nearest\n\
949 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
950 SUFFIX in input numbers\n\
953 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
956 --to-unit=N the output unit size (instead of the default 1)\n\
959 -z, --zero-terminated line delimiter is NUL, not newline\n\
961 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
962 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
966 UNIT options:\n"), stdout
);
968 none no auto-scaling is done; suffixes will trigger an error\n\
971 auto accept optional single/two letter suffix:\n\
975 1Mi = 1048576,\n"), stdout
);
977 si accept optional single letter suffix:\n\
982 iec accept optional single letter suffix:\n\
987 iec-i accept optional two-letter suffix:\n\
993 FIELDS supports cut(1) style field ranges:\n\
994 N N'th field, counted from 1\n\
995 N- from N'th field, to end of line\n\
996 N-M from N'th to M'th field (inclusive)\n\
997 -M from first to M'th field (inclusive)\n\
999 Multiple fields/ranges can be separated with commas\n\
1003 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
1004 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
1005 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
1006 will zero pad the number. Optional negative values (%-10f) will left align.\n\
1007 Optional precision (%.1f) will override the input determined precision.\n\
1011 Exit status is 0 if all input numbers were successfully converted.\n\
1012 By default, %s will stop at the first conversion error with exit status 2.\n\
1013 With --invalid='fail' a warning is printed for each conversion error\n\
1014 and the exit status is 2. With --invalid='warn' each conversion error is\n\
1015 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
1016 errors are not diagnosed and the exit status is 0.\n\
1021 $ %s --to=si 1000\n\
1023 $ %s --to=iec 2048\n\
1025 $ %s --to=iec-i 4096\n\
1027 $ echo 1K | %s --from=si\n\
1029 $ echo 1K | %s --from=iec\n\
1031 $ df -B1 | %s --header --field 2-4 --to=si\n\
1032 $ ls -l | %s --header --field 5 --to=iec\n\
1033 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1034 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1035 program_name
, program_name
, program_name
,
1036 program_name
, program_name
, program_name
,
1037 program_name
, program_name
, program_name
);
1038 emit_ancillary_info (PROGRAM_NAME
);
1043 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1044 1. padding (e.g. %20f)
1045 2. alignment (e.g. %-20f)
1046 3. grouping (e.g. %'f)
1048 Only a limited subset of printf(3) syntax is supported.
1051 support %e %g etc. rather than just %f
1054 1. This function sets the global variables:
1055 padding_width, padding_alignment, grouping,
1056 format_str_prefix, format_str_suffix
1057 2. The function aborts on any errors. */
1059 parse_format_string (char const *fmt
)
1062 size_t prefix_len
= 0;
1065 char *endptr
= nullptr;
1066 bool zero_padding
= false;
1068 for (i
= 0; !(fmt
[i
] == '%' && fmt
[i
+ 1] != '%'); i
+= (fmt
[i
] == '%') + 1)
1071 error (EXIT_FAILURE
, 0,
1072 _("format %s has no %% directive"), quote (fmt
));
1079 size_t skip
= strspn (fmt
+ i
, " ");
1086 else if (fmt
[i
] == '0')
1088 zero_padding
= true;
1096 pad
= strtol (fmt
+ i
, &endptr
, 10);
1097 if (errno
== ERANGE
|| pad
< -LONG_MAX
)
1098 error (EXIT_FAILURE
, 0,
1099 _("invalid format %s (width overflow)"), quote (fmt
));
1101 if (endptr
!= (fmt
+ i
) && pad
!= 0)
1103 if (debug
&& padding_width
&& !(zero_padding
&& pad
> 0))
1104 error (0, 0, _("--format padding overriding --padding"));
1108 padding_alignment
= MBS_ALIGN_LEFT
;
1109 padding_width
= -pad
;
1114 zero_padding_width
= pad
;
1116 padding_width
= pad
;
1123 error (EXIT_FAILURE
, 0, _("format %s ends in %%"), quote (fmt
));
1129 user_precision
= strtol (fmt
+ i
, &endptr
, 10);
1130 if (errno
== ERANGE
|| user_precision
< 0 || SIZE_MAX
< user_precision
1131 || isblank (fmt
[i
]) || fmt
[i
] == '+')
1133 /* Note we disallow negative user_precision to be
1134 consistent with printf(1). POSIX states that
1135 negative precision is only supported (and ignored)
1136 when used with '.*f'. glibc at least will malform
1137 output when passed a direct negative precision. */
1138 error (EXIT_FAILURE
, 0,
1139 _("invalid precision in format %s"), quote (fmt
));
1145 error (EXIT_FAILURE
, 0, _("invalid format %s,"
1146 " directive must be %%[0]['][-][N][.][N]f"),
1151 for (; fmt
[i
] != '\0'; i
+= (fmt
[i
] == '%') + 1)
1152 if (fmt
[i
] == '%' && fmt
[i
+ 1] != '%')
1153 error (EXIT_FAILURE
, 0, _("format %s has too many %% directives"),
1157 format_str_prefix
= ximemdup0 (fmt
, prefix_len
);
1158 if (fmt
[suffix_pos
] != '\0')
1159 format_str_suffix
= xstrdup (fmt
+ suffix_pos
);
1161 devmsg ("format String:\n input: %s\n grouping: %s\n"
1162 " padding width: %ld\n alignment: %s\n"
1163 " prefix: %s\n suffix: %s\n",
1164 quote_n (0, fmt
), (grouping
) ? "yes" : "no",
1166 (padding_alignment
== MBS_ALIGN_LEFT
) ? "Left" : "Right",
1167 quote_n (1, format_str_prefix
? format_str_prefix
: ""),
1168 quote_n (2, format_str_suffix
? format_str_suffix
: ""));
1171 /* Parse a numeric value (with optional suffix) from a string.
1172 Returns a long double value, with input precision.
1174 If there's an error converting the string to value - exits with
1177 If there are any trailing characters after the number
1178 (besides a valid suffix) - exits with an error. */
1179 static enum simple_strtod_error
1180 parse_human_number (char const *str
, long double /*output */ *value
,
1183 char *ptr
= nullptr;
1185 enum simple_strtod_error e
=
1186 simple_strtod_human (str
, &ptr
, value
, precision
, scale_from
);
1187 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
1189 simple_strtod_fatal (e
, str
);
1193 if (ptr
&& *ptr
!= '\0')
1195 if (inval_style
!= inval_ignore
)
1196 error (conv_exit_code
, 0, _("invalid suffix in input %s: %s"),
1197 quote_n (0, str
), quote_n (1, ptr
));
1198 e
= SSE_INVALID_SUFFIX
;
1204 /* Print the given VAL, using the requested representation.
1205 The number is printed to STDOUT, with padding and alignment. */
1207 prepare_padded_number (const long double val
, size_t precision
)
1209 /* Generate Output. */
1212 size_t precision_used
= user_precision
== -1 ? precision
: user_precision
;
1214 /* Can't reliably print too-large values without auto-scaling. */
1216 expld (val
, 10, &x
);
1218 if (scale_to
== scale_none
1219 && x
+ precision_used
> MAX_UNSCALED_DIGITS
)
1221 if (inval_style
!= inval_ignore
)
1224 error (conv_exit_code
, 0,
1225 _("value/precision too large to be printed: '%Lg/%"PRIuMAX
"'"
1226 " (consider using --to)"), val
, (uintmax_t)precision_used
);
1228 error (conv_exit_code
, 0,
1229 _("value too large to be printed: '%Lg'"
1230 " (consider using --to)"), val
);
1235 if (x
> MAX_ACCEPTABLE_DIGITS
- 1)
1237 if (inval_style
!= inval_ignore
)
1238 error (conv_exit_code
, 0, _("value too large to be printed: '%Lg'"
1239 " (cannot handle values > 999Q)"), val
);
1243 double_to_human (val
, precision_used
, buf
, sizeof (buf
),
1244 scale_to
, grouping
, round_style
);
1246 strncat (buf
, suffix
, sizeof (buf
) - strlen (buf
) -1);
1248 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1251 if (padding_width
&& strlen (buf
) < padding_width
)
1253 size_t w
= padding_width
;
1254 mbsalign (buf
, padding_buffer
, padding_buffer_size
, &w
,
1255 padding_alignment
, MBA_UNIBYTE_ONLY
);
1257 devmsg (" After padding: %s\n", quote (padding_buffer
));
1261 setup_padding_buffer (strlen (buf
) + 1);
1262 strcpy (padding_buffer
, buf
);
1269 print_padded_number (void)
1271 if (format_str_prefix
)
1272 fputs (format_str_prefix
, stdout
);
1274 fputs (padding_buffer
, stdout
);
1276 if (format_str_suffix
)
1277 fputs (format_str_suffix
, stdout
);
1280 /* Converts the TEXT number string to the requested representation,
1281 and handles automatic suffix addition. */
1283 process_suffixed_number (char *text
, long double *result
,
1284 size_t *precision
, long int field
)
1286 if (suffix
&& strlen (text
) > strlen (suffix
))
1288 char *possible_suffix
= text
+ strlen (text
) - strlen (suffix
);
1290 if (STREQ (suffix
, possible_suffix
))
1292 /* trim suffix, ONLY if it's at the end of the text. */
1293 *possible_suffix
= '\0';
1294 devmsg ("trimming suffix %s\n", quote (suffix
));
1297 devmsg ("no valid suffix found\n");
1300 /* Skip white space - always. */
1302 while (*p
&& isblank (to_uchar (*p
)))
1305 /* setup auto-padding. */
1308 if (text
< p
|| field
> 1)
1310 padding_width
= strlen (text
);
1311 setup_padding_buffer (padding_width
);
1317 devmsg ("setting Auto-Padding to %ld characters\n", padding_width
);
1320 long double val
= 0;
1321 enum simple_strtod_error e
= parse_human_number (p
, &val
, precision
);
1322 if (e
== SSE_OK_PRECISION_LOSS
&& debug
)
1323 error (0, 0, _("large input value %s: possible precision loss"),
1326 if (from_unit_size
!= 1 || to_unit_size
!= 1)
1327 val
= (val
* from_unit_size
) / to_unit_size
;
1331 return (e
== SSE_OK
|| e
== SSE_OK_PRECISION_LOSS
);
1334 /* Return a pointer to the beginning of the next field in line.
1335 The line pointer is moved to the end of the next field. */
1337 next_field (char **line
)
1339 char *field_start
= *line
;
1340 char *field_end
= field_start
;
1342 if (delimiter
!= DELIMITER_DEFAULT
)
1344 if (*field_start
!= delimiter
)
1346 while (*field_end
&& *field_end
!= delimiter
)
1349 /* else empty field */
1353 /* keep any space prefix in the returned field */
1354 while (*field_end
&& field_sep (*field_end
))
1357 while (*field_end
&& ! field_sep (*field_end
))
1367 include_field (uintmax_t field
)
1369 struct field_range_pair
*p
= frp
;
1373 while (p
->lo
!= UINTMAX_MAX
)
1375 if (p
->lo
<= field
&& p
->hi
>= field
)
1382 /* Convert and output the given field. If it is not included in the set
1383 of fields to process just output the original */
1385 process_field (char *text
, uintmax_t field
)
1387 long double val
= 0;
1388 size_t precision
= 0;
1389 bool valid_number
= true;
1391 if (include_field (field
))
1394 process_suffixed_number (text
, &val
, &precision
, field
);
1397 valid_number
= prepare_padded_number (val
, precision
);
1400 print_padded_number ();
1402 fputs (text
, stdout
);
1405 fputs (text
, stdout
);
1407 return valid_number
;
1410 /* Convert number in a given line of text.
1411 NEWLINE specifies whether to output a '\n' for this "line". */
1413 process_line (char *line
, bool newline
)
1416 uintmax_t field
= 0;
1417 bool valid_number
= true;
1421 next
= next_field (&line
);
1425 /* nul terminate the current field string and process */
1428 if (! process_field (next
, field
))
1429 valid_number
= false;
1431 fputc ((delimiter
== DELIMITER_DEFAULT
) ?
1432 ' ' : delimiter
, stdout
);
1437 /* end of the line, process the last field and finish */
1438 if (! process_field (next
, field
))
1439 valid_number
= false;
1446 putchar (line_delim
);
1448 return valid_number
;
1452 main (int argc
, char **argv
)
1454 int valid_numbers
= 1;
1457 initialize_main (&argc
, &argv
);
1458 set_program_name (argv
[0]);
1459 locale_ok
= !!setlocale (LC_ALL
, "");
1460 bindtextdomain (PACKAGE
, LOCALEDIR
);
1461 textdomain (PACKAGE
);
1464 /* Enable extended precision if needed. */
1468 decimal_point
= nl_langinfo (RADIXCHAR
);
1469 if (decimal_point
== nullptr || strlen (decimal_point
) == 0)
1470 decimal_point
= ".";
1471 decimal_point_length
= strlen (decimal_point
);
1473 atexit (close_stdout
);
1477 int c
= getopt_long (argc
, argv
, "d:z", longopts
, nullptr);
1485 scale_from
= XARGMATCH ("--from", optarg
,
1486 scale_from_args
, scale_from_types
);
1489 case FROM_UNIT_OPTION
:
1490 from_unit_size
= unit_to_umax (optarg
);
1495 XARGMATCH ("--to", optarg
, scale_to_args
, scale_to_types
);
1498 case TO_UNIT_OPTION
:
1499 to_unit_size
= unit_to_umax (optarg
);
1503 round_style
= XARGMATCH ("--round", optarg
, round_args
, round_types
);
1506 case GROUPING_OPTION
:
1510 case PADDING_OPTION
:
1511 if (xstrtol (optarg
, nullptr, 10, &padding_width
, "") != LONGINT_OK
1512 || padding_width
== 0 || padding_width
< -LONG_MAX
)
1513 error (EXIT_FAILURE
, 0, _("invalid padding value %s"),
1515 if (padding_width
< 0)
1517 padding_alignment
= MBS_ALIGN_LEFT
;
1518 padding_width
= -padding_width
;
1520 /* TODO: We probably want to apply a specific --padding
1521 to --header lines too. */
1526 error (EXIT_FAILURE
, 0, _("multiple field specifications"));
1527 set_fields (optarg
, SETFLD_ALLOW_DASH
);
1531 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1532 if (optarg
[0] != '\0' && optarg
[1] != '\0')
1533 error (EXIT_FAILURE
, 0,
1534 _("the delimiter must be a single character"));
1535 delimiter
= optarg
[0];
1550 case DEV_DEBUG_OPTION
:
1558 if (xstrtoumax (optarg
, nullptr, 10, &header
, "") != LONGINT_OK
1560 error (EXIT_FAILURE
, 0, _("invalid header value %s"),
1570 format_str
= optarg
;
1573 case INVALID_OPTION
:
1574 inval_style
= XARGMATCH ("--invalid", optarg
,
1575 inval_args
, inval_types
);
1578 case_GETOPT_HELP_CHAR
;
1579 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1582 usage (EXIT_FAILURE
);
1586 if (format_str
!= nullptr && grouping
)
1587 error (EXIT_FAILURE
, 0, _("--grouping cannot be combined with --format"));
1589 if (debug
&& ! locale_ok
)
1590 error (0, 0, _("failed to set locale"));
1592 /* Warn about no-op. */
1593 if (debug
&& scale_from
== scale_none
&& scale_to
== scale_none
1594 && !grouping
&& (padding_width
== 0) && (format_str
== nullptr))
1595 error (0, 0, _("no conversion option specified"));
1598 parse_format_string (format_str
);
1602 if (scale_to
!= scale_none
)
1603 error (EXIT_FAILURE
, 0, _("grouping cannot be combined with --to"));
1604 if (debug
&& (strlen (nl_langinfo (THOUSEP
)) == 0))
1605 error (0, 0, _("grouping has no effect in this locale"));
1609 setup_padding_buffer (padding_width
);
1610 auto_padding
= (padding_width
== 0 && delimiter
== DELIMITER_DEFAULT
);
1612 if (inval_style
!= inval_abort
)
1617 if (debug
&& header
)
1618 error (0, 0, _("--header ignored with command-line input"));
1620 for (; optind
< argc
; optind
++)
1621 valid_numbers
&= process_line (argv
[optind
], true);
1625 char *line
= nullptr;
1626 size_t line_allocated
= 0;
1629 while (header
-- && getdelim (&line
, &line_allocated
,
1630 line_delim
, stdin
) > 0)
1631 fputs (line
, stdout
);
1633 while ((len
= getdelim (&line
, &line_allocated
,
1634 line_delim
, stdin
)) > 0)
1636 bool newline
= line
[len
- 1] == line_delim
;
1638 line
[len
- 1] = '\0';
1639 valid_numbers
&= process_line (line
, newline
);
1643 error (EXIT_FAILURE
, errno
, _("error reading input"));
1646 if (debug
&& !valid_numbers
)
1647 error (0, 0, _("failed to convert some of the input numbers"));
1649 int exit_status
= EXIT_SUCCESS
;
1651 && inval_style
!= inval_warn
&& inval_style
!= inval_ignore
)
1652 exit_status
= EXIT_CONVERSION_WARNINGS
;
1654 main_exit (exit_status
);