id: support multiple specified users
[coreutils.git] / src / numfmt.c
blobce5c131e4522eed03a1748d2d6968eebcf65a84d
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2018 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <float.h>
19 #include <getopt.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <langinfo.h>
24 #include "mbsalign.h"
25 #include "argmatch.h"
26 #include "c-ctype.h"
27 #include "die.h"
28 #include "error.h"
29 #include "quote.h"
30 #include "system.h"
31 #include "xstrtol.h"
32 #include "xstrndup.h"
34 #include "set-fields.h"
36 #if HAVE_FPSETPREC
37 # include <ieeefp.h>
38 #endif
40 /* The official name of this program (e.g., no 'g' prefix). */
41 #define PROGRAM_NAME "numfmt"
43 #define AUTHORS proper_name ("Assaf Gordon")
45 /* Exit code when some numbers fail to convert. */
46 enum { EXIT_CONVERSION_WARNINGS = 2 };
48 enum
50 FROM_OPTION = CHAR_MAX + 1,
51 FROM_UNIT_OPTION,
52 TO_OPTION,
53 TO_UNIT_OPTION,
54 ROUND_OPTION,
55 SUFFIX_OPTION,
56 GROUPING_OPTION,
57 PADDING_OPTION,
58 FIELD_OPTION,
59 DEBUG_OPTION,
60 DEV_DEBUG_OPTION,
61 HEADER_OPTION,
62 FORMAT_OPTION,
63 INVALID_OPTION
66 enum scale_type
68 scale_none, /* the default: no scaling. */
69 scale_auto, /* --from only. */
70 scale_SI,
71 scale_IEC,
72 scale_IEC_I /* 'i' suffix is required. */
75 static char const *const scale_from_args[] =
77 "none", "auto", "si", "iec", "iec-i", NULL
80 static enum scale_type const scale_from_types[] =
82 scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
85 static char const *const scale_to_args[] =
87 "none", "si", "iec", "iec-i", NULL
90 static enum scale_type const scale_to_types[] =
92 scale_none, scale_SI, scale_IEC, scale_IEC_I
96 enum round_type
98 round_ceiling,
99 round_floor,
100 round_from_zero,
101 round_to_zero,
102 round_nearest,
105 static char const *const round_args[] =
107 "up", "down", "from-zero", "towards-zero", "nearest", NULL
110 static enum round_type const round_types[] =
112 round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
116 enum inval_type
118 inval_abort,
119 inval_fail,
120 inval_warn,
121 inval_ignore
124 static char const *const inval_args[] =
126 "abort", "fail", "warn", "ignore", NULL
129 static enum inval_type const inval_types[] =
131 inval_abort, inval_fail, inval_warn, inval_ignore
134 static struct option const longopts[] =
136 {"from", required_argument, NULL, FROM_OPTION},
137 {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
138 {"to", required_argument, NULL, TO_OPTION},
139 {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
140 {"round", required_argument, NULL, ROUND_OPTION},
141 {"padding", required_argument, NULL, PADDING_OPTION},
142 {"suffix", required_argument, NULL, SUFFIX_OPTION},
143 {"grouping", no_argument, NULL, GROUPING_OPTION},
144 {"delimiter", required_argument, NULL, 'd'},
145 {"field", required_argument, NULL, FIELD_OPTION},
146 {"debug", no_argument, NULL, DEBUG_OPTION},
147 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
148 {"header", optional_argument, NULL, HEADER_OPTION},
149 {"format", required_argument, NULL, FORMAT_OPTION},
150 {"invalid", required_argument, NULL, INVALID_OPTION},
151 {"zero-terminated", no_argument, NULL, 'z'},
152 {GETOPT_HELP_OPTION_DECL},
153 {GETOPT_VERSION_OPTION_DECL},
154 {NULL, 0, NULL, 0}
157 /* If delimiter has this value, blanks separate fields. */
158 enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
160 /* Maximum number of digits we can safely handle
161 without precision loss, if scaling is 'none'. */
162 enum { MAX_UNSCALED_DIGITS = LDBL_DIG };
164 /* Maximum number of digits we can work with.
165 This is equivalent to 999Y.
166 NOTE: 'long double' can handle more than that, but there's
167 no official suffix assigned beyond Yotta (1000^8). */
168 enum { MAX_ACCEPTABLE_DIGITS = 27 };
170 static enum scale_type scale_from = scale_none;
171 static enum scale_type scale_to = scale_none;
172 static enum round_type round_style = round_from_zero;
173 static enum inval_type inval_style = inval_abort;
174 static const char *suffix = NULL;
175 static uintmax_t from_unit_size = 1;
176 static uintmax_t to_unit_size = 1;
177 static int grouping = 0;
178 static char *padding_buffer = NULL;
179 static size_t padding_buffer_size = 0;
180 static long int padding_width = 0;
181 static long int zero_padding_width = 0;
182 static long int user_precision = -1;
183 static const char *format_str = NULL;
184 static char *format_str_prefix = NULL;
185 static char *format_str_suffix = NULL;
187 /* By default, any conversion error will terminate the program. */
188 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
191 /* auto-pad each line based on skipped whitespace. */
192 static int auto_padding = 0;
193 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
195 /* field delimiter */
196 static int delimiter = DELIMITER_DEFAULT;
198 /* line delimiter. */
199 static unsigned char line_delim = '\n';
201 /* if non-zero, the first 'header' lines from STDIN are skipped. */
202 static uintmax_t header = 0;
204 /* Debug for users: print warnings to STDERR about possible
205 error (similar to sort's debug). */
206 static bool debug;
208 /* will be set according to the current locale. */
209 static const char *decimal_point;
210 static int decimal_point_length;
212 /* debugging for developers. Enables devmsg(). */
213 static bool dev_debug = false;
216 static inline int
217 default_scale_base (enum scale_type scale)
219 switch (scale)
221 case scale_IEC:
222 case scale_IEC_I:
223 return 1024;
225 case scale_none:
226 case scale_auto:
227 case scale_SI:
228 default:
229 return 1000;
/* Return nonzero if SUF is one of the recognized unit-suffix letters
   (K, M, G, T, P, E, Z, Y), zero otherwise.

   The NUL byte is rejected explicitly: strchr treats the terminating
   null as part of the searched string, so without the extra test an
   exhausted input would be reported as carrying a valid suffix.  */
static inline int
valid_suffix (const char suf)
{
  static const char *valid_suffixes = "KMGTPEZY";
  return suf != '\0' && strchr (valid_suffixes, suf) != NULL;
}
/* Map the suffix letter SUF to its power of the scale base
   (K -> 1, M -> 2, ... Y -> 8).  Any other character yields 0.  */
static inline int
suffix_power (const char suf)
{
  switch (suf)
    {
    case 'K':                  /* kilo or kibi.  */
      return 1;

    case 'M':                  /* mega or mebi.  */
      return 2;

    case 'G':                  /* giga or gibi.  */
      return 3;

    case 'T':                  /* tera or tebi.  */
      return 4;

    case 'P':                  /* peta or pebi.  */
      return 5;

    case 'E':                  /* exa or exbi.  */
      return 6;

    case 'Z':                  /* zetta or 2**70.  */
      return 7;

    case 'Y':                  /* yotta or 2**80.  */
      return 8;

    default:                   /* should never happen. assert?  */
      return 0;
    }
}
/* Inverse of suffix_power: return the suffix letter (as a string) for
   POWER in the range 0..8, where power 0 maps to the empty string.
   Any larger POWER yields the literal "(error)".  */
static inline const char *
suffix_power_char (unsigned int power)
{
  static const char *const power_letters[] =
    {
      "", "K", "M", "G", "T", "P", "E", "Z", "Y"
    };

  if (power < sizeof power_letters / sizeof power_letters[0])
    return power_letters[power];

  return "(error)";
}
/* Similar to 'powl(3)' but without requiring 'libm'.
   Return BASE raised to the non-negative integer power X.  */
static long double
powerld (long double base, unsigned int x)
{
  long double result = base;

  /* X == 0 does occur: callers pass a zero power when the input has no
     suffix and a zero precision when no fraction digits are wanted.  */
  if (x == 0)
    return 1;

  /* TODO: check for overflow, inf?  */
  while (--x)
    result *= base;
  return result;
}
/* Similar to 'fabs(3)' but without requiring 'libm'.
   Return the magnitude of VAL.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;
  return val;
}
/* Scale down 'val', returns 'updated val' and 'x', such that
     val*base^X = original val
   Similar to "frexpl(3)" but without requiring 'libm',
   allowing only integer scale, limited functionality and error checking.

   X may be NULL when only the scaled value is wanted.  Values outside
   [-LDBL_MAX, LDBL_MAX] are returned unchanged with a power of 0.  */
static long double
expld (long double val, unsigned int base, unsigned int /*output */ *x)
{
  unsigned int power = 0;

  if (val >= -LDBL_MAX && val <= LDBL_MAX)
    {
      /* Divide until the magnitude drops below BASE, counting steps.  */
      while (absld (val) >= base)
        {
          ++power;
          val /= base;
        }
    }
  if (x)
    *x = power;
  return val;
}
/* EXTREMELY limited 'ceil' - without 'libm'.
   Assumes values that fit in intmax_t.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  /* The conversion truncates towards zero; bump the result when that
     left it below VAL (i.e. VAL was positive with a fraction).  */
  intmax_t intval = val;
  if (intval < val)
    intval++;
  return intval;
}
/* EXTREMELY limited 'floor' - without 'libm'.
   Assumes values that fit in intmax_t.
   Implemented as the negated ceiling of the negated value.  */
static inline intmax_t
simple_round_floor (long double val)
{
  return -simple_round_ceiling (-val);
}
/* EXTREMELY limited 'round away from zero'.
   Assumes values that fit in intmax_t.
   Negative values are floored, all others are rounded up.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  return val < 0 ? simple_round_floor (val) : simple_round_ceiling (val);
}
/* EXTREMELY limited 'round towards zero'.
   Assumes values that fit in intmax_t.
   Relies on the floating-to-integer conversion discarding the
   fractional part.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  return val;
}
/* EXTREMELY limited 'round' - without 'libm'.
   Assumes values that fit in intmax_t.
   Shifts VAL by half a unit away from zero, then lets the conversion
   to intmax_t drop the fraction.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  return val < 0 ? val - 0.5 : val + 0.5;
}
399 static inline long double _GL_ATTRIBUTE_CONST
400 simple_round (long double val, enum round_type t)
402 intmax_t rval;
403 intmax_t intmax_mul = val / INTMAX_MAX;
404 val -= (long double) INTMAX_MAX * intmax_mul;
406 switch (t)
408 case round_ceiling:
409 rval = simple_round_ceiling (val);
410 break;
412 case round_floor:
413 rval = simple_round_floor (val);
414 break;
416 case round_from_zero:
417 rval = simple_round_from_zero (val);
418 break;
420 case round_to_zero:
421 rval = simple_round_to_zero (val);
422 break;
424 case round_nearest:
425 rval = simple_round_nearest (val);
426 break;
428 default:
429 /* to silence the compiler - this should never happen. */
430 return 0;
433 return (long double) INTMAX_MAX * intmax_mul + rval;
/* Result codes of the simple_strtod_* parsers.  The two SSE_OK* values
   mean a number was read; every other value is a failure.  */
enum simple_strtod_error
{
  SSE_OK = 0,
  SSE_OK_PRECISION_LOSS,
  SSE_OVERFLOW,
  SSE_INVALID_NUMBER,

  /* the following are returned by 'simple_strtod_human'.  */
  SSE_VALID_BUT_FORBIDDEN_SUFFIX,
  SSE_INVALID_SUFFIX,
  SSE_MISSING_I_SUFFIX
};
449 /* Read an *integer* INPUT_STR,
450 but return the integer value in a 'long double' VALUE
451 hence, no UINTMAX_MAX limitation.
452 NEGATIVE is updated, and is stored separately from the VALUE
453 so that signbit() isn't required to determine the sign of -0..
454 ENDPTR is required (unlike strtod) and is used to store a pointer
455 to the character after the last character used in the conversion.
457 Note locale'd grouping is not supported,
458 nor is skipping of white-space supported.
460 Returns:
461 SSE_OK - valid number.
462 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
463 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
464 SSE_INVALID_NUMBER - if no digits were found. */
465 static enum simple_strtod_error
466 simple_strtod_int (const char *input_str,
467 char **endptr, long double *value, bool *negative)
469 enum simple_strtod_error e = SSE_OK;
471 long double val = 0;
472 unsigned int digits = 0;
473 bool found_digit = false;
475 if (*input_str == '-')
477 input_str++;
478 *negative = true;
480 else
481 *negative = false;
483 *endptr = (char *) input_str;
484 while (*endptr && c_isdigit (**endptr))
486 int digit = (**endptr) - '0';
488 found_digit = true;
490 if (val || digit)
491 digits++;
493 if (digits > MAX_UNSCALED_DIGITS)
494 e = SSE_OK_PRECISION_LOSS;
496 if (digits > MAX_ACCEPTABLE_DIGITS)
497 return SSE_OVERFLOW;
499 val *= 10;
500 val += digit;
502 ++(*endptr);
504 if (! found_digit
505 && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length))
506 return SSE_INVALID_NUMBER;
507 if (*negative)
508 val = -val;
510 if (value)
511 *value = val;
513 return e;
516 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
517 and return the value in a 'long double' VALUE.
518 ENDPTR is required (unlike strtod) and is used to store a pointer
519 to the character after the last character used in the conversion.
520 PRECISION is optional and used to indicate fractions are present.
522 Note locale'd grouping is not supported,
523 nor is skipping of white-space supported.
525 Returns:
526 SSE_OK - valid number.
527 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
528 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
529 SSE_INVALID_NUMBER - if no digits were found. */
530 static enum simple_strtod_error
531 simple_strtod_float (const char *input_str,
532 char **endptr,
533 long double *value,
534 size_t *precision)
536 bool negative;
537 enum simple_strtod_error e = SSE_OK;
539 if (precision)
540 *precision = 0;
542 /* TODO: accept locale'd grouped values for the integral part. */
543 e = simple_strtod_int (input_str, endptr, value, &negative);
544 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
545 return e;
547 /* optional decimal point + fraction. */
548 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
550 char *ptr2;
551 long double val_frac = 0;
552 bool neg_frac;
554 (*endptr) += decimal_point_length;
555 enum simple_strtod_error e2 =
556 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
557 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
558 return e2;
559 if (e2 == SSE_OK_PRECISION_LOSS)
560 e = e2; /* propagate warning. */
561 if (neg_frac)
562 return SSE_INVALID_NUMBER;
564 /* number of digits in the fractions. */
565 size_t exponent = ptr2 - *endptr;
567 val_frac = ((long double) val_frac) / powerld (10, exponent);
569 /* TODO: detect loss of precision (only really 18 digits
570 of precision across all digits (before and after '.')). */
571 if (value)
573 if (negative)
574 *value -= val_frac;
575 else
576 *value += val_frac;
579 if (precision)
580 *precision = exponent;
582 *endptr = ptr2;
584 return e;
587 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
588 and return the value in a 'long double' VALUE,
589 with the precision of the input returned in PRECISION.
590 ENDPTR is required (unlike strtod) and is used to store a pointer
591 to the character after the last character used in the conversion.
592 ALLOWED_SCALING determines the scaling supported.
594 TODO:
595 support locale'd grouping
596 accept scentific and hex floats (probably use strtold directly)
598 Returns:
599 SSE_OK - valid number.
600 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
601 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
602 SSE_INVALID_NUMBER - if no digits were found.
603 SSE_VALID_BUT_FORBIDDEN_SUFFIX
604 SSE_INVALID_SUFFIX
605 SSE_MISSING_I_SUFFIX */
606 static enum simple_strtod_error
607 simple_strtod_human (const char *input_str,
608 char **endptr, long double *value, size_t *precision,
609 enum scale_type allowed_scaling)
611 int power = 0;
612 /* 'scale_auto' is checked below. */
613 int scale_base = default_scale_base (allowed_scaling);
615 devmsg ("simple_strtod_human:\n input string: %s\n"
616 " locale decimal-point: %s\n"
617 " MAX_UNSCALED_DIGITS: %d\n",
618 quote_n (0, input_str),
619 quote_n (1, decimal_point),
620 MAX_UNSCALED_DIGITS);
622 enum simple_strtod_error e =
623 simple_strtod_float (input_str, endptr, value, precision);
624 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
625 return e;
627 devmsg (" parsed numeric value: %Lf\n"
628 " input precision = %d\n", *value, (int)*precision);
630 if (**endptr != '\0')
632 /* process suffix. */
634 /* Skip any blanks between the number and suffix. */
635 while (isblank (to_uchar (**endptr)))
636 (*endptr)++;
638 if (!valid_suffix (**endptr))
639 return SSE_INVALID_SUFFIX;
641 if (allowed_scaling == scale_none)
642 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
644 power = suffix_power (**endptr);
645 (*endptr)++; /* skip first suffix character. */
647 if (allowed_scaling == scale_auto && **endptr == 'i')
649 /* auto-scaling enabled, and the first suffix character
650 is followed by an 'i' (e.g. Ki, Mi, Gi). */
651 scale_base = 1024;
652 (*endptr)++; /* skip second ('i') suffix character. */
653 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
654 scale_base);
657 *precision = 0; /* Reset, to select precision based on scale. */
660 if (allowed_scaling == scale_IEC_I)
662 if (**endptr == 'i')
663 (*endptr)++;
664 else
665 return SSE_MISSING_I_SUFFIX;
668 long double multiplier = powerld (scale_base, power);
670 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
672 /* TODO: detect loss of precision and overflows. */
673 (*value) = (*value) * multiplier;
675 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
677 return e;
681 static void
682 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
684 char const *msgid = NULL;
686 switch (err)
688 case SSE_OK_PRECISION_LOSS:
689 case SSE_OK:
690 /* should never happen - this function isn't called when OK. */
691 abort ();
693 case SSE_OVERFLOW:
694 msgid = N_("value too large to be converted: %s");
695 break;
697 case SSE_INVALID_NUMBER:
698 msgid = N_("invalid number: %s");
699 break;
701 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
702 msgid = N_("rejecting suffix in input: %s (consider using --from)");
703 break;
705 case SSE_INVALID_SUFFIX:
706 msgid = N_("invalid suffix in input: %s");
707 break;
709 case SSE_MISSING_I_SUFFIX:
710 msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
711 break;
715 if (inval_style != inval_ignore)
716 error (conv_exit_code, 0, gettext (msgid), quote (input_str));
719 /* Convert VAL to a human format string in BUF. */
720 static void
721 double_to_human (long double val, int precision,
722 char *buf, size_t buf_size,
723 enum scale_type scale, int group, enum round_type round)
725 int num_size;
726 char fmt[64];
727 verify (sizeof (fmt) > (INT_BUFSIZE_BOUND (zero_padding_width)
728 + INT_BUFSIZE_BOUND (precision)
729 + 10 /* for %.Lf etc. */));
731 char *pfmt = fmt;
732 *pfmt++ = '%';
734 if (group)
735 *pfmt++ = '\'';
737 if (zero_padding_width)
738 pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width);
740 devmsg ("double_to_human:\n");
742 if (scale == scale_none)
744 val *= powerld (10, precision);
745 val = simple_round (val, round);
746 val /= powerld (10, precision);
748 devmsg ((group) ?
749 " no scaling, returning (grouped) value: %'.*Lf\n" :
750 " no scaling, returning value: %.*Lf\n", precision, val);
752 stpcpy (pfmt, ".*Lf");
754 num_size = snprintf (buf, buf_size, fmt, precision, val);
755 if (num_size < 0 || num_size >= (int) buf_size)
756 die (EXIT_FAILURE, 0,
757 _("failed to prepare value '%Lf' for printing"), val);
758 return;
761 /* Scaling requested by user. */
762 double scale_base = default_scale_base (scale);
764 /* Normalize val to scale. */
765 unsigned int power = 0;
766 val = expld (val, scale_base, &power);
767 devmsg (" scaled value to %Lf * %0.f ^ %u\n", val, scale_base, power);
769 /* Perform rounding. */
770 unsigned int power_adjust = 0;
771 if (user_precision != -1)
772 power_adjust = MIN (power * 3, user_precision);
773 else if (absld (val) < 10)
775 /* for values less than 10, we allow one decimal-point digit,
776 so adjust before rounding. */
777 power_adjust = 1;
780 val *= powerld (10, power_adjust);
781 val = simple_round (val, round);
782 val /= powerld (10, power_adjust);
784 /* two special cases after rounding:
785 1. a "999.99" can turn into 1000 - so scale down
786 2. a "9.99" can turn into 10 - so don't display decimal-point. */
787 if (absld (val) >= scale_base)
789 val /= scale_base;
790 power++;
793 /* should "7.0" be printed as "7" ?
794 if removing the ".0" is preferred, enable the fourth condition. */
795 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
796 /* && (absld (val) > simple_round_floor (val))) */
798 devmsg (" after rounding, value=%Lf * %0.f ^ %u\n", val, scale_base, power);
800 stpcpy (pfmt, ".*Lf%s");
802 int prec = user_precision == -1 ? show_decimal_point : user_precision;
804 /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */
805 num_size = snprintf (buf, buf_size - 1, fmt, prec, val,
806 suffix_power_char (power));
807 if (num_size < 0 || num_size >= (int) buf_size - 1)
808 die (EXIT_FAILURE, 0,
809 _("failed to prepare value '%Lf' for printing"), val);
811 if (scale == scale_IEC_I && power > 0)
812 strncat (buf, "i", buf_size - num_size - 1);
814 devmsg (" returning value: %s\n", quote (buf));
816 return;
819 /* Convert a string of decimal digits, N_STRING, with an optional suffix
820 to an integral value. Suffixes are handled as with --from=auto.
821 Upon successful conversion, return that value.
822 If it cannot be converted, give a diagnostic and exit. */
823 static uintmax_t
824 unit_to_umax (const char *n_string)
826 strtol_error s_err;
827 const char *c_string = n_string;
828 char *t_string = NULL;
829 size_t n_len = strlen (n_string);
830 char *end = NULL;
831 uintmax_t n;
832 const char *suffixes = "KMGTPEZY";
834 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
835 if (n_len && ! c_isdigit (n_string[n_len - 1]))
837 t_string = xmalloc (n_len + 2);
838 end = t_string + n_len - 1;
839 memcpy (t_string, n_string, n_len);
841 if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
842 *end = '\0';
843 else
845 *++end = 'B';
846 *++end = '\0';
847 suffixes = "KMGTPEZY0";
850 c_string = t_string;
853 s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
855 if (s_err != LONGINT_OK || *end || n == 0)
857 free (t_string);
858 die (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
861 free (t_string);
863 return n;
867 static void
868 setup_padding_buffer (size_t min_size)
870 if (padding_buffer_size > min_size)
871 return;
873 padding_buffer_size = min_size + 1;
874 padding_buffer = xrealloc (padding_buffer, padding_buffer_size);
/* Print usage information and exit with STATUS.
   For a STATUS other than EXIT_SUCCESS only emit_try_help is called;
   otherwise the full help text (options, UNIT, FIELDS and FORMAT
   descriptions, exit status and examples) is written to stdout.
   NOTE(review): this listing carries per-line number residue and
   collapsed whitespace, including inside the help string literals;
   restore it from the pristine file before building.  */
877 void
878 usage (int status)
880 if (status != EXIT_SUCCESS)
881 emit_try_help ();
882 else
884 printf (_("\
885 Usage: %s [OPTION]... [NUMBER]...\n\
886 "), program_name);
887 fputs (_("\
888 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
889 "), stdout);
890 emit_mandatory_arg_note ();
891 fputs (_("\
892 --debug print warnings about invalid input\n\
893 "), stdout);
894 fputs (_("\
895 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
896 "), stdout);
897 fputs (_("\
898 --field=FIELDS replace the numbers in these input fields (default=1)\n\
899 see FIELDS below\n\
900 "), stdout);
901 fputs (_("\
902 --format=FORMAT use printf style floating-point FORMAT;\n\
903 see FORMAT below for details\n\
904 "), stdout);
905 fputs (_("\
906 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
907 see UNIT below\n\
908 "), stdout);
909 fputs (_("\
910 --from-unit=N specify the input unit size (instead of the default 1)\n\
911 "), stdout);
912 fputs (_("\
913 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
914 (which means it has no effect in the C/POSIX locale)\n\
915 "), stdout);
916 fputs (_("\
917 --header[=N] print (without converting) the first N header lines;\n\
918 N defaults to 1 if not specified\n\
919 "), stdout);
920 fputs (_("\
921 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
922 abort (default), fail, warn, ignore\n\
923 "), stdout);
924 fputs (_("\
925 --padding=N pad the output to N characters; positive N will\n\
926 right-align; negative N will left-align;\n\
927 padding is ignored if the output is wider than N;\n\
928 the default is to automatically pad if a whitespace\n\
929 is found\n\
930 "), stdout);
931 fputs (_("\
932 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
933 up, down, from-zero (default), towards-zero, nearest\n\
934 "), stdout);
935 fputs (_("\
936 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
937 SUFFIX in input numbers\n\
938 "), stdout);
939 fputs (_("\
940 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
941 "), stdout);
942 fputs (_("\
943 --to-unit=N the output unit size (instead of the default 1)\n\
944 "), stdout);
945 fputs (_("\
946 -z, --zero-terminated line delimiter is NUL, not newline\n\
947 "), stdout);
948 fputs (HELP_OPTION_DESCRIPTION, stdout);
949 fputs (VERSION_OPTION_DESCRIPTION, stdout);
951 fputs (_("\
953 UNIT options:\n"), stdout);
954 fputs (_("\
955 none no auto-scaling is done; suffixes will trigger an error\n\
956 "), stdout);
957 fputs (_("\
958 auto accept optional single/two letter suffix:\n\
959 1K = 1000,\n\
960 1Ki = 1024,\n\
961 1M = 1000000,\n\
962 1Mi = 1048576,\n"), stdout);
963 fputs (_("\
964 si accept optional single letter suffix:\n\
965 1K = 1000,\n\
966 1M = 1000000,\n\
967 ...\n"), stdout);
968 fputs (_("\
969 iec accept optional single letter suffix:\n\
970 1K = 1024,\n\
971 1M = 1048576,\n\
972 ...\n"), stdout);
973 fputs (_("\
974 iec-i accept optional two-letter suffix:\n\
975 1Ki = 1024,\n\
976 1Mi = 1048576,\n\
977 ...\n"), stdout);
979 fputs (_("\n\
980 FIELDS supports cut(1) style field ranges:\n\
981 N N'th field, counted from 1\n\
982 N- from N'th field, to end of line\n\
983 N-M from N'th to M'th field (inclusive)\n\
984 -M from first to M'th field (inclusive)\n\
985 - all fields\n\
986 Multiple fields/ranges can be separated with commas\n\
987 "), stdout);
989 fputs (_("\n\
990 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
991 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
992 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
993 will zero pad the number. Optional negative values (%-10f) will left align.\n\
994 Optional precision (%.1f) will override the input determined precision.\n\
995 "), stdout);
997 printf (_("\n\
998 Exit status is 0 if all input numbers were successfully converted.\n\
999 By default, %s will stop at the first conversion error with exit status 2.\n\
1000 With --invalid='fail' a warning is printed for each conversion error\n\
1001 and the exit status is 2. With --invalid='warn' each conversion error is\n\
1002 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
1003 errors are not diagnosed and the exit status is 0.\n\
1004 "), program_name);
1006 printf (_("\n\
1007 Examples:\n\
1008 $ %s --to=si 1000\n\
1009 -> \"1.0K\"\n\
1010 $ %s --to=iec 2048\n\
1011 -> \"2.0K\"\n\
1012 $ %s --to=iec-i 4096\n\
1013 -> \"4.0Ki\"\n\
1014 $ echo 1K | %s --from=si\n\
1015 -> \"1000\"\n\
1016 $ echo 1K | %s --from=iec\n\
1017 -> \"1024\"\n\
1018 $ df -B1 | %s --header --field 2-4 --to=si\n\
1019 $ ls -l | %s --header --field 5 --to=iec\n\
1020 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1021 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1022 program_name, program_name, program_name,
1023 program_name, program_name, program_name,
1024 program_name, program_name, program_name);
1025 emit_ancillary_info (PROGRAM_NAME);
1027 exit (status);
1030 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1031 1. padding (e.g. %20f)
1032 2. alignment (e.g. %-20f)
1033 3. grouping (e.g. %'f)
1035 Only a limited subset of printf(3) syntax is supported.
1037 TODO:
1038 support %e %g etc. rather than just %f
1040 NOTES:
1041 1. This function sets the global variables:
1042 padding_width, padding_alignment, grouping,
1043 format_str_prefix, format_str_suffix
1044 2. The function aborts on any errors. */
1045 static void
1046 parse_format_string (char const *fmt)
1048 size_t i;
1049 size_t prefix_len = 0;
1050 size_t suffix_pos;
1051 long int pad = 0;
1052 char *endptr = NULL;
1053 bool zero_padding = false;
1055 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
1057 if (!fmt[i])
1058 die (EXIT_FAILURE, 0,
1059 _("format %s has no %% directive"), quote (fmt));
1060 prefix_len++;
1063 i++;
1064 while (true)
1066 size_t skip = strspn (fmt + i, " ");
1067 i += skip;
1068 if (fmt[i] == '\'')
1070 grouping = 1;
1071 i++;
1073 else if (fmt[i] == '0')
1075 zero_padding = true;
1076 i++;
1078 else if (! skip)
1079 break;
1082 errno = 0;
1083 pad = strtol (fmt + i, &endptr, 10);
1084 if (errno == ERANGE)
1085 die (EXIT_FAILURE, 0,
1086 _("invalid format %s (width overflow)"), quote (fmt));
1088 if (endptr != (fmt + i) && pad != 0)
1090 if (debug && padding_width && !(zero_padding && pad > 0))
1091 error (0, 0, _("--format padding overriding --padding"));
1093 if (pad < 0)
1095 padding_alignment = MBS_ALIGN_LEFT;
1096 padding_width = -pad;
1098 else
1100 if (zero_padding)
1101 zero_padding_width = pad;
1102 else
1103 padding_width = pad;
1107 i = endptr - fmt;
1109 if (fmt[i] == '\0')
1110 die (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
1112 if (fmt[i] == '.')
1114 i++;
1115 errno = 0;
1116 user_precision = strtol (fmt + i, &endptr, 10);
1117 if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
1118 || isblank (fmt[i]) || fmt[i] == '+')
1120 /* Note we disallow negative user_precision to be
1121 consistent with printf(1). POSIX states that
1122 negative precision is only supported (and ignored)
1123 when used with '.*f'. glibc at least will malform
1124 output when passed a direct negative precision. */
1125 die (EXIT_FAILURE, 0,
1126 _("invalid precision in format %s"), quote (fmt));
1128 i = endptr - fmt;
1131 if (fmt[i] != 'f')
1132 die (EXIT_FAILURE, 0, _("invalid format %s,"
1133 " directive must be %%[0]['][-][N][.][N]f"),
1134 quote (fmt));
1135 i++;
1136 suffix_pos = i;
1138 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
1139 if (fmt[i] == '%' && fmt[i + 1] != '%')
1140 die (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
1141 quote (fmt));
1143 if (prefix_len)
1144 format_str_prefix = xstrndup (fmt, prefix_len);
1145 if (fmt[suffix_pos] != '\0')
1146 format_str_suffix = xstrdup (fmt + suffix_pos);
1148 devmsg ("format String:\n input: %s\n grouping: %s\n"
1149 " padding width: %ld\n alignment: %s\n"
1150 " prefix: %s\n suffix: %s\n",
1151 quote_n (0, fmt), (grouping) ? "yes" : "no",
1152 padding_width,
1153 (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
1154 quote_n (1, format_str_prefix ? format_str_prefix : ""),
1155 quote_n (2, format_str_suffix ? format_str_suffix : ""));
1158 /* Parse a numeric value (with optional suffix) from a string.
1159 Returns a long double value, with input precision.
1161 If there's an error converting the string to value - exits with
1162 an error.
1164 If there are any trailing characters after the number
1165 (besides a valid suffix) - exits with an error. */
1166 static enum simple_strtod_error
1167 parse_human_number (const char *str, long double /*output */ *value,
1168 size_t *precision)
1170 char *ptr = NULL;
1172 enum simple_strtod_error e =
1173 simple_strtod_human (str, &ptr, value, precision, scale_from);
1174 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1176 simple_strtod_fatal (e, str);
1177 return e;
1180 if (ptr && *ptr != '\0')
1182 if (inval_style != inval_ignore)
1183 error (conv_exit_code, 0, _("invalid suffix in input %s: %s"),
1184 quote_n (0, str), quote_n (1, ptr));
1185 e = SSE_INVALID_SUFFIX;
1187 return e;
1191 /* Print the given VAL, using the requested representation.
1192 The number is printed to STDOUT, with padding and alignment. */
1193 static int
1194 prepare_padded_number (const long double val, size_t precision)
1196 /* Generate Output. */
1197 char buf[128];
1199 size_t precision_used = user_precision == -1 ? precision : user_precision;
1201 /* Can't reliably print too-large values without auto-scaling. */
1202 unsigned int x;
1203 expld (val, 10, &x);
1205 if (scale_to == scale_none
1206 && x + precision_used > MAX_UNSCALED_DIGITS)
1208 if (inval_style != inval_ignore)
1210 if (precision_used)
1211 error (conv_exit_code, 0,
1212 _("value/precision too large to be printed: '%Lg/%"PRIuMAX"'"
1213 " (consider using --to)"), val, (uintmax_t)precision_used);
1214 else
1215 error (conv_exit_code, 0,
1216 _("value too large to be printed: '%Lg'"
1217 " (consider using --to)"), val);
1219 return 0;
1222 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1224 if (inval_style != inval_ignore)
1225 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1226 " (cannot handle values > 999Y)"), val);
1227 return 0;
1230 double_to_human (val, precision_used, buf, sizeof (buf),
1231 scale_to, grouping, round_style);
1232 if (suffix)
1233 strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
1235 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1236 val, quote (buf));
1238 if (padding_width && strlen (buf) < padding_width)
1240 size_t w = padding_width;
1241 mbsalign (buf, padding_buffer, padding_buffer_size, &w,
1242 padding_alignment, MBA_UNIBYTE_ONLY);
1244 devmsg (" After padding: %s\n", quote (padding_buffer));
1246 else
1248 setup_padding_buffer (strlen (buf) + 1);
1249 strcpy (padding_buffer, buf);
1252 return 1;
1255 static void
1256 print_padded_number (void)
1258 if (format_str_prefix)
1259 fputs (format_str_prefix, stdout);
1261 fputs (padding_buffer, stdout);
1263 if (format_str_suffix)
1264 fputs (format_str_suffix, stdout);
1267 /* Converts the TEXT number string to the requested representation,
1268 and handles automatic suffix addition. */
1269 static int
1270 process_suffixed_number (char *text, long double *result,
1271 size_t *precision, long int field)
1273 if (suffix && strlen (text) > strlen (suffix))
1275 char *possible_suffix = text + strlen (text) - strlen (suffix);
1277 if (STREQ (suffix, possible_suffix))
1279 /* trim suffix, ONLY if it's at the end of the text. */
1280 *possible_suffix = '\0';
1281 devmsg ("trimming suffix %s\n", quote (suffix));
1283 else
1284 devmsg ("no valid suffix found\n");
1287 /* Skip white space - always. */
1288 char *p = text;
1289 while (*p && isblank (to_uchar (*p)))
1290 ++p;
1291 const unsigned int skip_count = text - p;
1293 /* setup auto-padding. */
1294 if (auto_padding)
1296 if (skip_count > 0 || field > 1)
1298 padding_width = strlen (text);
1299 setup_padding_buffer (padding_width);
1301 else
1303 padding_width = 0;
1305 devmsg ("setting Auto-Padding to %ld characters\n", padding_width);
1308 long double val = 0;
1309 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1310 if (e == SSE_OK_PRECISION_LOSS && debug)
1311 error (0, 0, _("large input value %s: possible precision loss"),
1312 quote (p));
1314 if (from_unit_size != 1 || to_unit_size != 1)
1315 val = (val * from_unit_size) / to_unit_size;
1317 *result = val;
1319 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1322 /* Return a pointer to the beginning of the next field in line.
1323 The line pointer is moved to the end of the next field. */
1324 static char*
1325 next_field (char **line)
1327 char *field_start = *line;
1328 char *field_end = field_start;
1330 if (delimiter != DELIMITER_DEFAULT)
1332 if (*field_start != delimiter)
1334 while (*field_end && *field_end != delimiter)
1335 ++field_end;
1337 /* else empty field */
1339 else
1341 /* keep any space prefix in the returned field */
1342 while (*field_end && field_sep (*field_end))
1343 ++field_end;
1345 while (*field_end && ! field_sep (*field_end))
1346 ++field_end;
1349 *line = field_end;
1350 return field_start;
1353 static bool _GL_ATTRIBUTE_PURE
1354 include_field (uintmax_t field)
1356 struct field_range_pair *p = frp;
1357 if (!p)
1358 return field == 1;
1360 while (p->lo != UINTMAX_MAX)
1362 if (p->lo <= field && p->hi >= field)
1363 return true;
1364 ++p;
1366 return false;
/* Write field number FIELD, whose text is TEXT, to stdout.  A field
   that is selected for conversion is written in converted form; if it
   is not selected, or its conversion fails, TEXT itself is written.

   Return false only when a selected field could not be converted.  */
static bool
process_field (char *text, uintmax_t field)
{
  if (! include_field (field))
    {
      fputs (text, stdout);
      return true;
    }

  long double number = 0;
  size_t precision = 0;

  /* Formatting is attempted only after a successful parse.  */
  bool converted = (process_suffixed_number (text, &number, &precision, field)
                    && prepare_padded_number (number, precision));

  if (converted)
    print_padded_number ();
  else
    fputs (text, stdout);

  return converted;
}
1397 /* Convert number in a given line of text.
1398 NEWLINE specifies whether to output a '\n' for this "line". */
1399 static int
1400 process_line (char *line, bool newline)
1402 char *next;
1403 uintmax_t field = 0;
1404 bool valid_number = true;
1406 while (true) {
1407 ++field;
1408 next = next_field (&line);
1410 if (*line != '\0')
1412 /* nul terminate the current field string and process */
1413 *line = '\0';
1415 if (! process_field (next, field))
1416 valid_number = false;
1418 fputc ((delimiter == DELIMITER_DEFAULT) ?
1419 ' ' : delimiter, stdout);
1420 ++line;
1422 else
1424 /* end of the line, process the last field and finish */
1425 if (! process_field (next, field))
1426 valid_number = false;
1428 break;
1432 if (newline)
1433 putchar (line_delim);
1435 return valid_number;
1439 main (int argc, char **argv)
1441 int valid_numbers = 1;
1442 bool locale_ok;
1444 initialize_main (&argc, &argv);
1445 set_program_name (argv[0]);
1446 locale_ok = !!setlocale (LC_ALL, "");
1447 bindtextdomain (PACKAGE, LOCALEDIR);
1448 textdomain (PACKAGE);
1450 #if HAVE_FPSETPREC
1451 /* Enabled extended precision if needed. */
1452 fpsetprec (FP_PE);
1453 #endif
1455 decimal_point = nl_langinfo (RADIXCHAR);
1456 if (decimal_point == NULL || strlen (decimal_point) == 0)
1457 decimal_point = ".";
1458 decimal_point_length = strlen (decimal_point);
1460 atexit (close_stdout);
1462 while (true)
1464 int c = getopt_long (argc, argv, "d:z", longopts, NULL);
1466 if (c == -1)
1467 break;
1469 switch (c)
1471 case FROM_OPTION:
1472 scale_from = XARGMATCH ("--from", optarg,
1473 scale_from_args, scale_from_types);
1474 break;
1476 case FROM_UNIT_OPTION:
1477 from_unit_size = unit_to_umax (optarg);
1478 break;
1480 case TO_OPTION:
1481 scale_to =
1482 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1483 break;
1485 case TO_UNIT_OPTION:
1486 to_unit_size = unit_to_umax (optarg);
1487 break;
1489 case ROUND_OPTION:
1490 round_style = XARGMATCH ("--round", optarg, round_args, round_types);
1491 break;
1493 case GROUPING_OPTION:
1494 grouping = 1;
1495 break;
1497 case PADDING_OPTION:
1498 if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
1499 || padding_width == 0)
1500 die (EXIT_FAILURE, 0, _("invalid padding value %s"),
1501 quote (optarg));
1502 if (padding_width < 0)
1504 padding_alignment = MBS_ALIGN_LEFT;
1505 padding_width = -padding_width;
1507 /* TODO: We probably want to apply a specific --padding
1508 to --header lines too. */
1509 break;
1511 case FIELD_OPTION:
1512 if (n_frp)
1513 die (EXIT_FAILURE, 0, _("multiple field specifications"));
1514 set_fields (optarg, SETFLD_ALLOW_DASH);
1515 break;
1517 case 'd':
1518 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1519 if (optarg[0] != '\0' && optarg[1] != '\0')
1520 die (EXIT_FAILURE, 0,
1521 _("the delimiter must be a single character"));
1522 delimiter = optarg[0];
1523 break;
1525 case 'z':
1526 line_delim = '\0';
1527 break;
1529 case SUFFIX_OPTION:
1530 suffix = optarg;
1531 break;
1533 case DEBUG_OPTION:
1534 debug = true;
1535 break;
1537 case DEV_DEBUG_OPTION:
1538 dev_debug = true;
1539 debug = true;
1540 break;
1542 case HEADER_OPTION:
1543 if (optarg)
1545 if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
1546 || header == 0)
1547 die (EXIT_FAILURE, 0, _("invalid header value %s"),
1548 quote (optarg));
1550 else
1552 header = 1;
1554 break;
1556 case FORMAT_OPTION:
1557 format_str = optarg;
1558 break;
1560 case INVALID_OPTION:
1561 inval_style = XARGMATCH ("--invalid", optarg,
1562 inval_args, inval_types);
1563 break;
1565 case_GETOPT_HELP_CHAR;
1566 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1568 default:
1569 usage (EXIT_FAILURE);
1573 if (format_str != NULL && grouping)
1574 die (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1576 if (debug && ! locale_ok)
1577 error (0, 0, _("failed to set locale"));
1579 /* Warn about no-op. */
1580 if (debug && scale_from == scale_none && scale_to == scale_none
1581 && !grouping && (padding_width == 0) && (format_str == NULL))
1582 error (0, 0, _("no conversion option specified"));
1584 if (format_str)
1585 parse_format_string (format_str);
1587 if (grouping)
1589 if (scale_to != scale_none)
1590 die (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1591 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1592 error (0, 0, _("grouping has no effect in this locale"));
1596 setup_padding_buffer (padding_width);
1597 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1599 if (inval_style != inval_abort)
1600 conv_exit_code = 0;
1602 if (argc > optind)
1604 if (debug && header)
1605 error (0, 0, _("--header ignored with command-line input"));
1607 for (; optind < argc; optind++)
1608 valid_numbers &= process_line (argv[optind], true);
1610 else
1612 char *line = NULL;
1613 size_t line_allocated = 0;
1614 ssize_t len;
1616 while (header-- && getdelim (&line, &line_allocated,
1617 line_delim, stdin) > 0)
1618 fputs (line, stdout);
1620 while ((len = getdelim (&line, &line_allocated,
1621 line_delim, stdin)) > 0)
1623 bool newline = line[len - 1] == line_delim;
1624 if (newline)
1625 line[len - 1] = '\0';
1626 valid_numbers &= process_line (line, newline);
1629 IF_LINT (free (line));
1631 if (ferror (stdin))
1632 error (0, errno, _("error reading input"));
1635 #ifdef lint
1636 free (padding_buffer);
1637 free (format_str_prefix);
1638 free (format_str_suffix);
1639 reset_fields ();
1640 #endif
1642 if (debug && !valid_numbers)
1643 error (0, 0, _("failed to convert some of the input numbers"));
1645 int exit_status = EXIT_SUCCESS;
1646 if (!valid_numbers
1647 && inval_style != inval_warn && inval_style != inval_ignore)
1648 exit_status = EXIT_CONVERSION_WARNINGS;
1650 return exit_status;