maint: consolidate developer debug messages
[coreutils.git] / src / numfmt.c
blobf7c8e5eded6528f715de554c0ed08e754024e2b5
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <float.h>
19 #include <getopt.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <langinfo.h>
24 #include "mbsalign.h"
25 #include "argmatch.h"
26 #include "error.h"
27 #include "quote.h"
28 #include "system.h"
29 #include "xstrtol.h"
30 #include "xstrndup.h"
32 /* The official name of this program (e.g., no 'g' prefix). */
33 #define PROGRAM_NAME "numfmt"
35 #define AUTHORS proper_name ("Assaf Gordon")
/* Exit status used when some of the input numbers could not be
   converted.  */
enum { EXIT_CONVERSION_WARNINGS = 2 };
/* Codes for the long-only options.  Numbering starts just above
   CHAR_MAX so that no code can be mistaken for a short option
   character.  */
enum
{
  FROM_OPTION = CHAR_MAX + 1,
  FROM_UNIT_OPTION,
  TO_OPTION,
  TO_UNIT_OPTION,
  ROUND_OPTION,
  SUFFIX_OPTION,
  GROUPING_OPTION,
  PADDING_OPTION,
  FIELD_OPTION,
  DEBUG_OPTION,
  DEV_DEBUG_OPTION,
  HEADER_OPTION,
  FORMAT_OPTION,
  INVALID_OPTION
};
/* The scaling conventions known to the program.  */
enum scale_type
{
  scale_none,                   /* the default: no scaling.  */
  scale_auto,                   /* --from only.  */
  scale_SI,
  scale_IEC,
  scale_IEC_I                   /* 'i' suffix is required.  */
};

/* Keywords for the input scale; entry N names scale_from_types[N].  */
static char const *const scale_from_args[] =
{
  "none", "auto", "si", "iec", "iec-i", NULL
};

static enum scale_type const scale_from_types[] =
{
  scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
};

/* Keywords for the output scale; entry N names scale_to_types[N].
   There is no "auto" entry in this table.  */
static char const *const scale_to_args[] =
{
  "none", "si", "iec", "iec-i", NULL
};

static enum scale_type const scale_to_types[] =
{
  scale_none, scale_SI, scale_IEC, scale_IEC_I
};
/* The supported rounding methods.  */
enum round_type
{
  round_ceiling,
  round_floor,
  round_from_zero,
  round_to_zero,
  round_nearest,
};

/* Keywords for the rounding method; entry N names round_types[N].  */
static char const *const round_args[] =
{
  "up", "down", "from-zero", "towards-zero", "nearest", NULL
};

static enum round_type const round_types[] =
{
  round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
};
/* Failure modes for numbers that cannot be converted.  */
enum inval_type
{
  inval_abort,
  inval_fail,
  inval_warn,
  inval_ignore
};

/* Keywords for the failure mode; entry N names inval_types[N].  */
static char const *const inval_args[] =
{
  "abort", "fail", "warn", "ignore", NULL
};

static enum inval_type const inval_types[] =
{
  inval_abort, inval_fail, inval_warn, inval_ignore
};
126 static struct option const longopts[] =
128 {"from", required_argument, NULL, FROM_OPTION},
129 {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
130 {"to", required_argument, NULL, TO_OPTION},
131 {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
132 {"round", required_argument, NULL, ROUND_OPTION},
133 {"padding", required_argument, NULL, PADDING_OPTION},
134 {"suffix", required_argument, NULL, SUFFIX_OPTION},
135 {"grouping", no_argument, NULL, GROUPING_OPTION},
136 {"delimiter", required_argument, NULL, 'd'},
137 {"field", required_argument, NULL, FIELD_OPTION},
138 {"debug", no_argument, NULL, DEBUG_OPTION},
139 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
140 {"header", optional_argument, NULL, HEADER_OPTION},
141 {"format", required_argument, NULL, FORMAT_OPTION},
142 {"invalid", required_argument, NULL, INVALID_OPTION},
143 {GETOPT_HELP_OPTION_DECL},
144 {GETOPT_VERSION_OPTION_DECL},
145 {NULL, 0, NULL, 0}
/* Sentinel stored in 'delimiter' to mean "fields are separated by
   blanks"; it lies outside the range of any single character.  */
enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };

/* Largest digit count that can be handled without precision loss
   when scaling is 'none'.  */
enum { MAX_UNSCALED_DIGITS = 18 };

/* Largest digit count accepted at all; this corresponds to 999Y.
   NOTE: 'long double' can hold more, but no official suffix exists
   beyond Yotta (1000^8).  */
enum { MAX_ACCEPTABLE_DIGITS = 27 };
161 static enum scale_type scale_from = scale_none;
162 static enum scale_type scale_to = scale_none;
163 static enum round_type _round = round_from_zero;
164 static enum inval_type _invalid = inval_abort;
165 static const char *suffix = NULL;
166 static uintmax_t from_unit_size = 1;
167 static uintmax_t to_unit_size = 1;
168 static int grouping = 0;
169 static char *padding_buffer = NULL;
170 static size_t padding_buffer_size = 0;
171 static long int padding_width = 0;
172 static const char *format_str = NULL;
173 static char *format_str_prefix = NULL;
174 static char *format_str_suffix = NULL;
176 /* By default, any conversion error will terminate the program. */
177 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
180 /* auto-pad each line based on skipped whitespace. */
181 static int auto_padding = 0;
182 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
183 static long int field = 1;
184 static int delimiter = DELIMITER_DEFAULT;
186 /* if non-zero, the first 'header' lines from STDIN are skipped. */
187 static uintmax_t header = 0;
189 /* Debug for users: print warnings to STDERR about possible
190 error (similar to sort's debug). */
191 static bool debug;
193 /* debugging for developers. Enables devmsg(). */
194 bool dev_debug = false;
196 /* will be set according to the current locale. */
197 static const char *decimal_point;
198 static int decimal_point_length;
201 static inline int
202 default_scale_base (enum scale_type scale)
204 switch (scale)
206 case scale_IEC:
207 case scale_IEC_I:
208 return 1024;
210 case scale_none:
211 case scale_auto:
212 case scale_SI:
213 default:
214 return 1000;
/* Return nonzero if SUF is one of the magnitude letters
   K, M, G, T, P, E, Z or Y, and zero otherwise.

   The NUL byte is rejected explicitly: strchr() treats the string
   terminator as part of the string, so without the extra test an
   end-of-input character would be reported as a valid suffix and a
   caller that then steps over "the suffix" would walk past the end of
   its buffer.  */
static inline int
valid_suffix (const char suf)
{
  static const char *valid_suffixes = "KMGTPEZY";
  return suf != '\0' && strchr (valid_suffixes, suf) != NULL;
}
/* Map a magnitude letter to its exponent: 'K' (kilo/kibi) is 1,
   'M' is 2, and so on up to 'Y' (yotta, 2**80) which is 8.
   Any other character, including NUL, yields 0.  */
static inline int
suffix_power (const char suf)
{
  static const char letters[] = "KMGTPEZY";
  const char *hit = NULL;

  /* strchr() would match the terminator, so skip the lookup for NUL.  */
  if (suf != '\0')
    hit = strchr (letters, suf);

  return hit ? (int) (hit - letters) + 1 : 0;
}
/* Inverse of suffix_power: return the suffix string for POWER
   ("" for 0, "K" for 1, ... "Y" for 8).  Out-of-range values yield
   the string "(error)".  */
static inline const char *
suffix_power_character (unsigned int power)
{
  static const char *const names[] =
    { "", "K", "M", "G", "T", "P", "E", "Z", "Y" };

  if (power < sizeof names / sizeof names[0])
    return names[power];

  return "(error)";
}
/* Raise BASE to the non-negative integer power X by repeated
   multiplication; a stand-in for 'powl(3)' that avoids 'libm'.
   Returns 1 when X is 0.  No overflow or infinity checking is done.  */
static long double
powerld (long double base, unsigned int x)
{
  long double result = 1;
  unsigned int i;

  for (i = 0; i < x; i++)
    result *= base;

  return result;
}
/* Absolute value of VAL; a stand-in for 'fabs(3)' that avoids
   'libm'.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;
  return val;
}
/* Repeatedly divide VAL by BASE until its magnitude drops below BASE.
   Returns the reduced value and, when X is non-NULL, stores the
   number of divisions in *X, so that
     returned value * BASE^(*X) = original VAL.
   Values outside [-LDBL_MAX, LDBL_MAX] are returned untouched with a
   count of 0.  Resembles "frexpl(3)" without needing 'libm', but only
   supports integer scaling and does very little error checking.  */
static long double
expld (long double val, unsigned int base, unsigned int /*output */ *x)
{
  unsigned int divisions = 0;

  if (-LDBL_MAX <= val && val <= LDBL_MAX)
    for (; (val < 0 ? -val : val) >= base; val /= base)
      divisions++;

  if (x != NULL)
    *x = divisions;

  return val;
}
/* A very limited 'ceil' that avoids 'libm': truncate VAL, then step
   up by one if truncation lost a positive fraction.
   Assumes VAL fits in intmax_t.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  intmax_t truncated = val;
  return truncated + (truncated < val);
}
/* A very limited 'floor' that avoids 'libm': truncate VAL, then step
   down by one if truncation dropped a negative fraction.
   Assumes VAL fits in intmax_t.  */
static inline intmax_t
simple_round_floor (long double val)
{
  intmax_t truncated = val;
  if (truncated > val)
    truncated--;
  return truncated;
}
/* A very limited 'round away from zero': any fractional part pushes
   the result one step further from zero.
   Assumes VAL fits in intmax_t.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  intmax_t truncated = val;

  if (val < 0)
    {
      if (truncated > val)
        truncated--;
    }
  else if (truncated < val)
    truncated++;

  return truncated;
}
/* A very limited 'round towards zero': the fractional part is simply
   discarded by the conversion to an integer.
   Assumes VAL fits in intmax_t.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  intmax_t truncated = val;
  return truncated;
}
/* A very limited 'round' that avoids 'libm': shift VAL by one half
   away from zero, then truncate.
   Assumes VAL fits in intmax_t.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  if (val < 0)
    return val - 0.5;
  return val + 0.5;
}
384 static inline intmax_t
385 simple_round (long double val, enum round_type t)
387 switch (t)
389 case round_ceiling:
390 return simple_round_ceiling (val);
392 case round_floor:
393 return simple_round_floor (val);
395 case round_from_zero:
396 return simple_round_from_zero (val);
398 case round_to_zero:
399 return simple_round_to_zero (val);
401 case round_nearest:
402 return simple_round_nearest (val);
404 default:
405 /* to silence the compiler - this should never happen. */
406 return 0;
/* Result codes of the simple_strtod_* parsers.  The two SSE_OK*
   values denote a usable number; all others are failures.  */
enum simple_strtod_error
{
  SSE_OK = 0,
  SSE_OK_PRECISION_LOSS,
  SSE_OVERFLOW,
  SSE_INVALID_NUMBER,

  /* the following are returned by 'simple_strtod_human'.  */
  SSE_VALID_BUT_FORBIDDEN_SUFFIX,
  SSE_INVALID_SUFFIX,
  SSE_MISSING_I_SUFFIX
};
423 /* Read an *integer* INPUT_STR,
424 but return the integer value in a 'long double' VALUE
425 hence, no UINTMAX_MAX limitation.
426 NEGATIVE is updated, and is stored separately from the VALUE
427 so that signbit() isn't required to determine the sign of -0..
428 ENDPTR is required (unlike strtod) and is used to store a pointer
429 to the character after the last character used in the conversion.
431 Note locale'd grouping is not supported,
432 nor is skipping of white-space supported.
434 Returns:
435 SSE_OK - valid number.
436 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
437 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
438 SSE_INVALID_NUMBER - if no digits were found. */
439 static enum simple_strtod_error
440 simple_strtod_int (const char *input_str,
441 char **endptr, long double *value, bool *negative)
443 enum simple_strtod_error e = SSE_OK;
445 long double val = 0;
446 unsigned int digits = 0;
448 if (*input_str == '-')
450 input_str++;
451 *negative = true;
453 else
454 *negative = false;
456 *endptr = (char *) input_str;
457 while (*endptr && isdigit (**endptr))
459 int digit = (**endptr) - '0';
461 /* can this happen in some strange locale? */
462 if (digit < 0 || digit > 9)
463 return SSE_INVALID_NUMBER;
465 if (digits > MAX_UNSCALED_DIGITS)
466 e = SSE_OK_PRECISION_LOSS;
468 ++digits;
469 if (digits > MAX_ACCEPTABLE_DIGITS)
470 return SSE_OVERFLOW;
472 val *= 10;
473 val += digit;
475 ++(*endptr);
477 if (digits == 0)
478 return SSE_INVALID_NUMBER;
479 if (*negative)
480 val = -val;
482 if (value)
483 *value = val;
485 return e;
488 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
489 and return the value in a 'long double' VALUE.
490 ENDPTR is required (unlike strtod) and is used to store a pointer
491 to the character after the last character used in the conversion.
492 PRECISION is optional and used to indicate fractions are present.
494 Note locale'd grouping is not supported,
495 nor is skipping of white-space supported.
497 Returns:
498 SSE_OK - valid number.
499 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
500 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
501 SSE_INVALID_NUMBER - if no digits were found. */
502 static enum simple_strtod_error
503 simple_strtod_float (const char *input_str,
504 char **endptr,
505 long double *value,
506 size_t *precision)
508 bool negative;
509 enum simple_strtod_error e = SSE_OK;
511 if (precision)
512 *precision = 0;
514 /* TODO: accept locale'd grouped values for the integral part. */
515 e = simple_strtod_int (input_str, endptr, value, &negative);
516 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
517 return e;
520 /* optional decimal point + fraction. */
521 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
523 char *ptr2;
524 long double val_frac = 0;
525 bool neg_frac;
527 (*endptr) += decimal_point_length;
528 enum simple_strtod_error e2 =
529 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
530 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
531 return e2;
532 if (e2 == SSE_OK_PRECISION_LOSS)
533 e = e2; /* propagate warning. */
534 if (neg_frac)
535 return SSE_INVALID_NUMBER;
537 /* number of digits in the fractions. */
538 size_t exponent = ptr2 - *endptr;
540 val_frac = ((long double) val_frac) / powerld (10, exponent);
542 if (value)
544 if (negative)
545 *value -= val_frac;
546 else
547 *value += val_frac;
550 if (precision)
551 *precision = exponent;
553 *endptr = ptr2;
555 return e;
558 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
559 and return the value in a 'long double' VALUE,
560 with the precision of the input returned in PRECISION.
561 ENDPTR is required (unlike strtod) and is used to store a pointer
562 to the character after the last character used in the conversion.
563 ALLOWED_SCALING determines the scaling supported.
565 TODO:
566 support locale'd grouping
567 accept scentific and hex floats (probably use strtold directly)
569 Returns:
570 SSE_OK - valid number.
571 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
572 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
573 SSE_INVALID_NUMBER - if no digits were found.
574 SSE_VALID_BUT_FORBIDDEN_SUFFIX
575 SSE_INVALID_SUFFIX
576 SSE_MISSING_I_SUFFIX */
577 static enum simple_strtod_error
578 simple_strtod_human (const char *input_str,
579 char **endptr, long double *value, size_t *precision,
580 enum scale_type allowed_scaling)
582 int power = 0;
583 /* 'scale_auto' is checked below. */
584 int scale_base = default_scale_base (allowed_scaling);
586 devmsg ("simple_strtod_human:\n input string: '%s'\n "
587 "locale decimal-point: '%s'\n", input_str, decimal_point);
589 enum simple_strtod_error e =
590 simple_strtod_float (input_str, endptr, value, precision);
591 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
592 return e;
594 devmsg (" parsed numeric value: %Lf\n"
595 " input precision = %d\n", *value, (int)*precision);
597 if (**endptr != '\0')
599 /* process suffix. */
601 /* Skip any blanks between the number and suffix. */
602 while (isblank (**endptr))
603 (*endptr)++;
605 if (!valid_suffix (**endptr))
606 return SSE_INVALID_SUFFIX;
608 if (allowed_scaling == scale_none)
609 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
611 power = suffix_power (**endptr);
612 (*endptr)++; /* skip first suffix character. */
614 if (allowed_scaling == scale_auto && **endptr == 'i')
616 /* auto-scaling enabled, and the first suffix character
617 is followed by an 'i' (e.g. Ki, Mi, Gi). */
618 scale_base = 1024;
619 (*endptr)++; /* skip second ('i') suffix character. */
620 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
621 scale_base);
624 *precision = 0; /* Reset, to select precision based on scale. */
627 if (allowed_scaling == scale_IEC_I)
629 if (**endptr == 'i')
630 (*endptr)++;
631 else
632 return SSE_MISSING_I_SUFFIX;
635 long double multiplier = powerld (scale_base, power);
637 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
639 /* TODO: detect loss of precision and overflows. */
640 (*value) = (*value) * multiplier;
642 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
644 return e;
648 static void
649 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
651 char const *msgid = NULL;
653 switch (err)
655 case SSE_OK_PRECISION_LOSS:
656 case SSE_OK:
657 /* should never happen - this function isn't called when OK. */
658 abort ();
660 case SSE_OVERFLOW:
661 msgid = N_("value too large to be converted: '%s'");
662 break;
664 case SSE_INVALID_NUMBER:
665 msgid = N_("invalid number: '%s'");
666 break;
668 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
669 msgid = N_("rejecting suffix in input: '%s' (consider using --from)");
670 break;
672 case SSE_INVALID_SUFFIX:
673 msgid = N_("invalid suffix in input: '%s'");
674 break;
676 case SSE_MISSING_I_SUFFIX:
677 msgid = N_("missing 'i' suffix in input: '%s' (e.g Ki/Mi/Gi)");
678 break;
682 if (_invalid != inval_ignore)
683 error (conv_exit_code, 0, gettext (msgid), input_str);
686 /* Convert VAL to a human format string in BUF. */
687 static void
688 double_to_human (long double val, int precision,
689 char *buf, size_t buf_size,
690 enum scale_type scale, int group, enum round_type round)
692 devmsg ("double_to_human:\n");
694 if (scale == scale_none)
696 val *= powerld (10, precision);
697 val = simple_round (val, round);
698 val /= powerld (10, precision);
700 devmsg ((group) ?
701 " no scaling, returning (grouped) value: %'.*Lf\n" :
702 " no scaling, returning value: %.*Lf\n", precision, val);
704 int i = snprintf (buf, buf_size, (group) ? "%'.*Lf" : "%.*Lf",
705 precision, val);
706 if (i < 0 || i >= (int) buf_size)
707 error (EXIT_FAILURE, 0,
708 _("failed to prepare value '%Lf' for printing"), val);
709 return;
712 /* Scaling requested by user. */
713 double scale_base = default_scale_base (scale);
715 /* Normalize val to scale. */
716 unsigned int power = 0;
717 val = expld (val, scale_base, &power);
718 devmsg (" scaled value to %Lf * %0.f ^ %d\n", val, scale_base, power);
720 /* Perform rounding. */
721 int ten_or_less = 0;
722 if (absld (val) < 10)
724 /* for values less than 10, we allow one decimal-point digit,
725 so adjust before rounding. */
726 ten_or_less = 1;
727 val *= 10;
729 val = simple_round (val, round);
730 /* two special cases after rounding:
731 1. a "999.99" can turn into 1000 - so scale down
732 2. a "9.99" can turn into 10 - so don't display decimal-point. */
733 if (absld (val) >= scale_base)
735 val /= scale_base;
736 power++;
738 if (ten_or_less)
739 val /= 10;
741 /* should "7.0" be printed as "7" ?
742 if removing the ".0" is preferred, enable the fourth condition. */
743 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
744 /* && (absld (val) > simple_round_floor (val))) */
746 devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power);
748 snprintf (buf, buf_size, (show_decimal_point) ? "%.1Lf%s" : "%.0Lf%s",
749 val, suffix_power_character (power));
751 if (scale == scale_IEC_I && power > 0)
752 strncat (buf, "i", buf_size - strlen (buf) - 1);
754 devmsg (" returning value: '%s'\n", buf);
756 return;
759 /* Convert a string of decimal digits, N_STRING, with an optional suffix
760 to an integral value. Upon successful conversion, return that value.
761 If it cannot be converted, give a diagnostic and exit. */
762 static uintmax_t
763 unit_to_umax (const char *n_string)
765 strtol_error s_err;
766 char *end = NULL;
767 uintmax_t n;
769 s_err = xstrtoumax (n_string, &end, 10, &n, "KMGTPEZY");
771 if (s_err != LONGINT_OK || *end || n == 0)
772 error (EXIT_FAILURE, 0, _("invalid unit size: '%s'"), n_string);
774 return n;
778 static void
779 setup_padding_buffer (size_t min_size)
781 if (padding_buffer_size > min_size)
782 return;
784 padding_buffer_size = min_size + 1;
785 padding_buffer = realloc (padding_buffer, padding_buffer_size);
786 if (!padding_buffer)
787 error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
788 padding_buffer_size);
/* Print usage information and terminate with exit status STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis, the option list, the UNIT and FORMAT
   descriptions, the exit status notes and the examples are written to
   stdout.  This function does not return: it ends with exit (STATUS).  */
791 void
792 usage (int status)
794 if (status != EXIT_SUCCESS)
795 emit_try_help ();
796 else
/* Full help text: synopsis first, then one block per topic.  */
798 printf (_("\
799 Usage: %s [OPTIONS] [NUMBER]\n\
800 "), program_name);
801 fputs (_("\
802 Reformat NUMBER(s) from stdin or command arguments.\n\
803 "), stdout);
804 emit_mandatory_arg_note ();
805 fputs (_("\
806 --from=UNIT auto-scale input numbers to UNITs. Default is 'none'.\n\
807 See UNIT below.\n\
808 --from-unit=N specify the input unit size (instead of the default 1).\n\
809 --to=UNIT auto-scale output numbers to UNITs.\n\
810 See UNIT below.\n\
811 --to-unit=N the output unit size (instead of the default 1).\n\
812 --round=METHOD the rounding method to use when scaling. METHOD can be:\n\
813 up, down, from-zero (default), towards-zero, nearest\n\
814 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional SUFFIX\n\
815 in input numbers.\n\
816 --padding=N pad the output to N characters.\n\
817 Positive N will right-aligned. Negative N will left-align.\n\
818 Note: if the output is wider than N, padding is ignored.\n\
819 Default is to automatically pad if whitespace is found.\n\
820 --grouping group digits together (e.g. 1,000,000).\n\
821 Uses the locale-defined grouping (i.e. have no effect\n\
822 in C/POSIX locales).\n\
823 --header[=N] print (without converting) the first N header lines.\n\
824 N defaults to 1 if not specified.\n\
825 --field N replace the number in input field N (default is 1)\n\
826 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
827 --format=FORMAT use printf style floating-point FORMAT.\n\
828 See FORMAT below for details.\n\
829 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
830 abort (the default), fail, warn, ignore.\n\
831 --debug print warnings about invalid input.\n\
833 "), stdout);
834 fputs (HELP_OPTION_DESCRIPTION, stdout);
835 fputs (VERSION_OPTION_DESCRIPTION, stdout);
838 fputs (_("\
840 UNIT options:\n\
841 none No auto-scaling is done. Suffixes will trigger an error.\n\
842 auto Accept optional single-letter/two-letter suffix:\n\
843 1K = 1000\n\
844 1Ki = 1024\n\
845 1M = 1000000\n\
846 1Mi = 1048576\n\
847 si Accept optional single letter suffix:\n\
848 1K = 1000\n\
849 1M = 1000000\n\
850 ...\n\
851 iec Accept optional single letter suffix:\n\
852 1K = 1024\n\
853 1M = 1048576\n\
854 ...\n\
855 iec-i Accept optional two-letter suffix:\n\
856 1Ki = 1024\n\
857 1Mi = 1048576\n\
858 ...\n\
860 "), stdout);
862 fputs (_("\
864 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
865 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
866 Optional width value (%10f) will pad output. Optional negative width values\n\
867 (%-10f) will left-pad output.\n\
869 "), stdout);
871 printf (_("\
873 Exit status is 0 if all input numbers were successfully converted.\n\
874 By default, %s will stop at the first conversion error with exit status 2.\n\
875 With --invalid='fail' a warning is printed for each conversion error\n\
876 and the exit status is 2. With --invalid='warn' each conversion error is\n\
877 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
878 errors are not diagnosed and the exit status is 0.\n\
880 "), program_name);
/* The examples block takes the program name once per "%s".  */
884 printf (_("\
886 Examples:\n\
887 $ %s --to=si 1000\n\
888 -> \"1.0K\"\n\
889 $ %s --to=iec 2048\n\
890 -> \"2.0K\"\n\
891 $ %s --to=iec-i 4096\n\
892 -> \"4.0Ki\"\n\
893 $ echo 1K | %s --from=si\n\
894 -> \"1000\"\n\
895 $ echo 1K | %s --from=iec\n\
896 -> \"1024\"\n\
897 $ df | %s --header --field 2 --to=si\n\
898 $ ls -l | %s --header --field 5 --to=iec\n\
899 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
900 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n\
902 program_name, program_name, program_name,
903 program_name, program_name, program_name,
904 program_name, program_name, program_name);
905 emit_ancillary_info ();
907 exit (status);
910 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
911 1. padding (e.g. %20f)
912 2. alignment (e.g. %-20f)
913 3. grouping (e.g. %'f)
915 Only a limited subset of printf(3) syntax is supported.
917 TODO:
918 support .precision
919 support %e %g etc. rather than just %f
921 NOTES:
922 1. This function sets the global variables:
923 padding_width, padding_alignment, grouping,
924 format_str_prefix, format_str_suffix
925 2. The function aborts on any errors. */
926 static void
927 parse_format_string (char const *fmt)
929 size_t i;
930 size_t prefix_len = 0;
931 size_t suffix_pos;
932 long int pad = 0;
933 char *endptr = NULL;
935 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
937 if (!fmt[i])
938 error (EXIT_FAILURE, 0,
939 _("format %s has no %% directive"), quote (fmt));
940 prefix_len++;
943 i++;
944 i += strspn (fmt + i, " ");
945 if (fmt[i] == '\'')
947 grouping = 1;
948 i++;
950 i += strspn (fmt + i, " ");
951 errno = 0;
952 pad = strtol (fmt + i, &endptr, 10);
953 if (errno != 0)
954 error (EXIT_FAILURE, 0,
955 _("invalid format %s (width overflow)"), quote (fmt));
957 if (endptr != (fmt + i) && pad != 0)
959 if (pad < 0)
961 padding_alignment = MBS_ALIGN_LEFT;
962 padding_width = -pad;
964 else
966 padding_width = pad;
969 i = endptr - fmt;
971 if (fmt[i] == '\0')
972 error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
974 if (fmt[i] != 'f')
975 error (EXIT_FAILURE, 0, _("invalid format %s,"
976 " directive must be %%['][-][N]f"),
977 quote (fmt));
978 i++;
979 suffix_pos = i;
981 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
982 if (fmt[i] == '%' && fmt[i + 1] != '%')
983 error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
984 quote (fmt));
986 if (prefix_len)
988 format_str_prefix = xstrndup (fmt, prefix_len);
989 if (!format_str_prefix)
990 error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
991 prefix_len + 1);
993 if (fmt[suffix_pos] != '\0')
995 format_str_suffix = strdup (fmt + suffix_pos);
996 if (!format_str_suffix)
997 error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
998 strlen (fmt + suffix_pos));
1001 devmsg ("format String:\n input: %s\n grouping: %s\n"
1002 " padding width: %ld\n alignment: %s\n"
1003 " prefix: '%s'\n suffix: '%s'\n",
1004 quote (fmt), (grouping) ? "yes" : "no",
1005 padding_width,
1006 (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
1007 format_str_prefix, format_str_suffix);
1010 /* Parse a numeric value (with optional suffix) from a string.
1011 Returns a long double value, with input precision.
1013 If there's an error converting the string to value - exits with
1014 an error.
1016 If there are any trailing characters after the number
1017 (besides a valid suffix) - exits with an error. */
1018 static enum simple_strtod_error
1019 parse_human_number (const char *str, long double /*output */ *value,
1020 size_t *precision)
1022 char *ptr = NULL;
1024 enum simple_strtod_error e =
1025 simple_strtod_human (str, &ptr, value, precision, scale_from);
1026 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1028 simple_strtod_fatal (e, str);
1029 return e;
1032 if (ptr && *ptr != '\0')
1034 if (_invalid != inval_ignore)
1035 error (conv_exit_code, 0, _("invalid suffix in input '%s': '%s'"),
1036 str, ptr);
1037 e = SSE_INVALID_SUFFIX;
1039 return e;
1043 /* Print the given VAL, using the requested representation.
1044 The number is printed to STDOUT, with padding and alignment. */
1045 static int
1046 prepare_padded_number (const long double val, size_t precision)
1048 /* Generate Output. */
1049 char buf[128];
1051 /* Can't reliably print too-large values without auto-scaling. */
1052 unsigned int x;
1053 expld (val, 10, &x);
1054 if (scale_to == scale_none && x > MAX_UNSCALED_DIGITS)
1056 if (_invalid != inval_ignore)
1057 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1058 " (consider using --to)"), val);
1059 return 0;
1062 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1064 if (_invalid != inval_ignore)
1065 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1066 " (cannot handle values > 999Y)"), val);
1067 return 0;
1070 double_to_human (val, precision, buf, sizeof (buf), scale_to, grouping,
1071 _round);
1072 if (suffix)
1073 strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
1075 devmsg ("formatting output:\n value: %Lf\n humanized: '%s'\n", val, buf);
1077 if (padding_width && strlen (buf) < padding_width)
1079 size_t w = padding_width;
1080 mbsalign (buf, padding_buffer, padding_buffer_size, &w,
1081 padding_alignment, MBA_UNIBYTE_ONLY);
1083 devmsg (" After padding: '%s'\n", padding_buffer);
1085 else
1087 setup_padding_buffer (strlen (buf) + 1);
1088 strcpy (padding_buffer, buf);
1091 return 1;
1094 static void
1095 print_padded_number (void)
1097 if (format_str_prefix)
1098 fputs (format_str_prefix, stdout);
1100 fputs (padding_buffer, stdout);
1102 if (format_str_suffix)
1103 fputs (format_str_suffix, stdout);
1106 /* Converts the TEXT number string to the requested representation,
1107 and handles automatic suffix addition. */
1108 static int
1109 process_suffixed_number (char *text, long double *result, size_t *precision)
1111 if (suffix && strlen (text) > strlen (suffix))
1113 char *possible_suffix = text + strlen (text) - strlen (suffix);
1115 if (STREQ (suffix, possible_suffix))
1117 /* trim suffix, ONLY if it's at the end of the text. */
1118 *possible_suffix = '\0';
1119 devmsg ("trimming suffix '%s'\n", suffix);
1121 else
1122 devmsg ("no valid suffix found\n");
1125 /* Skip white space - always. */
1126 char *p = text;
1127 while (*p && isblank (*p))
1128 ++p;
1129 const unsigned int skip_count = text - p;
1131 /* setup auto-padding. */
1132 if (auto_padding)
1134 if (skip_count > 0 || field > 1)
1136 padding_width = strlen (text);
1137 setup_padding_buffer (padding_width);
1139 else
1141 padding_width = 0;
1143 devmsg ("setting Auto-Padding to %ld characters\n", padding_width);
1146 long double val = 0;
1147 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1148 if (e == SSE_OK_PRECISION_LOSS && debug)
1149 error (0, 0, _("large input value '%s': possible precision loss"), p);
1151 if (from_unit_size != 1 || to_unit_size != 1)
1152 val = (val * from_unit_size) / to_unit_size;
1154 *result = val;
1156 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1159 /* Skip the requested number of fields in the input string.
1160 Returns a pointer to the *delimiter* of the requested field,
1161 or a pointer to NUL (if reached the end of the string). */
1162 static inline char *
1163 __attribute ((pure))
1164 skip_fields (char *buf, int fields)
1166 char *ptr = buf;
1167 if (delimiter != DELIMITER_DEFAULT)
1169 if (*ptr == delimiter)
1170 fields--;
1171 while (*ptr && fields--)
1173 while (*ptr && *ptr == delimiter)
1174 ++ptr;
1175 while (*ptr && *ptr != delimiter)
1176 ++ptr;
1179 else
1180 while (*ptr && fields--)
1182 while (*ptr && isblank (*ptr))
1183 ++ptr;
1184 while (*ptr && !isblank (*ptr))
1185 ++ptr;
1187 return ptr;
/* Split LINE around field number _FIELD (counted from 1).
   NOTE: the input buffer is modified: the delimiters on either side
   of the requested field are overwritten with NUL.

   TODO:
     Maybe support multiple fields, though can always pipe output
     into another numfmt to process other fields.
     Maybe default to processing all fields rather than just first?

   Output:
     *_PREFIX - the text before the field, or NULL when _FIELD is 1.
     *_DATA   - the field itself, or NULL if LINE has too few fields.
     *_SUFFIX - the text after the field, or NULL if the field is the
                last one on the line.  */
static void
extract_fields (char *line, int _field,
                char ** _prefix, char ** _data, char ** _suffix)
{
  char *ptr = line;
  *_prefix = NULL;
  *_data = NULL;
  *_suffix = NULL;

  devmsg ("extracting Fields:\n input: '%s'\n field: %d\n", line, _field);

  /* Use the parameter rather than the file-scope 'field' setting, so
     that the function honours the field number it was asked for.  */
  if (_field > 1)
    {
      /* skip the requested number of fields.  */
      *_prefix = line;
      ptr = skip_fields (line, _field - 1);
      if (*ptr == '\0')
        {
          /* not enough fields in the input - print warning?  */
          devmsg (" TOO FEW FIELDS!\n prefix: '%s'\n", *_prefix);
          return;
        }

      /* Terminate the prefix at the delimiter and step past it.  */
      *ptr = '\0';
      ++ptr;
    }

  *_data = ptr;
  *_suffix = skip_fields (*_data, 1);
  if (**_suffix)
    {
      /* there is a suffix (i.e. the field is not the last on the line),
         so null-terminate the _data before it.  */
      **_suffix = '\0';
      ++(*_suffix);
    }
  else
    *_suffix = NULL;

  devmsg (" prefix: '%s'\n number: '%s'\n suffix: '%s'\n",
          *_prefix, *_data, *_suffix);
}
1245 /* Convert a number in a given line of text.
1246 NEWLINE specifies whether to output a '\n' for this "line". */
1247 static int
1248 process_line (char *line, bool newline)
1250 char *pre, *num, *suf;
1251 long double val = 0;
1252 size_t precision = 0;
1253 int valid_number = 0;
1255 extract_fields (line, field, &pre, &num, &suf);
1256 if (!num)
1257 if (_invalid != inval_ignore)
1258 error (conv_exit_code, 0, _("input line is too short, "
1259 "no numbers found to convert in field %ld"),
1260 field);
1262 if (num)
1264 valid_number = process_suffixed_number (num, &val, &precision);
1265 if (valid_number)
1266 valid_number = prepare_padded_number (val, precision);
1269 if (pre)
1270 fputs (pre, stdout);
1272 if (pre && num)
1273 fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
1275 if (valid_number)
1277 print_padded_number ();
1279 else
1281 if (num)
1282 fputs (num, stdout);
1285 if (suf)
1287 fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
1288 fputs (suf, stdout);
1291 if (newline)
1292 putchar ('\n');
1294 return valid_number;
1298 main (int argc, char **argv)
1300 int valid_numbers = 1;
1302 initialize_main (&argc, &argv);
1303 set_program_name (argv[0]);
1304 setlocale (LC_ALL, "");
1305 bindtextdomain (PACKAGE, LOCALEDIR);
1306 textdomain (PACKAGE);
1308 decimal_point = nl_langinfo (RADIXCHAR);
1309 if (decimal_point == NULL || strlen (decimal_point) == 0)
1310 decimal_point = ".";
1311 decimal_point_length = strlen (decimal_point);
1313 atexit (close_stdout);
1315 while (true)
1317 int c = getopt_long (argc, argv, "d:", longopts, NULL);
1319 if (c == -1)
1320 break;
1322 switch (c)
1324 case FROM_OPTION:
1325 scale_from = XARGMATCH ("--from", optarg,
1326 scale_from_args, scale_from_types);
1327 break;
1329 case FROM_UNIT_OPTION:
1330 from_unit_size = unit_to_umax (optarg);
1331 break;
1333 case TO_OPTION:
1334 scale_to =
1335 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1336 break;
1338 case TO_UNIT_OPTION:
1339 to_unit_size = unit_to_umax (optarg);
1340 break;
1342 case ROUND_OPTION:
1343 _round = XARGMATCH ("--round", optarg, round_args, round_types);
1344 break;
1346 case GROUPING_OPTION:
1347 grouping = 1;
1348 break;
1350 case PADDING_OPTION:
1351 if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
1352 || padding_width == 0)
1353 error (EXIT_FAILURE, 0, _("invalid padding value '%s'"), optarg);
1354 if (padding_width < 0)
1356 padding_alignment = MBS_ALIGN_LEFT;
1357 padding_width = -padding_width;
1359 /* TODO: We probably want to apply a specific --padding
1360 to --header lines too. */
1361 break;
1363 case FIELD_OPTION:
1364 if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
1365 || field <= 0)
1366 error (EXIT_FAILURE, 0, _("invalid field value '%s'"), optarg);
1367 break;
1369 case 'd':
1370 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1371 if (optarg[0] != '\0' && optarg[1] != '\0')
1372 error (EXIT_FAILURE, 0,
1373 _("the delimiter must be a single character"));
1374 delimiter = optarg[0];
1375 break;
1377 case SUFFIX_OPTION:
1378 suffix = optarg;
1379 break;
1381 case DEBUG_OPTION:
1382 debug = true;
1383 break;
1385 case DEV_DEBUG_OPTION:
1386 dev_debug = true;
1387 debug = true;
1388 break;
1390 case HEADER_OPTION:
1391 if (optarg)
1393 if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
1394 || header == 0)
1395 error (EXIT_FAILURE, 0, _("invalid header value '%s'"),
1396 optarg);
1398 else
1400 header = 1;
1402 break;
1404 case FORMAT_OPTION:
1405 format_str = optarg;
1406 break;
1408 case INVALID_OPTION:
1409 _invalid = XARGMATCH ("--invalid", optarg, inval_args, inval_types);
1410 break;
1412 case_GETOPT_HELP_CHAR;
1413 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1415 default:
1416 usage (EXIT_FAILURE);
1420 if (format_str != NULL && grouping)
1421 error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1422 if (format_str != NULL && padding_width > 0)
1423 error (EXIT_FAILURE, 0, _("--padding cannot be combined with --format"));
1425 /* Warn about no-op. */
1426 if (debug && scale_from == scale_none && scale_to == scale_none
1427 && !grouping && (padding_width == 0) && (format_str == NULL))
1428 error (0, 0, _("no conversion option specified"));
1430 if (format_str)
1431 parse_format_string (format_str);
1433 if (grouping)
1435 if (scale_to != scale_none)
1436 error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1437 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1438 error (0, 0, _("grouping has no effect in this locale"));
1442 setup_padding_buffer (padding_width);
1443 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1445 if (_invalid != inval_abort)
1446 conv_exit_code = 0;
1448 if (argc > optind)
1450 if (debug && header)
1451 error (0, 0, _("--header ignored with command-line input"));
1453 for (; optind < argc; optind++)
1454 valid_numbers &= process_line (argv[optind], true);
1456 else
1458 char *line = NULL;
1459 size_t line_allocated = 0;
1460 ssize_t len;
1462 while (header-- && getline (&line, &line_allocated, stdin) > 0)
1463 fputs (line, stdout);
1465 while ((len = getline (&line, &line_allocated, stdin)) > 0)
1467 bool newline = line[len - 1] == '\n';
1468 if (newline)
1469 line[len - 1] = '\0';
1470 valid_numbers &= process_line (line, newline);
1473 IF_LINT (free (line));
1475 if (ferror (stdin))
1476 error (0, errno, _("error reading input"));
1479 free (padding_buffer);
1480 free (format_str_prefix);
1481 free (format_str_suffix);
1484 if (debug && !valid_numbers)
1485 error (0, 0, _("failed to convert some of the input numbers"));
1487 int exit_status = EXIT_SUCCESS;
1488 if (!valid_numbers && _invalid != inval_warn && _invalid != inval_ignore)
1489 exit_status = EXIT_CONVERSION_WARNINGS;
1491 exit (exit_status);