src/od.c

   1 /* od -- dump files in octal and other formats
   2    Copyright (C) 1992-2024 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Jim Meyering.  */
  18
  19 #include <config.h>
  20
  21 #include <ctype.h>
  22 #include <float.h>
  23 #include <stdio.h>
  24 #include <getopt.h>
  25 #include <sys/types.h>
  26 #include "system.h"
  27 #include "argmatch.h"
  28 #include "assure.h"
  29 #include "ftoastr.h"
  30 #include "quote.h"
  31 #include "stat-size.h"
  32 #include "xbinary-io.h"
  33 #include "xprintf.h"
  34 #include "xstrtol.h"
  35 #include "xstrtol-error.h"
  36
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "od"

/* Author credit.  Expands to a call of proper_name, which is not
   defined in this file; presumably it is supplied by one of the
   headers included above.  */
#define AUTHORS proper_name ("Jim Meyering")

/* The default number of input bytes per output line.  */
#define DEFAULT_BYTES_PER_BLOCK 16
  44
/* The widest unsigned integer type this program formats; its size is
   what MAX_INTEGRAL_TYPE_SIZE expands to.  */
#if HAVE_UNSIGNED_LONG_LONG_INT
typedef unsigned long long int unsigned_long_long_int;
#else
/* This is just a place-holder to avoid a few '#if' directives.
   In this case, the type isn't actually used.  */
typedef unsigned long int unsigned_long_long_int;
#endif

/* Half-precision type selected by the 'fH' format.  When
   FLOAT16_SUPPORTED is not already nonzero, the #else branch defines
   it as 0 so that later code can test it in ordinary C expressions
   as well as in '#if' directives.  */
#if FLOAT16_SUPPORTED
  /* Available since clang 6 (2018), and gcc 7 (2017).  */
  typedef _Float16 float16;
#else
# define FLOAT16_SUPPORTED 0
  /* This is just a place-holder to avoid a few '#if' directives.
     In this case, the type isn't actually used.  */
  typedef float float16;
#endif

/* "Brain" 16-bit floating point type selected by the 'fB' format.
   BF16_SUPPORTED is normalized to 0 in the same way as
   FLOAT16_SUPPORTED above.  */
#if BF16_SUPPORTED
  /* Available since clang 11 (2020), and gcc 13 (2023). */
  typedef __bf16 bfloat16;
#else
# define BF16_SUPPORTED 0
  /* This is just a place-holder to avoid a few '#if' directives.
     In this case, the type isn't actually used.  */
  typedef float bfloat16;
#endif
  72
/* Identifies the C type of one input element.  Every enumerator
   before N_SIZE_SPECS has a matching initializer, in the same order,
   in the width_bytes array, so the two must be kept in sync.  */
enum size_spec
  {
    NO_SIZE,            /* No type; width_bytes holds -1 for it.  */
    CHAR,
    SHORT,
    INT,
    LONG,
    LONG_LONG,
    /* FIXME: add INTMAX support, too */
    FLOAT_HALF,         /* Shared by float16 and bfloat16.  */
    FLOAT_SINGLE,
    FLOAT_DOUBLE,
    FLOAT_LONG_DOUBLE,
    N_SIZE_SPECS        /* Number of enumerators above; not a type.  */
  };
  88
/* How one input element is rendered.  The quoted letters are the
   type characters that decode_one_format maps to each value.  */
enum output_format
  {
    SIGNED_DECIMAL,     /* 'd' */
    UNSIGNED_DECIMAL,   /* 'u' */
    OCTAL,              /* 'o' */
    HEXADECIMAL,        /* 'x' */
    FLOATING_POINT,     /* 'f', unless followed by 'B' or 'H' */
    HFLOATING_POINT,    /* 'fH' */
    BFLOATING_POINT,    /* 'fB' */
    NAMED_CHARACTER,    /* 'a' */
    CHARACTER           /* 'c' */
  };
 101
 102 #define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
 103
 104 /* The maximum number of bytes needed for a format string, including
 105    the trailing nul.  Each format string expects a variable amount of
 106    padding (guaranteed to be at least 1 plus the field width), then an
 107    element that will be formatted in the field.  */
 108 enum
 109   {
 110     FMT_BYTES_ALLOCATED =
 111            (sizeof "%*.99" + 1
 112             + MAX (sizeof "ld",
 113                    MAX (sizeof "jd",
 114                         MAX (sizeof "jd",
 115                              MAX (sizeof "ju",
 116                                   sizeof "jx")))))
 117   };
 118
 119 /* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable.  */
 120 static_assert (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99);
 121
/* Each output format specification (from '-t spec' or from
   old-style options) is represented by one of these structures.  */
struct tspec
  {
    enum output_format fmt; /* How each element is rendered.  */
    enum size_spec size; /* Type of input object.  */
    /* FIELDS is the number of fields per line, BLANK is the number of
       fields to leave blank.  WIDTH is width of one field, excluding
       leading space, and PAD is total pad to divide among FIELDS.
       PAD is at least as large as FIELDS.  */
    void (*print_function) (size_t fields, size_t blank, void const *data,
                            char const *fmt, int width, int pad);
    char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d".  */
    /* NOTE(review): presumably requests the ">...<" trailer written by
       dump_hexl_mode_trailer -- confirm against the code that sets and
       reads this flag.  */
    bool hexl_mode_trailer;
    int field_width; /* Minimum width of a field, excluding leading space.  */
    int pad_width; /* Total padding to be divided among fields.  */
  };
 139
 140 /* Convert the number of 8-bit bytes of a binary representation to
 141    the number of characters (digits + sign if the type is signed)
 142    required to represent the same quantity in the specified base/type.
 143    For example, a 32-bit (4-byte) quantity may require a field width
 144    as wide as the following for these types:
 145    11   unsigned octal
 146    11   signed decimal
 147    10   unsigned decimal
 148    8    unsigned hexadecimal  */
 149
 150 static char const bytes_to_oct_digits[] =
 151 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
 152
 153 static char const bytes_to_signed_dec_digits[] =
 154 {1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
 155
 156 static char const bytes_to_unsigned_dec_digits[] =
 157 {0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
 158
 159 static char const bytes_to_hex_digits[] =
 160 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
 161
 162 /* It'll be a while before we see integral types wider than 16 bytes,
 163    but if/when it happens, this check will catch it.  Without this check,
 164    a wider type would provoke a buffer overrun.  */
 165 static_assert (MAX_INTEGRAL_TYPE_SIZE
 166                < ARRAY_CARDINALITY (bytes_to_hex_digits));
 167
 168 /* Make sure the other arrays have the same length.  */
 169 static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_signed_dec_digits);
 170 static_assert (sizeof bytes_to_oct_digits
 171                == sizeof bytes_to_unsigned_dec_digits);
 172 static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_hex_digits);
 173
/* Convert enum size_spec to the size of the named type.  The
   initializers appear in the same order as the enumerators of
   'enum size_spec'; the FLOAT_HALF slot holds the size of bfloat16
   when BF16_SUPPORTED and of float16 otherwise.  */
static const int width_bytes[] =
{
  -1,                                   /* NO_SIZE */
  sizeof (char),                        /* CHAR */
  sizeof (short int),                   /* SHORT */
  sizeof (int),                         /* INT */
  sizeof (long int),                    /* LONG */
  sizeof (unsigned_long_long_int),      /* LONG_LONG */
#if BF16_SUPPORTED
  sizeof (bfloat16),                    /* FLOAT_HALF */
#else
  sizeof (float16),                     /* FLOAT_HALF */
#endif
  sizeof (float),                       /* FLOAT_SINGLE */
  sizeof (double),                      /* FLOAT_DOUBLE */
  sizeof (long double)                  /* FLOAT_LONG_DOUBLE */
};

/* Ensure that for each member of 'enum size_spec' there is an
   initializer in the width_bytes array.  */
static_assert (ARRAY_CARDINALITY (width_bytes) == N_SIZE_SPECS);
 196
/* Names for some non-printing characters.  The table is indexed by
   character code 0 through 040 (space); print_named_ascii consults it
   for exactly that range.  Each name is at most three characters, so
   it fits in four bytes with its terminating nul.  */
static char const charname[33][4] =
{
  "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
  "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
  "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
  "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
  "sp"
};
 206
/* Address base (8, 10 or 16).  */
static int address_base;

/* The number of octal digits required to represent the largest
   address value.  Addresses are passed around as uintmax_t (see
   format_address), and adding CHAR_BIT - 1 to the bit count before
   dividing by the three bits of an octal digit makes the result a
   safe upper bound rather than an exact digit count.  */
#define MAX_ADDRESS_LENGTH \
  ((sizeof (uintmax_t) * CHAR_BIT + CHAR_BIT - 1) / 3)

/* Width of a normal address.  */
static int address_pad_len;
 217
/* Minimum length when detecting --strings.  The usage text documents
   3 as the value implied when --strings is given without BYTES.  */
static size_t string_min;

/* True when in --strings mode.  */
static bool flag_dump_strings;

/* True if we should recognize the older non-option arguments
   that specified at most one file and optional arguments specifying
   offset and pseudo-start address.  */
static bool traditional;

/* True if an old-style 'pseudo-address' was specified.  */
static bool flag_pseudo_start;

/* The difference between the old-style pseudo starting address and
   the number of bytes to skip.  */
static uintmax_t pseudo_offset;

/* Function that accepts an address and an optional following char,
   and prints the address and char to stdout.  This is a pointer so
   that the implementation can be chosen at run time; no function is
   assigned to it at its definition.  */
static void (*format_address) (uintmax_t, char);

/* The number of input bytes to skip before formatting and writing.  */
static uintmax_t n_bytes_to_skip = 0;

/* When false, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
   input is formatted.  */
static bool limit_bytes_to_format = false;

/* The maximum number of bytes that will be formatted.  Meaningful
   only when limit_bytes_to_format is true.  */
static uintmax_t max_bytes_to_format;

/* The offset of the first byte after the last byte to be formatted.
   Meaningful only when limit_bytes_to_format is true.  */
static uintmax_t end_offset;
 252
/* When true and two or more consecutive blocks are equal, format
   only the first block and output an asterisk alone on the following
   line to indicate that identical blocks have been elided.  This is
   the default; the usage text describes -v as the way to turn the
   asterisk marking off.  */
static bool abbreviate_duplicate_blocks = true;

/* An array of specs describing how to format each input block.  */
static struct tspec *spec;

/* The number of format specs.  */
static size_t n_specs;

/* The allocated length of SPEC.  */
static size_t n_specs_allocated;

/* The number of input bytes formatted per output line.  It must be
   a multiple of the least common multiple of the sizes associated with
   the specified output types.  It should be as large as possible, but
   no larger than 16 -- unless specified with the -w option.
   (16 is the value of DEFAULT_BYTES_PER_BLOCK.)  */
static size_t bytes_per_block;
 272
/* Human-readable representation of *file_list (for error messages).
   It differs from file_list[-1] only when file_list[-1] is "-".  */
static char const *input_filename;

/* A null-terminated list of the file-arguments from the command line.  */
static char const *const *file_list;

/* Initializer for file_list if no file-arguments
   were specified on the command line.  Its single entry "-" is
   followed by the null pointer that terminates the list.  */
static char const *const default_file_list[] = {"-", nullptr};

/* The input stream associated with the current file.  */
static FILE *in_stream;

/* If true, at least one of the files we read was standard input.  */
static bool have_read_stdin;
 289
/* Map the size in bytes to a type identifier.  The array has static
   storage, so entries that are never assigned hold 0, i.e. NO_SIZE;
   decode_one_format rejects a requested size whose entry is NO_SIZE.  */
static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];

/* Size in bytes of the widest floating point type, and the
   corresponding size-to-type map for floating point formats.  As
   above, an entry equal to NO_SIZE means there is no floating point
   type of that size.  */
#define MAX_FP_TYPE_SIZE sizeof (long double)
static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];

/* Give WORDS_BIGENDIAN a value of 0 when it is not already defined,
   so that it can be used in ordinary expressions.  */
#ifndef WORDS_BIGENDIAN
# define WORDS_BIGENDIAN 0
#endif

/* Use native endianness by default.  When true, the print functions
   generated by PRINT_FIELDS reverse the bytes of each element wider
   than one byte before formatting it.  */
static bool input_swap;
 302
/* Short option letters, in getopt notation: a letter followed by ':'
   takes a required argument and one followed by '::' takes an optional
   argument.  Here A, j, N, S and t require an argument and w accepts
   an optional one.  */
static char const short_options[] = "A:aBbcDdeFfHhIij:LlN:OoS:st:vw::Xx";

/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  TRADITIONAL_OPTION = CHAR_MAX + 1,   /* --traditional */
  ENDIAN_OPTION,                       /* --endian */
};
 312
/* Byte orders that can be named as the argument of --endian.  */
enum endian_type
{
  endian_little,
  endian_big
};

/* Accepted spellings of the --endian argument, terminated by a null
   pointer.  */
static char const *const endian_args[] =
{
  "little", "big", nullptr
};

/* Values parallel to endian_args: element I here is the meaning of
   endian_args[I].  NOTE(review): presumably both arrays are handed to
   the argmatch facility declared in "argmatch.h" -- confirm at the
   place where ENDIAN_OPTION is handled.  */
static enum endian_type const endian_types[] =
{
  endian_little, endian_big
};
 328
/* Long options and the (pseudo) short option each one maps to.
   --strings and --width take an optional argument; --traditional and
   --endian have no short equivalent and use the pseudo option codes
   defined above.  The all-zero entry terminates the table.  */
static struct option const long_options[] =
{
  {"skip-bytes", required_argument, nullptr, 'j'},
  {"address-radix", required_argument, nullptr, 'A'},
  {"read-bytes", required_argument, nullptr, 'N'},
  {"format", required_argument, nullptr, 't'},
  {"output-duplicates", no_argument, nullptr, 'v'},
  {"strings", optional_argument, nullptr, 'S'},
  {"traditional", no_argument, nullptr, TRADITIONAL_OPTION},
  {"width", optional_argument, nullptr, 'w'},
  {"endian", required_argument, nullptr, ENDIAN_OPTION },

  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
 345
 346 void
 347 usage (int status)
 348 {
 349   if (status != EXIT_SUCCESS)
 350     emit_try_help ();
 351   else
 352     {
 353       printf (_("\
 354 Usage: %s [OPTION]... [FILE]...\n\
 355   or:  %s [-abcdfilosx]... [FILE] [[+]OFFSET[.][b]]\n\
 356   or:  %s --traditional [OPTION]... [FILE] [[+]OFFSET[.][b] [+][LABEL][.][b]]\n\
 357 "),
 358               program_name, program_name, program_name);
 359       fputs (_("\n\
 360 Write an unambiguous representation, octal bytes by default,\n\
 361 of FILE to standard output.  With more than one FILE argument,\n\
 362 concatenate them in the listed order to form the input.\n\
 363 "), stdout);
 364
 365       emit_stdin_note ();
 366
 367       fputs (_("\
 368 \n\
 369 If first and second call formats both apply, the second format is assumed\n\
 370 if the last operand begins with + or (if there are 2 operands) a digit.\n\
 371 An OFFSET operand means -j OFFSET.  LABEL is the pseudo-address\n\
 372 at first byte printed, incremented when dump is progressing.\n\
 373 For OFFSET and LABEL, a 0x or 0X prefix indicates hexadecimal;\n\
 374 suffixes may be . for octal and b for multiply by 512.\n\
 375 "), stdout);
 376
 377       emit_mandatory_arg_note ();
 378
 379       fputs (_("\
 380   -A, --address-radix=RADIX   output format for file offsets; RADIX is one\n\
 381                                 of [doxn], for Decimal, Octal, Hex or None\n\
 382       --endian={big|little}   swap input bytes according the specified order\n\
 383   -j, --skip-bytes=BYTES      skip BYTES input bytes first\n\
 384 "), stdout);
 385       fputs (_("\
 386   -N, --read-bytes=BYTES      limit dump to BYTES input bytes\n\
 387   -S BYTES, --strings[=BYTES]  show only NUL terminated strings\n\
 388                                 of at least BYTES (3) printable characters\n\
 389   -t, --format=TYPE           select output format or formats\n\
 390   -v, --output-duplicates     do not use * to mark line suppression\n\
 391   -w[BYTES], --width[=BYTES]  output BYTES bytes per output line;\n\
 392                                 32 is implied when BYTES is not specified\n\
 393       --traditional           accept arguments in third form above\n\
 394 "), stdout);
 395       fputs (HELP_OPTION_DESCRIPTION, stdout);
 396       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 397       fputs (_("\
 398 \n\
 399 \n\
 400 Traditional format specifications may be intermixed; they accumulate:\n\
 401   -a   same as -t a,  select named characters, ignoring high-order bit\n\
 402   -b   same as -t o1, select octal bytes\n\
 403   -c   same as -t c,  select printable characters or backslash escapes\n\
 404   -d   same as -t u2, select unsigned decimal 2-byte units\n\
 405 "), stdout);
 406       fputs (_("\
 407   -f   same as -t fF, select floats\n\
 408   -i   same as -t dI, select decimal ints\n\
 409   -l   same as -t dL, select decimal longs\n\
 410   -o   same as -t o2, select octal 2-byte units\n\
 411   -s   same as -t d2, select decimal 2-byte units\n\
 412   -x   same as -t x2, select hexadecimal 2-byte units\n\
 413 "), stdout);
 414       fputs (_("\
 415 \n\
 416 \n\
 417 TYPE is made up of one or more of these specifications:\n\
 418   a          named character, ignoring high-order bit\n\
 419   c          printable character or backslash escape\n\
 420 "), stdout);
 421       fputs (_("\
 422   d[SIZE]    signed decimal, SIZE bytes per integer\n\
 423   f[SIZE]    floating point, SIZE bytes per float\n\
 424   o[SIZE]    octal, SIZE bytes per integer\n\
 425   u[SIZE]    unsigned decimal, SIZE bytes per integer\n\
 426   x[SIZE]    hexadecimal, SIZE bytes per integer\n\
 427 "), stdout);
 428       fputs (_("\
 429 \n\
 430 SIZE is a number.  For TYPE in [doux], SIZE may also be C for\n\
 431 sizeof(char), S for sizeof(short), I for sizeof(int) or L for\n\
 432 sizeof(long).  If TYPE is f, SIZE may also be B for Brain 16 bit,\n\
 433 H for Half precision float, F for sizeof(float), D for sizeof(double),\n\
 434 or L for sizeof(long double).\n\
 435 "), stdout);
 436       fputs (_("\
 437 \n\
 438 Adding a z suffix to any type displays printable characters at the end of\n\
 439 each output line.\n\
 440 "), stdout);
 441       fputs (_("\
 442 \n\
 443 \n\
 444 BYTES is hex with 0x or 0X prefix, and may have a multiplier suffix:\n\
 445   b    512\n\
 446   KB   1000\n\
 447   K    1024\n\
 448   MB   1000*1000\n\
 449   M    1024*1024\n\
 450 and so on for G, T, P, E, Z, Y, R, Q.\n\
 451 Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
 452 "), stdout);
 453       emit_ancillary_info (PROGRAM_NAME);
 454     }
 455   exit (status);
 456 }
 457
 458 /* Define the print functions.  */
 459
 460 #define PRINT_FIELDS(N, T, FMT_STRING_DECL, ACTION)                     \
 461 static void                                                             \
 462 N (size_t fields, size_t blank, void const *block,                      \
 463    FMT_STRING_DECL, int width, int pad)                                 \
 464 {                                                                       \
 465   T const *p = block;                                                   \
 466   uintmax_t i;                                                          \
 467   int pad_remaining = pad;                                              \
 468   for (i = fields; blank < i; i--)                                      \
 469     {                                                                   \
 470       int next_pad = pad * (i - 1) / fields;                            \
 471       int adjusted_width = pad_remaining - next_pad + width;            \
 472       T x;                                                              \
 473       if (input_swap && sizeof (T) > 1)                                 \
 474         {                                                               \
 475           size_t j;                                                     \
 476           union {                                                       \
 477             T x;                                                        \
 478             char b[sizeof (T)];                                         \
 479           } u;                                                          \
 480           for (j = 0; j < sizeof (T); j++)                              \
 481             u.b[j] = ((char const *) p)[sizeof (T) - 1 - j];            \
 482           x = u.x;                                                      \
 483         }                                                               \
 484       else                                                              \
 485         x = *p;                                                         \
 486       p++;                                                              \
 487       ACTION;                                                           \
 488       pad_remaining = next_pad;                                         \
 489     }                                                                   \
 490 }
 491
 492 #define PRINT_TYPE(N, T)                                                \
 493   PRINT_FIELDS (N, T, char const *fmt_string,                           \
 494                 xprintf (fmt_string, adjusted_width, x))
 495
 496 #define PRINT_FLOATTYPE(N, T, FTOASTR, BUFSIZE)                         \
 497   PRINT_FIELDS (N, T, MAYBE_UNUSED char const *fmt_string,              \
 498                 char buf[BUFSIZE];                                      \
 499                 FTOASTR (buf, sizeof buf, 0, 0, x);                     \
 500                 xprintf ("%*s", adjusted_width, buf))
 501
 502 PRINT_TYPE (print_s_char, signed char)
 503 PRINT_TYPE (print_char, unsigned char)
 504 PRINT_TYPE (print_s_short, short int)
 505 PRINT_TYPE (print_short, unsigned short int)
 506 PRINT_TYPE (print_int, unsigned int)
 507 PRINT_TYPE (print_long, unsigned long int)
 508 PRINT_TYPE (print_long_long, unsigned_long_long_int)
 509
 510 PRINT_FLOATTYPE (print_bfloat, bfloat16, ftoastr, FLT_BUFSIZE_BOUND)
 511 PRINT_FLOATTYPE (print_halffloat, float16, ftoastr, FLT_BUFSIZE_BOUND)
 512 PRINT_FLOATTYPE (print_float, float, ftoastr, FLT_BUFSIZE_BOUND)
 513 PRINT_FLOATTYPE (print_double, double, dtoastr, DBL_BUFSIZE_BOUND)
 514 PRINT_FLOATTYPE (print_long_double, long double, ldtoastr, LDBL_BUFSIZE_BOUND)
 515
 516 #undef PRINT_TYPE
 517 #undef PRINT_FLOATTYPE
 518
 519 static void
 520 dump_hexl_mode_trailer (size_t n_bytes, char const *block)
 521 {
 522   fputs ("  >", stdout);
 523   for (size_t i = n_bytes; i > 0; i--)
 524     {
 525       unsigned char c = *block++;
 526       unsigned char c2 = (isprint (c) ? c : '.');
 527       putchar (c2);
 528     }
 529   putchar ('<');
 530 }
 531
 532 static void
 533 print_named_ascii (size_t fields, size_t blank, void const *block,
 534                    MAYBE_UNUSED char const *unused_fmt_string,
 535                    int width, int pad)
 536 {
 537   unsigned char const *p = block;
 538   uintmax_t i;
 539   int pad_remaining = pad;
 540   for (i = fields; blank < i; i--)
 541     {
 542       int next_pad = pad * (i - 1) / fields;
 543       int masked_c = *p++ & 0x7f;
 544       char const *s;
 545       char buf[2];
 546
 547       if (masked_c == 127)
 548         s = "del";
 549       else if (masked_c <= 040)
 550         s = charname[masked_c];
 551       else
 552         {
 553           buf[0] = masked_c;
 554           buf[1] = 0;
 555           s = buf;
 556         }
 557
 558       xprintf ("%*s", pad_remaining - next_pad + width, s);
 559       pad_remaining = next_pad;
 560     }
 561 }
 562
 563 static void
 564 print_ascii (size_t fields, size_t blank, void const *block,
 565              MAYBE_UNUSED char const *unused_fmt_string, int width,
 566              int pad)
 567 {
 568   unsigned char const *p = block;
 569   uintmax_t i;
 570   int pad_remaining = pad;
 571   for (i = fields; blank < i; i--)
 572     {
 573       int next_pad = pad * (i - 1) / fields;
 574       unsigned char c = *p++;
 575       char const *s;
 576       char buf[4];
 577
 578       switch (c)
 579         {
 580         case '\0':
 581           s = "\\0";
 582           break;
 583
 584         case '\a':
 585           s = "\\a";
 586           break;
 587
 588         case '\b':
 589           s = "\\b";
 590           break;
 591
 592         case '\f':
 593           s = "\\f";
 594           break;
 595
 596         case '\n':
 597           s = "\\n";
 598           break;
 599
 600         case '\r':
 601           s = "\\r";
 602           break;
 603
 604         case '\t':
 605           s = "\\t";
 606           break;
 607
 608         case '\v':
 609           s = "\\v";
 610           break;
 611
 612         default:
 613           sprintf (buf, (isprint (c) ? "%c" : "%03o"), c);
 614           s = buf;
 615         }
 616
 617       xprintf ("%*s", pad_remaining - next_pad + width, s);
 618       pad_remaining = next_pad;
 619     }
 620 }
 621
 622 /* Convert a null-terminated (possibly zero-length) string S to an
 623    int value.  If S points to a non-digit set *P to S,
 624    *VAL to 0, and return true.  Otherwise, accumulate the integer value of
 625    the string of digits.  If the string of digits represents a value
 626    larger than INT_MAX, don't modify *VAL or *P and return false.
 627    Otherwise, advance *P to the first non-digit after S, set *VAL to
 628    the result of the conversion and return true.  */
 629
 630 static bool
 631 simple_strtoi (char const *s, char const **p, int *val)
 632 {
 633   int sum;
 634
 635   for (sum = 0; ISDIGIT (*s); s++)
 636     if (ckd_mul (&sum, sum, 10) || ckd_add (&sum, sum, *s - '0'))
 637       return false;
 638   *p = s;
 639   *val = sum;
 640   return true;
 641 }
 642
 643 /* If S points to a single valid modern od format string, put
 644    a description of that format in *TSPEC, make *NEXT point at the
 645    character following the just-decoded format (if *NEXT is non-null),
 646    and return true.  If S is not valid, don't modify *NEXT or *TSPEC,
 647    give a diagnostic, and return false.  For example, if S were
 648    "d4afL" *NEXT would be set to "afL" and *TSPEC would be
 649      {
 650        fmt = SIGNED_DECIMAL;
 651        size = INT or LONG; (whichever integral_type_size[4] resolves to)
 652        print_function = print_int; (assuming size == INT)
 653        field_width = 11;
 654        fmt_string = "%*d";
 655       }
 656    pad_width is determined later, but is at least as large as the
 657    number of fields printed per row.
 658    S_ORIG is solely for reporting errors.  It should be the full format
 659    string argument.
 660    */
 661
 662 static bool ATTRIBUTE_NONNULL ()
 663 decode_one_format (char const *s_orig, char const *s, char const **next,
 664                    struct tspec *tspec)
 665 {
 666   enum size_spec size_spec;
 667   int size;
 668   enum output_format fmt;
 669   void (*print_function) (size_t, size_t, void const *, char const *,
 670                           int, int);
 671   char const *p;
 672   char c;
 673   int field_width;
 674
 675   switch (*s)
 676     {
 677     case 'd':
 678     case 'o':
 679     case 'u':
 680     case 'x':
 681       c = *s;
 682       ++s;
 683       switch (*s)
 684         {
 685         case 'C':
 686           ++s;
 687           size = sizeof (char);
 688           break;
 689
 690         case 'S':
 691           ++s;
 692           size = sizeof (short int);
 693           break;
 694
 695         case 'I':
 696           ++s;
 697           size = sizeof (int);
 698           break;
 699
 700         case 'L':
 701           ++s;
 702           size = sizeof (long int);
 703           break;
 704
 705         default:
 706           if (! simple_strtoi (s, &p, &size))
 707             {
 708               /* The integer at P in S would overflow an int.
 709                  A digit string that long is sufficiently odd looking
 710                  that the following diagnostic is sufficient.  */
 711               error (0, 0, _("invalid type string %s"), quote (s_orig));
 712               return false;
 713             }
 714           if (p == s)
 715             size = sizeof (int);
 716           else
 717             {
 718               if (MAX_INTEGRAL_TYPE_SIZE < size
 719                   || integral_type_size[size] == NO_SIZE)
 720                 {
 721                   error (0, 0, _("invalid type string %s;\nthis system"
 722                                  " doesn't provide a %d-byte integral type"),
 723                          quote (s_orig), size);
 724                   return false;
 725                 }
 726               s = p;
 727             }
 728           break;
 729         }
 730
 731 #define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format)      \
 732   ((Spec) == LONG_LONG ? (Max_format)                                   \
 733    : ((Spec) == LONG ? (Long_format)                                    \
 734       : (Min_format)))                                                  \
 735
 736       size_spec = integral_type_size[size];
 737
 738       switch (c)
 739         {
 740         case 'd':
 741           fmt = SIGNED_DECIMAL;
 742           field_width = bytes_to_signed_dec_digits[size];
 743           sprintf (tspec->fmt_string, "%%*%s",
 744                    ISPEC_TO_FORMAT (size_spec, "d", "ld", "jd"));
 745           break;
 746
 747         case 'o':
 748           fmt = OCTAL;
 749           sprintf (tspec->fmt_string, "%%*.%d%s",
 750                    (field_width = bytes_to_oct_digits[size]),
 751                    ISPEC_TO_FORMAT (size_spec, "o", "lo", "jo"));
 752           break;
 753
 754         case 'u':
 755           fmt = UNSIGNED_DECIMAL;
 756           field_width = bytes_to_unsigned_dec_digits[size];
 757           sprintf (tspec->fmt_string, "%%*%s",
 758                    ISPEC_TO_FORMAT (size_spec, "u", "lu", "ju"));
 759           break;
 760
 761         case 'x':
 762           fmt = HEXADECIMAL;
 763           sprintf (tspec->fmt_string, "%%*.%d%s",
 764                    (field_width = bytes_to_hex_digits[size]),
 765                    ISPEC_TO_FORMAT (size_spec, "x", "lx", "jx"));
 766           break;
 767
 768         default:
 769           unreachable ();
 770         }
 771
 772       switch (size_spec)
 773         {
 774         case CHAR:
 775           print_function = (fmt == SIGNED_DECIMAL
 776                             ? print_s_char
 777                             : print_char);
 778           break;
 779
 780         case SHORT:
 781           print_function = (fmt == SIGNED_DECIMAL
 782                             ? print_s_short
 783                             : print_short);
 784           break;
 785
 786         case INT:
 787           print_function = print_int;
 788           break;
 789
 790         case LONG:
 791           print_function = print_long;
 792           break;
 793
 794         case LONG_LONG:
 795           print_function = print_long_long;
 796           break;
 797
 798         default:
 799           affirm (false);
 800         }
 801       break;
 802
 803     case 'f':
 804       fmt = FLOATING_POINT;
 805       ++s;
 806       switch (*s)
 807         {
 808         case 'B':
 809           ++s;
 810           fmt = BFLOATING_POINT;
 811           size = sizeof (bfloat16);
 812           break;
 813
 814         case 'H':
 815           ++s;
 816           fmt = HFLOATING_POINT;
 817           size = sizeof (float16);
 818           break;
 819
 820         case 'F':
 821           ++s;
 822           size = sizeof (float);
 823           break;
 824
 825         case 'D':
 826           ++s;
 827           size = sizeof (double);
 828           break;
 829
 830         case 'L':
 831           ++s;
 832           size = sizeof (long double);
 833           break;
 834
 835         default:
 836           if (! simple_strtoi (s, &p, &size))
 837             {
 838               /* The integer at P in S would overflow an int.
 839                  A digit string that long is sufficiently odd looking
 840                  that the following diagnostic is sufficient.  */
 841               error (0, 0, _("invalid type string %s"), quote (s_orig));
 842               return false;
 843             }
 844           if (p == s)
 845             size = sizeof (double);
 846           else
 847             {
 848               if (size > MAX_FP_TYPE_SIZE
 849                   || fp_type_size[size] == NO_SIZE
 850                   || (! FLOAT16_SUPPORTED && BF16_SUPPORTED
 851                       && size == sizeof (bfloat16))
 852                   )
 853                 {
 854                   error (0, 0,
 855                          _("invalid type string %s;\n"
 856                            "this system doesn't provide a %d-byte"
 857                            " floating point type"),
 858                          quote (s_orig), size);
 859                   return false;
 860                 }
 861               s = p;
 862             }
 863           break;
 864         }
 865       size_spec = fp_type_size[size];
 866
 867       if ((! FLOAT16_SUPPORTED && fmt == HFLOATING_POINT)
 868           || (! BF16_SUPPORTED && fmt == BFLOATING_POINT))
 869       {
 870         error (0, 0,
 871                _("this system doesn't provide a %s floating point type"),
 872                quote (s_orig));
 873         return false;
 874       }
 875
 876       {
 877         struct lconv const *locale = localeconv ();
 878         size_t decimal_point_len =
 879           (locale->decimal_point[0] ? strlen (locale->decimal_point) : 1);
 880
 881         switch (size_spec)
 882           {
 883           case FLOAT_HALF:
 884             print_function = fmt == BFLOATING_POINT
 885                              ? print_bfloat : print_halffloat;
 886             field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
 887             break;
 888
 889           case FLOAT_SINGLE:
 890             print_function = print_float;
 891             field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
 892             break;
 893
 894           case FLOAT_DOUBLE:
 895             print_function = print_double;
 896             field_width = DBL_STRLEN_BOUND_L (decimal_point_len);
 897             break;
 898
 899           case FLOAT_LONG_DOUBLE:
 900             print_function = print_long_double;
 901             field_width = LDBL_STRLEN_BOUND_L (decimal_point_len);
 902             break;
 903
 904           default:
 905             affirm (false);
 906           }
 907
 908         break;
 909       }
 910
 911     case 'a':
 912       ++s;
 913       fmt = NAMED_CHARACTER;
 914       size_spec = CHAR;
 915       print_function = print_named_ascii;
 916       field_width = 3;
 917       break;
 918
 919     case 'c':
 920       ++s;
 921       fmt = CHARACTER;
 922       size_spec = CHAR;
 923       print_function = print_ascii;
 924       field_width = 3;
 925       break;
 926
 927     default:
 928       error (0, 0, _("invalid character '%c' in type string %s"),
 929              *s, quote (s_orig));
 930       return false;
 931     }
 932
 933   tspec->size = size_spec;
 934   tspec->fmt = fmt;
 935   tspec->print_function = print_function;
 936
 937   tspec->field_width = field_width;
 938   tspec->hexl_mode_trailer = (*s == 'z');
 939   if (tspec->hexl_mode_trailer)
 940     s++;
 941
 942   *next = s;
 943   return true;
 944 }
 945
 946 /* Given a list of one or more input filenames FILE_LIST, set the global
 947    file pointer IN_STREAM and the global string INPUT_FILENAME to the
 948    first one that can be successfully opened. Modify FILE_LIST to
 949    reference the next filename in the list.  A file name of "-" is
 950    interpreted as standard input.  If any file open fails, give an error
 951    message and return false.  */
 952
 953 static bool
 954 open_next_file (void)
 955 {
 956   bool ok = true;
 957
 958   do
 959     {
 960       input_filename = *file_list;
 961       if (input_filename == nullptr)
 962         return ok;
 963       ++file_list;
 964
 965       if (STREQ (input_filename, "-"))
 966         {
 967           input_filename = _("standard input");
 968           in_stream = stdin;
 969           have_read_stdin = true;
 970           xset_binary_mode (STDIN_FILENO, O_BINARY);
 971         }
 972       else
 973         {
 974           in_stream = fopen (input_filename, (O_BINARY ? "rb" : "r"));
 975           if (in_stream == nullptr)
 976             {
 977               error (0, errno, "%s", quotef (input_filename));
 978               ok = false;
 979             }
 980         }
 981     }
 982   while (in_stream == nullptr);
 983
 984   if (limit_bytes_to_format && !flag_dump_strings)
 985     setvbuf (in_stream, nullptr, _IONBF, 0);
 986
 987   return ok;
 988 }
 989
 990 /* Test whether there have been errors on in_stream, and close it if
 991    it is not standard input.  Return false if there has been an error
 992    on in_stream or stdout; return true otherwise.  This function will
 993    report more than one error only if both a read and a write error
 994    have occurred.  IN_ERRNO, if nonzero, is the error number
 995    corresponding to the most recent action for IN_STREAM.  */
 996
 997 static bool
 998 check_and_close (int in_errno)
 999 {
1000   bool ok = true;
1001
1002   if (in_stream != nullptr)
1003     {
1004       if (!ferror (in_stream))
1005         in_errno = 0;
1006       if (STREQ (file_list[-1], "-"))
1007         clearerr (in_stream);
1008       else if (fclose (in_stream) != 0 && !in_errno)
1009         in_errno = errno;
1010       if (in_errno)
1011         {
1012           error (0, in_errno, "%s", quotef (input_filename));
1013           ok = false;
1014         }
1015
1016       in_stream = nullptr;
1017     }
1018
1019   if (ferror (stdout))
1020     {
1021       error (0, 0, _("write error"));
1022       ok = false;
1023     }
1024
1025   return ok;
1026 }
1027
1028 /* Decode the modern od format string S.  Append the decoded
1029    representation to the global array SPEC, reallocating SPEC if
1030    necessary.  Return true if S is valid.  */
1031
1032 static bool ATTRIBUTE_NONNULL ()
1033 decode_format_string (char const *s)
1034 {
1035   char const *s_orig = s;
1036
1037   while (*s != '\0')
1038     {
1039       char const *next;
1040
1041       if (n_specs_allocated <= n_specs)
1042         spec = X2NREALLOC (spec, &n_specs_allocated);
1043
1044       if (! decode_one_format (s_orig, s, &next, &spec[n_specs]))
1045         return false;
1046
1047       affirm (s != next);
1048       s = next;
1049       ++n_specs;
1050     }
1051
1052   return true;
1053 }
1054
1055 /* Given a list of one or more input filenames FILE_LIST, set the global
1056    file pointer IN_STREAM to position N_SKIP in the concatenation of
1057    those files.  If any file operation fails or if there are fewer than
1058    N_SKIP bytes in the combined input, give an error message and return
1059    false.  When possible, use seek rather than read operations to
1060    advance IN_STREAM.  */
1061
1062 static bool
1063 skip (uintmax_t n_skip)
1064 {
1065   bool ok = true;
1066   int in_errno = 0;
1067
1068   if (n_skip == 0)
1069     return true;
1070
1071   while (in_stream != nullptr)  /* EOF.  */
1072     {
1073       struct stat file_stats;
1074
1075       /* First try seeking.  For large offsets, this extra work is
1076          worthwhile.  If the offset is below some threshold it may be
1077          more efficient to move the pointer by reading.  There are two
1078          issues when trying to seek:
1079            - the file must be seekable.
1080            - before seeking to the specified position, make sure
1081              that the new position is in the current file.
1082              Try to do that by getting file's size using fstat.
1083              But that will work only for regular files.  */
1084
1085       if (fstat (fileno (in_stream), &file_stats) == 0)
1086         {
1087           bool usable_size = usable_st_size (&file_stats);
1088
1089           /* The st_size field is valid for regular files.
1090              If the number of bytes left to skip is larger than
1091              the size of the current file, we can decrement n_skip
1092              and go on to the next file.  Skip this optimization also
1093              when st_size is no greater than the block size, because
1094              some kernels report nonsense small file sizes for
1095              proc-like file systems.  */
1096           if (usable_size && STP_BLKSIZE (&file_stats) < file_stats.st_size)
1097             {
1098               if ((uintmax_t) file_stats.st_size < n_skip)
1099                 n_skip -= file_stats.st_size;
1100               else
1101                 {
1102                   if (fseeko (in_stream, n_skip, SEEK_CUR) != 0)
1103                     {
1104                       in_errno = errno;
1105                       ok = false;
1106                     }
1107                   n_skip = 0;
1108                 }
1109             }
1110
1111           else if (!usable_size && fseeko (in_stream, n_skip, SEEK_CUR) == 0)
1112             n_skip = 0;
1113
1114           /* If it's not a regular file with nonnegative size,
1115              or if it's so small that it might be in a proc-like file system,
1116              position the file pointer by reading.  */
1117
1118           else
1119             {
1120               char buf[BUFSIZ];
1121               size_t n_bytes_read, n_bytes_to_read = BUFSIZ;
1122
1123               while (0 < n_skip)
1124                 {
1125                   if (n_skip < n_bytes_to_read)
1126                     n_bytes_to_read = n_skip;
1127                   n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream);
1128                   n_skip -= n_bytes_read;
1129                   if (n_bytes_read != n_bytes_to_read)
1130                     {
1131                       if (ferror (in_stream))
1132                         {
1133                           in_errno = errno;
1134                           ok = false;
1135                           n_skip = 0;
1136                           break;
1137                         }
1138                       if (feof (in_stream))
1139                         break;
1140                     }
1141                 }
1142             }
1143
1144           if (n_skip == 0)
1145             break;
1146         }
1147
1148       else   /* cannot fstat() file */
1149         {
1150           error (0, errno, "%s", quotef (input_filename));
1151           ok = false;
1152         }
1153
1154       ok &= check_and_close (in_errno);
1155
1156       ok &= open_next_file ();
1157     }
1158
1159   if (n_skip != 0)
1160     error (EXIT_FAILURE, 0, _("cannot skip past end of combined input"));
1161
1162   return ok;
1163 }
1164
1165 static void
1166 format_address_none (MAYBE_UNUSED uintmax_t address,
1167                      MAYBE_UNUSED char c)
1168 {
1169 }
1170
1171 static void
1172 format_address_std (uintmax_t address, char c)
1173 {
1174   char buf[MAX_ADDRESS_LENGTH + 2];
1175   char *p = buf + sizeof buf;
1176   char const *pbound;
1177
1178   *--p = '\0';
1179   *--p = c;
1180   pbound = p - address_pad_len;
1181
1182   /* Use a special case of the code for each base.  This is measurably
1183      faster than generic code.  */
1184   switch (address_base)
1185     {
1186     case 8:
1187       do
1188         *--p = '0' + (address & 7);
1189       while ((address >>= 3) != 0);
1190       break;
1191
1192     case 10:
1193       do
1194         *--p = '0' + (address % 10);
1195       while ((address /= 10) != 0);
1196       break;
1197
1198     case 16:
1199       do
1200         *--p = "0123456789abcdef"[address & 15];
1201       while ((address >>= 4) != 0);
1202       break;
1203     }
1204
1205   while (pbound < p)
1206     *--p = '0';
1207
1208   fputs (p, stdout);
1209 }
1210
/* Write ADDRESS enclosed in parentheses, using format_address_std for
   the digits, then write C unless it is the null character.  */

static void
format_address_paren (uintmax_t address, char c)
{
  putchar ('(');
  /* The closing parenthesis is emitted as the trailing character.  */
  format_address_std (address, ')');

  if (c != '\0')
    putchar (c);
}
1219
1220 static void
1221 format_address_label (uintmax_t address, char c)
1222 {
1223   format_address_std (address, ' ');
1224   format_address_paren (address + pseudo_offset, c);
1225 }
1226
1227 /* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
1228    of the N_SPEC format specs.  CURRENT_OFFSET is the byte address of
1229    CURR_BLOCK in the concatenation of input files, and it is printed
1230    (optionally) only before the output line associated with the first
1231    format spec.  When duplicate blocks are being abbreviated, the output
1232    for a sequence of identical input blocks is the output for the first
1233    block followed by an asterisk alone on a line.  It is valid to compare
1234    the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
1235    That condition may be false only for the last input block.  */
1236
1237 static void
1238 write_block (uintmax_t current_offset, size_t n_bytes,
1239              char const *prev_block, char const *curr_block)
1240 {
1241   static bool first = true;
1242   static bool prev_pair_equal = false;
1243
1244 #define EQUAL_BLOCKS(b1, b2) (memcmp (b1, b2, bytes_per_block) == 0)
1245
1246   if (abbreviate_duplicate_blocks
1247       && !first && n_bytes == bytes_per_block
1248       && EQUAL_BLOCKS (prev_block, curr_block))
1249     {
1250       if (prev_pair_equal)
1251         {
1252           /* The two preceding blocks were equal, and the current
1253              block is the same as the last one, so print nothing.  */
1254         }
1255       else
1256         {
1257           printf ("*\n");
1258           prev_pair_equal = true;
1259         }
1260     }
1261   else
1262     {
1263       prev_pair_equal = false;
1264       for (size_t i = 0; i < n_specs; i++)
1265         {
1266           int datum_width = width_bytes[spec[i].size];
1267           int fields_per_block = bytes_per_block / datum_width;
1268           int blank_fields = (bytes_per_block - n_bytes) / datum_width;
1269           if (i == 0)
1270             format_address (current_offset, '\0');
1271           else
1272             printf ("%*s", address_pad_len, "");
1273           (*spec[i].print_function) (fields_per_block, blank_fields,
1274                                      curr_block, spec[i].fmt_string,
1275                                      spec[i].field_width, spec[i].pad_width);
1276           if (spec[i].hexl_mode_trailer)
1277             {
1278               /* space-pad out to full line width, then dump the trailer */
1279               int field_width = spec[i].field_width;
1280               int pad_width = (spec[i].pad_width * blank_fields
1281                                / fields_per_block);
1282               printf ("%*s", blank_fields * field_width + pad_width, "");
1283               dump_hexl_mode_trailer (n_bytes, curr_block);
1284             }
1285           putchar ('\n');
1286         }
1287     }
1288   first = false;
1289 }
1290
1291 /* Read a single byte into *C from the concatenation of the input files
1292    named in the global array FILE_LIST.  On the first call to this
1293    function, the global variable IN_STREAM is expected to be an open
1294    stream associated with the input file INPUT_FILENAME.  If IN_STREAM
1295    is at end-of-file, close it and update the global variables IN_STREAM
1296    and INPUT_FILENAME so they correspond to the next file in the list.
1297    Then try to read a byte from the newly opened file.  Repeat if
1298    necessary until EOF is reached for the last file in FILE_LIST, then
1299    set *C to EOF and return.  Subsequent calls do likewise.  Return
1300    true if successful.  */
1301
1302 static bool
1303 read_char (int *c)
1304 {
1305   bool ok = true;
1306
1307   *c = EOF;
1308
1309   while (in_stream != nullptr)  /* EOF.  */
1310     {
1311       *c = fgetc (in_stream);
1312
1313       if (*c != EOF)
1314         break;
1315
1316       ok &= check_and_close (errno);
1317
1318       ok &= open_next_file ();
1319     }
1320
1321   return ok;
1322 }
1323
1324 /* Read N bytes into BLOCK from the concatenation of the input files
1325    named in the global array FILE_LIST.  On the first call to this
1326    function, the global variable IN_STREAM is expected to be an open
1327    stream associated with the input file INPUT_FILENAME.  If all N
1328    bytes cannot be read from IN_STREAM, close IN_STREAM and update
1329    the global variables IN_STREAM and INPUT_FILENAME.  Then try to
1330    read the remaining bytes from the newly opened file.  Repeat if
1331    necessary until EOF is reached for the last file in FILE_LIST.
1332    On subsequent calls, don't modify BLOCK and return true.  Set
1333    *N_BYTES_IN_BUFFER to the number of bytes read.  If an error occurs,
1334    it will be detected through ferror when the stream is about to be
1335    closed.  If there is an error, give a message but continue reading
1336    as usual and return false.  Otherwise return true.  */
1337
1338 static bool
1339 read_block (size_t n, char *block, size_t *n_bytes_in_buffer)
1340 {
1341   bool ok = true;
1342
1343   affirm (0 < n && n <= bytes_per_block);
1344
1345   *n_bytes_in_buffer = 0;
1346
1347   while (in_stream != nullptr)  /* EOF.  */
1348     {
1349       size_t n_needed;
1350       size_t n_read;
1351
1352       n_needed = n - *n_bytes_in_buffer;
1353       n_read = fread (block + *n_bytes_in_buffer, 1, n_needed, in_stream);
1354
1355       *n_bytes_in_buffer += n_read;
1356
1357       if (n_read == n_needed)
1358         break;
1359
1360       ok &= check_and_close (errno);
1361
1362       ok &= open_next_file ();
1363     }
1364
1365   return ok;
1366 }
1367
1368 /* Return the least common multiple of the sizes associated
1369    with the format specs.  */
1370
1371 ATTRIBUTE_PURE
1372 static int
1373 get_lcm (void)
1374 {
1375   int l_c_m = 1;
1376
1377   for (size_t i = 0; i < n_specs; i++)
1378     l_c_m = lcm (l_c_m, width_bytes[spec[i].size]);
1379   return l_c_m;
1380 }
1381
1382 /* If S is a valid traditional offset specification with an optional
1383    leading '+' return true and set *OFFSET to the offset it denotes.  */
1384
1385 static bool
1386 parse_old_offset (char const *s, uintmax_t *offset)
1387 {
1388   int radix;
1389
1390   if (*s == '\0')
1391     return false;
1392
1393   /* Skip over any leading '+'. */
1394   if (s[0] == '+')
1395     ++s;
1396
1397   /* Determine the radix we'll use to interpret S.  If there is a '.',
1398      it's decimal, otherwise, if the string begins with '0X'or '0x',
1399      it's hexadecimal, else octal.  */
1400   if (strchr (s, '.') != nullptr)
1401     radix = 10;
1402   else
1403     {
1404       if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
1405         radix = 16;
1406       else
1407         radix = 8;
1408     }
1409
1410   return xstrtoumax (s, nullptr, radix, offset, "Bb") == LONGINT_OK;
1411 }
1412
1413 /* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
1414    formatted block to standard output, and repeat until the specified
1415    maximum number of bytes has been read or until all input has been
1416    processed.  If the last block read is smaller than BYTES_PER_BLOCK
1417    and its size is not a multiple of the size associated with a format
1418    spec, extend the input block with zero bytes until its length is a
1419    multiple of all format spec sizes.  Write the final block.  Finally,
1420    write on a line by itself the offset of the byte after the last byte
1421    read.  Accumulate return values from calls to read_block and
1422    check_and_close, and if any was false, return false.
1423    Otherwise, return true.  */
1424
1425 static bool
1426 dump (void)
1427 {
1428   char *block[2];
1429   uintmax_t current_offset;
1430   bool idx = false;
1431   bool ok = true;
1432   size_t n_bytes_read;
1433
1434   block[0] = xnmalloc (2, bytes_per_block);
1435   block[1] = block[0] + bytes_per_block;
1436
1437   current_offset = n_bytes_to_skip;
1438
1439   if (limit_bytes_to_format)
1440     {
1441       while (ok)
1442         {
1443           size_t n_needed;
1444           if (current_offset >= end_offset)
1445             {
1446               n_bytes_read = 0;
1447               break;
1448             }
1449           n_needed = MIN (end_offset - current_offset,
1450                           (uintmax_t) bytes_per_block);
1451           ok &= read_block (n_needed, block[idx], &n_bytes_read);
1452           if (n_bytes_read < bytes_per_block)
1453             break;
1454           affirm (n_bytes_read == bytes_per_block);
1455           write_block (current_offset, n_bytes_read,
1456                        block[!idx], block[idx]);
1457           if (ferror (stdout))
1458             ok = false;
1459           current_offset += n_bytes_read;
1460           idx = !idx;
1461         }
1462     }
1463   else
1464     {
1465       while (ok)
1466         {
1467           ok &= read_block (bytes_per_block, block[idx], &n_bytes_read);
1468           if (n_bytes_read < bytes_per_block)
1469             break;
1470           affirm (n_bytes_read == bytes_per_block);
1471           write_block (current_offset, n_bytes_read,
1472                        block[!idx], block[idx]);
1473           if (ferror (stdout))
1474             ok = false;
1475           current_offset += n_bytes_read;
1476           idx = !idx;
1477         }
1478     }
1479
1480   if (n_bytes_read > 0)
1481     {
1482       int l_c_m;
1483       size_t bytes_to_write;
1484
1485       l_c_m = get_lcm ();
1486
1487       /* Ensure zero-byte padding up to the smallest multiple of l_c_m that
1488          is at least as large as n_bytes_read.  */
1489       bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
1490
1491       memset (block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
1492       write_block (current_offset, n_bytes_read, block[!idx], block[idx]);
1493       current_offset += n_bytes_read;
1494     }
1495
1496   format_address (current_offset, '\n');
1497
1498   if (limit_bytes_to_format && current_offset >= end_offset)
1499     ok &= check_and_close (0);
1500
1501   free (block[0]);
1502
1503   return ok;
1504 }
1505
1506 /* STRINGS mode.  Find each "string constant" in the input.
1507    A string constant is a run of at least 'string_min' ASCII
1508    graphic (or formatting) characters terminated by a null.
1509    Based on a function written by Richard Stallman for a
1510    traditional version of od.  Return true if successful.  */
1511
1512 static bool
1513 dump_strings (void)
1514 {
1515   size_t bufsize = MAX (100, string_min);
1516   char *buf = xmalloc (bufsize);
1517   uintmax_t address = n_bytes_to_skip;
1518   bool ok = true;
1519
1520   while (true)
1521     {
1522       size_t i;
1523       int c;
1524
1525       /* See if the next 'string_min' chars are all printing chars.  */
1526     tryline:
1527
1528       if (limit_bytes_to_format
1529           && (end_offset < string_min || end_offset - string_min <= address))
1530         break;
1531
1532       for (i = 0; i < string_min; i++)
1533         {
1534           ok &= read_char (&c);
1535           address++;
1536           if (c < 0)
1537             {
1538               free (buf);
1539               return ok;
1540             }
1541           if (! isprint (c))
1542             /* Found a non-printing.  Try again starting with next char.  */
1543             goto tryline;
1544           buf[i] = c;
1545         }
1546
1547       /* We found a run of 'string_min' printable characters.
1548          Now see if it is terminated with a null byte.  */
1549       while (!limit_bytes_to_format || address < end_offset)
1550         {
1551           if (i == bufsize)
1552             {
1553               buf = X2REALLOC (buf, &bufsize);
1554             }
1555           ok &= read_char (&c);
1556           address++;
1557           if (c < 0)
1558             {
1559               free (buf);
1560               return ok;
1561             }
1562           if (c == '\0')
1563             break;              /* It is; print this string.  */
1564           if (! isprint (c))
1565             goto tryline;       /* It isn't; give up on this string.  */
1566           buf[i++] = c;         /* String continues; store it all.  */
1567         }
1568
1569       /* If we get here, the string is all printable and null-terminated,
1570          so print it.  It is all in 'buf' and 'i' is its length.  */
1571       buf[i] = 0;
1572       format_address (address - i - 1, ' ');
1573
1574       for (i = 0; (c = buf[i]); i++)
1575         {
1576           switch (c)
1577             {
1578             case '\a':
1579               fputs ("\\a", stdout);
1580               break;
1581
1582             case '\b':
1583               fputs ("\\b", stdout);
1584               break;
1585
1586             case '\f':
1587               fputs ("\\f", stdout);
1588               break;
1589
1590             case '\n':
1591               fputs ("\\n", stdout);
1592               break;
1593
1594             case '\r':
1595               fputs ("\\r", stdout);
1596               break;
1597
1598             case '\t':
1599               fputs ("\\t", stdout);
1600               break;
1601
1602             case '\v':
1603               fputs ("\\v", stdout);
1604               break;
1605
1606             default:
1607               putc (c, stdout);
1608             }
1609         }
1610       putchar ('\n');
1611     }
1612
1613   /* We reach this point only if we search through
1614      (max_bytes_to_format - string_min) bytes before reaching EOF.  */
1615
1616   free (buf);
1617
1618   ok &= check_and_close (0);
1619   return ok;
1620 }
1621
1622 int
1623 main (int argc, char **argv)
1624 {
1625   int n_files;
1626   size_t i;
1627   int l_c_m;
1628   idx_t desired_width IF_LINT ( = 0);
1629   bool modern = false;
1630   bool width_specified = false;
1631   bool ok = true;
1632   size_t width_per_block = 0;
1633   static char const multipliers[] = "bEGKkMmPQRTYZ0";
1634
1635   /* The old-style 'pseudo starting address' to be printed in parentheses
1636      after any true address.  */
1637   uintmax_t pseudo_start IF_LINT ( = 0);
1638
1639   initialize_main (&argc, &argv);
1640   set_program_name (argv[0]);
1641   setlocale (LC_ALL, "");
1642   bindtextdomain (PACKAGE, LOCALEDIR);
1643   textdomain (PACKAGE);
1644
1645   atexit (close_stdout);
1646
1647   for (i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++)
1648     integral_type_size[i] = NO_SIZE;
1649
1650   integral_type_size[sizeof (char)] = CHAR;
1651   integral_type_size[sizeof (short int)] = SHORT;
1652   integral_type_size[sizeof (int)] = INT;
1653   integral_type_size[sizeof (long int)] = LONG;
1654 #if HAVE_UNSIGNED_LONG_LONG_INT
1655   /* If 'long int' and 'long long int' have the same size, it's fine
1656      to overwrite the entry for 'long' with this one.  */
1657   integral_type_size[sizeof (unsigned_long_long_int)] = LONG_LONG;
1658 #endif
1659
1660   for (i = 0; i <= MAX_FP_TYPE_SIZE; i++)
1661     fp_type_size[i] = NO_SIZE;
1662
1663 #if FLOAT16_SUPPORTED
1664   fp_type_size[sizeof (float16)] = FLOAT_HALF;
1665 #elif BF16_SUPPORTED
1666   fp_type_size[sizeof (bfloat16)] = FLOAT_HALF;
1667 #endif
1668   fp_type_size[sizeof (float)] = FLOAT_SINGLE;
1669   /* The array entry for 'double' is filled in after that for 'long double'
1670      so that if they are the same size, we avoid any overhead of
1671      long double computation in libc.  */
1672   fp_type_size[sizeof (long double)] = FLOAT_LONG_DOUBLE;
1673   fp_type_size[sizeof (double)] = FLOAT_DOUBLE;
1674
1675   n_specs = 0;
1676   n_specs_allocated = 0;
1677   spec = nullptr;
1678
1679   format_address = format_address_std;
1680   address_base = 8;
1681   address_pad_len = 7;
1682   flag_dump_strings = false;
1683
1684   while (true)
1685     {
1686       uintmax_t tmp;
1687       enum strtol_error s_err;
1688       int oi = -1;
1689       int c = getopt_long (argc, argv, short_options, long_options, &oi);
1690       if (c == -1)
1691         break;
1692
1693       switch (c)
1694         {
1695         case 'A':
1696           modern = true;
1697           switch (optarg[0])
1698             {
1699             case 'd':
1700               format_address = format_address_std;
1701               address_base = 10;
1702               address_pad_len = 7;
1703               break;
1704             case 'o':
1705               format_address = format_address_std;
1706               address_base = 8;
1707               address_pad_len = 7;
1708               break;
1709             case 'x':
1710               format_address = format_address_std;
1711               address_base = 16;
1712               address_pad_len = 6;
1713               break;
1714             case 'n':
1715               format_address = format_address_none;
1716               address_pad_len = 0;
1717               break;
1718             default:
1719               error (EXIT_FAILURE, 0,
1720                      _("invalid output address radix '%c';"
1721                        " it must be one character from [doxn]"),
1722                      optarg[0]);
1723               break;
1724             }
1725           break;
1726
1727         case 'j':
1728           modern = true;
1729           s_err = xstrtoumax (optarg, nullptr, 0,
1730                               &n_bytes_to_skip, multipliers);
1731           if (s_err != LONGINT_OK)
1732             xstrtol_fatal (s_err, oi, c, long_options, optarg);
1733           break;
1734
1735         case 'N':
1736           modern = true;
1737           limit_bytes_to_format = true;
1738
1739           s_err = xstrtoumax (optarg, nullptr, 0, &max_bytes_to_format,
1740                               multipliers);
1741           if (s_err != LONGINT_OK)
1742             xstrtol_fatal (s_err, oi, c, long_options, optarg);
1743           break;
1744
1745         case 'S':
1746           modern = true;
1747           if (optarg == nullptr)
1748             string_min = 3;
1749           else
1750             {
1751               s_err = xstrtoumax (optarg, nullptr, 0, &tmp, multipliers);
1752               if (s_err != LONGINT_OK)
1753                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
1754
1755               /* The minimum string length may be no larger than SIZE_MAX,
1756                  since we may allocate a buffer of this size.  */
1757               if (SIZE_MAX < tmp)
1758                 error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
1759
1760               string_min = tmp;
1761             }
1762           flag_dump_strings = true;
1763           break;
1764
1765         case 't':
1766           modern = true;
1767           ok &= decode_format_string (optarg);
1768           break;
1769
1770         case 'v':
1771           modern = true;
1772           abbreviate_duplicate_blocks = false;
1773           break;
1774
1775         case TRADITIONAL_OPTION:
1776           traditional = true;
1777           break;
1778
1779         case ENDIAN_OPTION:
1780           switch (XARGMATCH ("--endian", optarg, endian_args, endian_types))
1781             {
1782               case endian_big:
1783                   input_swap = ! WORDS_BIGENDIAN;
1784                   break;
1785               case endian_little:
1786                   input_swap = WORDS_BIGENDIAN;
1787                   break;
1788             }
1789           break;
1790
1791           /* The next several cases map the traditional format
1792              specification options to the corresponding modern format
1793              specs.  GNU od accepts any combination of old- and
1794              new-style options.  Format specification options accumulate.
1795              The obsolescent and undocumented formats are compatible
1796              with FreeBSD 4.10 od.  */
1797
1798 #define CASE_OLD_ARG(old_char,new_string)               \
1799         case old_char:                                  \
1800           ok &= decode_format_string (new_string);      \
1801           break
1802
1803           CASE_OLD_ARG ('a', "a");
1804           CASE_OLD_ARG ('b', "o1");
1805           CASE_OLD_ARG ('c', "c");
1806           CASE_OLD_ARG ('D', "u4"); /* obsolescent and undocumented */
1807           CASE_OLD_ARG ('d', "u2");
1808         case 'F': /* obsolescent and undocumented alias */
1809           CASE_OLD_ARG ('e', "fD"); /* obsolescent and undocumented */
1810           CASE_OLD_ARG ('f', "fF");
1811         case 'X': /* obsolescent and undocumented alias */
1812           CASE_OLD_ARG ('H', "x4"); /* obsolescent and undocumented */
1813           CASE_OLD_ARG ('i', "dI");
1814         case 'I': case 'L': /* obsolescent and undocumented aliases */
1815           CASE_OLD_ARG ('l', "dL");
1816           CASE_OLD_ARG ('O', "o4"); /* obsolescent and undocumented */
1817         case 'B': /* obsolescent and undocumented alias */
1818           CASE_OLD_ARG ('o', "o2");
1819           CASE_OLD_ARG ('s', "d2");
1820         case 'h': /* obsolescent and undocumented alias */
1821           CASE_OLD_ARG ('x', "x2");
1822
1823 #undef CASE_OLD_ARG
1824
1825         case 'w':
1826           modern = true;
1827           width_specified = true;
1828           if (optarg == nullptr)
1829             {
1830               desired_width = 32;
1831             }
1832           else
1833             {
1834               intmax_t w_tmp;
1835               s_err = xstrtoimax (optarg, nullptr, 10, &w_tmp, "");
1836               if (s_err != LONGINT_OK || w_tmp <= 0)
1837                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
1838               if (ckd_add (&desired_width, w_tmp, 0))
1839                 error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
1840             }
1841           break;
1842
1843         case_GETOPT_HELP_CHAR;
1844
1845         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1846
1847         default:
1848           usage (EXIT_FAILURE);
1849           break;
1850         }
1851     }
1852
1853   if (!ok)
1854     return EXIT_FAILURE;
1855
1856   if (flag_dump_strings && n_specs > 0)
1857     error (EXIT_FAILURE, 0,
1858            _("no type may be specified when dumping strings"));
1859
1860   n_files = argc - optind;
1861
1862   /* If the --traditional option is used, there may be from
1863      0 to 3 remaining command line arguments;  handle each case
1864      separately.
1865         od [file] [[+]offset[.][b] [[+]label[.][b]]]
1866      The offset and label have the same syntax.
1867
1868      If --traditional is not given, and if no modern options are
1869      given, and if the offset begins with + or (if there are two
1870      operands) a digit, accept only this form, as per POSIX:
1871         od [file] [[+]offset[.][b]]
1872   */
1873
1874   if (!modern || traditional)
1875     {
1876       uintmax_t o1;
1877       uintmax_t o2;
1878
1879       switch (n_files)
1880         {
1881         case 1:
1882           if ((traditional || argv[optind][0] == '+')
1883               && parse_old_offset (argv[optind], &o1))
1884             {
1885               n_bytes_to_skip = o1;
1886               --n_files;
1887               ++argv;
1888             }
1889           break;
1890
1891         case 2:
1892           if ((traditional || argv[optind + 1][0] == '+'
1893                || ISDIGIT (argv[optind + 1][0]))
1894               && parse_old_offset (argv[optind + 1], &o2))
1895             {
1896               if (traditional && parse_old_offset (argv[optind], &o1))
1897                 {
1898                   n_bytes_to_skip = o1;
1899                   flag_pseudo_start = true;
1900                   pseudo_start = o2;
1901                   argv += 2;
1902                   n_files -= 2;
1903                 }
1904               else
1905                 {
1906                   n_bytes_to_skip = o2;
1907                   --n_files;
1908                   argv[optind + 1] = argv[optind];
1909                   ++argv;
1910                 }
1911             }
1912           break;
1913
1914         case 3:
1915           if (traditional
1916               && parse_old_offset (argv[optind + 1], &o1)
1917               && parse_old_offset (argv[optind + 2], &o2))
1918             {
1919               n_bytes_to_skip = o1;
1920               flag_pseudo_start = true;
1921               pseudo_start = o2;
1922               argv[optind + 2] = argv[optind];
1923               argv += 2;
1924               n_files -= 2;
1925             }
1926           break;
1927         }
1928
1929       if (traditional && 1 < n_files)
1930         {
1931           error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1932           error (0, 0, "%s",
1933                  _("compatibility mode supports at most one file"));
1934           usage (EXIT_FAILURE);
1935         }
1936     }
1937
1938   if (flag_pseudo_start)
1939     {
1940       if (format_address == format_address_none)
1941         {
1942           address_base = 8;
1943           address_pad_len = 7;
1944           format_address = format_address_paren;
1945         }
1946       else
1947         format_address = format_address_label;
1948     }
1949
1950   if (limit_bytes_to_format)
1951     {
1952       end_offset = n_bytes_to_skip + max_bytes_to_format;
1953       if (end_offset < n_bytes_to_skip)
1954         error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large"));
1955     }
1956
1957   if (n_specs == 0)
1958     decode_format_string ("oS");
1959
1960   if (n_files > 0)
1961     {
1962       /* Set the global pointer FILE_LIST so that it
1963          references the first file-argument on the command-line.  */
1964
1965       file_list = (char const *const *) &argv[optind];
1966     }
1967   else
1968     {
1969       /* No files were listed on the command line.
1970          Set the global pointer FILE_LIST so that it
1971          references the null-terminated list of one name: "-".  */
1972
1973       file_list = default_file_list;
1974     }
1975
1976   /* open the first input file */
1977   ok = open_next_file ();
1978   if (in_stream == nullptr)
1979     goto cleanup;
1980
1981   /* skip over any unwanted header bytes */
1982   ok &= skip (n_bytes_to_skip);
1983   if (in_stream == nullptr)
1984     goto cleanup;
1985
1986   pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0);
1987
1988   /* Compute output block length.  */
1989   l_c_m = get_lcm ();
1990
1991   if (width_specified)
1992     {
1993       if (desired_width != 0 && desired_width % l_c_m == 0)
1994         bytes_per_block = desired_width;
1995       else
1996         {
1997           error (0, 0, _("warning: invalid width %td; using %d instead"),
1998                  desired_width, l_c_m);
1999           bytes_per_block = l_c_m;
2000         }
2001     }
2002   else
2003     {
2004       if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
2005         bytes_per_block = l_c_m * (DEFAULT_BYTES_PER_BLOCK / l_c_m);
2006       else
2007         bytes_per_block = l_c_m;
2008     }
2009
2010   /* Compute padding necessary to align output block.  */
2011   for (i = 0; i < n_specs; i++)
2012     {
2013       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2014       int block_width = (spec[i].field_width + 1) * fields_per_block;
2015       if (width_per_block < block_width)
2016         width_per_block = block_width;
2017     }
2018   for (i = 0; i < n_specs; i++)
2019     {
2020       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2021       int block_width = spec[i].field_width * fields_per_block;
2022       spec[i].pad_width = width_per_block - block_width;
2023     }
2024
2025 #ifdef DEBUG
2026   printf ("lcm=%d, width_per_block=%zu\n", l_c_m, width_per_block);
2027   for (i = 0; i < n_specs; i++)
2028     {
2029       int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
2030       affirm (bytes_per_block % width_bytes[spec[i].size] == 0);
2031       affirm (1 <= spec[i].pad_width / fields_per_block);
2032       printf ("%d: fmt=\"%s\" in_width=%d out_width=%d pad=%d\n",
2033               i, spec[i].fmt_string, width_bytes[spec[i].size],
2034               spec[i].field_width, spec[i].pad_width);
2035     }
2036 #endif
2037
2038   ok &= (flag_dump_strings ? dump_strings () : dump ());
2039
2040 cleanup:
2041
2042   if (have_read_stdin && fclose (stdin) == EOF)
2043     error (EXIT_FAILURE, errno, _("standard input"));
2044
2045   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
2046 }