src/basenc.c

   1 /* Base64, base32, and similar encoding/decoding strings or files.
   2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>. */
  16
  17 /* Written by Simon Josefsson <simon@josefsson.org>.  */
  18
  19 #include <config.h>
  20
  21 #include <stdio.h>
  22 #include <getopt.h>
  23 #include <sys/types.h>
  24
  25 #include "system.h"
  26 #include "c-ctype.h"
  27 #include "die.h"
  28 #include "error.h"
  29 #include "fadvise.h"
  30 #include "idx.h"
  31 #include "quote.h"
  32 #include "xstrtol.h"
  33 #include "xdectoint.h"
  34 #include "xbinary-io.h"
  35
/* Author list handed to the --version handler in main.  The
   BASE_TYPE == 42 build (the one that defines PROGRAM_NAME as "basenc")
   credits a second author.  */
#if BASE_TYPE == 42
# define AUTHORS \
  proper_name ("Simon Josefsson"), \
  proper_name ("Assaf Gordon")
#else
# define AUTHORS proper_name ("Simon Josefsson")
#endif
  43
/* Select the program being built from BASE_TYPE:
     32 -> "base32", 64 -> "base64", 42 -> the multi-encoding "basenc".
   Any other value, including a missing definition (which evaluates
   to 0 in #if), stops the build with an #error.  */
#if BASE_TYPE == 32
# include "base32.h"
# define PROGRAM_NAME "base32"
#elif BASE_TYPE == 64
# include "base64.h"
# define PROGRAM_NAME "base64"
#elif BASE_TYPE == 42
# include "base32.h"
# include "base64.h"
# include <assert.h>
# define PROGRAM_NAME "basenc"
#else
# error missing/invalid BASE_TYPE definition
#endif
  58
  59
  60
#if BASE_TYPE == 42
/* getopt_long return values for basenc's encoding-selection long
   options.  Numbering starts at CHAR_MAX + 1 so that none of them can
   collide with a single-character short option.  */
enum
{
  BASE64_OPTION = CHAR_MAX + 1,
  BASE64URL_OPTION,
  BASE32_OPTION,
  BASE32HEX_OPTION,
  BASE16_OPTION,
  BASE2MSBF_OPTION,
  BASE2LSBF_OPTION,
  Z85_OPTION
};
#endif
  74
/* Long options passed to getopt_long in main.  --decode, --wrap and
   --ignore-garbage map to their short-option characters; the encoding
   selectors exist only in the basenc build (BASE_TYPE == 42) and map
   to the *_OPTION enumeration values.  The all-zero entry terminates
   the table.  */
static struct option const long_options[] =
{
  {"decode", no_argument, 0, 'd'},
  {"wrap", required_argument, 0, 'w'},
  {"ignore-garbage", no_argument, 0, 'i'},
#if BASE_TYPE == 42
  {"base64",    no_argument, 0, BASE64_OPTION},
  {"base64url", no_argument, 0, BASE64URL_OPTION},
  {"base32",    no_argument, 0, BASE32_OPTION},
  {"base32hex", no_argument, 0, BASE32HEX_OPTION},
  {"base16",    no_argument, 0, BASE16_OPTION},
  {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
  {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
  {"z85",       no_argument, 0, Z85_OPTION},
#endif
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};
  94
  95 void
  96 usage (int status)
  97 {
  98   if (status != EXIT_SUCCESS)
  99     emit_try_help ();
 100   else
 101     {
 102       printf (_("\
 103 Usage: %s [OPTION]... [FILE]\n\
 104 "), program_name);
 105
 106 #if BASE_TYPE == 42
 107       fputs (_("\
 108 basenc encode or decode FILE, or standard input, to standard output.\n\
 109 "), stdout);
 110 #else
 111       printf (_("\
 112 Base%d encode or decode FILE, or standard input, to standard output.\n\
 113 "), BASE_TYPE);
 114 #endif
 115
 116       emit_stdin_note ();
 117       emit_mandatory_arg_note ();
 118 #if BASE_TYPE == 42
 119       fputs (_("\
 120       --base64          same as 'base64' program (RFC4648 section 4)\n\
 121 "), stdout);
 122       fputs (_("\
 123       --base64url       file- and url-safe base64 (RFC4648 section 5)\n\
 124 "), stdout);
 125       fputs (_("\
 126       --base32          same as 'base32' program (RFC4648 section 6)\n\
 127 "), stdout);
 128       fputs (_("\
 129       --base32hex       extended hex alphabet base32 (RFC4648 section 7)\n\
 130 "), stdout);
 131       fputs (_("\
 132       --base16          hex encoding (RFC4648 section 8)\n\
 133 "), stdout);
 134       fputs (_("\
 135       --base2msbf       bit string with most significant bit (msb) first\n\
 136 "), stdout);
 137       fputs (_("\
 138       --base2lsbf       bit string with least significant bit (lsb) first\n\
 139 "), stdout);
 140 #endif
 141       fputs (_("\
 142   -d, --decode          decode data\n\
 143   -i, --ignore-garbage  when decoding, ignore non-alphabet characters\n\
 144   -w, --wrap=COLS       wrap encoded lines after COLS character (default 76).\n\
 145                           Use 0 to disable line wrapping\n\
 146 "), stdout);
 147 #if BASE_TYPE == 42
 148       fputs (_("\
 149       --z85             ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
 150                         when encoding, input length must be a multiple of 4;\n\
 151                         when decoding, input length must be a multiple of 5\n\
 152 "), stdout);
 153 #endif
 154       fputs (HELP_OPTION_DESCRIPTION, stdout);
 155       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 156 #if BASE_TYPE == 42
 157       fputs (_("\
 158 \n\
 159 When decoding, the input may contain newlines in addition to the bytes of\n\
 160 the formal alphabet.  Use --ignore-garbage to attempt to recover\n\
 161 from any other non-alphabet bytes in the encoded stream.\n\
 162 "), stdout);
 163 #else
 164       printf (_("\
 165 \n\
 166 The data are encoded as described for the %s alphabet in RFC 4648.\n\
 167 When decoding, the input may contain newlines in addition to the bytes of\n\
 168 the formal %s alphabet.  Use --ignore-garbage to attempt to recover\n\
 169 from any other non-alphabet bytes in the encoded stream.\n"),
 170               PROGRAM_NAME, PROGRAM_NAME);
 171 #endif
 172       emit_ancillary_info (PROGRAM_NAME);
 173     }
 174
 175   exit (status);
 176 }
 177
/* Number of raw input bytes do_encode reads and encodes per block.  */
#define ENC_BLOCKSIZE (1024 * 3 * 10)

/* Per-program configuration.  The single-encoding builds bind the
   generic base_* names used by do_encode and do_decode directly to the
   base32 or base64 routines with macros.  */
#if BASE_TYPE == 32
# define BASE_LENGTH BASE32_LENGTH
/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (1024 * 5)

/* Ensure that BLOCKSIZE is a multiple of 5 and 8.  */
verify (ENC_BLOCKSIZE % 40 == 0);  /* So padding chars only on last block.  */
verify (DEC_BLOCKSIZE % 40 == 0);  /* So complete encoded blocks are used.  */

# define base_encode base32_encode
# define base_decode_context base32_decode_context
# define base_decode_ctx_init base32_decode_ctx_init
# define base_decode_ctx base32_decode_ctx
# define isbase isbase32
#elif BASE_TYPE == 64
# define BASE_LENGTH BASE64_LENGTH
/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (1024 * 3)

/* Ensure that BLOCKSIZE is a multiple of 3 and 4.  */
verify (ENC_BLOCKSIZE % 12 == 0);  /* So padding chars only on last block.  */
verify (DEC_BLOCKSIZE % 12 == 0);  /* So complete encoded blocks are used.  */

# define base_encode base64_encode
# define base_decode_context base64_decode_context
# define base_decode_ctx_init base64_decode_ctx_init
# define base_decode_ctx base64_decode_ctx
# define isbase isbase64
#elif BASE_TYPE == 42


/* In the basenc build the generic names are function pointers rather
   than macros, so BASE_LENGTH (n) becomes an indirect call.
   NOTE(review): they are presumably assigned in main according to the
   selected encoding option -- that part is not visible here.  */
# define BASE_LENGTH base_length

/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (4200)
verify (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32 */
verify (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64 */

/* Encoded length for a given number of input bytes.  */
static int (*base_length) (int i);
/* Whether CH belongs to the selected alphabet; used by do_decode to
   filter input under --ignore-garbage.  */
static bool (*isbase) (char ch);
/* Encode INLEN bytes at IN into the OUTLEN-byte buffer OUT.  */
static void (*base_encode) (char const *restrict in, idx_t inlen,
                            char *restrict out, idx_t outlen);

/* base16 decoder state: a stored high nibble awaiting its low nibble.  */
struct base16_decode_context
{
  char nibble;
  bool have_nibble;
};

/* Z85 decoder state: the base-85 digits collected so far for the
   current 5-character group, and how many of them are valid.  */
struct z85_decode_context
{
  int i;
  unsigned char octets[5];
};

/* base2 decoder state: the partially assembled output octet.  */
struct base2_decode_context
{
  unsigned char octet;
};

/* Decoder state shared by every basenc encoding.  */
struct base_decode_context
{
  /* Pending-input indicator kept up to date by each decoder: do_decode
     skips the final flush call when this is 0.  */
  int i; /* will be updated manually */
  /* State of whichever decoder is selected.  */
  union {
    struct base64_decode_context base64;
    struct base32_decode_context base32;
    struct base16_decode_context base16;
    struct base2_decode_context base2;
    struct z85_decode_context z85;
  } ctx;
  /* Scratch buffer (and its size) allocated by init_inbuf and grown by
     prepare_inbuf; the base64url and base32hex decoders translate their
     input into it before decoding.  */
  char *inbuf;
  idx_t bufsize;
};
/* Reset CTX before decoding starts.  */
static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
/* Decode INLEN bytes at IN into OUT and store the number of bytes
   produced in *OUTLEN; false means invalid input.  An INLEN of 0 is
   the flush request issued by do_decode at end of input.  */
static bool (*base_decode_ctx) (struct base_decode_context *ctx,
                                char const *restrict in, idx_t inlen,
                                char *restrict out, idx_t *outlen);
#endif
 261
 262
 263
 264
 265 #if BASE_TYPE == 42
 266
 267 static int
 268 base64_length_wrapper (int len)
 269 {
 270   return BASE64_LENGTH (len);
 271 }
 272
 273 static void
 274 base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
 275 {
 276   base64_decode_ctx_init (&ctx->ctx.base64);
 277 }
 278
 279 static bool
 280 base64_decode_ctx_wrapper (struct base_decode_context *ctx,
 281                            char const *restrict in, idx_t inlen,
 282                            char *restrict out, idx_t *outlen)
 283 {
 284   bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
 285   ctx->i = ctx->ctx.base64.i;
 286   return b;
 287 }
 288
 289 static void
 290 init_inbuf (struct base_decode_context *ctx)
 291 {
 292   ctx->bufsize = DEC_BLOCKSIZE;
 293   ctx->inbuf = xcharalloc (ctx->bufsize);
 294 }
 295
 296 static void
 297 prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
 298 {
 299   if (ctx->bufsize < inlen)
 300     {
 301       ctx->bufsize = inlen * 2;
 302       ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char));
 303     }
 304 }
 305
 306
 307 static void
 308 base64url_encode (char const *restrict in, idx_t inlen,
 309                   char *restrict out, idx_t outlen)
 310 {
 311   base64_encode (in, inlen, out, outlen);
 312   /* translate 62nd and 63rd characters */
 313   char *p = out;
 314   while (outlen--)
 315     {
 316       if (*p == '+')
 317         *p = '-';
 318       else if (*p == '/')
 319         *p = '_';
 320       ++p;
 321     }
 322 }
 323
 324 static bool
 325 isbase64url (char ch)
 326 {
 327   return (ch == '-' || ch == '_'
 328           || (ch != '+' && ch != '/' && isbase64 (ch)));
 329 }
 330
 331 static void
 332 base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
 333 {
 334   base64_decode_ctx_init (&ctx->ctx.base64);
 335   init_inbuf (ctx);
 336 }
 337
 338
 339 static bool
 340 base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
 341                               char const *restrict in, idx_t inlen,
 342                               char *restrict out, idx_t *outlen)
 343 {
 344   prepare_inbuf (ctx, inlen);
 345   memcpy (ctx->inbuf, in, inlen);
 346
 347   /* translate 62nd and 63rd characters */
 348   idx_t i = inlen;
 349   char *p = ctx->inbuf;
 350   while (i--)
 351     {
 352       if (*p == '+' || *p == '/')
 353         {
 354           *outlen = 0;
 355           return false; /* reject base64 input */
 356         }
 357       else if (*p == '-')
 358         *p = '+';
 359       else if (*p == '_')
 360         *p = '/';
 361       ++p;
 362     }
 363
 364   bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen,
 365                               out, outlen);
 366   ctx->i = ctx->ctx.base64.i;
 367
 368   return b;
 369 }
 370
 371
 372
 373 static int
 374 base32_length_wrapper (int len)
 375 {
 376   return BASE32_LENGTH (len);
 377 }
 378
 379 static void
 380 base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
 381 {
 382   base32_decode_ctx_init (&ctx->ctx.base32);
 383 }
 384
 385 static bool
 386 base32_decode_ctx_wrapper (struct base_decode_context *ctx,
 387                            char const *restrict in, idx_t inlen,
 388                            char *restrict out, idx_t *outlen)
 389 {
 390   bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
 391   ctx->i = ctx->ctx.base32.i;
 392   return b;
 393 }
 394
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
     to
   0123456789ABCDEFGHIJKLMNOPQRSTUV */
/* Translation table from the standard base32 alphabet to the base32hex
   alphabet.  It is indexed by (character - 0x32), i.e. it starts at
   '2' and ends at 'Z' (0x5a).  The entries for 0x38 .. 0x40 map to
   themselves, so the '=' padding character (0x3d) is left unchanged.  */
static const char base32_norm_to_hex[32 + 9] = {
/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */
  'Q',  'R',  'S',  'T',  'U',  'V',

  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',

/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */
  '8',  '9',  'A',  'B',  'C',  'D',  'E',  'F',

/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */
  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',

/*0x59, 0x5a, */
  'O',  'P',
};
 416
/* 0123456789ABCDEFGHIJKLMNOPQRSTUV
     to
   ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 */
/* Translation table from the base32hex alphabet to the standard base32
   alphabet.  It is indexed by (character - 0x30), i.e. it starts at
   '0'; the highest index used by base32hex_decode_ctx_wrapper is
   'V' - 0x30 == 38.  Only 39 initializers are given, so the last two
   elements of the 41-element array are zero.  */
static const char base32_hex_to_norm[32 + 9] = {
  /* from: 0x30 .. 0x39 ('0' to '9') */
  /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',

  /* 0x3a .. 0x40 are not base32hex characters; they map to themselves.  */
  0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* from: 0x41 .. 0x4A ('A' to 'J') */
  /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',

  /* from: 0x4B .. 0x54 ('K' to 'T') */
  /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',

  /* from: 0x55 .. 0x56 ('U' to 'V') */
  /* to:*/ '6', '7'
};
 435
 436
 437 inline static bool
 438 isbase32hex (char ch)
 439 {
 440   return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'V');
 441 }
 442
 443
 444 static void
 445 base32hex_encode (char const *restrict in, idx_t inlen,
 446                   char *restrict out, idx_t outlen)
 447 {
 448   base32_encode (in, inlen, out, outlen);
 449
 450   for (char *p = out; outlen--; p++)
 451     {
 452       assert (0x32 <= *p && *p <= 0x5a);          /* LCOV_EXCL_LINE */
 453       *p = base32_norm_to_hex[*p - 0x32];
 454     }
 455 }
 456
 457
 458 static void
 459 base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
 460 {
 461   base32_decode_ctx_init (&ctx->ctx.base32);
 462   init_inbuf (ctx);
 463 }
 464
 465
 466 static bool
 467 base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
 468                               char const *restrict in, idx_t inlen,
 469                               char *restrict out, idx_t *outlen)
 470 {
 471   prepare_inbuf (ctx, inlen);
 472
 473   idx_t i = inlen;
 474   char *p = ctx->inbuf;
 475   while (i--)
 476     {
 477       if (isbase32hex (*in))
 478         *p = base32_hex_to_norm[ (int)*in - 0x30];
 479       else
 480         *p = *in;
 481       ++p;
 482       ++in;
 483     }
 484
 485   bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen,
 486                               out, outlen);
 487   ctx->i = ctx->ctx.base32.i;
 488
 489   return b;
 490 }
 491
 492
 493 static bool
 494 isbase16 (char ch)
 495 {
 496   return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'F');
 497 }
 498
 499 static int
 500 base16_length (int len)
 501 {
 502   return len * 2;
 503 }
 504
 505 static const char base16[16] = "0123456789ABCDEF";
 506
 507 static void
 508 base16_encode (char const *restrict in, idx_t inlen,
 509                char *restrict out, idx_t outlen)
 510 {
 511   while (inlen--)
 512     {
 513       unsigned char c = *in;
 514       *out++ = base16[c >> 4];
 515       *out++ = base16[c & 0x0F];
 516       ++in;
 517     }
 518 }
 519
 520
 521 static void
 522 base16_decode_ctx_init (struct base_decode_context *ctx)
 523 {
 524   init_inbuf (ctx);
 525   ctx->ctx.base16.have_nibble = false;
 526   ctx->i = 1;
 527 }
 528
 529
 530 static bool
 531 base16_decode_ctx (struct base_decode_context *ctx,
 532                    char const *restrict in, idx_t inlen,
 533                    char *restrict out, idx_t *outlen)
 534 {
 535   bool ignore_lines = true;  /* for now, always ignore them */
 536
 537   *outlen = 0;
 538
 539   /* inlen==0 is request to flush output.
 540      if there is a dangling high nibble - we are missing the low nibble,
 541      so return false - indicating an invalid input.  */
 542   if (inlen == 0)
 543     return !ctx->ctx.base16.have_nibble;
 544
 545   while (inlen--)
 546     {
 547       if (ignore_lines && *in == '\n')
 548         {
 549           ++in;
 550           continue;
 551         }
 552
 553       int nib = *in++;
 554       if ('0' <= nib && nib <= '9')
 555         nib -= '0';
 556       else if ('A' <= nib && nib <= 'F')
 557         nib -= 'A' - 10;
 558       else
 559         return false; /* garbage - return false */
 560
 561       if (ctx->ctx.base16.have_nibble)
 562         {
 563           /* have both nibbles, write octet */
 564           *out++ = (ctx->ctx.base16.nibble << 4) + nib;
 565           ++(*outlen);
 566         }
 567       else
 568         {
 569           /* Store higher nibble until next one arrives */
 570           ctx->ctx.base16.nibble = nib;
 571         }
 572       ctx->ctx.base16.have_nibble = !ctx->ctx.base16.have_nibble;
 573     }
 574   return true;
 575 }
 576
 577
 578
 579
 580 static int
 581 z85_length (int len)
 582 {
 583   /* Z85 does not allow padding, so no need to round to highest integer.  */
 584   int outlen = (len * 5) / 4;
 585   return outlen;
 586 }
 587
 588 static bool
 589 isz85 (char ch)
 590 {
 591   return c_isalnum (ch) || (strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != NULL);
 592 }
 593
 594 static char const z85_encoding[85] =
 595   "0123456789"
 596   "abcdefghijklmnopqrstuvwxyz"
 597   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 598   ".-:+=^!/*?&<>()[]{}@%$#";
 599
 600 static void
 601 z85_encode (char const *restrict in, idx_t inlen,
 602             char *restrict out, idx_t outlen)
 603 {
 604   int i = 0;
 605   unsigned char quad[4];
 606   idx_t outidx = 0;
 607
 608   while (true)
 609     {
 610       if (inlen == 0)
 611         {
 612           /* no more input, exactly on 4 octet boundary. */
 613           if (i == 0)
 614             return;
 615
 616           /* currently, there's no way to return an error in encoding.  */
 617           die (EXIT_FAILURE, 0,
 618                _("invalid input (length must be multiple of 4 characters)"));
 619         }
 620       else
 621         {
 622           quad[i++] = *in++;
 623           --inlen;
 624         }
 625
 626       /* Got a quad, encode it */
 627       if (i == 4)
 628         {
 629           int_fast64_t val = quad[0];
 630           val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
 631
 632           for (int j = 4; j >= 0; --j)
 633             {
 634               int c = val % 85;
 635               val /= 85;
 636
 637               /* NOTE: if there is padding (which is trimmed by z85
 638                  before outputting the result), the output buffer 'out'
 639                  might not include enough allocated bytes for the padding,
 640                  so don't store them. */
 641               if (outidx + j < outlen)
 642                 out[j] = z85_encoding[c];
 643             }
 644           out += 5;
 645           outidx += 5;
 646           i = 0;
 647         }
 648     }
 649 }
 650
 651 static void
 652 z85_decode_ctx_init (struct base_decode_context *ctx)
 653 {
 654   init_inbuf (ctx);
 655   ctx->ctx.z85.i = 0;
 656   ctx->i = 1;
 657 }
 658
 659
/* Value contributed by the four low-order base-85 digits
   (octets[1] .. octets[4]) of the Z85 group stored in CTX.  The
   operands are unsigned char, so the arithmetic is done in int.  */
# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
  (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) +      \
   ((ctx)->ctx.z85.octets[2] * 85 * 85) +           \
   ((ctx)->ctx.z85.octets[3] * 85) +                \
   ((ctx)->ctx.z85.octets[4]))


/* Value contributed by the most significant base-85 digit (octets[0])
   of the Z85 group stored in CTX.  It is computed in int_fast64_t
   because digit * 85^4 can exceed 32 bits.  */
# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
  ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
 669
/*
 0 -  9:  0 1 2 3 4 5 6 7 8 9
 10 - 19:  a b c d e f g h i j
 20 - 29:  k l m n o p q r s t
 30 - 39:  u v w x y z A B C D
 40 - 49:  E F G H I J K L M N
 50 - 59:  O P Q R S T U V W X
 60 - 69:  Y Z . - : + = ^ ! /   #dummy comment to workaround syntax-check
 70 - 79:  * ? & < > ( ) [ ] {
 80 - 84:  } @ % $ #
*/
/* Reverse of the alphabet listed above: maps a byte to its base-85
   digit value.  The table is indexed by (byte - 33) and covers bytes
   33 ('!') through 125 ('}'); -1 marks a byte that is not part of the
   Z85 alphabet.  */
static signed char const z85_decoding[93] = {
  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
  0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
  64, -1,  73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
  36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
  46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
  56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
  77,  -1, 78,  67,  -1,  -1,                  /* [ \ ] ^ _ ` */
  10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
  20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
  30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
  79, -1,  80                                  /* { | } */
};
 695
 696 static bool
 697 z85_decode_ctx (struct base_decode_context *ctx,
 698                 char const *restrict in, idx_t inlen,
 699                 char *restrict out, idx_t *outlen)
 700 {
 701   bool ignore_lines = true;  /* for now, always ignore them */
 702
 703   *outlen = 0;
 704
 705   /* inlen==0 is request to flush output.
 706      if there are dangling values - we are missing entries,
 707      so return false - indicating an invalid input.  */
 708   if (inlen == 0)
 709     {
 710       if (ctx->ctx.z85.i > 0)
 711         {
 712           /* Z85 variant does not allow padding - input must
 713              be a multiple of 5 - so return error.  */
 714           return false;
 715         }
 716       return true;
 717     }
 718
 719   while (inlen--)
 720     {
 721       if (ignore_lines && *in == '\n')
 722         {
 723           ++in;
 724           continue;
 725         }
 726
 727       /* z85 decoding */
 728       unsigned char c = *in;
 729
 730       if (c >= 33 && c <= 125)
 731         {
 732           signed char ch = z85_decoding[c - 33];
 733           if (ch < 0)
 734             return false; /* garbage - return false */
 735           c = ch;
 736         }
 737       else
 738         return false; /* garbage - return false */
 739
 740       ++in;
 741
 742       ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
 743       if (ctx->ctx.z85.i == 5)
 744         {
 745           /* decode the lowest 4 octets, then check for overflows.  */
 746           int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
 747
 748           /* The Z85 spec and the reference implementation say nothing
 749              about overflows. To be on the safe side, reject them.  */
 750
 751           val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
 752           if ((val >> 24) & ~0xFF)
 753             return false;
 754
 755           *out++ = val >> 24;
 756           *out++ = (val >> 16) & 0xFF;
 757           *out++ = (val >> 8) & 0xFF;
 758           *out++ = val & 0xFF;
 759
 760           *outlen += 4;
 761
 762           ctx->ctx.z85.i = 0;
 763         }
 764     }
 765   ctx->i = ctx->ctx.z85.i;
 766   return true;
 767 }
 768
 769
 770 inline static bool
 771 isbase2 (char ch)
 772 {
 773   return ch == '0' || ch == '1';
 774 }
 775
 776 static int
 777 base2_length (int len)
 778 {
 779   return len * 8;
 780 }
 781
 782
 783 inline static void
 784 base2msbf_encode (char const *restrict in, idx_t inlen,
 785                   char *restrict out, idx_t outlen)
 786 {
 787   while (inlen--)
 788     {
 789       unsigned char c = *in;
 790       for (int i = 0; i < 8; i++)
 791         {
 792           *out++ = c & 0x80 ? '1' : '0';
 793           c <<= 1;
 794         }
 795       outlen -= 8;
 796       ++in;
 797     }
 798 }
 799
 800 inline static void
 801 base2lsbf_encode (char const *restrict in, idx_t inlen,
 802                   char *restrict out, idx_t outlen)
 803 {
 804   while (inlen--)
 805     {
 806       unsigned char c = *in;
 807       for (int i = 0; i < 8; i++)
 808         {
 809           *out++ = c & 0x01 ? '1' : '0';
 810           c >>= 1;
 811         }
 812       outlen -= 8;
 813       ++in;
 814     }
 815 }
 816
 817
 818 static void
 819 base2_decode_ctx_init (struct base_decode_context *ctx)
 820 {
 821   init_inbuf (ctx);
 822   ctx->ctx.base2.octet = 0;
 823   ctx->i = 0;
 824 }
 825
 826
 827 static bool
 828 base2lsbf_decode_ctx (struct base_decode_context *ctx,
 829                       char const *restrict in, idx_t inlen,
 830                       char *restrict out, idx_t *outlen)
 831 {
 832   bool ignore_lines = true;  /* for now, always ignore them */
 833
 834   *outlen = 0;
 835
 836   /* inlen==0 is request to flush output.
 837      if there is a dangling bit - we are missing some bits,
 838      so return false - indicating an invalid input.  */
 839   if (inlen == 0)
 840     return ctx->i == 0;
 841
 842   while (inlen--)
 843     {
 844       if (ignore_lines && *in == '\n')
 845         {
 846           ++in;
 847           continue;
 848         }
 849
 850       if (!isbase2 (*in))
 851         return false;
 852
 853       bool bit = (*in == '1');
 854       ctx->ctx.base2.octet |= bit << ctx->i;
 855       ++ctx->i;
 856
 857       if (ctx->i == 8)
 858         {
 859           *out++ = ctx->ctx.base2.octet;
 860           ctx->ctx.base2.octet = 0;
 861           ++*outlen;
 862           ctx->i = 0;
 863         }
 864
 865       ++in;
 866     }
 867
 868   return true;
 869 }
 870
 871 static bool
 872 base2msbf_decode_ctx (struct base_decode_context *ctx,
 873                       char const *restrict in, idx_t inlen,
 874                       char *restrict out, idx_t *outlen)
 875 {
 876   bool ignore_lines = true;  /* for now, always ignore them */
 877
 878   *outlen = 0;
 879
 880   /* inlen==0 is request to flush output.
 881      if there is a dangling bit - we are missing some bits,
 882      so return false - indicating an invalid input.  */
 883   if (inlen == 0)
 884     return ctx->i == 0;
 885
 886   while (inlen--)
 887     {
 888       if (ignore_lines && *in == '\n')
 889         {
 890           ++in;
 891           continue;
 892         }
 893
 894       if (!isbase2 (*in))
 895         return false;
 896
 897       bool bit = (*in == '1');
 898       if (ctx->i == 0)
 899         ctx->i = 8;
 900       --ctx->i;
 901       ctx->ctx.base2.octet |= bit << ctx->i;
 902
 903       if (ctx->i == 0)
 904         {
 905           *out++ = ctx->ctx.base2.octet;
 906           ctx->ctx.base2.octet = 0;
 907           ++*outlen;
 908           ctx->i = 0;
 909         }
 910
 911       ++in;
 912     }
 913
 914   return true;
 915 }
 916
 917 #endif /* BASE_TYPE == 42, i.e., "basenc"*/
 918
 919
 920
 921 static void
 922 wrap_write (char const *buffer, idx_t len,
 923             idx_t wrap_column, idx_t *current_column, FILE *out)
 924 {
 925   if (wrap_column == 0)
 926     {
 927       /* Simple write. */
 928       if (fwrite (buffer, 1, len, stdout) < len)
 929         die (EXIT_FAILURE, errno, _("write error"));
 930     }
 931   else
 932     for (idx_t written = 0; written < len; )
 933       {
 934         idx_t to_write = MIN (wrap_column - *current_column, len - written);
 935
 936         if (to_write == 0)
 937           {
 938             if (fputc ('\n', out) == EOF)
 939               die (EXIT_FAILURE, errno, _("write error"));
 940             *current_column = 0;
 941           }
 942         else
 943           {
 944             if (fwrite (buffer + written, 1, to_write, stdout) < to_write)
 945               die (EXIT_FAILURE, errno, _("write error"));
 946             *current_column += to_write;
 947             written += to_write;
 948           }
 949       }
 950 }
 951
 952 static _Noreturn void
 953 finish_and_exit (FILE *in, char const *infile)
 954 {
 955   if (fclose (in) != 0)
 956     {
 957       if (STREQ (infile, "-"))
 958         die (EXIT_FAILURE, errno, _("closing standard input"));
 959       else
 960         die (EXIT_FAILURE, errno, "%s", quotef (infile));
 961     }
 962
 963   exit (EXIT_SUCCESS);
 964 }
 965
 966 static _Noreturn void
 967 do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
 968 {
 969   idx_t current_column = 0;
 970   char *inbuf, *outbuf;
 971   idx_t sum;
 972
 973   inbuf = xmalloc (ENC_BLOCKSIZE);
 974   outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
 975
 976   do
 977     {
 978       idx_t n;
 979
 980       sum = 0;
 981       do
 982         {
 983           n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
 984           sum += n;
 985         }
 986       while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
 987
 988       if (sum > 0)
 989         {
 990           /* Process input one block at a time.  Note that ENC_BLOCKSIZE
 991              is sized so that no pad chars will appear in output. */
 992           base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
 993
 994           wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
 995                       &current_column, out);
 996         }
 997     }
 998   while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
 999
1000   /* When wrapping, terminate last line. */
1001   if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
1002     die (EXIT_FAILURE, errno, _("write error"));
1003
1004   if (ferror (in))
1005     die (EXIT_FAILURE, errno, _("read error"));
1006
1007   finish_and_exit (in, infile);
1008 }
1009
1010 static _Noreturn void
1011 do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
1012 {
1013   char *inbuf, *outbuf;
1014   idx_t sum;
1015   struct base_decode_context ctx;
1016
1017   inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
1018   outbuf = xmalloc (DEC_BLOCKSIZE);
1019
1020 #if BASE_TYPE == 42
1021   ctx.inbuf = NULL;
1022 #endif
1023   base_decode_ctx_init (&ctx);
1024
1025   do
1026     {
1027       bool ok;
1028
1029       sum = 0;
1030       do
1031         {
1032           idx_t n = fread (inbuf + sum,
1033                            1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
1034
1035           if (ignore_garbage)
1036             {
1037               for (idx_t i = 0; n > 0 && i < n;)
1038                 {
1039                   if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
1040                     i++;
1041                   else
1042                     memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
1043                 }
1044             }
1045
1046           sum += n;
1047
1048           if (ferror (in))
1049             die (EXIT_FAILURE, errno, _("read error"));
1050         }
1051       while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
1052
1053       /* The following "loop" is usually iterated just once.
1054          However, when it processes the final input buffer, we want
1055          to iterate it one additional time, but with an indicator
1056          telling it to flush what is in CTX.  */
1057       for (int k = 0; k < 1 + !!feof (in); k++)
1058         {
1059           if (k == 1 && ctx.i == 0)
1060             break;
1061           idx_t n = DEC_BLOCKSIZE;
1062           ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
1063
1064           if (fwrite (outbuf, 1, n, out) < n)
1065             die (EXIT_FAILURE, errno, _("write error"));
1066
1067           if (!ok)
1068             die (EXIT_FAILURE, 0, _("invalid input"));
1069         }
1070     }
1071   while (!feof (in));
1072
1073   finish_and_exit (in, infile);
1074 }
1075
1076 int
1077 main (int argc, char **argv)
1078 {
1079   int opt;
1080   FILE *input_fh;
1081   char const *infile;
1082
1083   /* True if --decode has been given and we should decode data. */
1084   bool decode = false;
1085   /* True if we should ignore non-base-alphabetic characters. */
1086   bool ignore_garbage = false;
1087   /* Wrap encoded data around the 76th column, by default. */
1088   idx_t wrap_column = 76;
1089
1090 #if BASE_TYPE == 42
1091   int base_type = 0;
1092 #endif
1093
1094   initialize_main (&argc, &argv);
1095   set_program_name (argv[0]);
1096   setlocale (LC_ALL, "");
1097   bindtextdomain (PACKAGE, LOCALEDIR);
1098   textdomain (PACKAGE);
1099
1100   atexit (close_stdout);
1101
1102   while ((opt = getopt_long (argc, argv, "diw:", long_options, NULL)) != -1)
1103     switch (opt)
1104       {
1105       case 'd':
1106         decode = true;
1107         break;
1108
1109       case 'w':
1110         {
1111           intmax_t w;
1112           strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
1113           if (LONGINT_OVERFLOW < s_err || w < 0)
1114             die (EXIT_FAILURE, 0, "%s: %s",
1115                  _("invalid wrap size"), quote (optarg));
1116           wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
1117         }
1118         break;
1119
1120       case 'i':
1121         ignore_garbage = true;
1122         break;
1123
1124 #if BASE_TYPE == 42
1125       case BASE64_OPTION:
1126       case BASE64URL_OPTION:
1127       case BASE32_OPTION:
1128       case BASE32HEX_OPTION:
1129       case BASE16_OPTION:
1130       case BASE2MSBF_OPTION:
1131       case BASE2LSBF_OPTION:
1132       case Z85_OPTION:
1133         base_type = opt;
1134         break;
1135 #endif
1136
1137       case_GETOPT_HELP_CHAR;
1138
1139       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1140
1141       default:
1142         usage (EXIT_FAILURE);
1143         break;
1144       }
1145
1146 #if BASE_TYPE == 42
1147   switch (base_type)
1148     {
1149     case BASE64_OPTION:
1150       base_length = base64_length_wrapper;
1151       isbase = isbase64;
1152       base_encode = base64_encode;
1153       base_decode_ctx_init = base64_decode_ctx_init_wrapper;
1154       base_decode_ctx = base64_decode_ctx_wrapper;
1155       break;
1156
1157     case BASE64URL_OPTION:
1158       base_length = base64_length_wrapper;
1159       isbase = isbase64url;
1160       base_encode = base64url_encode;
1161       base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
1162       base_decode_ctx = base64url_decode_ctx_wrapper;
1163       break;
1164
1165     case BASE32_OPTION:
1166       base_length = base32_length_wrapper;
1167       isbase = isbase32;
1168       base_encode = base32_encode;
1169       base_decode_ctx_init = base32_decode_ctx_init_wrapper;
1170       base_decode_ctx = base32_decode_ctx_wrapper;
1171       break;
1172
1173     case BASE32HEX_OPTION:
1174       base_length = base32_length_wrapper;
1175       isbase = isbase32hex;
1176       base_encode = base32hex_encode;
1177       base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
1178       base_decode_ctx = base32hex_decode_ctx_wrapper;
1179       break;
1180
1181     case BASE16_OPTION:
1182       base_length = base16_length;
1183       isbase = isbase16;
1184       base_encode = base16_encode;
1185       base_decode_ctx_init = base16_decode_ctx_init;
1186       base_decode_ctx = base16_decode_ctx;
1187       break;
1188
1189     case BASE2MSBF_OPTION:
1190       base_length = base2_length;
1191       isbase = isbase2;
1192       base_encode = base2msbf_encode;
1193       base_decode_ctx_init = base2_decode_ctx_init;
1194       base_decode_ctx = base2msbf_decode_ctx;
1195       break;
1196
1197     case BASE2LSBF_OPTION:
1198       base_length = base2_length;
1199       isbase = isbase2;
1200       base_encode = base2lsbf_encode;
1201       base_decode_ctx_init = base2_decode_ctx_init;
1202       base_decode_ctx = base2lsbf_decode_ctx;
1203       break;
1204
1205     case Z85_OPTION:
1206       base_length = z85_length;
1207       isbase = isz85;
1208       base_encode = z85_encode;
1209       base_decode_ctx_init = z85_decode_ctx_init;
1210       base_decode_ctx = z85_decode_ctx;
1211       break;
1212
1213     default:
1214       error (0, 0, _("missing encoding type"));
1215       usage (EXIT_FAILURE);
1216     }
1217 #endif
1218
1219   if (argc - optind > 1)
1220     {
1221       error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1222       usage (EXIT_FAILURE);
1223     }
1224
1225   if (optind < argc)
1226     infile = argv[optind];
1227   else
1228     infile = "-";
1229
1230   if (STREQ (infile, "-"))
1231     {
1232       xset_binary_mode (STDIN_FILENO, O_BINARY);
1233       input_fh = stdin;
1234     }
1235   else
1236     {
1237       input_fh = fopen (infile, "rb");
1238       if (input_fh == NULL)
1239         die (EXIT_FAILURE, errno, "%s", quotef (infile));
1240     }
1241
1242   fadvise (input_fh, FADVISE_SEQUENTIAL);
1243
1244   if (decode)
1245     do_decode (input_fh, infile, stdout, ignore_garbage);
1246   else
1247     do_encode (input_fh, infile, stdout, wrap_column);
1248 }