sort: pacify GCC 12 false positive
[coreutils.git] / src / basenc.c
blob04857d59e9f17776da87ffe68ebd5a4e27b1e1a5
/* Base64, base32, and similar encoding/decoding strings or files.
   Copyright (C) 2004-2022 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Simon Josefsson <simon@josefsson.org>.  */
19 #include <config.h>
21 #include <stdio.h>
22 #include <getopt.h>
23 #include <sys/types.h>
25 #include "system.h"
26 #include "c-ctype.h"
27 #include "die.h"
28 #include "error.h"
29 #include "fadvise.h"
30 #include "idx.h"
31 #include "quote.h"
32 #include "xstrtol.h"
33 #include "xdectoint.h"
34 #include "xbinary-io.h"
36 #if BASE_TYPE == 42
37 # define AUTHORS \
38 proper_name ("Simon Josefsson"), \
39 proper_name ("Assaf Gordon")
40 #else
41 # define AUTHORS proper_name ("Simon Josefsson")
42 #endif
44 #if BASE_TYPE == 32
45 # include "base32.h"
46 # define PROGRAM_NAME "base32"
47 #elif BASE_TYPE == 64
48 # include "base64.h"
49 # define PROGRAM_NAME "base64"
50 #elif BASE_TYPE == 42
51 # include "base32.h"
52 # include "base64.h"
53 # include <assert.h>
54 # define PROGRAM_NAME "basenc"
55 #else
56 # error missing/invalid BASE_TYPE definition
57 #endif
#if BASE_TYPE == 42
/* Codes returned by getopt_long for the long-only options that select
   an encoding.  They start above CHAR_MAX so that they cannot collide
   with any single-character option.  */
enum
{
  BASE64_OPTION = CHAR_MAX + 1,
  BASE64URL_OPTION,
  BASE32_OPTION,
  BASE32HEX_OPTION,
  BASE16_OPTION,
  BASE2MSBF_OPTION,
  BASE2LSBF_OPTION,
  Z85_OPTION
};
#endif
75 static struct option const long_options[] =
77 {"decode", no_argument, 0, 'd'},
78 {"wrap", required_argument, 0, 'w'},
79 {"ignore-garbage", no_argument, 0, 'i'},
80 #if BASE_TYPE == 42
81 {"base64", no_argument, 0, BASE64_OPTION},
82 {"base64url", no_argument, 0, BASE64URL_OPTION},
83 {"base32", no_argument, 0, BASE32_OPTION},
84 {"base32hex", no_argument, 0, BASE32HEX_OPTION},
85 {"base16", no_argument, 0, BASE16_OPTION},
86 {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
87 {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
88 {"z85", no_argument, 0, Z85_OPTION},
89 #endif
90 {GETOPT_HELP_OPTION_DECL},
91 {GETOPT_VERSION_OPTION_DECL},
92 {NULL, 0, NULL, 0}
95 void
96 usage (int status)
98 if (status != EXIT_SUCCESS)
99 emit_try_help ();
100 else
102 printf (_("\
103 Usage: %s [OPTION]... [FILE]\n\
104 "), program_name);
106 #if BASE_TYPE == 42
107 fputs (_("\
108 basenc encode or decode FILE, or standard input, to standard output.\n\
109 "), stdout);
110 #else
111 printf (_("\
112 Base%d encode or decode FILE, or standard input, to standard output.\n\
113 "), BASE_TYPE);
114 #endif
116 emit_stdin_note ();
117 emit_mandatory_arg_note ();
118 #if BASE_TYPE == 42
119 fputs (_("\
120 --base64 same as 'base64' program (RFC4648 section 4)\n\
121 "), stdout);
122 fputs (_("\
123 --base64url file- and url-safe base64 (RFC4648 section 5)\n\
124 "), stdout);
125 fputs (_("\
126 --base32 same as 'base32' program (RFC4648 section 6)\n\
127 "), stdout);
128 fputs (_("\
129 --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\
130 "), stdout);
131 fputs (_("\
132 --base16 hex encoding (RFC4648 section 8)\n\
133 "), stdout);
134 fputs (_("\
135 --base2msbf bit string with most significant bit (msb) first\n\
136 "), stdout);
137 fputs (_("\
138 --base2lsbf bit string with least significant bit (lsb) first\n\
139 "), stdout);
140 #endif
141 fputs (_("\
142 -d, --decode decode data\n\
143 -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\
144 -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\
145 Use 0 to disable line wrapping\n\
146 "), stdout);
147 #if BASE_TYPE == 42
148 fputs (_("\
149 --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
150 when encoding, input length must be a multiple of 4;\n\
151 when decoding, input length must be a multiple of 5\n\
152 "), stdout);
153 #endif
154 fputs (HELP_OPTION_DESCRIPTION, stdout);
155 fputs (VERSION_OPTION_DESCRIPTION, stdout);
156 #if BASE_TYPE == 42
157 fputs (_("\
159 When decoding, the input may contain newlines in addition to the bytes of\n\
160 the formal alphabet. Use --ignore-garbage to attempt to recover\n\
161 from any other non-alphabet bytes in the encoded stream.\n\
162 "), stdout);
163 #else
164 printf (_("\
166 The data are encoded as described for the %s alphabet in RFC 4648.\n\
167 When decoding, the input may contain newlines in addition to the bytes of\n\
168 the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\
169 from any other non-alphabet bytes in the encoded stream.\n"),
170 PROGRAM_NAME, PROGRAM_NAME);
171 #endif
172 emit_ancillary_info (PROGRAM_NAME);
175 exit (status);
/* Number of input bytes that do_encode reads and encodes per iteration.  */
#define ENC_BLOCKSIZE (1024 * 3 * 10)

#if BASE_TYPE == 32
# define BASE_LENGTH BASE32_LENGTH
/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (1024 * 5)

/* Ensure that BLOCKSIZE is a multiple of 5 and 8.  */
verify (ENC_BLOCKSIZE % 40 == 0); /* So padding chars only on last block.  */
verify (DEC_BLOCKSIZE % 40 == 0); /* So complete encoded blocks are used.  */

# define base_encode base32_encode
# define base_decode_context base32_decode_context
# define base_decode_ctx_init base32_decode_ctx_init
# define base_decode_ctx base32_decode_ctx
# define isbase isbase32
#elif BASE_TYPE == 64
# define BASE_LENGTH BASE64_LENGTH
/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (1024 * 3)

/* Ensure that BLOCKSIZE is a multiple of 3 and 4.  */
verify (ENC_BLOCKSIZE % 12 == 0); /* So padding chars only on last block.  */
verify (DEC_BLOCKSIZE % 12 == 0); /* So complete encoded blocks are used.  */

# define base_encode base64_encode
# define base_decode_context base64_decode_context
# define base_decode_ctx_init base64_decode_ctx_init
# define base_decode_ctx base64_decode_ctx
# define isbase isbase64
#elif BASE_TYPE == 42

/* In basenc the encoding is chosen at run time, so the operations are
   reached through the function pointers declared below; main fills
   them in according to the selected option.  */
# define BASE_LENGTH base_length

/* Note that increasing this may decrease performance if --ignore-garbage
   is used, because of the memmove operation below.  */
# define DEC_BLOCKSIZE (4200)
verify (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32 */
verify (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64 */

static int (*base_length) (int i);
static bool (*isbase) (char ch);
static void (*base_encode) (char const *restrict in, idx_t inlen,
                            char *restrict out, idx_t outlen);

/* Base16 decoding state: a high nibble waiting for its low nibble.  */
struct base16_decode_context
{
  char nibble;
  bool have_nibble;
};

/* Z85 decoding state: the base-85 digits collected so far for the
   current group of five (I is how many are stored).  */
struct z85_decode_context
{
  int i;
  unsigned char octets[5];
};

/* Base2 decoding state: the partially assembled output octet.  */
struct base2_decode_context
{
  unsigned char octet;
};

/* Decoding state shared by every basenc encoding.  */
struct base_decode_context
{
  int i; /* will be updated manually */
  union {
    struct base64_decode_context base64;
    struct base32_decode_context base32;
    struct base16_decode_context base16;
    struct base2_decode_context base2;
    struct z85_decode_context z85;
  } ctx;
  char *inbuf;    /* Scratch copy of the input, set up by init_inbuf.  */
  idx_t bufsize;  /* Allocated size of INBUF, in bytes.  */
};
static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
static bool (*base_decode_ctx) (struct base_decode_context *ctx,
                                char const *restrict in, idx_t inlen,
                                char *restrict out, idx_t *outlen);
#endif
265 #if BASE_TYPE == 42
/* Function form of the BASE64_LENGTH macro, so that it can be stored
   in the base_length function pointer.  Return the encoded length for
   LEN input bytes.  */
static int
base64_length_wrapper (int len)
{
  return BASE64_LENGTH (len);
}
273 static void
274 base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
276 base64_decode_ctx_init (&ctx->ctx.base64);
279 static bool
280 base64_decode_ctx_wrapper (struct base_decode_context *ctx,
281 char const *restrict in, idx_t inlen,
282 char *restrict out, idx_t *outlen)
284 bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
285 ctx->i = ctx->ctx.base64.i;
286 return b;
289 static void
290 init_inbuf (struct base_decode_context *ctx)
292 ctx->bufsize = DEC_BLOCKSIZE;
293 ctx->inbuf = xcharalloc (ctx->bufsize);
296 static void
297 prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
299 if (ctx->bufsize < inlen)
301 ctx->bufsize = inlen * 2;
302 ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char));
307 static void
308 base64url_encode (char const *restrict in, idx_t inlen,
309 char *restrict out, idx_t outlen)
311 base64_encode (in, inlen, out, outlen);
312 /* translate 62nd and 63rd characters */
313 char *p = out;
314 while (outlen--)
316 if (*p == '+')
317 *p = '-';
318 else if (*p == '/')
319 *p = '_';
320 ++p;
/* Return true if CH is in the base64url alphabet: '-' and '_' are
   accepted, '+' and '/' are not, and everything else is decided by
   isbase64.  */
static bool
isbase64url (char ch)
{
  return (ch == '-' || ch == '_'
          || (ch != '+' && ch != '/' && isbase64 (ch)));
}
331 static void
332 base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
334 base64_decode_ctx_init (&ctx->ctx.base64);
335 init_inbuf (ctx);
339 static bool
340 base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
341 char const *restrict in, idx_t inlen,
342 char *restrict out, idx_t *outlen)
344 prepare_inbuf (ctx, inlen);
345 memcpy (ctx->inbuf, in, inlen);
347 /* translate 62nd and 63rd characters */
348 idx_t i = inlen;
349 char *p = ctx->inbuf;
350 while (i--)
352 if (*p == '+' || *p == '/')
354 *outlen = 0;
355 return false; /* reject base64 input */
357 else if (*p == '-')
358 *p = '+';
359 else if (*p == '_')
360 *p = '/';
361 ++p;
364 bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen,
365 out, outlen);
366 ctx->i = ctx->ctx.base64.i;
368 return b;
/* Function form of the BASE32_LENGTH macro, so that it can be stored
   in the base_length function pointer.  Return the encoded length for
   LEN input bytes.  */
static int
base32_length_wrapper (int len)
{
  return BASE32_LENGTH (len);
}
379 static void
380 base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
382 base32_decode_ctx_init (&ctx->ctx.base32);
385 static bool
386 base32_decode_ctx_wrapper (struct base_decode_context *ctx,
387 char const *restrict in, idx_t inlen,
388 char *restrict out, idx_t *outlen)
390 bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
391 ctx->i = ctx->ctx.base32.i;
392 return b;
/* Translation from the standard base32 alphabet
       ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
   to the base32hex alphabet
       0123456789ABCDEFGHIJKLMNOPQRSTUV
   The table is indexed by (character - 0x32) and covers 0x32 ('2')
   through 0x5a ('Z').  Codes 0x38..0x40, which include the '='
   padding character, map to themselves.  */
static const char base32_norm_to_hex[32 + 9] = {
/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */
  'Q',  'R',  'S',  'T',  'U',  'V',

  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',

/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */
  '8',  '9',  'A',  'B',  'C',  'D',  'E',  'F',

/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */
  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',

/*0x59, 0x5a, */
  'O',  'P',
};
/* Translation from the base32hex alphabet
       0123456789ABCDEFGHIJKLMNOPQRSTUV
   to the standard base32 alphabet
       ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
   The table is indexed by (character - 0x30) and covers 0x30 ('0')
   through 0x56 ('V').  Codes 0x3a..0x40, which include the '='
   padding character, map to themselves.  */
static const char base32_hex_to_norm[32 + 9] = {
  /* from: 0x30 .. 0x39 ('0' to '9') */
  /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',

  0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* from: 0x41 .. 0x4A ('A' to 'J') */
  /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',

  /* from: 0x4B .. 0x54 ('K' to 'T') */
  /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',

  /* from: 0x55 .. 0x56 ('U' to 'V') */
  /* to:*/ '6', '7'
};
/* Return true if CH is in the base32hex alphabet ('0'-'9', 'A'-'V').  */
inline static bool
isbase32hex (char ch)
{
  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'V');
}
444 static void
445 base32hex_encode (char const *restrict in, idx_t inlen,
446 char *restrict out, idx_t outlen)
448 base32_encode (in, inlen, out, outlen);
450 for (char *p = out; outlen--; p++)
452 assert (0x32 <= *p && *p <= 0x5a); /* LCOV_EXCL_LINE */
453 *p = base32_norm_to_hex[*p - 0x32];
458 static void
459 base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
461 base32_decode_ctx_init (&ctx->ctx.base32);
462 init_inbuf (ctx);
466 static bool
467 base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
468 char const *restrict in, idx_t inlen,
469 char *restrict out, idx_t *outlen)
471 prepare_inbuf (ctx, inlen);
473 idx_t i = inlen;
474 char *p = ctx->inbuf;
475 while (i--)
477 if (isbase32hex (*in))
478 *p = base32_hex_to_norm[ (int)*in - 0x30];
479 else
480 *p = *in;
481 ++p;
482 ++in;
485 bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen,
486 out, outlen);
487 ctx->i = ctx->ctx.base32.i;
489 return b;
/* Return true if CH is an upper-case hexadecimal digit
   ('0'-'9' or 'A'-'F').  */
static bool
isbase16 (char ch)
{
  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'F');
}
/* Return the number of output characters produced when LEN bytes are
   base16-encoded: two hexadecimal digits per byte.  */
static int
base16_length (int len)
{
  return len * 2;
}
505 static const char base16[16] = "0123456789ABCDEF";
507 static void
508 base16_encode (char const *restrict in, idx_t inlen,
509 char *restrict out, idx_t outlen)
511 while (inlen--)
513 unsigned char c = *in;
514 *out++ = base16[c >> 4];
515 *out++ = base16[c & 0x0F];
516 ++in;
521 static void
522 base16_decode_ctx_init (struct base_decode_context *ctx)
524 init_inbuf (ctx);
525 ctx->ctx.base16.have_nibble = false;
526 ctx->i = 1;
530 static bool
531 base16_decode_ctx (struct base_decode_context *ctx,
532 char const *restrict in, idx_t inlen,
533 char *restrict out, idx_t *outlen)
535 bool ignore_lines = true; /* for now, always ignore them */
537 *outlen = 0;
539 /* inlen==0 is request to flush output.
540 if there is a dangling high nibble - we are missing the low nibble,
541 so return false - indicating an invalid input. */
542 if (inlen == 0)
543 return !ctx->ctx.base16.have_nibble;
545 while (inlen--)
547 if (ignore_lines && *in == '\n')
549 ++in;
550 continue;
553 int nib = *in++;
554 if ('0' <= nib && nib <= '9')
555 nib -= '0';
556 else if ('A' <= nib && nib <= 'F')
557 nib -= 'A' - 10;
558 else
559 return false; /* garbage - return false */
561 if (ctx->ctx.base16.have_nibble)
563 /* have both nibbles, write octet */
564 *out++ = (ctx->ctx.base16.nibble << 4) + nib;
565 ++(*outlen);
567 else
569 /* Store higher nibble until next one arrives */
570 ctx->ctx.base16.nibble = nib;
572 ctx->ctx.base16.have_nibble = !ctx->ctx.base16.have_nibble;
574 return true;
/* Return the number of output characters produced when LEN bytes are
   Z85-encoded: five characters for every four input bytes.  */
static int
z85_length (int len)
{
  /* Z85 does not allow padding, so no need to round to highest integer.  */
  int outlen = (len * 5) / 4;
  return outlen;
}
/* Return true if CH is one of the 85 characters of the Z85 alphabet
   (digits, ASCII letters, and the punctuation listed below).  */
static bool
isz85 (char ch)
{
  static char const alphabet[] =
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ".-:+=^!/*?&<>()[]{}@%$#";

  /* strchr treats the terminating NUL as part of the string, so a NUL
     byte would otherwise be reported as a valid Z85 character and
     would survive --ignore-garbage filtering.  */
  return ch != '\0' && strchr (alphabet, ch) != NULL;
}
/* Z85 output character for each base-85 digit value 0..84.  The array
   holds exactly 85 characters and is not NUL-terminated.  */
static char const z85_encoding[85] =
  "0123456789"
  "abcdefghijklmnopqrstuvwxyz"
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  ".-:+=^!/*?&<>()[]{}@%$#";
600 static void
601 z85_encode (char const *restrict in, idx_t inlen,
602 char *restrict out, idx_t outlen)
604 int i = 0;
605 unsigned char quad[4];
606 idx_t outidx = 0;
608 while (true)
610 if (inlen == 0)
612 /* no more input, exactly on 4 octet boundary. */
613 if (i == 0)
614 return;
616 /* currently, there's no way to return an error in encoding. */
617 die (EXIT_FAILURE, 0,
618 _("invalid input (length must be multiple of 4 characters)"));
620 else
622 quad[i++] = *in++;
623 --inlen;
626 /* Got a quad, encode it */
627 if (i == 4)
629 int_fast64_t val = quad[0];
630 val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
632 for (int j = 4; j >= 0; --j)
634 int c = val % 85;
635 val /= 85;
637 /* NOTE: if there is padding (which is trimmed by z85
638 before outputting the result), the output buffer 'out'
639 might not include enough allocated bytes for the padding,
640 so don't store them. */
641 if (outidx + j < outlen)
642 out[j] = z85_encoding[c];
644 out += 5;
645 outidx += 5;
646 i = 0;
651 static void
652 z85_decode_ctx_init (struct base_decode_context *ctx)
654 init_inbuf (ctx);
655 ctx->ctx.z85.i = 0;
656 ctx->i = 1;
/* Value of the four low-order base-85 digits (octets[1..4]) stored in
   CTX.  Each digit is at most 84, so the sum is computed in int.  */
# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
  (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) +      \
   ((ctx)->ctx.z85.octets[2] * 85 * 85) +           \
   ((ctx)->ctx.z85.octets[3] * 85) +                \
   ((ctx)->ctx.z85.octets[4]))


/* Value of the high-order base-85 digit (octets[0]) stored in CTX.
   It is widened to int_fast64_t first, because the product can
   exceed the 32-bit range.  */
# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
  ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
/*
 0 -  9:  0 1 2 3 4 5 6 7 8 9
 10 - 19:  a b c d e f g h i j
 20 - 29:  k l m n o p q r s t
 30 - 39:  u v w x y z A B C D
 40 - 49:  E F G H I J K L M N
 50 - 59:  O P Q R S T U V W X
 60 - 69:  Y Z . - : + = ^ ! /   #dummy comment to workaround syntax-check
 70 - 79:  * ? & < > ( ) [ ] {
 80 - 84:  } @ % $ #
*/
/* Base-85 digit value for each character from '!' (33) through '}'
   (125), indexed by (character - 33).  Entries of -1 mark characters
   that are not part of the Z85 alphabet.  */
static signed char const z85_decoding[93] = {
  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
  0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
  64, -1,  73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
  36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
  46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
  56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
  77,  -1, 78,  67,  -1,  -1,                  /* [ \ ] ^ _ ` */
  10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
  20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
  30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
  79,  -1, 80                                  /* { | } */
};
696 static bool
697 z85_decode_ctx (struct base_decode_context *ctx,
698 char const *restrict in, idx_t inlen,
699 char *restrict out, idx_t *outlen)
701 bool ignore_lines = true; /* for now, always ignore them */
703 *outlen = 0;
705 /* inlen==0 is request to flush output.
706 if there are dangling values - we are missing entries,
707 so return false - indicating an invalid input. */
708 if (inlen == 0)
710 if (ctx->ctx.z85.i > 0)
712 /* Z85 variant does not allow padding - input must
713 be a multiple of 5 - so return error. */
714 return false;
716 return true;
719 while (inlen--)
721 if (ignore_lines && *in == '\n')
723 ++in;
724 continue;
727 /* z85 decoding */
728 unsigned char c = *in;
730 if (c >= 33 && c <= 125)
732 signed char ch = z85_decoding[c - 33];
733 if (ch < 0)
734 return false; /* garbage - return false */
735 c = ch;
737 else
738 return false; /* garbage - return false */
740 ++in;
742 ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
743 if (ctx->ctx.z85.i == 5)
745 /* decode the lowest 4 octets, then check for overflows. */
746 int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
748 /* The Z85 spec and the reference implementation say nothing
749 about overflows. To be on the safe side, reject them. */
751 val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
752 if ((val >> 24) & ~0xFF)
753 return false;
755 *out++ = val >> 24;
756 *out++ = (val >> 16) & 0xFF;
757 *out++ = (val >> 8) & 0xFF;
758 *out++ = val & 0xFF;
760 *outlen += 4;
762 ctx->ctx.z85.i = 0;
765 ctx->i = ctx->ctx.z85.i;
766 return true;
/* Return true if CH is a binary digit, '0' or '1'.  */
inline static bool
isbase2 (char ch)
{
  return ch == '0' || ch == '1';
}
/* Return the number of output characters produced when LEN bytes are
   base2-encoded: eight binary digits per byte.  */
static int
base2_length (int len)
{
  return len * 8;
}
783 inline static void
784 base2msbf_encode (char const *restrict in, idx_t inlen,
785 char *restrict out, idx_t outlen)
787 while (inlen--)
789 unsigned char c = *in;
790 for (int i = 0; i < 8; i++)
792 *out++ = c & 0x80 ? '1' : '0';
793 c <<= 1;
795 outlen -= 8;
796 ++in;
800 inline static void
801 base2lsbf_encode (char const *restrict in, idx_t inlen,
802 char *restrict out, idx_t outlen)
804 while (inlen--)
806 unsigned char c = *in;
807 for (int i = 0; i < 8; i++)
809 *out++ = c & 0x01 ? '1' : '0';
810 c >>= 1;
812 outlen -= 8;
813 ++in;
818 static void
819 base2_decode_ctx_init (struct base_decode_context *ctx)
821 init_inbuf (ctx);
822 ctx->ctx.base2.octet = 0;
823 ctx->i = 0;
827 static bool
828 base2lsbf_decode_ctx (struct base_decode_context *ctx,
829 char const *restrict in, idx_t inlen,
830 char *restrict out, idx_t *outlen)
832 bool ignore_lines = true; /* for now, always ignore them */
834 *outlen = 0;
836 /* inlen==0 is request to flush output.
837 if there is a dangling bit - we are missing some bits,
838 so return false - indicating an invalid input. */
839 if (inlen == 0)
840 return ctx->i == 0;
842 while (inlen--)
844 if (ignore_lines && *in == '\n')
846 ++in;
847 continue;
850 if (!isbase2 (*in))
851 return false;
853 bool bit = (*in == '1');
854 ctx->ctx.base2.octet |= bit << ctx->i;
855 ++ctx->i;
857 if (ctx->i == 8)
859 *out++ = ctx->ctx.base2.octet;
860 ctx->ctx.base2.octet = 0;
861 ++*outlen;
862 ctx->i = 0;
865 ++in;
868 return true;
871 static bool
872 base2msbf_decode_ctx (struct base_decode_context *ctx,
873 char const *restrict in, idx_t inlen,
874 char *restrict out, idx_t *outlen)
876 bool ignore_lines = true; /* for now, always ignore them */
878 *outlen = 0;
880 /* inlen==0 is request to flush output.
881 if there is a dangling bit - we are missing some bits,
882 so return false - indicating an invalid input. */
883 if (inlen == 0)
884 return ctx->i == 0;
886 while (inlen--)
888 if (ignore_lines && *in == '\n')
890 ++in;
891 continue;
894 if (!isbase2 (*in))
895 return false;
897 bool bit = (*in == '1');
898 if (ctx->i == 0)
899 ctx->i = 8;
900 --ctx->i;
901 ctx->ctx.base2.octet |= bit << ctx->i;
903 if (ctx->i == 0)
905 *out++ = ctx->ctx.base2.octet;
906 ctx->ctx.base2.octet = 0;
907 ++*outlen;
908 ctx->i = 0;
911 ++in;
914 return true;
917 #endif /* BASE_TYPE == 42, i.e., "basenc"*/
921 static void
922 wrap_write (char const *buffer, idx_t len,
923 idx_t wrap_column, idx_t *current_column, FILE *out)
925 if (wrap_column == 0)
927 /* Simple write. */
928 if (fwrite (buffer, 1, len, stdout) < len)
929 die (EXIT_FAILURE, errno, _("write error"));
931 else
932 for (idx_t written = 0; written < len; )
934 idx_t to_write = MIN (wrap_column - *current_column, len - written);
936 if (to_write == 0)
938 if (fputc ('\n', out) == EOF)
939 die (EXIT_FAILURE, errno, _("write error"));
940 *current_column = 0;
942 else
944 if (fwrite (buffer + written, 1, to_write, stdout) < to_write)
945 die (EXIT_FAILURE, errno, _("write error"));
946 *current_column += to_write;
947 written += to_write;
/* Close the input stream IN, which was opened from INFILE ("-" means
   standard input), and exit successfully.  If closing fails, exit
   with a diagnostic instead.  */
static _Noreturn void
finish_and_exit (FILE *in, char const *infile)
{
  if (fclose (in) != 0)
    {
      if (STREQ (infile, "-"))
        die (EXIT_FAILURE, errno, _("closing standard input"));
      else
        die (EXIT_FAILURE, errno, "%s", quotef (infile));
    }

  exit (EXIT_SUCCESS);
}
966 static _Noreturn void
967 do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
969 idx_t current_column = 0;
970 char *inbuf, *outbuf;
971 idx_t sum;
973 inbuf = xmalloc (ENC_BLOCKSIZE);
974 outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
978 idx_t n;
980 sum = 0;
983 n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
984 sum += n;
986 while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
988 if (sum > 0)
990 /* Process input one block at a time. Note that ENC_BLOCKSIZE
991 is sized so that no pad chars will appear in output. */
992 base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
994 wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
995 &current_column, out);
998 while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
1000 /* When wrapping, terminate last line. */
1001 if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
1002 die (EXIT_FAILURE, errno, _("write error"));
1004 if (ferror (in))
1005 die (EXIT_FAILURE, errno, _("read error"));
1007 finish_and_exit (in, infile);
1010 static _Noreturn void
1011 do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
1013 char *inbuf, *outbuf;
1014 idx_t sum;
1015 struct base_decode_context ctx;
1017 inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
1018 outbuf = xmalloc (DEC_BLOCKSIZE);
1020 #if BASE_TYPE == 42
1021 ctx.inbuf = NULL;
1022 #endif
1023 base_decode_ctx_init (&ctx);
1027 bool ok;
1029 sum = 0;
1032 idx_t n = fread (inbuf + sum,
1033 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
1035 if (ignore_garbage)
1037 for (idx_t i = 0; n > 0 && i < n;)
1039 if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=')
1040 i++;
1041 else
1042 memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
1046 sum += n;
1048 if (ferror (in))
1049 die (EXIT_FAILURE, errno, _("read error"));
1051 while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
1053 /* The following "loop" is usually iterated just once.
1054 However, when it processes the final input buffer, we want
1055 to iterate it one additional time, but with an indicator
1056 telling it to flush what is in CTX. */
1057 for (int k = 0; k < 1 + !!feof (in); k++)
1059 if (k == 1 && ctx.i == 0)
1060 break;
1061 idx_t n = DEC_BLOCKSIZE;
1062 ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
1064 if (fwrite (outbuf, 1, n, out) < n)
1065 die (EXIT_FAILURE, errno, _("write error"));
1067 if (!ok)
1068 die (EXIT_FAILURE, 0, _("invalid input"));
1071 while (!feof (in));
1073 finish_and_exit (in, infile);
1077 main (int argc, char **argv)
1079 int opt;
1080 FILE *input_fh;
1081 char const *infile;
1083 /* True if --decode has been given and we should decode data. */
1084 bool decode = false;
1085 /* True if we should ignore non-base-alphabetic characters. */
1086 bool ignore_garbage = false;
1087 /* Wrap encoded data around the 76th column, by default. */
1088 idx_t wrap_column = 76;
1090 #if BASE_TYPE == 42
1091 int base_type = 0;
1092 #endif
1094 initialize_main (&argc, &argv);
1095 set_program_name (argv[0]);
1096 setlocale (LC_ALL, "");
1097 bindtextdomain (PACKAGE, LOCALEDIR);
1098 textdomain (PACKAGE);
1100 atexit (close_stdout);
1102 while ((opt = getopt_long (argc, argv, "diw:", long_options, NULL)) != -1)
1103 switch (opt)
1105 case 'd':
1106 decode = true;
1107 break;
1109 case 'w':
1111 intmax_t w;
1112 strtol_error s_err = xstrtoimax (optarg, NULL, 10, &w, "");
1113 if (LONGINT_OVERFLOW < s_err || w < 0)
1114 die (EXIT_FAILURE, 0, "%s: %s",
1115 _("invalid wrap size"), quote (optarg));
1116 wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
1118 break;
1120 case 'i':
1121 ignore_garbage = true;
1122 break;
1124 #if BASE_TYPE == 42
1125 case BASE64_OPTION:
1126 case BASE64URL_OPTION:
1127 case BASE32_OPTION:
1128 case BASE32HEX_OPTION:
1129 case BASE16_OPTION:
1130 case BASE2MSBF_OPTION:
1131 case BASE2LSBF_OPTION:
1132 case Z85_OPTION:
1133 base_type = opt;
1134 break;
1135 #endif
1137 case_GETOPT_HELP_CHAR;
1139 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1141 default:
1142 usage (EXIT_FAILURE);
1143 break;
1146 #if BASE_TYPE == 42
1147 switch (base_type)
1149 case BASE64_OPTION:
1150 base_length = base64_length_wrapper;
1151 isbase = isbase64;
1152 base_encode = base64_encode;
1153 base_decode_ctx_init = base64_decode_ctx_init_wrapper;
1154 base_decode_ctx = base64_decode_ctx_wrapper;
1155 break;
1157 case BASE64URL_OPTION:
1158 base_length = base64_length_wrapper;
1159 isbase = isbase64url;
1160 base_encode = base64url_encode;
1161 base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
1162 base_decode_ctx = base64url_decode_ctx_wrapper;
1163 break;
1165 case BASE32_OPTION:
1166 base_length = base32_length_wrapper;
1167 isbase = isbase32;
1168 base_encode = base32_encode;
1169 base_decode_ctx_init = base32_decode_ctx_init_wrapper;
1170 base_decode_ctx = base32_decode_ctx_wrapper;
1171 break;
1173 case BASE32HEX_OPTION:
1174 base_length = base32_length_wrapper;
1175 isbase = isbase32hex;
1176 base_encode = base32hex_encode;
1177 base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
1178 base_decode_ctx = base32hex_decode_ctx_wrapper;
1179 break;
1181 case BASE16_OPTION:
1182 base_length = base16_length;
1183 isbase = isbase16;
1184 base_encode = base16_encode;
1185 base_decode_ctx_init = base16_decode_ctx_init;
1186 base_decode_ctx = base16_decode_ctx;
1187 break;
1189 case BASE2MSBF_OPTION:
1190 base_length = base2_length;
1191 isbase = isbase2;
1192 base_encode = base2msbf_encode;
1193 base_decode_ctx_init = base2_decode_ctx_init;
1194 base_decode_ctx = base2msbf_decode_ctx;
1195 break;
1197 case BASE2LSBF_OPTION:
1198 base_length = base2_length;
1199 isbase = isbase2;
1200 base_encode = base2lsbf_encode;
1201 base_decode_ctx_init = base2_decode_ctx_init;
1202 base_decode_ctx = base2lsbf_decode_ctx;
1203 break;
1205 case Z85_OPTION:
1206 base_length = z85_length;
1207 isbase = isz85;
1208 base_encode = z85_encode;
1209 base_decode_ctx_init = z85_decode_ctx_init;
1210 base_decode_ctx = z85_decode_ctx;
1211 break;
1213 default:
1214 error (0, 0, _("missing encoding type"));
1215 usage (EXIT_FAILURE);
1217 #endif
1219 if (argc - optind > 1)
1221 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1222 usage (EXIT_FAILURE);
1225 if (optind < argc)
1226 infile = argv[optind];
1227 else
1228 infile = "-";
1230 if (STREQ (infile, "-"))
1232 xset_binary_mode (STDIN_FILENO, O_BINARY);
1233 input_fh = stdin;
1235 else
1237 input_fh = fopen (infile, "rb");
1238 if (input_fh == NULL)
1239 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1242 fadvise (input_fh, FADVISE_SEQUENTIAL);
1244 if (decode)
1245 do_decode (input_fh, infile, stdout, ignore_garbage);
1246 else
1247 do_encode (input_fh, infile, stdout, wrap_column);