tests: cksum: add incorrect data to verify --check & --strict
[coreutils.git] / src / basenc.c
blobba3186a2225f305d6c67c21971a96699f002aa91
1 /* Base64, base32, and similar encoding/decoding strings or files.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Simon Josefsson <simon@josefsson.org>. */
19 #include <config.h>
21 #include <stdio.h>
22 #include <getopt.h>
23 #include <sys/types.h>
25 #include "system.h"
26 #include "assure.h"
27 #include "c-ctype.h"
28 #include "fadvise.h"
29 #include "quote.h"
30 #include "xstrtol.h"
31 #include "xdectoint.h"
32 #include "xbinary-io.h"
34 #if BASE_TYPE == 42
35 # define AUTHORS \
36 proper_name ("Simon Josefsson"), \
37 proper_name ("Assaf Gordon")
38 #else
39 # define AUTHORS proper_name ("Simon Josefsson")
40 #endif
42 #if BASE_TYPE == 32
43 # include "base32.h"
44 # define PROGRAM_NAME "base32"
45 #elif BASE_TYPE == 64
46 # include "base64.h"
47 # define PROGRAM_NAME "base64"
48 #elif BASE_TYPE == 42
49 # include "base32.h"
50 # include "base64.h"
51 # include "assure.h"
52 # define PROGRAM_NAME "basenc"
53 #else
54 # error missing/invalid BASE_TYPE definition
55 #endif
59 #if BASE_TYPE == 42
/* Values returned by getopt_long for the encoding-selection long
   options of basenc.  These options have no single-letter form, so
   their values start just above CHAR_MAX, where they cannot collide
   with any option character.  */
enum
{
  BASE64_OPTION = CHAR_MAX + 1, /* --base64 */
  BASE64URL_OPTION,             /* --base64url */
  BASE32_OPTION,                /* --base32 */
  BASE32HEX_OPTION,             /* --base32hex */
  BASE16_OPTION,                /* --base16 */
  BASE2MSBF_OPTION,             /* --base2msbf */
  BASE2LSBF_OPTION,             /* --base2lsbf */
  Z85_OPTION                    /* --z85 */
};
71 #endif
73 static struct option const long_options[] =
75 {"decode", no_argument, 0, 'd'},
76 {"wrap", required_argument, 0, 'w'},
77 {"ignore-garbage", no_argument, 0, 'i'},
78 #if BASE_TYPE == 42
79 {"base64", no_argument, 0, BASE64_OPTION},
80 {"base64url", no_argument, 0, BASE64URL_OPTION},
81 {"base32", no_argument, 0, BASE32_OPTION},
82 {"base32hex", no_argument, 0, BASE32HEX_OPTION},
83 {"base16", no_argument, 0, BASE16_OPTION},
84 {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
85 {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
86 {"z85", no_argument, 0, Z85_OPTION},
87 #endif
88 {GETOPT_HELP_OPTION_DECL},
89 {GETOPT_VERSION_OPTION_DECL},
90 {nullptr, 0, nullptr, 0}
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only the short "try --help" hint is
   emitted; otherwise the full help text is written to standard output.
   The text differs between the basenc build (BASE_TYPE == 42) and the
   base32/base64 builds.  This function does not return.  */
93 void
94 usage (int status)
96 if (status != EXIT_SUCCESS)
97 emit_try_help ();
98 else
100 printf (_("\
101 Usage: %s [OPTION]... [FILE]\n\
102 "), program_name);
/* One-line summary: basenc has a fixed summary, the other builds
   interpolate BASE_TYPE (32 or 64) into it.  */
104 #if BASE_TYPE == 42
105 fputs (_("\
106 basenc encode or decode FILE, or standard input, to standard output.\n\
107 "), stdout);
108 #else
109 printf (_("\
110 Base%d encode or decode FILE, or standard input, to standard output.\n\
111 "), BASE_TYPE);
112 #endif
114 emit_stdin_note ();
115 emit_mandatory_arg_note ();
/* Encoding-selection options, basenc only.  Each option is emitted by
   a separate fputs call with its own translatable string.  */
116 #if BASE_TYPE == 42
117 fputs (_("\
118 --base64 same as 'base64' program (RFC4648 section 4)\n\
119 "), stdout);
120 fputs (_("\
121 --base64url file- and url-safe base64 (RFC4648 section 5)\n\
122 "), stdout);
123 fputs (_("\
124 --base32 same as 'base32' program (RFC4648 section 6)\n\
125 "), stdout);
126 fputs (_("\
127 --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\
128 "), stdout);
129 fputs (_("\
130 --base16 hex encoding (RFC4648 section 8)\n\
131 "), stdout);
132 fputs (_("\
133 --base2msbf bit string with most significant bit (msb) first\n\
134 "), stdout);
135 fputs (_("\
136 --base2lsbf bit string with least significant bit (lsb) first\n\
137 "), stdout);
138 #endif
/* Options common to all builds.  */
139 fputs (_("\
140 -d, --decode decode data\n\
141 -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\
142 -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\
143 Use 0 to disable line wrapping\n\
144 "), stdout);
/* --z85 is listed after --wrap rather than with the other encodings.  */
145 #if BASE_TYPE == 42
146 fputs (_("\
147 --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
148 when encoding, input length must be a multiple of 4;\n\
149 when decoding, input length must be a multiple of 5\n\
150 "), stdout);
151 #endif
152 fputs (HELP_OPTION_DESCRIPTION, stdout);
153 fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Closing notes: the base32/base64 builds additionally name the
   alphabet (PROGRAM_NAME) and cite RFC 4648.  */
154 #if BASE_TYPE == 42
155 fputs (_("\
157 When decoding, the input may contain newlines in addition to the bytes of\n\
158 the formal alphabet. Use --ignore-garbage to attempt to recover\n\
159 from any other non-alphabet bytes in the encoded stream.\n\
160 "), stdout);
161 #else
162 printf (_("\
164 The data are encoded as described for the %s alphabet in RFC 4648.\n\
165 When decoding, the input may contain newlines in addition to the bytes of\n\
166 the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\
167 from any other non-alphabet bytes in the encoded stream.\n"),
168 PROGRAM_NAME, PROGRAM_NAME);
169 #endif
170 emit_ancillary_info (PROGRAM_NAME);
173 exit (status);
176 #if BASE_TYPE != 64
/* Return the number of characters needed to extend LEN base32
   characters to a whole 8-character group, or 0 if LEN is already a
   multiple of 8.  */
static int
base32_required_padding (int len)
{
  int const group = 8;
  int remainder = len % group;

  if (remainder == 0)
    return 0;
  return group - remainder;
}
183 #endif
185 #if BASE_TYPE != 32
/* Return the number of characters needed to extend LEN base64
   characters to a whole 4-character group, or 0 if LEN is already a
   multiple of 4.  */
static int
base64_required_padding (int len)
{
  int const group = 4;
  int remainder = len % group;

  if (remainder == 0)
    return 0;
  return group - remainder;
}
192 #endif
194 #if BASE_TYPE == 42
/* Padding callback for encodings that never pad: always return 0,
   whatever LEN is.  */
static int
no_required_padding (int len)
{
  (void) len;
  return 0;
}
200 #endif
202 #define ENC_BLOCKSIZE (1024 * 3 * 10)
204 #if BASE_TYPE == 32
205 # define BASE_LENGTH BASE32_LENGTH
206 # define REQUIRED_PADDING base32_required_padding
207 /* Note that increasing this may decrease performance if --ignore-garbage
208 is used, because of the memmove operation below. */
209 # define DEC_BLOCKSIZE (1024 * 5)
211 /* Ensure that BLOCKSIZE is a multiple of 5 and 8. */
212 static_assert (ENC_BLOCKSIZE % 40 == 0); /* Padding chars only on last block. */
213 static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */
215 # define base_encode base32_encode
216 # define base_decode_context base32_decode_context
217 # define base_decode_ctx_init base32_decode_ctx_init
218 # define base_decode_ctx base32_decode_ctx
219 # define isubase isubase32
220 #elif BASE_TYPE == 64
221 # define BASE_LENGTH BASE64_LENGTH
222 # define REQUIRED_PADDING base64_required_padding
223 /* Note that increasing this may decrease performance if --ignore-garbage
224 is used, because of the memmove operation below. */
225 # define DEC_BLOCKSIZE (1024 * 3)
227 /* Ensure that BLOCKSIZE is a multiple of 3 and 4. */
228 static_assert (ENC_BLOCKSIZE % 12 == 0); /* Padding chars only on last block. */
229 static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */
231 # define base_encode base64_encode
232 # define base_decode_context base64_decode_context
233 # define base_decode_ctx_init base64_decode_ctx_init
234 # define base_decode_ctx base64_decode_ctx
235 # define isubase isubase64
236 #elif BASE_TYPE == 42
239 # define BASE_LENGTH base_length
240 # define REQUIRED_PADDING required_padding
242 /* Note that increasing this may decrease performance if --ignore-garbage
243 is used, because of the memmove operation below. */
244 # define DEC_BLOCKSIZE (4200)
245 static_assert (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32*/
246 static_assert (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64*/
248 static int (*base_length) (int i);
249 static int (*required_padding) (int i);
250 static bool (*isubase) (unsigned char ch);
251 static void (*base_encode) (char const *restrict in, idx_t inlen,
252 char *restrict out, idx_t outlen);
254 struct base16_decode_context
256 /* Either a 4-bit nibble, or negative if we have no nibble. */
257 signed char nibble;
260 struct z85_decode_context
262 int i;
263 unsigned char octets[5];
266 struct base2_decode_context
268 unsigned char octet;
271 struct base_decode_context
273 int i; /* will be updated manually */
274 union {
275 struct base64_decode_context base64;
276 struct base32_decode_context base32;
277 struct base16_decode_context base16;
278 struct base2_decode_context base2;
279 struct z85_decode_context z85;
280 } ctx;
281 char *inbuf;
282 idx_t bufsize;
284 static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
285 static bool (*base_decode_ctx) (struct base_decode_context *ctx,
286 char const *restrict in, idx_t inlen,
287 char *restrict out, idx_t *outlen);
288 #endif
293 #if BASE_TYPE == 42
/* Function form of the BASE64_LENGTH macro, so that it can be called
   through the base_length pointer.  */
static int
base64_length_wrapper (int len)
{
  int encoded_len = BASE64_LENGTH (len);
  return encoded_len;
}
301 static void
302 base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
304 base64_decode_ctx_init (&ctx->ctx.base64);
307 static bool
308 base64_decode_ctx_wrapper (struct base_decode_context *ctx,
309 char const *restrict in, idx_t inlen,
310 char *restrict out, idx_t *outlen)
312 bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
313 ctx->i = ctx->ctx.base64.i;
314 return b;
317 static void
318 init_inbuf (struct base_decode_context *ctx)
320 ctx->bufsize = DEC_BLOCKSIZE;
321 ctx->inbuf = xcharalloc (ctx->bufsize);
324 static void
325 prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
327 if (ctx->bufsize < inlen)
329 ctx->bufsize = inlen * 2;
330 ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char));
335 static void
336 base64url_encode (char const *restrict in, idx_t inlen,
337 char *restrict out, idx_t outlen)
339 base64_encode (in, inlen, out, outlen);
340 /* translate 62nd and 63rd characters */
341 char *p = out;
342 while (outlen--)
344 if (*p == '+')
345 *p = '-';
346 else if (*p == '/')
347 *p = '_';
348 ++p;
/* Return true if CH belongs to the base64url alphabet: '-' and '_'
   are accepted, '+' and '/' are rejected, and every other byte is
   judged by isubase64.  */
static bool
isubase64url (unsigned char ch)
{
  if (ch == '-' || ch == '_')
    return true;
  if (ch == '+' || ch == '/')
    return false;
  return isubase64 (ch);
}
359 static void
360 base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
362 base64_decode_ctx_init (&ctx->ctx.base64);
363 init_inbuf (ctx);
367 static bool
368 base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
369 char const *restrict in, idx_t inlen,
370 char *restrict out, idx_t *outlen)
372 prepare_inbuf (ctx, inlen);
373 memcpy (ctx->inbuf, in, inlen);
375 /* translate 62nd and 63rd characters */
376 idx_t i = inlen;
377 char *p = ctx->inbuf;
378 while (i--)
380 if (*p == '+' || *p == '/')
382 *outlen = 0;
383 return false; /* reject base64 input */
385 else if (*p == '-')
386 *p = '+';
387 else if (*p == '_')
388 *p = '/';
389 ++p;
392 bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen,
393 out, outlen);
394 ctx->i = ctx->ctx.base64.i;
396 return b;
/* Function form of the BASE32_LENGTH macro, so that it can be called
   through the base_length pointer.  */
static int
base32_length_wrapper (int len)
{
  int encoded_len = BASE32_LENGTH (len);
  return encoded_len;
}
407 static void
408 base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
410 base32_decode_ctx_init (&ctx->ctx.base32);
413 static bool
414 base32_decode_ctx_wrapper (struct base_decode_context *ctx,
415 char const *restrict in, idx_t inlen,
416 char *restrict out, idx_t *outlen)
418 bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
419 ctx->i = ctx->ctx.base32.i;
420 return b;
/* Translation from the standard base32 alphabet to base32hex:

     ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
   to
     0123456789ABCDEFGHIJKLMNOPQRSTUV

   The table is indexed by (character code - 0x32) and covers codes
   0x32 ('2') through 0x5a ('Z').  Codes in that range that are not
   alphabet characters, 0x38 through 0x40, map to themselves.  */
static const char base32_norm_to_hex[32 + 9] = {
  /* '2' .. '7' (0x32 .. 0x37) */
  'Q', 'R', 'S', 'T', 'U', 'V',

  /* 0x38 .. 0x40: not in the alphabet, left unchanged.  */
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* 'A' .. 'H' (0x41 .. 0x48) */
  '0', '1', '2', '3', '4', '5', '6', '7',

  /* 'I' .. 'P' (0x49 .. 0x50) */
  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',

  /* 'Q' .. 'X' (0x51 .. 0x58) */
  'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',

  /* 'Y' .. 'Z' (0x59 .. 0x5a) */
  'O', 'P',
};
/* Translation from the base32hex alphabet to standard base32:

     0123456789ABCDEFGHIJKLMNOPQRSTUV
   to
     ABCDEFGHIJKLMNOPQRSTUVWXYZ234567

   The table is indexed by (character code - 0x30).  Codes 0x3a
   through 0x40, which lie between the digits and the letters, map to
   themselves.  */
static const char base32_hex_to_norm[32 + 9] = {
  /* '0' .. '9' (0x30 .. 0x39) */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',

  /* 0x3a .. 0x40: not in the alphabet, left unchanged.  */
  0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* 'A' .. 'J' (0x41 .. 0x4a) */
  'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',

  /* 'K' .. 'T' (0x4b .. 0x54) */
  'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',

  /* 'U' .. 'V' (0x55 .. 0x56) */
  '6', '7'
};
/* Return true if CH is in the base32hex alphabet, i.e. a digit
   '0'..'9' or an upper-case letter 'A'..'V'.  */
inline static bool
isubase32hex (unsigned char ch)
{
  bool is_digit = '0' <= ch && ch <= '9';
  bool is_letter = 'A' <= ch && ch <= 'V';

  return is_digit || is_letter;
}
472 static void
473 base32hex_encode (char const *restrict in, idx_t inlen,
474 char *restrict out, idx_t outlen)
476 base32_encode (in, inlen, out, outlen);
478 for (char *p = out; outlen--; p++)
480 affirm (0x32 <= *p && *p <= 0x5a); /* LCOV_EXCL_LINE */
481 *p = base32_norm_to_hex[*p - 0x32];
486 static void
487 base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
489 base32_decode_ctx_init (&ctx->ctx.base32);
490 init_inbuf (ctx);
494 static bool
495 base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
496 char const *restrict in, idx_t inlen,
497 char *restrict out, idx_t *outlen)
499 prepare_inbuf (ctx, inlen);
501 idx_t i = inlen;
502 char *p = ctx->inbuf;
503 while (i--)
505 if (isubase32hex (*in))
506 *p = base32_hex_to_norm[*in - 0x30];
507 else
508 *p = *in;
509 ++p;
510 ++in;
513 bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen,
514 out, outlen);
515 ctx->i = ctx->ctx.base32.i;
517 return b;
/* With this approach this file works independent of the charset used
   (think EBCDIC).  However, it does assume that the characters in the
   Base16 alphabet (0-9A-Fa-f) are encoded in 0..255.  POSIX
   1003.1-2001 requires that char and unsigned char are 8-bit
   quantities, though, taking care of that problem.  But this may be a
   potential problem on non-POSIX C99 platforms.

   IBM C V6 for AIX mishandles "#define B16(x) ...'x'...", so use "_"
   as the formal parameter rather than "x".  */
/* Yield the value (0..15) of the hexadecimal digit whose character
   code is _, accepting both upper and lower case letters, or -1 if _
   is not the code of a hexadecimal digit.  */
# define B16(_)                                 \
  ((_) == '0' ? 0                               \
   : (_) == '1' ? 1                             \
   : (_) == '2' ? 2                             \
   : (_) == '3' ? 3                             \
   : (_) == '4' ? 4                             \
   : (_) == '5' ? 5                             \
   : (_) == '6' ? 6                             \
   : (_) == '7' ? 7                             \
   : (_) == '8' ? 8                             \
   : (_) == '9' ? 9                             \
   : (_) == 'A' || (_) == 'a' ? 10              \
   : (_) == 'B' || (_) == 'b' ? 11              \
   : (_) == 'C' || (_) == 'c' ? 12              \
   : (_) == 'D' || (_) == 'd' ? 13              \
   : (_) == 'E' || (_) == 'e' ? 14              \
   : (_) == 'F' || (_) == 'f' ? 15              \
   : -1)

/* Map from character code to hexadecimal digit value, or -1 for codes
   that are not hexadecimal digits.  Entry I must be B16 (I); the
   entries for 64 and 164 used to be written B16 (32) and B16 (132),
   which produced the same value (-1) only by coincidence.  */
static signed char const base16_to_int[256] = {
  B16 (0), B16 (1), B16 (2), B16 (3),
  B16 (4), B16 (5), B16 (6), B16 (7),
  B16 (8), B16 (9), B16 (10), B16 (11),
  B16 (12), B16 (13), B16 (14), B16 (15),
  B16 (16), B16 (17), B16 (18), B16 (19),
  B16 (20), B16 (21), B16 (22), B16 (23),
  B16 (24), B16 (25), B16 (26), B16 (27),
  B16 (28), B16 (29), B16 (30), B16 (31),
  B16 (32), B16 (33), B16 (34), B16 (35),
  B16 (36), B16 (37), B16 (38), B16 (39),
  B16 (40), B16 (41), B16 (42), B16 (43),
  B16 (44), B16 (45), B16 (46), B16 (47),
  B16 (48), B16 (49), B16 (50), B16 (51),
  B16 (52), B16 (53), B16 (54), B16 (55),
  B16 (56), B16 (57), B16 (58), B16 (59),
  B16 (60), B16 (61), B16 (62), B16 (63),
  B16 (64), B16 (65), B16 (66), B16 (67),
  B16 (68), B16 (69), B16 (70), B16 (71),
  B16 (72), B16 (73), B16 (74), B16 (75),
  B16 (76), B16 (77), B16 (78), B16 (79),
  B16 (80), B16 (81), B16 (82), B16 (83),
  B16 (84), B16 (85), B16 (86), B16 (87),
  B16 (88), B16 (89), B16 (90), B16 (91),
  B16 (92), B16 (93), B16 (94), B16 (95),
  B16 (96), B16 (97), B16 (98), B16 (99),
  B16 (100), B16 (101), B16 (102), B16 (103),
  B16 (104), B16 (105), B16 (106), B16 (107),
  B16 (108), B16 (109), B16 (110), B16 (111),
  B16 (112), B16 (113), B16 (114), B16 (115),
  B16 (116), B16 (117), B16 (118), B16 (119),
  B16 (120), B16 (121), B16 (122), B16 (123),
  B16 (124), B16 (125), B16 (126), B16 (127),
  B16 (128), B16 (129), B16 (130), B16 (131),
  B16 (132), B16 (133), B16 (134), B16 (135),
  B16 (136), B16 (137), B16 (138), B16 (139),
  B16 (140), B16 (141), B16 (142), B16 (143),
  B16 (144), B16 (145), B16 (146), B16 (147),
  B16 (148), B16 (149), B16 (150), B16 (151),
  B16 (152), B16 (153), B16 (154), B16 (155),
  B16 (156), B16 (157), B16 (158), B16 (159),
  B16 (160), B16 (161), B16 (162), B16 (163),
  B16 (164), B16 (165), B16 (166), B16 (167),
  B16 (168), B16 (169), B16 (170), B16 (171),
  B16 (172), B16 (173), B16 (174), B16 (175),
  B16 (176), B16 (177), B16 (178), B16 (179),
  B16 (180), B16 (181), B16 (182), B16 (183),
  B16 (184), B16 (185), B16 (186), B16 (187),
  B16 (188), B16 (189), B16 (190), B16 (191),
  B16 (192), B16 (193), B16 (194), B16 (195),
  B16 (196), B16 (197), B16 (198), B16 (199),
  B16 (200), B16 (201), B16 (202), B16 (203),
  B16 (204), B16 (205), B16 (206), B16 (207),
  B16 (208), B16 (209), B16 (210), B16 (211),
  B16 (212), B16 (213), B16 (214), B16 (215),
  B16 (216), B16 (217), B16 (218), B16 (219),
  B16 (220), B16 (221), B16 (222), B16 (223),
  B16 (224), B16 (225), B16 (226), B16 (227),
  B16 (228), B16 (229), B16 (230), B16 (231),
  B16 (232), B16 (233), B16 (234), B16 (235),
  B16 (236), B16 (237), B16 (238), B16 (239),
  B16 (240), B16 (241), B16 (242), B16 (243),
  B16 (244), B16 (245), B16 (246), B16 (247),
  B16 (248), B16 (249), B16 (250), B16 (251),
  B16 (252), B16 (253), B16 (254), B16 (255)
};
614 static bool
615 isubase16 (unsigned char ch)
617 return ch < sizeof base16_to_int && 0 <= base16_to_int[ch];
/* Return the length of the base16 encoding of LEN bytes: two
   hexadecimal digits per input byte.  */
static int
base16_length (int len)
{
  return 2 * len;
}
627 static void
628 base16_encode (char const *restrict in, idx_t inlen,
629 char *restrict out, idx_t outlen)
631 static const char base16[16] = "0123456789ABCDEF";
633 while (inlen && outlen)
635 unsigned char c = *in;
636 *out++ = base16[c >> 4];
637 *out++ = base16[c & 0x0F];
638 ++in;
639 inlen--;
640 outlen -= 2;
645 static void
646 base16_decode_ctx_init (struct base_decode_context *ctx)
648 init_inbuf (ctx);
649 ctx->ctx.base16.nibble = -1;
650 ctx->i = 1;
654 static bool
655 base16_decode_ctx (struct base_decode_context *ctx,
656 char const *restrict in, idx_t inlen,
657 char *restrict out, idx_t *outlen)
659 bool ignore_lines = true; /* for now, always ignore them */
660 char *out0 = out;
661 signed char nibble = ctx->ctx.base16.nibble;
663 /* inlen==0 is request to flush output.
664 if there is a dangling high nibble - we are missing the low nibble,
665 so return false - indicating an invalid input. */
666 if (inlen == 0)
668 *outlen = 0;
669 return nibble < 0;
672 while (inlen--)
674 unsigned char c = *in++;
675 if (ignore_lines && c == '\n')
676 continue;
678 if (sizeof base16_to_int <= c || base16_to_int[c] < 0)
680 *outlen = out - out0;
681 return false; /* garbage - return false */
684 if (nibble < 0)
685 nibble = base16_to_int[c];
686 else
688 /* have both nibbles, write octet */
689 *out++ = (nibble << 4) + base16_to_int[c];
690 nibble = -1;
694 ctx->ctx.base16.nibble = nibble;
695 *outlen = out - out0;
696 return true;
/* Return the length of the Z85 encoding of LEN bytes: five characters
   for every four input bytes.  Z85 does not allow padding, so the
   result is not rounded up.  */
static int
z85_length (int len)
{
  return len * 5 / 4;
}
/* The Z85 alphabet, indexed by digit value (0..84).  The array holds
   exactly 85 characters and is NOT NUL-terminated.  */
static char const z85_encoding[85] =
  "0123456789"
  "abcdefghijklmnopqrstuvwxyz"
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  ".-:+=^!/*?&<>()[]{}@%$#";

/* Return true if CH is one of the 85 characters of the Z85 alphabet.
   The alphabet is searched with memchr over its exact size rather
   than with strchr on a string: strchr treats the terminating NUL as
   part of the string, so a NUL byte would wrongly be reported as a
   member and survive --ignore-garbage filtering.  */
static bool
isuz85 (unsigned char ch)
{
  return memchr (z85_encoding, ch, sizeof z85_encoding) != NULL;
}
722 static void
723 z85_encode (char const *restrict in, idx_t inlen,
724 char *restrict out, idx_t outlen)
726 int i = 0;
727 unsigned char quad[4];
728 idx_t outidx = 0;
730 while (true)
732 if (inlen == 0)
734 /* no more input, exactly on 4 octet boundary. */
735 if (i == 0)
736 return;
738 /* currently, there's no way to return an error in encoding. */
739 error (EXIT_FAILURE, 0,
740 _("invalid input (length must be multiple of 4 characters)"));
742 else
744 quad[i++] = *in++;
745 --inlen;
748 /* Got a quad, encode it */
749 if (i == 4)
751 int_fast64_t val = quad[0];
752 val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
754 for (int j = 4; j >= 0; --j)
756 int c = val % 85;
757 val /= 85;
759 /* NOTE: if there is padding (which is trimmed by z85
760 before outputting the result), the output buffer 'out'
761 might not include enough allocated bytes for the padding,
762 so don't store them. */
763 if (outidx + j < outlen)
764 out[j] = z85_encoding[c];
766 out += 5;
767 outidx += 5;
768 i = 0;
773 static void
774 z85_decode_ctx_init (struct base_decode_context *ctx)
776 init_inbuf (ctx);
777 ctx->ctx.z85.i = 0;
778 ctx->i = 1;
/* Value contributed by the four low-order digits (octets[1] through
   octets[4]) of the 5-digit group held in CTX.  */
# define Z85_LO_CTX_TO_32BIT_VAL(ctx)           \
  (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) +  \
   ((ctx)->ctx.z85.octets[2] * 85 * 85) +       \
   ((ctx)->ctx.z85.octets[3] * 85) +            \
   ((ctx)->ctx.z85.octets[4]))

/* Value contributed by the high-order digit (octets[0]) of the group
   held in CTX.  The product is computed in int_fast64_t.  */
# define Z85_HI_CTX_TO_32BIT_VAL(ctx)                           \
  ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
/*
  Z85 digit values and the characters that represent them:

   0 -  9:  0 1 2 3  4 5 6 7  8 9
  10 - 19:  a b c d  e f g h  i j
  20 - 29:  k l m n  o p q r  s t
  30 - 39:  u v w x  y z A B  C D
  40 - 49:  E F G H  I J K L  M N
  50 - 59:  O P Q R  S T U V  W X
  60 - 69:  Y Z . -  : + = ^  ! /   #dummy comment to workaround syntax-check
  70 - 79:  * ? & <  > ( ) [  ] {
  80 - 84:  } @ % $  #
*/

/* Reverse mapping, indexed by (character code - 33) and covering
   codes 33 ('!') through 125 ('}').  Each entry is the digit value of
   that character, or -1 if the character is not in the alphabet.  */
static signed char const z85_decoding[93] = {
  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
  0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
  64, -1,  73,  66, 74,  71, 81,               /* : ; < = > ? @ */
  36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
  46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
  56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
  77, -1,  78,  67, -1,  -1,                   /* [ \ ] ^ _ ` */
  10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
  20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
  30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
  79, -1,  80                                  /* { | } */
};
818 static bool
819 z85_decode_ctx (struct base_decode_context *ctx,
820 char const *restrict in, idx_t inlen,
821 char *restrict out, idx_t *outlen)
823 bool ignore_lines = true; /* for now, always ignore them */
825 *outlen = 0;
827 /* inlen==0 is request to flush output.
828 if there are dangling values - we are missing entries,
829 so return false - indicating an invalid input. */
830 if (inlen == 0)
832 if (ctx->ctx.z85.i > 0)
834 /* Z85 variant does not allow padding - input must
835 be a multiple of 5 - so return error. */
836 return false;
838 return true;
841 while (inlen--)
843 if (ignore_lines && *in == '\n')
845 ++in;
846 continue;
849 /* z85 decoding */
850 unsigned char c = *in;
852 if (c >= 33 && c <= 125)
854 signed char ch = z85_decoding[c - 33];
855 if (ch < 0)
856 return false; /* garbage - return false */
857 c = ch;
859 else
860 return false; /* garbage - return false */
862 ++in;
864 ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
865 if (ctx->ctx.z85.i == 5)
867 /* decode the lowest 4 octets, then check for overflows. */
868 int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
870 /* The Z85 spec and the reference implementation say nothing
871 about overflows. To be on the safe side, reject them. */
873 val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
874 if ((val >> 24) & ~0xFF)
875 return false;
877 *out++ = val >> 24;
878 *out++ = (val >> 16) & 0xFF;
879 *out++ = (val >> 8) & 0xFF;
880 *out++ = val & 0xFF;
882 *outlen += 4;
884 ctx->ctx.z85.i = 0;
887 ctx->i = ctx->ctx.z85.i;
888 return true;
/* Return true if CH is a binary digit, '0' or '1'.  */
inline static bool
isubase2 (unsigned char ch)
{
  switch (ch)
    {
    case '0':
    case '1':
      return true;

    default:
      return false;
    }
}
/* Return the length of the base2 encoding of LEN bytes: eight binary
   digits per input byte.  */
static int
base2_length (int len)
{
  return 8 * len;
}
905 inline static void
906 base2msbf_encode (char const *restrict in, idx_t inlen,
907 char *restrict out, idx_t outlen)
909 while (inlen && outlen)
911 unsigned char c = *in;
912 for (int i = 0; i < 8; i++)
914 *out++ = c & 0x80 ? '1' : '0';
915 c <<= 1;
917 inlen--;
918 outlen -= 8;
919 ++in;
923 inline static void
924 base2lsbf_encode (char const *restrict in, idx_t inlen,
925 char *restrict out, idx_t outlen)
927 while (inlen && outlen)
929 unsigned char c = *in;
930 for (int i = 0; i < 8; i++)
932 *out++ = c & 0x01 ? '1' : '0';
933 c >>= 1;
935 inlen--;
936 outlen -= 8;
937 ++in;
942 static void
943 base2_decode_ctx_init (struct base_decode_context *ctx)
945 init_inbuf (ctx);
946 ctx->ctx.base2.octet = 0;
947 ctx->i = 0;
951 static bool
952 base2lsbf_decode_ctx (struct base_decode_context *ctx,
953 char const *restrict in, idx_t inlen,
954 char *restrict out, idx_t *outlen)
956 bool ignore_lines = true; /* for now, always ignore them */
958 *outlen = 0;
960 /* inlen==0 is request to flush output.
961 if there is a dangling bit - we are missing some bits,
962 so return false - indicating an invalid input. */
963 if (inlen == 0)
964 return ctx->i == 0;
966 while (inlen--)
968 if (ignore_lines && *in == '\n')
970 ++in;
971 continue;
974 if (!isubase2 (*in))
975 return false;
977 bool bit = (*in == '1');
978 ctx->ctx.base2.octet |= bit << ctx->i;
979 ++ctx->i;
981 if (ctx->i == 8)
983 *out++ = ctx->ctx.base2.octet;
984 ctx->ctx.base2.octet = 0;
985 ++*outlen;
986 ctx->i = 0;
989 ++in;
992 return true;
995 static bool
996 base2msbf_decode_ctx (struct base_decode_context *ctx,
997 char const *restrict in, idx_t inlen,
998 char *restrict out, idx_t *outlen)
1000 bool ignore_lines = true; /* for now, always ignore them */
1002 *outlen = 0;
1004 /* inlen==0 is request to flush output.
1005 if there is a dangling bit - we are missing some bits,
1006 so return false - indicating an invalid input. */
1007 if (inlen == 0)
1008 return ctx->i == 0;
1010 while (inlen--)
1012 if (ignore_lines && *in == '\n')
1014 ++in;
1015 continue;
1018 if (!isubase2 (*in))
1019 return false;
1021 bool bit = (*in == '1');
1022 if (ctx->i == 0)
1023 ctx->i = 8;
1024 --ctx->i;
1025 ctx->ctx.base2.octet |= bit << ctx->i;
1027 if (ctx->i == 0)
1029 *out++ = ctx->ctx.base2.octet;
1030 ctx->ctx.base2.octet = 0;
1031 ++*outlen;
1032 ctx->i = 0;
1035 ++in;
1038 return true;
1041 #endif /* BASE_TYPE == 42, i.e., "basenc"*/
1045 static void
1046 wrap_write (char const *buffer, idx_t len,
1047 idx_t wrap_column, idx_t *current_column, FILE *out)
1049 if (wrap_column == 0)
1051 /* Simple write. */
1052 if (fwrite (buffer, 1, len, stdout) < len)
1053 write_error ();
1055 else
1056 for (idx_t written = 0; written < len; )
1058 idx_t to_write = MIN (wrap_column - *current_column, len - written);
1060 if (to_write == 0)
1062 if (fputc ('\n', out) == EOF)
1063 write_error ();
1064 *current_column = 0;
1066 else
1068 if (fwrite (buffer + written, 1, to_write, stdout) < to_write)
1069 write_error ();
1070 *current_column += to_write;
1071 written += to_write;
/* Close IN and exit.  If closing fails, report the error (naming
   INFILE, or standard input when INFILE is "-") and exit with
   EXIT_FAILURE; otherwise exit with EXIT_SUCCESS.  */
static _Noreturn void
finish_and_exit (FILE *in, char const *infile)
{
  bool close_failed = fclose (in) != 0;

  if (close_failed && STREQ (infile, "-"))
    error (EXIT_FAILURE, errno, _("closing standard input"));
  else if (close_failed)
    error (EXIT_FAILURE, errno, "%s", quotef (infile));

  exit (EXIT_SUCCESS);
}
1090 static _Noreturn void
1091 do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
1093 idx_t current_column = 0;
1094 char *inbuf, *outbuf;
1095 idx_t sum;
1097 inbuf = xmalloc (ENC_BLOCKSIZE);
1098 outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
1102 idx_t n;
1104 sum = 0;
1107 n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
1108 sum += n;
1110 while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
1112 if (sum > 0)
1114 /* Process input one block at a time. Note that ENC_BLOCKSIZE
1115 is sized so that no pad chars will appear in output. */
1116 base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
1118 wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
1119 &current_column, out);
1122 while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
1124 /* When wrapping, terminate last line. */
1125 if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
1126 write_error ();
1128 if (ferror (in))
1129 error (EXIT_FAILURE, errno, _("read error"));
1131 finish_and_exit (in, infile);
/* Decode the contents of IN and write the result to OUT.
   Input is read in blocks of up to BASE_LENGTH (DEC_BLOCKSIZE) bytes.
   If IGNORE_GARBAGE, bytes that are neither members of the selected
   alphabet (per isubase) nor '=' are removed from each block before
   it is decoded.  At end of input the decoder is called one more time
   to flush its state, with '=' padding supplied automatically when
   REQUIRED_PADDING says some is missing.  INFILE is the name used when
   reporting a failure to close IN.  Exit with failure on a read error
   or on invalid input; this function does not return.  */
1134 static _Noreturn void
1135 do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
1137 char *inbuf, *outbuf;
1138 idx_t sum;
1139 struct base_decode_context ctx;
/* Source of automatic padding characters; 8 is enough for the largest
   amount checked by the affirm below.  */
1141 char padbuf[8] = "========";
1142 inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
1143 outbuf = xmalloc (DEC_BLOCKSIZE);
/* Only the basenc context has an inbuf member; clear it before the
   decoder-specific initialization runs.  */
1145 #if BASE_TYPE == 42
1146 ctx.inbuf = nullptr;
1147 #endif
1148 base_decode_ctx_init (&ctx);
/* Outer loop: one iteration per input block, until EOF.  */
1152 bool ok;
1154 sum = 0;
/* Inner loop: keep reading until the block is full or EOF is hit.  */
1157 idx_t n = fread (inbuf + sum,
1158 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
1160 if (ignore_garbage)
/* Squeeze out unwanted bytes in place: each removal shifts the rest
   of the newly read data down by one and shortens N.  */
1162 for (idx_t i = 0; n > 0 && i < n;)
1164 if (isubase (inbuf[sum + i]) || inbuf[sum + i] == '=')
1165 i++;
1166 else
1167 memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
1171 sum += n;
1173 if (ferror (in))
1174 error (EXIT_FAILURE, errno, _("read error"));
1176 while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
1178 /* The following "loop" is usually iterated just once.
1179 However, when it processes the final input buffer, we want
1180 to iterate it one additional time, but with an indicator
1181 telling it to flush what is in CTX. */
1182 for (int k = 0; k < 1 + !!feof (in); k++)
1184 if (k == 1)
/* Nothing pending in the decoder: no flush call is needed.  */
1186 if (ctx.i == 0)
1187 break;
1189 /* auto pad input (at eof). */
1190 idx_t auto_padding = REQUIRED_PADDING (ctx.i);
/* Pad only if the input did not already end with '='.  */
1191 if (auto_padding && (sum == 0 || inbuf[sum - 1] != '='))
1193 affirm (auto_padding <= sizeof (padbuf));
1194 IF_LINT (free (inbuf));
/* Feed the decoder AUTO_PADDING '=' characters from PADBUF.  */
1195 sum = auto_padding;
1196 inbuf = padbuf;
1198 else
1199 sum = 0; /* process ctx buffer only */
/* N is passed as DEC_BLOCKSIZE, the size of OUTBUF; it is used below
   as the number of bytes to write.  */
1201 idx_t n = DEC_BLOCKSIZE;
1202 ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n);
/* Write whatever was decoded before reporting invalid input.  */
1204 if (fwrite (outbuf, 1, n, out) < n)
1205 write_error ();
1207 if (!ok)
1208 error (EXIT_FAILURE, 0, _("invalid input"));
1211 while (!feof (in));
1213 finish_and_exit (in, infile);
/* Program entry point.  Parse the command line, select the encoding
   (basenc build only), open the input, which is the single optional
   FILE operand or standard input when it is absent or "-", and then
   encode or decode it to standard output.  Neither do_decode nor
   do_encode returns.  */
1217 main (int argc, char **argv)
1219 int opt;
1220 FILE *input_fh;
1221 char const *infile;
1223 /* True if --decode has been given and we should decode data. */
1224 bool decode = false;
1225 /* True if we should ignore non-base-alphabetic characters. */
1226 bool ignore_garbage = false;
1227 /* Wrap encoded data around the 76th column, by default. */
1228 idx_t wrap_column = 76;
/* Encoding option seen on the command line; 0 means none was given.
   If several are given, the last one wins.  */
1230 #if BASE_TYPE == 42
1231 int base_type = 0;
1232 #endif
1234 initialize_main (&argc, &argv);
1235 set_program_name (argv[0]);
1236 setlocale (LC_ALL, "");
1237 bindtextdomain (PACKAGE, LOCALEDIR);
1238 textdomain (PACKAGE);
1240 atexit (close_stdout);
1242 while ((opt = getopt_long (argc, argv, "diw:", long_options, nullptr)) != -1)
1243 switch (opt)
1245 case 'd':
1246 decode = true;
1247 break;
/* --wrap: a negative or malformed value is an error; a value too
   large to represent is treated as 0, i.e. no wrapping.  */
1249 case 'w':
1251 intmax_t w;
1252 strtol_error s_err = xstrtoimax (optarg, nullptr, 10, &w, "");
1253 if (LONGINT_OVERFLOW < s_err || w < 0)
1254 error (EXIT_FAILURE, 0, "%s: %s",
1255 _("invalid wrap size"), quote (optarg));
1256 wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
1258 break;
1260 case 'i':
1261 ignore_garbage = true;
1262 break;
/* Encoding options only record which encoding was requested; the
   callbacks are installed after option parsing.  */
1264 #if BASE_TYPE == 42
1265 case BASE64_OPTION:
1266 case BASE64URL_OPTION:
1267 case BASE32_OPTION:
1268 case BASE32HEX_OPTION:
1269 case BASE16_OPTION:
1270 case BASE2MSBF_OPTION:
1271 case BASE2LSBF_OPTION:
1272 case Z85_OPTION:
1273 base_type = opt;
1274 break;
1275 #endif
1277 case_GETOPT_HELP_CHAR;
1279 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1281 default:
1282 usage (EXIT_FAILURE);
1283 break;
/* Install the length, padding, alphabet-test, encode and decode
   callbacks for the requested encoding.  */
1286 #if BASE_TYPE == 42
1287 switch (base_type)
1289 case BASE64_OPTION:
1290 base_length = base64_length_wrapper;
1291 required_padding = base64_required_padding;
1292 isubase = isubase64;
1293 base_encode = base64_encode;
1294 base_decode_ctx_init = base64_decode_ctx_init_wrapper;
1295 base_decode_ctx = base64_decode_ctx_wrapper;
1296 break;
1298 case BASE64URL_OPTION:
1299 base_length = base64_length_wrapper;
1300 required_padding = base64_required_padding;
1301 isubase = isubase64url;
1302 base_encode = base64url_encode;
1303 base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
1304 base_decode_ctx = base64url_decode_ctx_wrapper;
1305 break;
1307 case BASE32_OPTION:
1308 base_length = base32_length_wrapper;
1309 required_padding = base32_required_padding;
1310 isubase = isubase32;
1311 base_encode = base32_encode;
1312 base_decode_ctx_init = base32_decode_ctx_init_wrapper;
1313 base_decode_ctx = base32_decode_ctx_wrapper;
1314 break;
1316 case BASE32HEX_OPTION:
1317 base_length = base32_length_wrapper;
1318 required_padding = base32_required_padding;
1319 isubase = isubase32hex;
1320 base_encode = base32hex_encode;
1321 base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
1322 base_decode_ctx = base32hex_decode_ctx_wrapper;
1323 break;
1325 case BASE16_OPTION:
1326 base_length = base16_length;
1327 required_padding = no_required_padding;
1328 isubase = isubase16;
1329 base_encode = base16_encode;
1330 base_decode_ctx_init = base16_decode_ctx_init;
1331 base_decode_ctx = base16_decode_ctx;
1332 break;
1334 case BASE2MSBF_OPTION:
1335 base_length = base2_length;
1336 required_padding = no_required_padding;
1337 isubase = isubase2;
1338 base_encode = base2msbf_encode;
1339 base_decode_ctx_init = base2_decode_ctx_init;
1340 base_decode_ctx = base2msbf_decode_ctx;
1341 break;
1343 case BASE2LSBF_OPTION:
1344 base_length = base2_length;
1345 required_padding = no_required_padding;
1346 isubase = isubase2;
1347 base_encode = base2lsbf_encode;
1348 base_decode_ctx_init = base2_decode_ctx_init;
1349 base_decode_ctx = base2lsbf_decode_ctx;
1350 break;
1352 case Z85_OPTION:
1353 base_length = z85_length;
1354 required_padding = no_required_padding;
1355 isubase = isuz85;
1356 base_encode = z85_encode;
1357 base_decode_ctx_init = z85_decode_ctx_init;
1358 base_decode_ctx = z85_decode_ctx;
1359 break;
/* basenc has no default encoding: one of the options is mandatory.  */
1361 default:
1362 error (0, 0, _("missing encoding type"));
1363 usage (EXIT_FAILURE);
1365 #endif
/* At most one FILE operand is accepted.  */
1367 if (argc - optind > 1)
1369 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1370 usage (EXIT_FAILURE);
1373 if (optind < argc)
1374 infile = argv[optind];
1375 else
1376 infile = "-";
/* "-" means standard input, which is switched to binary mode.  */
1378 if (STREQ (infile, "-"))
1380 xset_binary_mode (STDIN_FILENO, O_BINARY);
1381 input_fh = stdin;
1383 else
1385 input_fh = fopen (infile, "rb");
1386 if (input_fh == nullptr)
1387 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1390 fadvise (input_fh, FADVISE_SEQUENTIAL);
1392 if (decode)
1393 do_decode (input_fh, infile, stdout, ignore_garbage);
1394 else
1395 do_encode (input_fh, infile, stdout, wrap_column);