1 /* Base64, base32, and similar encoding/decoding strings or files.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Simon Josefsson <simon@josefsson.org>. */
23 #include <sys/types.h>
31 #include "xdectoint.h"
32 #include "xbinary-io.h"
36 proper_name ("Simon Josefsson"), \
37 proper_name ("Assaf Gordon")
39 # define AUTHORS proper_name ("Simon Josefsson")
44 # define PROGRAM_NAME "base32"
47 # define PROGRAM_NAME "base64"
52 # define PROGRAM_NAME "basenc"
54 # error missing/invalid BASE_TYPE definition
62 BASE64_OPTION
= CHAR_MAX
+ 1,
73 static struct option
const long_options
[] =
75 {"decode", no_argument
, 0, 'd'},
76 {"wrap", required_argument
, 0, 'w'},
77 {"ignore-garbage", no_argument
, 0, 'i'},
79 {"base64", no_argument
, 0, BASE64_OPTION
},
80 {"base64url", no_argument
, 0, BASE64URL_OPTION
},
81 {"base32", no_argument
, 0, BASE32_OPTION
},
82 {"base32hex", no_argument
, 0, BASE32HEX_OPTION
},
83 {"base16", no_argument
, 0, BASE16_OPTION
},
84 {"base2msbf", no_argument
, 0, BASE2MSBF_OPTION
},
85 {"base2lsbf", no_argument
, 0, BASE2LSBF_OPTION
},
86 {"z85", no_argument
, 0, Z85_OPTION
},
88 {GETOPT_HELP_OPTION_DECL
},
89 {GETOPT_VERSION_OPTION_DECL
},
90 {nullptr, 0, nullptr, 0}
96 if (status
!= EXIT_SUCCESS
)
101 Usage: %s [OPTION]... [FILE]\n\
106 basenc encode or decode FILE, or standard input, to standard output.\n\
110 Base%d encode or decode FILE, or standard input, to standard output.\n\
115 emit_mandatory_arg_note ();
118 --base64 same as 'base64' program (RFC4648 section 4)\n\
121 --base64url file- and url-safe base64 (RFC4648 section 5)\n\
124 --base32 same as 'base32' program (RFC4648 section 6)\n\
127 --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\
130 --base16 hex encoding (RFC4648 section 8)\n\
133 --base2msbf bit string with most significant bit (msb) first\n\
136 --base2lsbf bit string with least significant bit (lsb) first\n\
140 -d, --decode decode data\n\
141 -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\
142 -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\
143 Use 0 to disable line wrapping\n\
147 --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
148 when encoding, input length must be a multiple of 4;\n\
149 when decoding, input length must be a multiple of 5\n\
152 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
153 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
157 When decoding, the input may contain newlines in addition to the bytes of\n\
158 the formal alphabet. Use --ignore-garbage to attempt to recover\n\
159 from any other non-alphabet bytes in the encoded stream.\n\
164 The data are encoded as described for the %s alphabet in RFC 4648.\n\
165 When decoding, the input may contain newlines in addition to the bytes of\n\
166 the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\
167 from any other non-alphabet bytes in the encoded stream.\n"),
168 PROGRAM_NAME
, PROGRAM_NAME
);
170 emit_ancillary_info (PROGRAM_NAME
);
/* Return how many padding characters are needed to round LEN up to a
   whole base32 group of 8 characters.  The result is 0 when LEN is
   already a multiple of 8.  */
static int
base32_required_padding (int len)
{
  int partial = len % 8;
  return partial ? 8 - partial : 0;
}
/* Return how many padding characters are needed to round LEN up to a
   whole base64 group of 4 characters.  The result is 0 when LEN is
   already a multiple of 4.  */
static int
base64_required_padding (int len)
{
  int partial = len % 4;
  return partial ? 4 - partial : 0;
}
196 no_required_padding (int len
)
202 #define ENC_BLOCKSIZE (1024 * 3 * 10)
205 # define BASE_LENGTH BASE32_LENGTH
206 # define REQUIRED_PADDING base32_required_padding
207 /* Note that increasing this may decrease performance if --ignore-garbage
208 is used, because of the memmove operation below. */
209 # define DEC_BLOCKSIZE (1024 * 5)
211 /* Ensure that BLOCKSIZE is a multiple of 5 and 8. */
212 static_assert (ENC_BLOCKSIZE
% 40 == 0); /* Padding chars only on last block. */
213 static_assert (DEC_BLOCKSIZE
% 40 == 0); /* Complete encoded blocks are used. */
215 # define base_encode base32_encode
216 # define base_decode_context base32_decode_context
217 # define base_decode_ctx_init base32_decode_ctx_init
218 # define base_decode_ctx base32_decode_ctx
219 # define isubase isubase32
220 #elif BASE_TYPE == 64
221 # define BASE_LENGTH BASE64_LENGTH
222 # define REQUIRED_PADDING base64_required_padding
223 /* Note that increasing this may decrease performance if --ignore-garbage
224 is used, because of the memmove operation below. */
225 # define DEC_BLOCKSIZE (1024 * 3)
227 /* Ensure that BLOCKSIZE is a multiple of 3 and 4. */
228 static_assert (ENC_BLOCKSIZE
% 12 == 0); /* Padding chars only on last block. */
229 static_assert (DEC_BLOCKSIZE
% 12 == 0); /* Complete encoded blocks are used. */
231 # define base_encode base64_encode
232 # define base_decode_context base64_decode_context
233 # define base_decode_ctx_init base64_decode_ctx_init
234 # define base_decode_ctx base64_decode_ctx
235 # define isubase isubase64
236 #elif BASE_TYPE == 42
239 # define BASE_LENGTH base_length
240 # define REQUIRED_PADDING required_padding
242 /* Note that increasing this may decrease performance if --ignore-garbage
243 is used, because of the memmove operation below. */
244 # define DEC_BLOCKSIZE (4200)
245 static_assert (DEC_BLOCKSIZE
% 40 == 0); /* complete encoded blocks for base32*/
246 static_assert (DEC_BLOCKSIZE
% 12 == 0); /* complete encoded blocks for base64*/
248 static int (*base_length
) (int i
);
249 static int (*required_padding
) (int i
);
250 static bool (*isubase
) (unsigned char ch
);
251 static void (*base_encode
) (char const *restrict in
, idx_t inlen
,
252 char *restrict out
, idx_t outlen
);
254 struct base16_decode_context
256 /* Either a 4-bit nibble, or negative if we have no nibble. */
260 struct z85_decode_context
263 unsigned char octets
[5];
266 struct base2_decode_context
271 struct base_decode_context
273 int i
; /* will be updated manually */
275 struct base64_decode_context base64
;
276 struct base32_decode_context base32
;
277 struct base16_decode_context base16
;
278 struct base2_decode_context base2
;
279 struct z85_decode_context z85
;
284 static void (*base_decode_ctx_init
) (struct base_decode_context
*ctx
);
285 static bool (*base_decode_ctx
) (struct base_decode_context
*ctx
,
286 char const *restrict in
, idx_t inlen
,
287 char *restrict out
, idx_t
*outlen
);
/* Function form of the BASE64_LENGTH macro, so that it can be stored
   in the base_length function pointer.  */
static int
base64_length_wrapper (int len)
{
  return BASE64_LENGTH (len);
}
302 base64_decode_ctx_init_wrapper (struct base_decode_context
*ctx
)
304 base64_decode_ctx_init (&ctx
->ctx
.base64
);
308 base64_decode_ctx_wrapper (struct base_decode_context
*ctx
,
309 char const *restrict in
, idx_t inlen
,
310 char *restrict out
, idx_t
*outlen
)
312 bool b
= base64_decode_ctx (&ctx
->ctx
.base64
, in
, inlen
, out
, outlen
);
313 ctx
->i
= ctx
->ctx
.base64
.i
;
318 init_inbuf (struct base_decode_context
*ctx
)
320 ctx
->bufsize
= DEC_BLOCKSIZE
;
321 ctx
->inbuf
= xcharalloc (ctx
->bufsize
);
325 prepare_inbuf (struct base_decode_context
*ctx
, idx_t inlen
)
327 if (ctx
->bufsize
< inlen
)
329 ctx
->bufsize
= inlen
* 2;
330 ctx
->inbuf
= xnrealloc (ctx
->inbuf
, ctx
->bufsize
, sizeof (char));
336 base64url_encode (char const *restrict in
, idx_t inlen
,
337 char *restrict out
, idx_t outlen
)
339 base64_encode (in
, inlen
, out
, outlen
);
340 /* translate 62nd and 63rd characters */
/* Return true if CH is in the base64url alphabet: '-' and '_' are
   accepted, '+' and '/' are rejected, and every other character is
   accepted exactly when isubase64 accepts it.  */
static bool
isubase64url (unsigned char ch)
{
  return (ch == '-' || ch == '_'
          || (ch != '+' && ch != '/' && isubase64 (ch)));
}
360 base64url_decode_ctx_init_wrapper (struct base_decode_context
*ctx
)
362 base64_decode_ctx_init (&ctx
->ctx
.base64
);
368 base64url_decode_ctx_wrapper (struct base_decode_context
*ctx
,
369 char const *restrict in
, idx_t inlen
,
370 char *restrict out
, idx_t
*outlen
)
372 prepare_inbuf (ctx
, inlen
);
373 memcpy (ctx
->inbuf
, in
, inlen
);
375 /* translate 62nd and 63rd characters */
377 char *p
= ctx
->inbuf
;
380 if (*p
== '+' || *p
== '/')
383 return false; /* reject base64 input */
392 bool b
= base64_decode_ctx (&ctx
->ctx
.base64
, ctx
->inbuf
, inlen
,
394 ctx
->i
= ctx
->ctx
.base64
.i
;
/* Function form of the BASE32_LENGTH macro, so that it can be stored
   in the base_length function pointer.  */
static int
base32_length_wrapper (int len)
{
  return BASE32_LENGTH (len);
}
408 base32_decode_ctx_init_wrapper (struct base_decode_context
*ctx
)
410 base32_decode_ctx_init (&ctx
->ctx
.base32
);
414 base32_decode_ctx_wrapper (struct base_decode_context
*ctx
,
415 char const *restrict in
, idx_t inlen
,
416 char *restrict out
, idx_t
*outlen
)
418 bool b
= base32_decode_ctx (&ctx
->ctx
.base32
, in
, inlen
, out
, outlen
);
419 ctx
->i
= ctx
->ctx
.base32
.i
;
423 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
425 0123456789ABCDEFGHIJKLMNOPQRSTUV */
426 static const char base32_norm_to_hex
[32 + 9] = {
427 /*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */
428 'Q', 'R', 'S', 'T', 'U', 'V',
430 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
432 /*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */
433 '0', '1', '2', '3', '4', '5', '6', '7',
435 /*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */
436 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
438 /*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */
439 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
445 /* 0123456789ABCDEFGHIJKLMNOPQRSTUV
447 ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 */
448 static const char base32_hex_to_norm
[32 + 9] = {
449 /* from: 0x30 .. 0x39 ('0' to '9') */
450 /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
452 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
454 /* from: 0x41 .. 0x4A ('A' to 'J') */
455 /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
457 /* from: 0x4B .. 0x54 ('K' to 'T') */
458 /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',
460 /* from: 0x55 .. 0x56 ('U' to 'V') */
/* Return true if CH is in the base32hex alphabet, i.e. a digit
   '0'..'9' or an upper-case letter 'A'..'V'.  */
static bool
isubase32hex (unsigned char ch)
{
  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'V');
}
473 base32hex_encode (char const *restrict in
, idx_t inlen
,
474 char *restrict out
, idx_t outlen
)
476 base32_encode (in
, inlen
, out
, outlen
);
478 for (char *p
= out
; outlen
--; p
++)
480 affirm (0x32 <= *p
&& *p
<= 0x5a); /* LCOV_EXCL_LINE */
481 *p
= base32_norm_to_hex
[*p
- 0x32];
487 base32hex_decode_ctx_init_wrapper (struct base_decode_context
*ctx
)
489 base32_decode_ctx_init (&ctx
->ctx
.base32
);
495 base32hex_decode_ctx_wrapper (struct base_decode_context
*ctx
,
496 char const *restrict in
, idx_t inlen
,
497 char *restrict out
, idx_t
*outlen
)
499 prepare_inbuf (ctx
, inlen
);
502 char *p
= ctx
->inbuf
;
505 if (isubase32hex (*in
))
506 *p
= base32_hex_to_norm
[*in
- 0x30];
513 bool b
= base32_decode_ctx (&ctx
->ctx
.base32
, ctx
->inbuf
, inlen
,
515 ctx
->i
= ctx
->ctx
.base32
.i
;
/* With this approach this file works independent of the charset used
   (think EBCDIC).  However, it does assume that the characters in the
   Base16 alphabet (0-9, A-F, a-f) are encoded in 0..255.  POSIX
   1003.1-2001 requires that char and unsigned char are 8-bit
   quantities, though, taking care of that problem.  But this may be a
   potential problem on non-POSIX C99 platforms.

   IBM C V6 for AIX mishandles "#define B16(x) ...'x'...", so use "_"
   as the formal parameter rather than "x".  */
539 : (_) == 'A' || (_) == 'a' ? 10 \
540 : (_) == 'B' || (_) == 'b' ? 11 \
541 : (_) == 'C' || (_) == 'c' ? 12 \
542 : (_) == 'D' || (_) == 'd' ? 13 \
543 : (_) == 'E' || (_) == 'e' ? 14 \
544 : (_) == 'F' || (_) == 'f' ? 15 \
547 static signed char const base16_to_int
[256] = {
548 B16 (0), B16 (1), B16 (2), B16 (3),
549 B16 (4), B16 (5), B16 (6), B16 (7),
550 B16 (8), B16 (9), B16 (10), B16 (11),
551 B16 (12), B16 (13), B16 (14), B16 (15),
552 B16 (16), B16 (17), B16 (18), B16 (19),
553 B16 (20), B16 (21), B16 (22), B16 (23),
554 B16 (24), B16 (25), B16 (26), B16 (27),
555 B16 (28), B16 (29), B16 (30), B16 (31),
556 B16 (32), B16 (33), B16 (34), B16 (35),
557 B16 (36), B16 (37), B16 (38), B16 (39),
558 B16 (40), B16 (41), B16 (42), B16 (43),
559 B16 (44), B16 (45), B16 (46), B16 (47),
560 B16 (48), B16 (49), B16 (50), B16 (51),
561 B16 (52), B16 (53), B16 (54), B16 (55),
562 B16 (56), B16 (57), B16 (58), B16 (59),
563 B16 (60), B16 (61), B16 (62), B16 (63),
564 B16 (32), B16 (65), B16 (66), B16 (67),
565 B16 (68), B16 (69), B16 (70), B16 (71),
566 B16 (72), B16 (73), B16 (74), B16 (75),
567 B16 (76), B16 (77), B16 (78), B16 (79),
568 B16 (80), B16 (81), B16 (82), B16 (83),
569 B16 (84), B16 (85), B16 (86), B16 (87),
570 B16 (88), B16 (89), B16 (90), B16 (91),
571 B16 (92), B16 (93), B16 (94), B16 (95),
572 B16 (96), B16 (97), B16 (98), B16 (99),
573 B16 (100), B16 (101), B16 (102), B16 (103),
574 B16 (104), B16 (105), B16 (106), B16 (107),
575 B16 (108), B16 (109), B16 (110), B16 (111),
576 B16 (112), B16 (113), B16 (114), B16 (115),
577 B16 (116), B16 (117), B16 (118), B16 (119),
578 B16 (120), B16 (121), B16 (122), B16 (123),
579 B16 (124), B16 (125), B16 (126), B16 (127),
580 B16 (128), B16 (129), B16 (130), B16 (131),
581 B16 (132), B16 (133), B16 (134), B16 (135),
582 B16 (136), B16 (137), B16 (138), B16 (139),
583 B16 (140), B16 (141), B16 (142), B16 (143),
584 B16 (144), B16 (145), B16 (146), B16 (147),
585 B16 (148), B16 (149), B16 (150), B16 (151),
586 B16 (152), B16 (153), B16 (154), B16 (155),
587 B16 (156), B16 (157), B16 (158), B16 (159),
588 B16 (160), B16 (161), B16 (162), B16 (163),
589 B16 (132), B16 (165), B16 (166), B16 (167),
590 B16 (168), B16 (169), B16 (170), B16 (171),
591 B16 (172), B16 (173), B16 (174), B16 (175),
592 B16 (176), B16 (177), B16 (178), B16 (179),
593 B16 (180), B16 (181), B16 (182), B16 (183),
594 B16 (184), B16 (185), B16 (186), B16 (187),
595 B16 (188), B16 (189), B16 (190), B16 (191),
596 B16 (192), B16 (193), B16 (194), B16 (195),
597 B16 (196), B16 (197), B16 (198), B16 (199),
598 B16 (200), B16 (201), B16 (202), B16 (203),
599 B16 (204), B16 (205), B16 (206), B16 (207),
600 B16 (208), B16 (209), B16 (210), B16 (211),
601 B16 (212), B16 (213), B16 (214), B16 (215),
602 B16 (216), B16 (217), B16 (218), B16 (219),
603 B16 (220), B16 (221), B16 (222), B16 (223),
604 B16 (224), B16 (225), B16 (226), B16 (227),
605 B16 (228), B16 (229), B16 (230), B16 (231),
606 B16 (232), B16 (233), B16 (234), B16 (235),
607 B16 (236), B16 (237), B16 (238), B16 (239),
608 B16 (240), B16 (241), B16 (242), B16 (243),
609 B16 (244), B16 (245), B16 (246), B16 (247),
610 B16 (248), B16 (249), B16 (250), B16 (251),
611 B16 (252), B16 (253), B16 (254), B16 (255)
615 isubase16 (unsigned char ch
)
617 return ch
< sizeof base16_to_int
&& 0 <= base16_to_int
[ch
];
621 base16_length (int len
)
628 base16_encode (char const *restrict in
, idx_t inlen
,
629 char *restrict out
, idx_t outlen
)
631 static const char base16
[16] = "0123456789ABCDEF";
635 unsigned char c
= *in
;
636 *out
++ = base16
[c
>> 4];
637 *out
++ = base16
[c
& 0x0F];
644 base16_decode_ctx_init (struct base_decode_context
*ctx
)
647 ctx
->ctx
.base16
.nibble
= -1;
653 base16_decode_ctx (struct base_decode_context
*ctx
,
654 char const *restrict in
, idx_t inlen
,
655 char *restrict out
, idx_t
*outlen
)
657 bool ignore_lines
= true; /* for now, always ignore them */
659 signed char nibble
= ctx
->ctx
.base16
.nibble
;
661 /* inlen==0 is request to flush output.
662 if there is a dangling high nibble - we are missing the low nibble,
663 so return false - indicating an invalid input. */
672 unsigned char c
= *in
++;
673 if (ignore_lines
&& c
== '\n')
676 if (sizeof base16_to_int
<= c
|| base16_to_int
[c
] < 0)
678 *outlen
= out
- out0
;
679 return false; /* garbage - return false */
683 nibble
= base16_to_int
[c
];
686 /* have both nibbles, write octet */
687 *out
++ = (nibble
<< 4) + base16_to_int
[c
];
692 ctx
->ctx
.base16
.nibble
= nibble
;
693 *outlen
= out
- out0
;
703 /* Z85 does not allow padding, so no need to round to highest integer. */
704 int outlen
= (len
* 5) / 4;
709 isuz85 (unsigned char ch
)
711 return c_isalnum (ch
) || strchr (".-:+=^!/*?&<>()[]{}@%$#", ch
) != nullptr;
714 static char const z85_encoding
[85] =
716 "abcdefghijklmnopqrstuvwxyz"
717 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
718 ".-:+=^!/*?&<>()[]{}@%$#";
721 z85_encode (char const *restrict in
, idx_t inlen
,
722 char *restrict out
, idx_t outlen
)
725 unsigned char quad
[4];
732 /* no more input, exactly on 4 octet boundary. */
736 /* currently, there's no way to return an error in encoding. */
737 error (EXIT_FAILURE
, 0,
738 _("invalid input (length must be multiple of 4 characters)"));
746 /* Got a quad, encode it */
749 int_fast64_t val
= quad
[0];
750 val
= (val
<< 24) + (quad
[1] << 16) + (quad
[2] << 8) + quad
[3];
752 for (int j
= 4; j
>= 0; --j
)
757 /* NOTE: if there is padding (which is trimmed by z85
758 before outputting the result), the output buffer 'out'
759 might not include enough allocated bytes for the padding,
760 so don't store them. */
761 if (outidx
+ j
< outlen
)
762 out
[j
] = z85_encoding
[c
];
772 z85_decode_ctx_init (struct base_decode_context
*ctx
)
780 # define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
781 (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) + \
782 ((ctx)->ctx.z85.octets[2] * 85 * 85) + \
783 ((ctx)->ctx.z85.octets[3] * 85) + \
784 ((ctx)->ctx.z85.octets[4]))
787 # define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
788 ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
791 0 - 9: 0 1 2 3 4 5 6 7 8 9
792 10 - 19: a b c d e f g h i j
793 20 - 29: k l m n o p q r s t
794 30 - 39: u v w x y z A B C D
795 40 - 49: E F G H I J K L M N
796 50 - 59: O P Q R S T U V W X
797 60 - 69: Y Z . - : + = ^ ! / #dummy comment to workaround syntax-check
798 70 - 79: * ? & < > ( ) [ ] {
/* Z85 digit values for the 93 characters '!' (33) through '}' (125).
   Index the table with C - 33; an entry of -1 marks a character that
   is not part of the Z85 alphabet.  */
static signed char const z85_decoding[93] = {
  68, -1,  84,  83, 82,  72, -1,               /* ! " # $ % & ' */
  75, 76,  70,  65, -1,  63, 62, 69,           /* ( ) * + , - . / */
  0,  1,   2,   3,  4,   5,  6,   7,  8,  9,   /* '0' to '9' */
  64, -1,  73,  66, 74,  71, 81,               /* : ; < =  > ? @ */
  36, 37,  38,  39, 40,  41, 42,  43, 44, 45,  /* 'A' to 'J' */
  46, 47,  48,  49, 50,  51, 52,  53, 54, 55,  /* 'K' to 'T' */
  56, 57,  58,  59, 60,  61,                   /* 'U' to 'Z' */
  77,  -1, 78,  67,  -1,  -1,                  /* [ \ ] ^ _ ` */
  10, 11,  12,  13, 14,  15, 16,  17, 18, 19,  /* 'a' to 'j' */
  20, 21,  22,  23, 24,  25, 26,  27, 28, 29,  /* 'k' to 't' */
  30, 31,  32,  33, 34,  35,                   /* 'u' to 'z' */
  79, -1,  80                                  /* { | } */
};
817 z85_decode_ctx (struct base_decode_context
*ctx
,
818 char const *restrict in
, idx_t inlen
,
819 char *restrict out
, idx_t
*outlen
)
821 bool ignore_lines
= true; /* for now, always ignore them */
825 /* inlen==0 is request to flush output.
826 if there are dangling values - we are missing entries,
827 so return false - indicating an invalid input. */
830 if (ctx
->ctx
.z85
.i
> 0)
832 /* Z85 variant does not allow padding - input must
833 be a multiple of 5 - so return error. */
841 if (ignore_lines
&& *in
== '\n')
848 unsigned char c
= *in
;
850 if (c
>= 33 && c
<= 125)
852 signed char ch
= z85_decoding
[c
- 33];
854 return false; /* garbage - return false */
858 return false; /* garbage - return false */
862 ctx
->ctx
.z85
.octets
[ctx
->ctx
.z85
.i
++] = c
;
863 if (ctx
->ctx
.z85
.i
== 5)
865 /* decode the lowest 4 octets, then check for overflows. */
866 int_fast64_t val
= Z85_LO_CTX_TO_32BIT_VAL (ctx
);
868 /* The Z85 spec and the reference implementation say nothing
869 about overflows. To be on the safe side, reject them. */
871 val
+= Z85_HI_CTX_TO_32BIT_VAL (ctx
);
872 if ((val
>> 24) & ~0xFF)
876 *out
++ = (val
>> 16) & 0xFF;
877 *out
++ = (val
>> 8) & 0xFF;
885 ctx
->i
= ctx
->ctx
.z85
.i
;
/* Return true if CH is one of the two base2 digits, '0' or '1'.  */
static bool
isubase2 (unsigned char ch)
{
  return ch == '0' || ch == '1';
}
897 base2_length (int len
)
904 base2msbf_encode (char const *restrict in
, idx_t inlen
,
905 char *restrict out
, idx_t outlen
)
909 unsigned char c
= *in
;
910 for (int i
= 0; i
< 8; i
++)
912 *out
++ = c
& 0x80 ? '1' : '0';
921 base2lsbf_encode (char const *restrict in
, idx_t inlen
,
922 char *restrict out
, idx_t outlen
)
926 unsigned char c
= *in
;
927 for (int i
= 0; i
< 8; i
++)
929 *out
++ = c
& 0x01 ? '1' : '0';
939 base2_decode_ctx_init (struct base_decode_context
*ctx
)
942 ctx
->ctx
.base2
.octet
= 0;
948 base2lsbf_decode_ctx (struct base_decode_context
*ctx
,
949 char const *restrict in
, idx_t inlen
,
950 char *restrict out
, idx_t
*outlen
)
952 bool ignore_lines
= true; /* for now, always ignore them */
956 /* inlen==0 is request to flush output.
957 if there is a dangling bit - we are missing some bits,
958 so return false - indicating an invalid input. */
964 if (ignore_lines
&& *in
== '\n')
973 bool bit
= (*in
== '1');
974 ctx
->ctx
.base2
.octet
|= bit
<< ctx
->i
;
979 *out
++ = ctx
->ctx
.base2
.octet
;
980 ctx
->ctx
.base2
.octet
= 0;
992 base2msbf_decode_ctx (struct base_decode_context
*ctx
,
993 char const *restrict in
, idx_t inlen
,
994 char *restrict out
, idx_t
*outlen
)
996 bool ignore_lines
= true; /* for now, always ignore them */
1000 /* inlen==0 is request to flush output.
1001 if there is a dangling bit - we are missing some bits,
1002 so return false - indicating an invalid input. */
1008 if (ignore_lines
&& *in
== '\n')
1014 if (!isubase2 (*in
))
1017 bool bit
= (*in
== '1');
1021 ctx
->ctx
.base2
.octet
|= bit
<< ctx
->i
;
1025 *out
++ = ctx
->ctx
.base2
.octet
;
1026 ctx
->ctx
.base2
.octet
= 0;
1037 #endif /* BASE_TYPE == 42, i.e., "basenc"*/
/* Write LEN bytes from BUFFER.  When WRAP_COLUMN is 0 the whole buffer
   is written with a single fwrite.  Otherwise it is written in pieces
   of at most WRAP_COLUMN - *CURRENT_COLUMN bytes; *CURRENT_COLUMN is
   advanced by the size of each piece and reset to 0 when a newline is
   emitted, and WRITTEN tracks how much of BUFFER has been consumed.

   NOTE(review): both fwrite calls below write to stdout, while the
   newline is written to OUT.  The output is only consistent when the
   caller passes stdout as OUT -- confirm, or switch the fwrite calls
   to OUT.  */
1042 wrap_write (char const *buffer
, idx_t len
,
1043 idx_t wrap_column
, idx_t
*current_column
, FILE *out
)
1045 if (wrap_column
== 0)
1048 if (fwrite (buffer
, 1, len
, stdout
) < len
)
1052 for (idx_t written
= 0; written
< len
; )
1054 idx_t to_write
= MIN (wrap_column
- *current_column
, len
- written
);
1058 if (fputc ('\n', out
) == EOF
)
1060 *current_column
= 0;
1064 if (fwrite (buffer
+ written
, 1, to_write
, stdout
) < to_write
)
1066 *current_column
+= to_write
;
1067 written
+= to_write
;
/* Close the input stream IN and terminate the program.  If fclose
   fails, exit with EXIT_FAILURE after reporting errno: with a fixed
   message when INFILE is "-", otherwise with the quoted file name.
   Exit with EXIT_SUCCESS when the stream closes cleanly.  */
static _Noreturn void
finish_and_exit (FILE *in, char const *infile)
{
  if (fclose (in) != 0)
    {
      if (STREQ (infile, "-"))
        error (EXIT_FAILURE, errno, _("closing standard input"));
      else
        error (EXIT_FAILURE, errno, "%s", quotef (infile));
    }

  exit (EXIT_SUCCESS);
}
1086 static _Noreturn
void
1087 do_encode (FILE *in
, char const *infile
, FILE *out
, idx_t wrap_column
)
1089 idx_t current_column
= 0;
1090 char *inbuf
, *outbuf
;
1093 inbuf
= xmalloc (ENC_BLOCKSIZE
);
1094 outbuf
= xmalloc (BASE_LENGTH (ENC_BLOCKSIZE
));
1103 n
= fread (inbuf
+ sum
, 1, ENC_BLOCKSIZE
- sum
, in
);
1106 while (!feof (in
) && !ferror (in
) && sum
< ENC_BLOCKSIZE
);
1110 /* Process input one block at a time. Note that ENC_BLOCKSIZE
1111 is sized so that no pad chars will appear in output. */
1112 base_encode (inbuf
, sum
, outbuf
, BASE_LENGTH (sum
));
1114 wrap_write (outbuf
, BASE_LENGTH (sum
), wrap_column
,
1115 ¤t_column
, out
);
1118 while (!feof (in
) && !ferror (in
) && sum
== ENC_BLOCKSIZE
);
1120 /* When wrapping, terminate last line. */
1121 if (wrap_column
&& current_column
> 0 && fputc ('\n', out
) == EOF
)
1125 error (EXIT_FAILURE
, errno
, _("read error"));
1127 finish_and_exit (in
, infile
);
1130 static _Noreturn
void
1131 do_decode (FILE *in
, char const *infile
, FILE *out
, bool ignore_garbage
)
1133 char *inbuf
, *outbuf
;
1135 struct base_decode_context ctx
;
1137 char padbuf
[8] = "========";
1138 inbuf
= xmalloc (BASE_LENGTH (DEC_BLOCKSIZE
));
1139 outbuf
= xmalloc (DEC_BLOCKSIZE
);
1142 ctx
.inbuf
= nullptr;
1144 base_decode_ctx_init (&ctx
);
1153 idx_t n
= fread (inbuf
+ sum
,
1154 1, BASE_LENGTH (DEC_BLOCKSIZE
) - sum
, in
);
1158 for (idx_t i
= 0; n
> 0 && i
< n
;)
1160 if (isubase (inbuf
[sum
+ i
]) || inbuf
[sum
+ i
] == '=')
1163 memmove (inbuf
+ sum
+ i
, inbuf
+ sum
+ i
+ 1, --n
- i
);
1170 error (EXIT_FAILURE
, errno
, _("read error"));
1172 while (sum
< BASE_LENGTH (DEC_BLOCKSIZE
) && !feof (in
));
1174 /* The following "loop" is usually iterated just once.
1175 However, when it processes the final input buffer, we want
1176 to iterate it one additional time, but with an indicator
1177 telling it to flush what is in CTX. */
1178 for (int k
= 0; k
< 1 + !!feof (in
); k
++)
1185 /* auto pad input (at eof). */
1186 idx_t auto_padding
= REQUIRED_PADDING (ctx
.i
);
1187 if (auto_padding
&& (sum
== 0 || inbuf
[sum
- 1] != '='))
1189 affirm (auto_padding
<= sizeof (padbuf
));
1190 IF_LINT (free (inbuf
));
1195 sum
= 0; /* process ctx buffer only */
1197 idx_t n
= DEC_BLOCKSIZE
;
1198 ok
= base_decode_ctx (&ctx
, inbuf
, sum
, outbuf
, &n
);
1200 if (fwrite (outbuf
, 1, n
, out
) < n
)
1204 error (EXIT_FAILURE
, 0, _("invalid input"));
1209 finish_and_exit (in
, infile
);
1213 main (int argc
, char **argv
)
1219 /* True if --decode has been given and we should decode data. */
1220 bool decode
= false;
1221 /* True if we should ignore non-base-alphabetic characters. */
1222 bool ignore_garbage
= false;
1223 /* Wrap encoded data around the 76th column, by default. */
1224 idx_t wrap_column
= 76;
1230 initialize_main (&argc
, &argv
);
1231 set_program_name (argv
[0]);
1232 setlocale (LC_ALL
, "");
1233 bindtextdomain (PACKAGE
, LOCALEDIR
);
1234 textdomain (PACKAGE
);
1236 atexit (close_stdout
);
1238 while ((opt
= getopt_long (argc
, argv
, "diw:", long_options
, nullptr)) != -1)
1248 strtol_error s_err
= xstrtoimax (optarg
, nullptr, 10, &w
, "");
1249 if (LONGINT_OVERFLOW
< s_err
|| w
< 0)
1250 error (EXIT_FAILURE
, 0, "%s: %s",
1251 _("invalid wrap size"), quote (optarg
));
1252 wrap_column
= s_err
== LONGINT_OVERFLOW
|| IDX_MAX
< w
? 0 : w
;
1257 ignore_garbage
= true;
1262 case BASE64URL_OPTION
:
1264 case BASE32HEX_OPTION
:
1266 case BASE2MSBF_OPTION
:
1267 case BASE2LSBF_OPTION
:
1273 case_GETOPT_HELP_CHAR
;
1275 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1278 usage (EXIT_FAILURE
);
1286 base_length
= base64_length_wrapper
;
1287 required_padding
= base64_required_padding
;
1288 isubase
= isubase64
;
1289 base_encode
= base64_encode
;
1290 base_decode_ctx_init
= base64_decode_ctx_init_wrapper
;
1291 base_decode_ctx
= base64_decode_ctx_wrapper
;
1294 case BASE64URL_OPTION
:
1295 base_length
= base64_length_wrapper
;
1296 required_padding
= base64_required_padding
;
1297 isubase
= isubase64url
;
1298 base_encode
= base64url_encode
;
1299 base_decode_ctx_init
= base64url_decode_ctx_init_wrapper
;
1300 base_decode_ctx
= base64url_decode_ctx_wrapper
;
1304 base_length
= base32_length_wrapper
;
1305 required_padding
= base32_required_padding
;
1306 isubase
= isubase32
;
1307 base_encode
= base32_encode
;
1308 base_decode_ctx_init
= base32_decode_ctx_init_wrapper
;
1309 base_decode_ctx
= base32_decode_ctx_wrapper
;
1312 case BASE32HEX_OPTION
:
1313 base_length
= base32_length_wrapper
;
1314 required_padding
= base32_required_padding
;
1315 isubase
= isubase32hex
;
1316 base_encode
= base32hex_encode
;
1317 base_decode_ctx_init
= base32hex_decode_ctx_init_wrapper
;
1318 base_decode_ctx
= base32hex_decode_ctx_wrapper
;
1322 base_length
= base16_length
;
1323 required_padding
= no_required_padding
;
1324 isubase
= isubase16
;
1325 base_encode
= base16_encode
;
1326 base_decode_ctx_init
= base16_decode_ctx_init
;
1327 base_decode_ctx
= base16_decode_ctx
;
1330 case BASE2MSBF_OPTION
:
1331 base_length
= base2_length
;
1332 required_padding
= no_required_padding
;
1334 base_encode
= base2msbf_encode
;
1335 base_decode_ctx_init
= base2_decode_ctx_init
;
1336 base_decode_ctx
= base2msbf_decode_ctx
;
1339 case BASE2LSBF_OPTION
:
1340 base_length
= base2_length
;
1341 required_padding
= no_required_padding
;
1343 base_encode
= base2lsbf_encode
;
1344 base_decode_ctx_init
= base2_decode_ctx_init
;
1345 base_decode_ctx
= base2lsbf_decode_ctx
;
1349 base_length
= z85_length
;
1350 required_padding
= no_required_padding
;
1352 base_encode
= z85_encode
;
1353 base_decode_ctx_init
= z85_decode_ctx_init
;
1354 base_decode_ctx
= z85_decode_ctx
;
1358 error (0, 0, _("missing encoding type"));
1359 usage (EXIT_FAILURE
);
1363 if (argc
- optind
> 1)
1365 error (0, 0, _("extra operand %s"), quote (argv
[optind
+ 1]));
1366 usage (EXIT_FAILURE
);
1370 infile
= argv
[optind
];
1374 if (STREQ (infile
, "-"))
1376 xset_binary_mode (STDIN_FILENO
, O_BINARY
);
1381 input_fh
= fopen (infile
, "rb");
1382 if (input_fh
== nullptr)
1383 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
1386 fadvise (input_fh
, FADVISE_SEQUENTIAL
);
1389 do_decode (input_fh
, infile
, stdout
, ignore_garbage
);
1391 do_encode (input_fh
, infile
, stdout
, wrap_column
);