rephist: Introduce a fraction and period for overload onionskin
[tor.git] / src / lib / encoding / binascii.c
blob1b87b4fc2bf38672baefa2572e426763bab6d38f
/* Copyright (c) 2001, Matej Pfajfar.
 * Copyright (c) 2001-2004, Roger Dingledine.
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
 * Copyright (c) 2007-2021, The Tor Project, Inc. */
/* See LICENSE for licensing information */
/**
 * \file binascii.c
 *
 * \brief Miscellaneous functions for encoding and decoding various things
 *   in base{16,32,64}.
 **/
14 #include "orconfig.h"
16 #include "lib/encoding/binascii.h"
17 #include "lib/log/log.h"
18 #include "lib/log/util_bug.h"
19 #include "lib/cc/torint.h"
20 #include "lib/string/compat_ctype.h"
21 #include "lib/intmath/muldiv.h"
22 #include "lib/malloc/malloc.h"
24 #include <stddef.h>
25 #include <string.h>
26 #include <stdlib.h>
/** Hex-encode the first <b>fromlen</b> bytes of <b>from</b> into a static
 * buffer and return a pointer to the resulting NUL-terminated string.
 *
 * At most 32 input bytes are encoded; a larger <b>fromlen</b> is silently
 * clamped to 32.  The caller must not free the result, and each call
 * overwrites the text produced by the previous one.
 */
const char *
hex_str(const char *from, size_t fromlen)
{
  /* Two hex digits per input byte, plus the terminating NUL. */
  static char hexbuf[65];
  const size_t max_input = (sizeof(hexbuf) - 1) / 2;
  const size_t n_bytes = (fromlen > max_input) ? max_input : fromlen;

  base16_encode(hexbuf, sizeof(hexbuf), from, n_bytes);
  return hexbuf;
}
43 /* Return the base32 encoded size in bytes using the source length srclen.
45 * (WATCH OUT: This API counts the terminating NUL byte, but
46 * base64_encode_size does not.)
48 size_t
49 base32_encoded_size(size_t srclen)
51 size_t enclen;
52 tor_assert(srclen < SIZE_T_CEILING / 8);
53 enclen = BASE32_NOPAD_BUFSIZE(srclen);
54 tor_assert(enclen < INT_MAX && enclen > srclen);
55 return enclen;
58 /** Implements base32 encoding as in RFC 4648. */
59 void
60 base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
62 unsigned int i, v, u;
63 size_t nbits = srclen * 8;
64 size_t bit;
66 /* We need enough space for the encoded data and the extra NUL byte. */
67 tor_assert(base32_encoded_size(srclen) <= destlen);
68 tor_assert(destlen < SIZE_T_CEILING);
70 /* Make sure we leave no uninitialized data in the destination buffer. */
71 memset(dest, 0, destlen);
73 for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
74 /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
75 size_t idx = bit / 8;
76 v = ((uint8_t)src[idx]) << 8;
77 if (idx+1 < srclen)
78 v += (uint8_t)src[idx+1];
79 /* set u to the 5-bit value at the bit'th bit of buf. */
80 u = (v >> (11-(bit%8))) & 0x1F;
81 dest[i] = BASE32_CHARS[u];
83 dest[i] = '\0';
86 /** Implements base32 decoding as in RFC 4648.
87 * Return the number of bytes decoded if successful; -1 otherwise.
89 int
90 base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
92 /* XXXX we might want to rewrite this along the lines of base64_decode, if
93 * it ever shows up in the profile. */
94 unsigned int i;
95 size_t nbits, j, bit;
96 char *tmp;
97 nbits = ((srclen * 5) / 8) * 8;
99 tor_assert(srclen < SIZE_T_CEILING / 5);
100 tor_assert((nbits/8) <= destlen); /* We need enough space. */
101 tor_assert(destlen < SIZE_T_CEILING);
103 /* Make sure we leave no uninitialized data in the destination buffer. */
104 memset(dest, 0, destlen);
106 /* Convert base32 encoded chars to the 5-bit values that they represent. */
107 tmp = tor_malloc_zero(srclen);
108 for (j = 0; j < srclen; ++j) {
109 if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
110 else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
111 else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
112 else {
113 log_warn(LD_GENERAL, "illegal character in base32 encoded string");
114 tor_free(tmp);
115 return -1;
119 /* Assemble result byte-wise by applying five possible cases. */
120 for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
121 switch (bit % 40) {
122 case 0:
123 dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
124 (((uint8_t)tmp[(bit/5)+1]) >> 2);
125 break;
126 case 8:
127 dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
128 (((uint8_t)tmp[(bit/5)+1]) << 1) +
129 (((uint8_t)tmp[(bit/5)+2]) >> 4);
130 break;
131 case 16:
132 dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
133 (((uint8_t)tmp[(bit/5)+1]) >> 1);
134 break;
135 case 24:
136 dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
137 (((uint8_t)tmp[(bit/5)+1]) << 2) +
138 (((uint8_t)tmp[(bit/5)+2]) >> 3);
139 break;
140 case 32:
141 dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
142 ((uint8_t)tmp[(bit/5)+1]);
143 break;
147 memset(tmp, 0, srclen); /* on the heap, this should be safe */
148 tor_free(tmp);
149 tmp = NULL;
150 return i;
153 #define BASE64_OPENSSL_LINELEN 64
155 /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
156 * bytes.
158 * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
159 * but base32_encoded_size does.)
161 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
162 * of the encoded output as multiline output (64 character, `\n' terminated
163 * lines).
165 size_t
166 base64_encode_size(size_t srclen, int flags)
168 size_t enclen;
170 /* Use INT_MAX for overflow checking because base64_encode() returns int. */
171 tor_assert(srclen < INT_MAX);
172 tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
174 enclen = BASE64_LEN(srclen);
175 if (flags & BASE64_ENCODE_MULTILINE)
176 enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
178 tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
179 return enclen;
/** Return an upper bound on the number of bytes needed to hold the result
 * of decoding a base64 string of <b>srclen</b> characters: srclen*3/4,
 * rounded up.  It is only an upper bound, because part of the input may be
 * padding or whitespace.  Asserts that <b>srclen</b> is below INT_MAX/3.
 */
size_t
base64_decode_maxsize(size_t srclen)
{
  tor_assert(srclen < INT_MAX / 3);

  /* Every four input characters carry at most three bytes of data. */
  const size_t tripled = srclen * 3;
  return CEIL_DIV(tripled, 4);
}
/** Internal table mapping 6 bit values to the Base64 alphabet: index 0..25
 * are 'A'..'Z', 26..51 are 'a'..'z', 52..61 are '0'..'9', 62 is '+' and
 * 63 is '/'.  The array holds exactly 64 characters and no NUL. */
static const char base64_encode_table[64] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
  'w', 'x', 'y', 'z', '0', '1', '2', '3',
  '4', '5', '6', '7', '8', '9', '+', '/'
};
206 /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
207 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
208 * bytes. Return the number of bytes written on success; -1 if
209 * destlen is too short, or other failure.
211 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
212 * output in multiline format (64 character, `\n' terminated lines).
215 base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
216 int flags)
218 const unsigned char *usrc = (unsigned char *)src;
219 const unsigned char *eous = usrc + srclen;
220 char *d = dest;
221 uint32_t n = 0;
222 size_t linelen = 0;
223 size_t enclen;
224 int n_idx = 0;
226 if (!src || !dest)
227 return -1;
229 /* Ensure that there is sufficient space, including the NUL. */
230 enclen = base64_encode_size(srclen, flags);
231 if (destlen < enclen + 1)
232 return -1;
233 if (destlen > SIZE_T_CEILING)
234 return -1;
235 if (enclen > INT_MAX)
236 return -1;
238 /* Make sure we leave no uninitialized data in the destination buffer. */
239 memset(dest, 0, destlen);
241 /* XXX/Yawning: If this ends up being too slow, this can be sped up
242 * by separating the multiline format case and the normal case, and
243 * processing 48 bytes of input at a time when newlines are desired.
245 #define ENCODE_CHAR(ch) \
246 STMT_BEGIN \
247 *d++ = ch; \
248 if (flags & BASE64_ENCODE_MULTILINE) { \
249 if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
250 linelen = 0; \
251 *d++ = '\n'; \
254 STMT_END
256 #define ENCODE_N(idx) \
257 ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
259 #define ENCODE_PAD() ENCODE_CHAR('=')
261 /* Iterate over all the bytes in src. Each one will add 8 bits to the
262 * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
263 * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
265 for ( ; usrc < eous; ++usrc) {
266 n = (n << 8) | *usrc;
267 if ((++n_idx) == 3) {
268 ENCODE_N(0);
269 ENCODE_N(1);
270 ENCODE_N(2);
271 ENCODE_N(3);
272 n_idx = 0;
273 n = 0;
276 switch (n_idx) {
277 case 0:
278 /* 0 leftover bits, no padding to add. */
279 break;
280 case 1:
281 /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
282 * by 2 padding characters.
284 n <<= 4;
285 ENCODE_N(2);
286 ENCODE_N(3);
287 ENCODE_PAD();
288 ENCODE_PAD();
289 break;
290 case 2:
291 /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
292 * by 1 padding character.
294 n <<= 2;
295 ENCODE_N(1);
296 ENCODE_N(2);
297 ENCODE_N(3);
298 ENCODE_PAD();
299 break;
300 // LCOV_EXCL_START -- we can't reach this point, because we enforce
301 // 0 <= ncov_idx < 3 in the loop above.
302 default:
303 /* Something went catastrophically wrong. */
304 tor_fragile_assert();
305 return -1;
306 // LCOV_EXCL_STOP
309 #undef ENCODE_N
310 #undef ENCODE_PAD
311 #undef ENCODE_CHAR
313 /* Multiline output always includes at least one newline. */
314 if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
315 *d++ = '\n';
317 tor_assert(d - dest == (ptrdiff_t)enclen);
319 *d++ = '\0'; /* NUL terminate the output. */
321 return (int) enclen;
/** As base64_encode() with no flags set, but strip every '=' (and any
 * newline) from the output afterwards, compacting the string in place.
 *
 * <b>dest</b> must be big enough for base64_encode() to succeed, that is,
 * it must have room for the padding that is subsequently removed.
 *
 * Return the length of the stripped string, not counting its NUL; or the
 * value returned by base64_encode() if that value is zero or negative.
 */
int
base64_encode_nopad(char *dest, size_t destlen,
                    const uint8_t *src, size_t srclen)
{
  int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
  if (n <= 0)
    return n;
  tor_assert((size_t)n < destlen && dest[n] == 0);

  /* Copy the kept characters down over the dropped ones.  The write
   * position never passes the read position, so this is safe in place. */
  char *out = dest;
  for (const char *in = dest; *in; ++in) {
    if (*in != '=' && *in != '\n')
      *out++ = *in;
  }
  *out = 0;

  tor_assert(out - dest <= INT_MAX);

  return (int)(out - dest);
}

#undef BASE64_OPENSSL_LINELEN
/** @{ */
/** Special values used for the base64_decode_table */
#define X 255
#define SP 64
#define PAD 65
/** @} */
/** Internal table mapping byte values to what they represent in base64.
 * Numbers 0..63 are 6-bit integers.  SPs are spaces (tab, LF, VT, CR and
 * ' '), and should be skipped.  Xs are invalid and must not appear in
 * base64.  PAD (the '=' character) indicates end-of-string. */
static const uint8_t base64_decode_table[256] = {
  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* 0x00: controls */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0x10 */
  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,  /* 0x20: ' ' + / */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X, /* 0x30 */
  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,        /* 0x40 */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,  /* 0x50 */
  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60 */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,  /* 0x70 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0x80 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0x90 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xA0 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xB0 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xC0 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xD0 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xE0 */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,     /* 0xF0 */
};
383 /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
384 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
385 * bytes. Return the number of bytes written on success; -1 if
386 * destlen is too short, or other failure.
388 * NOTE 1: destlen is checked conservatively, as though srclen contained no
389 * spaces or padding.
391 * NOTE 2: This implementation does not check for the correct number of
392 * padding "=" characters at the end of the string, and does not check
393 * for internal padding characters.
396 base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
398 const char *eos = src+srclen;
399 uint32_t n=0;
400 int n_idx=0;
401 size_t di = 0;
403 if (destlen > INT_MAX)
404 return -1;
406 /* Make sure we leave no uninitialized data in the destination buffer. */
407 memset(dest, 0, destlen);
409 /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
410 * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
411 * 24 bits, batch them into 3 bytes and flush those bytes to dest.
413 for ( ; src < eos; ++src) {
414 unsigned char c = (unsigned char) *src;
415 uint8_t v = base64_decode_table[c];
416 switch (v) {
417 case X:
418 /* This character isn't allowed in base64. */
419 return -1;
420 case SP:
421 /* This character is whitespace, and has no effect. */
422 continue;
423 case PAD:
424 /* We've hit an = character: the data is over. */
425 goto end_of_loop;
426 default:
427 /* We have an actual 6-bit value. Append it to the bits in n. */
428 n = (n<<6) | v;
429 if ((++n_idx) == 4) {
430 /* We've accumulated 24 bits in n. Flush them. */
431 if (destlen < 3 || di > destlen - 3)
432 return -1;
433 dest[di++] = (n>>16);
434 dest[di++] = (n>>8) & 0xff;
435 dest[di++] = (n) & 0xff;
436 n_idx = 0;
437 n = 0;
441 end_of_loop:
442 /* If we have leftover bits, we need to cope. */
443 switch (n_idx) {
444 case 0:
445 default:
446 /* No leftover bits. We win. */
447 break;
448 case 1:
449 /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
450 return -1;
451 case 2:
452 /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
453 if (destlen < 1 || di > destlen - 1)
454 return -1;
455 dest[di++] = n >> 4;
456 break;
457 case 3:
458 /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
459 if (destlen < 2 || di > destlen - 2)
460 return -1;
461 dest[di++] = n >> 10;
462 dest[di++] = n >> 2;
465 tor_assert(di <= destlen);
467 return (int)di;
469 #undef X
470 #undef SP
471 #undef PAD
473 /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
474 * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
475 * <b>dest</b>.
477 void
478 base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
480 const char *end;
481 char *cp;
483 tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
484 tor_assert(destlen >= BASE16_BUFSIZE(srclen));
485 tor_assert(destlen < SIZE_T_CEILING);
487 /* Make sure we leave no uninitialized data in the destination buffer. */
488 memset(dest, 0, destlen);
490 cp = dest;
491 end = src+srclen;
492 while (src<end) {
493 *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
494 *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
495 ++src;
497 *cp = '\0';
/** Decode the <b>srclen</b> hexadecimal characters at <b>src</b> into the
 * <b>destlen</b>-byte buffer at <b>dest</b>, one output byte per pair of
 * digits.  Return the number of bytes decoded on success, -1 on failure.
 *
 * Failure cases: <b>srclen</b> is odd; <b>destlen</b> is greater than
 * INT_MAX or less than half of <b>srclen</b>; or hex_decode_digit() returns
 * a negative value for one of the characters.  In the last case
 * <b>dest</b> has already been zeroed and may hold the bytes decoded so far.
 */
int
base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  char *dest_orig = dest;

  if ((srclen % 2) != 0)
    return -1;
  if (destlen < srclen/2 || destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  for (const char *end = src + srclen; src < end; src += 2) {
    const int hi = hex_decode_digit(src[0]);
    const int lo = hex_decode_digit(src[1]);
    if (hi < 0 || lo < 0)
      return -1;
    *(uint8_t *)dest = (uint8_t)((hi << 4) | lo);
    ++dest;
  }

  tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);

  return (int) (dest-dest_orig);
}