src/lib/encoding/binascii.c

   1 /* Copyright (c) 2001, Matej Pfajfar.
   2  * Copyright (c) 2001-2004, Roger Dingledine.
   3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
   4  * Copyright (c) 2007-2021, The Tor Project, Inc. */
   5 /* See LICENSE for licensing information */
   6
   7 /**
   8  * \file binascii.c
   9  *
  10  * \brief Miscellaneous functions for encoding and decoding various things
  11  *   in base{16,32,64}.
  12  */
  13
  14 #include "orconfig.h"
  15
  16 #include "lib/encoding/binascii.h"
  17 #include "lib/log/log.h"
  18 #include "lib/log/util_bug.h"
  19 #include "lib/cc/torint.h"
  20 #include "lib/string/compat_ctype.h"
  21 #include "lib/intmath/muldiv.h"
  22 #include "lib/malloc/malloc.h"
  23
  24 #include <stddef.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27
  28 /** Return a pointer to a NUL-terminated hexadecimal string encoding
  29  * the first <b>fromlen</b> bytes of <b>from</b>. (fromlen must be \<= 32.) The
  30  * result does not need to be deallocated, but repeated calls to
  31  * hex_str will trash old results.
  32  */
  33 const char *
  34 hex_str(const char *from, size_t fromlen)
  35 {
  36   static char buf[65];
  37   if (fromlen>(sizeof(buf)-1)/2)
  38     fromlen = (sizeof(buf)-1)/2;
  39   base16_encode(buf,sizeof(buf),from,fromlen);
  40   return buf;
  41 }
  42
  43 /* Return the base32 encoded size in bytes using the source length srclen.
  44  *
  45  * (WATCH OUT: This API counts the terminating NUL byte, but
  46  * base64_encode_size does not.)
  47  */
  48 size_t
  49 base32_encoded_size(size_t srclen)
  50 {
  51   size_t enclen;
  52   tor_assert(srclen < SIZE_T_CEILING / 8);
  53   enclen = BASE32_NOPAD_BUFSIZE(srclen);
  54   tor_assert(enclen < INT_MAX && enclen > srclen);
  55   return enclen;
  56 }
  57
  58 /** Implements base32 encoding as in RFC 4648. */
  59 void
  60 base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
  61 {
  62   unsigned int i, v, u;
  63   size_t nbits = srclen * 8;
  64   size_t bit;
  65
  66   /* We need enough space for the encoded data and the extra NUL byte. */
  67   tor_assert(base32_encoded_size(srclen) <= destlen);
  68   tor_assert(destlen < SIZE_T_CEILING);
  69
  70   /* Make sure we leave no uninitialized data in the destination buffer. */
  71   memset(dest, 0, destlen);
  72
  73   for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
  74     /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
  75     size_t idx = bit / 8;
  76     v = ((uint8_t)src[idx]) << 8;
  77     if (idx+1 < srclen)
  78       v += (uint8_t)src[idx+1];
  79     /* set u to the 5-bit value at the bit'th bit of buf. */
  80     u = (v >> (11-(bit%8))) & 0x1F;
  81     dest[i] = BASE32_CHARS[u];
  82   }
  83   dest[i] = '\0';
  84 }
  85
  86 /** Implements base32 decoding as in RFC 4648.
  87  * Return the number of bytes decoded if successful; -1 otherwise.
  88  */
  89 int
  90 base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
  91 {
  92   /* XXXX we might want to rewrite this along the lines of base64_decode, if
  93    * it ever shows up in the profile. */
  94   unsigned int i;
  95   size_t nbits, j, bit;
  96   char *tmp;
  97   nbits = ((srclen * 5) / 8) * 8;
  98
  99   tor_assert(srclen < SIZE_T_CEILING / 5);
 100   tor_assert((nbits/8) <= destlen); /* We need enough space. */
 101   tor_assert(destlen < SIZE_T_CEILING);
 102
 103   /* Make sure we leave no uninitialized data in the destination buffer. */
 104   memset(dest, 0, destlen);
 105
 106   /* Convert base32 encoded chars to the 5-bit values that they represent. */
 107   tmp = tor_malloc_zero(srclen);
 108   for (j = 0; j < srclen; ++j) {
 109     if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
 110     else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
 111     else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
 112     else {
 113       log_warn(LD_GENERAL, "illegal character in base32 encoded string");
 114       tor_free(tmp);
 115       return -1;
 116     }
 117   }
 118
 119   /* Assemble result byte-wise by applying five possible cases. */
 120   for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
 121     switch (bit % 40) {
 122     case 0:
 123       dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
 124                 (((uint8_t)tmp[(bit/5)+1]) >> 2);
 125       break;
 126     case 8:
 127       dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
 128                 (((uint8_t)tmp[(bit/5)+1]) << 1) +
 129                 (((uint8_t)tmp[(bit/5)+2]) >> 4);
 130       break;
 131     case 16:
 132       dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
 133                 (((uint8_t)tmp[(bit/5)+1]) >> 1);
 134       break;
 135     case 24:
 136       dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
 137                 (((uint8_t)tmp[(bit/5)+1]) << 2) +
 138                 (((uint8_t)tmp[(bit/5)+2]) >> 3);
 139       break;
 140     case 32:
 141       dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
 142                 ((uint8_t)tmp[(bit/5)+1]);
 143       break;
 144     }
 145   }
 146
 147   memset(tmp, 0, srclen); /* on the heap, this should be safe */
 148   tor_free(tmp);
 149   tmp = NULL;
 150   return i;
 151 }
 152
 153 #define BASE64_OPENSSL_LINELEN 64
 154
 155 /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
 156  * bytes.
 157  *
 158  * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
 159  * but base32_encoded_size does.)
 160  *
 161  * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
 162  * of the encoded output as multiline output (64 character, `\n' terminated
 163  * lines).
 164  */
 165 size_t
 166 base64_encode_size(size_t srclen, int flags)
 167 {
 168   size_t enclen;
 169
 170   /* Use INT_MAX for overflow checking because base64_encode() returns int. */
 171   tor_assert(srclen < INT_MAX);
 172   tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
 173
 174   enclen = BASE64_LEN(srclen);
 175   if (flags & BASE64_ENCODE_MULTILINE)
 176     enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
 177
 178   tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
 179   return enclen;
 180 }
 181
 182 /** Return an upper bound on the number of bytes that might be needed to hold
 183  * the data from decoding the base64 string <b>srclen</b>.  This is only an
 184  * upper bound, since some part of the base64 string might be padding or
 185  * space. */
 186 size_t
 187 base64_decode_maxsize(size_t srclen)
 188 {
 189   tor_assert(srclen < INT_MAX / 3);
 190
 191   return CEIL_DIV(srclen * 3, 4);
 192 }
 193
 194 /** Internal table mapping 6 bit values to the Base64 alphabet. */
 195 static const char base64_encode_table[64] = {
 196   'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
 197   'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
 198   'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
 199   'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
 200   'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
 201   'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 202   'w', 'x', 'y', 'z', '0', '1', '2', '3',
 203   '4', '5', '6', '7', '8', '9', '+', '/'
 204 };
 205
 206 /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>.  Write
 207  * the result into <b>dest</b>, if it will fit within <b>destlen</b>
 208  * bytes. Return the number of bytes written on success; -1 if
 209  * destlen is too short, or other failure.
 210  *
 211  * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
 212  * output in multiline format (64 character, `\n' terminated lines).
 213  */
 214 int
 215 base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
 216               int flags)
 217 {
 218   const unsigned char *usrc = (unsigned char *)src;
 219   const unsigned char *eous = usrc + srclen;
 220   char *d = dest;
 221   uint32_t n = 0;
 222   size_t linelen = 0;
 223   size_t enclen;
 224   int n_idx = 0;
 225
 226   if (!src || !dest)
 227     return -1;
 228
 229   /* Ensure that there is sufficient space, including the NUL. */
 230   enclen = base64_encode_size(srclen, flags);
 231   if (destlen < enclen + 1)
 232     return -1;
 233   if (destlen > SIZE_T_CEILING)
 234     return -1;
 235   if (enclen > INT_MAX)
 236     return -1;
 237
 238   /* Make sure we leave no uninitialized data in the destination buffer. */
 239   memset(dest, 0, destlen);
 240
 241   /* XXX/Yawning: If this ends up being too slow, this can be sped up
 242    * by separating the multiline format case and the normal case, and
 243    * processing 48 bytes of input at a time when newlines are desired.
 244    */
 245 #define ENCODE_CHAR(ch) \
 246   STMT_BEGIN                                                    \
 247     *d++ = ch;                                                  \
 248     if (flags & BASE64_ENCODE_MULTILINE) {                      \
 249       if (++linelen % BASE64_OPENSSL_LINELEN == 0) {            \
 250         linelen = 0;                                            \
 251         *d++ = '\n';                                            \
 252       }                                                         \
 253     }                                                           \
 254   STMT_END
 255
 256 #define ENCODE_N(idx) \
 257   ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
 258
 259 #define ENCODE_PAD() ENCODE_CHAR('=')
 260
 261   /* Iterate over all the bytes in src.  Each one will add 8 bits to the
 262    * value we're encoding.  Accumulate bits in <b>n</b>, and whenever we
 263    * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
 264    */
 265   for ( ; usrc < eous; ++usrc) {
 266     n = (n << 8) | *usrc;
 267     if ((++n_idx) == 3) {
 268       ENCODE_N(0);
 269       ENCODE_N(1);
 270       ENCODE_N(2);
 271       ENCODE_N(3);
 272       n_idx = 0;
 273       n = 0;
 274     }
 275   }
 276   switch (n_idx) {
 277   case 0:
 278     /* 0 leftover bits, no padding to add. */
 279     break;
 280   case 1:
 281     /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
 282      * by 2 padding characters.
 283      */
 284     n <<= 4;
 285     ENCODE_N(2);
 286     ENCODE_N(3);
 287     ENCODE_PAD();
 288     ENCODE_PAD();
 289     break;
 290   case 2:
 291     /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
 292      * by 1 padding character.
 293      */
 294     n <<= 2;
 295     ENCODE_N(1);
 296     ENCODE_N(2);
 297     ENCODE_N(3);
 298     ENCODE_PAD();
 299     break;
 300   // LCOV_EXCL_START -- we can't reach this point, because we enforce
 301   // 0 <= ncov_idx < 3 in the loop above.
 302   default:
 303     /* Something went catastrophically wrong. */
 304     tor_fragile_assert();
 305     return -1;
 306   // LCOV_EXCL_STOP
 307   }
 308
 309 #undef ENCODE_N
 310 #undef ENCODE_PAD
 311 #undef ENCODE_CHAR
 312
 313   /* Multiline output always includes at least one newline. */
 314   if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
 315     *d++ = '\n';
 316
 317   tor_assert(d - dest == (ptrdiff_t)enclen);
 318
 319   *d++ = '\0'; /* NUL terminate the output. */
 320
 321   return (int) enclen;
 322 }
 323
 324 /** As base64_encode, but do not add any internal spaces, and remove external
 325  * padding from the output stream.
 326  * dest must be at least base64_encode_size(srclen, 0), including space for
 327  * the removed external padding. */
 328 int
 329 base64_encode_nopad(char *dest, size_t destlen,
 330                     const uint8_t *src, size_t srclen)
 331 {
 332   int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
 333   if (n <= 0)
 334     return n;
 335   tor_assert((size_t)n < destlen && dest[n] == 0);
 336   char *in, *out;
 337   in = out = dest;
 338   while (*in) {
 339     if (*in == '=' || *in == '\n') {
 340       ++in;
 341     } else {
 342       *out++ = *in++;
 343     }
 344   }
 345   *out = 0;
 346
 347   tor_assert(out - dest <= INT_MAX);
 348
 349   return (int)(out - dest);
 350 }
 351
 352 #undef BASE64_OPENSSL_LINELEN
 353
 354 /** @{ */
 355 /** Special values used for the base64_decode_table */
 356 #define X 255
 357 #define SP 64
 358 #define PAD 65
 359 /** @} */
 360 /** Internal table mapping byte values to what they represent in base64.
 361  * Numbers 0..63 are 6-bit integers.  SPs are spaces, and should be
 362  * skipped.  Xs are invalid and must not appear in base64. PAD indicates
 363  * end-of-string. */
 364 static const uint8_t base64_decode_table[256] = {
 365   X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
 366   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 367   SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
 368   52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
 369   X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
 370   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
 371   X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 372   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
 373   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 374   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 375   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 376   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 377   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 378   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 379   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 380   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
 381 };
 382
 383 /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>.  Write
 384  * the result into <b>dest</b>, if it will fit within <b>destlen</b>
 385  * bytes.  Return the number of bytes written on success; -1 if
 386  * destlen is too short, or other failure.
 387  *
 388  * NOTE 1: destlen is checked conservatively, as though srclen contained no
 389  * spaces or padding.
 390  *
 391  * NOTE 2: This implementation does not check for the correct number of
 392  * padding "=" characters at the end of the string, and does not check
 393  * for internal padding characters.
 394  */
 395 int
 396 base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
 397 {
 398   const char *eos = src+srclen;
 399   uint32_t n=0;
 400   int n_idx=0;
 401   size_t di = 0;
 402
 403   if (destlen > INT_MAX)
 404     return -1;
 405
 406   /* Make sure we leave no uninitialized data in the destination buffer. */
 407   memset(dest, 0, destlen);
 408
 409   /* Iterate over all the bytes in src.  Each one will add 0 or 6 bits to the
 410    * value we're decoding.  Accumulate bits in <b>n</b>, and whenever we have
 411    * 24 bits, batch them into 3 bytes and flush those bytes to dest.
 412    */
 413   for ( ; src < eos; ++src) {
 414     unsigned char c = (unsigned char) *src;
 415     uint8_t v = base64_decode_table[c];
 416     switch (v) {
 417       case X:
 418         /* This character isn't allowed in base64. */
 419         return -1;
 420       case SP:
 421         /* This character is whitespace, and has no effect. */
 422         continue;
 423       case PAD:
 424         /* We've hit an = character: the data is over. */
 425         goto end_of_loop;
 426       default:
 427         /* We have an actual 6-bit value.  Append it to the bits in n. */
 428         n = (n<<6) | v;
 429         if ((++n_idx) == 4) {
 430           /* We've accumulated 24 bits in n. Flush them. */
 431           if (destlen < 3 || di > destlen - 3)
 432             return -1;
 433           dest[di++] = (n>>16);
 434           dest[di++] = (n>>8) & 0xff;
 435           dest[di++] = (n) & 0xff;
 436           n_idx = 0;
 437           n = 0;
 438         }
 439     }
 440   }
 441  end_of_loop:
 442   /* If we have leftover bits, we need to cope. */
 443   switch (n_idx) {
 444     case 0:
 445     default:
 446       /* No leftover bits.  We win. */
 447       break;
 448     case 1:
 449       /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
 450       return -1;
 451     case 2:
 452       /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
 453       if (destlen < 1 || di > destlen - 1)
 454         return -1;
 455       dest[di++] = n >> 4;
 456       break;
 457     case 3:
 458       /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
 459       if (destlen < 2 || di > destlen - 2)
 460         return -1;
 461       dest[di++] = n >> 10;
 462       dest[di++] = n >> 2;
 463   }
 464
 465   tor_assert(di <= destlen);
 466
 467   return (int)di;
 468 }
 469 #undef X
 470 #undef SP
 471 #undef PAD
 472
 473 /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
 474  * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
 475  * <b>dest</b>.
 476  */
 477 void
 478 base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
 479 {
 480   const char *end;
 481   char *cp;
 482
 483   tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
 484   tor_assert(destlen >= BASE16_BUFSIZE(srclen));
 485   tor_assert(destlen < SIZE_T_CEILING);
 486
 487   /* Make sure we leave no uninitialized data in the destination buffer. */
 488   memset(dest, 0, destlen);
 489
 490   cp = dest;
 491   end = src+srclen;
 492   while (src<end) {
 493     *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
 494     *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
 495     ++src;
 496   }
 497   *cp = '\0';
 498 }
 499
 500 /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
 501  * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
 502  * Return the number of bytes decoded on success, -1 on failure. If
 503  * <b>destlen</b> is greater than INT_MAX or less than half of
 504  * <b>srclen</b>, -1 is returned. */
 505 int
 506 base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
 507 {
 508   const char *end;
 509   char *dest_orig = dest;
 510   int v1,v2;
 511
 512   if ((srclen % 2) != 0)
 513     return -1;
 514   if (destlen < srclen/2 || destlen > INT_MAX)
 515     return -1;
 516
 517   /* Make sure we leave no uninitialized data in the destination buffer. */
 518   memset(dest, 0, destlen);
 519
 520   end = src+srclen;
 521   while (src<end) {
 522     v1 = hex_decode_digit(*src);
 523     v2 = hex_decode_digit(*(src+1));
 524     if (v1<0||v2<0)
 525       return -1;
 526     *(uint8_t*)dest = (v1<<4)|v2;
 527     ++dest;
 528     src+=2;
 529   }
 530
 531   tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);
 532
 533   return (int) (dest-dest_orig);
 534 }