mime_enc.c

   1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
   2  *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
   3  *@ - Quoted-Printable, section 6.7
   4  *@ - Base64, section 6.8
   5  *
   6  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
   7  * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
   8  */
   9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
  10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
  11 /*
  12  * Copyright (c) 2006 The NetBSD Foundation, Inc.
  13  * All rights reserved.
  14  *
  15  * This code is derived from software contributed to The NetBSD Foundation
  16  * by Anon Ymous.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions
  20  * are met:
  21  * 1. Redistributions of source code must retain the above copyright
  22  *    notice, this list of conditions and the following disclaimer.
  23  * 2. Redistributions in binary form must reproduce the above copyright
  24  *    notice, this list of conditions and the following disclaimer in the
  25  *    documentation and/or other materials provided with the distribution.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37  * POSSIBILITY OF SUCH DAMAGE.
  38  */
  39 #undef n_FILE
  40 #define n_FILE mime_enc
  41
  42 #ifndef HAVE_AMALGAMATION
  43 # include "nail.h"
  44 #endif
  45
  46 enum _qact {
  47     N =   0,   /* Do not quote */
  48     Q =   1,   /* Must quote */
  49    SP =   2,   /* sp */
  50    XF =   3,   /* Special character 'F' - maybe quoted */
  51    XD =   4,   /* Special character '.' - maybe quoted */
  52    UU =   5,   /* In header, _ must be quoted in encoded word */
  53    US = '_',   /* In header, ' ' must be quoted as _ in encoded word */
  54    QM = '?',   /* In header, special character ? not always quoted */
  55    EQ = '=',   /* In header, '=' must be quoted in encoded word */
  56    HT ='\t',   /* In body HT=SP, in head HT=HT, but quote in encoded word */
  57    NL =   N,   /* Don't quote '\n' (NL) */
  58    CR =   Q    /* Always quote a '\r' (CR) */
  59 };
  60
  61 /* Lookup tables to decide whether a character must be encoded or not.
  62  * Email header differences according to RFC 2047, section 4.2:
  63  * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
  64  * - don't care about the special ^F[rom] and ^.$ */
  65 static ui8_t const         _qtab_body[] = {
  66     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,SP,NL, Q,  Q,CR, Q, Q,
  67     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  68    SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
  69     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, Q, N, N,
  70
  71     N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
  72     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  73     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  74     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  75 },
  76                            _qtab_head[] = {
  77     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,HT, Q, Q,  Q, Q, Q, Q,
  78     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  79    US, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  80     N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N,QM,
  81
  82     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  83     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N,UU,
  84     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  85     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  86 };
  87
  88 /* The decoding table is only accessed via _B64_DECUI8() */
  89 static char const          _b64_enctbl[] =
  90     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  91 static signed char const   _b64__dectbl[] = {
  92    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  93    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  94    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
  95    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
  96    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
  97    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
  98    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
  99    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 100 };
 101 #define _B64_EQU           (ui32_t)-2
 102 #define _B64_BAD           (ui32_t)-1
 103 #define _B64_DECUI8(C)     \
 104    ((C) >= sizeof(_b64__dectbl) ? _B64_BAD : (ui32_t)_b64__dectbl[(ui8_t)(C)])
 105
 106 /* ASCII case-insensitive check whether Content-Transfer-Encoding: header body
 107  * hbody defined this encoding type */
 108 static bool_t        _is_ct_enc(char const *hbody, char const *encoding);
 109
 110 /* Check whether *s must be quoted according to flags, else body rules;
 111  * sol indicates whether we are at the first character of a line/field */
 112 SINLINE enum _qact   _mustquote(char const *s, char const *e, bool_t sol,
 113                         enum mime_enc_flags flags);
 114
 115 /* Trim WS and make work point to the decodable range of in*
 116  * Return the amount of bytes a b64_decode operation on that buffer requires */
 117 static size_t        _b64_decode_prepare(struct str *work,
 118                         struct str const *in);
 119
 120 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
 121  * Return number of useful bytes in out or -1 on error */
 122 static ssize_t       _b64_decode(struct str *out, struct str *in);
 123
 124 static bool_t
 125 _is_ct_enc(char const *hbody, char const *encoding)
 126 {
 127    bool_t quoted, rv;
 128    int c;
 129    NYD2_ENTER;
 130
 131    if (*hbody == '"')
 132       quoted = TRU1, ++hbody;
 133    else
 134       quoted = FAL0;
 135    rv = FAL0;
 136
 137    while (*hbody != '\0' && *encoding != '\0')
 138       if ((c = *hbody++, lowerconv(c) != *encoding++))
 139          goto jleave;
 140    rv = TRU1;
 141
 142    if (quoted && *hbody == '"')
 143       goto jleave;
 144    if (*hbody == '\0' || whitechar(*hbody))
 145       goto jleave;
 146    rv = FAL0;
 147 jleave:
 148    NYD2_LEAVE;
 149    return rv;
 150 }
 151
 152 SINLINE enum _qact
 153 _mustquote(char const *s, char const *e, bool_t sol, enum mime_enc_flags flags)
 154 {
 155    ui8_t const *qtab;
 156    enum _qact a, r;
 157    NYD2_ENTER;
 158
 159    qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
 160          ? _qtab_head : _qtab_body;
 161    a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s];
 162
 163    if ((r = a) == N || (r = a) == Q)
 164       goto jleave;
 165    r = Q;
 166
 167    /* Special header fields */
 168    if (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)) {
 169       /* Special massage for encoded words */
 170       if (flags & MIMEEF_ISENCWORD) {
 171          switch (a) {
 172          case HT:
 173          case US:
 174          case EQ:
 175             r = a;
 176             /* FALLTHRU */
 177          case UU:
 178             goto jleave;
 179          default:
 180             break;
 181          }
 182       }
 183
 184       /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
 185        * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
 186        * should be hard too match */
 187       if (a == QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
 188          goto jleave;
 189       goto jnquote;
 190    }
 191
 192    /* Body-only */
 193
 194    if (a == SP) {
 195       /* WS only if trailing white space */
 196       if (PTRCMP(s + 1, ==, e) || s[1] == '\n')
 197          goto jleave;
 198       goto jnquote;
 199    }
 200
 201    /* Rest are special begin-of-line cases */
 202    if (!sol)
 203       goto jnquote;
 204
 205    /* ^From */
 206    if (a == XF) {
 207       if (PTRCMP(s + 4, <, e) && s[1] == 'r' && s[2] == 'o' && s[3] == 'm')
 208          goto jleave;
 209       goto jnquote;
 210    }
 211    /* ^.$ */
 212    if (a == XD && (PTRCMP(s + 1, ==, e) || s[1] == '\n'))
 213       goto jleave;
 214 jnquote:
 215    r = N;
 216 jleave:
 217    NYD2_LEAVE;
 218    return r;
 219 }
 220
 221 static size_t
 222 _b64_decode_prepare(struct str *work, struct str const *in)
 223 {
 224    char *cp;
 225    size_t cp_len;
 226    NYD2_ENTER;
 227
 228    cp = in->s;
 229    cp_len = in->l;
 230
 231    while (cp_len > 0 && spacechar(*cp))
 232       ++cp, --cp_len;
 233    work->s = cp;
 234
 235    for (cp += cp_len; cp_len > 0; --cp_len) {
 236       char c = *--cp;
 237       if (!spacechar(c))
 238          break;
 239    }
 240    work->l = cp_len;
 241
 242    if (cp_len > 16)
 243       cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
 244    cp_len += 2*3 +1;
 245    NYD2_LEAVE;
 246    return cp_len;
 247 }
 248
 249 static ssize_t
 250 _b64_decode(struct str *out, struct str *in)
 251 {
 252    ssize_t rv = -1;
 253    ui8_t *p;
 254    ui8_t const *q, *end;
 255    NYD2_ENTER;
 256
 257    p = (ui8_t*)out->s + out->l;
 258    q = (ui8_t const*)in->s;
 259
 260    for (end = q + in->l; PTRCMP(q + 4, <=, end);) {
 261       ui32_t a = _B64_DECUI8(q[0]), b = _B64_DECUI8(q[1]),
 262          c = _B64_DECUI8(q[2]), d = _B64_DECUI8(q[3]);
 263       q += 4;
 264
 265       if (a >= _B64_EQU || b >= _B64_EQU || c == _B64_BAD || d == _B64_BAD)
 266          goto jleave;
 267
 268       *p++ = ((a << 2) | ((b & 0x30) >> 4));
 269       if (c == _B64_EQU)  { /* got '=' */
 270          if (d != _B64_EQU)
 271             goto jleave;
 272          break;
 273       }
 274       *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
 275       if (d == _B64_EQU) /* got '=' */
 276          break;
 277       *p++ = (((c & 0x03) << 6) | d);
 278    }
 279    rv ^= rv;
 280
 281 jleave: {
 282       size_t i = PTR2SIZE((char*)p - out->s);
 283       out->l = i;
 284       if (rv == 0)
 285          rv = (ssize_t)i;
 286    }
 287    in->l -= PTR2SIZE((char*)UNCONST(q) - in->s);
 288    in->s = UNCONST(q);
 289    NYD2_LEAVE;
 290    return rv;
 291 }
 292
 293 FL enum mime_enc
 294 mime_enc_target(void)
 295 {
 296    char const *cp;
 297    enum mime_enc rv;
 298    NYD2_ENTER;
 299
 300    if ((cp = ok_vlook(encoding)) == NULL)
 301       rv = MIME_DEFAULT_ENCODING;
 302    else if (!asccasecmp(cp, "quoted-printable"))
 303       rv = MIMEE_QP;
 304    else if (!asccasecmp(cp, "8bit"))
 305       rv = MIMEE_8B;
 306    else if (!asccasecmp(cp, "base64"))
 307       rv = MIMEE_B64;
 308    else {
 309       n_err(_("Warning: invalid *encoding*, using Base64: %s\n"), cp);
 310       rv = MIMEE_B64;
 311    }
 312    NYD2_LEAVE;
 313    return rv;
 314 }
 315
 316 FL enum mime_enc
 317 mime_enc_from_ctehead(char const *hbody)
 318 {
 319    enum mime_enc rv;
 320    NYD2_ENTER;
 321
 322    if (hbody == NULL || _is_ct_enc(hbody, "7bit"))
 323       rv = MIMEE_7B;
 324    else if (_is_ct_enc(hbody, "8bit"))
 325       rv = MIMEE_8B;
 326    else if (_is_ct_enc(hbody, "base64"))
 327       rv = MIMEE_B64;
 328    else if (_is_ct_enc(hbody, "binary"))
 329       rv = MIMEE_BIN;
 330    else if (_is_ct_enc(hbody, "quoted-printable"))
 331       rv = MIMEE_QP;
 332    else
 333       rv = MIMEE_NONE;
 334    NYD2_LEAVE;
 335    return rv;
 336 }
 337
 338 FL char const *
 339 mime_enc_from_conversion(enum conversion const convert) /* TODO booom */
 340 {
 341    char const *rv;
 342    NYD_ENTER;
 343
 344    switch (convert) {
 345    case CONV_7BIT:   rv = "7bit"; break;
 346    case CONV_8BIT:   rv = "8bit"; break;
 347    case CONV_TOQP:   rv = "quoted-printable"; break;
 348    case CONV_TOB64:  rv = "base64"; break;
 349    default:          rv = ""; break;
 350    }
 351    NYD_LEAVE;
 352    return rv;
 353 }
 354
 355 FL size_t
 356 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags)
 357 {
 358    size_t rv;
 359    bool_t sol;
 360    NYD_ENTER;
 361
 362    for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
 363       switch (_mustquote(ln, ln + lnlen, sol, flags)) {
 364       case US:
 365       case EQ:
 366       case HT:
 367          assert(flags & MIMEEF_ISENCWORD);
 368          /* FALLTHRU */
 369       case N:
 370          continue;
 371       default:
 372          ++rv;
 373       }
 374    NYD_LEAVE;
 375    return rv;
 376 }
 377
 378 FL size_t
 379 qp_encode_calc_size(size_t len)
 380 {
 381    size_t bytes, lines;
 382    NYD_ENTER;
 383
 384    /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
 385     * However, we must be aware that (a) the output may span multiple lines
 386     * and (b) the input does not end with a newline itself (nonetheless):
 387     *    LC_ALL=C awk 'BEGIN{
 388     *       for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
 389     *    }' |
 390     *    MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
 391     *       -Ssendcharsets=utf8 -s testsub ./LETTER */
 392    bytes = len * 3;
 393    lines = bytes / QP_LINESIZE;
 394    len += lines;
 395
 396    bytes = len * 3;
 397    /* Trailing hard NL may be missing, so there may be two lines.
 398     * Thus add soft + hard NL per line and a trailing NUL */
 399    lines = (bytes / QP_LINESIZE) + 1;
 400    lines <<= 1;
 401    bytes += lines;
 402    len = ++bytes;
 403
 404    NYD_LEAVE;
 405    return len;
 406 }
 407
 408 #ifdef notyet
 409 FL struct str *
 410 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags)
 411 {
 412    struct str in;
 413    NYD_ENTER;
 414
 415    in.s = UNCONST(cp);
 416    in.l = strlen(cp);
 417    out = qp_encode(out, &in, flags);
 418    NYD_LEAVE;
 419    return out;
 420 }
 421
 422 FL struct str *
 423 qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
 424    enum qpflags flags)
 425 {
 426    struct str in;
 427    NYD_ENTER;
 428
 429    in.s = UNCONST(vp);
 430    in.l = vp_len;
 431    out = qp_encode(out, &in, flags);
 432    NYD_LEAVE;
 433    return out;
 434 }
 435 #endif /* notyet */
 436
 437 FL struct str *
 438 qp_encode(struct str *out, struct str const *in, enum qpflags flags)
 439 {
 440    bool_t sol = (flags & QP_ISHEAD ? FAL0 : TRU1), seenx;
 441    ssize_t lnlen;
 442    char *qp;
 443    char const *is, *ie;
 444    NYD_ENTER;
 445
 446    if (!(flags & QP_BUF)) {
 447       lnlen = qp_encode_calc_size(in->l);
 448       out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
 449    }
 450    qp = out->s;
 451    is = in->s;
 452    ie = is + in->l;
 453
 454    /* QP_ISHEAD? */
 455    if (!sol) {
 456       enum mime_enc_flags ef = MIMEEF_ISHEAD |
 457             (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
 458
 459       for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) {
 460          enum _qact mq = _mustquote(is, ie, sol, ef);
 461          char c = *is++;
 462
 463          if (mq == N) {
 464             /* We convert into a single *encoded-word*, that'll end up in
 465              * =?C?Q??=; quote '?' from when we're inside there on */
 466             if (seenx && c == '?')
 467                goto jheadq;
 468             *qp = c;
 469          } else if (mq == US)
 470             *qp = US;
 471          else {
 472             seenx = TRU1;
 473 jheadq:
 474             *qp++ = '=';
 475             qp = n_c_to_hex_base16(qp, c) + 1;
 476          }
 477       }
 478       goto jleave;
 479    }
 480
 481    /* The body needs to take care for soft line breaks etc. */
 482    for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) {
 483       enum _qact mq = _mustquote(is, ie, sol, MIMEEF_NONE);
 484       char c = *is++;
 485
 486       if (mq == N && (c != '\n' || !seenx)) {
 487          *qp++ = c;
 488          if (++lnlen < QP_LINESIZE - 1)
 489             continue;
 490          /* Don't write a soft line break when we're in the last possible
 491           * column and either an LF has been written or only an LF follows, as
 492           * that'll end the line anyway */
 493          /* XXX but - ensure is+1>=ie, then??
 494           * xxx and/or - what about resetting lnlen; that contra
 495           * xxx dicts input==1 input line assertion, though */
 496          if (c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
 497             continue;
 498 jsoftnl:
 499          qp[0] = '=';
 500          qp[1] = '\n';
 501          qp += 2;
 502          lnlen = 0;
 503          continue;
 504       }
 505
 506       if (lnlen > QP_LINESIZE - 3 - 1) {
 507          qp[0] = '=';
 508          qp[1] = '\n';
 509          qp += 2;
 510          lnlen = 0;
 511       }
 512       *qp++ = '=';
 513       qp = n_c_to_hex_base16(qp, c);
 514       qp += 2;
 515       lnlen += 3;
 516       if (c != '\n' || !seenx)
 517          seenx = (c == '\r');
 518       else {
 519          seenx = FAL0;
 520          goto jsoftnl;
 521       }
 522    }
 523
 524    /* Enforce soft line break if we haven't seen LF */
 525    if (in->l > 0 && *--is != '\n') {
 526       qp[0] = '=';
 527       qp[1] = '\n';
 528       qp += 2;
 529    }
 530 jleave:
 531    out->l = PTR2SIZE(qp - out->s);
 532    out->s[out->l] = '\0';
 533    NYD_LEAVE;
 534    return out;
 535 }
 536
 537 FL int
 538 qp_decode(struct str *out, struct str const *in, struct str *rest)
 539 {
 540    int rv = STOP;
 541    char *os, *oc;
 542    char const *is, *ie;
 543    NYD_ENTER;
 544
 545    if (rest != NULL && rest->l != 0) {
 546       os = out->s;
 547       *out = *rest;
 548       rest->s = os;
 549       rest->l = 0;
 550    }
 551
 552    oc = os =
 553    out->s = srealloc(out->s, out->l + in->l + 3);
 554    oc += out->l;
 555    is = in->s;
 556    ie = is + in->l;
 557
 558    /* Decoding encoded-word (RFC 2049) in a header field? */
 559    if (rest == NULL) {
 560       while (is < ie) {
 561          si32_t c = *is++;
 562          if (c == '=') {
 563             if (PTRCMP(is + 1, >=, ie)) {
 564                ++is;
 565                goto jehead;
 566             }
 567             c = n_c_from_hex_base16(is);
 568             is += 2;
 569             if (c >= 0)
 570                *oc++ = (char)c;
 571             else {
 572                /* Invalid according to RFC 2045, section 6.7. Almost follow */
 573 jehead:
 574                /* TODO 0xFFFD
 575                *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 576                *oc += 3; 0xFFFD TODO
 577                */ *oc++ = '?';
 578             }
 579          } else
 580             *oc++ = (c == '_' /* US */) ? ' ' : (char)c;
 581       }
 582       goto jleave; /* XXX QP decode, header: errors not reported */
 583    }
 584
 585    /* Decoding a complete message/mimepart body line */
 586    while (is < ie) {
 587       si32_t c = *is++;
 588       if (c != '=') {
 589          *oc++ = (char)c;
 590          continue;
 591       }
 592
 593       /* RFC 2045, 6.7:
 594        *   Therefore, when decoding a Quoted-Printable body, any
 595        *   trailing white space on a line must be deleted, as it will
 596        *   necessarily have been added by intermediate transport
 597        *   agents */
 598       for (; is < ie && blankchar(*is); ++is)
 599          ;
 600       if (PTRCMP(is + 1, >=, ie)) {
 601          /* Soft line break? */
 602          if (*is == '\n')
 603             goto jsoftnl;
 604          ++is;
 605          goto jebody;
 606       }
 607
 608       /* Not a soft line break? */
 609       if (*is != '\n') {
 610          c = n_c_from_hex_base16(is);
 611          is += 2;
 612          if (c >= 0)
 613             *oc++ = (char)c;
 614          else {
 615             /* Invalid according to RFC 2045, section 6.7.
 616              * Almost follow it and include the = and the follow char */
 617 jebody:
 618             /* TODO 0xFFFD
 619             *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 620             *oc += 3; 0xFFFD TODO
 621             */ *oc++ = '?';
 622          }
 623          continue;
 624       }
 625
 626       /* CRLF line endings are encoded as QP, followed by a soft line break, so
 627        * check for this special case, and simply forget we have seen one, so as
 628        * not to end up with the entire DOS file in a contiguous buffer */
 629 jsoftnl:
 630       if (oc > os && oc[-1] == '\n') {
 631 #if 0       /* TODO qp_decode() we do not normalize CRLF
 632           * TODO to LF because for that we would need
 633           * TODO to know if we are about to write to
 634           * TODO the display or do save the file!
 635           * TODO 'hope the MIME/send layer rewrite will
 636           * TODO offer the possibility to DTRT */
 637          if (oc - 1 > os && oc[-2] == '\r') {
 638             --oc;
 639             oc[-1] = '\n';
 640          }
 641 #endif
 642          break;
 643       }
 644       out->l = PTR2SIZE(oc - os);
 645       rest->s = srealloc(rest->s, rest->l + out->l);
 646       memcpy(rest->s + rest->l, out->s, out->l);
 647       rest->l += out->l;
 648       oc = os;
 649       break;
 650    }
 651    /* XXX RFC: QP decode should check no trailing WS on line */
 652 jleave:
 653    out->l = PTR2SIZE(oc - os);
 654    rv = OKAY;
 655    NYD_LEAVE;
 656    return rv;
 657 }
 658
 659 FL size_t
 660 b64_encode_calc_size(size_t len)
 661 {
 662    NYD_ENTER;
 663    len = (len * 4) / 3;
 664    len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
 665    len += 2 + 1; /* CRLF, \0 */
 666    NYD_LEAVE;
 667    return len;
 668 }
 669
 670 FL struct str *
 671 b64_encode(struct str *out, struct str const *in, enum b64flags flags)
 672 {
 673    ui8_t const *p;
 674    ssize_t i, lnlen;
 675    char *b64;
 676    NYD_ENTER;
 677
 678    assert(!(flags & B64_NOPAD) ||
 679       !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
 680
 681    p = (ui8_t const*)in->s;
 682
 683    if (!(flags & B64_BUF)) {
 684       i = b64_encode_calc_size(in->l);
 685       out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
 686    }
 687    b64 = out->s;
 688
 689    if (!(flags & (B64_CRLF | B64_LF)))
 690       flags &= ~B64_MULTILINE;
 691
 692    for (lnlen = 0, i = (ssize_t)in->l; i > 0; p += 3, i -= 3) {
 693       ui32_t a = p[0], b, c;
 694
 695       b64[0] = _b64_enctbl[a >> 2];
 696       switch (i) {
 697       case 1:
 698          b64[1] = _b64_enctbl[((a & 0x3) << 4)];
 699          b64[2] =
 700          b64[3] = '=';
 701          break;
 702       case 2:
 703          b = p[1];
 704          b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 705          b64[2] = _b64_enctbl[((b & 0x0F) << 2)];
 706          b64[3] = '=';
 707          break;
 708       default:
 709          b = p[1];
 710          c = p[2];
 711          b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 712          b64[2] = _b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
 713          b64[3] = _b64_enctbl[c & 0x3F];
 714          break;
 715       }
 716
 717       b64 += 4;
 718       if (!(flags & B64_MULTILINE))
 719          continue;
 720       lnlen += 4;
 721       if (lnlen < B64_LINESIZE)
 722          continue;
 723
 724       lnlen = 0;
 725       if (flags & B64_CRLF)
 726          *b64++ = '\r';
 727       if (flags & (B64_CRLF | B64_LF))
 728          *b64++ = '\n';
 729    }
 730
 731    if ((flags & (B64_CRLF | B64_LF)) &&
 732          (!(flags & B64_MULTILINE) || lnlen != 0)) {
 733       if (flags & B64_CRLF)
 734          *b64++ = '\r';
 735       if (flags & (B64_CRLF | B64_LF))
 736          *b64++ = '\n';
 737    } else if (flags & B64_NOPAD)
 738       while (b64 != out->s && b64[-1] == '=')
 739          --b64;
 740
 741    out->l = PTR2SIZE(b64 - out->s);
 742    out->s[out->l] = '\0';
 743
 744    /* Base64 includes + and /, replace them with _ and -.
 745     * This is base64url according to RFC 4648, then.  Since we only support
 746     * that for encoding and it is only used for boundary strings, this is
 747     * yet a primitive implementation; xxx use tables; support decoding */
 748    if (flags & B64_RFC4648URL) {
 749       char c;
 750
 751       for (b64 = out->s; (c = *b64) != '\0'; ++b64)
 752          if (c == '+')
 753             *b64 = '-';
 754          else if (c == '/')
 755                *b64 = '_';
 756    }
 757    NYD_LEAVE;
 758    return out;
 759 }
 760
 761 FL struct str *
 762 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
 763    enum b64flags flags)
 764 {
 765    struct str in;
 766    NYD_ENTER;
 767
 768    in.s = UNCONST(vp);
 769    in.l = vp_len;
 770    out = b64_encode(out, &in, flags);
 771    NYD_LEAVE;
 772    return out;
 773 }
 774
 775 #ifdef HAVE_SMTP
 776 FL struct str *
 777 b64_encode_cp(struct str *out, char const *cp, enum b64flags flags)
 778 {
 779    struct str in;
 780    NYD_ENTER;
 781
 782    in.s = UNCONST(cp);
 783    in.l = strlen(cp);
 784    out = b64_encode(out, &in, flags);
 785    NYD_LEAVE;
 786    return out;
 787 }
 788 #endif
 789
 790 FL int
 791 b64_decode(struct str *out, struct str const *in, struct str *rest)
 792 {
 793    struct str work;
 794    char *x;
 795    size_t len;
 796    int rv; /* XXX -> bool_t */
 797    NYD_ENTER;
 798
 799    len = _b64_decode_prepare(&work, in);
 800    out->l = 0;
 801
 802    /* TODO B64_T is different since we must not fail for errors; in v15.0 this
 803     * TODO will be filter based and B64_T will have a different one than B64,
 804     * TODO for now special treat this all-horror */
 805    if (rest != NULL) {
 806       /* With B64_T there may be leftover decoded data for iconv(3), even if
 807        * that means it's incomplete multibyte character we have to copy over */
 808       /* TODO strictly speaking this should not be handled in here,
 809        * TODO since its leftover decoded data from an iconv(3);
 810        * TODO In v15.0 this path will be filter based, each filter having its
 811        * TODO own buffer for such purpose; for now we are BUSTED since for
 812        * TODO Base64 rest is owned by iconv(3) */
 813       if (rest->l > 0) {
 814          x = out->s;
 815          *out = *rest;
 816          rest->s = x; /* Just for ownership reasons (all TODO in here..) */
 817          rest->l = 0;
 818          len += out->l;
 819       }
 820
 821       out->s = srealloc(out->s, len +1);
 822
 823       for (;;) {
 824          if (_b64_decode(out, &work) >= 0) {
 825             if (work.l == 0)
 826                break;
 827          }
 828          x = out->s + out->l;
 829
 830          /* Partial/False last sequence.  TODO not solvable for non-EOF;
 831           * TODO yes, invalid, but seen in the wild and should be handled,
 832           * TODO but for that we had to have our v15.0 filter which doesn't
 833           * TODO work line based but content buffer based */
 834          if ((len = work.l) <= 4) {
 835             switch (len) {
 836             case 4:  /* FALLTHRU */
 837             case 3:  x[2] = '?'; /* FALLTHRU */
 838             case 2:  x[1] = '?'; /* FALLTHRU */
 839             default: x[0] = '?'; break;
 840             }
 841             out->l += len;
 842             break;
 843          }
 844
 845          /* TODO Bad content: this problem is not solvable!  I've seen
 846           * TODO messages which broke lines in the middle of a Base64
 847           * TODO tuple, followed by an invalid character ("!"), the follow
 848           * TODO line starting with whitespace and the remaining sequence.
 849           * TODO OpenSSL bailed, mutt(1) got it right (silently..).
 850           * TODO Since "rest" is not usable by us, we cannot continue
 851           * TODO sequences.  We will be able to do so with the v15.0 filter
 852           * TODO approach, if we */
 853          /* Bad content: skip over a single sequence */
 854          for (;;) {
 855             *x++ = '?';
 856             ++out->l;
 857             if (--work.l == 0)
 858                break;
 859             else {
 860                ui8_t bc = (ui8_t)*++work.s;
 861                ui32_t state = _B64_DECUI8(bc);
 862
 863                if (state != _B64_EQU && state != _B64_BAD)
 864                   break;
 865             }
 866          }
 867       }
 868       rv = OKAY;
 869       goto jleave;
 870    }
 871
 872    /* Ignore an empty input, as may happen for an empty final line */
 873    if (work.l == 0) {
 874       out->s = srealloc(out->s, 1);
 875       rv = OKAY;
 876    } else if (work.l >= 4 && !(work.l & 3)) {
 877       out->s = srealloc(out->s, len +1);
 878       if ((ssize_t)(len = _b64_decode(out, &work)) < 0)
 879          goto jerr;
 880       rv = OKAY;
 881    } else
 882       goto jerr;
 883
 884 jleave:
 885    out->s[out->l] = '\0';
 886    NYD_LEAVE;
 887    return rv;
 888
 889 jerr: {
 890    char const *err = _("[Invalid Base64 encoding]\n");
 891    out->l = len = strlen(err);
 892    out->s = srealloc(out->s, len +1);
 893    memcpy(out->s, err, len);
 894    rv = STOP;
 895    goto jleave;
 896    }
 897 }
 898
 899 /* s-it-mode */