mime_enc.c

   1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
   2  *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
   3  *@ - Quoted-Printable, section 6.7
   4  *@ - Base64, section 6.8
   5  *
   6  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
   7  * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
   8  */
   9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
  10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
  11 /*
  12  * Copyright (c) 2006 The NetBSD Foundation, Inc.
  13  * All rights reserved.
  14  *
  15  * This code is derived from software contributed to The NetBSD Foundation
  16  * by Anon Ymous.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions
  20  * are met:
  21  * 1. Redistributions of source code must retain the above copyright
  22  *    notice, this list of conditions and the following disclaimer.
  23  * 2. Redistributions in binary form must reproduce the above copyright
  24  *    notice, this list of conditions and the following disclaimer in the
  25  *    documentation and/or other materials provided with the distribution.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37  * POSSIBILITY OF SUCH DAMAGE.
  38  */
  39 #undef n_FILE
  40 #define n_FILE mime_enc
  41
  42 #ifndef HAVE_AMALGAMATION
  43 # include "nail.h"
  44 #endif
  45
  46 enum _qact {
  47     N =   0,   /* Do not quote */
  48     Q =   1,   /* Must quote */
  49    SP =   2,   /* sp */
  50    XF =   3,   /* Special character 'F' - maybe quoted */
  51    XD =   4,   /* Special character '.' - maybe quoted */
  52    UU =   5,   /* In header, _ must be quoted in encoded word */
  53    US = '_',   /* In header, ' ' must be quoted as _ in encoded word */
  54    QM = '?',   /* In header, special character ? not always quoted */
  55    EQ = '=',   /* In header, '=' must be quoted in encoded word */
  56    HT ='\t',   /* In body HT=SP, in head HT=HT, but quote in encoded word */
  57    NL =   N,   /* Don't quote '\n' (NL) */
  58    CR =   Q    /* Always quote a '\r' (CR) */
  59 };
  60
  61 /* Lookup tables to decide wether a character must be encoded or not.
  62  * Email header differences according to RFC 2047, section 4.2:
  63  * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
  64  * - don't care about the special ^F[rom] and ^.$ */
  65 static ui8_t const         _qtab_body[] = {
  66     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,SP,NL, Q,  Q,CR, Q, Q,
  67     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  68    SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
  69     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, Q, N, N,
  70
  71     N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
  72     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  73     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  74     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  75 },
  76                            _qtab_head[] = {
  77     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,HT, Q, Q,  Q, Q, Q, Q,
  78     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  79    US, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  80     N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N,QM,
  81
  82     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  83     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N,UU,
  84     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  85     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  86 };
  87
  88 /* For decoding be robust and allow lowercase letters, too */
  89 static char const          _qp_itoa16[] = "0123456789ABCDEF";
  90 static ui8_t const         _qp_atoi16[] = {
  91    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
  92    0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
  93    0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
  94    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
  95    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
  96    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
  97    0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF  /* 0x60-0x67 */
  98 };
  99
 100 /* The decoding table is only accessed via _B64_DECUI8() */
 101 static char const          _b64_enctbl[] =
 102     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 103 static signed char const   _b64__dectbl[] = {
 104    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 105    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 106    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 107    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
 108    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 109    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 110    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 111    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 112 };
 113 #define _B64_EQU           (ui32_t)-2
 114 #define _B64_BAD           (ui32_t)-1
 115 #define _B64_DECUI8(C)     \
 116    ((C) >= sizeof(_b64__dectbl) ? _B64_BAD : (ui32_t)_b64__dectbl[(ui8_t)(C)])
 117
/* ASCII case-insensitive check whether Content-Transfer-Encoding: header body
 * hbody defined this encoding type */
 120 static bool_t        _is_ct_enc(char const *hbody, char const *encoding);
 121
/* Check whether *s must be quoted according to flags, else body rules;
 * sol indicates whether we are at the first character of a line/field */
 124 SINLINE enum _qact   _mustquote(char const *s, char const *e, bool_t sol,
 125                         enum mime_enc_flags flags);
 126
 127 /* Convert c to/from a hexadecimal character string */
 128 SINLINE char *       _qp_ctohex(char *store, char c);
 129 SINLINE si32_t       _qp_cfromhex(char const *hex);
 130
 131 /* Trim WS and make work point to the decodable range of in*
 132  * Return the amount of bytes a b64_decode operation on that buffer requires */
 133 static size_t        _b64_decode_prepare(struct str *work,
 134                         struct str const *in);
 135
 136 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
 137  * Return number of useful bytes in out or -1 on error */
 138 static ssize_t       _b64_decode(struct str *out, struct str *in);
 139
 140 static bool_t
 141 _is_ct_enc(char const *hbody, char const *encoding)
 142 {
 143    bool_t quoted, rv;
 144    int c;
 145    NYD2_ENTER;
 146
 147    if (*hbody == '"')
 148       quoted = TRU1, ++hbody;
 149    else
 150       quoted = FAL0;
 151    rv = FAL0;
 152
 153    while (*hbody != '\0' && *encoding != '\0')
 154       if ((c = *hbody++, lowerconv(c) != *encoding++))
 155          goto jleave;
 156    rv = TRU1;
 157
 158    if (quoted && *hbody == '"')
 159       goto jleave;
 160    if (*hbody == '\0' || whitechar(*hbody))
 161       goto jleave;
 162    rv = FAL0;
 163 jleave:
 164    NYD2_LEAVE;
 165    return rv;
 166 }
 167
 168 SINLINE enum _qact
 169 _mustquote(char const *s, char const *e, bool_t sol, enum mime_enc_flags flags)
 170 {
 171    ui8_t const *qtab;
 172    enum _qact a, r;
 173    NYD2_ENTER;
 174
 175    qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
 176          ? _qtab_head : _qtab_body;
 177    a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s];
 178
 179    if ((r = a) == N || (r = a) == Q)
 180       goto jleave;
 181    r = Q;
 182
 183    /* Special header fields */
 184    if (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)) {
 185       /* Special massage for encoded words */
 186       if (flags & MIMEEF_ISENCWORD) {
 187          switch (a) {
 188          case HT:
 189          case US:
 190          case EQ:
 191             r = a;
 192             /* FALLTHRU */
 193          case UU:
 194             goto jleave;
 195          default:
 196             break;
 197          }
 198       }
 199
 200       /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
 201        * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
 202        * should be hard too match */
 203       if (a == QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
 204          goto jleave;
 205       goto jnquote;
 206    }
 207
 208    /* Body-only */
 209
 210    if (a == SP) {
 211       /* WS only if trailing white space */
 212       if (PTRCMP(s + 1, ==, e) || s[1] == '\n')
 213          goto jleave;
 214       goto jnquote;
 215    }
 216
 217    /* Rest are special begin-of-line cases */
 218    if (!sol)
 219       goto jnquote;
 220
 221    /* ^From */
 222    if (a == XF) {
 223       if (PTRCMP(s + 4, <, e) && s[1] == 'r' && s[2] == 'o' && s[3] == 'm')
 224          goto jleave;
 225       goto jnquote;
 226    }
 227    /* ^.$ */
 228    if (a == XD && (PTRCMP(s + 1, ==, e) || s[1] == '\n'))
 229       goto jleave;
 230 jnquote:
 231    r = N;
 232 jleave:
 233    NYD2_LEAVE;
 234    return r;
 235 }
 236
 237 SINLINE char *
 238 _qp_ctohex(char *store, char c)
 239 {
 240    NYD2_ENTER;
 241    store[2] = '\0';
 242    store[1] = _qp_itoa16[(ui8_t)c & 0x0F];
 243    c = ((ui8_t)c >> 4) & 0x0F;
 244    store[0] = _qp_itoa16[(ui8_t)c];
 245    NYD2_LEAVE;
 246    return store;
 247 }
 248
 249 SINLINE si32_t
 250 _qp_cfromhex(char const *hex)
 251 {
 252    ui8_t i1, i2;
 253    si32_t rv;
 254    NYD2_ENTER;
 255
 256    if ((i1 = (ui8_t)hex[0] - '0') >= NELEM(_qp_atoi16) ||
 257          (i2 = (ui8_t)hex[1] - '0') >= NELEM(_qp_atoi16))
 258       goto jerr;
 259    i1 = _qp_atoi16[i1];
 260    i2 = _qp_atoi16[i2];
 261    if ((i1 | i2) & 0xF0u)
 262       goto jerr;
 263    rv = i1;
 264    rv <<= 4;
 265    rv += i2;
 266 jleave:
 267    NYD2_LEAVE;
 268    return rv;
 269 jerr:
 270    rv = -1;
 271    goto jleave;
 272 }
 273
 274 static size_t
 275 _b64_decode_prepare(struct str *work, struct str const *in)
 276 {
 277    char *cp;
 278    size_t cp_len;
 279    NYD2_ENTER;
 280
 281    cp = in->s;
 282    cp_len = in->l;
 283
 284    while (cp_len > 0 && spacechar(*cp))
 285       ++cp, --cp_len;
 286    work->s = cp;
 287
 288    for (cp += cp_len; cp_len > 0; --cp_len) {
 289       char c = *--cp;
 290       if (!spacechar(c))
 291          break;
 292    }
 293    work->l = cp_len;
 294
 295    if (cp_len > 16)
 296       cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
 297    cp_len += 2*3 +1;
 298    NYD2_LEAVE;
 299    return cp_len;
 300 }
 301
 302 static ssize_t
 303 _b64_decode(struct str *out, struct str *in)
 304 {
 305    ssize_t rv = -1;
 306    ui8_t *p;
 307    ui8_t const *q, *end;
 308    NYD2_ENTER;
 309
 310    p = (ui8_t*)out->s + out->l;
 311    q = (ui8_t const*)in->s;
 312
 313    for (end = q + in->l; PTRCMP(q + 4, <=, end);) {
 314       ui32_t a = _B64_DECUI8(q[0]), b = _B64_DECUI8(q[1]),
 315          c = _B64_DECUI8(q[2]), d = _B64_DECUI8(q[3]);
 316       q += 4;
 317
 318       if (a >= _B64_EQU || b >= _B64_EQU || c == _B64_BAD || d == _B64_BAD)
 319          goto jleave;
 320
 321       *p++ = ((a << 2) | ((b & 0x30) >> 4));
 322       if (c == _B64_EQU)  { /* got '=' */
 323          if (d != _B64_EQU)
 324             goto jleave;
 325          break;
 326       }
 327       *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
 328       if (d == _B64_EQU) /* got '=' */
 329          break;
 330       *p++ = (((c & 0x03) << 6) | d);
 331    }
 332    rv ^= rv;
 333
 334 jleave: {
 335       size_t i = PTR2SIZE((char*)p - out->s);
 336       out->l = i;
 337       if (rv == 0)
 338          rv = (ssize_t)i;
 339    }
 340    in->l -= PTR2SIZE((char*)UNCONST(q) - in->s);
 341    in->s = UNCONST(q);
 342    NYD2_LEAVE;
 343    return rv;
 344 }
 345
 346 FL char *
 347 mime_char_to_hexseq(char store[3], char c)
 348 {
 349    char *rv;
 350    NYD2_ENTER;
 351
 352    rv = _qp_ctohex(store, c);
 353    NYD2_LEAVE;
 354    return rv;
 355 }
 356
 357 FL si32_t
 358 mime_hexseq_to_char(char const *hex)
 359 {
 360    si32_t rv;
 361    NYD2_ENTER;
 362
 363    rv = _qp_cfromhex(hex);
 364    NYD2_LEAVE;
 365    return rv;
 366 }
 367
 368 FL enum mime_enc
 369 mime_enc_target(void)
 370 {
 371    char const *cp;
 372    enum mime_enc rv;
 373    NYD2_ENTER;
 374
 375    if ((cp = ok_vlook(encoding)) == NULL)
 376       rv = MIME_DEFAULT_ENCODING;
 377    else if (!asccasecmp(cp, "quoted-printable"))
 378       rv = MIMEE_QP;
 379    else if (!asccasecmp(cp, "8bit"))
 380       rv = MIMEE_8B;
 381    else if (!asccasecmp(cp, "base64"))
 382       rv = MIMEE_B64;
 383    else {
 384       n_err(_("Warning: invalid *encoding*, using Base64: \"%s\"\n"), cp);
 385       rv = MIMEE_B64;
 386    }
 387    NYD2_LEAVE;
 388    return rv;
 389 }
 390
 391 FL enum mime_enc
 392 mime_enc_from_ctehead(char const *hbody)
 393 {
 394    enum mime_enc rv;
 395    NYD2_ENTER;
 396
 397    if (hbody == NULL || _is_ct_enc(hbody, "7bit"))
 398       rv = MIMEE_7B;
 399    else if (_is_ct_enc(hbody, "8bit"))
 400       rv = MIMEE_8B;
 401    else if (_is_ct_enc(hbody, "base64"))
 402       rv = MIMEE_B64;
 403    else if (_is_ct_enc(hbody, "binary"))
 404       rv = MIMEE_BIN;
 405    else if (_is_ct_enc(hbody, "quoted-printable"))
 406       rv = MIMEE_QP;
 407    else
 408       rv = MIMEE_NONE;
 409    NYD2_LEAVE;
 410    return rv;
 411 }
 412
 413 FL char const *
 414 mime_enc_from_conversion(enum conversion const convert) /* TODO booom */
 415 {
 416    char const *rv;
 417    NYD_ENTER;
 418
 419    switch (convert) {
 420    case CONV_7BIT:   rv = "7bit"; break;
 421    case CONV_8BIT:   rv = "8bit"; break;
 422    case CONV_TOQP:   rv = "quoted-printable"; break;
 423    case CONV_TOB64:  rv = "base64"; break;
 424    default:          rv = ""; break;
 425    }
 426    NYD_LEAVE;
 427    return rv;
 428 }
 429
 430 FL size_t
 431 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags)
 432 {
 433    size_t rv;
 434    bool_t sol;
 435    NYD_ENTER;
 436
 437    for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
 438       switch (_mustquote(ln, ln + lnlen, sol, flags)) {
 439       case US:
 440       case EQ:
 441       case HT:
 442          assert(flags & MIMEEF_ISENCWORD);
 443          /* FALLTHRU */
 444       case N:
 445          continue;
 446       default:
 447          ++rv;
 448       }
 449    NYD_LEAVE;
 450    return rv;
 451 }
 452
 453 FL size_t
 454 qp_encode_calc_size(size_t len)
 455 {
 456    size_t bytes, lines;
 457    NYD_ENTER;
 458
 459    /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
 460     * However, we must be aware that (a) the output may span multiple lines
 461     * and (b) the input does not end with a newline itself (nonetheless):
 462     *    LC_ALL=C awk 'BEGIN{
 463     *       for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
 464     *    }' |
 465     *    MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
 466     *       -Ssendcharsets=utf8 -s testsub ./LETTER */
 467    bytes = len * 3;
 468    lines = bytes / QP_LINESIZE;
 469    len += lines;
 470
 471    bytes = len * 3;
 472    /* Trailing hard NL may be missing, so there may be two lines.
 473     * Thus add soft + hard NL per line and a trailing NUL */
 474    lines = (bytes / QP_LINESIZE) + 1;
 475    lines <<= 1;
 476    bytes += lines;
 477    len = ++bytes;
 478
 479    NYD_LEAVE;
 480    return len;
 481 }
 482
 483 #ifdef notyet
 484 FL struct str *
 485 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags)
 486 {
 487    struct str in;
 488    NYD_ENTER;
 489
 490    in.s = UNCONST(cp);
 491    in.l = strlen(cp);
 492    out = qp_encode(out, &in, flags);
 493    NYD_LEAVE;
 494    return out;
 495 }
 496
 497 FL struct str *
 498 qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
 499    enum qpflags flags)
 500 {
 501    struct str in;
 502    NYD_ENTER;
 503
 504    in.s = UNCONST(vp);
 505    in.l = vp_len;
 506    out = qp_encode(out, &in, flags);
 507    NYD_LEAVE;
 508    return out;
 509 }
 510 #endif /* notyet */
 511
 512 FL struct str *
 513 qp_encode(struct str *out, struct str const *in, enum qpflags flags)
 514 {
 515    bool_t sol = (flags & QP_ISHEAD ? FAL0 : TRU1), seenx;
 516    ssize_t lnlen;
 517    char *qp;
 518    char const *is, *ie;
 519    NYD_ENTER;
 520
 521    if (!(flags & QP_BUF)) {
 522       lnlen = qp_encode_calc_size(in->l);
 523       out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
 524    }
 525    qp = out->s;
 526    is = in->s;
 527    ie = is + in->l;
 528
 529    /* QP_ISHEAD? */
 530    if (!sol) {
 531       enum mime_enc_flags ef = MIMEEF_ISHEAD |
 532             (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
 533
 534       for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) {
 535          enum _qact mq = _mustquote(is, ie, sol, ef);
 536          char c = *is++;
 537
 538          if (mq == N) {
 539             /* We convert into a single *encoded-word*, that'll end up in
 540              * =?C?Q??=; quote '?' from when we're inside there on */
 541             if (seenx && c == '?')
 542                goto jheadq;
 543             *qp = c;
 544          } else if (mq == US)
 545             *qp = US;
 546          else {
 547             seenx = TRU1;
 548 jheadq:
 549             *qp++ = '=';
 550             qp = _qp_ctohex(qp, c) + 1;
 551          }
 552       }
 553       goto jleave;
 554    }
 555
 556    /* The body needs to take care for soft line breaks etc. */
 557    for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) {
 558       enum _qact mq = _mustquote(is, ie, sol, MIMEEF_NONE);
 559       char c = *is++;
 560
 561       if (mq == N && (c != '\n' || !seenx)) {
 562          *qp++ = c;
 563          if (++lnlen < QP_LINESIZE - 1)
 564             continue;
 565          /* Don't write a soft line break when we're in the last possible
 566           * column and either an LF has been written or only an LF follows, as
 567           * that'll end the line anyway */
 568          /* XXX but - ensure is+1>=ie, then??
 569           * xxx and/or - what about resetting lnlen; that contra
 570           * xxx dicts input==1 input line assertion, though */
 571          if (c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
 572             continue;
 573 jsoftnl:
 574          qp[0] = '=';
 575          qp[1] = '\n';
 576          qp += 2;
 577          lnlen = 0;
 578          continue;
 579       }
 580
 581       if (lnlen > QP_LINESIZE - 3 - 1) {
 582          qp[0] = '=';
 583          qp[1] = '\n';
 584          qp += 2;
 585          lnlen = 0;
 586       }
 587       *qp++ = '=';
 588       qp = _qp_ctohex(qp, c);
 589       qp += 2;
 590       lnlen += 3;
 591       if (c != '\n' || !seenx)
 592          seenx = (c == '\r');
 593       else {
 594          seenx = FAL0;
 595          goto jsoftnl;
 596       }
 597    }
 598
 599    /* Enforce soft line break if we haven't seen LF */
 600    if (in->l > 0 && *--is != '\n') {
 601       qp[0] = '=';
 602       qp[1] = '\n';
 603       qp += 2;
 604    }
 605 jleave:
 606    out->l = PTR2SIZE(qp - out->s);
 607    out->s[out->l] = '\0';
 608    NYD_LEAVE;
 609    return out;
 610 }
 611
 612 FL int
 613 qp_decode(struct str *out, struct str const *in, struct str *rest)
 614 {
 615    int rv = STOP;
 616    char *os, *oc;
 617    char const *is, *ie;
 618    NYD_ENTER;
 619
 620    if (rest != NULL && rest->l != 0) {
 621       os = out->s;
 622       *out = *rest;
 623       rest->s = os;
 624       rest->l = 0;
 625    }
 626
 627    oc = os =
 628    out->s = srealloc(out->s, out->l + in->l + 3);
 629    oc += out->l;
 630    is = in->s;
 631    ie = is + in->l;
 632
 633    /* Decoding encoded-word (RFC 2049) in a header field? */
 634    if (rest == NULL) {
 635       while (is < ie) {
 636          si32_t c = *is++;
 637          if (c == '=') {
 638             if (PTRCMP(is + 1, >=, ie)) {
 639                ++is;
 640                goto jehead;
 641             }
 642             c = _qp_cfromhex(is);
 643             is += 2;
 644             if (c >= 0)
 645                *oc++ = (char)c;
 646             else {
 647                /* Invalid according to RFC 2045, section 6.7. Almost follow */
 648 jehead:
 649                /* TODO 0xFFFD
 650                *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 651                *oc += 3; 0xFFFD TODO
 652                */ *oc++ = '?';
 653             }
 654          } else
 655             *oc++ = (c == '_' /* US */) ? ' ' : (char)c;
 656       }
 657       goto jleave; /* XXX QP decode, header: errors not reported */
 658    }
 659
 660    /* Decoding a complete message/mimepart body line */
 661    while (is < ie) {
 662       si32_t c = *is++;
 663       if (c != '=') {
 664          *oc++ = (char)c;
 665          continue;
 666       }
 667
 668       /* RFC 2045, 6.7:
 669        *   Therefore, when decoding a Quoted-Printable body, any
 670        *   trailing white space on a line must be deleted, as it will
 671        *   necessarily have been added by intermediate transport
 672        *   agents */
 673       for (; is < ie && blankchar(*is); ++is)
 674          ;
 675       if (PTRCMP(is + 1, >=, ie)) {
 676          /* Soft line break? */
 677          if (*is == '\n')
 678             goto jsoftnl;
 679          ++is;
 680          goto jebody;
 681       }
 682
 683       /* Not a soft line break? */
 684       if (*is != '\n') {
 685          c = _qp_cfromhex(is);
 686          is += 2;
 687          if (c >= 0)
 688             *oc++ = (char)c;
 689          else {
 690             /* Invalid according to RFC 2045, section 6.7.
 691              * Almost follow it and include the = and the follow char */
 692 jebody:
 693             /* TODO 0xFFFD
 694             *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 695             *oc += 3; 0xFFFD TODO
 696             */ *oc++ = '?';
 697          }
 698          continue;
 699       }
 700
 701       /* CRLF line endings are encoded as QP, followed by a soft line break, so
 702        * check for this special case, and simply forget we have seen one, so as
 703        * not to end up with the entire DOS file in a contiguous buffer */
 704 jsoftnl:
 705       if (oc > os && oc[-1] == '\n') {
 706 #if 0       /* TODO qp_decode() we do not normalize CRLF
 707           * TODO to LF because for that we would need
 708           * TODO to know if we are about to write to
 709           * TODO the display or do save the file!
 710           * TODO 'hope the MIME/send layer rewrite will
 711           * TODO offer the possibility to DTRT */
 712          if (oc - 1 > os && oc[-2] == '\r') {
 713             --oc;
 714             oc[-1] = '\n';
 715          }
 716 #endif
 717          break;
 718       }
 719       out->l = PTR2SIZE(oc - os);
 720       rest->s = srealloc(rest->s, rest->l + out->l);
 721       memcpy(rest->s + rest->l, out->s, out->l);
 722       rest->l += out->l;
 723       oc = os;
 724       break;
 725    }
 726    /* XXX RFC: QP decode should check no trailing WS on line */
 727 jleave:
 728    out->l = PTR2SIZE(oc - os);
 729    rv = OKAY;
 730    NYD_LEAVE;
 731    return rv;
 732 }
 733
 734 FL size_t
 735 b64_encode_calc_size(size_t len)
 736 {
 737    NYD_ENTER;
 738    len = (len * 4) / 3;
 739    len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
 740    len += 2 + 1; /* CRLF, \0 */
 741    NYD_LEAVE;
 742    return len;
 743 }
 744
 745 FL struct str *
 746 b64_encode(struct str *out, struct str const *in, enum b64flags flags)
 747 {
 748    ui8_t const *p;
 749    ssize_t i, lnlen;
 750    char *b64;
 751    NYD_ENTER;
 752
 753    assert(!(flags & B64_NOPAD) ||
 754       !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
 755
 756    p = (ui8_t const*)in->s;
 757
 758    if (!(flags & B64_BUF)) {
 759       i = b64_encode_calc_size(in->l);
 760       out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
 761    }
 762    b64 = out->s;
 763
 764    if (!(flags & (B64_CRLF | B64_LF)))
 765       flags &= ~B64_MULTILINE;
 766
 767    for (lnlen = 0, i = (ssize_t)in->l; i > 0; p += 3, i -= 3) {
 768       ui32_t a = p[0], b, c;
 769
 770       b64[0] = _b64_enctbl[a >> 2];
 771       switch (i) {
 772       case 1:
 773          b64[1] = _b64_enctbl[((a & 0x3) << 4)];
 774          b64[2] =
 775          b64[3] = '=';
 776          break;
 777       case 2:
 778          b = p[1];
 779          b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 780          b64[2] = _b64_enctbl[((b & 0x0F) << 2)];
 781          b64[3] = '=';
 782          break;
 783       default:
 784          b = p[1];
 785          c = p[2];
 786          b64[1] = _b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 787          b64[2] = _b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
 788          b64[3] = _b64_enctbl[c & 0x3F];
 789          break;
 790       }
 791
 792       b64 += 4;
 793       if (!(flags & B64_MULTILINE))
 794          continue;
 795       lnlen += 4;
 796       if (lnlen < B64_LINESIZE)
 797          continue;
 798
 799       lnlen = 0;
 800       if (flags & B64_CRLF)
 801          *b64++ = '\r';
 802       if (flags & (B64_CRLF | B64_LF))
 803          *b64++ = '\n';
 804    }
 805
 806    if ((flags & (B64_CRLF | B64_LF)) &&
 807          (!(flags & B64_MULTILINE) || lnlen != 0)) {
 808       if (flags & B64_CRLF)
 809          *b64++ = '\r';
 810       if (flags & (B64_CRLF | B64_LF))
 811          *b64++ = '\n';
 812    } else if (flags & B64_NOPAD)
 813       while (b64 != out->s && b64[-1] == '=')
 814          --b64;
 815
 816    out->l = PTR2SIZE(b64 - out->s);
 817    out->s[out->l] = '\0';
 818
 819    /* Base64 includes + and /, replace them with _ and -.
 820     * This is base64url according to RFC 4648, then.  Since we only support
 821     * that for encoding and it is only used for boundary strings, this is
 822     * yet a primitive implementation; xxx use tables; support decoding */
 823    if (flags & B64_RFC4648URL) {
 824       char c;
 825
 826       for (b64 = out->s; (c = *b64) != '\0'; ++b64)
 827          if (c == '+')
 828             *b64 = '-';
 829          else if (c == '/')
 830                *b64 = '_';
 831    }
 832    NYD_LEAVE;
 833    return out;
 834 }
 835
 836 FL struct str *
 837 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
 838    enum b64flags flags)
 839 {
 840    struct str in;
 841    NYD_ENTER;
 842
 843    in.s = UNCONST(vp);
 844    in.l = vp_len;
 845    out = b64_encode(out, &in, flags);
 846    NYD_LEAVE;
 847    return out;
 848 }
 849
 850 #ifdef HAVE_SMTP
 851 FL struct str *
 852 b64_encode_cp(struct str *out, char const *cp, enum b64flags flags)
 853 {
 854    struct str in;
 855    NYD_ENTER;
 856
 857    in.s = UNCONST(cp);
 858    in.l = strlen(cp);
 859    out = b64_encode(out, &in, flags);
 860    NYD_LEAVE;
 861    return out;
 862 }
 863 #endif
 864
 865 FL int
 866 b64_decode(struct str *out, struct str const *in, struct str *rest)
 867 {
 868    struct str work;
 869    char *x;
 870    size_t len;
 871    int rv; /* XXX -> bool_t */
 872    NYD_ENTER;
 873
 874    len = _b64_decode_prepare(&work, in);
 875    out->l = 0;
 876
 877    /* TODO B64_T is different since we must not fail for errors; in v15.0 this
 878     * TODO will be filter based and B64_T will have a different one than B64,
 879     * TODO for now special treat this all-horror */
 880    if (rest != NULL) {
 881       /* With B64_T there may be leftover decoded data for iconv(3), even if
 882        * that means it's incomplete multibyte character we have to copy over */
 883       /* TODO strictly speaking this should not be handled in here,
 884        * TODO since its leftover decoded data from an iconv(3);
 885        * TODO In v15.0 this path will be filter based, each filter having its
 886        * TODO own buffer for such purpose; for now we are BUSTED since for
 887        * TODO Base64 rest is owned by iconv(3) */
 888       if (rest->l > 0) {
 889          x = out->s;
 890          *out = *rest;
 891          rest->s = x; /* Just for ownership reasons (all TODO in here..) */
 892          rest->l = 0;
 893          len += out->l;
 894       }
 895
 896       out->s = srealloc(out->s, len +1);
 897
 898       for (;;) {
 899          if (_b64_decode(out, &work) >= 0) {
 900             if (work.l == 0)
 901                break;
 902          }
 903          x = out->s + out->l;
 904
 905          /* Partial/False last sequence.  TODO not solvable for non-EOF;
 906           * TODO yes, invalid, but seen in the wild and should be handled,
 907           * TODO but for that we had to have our v15.0 filter which doesn't
 908           * TODO work line based but content buffer based */
 909          if ((len = work.l) <= 4) {
 910             switch (len) {
 911             case 4:  /* FALLTHRU */
 912             case 3:  x[2] = '?'; /* FALLTHRU */
 913             case 2:  x[1] = '?'; /* FALLTHRU */
 914             default: x[0] = '?'; break;
 915             }
 916             out->l += len;
 917             break;
 918          }
 919
 920          /* TODO Bad content: this problem is not solvable!  I've seen
 921           * TODO messages which broke lines in the middle of a Base64
 922           * TODO tuple, followed by an invalid character ("!"), the follow
 923           * TODO line starting with whitespace and the remaining sequence.
 924           * TODO OpenSSL bailed, mutt(1) got it right (silently..).
 925           * TODO Since "rest" is not usable by us, we cannot continue
 926           * TODO sequences.  We will be able to do so with the v15.0 filter
 927           * TODO approach, if we */
 928          /* Bad content: skip over a single sequence */
 929          for (;;) {
 930             *x++ = '?';
 931             ++out->l;
 932             if (--work.l == 0)
 933                break;
 934             else {
 935                ui8_t bc = (ui8_t)*++work.s;
 936                ui32_t state = _B64_DECUI8(bc);
 937
 938                if (state != _B64_EQU && state != _B64_BAD)
 939                   break;
 940             }
 941          }
 942       }
 943       rv = OKAY;
 944       goto jleave;
 945    }
 946
 947    /* Ignore an empty input, as may happen for an empty final line */
 948    if (work.l == 0) {
 949       out->s = srealloc(out->s, 1);
 950       rv = OKAY;
 951    } else if (work.l >= 4 && !(work.l & 3)) {
 952       out->s = srealloc(out->s, len +1);
 953       if ((ssize_t)(len = _b64_decode(out, &work)) < 0)
 954          goto jerr;
 955       rv = OKAY;
 956    } else
 957       goto jerr;
 958
 959 jleave:
 960    out->s[out->l] = '\0';
 961    NYD_LEAVE;
 962    return rv;
 963
 964 jerr: {
 965    char const *err = _("[Invalid Base64 encoding]\n");
 966    out->l = len = strlen(err);
 967    out->s = srealloc(out->s, len +1);
 968    memcpy(out->s, err, len);
 969    rv = STOP;
 970    goto jleave;
 971    }
 972 }
 973
 974 /* s-it-mode */