mime_cte.c

   1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
   2  *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047):
   3  *@ - Quoted-Printable, section 6.7
   4  *@ - Base64, section 6.8
   5  *
   6  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
   7  * Copyright (c) 2012 - 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
   8  */
   9 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
  10 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
  11 /*
  12  * Copyright (c) 2006 The NetBSD Foundation, Inc.
  13  * All rights reserved.
  14  *
  15  * This code is derived from software contributed to The NetBSD Foundation
  16  * by Anon Ymous.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions
  20  * are met:
  21  * 1. Redistributions of source code must retain the above copyright
  22  *    notice, this list of conditions and the following disclaimer.
  23  * 2. Redistributions in binary form must reproduce the above copyright
  24  *    notice, this list of conditions and the following disclaimer in the
  25  *    documentation and/or other materials provided with the distribution.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37  * POSSIBILITY OF SUCH DAMAGE.
  38  */
  39
  40 #ifndef HAVE_AMALGAMATION
  41 # include "nail.h"
  42 #endif
  43
  44 enum _qact {
  45     N =   0,   /* Do not quote */
  46     Q =   1,   /* Must quote */
  47    SP =   2,   /* sp */
  48    XF =   3,   /* Special character 'F' - maybe quoted */
  49    XD =   4,   /* Special character '.' - maybe quoted */
  50    UU =   5,   /* In header, _ must be quoted in encoded word */
  51    US = '_',   /* In header, ' ' must be quoted as _ in encoded word */
  52    QM = '?',   /* In header, special character ? not always quoted */
  53    EQ = '=',   /* In header, '=' must be quoted in encoded word */
  54    HT ='\t',   /* In body HT=SP, in head HT=HT, but quote in encoded word */
  55    NL =   N,   /* Don't quote '\n' (NL) */
  56    CR =   Q    /* Always quote a '\r' (CR) */
  57 };
  58
  59 /* Lookup tables to decide wether a character must be encoded or not.
  60  * Email header differences according to RFC 2047, section 4.2:
  61  * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
  62  * - don't care about the special ^F[rom] and ^.$ */
  63 static ui8_t const   _qtab_body[] = {
  64     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,SP,NL, Q,  Q,CR, Q, Q,
  65     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  66    SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
  67     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, Q, N, N,
  68
  69     N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
  70     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  71     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  72     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  73 },
  74                      _qtab_head[] = {
  75     Q, Q, Q, Q,  Q, Q, Q, Q,  Q,HT, Q, Q,  Q, Q, Q, Q,
  76     Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
  77    US, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  78     N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N,QM,
  79
  80     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  81     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N,UU,
  82     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
  83     N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
  84 };
  85
  86 /* Check wether *s must be quoted according to flags, else body rules;
  87  * sol indicates wether we are at the first character of a line/field */
  88 SINLINE enum _qact   _mustquote(char const *s, char const *e, bool_t sol,
  89                         enum mimecte_flags flags);
  90
  91 /* Convert c to/from a hexadecimal character string */
  92 SINLINE char *       _qp_ctohex(char *store, char c);
  93 SINLINE si32_t       _qp_cfromhex(char const *hex);
  94
  95 /* Trim WS and make work point to the decodable range of in*
  96  * Return the amount of bytes a b64_decode operation on that buffer requires */
  97 static size_t        _b64_decode_prepare(struct str *work,
  98                         struct str const *in);
  99
 100 /* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put).
 101  * Return number of useful bytes in out or -1 on error */
 102 static ssize_t       _b64_decode(struct str *out, struct str *in);
 103
 104 SINLINE enum _qact
 105 _mustquote(char const *s, char const *e, bool_t sol, enum mimecte_flags flags)
 106 {
 107    ui8_t const *qtab;
 108    enum _qact a, r;
 109    NYD2_ENTER;
 110
 111    qtab = (flags & (MIMECTE_ISHEAD | MIMECTE_ISENCWORD))
 112          ? _qtab_head : _qtab_body;
 113    a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s];
 114
 115    if ((r = a) == N || (r = a) == Q)
 116       goto jleave;
 117    r = Q;
 118
 119    /* Special header fields */
 120    if (flags & (MIMECTE_ISHEAD | MIMECTE_ISENCWORD)) {
 121       /* Special massage for encoded words */
 122       if (flags & MIMECTE_ISENCWORD) {
 123          switch (a) {
 124          case HT:
 125          case US:
 126          case EQ:
 127             r = a;
 128             /* FALLTHRU */
 129          case UU:
 130             goto jleave;
 131          default:
 132             break;
 133          }
 134       }
 135
 136       /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
 137        * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
 138        * should be hard too match */
 139       if (a == QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
 140          goto jleave;
 141       goto jnquote;
 142    }
 143
 144    /* Body-only */
 145
 146    if (a == SP) {
 147       /* WS only if trailing white space */
 148       if (PTRCMP(s + 1, ==, e) || s[1] == '\n')
 149          goto jleave;
 150       goto jnquote;
 151    }
 152
 153    /* Rest are special begin-of-line cases */
 154    if (!sol)
 155       goto jnquote;
 156
 157    /* ^From */
 158    if (a == XF) {
 159       if (PTRCMP(s + 4, <, e) && s[1] == 'r' && s[2] == 'o' && s[3] == 'm')
 160          goto jleave;
 161       goto jnquote;
 162    }
 163    /* ^.$ */
 164    if (a == XD && (PTRCMP(s + 1, ==, e) || s[1] == '\n'))
 165       goto jleave;
 166 jnquote:
 167    r = N;
 168 jleave:
 169    NYD2_LEAVE;
 170    return r;
 171 }
 172
 173 SINLINE char *
 174 _qp_ctohex(char *store, char c)
 175 {
 176    static char const hexmap[] = "0123456789ABCDEF";
 177    NYD2_ENTER;
 178
 179    store[2] = '\0';
 180    store[1] = hexmap[(ui8_t)c & 0x0F];
 181    c = ((ui8_t)c >> 4) & 0x0F;
 182    store[0] = hexmap[(ui8_t)c];
 183    NYD2_LEAVE;
 184    return store;
 185 }
 186
 187 SINLINE si32_t
 188 _qp_cfromhex(char const *hex)
 189 {
 190    /* Be robust, allow lowercase hexadecimal letters, too */
 191    static ui8_t const atoi16[] = {
 192       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30-0x37 */
 193       0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F */
 194       0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, /* 0x40-0x47 */
 195       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x48-0x4f */
 196       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x50-0x57 */
 197       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5f */
 198       0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF  /* 0x60-0x67 */
 199    };
 200
 201    ui8_t i1, i2;
 202    si32_t r;
 203    NYD2_ENTER;
 204
 205    if ((i1 = (ui8_t)hex[0] - '0') >= NELEM(atoi16) ||
 206          (i2 = (ui8_t)hex[1] - '0') >= NELEM(atoi16))
 207       goto jerr;
 208    i1 = atoi16[i1];
 209    i2 = atoi16[i2];
 210    if ((i1 | i2) & 0xF0u)
 211       goto jerr;
 212    r = i1;
 213    r <<= 4;
 214    r += i2;
 215 jleave:
 216    NYD2_LEAVE;
 217    return r;
 218 jerr:
 219    r = -1;
 220    goto jleave;
 221 }
 222
 223 static size_t
 224 _b64_decode_prepare(struct str *work, struct str const *in)
 225 {
 226    char *cp;
 227    size_t cp_len;
 228    NYD2_ENTER;
 229
 230    cp = in->s;
 231    cp_len = in->l;
 232
 233    while (cp_len > 0 && spacechar(*cp))
 234       ++cp, --cp_len;
 235    work->s = cp;
 236
 237    for (cp += cp_len; cp_len > 0; --cp_len) {
 238       char c = *--cp;
 239       if (!spacechar(c))
 240          break;
 241    }
 242    work->l = cp_len;
 243
 244    if (cp_len > 16)
 245       cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
 246    NYD2_LEAVE;
 247    return cp_len;
 248 }
 249
 250 static ssize_t
 251 _b64_decode(struct str *out, struct str *in)
 252 {
 253    static signed char const b64index[] = {
 254       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 255       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 256       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 257       52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
 258       -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 259       15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 260       -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 261       41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 262    };
 263 #define EQU          (ui32_t)-2
 264 #define BAD          (ui32_t)-1
 265 #define uchar64(c)   ((c) >= sizeof(b64index) ? BAD : (ui32_t)b64index[(c)])
 266
 267    ssize_t ret = -1;
 268    ui8_t *p;
 269    ui8_t const *q, *end;
 270    NYD2_ENTER;
 271
 272    p = (ui8_t*)out->s;
 273    q = (ui8_t const*)in->s;
 274    out->l = 0;
 275
 276    for (end = q + in->l; PTRCMP(q + 4, <=, end); q += 4) {
 277       ui32_t a = uchar64(q[0]), b = uchar64(q[1]), c = uchar64(q[2]),
 278          d = uchar64(q[3]);
 279
 280       if (a >= EQU || b >= EQU || c == BAD || d == BAD)
 281          goto jleave;
 282
 283       *p++ = ((a << 2) | ((b & 0x30) >> 4));
 284       if (c == EQU)  { /* got '=' */
 285          if (d != EQU)
 286             goto jleave;
 287          break;
 288       }
 289       *p++ = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
 290       if (d == EQU) /* got '=' */
 291          break;
 292       *p++ = (((c & 0x03) << 6) | d);
 293    }
 294 #undef uchar64
 295 #undef EQU
 296 #undef BAD
 297
 298    ret = PTR2SIZE((char*)p - out->s);
 299    out->l = (size_t)ret;
 300 jleave:
 301    in->l -= PTR2SIZE((char*)UNCONST(q) - in->s);
 302    in->s = UNCONST(q);
 303    NYD2_LEAVE;
 304    return ret;
 305 }
 306
 307 FL char *
 308 mime_char_to_hexseq(char store[3], char c)
 309 {
 310    char *rv;
 311    NYD2_ENTER;
 312
 313    rv = _qp_ctohex(store, c);
 314    NYD2_LEAVE;
 315    return rv;
 316 }
 317
 318 FL si32_t
 319 mime_hexseq_to_char(char const *hex)
 320 {
 321    si32_t rv;
 322    NYD2_ENTER;
 323
 324    rv = _qp_cfromhex(hex);
 325    NYD2_LEAVE;
 326    return rv;
 327 }
 328
 329 FL size_t
 330 mime_cte_mustquote(char const *ln, size_t lnlen, enum mimecte_flags flags)
 331 {
 332    size_t rv;
 333    bool_t sol;
 334    NYD_ENTER;
 335
 336    for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
 337       switch (_mustquote(ln, ln + lnlen, sol, flags)) {
 338       case US:
 339       case EQ:
 340       case HT:
 341          assert(flags & MIMECTE_ISENCWORD);
 342          /* FALLTHRU */
 343       case N:
 344          continue;
 345       default:
 346          ++rv;
 347       }
 348    NYD_LEAVE;
 349    return rv;
 350 }
 351
 352 FL size_t
 353 qp_encode_calc_size(size_t len)
 354 {
 355    size_t bytes, lines;
 356    NYD_ENTER;
 357
 358    /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
 359     * However, we must be aware that (a) the output may span multiple lines
 360     * and (b) the input does not end with a newline itself (nonetheless):
 361     *    LC_ALL=C awk 'BEGIN{
 362     *       for(i = 0; i < 100000; ++i) printf "\xC3\xBC"
 363     *    }' |
 364     *    MAILRC=/dev/null LC_ALL=en_US.UTF-8 s-nail -nvvd \
 365     *       -Ssendcharsets=utf8 -s testsub ./LETTER */
 366    bytes = len * 3;
 367    lines = bytes / QP_LINESIZE;
 368    len += lines;
 369
 370    bytes = len * 3;
 371    /* Trailing hard NL may be missing, so there may be two lines.
 372     * Thus add soft + hard NL per line and a trailing NUL */
 373    lines = (bytes / QP_LINESIZE) + 1;
 374    lines <<= 1;
 375    bytes += lines;
 376    len = ++bytes;
 377
 378    NYD_LEAVE;
 379    return len;
 380 }
 381
 382 #ifdef notyet
 383 FL struct str *
 384 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags)
 385 {
 386    struct str in;
 387    NYD_ENTER;
 388
 389    in.s = UNCONST(cp);
 390    in.l = strlen(cp);
 391    out = qp_encode(out, &in, flags);
 392    NYD_LEAVE;
 393    return out;
 394 }
 395
 396 FL struct str *
 397 qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
 398    enum qpflags flags)
 399 {
 400    struct str in;
 401    NYD_ENTER;
 402
 403    in.s = UNCONST(vp);
 404    in.l = vp_len;
 405    out = qp_encode(out, &in, flags);
 406    NYD_LEAVE;
 407    return out;
 408 }
 409 #endif /* notyet */
 410
 411 FL struct str *
 412 qp_encode(struct str *out, struct str const *in, enum qpflags flags)
 413 {
 414    bool_t sol = (flags & QP_ISHEAD ? FAL0 : TRU1), seenx;
 415    ssize_t lnlen;
 416    char *qp;
 417    char const *is, *ie;
 418    NYD_ENTER;
 419
 420    if (!(flags & QP_BUF)) {
 421       lnlen = qp_encode_calc_size(in->l);
 422       out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
 423    }
 424    qp = out->s;
 425    is = in->s;
 426    ie = is + in->l;
 427
 428    /* QP_ISHEAD? */
 429    if (!sol) {
 430       enum mimecte_flags ctef = MIMECTE_ISHEAD |
 431             (flags & QP_ISENCWORD ? MIMECTE_ISENCWORD : 0);
 432
 433       for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) {
 434          enum _qact mq = _mustquote(is, ie, sol, ctef);
 435          char c = *is++;
 436
 437          if (mq == N) {
 438             /* We convert into a single *encoded-word*, that'll end up in
 439              * =?C?Q??=; quote '?' from when we're inside there on */
 440             if (seenx && c == '?')
 441                goto jheadq;
 442             *qp = c;
 443          } else if (mq == US)
 444             *qp = US;
 445          else {
 446             seenx = TRU1;
 447 jheadq:
 448             *qp++ = '=';
 449             qp = _qp_ctohex(qp, c) + 1;
 450          }
 451       }
 452       goto jleave;
 453    }
 454
 455    /* The body needs to take care for soft line breaks etc. */
 456    for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) {
 457       enum _qact mq = _mustquote(is, ie, sol, MIMECTE_NONE);
 458       char c = *is++;
 459
 460       if (mq == N && (c != '\n' || !seenx)) {
 461          *qp++ = c;
 462          if (++lnlen < QP_LINESIZE - 1)
 463             continue;
 464          /* Don't write a soft line break when we're in the last possible
 465           * column and either an LF has been written or only an LF follows, as
 466           * that'll end the line anyway */
 467          /* XXX but - ensure is+1>=ie, then??
 468           * xxx and/or - what about resetting lnlen; that contra
 469           * xxx dicts input==1 input line assertion, though */
 470          if (c == '\n' || is == ie || *is == '\n')
 471             continue;
 472 jsoftnl:
 473          qp[0] = '=';
 474          qp[1] = '\n';
 475          qp += 2;
 476          lnlen = 0;
 477          continue;
 478       }
 479
 480       if (lnlen > QP_LINESIZE - 3 - 1) {
 481          qp[0] = '=';
 482          qp[1] = '\n';
 483          qp += 2;
 484          lnlen = 0;
 485       }
 486       *qp++ = '=';
 487       qp = _qp_ctohex(qp, c);
 488       qp += 2;
 489       lnlen += 3;
 490       if (c != '\n' || !seenx)
 491          seenx = (c == '\r');
 492       else {
 493          seenx = FAL0;
 494          goto jsoftnl;
 495       }
 496    }
 497
 498    /* Enforce soft line break if we haven't seen LF */
 499    if (in->l > 0 && *--is != '\n') {
 500       qp[0] = '=';
 501       qp[1] = '\n';
 502       qp += 2;
 503    }
 504 jleave:
 505    out->l = PTR2SIZE(qp - out->s);
 506    out->s[out->l] = '\0';
 507    NYD_LEAVE;
 508    return out;
 509 }
 510
 511 FL int
 512 qp_decode(struct str *out, struct str const *in, struct str *rest)
 513 {
 514    int ret = STOP;
 515    char *os, *oc;
 516    char const *is, *ie;
 517    NYD_ENTER;
 518
 519    if (rest != NULL && rest->l != 0) {
 520       os = out->s;
 521       *out = *rest;
 522       rest->s = os;
 523       rest->l = 0;
 524    }
 525
 526    oc = os =
 527    out->s = srealloc(out->s, out->l + in->l + 3);
 528    oc += out->l;
 529    is = in->s;
 530    ie = is + in->l;
 531
 532    /* Decoding encoded-word (RFC 2049) in a header field? */
 533    if (rest == NULL) {
 534       while (is < ie) {
 535          si32_t c = *is++;
 536          if (c == '=') {
 537             if (PTRCMP(is + 1, >=, ie)) {
 538                ++is;
 539                goto jehead;
 540             }
 541             c = _qp_cfromhex(is);
 542             is += 2;
 543             if (c >= 0)
 544                *oc++ = (char)c;
 545             else {
 546                /* Invalid according to RFC 2045, section 6.7. Almost follow */
 547 jehead:
 548                /* TODO 0xFFFD
 549                *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 550                *oc += 3; 0xFFFD TODO
 551                */ *oc++ = '?';
 552             }
 553          } else
 554             *oc++ = (c == '_' /* US */) ? ' ' : (char)c;
 555       }
 556       goto jleave; /* XXX QP decode, header: errors not reported */
 557    }
 558
 559    /* Decoding a complete message/mimepart body line */
 560    while (is < ie) {
 561       si32_t c = *is++;
 562       if (c != '=') {
 563          *oc++ = (char)c;
 564          continue;
 565       }
 566
 567       /* RFC 2045, 6.7:
 568        *   Therefore, when decoding a Quoted-Printable body, any
 569        *   trailing white space on a line must be deleted, as it will
 570        *   necessarily have been added by intermediate transport
 571        *   agents */
 572       for (; is < ie && blankchar(*is); ++is)
 573          ;
 574       if (PTRCMP(is + 1, >=, ie)) {
 575          /* Soft line break? */
 576          if (*is == '\n')
 577             goto jsoftnl;
 578          ++is;
 579          goto jebody;
 580       }
 581
 582       /* Not a soft line break? */
 583       if (*is != '\n') {
 584          c = _qp_cfromhex(is);
 585          is += 2;
 586          if (c >= 0)
 587             *oc++ = (char)c;
 588          else {
 589             /* Invalid according to RFC 2045, section 6.7.
 590              * Almost follow it and include the = and the follow char */
 591 jebody:
 592             /* TODO 0xFFFD
 593             *oc[0] = '['; oc[1] = '?'; oc[2] = ']';
 594             *oc += 3; 0xFFFD TODO
 595             */ *oc++ = '?';
 596          }
 597          continue;
 598       }
 599
 600       /* CRLF line endings are encoded as QP, followed by a soft line break, so
 601        * check for this special case, and simply forget we have seen one, so as
 602        * not to end up with the entire DOS file in a contiguous buffer */
 603 jsoftnl:
 604       if (oc > os && oc[-1] == '\n') {
 605 #if 0       /* TODO qp_decode() we do not normalize CRLF
 606           * TODO to LF because for that we would need
 607           * TODO to know if we are about to write to
 608           * TODO the display or do save the file!
 609           * TODO 'hope the MIME/send layer rewrite will
 610           * TODO offer the possibility to DTRT */
 611          if (oc - 1 > os && oc[-2] == '\r') {
 612             --oc;
 613             oc[-1] = '\n';
 614          }
 615 #endif
 616          break;
 617       }
 618       out->l = PTR2SIZE(oc - os);
 619       rest->s = srealloc(rest->s, rest->l + out->l);
 620       memcpy(rest->s + rest->l, out->s, out->l);
 621       rest->l += out->l;
 622       oc = os;
 623       break;
 624    }
 625    /* XXX RFC: QP decode should check no trailing WS on line */
 626 jleave:
 627    out->l = PTR2SIZE(oc - os);
 628    ret = OKAY;
 629    NYD_LEAVE;
 630    return ret;
 631 }
 632
 633 FL size_t
 634 b64_encode_calc_size(size_t len)
 635 {
 636    NYD_ENTER;
 637    len = (len * 4) / 3;
 638    len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
 639    len += 2 + 1; /* CRLF, \0 */
 640    NYD_LEAVE;
 641    return len;
 642 }
 643
 644 FL struct str *
 645 b64_encode(struct str *out, struct str const *in, enum b64flags flags)
 646 {
 647    static char const b64table[] =
 648        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 649
 650    ui8_t const *p;
 651    ssize_t i, lnlen;
 652    char *b64;
 653    NYD_ENTER;
 654
 655    p = (ui8_t const*)in->s;
 656
 657    if (!(flags & B64_BUF)) {
 658       i = b64_encode_calc_size(in->l);
 659       out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
 660    }
 661    b64 = out->s;
 662
 663    if (!(flags & (B64_CRLF | B64_LF)))
 664       flags &= ~B64_MULTILINE;
 665
 666    for (lnlen = 0, i = (ssize_t)in->l; i > 0; p += 3, i -= 3) {
 667       ui32_t a = p[0], b, c;
 668
 669       b64[0] = b64table[a >> 2];
 670       switch (i) {
 671       case 1:
 672          b64[1] = b64table[((a & 0x3) << 4)];
 673          b64[2] =
 674          b64[3] = '=';
 675          break;
 676       case 2:
 677          b = p[1];
 678          b64[1] = b64table[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 679          b64[2] = b64table[((b & 0x0F) << 2)];
 680          b64[3] = '=';
 681          break;
 682       default:
 683          b = p[1];
 684          c = p[2];
 685          b64[1] = b64table[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 686          b64[2] = b64table[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
 687          b64[3] = b64table[c & 0x3F];
 688          break;
 689       }
 690
 691       b64 += 4;
 692       if (!(flags & B64_MULTILINE))
 693          continue;
 694       lnlen += 4;
 695       if (lnlen < B64_LINESIZE)
 696          continue;
 697
 698       lnlen = 0;
 699       if (flags & B64_CRLF)
 700          *b64++ = '\r';
 701       if (flags & (B64_CRLF | B64_LF))
 702          *b64++ = '\n';
 703    }
 704
 705    if ((flags & (B64_CRLF | B64_LF)) &&
 706          (!(flags & B64_MULTILINE) || lnlen != 0)) {
 707       if (flags & B64_CRLF)
 708          *b64++ = '\r';
 709       if (flags & (B64_CRLF | B64_LF))
 710          *b64++ = '\n';
 711    }
 712    out->l = PTR2SIZE(b64 - out->s);
 713    out->s[out->l] = '\0';
 714    NYD_LEAVE;
 715    return out;
 716 }
 717
 718 FL struct str *
 719 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
 720    enum b64flags flags)
 721 {
 722    struct str in;
 723    NYD_ENTER;
 724
 725    in.s = UNCONST(vp);
 726    in.l = vp_len;
 727    out = b64_encode(out, &in, flags);
 728    NYD_LEAVE;
 729    return out;
 730 }
 731
 732 #ifdef HAVE_SMTP
 733 FL struct str *
 734 b64_encode_cp(struct str *out, char const *cp, enum b64flags flags)
 735 {
 736    struct str in;
 737    NYD_ENTER;
 738
 739    in.s = UNCONST(cp);
 740    in.l = strlen(cp);
 741    out = b64_encode(out, &in, flags);
 742    NYD_LEAVE;
 743    return out;
 744 }
 745 #endif
 746
 747 FL int
 748 b64_decode(struct str *out, struct str const *in, struct str *rest)
 749 {
 750    struct str work;
 751    char *x;
 752    int ret = STOP;
 753    size_t len;
 754    NYD_ENTER;
 755
 756    len = _b64_decode_prepare(&work, in);
 757
 758    /* Ignore an empty input, as may happen for an empty final line */
 759    if (work.l == 0) {
 760       /* With B64_T there may be leftover decoded data for iconv(3), even if
 761        * that means it's incomplete multibyte character we have to copy over */
 762       /* XXX strictly speaking this should not be handled in here,
 763        * XXX since its leftover decoded data from an iconv(3);
 764        * XXX like this we shared the prototype with QP, though?? */
 765       if (rest != NULL && rest->l > 0) {
 766          x = out->s;
 767          *out = *rest;
 768          rest->s = x;
 769          rest->l = 0;
 770       } else
 771          out->l = 0;
 772       ret = OKAY;
 773       goto jleave;
 774    }
 775    if (work.l >= 4 && !(work.l & 3)) {
 776       out->s = srealloc(out->s, len);
 777       ret = OKAY;
 778    }
 779    if (ret != OKAY || (ssize_t)(len = _b64_decode(out, &work)) < 0)
 780       goto jerr;
 781 jleave:
 782    NYD_LEAVE;
 783    return ret;
 784
 785 jerr: {
 786    char const *err = _("[Invalid Base64 encoding ignored]\n");
 787    len = strlen(err);
 788    x = out->s = srealloc(out->s, len + 1 +1);
 789    if (rest != NULL && rest->l)
 790       *x++ = '\n';
 791    memcpy(x, err, len);
 792    x += len;
 793    *x = '\0';
 794    out->l = PTR2SIZE(x - out->s);
 795    if (rest != NULL)
 796       rest->l = 0;
 797    ret = STOP;
 798    goto jleave;
 799    }
 800 }
 801
 802 /* s-it-mode */