mime-enc.c

   1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047;
 *@ for _header() versions: including "encoded word" as of RFC 2047):
   4  *@ - Quoted-Printable, section 6.7
   5  *@ - Base64, section 6.8
   6  *@ TODO We have no notion of a "current message context" and thus badly log.
   7  *@ TODO This is not final yet, v15 will bring "filters".
   8  *
   9  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
  10  * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
  11  */
  12 /* QP quoting idea, _b64_decode(), b64_encode() taken from NetBSDs mailx(1): */
  13 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
  14 /*
  15  * Copyright (c) 2006 The NetBSD Foundation, Inc.
  16  * All rights reserved.
  17  *
  18  * This code is derived from software contributed to The NetBSD Foundation
  19  * by Anon Ymous.
  20  *
  21  * Redistribution and use in source and binary forms, with or without
  22  * modification, are permitted provided that the following conditions
  23  * are met:
  24  * 1. Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  * 2. Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in the
  28  *    documentation and/or other materials provided with the distribution.
  29  *
  30  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  31  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  32  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  33  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  34  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  37  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  38  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  40  * POSSIBILITY OF SUCH DAMAGE.
  41  */
  42 #undef n_FILE
  43 #define n_FILE mime_enc
  44
  45 #ifndef HAVE_AMALGAMATION
  46 # include "nail.h"
  47 #endif
  48
/* Quoting actions: the values stored in the QP lookup tables below and
 * returned by a_me_mustquote().
 * The small values are plain character classes; the character-valued ones
 * (a_ME_US and later) appear in the header table only, so that the encoded
 * word logic can tell which special character it has seen.
 * a_ME_NL and a_ME_CR are mere aliases which make the body table readable */
enum a_me_qact{
   a_ME_N = 0,       /* No quoting necessary */
   a_ME_Q = 1,       /* Must quote */
   a_ME_SP = 2,      /* sp */
   a_ME_XF = 3,      /* Special character 'F' - maybe quoted */
   a_ME_XD = 4,      /* Special character '.' - maybe quoted */
   a_ME_UU = 5,      /* In header, _ must be quoted in encoded word */
   a_ME_US = '_',    /* In header, ' ' must be quoted as _ in encoded word */
   a_ME_QM = '?',    /* In header, special character ? not always quoted */
   a_ME_EQ = '=',    /* In header, '=' must be quoted in encoded word */
   a_ME_HT ='\t',    /* Body HT=SP.  Head HT=HT, BUT quote in encoded word */
   a_ME_NL = 0,      /* Don't quote '\n' (NL) */
   a_ME_CR = a_ME_Q  /* Always quote a '\r' (CR) */
};
  63
/* Lookup tables to decide whether a character must be encoded or not.
 * Both tables have 128 entries and are indexed by the (7-bit) byte value;
 * a_me_mustquote() handles bytes above 0x7F itself and never looks them up.
 * Each row below covers eight consecutive byte values.
 * Email header differences according to RFC 2047, section 4.2:
 * - also quote SP (as the underscore _), TAB, ?, _, CR, LF
 * - don't care about the special ^F[rom] and ^.$ */
static ui8_t const a_me_qp_body[] = {
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
    a_ME_Q, a_ME_SP, a_ME_NL,  a_ME_Q,  a_ME_Q, a_ME_CR,  a_ME_Q,  a_ME_Q,
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
   a_ME_SP,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_XD,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,  a_ME_N,  a_ME_N,

    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_XF,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,
}, a_me_qp_head[] = {
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
    a_ME_Q, a_ME_HT,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
    a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,  a_ME_Q,
   a_ME_US,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_EQ,  a_ME_N, a_ME_QM,

    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N, a_ME_UU,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,
    a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_N,  a_ME_Q,
};
 105
 106 /* The decoding table is only accessed via a_ME_B64_DECUI8() */
 107 static char const a_me_b64_enctbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 108       "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";
 109 static signed char const a_me_b64__dectbl[] = {
 110    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 111    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 112    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 113    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
 114    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 115    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 116    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 117    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 118 };
 119 #define a_ME_B64_EQU (ui32_t)-2
 120 #define a_ME_B64_BAD (ui32_t)-1
 121 #define a_ME_B64_DECUI8(C) \
 122    ((ui8_t)(C) >= sizeof(a_me_b64__dectbl)\
 123     ? a_ME_B64_BAD : (ui32_t)a_me_b64__dectbl[(ui8_t)(C)])
 124
/* All known Content-Transfer-Encoding names, packed into a single string of
 * NUL-terminated entries; the enum below gives for each entry the offset into
 * that string and the length of the name (without the terminating NUL).
 * (Ugly to place an enum here) */
static char const a_me_ctes[] = "7bit\0" "8bit\0" \
      "base64\0" "quoted-printable\0" "binary\0" \
      /* abbrevs */ "8b\0" "b64\0" "qp\0";
enum a_me_ctes_off{
   a_ME_CTES_7B_OFF = 0, a_ME_CTES_7B_LEN = 4,
   a_ME_CTES_8B_OFF = 5, a_ME_CTES_8B_LEN = 4,
   a_ME_CTES_B64_OFF = 10, a_ME_CTES_B64_LEN = 6,
   a_ME_CTES_QP_OFF = 17,  a_ME_CTES_QP_LEN = 16,
   a_ME_CTES_BIN_OFF = 34, a_ME_CTES_BIN_LEN = 6,

   /* The abbreviated names (only used by mime_enc_target()) */
   a_ME_CTES_S8B_OFF = 41, a_ME_CTES_S8B_LEN = 2,
   a_ME_CTES_SB64_OFF = 44, a_ME_CTES_SB64_LEN = 3,
   a_ME_CTES_SQP_OFF = 48, a_ME_CTES_SQP_LEN = 2
};
 140
/* Check whether *s must be quoted according to flags, else body rules;
 * e points one behind the last byte of the buffer that s is part of, and
 * sol indicates whether we are at the first character of a line/field */
SINLINE enum a_me_qact a_me_mustquote(char const *s, char const *e, bool_t sol,
                        enum mime_enc_flags flags);

/* Trim WS and make work point to the decodable range of in.
 * Return the amount of bytes a b64_decode operation on that buffer requires,
 * or UIZ_MAX on overflow error */
static size_t a_me_b64_decode_prepare(struct str *work, struct str const *in);

/* Perform b64_decode on in(put) to sufficiently spaced out(put).
 * Decoded bytes are appended at out->s[out->l], and in is advanced to the
 * input that has not been consumed.
 * Return number of useful bytes in out or -1 on error.
 * Note: may enter endless loop if in->l < 4 and 0 return is not handled! */
static ssize_t a_me_b64_decode(struct str *out, struct str *in);
 155
 156 SINLINE enum a_me_qact
 157 a_me_mustquote(char const *s, char const *e, bool_t sol,
 158       enum mime_enc_flags flags){
 159    ui8_t const *qtab;
 160    enum a_me_qact a, r;
 161    NYD2_ENTER;
 162
 163    qtab = (flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD))
 164          ? a_me_qp_head : a_me_qp_body;
 165
 166    if((ui8_t)*s > 0x7F){
 167       r = a_ME_Q;
 168       goto jleave;
 169    }
 170
 171    a = qtab[(ui8_t)*s];
 172
 173    if((r = a) == a_ME_N || a == a_ME_Q)
 174       goto jleave;
 175
 176    r = a_ME_Q;
 177
 178    /* Special header fields */
 179    if(flags & (MIMEEF_ISHEAD | MIMEEF_ISENCWORD)){
 180       /* Special massage for encoded words */
 181       if(flags & MIMEEF_ISENCWORD){
 182          switch(a){
 183          case a_ME_HT:
 184          case a_ME_US:
 185          case a_ME_EQ:
 186             r = a;
 187             /* FALLTHRU */
 188          case a_ME_UU:
 189             goto jleave;
 190          default:
 191             break;
 192          }
 193       }
 194
 195       /* Treat '?' only special if part of '=?' .. '?=' (still too much quoting
 196        * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?=
 197        * should be hard to match */
 198       if(a == a_ME_QM && ((!sol && s[-1] == '=') || (s < e && s[1] == '=')))
 199          goto jleave;
 200       goto jnquote;
 201    }
 202
 203    /* Body-only */
 204
 205    if(a == a_ME_SP){
 206       /* WS only if trailing white space */
 207       if(&s[1] == e || s[1] == '\n')
 208          goto jleave;
 209       goto jnquote;
 210    }
 211
 212    /* Rest are special begin-of-line cases */
 213    if(!sol)
 214       goto jnquote;
 215
 216    /* ^From */
 217    if(a == a_ME_XF){
 218       if(&s[4] < e && s[1] == 'r' && s[2] == 'o' && s[3] == 'm' && s[4] == ' ')
 219          goto jleave;
 220       goto jnquote;
 221    }
 222    /* ^.$ */
 223    if(a == a_ME_XD && (&s[1] == e || s[1] == '\n'))
 224       goto jleave;
 225 jnquote:
 226    r = 0;
 227 jleave:
 228    NYD2_LEAVE;
 229    return r;
 230 }
 231
 232 static size_t
 233 a_me_b64_decode_prepare(struct str *work, struct str const *in){
 234    size_t cp_len;
 235    NYD2_ENTER;
 236
 237    *work = *in;
 238    cp_len = n_str_trim(work)->l;
 239
 240    if(cp_len > 16){
 241       /* n_ERR_OVERFLOW */
 242       if(UIZ_MAX / 3 <= cp_len){
 243          cp_len = UIZ_MAX;
 244          goto jleave;
 245       }
 246       cp_len = ((cp_len * 3) >> 2) + (cp_len >> 3);
 247    }
 248    cp_len += (2 * 3) +1;
 249 jleave:
 250    NYD2_LEAVE;
 251    return cp_len;
 252 }
 253
 254 static ssize_t
 255 a_me_b64_decode(struct str *out, struct str *in){
 256    ui8_t *p, pb;
 257    ui8_t const *q, *end;
 258    ssize_t rv;
 259    NYD2_ENTER;
 260
 261    rv = -1;
 262    p = (ui8_t*)&out->s[out->l];
 263    q = (ui8_t const*)in->s;
 264
 265    for(end = &q[in->l]; PTR2SIZE(end - q) >= 4; q += 4){
 266       ui32_t a, b, c, d;
 267
 268       a = a_ME_B64_DECUI8(q[0]);
 269       b = a_ME_B64_DECUI8(q[1]);
 270       c = a_ME_B64_DECUI8(q[2]);
 271       d = a_ME_B64_DECUI8(q[3]);
 272
 273       if(n_UNLIKELY(a >= a_ME_B64_EQU || b >= a_ME_B64_EQU ||
 274             c == a_ME_B64_BAD || d == a_ME_B64_BAD))
 275          goto jleave;
 276
 277       pb = ((a << 2) | ((b & 0x30) >> 4));
 278       if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
 279          *p++ = pb;
 280
 281       if(c == a_ME_B64_EQU){ /* got '=' */
 282          q += 4;
 283          if(n_UNLIKELY(d != a_ME_B64_EQU))
 284             goto jleave;
 285          break;
 286       }
 287
 288       pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
 289       if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
 290          *p++ = pb;
 291
 292       if(d == a_ME_B64_EQU) /* got '=' */
 293          break;
 294       pb = (((c & 0x03) << 6) | d);
 295       if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
 296          *p++ = pb;
 297    }
 298    rv ^= rv;
 299
 300 jleave:{
 301       size_t i;
 302
 303       i = PTR2SIZE((char*)p - out->s);
 304       out->l = i;
 305       if(rv == 0)
 306          rv = (ssize_t)i;
 307    }
 308    in->l -= PTR2SIZE(q - (ui8_t*)in->s);
 309    in->s = n_UNCONST(q);
 310    NYD2_LEAVE;
 311    return rv;
 312 }
 313
 314 FL enum mime_enc
 315 mime_enc_target(void){
 316    char const *cp, *v15;
 317    enum mime_enc rv;
 318    NYD2_ENTER;
 319
 320    if((v15 = ok_vlook(encoding)) != NULL)
 321       n_OBSOLETE(_("please use *mime-encoding* instead of *encoding*"));
 322
 323    if((cp = ok_vlook(mime_encoding)) == NULL && (cp = v15) == NULL)
 324       rv = MIME_DEFAULT_ENCODING;
 325    else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_S8B_OFF]) ||
 326          !asccasecmp(cp, &a_me_ctes[a_ME_CTES_8B_OFF]))
 327       rv = MIMEE_8B;
 328    else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SB64_OFF]) ||
 329          !asccasecmp(cp, &a_me_ctes[a_ME_CTES_B64_OFF]))
 330       rv = MIMEE_B64;
 331    else if(!asccasecmp(cp, &a_me_ctes[a_ME_CTES_SQP_OFF]) ||
 332          !asccasecmp(cp, &a_me_ctes[a_ME_CTES_QP_OFF]))
 333       rv = MIMEE_QP;
 334    else{
 335       n_err(_("Warning: invalid *mime-encoding*, using Base64: %s\n"), cp);
 336       rv = MIMEE_B64;
 337    }
 338    NYD2_LEAVE;
 339    return rv;
 340 }
 341
 342 FL enum mime_enc
 343 mime_enc_from_ctehead(char const *hbody){
 344    enum mime_enc rv;
 345    NYD2_ENTER;
 346
 347    if(hbody == NULL)
 348       rv = MIMEE_7B;
 349    else{
 350       struct{
 351          ui8_t off;
 352          ui8_t len;
 353          ui8_t enc;
 354          ui8_t __dummy;
 355       } const *cte, cte_base[] = {
 356          {a_ME_CTES_7B_OFF, a_ME_CTES_7B_LEN, MIMEE_7B, 0},
 357          {a_ME_CTES_8B_OFF, a_ME_CTES_8B_LEN, MIMEE_8B, 0},
 358          {a_ME_CTES_B64_OFF, a_ME_CTES_B64_LEN, MIMEE_B64, 0},
 359          {a_ME_CTES_QP_OFF, a_ME_CTES_QP_LEN, MIMEE_QP, 0},
 360          {a_ME_CTES_BIN_OFF, a_ME_CTES_BIN_LEN, MIMEE_BIN, 0},
 361          {0, 0, MIMEE_NONE, 0}
 362       };
 363       union {char const *s; size_t l;} u;
 364
 365       if(*hbody == '"')
 366          for(u.s = ++hbody; *u.s != '\0' && *u.s != '"'; ++u.s)
 367             ;
 368       else
 369          for(u.s = hbody; *u.s != '\0' && !whitechar(*u.s); ++u.s)
 370             ;
 371       u.l = PTR2SIZE(u.s - hbody);
 372
 373       for(cte = cte_base;;)
 374          if(cte->len == u.l && !asccasecmp(&a_me_ctes[cte->off], hbody)){
 375             rv = cte->enc;
 376             break;
 377          }else if((++cte)->enc == MIMEE_NONE){
 378             rv = MIMEE_NONE;
 379             break;
 380          }
 381    }
 382    NYD2_LEAVE;
 383    return rv;
 384 }
 385
 386 FL char const *
 387 mime_enc_from_conversion(enum conversion const convert){
 388    char const *rv;
 389    NYD2_ENTER;
 390
 391    switch(convert){
 392    case CONV_7BIT: rv = &a_me_ctes[a_ME_CTES_7B_OFF]; break;
 393    case CONV_8BIT: rv = &a_me_ctes[a_ME_CTES_8B_OFF]; break;
 394    case CONV_TOQP: rv = &a_me_ctes[a_ME_CTES_QP_OFF]; break;
 395    case CONV_TOB64: rv = &a_me_ctes[a_ME_CTES_B64_OFF]; break;
 396    case CONV_NONE: rv = &a_me_ctes[a_ME_CTES_BIN_OFF]; break;
 397    default: rv = n_empty; break;
 398    }
 399    NYD2_LEAVE;
 400    return rv;
 401 }
 402
 403 FL size_t
 404 mime_enc_mustquote(char const *ln, size_t lnlen, enum mime_enc_flags flags){
 405    size_t rv;
 406    bool_t sol;
 407    NYD2_ENTER;
 408
 409    for(rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen)
 410       switch(a_me_mustquote(ln, ln + lnlen, sol, flags)){
 411       case a_ME_US:
 412       case a_ME_EQ:
 413       case a_ME_HT:
 414          assert(flags & MIMEEF_ISENCWORD);
 415          /* FALLTHRU */
 416       case 0:
 417          continue;
 418       default:
 419          ++rv;
 420       }
 421    NYD2_LEAVE;
 422    return rv;
 423 }
 424
 425 FL size_t
 426 qp_encode_calc_size(size_t len){
 427    size_t bytes, lines;
 428    NYD2_ENTER;
 429
 430    /* The worst case sequence is 'CRLF' -> '=0D=0A=\n\0'.
 431     * However, we must be aware that (a) the output may span multiple lines
 432     * and (b) the input does not end with a newline itself (nonetheless):
 433     *    LC_ALL=C awk 'BEGIN{
 434     *       for (i = 1; i < 100000; ++i) printf "\xC3\xBC"
 435     *    }' |
 436     *    s-nail -:/ -dSsendcharsets=utf8 -s testsub no@where */
 437
 438    /* Several n_ERR_OVERFLOW */
 439    if(len >= UIZ_MAX / 3){
 440       len = UIZ_MAX;
 441       goto jleave;
 442    }
 443    bytes = len * 3;
 444    lines = bytes / QP_LINESIZE;
 445    len += lines;
 446
 447    if(len >= UIZ_MAX / 3){
 448       len = UIZ_MAX;
 449       goto jleave;
 450    }
 451    /* Trailing hard NL may be missing, so there may be two lines.
 452     * Thus add soft + hard NL per line and a trailing NUL */
 453    bytes = len * 3;
 454    lines = (bytes / QP_LINESIZE) + 1;
 455    lines <<= 1;
 456    ++bytes;
 457    /*if(UIZ_MAX - bytes >= lines){
 458       len = UIZ_MAX;
 459       goto jleave;
 460    }*/
 461    bytes += lines;
 462    len = bytes;
 463 jleave:
 464    NYD2_LEAVE;
 465    return len;
 466 }
 467
 468 #ifdef notyet
 469 FL struct str *
 470 qp_encode_cp(struct str *out, char const *cp, enum qpflags flags){
 471    struct str in;
 472    NYD_ENTER;
 473
 474    in.s = n_UNCONST(cp);
 475    in.l = strlen(cp);
 476    out = qp_encode(out, &in, flags);
 477    NYD_LEAVE;
 478    return out;
 479 }
 480
 481 FL struct str *
 482 qp_encode_buf(struct str *out, void const *vp, size_t vp_len,
 483       enum qpflags flags){
 484    struct str in;
 485    NYD_ENTER;
 486
 487    in.s = n_UNCONST(vp);
 488    in.l = vp_len;
 489    out = qp_encode(out, &in, flags);
 490    NYD_LEAVE;
 491    return out;
 492 }
 493 #endif /* notyet */
 494
 495 FL struct str *
 496 qp_encode(struct str *out, struct str const *in, enum qpflags flags){
 497    size_t lnlen;
 498    char *qp;
 499    char const *is, *ie;
 500    bool_t sol, seenx;
 501    NYD_ENTER;
 502
 503    sol = (flags & QP_ISHEAD ? FAL0 : TRU1);
 504
 505    if(!(flags & QP_BUF)){
 506       if((lnlen = qp_encode_calc_size(in->l)) == UIZ_MAX){
 507          out = NULL;
 508          goto jerr;
 509       }
 510       out->s = (flags & QP_SALLOC) ? salloc(lnlen) : srealloc(out->s, lnlen);
 511    }
 512    qp = out->s;
 513    is = in->s;
 514    ie = is + in->l;
 515
 516    if(flags & QP_ISHEAD){
 517       enum mime_enc_flags ef;
 518
 519       ef = MIMEEF_ISHEAD | (flags & QP_ISENCWORD ? MIMEEF_ISENCWORD : 0);
 520
 521       for(seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp){
 522          char c;
 523          enum a_me_qact mq;
 524
 525          mq = a_me_mustquote(is, ie, sol, ef);
 526          c = *is++;
 527
 528          if(mq == a_ME_N){
 529             /* We convert into a single *encoded-word*, that'll end up in
 530              * =?C?Q??=; quote '?' from when we're inside there on */
 531             if(seenx && c == '?')
 532                goto jheadq;
 533             *qp = c;
 534          }else if(mq == a_ME_US)
 535             *qp = a_ME_US;
 536          else{
 537             seenx = TRU1;
 538 jheadq:
 539             *qp++ = '=';
 540             qp = n_c_to_hex_base16(qp, c) + 1;
 541          }
 542       }
 543       goto jleave;
 544    }
 545
 546    /* The body needs to take care for soft line breaks etc. */
 547    for(lnlen = 0, seenx = FAL0; is < ie; sol = FAL0){
 548       char c;
 549       enum a_me_qact mq;
 550
 551       mq = a_me_mustquote(is, ie, sol, MIMEEF_NONE);
 552       c = *is++;
 553
 554       if(mq == a_ME_N && (c != '\n' || !seenx)){
 555          *qp++ = c;
 556          if(++lnlen < QP_LINESIZE - 1)
 557             continue;
 558          /* Don't write a soft line break when we're in the last possible
 559           * column and either an LF has been written or only an LF follows, as
 560           * that'll end the line anyway */
 561          /* XXX but - ensure is+1>=ie, then??
 562           * xxx and/or - what about resetting lnlen; that contra
 563           * xxx dicts input==1 input line assertion, though */
 564          if(c == '\n' || is == ie || is[0] == '\n' || is[1] == '\n')
 565             continue;
 566 jsoftnl:
 567          qp[0] = '=';
 568          qp[1] = '\n';
 569          qp += 2;
 570          lnlen = 0;
 571          continue;
 572       }
 573
 574       if(lnlen > QP_LINESIZE - 3 - 1){
 575          qp[0] = '=';
 576          qp[1] = '\n';
 577          qp += 2;
 578          lnlen = 0;
 579       }
 580       *qp++ = '=';
 581       qp = n_c_to_hex_base16(qp, c);
 582       qp += 2;
 583       lnlen += 3;
 584       if(c != '\n' || !seenx)
 585          seenx = (c == '\r');
 586       else{
 587          seenx = FAL0;
 588          goto jsoftnl;
 589       }
 590    }
 591
 592    /* Enforce soft line break if we haven't seen LF */
 593    if(in->l > 0 && *--is != '\n'){
 594       qp[0] = '=';
 595       qp[1] = '\n';
 596       qp += 2;
 597    }
 598 jleave:
 599    out->l = PTR2SIZE(qp - out->s);
 600    out->s[out->l] = '\0';
 601 jerr:
 602    NYD_LEAVE;
 603    return out;
 604 }
 605
 606 FL bool_t
 607 qp_decode_header(struct str *out, struct str const *in){
 608    struct n_string s;
 609    char const *is, *ie;
 610    NYD_ENTER;
 611
 612    /* n_ERR_OVERFLOW */
 613    if(UIZ_MAX -1 - out->l <= in->l ||
 614          SI32_MAX <= out->l + in->l){ /* XXX wrong, we may replace */
 615       out->l = 0;
 616       out = NULL;
 617       goto jleave;
 618    }
 619
 620    n_string_creat(&s);
 621    n_string_reserve(n_string_take_ownership(&s, out->s,
 622          (out->l == 0 ? 0 : out->l +1), out->l),
 623       in->l + (in->l >> 2));
 624
 625    for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
 626       si32_t c;
 627
 628       c = *is++;
 629       if(c == '='){
 630          if(is >= ie){
 631             goto jpushc; /* TODO According to RFC 2045, 6.7,
 632             * ++is; TODO we should warn the user, but have no context
 633             * goto jehead; TODO to do so; can't over and over */
 634          }else if((c = n_c_from_hex_base16(is)) >= 0){
 635             is += 2;
 636             goto jpushc;
 637          }else{
 638             /* Invalid according to RFC 2045, section 6.7 */
 639             /* TODO Follow RFC 2045, 6.7 advise and simply put through */
 640             c = '=';
 641             goto jpushc;
 642 /* TODO jehead:
 643  * TODO      if(n_psonce & n_PSO_UNICODE)
 644  *              n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
 645  * TODO       else{
 646  * TODO          c = '?';
 647  * TODO          goto jpushc;
 648  * TODO       }*/
 649          }
 650       }else{
 651 jpushc:
 652          if(c == '_' /* a_ME_US */)
 653             c = ' ';
 654          n_string_push_c(&s, (char)c);
 655       }
 656    }
 657
 658    out->s = n_string_cp(&s);
 659    out->l = s.s_len;
 660    n_string_gut(n_string_drop_ownership(&s));
 661 jleave:
 662    NYD_LEAVE;
 663    return (out != NULL);
 664 }
 665
 666 FL bool_t
 667 qp_decode_part(struct str *out, struct str const *in, struct str *outrest,
 668       struct str *inrest_or_null){
 669    struct n_string s, *sp;
 670    char const *is, *ie;
 671    NYD_ENTER;
 672
 673    if(outrest->l != 0){
 674       is = out->s;
 675       *out = *outrest;
 676       outrest->s = n_UNCONST(is);
 677       outrest->l = 0;
 678    }
 679
 680    /* n_ERR_OVERFLOW */
 681    if(UIZ_MAX -1 - out->l <= in->l ||
 682          SI32_MAX <= out->l + in->l) /* XXX wrong, we may replace */
 683       goto jerr;
 684
 685    sp = n_string_creat(&s);
 686    sp = n_string_take_ownership(sp, out->s,
 687          (out->l == 0 ? 0 : out->l +1), out->l);
 688    sp = n_string_reserve(sp, in->l + (in->l >> 2));
 689
 690    for(is = in->s, ie = &is[in->l - 1]; is <= ie;){
 691       si32_t c;
 692
 693       if((c = *is++) != '='){
 694 jpushc:
 695          n_string_push_c(sp, (char)c);
 696          continue;
 697       }
 698
 699       /* RFC 2045, 6.7:
 700        *   Therefore, when decoding a Quoted-Printable body, any
 701        *   trailing white space on a line must be deleted, as it will
 702        *   necessarily have been added by intermediate transport
 703        *   agents */
 704       for(; is <= ie && blankchar(*is); ++is)
 705          ;
 706       if(is >= ie){
 707          /* Soft line break? */
 708          if(*is == '\n')
 709             goto jsoftnl;
 710         goto jpushc; /* TODO According to RFC 2045, 6.7,
 711          * ++is; TODO we should warn the user, but have no context
 712          * goto jebody; TODO to do so; can't over and over */
 713       }
 714
 715       /* Not a soft line break? */
 716       if(*is != '\n'){
 717          if((c = n_c_from_hex_base16(is)) >= 0){
 718             is += 2;
 719             goto jpushc;
 720          }
 721          /* Invalid according to RFC 2045, section 6.7 */
 722          /* TODO Follow RFC 2045, 6.7 advise and simply put through */
 723          c = '=';
 724          goto jpushc;
 725 /* TODO jebody:
 726  * TODO   if(n_psonce & n_PSO_UNICODE)
 727  *           n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
 728  * TODO    else{
 729  * TODO       c = '?';
 730  * TODO       goto jpushc;
 731  * TODO    }*/
 732       }
 733
 734       /* CRLF line endings are encoded as QP, followed by a soft line break, so
 735        * check for this special case, and simply forget we have seen one, so as
 736        * not to end up with the entire DOS file in a contiguous buffer */
 737 jsoftnl:
 738       if(sp->s_len > 0 && sp->s_dat[sp->s_len - 1] == '\n'){
 739 #if 0       /* TODO qp_decode_part() we do not normalize CRLF
 740           * TODO to LF because for that we would need
 741           * TODO to know if we are about to write to
 742           * TODO the display or do save the file!
 743           * TODO 'hope the MIME/send layer rewrite will
 744           * TODO offer the possibility to DTRT */
 745          if(sp->s_len > 1 && sp->s_dat[sp->s_len - 2] == '\r')
 746             n_string_push_c(n_string_trunc(sp, sp->s_len - 2), '\n');
 747 #endif
 748          break;
 749       }
 750
 751       /* C99 */{
 752          char *cp;
 753          size_t l;
 754
 755          if((l = PTR2SIZE(ie - is)) > 0){
 756             if(inrest_or_null == NULL)
 757                goto jerr;
 758             n_str_assign_buf(inrest_or_null, is, l);
 759          }
 760          cp = outrest->s;
 761          outrest->s = n_string_cp(sp);
 762          outrest->l = s.s_len;
 763          n_string_drop_ownership(sp);
 764          if(cp != NULL)
 765             free(cp);
 766       }
 767       break;
 768    }
 769
 770    out->s = n_string_cp(sp);
 771    out->l = sp->s_len;
 772    n_string_gut(n_string_drop_ownership(sp));
 773 jleave:
 774    NYD_LEAVE;
 775    return (out != NULL);
 776 jerr:
 777    out->l = 0;
 778    out = NULL;
 779    goto jleave;
 780 }
 781
 782 FL size_t
 783 b64_encode_calc_size(size_t len){
 784    NYD2_ENTER;
 785    if(len >= UIZ_MAX / 4)
 786       len = UIZ_MAX;
 787    else{
 788       len = (len * 4) / 3;
 789       len += (((len / B64_ENCODE_INPUT_PER_LINE) + 1) * 3);
 790       len += 2 + 1; /* CRLF, \0 */
 791    }
 792    NYD2_LEAVE;
 793    return len;
 794 }
 795
 796 FL struct str *
 797 b64_encode(struct str *out, struct str const *in, enum b64flags flags){
 798    ui8_t const *p;
 799    size_t i, lnlen;
 800    char *b64;
 801    NYD_ENTER;
 802
 803    assert(!(flags & B64_NOPAD) ||
 804       !(flags & (B64_CRLF | B64_LF | B64_MULTILINE)));
 805
 806    p = (ui8_t const*)in->s;
 807
 808    if(!(flags & B64_BUF)){
 809       if((i = b64_encode_calc_size(in->l)) == UIZ_MAX){
 810          out = NULL;
 811          goto jleave;
 812       }
 813       out->s = (flags & B64_SALLOC) ? salloc(i) : srealloc(out->s, i);
 814    }
 815    b64 = out->s;
 816
 817    if(!(flags & (B64_CRLF | B64_LF)))
 818       flags &= ~B64_MULTILINE;
 819
 820    for(lnlen = 0, i = in->l; (ssize_t)i > 0; p += 3, i -= 3){
 821       ui32_t a, b, c;
 822
 823       a = p[0];
 824       b64[0] = a_me_b64_enctbl[a >> 2];
 825
 826       switch(i){
 827       case 1:
 828          b64[1] = a_me_b64_enctbl[((a & 0x3) << 4)];
 829          b64[2] =
 830          b64[3] = '=';
 831          break;
 832       case 2:
 833          b = p[1];
 834          b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 835          b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2)];
 836          b64[3] = '=';
 837          break;
 838       default:
 839          b = p[1];
 840          c = p[2];
 841          b64[1] = a_me_b64_enctbl[((a & 0x03) << 4) | ((b & 0xF0u) >> 4)];
 842          b64[2] = a_me_b64_enctbl[((b & 0x0F) << 2) | ((c & 0xC0u) >> 6)];
 843          b64[3] = a_me_b64_enctbl[c & 0x3F];
 844          break;
 845       }
 846
 847       b64 += 4;
 848       if(!(flags & B64_MULTILINE))
 849          continue;
 850       lnlen += 4;
 851       if(lnlen < B64_LINESIZE)
 852          continue;
 853
 854       lnlen = 0;
 855       if(flags & B64_CRLF)
 856          *b64++ = '\r';
 857       if(flags & (B64_CRLF | B64_LF))
 858          *b64++ = '\n';
 859    }
 860
 861    if((flags & (B64_CRLF | B64_LF)) &&
 862          (!(flags & B64_MULTILINE) || lnlen != 0)){
 863       if(flags & B64_CRLF)
 864          *b64++ = '\r';
 865       if(flags & (B64_CRLF | B64_LF))
 866          *b64++ = '\n';
 867    }else if(flags & B64_NOPAD)
 868       while(b64 != out->s && b64[-1] == '=')
 869          --b64;
 870
 871    out->l = PTR2SIZE(b64 - out->s);
 872    out->s[out->l] = '\0';
 873
 874    /* Base64 includes + and /, replace them with _ and -.
 875     * This is base64url according to RFC 4648, then.  Since we only support
 876     * that for encoding and it is only used for boundary strings, this is
 877     * yet a primitive implementation; xxx use tables; support decoding */
 878    if(flags & B64_RFC4648URL){
 879       char c;
 880
 881       for(b64 = out->s; (c = *b64) != '\0'; ++b64)
 882          if(c == '+')
 883             *b64 = '-';
 884          else if(c == '/')
 885                *b64 = '_';
 886    }
 887 jleave:
 888    NYD_LEAVE;
 889    return out;
 890 }
 891
 892 FL struct str *
 893 b64_encode_buf(struct str *out, void const *vp, size_t vp_len,
 894       enum b64flags flags){
 895    struct str in;
 896    NYD_ENTER;
 897
 898    in.s = n_UNCONST(vp);
 899    in.l = vp_len;
 900    out = b64_encode(out, &in, flags);
 901    NYD_LEAVE;
 902    return out;
 903 }
 904
 905 #ifdef notyet
 906 FL struct str *
 907 b64_encode_cp(struct str *out, char const *cp, enum b64flags flags){
 908    struct str in;
 909    NYD_ENTER;
 910
 911    in.s = n_UNCONST(cp);
 912    in.l = strlen(cp);
 913    out = b64_encode(out, &in, flags);
 914    NYD_LEAVE;
 915    return out;
 916 }
 917 #endif /* notyet */
 918
 919 FL bool_t
 920 b64_decode(struct str *out, struct str const *in){
 921    struct str work;
 922    size_t len;
 923    NYD_ENTER;
 924
 925    out->l = 0;
 926
 927    if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
 928       goto jerr;
 929
 930    /* Ignore an empty input, as may happen for an empty final line */
 931    if(work.l == 0)
 932       out->s = srealloc(out->s, 1);
 933    else if(work.l >= 4 && !(work.l & 3)){
 934       out->s = srealloc(out->s, len +1);
 935       if((ssize_t)(len = a_me_b64_decode(out, &work)) < 0)
 936          goto jerr;
 937    }else
 938       goto jerr;
 939    out->s[out->l] = '\0';
 940 jleave:
 941    NYD_LEAVE;
 942    return (out != NULL);
 943 jerr:
 944    out = NULL;
 945    goto jleave;
 946 }
 947
 948 FL bool_t
 949 b64_decode_header(struct str *out, struct str const *in){
 950    struct str outr, inr;
 951    NYD_ENTER;
 952
 953    if(!b64_decode(out, in)){
 954       memset(&outr, 0, sizeof outr);
 955       memset(&inr, 0, sizeof inr);
 956
 957       if(!b64_decode_part(out, in, &outr, &inr) || outr.l > 0 || inr.l > 0)
 958          out = NULL;
 959
 960       if(inr.s != NULL)
 961          free(inr.s);
 962       if(outr.s != NULL)
 963          free(outr.s);
 964    }
 965    NYD_LEAVE;
 966    return (out != NULL);
 967 }
 968
 969 FL bool_t
 970 b64_decode_part(struct str *out, struct str const *in, struct str *outrest,
 971       struct str *inrest_or_null){
 972    struct str work, save;
 973    ui32_t a, b, c, b64l;
 974    char ca, cb, cc, cx;
 975    struct n_string s, workbuf;
 976    size_t len;
 977    NYD_ENTER;
 978
 979    n_string_creat(&s);
 980    if((len = out->l) > 0 && out->s[len] == '\0')
 981       n_string_take_ownership(&s, out->s, len +1, len);
 982    else{
 983       if(len > 0)
 984          n_string_push_buf(&s, out->s, len);
 985       if(out->s != NULL)
 986          free(out->s);
 987    }
 988    out->s = NULL, out->l = 0;
 989    n_string_creat(&workbuf);
 990
 991    if((len = a_me_b64_decode_prepare(&work, in)) == UIZ_MAX)
 992       goto jerr;
 993
 994    if(outrest->l > 0){
 995       n_string_push_buf(&s, outrest->s, outrest->l);
 996       outrest->l = 0;
 997    }
 998
 999    /* n_ERR_OVERFLOW */
1000    if(UIZ_MAX - len <= s.s_len ||
1001          SI32_MAX <= len + s.s_len) /* XXX wrong, we may replace */
1002       goto jerr;
1003
1004    if(work.l == 0)
1005       goto jok;
1006
1007    /* This text decoder is extremely expensive, especially given that in all
1008     * but _invalid_ cases it is not even needed!  So try once to do the normal
1009     * decoding, if that fails, go the hard way */
1010    save = work;
1011    out->s = n_string_resize(&s, len + (out->l = b64l = s.s_len))->s_dat;
1012
1013    if(work.l >= 4 && a_me_b64_decode(out, &work) >= 0){
1014       n_string_trunc(&s, out->l);
1015       if(work.l == 0)
1016          goto jok;
1017    }
1018
1019    n_string_trunc(&s, b64l);
1020    work = save;
1021    out->s = NULL, out->l = 0;
1022
1023    n_UNINIT(ca, 0);
1024    n_UNINIT(cb, 0);
1025    n_UNINIT(cc, 0);
1026    for(b64l = 0;;){
1027       ui32_t x;
1028
1029       x = a_ME_B64_DECUI8((ui8_t)(cx = *work.s));
1030       switch(b64l){
1031       case 0:
1032          if(x >= a_ME_B64_EQU)
1033             goto jrepl;
1034          ca = cx;
1035          a = x;
1036          ++b64l;
1037          break;
1038       case 1:
1039          if(x >= a_ME_B64_EQU)
1040             goto jrepl;
1041          cb = cx;
1042          b = x;
1043          ++b64l;
1044          break;
1045       case 2:
1046          if(x == a_ME_B64_BAD)
1047             goto jrepl;
1048          cc = cx;
1049          c = x;
1050          ++b64l;
1051          break;
1052       case 3:
1053          if(x == a_ME_B64_BAD){
1054 jrepl:
1055             /* TODO This would be wrong since iconv(3) may be applied first! */
1056 #if 0
1057             if(n_psonce & n_PSO_UNICODE)
1058                n_string_push_buf(&s, n_unirepl, sizeof(n_unirepl) -1);
1059             else
1060                n_string_push_c(&s, '?');
1061 #endif
1062             ;
1063          }else if(c == a_ME_B64_EQU && x != a_ME_B64_EQU){
1064             /* This is not only invalid but bogus.  Skip it over! */
1065             /* TODO This would be wrong since iconv(3) may be applied first! */
1066 #if 0
1067             n_string_push_buf(&s, n_UNIREPL n_UNIREPL n_UNIREPL n_UNIREPL,
1068                (sizeof(n_UNIREPL) -1) * 4);
1069 #endif
1070             b64l = 0;
1071          }else{
1072             ui8_t pb;
1073
1074             pb = ((a << 2) | ((b & 0x30) >> 4));
1075             if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1076                n_string_push_c(&s, (char)pb);
1077             pb = (((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
1078             if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1079                n_string_push_c(&s, (char)pb);
1080             if(x != a_ME_B64_EQU){
1081                pb = (((c & 0x03) << 6) | x);
1082                if(pb != (ui8_t)'\r' || !(n_pstate & n_PS_BASE64_STRIP_CR))
1083                   n_string_push_c(&s, (char)pb);
1084             }
1085             ++b64l;
1086          }
1087          break;
1088       }
1089
1090       ++work.s;
1091       if(--work.l == 0){
1092          if(b64l > 0 && b64l != 4){
1093             if(inrest_or_null == NULL)
1094                goto jerr;
1095             inrest_or_null->s = srealloc(inrest_or_null->s, b64l +1);
1096             inrest_or_null->s[0] = ca;
1097             if(b64l > 1)
1098                inrest_or_null->s[1] = cb;
1099             if(b64l > 2)
1100                inrest_or_null->s[2] = cc;
1101             inrest_or_null->s[inrest_or_null->l = b64l] = '\0';
1102          }
1103          goto jok;
1104       }
1105       if(b64l == 4)
1106          b64l = 0;
1107    }
1108
1109 jok:
1110    out->s = n_string_cp(&s);
1111    out->l = s.s_len;
1112    n_string_drop_ownership(&s);
1113 jleave:
1114    n_string_gut(&workbuf);
1115    n_string_gut(&s);
1116    NYD_LEAVE;
1117    return (out != NULL);
1118 jerr:
1119    out = NULL;
1120    goto jleave;
1121 }
1122
1123 /* s-it-mode */