From 0add96eaa98829973b89283cd4d4b164903335cc Mon Sep 17 00:00:00 2001 From: "Steffen (Daode) Nurpmeso" Date: Fri, 26 Sep 2014 17:05:53 +0200 Subject: [PATCH] mime.c:mime_write_tohdr(): complete rewrite (Peter Hofmann).. This function has long been known to be very, very sick. It must be said that the S-nail version was even more broken than the original Heirloom mailx one, not only because it was possible to crash it with overlong input. (On the other hand, Heirloom mailx will generate invalid message headers for such input.) This new version is still crappy because S-nail doesn't know about RFC 2231, doesn't honour RFC 2047 completely, and because we still don't make a distinction between structured and unstructured headers etc.; all of that has been in the TODO file for almost two years now, awaiting the big MIME- and send-layer rewrite that hopefully addresses all these problems for the better. There are a lot more TODO statements in the function itself on what is wrong. I first considered at least addressing the issue with broken multibyte sequences by also rewriting all callers and by moving the iconv(3) into mime_write_tohdr(), where it should belong, so that we can actually generate self-contained encoded-words instead of possibly separating multibyte sequences because we are working on already iconv(3)d data (oh what a crap), but this would require more testing and overall the standards RFC 2047 and RFC 2231 are also crap. Erm. Well, no, eh, yes, but I cannot do it right now, it has always been broken and it will take another two years and then we hopefully have a really correct implementation. Shit. Sigh. I credit Peter Hofmann because it was he who peeked at the shitty MIME handling of S-nail which resulted in [c299c45] (Fix QP encoding canary violation, 2014-09-22), and was it so strange to just change the input a bit. 
--- mime.c | 424 ++++++++++++++++++++++++++++++++++++++----------------------- mime_cte.c | 83 +++++++----- nail.h | 27 ++-- nailfuns.h | 8 +- 4 files changed, 343 insertions(+), 199 deletions(-) diff --git a/mime.c b/mime.c index f041d1d7..99547bc1 100644 --- a/mime.c +++ b/mime.c @@ -379,180 +379,286 @@ jleave: } static ssize_t -mime_write_tohdr(struct str *in, FILE *fo) /* TODO rewrite - FAST! */ +mime_write_tohdr(struct str *in, FILE *fo) { - struct str cin, cout; - char buf[B64_LINESIZE +1]; /* (No CR/LF used) */ - char const *charset7, *charset, *upper, *wbeg, *wend, *lastspc, - *lastwordend = NULL; - size_t col = 0, quoteany, wr, charsetlen, - maxcol = 65 /* there is the header field's name, too */; - ssize_t sz = 0; - bool_t highbit, mustquote, broken; + /* TODO mime_write_tohdr(): we don't know the name of our header->maxcol.. + * TODO MIME/send layer rewrite: more available state!! + * TODO Because of this we cannot make a difference in between structured + * TODO and unstructured headers (RFC 2047, 5. (2)) + * TODO NOT MULTIBYTE SAFE IF AN ENCODED WORD HAS TO BE SPLITTED! + * TODO To be better we had to mbtowc_l() (non-std! and no locale!!) and + * TODO work char-wise! -> S-CText.. + * TODO The real problem for STD compatibility is however that "in" is + * TODO already iconv(3) encoded to the target character set! We could + * TODO also solve it (very expensively!) if we would narrow down to an + * TODO encoded word and then iconv(3)+CTencode in one go, in which case + * TODO multibyte errors could be catched! + * TODO All this doesn't take any care about RFC 2231, but simply and + * TODO falsely applies RFC 2047 and normal RFC 822/5322 folding to values + * TODO of parameters; part of the problem is that we just don't make a + * TODO difference in structured and unstructed headers, as long in TODO! + * TODO See also RFC 2047, 5., .." These are the ONLY locations".. + * TODO So, for now we require mutt(1)s "rfc2047_parameters=yes" support!! 
+ * TODO BTW.: the purpose of QP is to allow non MIME-aware ASCII guys to + * TODO read the field nonetheless... */ + enum { + /* Maximum line length *//* XXX we are too inflexible and could use + * XXX MIME_LINELEN unless an RFC 2047 encoding was actually used */ + _MAXCOL = MIME_LINELEN_RFC2047 + }; + enum { + _FIRST = 1<<0, /* Nothing written yet, start of string */ + _NO_QP = 1<<1, /* No quoted-printable allowed */ + _NO_B64 = 1<<2, /* Ditto, base64 */ + _ENC_LAST = 1<<3, /* Last round generated encoded word */ + _SHOULD_BEE = 1<<4, /* Avoid lines longer than SHOULD via encoding */ + _RND_SHIFT = 5, + _RND_MASK = (1<<_RND_SHIFT) - 1, + _SPACE = 1<<(_RND_SHIFT+1), /* Leading whitespace */ + _8BIT = 1<<(_RND_SHIFT+2), /* High bit set */ + _ENCODE = 1<<(_RND_SHIFT+3), /* Need encoding */ + _ENC_B64 = 1<<(_RND_SHIFT+4), /* - let it be base64 */ + _OVERLONG = 1<<(_RND_SHIFT+5) /* Temporarily rised limit */ + } flags = _FIRST; + + struct str cout, cin; + char const *cset7, *cset8, *wbot, *upper, *wend, *wcur; + ui32_t cset7_len, cset8_len; + size_t col, i, j; + ssize_t sz; NYD_ENTER; - charset7 = charset_get_7bit(); - charset = _CS_ITER_GET(); /* TODO MIME/send layer: iter active? iter! else */ - wr = strlen(charset7); - charsetlen = strlen(charset); - charsetlen = MAX(charsetlen, wr); - upper = in->s + in->l; - - /* xxx note this results in too much hits since =/? force quoting even - * xxx if they don't form =? etc. */ - quoteany = mime_cte_mustquote(in->s, in->l, TRU1); - - highbit = FAL0; - if (quoteany != 0) - for (wbeg = in->s; wbeg < upper; ++wbeg) - if ((ui8_t)*wbeg & 0x80) { - highbit = TRU1; - if (charset == NULL) { - sz = -1; - goto jleave; - } - break; - } + cout.s = NULL, cout.l = 0; + cset7 = charset_get_7bit(); + cset7_len = (ui32_t)strlen(cset7); + cset8 = _CS_ITER_GET(); /* TODO MIME/send layer: iter active? iter! 
else */ + cset8_len = (ui32_t)strlen(cset8); + + /* RFC 1468, "MIME Considerations": + * ISO-2022-JP may also be used in MIME Part 2 headers. The "B" + * encoding should be used with ISO-2022-JP text. */ + /* TODO of course, our current implementation won't deal properly with + * TODO any stateful encoding at all... (the standard says each encoded + * TODO word must include all necessary reset sequences..., i.e., each + * TODO encoded word must be a self-contained iconv(3) life cycle) */ + if (!asccasecmp(cset8, "iso-2022-jp")) + flags |= _NO_QP; + + wbot = in->s; + upper = wbot + in->l; + col = sizeof("Content-Transfer-Encoding: ") -1; /* dreadful thing */ + + for (sz = 0; wbot < upper; flags &= ~_FIRST, wbot = wend) { + flags &= _RND_MASK; + wcur = wbot; + while (wcur < upper && whitechar(*wcur)) { + flags |= _SPACE; + ++wcur; + } - /* Use base64 encoding if more than 25% of the line must be quoted, - * otherwise step over the data and encode quoted-printable as necessary */ - if (quoteany << 2 > in->l) { - for (wbeg = in->s; wbeg < upper; wbeg = wend) { - wend = upper; - cin.s = UNCONST(wbeg); - for (;;) { - cin.l = PTR2SIZE(wend - wbeg); - if (cin.l * 4/3 + 7 + charsetlen < maxcol - col) { - cout.s = buf; - cout.l = sizeof buf; - wr = fprintf(fo, "=?%s?B?%s?=", (highbit ? charset : charset7), - b64_encode(&cout, &cin, B64_BUF)->s); - sz += wr; - col += wr; - if (wend < upper) { - fwrite("\n ", sizeof(char), 2, fo); - sz += 2; - col = 0; - maxcol = 76; - } - break; - } else { - if (col) { - fprintf(fo, "\n "); - sz += 2; - col = 0; - maxcol = 76; - } else - wend -= 4; - } - } + /* Any occurrence of whitespace resets prevention of lines >SHOULD via + * enforced encoding (xxx SHOULD, but.. encoding is expensive!!) */ + if (flags & _SPACE) + flags &= ~_SHOULD_BEE; + + /* Data ends with WS - dump it and done. 
+ * Also, if we have seen multiple successive whitespace characters, then + * if there was no encoded word last, i.e., if we can simply take them + * over to the output as-is, keep one WS for possible later separation + * purposes and simply print the others as-is, directly! */ + if (wcur == upper) { + wend = wcur; + goto jnoenc_putws; } - } else { - broken = FAL0; - for (wbeg = in->s; wbeg < upper; wbeg = wend) { - lastspc = NULL; - while (wbeg < upper && whitechar(*wbeg)) { - lastspc = lastspc ? lastspc : wbeg; - ++wbeg; - ++col; - broken = FAL0; - } - if (wbeg == upper) { - if (lastspc) - while (lastspc < wbeg) { - putc(*lastspc&0377, fo); - ++lastspc; - ++sz; - } + if ((flags & (_ENC_LAST | _SPACE)) == _SPACE && wcur - wbot > 1) { + wend = wcur - 1; + goto jnoenc_putws; + } + + /* Skip over a word to next non-whitespace, keep track along the way + * wether our 7-bit charset suffices to represent the data */ + for (wend = wcur; wend < upper; ++wend) { + if (whitechar(*wend)) break; - } + if ((uc_i)*wend & 0x80) + flags |= _8BIT; + } - if (lastspc != NULL) - broken = FAL0; - highbit = FAL0; - for (wend = wbeg; wend < upper && !whitechar(*wend); ++wend) - if ((ui8_t)*wend & 0x80) - highbit = TRU1; - mustquote = (mime_cte_mustquote(wbeg, PTR2SIZE(wend - wbeg), TRU1) - != 0); - - if (mustquote || broken || - (PTR2SIZE(wend - wbeg) >= 76-5 && quoteany)) { - for (cout.s = NULL;;) { - cin.s = UNCONST(lastwordend ? lastwordend : wbeg); - cin.l = PTR2SIZE(wend - cin.s); - qp_encode(&cout, &cin, QP_ISHEAD); - wr = cout.l + charsetlen + 7; -jqp_retest: - if (col <= maxcol && wr <= maxcol - col) { - if (lastspc) { - /* TODO because we included the WS in the encoded str, - * TODO put SP only?? - * TODO RFC: "any 'linear-white-space' that separates - * TODO a pair of adjacent 'encoded-word's is ignored" */ - putc(' ', fo); - ++sz; - ++col; - } - fprintf(fo, "=?%s?Q?%.*s?=", - (highbit ? 
charset : charset7), (int)cout.l, cout.s); - sz += wr; - col += wr; - break; - } else if (col > 1) { - /* TODO assuming SP separator, ignore *lastspc* !?? */ - broken = TRU1; - if (lastspc != NULL) { - putc('\n', fo); - ++sz; - col = 0; - } else { - fputs("\n ", fo); - sz += 2; - col = 1; - } - maxcol = 76; - goto jqp_retest; - } else { - for (;;) { /* XXX */ - wend -= 4; - assert(wend > wbeg); - if (wr - 4 < maxcol) - break; - wr -= 4; - } - } - } - if (cout.s != NULL) - free(cout.s); - lastwordend = wend; - } else { - if (col && PTR2SIZE(wend - wbeg) > maxcol - col) { + /* Decide wether the range has to become encoded or not */ + i = PTR2SIZE(wend - wcur); + j = mime_cte_mustquote(wcur, i, MIMECTE_ISHEAD); + /* If it just cannot fit on a SHOULD line length, force encode */ + if (i >= _MAXCOL) { + flags |= _SHOULD_BEE; /* (Sigh: SHOULD only, not MUST..) */ + goto j_beejump; + } + if ((flags & _SHOULD_BEE) || j > 0) { +j_beejump: + flags |= _ENCODE; + /* Use base64 if requested or more than 50% -37.5-% of the bytes of + * the string need to be encoded */ + if ((flags & _NO_QP) || j >= i >> 1)/*(i >> 2) + (i >> 3))*/ + flags |= _ENC_B64; + } + DBG( if (flags & _8BIT) assert(flags & _ENCODE); ) + + if (!(flags & _ENCODE)) { + /* Encoded word produced, but no linear whitespace for necessary RFC + * 2047 separation? Generate artificial data (bad standard!) */ + if ((flags & (_ENC_LAST | _SPACE)) == _ENC_LAST) { + if (col >= _MAXCOL) { putc('\n', fo); ++sz; col = 0; - maxcol = 76; - if (lastspc == NULL) { - putc(' ', fo); - ++sz; - --maxcol; - } else - maxcol -= PTR2SIZE(wbeg - lastspc); } - if (lastspc) - while (lastspc < wbeg) { - putc(*lastspc&0377, fo); - ++lastspc; - ++sz; + putc(' ', fo); + ++sz; + ++col; + } + +jnoenc_putws: + flags &= ~_ENC_LAST; + + /* todo No effort here: (1) v15.0 has to bring complete rewrite, + * todo (2) the standard is braindead and (3) usually this is one + * todo word only, and why be smarter than the standard? 
*/ +jnoenc_retry: + i = PTR2SIZE(wend - wbot); + if (i + col <= (flags & _OVERLONG ? MIME_LINELEN_MAX : _MAXCOL)) { + i = fwrite(wbot, sizeof *wbot, i, fo); + sz += i; + col += i; + continue; + } + + /* Doesn't fit, try to break the line first; */ + if (col > 1) { + putc('\n', fo); + if (whitechar(*wbot)) { + putc((uc_i)*wbot, fo); + ++wbot; + } else + putc(' ', fo); /* Bad standard: artificial data! */ + sz += 2; + col = 1; + flags |= _OVERLONG; + goto jnoenc_retry; + } + + /* It is so long that it needs to be broken, effectively causing + * artificial spaces to be inserted (bad standard), yuck */ + /* todo This is not multibyte safe, as above; and completely stupid + * todo P.S.: our _SHOULD_BEE prevents these cases in the meanwhile */ + wcur = wbot + MIME_LINELEN_MAX - 8; + while (wend > wcur) + wend -= 4; + goto jnoenc_retry; + } else { + /* Encoding to encoded word(s); deal with leading whitespace, place + * a separator first as necessary: encoded words must always be + * separated from text and other encoded words with linear WS. + * And if an encoded word was last, intermediate whitespace must + * also be encoded, otherwise it would get stripped away! */ + wcur = UNCONST(""); + if ((flags & (_ENC_LAST | _SPACE)) != _SPACE) { + /* Reinclude whitespace */ + flags &= ~_SPACE; + /* We don't need to place a separator at the very beginning */ + if (!(flags & _FIRST)) + wcur = UNCONST(" "); + } else + wcur = wbot++; + + flags |= _ENC_LAST; + + /* RFC 2047: + * An 'encoded-word' may not be more than 75 characters long, + * including 'charset', 'encoding', 'encoded-text', and + * delimiters. If it is desirable to encode more text than will + * fit in an 'encoded-word' of 75 characters, multiple + * 'encoded-word's (separated by CRLF SPACE) may be used. 
+ * + * While there is no limit to the length of a multiple-line + * header field, each line of a header field that contains one + * or more 'encoded-word's is limited to 76 characters */ +jenc_retry: + cin.s = UNCONST(wbot); + cin.l = PTR2SIZE(wend - wbot); + + if (flags & _ENC_B64) + j = b64_encode(&cout, &cin, B64_ISHEAD | B64_ISENCWORD)->l; + else + j = qp_encode(&cout, &cin, QP_ISHEAD | QP_ISENCWORD)->l; + /* (Avoid trigraphs in the RFC 2047 placeholder..) */ + i = j + (flags & _8BIT ? cset8_len : cset7_len) + sizeof("=!!B!!=") -1; + if (*wcur != '\0') + ++i; + +jenc_retry_same: + /* Unfortunately RFC 2047 explicitly disallows encoded words to be + * longer (just like RFC 5322's "a line SHOULD fit in 78 but MAY be + * 998 characters long"), so we cannot use the _OVERLONG mechanism, + * even though all tested mailers seem to support it */ + if (i + col <= (/*flags & _OVERLONG ? MIME_LINELEN_MAX :*/ _MAXCOL)) { + fprintf(fo, "%.1s=?%s?%c?%.*s?=", + wcur, (flags & _8BIT ? cset8 : cset7), + (flags & _ENC_B64 ? 'B' : 'Q'), + (int)cout.l, cout.s); + sz += i; + col += i; + continue; + } + + /* Doesn't fit, try to break the line first */ + /* TODO I've commented out the _FIRST test since we (1) cannot do + * TODO _OVERLONG since (MUAs support but) the standard disallows, + * TODO and because of our iconv problem i prefer an empty first line + * TODO in favour of a possibly messed up multibytes character. 
:-( */ + if (col > 1 /* TODO && !(flags & _FIRST)*/) { + putc('\n', fo); + sz += 2; + col = 1; + if (!(flags & _SPACE)) { + putc(' ', fo); + wcur = UNCONST(""); + /*flags |= _OVERLONG;*/ + goto jenc_retry_same; + } else { + putc((uc_i)*wcur, fo); + if (whitechar(*(wcur = wbot))) + ++wbot; + else { + flags &= ~_SPACE; + wcur = UNCONST(""); } - wr = fwrite(wbeg, sizeof *wbeg, PTR2SIZE(wend - wbeg), fo); - sz += wr; - col += wr; - lastwordend = NULL; + /*flags &= ~_OVERLONG;*/ + goto jenc_retry; + } } + + /* It is so long that it needs to be broken, effectively causing + * artificial data to be inserted (bad standard), yuck */ + /* todo This is not multibyte safe, as above */ + /*if (!(flags & _OVERLONG)) { + flags |= _OVERLONG; + goto jenc_retry; + }*/ + i = PTR2SIZE(wend - wbot) + !!(flags & _SPACE); + j = 3 + !(flags & _ENC_B64); + for (;;) { + wend -= j; + i -= j; + /* (Note the problem most likely is the transfer-encoding blow, + * which is why we test this *after* the decrements.. */ + if (i <= _MAXCOL) + break; + } + goto jenc_retry; } } -jleave: + + if (cout.s != NULL) + free(cout.s); NYD_LEAVE; return sz; } diff --git a/mime_cte.c b/mime_cte.c index 7e72b972..8284162c 100644 --- a/mime_cte.c +++ b/mime_cte.c @@ -1,5 +1,5 @@ /*@ S-nail - a mail user agent derived from Berkeley Mail. - *@ Content-Transfer-Encodings as defined in RFC 2045: + *@ Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047): *@ - Quoted-Printable, section 6.7 *@ - Base64, section 6.8 * @@ -47,10 +47,11 @@ enum _qact { SP = 2, /* sp */ XF = 3, /* Special character 'F' - maybe quoted */ XD = 4, /* Special character '.' - maybe quoted */ - US = '_', /* In header, special character ' ' quoted as '_' */ + UU = 5, /* In header, _ must be quoted in encoded word */ + US = '_', /* In header, ' ' must be quoted as _ in encoded word */ QM = '?', /* In header, special character ? 
not always quoted */ - EQ = Q, /* '=' must be quoted */ - TB = SP, /* Treat '\t' as a space */ + EQ = '=', /* In header, '=' must be quoted in encoded word */ + HT ='\t', /* In body HT=SP, in head HT=HT, but quote in encoded word */ NL = N, /* Don't quote '\n' (NL) */ CR = Q /* Always quote a '\r' (CR) */ }; @@ -60,10 +61,10 @@ enum _qact { * - also quote SP (as the underscore _), TAB, ?, _, CR, LF * - don't care about the special ^F[rom] and ^.$ */ static ui8_t const _qtab_body[] = { - Q, Q, Q, Q, Q, Q, Q, Q, Q,TB,NL, Q, Q,CR, Q, Q, + Q, Q, Q, Q, Q, Q, Q, Q, Q,SP,NL, Q, Q,CR, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N, - N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N, N, + N, N, N, N, N, N, N, N, N, N, N, N, N, Q, N, N, N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, @@ -71,43 +72,44 @@ static ui8_t const _qtab_body[] = { N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q, }, _qtab_head[] = { - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, + Q, Q, Q, Q, Q, Q, Q, Q, Q,HT, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, US, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N,QM, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q, + N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,UU, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q, }; -/* Check wether **s* must be quoted according to *ishead*, else body rules; - * *sol* indicates wether we are at the first character of a line/field */ +/* Check wether *s must be quoted according to flags, else body rules; + * sol indicates wether we are at the first character of a line/field */ SINLINE enum _qact _mustquote(char const *s, char const *e, bool_t sol, - bool_t ishead); + enum mimecte_flags flags); /* Convert c to/from a hexadecimal character string */ SINLINE char * 
_qp_ctohex(char *store, char c); SINLINE si32_t _qp_cfromhex(char const *hex); -/* Trim WS and make *work* point to the decodable range of *in*. +/* Trim WS and make work point to the decodable range of in* * Return the amount of bytes a b64_decode operation on that buffer requires */ static size_t _b64_decode_prepare(struct str *work, struct str const *in); -/* Perform b64_decode on sufficiently spaced & multiple-of-4 base *in*put. - * Return number of useful bytes in *out* or -1 on error */ +/* Perform b64_decode on sufficiently spaced & multiple-of-4 base in(put). + * Return number of useful bytes in out or -1 on error */ static ssize_t _b64_decode(struct str *out, struct str *in); SINLINE enum _qact -_mustquote(char const *s, char const *e, bool_t sol, bool_t ishead) +_mustquote(char const *s, char const *e, bool_t sol, enum mimecte_flags flags) { ui8_t const *qtab; enum _qact a, r; NYD2_ENTER; - qtab = ishead ? _qtab_head : _qtab_body; + qtab = (flags & (MIMECTE_ISHEAD | MIMECTE_ISENCWORD)) + ? _qtab_head : _qtab_body; a = ((ui8_t)*s > 0x7F) ? Q : qtab[(ui8_t)*s]; if ((r = a) == N || (r = a) == Q) @@ -115,12 +117,22 @@ _mustquote(char const *s, char const *e, bool_t sol, bool_t ishead) r = Q; /* Special header fields */ - if (ishead) { - /* ' ' -> '_' */ - if (a == US) { - r = US; - goto jleave; + if (flags & (MIMECTE_ISHEAD | MIMECTE_ISENCWORD)) { + /* Special massage for encoded words */ + if (flags & MIMECTE_ISENCWORD) { + switch (a) { + case HT: + case US: + case EQ: + r = a; + /* FALLTHRU */ + case UU: + goto jleave; + default: + break; + } } + /* Treat '?' only special if part of '=?' .. 
'?=' (still too much quoting * since it's '=?CHARSET?CTE?stuff?=', and especially the trailing ?= * should be hard too match */ @@ -315,16 +327,26 @@ mime_hexseq_to_char(char const *hex) } FL size_t -mime_cte_mustquote(char const *ln, size_t lnlen, bool_t ishead) +mime_cte_mustquote(char const *ln, size_t lnlen, enum mimecte_flags flags) { - size_t ret; + size_t rv; bool_t sol; NYD_ENTER; - for (ret = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen) - ret += (_mustquote(ln, ln + lnlen, sol, ishead) != N); + for (rv = 0, sol = TRU1; lnlen > 0; sol = FAL0, ++ln, --lnlen) + switch (_mustquote(ln, ln + lnlen, sol, flags)) { + case US: + case EQ: + case HT: + assert(flags & MIMECTE_ISENCWORD); + /* FALLTHRU */ + case N: + continue; + default: + ++rv; + } NYD_LEAVE; - return ret; + return rv; } FL size_t @@ -405,8 +427,11 @@ qp_encode(struct str *out, struct str const *in, enum qpflags flags) /* QP_ISHEAD? */ if (!sol) { + enum mimecte_flags ctef = MIMECTE_ISHEAD | + (flags & QP_ISENCWORD ? MIMECTE_ISENCWORD : 0); + for (seenx = FAL0, sol = TRU1; is < ie; sol = FAL0, ++qp) { - enum _qact mq = _mustquote(is, ie, sol, TRU1); + enum _qact mq = _mustquote(is, ie, sol, ctef); char c = *is++; if (mq == N) { @@ -429,7 +454,7 @@ jheadq: /* The body needs to take care for soft line breaks etc. */ for (lnlen = 0, seenx = FAL0; is < ie; sol = FAL0) { - enum _qact mq = _mustquote(is, ie, sol, FAL0); + enum _qact mq = _mustquote(is, ie, sol, MIMECTE_NONE); char c = *is++; if (mq == N && (c != '\n' || !seenx)) { @@ -526,7 +551,7 @@ jehead: */ *oc++ = '?'; } } else - *oc++ = (c == '_') ? ' ' : (char)c; + *oc++ = (c == '_' /* US */) ? 
' ' : (char)c; } goto jleave; /* XXX QP decode, header: errors not reported */ } diff --git a/nail.h b/nail.h index 0f34e9b3..ba8a13ea 100644 --- a/nail.h +++ b/nail.h @@ -194,11 +194,11 @@ #define MIME_LINELEN_LIMIT (MIME_LINELEN_MAX - 48) /* Ditto, SHOULD */ -#define MIME_LINELEN_NORM 78 /* Plus CRLF */ +#define MIME_LINELEN 78 /* Plus CRLF */ /* And in headers which contain an encoded word according to RFC 2047 there is - * yet another limit */ -#define MIME_LINELEN_HEADER_WITH_ENCODED_WORD_MAX 76 + * yet another limit; also RFC 2045: 6.7, (5). */ +#define MIME_LINELEN_RFC2047 76 /* Locations of mime.types(5) */ #define MIME_TYPES_USR "~/.mime.types" @@ -759,7 +759,7 @@ enum mimecontent { #define B64_ENCODE_INPUT_PER_LINE 57 /* Max. input for Base64 encode/line */ /* xxx QP came later, maybe rewrite all to use mimecte_flags directly? */ -enum __mimecte_flags { +enum mimecte_flags { MIMECTE_NONE, MIMECTE_SALLOC = 1<<0, /* Use salloc(), not srealloc().. */ /* ..result .s,.l point to user buffer of *_LINESIZE+[+[+]] bytes instead */ @@ -769,15 +769,23 @@ enum __mimecte_flags { /* (encode) If one of _CRLF/_LF is set, honour *_LINESIZE+[+[+]] and * inject the desired line-ending whenever a linewrap is desired */ MIMECTE_MULTILINE = 1<<4, - /* (encode) Quote with header rules, do not generate soft NL breaks? */ - MIMECTE_ISHEAD = 1<<5 + /* (encode) Quote with header rules, do not generate soft NL breaks? 
+ * For mustquote(), specifies wether special RFC 2047 header rules + * should be used instead */ + MIMECTE_ISHEAD = 1<<5, + /* (encode) Ditto; for mustquote() this furtherly fine-tunes behaviour in + * that characters which would not be reported as "must-quote" when + * detecting wether quoting is necessary at all will be reported as + * "must-quote" if they have to be encoded in an encoded word */ + MIMECTE_ISENCWORD = 1<<6 }; enum qpflags { QP_NONE = MIMECTE_NONE, QP_SALLOC = MIMECTE_SALLOC, QP_BUF = MIMECTE_BUF, - QP_ISHEAD = MIMECTE_ISHEAD + QP_ISHEAD = MIMECTE_ISHEAD, + QP_ISENCWORD = MIMECTE_ISENCWORD }; enum b64flags { @@ -786,7 +794,10 @@ enum b64flags { B64_BUF = MIMECTE_BUF, B64_CRLF = MIMECTE_CRLF, B64_LF = MIMECTE_LF, - B64_MULTILINE = MIMECTE_MULTILINE + B64_MULTILINE = MIMECTE_MULTILINE, + /* Not used, but for clarity only */ + B64_ISHEAD = MIMECTE_ISHEAD, + B64_ISENCWORD = MIMECTE_ISENCWORD }; enum oflags { diff --git a/nailfuns.h b/nailfuns.h index a4792068..391ce007 100644 --- a/nailfuns.h +++ b/nailfuns.h @@ -1224,7 +1224,7 @@ FL ssize_t xmime_write(char const *ptr, size_t size, /* TODO LEGACY */ /* * mime_cte.c - * Content-Transfer-Encodings as defined in RFC 2045: + * Content-Transfer-Encodings as defined in RFC 2045 (and RFC 2047): * - Quoted-Printable, section 6.7 * - Base64, section 6.8 */ @@ -1236,8 +1236,10 @@ FL ssize_t xmime_write(char const *ptr, size_t size, /* TODO LEGACY */ FL char * mime_char_to_hexseq(char store[3], char c); FL si32_t mime_hexseq_to_char(char const *hex); -/* How many characters of (the complete body) ln need to be quoted */ -FL size_t mime_cte_mustquote(char const *ln, size_t lnlen, bool_t ishead); +/* How many characters of (the complete body) ln need to be quoted. + * Only MIMECTE_ISHEAD and MIMECTE_ISENCWORD are understood */ +FL size_t mime_cte_mustquote(char const *ln, size_t lnlen, + enum mimecte_flags flags); /* How much space is necessary to encode len bytes in QP, worst case. 
* Includes room for terminator */ -- 2.11.4.GIT