make-config.in: complete path (leftover of [807f64e2], 2015-12-26!)
[s-mailx.git] / mime-param.c
blob5e85509c7f9cc83e72caf0ba2b09e50173a791f2
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ MIME parameter handling.
 *
 * Copyright (c) 2016 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
 * SPDX-License-Identifier: ISC
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
19 #undef n_FILE
20 #define n_FILE mime_param
22 #ifndef HAVE_AMALGAMATION
23 # include "nail.h"
24 #endif
26 struct rfc2231_joiner {
27 struct rfc2231_joiner *rj_next;
28 ui32_t rj_no; /* Continuation number */
29 ui32_t rj_len; /* of useful data in .rj_dat */
30 ui32_t rj_val_off; /* Start of value data therein */
31 ui32_t rj_cs_len; /* Length of charset part */
32 bool_t rj_is_enc; /* Is percent encoded */
33 ui8_t __pad[7];
34 char const *rj_dat;
37 struct mime_param_builder {
38 struct mime_param_builder *mpb_next;
39 struct str *mpb_result;
40 ui32_t mpb_level; /* of recursion (<-> continuation number) */
41 ui32_t mpb_name_len; /* of the parameter .mpb_name */
42 ui32_t mpb_value_len; /* of remaining value */
43 ui32_t mpb_charset_len; /* of .mpb_charset (only in outermost level) */
44 ui32_t mpb_buf_len; /* Usable result of this level in .mpb_buf */
45 bool_t mpb_is_enc; /* Level requires encoding */
46 ui8_t __dummy[1];
47 bool_t mpb_is_utf8; /* Encoding is UTF-8 */
48 si8_t mpb_rv;
49 char const *mpb_name;
50 char const *mpb_value; /* Remains of, once the level was entered */
51 char const *mpb_charset; /* *ttycharset* */
52 char *mpb_buf; /* Pointer to on-stack buffer */
55 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
56 static bool_t const _rfc2231_etab[] = {
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* NUL..SI */
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* DLE..US */
59 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, /* CAN.. / */
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 0.. ? */
62 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @.. O */
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, /* P.. _ */
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* `.. o */
65 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p..DEL */
68 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
69 * entire construct, return pointer to the first byte thereafter or to NUL */
70 static char const * _mime_param_skip(char const *hbp);
72 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
73 * On successful return (1,-1; -1 is returned if the value was quoted via
74 * double quotation marks) a set end_or_null points to after the value and any
75 * possible separator and result->s is the autorec_alloc()d normalized value */
76 static si8_t _mime_param_value_trim(struct str *result, char const *start,
77 char const **end_or_null);
79 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
80 * extended syntax: perform full RFC 2231 parsing starting at this point.
81 * Note that _join() returns is-error */
82 static char * _rfc2231_param_parse(char const *param, size_t plen,
83 char const *hbp);
84 static bool_t __rfc2231_join(struct rfc2231_joiner *head, char **result,
85 char const **emsg);
87 /* Recursive parameter builder. Note we have a magic limit of 999 levels.
88 * Prepares a portion of output in self->mpb_buf;
89 * once >mpb_value is worked completely the deepmost level joins the result
90 * into >mpb_result and unrolls the stack. */
91 static void _mime_param_create(struct mime_param_builder *self);
92 static void __mime_param_join(struct mime_param_builder *head);
94 static char const *
95 _mime_param_skip(char const *hbp)
97 char co, cn;
98 NYD2_ENTER;
100 /* Skip over parameter name - note we may have skipped over an entire
101 * parameter name and thus point to a "="; i haven't yet truly checked
102 * against MIME RFCs, just test for ";" in the meanwhile XXX */
103 while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
104 ++hbp;
105 if (cn == '\0')
106 goto jleave;
107 ++hbp;
108 if (cn == ';')
109 goto jleave;
111 while (whitechar((cn = *hbp))) /* XXX */
112 ++hbp;
113 if (cn == '\0')
114 goto jleave;
116 if (cn == '"') {
117 co = '\0';
118 while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
119 co = (co == '\\') ? '\0' : cn;
120 if (cn != '\0' && (cn = *++hbp) == ';')
121 ++hbp;
122 } else {
123 for (;; cn = *++hbp)
124 if (cn == '\0' || cn == ';' || whitechar(cn))
125 break;
126 if (cn != '\0')
127 ++hbp;
129 jleave:
130 NYD2_LEAVE;
131 return hbp;
134 static si8_t
135 _mime_param_value_trim(struct str *result, char const *start,
136 char const **end_or_null)
138 char const *e;
139 char co, cn;
140 size_t i;
141 si8_t rv;
142 NYD2_ENTER;
144 while (whitechar(*start)) /* XXX? */
145 ++start;
147 if (*start == '"') {
148 for (co = '\0', e = ++start;; ++e)
149 if ((cn = *e) == '\0')
150 goto jerr;
151 else if (cn == '"' && co != '\\')
152 break;
153 else if (cn == '\\' && co == '\\')
154 co = '\0';
155 else
156 co = cn;
157 i = PTR2SIZE(e++ - start);
158 rv = -TRU1;
159 } else {
160 for (e = start; (cn = *e) != '\0' && !whitechar(cn) && cn != ';'; ++e)
162 i = PTR2SIZE(e - start);
163 rv = TRU1;
166 result->s = n_autorec_alloc(i +1);
167 if (rv > 0) {
168 memcpy(result->s, start, result->l = i);
169 result->s[i] = '\0';
170 } else {
171 size_t j;
172 char *cp;
174 for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
175 cn = *start++;
176 if (cn != '\\' || co == '\\') {
177 cp[j++] = cn;
178 if (cn == '\\')
179 cn = '\0';
182 cp[j] = '\0';
184 result->s = cp;
185 result->l = j;
188 if (end_or_null != NULL) {
189 while (*e != '\0' && *e == ';')
190 ++e;
191 *end_or_null = e;
193 jleave:
194 NYD2_LEAVE;
195 return rv;
196 jerr:
197 rv = FAL0;
198 goto jleave;
201 static char *
202 _rfc2231_param_parse(char const *param, size_t plen, char const *hbp)
204 /* TODO Do it for real and unite with mime_param_get() */
205 struct str xval;
206 char nobuf[32], *eptr, *rv = NULL, c;
207 char const *hbp_base, *cp, *emsg = NULL;
208 struct rfc2231_joiner *head = NULL, *np;
209 bool_t errors = FAL0;
210 size_t i;
211 NYD2_ENTER;
213 /* We were called by mime_param_get() after a param name match that
214 * involved "*", so jump to the matching code */
215 hbp_base = hbp;
216 goto jumpin;
218 for (; *hbp != '\0'; hbp_base = hbp) {
219 while (whitechar(*hbp))
220 ++hbp;
222 if (!ascncasecmp(hbp, param, plen)) {
223 hbp += plen;
224 while (whitechar(*hbp))
225 ++hbp;
226 if (*hbp++ != '*')
227 goto jerr;
229 /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
230 * parameter continuation and the lone asterisk "*" percent encoded
231 * values -- if encoding is used the "*0" or lone parameter value
232 * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
233 * where both of CHARSET and LANGUAGE are optional (we do effectively
234 * generate error if CHARSET is missing though).
235 * Continuations may not use that "C'L'" construct, but be tolerant
236 * and ignore those. Also encoded and non-encoded continuations may
237 * occur, i.e., perform percent en-/decoding only as necessary.
238 * Continuations may occur in any order */
239 /* xxx RFC 2231 parsing ignores language tags */
240 jumpin:
241 for (cp = hbp; digitchar(*cp); ++cp)
243 i = PTR2SIZE(cp - hbp);
244 if (i != 0) {
245 if (i >= sizeof(nobuf)) {
246 emsg = N_("too many digits to form a valid number");
247 goto jerr;
248 } else if ((c = *cp) != '=' && c != '*') {
249 emsg = N_("expected = or * after leading digits");
250 goto jerr;
252 memcpy(nobuf, hbp, i);
253 nobuf[i] = '\0';
254 if((n_idec_uiz_cp(&i, nobuf, 10, NULL
255 ) & (n_IDEC_STATE_EMASK | n_IDEC_STATE_CONSUMED)
256 ) != n_IDEC_STATE_CONSUMED || i >= 999){
257 emsg = N_("invalid continuation sequence number");
258 goto jerr;
260 hbp = ++cp;
262 /* Value encoded? */
263 if (c == '*') {
264 if (*hbp++ != '=')
265 goto jeeqaaster;
266 } else if (c != '=') {
267 jeeqaaster:
268 emsg = N_("expected = after asterisk *");
269 goto jerr;
271 } else {
272 /* In continuation mode that is an error, however */
273 if (head != NULL) {
274 emsg = N_("missing continuation sequence number");
275 goto jerr;
277 /* Parameter value is encoded, may define encoding */
278 c = '*';
279 if (*cp != '=')
280 goto jeeqaaster;
281 hbp = ++cp;
282 i = 0;
285 /* Create new node and insert it sorted; should be faster than
286 * creating an unsorted list and sorting it after parsing */
287 np = n_alloc(sizeof *np);
288 np->rj_next = NULL;
289 np->rj_no = (ui32_t)i;
290 np->rj_is_enc = (c == '*');
291 np->rj_val_off = np->rj_cs_len = 0;
293 if (head == NULL)
294 head = np;
295 else if (i < head->rj_no) {
296 np->rj_next = head;
297 head = np;
298 } else {
299 struct rfc2231_joiner *l = NULL, *x = head;
301 while (x != NULL && i > x->rj_no)
302 l = x, x = x->rj_next;
303 if (x != NULL)
304 np->rj_next = x;
305 assert(l != NULL);
306 l->rj_next = np;
309 switch (_mime_param_value_trim(&xval, hbp, &cp)) {
310 default:
311 emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
312 : N_("faulty value - missing closing quotation mark \"?");
313 goto jerr;
314 case -1:
315 /* XXX if (np->is_enc && memchr(np->dat, '\'', i) != NULL) {
316 * XXX emsg = N_("character set info not allowed here");
317 * XXX goto jerr;
318 * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
319 /* FALLTHRU */
320 case 1:
321 if (xval.l >= UI32_MAX) {
322 emsg = N_("parameter value too long");
323 goto jerr;
325 np->rj_len = (ui32_t)xval.l;
326 np->rj_dat = xval.s;
327 break;
330 /* Watch out for character set and language info */
331 if (np->rj_is_enc && (eptr = memchr(xval.s, '\'', xval.l)) != NULL) {
332 np->rj_cs_len = PTR2SIZE(eptr - xval.s);
333 if ((eptr = memchr(eptr + 1, '\'', xval.l - np->rj_cs_len - 1))
334 == NULL) {
335 emsg = N_("faulty RFC 2231 parameter extension");
336 goto jerr;
338 np->rj_val_off = PTR2SIZE(++eptr - xval.s);
341 hbp = cp;
342 } else
343 hbp = _mime_param_skip(hbp);
345 assert(head != NULL); /* (always true due to jumpin:, but..) */
347 errors |= __rfc2231_join(head, &rv, &emsg);
348 if (errors && (n_poption & n_PO_D_V)) {
349 /* TODO should set global flags so that at the end of an operation
350 * TODO (for a message) a summary can be printed: faulty MIME, xy */
351 if (emsg == NULL)
352 emsg = N_("multiple causes");
353 n_err(_("Message had MIME errors: %s\n"), V_(emsg));
355 jleave:
356 NYD2_LEAVE;
357 return rv;
359 jerr:
360 while ((np = head) != NULL) {
361 head = np->rj_next;
362 n_free(np);
364 if (n_poption & n_PO_D_V) {
365 if (emsg == NULL)
366 emsg = N_("expected asterisk *");
367 n_err(_("Faulty RFC 2231 MIME parameter value: %s: %s\n"
368 "Near: %s\n"), param, V_(emsg), hbp_base);
370 rv = NULL;
371 goto jleave;
374 static bool_t
375 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
377 struct str sin, sou;
378 struct rfc2231_joiner *np;
379 char const *cp;
380 size_t i;
381 enum {
382 _NONE = 0,
383 _HAVE_ENC = 1<<0,
384 _HAVE_ICONV = 1<<1,
385 _SEEN_ANY = 1<<2,
386 _ERRORS = 1<<3
387 } f = _NONE;
388 ui32_t no;
389 #ifdef HAVE_ICONV
390 iconv_t fhicd;
391 #endif
392 NYD2_ENTER;
394 #ifdef HAVE_ICONV
395 n_UNINIT(fhicd, (iconv_t)-1);
397 if (head->rj_is_enc) {
398 char const *tcs;
400 f |= _HAVE_ENC;
401 if (head->rj_cs_len == 0) {
402 /* It is an error if the character set is not set, the language alone
403 * cannot convert characters, let aside that we don't use it at all */
404 *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
405 f |= _ERRORS;
406 } else if (ascncasecmp(tcs = ok_vlook(ttycharset),
407 head->rj_dat, head->rj_cs_len)) {
408 char *cs = n_lofi_alloc(head->rj_cs_len +1);
410 memcpy(cs, head->rj_dat, head->rj_cs_len);
411 cs[head->rj_cs_len] = '\0';
412 if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
413 f |= _HAVE_ICONV;
414 else {
415 *emsg = N_("necessary character set conversion missing");
416 f |= _ERRORS;
418 n_lofi_free(cs);
421 #endif
423 if (head->rj_no != 0) {
424 if (!(f & _ERRORS))
425 *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
426 f |= _ERRORS;
429 for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; n_free(np)) {
430 head = np->rj_next;
432 if (np->rj_no != no++) {
433 if (!(f & _ERRORS))
434 *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
435 f |= _ERRORS;
438 /* RFC 2231 allows such info only in the first continuation, and
439 * furthermore MUSTs the first to be encoded, then */
440 if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
441 (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
442 if (!(f & _ERRORS))
443 *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
444 f |= _ERRORS;
446 f |= _SEEN_ANY;
448 i = np->rj_len - np->rj_val_off;
449 if (!np->rj_is_enc)
450 n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
451 else {
452 /* Perform percent decoding */
453 sin.s = n_alloc(i +1);
454 sin.l = 0;
456 for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
457 char c;
459 if ((c = *cp++) == '%') {
460 si32_t cc;
462 if (i < 3 || (cc = n_c_from_hex_base16(cp)) < 0) {
463 if (!(f & _ERRORS))
464 *emsg = N_("invalid RFC 2231 percent encoded sequence");
465 f |= _ERRORS;
466 goto jhex_putc;
468 sin.s[sin.l++] = (char)cc;
469 cp += 2;
470 i -= 3;
471 } else {
472 jhex_putc:
473 sin.s[sin.l++] = c;
474 --i;
477 sin.s[sin.l] = '\0';
479 n_str_add_buf(&sou, sin.s, sin.l);
480 n_free(sin.s);
484 /* And add character set conversion on top as necessary.
485 * RFC 2231 is pragmatic: encode only mentions percent encoding and the
486 * character set for the entire string ("[no] facility for using more
487 * than one character set or language"), therefore "continuations may
488 * contain a mixture of encoded and unencoded segments" applies to
489 * a contiguous string of a single character set that has been torn in
490 * pieces due to space restrictions, and it happened that some pieces
491 * didn't need to be percent encoded.
493 * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
494 * that encoded-words-are-atomic, meaning that a single character-set
495 * conversion run over the final, joined, partially percent-decoded value
496 * should be sufficient */
497 #ifdef HAVE_ICONV
498 if (f & _HAVE_ICONV) {
499 sin.s = NULL;
500 sin.l = 0;
501 if (n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &sin, &sou, NULL) != 0) {
502 if (!(f & _ERRORS)) /* XXX won't be reported with _UNIDFEFAULT */
503 *emsg = N_("character set conversion failed on value");
504 f |= _ERRORS;
506 n_free(sou.s);
507 sou = sin;
509 n_iconv_close(fhicd);
511 #endif
513 memcpy(*result = n_autorec_alloc(sou.l +1), sou.s, sou.l +1);
514 n_free(sou.s);
515 NYD2_LEAVE;
516 return ((f & _ERRORS) != 0);
519 static void
520 _mime_param_create(struct mime_param_builder *self)
522 struct mime_param_builder next;
523 /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
524 * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
525 * On the other hand we may excess _LINELEN to avoid breaking up possible
526 * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
527 * support stateful encodings and (b) will try to synchronize on UTF-8 this
528 * problem is scarce, possibly even artificial */
529 char buf[n_MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
530 *bp, *bp_max, *bp_xmax, *bp_lanoenc;
531 char const *vb, *vb_lanoenc;
532 size_t vl;
533 enum {
534 _NONE = 0,
535 _ISENC = 1<<0,
536 _HADRAW = 1<<1,
537 _RAW = 1<<2
538 } f = _NONE;
539 NYD2_ENTER;
540 n_LCTA(sizeof(buf) >= MIME_LINELEN * 2, "Buffer to small for operation");
542 jneed_enc:
543 self->mpb_buf = bp = bp_lanoenc = buf;
544 self->mpb_buf_len = 0;
545 self->mpb_is_enc = ((f & _ISENC) != 0);
546 vb_lanoenc = vb = self->mpb_value;
547 vl = self->mpb_value_len;
549 /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
550 bp_max = (buf + MIME_LINELEN) -
551 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
552 bp_xmax = (buf + sizeof(buf)) -
553 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
554 if ((f & _ISENC) && self->mpb_level == 0) {
555 bp_max -= self->mpb_charset_len;
556 bp_xmax -= self->mpb_charset_len;
558 if (PTRCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
559 DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
560 bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
562 assert(PTRCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
564 f &= _ISENC;
565 while (vl > 0) {
566 union {char c; ui8_t uc;} u; u.c = *vb;
568 f |= _RAW;
569 if (!(f & _ISENC)) {
570 if (u.uc > 0x7F || cntrlchar(u.c)) { /* XXX reject cntrlchar? */
571 /* We need to percent encode this character, possibly changing
572 * overall strategy, but anyway the one of this level, possibly
573 * rendering invalid any output byte we yet have produced here.
574 * Instead of throwing away that work just recurse if some fancy
575 * magic condition is true */
576 /* *However*, many tested MUAs fail to deal with parameters that
577 * are splitted across "too many" fields, including ones that
578 * misread RFC 2231 to allow only one digit, i.e., a maximum of
579 * ten. This is plain wrong, but that won't help their users */
580 if (PTR2SIZE(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
581 goto jrecurse;
582 f |= _ISENC;
583 goto jneed_enc;
586 if (u.uc == '"' || u.uc == '\\') {
587 f ^= _RAW;
588 bp[0] = '\\';
589 bp[1] = u.c;
590 bp += 2;
592 } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
593 f ^= _RAW;
594 bp[0] = '%';
595 n_c_to_hex_base16(bp + 1, u.c);
596 bp += 3;
599 ++vb;
600 --vl;
601 if (f & _RAW) {
602 f |= _HADRAW;
603 vb_lanoenc = vb;
604 *bp++ = u.c;
605 bp_lanoenc = bp;
608 /* If all available space has been consumed we must split.
609 * Due to compatibility reasons we must take care not to break up
610 * multibyte sequences -- even though RFC 2231 rather implies that the
611 * splitted value should be joined (after percent encoded fields have
612 * been percent decoded) and the resulting string be treated in the
613 * specified character set / language, MUAs have been seen which apply
614 * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
615 * if stateful encodings cannot truly be supported like that?!..
617 * So split at 7-bit character if we have seen any and the wastage isn't
618 * too large; recall that we need to keep the overall number of P=V
619 * values as low as possible due to compatibility reasons.
620 * If we haven't seen any plain bytes be laxe and realize that bp_max
621 * reflects SHOULD lines, and try to extend this as long as possible.
622 * However, with UTF-8, try to backward synchronize on sequence start */
623 if (bp <= bp_max)
624 continue;
626 if ((f & _HADRAW) && (PTRCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
627 (!self->mpb_is_utf8 &&
628 PTR2SIZE(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
629 bp = bp_lanoenc;
630 vl += PTR2SIZE(vb - vb_lanoenc);
631 vb = vb_lanoenc;
632 goto jrecurse;
635 if (self->mpb_is_utf8 && ((ui8_t)(vb[-1]) & 0xC0) != 0x80) {
636 bp -= 3;
637 --vb;
638 ++vl;
639 goto jrecurse;
642 if (bp <= bp_xmax)
643 continue;
644 /* (Shit.) */
645 goto jrecurse;
648 /* That level made the great and completed encoding. Build result */
649 self->mpb_is_enc = ((f & _ISENC) != 0);
650 self->mpb_buf_len = PTR2SIZE(bp - buf);
651 __mime_param_join(self);
652 jleave:
653 NYD2_LEAVE;
654 return;
656 /* Need to recurse, take care not to excess magical limit of 999 levels */
657 jrecurse:
658 if (self->mpb_level == 999) {
659 if (n_poption & n_PO_D_V)
660 n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
661 goto jleave;
664 self->mpb_is_enc = ((f & _ISENC) != 0);
665 self->mpb_buf_len = PTR2SIZE(bp - buf);
667 memset(&next, 0, sizeof next);
668 next.mpb_next = self;
669 next.mpb_level = self->mpb_level + 1;
670 next.mpb_name_len = self->mpb_name_len;
671 next.mpb_value_len = vl;
672 next.mpb_is_utf8 = self->mpb_is_utf8;
673 next.mpb_name = self->mpb_name;
674 next.mpb_value = vb;
675 _mime_param_create(&next);
676 goto jleave;
679 static void
680 __mime_param_join(struct mime_param_builder *head)
682 char nobuf[16];
683 struct mime_param_builder *np;
684 size_t i, ll; DBG( size_t len_max; )
685 struct str *result;
686 char *cp;
687 enum {
688 _NONE = 0,
689 _ISENC = 1<<0,
690 _ISQUOTE = 1<<1,
691 _ISCONT = 1<<2
692 } f = _NONE;
693 NYD2_ENTER;
695 /* Traverse the stack upwards to find out result length (worst case).
696 * Reverse the list while doing so */
697 for (i = 0, np = head, head = NULL; np != NULL;) {
698 struct mime_param_builder *tmp;
700 i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
701 if (np->mpb_is_enc)
702 f |= _ISENC;
704 tmp = np->mpb_next;
705 np->mpb_next = head;
706 head = np;
707 np = tmp;
709 if (f & _ISENC)
710 i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
711 DBG( len_max = i; )
712 head->mpb_rv = TRU1;
714 result = head->mpb_result;
715 if (head->mpb_next != NULL)
716 f |= _ISCONT;
717 cp = result->s = n_autorec_alloc(i +1);
719 for (ll = 0, np = head;;) {
720 /* Name part */
721 memcpy(cp, np->mpb_name, i = np->mpb_name_len);
722 cp += i;
723 ll += i;
725 if (f & _ISCONT) {
726 char *cpo = cp, *nop = nobuf + sizeof(nobuf);
727 ui32_t noi = np->mpb_level;
729 *--nop = '\0';
731 *--nop = "0123456789"[noi % 10];
732 while ((noi /= 10) != 0);
734 *cp++ = '*';
735 while (*nop != '\0')
736 *cp++ = *nop++;
738 ll += PTR2SIZE(cp - cpo);
741 if ((f & _ISENC) || np->mpb_is_enc) {
742 *cp++ = '*';
743 ++ll;
745 *cp++ = '=';
746 ++ll;
748 /* Value part */
749 if (f & _ISENC) {
750 f &= ~_ISENC;
751 memcpy(cp, np->mpb_charset, i = np->mpb_charset_len);
752 cp += i;
753 cp[0] = '\'';
754 cp[1] = '\'';
755 cp += 2;
756 ll += i + 2;
757 } else if (!np->mpb_is_enc) {
758 f |= _ISQUOTE;
759 *cp++ = '"';
760 ++ll;
763 memcpy(cp, np->mpb_buf, i = np->mpb_buf_len);
764 cp += i;
765 ll += i;
767 if (f & _ISQUOTE) {
768 f ^= _ISQUOTE;
769 *cp++ = '"';
770 ++ll;
773 if ((np = np->mpb_next) == NULL)
774 break;
775 *cp++ = ';';
776 ++ll;
778 i = ll;
779 i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
780 if (i >= MIME_LINELEN) {
781 head->mpb_rv = -TRU1;
782 *cp++ = '\n';
783 ll = 0;
786 *cp++ = ' ';
787 ++ll;
789 *cp = '\0';
790 result->l = PTR2SIZE(cp - result->s);
791 assert(result->l < len_max);
792 NYD2_LEAVE;
795 FL char *
796 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
798 struct str xval;
799 char *rv = NULL;
800 size_t plen;
801 char const *p;
802 NYD_ENTER;
804 plen = strlen(param);
805 p = headerbody;
807 /* At the beginning of headerbody there is no parameter=value pair xxx */
808 if (!whitechar(*p))
809 goto jskip1st;
811 for (;;) {
812 while (whitechar(*p))
813 ++p;
815 if (!ascncasecmp(p, param, plen)) {
816 p += plen;
817 while (whitechar(*p)) /* XXX? */
818 ++p;
819 switch (*p++) {
820 case '*':
821 rv = _rfc2231_param_parse(param, plen, p);
822 goto jleave;
823 case '=':
824 if (!_mime_param_value_trim(&xval, p, NULL)) {
825 /* XXX LOG? */
826 goto jleave;
828 rv = xval.s;
830 /* We do have a result, but some (elder) software (S-nail <v14.8)
831 * will use RFC 2047 encoded words in parameter values, too */
832 /* TODO Automatically check whether the value seems to be RFC 2047
833 * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
834 if ((p = strstr(rv, "=?")) != NULL && strstr(p, "?=") != NULL) {
835 struct str ti, to;
837 ti.l = strlen(ti.s = rv);
838 mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
839 rv = savestrbuf(to.s, to.l);
840 n_free(to.s);
842 goto jleave;
843 default:
844 /* Not our desired parameter, skip and continue */
845 break;
849 jskip1st:
850 if (*(p = _mime_param_skip(p)) == '\0')
851 goto jleave;
854 jleave:
855 NYD_LEAVE;
856 return rv;
859 FL si8_t
860 mime_param_create(struct str *result, char const *name, char const *value)
862 /* TODO All this needs rework when we have (1) a real string and even more
863 * TODO (2) use objects instead of stupid string concat; it's temporary
864 * TODO I.e., this function should return a HeaderBodyParam */
865 struct mime_param_builder top;
866 size_t i;
867 NYD_ENTER;
869 memset(result, 0, sizeof *result);
871 memset(&top, 0, sizeof top);
872 top.mpb_result = result;
873 if ((i = strlen(top.mpb_name = name)) >= UI32_MAX)
874 goto jleave;
875 top.mpb_name_len = (ui32_t)i;
876 if ((i = strlen(top.mpb_value = value)) >= UI32_MAX)
877 goto jleave;
878 top.mpb_value_len = (ui32_t)i;
879 if ((i = strlen(name = ok_vlook(ttycharset))) >= UI32_MAX)
880 goto jleave;
881 top.mpb_charset_len = (ui32_t)i;
882 top.mpb_charset = n_autorec_alloc(++i);
883 memcpy(n_UNCONST(top.mpb_charset), name, i);
884 if(top.mpb_charset_len >= 4 && !memcmp(top.mpb_charset, "utf", 3) &&
885 ((top.mpb_charset[3] == '-' && top.mpb_charset[4] == '8' &&
886 top.mpb_charset_len == 5) || (top.mpb_charset[3] == '8' &&
887 top.mpb_charset_len == 4)))
888 top.mpb_is_utf8 = TRU1;
889 else
890 top.mpb_is_utf8 = FAL0;
892 _mime_param_create(&top);
893 jleave:
894 NYD_LEAVE;
895 return top.mpb_rv;
898 FL char *
899 mime_param_boundary_get(char const *headerbody, size_t *len)
901 char *q = NULL, *p;
902 NYD_ENTER;
904 if ((p = mime_param_get("boundary", headerbody)) != NULL) {
905 size_t sz = strlen(p);
907 if (len != NULL)
908 *len = sz + 2;
909 q = n_autorec_alloc(sz + 2 +1);
910 q[0] = q[1] = '-';
911 memcpy(q + 2, p, sz);
912 *(q + sz + 2) = '\0';
914 NYD_LEAVE;
915 return q;
918 FL char *
919 mime_param_boundary_create(void)
921 static ui32_t reprocnt;
922 char *bp;
923 NYD_ENTER;
925 bp = n_autorec_alloc(36 + 6 +1);
926 bp[0] = bp[2] = bp[39] = bp[41] = '=';
927 bp[1] = bp[40] = '-';
928 memcpy(bp + 3, n_random_create_cp(36, &reprocnt), 36);
929 bp[42] = '\0';
930 NYD_LEAVE;
931 return bp;
/* s-it-mode */