WANT_AMALGAMATION will henceforth work through main.c
[s-mailx.git] / mime_param.c
blob0c6f180eb743be484d36dca0300292700c0f58f1
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ MIME parameter handling.
 *
 * Copyright (c) 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18 #undef n_FILE
19 #define n_FILE mime_param
21 #ifndef HAVE_AMALGAMATION
22 # include "nail.h"
23 #endif
25 struct rfc2231_joiner {
26 struct rfc2231_joiner *rj_next;
27 ui32_t rj_no; /* Continuation number */
28 ui32_t rj_len; /* of useful data in .rj_dat */
29 ui32_t rj_val_off; /* Start of value data therein */
30 ui32_t rj_cs_len; /* Length of charset part */
31 bool_t rj_is_enc; /* Is percent encoded */
32 ui8_t __pad[7];
33 char const *rj_dat;
36 struct mime_param_builder {
37 struct mime_param_builder *mpb_next;
38 struct str *mpb_result;
39 ui32_t mpb_level; /* of recursion (<-> continuation number) */
40 ui32_t mpb_name_len; /* of the parameter .mpb_name */
41 ui32_t mpb_value_len; /* of remaining value */
42 ui32_t mpb_charset_len; /* of .mpb_charset (only in outermost level) */
43 ui32_t mpb_buf_len; /* Usable result of this level in .mpb_buf */
44 bool_t mpb_is_enc; /* Level requires encoding */
45 ui8_t __dummy[1];
46 bool_t mpb_is_utf8; /* Encoding is UTF-8 */
47 si8_t mpb_rv;
48 char const *mpb_name;
49 char const *mpb_value; /* Remains of, once the level was entered */
50 char const *mpb_charset; /* charset_get_lc() */
51 char *mpb_buf; /* Pointer to on-stack buffer */
54 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
55 static bool_t const _rfc2231_etab[] = {
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* NUL..SI */
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* DLE..US */
58 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, /* CAN.. / */
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 0.. ? */
61 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @.. O */
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, /* P.. _ */
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* `.. o */
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p..DEL */
67 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
68 * entire construct, return pointer to the first byte thereafter or to NUL */
69 static char const * _mime_param_skip(char const *hbp);
71 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
72 * On successful return (1,-1; -1 is returned if the value was quoted via
73 * double quotation marks) a set end_or_null points to after the value and any
74 * possible separator and result->s is the salloc()d normalized value */
75 static si8_t _mime_param_value_trim(struct str *result, char const *start,
76 char const **end_or_null);
78 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
79 * extended syntax: perform full RFC 2231 parsing starting at this point.
80 * Note that _join() returns is-error */
81 static char * _rfc2231_param_parse(char const *param, size_t plen,
82 char const *hbp);
83 static bool_t __rfc2231_join(struct rfc2231_joiner *head, char **result,
84 char const **emsg);
86 /* Recursive parameter builder. Note we have a magic limit of 999 levels.
87 * Prepares a portion of output in self->mpb_buf;
88 * once >mpb_value is worked completely the deepmost level joins the result
89 * into >mpb_result and unrolls the stack. */
90 static void _mime_param_create(struct mime_param_builder *self);
91 static void __mime_param_join(struct mime_param_builder *head);
93 static char const *
94 _mime_param_skip(char const *hbp)
96 char co, cn;
97 NYD2_ENTER;
99 /* Skip over parameter name - note we may have skipped over an entire
100 * parameter name and thus point to a "="; i haven't yet truly checked
101 * against MIME RFCs, just test for ";" in the meanwhile XXX */
102 while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
103 ++hbp;
104 if (cn == '\0')
105 goto jleave;
106 ++hbp;
107 if (cn == ';')
108 goto jleave;
110 while (whitechar((cn = *hbp))) /* XXX */
111 ++hbp;
112 if (cn == '\0')
113 goto jleave;
115 if (cn == '"') {
116 co = '\0';
117 while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
118 co = (co == '\\') ? '\0' : cn;
119 if (cn != '\0' && (cn = *++hbp) == ';')
120 ++hbp;
121 } else {
122 for (;; cn = *++hbp)
123 if (cn == '\0' || cn == ';' || whitechar(cn))
124 break;
125 if (cn != '\0')
126 ++hbp;
128 jleave:
129 NYD2_LEAVE;
130 return hbp;
133 static si8_t
134 _mime_param_value_trim(struct str *result, char const *start,
135 char const **end_or_null)
137 char const *e;
138 char co, cn;
139 size_t i;
140 si8_t rv;
141 NYD2_ENTER;
143 while (whitechar(*start)) /* XXX? */
144 ++start;
146 if (*start == '"') {
147 for (co = '\0', e = ++start;; ++e)
148 if ((cn = *e) == '\0')
149 goto jerr;
150 else if (cn == '"' && co != '\\')
151 break;
152 else if (cn == '\\' && co == '\\')
153 co = '\0';
154 else
155 co = cn;
156 i = PTR2SIZE(e++ - start);
157 rv = -TRU1;
158 } else {
159 for (e = start; (cn = *e) != '\0' && !whitechar(cn) && cn != ';'; ++e)
161 i = PTR2SIZE(e - start);
162 rv = TRU1;
165 result->s = salloc(i +1);
166 if (rv > 0) {
167 memcpy(result->s, start, result->l = i);
168 result->s[i] = '\0';
169 } else {
170 size_t j;
171 char *cp;
173 for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
174 cn = *start++;
175 if (cn != '\\' || co == '\\') {
176 cp[j++] = cn;
177 if (cn == '\\')
178 cn = '\0';
181 cp[j] = '\0';
183 result->s = cp;
184 result->l = j;
187 if (end_or_null != NULL) {
188 while (*e != '\0' && *e == ';')
189 ++e;
190 *end_or_null = e;
192 jleave:
193 NYD2_LEAVE;
194 return rv;
195 jerr:
196 rv = FAL0;
197 goto jleave;
200 static char *
201 _rfc2231_param_parse(char const *param, size_t plen, char const *hbp)
203 /* TODO Do it for real and unite with mime_param_get() */
204 struct str xval;
205 char nobuf[32], *eptr, *rv = NULL, c;
206 char const *hbp_base, *cp, *emsg = NULL;
207 struct rfc2231_joiner *head = NULL, *np;
208 bool_t errors = FAL0;
209 size_t i;
210 NYD2_ENTER;
212 /* We were called by mime_param_get() after a param name match that
213 * involved "*", so jump to the matching code */
214 hbp_base = hbp;
215 goto jumpin;
217 for (; *hbp != '\0'; hbp_base = hbp) {
218 while (whitechar(*hbp))
219 ++hbp;
221 if (!ascncasecmp(hbp, param, plen)) {
222 hbp += plen;
223 while (whitechar(*hbp))
224 ++hbp;
225 if (*hbp++ != '*')
226 goto jerr;
228 /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
229 * parameter continuation and the lone asterisk "*" percent encoded
230 * values -- if encoding is used the "*0" or lone parameter value
231 * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
232 * where both of CHARSET and LANGUAGE are optional (we do effectively
233 * generate error if CHARSET is missing though).
234 * Continuations may not use that "C'L'" construct, but be tolerant
235 * and ignore those. Also encoded and non-encoded continuations may
236 * occur, i.e., perform percent en-/decoding only as necessary.
237 * Continuations may occur in any order */
238 /* xxx RFC 2231 parsing ignores language tags */
239 jumpin:
240 for (cp = hbp; digitchar(*cp); ++cp)
242 i = PTR2SIZE(cp - hbp);
243 if (i != 0) {
244 if (i >= sizeof(nobuf)) {
245 emsg = N_("too many digits to form a valid number");
246 goto jerr;
247 } else if ((c = *cp) != '=' && c != '*') {
248 emsg = N_("expected \"=\" or \"*\" after leading digits");
249 goto jerr;
251 memcpy(nobuf, hbp, i);
252 nobuf[i] = '\0';
253 i = (size_t)strtol(nobuf, UNCONST(&eptr), 10);
254 if (i >= 999 || *eptr != '\0') {
255 emsg = N_("invalid continuation sequence number");
256 goto jerr;
258 hbp = ++cp;
260 /* Value encoded? */
261 if (c == '*') {
262 if (*hbp++ != '=')
263 goto jeeqaaster;
264 } else if (c != '=') {
265 jeeqaaster:
266 emsg = N_("expected \"=\" after asterisk \"*\"");
267 goto jerr;
269 } else {
270 /* In continuation mode that is an error, however */
271 if (head != NULL) {
272 emsg = N_("missing continuation sequence number");
273 goto jerr;
275 /* Parameter value is encoded, may define encoding */
276 c = '*';
277 if (*cp != '=')
278 goto jeeqaaster;
279 hbp = ++cp;
280 i = 0;
283 /* Create new node and insert it sorted; should be faster than
284 * creating an unsorted list and sorting it after parsing */
285 np = smalloc(sizeof *np);
286 np->rj_next = NULL;
287 np->rj_no = (ui32_t)i;
288 np->rj_is_enc = (c == '*');
289 np->rj_val_off = np->rj_cs_len = 0;
291 if (head == NULL)
292 head = np;
293 else if (i < head->rj_no) {
294 np->rj_next = head;
295 head = np;
296 } else {
297 struct rfc2231_joiner *l = NULL, *x = head;
299 while (x != NULL && i > x->rj_no)
300 l = x, x = x->rj_next;
301 if (x != NULL)
302 np->rj_next = x;
303 l->rj_next = np;
306 switch (_mime_param_value_trim(&xval, hbp, &cp)) {
307 default:
308 emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
309 : N_("faulty value - missing closing quotation mark \"\"\"?");
310 goto jerr;
311 case -1:
312 /* XXX if (np->is_enc && memchr(np->dat, '\'', i) != NULL) {
313 * XXX emsg = N_("character set info not allowed here");
314 * XXX goto jerr;
315 * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
316 /* FALLTHRU */
317 case 1:
318 if (xval.l >= UI32_MAX) {
319 emsg = N_("parameter value too long");
320 goto jerr;
322 np->rj_len = (ui32_t)xval.l;
323 np->rj_dat = xval.s;
324 break;
327 /* Watch out for character set and language info */
328 if (np->rj_is_enc && (eptr = memchr(xval.s, '\'', xval.l)) != NULL) {
329 np->rj_cs_len = PTR2SIZE(eptr - xval.s);
330 if ((eptr = memchr(eptr + 1, '\'', xval.l - np->rj_cs_len - 1))
331 == NULL) {
332 emsg = N_("faulty RFC 2231 parameter extension");
333 goto jerr;
335 np->rj_val_off = PTR2SIZE(++eptr - xval.s);
338 hbp = cp;
339 } else
340 hbp = _mime_param_skip(hbp);
342 assert(head != NULL); /* (always true due to jumpin:, but..) */
344 errors |= __rfc2231_join(head, &rv, &emsg);
345 if (errors && (options & OPT_D_VV)) {
346 /* TODO 1. we need our error ring; 2. such errors in particular
347 * TODO should set global flags so that at the end of an operation
348 * TODO (for a message) a summary can be printed: faulty MIME, xy */
349 if (emsg == NULL)
350 emsg = N_("multiple causes");
351 n_err(_("Message had MIME errors: %s\n"), V_(emsg));
353 jleave:
354 NYD2_LEAVE;
355 return rv;
357 jerr:
358 while ((np = head) != NULL) {
359 head = np->rj_next;
360 free(np);
362 if (options & OPT_D_V) {
363 if (emsg == NULL)
364 emsg = N_("expected asterisk \"*\"");
365 n_err(_("Faulty \"%s\" RFC 2231 MIME parameter value: %s\n"
366 "Near: %s\n"), param, V_(emsg), hbp_base);
368 rv = NULL;
369 goto jleave;
372 static bool_t
373 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
375 struct str sin, sou;
376 struct rfc2231_joiner *np;
377 char const *cp;
378 size_t i;
379 enum {
380 _NONE = 0,
381 _HAVE_ENC = 1<<0,
382 _HAVE_ICONV = 1<<1,
383 _SEEN_ANY = 1<<2,
384 _ERRORS = 1<<3
385 } f = _NONE;
386 ui32_t no;
387 #ifdef HAVE_ICONV
388 iconv_t fhicd = (iconv_t)-1;/* XXX pacify compiler */
389 #endif
390 NYD2_ENTER;
392 #ifdef HAVE_ICONV
393 if (head->rj_is_enc) {
394 char const *tcs;
396 f |= _HAVE_ENC;
397 if (head->rj_cs_len == 0) {
398 /* It is an error if the character set is not set, the language alone
399 * cannot convert characters, let aside that we don't use it at all */
400 *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
401 f |= _ERRORS;
402 } else if (ascncasecmp(tcs = charset_get_lc(),
403 head->rj_dat, head->rj_cs_len)) {
404 char *cs = ac_alloc(head->rj_cs_len +1);
406 memcpy(cs, head->rj_dat, head->rj_cs_len);
407 cs[head->rj_cs_len] = '\0';
408 if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
409 f |= _HAVE_ICONV;
410 else {
411 *emsg = N_("necessary character set conversion missing");
412 f |= _ERRORS;
414 ac_free(cs);
417 #endif
419 if (head->rj_no != 0) {
420 if (!(f & _ERRORS))
421 *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
422 f |= _ERRORS;
425 for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; free(np)) {
426 head = np->rj_next;
428 if (np->rj_no != no++) {
429 if (!(f & _ERRORS))
430 *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
431 f |= _ERRORS;
434 /* RFC 2231 allows such info only in the first continuation, and
435 * furthermore MUSTs the first to be encoded, then */
436 if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
437 (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
438 if (!(f & _ERRORS))
439 *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
440 f |= _ERRORS;
442 f |= _SEEN_ANY;
444 i = np->rj_len - np->rj_val_off;
445 if (!np->rj_is_enc) {
446 n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
447 continue;
450 /* Always perform percent decoding */
451 sin.s = smalloc(i +1);
452 sin.l = 0;
453 for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
454 char c;
456 if ((c = *cp++) == '%') {
457 si32_t cc;
459 if (i < 3 || (cc = mime_hexseq_to_char(cp)) < 0) {
460 if (!(f & _ERRORS))
461 *emsg = N_("invalid RFC 2231 percent encoded sequence");
462 f |= _ERRORS;
463 goto jhex_putc;
465 sin.s[sin.l++] = (char)cc;
466 cp += 2;
467 i -= 3;
468 } else {
469 jhex_putc:
470 sin.s[sin.l++] = c;
471 --i;
474 sin.s[sin.l] = '\0';
476 /* And add character set conversion on top as necessary.
477 * RFC 2231 is pragmatic: encode only mentions percent encoding and the
478 * character set for the entire string ("[no] facility for using more
479 * than one character set or language"), therefore "continuations may
480 * contain a mixture of encoded and unencoded segments" applies to
481 * a contiguous string of a single character set that has been torn in
482 * pieces due to space restrictions, and it happened that some pieces
483 * didn't need to be percent encoded.
485 * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
486 * that encoded-words-are-atomic, meaning that a single character-set
487 * conversion run over the final, joined, partially percent-decoded value
488 * should be sufficient */
489 #ifdef HAVE_ICONV
490 if (f & _HAVE_ICONV) {
491 struct str sio = {NULL, 0}; /* TODO string pool */
493 if (n_iconv_str(fhicd, &sio, &sin, NULL, TRU1) != 0) {
494 n_iconv_reset(fhicd);
495 if (!(f & _ERRORS))
496 *emsg = N_("character set conversion failed on value");
497 f |= _ERRORS;
498 n_str_add_buf(&sio, "?", 1);
500 free(sin.s);
501 sin = sio;
503 #endif
505 n_str_add_buf(&sou, sin.s, sin.l);
506 free(sin.s);
509 #ifdef HAVE_ICONV
510 if ((f & _HAVE_ICONV) && /* XXX pacify compiler */ fhicd != (iconv_t)-1)
511 n_iconv_close(fhicd);
512 #endif
513 memcpy(*result = salloc(sou.l +1), sou.s, sou.l +1);
514 free(sou.s);
515 NYD2_LEAVE;
516 return ((f & _ERRORS) != 0);
519 static void
520 _mime_param_create(struct mime_param_builder *self)
522 struct mime_param_builder next;
523 /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
524 * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
525 * On the other hand we may excess _LINELEN to avoid breaking up possible
526 * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
527 * support stateful encodings and (b) will try to synchronize on UTF-8 this
528 * problem is scarce, possibly even artificial */
529 char buf[MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
530 *bp, *bp_max, *bp_xmax, *bp_lanoenc;
531 char const *vb, *vb_lanoenc;
532 size_t vl;
533 enum {
534 _NONE = 0,
535 _ISENC = 1<<0,
536 _HADRAW = 1<<1,
537 _RAW = 1<<2
538 } f = _NONE;
539 NYD2_ENTER;
540 LCTA(sizeof(buf) >= MIME_LINELEN * 2);
542 jneed_enc:
543 self->mpb_buf = bp = bp_lanoenc = buf;
544 self->mpb_buf_len = 0;
545 self->mpb_is_enc = ((f & _ISENC) != 0);
546 vb_lanoenc = vb = self->mpb_value;
547 vl = self->mpb_value_len;
549 /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
550 bp_max = (buf + MIME_LINELEN) -
551 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
552 bp_xmax = (buf + sizeof(buf)) -
553 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
554 if ((f & _ISENC) && self->mpb_level == 0) {
555 bp_max -= self->mpb_charset_len;
556 bp_xmax -= self->mpb_charset_len;
558 if (PTRCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
559 DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
560 bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
562 assert(PTRCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
564 f &= _ISENC;
565 while (vl > 0) {
566 union {char c; ui8_t uc;} u; u.c = *vb;
568 f |= _RAW;
569 if (!(f & _ISENC)) {
570 if (u.uc > 0x7F || cntrlchar(u.c)) { /* XXX reject cntrlchar? */
571 /* We need to percent encode this character, possibly changing
572 * overall strategy, but anyway the one of this level, possibly
573 * rendering invalid any output byte we yet have produced here.
574 * Instead of throwing away that work just recurse if some fancy
575 * magic condition is true */
576 /* *However*, many tested MUAs fail to deal with parameters that
577 * are splitted across "too many" fields, including ones that
578 * misread RFC 2231 to allow only one digit, i.e., a maximum of
579 * ten. This is plain wrong, but that won't help their users */
580 if (PTR2SIZE(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
581 goto jrecurse;
582 f |= _ISENC;
583 goto jneed_enc;
586 if (u.uc == '"' || u.uc == '\\') {
587 f ^= _RAW;
588 bp[0] = '\\';
589 bp[1] = u.c;
590 bp += 2;
592 } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
593 f ^= _RAW;
594 bp[0] = '%';
595 mime_char_to_hexseq(bp + 1, u.c);
596 bp += 3;
599 ++vb;
600 --vl;
601 if (f & _RAW) {
602 f |= _HADRAW;
603 vb_lanoenc = vb;
604 *bp++ = u.c;
605 bp_lanoenc = bp;
608 /* If all available space has been consumed we must split.
609 * Due to compatibility reasons we must take care not to break up
610 * multibyte sequences -- even though RFC 2231 rather implies that the
611 * splitted value should be joined (after percent encoded fields have
612 * been percent decoded) and the resulting string be treated in the
613 * specified character set / language, MUAs have been seen which apply
614 * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
615 * if stateful encodings cannot truly be supported like that?!..
617 * So split at 7-bit character if we have seen any and the wastage isn't
618 * too large; recall that we need to keep the overall number of P=V
619 * values as low as possible due to compatibility reasons.
620 * If we haven't seen any plain bytes be laxe and realize that bp_max
621 * reflects SHOULD lines, and try to extend this as long as possible.
622 * However, with UTF-8, try to backward synchronize on sequence start */
623 if (bp <= bp_max)
624 continue;
626 if ((f & _HADRAW) && (PTRCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
627 (!self->mpb_is_utf8 &&
628 PTR2SIZE(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
629 bp = bp_lanoenc;
630 vl += PTR2SIZE(vb - vb_lanoenc);
631 vb = vb_lanoenc;
632 goto jrecurse;
635 if (self->mpb_is_utf8 && ((ui8_t)(vb[-1]) & 0xC0) != 0x80) {
636 bp -= 3;
637 --vb;
638 ++vl;
639 goto jrecurse;
642 if (bp <= bp_xmax)
643 continue;
644 /* (Shit.) */
645 goto jrecurse;
648 /* That level made the great and completed encoding. Build result */
649 self->mpb_is_enc = ((f & _ISENC) != 0);
650 self->mpb_buf_len = PTR2SIZE(bp - buf);
651 __mime_param_join(self);
652 jleave:
653 NYD2_LEAVE;
654 return;
656 /* Need to recurse, take care not to excess magical limit of 999 levels */
657 jrecurse:
658 if (self->mpb_level == 999) {
659 if (options & OPT_D_VV)
660 n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
661 goto jleave;
664 self->mpb_is_enc = ((f & _ISENC) != 0);
665 self->mpb_buf_len = PTR2SIZE(bp - buf);
667 memset(&next, 0, sizeof next);
668 next.mpb_next = self;
669 next.mpb_level = self->mpb_level + 1;
670 next.mpb_name_len = self->mpb_name_len;
671 next.mpb_value_len = vl;
672 next.mpb_is_utf8 = self->mpb_is_utf8;
673 next.mpb_name = self->mpb_name;
674 next.mpb_value = vb;
675 _mime_param_create(&next);
676 goto jleave;
679 static void
680 __mime_param_join(struct mime_param_builder *head)
682 char nobuf[16];
683 struct mime_param_builder *np;
684 size_t i, ll; DBG( size_t len_max; )
685 struct str *result;
686 char *cp;
687 enum {
688 _NONE = 0,
689 _ISENC = 1<<0,
690 _ISQUOTE = 1<<1,
691 _ISCONT = 1<<2
692 } f = _NONE;
693 NYD2_ENTER;
695 /* Traverse the stack upwards to find out result length (worst case).
696 * Reverse the list while doing so */
697 for (i = 0, np = head, head = NULL; np != NULL;) {
698 struct mime_param_builder *tmp;
700 i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
701 if (np->mpb_is_enc)
702 f |= _ISENC;
704 tmp = np->mpb_next;
705 np->mpb_next = head;
706 head = np;
707 np = tmp;
709 if (f & _ISENC)
710 i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
711 DBG( len_max = i; )
712 head->mpb_rv = TRU1;
714 result = head->mpb_result;
715 if (head->mpb_next != NULL)
716 f |= _ISCONT;
717 cp = result->s = salloc(i +1);
719 for (ll = 0, np = head;;) {
720 /* Name part */
721 memcpy(cp, np->mpb_name, i = np->mpb_name_len);
722 cp += i;
723 ll += i;
725 if (f & _ISCONT) {
726 char *cpo = cp, *nop = nobuf + sizeof(nobuf);
727 ui32_t noi = np->mpb_level;
729 *--nop = '\0';
731 *--nop = "0123456789"[noi % 10];
732 while ((noi /= 10) != 0);
734 *cp++ = '*';
735 while (*nop != '\0')
736 *cp++ = *nop++;
738 ll += PTR2SIZE(cp - cpo);
741 if ((f & _ISENC) || np->mpb_is_enc) {
742 *cp++ = '*';
743 ++ll;
745 *cp++ = '=';
746 ++ll;
748 /* Value part */
749 if (f & _ISENC) {
750 f &= ~_ISENC;
751 memcpy(cp, np->mpb_charset, i = np->mpb_charset_len);
752 cp += i;
753 cp[0] = '\'';
754 cp[1] = '\'';
755 cp += 2;
756 ll += i + 2;
757 } else if (!np->mpb_is_enc) {
758 f |= _ISQUOTE;
759 *cp++ = '"';
760 ++ll;
763 memcpy(cp, np->mpb_buf, i = np->mpb_buf_len);
764 cp += i;
765 ll += i;
767 if (f & _ISQUOTE) {
768 f ^= _ISQUOTE;
769 *cp++ = '"';
770 ++ll;
773 if ((np = np->mpb_next) == NULL)
774 break;
775 *cp++ = ';';
776 ++ll;
778 i = ll;
779 i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
780 if (i >= MIME_LINELEN) {
781 head->mpb_rv = -TRU1;
782 *cp++ = '\n';
783 ll = 0;
786 *cp++ = ' ';
787 ++ll;
789 *cp = '\0';
790 result->l = PTR2SIZE(cp - result->s);
791 assert(result->l < len_max);
792 NYD2_LEAVE;
795 FL char *
796 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
798 struct str xval;
799 char *rv = NULL;
800 size_t plen;
801 char const *p;
802 NYD_ENTER;
804 plen = strlen(param);
805 p = headerbody;
807 /* At the beginning of headerbody there is no parameter=value pair xxx */
808 if (!whitechar(*p))
809 goto jskip1st;
811 for (;;) {
812 while (whitechar(*p))
813 ++p;
815 if (!ascncasecmp(p, param, plen)) {
816 p += plen;
817 while (whitechar(*p)) /* XXX? */
818 ++p;
819 switch (*p++) {
820 case '*':
821 rv = _rfc2231_param_parse(param, plen, p);
822 goto jleave;
823 case '=':
824 if (!_mime_param_value_trim(&xval, p, NULL)) {
825 /* XXX LOG? */
826 goto jleave;
828 rv = xval.s;
830 /* We do have a result, but some (elder) software (S-nail <v14.8)
831 * will use RFC 2047 encoded words in parameter values, too */
832 /* TODO Automatically check wether the value seems to be RFC 2047
833 * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
834 if ((p = strstr(rv, "=?")) != NULL && strstr(p, "?=") != NULL) {
835 struct str ti, to;
837 ti.l = strlen(ti.s = rv);
838 mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
839 rv = savestrbuf(to.s, to.l);
840 free(to.s);
842 goto jleave;
843 default:
844 /* Not our desired parameter, skip and continue */
845 break;
849 jskip1st:
850 if (*(p = _mime_param_skip(p)) == '\0')
851 goto jleave;
854 jleave:
855 NYD_LEAVE;
856 return rv;
859 FL si8_t
860 mime_param_create(struct str *result, char const *name, char const *value)
862 /* TODO All this needs rework when we have (1) a real string and even more
863 * TODO (2) use objects instead of stupid string concat; it's temporary
864 * TODO I.e., this function should return a HeaderBodyParam */
865 struct mime_param_builder top;
866 size_t i;
867 NYD_ENTER;
869 memset(result, 0, sizeof *result);
871 memset(&top, 0, sizeof top);
872 top.mpb_result = result;
873 if ((i = strlen(top.mpb_name = name)) > UI32_MAX)
874 goto jleave;
875 top.mpb_name_len = (ui32_t)i;
876 if ((i = strlen(top.mpb_value = value)) > UI32_MAX)
877 goto jleave;
878 top.mpb_value_len = (ui32_t)i;
879 top.mpb_charset_len = (ui32_t)strlen(top.mpb_charset = charset_get_lc());
880 top.mpb_is_utf8 = !ascncasecmp(top.mpb_charset, "utf-8",
881 top.mpb_charset_len);
883 _mime_param_create(&top);
884 jleave:
885 NYD_LEAVE;
886 return top.mpb_rv;
889 FL char *
890 mime_param_boundary_get(char const *headerbody, size_t *len)
892 char *q = NULL, *p;
893 NYD_ENTER;
895 if ((p = mime_param_get("boundary", headerbody)) != NULL) {
896 size_t sz = strlen(p);
898 if (len != NULL)
899 *len = sz + 2;
900 q = salloc(sz + 2 +1);
901 q[0] = q[1] = '-';
902 memcpy(q + 2, p, sz);
903 *(q + sz + 2) = '\0';
905 NYD_LEAVE;
906 return q;
909 FL char *
910 mime_param_boundary_create(void)
912 char *bp;
913 NYD_ENTER;
915 bp = salloc(36 + 6 +1);
916 bp[0] = bp[2] = bp[39] = bp[41] = '=';
917 bp[1] = bp[40] = '-';
918 memcpy(bp + 3, getrandstring(36), 36);
919 bp[42] = '\0';
920 NYD_LEAVE;
921 return bp;
924 /* s-it-mode */