FIX [ee4de6e4] and finally get asccaseprefix() right!
[s-mailx.git] / mime_param.c
blob7cb507f792876faa805361a1efd9b96d2e0bfb71
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ MIME parameter handling.
4 * Copyright (c) 2016 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #undef n_FILE
19 #define n_FILE mime_param
21 #ifndef HAVE_AMALGAMATION
22 # include "nail.h"
23 #endif
/* One parsed RFC 2231 parameter segment ("NAME[*DIGITS][*]=VALUE").
 * _rfc2231_param_parse() links the segments into a list ordered by .rj_no,
 * __rfc2231_join() consumes and frees that list */
25 struct rfc2231_joiner {
26 struct rfc2231_joiner *rj_next; /* Next segment in list, or NULL */
27 ui32_t rj_no; /* Continuation number */
28 ui32_t rj_len; /* of useful data in .rj_dat */
29 ui32_t rj_val_off; /* Start of value data therein */
30 ui32_t rj_cs_len; /* Length of charset part */
31 bool_t rj_is_enc; /* Is percent encoded */
32 ui8_t __pad[7]; /* presumably explicit padding -- TODO confirm */
33 char const *rj_dat; /* Trimmed value as set from _mime_param_value_trim() */
/* State of one recursion level of _mime_param_create().
 * Each deeper level points to its parent via .mpb_next; __mime_param_join()
 * reverses that chain and concatenates all levels into .mpb_result of the
 * outermost level */
36 struct mime_param_builder {
37 struct mime_param_builder *mpb_next; /* Parent level (reversed on join) */
38 struct str *mpb_result; /* Output; only set in the outermost level */
39 ui32_t mpb_level; /* of recursion (<-> continuation number) */
40 ui32_t mpb_name_len; /* of the parameter .mpb_name */
41 ui32_t mpb_value_len; /* of remaining value */
42 ui32_t mpb_charset_len; /* of .mpb_charset (only in outermost level) */
43 ui32_t mpb_buf_len; /* Usable result of this level in .mpb_buf */
44 bool_t mpb_is_enc; /* Level requires encoding */
45 ui8_t __dummy[1];
46 bool_t mpb_is_utf8; /* Encoding is UTF-8 */
/* Result code of the outermost level: __mime_param_join() sets TRU1, or
 * -TRU1 once the joined output was folded onto more than one line; it stays
 * zero if no result was produced */
47 si8_t mpb_rv;
48 char const *mpb_name;
49 char const *mpb_value; /* Remains of, once the level was entered */
50 char const *mpb_charset; /* *ttycharset* */
51 char *mpb_buf; /* Pointer to on-stack buffer */
54 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
/* Indexed by 7-bit character value; _mime_param_create() percent-encodes a
 * byte whenever its slot is non-zero.  The -1 slots (LF, VT, CR) are thus
 * currently treated exactly like the 1 slots */
55 static bool_t const _rfc2231_etab[] = {
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* NUL..SI */
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* DLE..US */
58 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, /* SP .. / */
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 0.. ? */
61 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @.. O */
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, /* P.. _ */
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* `.. o */
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p..DEL */
67 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
68 * entire construct, return pointer to the first byte thereafter or to NUL */
69 static char const * _mime_param_skip(char const *hbp);
71 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
72 * On successful return (1,-1; -1 is returned if the value was quoted via
73 * double quotation marks) a non-NULL end_or_null is set to point after the
74 * value and any directly following ";" separators, and result->s is the
 * salloc()d normalized value.  Returns 0 (FAL0) if a quoted value lacks its
 * closing quotation mark; result and end_or_null are then left untouched */
75 static si8_t _mime_param_value_trim(struct str *result, char const *start,
76 char const **end_or_null);
78 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
79 * extended syntax: perform full RFC 2231 parsing starting at this point.
80 * Note that __rfc2231_join() returns TRU1 if an error was encountered */
81 static char * _rfc2231_param_parse(char const *param, size_t plen,
82 char const *hbp);
83 static bool_t __rfc2231_join(struct rfc2231_joiner *head, char **result,
84 char const **emsg);
86 /* Recursive parameter builder. Note we have a magic limit of 999 levels.
87 * Prepares a portion of output in self->mpb_buf;
88 * once self->mpb_value is worked completely the deepest level joins the
89 * result into ->mpb_result (of the outermost level) and unrolls the stack. */
90 static void _mime_param_create(struct mime_param_builder *self);
91 static void __mime_param_join(struct mime_param_builder *head);
93 static char const *
94 _mime_param_skip(char const *hbp)
96 char co, cn;
97 NYD2_ENTER;
99 /* Skip over parameter name - note we may have skipped over an entire
100 * parameter name and thus point to a "="; i haven't yet truly checked
101 * against MIME RFCs, just test for ";" in the meanwhile XXX */
102 while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
103 ++hbp;
104 if (cn == '\0')
105 goto jleave;
106 ++hbp;
107 if (cn == ';')
108 goto jleave;
110 while (whitechar((cn = *hbp))) /* XXX */
111 ++hbp;
112 if (cn == '\0')
113 goto jleave;
115 if (cn == '"') {
116 co = '\0';
117 while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
118 co = (co == '\\') ? '\0' : cn;
119 if (cn != '\0' && (cn = *++hbp) == ';')
120 ++hbp;
121 } else {
122 for (;; cn = *++hbp)
123 if (cn == '\0' || cn == ';' || whitechar(cn))
124 break;
125 if (cn != '\0')
126 ++hbp;
128 jleave:
129 NYD2_LEAVE;
130 return hbp;
133 static si8_t
134 _mime_param_value_trim(struct str *result, char const *start,
135 char const **end_or_null)
137 char const *e;
138 char co, cn;
139 size_t i;
140 si8_t rv;
141 NYD2_ENTER;
143 while (whitechar(*start)) /* XXX? */
144 ++start;
146 if (*start == '"') {
147 for (co = '\0', e = ++start;; ++e)
148 if ((cn = *e) == '\0')
149 goto jerr;
150 else if (cn == '"' && co != '\\')
151 break;
152 else if (cn == '\\' && co == '\\')
153 co = '\0';
154 else
155 co = cn;
156 i = PTR2SIZE(e++ - start);
157 rv = -TRU1;
158 } else {
159 for (e = start; (cn = *e) != '\0' && !whitechar(cn) && cn != ';'; ++e)
161 i = PTR2SIZE(e - start);
162 rv = TRU1;
165 result->s = salloc(i +1);
166 if (rv > 0) {
167 memcpy(result->s, start, result->l = i);
168 result->s[i] = '\0';
169 } else {
170 size_t j;
171 char *cp;
173 for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
174 cn = *start++;
175 if (cn != '\\' || co == '\\') {
176 cp[j++] = cn;
177 if (cn == '\\')
178 cn = '\0';
181 cp[j] = '\0';
183 result->s = cp;
184 result->l = j;
187 if (end_or_null != NULL) {
188 while (*e != '\0' && *e == ';')
189 ++e;
190 *end_or_null = e;
192 jleave:
193 NYD2_LEAVE;
194 return rv;
195 jerr:
196 rv = FAL0;
197 goto jleave;
200 static char *
201 _rfc2231_param_parse(char const *param, size_t plen, char const *hbp)
203 /* TODO Do it for real and unite with mime_param_get() */
204 struct str xval;
205 char nobuf[32], *eptr, *rv = NULL, c;
206 char const *hbp_base, *cp, *emsg = NULL;
207 struct rfc2231_joiner *head = NULL, *np;
208 bool_t errors = FAL0;
209 size_t i;
210 NYD2_ENTER;
212 /* We were called by mime_param_get() after a param name match that
213 * involved "*", so jump to the matching code */
214 hbp_base = hbp;
215 goto jumpin;
217 for (; *hbp != '\0'; hbp_base = hbp) {
218 while (whitechar(*hbp))
219 ++hbp;
221 if (!ascncasecmp(hbp, param, plen)) {
222 hbp += plen;
223 while (whitechar(*hbp))
224 ++hbp;
225 if (*hbp++ != '*')
226 goto jerr;
228 /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
229 * parameter continuation and the lone asterisk "*" percent encoded
230 * values -- if encoding is used the "*0" or lone parameter value
231 * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
232 * where both of CHARSET and LANGUAGE are optional (we do effectively
233 * generate error if CHARSET is missing though).
234 * Continuations may not use that "C'L'" construct, but be tolerant
235 * and ignore those. Also encoded and non-encoded continuations may
236 * occur, i.e., perform percent en-/decoding only as necessary.
237 * Continuations may occur in any order */
238 /* xxx RFC 2231 parsing ignores language tags */
239 jumpin:
240 for (cp = hbp; digitchar(*cp); ++cp)
242 i = PTR2SIZE(cp - hbp);
243 if (i != 0) {
244 if (i >= sizeof(nobuf)) {
245 emsg = N_("too many digits to form a valid number");
246 goto jerr;
247 } else if ((c = *cp) != '=' && c != '*') {
248 emsg = N_("expected = or * after leading digits");
249 goto jerr;
251 memcpy(nobuf, hbp, i);
252 nobuf[i] = '\0';
253 i = (size_t)strtol(nobuf, n_UNCONST(&eptr), 10);
254 if (i >= 999 || *eptr != '\0') {
255 emsg = N_("invalid continuation sequence number");
256 goto jerr;
258 hbp = ++cp;
260 /* Value encoded? */
261 if (c == '*') {
262 if (*hbp++ != '=')
263 goto jeeqaaster;
264 } else if (c != '=') {
265 jeeqaaster:
266 emsg = N_("expected = after asterisk *");
267 goto jerr;
269 } else {
270 /* In continuation mode that is an error, however */
271 if (head != NULL) {
272 emsg = N_("missing continuation sequence number");
273 goto jerr;
275 /* Parameter value is encoded, may define encoding */
276 c = '*';
277 if (*cp != '=')
278 goto jeeqaaster;
279 hbp = ++cp;
280 i = 0;
283 /* Create new node and insert it sorted; should be faster than
284 * creating an unsorted list and sorting it after parsing */
285 np = smalloc(sizeof *np);
286 np->rj_next = NULL;
287 np->rj_no = (ui32_t)i;
288 np->rj_is_enc = (c == '*');
289 np->rj_val_off = np->rj_cs_len = 0;
291 if (head == NULL)
292 head = np;
293 else if (i < head->rj_no) {
294 np->rj_next = head;
295 head = np;
296 } else {
297 struct rfc2231_joiner *l = NULL, *x = head;
299 while (x != NULL && i > x->rj_no)
300 l = x, x = x->rj_next;
301 if (x != NULL)
302 np->rj_next = x;
303 assert(l != NULL);
304 l->rj_next = np;
307 switch (_mime_param_value_trim(&xval, hbp, &cp)) {
308 default:
309 emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
310 : N_("faulty value - missing closing quotation mark \"?");
311 goto jerr;
312 case -1:
313 /* XXX if (np->is_enc && memchr(np->dat, '\'', i) != NULL) {
314 * XXX emsg = N_("character set info not allowed here");
315 * XXX goto jerr;
316 * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
317 /* FALLTHRU */
318 case 1:
319 if (xval.l >= UI32_MAX) {
320 emsg = N_("parameter value too long");
321 goto jerr;
323 np->rj_len = (ui32_t)xval.l;
324 np->rj_dat = xval.s;
325 break;
328 /* Watch out for character set and language info */
329 if (np->rj_is_enc && (eptr = memchr(xval.s, '\'', xval.l)) != NULL) {
330 np->rj_cs_len = PTR2SIZE(eptr - xval.s);
331 if ((eptr = memchr(eptr + 1, '\'', xval.l - np->rj_cs_len - 1))
332 == NULL) {
333 emsg = N_("faulty RFC 2231 parameter extension");
334 goto jerr;
336 np->rj_val_off = PTR2SIZE(++eptr - xval.s);
339 hbp = cp;
340 } else
341 hbp = _mime_param_skip(hbp);
343 assert(head != NULL); /* (always true due to jumpin:, but..) */
345 errors |= __rfc2231_join(head, &rv, &emsg);
346 if (errors && (options & OPT_D_V_VV)) {
347 /* TODO should set global flags so that at the end of an operation
348 * TODO (for a message) a summary can be printed: faulty MIME, xy */
349 if (emsg == NULL)
350 emsg = N_("multiple causes");
351 n_err(_("Message had MIME errors: %s\n"), V_(emsg));
353 jleave:
354 NYD2_LEAVE;
355 return rv;
357 jerr:
358 while ((np = head) != NULL) {
359 head = np->rj_next;
360 free(np);
362 if (options & OPT_D_V) {
363 if (emsg == NULL)
364 emsg = N_("expected asterisk *");
365 n_err(_("Faulty RFC 2231 MIME parameter value: %s: %s\n"
366 "Near: %s\n"), param, V_(emsg), hbp_base);
368 rv = NULL;
369 goto jleave;
372 static bool_t
373 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
375 struct str sin, sou;
376 struct rfc2231_joiner *np;
377 char const *cp;
378 size_t i;
379 enum {
380 _NONE = 0,
381 _HAVE_ENC = 1<<0,
382 _HAVE_ICONV = 1<<1,
383 _SEEN_ANY = 1<<2,
384 _ERRORS = 1<<3
385 } f = _NONE;
386 ui32_t no;
387 #ifdef HAVE_ICONV
388 iconv_t fhicd;
389 #endif
390 NYD2_ENTER;
392 #ifdef HAVE_ICONV
393 n_UNINIT(fhicd, (iconv_t)-1);
395 if (head->rj_is_enc) {
396 char const *tcs;
398 f |= _HAVE_ENC;
399 if (head->rj_cs_len == 0) {
400 /* It is an error if the character set is not set, the language alone
401 * cannot convert characters, let aside that we don't use it at all */
402 *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
403 f |= _ERRORS;
404 } else if (ascncasecmp(tcs = ok_vlook(ttycharset),
405 head->rj_dat, head->rj_cs_len)) {
406 char *cs = ac_alloc(head->rj_cs_len +1);
408 memcpy(cs, head->rj_dat, head->rj_cs_len);
409 cs[head->rj_cs_len] = '\0';
410 if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
411 f |= _HAVE_ICONV;
412 else {
413 *emsg = N_("necessary character set conversion missing");
414 f |= _ERRORS;
416 ac_free(cs);
419 #endif
421 if (head->rj_no != 0) {
422 if (!(f & _ERRORS))
423 *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
424 f |= _ERRORS;
427 for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; free(np)) {
428 head = np->rj_next;
430 if (np->rj_no != no++) {
431 if (!(f & _ERRORS))
432 *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
433 f |= _ERRORS;
436 /* RFC 2231 allows such info only in the first continuation, and
437 * furthermore MUSTs the first to be encoded, then */
438 if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
439 (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
440 if (!(f & _ERRORS))
441 *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
442 f |= _ERRORS;
444 f |= _SEEN_ANY;
446 i = np->rj_len - np->rj_val_off;
447 if (!np->rj_is_enc)
448 n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
449 else {
450 /* Perform percent decoding */
451 sin.s = smalloc(i +1);
452 sin.l = 0;
454 for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
455 char c;
457 if ((c = *cp++) == '%') {
458 si32_t cc;
460 if (i < 3 || (cc = n_c_from_hex_base16(cp)) < 0) {
461 if (!(f & _ERRORS))
462 *emsg = N_("invalid RFC 2231 percent encoded sequence");
463 f |= _ERRORS;
464 goto jhex_putc;
466 sin.s[sin.l++] = (char)cc;
467 cp += 2;
468 i -= 3;
469 } else {
470 jhex_putc:
471 sin.s[sin.l++] = c;
472 --i;
475 sin.s[sin.l] = '\0';
477 n_str_add_buf(&sou, sin.s, sin.l);
478 free(sin.s);
482 /* And add character set conversion on top as necessary.
483 * RFC 2231 is pragmatic: encode only mentions percent encoding and the
484 * character set for the entire string ("[no] facility for using more
485 * than one character set or language"), therefore "continuations may
486 * contain a mixture of encoded and unencoded segments" applies to
487 * a contiguous string of a single character set that has been torn in
488 * pieces due to space restrictions, and it happened that some pieces
489 * didn't need to be percent encoded.
491 * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
492 * that encoded-words-are-atomic, meaning that a single character-set
493 * conversion run over the final, joined, partially percent-decoded value
494 * should be sufficient */
495 #ifdef HAVE_ICONV
496 if (f & _HAVE_ICONV) {
497 sin.s = NULL;
498 sin.l = 0;
499 if (n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &sin, &sou, NULL) != 0) {
500 if (!(f & _ERRORS)) /* XXX won't be reported with _UNIDFEFAULT */
501 *emsg = N_("character set conversion failed on value");
502 f |= _ERRORS;
504 free(sou.s);
505 sou = sin;
507 n_iconv_close(fhicd);
509 #endif
511 memcpy(*result = salloc(sou.l +1), sou.s, sou.l +1);
512 free(sou.s);
513 NYD2_LEAVE;
514 return ((f & _ERRORS) != 0);
/* One recursion level of the RFC 2231 parameter builder.
 * Converts as much of self->mpb_value as fits into the on-stack buffer buf
 * (quoted-string escaping as long as the level is not in encoding mode,
 * percent encoding otherwise).  If the value is exhausted the levels are
 * joined via __mime_param_join(), otherwise the remaining value is handed to
 * a further level ("next", living on this stack frame) by recursion.
 * Gives up without producing a result once level 999 would be exceeded */
517 static void
518 _mime_param_create(struct mime_param_builder *self)
520 struct mime_param_builder next;
521 /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
522 * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
523 * On the other hand we may excess _LINELEN to avoid breaking up possible
524 * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
525 * support stateful encodings and (b) will try to synchronize on UTF-8 this
526 * problem is scarce, possibly even artificial */
527 char buf[n_MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
528 *bp, *bp_max, *bp_xmax, *bp_lanoenc;
529 char const *vb, *vb_lanoenc;
530 size_t vl;
531 enum {
532 _NONE = 0,
533 _ISENC = 1<<0,
534 _HADRAW = 1<<1,
535 _RAW = 1<<2
536 } f = _NONE;
537 NYD2_ENTER;
538 n_LCTA(sizeof(buf) >= MIME_LINELEN * 2, "Buffer to small for operation");
/* (Re)start of this level: also reached a second time, with _ISENC set, when
 * a byte which needs percent encoding is met before much output exists; all
 * output produced so far by this level is then discarded */
540 jneed_enc:
541 self->mpb_buf = bp = bp_lanoenc = buf;
542 self->mpb_buf_len = 0;
543 self->mpb_is_enc = ((f & _ISENC) != 0);
544 vb_lanoenc = vb = self->mpb_value;
545 vl = self->mpb_value_len;
547 /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
/* Both limits reserve room for the name and the "*999*='';" decoration, the
 * outermost level additionally for the character set if it encodes */
548 bp_max = (buf + MIME_LINELEN) -
549 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
550 bp_xmax = (buf + sizeof(buf)) -
551 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
552 if ((f & _ISENC) && self->mpb_level == 0) {
553 bp_max -= self->mpb_charset_len;
554 bp_xmax -= self->mpb_charset_len;
556 if (PTRCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
557 DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
558 bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
560 assert(PTRCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
/* Keep only the encoding decision, forget _HADRAW and _RAW */
562 f &= _ISENC;
563 while (vl > 0) {
564 union {char c; ui8_t uc;} u; u.c = *vb;
/* _RAW: assume the byte can be copied as-is; cleared again by the branches
 * which emit an escaped or percent encoded representation themselves */
566 f |= _RAW;
567 if (!(f & _ISENC)) {
568 if (u.uc > 0x7F || cntrlchar(u.c)) { /* XXX reject cntrlchar? */
569 /* We need to percent encode this character, possibly changing
570 * overall strategy, but anyway the one of this level, possibly
571 * rendering invalid any output byte we yet have produced here.
572 * Instead of throwing away that work just recurse if some fancy
573 * magic condition is true */
574 /* *However*, many tested MUAs fail to deal with parameters that
575 * are splitted across "too many" fields, including ones that
576 * misread RFC 2231 to allow only one digit, i.e., a maximum of
577 * ten. This is plain wrong, but that won't help their users */
578 if (PTR2SIZE(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
579 goto jrecurse;
580 f |= _ISENC;
581 goto jneed_enc;
584 if (u.uc == '"' || u.uc == '\\') {
585 f ^= _RAW;
586 bp[0] = '\\';
587 bp[1] = u.c;
588 bp += 2;
590 } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
591 f ^= _RAW;
592 bp[0] = '%';
593 n_c_to_hex_base16(bp + 1, u.c);
594 bp += 3;
597 ++vb;
598 --vl;
/* A raw byte was (is) stored: remember the input and output positions after
 * it as the last possible split point "after a non-encoded byte" */
599 if (f & _RAW) {
600 f |= _HADRAW;
601 vb_lanoenc = vb;
602 *bp++ = u.c;
603 bp_lanoenc = bp;
606 /* If all available space has been consumed we must split.
607 * Due to compatibility reasons we must take care not to break up
608 * multibyte sequences -- even though RFC 2231 rather implies that the
609 * splitted value should be joined (after percent encoded fields have
610 * been percent decoded) and the resulting string be treated in the
611 * specified character set / language, MUAs have been seen which apply
612 * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
613 * if stateful encodings cannot truly be supported like that?!..
615 * So split at 7-bit character if we have seen any and the wastage isn't
616 * too large; recall that we need to keep the overall number of P=V
617 * values as low as possible due to compatibility reasons.
618 * If we haven't seen any plain bytes be laxe and realize that bp_max
619 * reflects SHOULD lines, and try to extend this as long as possible.
620 * However, with UTF-8, try to backward synchronize on sequence start */
621 if (bp <= bp_max)
622 continue;
/* Split after the last raw byte: rewind output and input to that position */
624 if ((f & _HADRAW) && (PTRCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
625 (!self->mpb_is_utf8 &&
626 PTR2SIZE(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
627 bp = bp_lanoenc;
628 vl += PTR2SIZE(vb - vb_lanoenc);
629 vb = vb_lanoenc;
630 goto jrecurse;
/* UTF-8: if the byte just consumed is not a continuation byte (10xxxxxx),
 * take back its three output bytes so that the next level starts with it */
633 if (self->mpb_is_utf8 && ((ui8_t)(vb[-1]) & 0xC0) != 0x80) {
634 bp -= 3;
635 --vb;
636 ++vl;
637 goto jrecurse;
640 if (bp <= bp_xmax)
641 continue;
642 /* (Shit.) */
643 goto jrecurse;
646 /* That level made the great and completed encoding. Build result */
647 self->mpb_is_enc = ((f & _ISENC) != 0);
648 self->mpb_buf_len = PTR2SIZE(bp - buf);
649 __mime_param_join(self);
650 jleave:
651 NYD2_LEAVE;
652 return;
654 /* Need to recurse, take care not to excess magical limit of 999 levels */
655 jrecurse:
656 if (self->mpb_level == 999) {
657 if (options & OPT_D_V_VV)
658 n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
659 goto jleave;
662 self->mpb_is_enc = ((f & _ISENC) != 0);
663 self->mpb_buf_len = PTR2SIZE(bp - buf);
/* The next level works on the remaining value and links back to this one;
 * character set and result are deliberately left zeroed, they are only
 * taken from the outermost level */
665 memset(&next, 0, sizeof next);
666 next.mpb_next = self;
667 next.mpb_level = self->mpb_level + 1;
668 next.mpb_name_len = self->mpb_name_len;
669 next.mpb_value_len = vl;
670 next.mpb_is_utf8 = self->mpb_is_utf8;
671 next.mpb_name = self->mpb_name;
672 next.mpb_value = vb;
673 _mime_param_create(&next);
674 goto jleave;
/* Called by the deepest level of _mime_param_create() once the value is
 * exhausted: head is that deepest level, linked via .mpb_next up to the
 * outermost one.  Concatenates the prepared buffers of all levels into a
 * salloc()d string "NAME[*N][*]=VALUE; ..." that is stored in .mpb_result of
 * the outermost level; .mpb_rv of the outermost level becomes TRU1, or -TRU1
 * if the output had to be folded with a newline */
677 static void
678 __mime_param_join(struct mime_param_builder *head)
680 char nobuf[16];
681 struct mime_param_builder *np;
682 size_t i, ll; DBG( size_t len_max; )
683 struct str *result;
684 char *cp;
685 enum {
686 _NONE = 0,
687 _ISENC = 1<<0,
688 _ISQUOTE = 1<<1,
689 _ISCONT = 1<<2
690 } f = _NONE;
691 NYD2_ENTER;
693 /* Traverse the stack upwards to find out result length (worst case).
694 * Reverse the list while doing so */
/* After this loop head is the outermost level (level 0), and _ISENC is set
 * if any level needs encoding */
695 for (i = 0, np = head, head = NULL; np != NULL;) {
696 struct mime_param_builder *tmp;
698 i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
699 if (np->mpb_is_enc)
700 f |= _ISENC;
702 tmp = np->mpb_next;
703 np->mpb_next = head;
704 head = np;
705 np = tmp;
707 if (f & _ISENC)
708 i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
709 DBG( len_max = i; )
710 head->mpb_rv = TRU1;
712 result = head->mpb_result;
/* More than one level means continuation numbers ("*N") are necessary */
713 if (head->mpb_next != NULL)
714 f |= _ISCONT;
715 cp = result->s = salloc(i +1);
/* ll tracks the length of the current output line for folding decisions */
717 for (ll = 0, np = head;;) {
718 /* Name part */
719 memcpy(cp, np->mpb_name, i = np->mpb_name_len);
720 cp += i;
721 ll += i;
723 if (f & _ISCONT) {
724 char *cpo = cp, *nop = nobuf + sizeof(nobuf);
725 ui32_t noi = np->mpb_level;
/* Render the level number in decimal, backwards from the end of nobuf.
 * NOTE(review): embedded line 728 is absent from this listing; the two
 * statements below read like body and tail of a do-while -- confirm against
 * the original file */
727 *--nop = '\0';
729 *--nop = "0123456789"[noi % 10];
730 while ((noi /= 10) != 0);
732 *cp++ = '*';
733 while (*nop != '\0')
734 *cp++ = *nop++;
736 ll += PTR2SIZE(cp - cpo);
/* The "*" encoding marker: always on the first segment if anything at all is
 * encoded (_ISENC is cleared below once that segment is written), else only
 * on segments which are encoded themselves */
739 if ((f & _ISENC) || np->mpb_is_enc) {
740 *cp++ = '*';
741 ++ll;
743 *cp++ = '=';
744 ++ll;
746 /* Value part */
/* First segment of an encoded parameter: prepend "CHARSET''" (empty language
 * tag); non-encoded segments are instead wrapped in double quotes */
747 if (f & _ISENC) {
748 f &= ~_ISENC;
749 memcpy(cp, np->mpb_charset, i = np->mpb_charset_len);
750 cp += i;
751 cp[0] = '\'';
752 cp[1] = '\'';
753 cp += 2;
754 ll += i + 2;
755 } else if (!np->mpb_is_enc) {
756 f |= _ISQUOTE;
757 *cp++ = '"';
758 ++ll;
761 memcpy(cp, np->mpb_buf, i = np->mpb_buf_len);
762 cp += i;
763 ll += i;
765 if (f & _ISQUOTE) {
766 f ^= _ISQUOTE;
767 *cp++ = '"';
768 ++ll;
771 if ((np = np->mpb_next) == NULL)
772 break;
773 *cp++ = ';';
774 ++ll;
/* Fold before the next segment if it (worst case) would not fit on the
 * current line; the outermost .mpb_rv then reports a multi-line result */
776 i = ll;
777 i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
778 if (i >= MIME_LINELEN) {
779 head->mpb_rv = -TRU1;
780 *cp++ = '\n';
781 ll = 0;
784 *cp++ = ' ';
785 ++ll;
787 *cp = '\0';
788 result->l = PTR2SIZE(cp - result->s);
789 assert(result->l < len_max);
790 NYD2_LEAVE;
793 FL char *
794 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
796 struct str xval;
797 char *rv = NULL;
798 size_t plen;
799 char const *p;
800 NYD_ENTER;
802 plen = strlen(param);
803 p = headerbody;
805 /* At the beginning of headerbody there is no parameter=value pair xxx */
806 if (!whitechar(*p))
807 goto jskip1st;
809 for (;;) {
810 while (whitechar(*p))
811 ++p;
813 if (!ascncasecmp(p, param, plen)) {
814 p += plen;
815 while (whitechar(*p)) /* XXX? */
816 ++p;
817 switch (*p++) {
818 case '*':
819 rv = _rfc2231_param_parse(param, plen, p);
820 goto jleave;
821 case '=':
822 if (!_mime_param_value_trim(&xval, p, NULL)) {
823 /* XXX LOG? */
824 goto jleave;
826 rv = xval.s;
828 /* We do have a result, but some (elder) software (S-nail <v14.8)
829 * will use RFC 2047 encoded words in parameter values, too */
830 /* TODO Automatically check whether the value seems to be RFC 2047
831 * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
832 if ((p = strstr(rv, "=?")) != NULL && strstr(p, "?=") != NULL) {
833 struct str ti, to;
835 ti.l = strlen(ti.s = rv);
836 mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
837 rv = savestrbuf(to.s, to.l);
838 free(to.s);
840 goto jleave;
841 default:
842 /* Not our desired parameter, skip and continue */
843 break;
847 jskip1st:
848 if (*(p = _mime_param_skip(p)) == '\0')
849 goto jleave;
852 jleave:
853 NYD_LEAVE;
854 return rv;
857 FL si8_t
858 mime_param_create(struct str *result, char const *name, char const *value)
860 /* TODO All this needs rework when we have (1) a real string and even more
861 * TODO (2) use objects instead of stupid string concat; it's temporary
862 * TODO I.e., this function should return a HeaderBodyParam */
863 struct mime_param_builder top;
864 size_t i;
865 NYD_ENTER;
867 memset(result, 0, sizeof *result);
869 memset(&top, 0, sizeof top);
870 top.mpb_result = result;
871 if ((i = strlen(top.mpb_name = name)) > UI32_MAX)
872 goto jleave;
873 top.mpb_name_len = (ui32_t)i;
874 if ((i = strlen(top.mpb_value = value)) > UI32_MAX)
875 goto jleave;
876 top.mpb_value_len = (ui32_t)i;
877 if ((i = strlen(name = ok_vlook(ttycharset))) > UI32_MAX)
878 goto jleave;
879 top.mpb_charset = salloc((top.mpb_charset_len = (ui32_t)i) +1);
880 for (i = 0; *name != '\0'; ++i, ++name)
881 ((char*)n_UNCONST(top.mpb_charset))[i] = lowerconv(*name);
882 ((char*)n_UNCONST(top.mpb_charset))[i] = '\0';
883 if(top.mpb_charset_len >= 4 && !ascncasecmp(top.mpb_charset, "utf", 3) &&
884 ((top.mpb_charset[3] == '-' && top.mpb_charset[4] == '8' &&
885 top.mpb_charset_len == 5) || (top.mpb_charset[3] == '8' &&
886 top.mpb_charset_len == 4)))
887 top.mpb_is_utf8 = TRU1;
888 else
889 top.mpb_is_utf8 = FAL0;
891 _mime_param_create(&top);
892 jleave:
893 NYD_LEAVE;
894 return top.mpb_rv;
897 FL char *
898 mime_param_boundary_get(char const *headerbody, size_t *len)
900 char *q = NULL, *p;
901 NYD_ENTER;
903 if ((p = mime_param_get("boundary", headerbody)) != NULL) {
904 size_t sz = strlen(p);
906 if (len != NULL)
907 *len = sz + 2;
908 q = salloc(sz + 2 +1);
909 q[0] = q[1] = '-';
910 memcpy(q + 2, p, sz);
911 *(q + sz + 2) = '\0';
913 NYD_LEAVE;
914 return q;
917 FL char *
918 mime_param_boundary_create(void)
920 char *bp;
921 NYD_ENTER;
923 bp = salloc(36 + 6 +1);
924 bp[0] = bp[2] = bp[39] = bp[41] = '=';
925 bp[1] = bp[40] = '-';
926 memcpy(bp + 3, getrandstring(36), 36);
927 bp[42] = '\0';
928 NYD_LEAVE;
929 return bp;
932 /* s-it-mode */