`read': work more like shells..
[s-mailx.git] / mime_param.c
blob5bb1ca29afe15de29ee59694f68bffb550d3bd5e
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ MIME parameter handling.
4 * Copyright (c) 2016 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #undef n_FILE
19 #define n_FILE mime_param
21 #ifndef HAVE_AMALGAMATION
22 # include "nail.h"
23 #endif
25 struct rfc2231_joiner {
26 struct rfc2231_joiner *rj_next;
27 ui32_t rj_no; /* Continuation number */
28 ui32_t rj_len; /* of useful data in .rj_dat */
29 ui32_t rj_val_off; /* Start of value data therein */
30 ui32_t rj_cs_len; /* Length of charset part */
31 bool_t rj_is_enc; /* Is percent encoded */
32 ui8_t __pad[7];
33 char const *rj_dat;
36 struct mime_param_builder {
37 struct mime_param_builder *mpb_next;
38 struct str *mpb_result;
39 ui32_t mpb_level; /* of recursion (<-> continuation number) */
40 ui32_t mpb_name_len; /* of the parameter .mpb_name */
41 ui32_t mpb_value_len; /* of remaining value */
42 ui32_t mpb_charset_len; /* of .mpb_charset (only in outermost level) */
43 ui32_t mpb_buf_len; /* Usable result of this level in .mpb_buf */
44 bool_t mpb_is_enc; /* Level requires encoding */
45 ui8_t __dummy[1];
46 bool_t mpb_is_utf8; /* Encoding is UTF-8 */
47 si8_t mpb_rv;
48 char const *mpb_name;
49 char const *mpb_value; /* Remains of, once the level was entered */
50 char const *mpb_charset; /* *ttycharset* */
51 char *mpb_buf; /* Pointer to on-stack buffer */
54 /* All ASCII characters which cause RFC 2231 to be applied XXX check -1 slots*/
55 static bool_t const _rfc2231_etab[] = {
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* NUL..SI */
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* DLE..US */
58 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, /* CAN.. / */
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 0.. ? */
61 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @.. O */
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, /* P.. _ */
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* `.. o */
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p..DEL */
67 /* In a headerbody, at a "param=XY" that we're not interested in, skip over the
68 * entire construct, return pointer to the first byte thereafter or to NUL */
69 static char const * _mime_param_skip(char const *hbp);
71 /* Trim value, which points to after the "name[RFC 2231 stuff]=".
72 * On successful return (1,-1; -1 is returned if the value was quoted via
73 * double quotation marks) a set end_or_null points to after the value and any
74 * possible separator and result->s is the salloc()d normalized value */
75 static si8_t _mime_param_value_trim(struct str *result, char const *start,
76 char const **end_or_null);
78 /* mime_param_get() found the desired parameter but it seems to use RFC 2231
79 * extended syntax: perform full RFC 2231 parsing starting at this point.
80 * Note that _join() returns is-error */
81 static char * _rfc2231_param_parse(char const *param, size_t plen,
82 char const *hbp);
83 static bool_t __rfc2231_join(struct rfc2231_joiner *head, char **result,
84 char const **emsg);
86 /* Recursive parameter builder. Note we have a magic limit of 999 levels.
87 * Prepares a portion of output in self->mpb_buf;
88 * once >mpb_value is worked completely the deepmost level joins the result
89 * into >mpb_result and unrolls the stack. */
90 static void _mime_param_create(struct mime_param_builder *self);
91 static void __mime_param_join(struct mime_param_builder *head);
93 static char const *
94 _mime_param_skip(char const *hbp)
96 char co, cn;
97 NYD2_ENTER;
99 /* Skip over parameter name - note we may have skipped over an entire
100 * parameter name and thus point to a "="; i haven't yet truly checked
101 * against MIME RFCs, just test for ";" in the meanwhile XXX */
102 while ((cn = *hbp) != '\0' && cn != '=' && cn != ';')
103 ++hbp;
104 if (cn == '\0')
105 goto jleave;
106 ++hbp;
107 if (cn == ';')
108 goto jleave;
110 while (whitechar((cn = *hbp))) /* XXX */
111 ++hbp;
112 if (cn == '\0')
113 goto jleave;
115 if (cn == '"') {
116 co = '\0';
117 while ((cn = *++hbp) != '\0' && (cn != '"' || co == '\\'))
118 co = (co == '\\') ? '\0' : cn;
119 if (cn != '\0' && (cn = *++hbp) == ';')
120 ++hbp;
121 } else {
122 for (;; cn = *++hbp)
123 if (cn == '\0' || cn == ';' || whitechar(cn))
124 break;
125 if (cn != '\0')
126 ++hbp;
128 jleave:
129 NYD2_LEAVE;
130 return hbp;
133 static si8_t
134 _mime_param_value_trim(struct str *result, char const *start,
135 char const **end_or_null)
137 char const *e;
138 char co, cn;
139 size_t i;
140 si8_t rv;
141 NYD2_ENTER;
143 while (whitechar(*start)) /* XXX? */
144 ++start;
146 if (*start == '"') {
147 for (co = '\0', e = ++start;; ++e)
148 if ((cn = *e) == '\0')
149 goto jerr;
150 else if (cn == '"' && co != '\\')
151 break;
152 else if (cn == '\\' && co == '\\')
153 co = '\0';
154 else
155 co = cn;
156 i = PTR2SIZE(e++ - start);
157 rv = -TRU1;
158 } else {
159 for (e = start; (cn = *e) != '\0' && !whitechar(cn) && cn != ';'; ++e)
161 i = PTR2SIZE(e - start);
162 rv = TRU1;
165 result->s = salloc(i +1);
166 if (rv > 0) {
167 memcpy(result->s, start, result->l = i);
168 result->s[i] = '\0';
169 } else {
170 size_t j;
171 char *cp;
173 for (j = 0, cp = result->s, co = '\0'; i-- > 0; co = cn) {
174 cn = *start++;
175 if (cn != '\\' || co == '\\') {
176 cp[j++] = cn;
177 if (cn == '\\')
178 cn = '\0';
181 cp[j] = '\0';
183 result->s = cp;
184 result->l = j;
187 if (end_or_null != NULL) {
188 while (*e != '\0' && *e == ';')
189 ++e;
190 *end_or_null = e;
192 jleave:
193 NYD2_LEAVE;
194 return rv;
195 jerr:
196 rv = FAL0;
197 goto jleave;
200 static char *
201 _rfc2231_param_parse(char const *param, size_t plen, char const *hbp)
203 /* TODO Do it for real and unite with mime_param_get() */
204 struct str xval;
205 char nobuf[32], *eptr, *rv = NULL, c;
206 char const *hbp_base, *cp, *emsg = NULL;
207 struct rfc2231_joiner *head = NULL, *np;
208 bool_t errors = FAL0;
209 size_t i;
210 NYD2_ENTER;
212 /* We were called by mime_param_get() after a param name match that
213 * involved "*", so jump to the matching code */
214 hbp_base = hbp;
215 goto jumpin;
217 for (; *hbp != '\0'; hbp_base = hbp) {
218 while (whitechar(*hbp))
219 ++hbp;
221 if (!ascncasecmp(hbp, param, plen)) {
222 hbp += plen;
223 while (whitechar(*hbp))
224 ++hbp;
225 if (*hbp++ != '*')
226 goto jerr;
228 /* RFC 2231 extensions: "NAME[*DIGITS][*]=", where "*DIGITS" indicates
229 * parameter continuation and the lone asterisk "*" percent encoded
230 * values -- if encoding is used the "*0" or lone parameter value
231 * MUST be encoded and start with a "CHARSET'LANGUAGE'" construct,
232 * where both of CHARSET and LANGUAGE are optional (we do effectively
233 * generate error if CHARSET is missing though).
234 * Continuations may not use that "C'L'" construct, but be tolerant
235 * and ignore those. Also encoded and non-encoded continuations may
236 * occur, i.e., perform percent en-/decoding only as necessary.
237 * Continuations may occur in any order */
238 /* xxx RFC 2231 parsing ignores language tags */
239 jumpin:
240 for (cp = hbp; digitchar(*cp); ++cp)
242 i = PTR2SIZE(cp - hbp);
243 if (i != 0) {
244 if (i >= sizeof(nobuf)) {
245 emsg = N_("too many digits to form a valid number");
246 goto jerr;
247 } else if ((c = *cp) != '=' && c != '*') {
248 emsg = N_("expected = or * after leading digits");
249 goto jerr;
251 memcpy(nobuf, hbp, i);
252 nobuf[i] = '\0';
253 if((n_idec_uiz_cp(&i, nobuf, 10, NULL
254 ) & (n_IDEC_STATE_EMASK | n_IDEC_STATE_CONSUMED)
255 ) != n_IDEC_STATE_CONSUMED || i >= 999){
256 emsg = N_("invalid continuation sequence number");
257 goto jerr;
259 hbp = ++cp;
261 /* Value encoded? */
262 if (c == '*') {
263 if (*hbp++ != '=')
264 goto jeeqaaster;
265 } else if (c != '=') {
266 jeeqaaster:
267 emsg = N_("expected = after asterisk *");
268 goto jerr;
270 } else {
271 /* In continuation mode that is an error, however */
272 if (head != NULL) {
273 emsg = N_("missing continuation sequence number");
274 goto jerr;
276 /* Parameter value is encoded, may define encoding */
277 c = '*';
278 if (*cp != '=')
279 goto jeeqaaster;
280 hbp = ++cp;
281 i = 0;
284 /* Create new node and insert it sorted; should be faster than
285 * creating an unsorted list and sorting it after parsing */
286 np = smalloc(sizeof *np);
287 np->rj_next = NULL;
288 np->rj_no = (ui32_t)i;
289 np->rj_is_enc = (c == '*');
290 np->rj_val_off = np->rj_cs_len = 0;
292 if (head == NULL)
293 head = np;
294 else if (i < head->rj_no) {
295 np->rj_next = head;
296 head = np;
297 } else {
298 struct rfc2231_joiner *l = NULL, *x = head;
300 while (x != NULL && i > x->rj_no)
301 l = x, x = x->rj_next;
302 if (x != NULL)
303 np->rj_next = x;
304 assert(l != NULL);
305 l->rj_next = np;
308 switch (_mime_param_value_trim(&xval, hbp, &cp)) {
309 default:
310 emsg = (c == '*') ? N_("invalid value encoding")/* XXX fake */
311 : N_("faulty value - missing closing quotation mark \"?");
312 goto jerr;
313 case -1:
314 /* XXX if (np->is_enc && memchr(np->dat, '\'', i) != NULL) {
315 * XXX emsg = N_("character set info not allowed here");
316 * XXX goto jerr;
317 * XXX } */np->rj_is_enc = FAL0; /* Silently ignore */
318 /* FALLTHRU */
319 case 1:
320 if (xval.l >= UI32_MAX) {
321 emsg = N_("parameter value too long");
322 goto jerr;
324 np->rj_len = (ui32_t)xval.l;
325 np->rj_dat = xval.s;
326 break;
329 /* Watch out for character set and language info */
330 if (np->rj_is_enc && (eptr = memchr(xval.s, '\'', xval.l)) != NULL) {
331 np->rj_cs_len = PTR2SIZE(eptr - xval.s);
332 if ((eptr = memchr(eptr + 1, '\'', xval.l - np->rj_cs_len - 1))
333 == NULL) {
334 emsg = N_("faulty RFC 2231 parameter extension");
335 goto jerr;
337 np->rj_val_off = PTR2SIZE(++eptr - xval.s);
340 hbp = cp;
341 } else
342 hbp = _mime_param_skip(hbp);
344 assert(head != NULL); /* (always true due to jumpin:, but..) */
346 errors |= __rfc2231_join(head, &rv, &emsg);
347 if (errors && (n_poption & n_PO_D_V_VV)) {
348 /* TODO should set global flags so that at the end of an operation
349 * TODO (for a message) a summary can be printed: faulty MIME, xy */
350 if (emsg == NULL)
351 emsg = N_("multiple causes");
352 n_err(_("Message had MIME errors: %s\n"), V_(emsg));
354 jleave:
355 NYD2_LEAVE;
356 return rv;
358 jerr:
359 while ((np = head) != NULL) {
360 head = np->rj_next;
361 free(np);
363 if (n_poption & n_PO_D_V) {
364 if (emsg == NULL)
365 emsg = N_("expected asterisk *");
366 n_err(_("Faulty RFC 2231 MIME parameter value: %s: %s\n"
367 "Near: %s\n"), param, V_(emsg), hbp_base);
369 rv = NULL;
370 goto jleave;
373 static bool_t
374 __rfc2231_join(struct rfc2231_joiner *head, char **result, char const **emsg)
376 struct str sin, sou;
377 struct rfc2231_joiner *np;
378 char const *cp;
379 size_t i;
380 enum {
381 _NONE = 0,
382 _HAVE_ENC = 1<<0,
383 _HAVE_ICONV = 1<<1,
384 _SEEN_ANY = 1<<2,
385 _ERRORS = 1<<3
386 } f = _NONE;
387 ui32_t no;
388 #ifdef HAVE_ICONV
389 iconv_t fhicd;
390 #endif
391 NYD2_ENTER;
393 #ifdef HAVE_ICONV
394 n_UNINIT(fhicd, (iconv_t)-1);
396 if (head->rj_is_enc) {
397 char const *tcs;
399 f |= _HAVE_ENC;
400 if (head->rj_cs_len == 0) {
401 /* It is an error if the character set is not set, the language alone
402 * cannot convert characters, let aside that we don't use it at all */
403 *emsg = N_("MIME RFC 2231 invalidity: missing character set\n");
404 f |= _ERRORS;
405 } else if (ascncasecmp(tcs = ok_vlook(ttycharset),
406 head->rj_dat, head->rj_cs_len)) {
407 char *cs = ac_alloc(head->rj_cs_len +1);
409 memcpy(cs, head->rj_dat, head->rj_cs_len);
410 cs[head->rj_cs_len] = '\0';
411 if ((fhicd = n_iconv_open(tcs, cs)) != (iconv_t)-1)
412 f |= _HAVE_ICONV;
413 else {
414 *emsg = N_("necessary character set conversion missing");
415 f |= _ERRORS;
417 ac_free(cs);
420 #endif
422 if (head->rj_no != 0) {
423 if (!(f & _ERRORS))
424 *emsg = N_("First RFC 2231 parameter value chunk number is not 0");
425 f |= _ERRORS;
428 for (sou.s = NULL, sou.l = 0, no = 0; (np = head) != NULL; free(np)) {
429 head = np->rj_next;
431 if (np->rj_no != no++) {
432 if (!(f & _ERRORS))
433 *emsg = N_("RFC 2231 parameter value chunks are not contiguous");
434 f |= _ERRORS;
437 /* RFC 2231 allows such info only in the first continuation, and
438 * furthermore MUSTs the first to be encoded, then */
439 if (/*np->rj_is_enc &&*/ np->rj_val_off > 0 &&
440 (f & (_HAVE_ENC | _SEEN_ANY)) != _HAVE_ENC) {
441 if (!(f & _ERRORS))
442 *emsg = N_("invalid redundant RFC 2231 charset/language ignored");
443 f |= _ERRORS;
445 f |= _SEEN_ANY;
447 i = np->rj_len - np->rj_val_off;
448 if (!np->rj_is_enc)
449 n_str_add_buf(&sou, np->rj_dat + np->rj_val_off, i);
450 else {
451 /* Perform percent decoding */
452 sin.s = smalloc(i +1);
453 sin.l = 0;
455 for (cp = np->rj_dat + np->rj_val_off; i > 0;) {
456 char c;
458 if ((c = *cp++) == '%') {
459 si32_t cc;
461 if (i < 3 || (cc = n_c_from_hex_base16(cp)) < 0) {
462 if (!(f & _ERRORS))
463 *emsg = N_("invalid RFC 2231 percent encoded sequence");
464 f |= _ERRORS;
465 goto jhex_putc;
467 sin.s[sin.l++] = (char)cc;
468 cp += 2;
469 i -= 3;
470 } else {
471 jhex_putc:
472 sin.s[sin.l++] = c;
473 --i;
476 sin.s[sin.l] = '\0';
478 n_str_add_buf(&sou, sin.s, sin.l);
479 free(sin.s);
483 /* And add character set conversion on top as necessary.
484 * RFC 2231 is pragmatic: encode only mentions percent encoding and the
485 * character set for the entire string ("[no] facility for using more
486 * than one character set or language"), therefore "continuations may
487 * contain a mixture of encoded and unencoded segments" applies to
488 * a contiguous string of a single character set that has been torn in
489 * pieces due to space restrictions, and it happened that some pieces
490 * didn't need to be percent encoded.
492 * _In particular_ it therefore doesn't repeat the RFC 2047 paradigm
493 * that encoded-words-are-atomic, meaning that a single character-set
494 * conversion run over the final, joined, partially percent-decoded value
495 * should be sufficient */
496 #ifdef HAVE_ICONV
497 if (f & _HAVE_ICONV) {
498 sin.s = NULL;
499 sin.l = 0;
500 if (n_iconv_str(fhicd, n_ICONV_UNIDEFAULT, &sin, &sou, NULL) != 0) {
501 if (!(f & _ERRORS)) /* XXX won't be reported with _UNIDFEFAULT */
502 *emsg = N_("character set conversion failed on value");
503 f |= _ERRORS;
505 free(sou.s);
506 sou = sin;
508 n_iconv_close(fhicd);
510 #endif
512 memcpy(*result = salloc(sou.l +1), sou.s, sou.l +1);
513 free(sou.s);
514 NYD2_LEAVE;
515 return ((f & _ERRORS) != 0);
518 static void
519 _mime_param_create(struct mime_param_builder *self)
521 struct mime_param_builder next;
522 /* Don't use MIME_LINELEN_(MAX|LIMIT) stack buffer sizes: normally we won't
523 * exceed plain MIME_LINELEN, so that this would be a factor 10 wastage.
524 * On the other hand we may excess _LINELEN to avoid breaking up possible
525 * multibyte sequences until sizeof(buf) is reached, but since we (a) don't
526 * support stateful encodings and (b) will try to synchronize on UTF-8 this
527 * problem is scarce, possibly even artificial */
528 char buf[n_MIN(MIME_LINELEN_MAX >> 1, MIME_LINELEN * 2)],
529 *bp, *bp_max, *bp_xmax, *bp_lanoenc;
530 char const *vb, *vb_lanoenc;
531 size_t vl;
532 enum {
533 _NONE = 0,
534 _ISENC = 1<<0,
535 _HADRAW = 1<<1,
536 _RAW = 1<<2
537 } f = _NONE;
538 NYD2_ENTER;
539 n_LCTA(sizeof(buf) >= MIME_LINELEN * 2, "Buffer to small for operation");
541 jneed_enc:
542 self->mpb_buf = bp = bp_lanoenc = buf;
543 self->mpb_buf_len = 0;
544 self->mpb_is_enc = ((f & _ISENC) != 0);
545 vb_lanoenc = vb = self->mpb_value;
546 vl = self->mpb_value_len;
548 /* Configure bp_max to fit in SHOULD, bp_xmax to extent */
549 bp_max = (buf + MIME_LINELEN) -
550 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
551 bp_xmax = (buf + sizeof(buf)) -
552 (1 + self->mpb_name_len + sizeof("*999*='';") -1 + 2);
553 if ((f & _ISENC) && self->mpb_level == 0) {
554 bp_max -= self->mpb_charset_len;
555 bp_xmax -= self->mpb_charset_len;
557 if (PTRCMP(bp_max, <=, buf + sizeof("Hunky Dory"))) {
558 DBG( n_alert("_mime_param_create(): Hunky Dory!"); )
559 bp_max = buf + (MIME_LINELEN >> 1); /* And then it is SHOULD, anyway */
561 assert(PTRCMP(bp_max + (4 * 3), <=, bp_xmax)); /* UTF-8 extra pad, below */
563 f &= _ISENC;
564 while (vl > 0) {
565 union {char c; ui8_t uc;} u; u.c = *vb;
567 f |= _RAW;
568 if (!(f & _ISENC)) {
569 if (u.uc > 0x7F || cntrlchar(u.c)) { /* XXX reject cntrlchar? */
570 /* We need to percent encode this character, possibly changing
571 * overall strategy, but anyway the one of this level, possibly
572 * rendering invalid any output byte we yet have produced here.
573 * Instead of throwing away that work just recurse if some fancy
574 * magic condition is true */
575 /* *However*, many tested MUAs fail to deal with parameters that
576 * are splitted across "too many" fields, including ones that
577 * misread RFC 2231 to allow only one digit, i.e., a maximum of
578 * ten. This is plain wrong, but that won't help their users */
579 if (PTR2SIZE(bp - buf) > /*10 (strawberry) COMPAT*/MIME_LINELEN>>1)
580 goto jrecurse;
581 f |= _ISENC;
582 goto jneed_enc;
585 if (u.uc == '"' || u.uc == '\\') {
586 f ^= _RAW;
587 bp[0] = '\\';
588 bp[1] = u.c;
589 bp += 2;
591 } else if (u.uc > 0x7F || _rfc2231_etab[u.uc]) {
592 f ^= _RAW;
593 bp[0] = '%';
594 n_c_to_hex_base16(bp + 1, u.c);
595 bp += 3;
598 ++vb;
599 --vl;
600 if (f & _RAW) {
601 f |= _HADRAW;
602 vb_lanoenc = vb;
603 *bp++ = u.c;
604 bp_lanoenc = bp;
607 /* If all available space has been consumed we must split.
608 * Due to compatibility reasons we must take care not to break up
609 * multibyte sequences -- even though RFC 2231 rather implies that the
610 * splitted value should be joined (after percent encoded fields have
611 * been percent decoded) and the resulting string be treated in the
612 * specified character set / language, MUAs have been seen which apply
613 * the RFC 2047 encoded-words-are-atomic even to RFC 2231 values, even
614 * if stateful encodings cannot truly be supported like that?!..
616 * So split at 7-bit character if we have seen any and the wastage isn't
617 * too large; recall that we need to keep the overall number of P=V
618 * values as low as possible due to compatibility reasons.
619 * If we haven't seen any plain bytes be laxe and realize that bp_max
620 * reflects SHOULD lines, and try to extend this as long as possible.
621 * However, with UTF-8, try to backward synchronize on sequence start */
622 if (bp <= bp_max)
623 continue;
625 if ((f & _HADRAW) && (PTRCMP(bp - bp_lanoenc, <=, bp_lanoenc - buf) ||
626 (!self->mpb_is_utf8 &&
627 PTR2SIZE(bp_lanoenc - buf) >= (MIME_LINELEN >> 2)))) {
628 bp = bp_lanoenc;
629 vl += PTR2SIZE(vb - vb_lanoenc);
630 vb = vb_lanoenc;
631 goto jrecurse;
634 if (self->mpb_is_utf8 && ((ui8_t)(vb[-1]) & 0xC0) != 0x80) {
635 bp -= 3;
636 --vb;
637 ++vl;
638 goto jrecurse;
641 if (bp <= bp_xmax)
642 continue;
643 /* (Shit.) */
644 goto jrecurse;
647 /* That level made the great and completed encoding. Build result */
648 self->mpb_is_enc = ((f & _ISENC) != 0);
649 self->mpb_buf_len = PTR2SIZE(bp - buf);
650 __mime_param_join(self);
651 jleave:
652 NYD2_LEAVE;
653 return;
655 /* Need to recurse, take care not to excess magical limit of 999 levels */
656 jrecurse:
657 if (self->mpb_level == 999) {
658 if (n_poption & n_PO_D_V_VV)
659 n_err(_("Message RFC 2231 parameters nested too deeply!\n"));
660 goto jleave;
663 self->mpb_is_enc = ((f & _ISENC) != 0);
664 self->mpb_buf_len = PTR2SIZE(bp - buf);
666 memset(&next, 0, sizeof next);
667 next.mpb_next = self;
668 next.mpb_level = self->mpb_level + 1;
669 next.mpb_name_len = self->mpb_name_len;
670 next.mpb_value_len = vl;
671 next.mpb_is_utf8 = self->mpb_is_utf8;
672 next.mpb_name = self->mpb_name;
673 next.mpb_value = vb;
674 _mime_param_create(&next);
675 goto jleave;
678 static void
679 __mime_param_join(struct mime_param_builder *head)
681 char nobuf[16];
682 struct mime_param_builder *np;
683 size_t i, ll; DBG( size_t len_max; )
684 struct str *result;
685 char *cp;
686 enum {
687 _NONE = 0,
688 _ISENC = 1<<0,
689 _ISQUOTE = 1<<1,
690 _ISCONT = 1<<2
691 } f = _NONE;
692 NYD2_ENTER;
694 /* Traverse the stack upwards to find out result length (worst case).
695 * Reverse the list while doing so */
696 for (i = 0, np = head, head = NULL; np != NULL;) {
697 struct mime_param_builder *tmp;
699 i += np->mpb_buf_len + np->mpb_name_len + sizeof(" *999*=\"\";\n") -1;
700 if (np->mpb_is_enc)
701 f |= _ISENC;
703 tmp = np->mpb_next;
704 np->mpb_next = head;
705 head = np;
706 np = tmp;
708 if (f & _ISENC)
709 i += head->mpb_charset_len; /* sizeof("''") -1 covered by \"\" above */
710 DBG( len_max = i; )
711 head->mpb_rv = TRU1;
713 result = head->mpb_result;
714 if (head->mpb_next != NULL)
715 f |= _ISCONT;
716 cp = result->s = salloc(i +1);
718 for (ll = 0, np = head;;) {
719 /* Name part */
720 memcpy(cp, np->mpb_name, i = np->mpb_name_len);
721 cp += i;
722 ll += i;
724 if (f & _ISCONT) {
725 char *cpo = cp, *nop = nobuf + sizeof(nobuf);
726 ui32_t noi = np->mpb_level;
728 *--nop = '\0';
730 *--nop = "0123456789"[noi % 10];
731 while ((noi /= 10) != 0);
733 *cp++ = '*';
734 while (*nop != '\0')
735 *cp++ = *nop++;
737 ll += PTR2SIZE(cp - cpo);
740 if ((f & _ISENC) || np->mpb_is_enc) {
741 *cp++ = '*';
742 ++ll;
744 *cp++ = '=';
745 ++ll;
747 /* Value part */
748 if (f & _ISENC) {
749 f &= ~_ISENC;
750 memcpy(cp, np->mpb_charset, i = np->mpb_charset_len);
751 cp += i;
752 cp[0] = '\'';
753 cp[1] = '\'';
754 cp += 2;
755 ll += i + 2;
756 } else if (!np->mpb_is_enc) {
757 f |= _ISQUOTE;
758 *cp++ = '"';
759 ++ll;
762 memcpy(cp, np->mpb_buf, i = np->mpb_buf_len);
763 cp += i;
764 ll += i;
766 if (f & _ISQUOTE) {
767 f ^= _ISQUOTE;
768 *cp++ = '"';
769 ++ll;
772 if ((np = np->mpb_next) == NULL)
773 break;
774 *cp++ = ';';
775 ++ll;
777 i = ll;
778 i += np->mpb_name_len + np->mpb_buf_len + sizeof(" *999*=\"\";\n") -1;
779 if (i >= MIME_LINELEN) {
780 head->mpb_rv = -TRU1;
781 *cp++ = '\n';
782 ll = 0;
785 *cp++ = ' ';
786 ++ll;
788 *cp = '\0';
789 result->l = PTR2SIZE(cp - result->s);
790 assert(result->l < len_max);
791 NYD2_LEAVE;
794 FL char *
795 mime_param_get(char const *param, char const *headerbody) /* TODO rewr. */
797 struct str xval;
798 char *rv = NULL;
799 size_t plen;
800 char const *p;
801 NYD_ENTER;
803 plen = strlen(param);
804 p = headerbody;
806 /* At the beginning of headerbody there is no parameter=value pair xxx */
807 if (!whitechar(*p))
808 goto jskip1st;
810 for (;;) {
811 while (whitechar(*p))
812 ++p;
814 if (!ascncasecmp(p, param, plen)) {
815 p += plen;
816 while (whitechar(*p)) /* XXX? */
817 ++p;
818 switch (*p++) {
819 case '*':
820 rv = _rfc2231_param_parse(param, plen, p);
821 goto jleave;
822 case '=':
823 if (!_mime_param_value_trim(&xval, p, NULL)) {
824 /* XXX LOG? */
825 goto jleave;
827 rv = xval.s;
829 /* We do have a result, but some (elder) software (S-nail <v14.8)
830 * will use RFC 2047 encoded words in parameter values, too */
831 /* TODO Automatically check whether the value seems to be RFC 2047
832 * TODO encwd. -- instead use *rfc2047_parameters* like mutt(1)? */
833 if ((p = strstr(rv, "=?")) != NULL && strstr(p, "?=") != NULL) {
834 struct str ti, to;
836 ti.l = strlen(ti.s = rv);
837 mime_fromhdr(&ti, &to, TD_ISPR | TD_ICONV | TD_DELCTRL);
838 rv = savestrbuf(to.s, to.l);
839 free(to.s);
841 goto jleave;
842 default:
843 /* Not our desired parameter, skip and continue */
844 break;
848 jskip1st:
849 if (*(p = _mime_param_skip(p)) == '\0')
850 goto jleave;
853 jleave:
854 NYD_LEAVE;
855 return rv;
858 FL si8_t
859 mime_param_create(struct str *result, char const *name, char const *value)
861 /* TODO All this needs rework when we have (1) a real string and even more
862 * TODO (2) use objects instead of stupid string concat; it's temporary
863 * TODO I.e., this function should return a HeaderBodyParam */
864 struct mime_param_builder top;
865 size_t i;
866 NYD_ENTER;
868 memset(result, 0, sizeof *result);
870 memset(&top, 0, sizeof top);
871 top.mpb_result = result;
872 if ((i = strlen(top.mpb_name = name)) > UI32_MAX)
873 goto jleave;
874 top.mpb_name_len = (ui32_t)i;
875 if ((i = strlen(top.mpb_value = value)) > UI32_MAX)
876 goto jleave;
877 top.mpb_value_len = (ui32_t)i;
878 if ((i = strlen(name = ok_vlook(ttycharset))) > UI32_MAX)
879 goto jleave;
880 top.mpb_charset = salloc((top.mpb_charset_len = (ui32_t)i) +1);
881 for (i = 0; *name != '\0'; ++i, ++name)
882 ((char*)n_UNCONST(top.mpb_charset))[i] = lowerconv(*name);
883 ((char*)n_UNCONST(top.mpb_charset))[i] = '\0';
884 if(top.mpb_charset_len >= 4 && !ascncasecmp(top.mpb_charset, "utf", 3) &&
885 ((top.mpb_charset[3] == '-' && top.mpb_charset[4] == '8' &&
886 top.mpb_charset_len == 5) || (top.mpb_charset[3] == '8' &&
887 top.mpb_charset_len == 4)))
888 top.mpb_is_utf8 = TRU1;
889 else
890 top.mpb_is_utf8 = FAL0;
892 _mime_param_create(&top);
893 jleave:
894 NYD_LEAVE;
895 return top.mpb_rv;
898 FL char *
899 mime_param_boundary_get(char const *headerbody, size_t *len)
901 char *q = NULL, *p;
902 NYD_ENTER;
904 if ((p = mime_param_get("boundary", headerbody)) != NULL) {
905 size_t sz = strlen(p);
907 if (len != NULL)
908 *len = sz + 2;
909 q = salloc(sz + 2 +1);
910 q[0] = q[1] = '-';
911 memcpy(q + 2, p, sz);
912 *(q + sz + 2) = '\0';
914 NYD_LEAVE;
915 return q;
918 FL char *
919 mime_param_boundary_create(void)
921 char *bp;
922 NYD_ENTER;
924 bp = salloc(36 + 6 +1);
925 bp[0] = bp[2] = bp[39] = bp[41] = '=';
926 bp[1] = bp[40] = '-';
927 memcpy(bp + 3, getrandstring(36), 36);
928 bp[42] = '\0';
929 NYD_LEAVE;
930 return bp;
933 /* s-it-mode */