mime.c:mime_write_tohdr(): complete rewrite (Peter Hofmann)..
[s-mailx.git] / filter.c
blob8960cfb347197b367b6df9a5cbcbbdc2b9eea820
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Filter objects.
4 * Copyright (c) 2013 - 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #ifndef HAVE_AMALGAMATION
20 # include "nail.h"
21 #endif
/*
 * Quotation filter
 */

/*
 * TODO quotation filter: anticipate in future data: don't break if only WS
 * TODO or a LF escaping \ follows on the line (simply reuse the latter).
 */
32 #ifdef HAVE_QUOTE_FOLD
33 CTA(QUOTE_MAX > 3);
/* States of the quote-fold line parser.  quoteflt_reset() selects _QF_CLEAN,
 * and quoteflt_push() dispatches _QF_CLEAN and _QF_PREFIX to the very same
 * handler */
enum qf_state {
   _QF_CLEAN = 0,    /* Filter has just been reset */
   _QF_PREFIX = 1,   /* Parsing the quote marks at the beginning of a line */
   _QF_DATA = 2      /* Collecting the remaining data of a line */
};
/* Value carrier for the state handlers: the filter object plus the not yet
 * consumed input.  The handlers store their advanced position back into
 * .buf and .len before they return */
struct qf_vc {
   struct quoteflt   *self;   /* Filter that is being fed */
   char const        *buf;    /* Next unconsumed input byte */
   size_t            len;     /* Number of bytes left at .buf */
};
/* Write the indentation prefix followed by the current (compressed) quote */
static ssize_t    _qf_dump_prefix(struct quoteflt *self);

/* Append a single character to the data of the current line */
static ssize_t    _qf_add_data(struct quoteflt *self, wchar_t wc);

/* Handlers of the state machine, one per parser state */
static ssize_t    _qf_state_prefix(struct qf_vc *vc);
static ssize_t    _qf_state_data(struct qf_vc *vc);
57 static ssize_t
58 _qf_dump_prefix(struct quoteflt *self)
60 ssize_t rv;
61 size_t i;
62 NYD_ENTER;
64 if ((i = self->qf_pfix_len) > 0 && i != fwrite(self->qf_pfix, 1, i,
65 self->qf_os))
66 goto jerr;
67 rv = i;
69 if ((i = self->qf_currq.l) > 0 && i != fwrite(self->qf_currq.s, 1, i,
70 self->qf_os))
71 goto jerr;
72 rv += i;
73 jleave:
74 NYD_LEAVE;
75 return rv;
76 jerr:
77 rv = -1;
78 goto jleave;
81 static ssize_t
82 _qf_add_data(struct quoteflt *self, wchar_t wc)
84 char *save_b;
85 ui32_t save_l, save_w;
86 ssize_t rv = 0;
87 int w, l;
88 NYD_ENTER;
90 save_l = save_w = 0; /* silence cc */
91 save_b = NULL;
92 /* <newline> ends state */
93 if (wc == L'\n')
94 goto jflush;
95 if (wc == L'\r') /* TODO CR should be stripped in lower level!! */
96 goto jleave;
98 /* Unroll <tab> to spaces */
99 if (wc == L'\t') {
100 save_l = self->qf_datw;
101 save_w = (save_l + QUOTE_TAB_SPACES) & ~(QUOTE_TAB_SPACES - 1);
102 save_w -= save_l;
103 while (save_w-- > 0) {
104 ssize_t j = _qf_add_data(self, L' ');
105 if (j < 0) {
106 rv = j;
107 break;
109 rv += j;
111 goto jleave;
114 w = wcwidth(wc);
115 if (w == -1) {
116 jbad:
117 ++self->qf_datw;
118 self->qf_dat.s[self->qf_dat.l++] = '?';
119 } else {
120 l = wctomb(self->qf_dat.s + self->qf_dat.l, wc);
121 if (l < 0)
122 goto jbad;
123 self->qf_datw += (ui32_t)w;
124 self->qf_dat.l += (size_t)l;
127 /* TODO The last visual may excess (adjusted!) *qfold-max* if it's a wide;
128 * TODO place it on the next line, break before */
129 if (self->qf_datw >= self->qf_qfold_max) {
130 /* If we have seen a nice breakpoint during traversal, shuffle data
131 * around a bit so as to restore the trailing part after flushing */
132 if (self->qf_brkl > 0) {
133 save_w = self->qf_datw - self->qf_brkw;
134 save_l = self->qf_dat.l - self->qf_brkl;
135 save_b = self->qf_dat.s + self->qf_brkl + 2;
136 memmove(save_b, save_b - 2, save_l);
137 self->qf_dat.l = self->qf_brkl;
140 self->qf_dat.s[self->qf_dat.l++] = '\\';
141 jflush:
142 self->qf_dat.s[self->qf_dat.l++] = '\n';
143 rv = quoteflt_flush(self);
145 /* Restore takeovers, if any */
146 if (save_b != NULL) {
147 self->qf_brk_isws = FAL0;
148 self->qf_datw += save_w;
149 self->qf_dat.l = save_l;
150 memmove(self->qf_dat.s, save_b, save_l);
152 } else if (self->qf_datw >= self->qf_qfold_min && !self->qf_brk_isws) {
153 bool_t isws = iswspace(wc);
155 if ((isws && !self->qf_brk_isws) || self->qf_brkl == 0) {
156 self->qf_brkl = self->qf_dat.l;
157 self->qf_brkw = self->qf_datw;
158 self->qf_brk_isws = isws;
162 /* If state changed to prefix, perform full reset (note this implies that
163 * quoteflt_flush() performs too much work..) */
164 if (wc == '\n') {
165 self->qf_state = _QF_PREFIX;
166 self->qf_wscnt = self->qf_datw = 0;
167 self->qf_currq.l = 0;
169 jleave:
170 NYD_LEAVE;
171 return rv;
174 static ssize_t
175 _qf_state_prefix(struct qf_vc *vc)
177 struct quoteflt *self;
178 ssize_t rv;
179 char const *buf;
180 size_t len, i;
181 wchar_t wc;
182 NYD_ENTER;
184 self = vc->self;
185 rv = 0;
187 for (buf = vc->buf, len = vc->len; len > 0;) {
188 /* xxx NULL BYTE! */
189 i = mbrtowc(&wc, buf, len, self->qf_mbps);
190 if (i == (size_t)-1) {
191 /* On hard error, don't modify mbstate_t and step one byte */
192 self->qf_mbps[0] = self->qf_mbps[1];
193 ++buf;
194 --len;
195 self->qf_wscnt = 0;
196 continue;
198 self->qf_mbps[1] = self->qf_mbps[0];
199 if (i == (size_t)-2) {
200 /* Redundant shift sequence, out of buffer */
201 len = 0;
202 break;
204 buf += i;
205 len -= i;
207 if (wc == L'\n')
208 goto jfin;
209 if (iswspace(wc)) {
210 ++self->qf_wscnt;
211 continue;
213 if (i == 1 && ISQUOTE(wc)) {
214 self->qf_wscnt = 0;
215 if (self->qf_currq.l >= QUOTE_MAX - 3) {
216 self->qf_currq.s[QUOTE_MAX - 3] = '.';
217 self->qf_currq.s[QUOTE_MAX - 2] = '.';
218 self->qf_currq.s[QUOTE_MAX - 1] = '.';
219 self->qf_currq.l = QUOTE_MAX;
220 } else
221 self->qf_currq.s[self->qf_currq.l++] = buf[-1];
222 continue;
225 /* The quote is parsed and compressed; dump it */
226 jfin:
227 self->qf_state = _QF_DATA;
228 /* Overtake WS to the current quote in order to preserve it for eventual
229 * necessary follow lines, too */
230 /* TODO we de-facto "normalize" to ASCII SP here which MESSES tabs!! */
231 while (self->qf_wscnt-- > 0 && self->qf_currq.l < QUOTE_MAX)
232 self->qf_currq.s[self->qf_currq.l++] = ' ';
233 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
234 self->qf_wscnt = 0;
235 rv = _qf_add_data(self, wc);
236 break;
239 vc->buf = buf;
240 vc->len = len;
241 NYD_LEAVE;
242 return rv;
245 static ssize_t
246 _qf_state_data(struct qf_vc *vc)
248 struct quoteflt *self;
249 ssize_t rv;
250 char const *buf;
251 size_t len, i;
252 wchar_t wc;
253 NYD_ENTER;
255 self = vc->self;
256 rv = 0;
258 for (buf = vc->buf, len = vc->len; len > 0;) {
259 /* xxx NULL BYTE! */
260 i = mbrtowc(&wc, buf, len, self->qf_mbps);
261 if (i == (size_t)-1) {
262 /* On hard error, don't modify mbstate_t and step one byte */
263 self->qf_mbps[0] = self->qf_mbps[1];
264 ++buf;
265 --len;
266 continue;
268 self->qf_mbps[1] = self->qf_mbps[0];
269 if (i == (size_t)-2) {
270 /* Redundant shift sequence, out of buffer */
271 len = 0;
272 break;
274 buf += i;
275 len -= i;
277 { ssize_t j = _qf_add_data(self, wc);
278 if (j < 0) {
279 rv = j;
280 break;
282 rv += j;
285 if (self->qf_state != _QF_DATA)
286 break;
289 vc->buf = buf;
290 vc->len = len;
291 NYD_LEAVE;
292 return rv;
294 #endif /* HAVE_QUOTE_FOLD */
296 FL struct quoteflt *
297 quoteflt_dummy(void) /* TODO LEGACY (until filters are plugged when needed) */
299 static struct quoteflt qf_i;
301 return &qf_i;
304 FL void
305 quoteflt_init(struct quoteflt *self, char const *prefix)
307 #ifdef HAVE_QUOTE_FOLD
308 char *xcp, *cp;
309 #endif
310 NYD_ENTER;
312 memset(self, 0, sizeof *self);
314 if ((self->qf_pfix = prefix) != NULL)
315 self->qf_pfix_len = (ui32_t)strlen(prefix);
317 /* Check wether the user wants the more fancy quoting algorithm */
318 /* TODO *quote-fold*: QUOTE_MAX may excess it! */
319 #ifdef HAVE_QUOTE_FOLD
320 if (self->qf_pfix_len > 0 && (cp = ok_vlook(quote_fold)) != NULL) {
321 ui32_t qmin, qmax = (ui32_t)strtol(cp, &xcp, 10);
322 /* These magic values ensure we don't bail :) */
323 if (qmax < self->qf_pfix_len + 6)
324 qmax = self->qf_pfix_len + 6;
325 --qmax; /* The newline escape */
326 if (cp == xcp || *xcp == '\0')
327 qmin = (qmax >> 1) + (qmax >> 2) + (qmax >> 5);
328 else {
329 qmin = (ui32_t)strtol(xcp + 1, NULL, 10);
330 if (qmin < qmax >> 1)
331 qmin = qmax >> 1;
332 else if (qmin > qmax - 2)
333 qmin = qmax - 2;
335 self->qf_qfold_min = qmin;
336 self->qf_qfold_max = qmax;
338 /* Add pad for takeover copies, backslash and newline */
339 self->qf_dat.s = salloc((qmax + 3) * mb_cur_max);
340 self->qf_currq.s = salloc((QUOTE_MAX + 1) * mb_cur_max);
342 #endif
343 NYD_LEAVE;
346 FL void
347 quoteflt_destroy(struct quoteflt *self) /* xxx inline */
349 NYD_ENTER;
350 UNUSED(self);
351 NYD_LEAVE;
354 FL void
355 quoteflt_reset(struct quoteflt *self, FILE *f) /* xxx inline */
357 NYD_ENTER;
358 self->qf_os = f;
359 #ifdef HAVE_QUOTE_FOLD
360 self->qf_state = _QF_CLEAN;
361 self->qf_dat.l =
362 self->qf_currq.l = 0;
363 memset(self->qf_mbps, 0, sizeof self->qf_mbps);
364 #endif
365 NYD_LEAVE;
368 FL ssize_t
369 quoteflt_push(struct quoteflt *self, char const *dat, size_t len)
371 /* (xxx Ideally the actual push() [and flush()] would be functions on their
372 * xxx own, via indirect vtbl call ..) */
373 ssize_t rv = 0;
374 NYD_ENTER;
376 if (len == 0)
377 goto jleave;
379 /* Bypass? XXX Finally, this filter simply should not be used, then */
380 if (self->qf_pfix_len == 0) {
381 if (len != fwrite(dat, 1, len, self->qf_os))
382 goto jerr;
383 rv = len;
385 /* Normal: place *indentprefix* at every BOL */
386 else
387 #ifdef HAVE_QUOTE_FOLD
388 if (self->qf_qfold_max == 0)
389 #endif
391 void *vp;
392 size_t ll;
393 bool_t pxok = (self->qf_qfold_min != 0);
395 for (;;) {
396 if (!pxok) {
397 ll = self->qf_pfix_len;
398 if (ll != fwrite(self->qf_pfix, 1, ll, self->qf_os))
399 goto jerr;
400 rv += ll;
401 pxok = TRU1;
404 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
405 * xxx mandated by POSIX.1-2008 as "invariant across all locales
406 * xxx supported"; though there is no charset known which uses this
407 * xxx control char as part of a multibyte character; note that S-nail
408 * XXX (and the Mail codebase as such) do not support EBCDIC */
409 if ((vp = memchr(dat, '\n', len)) == NULL)
410 ll = len;
411 else {
412 pxok = FAL0;
413 ll = PTR2SIZE((char*)vp - dat) + 1;
416 if (ll != fwrite(dat, sizeof *dat, ll, self->qf_os))
417 goto jerr;
418 rv += ll;
419 if ((len -= ll) == 0)
420 break;
421 dat += ll;
424 self->qf_qfold_min = pxok;
426 /* Overly complicated, though still only line-per-line: *quote-fold*.
427 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
428 * TODO note this means we assume that lines start with reset escape seq,
429 * TODO but i don't think this is any worse than what we currently do;
430 * TODO in 15.0, with the value carrier, we should carry conversion states
431 * TODO all along, only resetting on error (or at words for header =???=);
432 * TODO this still is weird for error handling, but we need to act more
433 * TODO stream-alike (though in practice i don't think cross-line states
434 * TODO can be found, because of compatibility reasons; however, being
435 * TODO a problem rather than a solution is not a good thing (tm))
436 * - Lookout for a newline */
437 #ifdef HAVE_QUOTE_FOLD
438 else {
439 struct qf_vc vc;
440 ssize_t i;
442 vc.self = self;
443 vc.buf = dat;
444 vc.len = len;
445 while (vc.len > 0) {
446 switch (self->qf_state) {
447 case _QF_CLEAN:
448 case _QF_PREFIX:
449 i = _qf_state_prefix(&vc);
450 break;
451 default: /* silence cc (`i' unused) */
452 case _QF_DATA:
453 i = _qf_state_data(&vc);
454 break;
456 if (i < 0)
457 goto jerr;
458 rv += i;
461 #endif /* HAVE_QUOTE_FOLD */
463 jleave:
464 NYD_LEAVE;
465 return rv;
466 jerr:
467 rv = -1;
468 goto jleave;
471 FL ssize_t
472 quoteflt_flush(struct quoteflt *self)
474 ssize_t rv = 0;
475 NYD_ENTER;
476 UNUSED(self);
478 #ifdef HAVE_QUOTE_FOLD
479 if (self->qf_dat.l > 0) {
480 rv = _qf_dump_prefix(self);
481 if (rv >= 0) {
482 size_t i = self->qf_dat.l;
483 if (i == fwrite(self->qf_dat.s, 1, i, self->qf_os))
484 rv += i;
485 else
486 rv = -1;
487 self->qf_dat.l = 0;
488 self->qf_brk_isws = FAL0;
489 self->qf_wscnt = self->qf_brkl = self->qf_brkw = 0;
490 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
493 #endif
494 NYD_LEAVE;
495 return rv;
498 /* s-it-mode */