Merge branch 'topic/srelax'
[s-mailx.git] / filter.c
blob5110c82020b96c79b10feaafb065232e4d3c47bb
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Filter objects.
4 * Copyright (c) 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 #include "nail.h"
22 * Quotation filter
26 * TODO quotation filter: anticipate in future data: don't break if only WS
27 * TODO or a LF escaping \ follows on the line (simply reuse the latter).
30 #ifdef HAVE_QUOTE_FOLD
31 CTA(QUOTE_MAX > 3);
33 enum qf_state {
34 _QF_CLEAN,
35 _QF_PREFIX,
36 _QF_DATA
/* Value carrier handed to the state handlers: the handlers consume input
 * from .buf and write the unconsumed remainder back before returning */
struct qf_vc {
   struct quoteflt   *self;   /* The filter being driven */
   char const        *buf;    /* Next input byte to look at */
   size_t            len;     /* Number of bytes left at .buf */
};
/* Write the indentation prefix plus the currently collected quote */
static ssize_t _qf_dump_prefix(struct quoteflt *self);

/* Append a single (wide) character to the line that is being collected */
static ssize_t _qf_add_data(struct quoteflt *self, wchar_t wc);

/* Handlers of the state machine, one per input state */
static ssize_t _qf_state_prefix(struct qf_vc *vc);
static ssize_t _qf_state_data(struct qf_vc *vc);
55 static ssize_t
56 _qf_dump_prefix(struct quoteflt *self)
58 ssize_t rv;
59 size_t i;
61 if ((i = self->qf_pfix_len) > 0 && i != fwrite(self->qf_pfix, 1, i,
62 self->qf_os))
63 goto jerr;
64 rv = i;
66 if ((i = self->qf_currq.l) > 0 && i != fwrite(self->qf_currq.s, 1, i,
67 self->qf_os))
68 goto jerr;
69 rv += i;
70 jleave:
71 return rv;
72 jerr:
73 rv = -1;
74 goto jleave;
77 static ssize_t
78 _qf_add_data(struct quoteflt *self, wchar_t wc)
80 char *save_b;
81 ui_it save_l, save_w;
82 ssize_t rv = 0;
83 int w, l;
85 save_l = save_w = 0; /* silence cc */
86 save_b = NULL;
87 /* <newline> ends state */
88 if (wc == L'\n')
89 goto jflush;
90 if (wc == L'\r') /* TODO CR should be stripped in lower level!! */
91 goto jleave;
93 /* Unroll <tab> to spaces */
94 if (wc == L'\t') {
95 save_l = self->qf_datw;
96 save_w = (save_l + QUOTE_TAB_SPACES) & ~(QUOTE_TAB_SPACES - 1);
97 save_w -= save_l;
98 while (save_w-- > 0) {
99 ssize_t j = _qf_add_data(self, L' ');
100 if (j < 0) {
101 rv = j;
102 break;
104 rv += j;
106 goto jleave;
109 w = wcwidth(wc);
110 if (w == -1) {
111 jbad:
112 ++self->qf_datw;
113 self->qf_dat.s[self->qf_dat.l++] = '?';
114 } else {
115 l = wctomb(self->qf_dat.s + self->qf_dat.l, wc);
116 if (l < 0)
117 goto jbad;
118 self->qf_datw += (ui_it)w;
119 self->qf_dat.l += (size_t)l;
122 /* TODO The last visual may excess *qfold-max* if it's a wide one;
123 * TODO place it on the next line, break before */
124 if (self->qf_datw >= self->qf_qfold_max) {
125 /* If we have seen a nice breakpoint during traversal, shuffle data
126 * around a bit so as to restore the trailing part after flushing */
127 if (self->qf_brkl > 0) {
128 save_w = self->qf_datw - self->qf_brkw;
129 save_l = self->qf_dat.l - self->qf_brkl;
130 save_b = self->qf_dat.s + self->qf_brkl + 2;
131 memmove(save_b, save_b - 2, save_l);
132 self->qf_dat.l = self->qf_brkl;
135 self->qf_dat.s[self->qf_dat.l++] = '\\';
136 jflush:
137 self->qf_dat.s[self->qf_dat.l++] = '\n';
138 rv = quoteflt_flush(self);
140 /* Restore takeovers, if any */
141 if (save_b != NULL) {
142 self->qf_brk_isws = FAL0;
143 self->qf_datw += save_w;
144 self->qf_dat.l = save_l;
145 memmove(self->qf_dat.s, save_b, save_l);
148 } else if (self->qf_datw >= self->qf_qfold_min && ! self->qf_brk_isws) {
149 bool_t isws = iswspace(wc);
151 if ((isws && ! self->qf_brk_isws) || self->qf_brkl == 0) {
152 self->qf_brkl = self->qf_dat.l;
153 self->qf_brkw = self->qf_datw;
154 self->qf_brk_isws = isws;
158 /* If state changed to prefix, perform full reset (note this implies that
159 * quoteflt_flush() performs too much work..) */
160 if (wc == '\n') {
161 self->qf_state = _QF_PREFIX;
162 self->qf_wscnt = self->qf_datw = 0;
163 self->qf_currq.l = 0;
165 jleave:
166 return rv;
169 static ssize_t
170 _qf_state_prefix(struct qf_vc *vc)
172 struct quoteflt *self = vc->self;
173 ssize_t rv = 0;
174 char const *buf;
175 size_t len, i;
176 wchar_t wc;
178 for (buf = vc->buf, len = vc->len; len > 0;) {
179 /* TODO NULL BYTE! */
180 i = mbrtowc(&wc, buf, len, self->qf_mbps);
181 if (i == (size_t)-1) {
182 /* On hard error, don't modify mbstate_t and step one byte */
183 self->qf_mbps[0] = self->qf_mbps[1];
184 ++buf;
185 --len;
186 self->qf_wscnt = 0;
187 continue;
189 self->qf_mbps[1] = self->qf_mbps[0];
190 if (i == (size_t)-2) {
191 /* Redundant shift sequence, out of buffer */
192 len = 0;
193 break;
195 buf += i;
196 len -= i;
198 if (wc == L'\n')
199 goto jfin;
200 if (iswspace(wc)) {
201 ++self->qf_wscnt;
202 continue;
204 if (i == 1 && ISQUOTE(wc)) {
205 self->qf_wscnt = 0;
206 if (self->qf_currq.l >= QUOTE_MAX - 3) {
207 self->qf_currq.s[QUOTE_MAX - 3] = '.';
208 self->qf_currq.s[QUOTE_MAX - 2] = '.';
209 self->qf_currq.s[QUOTE_MAX - 1] = '.';
210 self->qf_currq.l = QUOTE_MAX;
211 } else
212 self->qf_currq.s[self->qf_currq.l++] = buf[-1];
213 continue;
216 /* The quote is parsed and compressed; dump it */
217 jfin:
218 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
219 self->qf_state = _QF_DATA;
220 /* Overtake WS (xxx but we de-facto "normalize" to ASCII SP here) */
221 while (self->qf_wscnt-- > 0 && self->qf_currq.l < QUOTE_MAX)
222 self->qf_currq.s[self->qf_currq.l++] = ' ';
223 self->qf_wscnt = 0;
224 rv = _qf_add_data(self, wc);
225 break;
228 vc->buf = buf;
229 vc->len = len;
230 return rv;
233 static ssize_t
234 _qf_state_data(struct qf_vc *vc)
236 struct quoteflt *self = vc->self;
237 ssize_t rv = 0;
238 char const *buf;
239 size_t len, i;
240 wchar_t wc;
242 for (buf = vc->buf, len = vc->len; len > 0;) {
243 /* TODO NULL BYTE! */
244 i = mbrtowc(&wc, buf, len, self->qf_mbps);
245 if (i == (size_t)-1) {
246 /* On hard error, don't modify mbstate_t and step one byte */
247 self->qf_mbps[0] = self->qf_mbps[1];
248 ++buf;
249 --len;
250 continue;
252 self->qf_mbps[1] = self->qf_mbps[0];
253 if (i == (size_t)-2) {
254 /* Redundant shift sequence, out of buffer */
255 len = 0;
256 break;
258 buf += i;
259 len -= i;
261 { ssize_t j = _qf_add_data(self, wc);
262 if (j < 0) {
263 rv = j;
264 break;
266 rv += j;
269 if (self->qf_state != _QF_DATA)
270 break;
273 vc->buf = buf;
274 vc->len = len;
275 return rv;
277 #endif /* HAVE_QUOTE_FOLD */
279 struct quoteflt *
280 quoteflt_dummy(void) /* TODO LEGACY */
282 static struct quoteflt qf_i;
284 return &qf_i;
287 void
288 quoteflt_init(struct quoteflt *self, char const *prefix)
290 #ifdef HAVE_QUOTE_FOLD
291 char *xcp, *cp;
292 #endif
294 memset(self, 0, sizeof *self);
296 if ((self->qf_pfix = prefix) != NULL)
297 self->qf_pfix_len = (ui_it)strlen(prefix);
299 /* Check wether the user wants the more fancy quoting algorithm */
300 /* TODO *quote-fold*: QUOTE_MAX may excess it! */
301 #ifdef HAVE_QUOTE_FOLD
302 if (self->qf_pfix_len > 0 && (cp = voption("quote-fold")) != NULL) {
303 ui_it qmin, qmax = (ui_it)strtol(cp, (char**)&xcp, 10);
304 /* These magic values ensure we don't bail :) */
305 if (qmax < self->qf_pfix_len + 6)
306 qmax = self->qf_pfix_len + 6;
307 --qmax; /* The newline escape */
308 if (cp == xcp || *xcp == '\0')
309 qmin = (qmax >> 1) + (qmax >> 2) + (qmax >> 5);
310 else {
311 qmin = (ui_it)strtol(xcp + 1, NULL, 10);
312 if (qmin < qmax >> 1)
313 qmin = qmax >> 1;
314 else if (qmin > qmax - 2)
315 qmin = qmax - 2;
317 self->qf_qfold_min = qmin;
318 self->qf_qfold_max = qmax;
320 /* Add pad for takeover copies, backslash and newline */
321 self->qf_dat.s = salloc((qmax + 3) * mb_cur_max);
322 self->qf_currq.s = salloc((QUOTE_MAX + 1) * mb_cur_max);
324 #endif
/* Counterpart of quoteflt_init(); currently there is nothing to do */
void
quoteflt_destroy(struct quoteflt *self) /* xxx inline */
{
   (void)self;
}
333 void
334 quoteflt_reset(struct quoteflt *self, FILE *f) /* xxx inline */
336 self->qf_os = f;
337 #ifdef HAVE_QUOTE_FOLD
338 self->qf_state = _QF_CLEAN;
339 self->qf_dat.l =
340 self->qf_currq.l = 0;
341 memset(self->qf_mbps, 0, sizeof self->qf_mbps);
342 #endif
345 ssize_t
346 quoteflt_push(struct quoteflt *self, char const *dat, size_t len)
348 /* (xxx Ideally the actual push() [and flush()] would be functions on their
349 * xxx own, via indirect vtbl call ..) */
350 ssize_t i, rv = 0;
352 if (len == 0)
353 goto jleave;
355 /* Bypass? XXX Finally, this filter simply should not be used, then */
356 if (self->qf_pfix_len == 0) {
357 if (len != fwrite(dat, 1, len, self->qf_os))
358 goto jerr;
359 rv = len;
361 /* Normal: place *indentprefix* at every BOL */
362 else
363 #ifdef HAVE_QUOTE_FOLD
364 if (self->qf_qfold_max == 0)
365 #endif
367 void *vp;
368 size_t ll;
369 bool_t pxok = (self->qf_qfold_min != 0);
371 for (;;) {
372 if (!pxok) {
373 ll = self->qf_pfix_len;
374 if (ll != fwrite(self->qf_pfix, 1, ll, self->qf_os))
375 goto jerr;
376 rv += ll;
377 pxok = TRU1;
380 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
381 * xxx mandated by POSIX.1-2008 as "invariant across all locales
382 * xxx supported"; though there is no charset known which uses this
383 * xxx control char as part of a multibyte character; note that S-nail
384 * XXX (and the Mail codebase as such) do not support EBCDIC */
385 if ((vp = memchr(dat, '\n', len)) == NULL)
386 ll = len;
387 else {
388 pxok = FAL0;
389 ll = (size_t)((char*)vp - dat) + 1;
392 if (ll != fwrite(dat, sizeof *dat, ll, self->qf_os))
393 goto jerr;
394 rv += ll;
395 if ((len -= ll) == 0)
396 break;
397 dat += ll;
400 self->qf_qfold_min = pxok;
402 /* Overly complicated, though still only line-per-line: *quote-fold*.
403 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
404 * TODO note this means we assume that lines start with reset escape seq,
405 * TODO but i don't think this is any worse than what we currently do;
406 * TODO in 15.0, with the value carrier, we should carry conversion states
407 * TODO all along, only resetting on error (or at words for header =???=);
408 * TODO this still is weird for error handling, but we need to act more
409 * TODO stream-alike (though in practice i don't think cross-line states
410 * TODO can be found, because of compatibility reasons; however, being
411 * TODO a problem rather than a solution is not a good thing (tm))
412 * - Lookout for a newline */
413 #ifdef HAVE_QUOTE_FOLD
414 else {
415 struct qf_vc vc;
417 vc.self = self;
418 vc.buf = dat;
419 vc.len = len;
420 while (vc.len > 0) {
421 switch (self->qf_state) {
422 case _QF_CLEAN:
423 case _QF_PREFIX:
424 i = _qf_state_prefix(&vc);
425 break;
426 default: /* silence cc (`i' unused) */
427 case _QF_DATA:
428 i = _qf_state_data(&vc);
429 break;
431 if (i < 0)
432 goto jerr;
433 rv += i;
436 #endif /* HAVE_QUOTE_FOLD */
438 jleave:
439 return rv;
441 jerr:
442 rv = -1;
443 goto jleave;
/* Write out pending line data, preceded by prefix and quote.
 * Without HAVE_QUOTE_FOLD, or if nothing is pending, this does nothing and
 * returns 0.  Otherwise returns the number of bytes written or -1 on
 * error; the line state is reset only if writing the prefix succeeded. */
ssize_t
quoteflt_flush(struct quoteflt *self)
{
   ssize_t rv = 0;
   UNUSED(self);

#ifdef HAVE_QUOTE_FOLD
   if (self->qf_dat.l > 0 && (rv = _qf_dump_prefix(self)) >= 0) {
      size_t pending = self->qf_dat.l;

      if (fwrite(self->qf_dat.s, 1, pending, self->qf_os) != pending)
         rv = -1;
      else
         rv += pending;

      /* Start over with an empty line, placed right after prefix and quote */
      self->qf_dat.l = 0;
      self->qf_brk_isws = FAL0;
      self->qf_wscnt = self->qf_brkl = self->qf_brkw = 0;
      self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
   }
#endif
   return rv;
}
471 /* vim:set fenc=utf-8:s-it-mode */