Add vok_*() family, backed by (non-final) _var_vok*()
[s-mailx.git] / filter.c
blob7c596f2fcc5dac71125531ea07fd9215db355185
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Filter objects.
 *
 * Copyright (c) 2013 Steffen "Daode" Nurpmeso <sdaoden@users.sf.net>.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
19 #ifndef HAVE_AMALGAMATION
20 # include "nail.h"
21 #endif
/*
 * Quotation filter
 */

/*
 * TODO quotation filter: anticipate in future data: don't break if only WS
 * TODO or a LF escaping \ follows on the line (simply reuse the latter).
 */
32 #ifdef HAVE_QUOTE_FOLD
33 CTA(QUOTE_MAX > 3);
/* Parser states of the *quote-fold* filter */
enum qf_state {
   _QF_CLEAN = 0, /* Set by quoteflt_reset(); handled like _QF_PREFIX */
   _QF_PREFIX,    /* At line start: collecting quote characters */
   _QF_DATA       /* Quote has been parsed: collecting line content */
};
/* Cursor handed to the state handlers: they consume input from .buf/.len
 * and store the unconsumed remainder back before returning */
struct qf_vc {
   struct quoteflt *self;  /* The filter that is being fed */
   const char *buf;        /* Next input byte */
   size_t len;             /* Bytes remaining at .buf */
};
/* Write the prefix and the current quote to the output stream */
static ssize_t _qf_dump_prefix(struct quoteflt *self);

/* Append a single data character to the pending line */
static ssize_t _qf_add_data(struct quoteflt *self, wchar_t wc);

/* Handlers of the state machine, one per parser state */
static ssize_t _qf_state_prefix(struct qf_vc *vc);
static ssize_t _qf_state_data(struct qf_vc *vc);
57 static ssize_t
58 _qf_dump_prefix(struct quoteflt *self)
60 ssize_t rv;
61 size_t i;
63 if ((i = self->qf_pfix_len) > 0 && i != fwrite(self->qf_pfix, 1, i,
64 self->qf_os))
65 goto jerr;
66 rv = i;
68 if ((i = self->qf_currq.l) > 0 && i != fwrite(self->qf_currq.s, 1, i,
69 self->qf_os))
70 goto jerr;
71 rv += i;
72 jleave:
73 return rv;
74 jerr:
75 rv = -1;
76 goto jleave;
79 static ssize_t
80 _qf_add_data(struct quoteflt *self, wchar_t wc)
82 char *save_b;
83 ui_it save_l, save_w;
84 ssize_t rv = 0;
85 int w, l;
87 save_l = save_w = 0; /* silence cc */
88 save_b = NULL;
89 /* <newline> ends state */
90 if (wc == L'\n')
91 goto jflush;
92 if (wc == L'\r') /* TODO CR should be stripped in lower level!! */
93 goto jleave;
95 /* Unroll <tab> to spaces */
96 if (wc == L'\t') {
97 save_l = self->qf_datw;
98 save_w = (save_l + QUOTE_TAB_SPACES) & ~(QUOTE_TAB_SPACES - 1);
99 save_w -= save_l;
100 while (save_w-- > 0) {
101 ssize_t j = _qf_add_data(self, L' ');
102 if (j < 0) {
103 rv = j;
104 break;
106 rv += j;
108 goto jleave;
111 w = wcwidth(wc);
112 if (w == -1) {
113 jbad:
114 ++self->qf_datw;
115 self->qf_dat.s[self->qf_dat.l++] = '?';
116 } else {
117 l = wctomb(self->qf_dat.s + self->qf_dat.l, wc);
118 if (l < 0)
119 goto jbad;
120 self->qf_datw += (ui_it)w;
121 self->qf_dat.l += (size_t)l;
124 /* TODO The last visual may excess *qfold-max* if it's a wide one;
125 * TODO place it on the next line, break before */
126 if (self->qf_datw >= self->qf_qfold_max) {
127 /* If we have seen a nice breakpoint during traversal, shuffle data
128 * around a bit so as to restore the trailing part after flushing */
129 if (self->qf_brkl > 0) {
130 save_w = self->qf_datw - self->qf_brkw;
131 save_l = self->qf_dat.l - self->qf_brkl;
132 save_b = self->qf_dat.s + self->qf_brkl + 2;
133 memmove(save_b, save_b - 2, save_l);
134 self->qf_dat.l = self->qf_brkl;
137 self->qf_dat.s[self->qf_dat.l++] = '\\';
138 jflush:
139 self->qf_dat.s[self->qf_dat.l++] = '\n';
140 rv = quoteflt_flush(self);
142 /* Restore takeovers, if any */
143 if (save_b != NULL) {
144 self->qf_brk_isws = FAL0;
145 self->qf_datw += save_w;
146 self->qf_dat.l = save_l;
147 memmove(self->qf_dat.s, save_b, save_l);
150 } else if (self->qf_datw >= self->qf_qfold_min && ! self->qf_brk_isws) {
151 bool_t isws = iswspace(wc);
153 if ((isws && ! self->qf_brk_isws) || self->qf_brkl == 0) {
154 self->qf_brkl = self->qf_dat.l;
155 self->qf_brkw = self->qf_datw;
156 self->qf_brk_isws = isws;
160 /* If state changed to prefix, perform full reset (note this implies that
161 * quoteflt_flush() performs too much work..) */
162 if (wc == '\n') {
163 self->qf_state = _QF_PREFIX;
164 self->qf_wscnt = self->qf_datw = 0;
165 self->qf_currq.l = 0;
167 jleave:
168 return rv;
171 static ssize_t
172 _qf_state_prefix(struct qf_vc *vc)
174 struct quoteflt *self = vc->self;
175 ssize_t rv = 0;
176 char const *buf;
177 size_t len, i;
178 wchar_t wc;
180 for (buf = vc->buf, len = vc->len; len > 0;) {
181 /* TODO NULL BYTE! */
182 i = mbrtowc(&wc, buf, len, self->qf_mbps);
183 if (i == (size_t)-1) {
184 /* On hard error, don't modify mbstate_t and step one byte */
185 self->qf_mbps[0] = self->qf_mbps[1];
186 ++buf;
187 --len;
188 self->qf_wscnt = 0;
189 continue;
191 self->qf_mbps[1] = self->qf_mbps[0];
192 if (i == (size_t)-2) {
193 /* Redundant shift sequence, out of buffer */
194 len = 0;
195 break;
197 buf += i;
198 len -= i;
200 if (wc == L'\n')
201 goto jfin;
202 if (iswspace(wc)) {
203 ++self->qf_wscnt;
204 continue;
206 if (i == 1 && ISQUOTE(wc)) {
207 self->qf_wscnt = 0;
208 if (self->qf_currq.l >= QUOTE_MAX - 3) {
209 self->qf_currq.s[QUOTE_MAX - 3] = '.';
210 self->qf_currq.s[QUOTE_MAX - 2] = '.';
211 self->qf_currq.s[QUOTE_MAX - 1] = '.';
212 self->qf_currq.l = QUOTE_MAX;
213 } else
214 self->qf_currq.s[self->qf_currq.l++] = buf[-1];
215 continue;
218 /* The quote is parsed and compressed; dump it */
219 jfin:
220 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
221 self->qf_state = _QF_DATA;
222 /* Overtake WS (xxx but we de-facto "normalize" to ASCII SP here) */
223 while (self->qf_wscnt-- > 0 && self->qf_currq.l < QUOTE_MAX)
224 self->qf_currq.s[self->qf_currq.l++] = ' ';
225 self->qf_wscnt = 0;
226 rv = _qf_add_data(self, wc);
227 break;
230 vc->buf = buf;
231 vc->len = len;
232 return rv;
235 static ssize_t
236 _qf_state_data(struct qf_vc *vc)
238 struct quoteflt *self = vc->self;
239 ssize_t rv = 0;
240 char const *buf;
241 size_t len, i;
242 wchar_t wc;
244 for (buf = vc->buf, len = vc->len; len > 0;) {
245 /* TODO NULL BYTE! */
246 i = mbrtowc(&wc, buf, len, self->qf_mbps);
247 if (i == (size_t)-1) {
248 /* On hard error, don't modify mbstate_t and step one byte */
249 self->qf_mbps[0] = self->qf_mbps[1];
250 ++buf;
251 --len;
252 continue;
254 self->qf_mbps[1] = self->qf_mbps[0];
255 if (i == (size_t)-2) {
256 /* Redundant shift sequence, out of buffer */
257 len = 0;
258 break;
260 buf += i;
261 len -= i;
263 { ssize_t j = _qf_add_data(self, wc);
264 if (j < 0) {
265 rv = j;
266 break;
268 rv += j;
271 if (self->qf_state != _QF_DATA)
272 break;
275 vc->buf = buf;
276 vc->len = len;
277 return rv;
279 #endif /* HAVE_QUOTE_FOLD */
281 FL struct quoteflt *
282 quoteflt_dummy(void) /* TODO LEGACY */
284 static struct quoteflt qf_i;
286 return &qf_i;
289 FL void
290 quoteflt_init(struct quoteflt *self, char const *prefix)
292 #ifdef HAVE_QUOTE_FOLD
293 char *xcp, *cp;
294 #endif
296 memset(self, 0, sizeof *self);
298 if ((self->qf_pfix = prefix) != NULL)
299 self->qf_pfix_len = (ui_it)strlen(prefix);
301 /* Check wether the user wants the more fancy quoting algorithm */
302 /* TODO *quote-fold*: QUOTE_MAX may excess it! */
303 #ifdef HAVE_QUOTE_FOLD
304 if (self->qf_pfix_len > 0 && (cp = ok_vlook(quote_fold)) != NULL) {
305 ui_it qmin, qmax = (ui_it)strtol(cp, (char**)&xcp, 10);
306 /* These magic values ensure we don't bail :) */
307 if (qmax < self->qf_pfix_len + 6)
308 qmax = self->qf_pfix_len + 6;
309 --qmax; /* The newline escape */
310 if (cp == xcp || *xcp == '\0')
311 qmin = (qmax >> 1) + (qmax >> 2) + (qmax >> 5);
312 else {
313 qmin = (ui_it)strtol(xcp + 1, NULL, 10);
314 if (qmin < qmax >> 1)
315 qmin = qmax >> 1;
316 else if (qmin > qmax - 2)
317 qmin = qmax - 2;
319 self->qf_qfold_min = qmin;
320 self->qf_qfold_max = qmax;
322 /* Add pad for takeover copies, backslash and newline */
323 self->qf_dat.s = salloc((qmax + 3) * mb_cur_max);
324 self->qf_currq.s = salloc((QUOTE_MAX + 1) * mb_cur_max);
326 #endif
329 FL void
330 quoteflt_destroy(struct quoteflt *self) /* xxx inline */
332 (void)self;
335 FL void
336 quoteflt_reset(struct quoteflt *self, FILE *f) /* xxx inline */
338 self->qf_os = f;
339 #ifdef HAVE_QUOTE_FOLD
340 self->qf_state = _QF_CLEAN;
341 self->qf_dat.l =
342 self->qf_currq.l = 0;
343 memset(self->qf_mbps, 0, sizeof self->qf_mbps);
344 #endif
347 FL ssize_t
348 quoteflt_push(struct quoteflt *self, char const *dat, size_t len)
350 /* (xxx Ideally the actual push() [and flush()] would be functions on their
351 * xxx own, via indirect vtbl call ..) */
352 ssize_t rv = 0;
354 if (len == 0)
355 goto jleave;
357 /* Bypass? XXX Finally, this filter simply should not be used, then */
358 if (self->qf_pfix_len == 0) {
359 if (len != fwrite(dat, 1, len, self->qf_os))
360 goto jerr;
361 rv = len;
363 /* Normal: place *indentprefix* at every BOL */
364 else
365 #ifdef HAVE_QUOTE_FOLD
366 if (self->qf_qfold_max == 0)
367 #endif
369 void *vp;
370 size_t ll;
371 bool_t pxok = (self->qf_qfold_min != 0);
373 for (;;) {
374 if (!pxok) {
375 ll = self->qf_pfix_len;
376 if (ll != fwrite(self->qf_pfix, 1, ll, self->qf_os))
377 goto jerr;
378 rv += ll;
379 pxok = TRU1;
382 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
383 * xxx mandated by POSIX.1-2008 as "invariant across all locales
384 * xxx supported"; though there is no charset known which uses this
385 * xxx control char as part of a multibyte character; note that S-nail
386 * XXX (and the Mail codebase as such) do not support EBCDIC */
387 if ((vp = memchr(dat, '\n', len)) == NULL)
388 ll = len;
389 else {
390 pxok = FAL0;
391 ll = (size_t)((char*)vp - dat) + 1;
394 if (ll != fwrite(dat, sizeof *dat, ll, self->qf_os))
395 goto jerr;
396 rv += ll;
397 if ((len -= ll) == 0)
398 break;
399 dat += ll;
402 self->qf_qfold_min = pxok;
404 /* Overly complicated, though still only line-per-line: *quote-fold*.
405 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
406 * TODO note this means we assume that lines start with reset escape seq,
407 * TODO but i don't think this is any worse than what we currently do;
408 * TODO in 15.0, with the value carrier, we should carry conversion states
409 * TODO all along, only resetting on error (or at words for header =???=);
410 * TODO this still is weird for error handling, but we need to act more
411 * TODO stream-alike (though in practice i don't think cross-line states
412 * TODO can be found, because of compatibility reasons; however, being
413 * TODO a problem rather than a solution is not a good thing (tm))
414 * - Lookout for a newline */
415 #ifdef HAVE_QUOTE_FOLD
416 else {
417 struct qf_vc vc;
418 ssize_t i;
420 vc.self = self;
421 vc.buf = dat;
422 vc.len = len;
423 while (vc.len > 0) {
424 switch (self->qf_state) {
425 case _QF_CLEAN:
426 case _QF_PREFIX:
427 i = _qf_state_prefix(&vc);
428 break;
429 default: /* silence cc (`i' unused) */
430 case _QF_DATA:
431 i = _qf_state_data(&vc);
432 break;
434 if (i < 0)
435 goto jerr;
436 rv += i;
439 #endif /* HAVE_QUOTE_FOLD */
441 jleave:
442 return rv;
444 jerr:
445 rv = -1;
446 goto jleave;
449 FL ssize_t
450 quoteflt_flush(struct quoteflt *self)
452 ssize_t rv = 0;
453 UNUSED(self);
455 #ifdef HAVE_QUOTE_FOLD
456 if (self->qf_dat.l > 0) {
457 rv = _qf_dump_prefix(self);
458 if (rv >= 0) {
459 size_t i = self->qf_dat.l;
460 if (i == fwrite(self->qf_dat.s, 1, i, self->qf_os))
461 rv += i;
462 else
463 rv = -1;
464 self->qf_dat.l = 0;
465 self->qf_brk_isws = FAL0;
466 self->qf_wscnt = self->qf_brkl = self->qf_brkw = 0;
467 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
470 #endif
471 return rv;
474 /* vim:set fenc=utf-8:s-it-mode */