1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
4 * Copyright (c) 2013 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
5 * SPDX-License-Identifier: ISC
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #ifndef HAVE_AMALGAMATION
31 * TODO quotation filter: anticipate in future data: don't break if only WS
32 * TODO or a LF escaping \ follows on the line (simply reuse the latter).
35 #ifdef HAVE_QUOTE_FOLD
36 n_CTAV(n_QUOTE_MAX
> 3);
45 struct quoteflt
*self
;
50 /* Print out prefix and current quote */
51 static ssize_t
_qf_dump_prefix(struct quoteflt
*self
);
53 /* Add one data character */
54 static ssize_t
_qf_add_data(struct quoteflt
*self
, wchar_t wc
);
56 /* State machine handlers */
57 static ssize_t
_qf_state_prefix(struct qf_vc
*vc
);
58 static ssize_t
_qf_state_data(struct qf_vc
*vc
);
61 _qf_dump_prefix(struct quoteflt
*self
)
67 if ((i
= self
->qf_pfix_len
) > 0 && i
!= fwrite(self
->qf_pfix
, 1, i
,
72 if ((i
= self
->qf_currq
.l
) > 0 && i
!= fwrite(self
->qf_currq
.s
, 1, i
,
85 _qf_add_data(struct quoteflt
*self
, wchar_t wc
)
89 ui32_t save_l
, save_w
;
94 save_l
= save_w
= 0; /* silence cc */
97 /* <newline> ends state */
102 if (wc
== L
'\r') /* TODO CR should be stripped in lower level!! */
105 /* Unroll <tab> to spaces */
107 save_l
= self
->qf_datw
;
108 save_w
= (save_l
+ n_QUOTE_TAB_SPACES
) & ~(n_QUOTE_TAB_SPACES
- 1);
110 while (save_w
-- > 0) {
111 ssize_t j
= _qf_add_data(self
, L
' ');
121 /* To avoid that the last visual exceeds *qfold-max*, which may happen for
122 * multi-column characters, use w as an indicator for this and move that
123 * thing to the next line */
129 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '?';
130 } else if (self
->qf_datw
> self
->qf_qfold_max
- w
) {
134 l
= wctomb(self
->qf_dat
.s
+ self
->qf_dat
.l
, wc
);
137 self
->qf_datw
+= (ui32_t
)w
;
138 self
->qf_dat
.l
+= (size_t)l
;
141 if (self
->qf_datw
>= self
->qf_qfold_max
) {
142 /* If we have seen a nice breakpoint during traversal, shuffle data
143 * around a bit so as to restore the trailing part after flushing */
145 if (self
->qf_brkl
> 0) {
146 save_w
= self
->qf_datw
- self
->qf_brkw
;
147 save_l
= self
->qf_dat
.l
- self
->qf_brkl
;
148 save_b
= self
->qf_dat
.s
+ self
->qf_brkl
+ 2;
149 memmove(save_b
, save_b
- 2, save_l
);
150 self
->qf_dat
.l
= self
->qf_brkl
;
153 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '\\';
155 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '\n';
156 rv
= quoteflt_flush(self
);
158 /* Restore takeovers, if any */
159 if (save_b
!= NULL
) {
160 self
->qf_brk_isws
= FAL0
;
161 self
->qf_datw
+= save_w
;
162 self
->qf_dat
.l
= save_l
;
163 memmove(self
->qf_dat
.s
, save_b
, save_l
);
165 } else if (self
->qf_datw
>= self
->qf_qfold_min
&& !self
->qf_brk_isws
) {
166 bool_t isws
= (iswspace(wc
) != 0);
168 if (isws
|| !self
->qf_brk_isws
|| self
->qf_brkl
== 0) {
169 if((self
->qf_brk_isws
= isws
) ||
170 self
->qf_brkl
< self
->qf_qfold_maxnws
){
171 self
->qf_brkl
= self
->qf_dat
.l
;
172 self
->qf_brkw
= self
->qf_datw
;
177 /* Did we hold this back to avoid qf_qfold_max excess? Then do it now */
178 if(rv
>= 0 && w
== -1){
179 ssize_t j
= _qf_add_data(self
, wc
);
185 /* If state changed to prefix, perform full reset (note this implies that
186 * quoteflt_flush() performs too much work..) */
187 else if (wc
== '\n') {
188 self
->qf_state
= _QF_PREFIX
;
189 self
->qf_wscnt
= self
->qf_datw
= 0;
190 self
->qf_currq
.l
= 0;
198 _qf_state_prefix(struct qf_vc
*vc
)
200 struct quoteflt
*self
;
210 for (buf
= vc
->buf
, len
= vc
->len
; len
> 0;) {
212 i
= mbrtowc(&wc
, buf
, len
, self
->qf_mbps
);
213 if (i
== (size_t)-1) {
214 /* On hard error, don't modify mbstate_t and step one byte */
215 self
->qf_mbps
[0] = self
->qf_mbps
[1];
221 self
->qf_mbps
[1] = self
->qf_mbps
[0];
222 if (i
== (size_t)-2) {
223 /* Redundant shift sequence, out of buffer */
236 if (i
== 1 && n_uasciichar(wc
) &&
237 strchr(self
->qf_quote_chars
, (char)wc
) != NULL
){
239 if (self
->qf_currq
.l
>= n_QUOTE_MAX
- 3) {
240 self
->qf_currq
.s
[n_QUOTE_MAX
- 3] = '.';
241 self
->qf_currq
.s
[n_QUOTE_MAX
- 2] = '.';
242 self
->qf_currq
.s
[n_QUOTE_MAX
- 1] = '.';
243 self
->qf_currq
.l
= n_QUOTE_MAX
;
245 self
->qf_currq
.s
[self
->qf_currq
.l
++] = buf
[-1];
249 /* The quote is parsed and compressed; dump it */
251 self
->qf_state
= _QF_DATA
;
252 /* Overtake WS to the current quote in order to preserve it for eventual
253 * necessary follow lines, too */
254 /* TODO we de-facto "normalize" to ASCII SP here which MESSES tabs!! */
255 while (self
->qf_wscnt
-- > 0 && self
->qf_currq
.l
< n_QUOTE_MAX
)
256 self
->qf_currq
.s
[self
->qf_currq
.l
++] = ' ';
257 self
->qf_datw
= self
->qf_pfix_len
+ self
->qf_currq
.l
;
259 rv
= _qf_add_data(self
, wc
);
270 _qf_state_data(struct qf_vc
*vc
)
272 struct quoteflt
*self
;
282 for (buf
= vc
->buf
, len
= vc
->len
; len
> 0;) {
284 i
= mbrtowc(&wc
, buf
, len
, self
->qf_mbps
);
285 if (i
== (size_t)-1) {
286 /* On hard error, don't modify mbstate_t and step one byte */
287 self
->qf_mbps
[0] = self
->qf_mbps
[1];
292 self
->qf_mbps
[1] = self
->qf_mbps
[0];
293 if (i
== (size_t)-2) {
294 /* Redundant shift sequence, out of buffer */
301 { ssize_t j
= _qf_add_data(self
, wc
);
309 if (self
->qf_state
!= _QF_DATA
)
318 #endif /* HAVE_QUOTE_FOLD */
321 quoteflt_dummy(void) /* TODO LEGACY (until filters are plugged when needed) */
323 static struct quoteflt qf_i
;
325 qf_i
.qf_bypass
= TRU1
;
330 quoteflt_init(struct quoteflt
*self
, char const *prefix
, bool_t bypass
)
332 #ifdef HAVE_QUOTE_FOLD
333 char const *xcp
, *cp
;
337 memset(self
, 0, sizeof *self
);
339 if ((self
->qf_pfix
= prefix
) != NULL
)
340 self
->qf_pfix_len
= (ui32_t
)strlen(prefix
);
341 self
->qf_bypass
= bypass
;
343 /* Check whether the user wants the more fancy quoting algorithm */
344 /* TODO *quote-fold*: n_QUOTE_MAX may exceed it! */
345 #ifdef HAVE_QUOTE_FOLD
346 if (!bypass
&& (cp
= ok_vlook(quote_fold
)) != NULL
) {
347 ui32_t qmax
, qmaxnws
, qmin
;
349 /* These magic values ensure we don't bail */
350 n_idec_ui32_cp(&qmax
, cp
, 10, &xcp
);
351 if (qmax
< self
->qf_pfix_len
+ 6)
352 qmax
= self
->qf_pfix_len
+ 6;
353 qmaxnws
= --qmax
; /* The newline escape */
354 if (cp
== xcp
|| *xcp
== '\0')
355 qmin
= (qmax
>> 1) + (qmax
>> 2) + (qmax
>> 5);
357 n_idec_ui32_cp(&qmin
, &xcp
[1], 10, &xcp
);
358 if (qmin
< qmax
>> 1)
360 else if (qmin
> qmax
- 2)
363 if (cp
!= xcp
&& *xcp
!= '\0') {
364 n_idec_ui32_cp(&qmaxnws
, &xcp
[1], 10, &xcp
);
365 if (qmaxnws
> qmax
|| qmaxnws
< qmin
)
369 self
->qf_qfold_min
= qmin
;
370 self
->qf_qfold_max
= qmax
;
371 self
->qf_qfold_maxnws
= qmaxnws
;
372 self
->qf_quote_chars
= ok_vlook(quote_chars
);
374 /* Add pad for takeover copies, reverse solidus and newline */
375 self
->qf_dat
.s
= n_autorec_alloc((qmax
+ 3) * n_mb_cur_max
);
376 self
->qf_currq
.s
= n_autorec_alloc((n_QUOTE_MAX
+ 1) * n_mb_cur_max
);
383 quoteflt_destroy(struct quoteflt
*self
) /* xxx inline */
391 quoteflt_reset(struct quoteflt
*self
, FILE *f
) /* xxx inline */
395 #ifdef HAVE_QUOTE_FOLD
396 self
->qf_state
= _QF_CLEAN
;
398 self
->qf_currq
.l
= 0;
399 memset(self
->qf_mbps
, 0, sizeof self
->qf_mbps
);
405 quoteflt_push(struct quoteflt
*self
, char const *dat
, size_t len
)
407 /* (xxx Ideally the actual push() [and flush()] would be functions on their
408 * xxx own, via indirect vtbl call ..) */
412 self
->qf_nl_last
= (len
> 0 && dat
[len
- 1] == '\n'); /* TODO HACK */
417 /* Bypass? TODO Finally, this filter simply should not be used, then
418 * (TODO It supersedes prefix_write() or something) */
419 if (self
->qf_bypass
) {
420 if (len
!= fwrite(dat
, 1, len
, self
->qf_os
))
424 /* Normal: place *indentprefix* at every BOL */
426 #ifdef HAVE_QUOTE_FOLD
427 if (self
->qf_qfold_max
== 0)
432 bool_t pxok
= (self
->qf_qfold_min
!= 0);
435 if (!pxok
&& (ll
= self
->qf_pfix_len
) > 0) {
436 if (ll
!= fwrite(self
->qf_pfix
, 1, ll
, self
->qf_os
))
442 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
443 * xxx mandated by POSIX.1-2008 as "invariant across all locales
444 * xxx supported"; though there is no charset known which uses this
445 * xxx control char as part of a multibyte character; note that S-nail
446 * XXX (and the Mail codebase as such) do not support EBCDIC */
447 if ((vp
= memchr(dat
, '\n', len
)) == NULL
)
451 ll
= PTR2SIZE((char*)vp
- dat
) + 1;
454 if (ll
!= fwrite(dat
, sizeof *dat
, ll
, self
->qf_os
))
457 if ((len
-= ll
) == 0)
462 self
->qf_qfold_min
= pxok
;
464 /* Overly complicated, though still only line-per-line: *quote-fold*.
465 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
466 * TODO note this means we assume that lines start with reset escape seq,
467 * TODO but i don't think this is any worse than what we currently do;
468 * TODO in 15.0, with the value carrier, we should carry conversion states
469 * TODO all along, only resetting on error (or at words for header =???=);
470 * TODO this still is weird for error handling, but we need to act more
471 * TODO stream-alike (though in practice i don't think cross-line states
472 * TODO can be found, because of compatibility reasons; however, being
473 * TODO a problem rather than a solution is not a good thing (tm))
474 * - Lookout for a newline */
475 #ifdef HAVE_QUOTE_FOLD
484 switch (self
->qf_state
) {
487 i
= _qf_state_prefix(&vc
);
489 default: /* silence cc (`i' unused) */
491 i
= _qf_state_data(&vc
);
499 #endif /* HAVE_QUOTE_FOLD */
510 quoteflt_flush(struct quoteflt
*self
)
516 #ifdef HAVE_QUOTE_FOLD
517 if (self
->qf_dat
.l
> 0) {
518 rv
= _qf_dump_prefix(self
);
520 size_t i
= self
->qf_dat
.l
;
521 if (i
== fwrite(self
->qf_dat
.s
, 1, i
, self
->qf_os
))
526 self
->qf_brk_isws
= FAL0
;
527 self
->qf_wscnt
= self
->qf_brkl
= self
->qf_brkw
= 0;
528 self
->qf_datw
= self
->qf_pfix_len
+ self
->qf_currq
.l
;
537 * HTML tagsoup filter TODO rewrite wchar_t based (require HAVE_C90AMEND1)
538 * TODO . Numeric &#NO; entities should also be treated by struct hf_ent
539 * TODO . Yes, we COULD support CSS based quoting when we'd check type="quote"
540 * TODO (nonstandard) and watch out for style="gmail_quote" (or so, VERY
541 * TODO nonstandard) and tracking a stack of such elements (to be popped
542 * TODO once the closing element is seen). Then, after writing a newline,
543 * TODO place sizeof(stack) ">"s first. But aren't these HTML mails rude?
544 * TODO Interlocking and non-well-formed data will break us down
546 #ifdef HAVE_FILTER_HTML_TAGSOUP
549 _HF_MINLEN
= 10, /* Minimum line length (can't really be smaller) */
550 _HF_BRKSUB
= 8 /* Start considering line break MAX - BRKSUB */
554 _HF_BQUOTE_MASK
= 0xFFFFu
,
555 _HF_UTF8
= 1u<<16, /* Data is in UTF-8 */
556 _HF_ERROR
= 1u<<17, /* A hard error occurred, bail as soon as possible */
557 _HF_NOPUT
= 1u<<18, /* (In a tag,) Don't generate output */
558 _HF_IGN
= 1u<<19, /* Ignore mode on */
559 _HF_ANY
= 1u<<20, /* Yet seen just any output */
560 _HF_PRE
= 1u<<21, /* In <pre>formatted mode */
561 _HF_ENT
= 1u<<22, /* Currently parsing an entity */
562 _HF_BLANK
= 1u<<23, /* Whitespace last */
563 _HF_HREF
= 1u<<24, /* External <a href=> was the last href seen */
565 _HF_NL_1
= 1u<<25, /* One \n seen */
566 _HF_NL_2
= 2u<<25, /* We have produced an all empty line */
567 _HF_NL_MASK
= _HF_NL_1
| _HF_NL_2
570 enum hf_special_actions
{
571 _HFSA_NEEDSEP
= -1, /* Need an empty line (paragraph separator) */
572 _HFSA_NEEDNL
= -2, /* Need a new line start (table row) */
573 _HFSA_IGN
= -3, /* Things like <style>..</style>, <script>.. */
574 _HFSA_PRE
= -4, /* <pre>.. */
576 _HFSA_IMG
= -6, /* <img> */
577 _HFSA_HREF
= -7, /* <a>.. */
579 _HFSA_BQUOTE
= -9, /* <blockquote>, interpreted as citation! */
580 _HFSA_BQUOTE_END
= -10
583 enum hf_entity_flags
{
584 _HFE_HAVE_UNI
= 1<<6, /* Have a Unicode replacement character */
585 _HFE_HAVE_CSTR
= 1<<7, /* Have a string replacement */
586 /* We store the length of the entity name in the flags, too */
587 _HFE_LENGTH_MASK
= (1<<6) - 1
590 struct htmlflt_href
{
591 struct htmlflt_href
*hfh_next
;
592 ui32_t hfh_no
; /* Running sequence */
593 ui32_t hfh_len
; /* of .hfh_dat */
594 char hfh_dat
[n_VFIELD_SIZE(0)];
598 si32_t hft_act
; /* char or hf_special_actions */
599 /* Not NUL: character to inject, with high bit set: place a space
600 * afterwards. Note: only recognized with _HFSA_NEEDSEP or _HFSA_NEEDNL */
602 ui8_t hft_len
; /* Useful bytes in (NUL terminated) .hft_tag */
603 char const hft_tag
[10]; /* Tag less < and > surroundings (TR, /TR, ..) */
605 n_CTA(n_SIZEOF_FIELD(struct htmlflt_tag
, hft_tag
) < LINESIZE
,
606 "Structure field too large a size"); /* .hf_ign_tag */
609 ui8_t hfe_flags
; /* enum hf_entity_flags plus length of .hfe_ent */
610 char hfe_c
; /* Plain replacement character */
611 ui16_t hfe_uni
; /* Unicode codepoint if _HFE_HAVE_UNI */
612 char hfe_cstr
[5]; /* _HFE_HAVE_CSTR (e.g., &hellip; -> ...) */
613 char const hfe_ent
[7]; /* Entity less & and ; surroundings */
616 /* Tag list; not binary searched :(, so try to take care a bit */
617 static struct htmlflt_tag
const _hf_tags
[] = {
620 # define _X(S,A) {A, '\0', sizeof(S) -1, S "\0"}
621 # define _XC(S,C,A) {A, C, sizeof(S) -1, S "\0"}
623 # if 0 /* This is treated very special (to avoid wasting space in .hft_tag) */
624 _X("BLOCKQUOTE", _HFSA_BQUOTE
), _X("/BLOCKQUOTE", _HFSA_BQUOTE_END
),
627 _X("P", _HFSA_NEEDSEP
), _X("/P", _HFSA_NEEDNL
),
628 _X("DIV", _HFSA_NEEDSEP
), _X("/DIV", _HFSA_NEEDNL
),
629 _X("TR", _HFSA_NEEDNL
),
632 /* Let it stand out; also since we don't support implicit paragraphs after
633 * block elements, plain running text after a list (seen in Unicode
634 * announcement via Firefox) */
635 _X("UL", _HFSA_NEEDSEP
), _X("/UL", _HFSA_NEEDSEP
),
636 _XC("LI", (char)0x80 | '*', _HFSA_NEEDSEP
),
637 _X("DL", _HFSA_NEEDSEP
),
638 _X("DT", _HFSA_NEEDNL
),
640 _X("A", _HFSA_HREF
), _X("/A", _HFSA_HREF_END
),
641 _X("IMG", _HFSA_IMG
),
643 _X("PRE", _HFSA_PRE
), _X("/PRE", _HFSA_PRE_END
),
644 _X("TITLE", _HFSA_NEEDSEP
), /*_X("/TITLE", '\n'),*/
645 _X("H1", _HFSA_NEEDSEP
), /*_X("/H1", '\n'),*/
646 _X("H2", _HFSA_NEEDSEP
), /*_X("/H2", '\n'),*/
647 _X("H3", _HFSA_NEEDSEP
), /*_X("/H3", '\n'),*/
648 _X("H4", _HFSA_NEEDSEP
), /*_X("/H4", '\n'),*/
649 _X("H5", _HFSA_NEEDSEP
), /*_X("/H5", '\n'),*/
650 _X("H6", _HFSA_NEEDSEP
), /*_X("/H6", '\n'),*/
652 _X("STYLE", _HFSA_IGN
),
653 _X("SCRIPT", _HFSA_IGN
),
658 /* Entity list; not binary searched.. */
659 static struct hf_ent
const _hf_ents
[] = {
664 # define _X(E,C) {(sizeof(E) -1), C, 0x0u, "", E "\0"}
665 # define _XU(E,C,U) {(sizeof(E) -1) | _HFE_HAVE_UNI, C, U, "", E "\0"}
666 # define _XS(E,S) {(sizeof(E) -1) | _HFE_HAVE_CSTR, '\0', 0x0u,S "\0",E "\0"}
667 # define _XSU(E,S,U) \
668 {(sizeof(E) -1) | _HFE_HAVE_UNI | _HFE_HAVE_CSTR, '\0', U, S "\0", E "\0"}
672 _X("lt", '<'), _X("gt", '>'),
674 _XU("nbsp", ' ', 0x0020 /* Note: not 0x00A0 seems to be better for us */),
675 _XU("middot", '.', 0x00B7),
676 _XSU("hellip", "...", 0x2026),
677 _XSU("mdash", "---", 0x2014), _XSU("ndash", "--", 0x2013),
678 _XSU("laquo", "<<", 0x00AB), _XSU("raquo", ">>", 0x00BB),
679 _XSU("lsaquo", "<", 0x2039), _XSU("rsaquo", ">", 0x203A),
680 _XSU("lsquo", "'", 0x2018), _XSU("rsquo", "'", 0x2019),
681 _XSU("ldquo", "\"", 0x201C), _XSU("rdquo", "\"", 0x201D),
682 _XSU("uarr", "^|", 0x2191), _XSU("darr", "|v", 0x2193),
684 _XSU("cent", "CENT", 0x00A2),
685 _XSU("copy", "(C)", 0x00A9),
686 _XSU("euro", "EUR", 0x20AC),
687 _XSU("infin", "INFY", 0x221E),
688 _XSU("pound", "GBP", 0x00A3),
689 _XSU("reg", "(R)", 0x00AE),
690 _XSU("sect", "S:", 0x00A7),
691 _XSU("yen", "JPY", 0x00A5),
694 _XSU("Auml", "Ae", 0x00C4), _XSU("auml", "ae", 0x00E4),
695 _XSU("Ouml", "Oe", 0x00D6), _XSU("ouml", "oe", 0x00F6),
696 _XSU("Uuml", "Ue", 0x00DC), _XSU("uuml", "ue", 0x00FC),
697 _XSU("szlig", "ss", 0x00DF)
706 static struct htmlflt
* _hf_dump_hrefs(struct htmlflt
*self
);
707 static struct htmlflt
* _hf_dump(struct htmlflt
*self
);
708 static struct htmlflt
* _hf_store(struct htmlflt
*self
, char c
);
709 # ifdef HAVE_NATCH_CHAR
710 static struct htmlflt
* __hf_sync_mbstuff(struct htmlflt
*self
);
714 static struct htmlflt
* _hf_nl(struct htmlflt
*self
);
715 static struct htmlflt
* _hf_nl_force(struct htmlflt
*self
);
716 static struct htmlflt
* _hf_putc(struct htmlflt
*self
, char c
);
717 static struct htmlflt
* _hf_putc_premode(struct htmlflt
*self
, char c
);
718 static struct htmlflt
* _hf_puts(struct htmlflt
*self
, char const *cp
);
719 static struct htmlflt
* _hf_putbuf(struct htmlflt
*self
,
720 char const *cp
, size_t len
);
722 /* Try to locate a param'eter in .hf_bdat, store it (non-terminated!) or NULL */
723 static struct htmlflt
* _hf_param(struct htmlflt
*self
, struct str
*store
,
726 /* Expand all entities in the given parameter */
727 static struct htmlflt
* _hf_expand_all_ents(struct htmlflt
*self
,
728 struct str
const *param
);
730 /* Completely parsed over a tag / an entity, interpret that */
731 static struct htmlflt
* _hf_check_tag(struct htmlflt
*self
, char const *s
);
732 static struct htmlflt
* _hf_check_ent(struct htmlflt
*self
, char const *s
,
736 static ssize_t
_hf_add_data(struct htmlflt
*self
,
737 char const *dat
, size_t len
);
739 static struct htmlflt
*
740 _hf_dump_hrefs(struct htmlflt
*self
)
742 struct htmlflt_href
*hhp
;
745 if (!(self
->hf_flags
& _HF_NL_2
) && putc('\n', self
->hf_os
) == EOF
) {
746 self
->hf_flags
|= _HF_ERROR
;
750 /* Reverse the list */
751 for (hhp
= self
->hf_hrefs
, self
->hf_hrefs
= NULL
; hhp
!= NULL
;) {
752 struct htmlflt_href
*tmp
= hhp
->hfh_next
;
753 hhp
->hfh_next
= self
->hf_hrefs
;
754 self
->hf_hrefs
= hhp
;
759 while ((hhp
= self
->hf_hrefs
) != NULL
) {
760 self
->hf_hrefs
= hhp
->hfh_next
;
762 if (!(self
->hf_flags
& _HF_ERROR
)) {
763 int w
= fprintf(self
->hf_os
, " [%u] %.*s\n",
764 hhp
->hfh_no
, (int)hhp
->hfh_len
, hhp
->hfh_dat
);
766 self
->hf_flags
|= _HF_ERROR
;
771 self
->hf_flags
|= (putc('\n', self
->hf_os
) == EOF
)
772 ? _HF_ERROR
: _HF_NL_1
| _HF_NL_2
;
773 self
->hf_href_dist
= (ui32_t
)n_realscreenheight
>> 1;
779 static struct htmlflt
*
780 _hf_dump(struct htmlflt
*self
)
786 f
= self
->hf_flags
& ~_HF_BLANK
;
789 self
->hf_mbwidth
= self
->hf_mboff
= self
->hf_last_ws
= self
->hf_len
= 0;
791 for (c
= '\0'; l
> 0; --l
) {
794 if (putc(c
, self
->hf_os
) == EOF
) {
795 self
->hf_flags
= (f
|= _HF_ERROR
);
801 f
|= (f
& _HF_NL_1
) ? _HF_NL_2
: _HF_NL_1
;
808 /* Check whether there are HREFs to dump; there is so much messy tagsoup out
809 * there that it seems best not to simply dump HREFs in each _dump(), but
810 * only with some gap, let's say half the real screen height */
811 if (--self
->hf_href_dist
< 0 && (f
& _HF_NL_2
) && self
->hf_hrefs
!= NULL
)
812 self
= _hf_dump_hrefs(self
);
818 static struct htmlflt
*
819 _hf_store(struct htmlflt
*self
, char c
)
827 if(n_UNLIKELY(l
== 0) && (i
= (self
->hf_flags
& _HF_BQUOTE_MASK
)) != 0 &&
828 self
->hf_lmax
> _HF_MINLEN
){
832 ip
= ok_vlook(indentprefix
);
834 if(len
== 0 || len
>= _HF_MINLEN
){
835 ip
= " |"; /* XXX something from *quote-chars* */
836 len
= sizeof(" |") -1;
840 for(j
= len
; j
-- != 0;){
843 if((x
= ip
[j
]) == '\t')
845 self
->hf_line
[j
] = x
;
848 while(--i
> 0 && self
->hf_len
< self
->hf_lmax
- _HF_BRKSUB
)
849 self
= _hf_store(self
, '|'); /* XXX something from *quote-chars* */
854 self
->hf_line
[l
] = (c
== '\t' ? ' ' : c
);
856 if (blankspacechar(c
)) {
858 i
= 8 - ((l
- 1) & 7); /* xxx magic tab width of 8 */
861 self
= _hf_store(self
, ' ');
866 self
->hf_last_ws
= l
;
867 } else if (/*c == '.' ||*/ c
== ',' || c
== ';' || c
== '-')
868 self
->hf_last_ws
= l
;
871 # ifdef HAVE_NATCH_CHAR /* XXX This code is really ridiculous! */
872 if (n_mb_cur_max
> 1) { /* XXX should mbrtowc() and THEN store, at least */
876 if((x
= mbtowc(&wc
, self
->hf_line
+ self
->hf_mboff
, l
- self
->hf_mboff
)
878 if ((w
= wcwidth(wc
)) == -1 ||
879 /* Actively filter out L-TO-R and R-TO-L marks TODO ctext */
880 (wc
== 0x200E || wc
== 0x200F ||
881 (wc
>= 0x202A && wc
<= 0x202E)) ||
882 /* And some zero-width messes */
883 wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D) ||
884 /* Oh about the ISO C wide character interfaces, baby! */
888 } else if (iswspace(wc
))
889 self
->hf_last_ws
= l
;
891 i
= (self
->hf_mbwidth
+= w
);
894 (void)mbtowc(&wc
, NULL
, n_mb_cur_max
);
895 if (UICMP(32, l
- self
->hf_mboff
, >=, n_mb_cur_max
)) { /* XXX */
900 i
= self
->hf_mbwidth
;
905 /* Do we need to break the line? */
906 if (i
>= self
->hf_lmax
- _HF_BRKSUB
) {
910 /* Let's hope we saw a sane place to break this line! */
911 if (self
->hf_last_ws
>= (lim
= self
->hf_lmax
>> 1)) {
913 i
= self
->hf_len
= self
->hf_last_ws
;
914 self
= _hf_dump(self
);
915 if ((self
->hf_len
= (l
-= i
)) > 0) {
916 self
->hf_flags
&= ~_HF_NL_MASK
;
917 memmove(self
->hf_line
, self
->hf_line
+ i
, l
);
918 # ifdef HAVE_NATCH_CHAR
919 __hf_sync_mbstuff(self
);
925 /* Any 7-bit characters? */
927 for (i
= l
; i
-- >= lim
;)
928 if (asciichar((c
= self
->hf_line
[i
]))) {
929 self
->hf_last_ws
= ++i
;
931 } else if ((f
& _HF_UTF8
) && ((ui8_t
)c
& 0xC0) != 0x80) {
932 self
->hf_last_ws
= i
;
936 /* Hard break necessary! xxx really badly done */
937 if (l
>= self
->hf_lmax
- 1)
938 self
= _hf_dump(self
);
945 # ifdef HAVE_NATCH_CHAR
946 static struct htmlflt
*
947 __hf_sync_mbstuff(struct htmlflt
*self
)
960 int x
= mbtowc(&wc
, b
, l
);
969 if ((x
= wcwidth(wc
)) == -1)
975 /* Bad, skip over a single character.. XXX very bad indeed */
981 (void)mbtowc(&wc
, NULL
, n_mb_cur_max
);
985 self
->hf_mbwidth
= w
;
990 # endif /* HAVE_NATCH_CHAR */
992 static struct htmlflt
*
993 _hf_nl(struct htmlflt
*self
)
998 if (!((f
= self
->hf_flags
) & _HF_ERROR
)) {
1000 if ((f
& _HF_NL_MASK
) != _HF_NL_MASK
)
1001 self
= _hf_dump(self
);
1003 self
->hf_flags
= (f
|= _HF_NL_MASK
);
1009 static struct htmlflt
*
1010 _hf_nl_force(struct htmlflt
*self
)
1013 if (!(self
->hf_flags
& _HF_ERROR
))
1014 self
= _hf_dump(self
);
1019 static struct htmlflt
*
1020 _hf_putc(struct htmlflt
*self
, char c
)
1025 if ((f
= self
->hf_flags
) & _HF_ERROR
)
1029 self
= _hf_nl(self
);
1031 } else if (c
== ' ' || c
== '\t') {
1032 if ((f
& _HF_BLANK
) || self
->hf_len
== 0)
1038 self
->hf_flags
= (f
|= _HF_ANY
);
1039 self
= _hf_store(self
, c
);
1045 static struct htmlflt
*
1046 _hf_putc_premode(struct htmlflt
*self
, char c
)
1051 if ((f
= self
->hf_flags
) & _HF_ERROR
) {
1053 } else if (c
== '\n')
1054 self
= _hf_nl_force(self
);
1057 self
->hf_flags
= (f
|= _HF_ANY
);
1058 self
= _hf_store(self
, c
);
1064 static struct htmlflt
*
1065 _hf_puts(struct htmlflt
*self
, char const *cp
)
1070 while ((c
= *cp
++) != '\0')
1071 self
= _hf_putc(self
, c
);
1076 static struct htmlflt
*
1077 _hf_putbuf(struct htmlflt
*self
, char const *cp
, size_t len
)
1082 self
= _hf_putc(self
, *cp
++);
1087 static struct htmlflt
*
1088 _hf_param(struct htmlflt
*self
, struct str
*store
, char const *param
)
1100 /* Skip over any non-WS first; be aware of soup, if it slipped through */
1102 if((c
= *cp
++) == '\0' || c
== '>')
1108 /* Search for the parameter, take care of other quoting along the way */
1114 if((c
= *cp
++) == '\0' || c
== '>')
1121 /* Could it be a parameter? */
1125 /* Is it the desired one? */
1126 if((c
= upperconv(c
)) == x
&& !ascncasecmp(param
, cp
, i
)){
1127 char const *cp2
= cp
+ i
;
1129 if((quote
= *cp2
++) != '='){
1130 if(quote
== '\0' || quote
== '>')
1132 while(whitechar(quote
))
1139 continue; /* XXX Optimize: i bytes or even cp2 can't be it! */
1143 /* Not the desired one; but a parameter? */
1146 /* If so, properly skip over the value */
1147 if((c
= *cp
++) == '"' || c
== '\''){
1148 /* TODO i have forgotten whether reverse solidus quoting is allowed in
1149 * TODO quoted HTML parameter values? not supporting that for now.. */
1150 for(quote
= c
; (c
= *cp
++) != '\0' && c
!= quote
;)
1153 while(c
!= '\0' && !whitechar(c
) && c
!= '>')
1159 /* Skip further whitespace */
1161 if((c
= *cp
++) == '\0' || c
== '>')
1167 if(c
== '"' || c
== '\''){
1168 /* TODO i have forgotten whether reverse solidus quoting is allowed in
1169 * TODO quoted HTML parameter values? not supporting that for now.. */
1170 store
->s
= n_UNCONST(cp
);
1171 for(quote
= c
; (c
= *cp
) != '\0' && c
!= quote
; ++cp
)
1173 /* XXX ... and we simply ignore a missing trailing " :> */
1175 store
->s
= n_UNCONST(cp
- 1);
1177 while((c
= *cp
) != '\0' && !whitechar(c
) && c
!= '>')
1180 i
= PTR2SIZE(cp
- store
->s
);
1182 /* Terrible tagsoup out there, e.g., groups.google.com produces href=""
1183 * parameter values prefixed and suffixed by newlines! Therefore trim the
1184 * value content TODO join into the parse step above! */
1185 for (cp
= store
->s
; i
> 0 && spacechar(*cp
); ++cp
, --i
)
1187 store
->s
= n_UNCONST(cp
);
1188 for (cp
+= i
- 1; i
> 0 && spacechar(*cp
); --cp
, --i
)
1190 if ((store
->l
= i
) == 0)
1197 static struct htmlflt
*
1198 _hf_expand_all_ents(struct htmlflt
*self
, struct str
const *param
)
1200 char const *cp
, *maxcp
, *ep
;
1205 for (cp
= param
->s
, maxcp
= cp
+ param
->l
; cp
< maxcp
;)
1206 if ((c
= *cp
++) != '&')
1208 self
= _hf_putc(self
, c
);
1211 if (ep
== maxcp
|| (c
= *ep
++) == '\0') {
1212 for (; cp
< ep
; ++cp
)
1213 self
= _hf_putc(self
, *cp
);
1215 } else if (c
== ';') {
1216 if ((i
= PTR2SIZE(ep
- cp
)) > 1) {
1217 self
= _hf_check_ent(self
, cp
, i
);
1232 static struct htmlflt
*
1233 _hf_check_tag(struct htmlflt
*self
, char const *s
)
1238 struct htmlflt_tag
const *hftp
;
1242 /* Extra check only */
1245 DBG( n_alert("HTML tagsoup filter _hf_check_tag() called on soup!"); )
1247 self
= _hf_puts(self
, self
->hf_bdat
);
1251 for (++s
, i
= 0; (c
= s
[i
]) != '\0' && c
!= '>' && !whitechar(c
); ++i
)
1252 /* Special massage for things like <br/>: after the slash only whitespace
1253 * may separate us from the closing right angle! */
1257 while ((c
= s
[j
]) != '\0' && c
!= '>' && whitechar(c
))
1263 for (hftp
= _hf_tags
;;) {
1264 if (i
== hftp
->hft_len
&& !ascncasecmp(s
, hftp
->hft_tag
, i
)) {
1265 c
= s
[hftp
->hft_len
];
1266 if (c
== '>' || c
== '/' || whitechar(c
))
1269 if (n_UNLIKELY(PTRCMP(++hftp
, >=, _hf_tags
+ n_NELEM(_hf_tags
)))){
1270 /* A <blockquote> is very special xxx */
1273 if((isct
= (i
> 1 && *s
== '/'))){
1278 if(i
!= sizeof("blockquote") -1 || ascncasecmp(s
, "blockquote", i
) ||
1279 ((c
= s
[sizeof("blockquote") -1]) != '>' && !whitechar(c
))){
1285 if(!isct
&& !(self
->hf_flags
& _HF_NL_2
))
1286 self
= _hf_nl(self
);
1287 if(!(self
->hf_flags
& _HF_NL_1
))
1288 self
= _hf_nl(self
);
1290 f
&= _HF_BQUOTE_MASK
;
1292 if(f
!= _HF_BQUOTE_MASK
)
1296 f
|= (self
->hf_flags
& ~_HF_BQUOTE_MASK
);
1303 switch (hftp
->hft_act
) {
1315 if (!(self
->hf_flags
& _HF_NL_2
))
1316 self
= _hf_nl(self
);
1319 if (!(f
& _HF_NL_1
))
1320 self
= _hf_nl(self
);
1321 if (hftp
->hft_injc
!= '\0') {
1322 self
= _hf_putc(self
, hftp
->hft_injc
& 0x7F);
1323 if ((uc_i
)hftp
->hft_injc
& 0x80)
1324 self
= _hf_putc(self
, ' ');
1329 self
->hf_ign_tag
= hftp
;
1330 self
->hf_flags
= (f
|= _HF_IGN
| _HF_NOPUT
);
1334 self
= _hf_param(self
, ¶m
, "alt");
1335 self
= _hf_putc(self
, '[');
1336 if (param
.s
== NULL
) {
1337 param
.s
= n_UNCONST("IMG");
1340 } /* else */ if (memchr(param
.s
, '&', param
.l
) != NULL
)
1341 self
= _hf_expand_all_ents(self
, ¶m
);
1344 self
= _hf_putbuf(self
, param
.s
, param
.l
);
1345 self
= _hf_putc(self
, ']');
1349 self
= _hf_param(self
, ¶m
, "href");
1350 /* Ignore non-external links */
1351 if (param
.s
!= NULL
&& *param
.s
!= '#') {
1352 struct htmlflt_href
*hhp
= n_alloc(
1353 n_VSTRUCT_SIZEOF(struct htmlflt_href
, hfh_dat
) + param
.l
+1);
1355 hhp
->hfh_next
= self
->hf_hrefs
;
1356 hhp
->hfh_no
= ++self
->hf_href_no
;
1357 hhp
->hfh_len
= (ui32_t
)param
.l
;
1358 memcpy(hhp
->hfh_dat
, param
.s
, param
.l
);
1360 snprintf(nobuf
, sizeof nobuf
, "[%u]", hhp
->hfh_no
);
1361 self
->hf_flags
= (f
|= _HF_HREF
);
1362 self
->hf_hrefs
= hhp
;
1363 self
= _hf_puts(self
, nobuf
);
1365 self
->hf_flags
= (f
&= ~_HF_HREF
);
1367 case _HFSA_HREF_END
:
1369 snprintf(nobuf
, sizeof nobuf
, "[/%u]", self
->hf_href_no
);
1370 self
= _hf_puts(self
, nobuf
);
1375 c
= (char)(hftp
->hft_act
& 0xFF);
1376 self
= _hf_putc(self
, c
);
1385 /* The problem is that even invalid tagsoup is widely used, without real
1386 * searching i have seen e-mail address in <N@H.D> notation, and more.
1387 * To protect us a bit look around and possibly write the content as such */
1392 /* Ignore <!DOCTYPE, <!-- comments, <? PIs.. */
1395 /* Print out an empty tag as such */
1408 /* Also skip over : in order to suppress v:roundrect, w:anchorlock.. */
1409 while ((c
= *s
++) != '\0' && c
!= '>' && !whitechar(c
) && c
!= ':')
1410 if (!asciichar(c
) || punctchar(c
)) {
1411 self
= _hf_puts(self
, self
->hf_bdat
);
1417 static struct htmlflt
*
1418 _hf_check_ent(struct htmlflt
*self
, char const *s
, size_t l
)
1423 struct hf_ent
const *hfep
;
1431 /* False entities seen in the wild assert(s[l - 1] == ';'); */
1435 /* Numeric entity, or try named search */
1437 i
= (*++s
== 'x' ? 16 : 10);
1439 if ((i
!= 16 || (++s
, --l
) > 0) && l
< sizeof(nobuf
)) {
1440 memcpy(nobuf
, s
, l
);
1442 n_idec_uiz_cp(&i
, nobuf
, i
, NULL
);
1444 self
= _hf_putc(self
, (char)i
);
1445 else if (self
->hf_flags
& _HF_UTF8
) {
1447 l
= n_utf32_to_utf8((ui32_t
)i
, nobuf
);
1448 self
= _hf_putbuf(self
, nobuf
, l
);
1454 ui32_t f
= self
->hf_flags
, hf
;
1456 for (hfep
= _hf_ents
; PTRCMP(hfep
, <, _hf_ents
+ n_NELEM(_hf_ents
));
1458 if (l
== ((hf
= hfep
->hfe_flags
) & _HFE_LENGTH_MASK
) &&
1459 !strncmp(s
, hfep
->hfe_ent
, l
)) {
1460 if ((hf
& _HFE_HAVE_UNI
) && (f
& _HF_UTF8
)) {
1463 } else if (hf
& _HFE_HAVE_CSTR
)
1464 self
= _hf_puts(self
, hfep
->hfe_cstr
);
1466 self
= _hf_putc(self
, hfep
->hfe_c
);
1470 self
= _hf_putbuf(self
, s_save
, l_save
);
/* Core input routine of the HTML filter: walks the len bytes at dat one by
 * one and drives the tag / entity state kept in self->hf_flags, using
 * self->hf_bdat as a growable scratch buffer.
 * htmlflt_push() forwards real data here, htmlflt_flush() calls this with
 * dat=NULL and len=0.
 * Returns -1 if _HF_ERROR is set in self->hf_flags on exit, otherwise the
 * value len had on entry.
 * NOTE(review): several statements of this function (case labels, braces,
 * some guards) are not visible in this excerpt; the comments below only
 * describe what the visible statements do */
1478 _hf_add_data(struct htmlflt
*self
, char const *dat
, size_t len
)
1480 char c
, *cp
, *cp_max
;
1485 /* Final put request? */
/* Dump pending output (hf_len > 0) and any collected hrefs.
 * NOTE(review): the test that selects this path is not visible here;
 * presumably it is the dat==NULL call made by htmlflt_flush() -- confirm */
1487 if (self
->hf_len
> 0 || self
->hf_hrefs
!= NULL
) {
1488 self
= _hf_dump(self
);
1489 if (self
->hf_hrefs
!= NULL
)
1490 self
= _hf_dump_hrefs(self
);
1496 /* Always ensure some initial buffer */
/* Continue with the existing scratch buffer when hf_curr is set, otherwise
 * allocate LINESIZE bytes; hf_bmax / cp_max address the last byte of it */
1497 if ((cp
= self
->hf_curr
) != NULL
)
1498 cp_max
= self
->hf_bmax
;
1500 cp
= self
->hf_curr
= self
->hf_bdat
= n_alloc(LINESIZE
);
1501 cp_max
= self
->hf_bmax
= cp
+ LINESIZE
-1; /* (Always room for NUL!) */
/* hot: the write cursor is not at the start of the scratch buffer */
1503 hot
= (cp
!= self
->hf_bdat
);
/* rv starts out as the input length; len counts down to zero */
1505 for (rv
= (ssize_t
)len
; len
> 0; --len
) {
/* f is a local snapshot of the flags; the visible state changes below
 * update f and self->hf_flags together */
1506 ui32_t f
= self
->hf_flags
;
1512 /* Soup is really weird, and scripts may contain almost anything (and
1513 * newer CSS standards are also cryptic): therefore prefix the _HF_IGN
1514 * test and walk until we see the required end tag */
1515 /* TODO For real safety _HF_IGN soup condome would also need to know
1516 * TODO about quoted strings so that 'var i = "</script>";' couldn't
1517 * TODO fool it! We really want this mode also for _HF_NOPUT to be
1518 * TODO able to *gracefully* detect the tag-closing '>', but then if
1519 * TODO that is a single mechanism we should have made it! */
/* hftp: tag descriptor stored in self->hf_ign_tag; its hft_tag / hft_len
 * are compared against the buffered text below */
1521 struct htmlflt_tag
const *hftp
= self
->hf_ign_tag
;
1528 } else if (c
== '>') {
/* i = number of buffered bytes.  If more than one byte is buffered and the
 * bytes after the first one match hft_tag (case-insensitively, exactly
 * hft_len of them), clear _HF_IGN and _HF_NOPUT */
1530 if ((i
= PTR2SIZE(cp
- self
->hf_bdat
)) > 1 &&
1531 --i
== hftp
->hft_len
&&
1532 !ascncasecmp(self
->hf_bdat
+ 1, hftp
->hft_tag
, i
))
1533 self
->hf_flags
= (f
&= ~(_HF_IGN
| _HF_NOPUT
));
/* True if exactly one byte is buffered and c is not '/', or if (after the
 * decrement) more bytes are buffered than hft_tag is long.
 * NOTE(review): the action taken is not visible here; presumably the
 * end-tag candidate is abandoned -- confirm */
1539 i
= PTR2SIZE(cp
- self
->hf_bdat
);
1540 if ((i
== 1 && c
!= '/') || --i
> hftp
->hft_len
) {
1547 /* People are using & without &amp;ing it, ditto <; be aware */
/* Only taken while _HF_NOPUT or _HF_ENT is already set */
1548 if (f
& (_HF_NOPUT
| _HF_ENT
)) {
1550 /* Special case "<!--" buffer content to deal with really weird
1551 * things that can be done with "<!--[if gte mso 9]>" syntax */
/* True unless the buffer holds exactly the four bytes "<!--" */
1552 if (PTR2SIZE(cp
- self
->hf_bdat
) != 4 ||
1553 memcmp(self
->hf_bdat
, "<!--", 4)) {
/* Write the buffered text out as-is */
1556 self
= _hf_puts(self
, self
->hf_bdat
);
/* Set _HF_NOPUT in both the snapshot and the object */
1562 self
->hf_flags
= (f
|= _HF_NOPUT
);
1565 /* Weird tagsoup around, do we actually parse a tag? */
1566 if (!(f
& _HF_NOPUT
))
/* Clear both collection bits in the local snapshot */
1570 f
&= ~(_HF_NOPUT
| _HF_ENT
);
/* Let _hf_check_tag() interpret the buffered text, then reset the scratch
 * buffer to an empty string */
1572 self
= _hf_check_tag(self
, self
->hf_bdat
);
1573 *(cp
= self
->hf_bdat
) = '\0'; /* xxx extra safety */
1574 /* Quick hack to get rid of redundant newline after <pre> XXX */
/* f is not reloaded in the visible code after _hf_check_tag(), so this is
 * true when that call just turned _HF_PRE on and the next input byte
 * (if any) is a newline */
1575 if (!(f
& _HF_PRE
) && (self
->hf_flags
& _HF_PRE
) &&
1576 len
> 1 && *dat
== '\n')
1580 case '\r': /* TODO CR should be stripped in lower level!! (Only B64!?!) */
1583 /* End of line is not considered unless we are in PRE section.
1584 * However, in _HF_NOPUT mode we must be aware of tagsoup which uses
1585 * newlines for separating parameters */
/* With _HF_PRE set force a line break, otherwise emit a single space */
1588 self
= (f
& _HF_PRE
) ? _hf_nl_force(self
) : _hf_putc(self
, ' ');
1597 /* If not currently parsing a tag and bypassing normal output.. */
1598 if (!(f
& _HF_NOPUT
)) {
/* Set _HF_NOPUT and _HF_ENT together; the ';' branch below requires
 * _HF_ENT before it calls _hf_check_ent() */
1604 self
->hf_flags
= (f
|= _HF_NOPUT
| _HF_ENT
);
1605 } else if (f
& _HF_PRE
) {
/* _HF_PRE set: output c via the premode writer and clear _HF_BLANK */
1606 self
= _hf_putc_premode(self
, c
);
1607 self
->hf_flags
&= ~_HF_BLANK
;
/* Otherwise output c via the regular writer */
1609 self
= _hf_putc(self
, c
);
1610 } else if ((f
& _HF_ENT
) && c
== ';') {
/* Clear both collection bits in the snapshot, then pass the buffered text
 * to _hf_check_ent(); the length argument is cp + 1 - hf_bdat */
1613 f
&= ~(_HF_NOPUT
| _HF_ENT
);
1615 self
= _hf_check_ent(self
, self
->hf_bdat
,
1616 PTR2SIZE(cp
+ 1 - self
->hf_bdat
));
1618 /* We may need to grow the buffer */
/* Triggered once cp + 21 (42/2) reaches cp_max: enlarge by LINESIZE.
 * i keeps the cursor offset so cp can be re-derived from the pointer that
 * n_realloc() returns */
1619 if (PTRCMP(cp
+ 42/2, >=, cp_max
)) {
1620 size_t i
= PTR2SIZE(cp
- self
->hf_bdat
),
1621 m
= PTR2SIZE(self
->hf_bmax
- self
->hf_bdat
) + LINESIZE
;
1623 cp
= self
->hf_bdat
= n_realloc(self
->hf_bdat
, m
);
1624 self
->hf_bmax
= cp_max
= &cp
[m
-1];
1625 self
->hf_curr
= (cp
+= i
);
/* An _HF_ERROR flag overrides the byte count */
1634 return (self
->hf_flags
& _HF_ERROR
) ? -1 : rv
;
1638 * TODO Because we don't support filter chains yet this filter will be run
1639 * TODO in a dedicated subprocess, driven via a special Popen() mode
/* Set to TRU1 by the signal handler __hf_onpipe(); cleared to FAL0 and
 * then inspected by htmlflt_process_main() */
1641 static bool_t __hf_hadpipesig
;
/* Signal handler which htmlflt_process_main() installs for SIGPIPE.
 * The visible code only records the event by setting __hf_hadpipesig and
 * does not use signo */
1643 __hf_onpipe(int signo
)
1645 NYD_X
; /* Signal handler */
1647 __hf_hadpipesig
= TRU1
;
/* Driver that runs the HTML filter from n_stdin to n_stdout: data is read
 * in chunks of at most BUFFER_SIZE bytes and pushed through a local filter
 * object, which is flushed and destroyed afterwards.
 * rv collects failure indications (read error, pipe signal).
 * NOTE(review): the loop construct, the error branches and the return
 * statement are not visible in this excerpt; presumably rv is the return
 * value -- confirm */
1651 htmlflt_process_main(void)
1653 char buf
[BUFFER_SIZE
];
/* Start with a clean flag and catch SIGPIPE via __hf_onpipe() */
1659 __hf_hadpipesig
= FAL0
;
1660 safe_signal(SIGPIPE
, &__hf_onpipe
);
/* Prepare the filter object with n_stdout as its output stream argument */
1663 htmlflt_reset(&hf
, n_stdout
);
/* Nothing read: rv becomes non-zero unless the stream is at end-of-file */
1666 if ((i
= fread(buf
, sizeof(buf
[0]), n_NELEM(buf
), n_stdin
)) == 0) {
1667 rv
= !feof(n_stdin
);
/* rv takes over the signal flag; the condition holds if the handler ran */
1671 if ((rv
= __hf_hadpipesig
))
1673 /* Just use this directly.. */
/* Feed the i bytes just read; a negative result means the filter failed */
1674 if (htmlflt_push(&hf
, buf
, i
) < 0) {
/* Flush only if nothing went wrong so far */
1679 if (rv
== 0 && htmlflt_flush(&hf
) < 0)
1682 htmlflt_destroy(&hf
);
/* A signal that arrived late (e.g. while flushing) still marks failure */
1684 rv
|= __hf_hadpipesig
;
/* Initialize a filter object by setting all of its bytes to zero.
 * No memory is allocated in the visible code */
1690 htmlflt_init(struct htmlflt
*self
)
1693 /* (Rather redundant though) */
1694 memset(self
, 0, sizeof *self
);
/* Tear down a filter object: implemented as htmlflt_reset() with a NULL
 * output stream */
1699 htmlflt_destroy(struct htmlflt
*self
)
1702 htmlflt_reset(self
, NULL
);
/* Release everything self owns, zero it, and set it up for a new run.
 * f is the output stream argument; htmlflt_destroy() passes NULL and
 * htmlflt_process_main() passes n_stdout.
 * NOTE(review): the statement that frees each href node, the guard around
 * the re-initialization part and the use of f are not visible in this
 * excerpt; presumably the second half only runs for f != NULL -- confirm */
1707 htmlflt_reset(struct htmlflt
*self
, FILE *f
)
1709 struct htmlflt_href
*hfhp
;
/* Pop the href list head until the list is empty */
1712 while ((hfhp
= self
->hf_hrefs
) != NULL
) {
1713 self
->hf_hrefs
= hfhp
->hfh_next
;
/* Free the scratch buffer and the line buffer if they exist */
1717 if (self
->hf_bdat
!= NULL
)
1718 n_free(self
->hf_bdat
);
1719 if (self
->hf_line
!= NULL
)
1720 n_free(self
->hf_line
);
/* Back to the all-zero state that htmlflt_init() establishes */
1722 memset(self
, 0, sizeof *self
);
/* sw: the larger of _HF_MINLEN and n_scrnwidth.  The line buffer gets
 * n_mb_cur_max bytes per unit of sw, plus one extra byte */
1725 ui32_t sw
= n_MAX(_HF_MINLEN
, (ui32_t
)n_scrnwidth
);
1727 self
->hf_line
= n_alloc((size_t)sw
* n_mb_cur_max
+1);
/* Plain assignment: _HF_UTF8 becomes the only flag set */
1730 if (n_psonce
& n_PSO_UNICODE
) /* TODO not truly generic */
1731 self
->hf_flags
= _HF_UTF8
;
/* Feed len bytes at dat into the filter.
 * rv receives the result of _hf_add_data(), which is -1 on a filter error
 * and the byte count otherwise.
 * NOTE(review): the return statement is not visible in this excerpt */
1738 htmlflt_push(struct htmlflt
*self
, char const *dat
, size_t len
)
1743 rv
= _hf_add_data(self
, dat
, len
);
/* Finish a filter run: makes the final call into _hf_add_data() (no data,
 * zero length), then flushes the output stream self->hf_os.
 * NOTE(review): the return statement is not visible in this excerpt */
1749 htmlflt_flush(struct htmlflt
*self
)
1754 rv
= _hf_add_data(self
, NULL
, 0);
/* A failing fflush() ORs -1 into rv; a successful one leaves rv as is */
1755 rv
|= !fflush(self
->hf_os
) ? 0 : -1;
1759 #endif /* HAVE_FILTER_HTML_TAGSOUP */