* Width of characters is not always determined correctly when wcwidth
[alpine.git] / pith / filter.c
blob9f0182ba25aa6d9e8c3638edb0f38d1e45bfc489
1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
5 /*
6 * ========================================================================
7 * Copyright 2013-2018 Eduardo Chappa
8 * Copyright 2006-2008 University of Washington
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
20 filter.c
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
30 call another filter).
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
38 TODO:
39 reasonable error handling
41 ====*/
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
/*
 * Internal prototypes
 */

/* storage-object (STORE_S) accessors, see gf_set_so_readc/gf_set_so_writec */
int	gf_so_writec(int);
int	gf_so_readc(unsigned char *);

/* stdio FILE accessors; they operate on gf_in.file / gf_out.file */
int	gf_freadc(unsigned char *);
int	gf_freadc_locale(unsigned char *);
int	gf_freadc_getchar(unsigned char *, void *);
int	gf_fwritec(int);
int	gf_fwritec_locale(int);
#ifdef _WINDOWS
int	gf_freadc_windows(unsigned char *);
#endif /* _WINDOWS */

/* PIPE_S accessors; they operate on gf_in.pipe / gf_out.pipe */
int	gf_preadc(unsigned char *);
int	gf_preadc_locale(unsigned char *);
int	gf_preadc_getchar(unsigned char *, void *);
int	gf_pwritec(int);
int	gf_pwritec_locale(int);

/* in-memory string accessors; they operate on gf_in.txtp / gf_out.txtp */
int	gf_sreadc(unsigned char *);
int	gf_sreadc_locale(unsigned char *);
int	gf_sreadc_getchar(unsigned char *, void *);
int	gf_swritec(int);
int	gf_swritec_locale(int);

/* filter-chain plumbing */
void	gf_terminal(FILTER_S *, int);
void	gf_error(char *);
char   *gf_filter_puts(char *);
void	gf_filter_eod(void);

void	gf_8bit_put(FILTER_S *, int);
/*
 * System specific options
 *
 * CRLF_NEWLINES is consulted by gf_link_filter(), which then declines
 * to link the gf_nvtnl_local and gf_local_nvtnl filters.
 */
#ifdef _WINDOWS
#define CRLF_NEWLINES
#endif
/*
 * Hooks for callers to adjust behavior
 *
 * Both are function pointers without an initializer in this file.
 * NOTE(review): presumably they map internal variable/feature names to
 * display names -- confirm against the code that installs them.
 */
char	*(*pith_opt_pretty_var_name)(char *);
char	*(*pith_opt_pretty_feature_name)(char *, int);
/*
 * pointer to first function in a pipe, and pointer to last filter
 */
FILTER_S         *gf_master = NULL;	/* head of the filter chain */
static	gf_io_t   last_filter;		/* writer used by gf_terminal() */
static	char     *gf_error_string;	/* message recorded by gf_error() */
static	long	  gf_byte_count;	/* octets fed through gf_pipe() */
static	jmp_buf   gf_error_state;	/* longjmp target used by gf_error() */
/*
 * Command flags handed to a filter as its second argument.
 * GF_DATA, GF_EOD and GF_RESET are the ones the filters in this file
 * dispatch on (see the filter template further down).
 */
#define	GF_NOOP		0x01		/* flags used by generalized */
#define GF_EOD		0x02		/* filters                   */
#define GF_DATA		0x04		/* See filter.c for more     */
#define GF_ERROR	0x08		/* details                   */
#define GF_RESET	0x10
/*
 * A list of states used by the various filters.  Reused in many filters.
 * The values are plain small integers; each filter gives them its own
 * meaning.
 */
#define	DFL		0
#define	EQUAL		1
#define	HEX		2
#define	WSPACE		3
#define	CCR		4
#define	CLF		5
#define	TOKEN		6
#define	TAG		7
#define	HANDLE		8
#define	HDATA		9
#define	ESC		10
#define	ESCDOL		11
#define	ESCPAR		12
#define	EUC		13
#define	BOL		14
#define	FL_QLEV		15
#define	FL_STF		16
#define	FL_SIG		17
#define	STOP_DECODING	18
#define	SPACECR		19
/*
 * Macros to reduce function call overhead associated with calling
 * each filter for each byte filtered, and to minimize filter structure
 * dereferences.  NOTE: "queuein" has to do with putting chars into the
 * filter structs data queue.  So, writing at the queuein offset is
 * what a filter does to pass processed data out of itself.  Ditto for
 * queueout.  This explains the FI --> queueout init stuff below.
 *
 * The non-_GLO macros expect the local variables ip, eib, op and eob
 * that GF_INIT declares.
 */
#define	GF_QUE_START(F)	(&(F)->queue[0])
#define	GF_QUE_END(F)	(&(F)->queue[GF_MAXBUF - 1])

/* point the cursors at a filter's queue; a NULL filter yields NULL */
#define	GF_IP_INIT(F)	ip  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_IP_INIT_GLO(F)  (*ipp)  = (F) ? &(F)->queue[(F)->queuein] : NULL
#define	GF_EIB_INIT(F)	eib = (F) ? GF_QUE_END(F) : NULL
#define	GF_EIB_INIT_GLO(F)  (*eibp) = (F) ? GF_QUE_END(F) : NULL
#define	GF_OP_INIT(F)	op  = (F) ? &(F)->queue[(F)->queueout] : NULL
#define	GF_EOB_INIT(F)	eob = (F) ? &(F)->queue[(F)->queuein] : NULL

/* write the cursors back into the filter as queue offsets */
#define	GF_IP_END(F)	(F)->queuein  = ip - GF_QUE_START(F)
#define	GF_IP_END_GLO(F)  (F)->queuein  = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
#define	GF_OP_END(F)	(F)->queueout = op - GF_QUE_START(F)

/* declares op/eob over FI's queue and ip/eib over FO's queue */
#define	GF_INIT(FI, FO)	unsigned char *GF_OP_INIT(FI);	 \
			unsigned char *GF_EOB_INIT(FI); \
			unsigned char *GF_IP_INIT(FO);  \
			unsigned char *GF_EIB_INIT(FO);

/* empties F's queue; the expression evaluates to 0 */
#define	GF_CH_RESET(F)	(op = eob = GF_QUE_START(F), \
					    (F)->queueout = (F)->queuein = 0)

#define	GF_END(FI, FO)	(GF_OP_END(FI), GF_IP_END(FO))

/* hand F its queued data (GF_DATA), then re-read the write cursor */
#define	GF_FLUSH(F)	((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
			   GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
#define	GF_FLUSH_GLO(F)	((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
			   GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)

/* append C to F's queue, flushing once the cursor reaches GF_QUE_END */
#define	GF_PUTC(F, C)	((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
#define	GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))

/*
 * Introducing the *_GLO macros for use in splitting the big macros out
 * into functions (wrap_flush, wrap_eol).  The reason we need a
 * separate macro is because of the vars ip, eib, op, and eob, which are
 * set up locally in a call to GF_INIT.  To preserve these variables
 * in the new functions, we now pass pointers to these four vars.  Each
 * of these new functions expects the presence of pointer vars
 * ipp, eibp, opp, and eobp.
 */

/* fetch the next queued octet into C; evaluates to 0 once F is drained */
#define	GF_GETC(F, C)	((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))

/*
 * Emit an embedded foreground and background color change to F's
 * successor: TAG_EMBED TAG_FGCOLOR <rgb> TAG_EMBED TAG_BGCOLOR <rgb>,
 * where the rgb text comes from color_to_asciirgb().
 */
#define GF_COLOR_PUTC(F, C) {                                            \
                              char *p;                                   \
                              char cb[RGBLEN+1];                         \
                              GF_PUTC_GLO((F)->next, TAG_EMBED);         \
                              GF_PUTC_GLO((F)->next, TAG_FGCOLOR);       \
                              strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
                              cb[sizeof(cb)-1] = '\0';                   \
                              p = cb;                                    \
                              for(; *p; p++)                             \
                                GF_PUTC_GLO((F)->next, *p);              \
                              GF_PUTC_GLO((F)->next, TAG_EMBED);         \
                              GF_PUTC_GLO((F)->next, TAG_BGCOLOR);       \
                              strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
                              cb[sizeof(cb)-1] = '\0';                   \
                              p = cb;                                    \
                              for(; *p; p++)                             \
                                GF_PUTC_GLO((F)->next, *p);              \
                            }
/*
 * Generalized getc and putc routines.  Provided here so they don't
 * need to be re-done elsewhere.
 */

/*
 * pointers to objects to be used by the generic getc and putc
 * functions
 */
static struct gf_io_struct {
    FILE          *file;	/* used when the source type is FileStar */
    PIPE_S        *pipe;	/* used when the source type is PipeStar */
    char          *txtp;	/* cursor into an in-memory buffer */
    unsigned long  n;		/* octets left at txtp */
    int		   flags;	/* flags passed to gf_set_readc/writec */
    CBUF_S         cb;		/* charset conversion carry-over */
} gf_in, gf_out;

/* stacks of storage objects used by gf_so_readc() and gf_so_writec() */
#define	GF_SO_STACK	struct gf_so_stack
static GF_SO_STACK {
    STORE_S	*so;
    GF_SO_STACK *next;
} *gf_so_in, *gf_so_out;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc)
261 return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 gf_so_out->so->src == ExternalText);
268 * setup to use and return a pointer to the generic
269 * getc function
271 void
272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
274 gf_in.n = len;
275 gf_in.flags = flags;
276 gf_in.cb.cbuf[0] = '\0';
277 gf_in.cb.cbufp = gf_in.cb.cbuf;
278 gf_in.cb.cbufend = gf_in.cb.cbuf;
280 if(src == FileStar){
281 gf_in.file = (FILE *)txt;
282 fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 : gf_freadc;
286 #else /* UNIX */
287 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 : gf_freadc;
289 #endif /* UNIX */
291 else if(src == PipeStar){
292 gf_in.pipe = (PIPE_S *)txt;
293 *gc = gf_preadc;
294 *gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 : gf_preadc;
297 else{
298 gf_in.txtp = (char *)txt;
299 *gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 : gf_sreadc;
306 * setup to use and return a pointer to the generic
307 * putc function
309 void
310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
312 gf_out.n = len;
313 gf_out.flags = flags;
314 gf_out.cb.cbuf[0] = '\0';
315 gf_out.cb.cbufp = gf_out.cb.cbuf;
316 gf_out.cb.cbufend = gf_out.cb.cbuf;
318 if(src == FileStar){
319 gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 *pc = gf_fwritec;
322 #else /* UNIX */
323 *pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 : gf_fwritec;
325 #endif /* UNIX */
327 else if(src == PipeStar){
328 gf_out.pipe = (PIPE_S *)txt;
329 *pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 : gf_pwritec;
332 else{
333 gf_out.txtp = (char *)txt;
334 *pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 : gf_swritec;
341 * setup to use and return a pointer to the generic
342 * getc function
344 void
345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
347 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
349 sp->so = so;
350 sp->next = gf_so_in;
351 gf_so_in = sp;
352 *gc = gf_so_readc;
356 void
357 gf_clear_so_readc(STORE_S *so)
359 GF_SO_STACK *sp;
361 if((sp = gf_so_in) != NULL){
362 if(so == sp->so){
363 gf_so_in = gf_so_in->next;
364 fs_give((void **) &sp);
366 else
367 alpine_panic("Programmer botch: Can't unstack store readc");
369 else
370 alpine_panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
376 * putc function
378 void
379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
381 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
383 sp->so = so;
384 sp->next = gf_so_out;
385 gf_so_out = sp;
386 *pc = gf_so_writec;
390 void
391 gf_clear_so_writec(STORE_S *so)
393 GF_SO_STACK *sp;
395 if((sp = gf_so_out) != NULL){
396 if(so == sp->so){
397 gf_so_out = gf_so_out->next;
398 fs_give((void **) &sp);
400 else
401 alpine_panic("Programmer botch: Can't unstack store writec");
403 else
404 alpine_panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
412 gf_so_writec(int c)
414 return(so_writec(c, gf_so_out->so));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c)
424 return(so_readc(c, gf_so_in->so));
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
431 gf_freadc(unsigned char *c)
433 int rv = 0;
435 do {
436 errno = 0;
437 clearerr(gf_in.file);
438 rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439 } while(!rv && ferror(gf_in.file) && errno == EINTR);
441 return(rv);
446 gf_freadc_locale(unsigned char *c)
448 return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
/*
 * This is just to make it work with generic_readc_locale.
 * extraarg is the FILE * to read from.  Returns the fread() item
 * count (1 or 0); reads interrupted by a signal (EINTR) are retried.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE   *fp = (FILE *) extraarg;
    size_t  got;

    for(;;){
	errno = 0;
	clearerr(fp);
	got = fread(c, sizeof(unsigned char), (size_t)1, fp);
	if(got || !ferror(fp) || errno != EINTR)
	  break;
    }

    return((int) got);
}
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
479 gf_fwritec(int c)
481 unsigned char ch = (unsigned char)c;
482 int rv = 0;
485 rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486 while(!rv && ferror(gf_out.file) && errno == EINTR);
488 return(rv);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c)
499 int rv = 1;
500 int i, outchars;
501 unsigned char obuf[MAX(MB_LEN_MAX,32)];
503 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 for(i = 0; i < outchars; i++)
505 if(gf_fwritec(obuf[i]) != 1){
506 rv = 0;
507 break;
511 return(rv);
#ifdef _WINDOWS
/*
 * Read unicode characters from windows filesystem and return
 * them as a stream of UTF-8 characters.  The stream is assumed
 * opened so that it will know how to put together the unicode.
 *
 * (This is totally untested, copied loosely from so_file_readc_windows
 * which may or may not be appropriate.)
 *
 * Returns 1 with an octet in *c, 0 at end of input.
 */
int
gf_freadc_windows(unsigned char *c)
{
    CBUF_S *cb = &gf_in.cb;
    UCS     ucs;

    /* octets still pending from the character converted last time */
    if(cb->cbufend > cb->cbuf){
	*c = *cb->cbufp++;
	if(cb->cbufp >= cb->cbufend)
	  cb->cbufp = cb->cbufend = cb->cbuf;

	return(1);
    }

    if(!gf_in.file)
      return(0);

    /* windows only so second arg is ignored */
    ucs = read_a_wide_char(gf_in.file, NULL);
    if(ucs == CCONV_EOF)
      return(0);

    /*
     * Now we need to convert the UCS character to UTF-8
     * and dole out the UTF-8 one char at a time.
     */
    cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
    cb->cbufp   = cb->cbuf;
    if(cb->cbufend > cb->cbuf){
	*c = *cb->cbufp++;
	if(cb->cbufp >= cb->cbufend)
	  cb->cbufp = cb->cbufend = cb->cbuf;
    }
    else
      *c = '?';

    return(1);
}
#endif /* _WINDOWS */
574 gf_preadc(unsigned char *c)
576 return(pipe_readc(c, gf_in.pipe));
581 gf_preadc_locale(unsigned char *c)
583 return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c, void *extraarg)
593 PIPE_S *pipe;
595 pipe = (PIPE_S *) extraarg;
597 return(pipe_readc(c, pipe));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
607 gf_pwritec(int c)
609 return(pipe_writec(c, gf_out.pipe));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c)
620 int rv = 1;
621 int i, outchars;
622 unsigned char obuf[MAX(MB_LEN_MAX,32)];
624 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 for(i = 0; i < outchars; i++)
626 if(gf_pwritec(obuf[i]) != 1){
627 rv = 0;
628 break;
632 return(rv);
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
639 gf_sreadc(unsigned char *c)
641 return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
646 gf_sreadc_locale(unsigned char *c)
648 return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
/*
 * Adapter so gf_sreadc() can be handed to generic_readc_locale().
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    /*
     * extraarg is ignored and gf_sreadc just uses globals instead.
     * That's ok as long as we don't call it more than once at a time.
     */
    (void) extraarg;

    return(gf_sreadc(c));
}
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
669 gf_swritec(int c)
671 return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c)
682 int rv = 1;
683 int i, outchars;
684 unsigned char obuf[MAX(MB_LEN_MAX,32)];
686 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 for(i = 0; i < outchars; i++)
688 if(gf_swritec(obuf[i]) != 1){
689 rv = 0;
690 break;
694 return(rv);
699 * output the given string with the given function
702 gf_puts(register char *s, gf_io_t pc)
704 while(*s != '\0')
705 if(!(*pc)((unsigned char)*s++))
706 return(0); /* ERROR putting char ! */
708 return(1);
713 * output the given string with the given function
716 gf_nputs(register char *s, long int n, gf_io_t pc)
718 while(n--)
719 if(!(*pc)((unsigned char)*s++))
720 return(0); /* ERROR putting char ! */
722 return(1);
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * functions use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c,
743 int (*get_a_char)(unsigned char *, void *),
744 void *extraarg,
745 CBUF_S *cb)
747 unsigned long octets_so_far = 0, remaining_octets;
748 unsigned char *inputp;
749 unsigned char ch;
750 UCS ucs;
751 unsigned char inputbuf[20];
752 int rv = 0;
753 int got_one = 0;
755 /* already got some from previous call? */
756 if(cb->cbufend > cb->cbuf){
757 *c = *cb->cbufp;
758 cb->cbufp++;
759 rv++;
760 if(cb->cbufp >= cb->cbufend){
761 cb->cbufend = cb->cbuf;
762 cb->cbufp = cb->cbuf;
765 return(rv);
768 memset(inputbuf, 0, sizeof(inputbuf));
769 if((*get_a_char)(&ch, extraarg) == 0)
770 return(0);
772 inputbuf[octets_so_far++] = ch;
774 while(!got_one){
775 remaining_octets = octets_so_far;
776 inputp = inputbuf;
777 ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 switch(ucs){
779 case CCONV_BADCHAR:
780 return(rv);
782 case CCONV_NEEDMORE:
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far >= sizeof(inputbuf))
790 return(rv);
792 if((*get_a_char)(&ch, extraarg) == 0)
793 return(rv);
795 inputbuf[octets_so_far++] = ch;
796 break;
798 default:
799 /* got a good UCS-4 character */
800 got_one++;
801 break;
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
809 rv++;
810 cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811 cb->cbufp = cb->cbuf;
812 if(cb->cbufend > cb->cbuf){
813 *c = *cb->cbufp;
814 cb->cbufp++;
815 if(cb->cbufp >= cb->cbufend){
816 cb->cbufend = cb->cbuf;
817 cb->cbufp = cb->cbuf;
820 else
821 *c = '?';
823 return(rv);
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
834 void
835 gf_filter_init(void)
837 FILTER_S *flt, *fltn = gf_master;
839 while((flt = fltn) != NULL){ /* free list of old filters */
840 fltn = flt->next;
841 fs_give((void **)&flt);
844 gf_master = NULL;
845 gf_error_string = NULL; /* clear previous errors */
846 gf_byte_count = 0L; /* reset counter */
852 * link the given filter into the filter chain
854 void
855 gf_link_filter(filter_t f, void *data)
857 FILTER_S *new, *tail;
859 #ifdef CRLF_NEWLINES
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865 return;
866 #endif
868 new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869 memset(new, 0, sizeof(FILTER_S));
871 new->f = f; /* set the function pointer */
872 new->opt = data; /* set any optional parameter data */
873 (*f)(new, GF_RESET); /* have it setup initial state */
875 if((tail = gf_master) != NULL){ /* or add it to end of existing */
876 while(tail->next) /* list */
877 tail = tail->next;
879 tail->next = new;
881 else /* attach new struct to list */
882 gf_master = new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
890 void
891 gf_terminal(FILTER_S *f, int flg)
893 if(flg == GF_DATA){
894 GF_INIT(f, f);
896 while(op < eob)
897 if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 gf_error(errno ? error_description(errno) : "Error writing pipe");
900 GF_CH_RESET(f);
902 else if(flg == GF_RESET)
903 errno = 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
911 void
912 gf_set_terminal(gf_io_t f) /* function to set generic filter */
915 last_filter = f;
920 * common function for filter's to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
923 void
924 gf_error(char *s)
926 /* let the user know the error passed in s */
927 gf_error_string = s;
928 longjmp(gf_error_state, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
938 char *
939 gf_pipe(gf_io_t gc, gf_io_t pc)
940 /* how to get a character */
942 unsigned char c;
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string ? gf_error_string : "NULL"));
952 return(gf_error_string); /* */
956 * set and link in the terminal filter
958 gf_set_terminal(pc);
959 gf_link_filter(gf_terminal, NULL);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
968 * objects.
971 GF_INIT(gf_master, gf_master);
973 while((*gc)(&c)){
974 gf_byte_count++;
976 #ifdef _WINDOWS
977 if(!(gf_byte_count & 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled throught the alarm()
980 * mechinism.
982 mswin_yield ();
983 #endif
985 GF_PUTC(gf_master, c & 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master);
993 (*gf_master->f)(gf_master, GF_EOD);
996 dprint((4, "done.\n"));
997 return(NULL); /* everything went OK */
1002 * return the number of bytes piped so far
1004 long
1005 gf_bytes_piped(void)
1007 return(gf_byte_count);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on sucess, reason for failure (not alloc'd!) on error
1022 char *
1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 void (*pipecb_f)(PIPE_S *, int, void *))
1027 unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028 int flags, outchars, i;
1029 char *errstr = NULL, buf[MAILTMPLEN];
1030 PIPE_S *fpipe;
1031 CBUF_S cb;
1032 #ifdef NON_BLOCKING_IO
1033 int n;
1034 #endif
1036 dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
1038 gf_filter_init();
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1044 if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045 gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
1047 for( ; aux_filters && aux_filters->filter; aux_filters++)
1048 gf_link_filter(aux_filters->filter, aux_filters->data);
1050 gf_set_terminal(pc);
1051 gf_link_filter(gf_terminal, NULL);
1053 cb.cbuf[0] = '\0';
1054 cb.cbufp = cb.cbuf;
1055 cb.cbufend = cb.cbuf;
1058 * Spawn filter feeding it data, and reading what it writes.
1060 so_seek(source_so, 0L, 0);
1061 flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 | (silent ? PIPE_SILENT : 0)
1063 | (!disable_reset ? PIPE_RESET : 0);
1065 if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
1067 #ifdef NON_BLOCKING_IO
1069 if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 errstr = "Can't set up non-blocking IO";
1072 if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 || fputc('\n', fpipe->out.f) == EOF))
1074 errstr = error_description(errno);
1076 while(!errstr){
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 if(!so_readc(&c, source_so)){
1082 fclose(fpipe->out.f);
1083 fpipe->out.f = NULL;
1085 else{
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 for(i = 0; i < outchars && !errstr; i++)
1093 if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 errstr = error_description(errno);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1102 errno = 0;
1103 clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 errstr = gf_filter_puts(buf);
1108 /* then fgets failed! */
1109 if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 if(feof(fpipe->in.f)) /* nothing else interesting! */
1111 break;
1112 else if(ferror(fpipe->in.f)) /* bummer. */
1113 errstr = error_description(errno);
1115 else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 clearerr(fpipe->in.f);
1119 #else /* !NON_BLOCKING_IO */
1121 if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 || pipe_putc('\n', fpipe) == EOF))
1123 errstr = error_description(errno);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr && so_readc(&c, source_so))
1130 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 for(i = 0; i < outchars && !errstr; i++)
1132 if(pipe_putc(obuf[i], fpipe) == EOF)
1133 errstr = error_description(errno);
1135 if(pipe_close_write(fpipe))
1136 errstr = _("Pipe command returned error.");
1138 while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 errstr = gf_filter_puts(buf);
1141 #endif /* !NON_BLOCKING_IO */
1143 if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 errstr = _("Pipe command returned error.");
1146 gf_filter_eod();
1148 else
1149 errstr = _("Error setting up pipe command.");
1151 return(errstr);
1156 * gf_filter_puts - write the given string down the filter's pipe
1158 char *
1159 gf_filter_puts(register char *s)
1161 GF_INIT(gf_master, gf_master);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string ? gf_error_string : "NULL"));
1169 return(gf_error_string);
1172 while(*s)
1173 GF_PUTC(gf_master, (*s++) & 0xff);
1175 GF_END(gf_master, gf_master);
1176 return(NULL);
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1184 void
1185 gf_filter_eod(void)
1187 GF_INIT(gf_master, gf_master);
1188 (void) GF_FLUSH(gf_master);
1189 (*gf_master->f)(gf_master, GF_EOD);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1211 * via a vector.)
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1217 * void
1218 * gf_xxx_filter(f, flg)
1219 * FILTER_S *f;
1220 * int flg;
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
 *   - Filters must pass any GF_EOD they receive on to the next
 *     filter in the chain so it has the opportunity to flush
 *     any buffered data.
 *   - All filters expect NVT end-of-lines.  The idea is to prepend
 *     or append either the gf_local_nvtnl or gf_nvtnl_local
 *     OS-dependent filters to the data on the appropriate end of the
 *     pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
/*
 * gf_binary_b64 - filter that encodes the octets it reads as base64 text.
 *
 * Args: f   -- filter state.  f->n counts the octets consumed so far on
 *              the current output line, f->t carries the bits of the
 *              previous octet that have not been written yet.
 *       flg -- GF_DATA, GF_EOD or GF_RESET
 *
 * Encoded characters are written to f->next.  A CRLF is written after
 * every 45 input octets.  On GF_EOD the last partial group is padded
 * with '=', a final CRLF is written if the current line is not empty,
 * and GF_EOD is passed on to f->next.
 */
1274 void
1275 gf_binary_b64(FILTER_S *f, int flg)
/* v maps a 6-bit value to its base64 character */
1277 static char *v =
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f, f->next);
1281 if(flg == GF_DATA){
1282 register unsigned char c;
1283 register unsigned char t = f->t;
1284 register long n = f->n;
1286 while(GF_GETC(f, c)){
/* the position on the line selects which octet of a 3-octet group c is */
1288 switch(n++){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 case 42: case 45:
1292 GF_PUTC(f->next, v[c >> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t = c << 4; /* remember high 2 bits for next */
1295 break;
/* second octet: leftover bits of the first plus the high 4 bits of c */
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 case 43:
1300 GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 t = c << 2;
1302 break;
/* third octet: completes the group, so two characters are written */
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 case 44:
1307 GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 GF_PUTC(f->next, v[c & 0x3f]);
1309 break;
1312 if(n == 45){ /* start a new line? */
1313 GF_PUTC(f->next, '\015');
1314 GF_PUTC(f->next, '\012');
1315 n = 0L;
/* save the line position and leftover bits for the next GF_DATA call */
1319 f->n = n;
1320 f->t = t;
1321 GF_END(f, f->next);
1323 else if(flg == GF_EOD){ /* no more data */
1324 switch (f->n % 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1326 break;
/* one octet pending: write its leftover bits and two pad characters */
1328 case 1:
1329 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 GF_PUTC(f->next, '='); /* byte 3 */
1331 GF_PUTC(f->next, '='); /* byte 4 */
1332 break;
/* two octets pending: write the leftover bits and one pad character */
1334 case 2:
1335 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 GF_PUTC(f->next, '='); /* byte 4 */
1337 break;
1340 /* end with CRLF */
1341 if(f->n){
1342 GF_PUTC(f->next, '\015');
1343 GF_PUTC(f->next, '\012');
1346 (void) GF_FLUSH(f->next);
1347 (*f->next->f)(f->next, GF_EOD);
1349 else if(flg == GF_RESET){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1351 f->n = 0L;
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
/*
 * gf_b64_binary - filter that decodes base64 text back into octets.
 *
 * Args: f   -- filter state.  f->n is the position inside the current
 *              4-character quantum, f->t holds the bits decoded so far
 *              that do not yet form a whole octet, f->f1 is set when a
 *              second '=' is required next.
 *       flg -- GF_DATA, GF_EOD or GF_RESET
 *
 * Decoded octets are written to f->next.  Input octets that have the
 * high bit set, or that the table maps to 65, are skipped.  A misplaced
 * '=' is reported through gf_error().
 */
1360 void
1361 gf_b64_binary(FILTER_S *f, int flg)
/* v maps an ASCII code to its 6-bit value; 64 marks '=', 65 anything else */
1363 static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f, f->next);
1373 if(flg == GF_DATA){
1374 register unsigned char c;
1375 register unsigned char t = f->t;
1376 register int n = (int) f->n;
1377 register int state = f->f1;
1379 while(GF_GETC(f, c)){
/* a '=' in the third position must be followed by another '=' */
1381 if(state){
1382 state = 0;
1383 if (c != '=') {
1384 gf_error("Illegal '=' in base64 text");
1385 /* NO RETURN */
1389 /* in range, and a valid value? */
/* note: c is replaced by its table value when the high bit is clear */
1390 if((c & ~0x7f) || (c = v[c]) > 63){
/* 64 is the pad character; any other value here is ignored */
1391 if(c == 64){
1392 switch (n++) { /* check quantum position */
1393 case 2:
1394 state++; /* expect an equal as next char */
1395 break;
1397 case 3:
1398 n = 0L; /* restart quantum */
1399 break;
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1403 /* NO RETURN */
1407 else{
1408 switch (n++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1410 t = c << 2;
1411 break;
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f->next, (t|(c >> 4)));
1415 t = c << 4; /* byte 2: high 4 bits */
1416 break;
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f->next, (t|(c >> 2)));
1420 t = c << 6; /* byte 3: high 2 bits */
1421 break;
1423 case 3:
1424 GF_PUTC(f->next, t | c);
1425 n = 0L; /* reinitialize mechanism */
1426 break;
/* save the decoder state for the next GF_DATA call */
1431 f->f1 = state;
1432 f->t = t;
1433 f->n = n;
1434 GF_END(f, f->next);
1436 else if(flg == GF_EOD){
1437 (void) GF_FLUSH(f->next);
1438 (*f->next->f)(f->next, GF_EOD);
1440 else if(flg == GF_RESET){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f->n = 0L; /* quantum position */
1443 f->f1 = 0; /* state holder: equal seen? */
1451 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452 * encoding described in rfc1341
/* GF_MAXLINE - size of the f->line buffer gf_qp_8bit uses to hold a run of spaces */
1455 #define GF_MAXLINE 80 /* good buffer size */
1458 * default action for QUOTED-PRINTABLE to 8BIT decoder
/*
 * GF_QP_DEFAULT(f, c) - handle octet c when no QP construct is pending:
 * a space switches to WSPACE and restarts the buffered run of spaces in
 * f->line, '=' switches to EQUAL, anything else is written to f->next.
 * The macro assigns to a variable named "state" that must exist in the
 * function it is expanded in.
 */
1460 #define GF_QP_DEFAULT(f, c) { \
1461 if((c) == ' '){ \
1462 state = WSPACE; \
1463 /* reset white space! */ \
1464 (f)->linep = (f)->line; \
1465 *((f)->linep)++ = ' '; \
1467 else if((c) == '='){ \
1468 state = EQUAL; \
1470 else \
1471 GF_PUTC((f)->next, (c)); \
1476 * QUOTED-PRINTABLE to 8BIT filter
/*
 * gf_qp_8bit - filter that decodes quoted-printable text.
 *
 * Args: f   -- filter state.  f->f1 is the decoder state, f->t the value
 *              of the first hex digit after '=', f->f2 that digit as it
 *              was read, f->line/f->linep a buffer holding a pending run
 *              of spaces.
 *       flg -- GF_DATA, GF_EOD or GF_RESET
 *
 * Decoded octets are written to f->next.  A run of spaces directly in
 * front of a CR is dropped, "=CRLF" is removed as a soft line break, and
 * after a malformed "=" sequence the rest of the input is copied through
 * without decoding.
 */
1478 void
1479 gf_qp_8bit(FILTER_S *f, int flg)
1482 GF_INIT(f, f->next);
1484 if(flg == GF_DATA){
1485 register unsigned char c;
1486 register int state = f->f1;
1488 while(GF_GETC(f, c)){
1490 switch(state){
1491 case DFL : /* default case */
1492 default:
1493 GF_QP_DEFAULT(f, c);
1494 break;
/* CCR is entered after "=CR"; a following LF completes the soft break */
1496 case CCR : /* non-significant space */
1497 state = DFL;
1498 if(c == '\012')
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f, c);
1502 break;
1504 case EQUAL :
1505 if(c == '\015'){ /* "=\015" is a soft EOL */
1506 state = CCR;
1507 break;
1510 if(c == '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f->next, '=');
1512 state = DFL;
1513 break;
1516 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1534 * below, as well.
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state = STOP_DECODING;
1540 GF_PUTC(f->next, '=');
1541 GF_PUTC(f->next, c);
1542 q_status_message(SM_ORDER,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
1546 break;
/* first hex digit: keep its numeric value in f->t until the second arrives */
1549 if (isdigit ((unsigned char)c))
1550 f->t = c - '0';
1551 else
1552 f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1554 f->f2 = c; /* store character in case we have to
1555 back out in !isxdigit below */
1557 state = HEX;
1558 break;
1560 case HEX :
1561 state = DFL;
1562 if(!isxdigit((unsigned char)c)){ /* must be hex! */
/* give up decoding: write the '=', the saved first digit and c literally */
1563 state = STOP_DECODING;
1564 GF_PUTC(f->next, '=');
1565 GF_PUTC(f->next, f->f2);
1566 GF_PUTC(f->next, c);
1567 q_status_message(SM_ORDER,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
1571 break;
/* second hex digit: combine with the first (high nibble) and write the octet */
1574 if (isdigit((unsigned char)c))
1575 c -= '0';
1576 else
1577 c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f->next, c + (f->t << 4));
1580 break;
1582 case WSPACE :
1583 if(c == ' '){ /* toss it in with other spaces */
/* spaces beyond the buffer size are not recorded */
1584 if(f->linep - f->line < GF_MAXLINE)
1585 *(f->linep)++ = ' ';
1586 break;
1589 state = DFL;
1590 if(c == '\015'){ /* not our white space! */
1591 f->linep = f->line; /* reset buffer */
1592 GF_PUTC(f->next, '\015');
1593 break;
1596 /* the spaces are ours, write 'em */
1597 f->n = f->linep - f->line;
1598 while((f->n)--)
1599 GF_PUTC(f->next, ' ');
1601 GF_QP_DEFAULT(f, c); /* take care of 'c' in default way */
1602 break;
/* decoding was abandoned earlier: copy everything through unchanged */
1604 case STOP_DECODING :
1605 GF_PUTC(f->next, c);
1606 break;
1610 f->f1 = state;
1611 GF_END(f, f->next);
1613 else if(flg == GF_EOD){
/* release the space buffer allocated on GF_RESET */
1614 fs_give((void **)&(f->line));
1615 (void) GF_FLUSH(f->next);
1616 (*f->next->f)(f->next, GF_EOD);
1618 else if(flg == GF_RESET){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1620 f->f1 = DFL;
1621 f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
1628 * USEFUL MACROS TO HELP WITH QP ENCODING
/*
 * Helper macros for the 8bit to quoted-printable encoder.  In all of
 * them f->n is the number of characters already on the current line.
 *
 *   QP_MAXL              longest line written before a soft break
 *   GF_8BIT_WRAP(f)      writes the soft line break "=" CR LF
 *   GF_8BIT_PUT_QUOTE    writes c as "=XX" using HEX_CHAR1/HEX_CHAR2,
 *                        wrapping first if three more characters would
 *                        exceed QP_MAXL
 *   GF_8BIT_PUT          writes c literally, wrapping first if needed;
 *                        a '.' in the first column is written quoted
 *   GF_8BIT_DEFAULT      classifies c: a space switches to WSPACE, a CR
 *                        to CCR, control/8-bit/DEL/'=' octets are
 *                        quoted, the rest are written literally.  It
 *                        assigns to a variable named "state" that must
 *                        exist in the function it is expanded in.
 */
1631 #define QP_MAXL 75 /* 76th place only for continuation */
1634 * Macro to test and wrap long quoted printable lines
1636 #define GF_8BIT_WRAP(f) { \
1637 GF_PUTC((f)->next, '='); \
1638 GF_PUTC((f)->next, '\015'); \
1639 GF_PUTC((f)->next, '\012'); \
1643 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
1644 * line break if needed.
1646 #define GF_8BIT_PUT_QUOTE(f, c) { \
1647 if(((f)->n += 3) > QP_MAXL){ \
1648 GF_8BIT_WRAP(f); \
1649 (f)->n = 3; /* set line count */ \
1651 GF_PUTC((f)->next, '='); \
1652 GF_PUTC((f)->next, HEX_CHAR1(c)); \
1653 GF_PUTC((f)->next, HEX_CHAR2(c)); \
1657 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
1658 * if needed.
1660 #define GF_8BIT_PUT(f, c) { \
1661 if((++(f->n)) > QP_MAXL){ \
1662 GF_8BIT_WRAP(f); \
1663 f->n = 1L; \
1665 if(f->n == 1L && c == '.'){ \
1666 GF_8BIT_PUT_QUOTE(f, c); \
1667 f->n = 3; \
1669 else \
1670 GF_PUTC(f->next, c); \
1675 * default action for 8bit to quoted printable encoder
1677 #define GF_8BIT_DEFAULT(f, c) if((c) == ' '){ \
1678 state = WSPACE; \
1680 else if(c == '\015'){ \
1681 state = CCR; \
1683 else if(iscntrl(c & 0x7f) || (c == 0x7f) \
1684 || (c & 0x80) || (c == '=')){ \
1685 GF_8BIT_PUT_QUOTE(f, c); \
1687 else{ \
1688 GF_8BIT_PUT(f, c); \
1693 * 8BIT to QUOTED-PRINTABLE filter
/*
 * gf_8bit_qp - filter that encodes 8-bit text as quoted-printable.
 *
 * Args: f   -- filter state.  f->f1 is the encoder state, f->n the
 *              number of characters on the current output line, f->t
 *              and f->f2 are maintained by Find_Froms().
 *       flg -- GF_DATA, GF_EOD or GF_RESET
 *
 * A space or CR is held back for one octet so that a space in front of
 * a line break can be written quoted and a CR that is not followed by
 * LF can be written quoted as well.  On GF_EOD a held-back CR or space
 * is written quoted before GF_EOD is passed on.
 */
1695 void
1696 gf_8bit_qp(FILTER_S *f, int flg)
/* placeholders for the Find_Froms() arguments this filter has no use for */
1698 short dummy_dots = 0, dummy_dmap = 1;
1699 GF_INIT(f, f->next);
1701 if(flg == GF_DATA){
1702 register unsigned char c;
1703 register int state = f->f1;
1705 while(GF_GETC(f, c)){
1707 /* keep track of "^JFrom " */
/* NOTE(review): presumably sets f->t once "From " is seen at a line start - confirm against the Find_Froms definition */
1708 Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);
1710 switch(state){
1711 case DFL : /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f, c);
1713 break;
1715 case CCR : /* true line break? */
1716 state = DFL;
1717 if(c == '\012'){
1718 GF_PUTC(f->next, '\015');
1719 GF_PUTC(f->next, '\012');
1720 f->n = 0L;
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f, '\015');
1724 GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
1726 break;
1728 case WSPACE:
1729 state = DFL;
/* the held-back space is quoted when a CR follows it or f->t is set */
1730 if(c == '\015' || f->t){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f, ' ');
1732 f->t = 0; /* reset From flag */
1734 else
1735 GF_8BIT_PUT(f, ' ');
1737 GF_8BIT_DEFAULT(f, c); /* handle 'c' in the default way */
1738 break;
1742 f->f1 = state;
1743 GF_END(f, f->next);
1745 else if(flg == GF_EOD){
/* write out whatever octet was still being held back */
1746 switch(f->f1){
1747 case CCR :
1748 GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
1749 break;
1751 case WSPACE :
1752 GF_8BIT_PUT_QUOTE(f, ' '); /* write the last space */
1753 break;
1756 (void) GF_FLUSH(f->next);
1757 (*f->next->f)(f->next, GF_EOD);
1759 else if(flg == GF_RESET){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f->f1 = DFL; /* state from last character */
1762 f->f2 = 1; /* state of "^NFrom " bitmap */
1763 f->t = 0;
1764 f->n = 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1772 void
1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1775 static unsigned char *conv_table = NULL;
1776 GF_INIT(f, f->next);
1778 if(flg == GF_DATA){
1779 register unsigned char c;
1781 while(GF_GETC(f, c)){
1782 GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1785 GF_END(f, f->next);
1787 else if(flg == GF_EOD){
1788 (void) GF_FLUSH(f->next);
1789 (*f->next->f)(f->next, GF_EOD);
1791 else if(flg == GF_RESET){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
/*
 * Options handed to gf_convert_utf8_charset through f->opt; built by
 * gf_convert_utf8_charset_opt().
 */
1799 typedef struct _utf8c_s {
1800 void *conv_table; /* translation table, read on GF_RESET */
1801 int report_err; /* non-zero: unconvertible characters raise gf_error() */
1802 } UTF8C_S;
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1810 void
1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1813 static unsigned short *conv_table = NULL;
1814 static int report_err = 0;
1815 register int more = f->f2;
1816 register long u = f->n;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f, f->next);
1826 if(flg == GF_DATA){
1827 register unsigned char c;
1829 while(GF_GETC(f, c)){
1830 if(!conv_table){ /* can't do much if no conversion table */
1831 GF_PUTC(f->next, c);
1833 /* UTF-8 continuation? */
1834 else if((c > 0x7f) && (c < 0xc0)){
1835 if(more){
1836 u <<= 6; /* shift current value by 6 bits */
1837 u |= c & 0x3f;
1838 if (!--more){ /* last octet? */
1839 if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1845 c = '?';
1846 if(report_err){
1847 if(f->opt)
1848 fs_give((void **) &f->opt);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1854 /* NO RETURN */
1857 else{
1858 if(u > 0xff){
1859 c = (unsigned char) (u >> 8);
1860 GF_PUTC(f->next, c);
1863 c = (unsigned char) u & 0xff;
1866 GF_PUTC(f->next, c);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f->next, '?');
1873 else{
1874 if(more){ /* incomplete UTF-8 character */
1875 GF_PUTC(f->next, '?');
1876 more = 0;
1878 if(c < 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f->next, c);
1881 else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 u = c & 0x1f; /* first 5 bits of 12 */
1883 more = 1;
1885 else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 u = c & 0x0f; /* first 4 bits of 16 */
1887 more = 2;
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 u = c & 0x07; /* first 3 bits of 20.5 */
1892 more = 3;
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u = c & 0x03; /* first 2 bits of 26 */
1897 more = 4;
1899 else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u = c & 0x03; /* first 2 bits of 26 */
1901 more = 5;
1903 #endif
1904 else{ /* not in Unicode */
1905 GF_PUTC(f->next, '?');
1910 f->f2 = more;
1911 f->n = u;
1912 GF_END(f, f->next);
1914 else if(flg == GF_EOD){
1915 (void) GF_FLUSH(f->next);
1916 if(f->opt)
1917 fs_give((void **) &f->opt);
1918 (*f->next->f)(f->next, GF_EOD);
1920 else if(flg == GF_RESET){
1921 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1922 conv_table = ((UTF8C_S *) f->opt)->conv_table;
1923 report_err = ((UTF8C_S *) f->opt)->report_err;
1924 f->f2 = 0;
1925 f->n = 0L;
1930 void *
1931 gf_convert_utf8_charset_opt(void *table, int report_err)
1933 UTF8C_S *utf8c;
1935 utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1936 utf8c->conv_table = table;
1937 utf8c->report_err = report_err;
1938 return((void *) utf8c);
1943 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1945 * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1946 * or to Shift-JIS (if PC-Pine).
1948 void
1949 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1951 register unsigned char c;
1952 register int state = f->f1;
1955 * f->t lit means we're in middle of decoding a sequence of characters.
1956 * f->f2 keeps track of first character of pair for Shift-JIS.
1957 * f->f1 is the state.
1960 GF_INIT(f, f->next);
1962 if(flg == GF_DATA){
1963 while(GF_GETC(f, c)){
1964 switch(state){
1965 case ESC: /* saw ESC */
1966 if(!f->t && c == '$')
1967 state = ESCDOL;
1968 else if(f->t && c == '(')
1969 state = ESCPAR;
1970 else{
1971 GF_PUTC(f->next, '\033');
1972 GF_PUTC(f->next, c);
1973 state = DFL;
1976 break;
1978 case ESCDOL: /* saw ESC $ */
1979 if(c == 'B' || c == '@'){
1980 state = EUC;
1981 f->t = 1; /* filtering into euc */
1982 f->f2 = -1; /* first character of pair */
1984 else{
1985 GF_PUTC(f->next, '\033');
1986 GF_PUTC(f->next, '$');
1987 GF_PUTC(f->next, c);
1988 state = DFL;
1991 break;
1993 case ESCPAR: /* saw ESC ( */
1994 if(c == 'B' || c == 'J' || c == 'H'){
1995 state = DFL;
1996 f->t = 0; /* done filtering */
1998 else{
1999 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2000 GF_PUTC(f->next, '('); /* escape sequences, which */
2001 GF_PUTC(f->next, c); /* this appears to be. */
2004 break;
2006 case EUC: /* filtering into euc */
2007 if(c == '\033')
2008 state = ESC;
2009 else{
2010 #ifdef _WINDOWS /* Shift-JIS */
2011 c &= 0x7f; /* 8-bit can't win */
2012 if (f->f2 >= 0){ /* second of a pair? */
2013 int rowOffset = (f->f2 < 95) ? 112 : 176;
2014 int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2015 : 126;
2017 GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2018 GF_PUTC(f->next, c + cellOffset);
2019 f->f2 = -1; /* restart */
2021 else if(c > 0x20 && c < 0x7f)
2022 f->f2 = c; /* first of pair */
2023 else{
2024 GF_PUTC(f->next, c); /* write CTL as itself */
2025 f->f2 = -1;
2027 #else /* EUC */
2028 GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2029 #endif
2032 break;
2034 case DFL:
2035 default:
2036 if(c == '\033')
2037 state = ESC;
2038 else
2039 GF_PUTC(f->next, c);
2041 break;
2045 f->f1 = state;
2046 GF_END(f, f->next);
2048 else if(flg == GF_EOD){
2049 switch(state){
2050 case ESC:
2051 GF_PUTC(f->next, '\033');
2052 break;
2054 case ESCDOL:
2055 GF_PUTC(f->next, '\033');
2056 GF_PUTC(f->next, '$');
2057 break;
2059 case ESCPAR:
2060 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2061 GF_PUTC(f->next, '('); /* escape sequences. */
2062 break;
2065 (void) GF_FLUSH(f->next);
2066 (*f->next->f)(f->next, GF_EOD);
2068 else if(flg == GF_RESET){
2069 dprint((9, "-- gf_reset jp_to_euc\n"));
2070 f->f1 = DFL; /* state */
2071 f->t = 0; /* not translating to euc */
2077 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2079 void
2080 gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
2082 #ifdef _WINDOWS
2083 gf_sjis_to_2022_jp(f, flg);
2084 #else
2085 gf_euc_to_2022_jp(f, flg);
2086 #endif
2090 void
2091 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2093 register unsigned char c;
2096 * f->t lit means we've sent the start esc seq but not the end seq.
2097 * f->f2 keeps track of first character of pair for Shift-JIS.
2100 GF_INIT(f, f->next);
2102 if(flg == GF_DATA){
2103 while(GF_GETC(f, c)){
2104 if(f->t){
2105 if(c & 0x80){
2106 GF_PUTC(f->next, c & 0x7f);
2108 else{
2109 GF_PUTC(f->next, '\033');
2110 GF_PUTC(f->next, '(');
2111 GF_PUTC(f->next, 'B');
2112 GF_PUTC(f->next, c);
2113 f->f2 = -1;
2114 f->t = 0;
2117 else{
2118 if(c & 0x80){
2119 GF_PUTC(f->next, '\033');
2120 GF_PUTC(f->next, '$');
2121 GF_PUTC(f->next, 'B');
2122 GF_PUTC(f->next, c & 0x7f);
2123 f->t = 1;
2125 else{
2126 GF_PUTC(f->next, c);
2131 GF_END(f, f->next);
2133 else if(flg == GF_EOD){
2134 if(f->t){
2135 GF_PUTC(f->next, '\033');
2136 GF_PUTC(f->next, '(');
2137 GF_PUTC(f->next, 'B');
2138 f->t = 0;
2139 f->f2 = -1;
2142 (void) GF_FLUSH(f->next);
2143 (*f->next->f)(f->next, GF_EOD);
2145 else if(flg == GF_RESET){
2146 dprint((9, "-- gf_reset euc_to_jp\n"));
2147 f->t = 0;
2148 f->f2 = -1;
2152 void
2153 gf_sjis_to_2022_jp(FILTER_S *f, int flg)
2155 register unsigned char c;
2158 * f->t lit means we've sent the start esc seq but not the end seq.
2159 * f->f2 keeps track of first character of pair for Shift-JIS.
2162 GF_INIT(f, f->next);
2164 if(flg == GF_DATA){
2165 while(GF_GETC(f, c)){
2166 if(f->t){
2167 if(f->f2 >= 0){ /* second of a pair? */
2168 int adjust = c < 159;
2169 int rowOffset = f->f2 < 160 ? 112 : 176;
2170 int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;
2172 GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
2173 GF_PUTC(f->next, c - cellOffset);
2174 f->f2 = -1;
2176 else if(c & 0x80){
2177 f->f2 = c; /* remember first of pair */
2179 else{
2180 GF_PUTC(f->next, '\033');
2181 GF_PUTC(f->next, '(');
2182 GF_PUTC(f->next, 'B');
2183 GF_PUTC(f->next, c);
2184 f->f2 = -1;
2185 f->t = 0;
2188 else{
2189 if(c & 0x80){
2190 GF_PUTC(f->next, '\033');
2191 GF_PUTC(f->next, '$');
2192 GF_PUTC(f->next, 'B');
2193 f->f2 = c;
2194 f->t = 1;
2196 else{
2197 GF_PUTC(f->next, c);
2202 GF_END(f, f->next);
2204 else if(flg == GF_EOD){
2205 if(f->t){
2206 GF_PUTC(f->next, '\033');
2207 GF_PUTC(f->next, '(');
2208 GF_PUTC(f->next, 'B');
2209 f->t = 0;
2210 f->f2 = -1;
2213 (void) GF_FLUSH(f->next);
2214 (*f->next->f)(f->next, GF_EOD);
2216 else if(flg == GF_RESET){
2217 dprint((9, "-- gf_reset sjis_to_jp\n"));
2218 f->t = 0;
2219 f->f2 = -1;
2226 * Various charset to UTF-8 Translation filter
2230 * utf8 conversion options
/* Options handed to gf_utf8 through f->opt; built by gf_utf8_opt(). */
2232 typedef struct _utf8_s {
2233 CHARSET *charset; /* source charset; NULL makes gf_utf8 write '?' for 8-bit octets */
2234 unsigned long ucsc; /* NOTE(review): not referenced in this part of the file */
2235 } UTF8_S;
/*
 * Helpers for gf_utf8's line buffer.  f->line is the buffer and f->f2
 * its current size.  UTF8_ADD and GF_UTF8_FLUSH use variables named "p"
 * (next free position) and "eobuf" (end of buffer) that must exist in
 * the function they are expanded in.
 *
 *   UTF8_BLOCK        amount the buffer grows by
 *   UTF8_EOB(f)       address of the last byte of the buffer
 *   UTF8_ADD(f, c)    appends c, growing the buffer first when it is full
 *   GF_UTF8_FLUSH(f)  converts the buffered text and writes it to
 *                     f->next: with no charset, 8-bit octets are written
 *                     as '?'; otherwise the output of utf8_text_cs() is
 *                     written, or one '?' per input octet if that fails
 */
2237 #define UTF8_BLOCK 1024
2238 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2239 #define UTF8_ADD(f, c) \
2241 if(p >= eobuf){ \
2242 f->f2 += UTF8_BLOCK; \
2243 fs_resize((void **)&f->line, \
2244 (size_t) f->f2 * sizeof(char)); \
2245 eobuf = UTF8_EOB(f); \
2246 p = eobuf - UTF8_BLOCK; \
2248 *p++ = c; \
2250 #define GF_UTF8_FLUSH(f) { \
2251 register long n; \
2252 SIZEDTEXT intext, outtext; \
2253 intext.data = (unsigned char *) f->line; \
2254 intext.size = p - f->line; \
2255 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2256 if(!((UTF8_S *) f->opt)->charset){ \
2257 for(n = 0; n < intext.size; n++) \
2258 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2260 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2261 for(n = 0; n < outtext.size; n++) \
2262 GF_PUTC(f->next, outtext.data[n]); \
2263 if(outtext.data && intext.data != outtext.data) \
2264 fs_give((void **) &outtext.data); \
2266 else{ \
2267 for(n = 0; n < intext.size; n++) \
2268 GF_PUTC(f->next, '?'); \
2274 * gf_utf8 - text in specified charset to to UTF-8 filter
2275 * Process line-at-a-time rather than character
2276 * because ISO-2022-JP. Call utf8_text_cs by hand
2277 * rather than utf8_text to reduce the cost of
2278 * utf8_charset() for each line.
2280 void
2281 gf_utf8(FILTER_S *f, int flg)
2283 register char *p = f->linep;
2284 register char *eobuf = UTF8_EOB(f);
2285 GF_INIT(f, f->next);
2287 if(flg == GF_DATA){
2288 register int state = f->f1;
2289 register unsigned char c;
2291 while(GF_GETC(f, c)){
2293 switch(state){
2294 case CCR :
2295 state = DFL;
2296 if(c == '\012'){
2297 GF_UTF8_FLUSH(f);
2298 p = f->line;
2299 GF_PUTC(f->next, '\015');
2300 GF_PUTC(f->next, '\012');
2302 else{
2303 UTF8_ADD(f, '\015');
2304 UTF8_ADD(f, c);
2307 break;
2309 default :
2310 if(c == '\015'){
2311 state = CCR;
2313 else
2314 UTF8_ADD(f, c);
2318 f->f1 = state;
2319 GF_END(f, f->next);
2321 else if(flg == GF_EOD){
2323 if(p != f->line)
2324 GF_UTF8_FLUSH(f);
2326 fs_give((void **) &f->line);
2327 fs_give((void **) &f->opt);
2328 (void) GF_FLUSH(f->next);
2329 (*f->next->f)(f->next, GF_EOD);
2331 else if(GF_RESET){
2332 dprint((9, "-- gf_reset utf8\n"));
2333 f->f1 = DFL;
2334 f->f2 = UTF8_BLOCK; /* input buffer length */
2335 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2338 f->linep = p;
2342 void *
2343 gf_utf8_opt(char *charset)
2345 UTF8_S *utf8;
2347 utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2349 utf8->charset = (CHARSET *) utf8_charset(charset);
2352 * When we get 8-bit non-ascii characters but it is supposed to
2353 * be ascii we want it to turn into question marks, not
2354 * just behave as if it is UTF-8 which is what happens
2355 * with ascii because there is no translation table.
2356 * So we need to catch the ascii special case here.
2358 if(utf8->charset && utf8->charset->type == CT_ASCII)
2359 utf8->charset = NULL;
2361 return((void *) utf8);
2366 * RICHTEXT-TO-PLAINTEXT filter
2370 * option to be used by rich2plain (NOTE: if this filter is ever
2371 * used more than once in a pipe, all instances will have the same
2372 * option value)
2376 /*----------------------------------------------------------------------
2377 richtext to plaintext filter
2379 Args: f --
2380 flg --
2382 This basically removes all richtext formatting. A cute hack is used
2383 to get bold and underlining to work.
2384 Further work could be done to handle things like centering and right
2385 and left flush, but then it could no longer be done in place. This
2386 operates on text *with* CRLF's.
2388 WARNING: does not wrap lines!
2389 ----*/
/*
 * gf_rich2plain - filter that strips richtext formatting tokens.
 *
 * Args: f   -- filter state.  f->f1 is the parser state, f->f2 is
 *              non-zero inside a <comment>, f->line/f->linep collect
 *              the current token, f->opt may point at an int "plain"
 *              flag.
 *       flg -- GF_DATA, GF_EOD or GF_RESET
 *
 * <lt> becomes '<', <nl> a CRLF, </paragraph> two CRLFs, and a CRLF in
 * the text a single space.  Unless the plain flag is set, bold, italic
 * and underline tokens are turned into TAG_EMBED sequences.  The on/off
 * bookkeeping is kept in statics, so it is shared by every instance of
 * this filter.
 */
2390 void
2391 gf_rich2plain(FILTER_S *f, int flg)
2393 static int rich_bold_on = 0, rich_uline_on = 0;
2395 /* BUG: quote incoming \255 values */
2396 GF_INIT(f, f->next);
2398 if(flg == GF_DATA){
2399 register unsigned char c;
2400 register int state = f->f1;
2401 register int plain;
2403 plain = f->opt ? (*(int *) f->opt) : 0;
2405 while(GF_GETC(f, c)){
2407 switch(state){
2408 case TOKEN : /* collect a richtext token */
2409 if(c == '>'){ /* what should we do with it? */
2410 state = DFL; /* return to default next time */
2411 *(f->linep) = '\0'; /* cap off token */
/* only the first two characters are compared for "lt" and "nl" */
2412 if(f->line[0] == 'l' && f->line[1] == 't'){
2413 GF_PUTC(f->next, '<'); /* literal '<' */
2415 else if(f->line[0] == 'n' && f->line[1] == 'l'){
2416 GF_PUTC(f->next, '\015');/* newline! */
2417 GF_PUTC(f->next, '\012');
2419 else if(!strcmp("comment", f->line)){
2420 (f->f2)++;
2422 else if(!strcmp("/comment", f->line)){
2423 f->f2 = 0;
2425 else if(!strcmp("/paragraph", f->line)) {
2426 GF_PUTC(f->next, '\r');
2427 GF_PUTC(f->next, '\n');
2428 GF_PUTC(f->next, '\r');
2429 GF_PUTC(f->next, '\n');
2431 else if(!plain /* gf_rich_plain */){
/* italic is shown with the same underline tags as underline */
2432 if(!strcmp(f->line, "bold")) {
2433 GF_PUTC(f->next, TAG_EMBED);
2434 GF_PUTC(f->next, TAG_BOLDON);
2435 rich_bold_on = 1;
2436 } else if(!strcmp(f->line, "/bold")) {
2437 GF_PUTC(f->next, TAG_EMBED);
2438 GF_PUTC(f->next, TAG_BOLDOFF);
2439 rich_bold_on = 0;
2440 } else if(!strcmp(f->line, "italic")) {
2441 GF_PUTC(f->next, TAG_EMBED);
2442 GF_PUTC(f->next, TAG_ULINEON);
2443 rich_uline_on = 1;
2444 } else if(!strcmp(f->line, "/italic")) {
2445 GF_PUTC(f->next, TAG_EMBED);
2446 GF_PUTC(f->next, TAG_ULINEOFF);
2447 rich_uline_on = 0;
2448 } else if(!strcmp(f->line, "underline")) {
2449 GF_PUTC(f->next, TAG_EMBED);
2450 GF_PUTC(f->next, TAG_ULINEON);
2451 rich_uline_on = 1;
2452 } else if(!strcmp(f->line, "/underline")) {
2453 GF_PUTC(f->next, TAG_EMBED);
2454 GF_PUTC(f->next, TAG_ULINEOFF);
2455 rich_uline_on = 0;
2458 /* else we just ignore the token! */
2460 f->linep = f->line; /* reset token buffer */
2462 else{ /* add char to token */
2463 if(f->linep - f->line > 40){
2464 /* What? rfc1341 says 40 char tokens MAX! */
2465 fs_give((void **)&(f->line));
2466 gf_error("Richtext token over 40 characters");
2467 /* NO RETURN */
/* token characters are stored in lower case */
2470 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2472 break;
2474 case CCR :
2475 state = DFL; /* back to default next time */
2476 if(c == '\012'){ /* treat as single space? */
2477 GF_PUTC(f->next, ' ');
2478 break;
2480 /* fall thru to process c */
2482 case DFL :
2483 default:
2484 if(c == '<')
2485 state = TOKEN;
2486 else if(c == '\015')
2487 state = CCR;
2488 else if(!f->f2) /* not in comment! */
2489 GF_PUTC(f->next, c);
2491 break;
2495 f->f1 = state;
2496 GF_END(f, f->next);
2498 else if(flg == GF_EOD){
2499 if((f->f1 = (f->linep != f->line)) != 0){
2500 /* incomplete token!! */
2501 gf_error("Incomplete token in richtext");
2502 /* NO RETURN */
/* switch off any attribute the text left on */
2505 if(rich_uline_on){
2506 GF_PUTC(f->next, TAG_EMBED);
2507 GF_PUTC(f->next, TAG_ULINEOFF);
2508 rich_uline_on = 0;
2510 if(rich_bold_on){
2511 GF_PUTC(f->next, TAG_EMBED);
2512 GF_PUTC(f->next, TAG_BOLDOFF);
2513 rich_bold_on = 0;
2516 fs_give((void **)&(f->line));
2517 (void) GF_FLUSH(f->next);
2518 (*f->next->f)(f->next, GF_EOD);
2520 else if(flg == GF_RESET){
2521 dprint((9, "-- gf_reset rich2plain\n"));
2522 f->f1 = DFL; /* state */
2523 f->f2 = 0; /* set means we're in a comment */
/* room for the longest accepted token plus its terminator */
2524 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
/*
 * gf_rich2plain_opt - function called from the outside to set the
 * richtext filter's options.
 *
 * Args: plain -- pointer to the flag gf_rich2plain reads through f->opt
 *
 * Returns the same pointer as a void *; nothing is copied or allocated.
 */
void *
gf_rich2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2542 * ENRICHED-TO-PLAIN text filter
/* flag bits gf_enriched2plain keeps in f->f2 */
2545 #define TEF_QUELL 0x01 /* output is suppressed while this is set */
2546 #define TEF_NOFILL 0x02 /* set between <nofill> and </nofill> */
2550 /*----------------------------------------------------------------------
2551 enriched text to plain text filter (ala rfc1523)
2553 Args: f -- state and input data
2554 flg --
2556 This basically removes all enriched formatting. A cute hack is used
2557 to get bold and underlining to work.
2559 Further work could be done to handle things like centering and right
2560 and left flush, but then it could no longer be done in place. This
2561 operates on text *with* CRLF's.
2563 WARNING: does not wrap lines!
2564 ----*/
2565 void
2566 gf_enriched2plain(FILTER_S *f, int flg)
2568 static int enr_uline_on = 0, enr_bold_on = 0;
2570 /* BUG: qoute incoming \255 values */
2571 GF_INIT(f, f->next);
2573 if(flg == GF_DATA){
2574 register unsigned char c;
2575 register int state = f->f1;
2576 register int plain;
2578 plain = f->opt ? (*(int *) f->opt) : 0;
2580 while(GF_GETC(f, c)){
2582 switch(state){
2583 case TOKEN : /* collect a richtext token */
2584 if(c == '>'){ /* what should we do with it? */
2585 int off = *f->line == '/';
2586 char *token = f->line + (off ? 1 : 0);
2587 state = DFL;
2588 *f->linep = '\0';
2589 if(!strcmp("param", token)){
2590 if(off)
2591 f->f2 &= ~TEF_QUELL;
2592 else
2593 f->f2 |= TEF_QUELL;
2595 else if(!strcmp("nofill", token)){
2596 if(off)
2597 f->f2 &= ~TEF_NOFILL;
2598 else
2599 f->f2 |= TEF_NOFILL;
2601 else if(!plain /* gf_enriched_plain */){
2602 /* Following is a cute hack or two to get
2603 bold and underline on the screen.
2604 See Putline0n() where these codes are
2605 interpreted */
2606 if(!strcmp("bold", token)) {
2607 GF_PUTC(f->next, TAG_EMBED);
2608 GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2609 enr_bold_on = off ? 0 : 1;
2610 } else if(!strcmp("italic", token)) {
2611 GF_PUTC(f->next, TAG_EMBED);
2612 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2613 enr_uline_on = off ? 0 : 1;
2614 } else if(!strcmp("underline", token)) {
2615 GF_PUTC(f->next, TAG_EMBED);
2616 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2617 enr_uline_on = off ? 0 : 1;
2620 /* else we just ignore the token! */
2622 f->linep = f->line; /* reset token buffer */
2624 else if(c == '<'){ /* literal '<'? */
2625 if(f->linep == f->line){
2626 GF_PUTC(f->next, '<');
2627 state = DFL;
2629 else{
2630 fs_give((void **)&(f->line));
2631 gf_error("Malformed Enriched text: unexpected '<'");
2632 /* NO RETURN */
2635 else{ /* add char to token */
2636 if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2637 fs_give((void **)&(f->line));
2638 gf_error("Malformed Enriched text: token too long");
2639 /* NO RETURN */
2642 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2644 break;
2646 case CCR :
2647 if(c != '\012'){ /* treat as single space? */
2648 state = DFL; /* lone cr? */
2649 f->f2 &= ~TEF_QUELL;
2650 GF_PUTC(f->next, '\015');
2651 goto df;
2654 state = CLF;
2655 break;
2657 case CLF :
2658 if(c == '\015'){ /* treat as single space? */
2659 state = CCR; /* repeat crlf's mean real newlines */
2660 f->f2 |= TEF_QUELL;
2661 GF_PUTC(f->next, '\r');
2662 GF_PUTC(f->next, '\n');
2663 break;
2665 else{
2666 state = DFL;
2667 if(!((f->f2) & TEF_QUELL))
2668 GF_PUTC(f->next, ' ');
2670 f->f2 &= ~TEF_QUELL;
2673 /* fall thru to take care of 'c' */
2675 case DFL :
2676 default :
2677 df :
2678 if(c == '<')
2679 state = TOKEN;
2680 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2681 state = CCR;
2682 else if(!((f->f2) & TEF_QUELL))
2683 GF_PUTC(f->next, c);
2685 break;
2689 f->f1 = state;
2690 GF_END(f, f->next);
2692 else if(flg == GF_EOD){
2693 if((f->f1 = (f->linep != f->line)) != 0){
2694 /* incomplete token!! */
2695 gf_error("Incomplete token in richtext");
2696 /* NO RETURN */
2698 if(enr_uline_on){
2699 GF_PUTC(f->next, TAG_EMBED);
2700 GF_PUTC(f->next, TAG_ULINEOFF);
2701 enr_uline_on = 0;
2703 if(enr_bold_on){
2704 GF_PUTC(f->next, TAG_EMBED);
2705 GF_PUTC(f->next, TAG_BOLDOFF);
2706 enr_bold_on = 0;
2709 /* Make sure we end with a newline so everything gets flushed */
2710 GF_PUTC(f->next, '\015');
2711 GF_PUTC(f->next, '\012');
2713 fs_give((void **)&(f->line));
2715 (void) GF_FLUSH(f->next);
2716 (*f->next->f)(f->next, GF_EOD);
2718 else if(flg == GF_RESET){
2719 dprint((9, "-- gf_reset enriched2plain\n"));
2720 f->f1 = DFL; /* state */
2721 f->f2 = 0; /* set means we're in a comment */
2722 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
/*
 * gf_enriched2plain_opt - function called from the outside to set the
 * enriched text filter's options.
 *
 * Args: plain -- pointer to the flag gf_enriched2plain reads through
 *                f->opt
 *
 * Returns the same pointer as a void *; nothing is copied or allocated.
 */
void *
gf_enriched2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2740 * HTML-TO-PLAIN text filter
2744 /* OK, here's the plan:
2746 * a universal output function handles writing chars and worries
2747 * about wrapping.
2749 * a universal element collector reads chars and collects params
2750 * and dispatches the appropriate element handler.
2752 * element handlers are stacked. The most recently dispatched gets
2753 * first crack at the incoming character stream. It passes bytes it's
2754 * done with or not interested in to the next
2756 * installs that handler as the current one collecting data...
2758 * stacked handlers take their params from the element collector and
2759 * accept chars or do whatever they need to do. Sort of a vertical
2760 * piping? recursion-like? hmmm.
2762 * at least I think this is how it'll work. tres simple, non?
2768 * Some important constants
/*
 * Buffer sizes, entity-collector result codes and flag values used by the
 * HTML filter.
 *
 * NOTE(review): HTML_DOBOLD and HTML_LITERAL share the value 0x0400, so any
 * value carrying HTML_DOBOLD also satisfies IS_LITERAL().  Likewise
 * HTML_NEWLINE (0x010A) contains the HTML_BADVALUE bit (0x0100).  Confirm
 * these values are never tested against one another.
 */
2770 #define HTML_BUF_LEN 2048 /* max scratch buffer length */
2771 #define MAX_ENTITY 20 /* maximum length of an entity */
2772 #define MAX_ELEMENT 72 /* maximum length of an element */
2773 #define HTML_MOREDATA 0 /* expect more entity data */
2774 #define HTML_ENTITY 1 /* valid entity collected */
2775 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2776 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2777 #define HTML_LITERAL 0x0400 /* Literal character value */
2778 #define HTML_NEWLINE 0x010A /* hard newline */
2779 #define HTML_DOBOLD 0x0400 /* Start Bold display */
2780 #define HTML_ID_GET 0 /* indent func: return current val */
2781 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2782 #define HTML_ID_INC 2 /* indent func: increment by val */
2783 #define HTML_HX_CENTER 0x0001 /* presumably a heading flag: center it -- TODO confirm */
2784 #define HTML_HX_ULINE 0x0002 /* presumably a heading flag: underline it -- TODO confirm */
2785 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2788 /* types of lists that we will support */
/*
 * Numbering styles for lists.  Every value is explicitly typed long;
 * presumably so it can be kept in one of the long fields of a handler
 * (x, y, z) -- TODO confirm against the list handlers.
 */
2789 #define LIST_DECIMAL (long) 0
2790 #define LIST_ALPHALO (long) 1
2791 #define LIST_ALPHAUP (long) 2
2792 #define LIST_ROMANLO (long) 3
2793 #define LIST_ROMANUP (long) 4
2794 #define LIST_UNKNOWN (long) 10
2797 * Handler data, state information including function that uses it
/*
 * HANDLER_S - one entry in the chain of active element handlers.
 * Entries link to the next one through 'below'; the EL() macro reads
 * 'element' as an ELPROP_S *.
 */
2799 typedef struct handler_s {
2800 FILTER_S *html_data; /* presumably the filter this handler belongs to -- TODO confirm */
2801 void *element; /* read as ELPROP_S * through EL() */
2802 long x, y, z; /* presumably handler-private scratch values -- TODO confirm */
2803 void *dp; /* presumably handler-private data pointer -- TODO confirm */
2804 unsigned char *s; /* presumably handler-private string/buffer -- TODO confirm */
2805 struct handler_s *below; /* next handler in the chain */
2806 } HANDLER_S;
2809 * Element Property structure
/*
 * ELPROP_S - static description of one markup element.
 * The handler is invoked as handler(HANDLER_S *, int, int); HTML_TEXT_OUT
 * calls it with the current handler, the character, and GF_DATA.
 */
2811 typedef struct _element_properties {
2812 char *element; /* element name */
2813 size_t len; /* presumably the length of 'element' -- TODO confirm */
2814 int (*handler)(HANDLER_S *, int, int); /* function servicing this element */
2815 unsigned blocklevel:1; /* presumably set for block-level elements -- TODO confirm */
2816 unsigned alternate:1; /* NOTE(review): meaning not visible in this chunk */
2817 } ELPROP_S;
2820 * Types used to manage HTML parsing
/* Forward declaration; file-local.  The definition is outside this chunk. */
2822 static void html_handoff(HANDLER_S *, int);
2826 * to help manage line wrapping.
/*
 * WRAPLINE_S - growable text buffer that tracks both its byte usage and
 * the displayed width of what it holds.
 */
2828 typedef struct _wrap_line {
2829 char *buf; /* buf to collect wrapped text */
2830 int used, /* number of chars in buf */
2831 width, /* text's width as displayed */
2832 len; /* length of allocated buf */
2833 } WRAPLINE_S;
2837 * to help manage centered text
/*
 * CENTER_S - state for assembling centered output: the line built so far
 * plus the word currently being collected.  WRAPPED_LEN() sums the two
 * widths, adding one column when both are non-empty.
 */
2839 typedef struct _center_s {
2840 WRAPLINE_S line; /* buf to assemble centered text */
2841 WRAPLINE_S word; /* word being appended to line */
2842 int anchor; /* NOTE(review): meaning not visible in this chunk */
2843 short space; /* NOTE(review): meaning not visible in this chunk */
2844 } CENTER_S;
2848 * Collector data and state information
/*
 * CLCTR_S - working state of the element collector while one markup
 * element is being read.  NEW_CLCTR() allocates and zero-fills one;
 * FREE_CLCTR() releases the attribute list, the element name and the
 * structure itself.
 */
2850 typedef struct collector_s {
2851 char buf[HTML_BUF_LEN]; /* buffer to collect data */
2852 int len; /* length of that buffer */
2853 unsigned end_tag:1; /* collecting a closing tag */
2854 unsigned hit_equal:1; /* collecting right half of attrib */
2855 unsigned mkup_decl:1; /* markup declaration */
2856 unsigned start_comment:1; /* markup declaration comment */
2857 unsigned end_comment:1; /* legit comment format */
2858 unsigned hyphen:1; /* markup hyphen read */
2859 unsigned badform:1; /* malformed markup element */
2860 unsigned overrun:1; /* Overran buf above */
2861 unsigned proc_inst:1; /* XML processing instructions */
2862 unsigned empty:1; /* empty element */
2863 unsigned was_quoted:1; /* basically to catch null string */
2864 char quoted; /* quoted element param value */
2865 char *element; /* element's collected name */
2866 PARAMETER *attribs; /* element's collected attributes */
2867 PARAMETER *cur_attrib; /* attribute now being collected */
2868 } CLCTR_S;
2872 * State information for all element handlers
/*
 * HTML_DATA_S - per-filter state shared by all element handlers.
 * Reached from a filter through HD(), which casts the filter's data
 * pointer to this type.
 */
2874 typedef struct html_data {
2875 HANDLER_S *h_stack; /* handler list */
2876 CLCTR_S *el_data; /* element collector data */
2877 CENTER_S *centered; /* struct to manage centered text */
2878 int (*token)(FILTER_S *, int); /* active token collector, NULL when none (see HTML_PROC) */
2879 char quoted; /* quoted, by either ' or ", text */
2880 short indent_level; /* levels of indentation */
2881 int in_anchor; /* text now being written to anchor */
2882 int blanks; /* Consecutive blank line count */
2883 int wrapcol; /* column to wrap lines on */
2884 int *prefix; /* buffer containing Anchor prefix */
2885 int prefix_used;
2886 long line_bufsize; /* current size of the line buffer */
2887 COLOR_PAIR *color;
2888 struct {
2889 int state; /* embedded data state */
2890 char *color; /* embedded color pointer */
2891 } embedded;
2892 CBUF_S cb; /* utf8->ucs4 conversion state */
2893 unsigned wrapstate:1; /* whether or not to wrap output */
2894 unsigned li_pending:1; /* <LI> next token expected */
2895 unsigned de_pending:1; /* <DT> or <DD> next token expected */
2896 unsigned bold_on:1; /* currently bolding text */
2897 unsigned uline_on:1; /* currently underlining text */
2898 unsigned center:1; /* center output text */
2899 unsigned bitbucket:1; /* Ignore input */
2900 unsigned head:1; /* In doc's HEAD */
2901 unsigned body:1; /* In doc's BODY */
2902 unsigned alt_entity:1; /* use alternative entity values */
2903 unsigned wrote:1; /* anything written yet? */
2904 } HTML_DATA_S;
2908 * HTML filter options
/*
 * HTML_OPT_S - caller-supplied options for the HTML filter, reached
 * through the filter's opt pointer.  Most accessor macros treat a NULL
 * opt pointer as "use the default" (80 columns, no indent, no base URL,
 * all flags off).
 */
2910 typedef struct _html_opts {
2911 char *base; /* Base URL for this html file */
2912 int columns, /* Display columns (excluding margins) */
2913 indent; /* Left margin */
2914 HANDLE_S **handlesp; /* Head of handles */
2915 htmlrisk_t warnrisk_f; /* Nasty link warning call */
2916 ELPROP_S *element_table; /* markup element table */
2917 RSS_FEED_S **feedp; /* hook for RSS feed response */
2918 unsigned strip:1; /* Hilite TAGs allowed */
2919 unsigned handles_loc:1; /* Local handles requested? */
2920 unsigned showserver:1; /* Display server after anchors */
2921 unsigned outputted:1; /* any */
2922 unsigned no_relative_links:1; /* Disable embedded relative links */
2923 unsigned related_content:1; /* Embedded related content */
2924 unsigned html:1; /* Output content in HTML */
2925 unsigned html_imgs:1; /* Output IMG tags in HTML content */
2926 } HTML_OPT_S;
2931 * Some macros to make life a little easier
/*
 * Accessors for a filter X's options (X->opt, an HTML_OPT_S) and state
 * (X->data, an HTML_DATA_S).
 *
 * The option accessors that test (X)->opt fall back to a default when it
 * is NULL: 80 columns, indent 0, NULL base, flags false.
 * NOTE(review): HANDLESP, ELEMENTS and RSS_FEED dereference (X)->opt with
 * no NULL check; DO_HANDLES guards HANDLESP, the other two rely on the
 * caller.
 *
 * MAKE_LITERAL keeps the low 8 bits of C and sets the HTML_LITERAL flag;
 * IS_LITERAL tests that flag.  HTML_ISSPACE is true only for non-literal
 * values that are HTML_NEWLINE or ASCII whitespace.
 */
2933 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2934 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2935 #define HTML_WROTE(X) (HD(X)->wrote)
2936 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
2937 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2938 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2939 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
2940 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2941 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2942 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2943 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2944 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2945 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
2946 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2947 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
2948 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2949 #define IS_LITERAL(C) (HTML_LITERAL & (C))
2950 #define HD(X) ((HTML_DATA_S *)(X)->data)
2951 #define ED(X) (HD(X)->el_data)
2952 #define EL(X) ((ELPROP_S *) (X)->element)
2953 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
2954 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR - start collecting a markup element for filter X: allocate a
 * CLCTR_S, zero it, and install html_element_collector as the filter's
 * token function.
 */
2955 #define NEW_CLCTR(X) { \
2956 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2957 memset(ED(X), 0, sizeof(CLCTR_S)); \
2958 HD(X)->token = html_element_collector; \
/*
 * FREE_CLCTR - tear down filter X's element collector: walk the attribute
 * list freeing each node's attribute and value strings and the node
 * itself, free the collected element name, free the CLCTR_S, and clear
 * the filter's token function.
 * ED(X) is dereferenced unconditionally, so a collector must exist.
 */
2961 #define FREE_CLCTR(X) { \
2962 if(ED(X)->attribs){ \
2963 PARAMETER *p; \
2964 while((p = ED(X)->attribs) != NULL){ \
2965 ED(X)->attribs = ED(X)->attribs->next; \
2966 if(p->attribute) \
2967 fs_give((void **)&p->attribute); \
2968 if(p->value) \
2969 fs_give((void **)&p->value); \
2970 fs_give((void **)&p); \
2973 if(ED(X)->element) \
2974 fs_give((void **) &ED(X)->element); \
2975 fs_give((void **) &ED(X)); \
2976 HD(X)->token = NULL; \
/* Shorthand for fields of filter X's HTML_DATA_S: handler chain and style bits. */
2978 #define HANDLERS(X) (HD(X)->h_stack)
2979 #define BOLD_BIT(X) (HD(X)->bold_on)
2980 #define ULINE_BIT(X) (HD(X)->uline_on)
2981 #define CENTER_BIT(X) (HD(X)->center)
/*
 * HTML_FLUSH - write everything between X->line and X->linep with
 * html_write(), then rewind linep to the start of the buffer and zero
 * X->f2.
 */
2982 #define HTML_FLUSH(X) { \
2983 html_write(X, (X)->line, (X)->linep - (X)->line); \
2984 (X)->linep = (X)->line; \
2985 (X)->f2 = 0L; \
/*
 * Text-attribute macros: HTML_BOLD, HTML_ULINE, HTML_ITALIC, HTML_STRIKE,
 * HTML_BIG, HTML_SMALL.
 *
 * Each does nothing when STRIP(X) is true.  Otherwise it sends TAG_EMBED
 * followed by the attribute's ON tag when S is non-zero, or its OFF tag
 * when S is zero, through html_output().
 *
 * NOTE(review): the "else if(!(S))" test is always true when reached, so
 * a plain "else" would do; S may be evaluated twice.  Each macro begins
 * with a bare "if" rather than a do { } while (0) wrapper, so using one
 * as the body of an unbraced if/else in a caller looks hazardous --
 * confirm call sites.
 */
2987 #define HTML_BOLD(X, S) if(! STRIP(X)){ \
2988 if((S)){ \
2989 html_output((X), TAG_EMBED); \
2990 html_output((X), TAG_BOLDON); \
2992 else if(!(S)){ \
2993 html_output((X), TAG_EMBED); \
2994 html_output((X), TAG_BOLDOFF); \
2997 #define HTML_ULINE(X, S) \
2998 if(! STRIP(X)){ \
2999 if((S)){ \
3000 html_output((X), TAG_EMBED); \
3001 html_output((X), TAG_ULINEON); \
3003 else if(!(S)){ \
3004 html_output((X), TAG_EMBED); \
3005 html_output((X), TAG_ULINEOFF); \
3008 #define HTML_ITALIC(X, S) \
3009 if(! STRIP(X)){ \
3010 if(S){ \
3011 html_output((X), TAG_EMBED); \
3012 html_output((X), TAG_ITALICON); \
3014 else if(!(S)){ \
3015 html_output((X), TAG_EMBED); \
3016 html_output((X), TAG_ITALICOFF); \
3019 #define HTML_STRIKE(X, S) \
3020 if(! STRIP(X)){ \
3021 if(S){ \
3022 html_output((X), TAG_EMBED); \
3023 html_output((X), TAG_STRIKEON); \
3025 else if(!(S)){ \
3026 html_output((X), TAG_EMBED); \
3027 html_output((X), TAG_STRIKEOFF); \
3030 #define HTML_BIG(X, S) \
3031 if(! STRIP(X)){ \
3032 if(S){ \
3033 html_output((X), TAG_EMBED); \
3034 html_output((X), TAG_BIGON); \
3036 else if(!(S)){ \
3037 html_output((X), TAG_EMBED); \
3038 html_output((X), TAG_BIGOFF); \
3041 #define HTML_SMALL(X, S) \
3042 if(! STRIP(X)){ \
3043 if(S){ \
3044 html_output((X), TAG_EMBED); \
3045 html_output((X), TAG_SMALLON); \
3047 else if(!(S)){ \
3048 html_output((X), TAG_EMBED); \
3049 html_output((X), TAG_SMALLOFF); \
/*
 * WRAPPED_LEN - displayed width of the centered text pending for filter X.
 *
 * Evaluates to 0 when X has no centered-text state.  Otherwise it is the
 * width of the assembled line plus the width of the word being collected,
 * plus one separating column when both are non-empty.
 *
 * The macro now expands its own argument; it previously ignored X and
 * silently required a variable named "f" to be in scope at the call site.
 * X is evaluated several times, so pass a side-effect-free expression.
 */
#define WRAPPED_LEN(X) ((HD(X)->centered) \
			? (HD(X)->centered->line.width \
			   + HD(X)->centered->word.width \
			   + ((HD(X)->centered->line.width \
			       && HD(X)->centered->word.width) \
			      ? 1 : 0)) \
			: 0)
/*
 * HTML_DUMP_LIT - feed the first L bytes of S to HTML_TEXT() for filter F.
 * ASCII whitespace bytes are passed as-is; every other byte is wrapped
 * with MAKE_LITERAL() so it is marked as a literal character.
 *
 * NOTE(review): (L) is re-evaluated on every loop iteration; HTML_PROC
 * passes a strlen() call here, so that scan is repeated per byte.
 */
3059 #define HTML_DUMP_LIT(F, S, L) { \
3060 int i, c; \
3061 for(i = 0; i < (L); i++){ \
3062 c = ASCII_ISSPACE((unsigned char)(S)[i]) \
3063 ? (S)[i] \
3064 : MAKE_LITERAL((S)[i]); \
3065 HTML_TEXT(F, c); \
/*
 * HTML_PROC - process one input character C for filter F.
 *
 * When a token collector is installed (HD(F)->token), C is handed to it.
 * A non-zero result ends the collection; a negative result additionally
 * re-emits what was collected as literal text: "<", the element name if
 * any, the buffered bytes if any, and finally C itself.  FREE_CLCTR()
 * then releases the collector (it appears to run for any non-zero result;
 * the closing braces are not visible in this chunk -- confirm).
 *
 * With no collector installed, '<' starts a new one via NEW_CLCTR() and
 * any other character goes to HTML_TEXT().
 */
3068 #define HTML_PROC(F, C) { \
3069 if(HD(F)->token){ \
3070 int i; \
3071 if((i = (*(HD(F)->token))(F, C)) != 0){ \
3072 if(i < 0){ \
3073 HTML_DUMP_LIT(F, "<", 1); \
3074 if(HD(F)->el_data->element){ \
3075 HTML_DUMP_LIT(F, \
3076 HD(F)->el_data->element, \
3077 strlen(HD(F)->el_data->element));\
3079 if(HD(F)->el_data->len){ \
3080 HTML_DUMP_LIT(F, \
3081 HD(F)->el_data->buf, \
3082 HD(F)->el_data->len); \
3084 HTML_TEXT(F, C); \
3086 FREE_CLCTR(F); \
3089 else if((C) == '<'){ \
3090 NEW_CLCTR(F); \
3092 else \
3093 HTML_TEXT(F, C); \
/*
 * HTML_LINEP_PUTC - append C to filter F's line buffer, growing it first
 * when needed.
 *
 * When the write position is within one byte of the recorded buffer size,
 * the buffer is resized to twice that size with fs_resize(), line_bufsize
 * is doubled, and linep is re-derived from its saved offset because the
 * buffer may have moved.
 */
3095 #define HTML_LINEP_PUTC(F, C) { \
3096 if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
3097 size_t offset = (F)->linep - (F)->line; \
3098 fs_resize((void **) &(F)->line, \
3099 (HD(F)->line_bufsize * 2) * sizeof(char)); \
3100 HD(F)->line_bufsize *= 2; \
3101 (F)->linep = &(F)->line[offset]; \
3103 *(F)->linep++ = (C); \
/*
 * HTML_TEXT - pass text character C through filter F's whitespace
 * coalescing state machine, whose state lives in F->f1.
 *
 * WSPACE: further whitespace is dropped; the first non-space character
 *         emits a single ' ', switches to DFL and falls through.
 * DFL:    nothing is emitted while the bitbucket flag is set; whitespace
 *         seen while wrapping is enabled switches to WSPACE without
 *         emitting; anything else is sent on with HTML_TEXT_OUT().
 */
3105 #define HTML_TEXT(F, C) switch((F)->f1){ \
3106 case WSPACE : \
3107 if(HTML_ISSPACE(C)) /* ignore repeated WS */ \
3108 break; \
3109 HTML_TEXT_OUT(F, ' '); \
3110 (F)->f1 = DFL;/* stop sending chars here */ \
3111 /* fall thru to process 'c' */ \
3112 case DFL: \
3113 if(HD(F)->bitbucket) \
3114 (F)->f1 = DFL; /* no op */ \
3115 else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
3116 (F)->f1 = WSPACE;/* coalesce white space */ \
3117 else HTML_TEXT_OUT(F, C); \
3118 break; \
/*
 * HTML_TEXT_OUT - deliver character C for filter F.  When any handler is
 * active, the top handler's element function receives it with GF_DATA;
 * otherwise it goes straight to html_output().
 * The expansion is a bare if/else that already ends in ';'.
 */
3120 #define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */ \
3121 (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
3122 else \
3123 html_output(F, C);
/*
 * HTML_DEBUG_EL - debug trace for collected element D, labelled S.
 * With DEBUG defined it logs the label and element name at level 5 and,
 * when the global debug level exceeds 5, each attribute (and "=value"
 * when a value is present) at level 6.  Without DEBUG it expands to
 * nothing.
 */
3124 #ifdef DEBUG
3125 #define HTML_DEBUG_EL(S, D) { \
3126 dprint((5, "-- html %s: %s\n", \
3127 S ? S : "?", \
3128 (D)->element \
3129 ? (D)->element : "NULL")); \
3130 if(debug > 5){ \
3131 PARAMETER *p; \
3132 for(p = (D)->attribs; \
3133 p && p->attribute; \
3134 p = p->next) \
3135 dprint((6, \
3136 " PARM: %s%s%s\n", \
3137 p->attribute \
3138 ? p->attribute : "NULL",\
3139 p->value ? "=" : "", \
3140 p->value ? p->value : ""));\
3143 #else
3144 #define HTML_DEBUG_EL(S, D)
3145 #endif
/*
 * SYSTEM_PINE_INFO_PATH gets a built-in default unless the build already
 * defined it.  CHTML_VAR_EXPAND maps the name "PINE_INFO_PATH" to that
 * path and yields any other string unchanged.
 * NOTE(review): S is unparenthesized and evaluated twice in the expansion.
 */
3147 #ifndef SYSTEM_PINE_INFO_PATH
3148 #define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
3149 #endif
3150 #define CHTML_VAR_EXPAND(S) (!strcmp(S, "PINE_INFO_PATH") \
3151 ? SYSTEM_PINE_INFO_PATH : S)
3154 * Protos for Tag handlers
/*
 * HTML tag handlers, one per supported element.  All share the signature
 * of the ELPROP_S handler member: (HANDLER_S *, int, int).  HTML_TEXT_OUT
 * calls a handler with the current handler entry, a character, and
 * GF_DATA; other values of the last argument are not visible in this
 * chunk.
 */
3156 int html_head(HANDLER_S *, int, int);
3157 int html_base(HANDLER_S *, int, int);
3158 int html_title(HANDLER_S *, int, int);
3159 int html_body(HANDLER_S *, int, int);
3160 int html_a(HANDLER_S *, int, int);
3161 int html_br(HANDLER_S *, int, int);
3162 int html_hr(HANDLER_S *, int, int);
3163 int html_p(HANDLER_S *, int, int);
3164 int html_table(HANDLER_S *, int, int);
3165 int html_caption(HANDLER_S *, int, int);
3166 int html_tr(HANDLER_S *, int, int);
3167 int html_td(HANDLER_S *, int, int);
3168 int html_th(HANDLER_S *, int, int);
3169 int html_thead(HANDLER_S *, int, int);
3170 int html_tbody(HANDLER_S *, int, int);
3171 int html_tfoot(HANDLER_S *, int, int);
3172 int html_col(HANDLER_S *, int, int);
3173 int html_colgroup(HANDLER_S *, int, int);
3174 int html_b(HANDLER_S *, int, int);
3175 int html_u(HANDLER_S *, int, int);
3176 int html_i(HANDLER_S *, int, int);
3177 int html_em(HANDLER_S *, int, int);
3178 int html_strong(HANDLER_S *, int, int);
3179 int html_s(HANDLER_S *, int, int);
3180 int html_big(HANDLER_S *, int, int);
3181 int html_small(HANDLER_S *, int, int);
3182 int html_font(HANDLER_S *, int, int);
3183 int html_img(HANDLER_S *, int, int);
3184 int html_map(HANDLER_S *, int, int);
3185 int html_area(HANDLER_S *, int, int);
3186 int html_form(HANDLER_S *, int, int);
3187 int html_input(HANDLER_S *, int, int);
3188 int html_option(HANDLER_S *, int, int);
3189 int html_optgroup(HANDLER_S *, int, int);
3190 int html_button(HANDLER_S *, int, int);
3191 int html_select(HANDLER_S *, int, int);
3192 int html_textarea(HANDLER_S *, int, int);
3193 int html_label(HANDLER_S *, int, int);
3194 int html_fieldset(HANDLER_S *, int, int);
3195 int html_ul(HANDLER_S *, int, int);
3196 int html_ol(HANDLER_S *, int, int);
3197 int html_menu(HANDLER_S *, int, int);
3198 int html_dir(HANDLER_S *, int, int);
3199 int html_li(HANDLER_S *, int, int);
3200 int html_h1(HANDLER_S *, int, int);
3201 int html_h2(HANDLER_S *, int, int);
3202 int html_h3(HANDLER_S *, int, int);
3203 int html_h4(HANDLER_S *, int, int);
3204 int html_h5(HANDLER_S *, int, int);
3205 int html_h6(HANDLER_S *, int, int);
3206 int html_blockquote(HANDLER_S *, int, int);
3207 int html_address(HANDLER_S *, int, int);
3208 int html_pre(HANDLER_S *, int, int);
3209 int html_center(HANDLER_S *, int, int);
3210 int html_div(HANDLER_S *, int, int);
3211 int html_span(HANDLER_S *, int, int);
3212 int html_dl(HANDLER_S *, int, int);
3213 int html_dt(HANDLER_S *, int, int);
3214 int html_dd(HANDLER_S *, int, int);
3215 int html_script(HANDLER_S *, int, int);
3216 int html_applet(HANDLER_S *, int, int);
3217 int html_style(HANDLER_S *, int, int);
3218 int html_kbd(HANDLER_S *, int, int);
3219 int html_dfn(HANDLER_S *, int, int);
3220 int html_var(HANDLER_S *, int, int);
3221 int html_tt(HANDLER_S *, int, int);
3222 int html_samp(HANDLER_S *, int, int);
3223 int html_sub(HANDLER_S *, int, int);
3224 int html_sup(HANDLER_S *, int, int);
3225 int html_cite(HANDLER_S *, int, int);
3226 int html_code(HANDLER_S *, int, int);
3227 int html_ins(HANDLER_S *, int, int);
3228 int html_del(HANDLER_S *, int, int);
3229 int html_abbr(HANDLER_S *, int, int);
3232 * Protos for RSS 2.0 Tag handlers
/*
 * RSS 2.0 tag handlers.  They use the same (HANDLER_S *, int, int)
 * signature as the HTML tag handlers.
 */
3234 int rss_rss(HANDLER_S *, int, int);
3235 int rss_channel(HANDLER_S *, int, int);
3236 int rss_title(HANDLER_S *, int, int);
3237 int rss_image(HANDLER_S *, int, int);
3238 int rss_link(HANDLER_S *, int, int);
3239 int rss_description(HANDLER_S *, int, int);
3240 int rss_ttl(HANDLER_S *, int, int);
3241 int rss_item(HANDLER_S *, int, int);
3244 * Proto's for support routines
/*
 * Support routines for the HTML filter: handler stack management, the
 * element and entity collectors, anchor helpers, indentation, and the
 * output/wrapping/centering layer.  Definitions are outside this chunk.
 */
3246 void html_pop(FILTER_S *, ELPROP_S *);
3247 int html_push(FILTER_S *, ELPROP_S *);
3248 int html_element_collector(FILTER_S *, int); /* installed as the token function by NEW_CLCTR */
3249 int html_element_flush(CLCTR_S *);
3250 void html_element_comment(FILTER_S *, char *);
3251 void html_element_output(FILTER_S *, int);
3252 int html_entity_collector(FILTER_S *, int, UCS *, char **);
3253 void html_a_prefix(FILTER_S *);
3254 void html_a_finish(HANDLER_S *);
3255 void html_a_output_prefix(FILTER_S *, int);
3256 void html_a_output_info(HANDLER_S *);
3257 void html_a_relative(char *, char *, HANDLE_S *);
3258 int html_href_relative(char *);
3259 int html_indent(FILTER_S *, int, int);
3260 void html_blank(FILTER_S *, int);
3261 void html_newline(FILTER_S *);
3262 void html_output(FILTER_S *, int); /* used by HTML_TEXT_OUT and the text-attribute macros */
3263 void html_output_string(FILTER_S *, char *);
3264 void html_output_raw_tag(FILTER_S *, char *);
3265 void html_output_normal(FILTER_S *, int, int, int);
3266 void html_output_flush(FILTER_S *);
3267 void html_output_centered(FILTER_S *, int, int, int);
3268 void html_centered_handle(int *, char *, int);
3269 void html_centered_putc(WRAPLINE_S *, int);
3270 void html_centered_flush(FILTER_S *);
3271 void html_centered_flush_line(FILTER_S *);
3272 void html_write_anchor(FILTER_S *, int);
3273 void html_write_newline(FILTER_S *);
3274 void html_write_indent(FILTER_S *, int);
3275 void html_write(FILTER_S *, char *, int); /* used by HTML_FLUSH */
3276 void html_putc(FILTER_S *, int);
3277 int html_event_attribute(char *);
3278 char *rss_skip_whitespace(char *s);
3279 ELPROP_S *element_properties(FILTER_S *, char *);
3283 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3284 * W3C doc "Additional named entities for HTML"
3286 static struct html_entities {
3287 char *name; /* entity name */
3288 UCS value; /* UCS entity value */
3289 char *plain; /* US-ASCII representation */
3290 } entity_tab[] = {
3291 {"quot", 0x0022}, /* 34 - quotation mark */
3292 {"amp", 0x0026}, /* 38 - ampersand */
3293 {"apos", 0x0027}, /* 39 - apostrophe */
3294 {"lt", 0x003C}, /* 60 - less-than sign */
3295 {"gt", 0x003E}, /* 62 - greater-than sign */
3296 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3297 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3298 {"cent", 0x00A2}, /* 162 - cent sign */
3299 {"pound", 0x00A3}, /* 163 - pound sign */
3300 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3301 {"yen", 0x00A5}, /* 165 - yen sign */
3302 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3303 {"sect", 0x00A7}, /* 167 - section sign */
3304 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3305 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3306 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3307 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3308 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3309 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3310 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3311 {"macr", 0x00AF}, /* 175 - macron */
3312 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3313 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3314 {"sup2", 0x00B2}, /* 178 - superscript two */
3315 {"sup3", 0x00B3}, /* 179 - superscript three */
3316 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3317 {"micro", 0x00B5}, /* 181 - micro sign */
3318 {"para", 0x00B6}, /* 182 - pilcrow sign */
3319 {"middot", 0x00B7}, /* 183 - middle dot */
3320 {"cedil", 0x00B8}, /* 184 - cedilla */
3321 {"sup1", 0x00B9}, /* 185 - superscript one */
3322 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3323 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3324 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3325 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3326 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3327 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3328 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3329 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3330 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3331 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3332 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3333 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3334 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3335 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3336 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3337 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3338 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3339 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3340 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3341 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3342 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3343 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3344 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3345 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3346 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3347 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3348 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3349 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3350 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3351 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3352 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3353 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3354 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3355 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3356 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3357 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3358 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3359 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3360 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3361 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3362 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3363 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3364 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3365 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3366 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3367 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3368 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3369 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3370 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3371 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3372 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3373 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3374 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3375 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3376 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3377 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3378 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3379 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3380 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3381 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3382 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3383 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3384 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3385 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3386 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3387 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3388 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3389 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3390 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3391 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3392 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3393 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3394 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3395 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3396 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3397 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3398 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3399 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3400 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3401 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3402 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3403 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3404 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3405 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3406 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3407 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3408 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3409 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3410 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3411 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3412 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3413 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3414 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3415 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3416 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3417 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3418 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3419 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3420 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3421 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3422 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3423 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3424 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3425 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3426 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3427 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3428 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3429 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3430 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3431 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3432 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3433 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3434 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3435 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3436 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3437 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3438 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3439 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3440 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3441 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3442 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3443 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3444 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3445 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3446 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3447 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3448 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3449 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3450 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3451 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3452 {"ensp", 0x2002}, /* 8194 - en space */
3453 {"emsp", 0x2003}, /* 8195 - em space */
3454 {"thinsp", 0x2009}, /* 8201 - thin space */
3455 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3456 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3457 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3458 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3459 {"ndash", 0x2013}, /* 8211 - en dash */
3460 {"mdash", 0x2014}, /* 8212 - em dash */
3461 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3462 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3463 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3464 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3465 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3466 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3467 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3468 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3469 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3470 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3471 {"dagger", 0x2020}, /* 8224 - dagger */
3472 {"Dagger", 0x2021}, /* 8225 - double dagger */
3473 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3474 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3475 {"permil", 0x2030}, /* 8240 - per mille sign */
3476 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3477 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3478 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3479 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3480 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3481 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3482 {"oline", 0x203E, "-"}, /* 8254 - overline */
3483 {"frasl", 0x2044}, /* 8260 - fraction slash */
3484 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3485 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3486 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3487 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3488 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3489 {"image", 0x2111}, /* 8465 - black-letter capital i */
3490 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3491 {"real", 0x211C}, /* 8476 - black-letter capital r */
3492 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3493 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3494 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3495 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3496 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3497 {"darr", 0x2193}, /* 8595 - downwards arrow */
3498 {"harr", 0x2194}, /* 8596 - left right arrow */
3499 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3500 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3501 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3502 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3503 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3504 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3505 {"forall", 0x2200}, /* 8704 - for all */
3506 {"part", 0x2202}, /* 8706 - partial differential */
3507 {"exist", 0x2203}, /* 8707 - there exists */
3508 {"empty", 0x2205}, /* 8709 - empty set */
3509 {"nabla", 0x2207}, /* 8711 - nabla */
3510 {"isin", 0x2208}, /* 8712 - element of */
3511 {"notin", 0x2209}, /* 8713 - not an element of */
3512 {"ni", 0x220B}, /* 8715 - contains as member */
3513 {"prod", 0x220F}, /* 8719 - n-ary product */
3514 {"sum", 0x2211}, /* 8721 - n-ary summation */
3515 {"minus", 0x2212}, /* 8722 - minus sign */
3516 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3517 {"radic", 0x221A}, /* 8730 - square root */
3518 {"prop", 0x221D}, /* 8733 - proportional to */
3519 {"infin", 0x221E}, /* 8734 - infinity */
3520 {"ang", 0x2220}, /* 8736 - angle */
3521 {"and", 0x2227}, /* 8743 - logical and */
3522 {"or", 0x2228}, /* 8744 - logical or */
3523 {"cap", 0x2229}, /* 8745 - intersection */
3524 {"cup", 0x222A}, /* 8746 - union */
3525 {"int", 0x222B}, /* 8747 - integral */
3526 {"there4", 0x2234}, /* 8756 - therefore */
3527 {"sim", 0x223C}, /* 8764 - tilde operator */
3528 {"cong", 0x2245}, /* 8773 - congruent to */
3529 {"asymp", 0x2248}, /* 8776 - almost equal to */
3530 {"ne", 0x2260}, /* 8800 - not equal to */
3531 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3532 {"le", 0x2264}, /* 8804 - less-than or equal to */
3533 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3534 {"sub", 0x2282}, /* 8834 - subset of */
3535 {"sup", 0x2283}, /* 8835 - superset of */
3536 {"nsub", 0x2284}, /* 8836 - not a subset of */
3537 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3538 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3539 {"oplus", 0x2295}, /* 8853 - circled plus */
3540 {"otimes", 0x2297}, /* 8855 - circled times */
3541 {"perp", 0x22A5}, /* 8869 - up tack */
3542 {"sdot", 0x22C5}, /* 8901 - dot operator */
3543 {"lceil", 0x2308}, /* 8968 - left ceiling */
3544 {"rceil", 0x2309}, /* 8969 - right ceiling */
3545 {"lfloor", 0x230A}, /* 8970 - left floor */
3546 {"rfloor", 0x230B}, /* 8971 - right floor */
3547 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3548 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3549 {"loz", 0x25CA}, /* 9674 - lozenge */
3550 {"spades", 0x2660}, /* 9824 - black spade suit */
3551 {"clubs", 0x2663}, /* 9827 - black club suit */
3552 {"hearts", 0x2665}, /* 9829 - black heart suit */
3553 {"diams", 0x2666} /* 9830 - black diamond suit */
3558 * Table of supported elements and corresponding handlers
3560 static ELPROP_S html_element_table[] = {
3561 {"HTML", 4}, /* HTML ignore if seen? */
3562 {"HEAD", 4, html_head}, /* slurp until <BODY> ? */
3563 {"TITLE", 5, html_title}, /* Document Title */
3564 {"BASE", 4, html_base}, /* HREF base */
3565 {"BODY", 4, html_body}, /* HTML BODY */
3566 {"A", 1, html_a}, /* Anchor */
3567 {"ABBR", 4, html_abbr}, /* Abbreviation */
3568 {"IMG", 3, html_img}, /* Image */
3569 {"MAP", 3, html_map}, /* Image Map */
3570 {"AREA", 4, html_area}, /* Image Map Area */
3571 {"HR", 2, html_hr, 1, 1}, /* Horizontal Rule */
3572 {"BR", 2, html_br, 0, 1}, /* Line Break */
3573 {"P", 1, html_p, 1}, /* Paragraph */
3574 {"OL", 2, html_ol, 1}, /* Ordered List */
3575 {"UL", 2, html_ul, 1}, /* Unordered List */
3576 {"MENU", 4, html_menu}, /* Menu List */
3577 {"DIR", 3, html_dir}, /* Directory List */
3578 {"LI", 2, html_li}, /* ... List Item */
3579 {"DL", 2, html_dl, 1}, /* Definition List */
3580 {"DT", 2, html_dt}, /* ... Def. Term */
3581 {"DD", 2, html_dd}, /* ... Def. Definition */
3582 {"I", 1, html_i}, /* Italic Text */
3583 {"EM", 2, html_em}, /* Typographic Emphasis */
3584 {"STRONG", 6, html_strong}, /* STRONG Typo Emphasis */
3585 {"VAR", 3, html_i}, /* Variable Name */
3586 {"B", 1, html_b}, /* Bold Text */
3587 {"U", 1, html_u}, /* Underline Text */
3588 {"S", 1, html_s}, /* Strike-Through Text */
3589 {"STRIKE", 6, html_s}, /* Strike-Through Text */
3590 {"BIG", 3, html_big}, /* Big Font Text */
3591 {"SMALL", 5, html_small}, /* Small Font Text */
3592 {"FONT", 4, html_font}, /* Font display directives */
3593 {"BLOCKQUOTE", 10, html_blockquote, 1}, /* Blockquote */
3594 {"ADDRESS", 7, html_address, 1}, /* Address */
3595 {"CENTER", 6, html_center}, /* Centered Text v3.2 */
3596 {"DIV", 3, html_div, 1}, /* Document Division 3.2 */
3597 {"SPAN", 4, html_span}, /* Text Span */
3598 {"H1", 2, html_h1, 1}, /* Headings... */
3599 {"H2", 2, html_h2, 1},
3600 {"H3", 2, html_h3,1},
3601 {"H4", 2, html_h4, 1},
3602 {"H5", 2, html_h5, 1},
3603 {"H6", 2, html_h6, 1},
3604 {"PRE", 3, html_pre, 1}, /* Preformatted Text */
3605 {"KBD", 3, html_kbd}, /* Keyboard Input (NO OP) */
3606 {"DFN", 3, html_dfn}, /* Definition (NO OP) */
3607 {"VAR", 3, html_var}, /* Variable (NO OP) */
3608 {"TT", 2, html_tt}, /* Typetype (NO OP) */
3609 {"SAMP", 4, html_samp}, /* Sample Text (NO OP) */
3610 {"CITE", 4, html_cite}, /* Citation (NO OP) */
3611 {"CODE", 4, html_code}, /* Code Text (NO OP) */
3612 {"INS", 3, html_ins}, /* Text Inseted (NO OP) */
3613 {"DEL", 3, html_del}, /* Text Deleted (NO OP) */
3614 {"SUP", 3, html_sup}, /* Text Superscript (NO OP) */
3615 {"SUB", 3, html_sub}, /* Text Superscript (NO OP) */
3616 {"STYLE", 5, html_style}, /* CSS Definitions */
3618 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3620 {"FORM", 4, html_form, 1}, /* form within a document */
3621 {"INPUT", 5, html_input}, /* One input field, options */
3622 {"BUTTON", 6, html_button}, /* Push Button */
3623 {"OPTION", 6, html_option}, /* One option within Select */
3624 {"OPTION", 6, html_optgroup}, /* Option Group Definition */
3625 {"SELECT", 6, html_select}, /* Selection from a set */
3626 {"TEXTAREA", 8, html_textarea}, /* A multi-line input field */
3627 {"LABEL", 5, html_label}, /* Control Label */
3628 {"FIELDSET", 8, html_fieldset, 1}, /* Fieldset Control Group */
3630 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3631 {"SCRIPT", 6, html_script}, /* Embedded scripting statements */
3632 {"APPLET", 6, NULL}, /* Embedded applet statements */
3633 {"OBJECT", 6, NULL}, /* Embedded object statements */
3634 {"LINK", 4, NULL}, /* References to external data */
3635 {"PARAM", 5, NULL}, /* Applet/Object parameters */
3637 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3639 {"TABLE", 5, html_table, 1}, /* Table */
3640 {"CAPTION", 7, html_caption}, /* Table Caption */
3641 {"TR", 2, html_tr}, /* Table Table Row */
3642 {"TD", 2, html_td}, /* Table Table Data */
3643 {"TH", 2, html_th}, /* Table Table Head */
3644 {"THEAD", 5, html_thead}, /* Table Table Head */
3645 {"TBODY", 5, html_tbody}, /* Table Table Body */
3646 {"TFOOT", 5, html_tfoot}, /* Table Table Foot */
3647 {"COL", 3, html_col}, /* Table Column Attibutes */
3648 {"COLGROUP", 8, html_colgroup}, /* Table Column Group Attibutes */
3650 {NULL, 0, NULL}
3655 * Table of supported RSS 2.0 elements
3657 static ELPROP_S rss_element_table[] = {
3658 {"RSS", 3, rss_rss}, /* RSS 2.0 version */
3659 {"CHANNEL", 7, rss_channel}, /* RSS 2.0 Channel */
3660 {"TITLE", 5, rss_title}, /* RSS 2.0 Title */
3661 {"IMAGE", 5, rss_image}, /* RSS 2.0 Channel Image */
3662 {"LINK", 4, rss_link}, /* RSS 2.0 Channel/Item Link */
3663 {"DESCRIPTION", 11, rss_description}, /* RSS 2.0 Channel/Item Description */
3664 {"ITEM", 4, rss_item}, /* RSS 2.0 Channel ITEM */
3665 {"TTL", 3, rss_ttl}, /* RSS 2.0 Item TTL */
3666 {NULL, 0, NULL}
3671 * Initialize the given handler, and add it to the stack if it
3672 * requests it.
3674 * Returns: 1 if handler chose to get pushed on stack
3675 * 0 if handler declined
3678 html_push(FILTER_S *fd, ELPROP_S *ep)
3680 HANDLER_S *new;
3682 new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3683 memset(new, 0, sizeof(HANDLER_S));
3684 new->html_data = fd;
3685 new->element = ep;
3686 if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3687 new->below = HANDLERS(fd);
3688 HANDLERS(fd) = new; /* push */
3689 return(1);
3692 fs_give((void **) &new);
3693 return(0);
3698 * Remove the most recently installed the given handler
3699 * after letting it accept its demise.
3701 void
3702 html_pop(FILTER_S *fd, ELPROP_S *ep)
3704 HANDLER_S *tp;
3706 for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3707 HANDLER_S *tp2;
3709 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3710 /* if no evidence of opening tag, ignore given closing tag */
3711 for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3714 if(!tp2){
3715 dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3716 return;
3719 (void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3720 HANDLERS(fd) = tp->below;
3723 if(tp){
3724 (void) (*EL(tp)->handler)(tp, 0, GF_EOD); /* may adjust handler list */
3725 if(tp != HANDLERS(fd)){
3726 HANDLER_S *p;
3728 for(p = HANDLERS(fd); p->below != tp; p = p->below)
3731 if(p)
3732 p->below = tp->below; /* remove from middle of stack */
3733 /* BUG: else programming botch and we should die */
3735 else
3736 HANDLERS(fd) = tp->below; /* pop */
3738 fs_give((void **)&tp);
3740 else{
3741 /* BUG: should MAKE SURE NOT TO EMIT IT */
3742 dprint((3, "-- html error: end tag without a start: %s", ep->element));
3748 * Deal with data passed a hander in its GF_DATA state
3750 static void
3751 html_handoff(HANDLER_S *hd, int ch)
3753 if(hd->below)
3754 (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3755 else
3756 html_output(hd->html_data, ch);
3761 * HTML <BR> element handler
3764 html_br(HANDLER_S *hd, int ch, int cmd)
3766 if(cmd == GF_RESET){
3767 if(PASS_HTML(hd->html_data)){
3768 html_output_raw_tag(hd->html_data, "br");
3770 else{
3771 html_output(hd->html_data, HTML_NEWLINE);
3775 return(0); /* don't get linked */
3780 * HTML <HR> (Horizontal Rule) element handler
3783 html_hr(HANDLER_S *hd, int ch, int cmd)
3785 if(cmd == GF_RESET){
3786 if(PASS_HTML(hd->html_data)){
3787 html_output_raw_tag(hd->html_data, "hr");
3789 else{
3790 int i, old_wrap, width, align;
3791 PARAMETER *p;
3793 width = WRAP_COLS(hd->html_data);
3794 align = 0;
3795 for(p = HD(hd->html_data)->el_data->attribs;
3796 p && p->attribute;
3797 p = p->next)
3798 if(p->value){
3799 if(!strucmp(p->attribute, "ALIGN")){
3800 if(!strucmp(p->value, "LEFT"))
3801 align = 1;
3802 else if(!strucmp(p->value, "RIGHT"))
3803 align = 2;
3805 else if(!strucmp(p->attribute, "WIDTH")){
3806 char *cp;
3808 width = 0;
3809 for(cp = p->value; *cp; cp++)
3810 if(*cp == '%'){
3811 width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3812 break;
3814 else if(isdigit((unsigned char) *cp))
3815 width = (width * 10) + (*cp - '0');
3817 width = MIN(width, WRAP_COLS(hd->html_data));
3821 html_blank(hd->html_data, 1); /* at least one blank line */
3823 old_wrap = HD(hd->html_data)->wrapstate;
3824 HD(hd->html_data)->wrapstate = 0;
3825 if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3826 && ((align == 0) ? i /= 2 : (align == 2)))
3827 for(; i > 0; i--)
3828 html_output(hd->html_data, ' ');
3830 for(i = 0; i < width; i++)
3831 html_output(hd->html_data, '_');
3833 html_blank(hd->html_data, 1);
3834 HD(hd->html_data)->wrapstate = old_wrap;
3838 return(0); /* don't get linked */
3843 * HTML <P> (paragraph) element handler
3846 html_p(HANDLER_S *hd, int ch, int cmd)
3848 if(cmd == GF_DATA){
3849 html_handoff(hd, ch);
3851 else if(cmd == GF_RESET){
3852 if(PASS_HTML(hd->html_data)){
3853 html_output_raw_tag(hd->html_data, "p");
3855 else{
3856 /* Make sure there's at least 1 blank line */
3857 html_blank(hd->html_data, 1);
3859 /* adjust indent level if needed */
3860 if(HD(hd->html_data)->li_pending){
3861 html_indent(hd->html_data, 4, HTML_ID_INC);
3862 HD(hd->html_data)->li_pending = 0;
3866 else if(cmd == GF_EOD){
3867 if(PASS_HTML(hd->html_data)){
3868 html_output_string(hd->html_data, "</p>");
3870 else{
3871 /* Make sure there's at least 1 blank line */
3872 html_blank(hd->html_data, 1);
3876 return(1); /* GET linked */
3881 * HTML Table <TABLE> (paragraph) table row
3884 html_table(HANDLER_S *hd, int ch, int cmd)
3886 if(cmd == GF_DATA){
3887 if(PASS_HTML(hd->html_data)){
3888 html_handoff(hd, ch);
3891 else if(cmd == GF_RESET){
3892 if(PASS_HTML(hd->html_data)){
3893 html_output_raw_tag(hd->html_data, "table");
3895 else
3896 /* Make sure there's at least 1 blank line */
3897 html_blank(hd->html_data, 0);
3899 else if(cmd == GF_EOD){
3900 if(PASS_HTML(hd->html_data)){
3901 html_output_string(hd->html_data, "</table>");
3903 else
3904 /* Make sure there's at least 1 blank line */
3905 html_blank(hd->html_data, 0);
3907 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3912 * HTML <CAPTION> (Table Caption) element handler
3915 html_caption(HANDLER_S *hd, int ch, int cmd)
3917 if(cmd == GF_DATA){
3918 html_handoff(hd, ch);
3920 else if(cmd == GF_RESET){
3921 if(PASS_HTML(hd->html_data)){
3922 html_output_raw_tag(hd->html_data, "caption");
3924 else{
3925 /* turn ON the centered bit */
3926 CENTER_BIT(hd->html_data) = 1;
3929 else if(cmd == GF_EOD){
3930 if(PASS_HTML(hd->html_data)){
3931 html_output_string(hd->html_data, "</caption>");
3933 else{
3934 /* turn OFF the centered bit */
3935 CENTER_BIT(hd->html_data) = 0;
3939 return(1);
3944 * HTML Table <TR> (paragraph) table row
3947 html_tr(HANDLER_S *hd, int ch, int cmd)
3949 if(cmd == GF_DATA){
3950 if(PASS_HTML(hd->html_data)){
3951 html_handoff(hd, ch);
3954 else if(cmd == GF_RESET){
3955 if(PASS_HTML(hd->html_data)){
3956 html_output_raw_tag(hd->html_data, "tr");
3958 else
3959 /* Make sure there's at least 1 blank line */
3960 html_blank(hd->html_data, 0);
3962 else if(cmd == GF_EOD){
3963 if(PASS_HTML(hd->html_data)){
3964 html_output_string(hd->html_data, "</tr>");
3966 else
3967 /* Make sure there's at least 1 blank line */
3968 html_blank(hd->html_data, 0);
3970 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3975 * HTML Table <TD> (paragraph) table data
3978 html_td(HANDLER_S *hd, int ch, int cmd)
3980 if(cmd == GF_DATA){
3981 if(PASS_HTML(hd->html_data)){
3982 html_handoff(hd, ch);
3985 else if(cmd == GF_RESET){
3986 if(PASS_HTML(hd->html_data)){
3987 html_output_raw_tag(hd->html_data, "td");
3989 else{
3990 PARAMETER *p;
3992 for(p = HD(hd->html_data)->el_data->attribs;
3993 p && p->attribute;
3994 p = p->next)
3995 if(!strucmp(p->attribute, "nowrap")
3996 && (hd->html_data->f2 || hd->html_data->n)){
3997 HTML_DUMP_LIT(hd->html_data, " | ", 3);
3998 break;
4002 else if(cmd == GF_EOD){
4003 if(PASS_HTML(hd->html_data)){
4004 html_output_string(hd->html_data, "</td>");
4008 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4013 * HTML Table <TH> (paragraph) table head
4016 html_th(HANDLER_S *hd, int ch, int cmd)
4018 if(cmd == GF_DATA){
4019 if(PASS_HTML(hd->html_data)){
4020 html_handoff(hd, ch);
4023 else if(cmd == GF_RESET){
4024 if(PASS_HTML(hd->html_data)){
4025 html_output_raw_tag(hd->html_data, "th");
4027 else{
4028 PARAMETER *p;
4030 for(p = HD(hd->html_data)->el_data->attribs;
4031 p && p->attribute;
4032 p = p->next)
4033 if(!strucmp(p->attribute, "nowrap")
4034 && (hd->html_data->f2 || hd->html_data->n)){
4035 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4036 break;
4040 else if(cmd == GF_EOD){
4041 if(PASS_HTML(hd->html_data)){
4042 html_output_string(hd->html_data, "</th>");
4046 return(PASS_HTML(hd->html_data)); /* don't get linked */
4051 * HTML Table <THEAD> table head
4054 html_thead(HANDLER_S *hd, int ch, int cmd)
4056 if(PASS_HTML(hd->html_data)){
4057 if(cmd == GF_DATA){
4058 html_handoff(hd, ch);
4060 else if(cmd == GF_RESET){
4061 html_output_raw_tag(hd->html_data, "thead");
4063 else if(cmd == GF_EOD){
4064 html_output_string(hd->html_data, "</thead>");
4067 return(1); /* GET linked */
4070 return(0); /* don't get linked */
4075 * HTML Table <TBODY> table body
4078 html_tbody(HANDLER_S *hd, int ch, int cmd)
4080 if(PASS_HTML(hd->html_data)){
4081 if(cmd == GF_DATA){
4082 html_handoff(hd, ch);
4084 else if(cmd == GF_RESET){
4085 html_output_raw_tag(hd->html_data, "tbody");
4087 else if(cmd == GF_EOD){
4088 html_output_string(hd->html_data, "</tbody>");
4091 return(1); /* GET linked */
4094 return(0); /* don't get linked */
4099 * HTML Table <TFOOT> table body
4102 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4104 if(PASS_HTML(hd->html_data)){
4105 if(cmd == GF_DATA){
4106 html_handoff(hd, ch);
4108 else if(cmd == GF_RESET){
4109 html_output_raw_tag(hd->html_data, "tfoot");
4111 else if(cmd == GF_EOD){
4112 html_output_string(hd->html_data, "</tfoot>");
4115 return(1); /* GET linked */
4118 return(0); /* don't get linked */
4123 * HTML <COL> (Table Column Attributes) element handler
4126 html_col(HANDLER_S *hd, int ch, int cmd)
4128 if(cmd == GF_RESET){
4129 if(PASS_HTML(hd->html_data)){
4130 html_output_raw_tag(hd->html_data, "col");
4134 return(0); /* don't get linked */
4139 * HTML Table <COLGROUP> table body
4142 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4144 if(PASS_HTML(hd->html_data)){
4145 if(cmd == GF_DATA){
4146 html_handoff(hd, ch);
4148 else if(cmd == GF_RESET){
4149 html_output_raw_tag(hd->html_data, "colgroup");
4151 else if(cmd == GF_EOD){
4152 html_output_string(hd->html_data, "</colgroup>");
4155 return(1); /* GET linked */
4158 return(0); /* don't get linked */
4163 * HTML <I> (italic text) element handler
4166 html_i(HANDLER_S *hd, int ch, int cmd)
4168 if(cmd == GF_DATA){
4169 /* include LITERAL in spaceness test! */
4170 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4171 HTML_ITALIC(hd->html_data, 1);
4172 hd->x = 0;
4175 html_handoff(hd, ch);
4177 else if(cmd == GF_RESET){
4178 hd->x = 1;
4180 else if(cmd == GF_EOD){
4181 if(!hd->x)
4182 HTML_ITALIC(hd->html_data, 0);
4185 return(1); /* get linked */
4190 * HTML <EM> element handler
4193 html_em(HANDLER_S *hd, int ch, int cmd)
4195 if(cmd == GF_DATA){
4196 if(!PASS_HTML(hd->html_data)){
4197 /* include LITERAL in spaceness test! */
4198 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4199 HTML_ITALIC(hd->html_data, 1);
4200 hd->x = 0;
4204 html_handoff(hd, ch);
4206 else if(cmd == GF_RESET){
4207 if(PASS_HTML(hd->html_data)){
4208 html_output_raw_tag(hd->html_data, "em");
4210 else{
4211 hd->x = 1;
4214 else if(cmd == GF_EOD){
4215 if(PASS_HTML(hd->html_data)){
4216 html_output_string(hd->html_data, "</em>");
4218 else{
4219 if(!hd->x)
4220 HTML_ITALIC(hd->html_data, 0);
4224 return(1); /* get linked */
4229 * HTML <STRONG> element handler
4232 html_strong(HANDLER_S *hd, int ch, int cmd)
4234 if(cmd == GF_DATA){
4235 if(!PASS_HTML(hd->html_data)){
4236 /* include LITERAL in spaceness test! */
4237 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4238 HTML_ITALIC(hd->html_data, 1);
4239 hd->x = 0;
4243 html_handoff(hd, ch);
4245 else if(cmd == GF_RESET){
4246 if(PASS_HTML(hd->html_data)){
4247 html_output_raw_tag(hd->html_data, "strong");
4249 else{
4250 hd->x = 1;
4253 else if(cmd == GF_EOD){
4254 if(PASS_HTML(hd->html_data)){
4255 html_output_string(hd->html_data, "</strong>");
4257 else{
4258 if(!hd->x)
4259 HTML_ITALIC(hd->html_data, 0);
4263 return(1); /* get linked */
4268 * HTML <u> (Underline text) element handler
4271 html_u(HANDLER_S *hd, int ch, int cmd)
4273 if(PASS_HTML(hd->html_data)){
4274 if(cmd == GF_DATA){
4275 html_handoff(hd, ch);
4277 else if(cmd == GF_RESET){
4278 html_output_raw_tag(hd->html_data, "u");
4280 else if(cmd == GF_EOD){
4281 html_output_string(hd->html_data, "</u>");
4284 return(1); /* get linked */
4287 return(0); /* do NOT get linked */
4292 * HTML <b> (Bold text) element handler
4295 html_b(HANDLER_S *hd, int ch, int cmd)
4297 if(cmd == GF_DATA){
4298 if(!PASS_HTML(hd->html_data)){
4299 /* include LITERAL in spaceness test! */
4300 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4301 HTML_BOLD(hd->html_data, 1);
4302 hd->x = 0;
4306 html_handoff(hd, ch);
4308 else if(cmd == GF_RESET){
4309 if(PASS_HTML(hd->html_data)){
4310 html_output_raw_tag(hd->html_data, "b");
4312 else{
4313 hd->x = 1;
4316 else if(cmd == GF_EOD){
4317 if(PASS_HTML(hd->html_data)){
4318 html_output_string(hd->html_data, "</b>");
4320 else{
4321 if(!hd->x)
4322 HTML_BOLD(hd->html_data, 0);
4326 return(1); /* get linked */
4331 * HTML <s> (strike-through text) element handler
4334 html_s(HANDLER_S *hd, int ch, int cmd)
4336 if(cmd == GF_DATA){
4337 if(!PASS_HTML(hd->html_data)){
4338 /* include LITERAL in spaceness test! */
4339 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4340 HTML_STRIKE(hd->html_data, 1);
4341 hd->x = 0;
4345 html_handoff(hd, ch);
4347 else if(cmd == GF_RESET){
4348 if(PASS_HTML(hd->html_data)){
4349 html_output_raw_tag(hd->html_data, "s");
4351 else{
4352 hd->x = 1;
4355 else if(cmd == GF_EOD){
4356 if(PASS_HTML(hd->html_data)){
4357 html_output_string(hd->html_data, "</s>");
4359 else{
4360 if(!hd->x)
4361 HTML_STRIKE(hd->html_data, 0);
4365 return(1); /* get linked */
4370 * HTML <big> (BIG text) element handler
4373 html_big(HANDLER_S *hd, int ch, int cmd)
4375 if(cmd == GF_DATA){
4376 /* include LITERAL in spaceness test! */
4377 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4378 HTML_BIG(hd->html_data, 1);
4379 hd->x = 0;
4382 html_handoff(hd, ch);
4384 else if(cmd == GF_RESET){
4385 hd->x = 1;
4387 else if(cmd == GF_EOD){
4388 if(!hd->x)
4389 HTML_BIG(hd->html_data, 0);
4392 return(1); /* get linked */
4397 * HTML <small> (SMALL text) element handler
4400 html_small(HANDLER_S *hd, int ch, int cmd)
4402 if(cmd == GF_DATA){
4403 /* include LITERAL in spaceness test! */
4404 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4405 HTML_SMALL(hd->html_data, 1);
4406 hd->x = 0;
4409 html_handoff(hd, ch);
4411 else if(cmd == GF_RESET){
4412 hd->x = 1;
4414 else if(cmd == GF_EOD){
4415 if(!hd->x)
4416 HTML_SMALL(hd->html_data, 0);
4419 return(1); /* get linked */
4424 * HTML <FONT> element handler
4427 html_font(HANDLER_S *hd, int ch, int cmd)
4429 if(PASS_HTML(hd->html_data)){
4430 if(cmd == GF_DATA){
4431 html_handoff(hd, ch);
4433 else if(cmd == GF_RESET){
4434 html_output_raw_tag(hd->html_data, "font");
4436 else if(cmd == GF_EOD){
4437 html_output_string(hd->html_data, "</font>");
4440 return(1); /* get linked */
4443 return(0);
4448 * HTML <IMG> element handler
4451 html_img(HANDLER_S *hd, int ch, int cmd)
4453 PARAMETER *p;
4454 char *alt = NULL, *src = NULL, *s;
4456 if(cmd == GF_RESET){
4457 if(PASS_HTML(hd->html_data)){
4458 html_output_raw_tag(hd->html_data, "img");
4460 else{
4461 for(p = HD(hd->html_data)->el_data->attribs;
4462 p && p->attribute;
4463 p = p->next)
4464 if(p->value && p->value[0]){
4465 if(!strucmp(p->attribute, "alt"))
4466 alt = p->value;
4467 if(!strucmp(p->attribute, "src"))
4468 src = p->value;
4472 * Multipart/Related Content ID pointer
4473 * ONLY attached messages are recognized
4474 * if we ever decide web bugs aren't a problem
4475 * anymore then we might expand the scope
4477 if(src
4478 && DO_HANDLES(hd->html_data)
4479 && RELATED_OK(hd->html_data)
4480 && struncmp(src, "cid:", 4) == 0){
4481 char buf[32];
4482 int i, n;
4483 HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4485 h->type = IMG;
4486 h->h.img.src = cpystr(src + 4);
4487 h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4489 HTML_TEXT(hd->html_data, TAG_EMBED);
4490 HTML_TEXT(hd->html_data, TAG_HANDLE);
4492 sprintf(buf, "%d", h->key);
4493 n = strlen(buf);
4494 HTML_TEXT(hd->html_data, n);
4495 for(i = 0; i < n; i++){
4496 unsigned int uic = buf[i];
4497 HTML_TEXT(hd->html_data, uic);
4500 return(0);
4502 else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4503 HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4504 HTML_TEXT(hd->html_data, ' ');
4505 return(0);
4507 else if(src
4508 && (s = strrindex(src, '/'))
4509 && *++s != '\0'){
4510 HTML_TEXT(hd->html_data, '[');
4511 HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4512 HTML_TEXT(hd->html_data, ']');
4513 HTML_TEXT(hd->html_data, ' ');
4514 return(0);
4517 /* text filler of last resort */
4518 HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4522 return(0); /* don't get linked */
4527 * HTML <MAP> (Image Map) element handler
4530 html_map(HANDLER_S *hd, int ch, int cmd)
4532 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4533 if(cmd == GF_DATA){
4534 html_handoff(hd, ch);
4536 else if(cmd == GF_RESET){
4537 html_output_raw_tag(hd->html_data, "map");
4539 else if(cmd == GF_EOD){
4540 html_output_string(hd->html_data, "</map>");
4543 return(1);
4546 return(0);
4551 * HTML <AREA> (Image Map Area) element handler
4554 html_area(HANDLER_S *hd, int ch, int cmd)
4556 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4557 if(cmd == GF_DATA){
4558 html_handoff(hd, ch);
4560 else if(cmd == GF_RESET){
4561 html_output_raw_tag(hd->html_data, "area");
4563 else if(cmd == GF_EOD){
4564 html_output_string(hd->html_data, "</area>");
4567 return(1);
4570 return(0);
4575 * HTML <FORM> (Form) element handler
4578 html_form(HANDLER_S *hd, int ch, int cmd)
4580 if(PASS_HTML(hd->html_data)){
4581 if(cmd == GF_DATA){
4582 html_handoff(hd, ch);
4584 else if(cmd == GF_RESET){
4585 PARAMETER **pp;
4587 /* SECURITY: make sure to redirect to new browser instance */
4588 for(pp = &(HD(hd->html_data)->el_data->attribs);
4589 *pp && (*pp)->attribute;
4590 pp = &(*pp)->next)
4591 if(!strucmp((*pp)->attribute, "target")){
4592 if((*pp)->value)
4593 fs_give((void **) &(*pp)->value);
4595 (*pp)->value = cpystr("_blank");
4598 if(!*pp){
4599 *pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4600 memset(*pp, 0, sizeof(PARAMETER));
4601 (*pp)->attribute = cpystr("target");
4602 (*pp)->value = cpystr("_blank");
4605 html_output_raw_tag(hd->html_data, "form");
4607 else if(cmd == GF_EOD){
4608 html_output_string(hd->html_data, "</form>");
4611 else{
4612 if(cmd == GF_RESET){
4613 html_blank(hd->html_data, 0);
4614 HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4615 html_blank(hd->html_data, 0);
4619 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4624 * HTML <INPUT> (Form) element handler
4627 html_input(HANDLER_S *hd, int ch, int cmd)
4629 if(PASS_HTML(hd->html_data)){
4630 if(cmd == GF_RESET){
4631 html_output_raw_tag(hd->html_data, "input");
4635 return(0); /* don't get linked */
4640 * HTML <BUTTON> (Form) element handler
4643 html_button(HANDLER_S *hd, int ch, int cmd)
4645 if(PASS_HTML(hd->html_data)){
4646 if(cmd == GF_DATA){
4647 html_handoff(hd, ch);
4649 else if(cmd == GF_RESET){
4650 html_output_raw_tag(hd->html_data, "button");
4652 else if(cmd == GF_EOD){
4653 html_output_string(hd->html_data, "</button>");
4656 return(1); /* get linked */
4659 return(0);
4664 * HTML <OPTION> (Form) element handler
4667 html_option(HANDLER_S *hd, int ch, int cmd)
4669 if(PASS_HTML(hd->html_data)){
4670 if(cmd == GF_DATA){
4671 html_handoff(hd, ch);
4673 else if(cmd == GF_RESET){
4674 html_output_raw_tag(hd->html_data, "option");
4676 else if(cmd == GF_EOD){
4677 html_output_string(hd->html_data, "</option>");
4680 return(1); /* get linked */
4683 return(0);
4688 * HTML <OPTGROUP> (Form) element handler
4691 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4693 if(PASS_HTML(hd->html_data)){
4694 if(cmd == GF_DATA){
4695 html_handoff(hd, ch);
4697 else if(cmd == GF_RESET){
4698 html_output_raw_tag(hd->html_data, "optgroup");
4700 else if(cmd == GF_EOD){
4701 html_output_string(hd->html_data, "</optgroup>");
4704 return(1); /* get linked */
4707 return(0);
4712 * HTML <SELECT> (Form) element handler
4715 html_select(HANDLER_S *hd, int ch, int cmd)
4717 if(PASS_HTML(hd->html_data)){
4718 if(cmd == GF_DATA){
4719 html_handoff(hd, ch);
4721 else if(cmd == GF_RESET){
4722 html_output_raw_tag(hd->html_data, "select");
4724 else if(cmd == GF_EOD){
4725 html_output_string(hd->html_data, "</select>");
4728 return(1); /* get linked */
4731 return(0);
4736 * HTML <TEXTAREA> (Form) element handler
4739 html_textarea(HANDLER_S *hd, int ch, int cmd)
4741 if(PASS_HTML(hd->html_data)){
4742 if(cmd == GF_DATA){
4743 html_handoff(hd, ch);
4745 else if(cmd == GF_RESET){
4746 html_output_raw_tag(hd->html_data, "textarea");
4748 else if(cmd == GF_EOD){
4749 html_output_string(hd->html_data, "</textarea>");
4752 return(1); /* get linked */
4755 return(0);
4760 * HTML <LABEL> (Form) element handler
4763 html_label(HANDLER_S *hd, int ch, int cmd)
4765 if(PASS_HTML(hd->html_data)){
4766 if(cmd == GF_DATA){
4767 html_handoff(hd, ch);
4769 else if(cmd == GF_RESET){
4770 html_output_raw_tag(hd->html_data, "label");
4772 else if(cmd == GF_EOD){
4773 html_output_string(hd->html_data, "</label>");
4776 return(1); /* get linked */
4779 return(0);
4784 * HTML <FIELDSET> (Form) element handler
4787 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4789 if(PASS_HTML(hd->html_data)){
4790 if(cmd == GF_DATA){
4791 html_handoff(hd, ch);
4793 else if(cmd == GF_RESET){
4794 html_output_raw_tag(hd->html_data, "fieldset");
4796 else if(cmd == GF_EOD){
4797 html_output_string(hd->html_data, "</fieldset>");
4800 return(1); /* get linked */
4803 return(0);
4808 * HTML <HEAD> element handler
4811 html_head(HANDLER_S *hd, int ch, int cmd)
4813 if(cmd == GF_DATA){
4814 html_handoff(hd, ch);
4816 else if(cmd == GF_RESET){
4817 HD(hd->html_data)->head = 1;
4819 else if(cmd == GF_EOD){
4820 HD(hd->html_data)->head = 0;
4823 return(1); /* get linked */
4828 * HTML <BASE> element handler
4831 html_base(HANDLER_S *hd, int ch, int cmd)
4833 if(cmd == GF_RESET){
4834 if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4835 PARAMETER *p;
4837 for(p = HD(hd->html_data)->el_data->attribs;
4838 p && p->attribute && strucmp(p->attribute, "HREF");
4839 p = p->next)
4842 if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4843 ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4847 return(0); /* DON'T get linked */
4852 * HTML <TITLE> element handler
4855 html_title(HANDLER_S *hd, int ch, int cmd)
4857 if(cmd == GF_DATA){
4858 if(hd->x + 1 >= hd->y){
4859 hd->y += 80;
4860 fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4863 hd->s[hd->x++] = (unsigned char) ch;
4865 else if(cmd == GF_RESET){
4866 hd->x = 0L;
4867 hd->y = 80L;
4868 hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4870 else if(cmd == GF_EOD){
4871 /* Down the road we probably want to give these bytes to
4872 * someone...
4874 hd->s[hd->x] = '\0';
4875 fs_give((void **)&hd->s);
4878 return(1); /* get linked */
4883 * HTML <BODY> element handler
4886 html_body(HANDLER_S *hd, int ch, int cmd)
/*
 * Data is handed down the stack.  The collector's "body" flag is set
 * at the start tag and cleared at the end tag.  In PASS_HTML mode the
 * element is written out as a "div" ... "</div>" pair, with any
 * collected color attributes folded into a "style" attribute first.
 * Always returns 1.
 */
4888 if(cmd == GF_DATA){
4889 html_handoff(hd, ch);
4891 else if(cmd == GF_RESET){
4892 if(PASS_HTML(hd->html_data)){
4893 PARAMETER *p, *tp;
4894 char **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4896 /* modify any attributes in a useful way? */
4897 for(p = HD(hd->html_data)->el_data->attribs;
4898 p && p->attribute;
4899 p = p->next)
4900 if(p->value){
4901 if(!strucmp(p->attribute, "style"))
4902 style = &p->value;
4903 else if(!strucmp(p->attribute, "text"))
4904 text = p->value;
/*
 * NOTE(review): the comment below says bgcolor is NOT passed, yet a
 * bgcolor branch follows it; confirm whether that branch is live or
 * sits inside the comment (in which case bgcolor always stays NULL).
 */
4906 * bgcolor NOT passed since user setting takes precedence
4908 else if(!strucmp(p->attribute, "bgcolor"))
4909 bgcolor = p->value;
4913 /* colors pretty much it */
4914 if(text || bgcolor){
/* no style attribute yet: prepend a new, empty one to the list */
4915 if(!style){
4916 tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4917 memset(tp, 0, sizeof(PARAMETER));
4918 tp->next = HD(hd->html_data)->el_data->attribs;
4919 HD(hd->html_data)->el_data->attribs = tp;
4920 tp->attribute = cpystr("style");
4922 tmp_20k_buf[0] = '\0';
4923 style = &tp->value;
4924 pcs = "%s%s%s%s%s";
/* existing style: copy it to the scratch buffer, free it, append after "; " */
4926 else{
4927 snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4928 fs_give((void **) style);
4929 pcs = "; %s%s%s%s%s";
/* append the color declarations and install the result as the new value */
4932 snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4933 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4934 pcs,
4935 (text) ? "color: " : "", (text) ? text : "",
4936 (text && bgcolor) ? ";" : "",
4937 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4938 *style = cpystr(tmp_20k_buf);
/* the body element itself is emitted as a div */
4941 html_output_raw_tag(hd->html_data, "div");
4944 HD(hd->html_data)->body = 1;
4946 else if(cmd == GF_EOD){
4947 if(PASS_HTML(hd->html_data)){
4948 html_output_string(hd->html_data, "</div>");
4951 HD(hd->html_data)->body = 0;
4954 return(1); /* get linked */
4959 * HTML <A> (Anchor) element handler
4962 html_a(HANDLER_S *hd, int ch, int cmd)
4964 if(cmd == GF_DATA){
4965 html_handoff(hd, ch);
4967 if(hd->dp) /* remember text within anchor tags */
4968 so_writec(ch, (STORE_S *) hd->dp);
4970 else if(cmd == GF_RESET){
4971 int i, n, x;
4972 char buf[256];
4973 HANDLE_S *h;
4974 PARAMETER *p, *href = NULL, *name = NULL;
4977 * Pending Anchor!?!?
4978 * space insertion/line breaking that's yet to get done...
4980 if(HD(hd->html_data)->prefix){
4981 dprint((2, "-- html error: nested or unterminated anchor\n"));
4982 html_a_finish(hd);
4986 * Look for valid Anchor data vis the filter installer's parms
4987 * (e.g., Only allow references to our internal URLs if asked)
4989 for(p = HD(hd->html_data)->el_data->attribs;
4990 p && p->attribute;
4991 p = p->next)
4992 if(!strucmp(p->attribute, "HREF")
4993 && p->value
4994 && (HANDLES_LOC(hd->html_data)
4995 || struncmp(p->value, "x-alpine-", 9)
4996 || struncmp(p->value, "x-pine-help", 11)
4997 || p->value[0] == '#'))
4998 href = p;
4999 else if(!strucmp(p->attribute, "NAME"))
5000 name = p;
5002 if(DO_HANDLES(hd->html_data) && (href || name)){
5003 h = new_handle(HANDLESP(hd->html_data));
5006 * Enhancement: we might want to get fancier and parse the
5007 * href a bit further such that we can launch images using
5008 * our image viewer, or browse local files or directories
5009 * with our internal tools. Of course, having the jump-off
5010 * point into text/html always be the defined "web-browser",
5011 * just might be the least confusing UI-wise...
5013 h->type = URL;
5015 if(name && name->value)
5016 h->h.url.name = cpystr(name->value);
5019 * Prepare to build embedded prefix...
5021 HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5022 x = 0;
5025 * Is this something that looks like a URL? If not and
5026 * we were giving some "base" string, proceed ala RFC1808...
5028 if(href){
5029 if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5030 html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5032 else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5033 h->h.url.path = cpystr(href->value);
5035 if(pico_usingcolor()){
5036 char *fg = NULL, *bg = NULL, *q;
5038 if(ps_global->VAR_SLCTBL_FORE_COLOR
5039 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5040 ps_global->VAR_NORM_FORE_COLOR))
5041 fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5043 if(ps_global->VAR_SLCTBL_BACK_COLOR
5044 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5045 ps_global->VAR_NORM_BACK_COLOR))
5046 bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5048 if(fg || bg){
5049 COLOR_PAIR *tmp;
5052 * The blacks are just known good colors for testing
5053 * whether the other color is good.
5055 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5056 bg ? bg : colorx(COL_BLACK));
5057 if(pico_is_good_colorpair(tmp)){
5058 q = color_embed(fg, bg);
5060 for(i = 0; q[i]; i++)
5061 HD(hd->html_data)->prefix[x++] = q[i];
5064 if(tmp)
5065 free_color_pair(&tmp);
5068 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5069 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5071 else
5072 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5075 HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5076 HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5078 snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5079 HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5080 for(i = 0; i < n; i++)
5081 HD(hd->html_data)->prefix[x++] = buf[i];
5083 HD(hd->html_data)->prefix_used = x;
5085 hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5088 else if(cmd == GF_EOD){
5089 html_a_finish(hd);
5092 return(1); /* get linked */
5096 void
5097 html_a_prefix(FILTER_S *f)
5099 int *prefix, n;
5101 /* Do this so we don't visit from html_output... */
5102 prefix = HD(f)->prefix;
5103 HD(f)->prefix = NULL;
5105 for(n = 0; n < HD(f)->prefix_used; n++)
5106 html_a_output_prefix(f, prefix[n]);
5108 fs_give((void **) &prefix);
5113 * html_a_finish - house keeping associated with end of link tag
5115 void
5116 html_a_finish(HANDLER_S *hd)
5118 if(DO_HANDLES(hd->html_data)){
5119 if(HD(hd->html_data)->prefix){
5120 if(!PASS_HTML(hd->html_data)){
5121 char *empty_link = "[LINK]";
5122 int i;
5124 html_a_prefix(hd->html_data);
5125 for(i = 0; empty_link[i]; i++)
5126 html_output(hd->html_data, empty_link[i]);
5130 if(pico_usingcolor()){
5131 char *fg = NULL, *bg = NULL, *p;
5132 int i;
5134 if(ps_global->VAR_SLCTBL_FORE_COLOR
5135 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5136 ps_global->VAR_NORM_FORE_COLOR))
5137 fg = ps_global->VAR_NORM_FORE_COLOR;
5139 if(ps_global->VAR_SLCTBL_BACK_COLOR
5140 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5141 ps_global->VAR_NORM_BACK_COLOR))
5142 bg = ps_global->VAR_NORM_BACK_COLOR;
5144 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5145 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5147 if(fg || bg){
5148 COLOR_PAIR *tmp;
5151 * The blacks are just known good colors for testing
5152 * whether the other color is good.
5154 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5155 bg ? bg : colorx(COL_BLACK));
5156 if(pico_is_good_colorpair(tmp)){
5157 p = color_embed(fg, bg);
5159 for(i = 0; p[i]; i++)
5160 html_output(hd->html_data, p[i]);
5163 if(tmp)
5164 free_color_pair(&tmp);
5167 else
5168 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5170 html_output(hd->html_data, TAG_EMBED);
5171 html_output(hd->html_data, TAG_HANDLEOFF);
5173 html_a_output_info(hd);
5179 * html_output_a_prefix - dump Anchor prefix data
5181 void
5182 html_a_output_prefix(FILTER_S *f, int c)
5184 switch(c){
5185 case HTML_DOBOLD :
5186 HTML_BOLD(f, 1);
5187 break;
5189 default :
5190 html_output(f, c);
5191 break;
5198 * html_a_output_info - dump possibly deceptive link info into text.
5199 * phark the phishers.
5201 void
5202 html_a_output_info(HANDLER_S *hd)
5204 int l, risky = 0, hl = 0, tl;
5205 char *url = NULL, *hn = NULL, *txt;
5206 HANDLE_S *h;
5208 /* find host anchor references */
5209 if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5210 && h->h.url.path != NULL
5211 && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5212 && (hn = srchstr(hn,"://")) != NULL){
5214 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5218 if(hn && hl){
5220 * look over anchor's text to see if there's a
5221 * mismatch between href target and url-ish
5222 * looking text. throw a red flag if so.
5223 * similarly, toss one if the target's referenced
5224 * by a
5226 if(hd->dp){
5227 so_writec('\0', (STORE_S *) hd->dp);
5229 if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5230 && (txt = rfc1738_scan(txt, &tl)) != NULL
5231 && (txt = srchstr(txt,"://")) != NULL){
5233 for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5236 if(tl != hl)
5237 risky++;
5238 else
5239 /* look for non matching text */
5240 for(l = 0; l < tl && l < hl; l++)
5241 if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5242 risky++;
5243 break;
5247 so_give((STORE_S **) &hd->dp);
5250 /* look for literal IP, anything possibly encoded or auth specifier */
5251 if(!risky){
5252 int digits = 1;
5254 for(l = 0; l < hl; l++){
5255 if(hn[l] == '@' || hn[l] == '%'){
5256 risky++;
5257 break;
5259 else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5260 digits = 0;
5263 if(digits)
5264 risky++;
5267 /* Insert text of link's domain */
5268 if(SHOWSERVER(hd->html_data)){
5269 char *q;
5270 COLOR_PAIR *col = NULL, *colnorm = NULL;
5272 html_output(hd->html_data, ' ');
5273 html_output(hd->html_data, '[');
5275 if(pico_usingcolor()
5276 && ps_global->VAR_METAMSG_FORE_COLOR
5277 && ps_global->VAR_METAMSG_BACK_COLOR
5278 && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5279 ps_global->VAR_METAMSG_BACK_COLOR))){
5280 if(!pico_is_good_colorpair(col))
5281 free_color_pair(&col);
5283 if(col){
5284 q = color_embed(col->fg, col->bg);
5286 for(l = 0; q[l]; l++)
5287 html_output(hd->html_data, q[l]);
5291 for(l = 0; l < hl; l++)
5292 html_output(hd->html_data, hn[l]);
5294 if(col){
5295 if(ps_global->VAR_NORM_FORE_COLOR
5296 && ps_global->VAR_NORM_BACK_COLOR
5297 && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5298 ps_global->VAR_NORM_BACK_COLOR))){
5299 if(!pico_is_good_colorpair(colnorm))
5300 free_color_pair(&colnorm);
5302 if(colnorm){
5303 q = color_embed(colnorm->fg, colnorm->bg);
5304 free_color_pair(&colnorm);
5306 for(l = 0; q[l]; l++)
5307 html_output(hd->html_data, q[l]);
5311 free_color_pair(&col);
5314 html_output(hd->html_data, ']');
5319 * if things look OK so far, make sure nothing within
5320 * the url looks too fishy...
5322 while(!risky && hn
5323 && (hn = rfc1738_scan(hn, &l)) != NULL
5324 && (hn = srchstr(hn,"://")) != NULL){
5325 int digits = 1;
5327 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5329 * auth spec, encoded characters, or possibly non-standard port
5330 * should raise a red flag
5332 if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5333 risky++;
5334 break;
5336 else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5337 digits = 0;
5340 /* dotted-dec/raw-int address should cause suspicion as well */
5341 if(digits)
5342 risky++;
5345 if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5346 (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
5348 if(hd->dp)
5349 so_give((STORE_S **) &hd->dp);
5352 fs_give((void **) &url);
5358 * relative_url - put full url path in h based on base and relative url
5360 void
5361 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5363 size_t len;
5364 char tmp[MAILTMPLEN], *p, *q;
5365 char *scheme = NULL, *net = NULL, *path = NULL,
5366 *parms = NULL, *query = NULL, *frag = NULL,
5367 *base_scheme = NULL, *base_net_loc = NULL,
5368 *base_path = NULL, *base_parms = NULL,
5369 *base_query = NULL, *base_frag = NULL,
5370 *rel_scheme = NULL, *rel_net_loc = NULL,
5371 *rel_path = NULL, *rel_parms = NULL,
5372 *rel_query = NULL, *rel_frag = NULL;
5374 /* Rough parse of base URL */
5375 rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5376 &base_parms, &base_query, &base_frag);
5378 /* Rough parse of this URL */
5379 rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5380 &rel_parms, &rel_query, &rel_frag);
5382 scheme = rel_scheme; /* defaults */
5383 net = rel_net_loc;
5384 path = rel_path;
5385 parms = rel_parms;
5386 query = rel_query;
5387 frag = rel_frag;
5388 if(!scheme && base_scheme){
5389 scheme = base_scheme;
5390 if(!net){
5391 net = base_net_loc;
5392 if(path){
5393 if(*path != '/'){
5394 if(base_path){
5395 for(p = q = base_path; /* Drop base path's tail */
5396 (p = strchr(p, '/'));
5397 q = ++p)
5400 len = q - base_path;
5402 else
5403 len = 0;
5405 if(len + strlen(rel_path) < sizeof(tmp)-1){
5406 if(len)
5407 snprintf(path = tmp, sizeof(tmp), "%.*s", (int) len, base_path);
5409 strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5410 tmp[sizeof(tmp)-1] = '\0';
5412 /* Follow RFC 1808 "Step 6" */
5413 for(p = tmp; (p = strchr(p, '.')); )
5414 switch(*(p+1)){
5416 * a) All occurrences of "./", where "." is a
5417 * complete path segment, are removed.
5419 case '/' :
5420 if(p > tmp)
5421 for(q = p; (*q = *(q+2)) != '\0'; q++)
5423 else
5424 p++;
5426 break;
5429 * b) If the path ends with "." as a
5430 * complete path segment, that "." is
5431 * removed.
5433 case '\0' :
5434 if(p == tmp || *(p-1) == '/')
5435 *p = '\0';
5436 else
5437 p++;
5439 break;
5442 * c) All occurrences of "<segment>/../",
5443 * where <segment> is a complete path
5444 * segment not equal to "..", are removed.
5445 * Removal of these path segments is
5446 * performed iteratively, removing the
5447 * leftmost matching pattern on each
5448 * iteration, until no matching pattern
5449 * remains.
5451 * d) If the path ends with "<segment>/..",
5452 * where <segment> is a complete path
5453 * segment not equal to "..", that
5454 * "<segment>/.." is removed.
5456 case '.' :
5457 if(p > tmp + 1){
5458 for(q = p - 2; q > tmp && *q != '/'; q--)
5461 if(*q == '/')
5462 q++;
5464 if(q + 1 == p /* no "//.." */
5465 || (*q == '.' /* and "../.." */
5466 && *(q+1) == '.'
5467 && *(q+2) == '/')){
5468 p += 2;
5469 break;
5472 switch(*(p+2)){
5473 case '/' :
5474 len = (p - q) + 3;
5475 p = q;
5476 for(; (*q = *(q+len)) != '\0'; q++)
5479 break;
5481 case '\0':
5482 *(p = q) = '\0';
5483 break;
5485 default:
5486 p += 2;
5487 break;
5490 else
5491 p += 2;
5493 break;
5495 default :
5496 p++;
5497 break;
5500 else
5501 path = ""; /* lame. */
5504 else{
5505 path = base_path;
5506 if(!parms){
5507 parms = base_parms;
5508 if(!query)
5509 query = base_query;
5515 len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5516 + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5517 + (query ? strlen(query) : 0) + (frag ? strlen(frag ) : 0) + 8;
5519 h->h.url.path = (char *) fs_get(len * sizeof(char));
5520 snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5521 scheme ? scheme : "", scheme ? ":" : "",
5522 net ? "//" : "", net ? net : "",
5523 (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5524 path ? path : "",
5525 parms ? ";" : "", parms ? parms : "",
5526 query ? "?" : "", query ? query : "",
5527 frag ? "#" : "", frag ? frag : "");
5529 if(base_scheme)
5530 fs_give((void **) &base_scheme);
5532 if(base_net_loc)
5533 fs_give((void **) &base_net_loc);
5535 if(base_path)
5536 fs_give((void **) &base_path);
5538 if(base_parms)
5539 fs_give((void **) &base_parms);
5541 if(base_query)
5542 fs_give((void **) &base_query);
5544 if(base_frag)
5545 fs_give((void **) &base_frag);
5547 if(rel_scheme)
5548 fs_give((void **) &rel_scheme);
5550 if(rel_net_loc)
5551 fs_give((void **) &rel_net_loc);
5553 if(rel_parms)
5554 fs_give((void **) &rel_parms);
5556 if(rel_query)
5557 fs_give((void **) &rel_query);
5559 if(rel_frag)
5560 fs_give((void **) &rel_frag);
5562 if(rel_path)
5563 fs_give((void **) &rel_path);
5568 * html_href_relative - href
5571 html_href_relative(char *url)
5573 int i;
5575 if(url)
5576 for(i = 0; i < 32 && url[i]; i++)
5577 if(!(isalpha((unsigned char) url[i]) || url[i] == '_' || url[i] == '-')){
5578 if(url[i] == ':')
5579 return(FALSE);
5580 else
5581 break;
5584 return(TRUE);
5589 * HTML <UL> (Unordered List) element handler
5592 html_ul(HANDLER_S *hd, int ch, int cmd)
5594 if(cmd == GF_DATA){
5595 html_handoff(hd, ch);
5597 else if(cmd == GF_RESET){
5598 if(PASS_HTML(hd->html_data)){
5599 html_output_raw_tag(hd->html_data, "ul");
5601 else{
5602 HD(hd->html_data)->li_pending = 1;
5603 html_blank(hd->html_data, 0);
5606 else if(cmd == GF_EOD){
5607 if(PASS_HTML(hd->html_data)){
5608 html_output_string(hd->html_data, "</ul>");
5610 else{
5611 html_blank(hd->html_data, 0);
5613 if(!HD(hd->html_data)->li_pending)
5614 html_indent(hd->html_data, -4, HTML_ID_INC);
5615 else
5616 HD(hd->html_data)->li_pending = 0;
5620 return(1); /* get linked */
5625 * HTML <OL> (Ordered List) element handler
5628 html_ol(HANDLER_S *hd, int ch, int cmd)
5630 if(cmd == GF_DATA){
5631 html_handoff(hd, ch);
5633 else if(cmd == GF_RESET){
5634 if(PASS_HTML(hd->html_data)){
5635 html_output_raw_tag(hd->html_data, "ol");
5637 else{
5638 PARAMETER *p;
5640 * Signal that we're expecting to see <LI> as our next elemnt
5641 * and set the the initial ordered count.
5643 hd->x = 1L; /* set default */
5644 hd->y = LIST_DECIMAL; /* set default */
5645 for(p = HD(hd->html_data)->el_data->attribs;
5646 p && p->attribute;
5647 p = p->next)
5648 if(p->value){
5649 if(!strucmp(p->attribute, "TYPE")){
5650 if(!strucmp(p->value, "a")) /* alpha, lowercase */
5651 hd->y = LIST_ALPHALO;
5652 else if(!strucmp(p->value, "A")) /* alpha, uppercase */
5653 hd->y = LIST_ALPHAUP;
5654 else if(!strucmp(p->value, "i")) /* roman, lowercase */
5655 hd->y = LIST_ROMANLO;
5656 else if(!strucmp(p->value, "I")) /* roman, uppercase */
5657 hd->y = LIST_ROMANUP;
5658 else if(strucmp(p->value, "1")) /* decimal, the default */
5659 hd->y = LIST_UNKNOWN;
5661 else if(!strucmp(p->attribute, "START"))
5662 hd->x = atol(p->value);
5663 // else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5664 // this is not so simple. The main missing support
5665 // is for the STYLE attribute, but implementing that
5666 // correctly will take time, so will be implemented
5667 // after version 2.21 is released.
5669 HD(hd->html_data)->li_pending = 1;
5670 html_blank(hd->html_data, 0);
5673 else if(cmd == GF_EOD){
5674 if(PASS_HTML(hd->html_data)){
5675 html_output_string(hd->html_data, "</ol>");
5677 else{
5678 html_blank(hd->html_data, 0);
5680 if(!HD(hd->html_data)->li_pending)
5681 html_indent(hd->html_data, -4, HTML_ID_INC);
5682 else
5683 HD(hd->html_data)->li_pending = 0;
5687 return(1); /* get linked */
5692 * HTML <MENU> (Menu List) element handler
5695 html_menu(HANDLER_S *hd, int ch, int cmd)
5697 if(cmd == GF_DATA){
5698 html_handoff(hd, ch);
5700 else if(cmd == GF_RESET){
5701 if(PASS_HTML(hd->html_data)){
5702 html_output_raw_tag(hd->html_data, "menu");
5704 else{
5705 HD(hd->html_data)->li_pending = 1;
5708 else if(cmd == GF_EOD){
5709 if(PASS_HTML(hd->html_data)){
5710 html_output_string(hd->html_data, "</menu>");
5712 else{
5713 html_blank(hd->html_data, 0);
5715 if(!HD(hd->html_data)->li_pending)
5716 html_indent(hd->html_data, -4, HTML_ID_INC);
5717 else
5718 HD(hd->html_data)->li_pending = 0;
5722 return(1); /* get linked */
5727 * HTML <DIR> (Directory List) element handler
5730 html_dir(HANDLER_S *hd, int ch, int cmd)
5732 if(cmd == GF_DATA){
5733 html_handoff(hd, ch);
5735 else if(cmd == GF_RESET){
5736 if(PASS_HTML(hd->html_data)){
5737 html_output_raw_tag(hd->html_data, "dir");
5739 else{
5740 HD(hd->html_data)->li_pending = 1;
5743 else if(cmd == GF_EOD){
5744 if(PASS_HTML(hd->html_data)){
5745 html_output_string(hd->html_data, "</dir>");
5747 else{
5748 html_blank(hd->html_data, 0);
5750 if(!HD(hd->html_data)->li_pending)
5751 html_indent(hd->html_data, -4, HTML_ID_INC);
5752 else
5753 HD(hd->html_data)->li_pending = 0;
5757 return(1); /* get linked */
5762 * HTML <LI> (List Item) element handler
5765 html_li(HANDLER_S *hd, int ch, int cmd)
5767 if(cmd == GF_DATA){
5768 if(PASS_HTML(hd->html_data)){
5769 html_handoff(hd, ch);
5772 else if(cmd == GF_RESET){
5773 HANDLER_S *p, *found = NULL;
5776 * There better be a an unordered list, ordered list,
5777 * Menu or Directory handler installed
5778 * or else we crap out...
5780 for(p = HANDLERS(hd->html_data); p; p = p->below)
5781 if(EL(p)->handler == html_ul
5782 || EL(p)->handler == html_ol
5783 || EL(p)->handler == html_menu
5784 || EL(p)->handler == html_dir){
5785 found = p;
5786 break;
5789 if(found){
5790 if(PASS_HTML(hd->html_data)){
5792 else{
5793 char buf[16], tmp[16], *p;
5794 int wrapstate;
5796 /* Start a new line */
5797 html_blank(hd->html_data, 0);
5799 /* adjust indent level if needed */
5800 if(HD(hd->html_data)->li_pending){
5801 html_indent(hd->html_data, 4, HTML_ID_INC);
5802 HD(hd->html_data)->li_pending = 0;
5805 if(EL(found)->handler == html_ul){
5806 int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5808 strncpy(buf, " ", sizeof(buf));
5809 buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5811 else if(EL(found)->handler == html_ol){
5812 if(found->y == LIST_DECIMAL || found->y == LIST_UNKNOWN)
5813 snprintf(tmp, sizeof(tmp), "%ld", found->x++);
5814 else if(found->y == LIST_ALPHALO)
5815 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'a');
5816 else if(found->y == LIST_ALPHAUP)
5817 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'A');
5818 else if(found->y == LIST_ROMANLO)
5819 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'i');
5820 else if(found->y == LIST_ROMANUP)
5821 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'I');
5822 snprintf(buf, sizeof(buf), " %s.", tmp);
5823 buf[sizeof(buf)-1] = '\0';
5825 else if(EL(found)->handler == html_menu){
5826 strncpy(buf, " ->", sizeof(buf));
5827 buf[sizeof(buf)-1] = '\0';
5830 html_indent(hd->html_data, -4, HTML_ID_INC);
5832 /* So we don't munge whitespace */
5833 wrapstate = HD(hd->html_data)->wrapstate;
5834 HD(hd->html_data)->wrapstate = 0;
5836 html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5837 for(p = buf; *p; p++)
5838 html_output(hd->html_data, (int) *p);
5839 HD(hd->html_data)->wrapstate = wrapstate;
5840 html_indent(hd->html_data, 4, HTML_ID_INC);
5842 /* else BUG: should really bitch about this */
5845 if(PASS_HTML(hd->html_data)){
5846 html_output_raw_tag(hd->html_data, "li");
5847 return(1); /* get linked */
5850 else if(cmd == GF_EOD){
5851 if(PASS_HTML(hd->html_data)){
5852 html_output_string(hd->html_data, "</li>");
5856 return(PASS_HTML(hd->html_data)); /* DON'T get linked */
5861 * HTML <DL> (Definition List) element handler
5864 html_dl(HANDLER_S *hd, int ch, int cmd)
5866 if(cmd == GF_DATA){
5867 html_handoff(hd, ch);
5869 else if(cmd == GF_RESET){
5870 if(PASS_HTML(hd->html_data)){
5871 html_output_raw_tag(hd->html_data, "dl");
5873 else{
5875 * Set indention level for definition terms and definitions...
5877 hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5878 hd->y = hd->x + 2;
5879 hd->z = hd->y + 4;
5882 else if(cmd == GF_EOD){
5883 if(PASS_HTML(hd->html_data)){
5884 html_output_string(hd->html_data, "</dl>");
5886 else{
5887 html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5888 html_blank(hd->html_data, 1);
5892 return(1); /* get linked */
5897 * HTML <DT> (Definition Term) element handler
5900 html_dt(HANDLER_S *hd, int ch, int cmd)
5902 if(PASS_HTML(hd->html_data)){
5903 if(cmd == GF_DATA){
5904 html_handoff(hd, ch);
5906 else if(cmd == GF_RESET){
5907 html_output_raw_tag(hd->html_data, "dt");
5909 else if(cmd == GF_EOD){
5910 html_output_string(hd->html_data, "</dt>");
5913 return(1); /* get linked */
5916 if(cmd == GF_RESET){
5917 HANDLER_S *p;
5920 * There better be a Definition Handler installed
5921 * or else we crap out...
5923 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5926 if(p){ /* adjust indent level if needed */
5927 html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5928 html_blank(hd->html_data, 1);
5930 /* BUG: else should really bitch about this */
5933 return(0); /* DON'T get linked */
5938 * HTML <DD> (Definition Definition) element handler
5941 html_dd(HANDLER_S *hd, int ch, int cmd)
5943 if(PASS_HTML(hd->html_data)){
5944 if(cmd == GF_DATA){
5945 html_handoff(hd, ch);
5947 else if(cmd == GF_RESET){
5948 html_output_raw_tag(hd->html_data, "dd");
5950 else if(cmd == GF_EOD){
5951 html_output_string(hd->html_data, "</dd>");
5954 return(1); /* get linked */
5957 if(cmd == GF_RESET){
5958 HANDLER_S *p;
5961 * There better be a Definition Handler installed
5962 * or else we crap out...
5964 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5967 if(p){ /* adjust indent level if needed */
5968 html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5969 html_blank(hd->html_data, 0);
5971 /* BUG: should really bitch about this */
5974 return(0); /* DON'T get linked */
5979 * HTML <H1> (Headings 1) element handler.
5981 * Bold, very-large font, CENTERED. One or two blank lines
5982 * above and below. For our silly character cell's that
5983 * means centered and ALL CAPS...
5986 html_h1(HANDLER_S *hd, int ch, int cmd)
5988 if(cmd == GF_DATA){
5989 html_handoff(hd, ch);
5991 else if(cmd == GF_RESET){
5992 if(PASS_HTML(hd->html_data)){
5993 html_output_raw_tag(hd->html_data, "h1");
5995 else{
5996 /* turn ON the centered bit */
5997 CENTER_BIT(hd->html_data) = 1;
6000 else if(cmd == GF_EOD){
6001 if(PASS_HTML(hd->html_data)){
6002 html_output_string(hd->html_data, "</h1>");
6004 else{
6005 /* turn OFF the centered bit, add blank line */
6006 CENTER_BIT(hd->html_data) = 0;
6007 html_blank(hd->html_data, 1);
6011 return(1); /* get linked */
6016 * HTML <H2> (Headings 2) element handler
6019 html_h2(HANDLER_S *hd, int ch, int cmd)
6021 if(cmd == GF_DATA){
6022 if(PASS_HTML(hd->html_data)){
6023 html_handoff(hd, ch);
6025 else{
6026 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6027 HTML_ULINE(hd->html_data, 1);
6028 hd->x ^= HTML_HX_ULINE; /* only once! */
6031 html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
6032 ? toupper((unsigned char) ch) : ch);
6035 else if(cmd == GF_RESET){
6036 if(PASS_HTML(hd->html_data)){
6037 html_output_raw_tag(hd->html_data, "h2");
6039 else{
6041 * Bold, large font, flush-left. One or two blank lines
6042 * above and below.
6044 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6045 hd->x = HTML_HX_CENTER;
6046 else
6047 hd->x = 0;
6049 hd->x |= HTML_HX_ULINE;
6051 CENTER_BIT(hd->html_data) = 0;
6052 hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6053 hd->z = HD(hd->html_data)->wrapcol;
6054 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6055 html_blank(hd->html_data, 1);
6058 else if(cmd == GF_EOD){
6059 if(PASS_HTML(hd->html_data)){
6060 html_output_string(hd->html_data, "</h2>");
6062 else{
6064 * restore previous centering, and indent level
6066 if(!(hd->x & HTML_HX_ULINE))
6067 HTML_ULINE(hd->html_data, 0);
6069 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6070 html_blank(hd->html_data, 1);
6071 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6072 HD(hd->html_data)->wrapcol = hd->z;
6076 return(1); /* get linked */
6081 * HTML <H3> (Headings 3) element handler
6084 html_h3(HANDLER_S *hd, int ch, int cmd)
6086 if(cmd == GF_DATA){
6087 if(!PASS_HTML(hd->html_data)){
6088 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6089 HTML_ULINE(hd->html_data, 1);
6090 hd->x ^= HTML_HX_ULINE; /* only once! */
6094 html_handoff(hd, ch);
6096 else if(cmd == GF_RESET){
6097 if(PASS_HTML(hd->html_data)){
6098 html_output_raw_tag(hd->html_data, "h3");
6100 else{
6102 * Italic, large font, slightly indented from the left
6103 * margin. One or two blank lines above and below.
6105 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6106 hd->x = HTML_HX_CENTER;
6107 else
6108 hd->x = 0;
6110 hd->x |= HTML_HX_ULINE;
6111 CENTER_BIT(hd->html_data) = 0;
6112 hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6113 hd->z = HD(hd->html_data)->wrapcol;
6114 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6115 html_blank(hd->html_data, 1);
6118 else if(cmd == GF_EOD){
6119 if(PASS_HTML(hd->html_data)){
6120 html_output_string(hd->html_data, "</h3>");
6122 else{
6124 * restore previous centering, and indent level
6126 if(!(hd->x & HTML_HX_ULINE))
6127 HTML_ULINE(hd->html_data, 0);
6129 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6130 html_blank(hd->html_data, 1);
6131 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6132 HD(hd->html_data)->wrapcol = hd->z;
6136 return(1); /* get linked */
6141 * HTML <H4> (Headings 4) element handler
6144 html_h4(HANDLER_S *hd, int ch, int cmd)
6146 if(cmd == GF_DATA){
6147 html_handoff(hd, ch);
6149 else if(cmd == GF_RESET){
6150 if(PASS_HTML(hd->html_data)){
6151 html_output_raw_tag(hd->html_data, "h4");
6153 else{
6155 * Bold, normal font, indented more than H3. One blank line
6156 * above and below.
6158 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6159 CENTER_BIT(hd->html_data) = 0;
6160 hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6161 hd->z = HD(hd->html_data)->wrapcol;
6162 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6163 html_blank(hd->html_data, 1);
6166 else if(cmd == GF_EOD){
6167 if(PASS_HTML(hd->html_data)){
6168 html_output_string(hd->html_data, "</h4>");
6170 else{
6172 * restore previous centering, and indent level
6174 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6175 html_blank(hd->html_data, 1);
6176 CENTER_BIT(hd->html_data) = hd->x;
6177 HD(hd->html_data)->wrapcol = hd->z;
6181 return(1); /* get linked */
6186 * HTML <H5> (Headings 5) element handler
6189 html_h5(HANDLER_S *hd, int ch, int cmd)
6191 if(cmd == GF_DATA){
6192 html_handoff(hd, ch);
6194 else if(cmd == GF_RESET){
6195 if(PASS_HTML(hd->html_data)){
6196 html_output_raw_tag(hd->html_data, "h5");
6198 else{
6200 * Italic, normal font, indented as H4. One blank line
6201 * above.
6203 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6204 CENTER_BIT(hd->html_data) = 0;
6205 hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6206 hd->z = HD(hd->html_data)->wrapcol;
6207 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6208 html_blank(hd->html_data, 1);
6211 else if(cmd == GF_EOD){
6212 if(PASS_HTML(hd->html_data)){
6213 html_output_string(hd->html_data, "</h5>");
6215 else{
6217 * restore previous centering, and indent level
6219 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6220 html_blank(hd->html_data, 1);
6221 CENTER_BIT(hd->html_data) = hd->x;
6222 HD(hd->html_data)->wrapcol = hd->z;
6226 return(1); /* get linked */
6231 * HTML <H6> (Headings 6) element handler
6234 html_h6(HANDLER_S *hd, int ch, int cmd)
6236 if(cmd == GF_DATA){
6237 html_handoff(hd, ch);
6239 else if(cmd == GF_RESET){
6240 if(PASS_HTML(hd->html_data)){
6241 html_output_raw_tag(hd->html_data, "h6");
6243 else{
6245 * Bold, indented same as normal text, more than H5. One
6246 * blank line above.
6248 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6249 CENTER_BIT(hd->html_data) = 0;
6250 hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6251 hd->z = HD(hd->html_data)->wrapcol;
6252 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6253 html_blank(hd->html_data, 1);
6256 else if(cmd == GF_EOD){
6257 if(PASS_HTML(hd->html_data)){
6258 html_output_string(hd->html_data, "</h6>");
6260 else{
6262 * restore previous centering, and indent level
6264 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6265 html_blank(hd->html_data, 1);
6266 CENTER_BIT(hd->html_data) = hd->x;
6267 HD(hd->html_data)->wrapcol = hd->z;
6271 return(1); /* get linked */
6276 * HTML <BlockQuote> element handler
6279 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6281 int j;
6282 #define HTML_BQ_INDENT 6
6284 if(cmd == GF_DATA){
6285 html_handoff(hd, ch);
6287 else if(cmd == GF_RESET){
6288 if(PASS_HTML(hd->html_data)){
6289 html_output_raw_tag(hd->html_data, "blockquote");
6291 else{
6293 * A typical rendering might be a slight extra left and
6294 * right indent, and/or italic font. The Blockquote element
6295 * causes a paragraph break, and typically provides space
6296 * above and below the quote.
6298 html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6299 j = HD(hd->html_data)->wrapstate;
6300 HD(hd->html_data)->wrapstate = 0;
6301 html_blank(hd->html_data, 1);
6302 HD(hd->html_data)->wrapstate = j;
6303 HD(hd->html_data)->wrapcol -= HTML_BQ_INDENT;
6306 else if(cmd == GF_EOD){
6307 if(PASS_HTML(hd->html_data)){
6308 html_output_string(hd->html_data, "</blockquote>");
6310 else{
6311 html_blank(hd->html_data, 1);
6313 j = HD(hd->html_data)->wrapstate;
6314 HD(hd->html_data)->wrapstate = 0;
6315 html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6316 HD(hd->html_data)->wrapstate = j;
6317 HD(hd->html_data)->wrapcol += HTML_BQ_INDENT;
6321 return(1); /* get linked */
6326 * HTML <Address> element handler
6329 html_address(HANDLER_S *hd, int ch, int cmd)
6331 int j;
6332 #define HTML_ADD_INDENT 2
6334 if(cmd == GF_DATA){
6335 html_handoff(hd, ch);
6337 else if(cmd == GF_RESET){
6338 if(PASS_HTML(hd->html_data)){
6339 html_output_raw_tag(hd->html_data, "address");
6341 else{
6343 * A typical rendering might be a slight extra left and
6344 * right indent, and/or italic font. The Blockquote element
6345 * causes a paragraph break, and typically provides space
6346 * above and below the quote.
6348 html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6349 j = HD(hd->html_data)->wrapstate;
6350 HD(hd->html_data)->wrapstate = 0;
6351 html_blank(hd->html_data, 1);
6352 HD(hd->html_data)->wrapstate = j;
6355 else if(cmd == GF_EOD){
6356 if(PASS_HTML(hd->html_data)){
6357 html_output_string(hd->html_data, "</address>");
6359 else{
6360 html_blank(hd->html_data, 1);
6362 j = HD(hd->html_data)->wrapstate;
6363 HD(hd->html_data)->wrapstate = 0;
6364 html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6365 HD(hd->html_data)->wrapstate = j;
6369 return(1); /* get linked */
6374 * HTML <PRE> (Preformatted Text) element handler
6377 html_pre(HANDLER_S *hd, int ch, int cmd)
6379 if(cmd == GF_DATA){
6381 * remove CRLF after '>' in element.
6382 * We see CRLF because wrapstate is off.
6384 switch(hd->y){
6385 case 2 :
6386 if(ch == '\012'){
6387 hd->y = 3;
6388 return(1);
6390 else
6391 html_handoff(hd, '\015');
6393 break;
6395 case 1 :
6396 if(ch == '\015'){
6397 hd->y = 2;
6398 return(1);
6401 case 3 :
6402 /* passing tags? replace CRLF with <BR> to make
6403 * sure hard newline survives in the end...
6405 if(PASS_HTML(hd->html_data))
6406 hd->y = 4; /* keep looking for CRLF */
6407 else
6408 hd->y = 0; /* stop looking */
6410 break;
6412 case 4 :
6413 if(ch == '\015'){
6414 hd->y = 5;
6415 return(1);
6418 break;
6420 case 5 :
6421 hd->y = 4;
6422 if(ch == '\012'){
6423 html_output_string(hd->html_data, "<br />");
6424 return(1);
6426 else
6427 html_handoff(hd, '\015'); /* not CRLF, pass raw CR */
6429 break;
6431 default : /* zero case */
6432 break;
6435 html_handoff(hd, ch);
6437 else if(cmd == GF_RESET){
6438 hd->y = 1;
6439 if(PASS_HTML(hd->html_data)){
6440 html_output_raw_tag(hd->html_data, "pre");
6442 else{
6443 if(hd->html_data)
6444 hd->html_data->f1 = DFL; \
6446 html_blank(hd->html_data, 1);
6447 hd->x = HD(hd->html_data)->wrapstate;
6448 HD(hd->html_data)->wrapstate = 0;
6451 else if(cmd == GF_EOD){
6452 if(PASS_HTML(hd->html_data)){
6453 html_output_string(hd->html_data, "</pre>");
6455 else{
6456 HD(hd->html_data)->wrapstate = (hd->x != 0);
6457 html_blank(hd->html_data, 0);
6461 return(1);
6466 * HTML <CENTER> (Centerd Text) element handler
6469 html_center(HANDLER_S *hd, int ch, int cmd)
6471 if(cmd == GF_DATA){
6472 html_handoff(hd, ch);
6474 else if(cmd == GF_RESET){
6475 if(PASS_HTML(hd->html_data)){
6476 html_output_raw_tag(hd->html_data, "center");
6478 else{
6479 /* turn ON the centered bit */
6480 CENTER_BIT(hd->html_data) = 1;
6483 else if(cmd == GF_EOD){
6484 if(PASS_HTML(hd->html_data)){
6485 html_output_string(hd->html_data, "</center>");
6487 else{
6488 /* turn OFF the centered bit */
6489 CENTER_BIT(hd->html_data) = 0;
6493 return(1);
6498 * HTML <DIV> (Document Divisions) element handler
6501 html_div(HANDLER_S *hd, int ch, int cmd)
6503 if(cmd == GF_DATA){
6504 html_handoff(hd, ch);
6506 else if(cmd == GF_RESET){
6507 if(PASS_HTML(hd->html_data)){
6508 html_output_raw_tag(hd->html_data, "div");
6510 else{
6511 PARAMETER *p;
6513 for(p = HD(hd->html_data)->el_data->attribs;
6514 p && p->attribute;
6515 p = p->next)
6516 if(!strucmp(p->attribute, "ALIGN")){
6517 if(p->value){
6518 /* remember previous values */
6519 hd->x = CENTER_BIT(hd->html_data);
6520 hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6522 html_blank(hd->html_data, 0);
6523 CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6524 html_indent(hd->html_data, 0, HTML_ID_SET);
6525 /* NOTE: "RIGHT" not supported yet */
6530 else if(cmd == GF_EOD){
6531 if(PASS_HTML(hd->html_data)){
6532 html_output_string(hd->html_data, "</div>");
6534 else{
6535 /* restore centered bit and indentiousness */
6536 CENTER_BIT(hd->html_data) = hd->y;
6537 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6538 html_blank(hd->html_data, 0);
6542 return(1);
6547 * HTML <SPAN> (Text Span) element handler
6550 html_span(HANDLER_S *hd, int ch, int cmd)
6552 if(PASS_HTML(hd->html_data)){
6553 if(cmd == GF_DATA){
6554 html_handoff(hd, ch);
6556 else if(cmd == GF_RESET){
6557 html_output_raw_tag(hd->html_data, "span");
6559 else if(cmd == GF_EOD){
6560 html_output_string(hd->html_data, "</span>");
6563 return(1);
6566 return(0);
6571 * HTML <KBD> (Text Kbd) element handler
6574 html_kbd(HANDLER_S *hd, int ch, int cmd)
6576 if(PASS_HTML(hd->html_data)){
6577 if(cmd == GF_DATA){
6578 html_handoff(hd, ch);
6580 else if(cmd == GF_RESET){
6581 html_output_raw_tag(hd->html_data, "kbd");
6583 else if(cmd == GF_EOD){
6584 html_output_string(hd->html_data, "</kbd>");
6587 return(1);
6590 return(0);
6595 * HTML <DFN> (Text Definition) element handler
6598 html_dfn(HANDLER_S *hd, int ch, int cmd)
6600 if(PASS_HTML(hd->html_data)){
6601 if(cmd == GF_DATA){
6602 html_handoff(hd, ch);
6604 else if(cmd == GF_RESET){
6605 html_output_raw_tag(hd->html_data, "dfn");
6607 else if(cmd == GF_EOD){
6608 html_output_string(hd->html_data, "</dfn>");
6611 return(1);
6614 return(0);
6619 * HTML <TT> (Text Tt) element handler
6622 html_tt(HANDLER_S *hd, int ch, int cmd)
6624 if(PASS_HTML(hd->html_data)){
6625 if(cmd == GF_DATA){
6626 html_handoff(hd, ch);
6628 else if(cmd == GF_RESET){
6629 html_output_raw_tag(hd->html_data, "tt");
6631 else if(cmd == GF_EOD){
6632 html_output_string(hd->html_data, "</tt>");
6635 return(1);
6638 return(0);
6643 * HTML <VAR> (Text Var) element handler
6646 html_var(HANDLER_S *hd, int ch, int cmd)
6648 if(PASS_HTML(hd->html_data)){
6649 if(cmd == GF_DATA){
6650 html_handoff(hd, ch);
6652 else if(cmd == GF_RESET){
6653 html_output_raw_tag(hd->html_data, "var");
6655 else if(cmd == GF_EOD){
6656 html_output_string(hd->html_data, "</var>");
6659 return(1);
6662 return(0);
6667 * HTML <SAMP> (Text Samp) element handler
6670 html_samp(HANDLER_S *hd, int ch, int cmd)
6672 if(PASS_HTML(hd->html_data)){
6673 if(cmd == GF_DATA){
6674 html_handoff(hd, ch);
6676 else if(cmd == GF_RESET){
6677 html_output_raw_tag(hd->html_data, "samp");
6679 else if(cmd == GF_EOD){
6680 html_output_string(hd->html_data, "</samp>");
6683 return(1);
6686 return(0);
6691 * HTML <SUP> (Text Superscript) element handler
6694 html_sup(HANDLER_S *hd, int ch, int cmd)
6696 if(PASS_HTML(hd->html_data)){
6697 if(cmd == GF_DATA){
6698 html_handoff(hd, ch);
6700 else if(cmd == GF_RESET){
6701 html_output_raw_tag(hd->html_data, "sup");
6703 else if(cmd == GF_EOD){
6704 html_output_string(hd->html_data, "</sup>");
6707 return(1);
6710 return(0);
6715 * HTML <SUB> (Text Subscript) element handler
6718 html_sub(HANDLER_S *hd, int ch, int cmd)
6720 if(PASS_HTML(hd->html_data)){
6721 if(cmd == GF_DATA){
6722 html_handoff(hd, ch);
6724 else if(cmd == GF_RESET){
6725 html_output_raw_tag(hd->html_data, "sub");
6727 else if(cmd == GF_EOD){
6728 html_output_string(hd->html_data, "</sub>");
6731 return(1);
6734 return(0);
6739 * HTML <CITE> (Text Citation) element handler
6742 html_cite(HANDLER_S *hd, int ch, int cmd)
6744 if(PASS_HTML(hd->html_data)){
6745 if(cmd == GF_DATA){
6746 html_handoff(hd, ch);
6748 else if(cmd == GF_RESET){
6749 html_output_raw_tag(hd->html_data, "cite");
6751 else if(cmd == GF_EOD){
6752 html_output_string(hd->html_data, "</cite>");
6755 return(1);
6758 return(0);
6763 * HTML <CODE> (Text Code) element handler
6766 html_code(HANDLER_S *hd, int ch, int cmd)
6768 if(PASS_HTML(hd->html_data)){
6769 if(cmd == GF_DATA){
6770 html_handoff(hd, ch);
6772 else if(cmd == GF_RESET){
6773 html_output_raw_tag(hd->html_data, "code");
6775 else if(cmd == GF_EOD){
6776 html_output_string(hd->html_data, "</code>");
6779 return(1);
6782 return(0);
6787 * HTML <INS> (Text Inserted) element handler
6790 html_ins(HANDLER_S *hd, int ch, int cmd)
6792 if(PASS_HTML(hd->html_data)){
6793 if(cmd == GF_DATA){
6794 html_handoff(hd, ch);
6796 else if(cmd == GF_RESET){
6797 html_output_raw_tag(hd->html_data, "ins");
6799 else if(cmd == GF_EOD){
6800 html_output_string(hd->html_data, "</ins>");
6803 return(1);
6806 return(0);
6811 * HTML <DEL> (Text Deleted) element handler
6814 html_del(HANDLER_S *hd, int ch, int cmd)
6816 if(PASS_HTML(hd->html_data)){
6817 if(cmd == GF_DATA){
6818 html_handoff(hd, ch);
6820 else if(cmd == GF_RESET){
6821 html_output_raw_tag(hd->html_data, "del");
6823 else if(cmd == GF_EOD){
6824 html_output_string(hd->html_data, "</del>");
6827 return(1);
6830 return(0);
6835 * HTML <ABBR> (Text Abbreviation) element handler
6838 html_abbr(HANDLER_S *hd, int ch, int cmd)
6840 if(PASS_HTML(hd->html_data)){
6841 if(cmd == GF_DATA){
6842 html_handoff(hd, ch);
6844 else if(cmd == GF_RESET){
6845 html_output_raw_tag(hd->html_data, "abbr");
6847 else if(cmd == GF_EOD){
6848 html_output_string(hd->html_data, "</abbr>");
6851 return(1);
6854 return(0);
6859 * HTML <SCRIPT> element handler
6862 html_script(HANDLER_S *hd, int ch, int cmd)
6864 /* Link in and drop everything within on the floor */
6865 return(1);
6870 * HTML <APPLET> element handler
6873 html_applet(HANDLER_S *hd, int ch, int cmd)
6875 /* Link in and drop everything within on the floor */
6876 return(1);
6881 * HTML <STYLE> CSS element handler
6884 html_style(HANDLER_S *hd, int ch, int cmd)
6886 static STORE_S *css_stuff ;
6888 if(PASS_HTML(hd->html_data)){
6889 if(cmd == GF_DATA){
6890 /* collect style settings */
6891 so_writec(ch, css_stuff);
6893 else if(cmd == GF_RESET){
6894 if(css_stuff)
6895 so_give(&css_stuff);
6897 css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6899 else if(cmd == GF_EOD){
6901 * TODO: strip anything mischievous and pass on
6904 so_give(&css_stuff);
6908 return(1);
6912 * RSS 2.0 <RSS> version
6915 rss_rss(HANDLER_S *hd, int ch, int cmd)
6917 if(cmd == GF_RESET){
6918 PARAMETER *p;
6920 for(p = HD(hd->html_data)->el_data->attribs;
6921 p && p->attribute;
6922 p = p->next)
6923 if(!strucmp(p->attribute, "VERSION")){
6924 if(p->value && !strucmp(p->value,"2.0"))
6925 return(0); /* do not link in */
6928 gf_error("Incompatible RSS version");
6929 /* NO RETURN */
6932 return(0); /* not linked or error means we never get here */
6936 * RSS 2.0 <CHANNEL>
6939 rss_channel(HANDLER_S *hd, int ch, int cmd)
6941 if(cmd == GF_DATA){
6942 html_handoff(hd, ch);
6944 else if(cmd == GF_RESET){
6945 RSS_FEED_S *feed;
6947 feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6948 memset(feed, 0, sizeof(RSS_FEED_S));
6951 return(1); /* link in */
6955 * RSS 2.0 <TITLE>
6958 rss_title(HANDLER_S *hd, int ch, int cmd)
6960 static STORE_S *title_so;
6962 if(cmd == GF_DATA){
6963 /* collect data */
6964 if(title_so){
6965 so_writec(ch, title_so);
6968 else if(cmd == GF_RESET){
6969 if(RSS_FEED(hd->html_data)){
6970 /* prepare for data */
6971 if(title_so)
6972 so_give(&title_so);
6974 title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6977 else if(cmd == GF_EOD){
6978 if(title_so){
6979 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6980 RSS_ITEM_S *rip;
6982 if(feed){
6983 if((rip = feed->items) != NULL){
6984 for(; rip->next; rip = rip->next)
6987 if(rip->title)
6988 fs_give((void **) &rip->title);
6990 rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6992 else{
6993 if(feed->title)
6994 fs_give((void **) &feed->title);
6996 feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
7000 so_give(&title_so);
7004 return(1); /* link in */
7008 * RSS 2.0 <IMAGE>
7011 rss_image(HANDLER_S *hd, int ch, int cmd)
7013 static STORE_S *img_so;
7015 if(cmd == GF_DATA){
7016 /* collect data */
7017 if(img_so){
7018 so_writec(ch, img_so);
7021 else if(cmd == GF_RESET){
7022 if(RSS_FEED(hd->html_data)){
7023 /* prepare to collect data */
7024 if(img_so)
7025 so_give(&img_so);
7027 img_so = so_get(CharStar, NULL, EDIT_ACCESS);
7030 else if(cmd == GF_EOD){
7031 if(img_so){
7032 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7034 if(feed){
7035 if(feed->image)
7036 fs_give((void **) &feed->image);
7038 feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
7041 so_give(&img_so);
7045 return(1); /* link in */
7049 * RSS 2.0 <LINK>
7052 rss_link(HANDLER_S *hd, int ch, int cmd)
7054 static STORE_S *link_so;
7056 if(cmd == GF_DATA){
7057 /* collect data */
7058 if(link_so){
7059 so_writec(ch, link_so);
7062 else if(cmd == GF_RESET){
7063 if(RSS_FEED(hd->html_data)){
7064 /* prepare to collect data */
7065 if(link_so)
7066 so_give(&link_so);
7068 link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7071 else if(cmd == GF_EOD){
7072 if(link_so){
7073 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7074 RSS_ITEM_S *rip;
7076 if(feed){
7077 if((rip = feed->items) != NULL){
7078 for(; rip->next; rip = rip->next)
7081 if(rip->link)
7082 fs_give((void **) &rip->link);
7084 rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7086 else{
7087 if(feed->link)
7088 fs_give((void **) &feed->link);
7090 feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7094 so_give(&link_so);
7098 return(1); /* link in */
7102 * RSS 2.0 <DESCRIPTION>
7105 rss_description(HANDLER_S *hd, int ch, int cmd)
7107 static STORE_S *desc_so;
7109 if(cmd == GF_DATA){
7110 /* collect data */
7111 if(desc_so){
7112 so_writec(ch, desc_so);
7115 else if(cmd == GF_RESET){
7116 if(RSS_FEED(hd->html_data)){
7117 /* prepare to collect data */
7118 if(desc_so)
7119 so_give(&desc_so);
7121 desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7124 else if(cmd == GF_EOD){
7125 if(desc_so){
7126 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7127 RSS_ITEM_S *rip;
7129 if(feed){
7130 if((rip = feed->items) != NULL){
7131 for(; rip->next; rip = rip->next)
7134 if(rip->description)
7135 fs_give((void **) &rip->description);
7137 rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7139 else{
7140 if(feed->description)
7141 fs_give((void **) &feed->description);
7143 feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7147 so_give(&desc_so);
7151 return(1); /* link in */
7155 * RSS 2.0 <TTL> (in minutes)
7158 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7160 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7162 if(cmd == GF_DATA){
7163 if(isdigit((unsigned char) ch))
7164 feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7166 else if(cmd == GF_RESET){
7167 /* prepare to collect data */
7168 feed->ttl = 0;
7170 else if(cmd == GF_EOD){
7173 return(1); /* link in */
7177 * RSS 2.0 <ITEM>
7180 rss_item(HANDLER_S *hd, int ch, int cmd)
7182 /* BUG: verify no ITEM nesting? */
7183 if(cmd == GF_RESET){
7184 RSS_FEED_S *feed;
7186 if((feed = RSS_FEED(hd->html_data)) != NULL){
7187 RSS_ITEM_S **rip;
7188 int n = 0;
7190 for(rip = &feed->items; *rip; rip = &(*rip)->next)
7191 if(++n > RSS_ITEM_LIMIT)
7192 return(0);
7194 *rip = fs_get(sizeof(RSS_ITEM_S));
7195 memset(*rip, 0, sizeof(RSS_ITEM_S));
7199 return(0); /* don't link in */
/*
 * rss_skip_whitespace - step over leading whitespace
 *
 *   s - NUL-terminated string (must not be NULL)
 *
 * Returns a pointer into s at its first character for which isspace()
 * is false, which is the terminating NUL when s is all whitespace.
 */
char *
rss_skip_whitespace(char *s)
{
    while(*s != '\0' && isspace((unsigned char) *s))
      s++;

    return(s);
}
7214 * return the function associated with the given element name
7216 ELPROP_S *
7217 element_properties(FILTER_S *fd, char *el_name)
7219 register ELPROP_S *el_table = ELEMENTS(fd);
7220 size_t len_name = strlen(el_name);
7222 for(; el_table->element; el_table++)
7223 if(!strucmp(el_name, el_table->element)
7224 || (el_table->alternate
7225 && len_name == el_table->len + 1
7226 && el_name[el_table->len] == '/'
7227 && !struncmp(el_name, el_table->element, el_table->len)))
7228 return(el_table);
7230 return(NULL);
7235 * collect element's name and any attribute/value pairs then
7236 * dispatch to the appropriate handler.
7238 * Returns 1 : got what we wanted
7239 * 0 : we need more data
7240 * -1 : bogus input
7243 html_element_collector(FILTER_S *fd, int ch)
7245 if(ch == '>'){
7246 if(ED(fd)->overrun){
7248 * If problem processing, don't bother doing anything
7249 * internally, just return such that none of what we've
7250 * digested is displayed.
7252 HTML_DEBUG_EL("too long", ED(fd));
7253 return(1); /* Let it go, Jim */
7255 else if(ED(fd)->mkup_decl){
7256 if(ED(fd)->badform){
7257 dprint((2, "-- html error: bad form: %.*s\n",
7258 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7260 * Invalid comment -- make some guesses as
7261 * to whether we should stop with this greater-than...
7263 if(ED(fd)->buf[0] != '-'
7264 || ED(fd)->len < 4
7265 || (ED(fd)->buf[1] == '-'
7266 && ED(fd)->buf[ED(fd)->len - 1] == '-'
7267 && ED(fd)->buf[ED(fd)->len - 2] == '-'))
7268 return(1);
7270 else{
7271 dprint((5, "-- html: OK: %.*s\n",
7272 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7273 if(ED(fd)->start_comment == ED(fd)->end_comment){
7274 if(ED(fd)->len > 10){
7275 ED(fd)->buf[ED(fd)->len - 2] = '\0';
7276 html_element_comment(fd, ED(fd)->buf + 2);
7279 return(1);
7281 /* else keep collecting comment below */
7284 else if(ED(fd)->proc_inst){
7285 return(1); /* return without display... */
7287 else if(!ED(fd)->quoted || ED(fd)->badform){
7288 ELPROP_S *ep;
7291 * We either have the whole thing or all that we could
7292 * salvage from it. Try our best...
7295 if(HD(fd)->bitbucket)
7296 return(1); /* element inside chtml clause! */
7298 if(!ED(fd)->badform && html_element_flush(ED(fd)))
7299 return(1); /* return without display... */
7302 * If we ran into an empty tag or we don't know how to deal
7303 * with it, just go on, ignoring it...
7305 if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
7306 if(ep->handler){
7307 /* dispatch the element's handler */
7308 HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
7309 if(ED(fd)->end_tag){
7310 html_pop(fd, ep); /* remove it's handler */
7312 else{
7313 /* if a block element, pop any open <p>'s */
7314 if(ep->blocklevel){
7315 HANDLER_S *tp;
7317 for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
7318 HTML_DEBUG_EL("Unclosed <P>", ED(fd));
7319 html_pop(fd, EL(tp));
7320 break;
7324 /* enforce table nesting */
7325 if(!strucmp(ep->element, "tr")){
7326 if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
7327 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
7328 if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
7329 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7330 html_pop(fd, EL(HANDLERS(fd)));
7332 else{
7333 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7334 html_push(fd, element_properties(fd, "table"));
7338 else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
7339 if(!HANDLERS(fd)){
7340 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7341 html_push(fd, element_properties(fd, "table"));
7342 html_push(fd, element_properties(fd, "tr"));
7344 else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
7345 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
7346 html_push(fd, element_properties(fd, "tr"));
7348 else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
7349 dprint((2, "-- html error: bad nesting popping <TD>"));
7350 html_pop(fd, EL(HANDLERS(fd)));
7354 /* add it's handler */
7355 if(html_push(fd, ep)){
7356 if(ED(fd)->empty){
7357 /* remove empty element */
7358 html_pop(fd, ep);
7363 else {
7364 HTML_DEBUG_EL("IGNORED", ED(fd));
7367 else{ /* else, empty or unrecognized */
7368 HTML_DEBUG_EL("?", ED(fd));
7371 return(1); /* all done! see, that didn't hurt */
7374 else if(ch == '/' && ED(fd)->element && ED(fd)->len){
7375 ED(fd)->empty = 1;
7377 else
7378 ED(fd)->empty = 0;
7380 if(ED(fd)->mkup_decl){
7381 if((ch &= 0xff) == '-'){
7382 if(ED(fd)->hyphen){
7383 ED(fd)->hyphen = 0;
7384 if(ED(fd)->start_comment)
7385 ED(fd)->end_comment = 1;
7386 else
7387 ED(fd)->start_comment = 1;
7389 else
7390 ED(fd)->hyphen = 1;
7392 else{
7393 if(ED(fd)->end_comment)
7394 ED(fd)->start_comment = ED(fd)->end_comment = 0;
7397 * no "--" after ! or non-whitespace between comments - bad
7399 if(ED(fd)->len < 2 || (!ED(fd)->start_comment
7400 && !ASCII_ISSPACE((unsigned char) ch)))
7401 ED(fd)->badform = 1; /* non-comment! */
7403 ED(fd)->hyphen = 0;
7407 * Remember the comment for possible later processing, if
7408 * it get's too long, remember first and last few chars
7409 * so we know when to terminate (and throw some garbage
7410 * in between when we toss out what's between.
7412 if(ED(fd)->len == HTML_BUF_LEN){
7413 ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
7414 ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
7415 ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
7416 ED(fd)->len = 6;
7419 ED(fd)->buf[(ED(fd)->len)++] = ch;
7420 return(0); /* comments go in the bit bucket */
7422 else if(ED(fd)->overrun || ED(fd)->badform){
7423 return(0); /* swallow char's until next '>' */
7425 else if(!ED(fd)->element && !ED(fd)->len){
7426 if(ch == '/'){ /* validate leading chars */
7427 ED(fd)->end_tag = 1;
7428 return(0);
7430 else if(ch == '!'){
7431 ED(fd)->mkup_decl = 1;
7432 return(0);
7434 else if(ch == '?'){
7435 ED(fd)->proc_inst = 1;
7436 return(0);
7438 else if(!isalpha((unsigned char) ch))
7439 return(-1); /* can't be a tag! */
7441 else if(ch == '\"' || ch == '\''){
7442 if(!ED(fd)->hit_equal){
7443 ED(fd)->badform = 1; /* quote in element name?!? */
7444 return(0);
7447 if(ED(fd)->quoted){
7448 if(ED(fd)->quoted == (char) ch){
7449 /* end of a quoted value */
7450 ED(fd)->quoted = 0;
7451 if(ED(fd)->len && html_element_flush(ED(fd)))
7452 ED(fd)->badform = 1;
7454 return(0); /* continue collecting chars */
7456 /* ELSE fall thru writing other quoting char */
7458 else{
7459 ED(fd)->quoted = (char) ch;
7460 ED(fd)->was_quoted = 1;
7461 return(0); /* need more data */
7465 ch &= 0xff; /* strip any "literal" high bits */
7466 if(ED(fd)->quoted
7467 || isalnum(ch)
7468 || strchr("#-.!", ch)){
7469 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7470 ? HTML_BUF_LEN:MAX_ELEMENT)){
7471 ED(fd)->buf[(ED(fd)->len)++] = ch;
7473 else
7474 ED(fd)->overrun = 1; /* flag it broken */
7476 else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
7477 if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
7478 ED(fd)->badform = 1;
7479 return(0); /* else, we ain't done yet */
7482 if(!ED(fd)->hit_equal)
7483 ED(fd)->hit_equal = (ch == '=');
7485 else if(ch == '/' && ED(fd)->len && !ED(fd)->element){
7486 ELPROP_S *ep;
7487 ep = element_properties(fd, ED(fd)->buf);
7488 if(ep){
7489 if(!ep->alternate)
7490 ED(fd)->badform = 1;
7491 else{
7492 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7493 ? HTML_BUF_LEN:MAX_ELEMENT)){
7494 ED(fd)->buf[(ED(fd)->len)++] = ch; /* add this exception */
7496 else
7497 ED(fd)->overrun = 1;
7500 else
7501 ED(fd)->badform = 1;
7503 else
7504 ED(fd)->badform = 1; /* unrecognized data?? */
7506 return(0); /* keep collecting */
7511 * Element collector found complete string, integrate it and reset
7512 * internal collection buffer.
7514 * Returns zero if element collection buffer flushed, error flag otherwise
7517 html_element_flush(CLCTR_S *el_data)
7519 int rv = 0;
7521 if(el_data->hit_equal){ /* adding a value */
7522 el_data->hit_equal = 0;
7523 if(el_data->cur_attrib){
7524 if(!el_data->cur_attrib->value){
7525 el_data->cur_attrib->value = cpystr(el_data->len
7526 ? el_data->buf : "");
7528 else{
7529 dprint((2, "** element: unexpected value: %.10s...\n",
7530 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7531 rv = 1;
7534 else{
7535 dprint((2, "** element: missing attribute name: %.10s...\n",
7536 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7537 rv = 2;
7540 else if(el_data->len){
7541 if(!el_data->element){
7542 el_data->element = cpystr(el_data->buf);
7544 else{
7545 PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7546 memset(p, 0, sizeof(PARAMETER));
7547 if(el_data->attribs){
7548 el_data->cur_attrib->next = p;
7549 el_data->cur_attrib = p;
7551 else
7552 el_data->attribs = el_data->cur_attrib = p;
7554 p->attribute = cpystr(el_data->buf);
7559 el_data->was_quoted = 0; /* reset collector buf and state */
7560 el_data->len = 0;
7561 memset(el_data->buf, 0, HTML_BUF_LEN);
7562 return(rv); /* report whatever happened above */
7567 * html_element_comment - "Special" comment handling here
7569 void
7570 html_element_comment(FILTER_S *f, char *s)
7572 char *p;
7574 while(*s && ASCII_ISSPACE((unsigned char) *s))
7575 s++;
7578 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7580 if(!struncmp(s, "chtml ", 6)){
7581 s += 6;
7582 if(!struncmp(s, "if ", 3)){
7583 HD(f)->bitbucket = 1; /* default is failure! */
7584 switch(*(s += 3)){
7585 case 'P' :
7586 case 'p' :
7587 if(!struncmp(s + 1, "inemode=", 8)){
7588 if(!strucmp(s = removing_quotes(s + 9), "function_key")
7589 && F_ON(F_USE_FK, ps_global))
7590 HD(f)->bitbucket = 0;
7591 else if(!strucmp(s, "running"))
7592 HD(f)->bitbucket = 0;
7593 else if(!strucmp(s, "PHONE_HOME") && ps_global->phone_home)
7594 HD(f)->bitbucket = 0;
7595 #ifdef _WINDOWS
7596 else if(!strucmp(s, "os_windows"))
7597 HD(f)->bitbucket = 0;
7598 #endif
7601 break;
7603 case '[' : /* test */
7604 if((p = strindex(++s, ']')) != NULL){
7605 *p = '\0'; /* tie off test string */
7606 removing_leading_white_space(s);
7607 removing_trailing_white_space(s);
7608 if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7609 for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7613 HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7614 READ_ACCESS) != 0);
7618 break;
7620 default :
7621 break;
7624 else if(!strucmp(s, "else")){
7625 HD(f)->bitbucket = !HD(f)->bitbucket;
7627 else if(!strucmp(s, "endif")){
7628 /* Clean up after chtml here */
7629 HD(f)->bitbucket = 0;
7632 else if(!HD(f)->bitbucket){
7633 if(!struncmp(s, "#include ", 9)){
7634 char buf[MAILTMPLEN], *bufp;
7635 int len, end_of_line;
7636 FILE *fp;
7638 /* Include the named file */
7639 if(!struncmp(s += 9, "file=", 5)
7640 && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7641 html_element_output(f, HTML_NEWLINE);
7643 while(fgets(buf, sizeof(buf), fp)){
7644 if((len = strlen(buf)) && buf[len-1] == '\n'){
7645 end_of_line = 1;
7646 buf[--len] = '\0';
7648 else
7649 end_of_line = 0;
7651 for(bufp = buf; len; bufp++, len--)
7652 html_element_output(f, (int) *bufp);
7654 if(end_of_line)
7655 html_element_output(f, HTML_NEWLINE);
7658 fclose(fp);
7659 html_element_output(f, HTML_NEWLINE);
7660 HD(f)->blanks = 0;
7661 if(f->f1 == WSPACE)
7662 f->f1 = DFL;
7665 else if(!struncmp(s, "#echo ", 6)){
7666 if(!struncmp(s += 6, "var=", 4)){
7667 char *p, buf[MAILTMPLEN];
7668 ADDRESS *adr;
7669 extern char datestamp[];
7671 if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7672 p = ALPINE_VERSION;
7674 else if(!strcmp(s, "ALPINE_REVISION")){
7675 p = get_alpine_revision_string(buf, sizeof(buf));
7677 else if(!strcmp(s, "C_CLIENT_VERSION")){
7678 p = CCLIENTVERSION;
7680 else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7681 p = datestamp;
7683 else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7684 rfc822_date(p = buf);
7686 else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7687 p = (ps_global->VAR_LOCAL_FULLNAME
7688 && ps_global->VAR_LOCAL_FULLNAME[0])
7689 ? ps_global->VAR_LOCAL_FULLNAME
7690 : "Local Support";
7692 else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7693 p = (ps_global->VAR_LOCAL_ADDRESS
7694 && ps_global->VAR_LOCAL_ADDRESS[0])
7695 ? ps_global->VAR_LOCAL_ADDRESS
7696 : "postmaster";
7697 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7698 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7699 mail_free_address(&adr);
7701 else if(!strcmp(s, "_BUGS_FULLNAME_")){
7702 p = (ps_global->VAR_BUGS_FULLNAME
7703 && ps_global->VAR_BUGS_FULLNAME[0])
7704 ? ps_global->VAR_BUGS_FULLNAME
7705 : "Place to report Alpine Bugs";
7707 else if(!strcmp(s, "_BUGS_ADDRESS_")){
7708 p = (ps_global->VAR_BUGS_ADDRESS
7709 && ps_global->VAR_BUGS_ADDRESS[0])
7710 ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7711 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7712 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7713 mail_free_address(&adr);
7715 else if(!strcmp(s, "CURRENT_DIR")){
7716 getcwd(p = buf, sizeof(buf));
7718 else if(!strcmp(s, "HOME_DIR")){
7719 p = ps_global->home_dir;
7721 else if(!strcmp(s, "PINE_CONF_PATH")){
7722 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7723 p = "/usr/local/lib/pine.conf";
7724 #else
7725 p = SYSTEM_PINERC;
7726 #endif
7728 else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7729 #ifdef SYSTEM_PINERC_FIXED
7730 p = SYSTEM_PINERC_FIXED;
7731 #else
7732 p = "/usr/local/lib/pine.conf.fixed";
7733 #endif
7735 else if(!strcmp(s, "PINE_INFO_PATH")){
7736 p = SYSTEM_PINE_INFO_PATH;
7738 else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7739 p = sysinbox();
7741 else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7742 /* Don't put the leading /tmp/. */
7743 int i, j;
7745 p = sysinbox();
7746 if(p){
7747 for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7748 if(p[i] == '/')
7749 buf[j++] = '\\';
7750 else
7751 buf[j++] = p[i];
7753 buf[j++] = '\0';
7754 p = buf;
7757 else if(!struncmp(s, "VAR_", 4)){
7758 p = s+4;
7759 if(pith_opt_pretty_var_name)
7760 p = (*pith_opt_pretty_var_name)(p);
7762 else if(!struncmp(s, "FEAT_", 5)){
7763 p = s+5;
7764 if(pith_opt_pretty_feature_name)
7765 p = (*pith_opt_pretty_feature_name)(p, -1);
7767 else
7768 p = NULL;
7770 if(p){
7771 if(f->f1 == WSPACE){
7772 html_element_output(f, ' ');
7773 f->f1 = DFL; /* clear it */
7776 while(*p)
7777 html_element_output(f, (int) *p++);
7785 void
7786 html_element_output(FILTER_S *f, int ch)
7788 if(HANDLERS(f))
7789 (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7790 else
7791 html_output(f, ch);
/*
 * ISHEX_DIGIT - non-zero when X is one of 0-9, a-f or A-F.
 *
 * X is evaluated exactly once and is converted to unsigned char before
 * reaching <ctype.h>, so arguments with side effects and plain (possibly
 * negative) char values are both safe.
 */
#define	ISHEX_DIGIT(X)	(isxdigit((unsigned char) (X)) != 0)
7799 * collect html entity and return its UCS value when done.
7801 * Returns HTML_MOREDATA : we need more data
7802 * HTML_ENTITY : entity collected
7803 * HTML_BADVALUE : good data, but no named match or out of range
7804 * HTML_BADDATA : invalid input
7806 * NOTES:
7807 * - entity format is "'&' tag ';'" and represents a literal char
7808 * - named entities are CASE SENSITIVE.
7809 * - numeric char references (where the tag is prefixed with a '#')
7810 * are a char with that numbers value
7811 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7814 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7816 static int len = 0;
7817 static char buf[MAX_ENTITY+2];
7818 int rv, i;
7820 if(len == MAX_ENTITY){
7821 rv = HTML_BADDATA;
7823 else if((len == 0)
7824 ? (isalpha((unsigned char) ch) || ch == '#')
7825 : ((isdigit((unsigned char) ch)
7826 || (len == 1 && (unsigned char) ch == 'x')
7827 || (len == 1 &&(unsigned char) ch == 'X')
7828 || (len > 1 && isxdigit((unsigned char) ch))
7829 || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7830 buf[len++] = ch;
7831 return(HTML_MOREDATA);
7833 else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7834 buf[len] = '\0'; /* got something! */
7835 if(buf[0] == '#'){
7836 if(buf[1] == 'x' || buf[1] == 'X')
7837 *ucs = (UCS) strtoul(&buf[2], NULL, 16);
7838 else
7839 *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7841 if(alt){
7842 *alt = NULL;
7843 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7844 if(entity_tab[i].value == *ucs){
7845 *alt = entity_tab[i].plain;
7846 break;
7850 len = 0;
7851 return(HTML_ENTITY);
7853 else{
7854 rv = HTML_BADVALUE; /* in case of no match */
7855 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7856 if(strcmp(entity_tab[i].name, buf) == 0){
7857 *ucs = entity_tab[i].value;
7858 if(alt)
7859 *alt = entity_tab[i].plain;
7861 len = 0;
7862 return(HTML_ENTITY);
7866 else
7867 rv = HTML_BADDATA; /* bogus input! */
7869 if(alt){
7870 buf[len] = '\0';
7871 *alt = buf;
7874 len = 0;
7875 return(rv);
7879 /*----------------------------------------------------------------------
7880 HTML text to plain text filter
7882 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7883 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7884 formatting.
7886 ----*/
7887 void
7888 gf_html2plain(FILTER_S *f, int flg)
7890 /* BUG: qoute incoming \255 values (see "yuml" above!) */
7891 if(flg == GF_DATA){
7892 register int c;
7893 GF_INIT(f, f->next);
7895 if(!HTML_WROTE(f)){
7896 int ii;
7898 for(ii = HTML_INDENT(f); ii > 0; ii--)
7899 html_putc(f, ' ');
7901 HTML_WROTE(f) = 1;
7904 while(GF_GETC(f, c)){
7906 * First we have to collect any literal entities...
7907 * that is, IF we're not already collecting one
7908 * AND we're not in element's text or, if we are, we're
7909 * not in quoted text. Whew.
7911 if(f->t){
7912 char *alt = NULL;
7913 UCS ucs;
7915 switch(html_entity_collector(f, c, &ucs, &alt)){
7916 case HTML_MOREDATA: /* more data required? */
7917 continue; /* go get another char */
7919 case HTML_BADVALUE :
7920 case HTML_BADDATA :
7921 /* if supplied, process bogus data */
7922 HTML_PROC(f, '&');
7923 for(; *alt; alt++){
7924 unsigned int uic = *alt;
7925 HTML_PROC(f, uic);
7928 if(c == '&' && !HD(f)->quoted){
7929 f->t = '&';
7930 continue;
7932 else
7933 f->t = 0; /* don't come back next time */
7935 break;
7937 default : /* thing to process */
7938 f->t = 0; /* don't come back */
7941 * do something with UCS codepoint. If it's
7942 * not displayable then use the alt version
7943 * otherwise
7944 * cvt UCS to UTF-8 and toss into next filter.
7946 if(ucs > 127 && wcellwidth(ucs) < 0){
7947 if(alt){
7948 for(; *alt; alt++){
7949 c = MAKE_LITERAL(*alt);
7950 HTML_PROC(f, c);
7953 continue;
7955 else
7956 c = MAKE_LITERAL('?');
7958 else{
7959 unsigned char utf8buf[8], *p1, *p2;
7961 p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
7962 for(; p1 < p2; p1++){
7963 c = MAKE_LITERAL(*p1);
7964 HTML_PROC(f, c);
7967 continue;
7970 break;
7973 else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
7974 f->t = '&';
7975 continue;
7979 * then we process whatever we got...
7982 HTML_PROC(f, c);
7985 GF_OP_END(f); /* clean up our input pointers */
7987 else if(flg == GF_EOD){
7988 while(HANDLERS(f)){
7989 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
7990 html_pop(f, EL(HANDLERS(f)));
7993 html_output(f, HTML_NEWLINE);
7994 if(ULINE_BIT(f))
7995 HTML_ULINE(f, ULINE_BIT(f) = 0);
7997 if(BOLD_BIT(f))
7998 HTML_BOLD(f, BOLD_BIT(f) = 0);
8000 HTML_FLUSH(f);
8001 fs_give((void **)&f->line);
8002 if(HD(f)->color)
8003 free_color_pair(&HD(f)->color);
8005 fs_give(&f->data);
8006 if(f->opt){
8007 if(((HTML_OPT_S *)f->opt)->base)
8008 fs_give((void **) &((HTML_OPT_S *)f->opt)->base);
8010 fs_give(&f->opt);
8013 (*f->next->f)(f->next, GF_DATA);
8014 (*f->next->f)(f->next, GF_EOD);
8016 else if(flg == GF_RESET){
8017 dprint((9, "-- gf_reset html2plain\n"));
8018 f->data = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
8019 memset(f->data, 0, sizeof(HTML_DATA_S));
8020 /* start with flowing text */
8021 HD(f)->wrapstate = !PASS_HTML(f);
8022 HD(f)->wrapcol = WRAP_COLS(f);
8023 f->f1 = DFL; /* state */
8024 f->f2 = 0; /* chars in wrap buffer */
8025 f->n = 0L; /* chars on line so far */
8026 f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
8027 HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
8028 HD(f)->alt_entity = (!ps_global->display_charmap
8029 || strucmp(ps_global->display_charmap, "iso-8859-1"));
8030 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8037 * html_indent - do the requested indent level function with appropriate
8038 * flushing and such.
8040 * Returns: indent level prior to set/increment
8043 html_indent(FILTER_S *f, int val, int func)
8045 int old = HD(f)->indent_level;
8047 /* flush pending data at old indent level */
8048 switch(func){
8049 case HTML_ID_INC :
8050 html_output_flush(f);
8051 if((HD(f)->indent_level += val) < 0)
8052 HD(f)->indent_level = 0;
8054 break;
8056 case HTML_ID_SET :
8057 html_output_flush(f);
8058 HD(f)->indent_level = val;
8059 break;
8061 default :
8062 break;
8065 return(old);
8071 * html_blanks - Insert n blank lines into output
8073 void
8074 html_blank(FILTER_S *f, int n)
8076 /* Cap off any flowing text, and then write blank lines */
8077 if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
8078 html_output(f, HTML_NEWLINE);
8080 if(HD(f)->wrapstate)
8081 while(HD(f)->blanks < n) /* blanks inc'd by HTML_NEWLINE */
8082 html_output(f, HTML_NEWLINE);
8088 * html_newline -- insert a newline mindful of embedded tags
8090 void
8091 html_newline(FILTER_S *f)
8093 html_write_newline(f); /* commit an actual newline */
8095 if(f->n){ /* and keep track of blank lines */
8096 HD(f)->blanks = 0;
8097 f->n = 0L;
8099 else
8100 HD(f)->blanks++;
8105 * output the given char, handling any requested wrapping.
8106 * It's understood that all whitespace handed us is written. In other
8107 * words, junk whitespace is weeded out before it's given to us here.
8110 void
8111 html_output(FILTER_S *f, int ch)
8113 UCS uc;
8114 int width;
8115 void (*o_f)(FILTER_S *, int, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8118 * if ch is a control token, just pass it on, else, collect
8119 * utf8-encoded characters to determine width,then feed into
8120 * output routines
8122 if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8123 (*o_f)(f, ch, 1, 0);
8125 else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8126 unsigned char *cp;
8128 for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8129 (*o_f)(f, *cp, width, HD(f)->cb.cbufend - cp);
8130 width = 0; /* only count it once */
8133 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8135 else
8136 HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8137 /* else do nothing until we have a full character */
8141 void
8142 html_output_string(FILTER_S *f, char *s)
8144 for(; *s; s++)
8145 html_output(f, *s);
8149 void
8150 html_output_raw_tag(FILTER_S *f, char *tag)
8152 PARAMETER *p;
8153 char *vp;
8154 int i;
8156 html_output(f, '<');
8157 html_output_string(f, tag);
8158 for(p = HD(f)->el_data->attribs;
8159 p && p->attribute;
8160 p = p->next){
8161 /* SECURITY: no javascript */
8162 /* PRIVACY: no img src without permission */
8163 /* BUGS: no class collisions since <head> ignored */
8164 if(html_event_attribute(p->attribute)
8165 || !strucmp(p->attribute, "class")
8166 || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8167 continue;
8169 /* PRIVACY: sniff out background images */
8170 if(p->value && !PASS_IMAGES(f)){
8171 if(!strucmp(p->attribute, "style")){
8172 if((vp = srchstr(p->value, "background-image")) != NULL){
8173 /* neuter in place */
8174 vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8176 else{
8177 for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8178 if(vp[10] == ' ' || vp[10] == ':')
8179 for(i = 11; vp[i] && vp[i] != ';'; i++)
8180 if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8181 || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8182 vp[0] = 'X';
8185 else if(!strucmp(p->attribute, "background")){
8186 char *ip;
8188 for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8191 if(ip)
8192 continue;
8196 html_output(f, ' ');
8197 html_output_string(f, p->attribute);
8198 if(p->value){
8199 html_output(f, '=');
8200 html_output(f, '\"');
8201 html_output_string(f, p->value);
8202 html_output(f, '\"');
8206 /* append warning to form submission */
8207 if(!strucmp(tag, "form")){
8208 html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8211 if(ED(f)->end_tag){
8212 html_output(f, ' ');
8213 html_output(f, '/');
8216 html_output(f, '>');
/*
 * html_event_attribute - is attr a script event-handler attribute?
 *
 * Any name of the form "on" followed by a letter (case-insensitive) is
 * treated as an event handler.  The earlier fixed list misspelled
 * "onselect" and cannot keep up with newly defined handlers, so the
 * prefix test is used instead.
 *
 *   Returns: non-zero if attr is an event handler, zero otherwise
 *            (including for NULL or names shorter than three chars)
 */
int
html_event_attribute(char *attr)
{
    return(attr
           && (attr[0] == 'o' || attr[0] == 'O')
           && (attr[1] == 'n' || attr[1] == 'N')
           && isalpha((unsigned char) attr[2]));
}
8240 void
8241 html_output_normal(FILTER_S *f, int ch, int width, int remaining)
8243 static int written = 0;
8244 static int cwidth;
8246 if(HD(f)->centered){
8247 html_centered_flush(f);
8248 fs_give((void **) &HD(f)->centered->line.buf);
8249 fs_give((void **) &HD(f)->centered->word.buf);
8250 fs_give((void **) &HD(f)->centered);
8253 if(HD(f)->wrapstate){
8254 if(ch == HTML_NEWLINE){ /* hard newline */
8255 html_output_flush(f);
8256 html_newline(f);
8258 else
8259 HD(f)->blanks = 0; /* reset blank line counter */
8261 if(ch == TAG_EMBED){ /* takes up no space */
8262 HD(f)->embedded.state = -5;
8263 HTML_LINEP_PUTC(f, TAG_EMBED);
8265 else if(HD(f)->embedded.state){ /* ditto */
8266 if(HD(f)->embedded.state == -5){
8267 /* looking for specially handled tags following TAG_EMBED */
8268 if(ch == TAG_HANDLE)
8269 HD(f)->embedded.state = -1; /* next ch is length */
8270 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8271 if(!HD(f)->color)
8272 HD(f)->color = new_color_pair(NULL, NULL);
8274 if(ch == TAG_FGCOLOR)
8275 HD(f)->embedded.color = HD(f)->color->fg;
8276 else
8277 HD(f)->embedded.color = HD(f)->color->bg;
8279 HD(f)->embedded.state = RGBLEN;
8281 else
8282 HD(f)->embedded.state = 0; /* non-special */
8284 else if(HD(f)->embedded.state > 0){
8285 /* collecting up an RGBLEN color or length, ignore tags */
8286 (HD(f)->embedded.state)--;
8287 if(HD(f)->embedded.color)
8288 *HD(f)->embedded.color++ = ch;
8290 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8291 *HD(f)->embedded.color = '\0';
8292 HD(f)->embedded.color = NULL;
8295 else if(HD(f)->embedded.state < 0){
8296 HD(f)->embedded.state = ch; /* number of embedded chars */
8298 else{
8299 (HD(f)->embedded.state)--;
8300 if(HD(f)->embedded.color)
8301 *HD(f)->embedded.color++ = ch;
8303 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8304 *HD(f)->embedded.color = '\0';
8305 HD(f)->embedded.color = NULL;
8309 HTML_LINEP_PUTC(f, ch);
8311 else if(HTML_ISSPACE(ch)){
8312 html_output_flush(f);
8314 else{
8315 if(HD(f)->prefix)
8316 html_a_prefix(f);
8318 if(written == 0)
8319 cwidth = width;
8321 if(f->f2 + cwidth + 1 >= WRAP_COLS(f)){
8322 HTML_LINEP_PUTC(f, ch & 0xff);
8323 written++;
8324 if(remaining == 0){
8325 HTML_FLUSH(f);
8326 html_newline(f);
8328 if(HD(f)->in_anchor)
8329 html_write_anchor(f, HD(f)->in_anchor);
8331 else{
8332 HTML_LINEP_PUTC(f, ch & 0xff);
8333 written++;
8336 if(remaining == 0){
8337 written = 0;
8338 f->f2 += cwidth;
8342 else{
8343 if(HD(f)->prefix)
8344 html_a_prefix(f);
8346 html_output_flush(f);
8348 switch(HD(f)->embedded.state){
8349 case 0 :
8350 switch(ch){
8351 default :
8353 * It's difficult to both preserve whitespace and wrap at the
8354 * same time so we'll do a dumb wrap at the edge of the screen.
8355 * Since this shouldn't come up much in real life we'll hope
8356 * it is good enough.
8358 if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
8359 html_newline(f);
8361 f->n += width; /* inc displayed char count */
8362 HD(f)->blanks = 0; /* reset blank line counter */
8363 html_putc(f, ch & 0xff);
8364 break;
8366 case TAG_EMBED : /* takes up no space */
8367 html_putc(f, TAG_EMBED);
8368 HD(f)->embedded.state = -2;
8369 break;
8371 case HTML_NEWLINE : /* newline handling */
8372 if(!f->n)
8373 break;
8375 case '\n' :
8376 html_newline(f);
8378 case '\r' :
8379 break;
8382 break;
8384 case -2 :
8385 HD(f)->embedded.state = 0;
8386 switch(ch){
8387 case TAG_HANDLE :
8388 HD(f)->embedded.state = -1; /* next ch is length */
8389 break;
8391 case TAG_BOLDON :
8392 BOLD_BIT(f) = 1;
8393 break;
8395 case TAG_BOLDOFF :
8396 BOLD_BIT(f) = 0;
8397 break;
8399 case TAG_ULINEON :
8400 ULINE_BIT(f) = 1;
8401 break;
8403 case TAG_ULINEOFF :
8404 ULINE_BIT(f) = 0;
8405 break;
8407 case TAG_FGCOLOR :
8408 if(!HD(f)->color)
8409 HD(f)->color = new_color_pair(NULL, NULL);
8411 HD(f)->embedded.color = HD(f)->color->fg;
8412 HD(f)->embedded.state = 11;
8413 break;
8415 case TAG_BGCOLOR :
8416 if(!HD(f)->color)
8417 HD(f)->color = new_color_pair(NULL, NULL);
8419 HD(f)->embedded.color = HD(f)->color->bg;
8420 HD(f)->embedded.state = 11;
8421 break;
8423 case TAG_HANDLEOFF :
8424 ch = TAG_INVOFF;
8425 HD(f)->in_anchor = 0;
8426 break;
8428 default :
8429 break;
8432 html_putc(f, ch);
8433 break;
8435 case -1 :
8436 HD(f)->embedded.state = ch; /* number of embedded chars */
8437 html_putc(f, ch);
8438 break;
8440 default :
8441 HD(f)->embedded.state--;
8442 if(HD(f)->embedded.color)
8443 *HD(f)->embedded.color++ = ch;
8445 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8446 *HD(f)->embedded.color = '\0';
8447 HD(f)->embedded.color = NULL;
8450 html_putc(f, ch);
8451 break;
8458 * flush any buffered chars waiting for wrapping.
8460 void
8461 html_output_flush(FILTER_S *f)
8463 if(f->f2){
8464 if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8465 html_newline(f); /* wrap? */
8467 if(f->n){ /* text already on the line? */
8468 html_putc(f, ' ');
8469 f->n++; /* increment count */
8471 else{
8472 /* write at start of new line */
8473 html_write_indent(f, HD(f)->indent_level);
8475 if(HD(f)->in_anchor)
8476 html_write_anchor(f, HD(f)->in_anchor);
8479 f->n += f->f2;
8480 HTML_FLUSH(f);
8487 * html_output_centered - managed writing centered text
8489 void
8490 html_output_centered(FILTER_S *f, int ch, int width, int remaining)
8492 static int written;
8493 static int cwidth;
8495 if(!HD(f)->centered){ /* new text? */
8496 html_output_flush(f);
8497 if(f->n) /* start on blank line */
8498 html_newline(f);
8500 HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
8501 memset(HD(f)->centered, 0, sizeof(CENTER_S));
8502 /* and grab a buf to start collecting centered text */
8503 HD(f)->centered->line.len = WRAP_COLS(f);
8504 HD(f)->centered->line.buf = (char *) fs_get(HD(f)->centered->line.len
8505 * sizeof(char));
8506 HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
8507 HD(f)->centered->word.len = 32;
8508 HD(f)->centered->word.buf = (char *) fs_get(HD(f)->centered->word.len
8509 * sizeof(char));
8510 HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
8513 if(ch == HTML_NEWLINE){ /* hard newline */
8514 html_centered_flush(f);
8516 else if(ch == TAG_EMBED){ /* takes up no space */
8517 HD(f)->embedded.state = -5;
8518 html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
8520 else if(HD(f)->embedded.state){
8521 if(HD(f)->embedded.state == -5){
8522 /* looking for specially handled tags following TAG_EMBED */
8523 if(ch == TAG_HANDLE)
8524 HD(f)->embedded.state = -1; /* next ch is length */
8525 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8526 if(!HD(f)->color)
8527 HD(f)->color = new_color_pair(NULL, NULL);
8529 if(ch == TAG_FGCOLOR)
8530 HD(f)->embedded.color = HD(f)->color->fg;
8531 else
8532 HD(f)->embedded.color = HD(f)->color->bg;
8534 HD(f)->embedded.state = RGBLEN;
8536 else
8537 HD(f)->embedded.state = 0; /* non-special */
8539 else if(HD(f)->embedded.state > 0){
8540 /* collecting up an RGBLEN color or length, ignore tags */
8541 (HD(f)->embedded.state)--;
8542 if(HD(f)->embedded.color)
8543 *HD(f)->embedded.color++ = ch;
8545 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8546 *HD(f)->embedded.color = '\0';
8547 HD(f)->embedded.color = NULL;
8550 else if(HD(f)->embedded.state < 0){
8551 HD(f)->embedded.state = ch; /* number of embedded chars */
8553 else{
8554 (HD(f)->embedded.state)--;
8555 if(HD(f)->embedded.color)
8556 *HD(f)->embedded.color++ = ch;
8558 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8559 *HD(f)->embedded.color = '\0';
8560 HD(f)->embedded.color = NULL;
8564 html_centered_putc(&HD(f)->centered->word, ch);
8566 else if(ASCII_ISSPACE((unsigned char) ch)){
8567 if(!HD(f)->centered->space++){ /* end of a word? flush! */
8568 int i;
8570 if(WRAPPED_LEN(f) > HD(f)->wrapcol){
8571 html_centered_flush_line(f);
8572 /* fall thru to put current "word" on blank "line" */
8574 else if(HD(f)->centered->line.width){
8575 /* put space char between line and appended word */
8576 html_centered_putc(&HD(f)->centered->line, ' ');
8577 HD(f)->centered->line.width++;
8580 for(i = 0; i < HD(f)->centered->word.used; i++)
8581 html_centered_putc(&HD(f)->centered->line,
8582 HD(f)->centered->word.buf[i]);
8584 HD(f)->centered->line.width += HD(f)->centered->word.width;
8585 HD(f)->centered->word.used = 0;
8586 HD(f)->centered->word.width = 0;
8589 else{
8590 if(HD(f)->prefix)
8591 html_a_prefix(f);
8593 /* ch is start of next word */
8594 HD(f)->centered->space = 0;
8595 if(HD(f)->centered->word.width >= WRAP_COLS(f))
8596 html_centered_flush(f);
8598 html_centered_putc(&HD(f)->centered->word, ch);
8600 if(written == 0)
8601 cwidth = width;
8603 written++;
8605 if(remaining == 0){
8606 written = 0;
8607 HD(f)->centered->word.width += cwidth;
8614 * html_centered_putc -- add given char to given WRAPLINE_S
8616 void
8617 html_centered_putc(WRAPLINE_S *wp, int ch)
8619 if(wp->used + 1 >= wp->len){
8620 wp->len += 64;
8621 fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8624 wp->buf[wp->used++] = ch;
8630 * html_centered_flush - finish writing any pending centered output
8632 void
8633 html_centered_flush(FILTER_S *f)
8635 int i;
8638 * If word present (what about line?) we need to deal with
8639 * appending it...
8641 if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8642 html_centered_flush_line(f);
8644 if(WRAPPED_LEN(f)){
8645 /* figure out how much to indent */
8646 if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8647 html_write_indent(f, i);
8649 if(HD(f)->centered->anchor)
8650 html_write_anchor(f, HD(f)->centered->anchor);
8652 html_centered_handle(&HD(f)->centered->anchor,
8653 HD(f)->centered->line.buf,
8654 HD(f)->centered->line.used);
8655 html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8657 if(HD(f)->centered->word.used){
8658 if(HD(f)->centered->line.width)
8659 html_putc(f, ' ');
8661 html_centered_handle(&HD(f)->centered->anchor,
8662 HD(f)->centered->word.buf,
8663 HD(f)->centered->word.used);
8664 html_write(f, HD(f)->centered->word.buf,
8665 HD(f)->centered->word.used);
8668 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8669 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8671 else{
8672 if(HD(f)->centered->word.used){
8673 html_write(f, HD(f)->centered->word.buf,
8674 HD(f)->centered->word.used);
8675 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8676 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8678 HD(f)->blanks++; /* advance the blank line counter */
8681 html_newline(f); /* finish the line */
8686 * html_centered_handle - scan the line for embedded handles
8688 void
8689 html_centered_handle(int *h, char *line, int len)
8691 int n;
8693 while(len-- > 0)
8694 if(*line++ == TAG_EMBED && len-- > 0)
8695 switch(*line++){
8696 case TAG_HANDLE :
8697 if((n = *line++) >= --len){
8698 *h = 0;
8699 len -= n;
8700 while(n--)
8701 *h = (*h * 10) + (*line++ - '0');
8703 break;
8705 case TAG_HANDLEOFF :
8706 case TAG_INVOFF :
8707 *h = 0; /* assumption 23,342: inverse off ends tags */
8708 break;
8710 default :
8711 break;
8718 * html_centered_flush_line - flush the centered "line" only
8720 void
8721 html_centered_flush_line(FILTER_S *f)
8723 if(HD(f)->centered->line.used){
8724 int i, j;
8726 /* hide "word" from flush */
8727 i = HD(f)->centered->word.used;
8728 j = HD(f)->centered->word.width;
8729 HD(f)->centered->word.used = 0;
8730 HD(f)->centered->word.width = 0;
8731 html_centered_flush(f);
8733 HD(f)->centered->word.used = i;
8734 HD(f)->centered->word.width = j;
8740 * html_write_indent - write indention mindful of display attributes
8742 void
8743 html_write_indent(FILTER_S *f, int indent)
8745 if(! STRIP(f)){
8746 if(BOLD_BIT(f)){
8747 html_putc(f, TAG_EMBED);
8748 html_putc(f, TAG_BOLDOFF);
8751 if(ULINE_BIT(f)){
8752 html_putc(f, TAG_EMBED);
8753 html_putc(f, TAG_ULINEOFF);
8757 f->n = indent;
8758 while(indent-- > 0)
8759 html_putc(f, ' '); /* indent as needed */
8762 * Resume any previous embedded state
8764 if(! STRIP(f)){
8765 if(BOLD_BIT(f)){
8766 html_putc(f, TAG_EMBED);
8767 html_putc(f, TAG_BOLDON);
8770 if(ULINE_BIT(f)){
8771 html_putc(f, TAG_EMBED);
8772 html_putc(f, TAG_ULINEON);
8781 void
8782 html_write_anchor(FILTER_S *f, int anchor)
8784 char buf[256];
8785 int i;
8787 html_putc(f, TAG_EMBED);
8788 html_putc(f, TAG_HANDLE);
8789 snprintf(buf, sizeof(buf), "%d", anchor);
8790 html_putc(f, (int) strlen(buf));
8792 for(i = 0; buf[i]; i++)
8793 html_putc(f, buf[i]);
8798 * html_write_newline - write a newline mindful of display attributes
8800 void
8801 html_write_newline(FILTER_S *f)
8803 int i;
8805 if(! STRIP(f)){ /* First tie, off any embedded state */
8806 if(HD(f)->in_anchor){
8807 html_putc(f, TAG_EMBED);
8808 html_putc(f, TAG_INVOFF);
8811 if(BOLD_BIT(f)){
8812 html_putc(f, TAG_EMBED);
8813 html_putc(f, TAG_BOLDOFF);
8816 if(ULINE_BIT(f)){
8817 html_putc(f, TAG_EMBED);
8818 html_putc(f, TAG_ULINEOFF);
8821 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8822 char *p;
8823 int i;
8825 p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8826 ps_global->VAR_NORM_BACK_COLOR);
8827 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8828 html_putc(f, p[i]);
8832 html_write(f, "\015\012", 2);
8833 for(i = HTML_INDENT(f); i > 0; i--)
8834 html_putc(f, ' ');
8836 if(! STRIP(f)){ /* First tie, off any embedded state */
8837 if(BOLD_BIT(f)){
8838 html_putc(f, TAG_EMBED);
8839 html_putc(f, TAG_BOLDON);
8842 if(ULINE_BIT(f)){
8843 html_putc(f, TAG_EMBED);
8844 html_putc(f, TAG_ULINEON);
8847 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8848 char *p, *tfg, *tbg;
8849 int i;
8850 COLOR_PAIR *tmp;
8852 tfg = HD(f)->color->fg;
8853 tbg = HD(f)->color->bg;
8854 tmp = new_color_pair(tfg[0] ? tfg
8855 : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8856 tbg[0] ? tbg
8857 : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8858 if(pico_is_good_colorpair(tmp)){
8859 p = color_embed(tfg[0] ? tfg
8860 : ps_global->VAR_NORM_FORE_COLOR,
8861 tbg[0] ? tbg
8862 : ps_global->VAR_NORM_BACK_COLOR);
8863 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8864 html_putc(f, p[i]);
8867 if(tmp)
8868 free_color_pair(&tmp);
8875 * html_write - write given n-length string to next filter
8877 void
8878 html_write(FILTER_S *f, char *s, int n)
8880 GF_INIT(f, f->next);
8882 while(n-- > 0){
8883 /* keep track of attribute state? Not if last char! */
8884 if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
8885 GF_PUTC(f->next, TAG_EMBED);
8886 switch(*++s){
8887 case TAG_BOLDON :
8888 BOLD_BIT(f) = 1;
8889 break;
8890 case TAG_BOLDOFF :
8891 BOLD_BIT(f) = 0;
8892 break;
8893 case TAG_ULINEON :
8894 ULINE_BIT(f) = 1;
8895 break;
8896 case TAG_ULINEOFF :
8897 ULINE_BIT(f) = 0;
8898 break;
8899 case TAG_HANDLEOFF :
8900 HD(f)->in_anchor = 0;
8901 GF_PUTC(f->next, TAG_INVOFF);
8902 s++;
8903 continue;
8904 case TAG_HANDLE :
8905 if(n-- > 0){
8906 int i = *++s;
8908 GF_PUTC(f->next, TAG_HANDLE);
8909 if(i <= n){
8910 int anum = 0;
8911 HANDLE_S *h;
8913 n -= i;
8914 GF_PUTC(f->next, i);
8915 while(1){
8916 anum = (anum * 10) + (*++s - '0');
8917 if(--i)
8918 GF_PUTC(f->next, *s);
8919 else
8920 break;
8923 if(DO_HANDLES(f)
8924 && (h = get_handle(*HANDLESP(f), anum)) != NULL
8925 && (h->type == URL || h->type == Attach)){
8926 HD(f)->in_anchor = anum;
8931 break;
8932 default:
8933 break;
8937 GF_PUTC(f->next, (*s++) & 0xff);
8940 GF_IP_END(f->next); /* clean up next's input pointers */
8945 * html_putc -- actual work of writing to next filter.
8946 * NOTE: Small opt not using full GF_END since our input
8947 * pointers don't need adjusting.
8949 void
8950 html_putc(FILTER_S *f, int ch)
8952 GF_INIT(f, f->next);
8953 GF_PUTC(f->next, ch & 0xff);
8954 GF_IP_END(f->next); /* clean up next's input pointers */
8960 * Only current option is to turn on embedded data stripping for text
8961 * bound to a printer or composer.
8963 void *
8964 gf_html2plain_opt(char *base,
8965 int columns,
8966 int *margin,
8967 HANDLE_S **handlesp,
8968 htmlrisk_t risk_f,
8969 int flags)
8971 HTML_OPT_S *op;
8972 int margin_l, margin_r;
8974 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8976 op->base = cpystr(base);
8977 margin_l = (margin) ? margin[0] : 0;
8978 margin_r = (margin) ? margin[1] : 0;
8979 op->indent = margin_l;
8980 op->columns = columns - (margin_l + margin_r);
8981 op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8982 op->handlesp = handlesp;
8983 op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8984 op->showserver = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8985 op->warnrisk_f = risk_f;
8986 op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8987 op->related_content = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8988 op->html = ((flags & GFHP_HTML) == GFHP_HTML);
8989 op->html_imgs = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8990 op->element_table = html_element_table;
8991 return((void *) op);
8995 void *
8996 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
8998 HTML_OPT_S *op;
9000 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
9001 memset(op, 0, sizeof(HTML_OPT_S));
9003 op->base = cpystr("");
9004 op->element_table = rss_element_table;
9005 *(op->feedp = feedp) = NULL;
9006 return((void *) op);
9009 void
9010 gf_html2plain_rss_free(RSS_FEED_S **feedp)
9012 if(feedp && *feedp){
9013 if((*feedp)->title)
9014 fs_give((void **) &(*feedp)->title);
9016 if((*feedp)->link)
9017 fs_give((void **) &(*feedp)->link);
9019 if((*feedp)->description)
9020 fs_give((void **) &(*feedp)->description);
9022 if((*feedp)->source)
9023 fs_give((void **) &(*feedp)->source);
9025 if((*feedp)->image)
9026 fs_give((void **) &(*feedp)->image);
9028 gf_html2plain_rss_free_items(&((*feedp)->items));
9029 fs_give((void **) feedp);
9033 void
9034 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
9036 if(itemp && *itemp){
9037 if((*itemp)->title)
9038 fs_give((void **) &(*itemp)->title);
9040 if((*itemp)->link)
9041 fs_give((void **) &(*itemp)->link);
9043 if((*itemp)->description)
9044 fs_give((void **) &(*itemp)->description);
9046 if((*itemp)->source)
9047 fs_give((void **) &(*itemp)->source);
9049 gf_html2plain_rss_free_items(&(*itemp)->next);
9050 fs_give((void **) itemp);
9055 /* END OF HTML-TO-PLAIN text filter */
9058 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9059 * from the text stream.
9062 #define MAX_ESC_LEN 5
9065 * the simple filter, removes unknown escape codes from the stream
9067 void
9068 gf_escape_filter(FILTER_S *f, int flg)
9070 register char *p;
9071 GF_INIT(f, f->next);
9073 if(flg == GF_DATA){
9074 register unsigned char c;
9075 register int state = f->f1;
9077 while(GF_GETC(f, c)){
9079 if(state){
9080 if(c == '\033' || f->n == MAX_ESC_LEN){
9081 f->line[f->n] = '\0';
9082 f->n = 0L;
9083 if(!match_escapes(f->line)){
9084 GF_PUTC(f->next, '^');
9085 GF_PUTC(f->next, '[');
9087 else
9088 GF_PUTC(f->next, '\033');
9090 p = f->line;
9091 while(*p)
9092 GF_PUTC(f->next, *p++);
9094 if(c == '\033')
9095 continue;
9096 else
9097 state = 0; /* fall thru */
9099 else{
9100 f->line[f->n++] = c; /* collect */
9101 continue;
9105 if(c == '\033')
9106 state = 1;
9107 else
9108 GF_PUTC(f->next, c);
9111 f->f1 = state;
9112 GF_END(f, f->next);
9114 else if(flg == GF_EOD){
9115 if(f->f1){
9116 if(!match_escapes(f->line)){
9117 GF_PUTC(f->next, '^');
9118 GF_PUTC(f->next, '[');
9120 else
9121 GF_PUTC(f->next, '\033');
9124 for(p = f->line; f->n; f->n--, p++)
9125 GF_PUTC(f->next, *p);
9127 fs_give((void **)&(f->line)); /* free temp line buffer */
9128 (void) GF_FLUSH(f->next);
9129 (*f->next->f)(f->next, GF_EOD);
9131 else if(flg == GF_RESET){
9132 dprint((9, "-- gf_reset escape\n"));
9133 f->f1 = 0;
9134 f->n = 0L;
9135 f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9142 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9143 * corresponding string representations (you know, ^blah and such)...
9147 * the simple filter transforms unknown control characters in the stream
9148 * into harmless strings.
9150 void
9151 gf_control_filter(FILTER_S *f, int flg)
9153 GF_INIT(f, f->next);
9155 if(flg == GF_DATA){
9156 register unsigned char c;
9157 register int filt_only_c0;
9159 filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9161 while(GF_GETC(f, c)){
9163 if(((c < 0x20 || c == 0x7f)
9164 || (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9165 && !(ASCII_ISSPACE((unsigned char) c)
9166 || c == '\016' || c == '\017' || c == '\033')){
9167 GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9168 GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9170 else
9171 GF_PUTC(f->next, c);
9174 GF_END(f, f->next);
9176 else if(flg == GF_EOD){
9177 (void) GF_FLUSH(f->next);
9178 (*f->next->f)(f->next, GF_EOD);
/*
 * gf_control_filter_opt - called from outside to supply the control
 * filter's option: a pointer to an int which, when non-zero, says to
 * filter C0 control characters but not C1 control chars.  The pointer
 * is handed back unchanged as a void *.
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = filt_only_c0;

    return(opt);
}
9197 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9198 * This prevents the possibility of embedding other tags.
9199 * We assume that this filter should only be used for something
9200 * that is eventually writing to a display, which has the special
9201 * knowledge of quoted TAG_EMBEDs.
9203 void
9204 gf_tag_filter(FILTER_S *f, int flg)
9206 GF_INIT(f, f->next);
9208 if(flg == GF_DATA){
9209 register unsigned char c;
9211 while(GF_GETC(f, c)){
9213 if((c & 0xff) == (TAG_EMBED & 0xff)){
9214 GF_PUTC(f->next, TAG_EMBED);
9215 GF_PUTC(f->next, c);
9217 else
9218 GF_PUTC(f->next, c);
9221 GF_END(f, f->next);
9223 else if(flg == GF_EOD){
9224 (void) GF_FLUSH(f->next);
9225 (*f->next->f)(f->next, GF_EOD);
9231 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9232 * specified line width
9236 typedef struct wrap_col_s {
9237 unsigned bold:1;
9238 unsigned uline:1;
9239 unsigned inverse:1;
9240 unsigned tags:1;
9241 unsigned do_indent:1;
9242 unsigned on_comma:1;
9243 unsigned flowed:1;
9244 unsigned delsp:1;
9245 unsigned quoted:1;
9246 unsigned allwsp:1;
9247 unsigned hard_nl:1;
9248 unsigned leave_flowed:1;
9249 unsigned use_color:1;
9250 unsigned hdr_color:1;
9251 unsigned for_compose:1;
9252 unsigned handle_soft_hyphen:1;
9253 unsigned saw_soft_hyphen:1;
9254 unsigned trailing_space:1;
9255 unsigned char utf8buf[7];
9256 unsigned char *utf8bufp;
9257 COLOR_PAIR *color;
9258 STORE_S *spaces;
9259 short embedded,
9260 space_len;
9261 char *lineendp;
9262 int anchor,
9263 prefbrk,
9264 prefbrkn,
9265 quote_depth,
9266 quote_count,
9267 sig,
9268 state,
9269 wrap_col,
9270 wrap_max,
9271 margin_l,
9272 margin_r,
9273 indent;
9274 char special[256];
9275 } WRAP_S;
/*
 * Accessors for the WRAP_S hanging off a filter's opt pointer.  Each
 * expands to the member itself, so most can be assigned to as well as
 * read.  WRAP_COL, WRAP_MAX_COL and WRAP_COLOR_SET are computed values.
 */

/* margins and the usable wrap columns */
#define WRAP_MARG_L(F)              (((WRAP_S *)(F)->opt)->margin_l)
#define WRAP_MARG_R(F)              (((WRAP_S *)(F)->opt)->margin_r)
#define WRAP_COL(F)                 (((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define WRAP_MAX_COL(F)             (((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define WRAP_INDENT(F)              (((WRAP_S *)(F)->opt)->indent)
#define WRAP_DO_IND(F)              (((WRAP_S *)(F)->opt)->do_indent)
#define WRAP_COMMA(F)               (((WRAP_S *)(F)->opt)->on_comma)

/* flowed-text handling */
#define WRAP_FLOW(F)                (((WRAP_S *)(F)->opt)->flowed)
#define WRAP_DELSP(F)               (((WRAP_S *)(F)->opt)->delsp)
#define WRAP_FL_QD(F)               (((WRAP_S *)(F)->opt)->quote_depth)
#define WRAP_FL_QC(F)               (((WRAP_S *)(F)->opt)->quote_count)
#define WRAP_FL_SIG(F)              (((WRAP_S *)(F)->opt)->sig)
#define WRAP_HARD(F)                (((WRAP_S *)(F)->opt)->hard_nl)
#define WRAP_LV_FLD(F)              (((WRAP_S *)(F)->opt)->leave_flowed)

/* color, composer and soft-hyphen options */
#define WRAP_USE_CLR(F)             (((WRAP_S *)(F)->opt)->use_color)
#define WRAP_HDR_CLR(F)             (((WRAP_S *)(F)->opt)->hdr_color)
#define WRAP_FOR_CMPS(F)            (((WRAP_S *)(F)->opt)->for_compose)
#define WRAP_HANDLE_SOFT_HYPHEN(F)  (((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define WRAP_SAW_SOFT_HYPHEN(F)     (((WRAP_S *)(F)->opt)->saw_soft_hyphen)

/* UTF-8 collection buffer */
#define WRAP_UTF8BUF(F, C)          (((WRAP_S *)(F)->opt)->utf8buf[C])
#define WRAP_UTF8BUFP(F)            (((WRAP_S *)(F)->opt)->utf8bufp)

/* running state */
#define WRAP_STATE(F)               (((WRAP_S *)(F)->opt)->state)
#define WRAP_QUOTED(F)              (((WRAP_S *)(F)->opt)->quoted)
#define WRAP_TAGS(F)                (((WRAP_S *)(F)->opt)->tags)
#define WRAP_BOLD(F)                (((WRAP_S *)(F)->opt)->bold)
#define WRAP_ULINE(F)               (((WRAP_S *)(F)->opt)->uline)
#define WRAP_INVERSE(F)             (((WRAP_S *)(F)->opt)->inverse)
#define WRAP_LASTC(F)               (((WRAP_S *)(F)->opt)->lineendp)
#define WRAP_EMBED(F)               (((WRAP_S *)(F)->opt)->embedded)
#define WRAP_ANCHOR(F)              (((WRAP_S *)(F)->opt)->anchor)
#define WRAP_PB_OFF(F)              (((WRAP_S *)(F)->opt)->prefbrk)
#define WRAP_PB_LEN(F)              (((WRAP_S *)(F)->opt)->prefbrkn)
#define WRAP_ALLWSP(F)              (((WRAP_S *)(F)->opt)->allwsp)
#define WRAP_SPC_LEN(F)             (((WRAP_S *)(F)->opt)->space_len)
#define WRAP_TRL_SPC(F)             (((WRAP_S *)(F)->opt)->trailing_space)
#define WRAP_SPEC(F, C)             ((WRAP_S *) (F)->opt)->special[C]
#define WRAP_COLOR(F)               (((WRAP_S *)(F)->opt)->color)
#define WRAP_COLOR_SET(F)           ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define WRAP_SPACES(F)              (((WRAP_S *)(F)->opt)->spaces)
9316 #define WRAP_PUTC(F,C,W) { \
9317 if((F)->linep == WRAP_LASTC(F)){ \
9318 size_t offset = (F)->linep - (F)->line; \
9319 fs_resize((void **) &(F)->line, \
9320 (2 * offset) * sizeof(char)); \
9321 (F)->linep = &(F)->line[offset]; \
9322 WRAP_LASTC(F) = &(F)->line[2*offset-1]; \
9324 *(F)->linep++ = (C); \
9325 (F)->f2 += (W); \
9328 #define WRAP_EMBED_PUTC(F,C) { \
9329 if((F)->f2){ \
9330 WRAP_PUTC((F), C, 0); \
9332 else \
9333 so_writec(C, WRAP_SPACES(F)); \
9336 #define WRAP_COLOR_UNSET(F) { \
9337 if(WRAP_COLOR_SET(F)){ \
9338 WRAP_COLOR(F)->fg[0] = '\0'; \
9343 * wrap_flush_embed flags
9345 #define WFE_NONE 0 /* Nothing special */
9346 #define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9349 int wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9350 int wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
9351 unsigned char **, unsigned char **);
9352 int wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
9353 unsigned char **, unsigned char **, int);
9354 int wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
9355 unsigned char **, unsigned char **);
9356 int wrap_bol(FILTER_S *, int, int, unsigned char **,
9357 unsigned char **, unsigned char **, unsigned char **);
9358 int wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
9359 unsigned char **, unsigned char **);
9362 * the no longer simple filter, breaks lines at end of white space nearest
9363 * to global "gf_wrap_width" in length
9364 * It also supports margins, indents (inverse indenting, really) and
9365 * flowed text (ala RFC 3676)
9368 void
9369 gf_wrap(FILTER_S *f, int flg)
9371 register long i;
9372 GF_INIT(f, f->next);
9375 * f->f1 state
9376 * f->line buffer where next "word" being considered is stored
9377 * f->f2 width in screen cells of f->line stuff
9378 * f->n width in screen cells of the part of this line committed to next
9379 * filter so far
9382 if(flg == GF_DATA){
9383 register unsigned char c;
9384 register int state = f->f1;
9385 int width, full_character;
9387 while(GF_GETC(f, c)){
9389 switch(state){
9390 case CCR : /* CRLF or CR in text ? */
9391 state = BOL; /* either way, handle start */
9393 if(WRAP_FLOW(f)){
9394 /* wrapped line? */
9395 if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9397 * whack trailing space char, but be aware
9398 * of embeds in space buffer. grok them just
9399 * in case they contain a 0x20 value
9401 if(WRAP_DELSP(f)){
9402 char *sb, *sbp, *scp = NULL;
9403 int x;
9405 for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9406 switch(*sbp){
9407 case ' ' :
9408 scp = sbp;
9409 break;
9411 case TAG_EMBED :
9412 sbp++;
9413 switch (*sbp++){
9414 case TAG_HANDLE :
9415 x = (int) *sbp++;
9416 if(strlen(sbp) >= x)
9417 sbp += (x - 1);
9419 break;
9421 case TAG_FGCOLOR :
9422 case TAG_BGCOLOR :
9423 if(strlen(sbp) >= RGBLEN)
9424 sbp += (RGBLEN - 1);
9426 break;
9428 default :
9429 break;
9432 break;
9434 default :
9435 break;
9439 /* replace space buf without trailing space char */
9440 if(scp){
9441 STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9443 *scp++ = '\0';
9444 WRAP_SPC_LEN(f)--;
9445 WRAP_TRL_SPC(f) = 0;
9447 so_puts(ns, sb);
9448 so_puts(ns, scp);
9450 so_give(&WRAP_SPACES(f));
9451 WRAP_SPACES(f) = ns;
9455 else{ /* fixed line */
9456 WRAP_HARD(f) = 1;
9457 wrap_flush(f, &ip, &eib, &op, &eob);
9458 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9461 * When we get to a real end of line, we don't need to
9462 * remember what the special color was anymore because
9463 * we aren't going to be changing back to it. We unset it
9464 * so that we don't keep resetting the color to normal.
9466 WRAP_COLOR_UNSET(f);
9469 if(c == '\012'){ /* get c following LF */
9470 break;
9472 /* else c is first char of new line, fall thru */
9474 else{
9475 wrap_flush(f, &ip, &eib, &op, &eob);
9476 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9477 WRAP_COLOR_UNSET(f); /* see note above */
9478 if(c == '\012'){
9479 break;
9481 /* else fall thru to deal with beginning of line */
9484 case BOL :
9485 if(WRAP_FLOW(f)){
9486 if(c == '>'){
9487 WRAP_FL_QC(f) = 1; /* init it */
9488 state = FL_QLEV; /* go collect it */
9490 else {
9491 /* if EMBEDed, process it and return here */
9492 if(c == (unsigned char) TAG_EMBED){
9493 WRAP_EMBED_PUTC(f, TAG_EMBED);
9494 WRAP_STATE(f) = state;
9495 state = TAG;
9496 continue;
9499 /* quote level change implies new paragraph */
9500 if(WRAP_FL_QD(f)){
9501 WRAP_FL_QD(f) = 0;
9502 if(WRAP_HARD(f) == 0){
9503 WRAP_HARD(f) = 1;
9504 wrap_flush(f, &ip, &eib, &op, &eob);
9505 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9506 WRAP_COLOR_UNSET(f); /* see note above */
9510 if(WRAP_HARD(f)){
9511 wrap_bol(f, 0, 1, &ip, &eib, &op,
9512 &eob); /* write quoting prefix */
9513 WRAP_HARD(f) = 0;
9516 switch (c) {
9517 case '\015' : /* a blank line? */
9518 wrap_flush(f, &ip, &eib, &op, &eob);
9519 state = CCR; /* go collect it */
9520 break;
9522 case ' ' : /* space stuffed */
9523 state = FL_STF; /* just eat it */
9524 break;
9526 case '-' : /* possible sig-dash */
9527 WRAP_FL_SIG(f) = 1; /* init state */
9528 state = FL_SIG; /* go collect it */
9529 break;
9531 default :
9532 state = DFL; /* go back to normal */
9533 goto case_dfl; /* handle c like DFL case */
9537 else{
9538 state = DFL;
9539 if(WRAP_COMMA(f) && c == TAB){
9540 wrap_bol(f, 1, 0, &ip, &eib, &op,
9541 &eob); /* convert to normal indent */
9542 break;
9545 wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9546 goto case_dfl; /* handle c like DFL case */
9549 break;
9551 case FL_QLEV :
9552 if(c == '>'){ /* another level */
9553 WRAP_FL_QC(f)++;
9555 else {
9556 /* if EMBEDed, process it and return here */
9557 if(c == (unsigned char) TAG_EMBED){
9558 WRAP_EMBED_PUTC(f, TAG_EMBED);
9559 WRAP_STATE(f) = state;
9560 state = TAG;
9561 continue;
9564 /* quote level change signals new paragraph */
9565 if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9566 WRAP_FL_QD(f) = WRAP_FL_QC(f);
9567 if(WRAP_HARD(f) == 0){ /* add hard newline */
9568 WRAP_HARD(f) = 1; /* hard newline */
9569 wrap_flush(f, &ip, &eib, &op, &eob);
9570 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9571 WRAP_COLOR_UNSET(f); /* see note above */
9575 if(WRAP_HARD(f)){
9576 wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9577 WRAP_HARD(f) = 0;
9580 switch (c) {
9581 case '\015' : /* a blank line? */
9582 wrap_flush(f, &ip, &eib, &op, &eob);
9583 state = CCR; /* go collect it */
9584 break;
9586 case ' ' : /* space-stuffed! */
9587 state = FL_STF; /* just eat it */
9588 break;
9590 case '-' : /* sig dash? */
9591 WRAP_FL_SIG(f) = 1;
9592 state = FL_SIG;
9593 break;
9595 default : /* something else */
9596 state = DFL;
9597 goto case_dfl; /* handle c like DFL */
9601 break;
9603 case FL_STF : /* space stuffed */
9604 switch (c) {
9605 case '\015' : /* a blank line? */
9606 wrap_flush(f, &ip, &eib, &op, &eob);
9607 state = CCR; /* go collect it */
9608 break;
9610 case (unsigned char) TAG_EMBED : /* process TAG data */
9611 WRAP_EMBED_PUTC(f, TAG_EMBED);
9612 WRAP_STATE(f) = state; /* and return */
9613 state = TAG;
9614 continue;
9616 case '-' : /* sig dash? */
9617 WRAP_FL_SIG(f) = 1;
9618 WRAP_ALLWSP(f) = 0;
9619 state = FL_SIG;
9620 break;
9622 default : /* something else */
9623 state = DFL;
9624 goto case_dfl; /* handle c like DFL */
9627 break;
9629 case FL_SIG : /* sig-dash collector */
9630 switch (WRAP_FL_SIG(f)){ /* possible sig-dash? */
9631 case 1 :
9632 if(c != '-'){ /* not a sigdash */
9633 if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9634 wrap_flush_embed(f, &ip, &eib, &op,
9635 &eob); /* note any embedded*/
9636 wrap_eol(f, 1, &ip, &eib,
9637 &op, &eob); /* plunk down newline */
9638 wrap_bol(f, 1, 1, &ip, &eib,
9639 &op, &eob); /* write any prefix */
9642 WRAP_PUTC(f,'-', 1); /* write what we got */
9644 WRAP_FL_SIG(f) = 0;
9645 state = DFL;
9646 goto case_dfl;
9649 /* don't put anything yet until we know to wrap or not */
9650 WRAP_FL_SIG(f) = 2;
9651 break;
9653 case 2 :
9654 if(c != ' '){ /* not a sigdash */
9655 WRAP_PUTC(f, '-', 1);
9656 if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9657 wrap_flush_embed(f, &ip, &eib, &op,
9658 &eob); /* note any embedded*/
9659 wrap_eol(f, 1, &ip, &eib,
9660 &op, &eob); /* plunk down newline */
9661 wrap_bol(f, 1, 1, &ip, &eib, &op,
9662 &eob); /* write any prefix */
9665 WRAP_PUTC(f,'-', 1); /* write what we got */
9667 WRAP_FL_SIG(f) = 0;
9668 state = DFL;
9669 goto case_dfl;
9672 /* don't put anything yet until we know to wrap or not */
9673 WRAP_FL_SIG(f) = 3;
9674 break;
9676 case 3 :
9677 if(c == '\015'){ /* success! */
9678 /* known sigdash, newline if soft nl */
9679 if(WRAP_SPC_LEN(f)){
9680 wrap_flush(f, &ip, &eib, &op, &eob);
9681 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9682 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9684 WRAP_PUTC(f,'-',1);
9685 WRAP_PUTC(f,'-',1);
9686 WRAP_PUTC(f,' ',1);
9688 state = CCR;
9689 break;
9691 else{
9692 WRAP_FL_SIG(f) = 4; /* possible success */
9695 case 4 :
9696 switch(c){
9697 case (unsigned char) TAG_EMBED :
9699 * At this point we're almost 100% sure that we've got
9700 * a sigdash. Putc it (adding newline if previous
9701 * was a soft nl) so we get it the right color
9702 * before we store this new embedded stuff
9704 if(WRAP_SPC_LEN(f)){
9705 wrap_flush(f, &ip, &eib, &op, &eob);
9706 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9707 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9709 WRAP_PUTC(f,'-',1);
9710 WRAP_PUTC(f,'-',1);
9711 WRAP_PUTC(f,' ',1);
9713 WRAP_FL_SIG(f) = 5;
9714 break;
9716 case '\015' : /* success! */
9718 * We shouldn't get here, but in case we do, we have
9719 * not yet put the sigdash
9721 if(WRAP_SPC_LEN(f)){
9722 wrap_flush(f, &ip, &eib, &op, &eob);
9723 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9724 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9726 WRAP_PUTC(f,'-',1);
9727 WRAP_PUTC(f,'-',1);
9728 WRAP_PUTC(f,' ',1);
9730 state = CCR;
9731 break;
9733 default : /* that's no sigdash! */
9734 /* write what we got but didn't put yet */
9735 WRAP_PUTC(f,'-', 1);
9736 WRAP_PUTC(f,'-', 1);
9737 WRAP_PUTC(f,' ', 1);
9739 WRAP_FL_SIG(f) = 0;
9740 wrap_flush(f, &ip, &eib, &op, &eob);
9741 WRAP_SPC_LEN(f) = 1;
9742 state = DFL; /* set normal state */
9743 goto case_dfl; /* and go do "c" */
9746 break;
9748 case 5 :
9749 WRAP_STATE(f) = FL_SIG; /* come back here */
9750 WRAP_FL_SIG(f) = 6; /* and seek EOL */
9751 WRAP_EMBED_PUTC(f, TAG_EMBED);
9752 state = TAG; /* process embed */
9753 goto case_tag;
9755 case 6 :
9757 * at this point we've already putc the sigdash in case 4
9759 switch(c){
9760 case (unsigned char) TAG_EMBED :
9761 WRAP_FL_SIG(f) = 5;
9762 break;
9764 case '\015' : /* success! */
9765 state = CCR;
9766 break;
9768 default : /* that's no sigdash! */
9770 * probably never reached (fake sigdash with embedded
9771 * stuff) but if this did get reached, then we
9772 * might have accidentally disobeyed a soft nl
9774 WRAP_FL_SIG(f) = 0;
9775 wrap_flush(f, &ip, &eib, &op, &eob);
9776 WRAP_SPC_LEN(f) = 1;
9777 state = DFL; /* set normal state */
9778 goto case_dfl; /* and go do "c" */
9781 break;
9784 default :
9785 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9786 WRAP_FL_SIG(f)));
9787 WRAP_FL_SIG(f) = 0;
9788 state = DFL; /* set normal state */
9789 goto case_dfl; /* and go process "c" */
9792 break;
9794 case_dfl :
9795 case DFL :
9797 * This was just if(WRAP_SPEC(f, c)) before the change to add
9798 * the == 0 test. This isn't quite right, either. We should really
9799 * be looking for special characters in the UCS characters, not
9800 * in the incoming stream of UTF-8. It is not right to
9801 * call this on bytes that are in the middle of a UTF-8 character,
9802 * hence the == 0 test which restricts it to the first byte
9803 * of a character. This isn't right, either, but it's closer.
9804 * Also change the definition of WRAP_SPEC so that isspace only
9805 * matches ascii characters, which will never be in the middle
9806 * of a UTF-8 multi-byte character.
9808 if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9809 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9810 switch(c){
9811 default :
9812 if(WRAP_QUOTED(f))
9813 break;
9815 if(f->f2){ /* any non-lwsp to flush? */
9816 if(WRAP_COMMA(f)){
9817 /* remember our second best break point */
9818 WRAP_PB_OFF(f) = f->linep - f->line;
9819 WRAP_PB_LEN(f) = f->f2;
9820 break;
9822 else
9823 wrap_flush(f, &ip, &eib, &op, &eob);
9826 switch(c){ /* remember separator */
9827 case ' ' :
9828 WRAP_SPC_LEN(f)++;
9829 WRAP_TRL_SPC(f) = 1;
9830 so_writec(' ',WRAP_SPACES(f));
9831 break;
9833 case TAB :
9835 int i = (int) f->n + WRAP_SPC_LEN(f);
9838 WRAP_SPC_LEN(f)++;
9839 while(++i & 0x07);
9841 so_writec(TAB,WRAP_SPACES(f));
9842 WRAP_TRL_SPC(f) = 0;
9845 break;
9847 default : /* some control char? */
9848 WRAP_SPC_LEN(f) += 2;
9849 WRAP_TRL_SPC(f) = 0;
9850 break;
9853 continue;
9855 case '\"' :
9856 WRAP_QUOTED(f) = !WRAP_QUOTED(f);
9857 break;
9859 case '\015' : /* already has newline? */
9860 state = CCR;
9861 continue;
9863 case '\012' : /* bare LF in text? */
9864 wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
9865 wrap_eol(f, 0, &ip, &eib, &op, &eob); /* meant */
9866 wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
9867 continue;
9869 case (unsigned char) TAG_EMBED :
9870 WRAP_EMBED_PUTC(f, TAG_EMBED);
9871 WRAP_STATE(f) = state;
9872 state = TAG;
9873 continue;
9875 case ',' :
9876 if(!WRAP_QUOTED(f)){
9877 /* handle this special case in general code below */
9878 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
9879 && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
9880 break;
9882 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
9883 if(WRAP_ALLWSP(f)) /* if anything visible */
9884 wrap_flush(f, &ip, &eib, &op,
9885 &eob); /* ... blat buf'd chars */
9887 wrap_eol(f, 1, &ip, &eib, &op,
9888 &eob); /* plunk down newline */
9889 wrap_bol(f, 1, 1, &ip, &eib, &op,
9890 &eob); /* write any prefix */
9893 WRAP_PUTC(f, ',', 1); /* put out comma */
9894 wrap_flush(f, &ip, &eib, &op,
9895 &eob); /* write buf'd chars */
9896 continue;
9899 break;
9902 else if(WRAP_HANDLE_SOFT_HYPHEN(f)
9903 && (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
9904 && WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
9906 * This is a soft hyphen. If there is enough space for
9907 * a real hyphen to fit on the line here then we can
9908 * flush everything up to before the soft hyphen,
9909 * and simply remember that we saw a soft hyphen.
9910 * If it turns out that we can't fit the next piece in
9911 * then wrap_eol will append a real hyphen to the line.
9912 * If we can fit another piece in it will be because we've
9913 * reached the next break point. At that point we'll flush
9914 * everything but won't include the unneeded hyphen. We erase
9915 * the fact that we saw this soft hyphen because it have
9916 * become irrelevant.
9918 * If the hyphen is the character that puts us over the edge
9919 * we go through the else case.
9922 /* erase this soft hyphen character from buffer */
9923 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
9925 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9926 if(f->f2) /* any non-lwsp to flush? */
9927 wrap_flush(f, &ip, &eib, &op, &eob);
9929 /* remember that we saw the soft hyphen */
9930 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9932 else{
9934 * Everything up to the hyphen fits, otherwise it
9935 * would have already been flushed the last time
9936 * through the loop. But the hyphen won't fit. So
9937 * we need to go back to the last line break and
9938 * break there instead. Then start a new line with
9939 * the buffered up characters and the soft hyphen.
9941 wrap_flush_embed(f, &ip, &eib, &op, &eob);
9942 wrap_eol(f, 1, &ip, &eib, &op,
9943 &eob); /* plunk down newline */
9944 wrap_bol(f,1,1, &ip, &eib, &op,
9945 &eob); /* write any prefix */
9948 * Now we're in the same situation as we would have
9949 * been above except we're on a new line. Try to
9950 * flush out the characters seen up to the hyphen.
9952 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9953 if(f->f2) /* any non-lwsp to flush? */
9954 wrap_flush(f, &ip, &eib, &op, &eob);
9956 /* remember that we saw the soft hyphen */
9957 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9959 else
9960 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9963 continue;
9966 full_character = 0;
9969 unsigned char *inputp;
9970 unsigned long remaining_octets;
9971 UCS ucs;
9973 if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){ /* always true */
9975 *WRAP_UTF8BUFP(f)++ = c;
9976 remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9977 if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
9978 full_character++;
9979 if(c == TAB){
9980 int i = (int) f->n;
9982 while(i & 0x07)
9983 i++;
9985 width = i - f->n;
9987 else if(c < 0x80 && iscntrl((unsigned char) c))
9988 width = 2;
9989 else
9990 width = 1;
9992 else{
9993 inputp = &WRAP_UTF8BUF(f, 0);
9994 ucs = (UCS) utf8_get(&inputp, &remaining_octets);
9995 switch(ucs){
9996 case U8G_ENDSTRG: /* incomplete character, wait */
9997 case U8G_ENDSTRI: /* incomplete character, wait */
9998 width = 0;
9999 break;
10001 default:
10002 if(ucs & U8G_ERROR || ucs == UBOGON){
10004 * None of these cases is supposed to happen. If it
10005 * does happen then the input stream isn't UTF-8
10006 * so something is wrong. Writechar will treat
10007 * each octet in the input buffer as a separate
10008 * error character and print a '?' for each,
10009 * so the width will be the number of octets.
10011 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10012 full_character++;
10014 else{
10015 /* got a character */
10016 width = wcellwidth(ucs);
10017 full_character++;
10019 if(width < 0){
10021 * This happens when we have a UTF-8 character that
10022 * we aren't able to print in our locale. For example,
10023 * if the locale is setup with the terminal
10024 * expecting ISO-8859-1 characters then there are
10025 * lots of UTF-8 characters that can't be printed.
10026 * Print a '?' instead.
10028 width = 1;
10032 break;
10036 else{
10038 * This cannot happen because an error would have
10039 * happened at least by character #6. So if we get
10040 * here there is a bug in utf8_get().
10042 if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
10043 *WRAP_UTF8BUFP(f)++ = c;
10047 * We could possibly do some more sophisticated
10048 * resynchronization here, but we aren't doing
10049 * anything in Writechar so it wouldn't match up
10050 * with that anyway. Just figure each character will
10051 * end up being printed as a ? character.
10053 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10054 full_character++;
10058 if(WRAP_ALLWSP(f)){
10060 * Nothing is visible yet but the first word may be too long
10061 * all by itself. We need to break early.
10063 if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
10065 * A little reaching behind the curtain here.
10066 * if there's at least a preferable break point, use
10067 * it and stuff what's left back into the wrap buffer.
10068 * The "nwsp" latch is used to skip leading whitespace
10069 * The second half of the test prevents us from wrapping
10070 * at the preferred break point in the case that it
10071 * is so early in the line that it doesn't help.
10072 * That is, the width of the indent is even more than
10073 * the width of the first part before the preferred
10074 * break point. An example would be breaking after
10075 * "To:" when the indent is 4 which is > 3.
10077 if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
10078 char *p1 = f->line + WRAP_PB_OFF(f);
10079 char *p2 = f->linep;
10080 char c2;
10081 int nwsp = 0, left_after_wrap;
10083 left_after_wrap = f->f2 - WRAP_PB_LEN(f);
10085 f->f2 = WRAP_PB_LEN(f);
10086 f->linep = p1;
10088 wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
10090 /* put back rest of characters */
10091 while(p1 < p2){
10092 c2 = *p1++;
10093 if(!(c2 == ' ' || c2 == '\t') || nwsp){
10094 WRAP_PUTC(f, c2, 0);
10095 nwsp = 1;
10097 else
10098 left_after_wrap--; /* wrong if a tab! */
10101 f->f2 = MAX(left_after_wrap, 0);
10103 wrap_eol(f, 1, &ip, &eib, &op,
10104 &eob); /* plunk down newline */
10105 wrap_bol(f,1,1, &ip, &eib, &op,
10106 &eob); /* write any prefix */
10109 * What's this for?
10110 * If we do the less preferable break point at
10111 * the space we don't want to lose the fact that
10112 * we might be able to break at this comma for
10113 * the next one.
10115 if(full_character && c == ','){
10116 WRAP_PUTC(f, c, 1);
10117 wrap_flush(f, &ip, &eib, &op, &eob);
10118 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10121 else{
10122 wrap_flush(f, &ip, &eib, &op, &eob);
10124 wrap_eol(f, 1, &ip, &eib, &op,
10125 &eob); /* plunk down newline */
10126 wrap_bol(f,1,1, &ip, &eib, &op,
10127 &eob); /* write any prefix */
10131 else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10132 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10133 wrap_eol(f, 1, &ip, &eib, &op,
10134 &eob); /* plunk down newline */
10135 wrap_bol(f,1,1, &ip, &eib, &op,
10136 &eob); /* write any prefix */
10140 * Commit entire multibyte UTF-8 character at once
10141 * instead of writing partial characters into the
10142 * buffer.
10144 if(full_character){
10145 unsigned char *q;
10147 for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10148 WRAP_PUTC(f, *q, width);
10149 width = 0;
10152 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10155 break;
10157 case_tag :
10158 case TAG :
10159 WRAP_EMBED_PUTC(f, c);
10160 switch(c){
10161 case TAG_HANDLE :
10162 WRAP_EMBED(f) = -1;
10163 state = HANDLE;
10164 break;
10166 case TAG_FGCOLOR :
10167 case TAG_BGCOLOR :
10168 WRAP_EMBED(f) = RGBLEN;
10169 state = HDATA;
10170 break;
10172 default :
10173 state = WRAP_STATE(f);
10174 break;
10177 break;
10179 case HANDLE :
10180 WRAP_EMBED_PUTC(f, c);
10181 WRAP_EMBED(f) = c;
10182 state = HDATA;
10183 break;
10185 case HDATA :
10186 if(f->f2){
10187 WRAP_PUTC(f, c, 0);
10189 else
10190 so_writec(c, WRAP_SPACES(f));
10192 if(!(WRAP_EMBED(f) -= 1)){
10193 state = WRAP_STATE(f);
10196 break;
10200 f->f1 = state;
10201 GF_END(f, f->next);
10203 else if(flg == GF_EOD){
10204 wrap_flush(f, &ip, &eib, &op, &eob);
10205 if(WRAP_COLOR(f))
10206 free_color_pair(&WRAP_COLOR(f));
10208 fs_give((void **) &f->line); /* free temp line buffer */
10209 so_give(&WRAP_SPACES(f));
10210 fs_give((void **) &f->opt); /* free wrap widths struct */
10211 (void) GF_FLUSH(f->next);
10212 (*f->next->f)(f->next, GF_EOD);
10214 else if(flg == GF_RESET){
10215 dprint((9, "-- gf_reset wrap\n"));
10216 f->f1 = BOL;
10217 f->n = 0L; /* displayed length of line so far */
10218 f->f2 = 0; /* displayed length of buffered chars */
10219 WRAP_HARD(f) = 1; /* starting at beginning of line */
10220 if(! (WRAP_S *) f->opt)
10221 f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
10223 while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10224 WRAP_INDENT(f) /= 2;
10226 f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10227 f->linep = f->line;
10228 WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
10230 for(i = 0; i < 256; i++)
10231 ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10232 || i == '\015'
10233 || i == '\012'
10234 || (i == (unsigned char) TAG_EMBED
10235 && WRAP_TAGS(f))
10236 || (i == ',' && WRAP_COMMA(f)
10237 && !WRAP_QUOTED(f))
10238 || ASCII_ISSPACE(i));
10239 WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10240 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10245 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10246 unsigned char **opp, unsigned char **eobp)
10248 register char *s;
10249 register int n;
10251 s = (char *)so_text(WRAP_SPACES(f));
10252 n = so_tell(WRAP_SPACES(f));
10253 so_seek(WRAP_SPACES(f), 0L, 0);
10254 wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10255 so_truncate(WRAP_SPACES(f), 0L);
10256 WRAP_SPC_LEN(f) = 0;
10257 WRAP_TRL_SPC(f) = 0;
10258 s = f->line;
10259 n = f->linep - f->line;
10260 wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10261 f->f2 = 0;
10262 f->linep = f->line;
10263 WRAP_PB_OFF(f) = 0;
10264 WRAP_PB_LEN(f) = 0;
10266 return 0;
10270 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10272 register char *s;
10273 register int n;
10274 s = (char *)so_text(WRAP_SPACES(f));
10275 n = so_tell(WRAP_SPACES(f));
10276 so_seek(WRAP_SPACES(f), 0L, 0);
10277 wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10278 so_truncate(WRAP_SPACES(f), 0L);
10279 WRAP_SPC_LEN(f) = 0;
10280 WRAP_TRL_SPC(f) = 0;
10282 return 0;
10286 wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
10287 unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
10289 f->n += w;
10291 for(; n > 0; n--,s++){
10292 if(*s == TAG_EMBED){
10293 if(n-- > 0){
10294 switch(*++s){
10295 case TAG_BOLDON :
10296 GF_PUTC_GLO(f->next,TAG_EMBED);
10297 GF_PUTC_GLO(f->next,TAG_BOLDON);
10298 WRAP_BOLD(f) = 1;
10299 break;
10300 case TAG_BOLDOFF :
10301 GF_PUTC_GLO(f->next,TAG_EMBED);
10302 GF_PUTC_GLO(f->next,TAG_BOLDOFF);
10303 WRAP_BOLD(f) = 0;
10304 break;
10305 case TAG_ULINEON :
10306 GF_PUTC_GLO(f->next,TAG_EMBED);
10307 GF_PUTC_GLO(f->next,TAG_ULINEON);
10308 WRAP_ULINE(f) = 1;
10309 break;
10310 case TAG_ULINEOFF :
10311 GF_PUTC_GLO(f->next,TAG_EMBED);
10312 GF_PUTC_GLO(f->next,TAG_ULINEOFF);
10313 WRAP_ULINE(f) = 0;
10314 break;
10315 case TAG_INVOFF :
10316 GF_PUTC_GLO(f->next,TAG_EMBED);
10317 GF_PUTC_GLO(f->next,TAG_INVOFF);
10318 WRAP_ANCHOR(f) = 0;
10319 break;
10320 case TAG_HANDLE :
10321 if((flags & WFE_CNT_HANDLE) == 0)
10322 GF_PUTC_GLO(f->next,TAG_EMBED);
10324 if(n-- > 0){
10325 int i = *++s;
10327 if((flags & WFE_CNT_HANDLE) == 0)
10328 GF_PUTC_GLO(f->next, TAG_HANDLE);
10330 if(i <= n){
10331 n -= i;
10333 if((flags & WFE_CNT_HANDLE) == 0)
10334 GF_PUTC_GLO(f->next, i);
10336 WRAP_ANCHOR(f) = 0;
10337 while(i-- > 0){
10338 WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');
10340 if((flags & WFE_CNT_HANDLE) == 0)
10341 GF_PUTC_GLO(f->next,*s);
10346 break;
10347 case TAG_FGCOLOR :
10348 if(pico_usingcolor() && n >= RGBLEN){
10349 int i;
10350 GF_PUTC_GLO(f->next,TAG_EMBED);
10351 GF_PUTC_GLO(f->next,TAG_FGCOLOR);
10352 if(!WRAP_COLOR(f))
10353 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10354 strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
10355 WRAP_COLOR(f)->fg[RGBLEN]='\0';
10356 i = RGBLEN;
10357 n -= i;
10358 while(i-- > 0)
10359 GF_PUTC_GLO(f->next,
10360 (*++s) & 0xff);
10362 break;
10363 case TAG_BGCOLOR :
10364 if(pico_usingcolor() && n >= RGBLEN){
10365 int i;
10366 GF_PUTC_GLO(f->next,TAG_EMBED);
10367 GF_PUTC_GLO(f->next,TAG_BGCOLOR);
10368 if(!WRAP_COLOR(f))
10369 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10370 strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
10371 WRAP_COLOR(f)->bg[RGBLEN]='\0';
10372 i = RGBLEN;
10373 n -= i;
10374 while(i-- > 0)
10375 GF_PUTC_GLO(f->next,
10376 (*++s) & 0xff);
10378 break;
10379 default :
10380 break;
10384 else if(w){
10386 if(f->n <= WRAP_MAX_COL(f)){
10387 GF_PUTC_GLO(f->next, (*s) & 0xff);
10389 else{
10390 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
10393 WRAP_ALLWSP(f) = 0;
10397 return 0;
10401 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10402 unsigned char **opp, unsigned char **eobp)
10404 if(WRAP_SAW_SOFT_HYPHEN(f)){
10405 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10406 GF_PUTC_GLO(f->next, '-'); /* real hyphen */
10409 if(c && WRAP_LV_FLD(f))
10410 GF_PUTC_GLO(f->next, ' ');
10412 if(WRAP_BOLD(f)){
10413 GF_PUTC_GLO(f->next, TAG_EMBED);
10414 GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10417 if(WRAP_ULINE(f)){
10418 GF_PUTC_GLO(f->next, TAG_EMBED);
10419 GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10422 if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10423 GF_PUTC_GLO(f->next, TAG_EMBED);
10424 GF_PUTC_GLO(f->next, TAG_INVOFF);
10427 if(WRAP_COLOR_SET(f)){
10428 char *p;
10429 char cb[RGBLEN+1];
10430 GF_PUTC_GLO(f->next, TAG_EMBED);
10431 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10432 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10433 cb[sizeof(cb)-1] = '\0';
10434 p = cb;
10435 for(; *p; p++)
10436 GF_PUTC_GLO(f->next, *p);
10437 GF_PUTC_GLO(f->next, TAG_EMBED);
10438 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10439 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10440 cb[sizeof(cb)-1] = '\0';
10441 p = cb;
10442 for(; *p; p++)
10443 GF_PUTC_GLO(f->next, *p);
10446 GF_PUTC_GLO(f->next, '\015');
10447 GF_PUTC_GLO(f->next, '\012');
10448 f->n = 0L;
10449 so_truncate(WRAP_SPACES(f), 0L);
10450 WRAP_SPC_LEN(f) = 0;
10451 WRAP_TRL_SPC(f) = 0;
10453 return 0;
10457 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10458 unsigned char **opp, unsigned char **eobp)
10460 int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10462 if(WRAP_HDR_CLR(f)){
10463 char *p;
10464 char cbuf[RGBLEN+1];
10465 int k;
10467 if((k = WRAP_MARG_L(f)) > 0)
10468 while(k-- > 0){
10469 n--;
10470 f->n++;
10471 GF_PUTC_GLO(f->next, ' ');
10474 GF_PUTC_GLO(f->next, TAG_EMBED);
10475 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10476 strncpy(cbuf,
10477 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10478 sizeof(cbuf));
10479 cbuf[sizeof(cbuf)-1] = '\0';
10480 p = cbuf;
10481 for(; *p; p++)
10482 GF_PUTC_GLO(f->next, *p);
10483 GF_PUTC_GLO(f->next, TAG_EMBED);
10484 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10485 strncpy(cbuf,
10486 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10487 sizeof(cbuf));
10488 cbuf[sizeof(cbuf)-1] = '\0';
10489 p = cbuf;
10490 for(; *p; p++)
10491 GF_PUTC_GLO(f->next, *p);
10494 while(n-- > 0){
10495 f->n++;
10496 GF_PUTC_GLO(f->next, ' ');
10499 WRAP_ALLWSP(f) = 1;
10501 if(q)
10502 wrap_quote_insert(f, ipp, eibp, opp, eobp);
10504 if(WRAP_BOLD(f)){
10505 GF_PUTC_GLO(f->next, TAG_EMBED);
10506 GF_PUTC_GLO(f->next, TAG_BOLDON);
10508 if(WRAP_ULINE(f)){
10509 GF_PUTC_GLO(f->next, TAG_EMBED);
10510 GF_PUTC_GLO(f->next, TAG_ULINEON);
10512 if(WRAP_INVERSE(f)){
10513 GF_PUTC_GLO(f->next, TAG_EMBED);
10514 GF_PUTC_GLO(f->next, TAG_INVON);
10516 if(WRAP_COLOR_SET(f)){
10517 char *p;
10518 if(WRAP_COLOR(f)->fg[0]){
10519 char cb[RGBLEN+1];
10520 GF_PUTC_GLO(f->next, TAG_EMBED);
10521 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10522 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10523 cb[sizeof(cb)-1] = '\0';
10524 p = cb;
10525 for(; *p; p++)
10526 GF_PUTC_GLO(f->next, *p);
10528 if(WRAP_COLOR(f)->bg[0]){
10529 char cb[RGBLEN+1];
10530 GF_PUTC_GLO(f->next, TAG_EMBED);
10531 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10532 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10533 cb[sizeof(cb)-1] = '\0';
10534 p = cb;
10535 for(; *p; p++)
10536 GF_PUTC_GLO(f->next, *p);
10539 if(WRAP_ANCHOR(f)){
10540 char buf[64]; int i;
10541 GF_PUTC_GLO(f->next, TAG_EMBED);
10542 GF_PUTC_GLO(f->next, TAG_HANDLE);
10543 snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10544 GF_PUTC_GLO(f->next, (int) strlen(buf));
10545 for(i = 0; buf[i]; i++)
10546 GF_PUTC_GLO(f->next, buf[i]);
10549 return 0;
10553 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10554 unsigned char **opp, unsigned char **eobp)
10556 int j, i;
10557 COLOR_PAIR *col = NULL;
10558 char *prefix = NULL, *last_prefix = NULL;
10560 if(ps_global->VAR_QUOTE_REPLACE_STRING){
10561 get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10562 if(!prefix && last_prefix){
10563 prefix = last_prefix;
10564 last_prefix = NULL;
10568 for(j = 0; j < WRAP_FL_QD(f); j++){
10569 if(WRAP_USE_CLR(f)){
10570 if((j % 3) == 0
10571 && ps_global->VAR_QUOTE1_FORE_COLOR
10572 && ps_global->VAR_QUOTE1_BACK_COLOR
10573 && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10574 ps_global->VAR_QUOTE1_BACK_COLOR))
10575 && pico_is_good_colorpair(col)){
10576 GF_COLOR_PUTC(f, col);
10578 else if((j % 3) == 1
10579 && ps_global->VAR_QUOTE2_FORE_COLOR
10580 && ps_global->VAR_QUOTE2_BACK_COLOR
10581 && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10582 ps_global->VAR_QUOTE2_BACK_COLOR))
10583 && pico_is_good_colorpair(col)){
10584 GF_COLOR_PUTC(f, col);
10586 else if((j % 3) == 2
10587 && ps_global->VAR_QUOTE3_FORE_COLOR
10588 && ps_global->VAR_QUOTE3_BACK_COLOR
10589 && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10590 ps_global->VAR_QUOTE3_BACK_COLOR))
10591 && pico_is_good_colorpair(col)){
10592 GF_COLOR_PUTC(f, col);
10594 if(col){
10595 free_color_pair(&col);
10596 col = NULL;
10600 if(!WRAP_LV_FLD(f)){
10601 if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10602 for(i = 0; prefix[i]; i++)
10603 GF_PUTC_GLO(f->next, prefix[i]);
10604 f->n += utf8_width(prefix);
10606 else if(ps_global->VAR_REPLY_STRING
10607 && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10608 || !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10609 GF_PUTC_GLO(f->next, '>');
10610 f->n += 1;
10612 else{
10613 GF_PUTC_GLO(f->next, '>');
10614 GF_PUTC_GLO(f->next, ' ');
10615 f->n += 2;
10618 else{
10619 GF_PUTC_GLO(f->next, '>');
10620 f->n += 1;
10623 if(j && WRAP_LV_FLD(f)){
10624 GF_PUTC_GLO(f->next, ' ');
10625 f->n++;
10627 else if(j && last_prefix){
10628 for(i = 0; last_prefix[i]; i++)
10629 GF_PUTC_GLO(f->next, last_prefix[i]);
10630 f->n += utf8_width(last_prefix);
10633 if(prefix)
10634 fs_give((void **)&prefix);
10635 if(last_prefix)
10636 fs_give((void **)&last_prefix);
10638 return 0;
10643 * function called from the outside to set
10644 * wrap filter's width option
10646 void *
10647 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10649 WRAP_S *wrap;
10651 /* NOTE: variables MUST be sanity checked before they get here */
10652 wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10653 memset(wrap, 0, sizeof(WRAP_S));
10654 wrap->wrap_col = width;
10655 wrap->wrap_max = width_max;
10656 wrap->indent = indent;
10657 wrap->margin_l = (margin) ? margin[0] : 0;
10658 wrap->margin_r = (margin) ? margin[1] : 0;
10659 wrap->tags = (GFW_HANDLES & flags) == GFW_HANDLES;
10660 wrap->on_comma = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10661 wrap->flowed = (GFW_FLOWED & flags) == GFW_FLOWED;
10662 wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10663 wrap->delsp = (GFW_DELSP & flags) == GFW_DELSP;
10664 wrap->use_color = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10665 wrap->hdr_color = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10666 wrap->for_compose = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10667 wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10669 return((void *) wrap);
10673 void *
10674 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10676 if(uh){
10677 memset(uh, 0, sizeof(URL_HILITE_S));
10678 uh->handlesp = handlesp;
10679 uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10682 return((void *) uh);
/*
 * Per-filter state of the preflow filter, hung off the filter's opt
 * pointer.
 */
typedef struct preflow_s {
    int quote_depth;    /* quote level of the previous line */
    int quote_count;    /* '>' characters counted so far on this line */
    int sig;            /* progress through a "-- " signature line, 0..4 */
} PREFLOW_S;

/* accessors for the state above, given the filter */
#define PF_QD(F)    (((PREFLOW_S *)(F)->opt)->quote_depth)
#define PF_QC(F)    (((PREFLOW_S *)(F)->opt)->quote_count)
#define PF_SIG(F)   (((PREFLOW_S *)(F)->opt)->sig)
10697 * This would normally be handled in gf_wrap. If there is a possibility
10698 * that a url we want to recognize is cut in half by a soft newline we
10699 * want to fix that up by putting the halves back together. We do that
10700 * by deleting the soft newline and putting it all in one line. It will
10701 * still get wrapped later in gf_wrap. It isn't pretty with all the
10702 * goto's, but whatta ya gonna do?
10704 void
10705 gf_preflow(FILTER_S *f, int flg)
10707 GF_INIT(f, f->next);
10709 if(flg == GF_DATA){
10710 register unsigned char c;
10711 register int state = f->f1;
10712 register int pending = f->f2;
10714 while(GF_GETC(f, c)){
10715 switch(state){
10716 case DFL:
10717 default_case:
10718 switch(c){
10719 case ' ':
10720 state = WSPACE;
10721 break;
10723 case '\015':
10724 state = CCR;
10725 break;
10727 default:
10728 GF_PUTC(f->next, c);
10729 break;
10732 break;
10734 case CCR:
10735 switch(c){
10736 case '\012':
10737 pending = 1;
10738 state = BOL;
10739 break;
10741 default:
10742 GF_PUTC(f->next, '\012');
10743 state = DFL;
10744 goto default_case;
10745 break;
10748 break;
10750 case WSPACE:
10751 switch(c){
10752 case '\015':
10753 state = SPACECR;
10754 break;
10756 default:
10757 GF_PUTC(f->next, ' ');
10758 state = DFL;
10759 goto default_case;
10760 break;
10763 break;
10765 case SPACECR:
10766 switch(c){
10767 case '\012':
10768 pending = 2;
10769 state = BOL;
10770 break;
10772 default:
10773 GF_PUTC(f->next, ' ');
10774 GF_PUTC(f->next, '\012');
10775 state = DFL;
10776 goto default_case;
10777 break;
10780 break;
10782 case BOL:
10783 PF_QC(f) = 0;
10784 if(c == '>'){ /* count quote level */
10785 PF_QC(f)++;
10786 state = FL_QLEV;
10788 else{
10789 done_counting_quotes:
10790 if(c == ' '){ /* eat stuffed space */
10791 state = FL_STF;
10792 break;
10795 done_with_stuffed_space:
10796 if(c == '-'){ /* look for signature */
10797 PF_SIG(f) = 1;
10798 state = FL_SIG;
10799 break;
10802 done_with_sig:
10803 if(pending == 2){
10804 if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
10805 /* delete pending */
10807 PF_QD(f) = PF_QC(f);
10809 /* suppress quotes, too */
10810 PF_QC(f) = 0;
10812 else{
10814 * This should have been a hard new line
10815 * instead so leave out the trailing space.
10817 GF_PUTC(f->next, '\015');
10818 GF_PUTC(f->next, '\012');
10820 PF_QD(f) = PF_QC(f);
10823 else if(pending == 1){
10824 GF_PUTC(f->next, '\015');
10825 GF_PUTC(f->next, '\012');
10826 PF_QD(f) = PF_QC(f);
10828 else{
10829 PF_QD(f) = PF_QC(f);
10832 pending = 0;
10833 state = DFL;
10834 while(PF_QC(f)-- > 0)
10835 GF_PUTC(f->next, '>');
10837 switch(PF_SIG(f)){
10838 case 0:
10839 default:
10840 break;
10842 case 1:
10843 GF_PUTC(f->next, '-');
10844 break;
10846 case 2:
10847 GF_PUTC(f->next, '-');
10848 GF_PUTC(f->next, '-');
10849 break;
10851 case 3:
10852 case 4:
10853 GF_PUTC(f->next, '-');
10854 GF_PUTC(f->next, '-');
10855 GF_PUTC(f->next, ' ');
10856 break;
10859 PF_SIG(f) = 0;
10860 goto default_case; /* to handle c */
10863 break;
10865 case FL_QLEV: /* count quote level */
10866 if(c == '>')
10867 PF_QC(f)++;
10868 else
10869 goto done_counting_quotes;
10871 break;
10873 case FL_STF: /* eat stuffed space */
10874 goto done_with_stuffed_space;
10875 break;
10877 case FL_SIG: /* deal with sig indicator */
10878 switch(PF_SIG(f)){
10879 case 1: /* saw '-' */
10880 if(c == '-')
10881 PF_SIG(f) = 2;
10882 else
10883 goto done_with_sig;
10885 break;
10887 case 2: /* saw '--' */
10888 if(c == ' ')
10889 PF_SIG(f) = 3;
10890 else
10891 goto done_with_sig;
10893 break;
10895 case 3: /* saw '-- ' */
10896 if(c == '\015')
10897 PF_SIG(f) = 4; /* it really is a sig line */
10899 goto done_with_sig;
10900 break;
10903 break;
10907 f->f1 = state;
10908 f->f2 = pending;
10909 GF_END(f, f->next);
10911 else if(flg == GF_EOD){
10912 fs_give((void **) &f->opt);
10913 (void) GF_FLUSH(f->next);
10914 (*f->next->f)(f->next, GF_EOD);
10916 else if(flg == GF_RESET){
10917 PREFLOW_S *pf;
10919 pf = (PREFLOW_S *) fs_get(sizeof(*pf));
10920 memset(pf, 0, sizeof(*pf));
10921 f->opt = (void *) pf;
10923 f->f1 = BOL; /* state */
10924 f->f2 = 0; /* pending */
10925 PF_QD(f) = 0; /* quote depth */
10926 PF_QC(f) = 0; /* quote count */
10927 PF_SIG(f) = 0; /* sig level */
/*
 * LINE PREFIX FILTER - insert given text at beginning of each
 * line
 */

/*
 * Write the NUL-terminated string s, if it is not NULL, to the next
 * filter.  Expects the filter to be in scope under the name f.
 */
#define GF_PREFIX_WRITE(s)  { \
                                register char *pfx_; \
                                for(pfx_ = (s); pfx_ != NULL && *pfx_; ) \
                                  GF_PUTC(f->next, *pfx_++); \
                            }
10949 * the simple filter, prepends each line with the requested prefix.
10950 * if prefix is null, does nothing, and as with all filters, assumes
10951 * NVT end of lines.
10953 void
10954 gf_prefix(FILTER_S *f, int flg)
10956 GF_INIT(f, f->next);
10958 if(flg == GF_DATA){
10959 register unsigned char c;
10960 register int state = f->f1;
10961 register int first = f->f2;
10963 while(GF_GETC(f, c)){
10965 if(first){ /* write initial prefix!! */
10966 first = 0; /* but just once */
10967 GF_PREFIX_WRITE((char *) f->opt);
10971 * State == 0 is the starting state and the usual state.
10972 * State == 1 means we saw a CR and haven't acted on it yet.
10973 * We are looking for a LF to get the CRLF end of line.
10974 * However, we also treat bare CR and bare LF as if they
10975 * were CRLF sequences. What else could it mean in text?
10976 * This filter is only used for text so that is probably
10977 * a reasonable interpretation of the bad input.
10979 if(c == '\015'){ /* CR */
10980 if(state){ /* Treat pending CR as endofline, */
10981 GF_PUTC(f->next, '\015'); /* and remain in saw-a-CR state. */
10982 GF_PUTC(f->next, '\012');
10983 GF_PREFIX_WRITE((char *) f->opt);
10985 else{
10986 state = 1;
10989 else if(c == '\012'){ /* LF */
10990 GF_PUTC(f->next, '\015'); /* Got either a CRLF or a bare LF, */
10991 GF_PUTC(f->next, '\012'); /* treat both as if a CRLF. */
10992 GF_PREFIX_WRITE((char *) f->opt);
10993 state = 0;
10995 else{ /* any other character */
10996 if(state){
10997 GF_PUTC(f->next, '\015'); /* Treat pending CR as endofline. */
10998 GF_PUTC(f->next, '\012');
10999 GF_PREFIX_WRITE((char *) f->opt);
11000 state = 0;
11003 GF_PUTC(f->next, c);
11007 f->f1 = state; /* save state for next chunk of data */
11008 f->f2 = first;
11009 GF_END(f, f->next);
11011 else if(flg == GF_EOD){
11012 (void) GF_FLUSH(f->next);
11013 (*f->next->f)(f->next, GF_EOD);
11015 else if(flg == GF_RESET){
11016 dprint((9, "-- gf_reset prefix\n"));
11017 f->f1 = 0;
11018 f->f2 = 1; /* nothing written yet */
/*
 * gf_prefix_opt - function called from the outside to set the prefix
 *                 filter's prefix string
 *
 * The pointer is handed back as is; no copy of the string is made here.
 */
void *
gf_prefix_opt(char *prefix)
{
    void *opt = (void *) prefix;

    return(opt);
}
11035 * LINE TEST FILTER - accumulate lines and offer each to the provided
11036 * test function.
11039 typedef struct _linetest_s {
11040 linetest_t f;
11041 void *local;
11042 } LINETEST_S;
/* accumulator growth increment */
#define LINE_TEST_BLOCK     1024

/* address of the last byte of the accumulator, whose size is kept in f2 */
#define GF_LINE_TEST_EOB(f) \
        ((f)->line + ((f)->f2 - 1))

/*
 * Append c to the accumulator, growing it by LINE_TEST_BLOCK when the
 * write position has reached the last byte.  Expects the locals p (write
 * position) and eobuf (see GF_LINE_TEST_EOB) in the invoking function and
 * updates both.
 */
#define GF_LINE_TEST_ADD(f, c) \
        { \
            if(p >= eobuf){ \
                (f)->f2 += LINE_TEST_BLOCK; \
                fs_resize((void **)&(f)->line, \
                          (size_t) (f)->f2 * sizeof(char)); \
                eobuf = GF_LINE_TEST_EOB(f); \
                p = eobuf - LINE_TEST_BLOCK; \
            } \
            *p++ = (c); \
        }

/*
 * Terminate the accumulated line at p, offer it to the test function and
 * leave that function's answer in D:
 *
 *      D <  0   the line and opt are freed and gf_error() is called
 *               (which does not return)
 *      D <  2   the line is written to the next filter, with the
 *               insertions the test function asked for spliced in: an
 *               entry with len > 0 adds its text in front of the
 *               character at "where", one with len < 0 skips -len
 *               characters starting there; entries still unused at the
 *               end of the line are written after it
 *      D >= 2   nothing is written
 *
 * The line terminator itself is left for the invoking code to write.
 * Expects the local p in the invoking function.
 */
#define GF_LINE_TEST_TEST(F, D) \
        { \
            unsigned char  ins_ch; \
            register char *src; \
            register int   k; \
            LT_INS_S      *ins = NULL, *cur; \
            *p = '\0'; \
            (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
                                                  (F)->line, &ins, \
                                                  ((LINETEST_S *) (F)->opt)->local); \
            if((D) < 2){ \
                if((D) < 0){ \
                    if((F)->line) \
                      fs_give((void **) &(F)->line); \
                    if((F)->opt) \
                      fs_give((void **) &(F)->opt); \
                    gf_error(_("translation error")); \
                    /* NO RETURN */ \
                } \
                for(cur = ins, src = (F)->line; src < p; ){ \
                    if(cur && src == cur->where){ \
                        if(cur->len > 0){ \
                            for(k = 0; k < cur->len; k++){ \
                                ins_ch = (unsigned char) cur->text[k]; \
                                GF_PUTC((F)->next, ins_ch); \
                            } \
                            cur = cur->next; \
                            continue; \
                        } else if(cur->len < 0){ \
                            src -= cur->len; \
                            cur = cur->next; \
                            continue; \
                        } \
                    } \
                    GF_PUTC((F)->next, *src); \
                    src++; \
                } \
                while(cur){ \
                    for(k = 0; k < cur->len; k++){ \
                        ins_ch = (unsigned char) cur->text[k]; \
                        GF_PUTC((F)->next, ins_ch); \
                    } \
                    cur = cur->next; \
                } \
                gf_line_test_free_ins(&ins); \
            } \
        }
11114 * this simple filter accumulates characters until a newline, offers it
11115 * to the provided test function, and then passes it on. It assumes
11116 * NVT EOLs.
11118 void
11119 gf_line_test(FILTER_S *f, int flg)
11121 register char *p = f->linep;
11122 register char *eobuf = GF_LINE_TEST_EOB(f);
11123 GF_INIT(f, f->next);
11125 if(flg == GF_DATA){
11126 register unsigned char c;
11127 register int state = f->f1;
11129 while(GF_GETC(f, c)){
11131 if(state){
11132 state = 0;
11133 if(c == '\012'){
11134 int done;
11136 GF_LINE_TEST_TEST(f, done);
11138 p = (f)->line;
11140 if(done == 2) /* skip this line! */
11141 continue;
11143 GF_PUTC(f->next, '\015');
11144 GF_PUTC(f->next, '\012');
11146 * if the line tester returns TRUE, it's
11147 * telling us its seen enough and doesn't
11148 * want to see any more. Remove ourself
11149 * from the pipeline...
11151 if(done){
11152 if(gf_master == f){
11153 gf_master = f->next;
11155 else{
11156 FILTER_S *fprev;
11158 for(fprev = gf_master;
11159 fprev && fprev->next != f;
11160 fprev = fprev->next)
11163 if(fprev) /* wha??? */
11164 fprev->next = f->next;
11165 else
11166 continue;
11169 while(GF_GETC(f, c)) /* pass input */
11170 GF_PUTC(f->next, c);
11172 (void) GF_FLUSH(f->next); /* and drain queue */
11173 fs_give((void **)&f->line);
11174 fs_give((void **)&f); /* wax our data */
11175 return;
11177 else
11178 continue;
11180 else /* add CR to buffer */
11181 GF_LINE_TEST_ADD(f, '\015');
11182 } /* fall thru to handle 'c' */
11184 if(c == '\015') /* newline? */
11185 state = 1;
11186 else
11187 GF_LINE_TEST_ADD(f, c);
11190 f->f1 = state;
11191 GF_END(f, f->next);
11193 else if(flg == GF_EOD){
11194 int i;
11196 GF_LINE_TEST_TEST(f, i); /* examine remaining data */
11197 fs_give((void **) &f->line); /* free line buffer */
11198 fs_give((void **) &f->opt); /* free test struct */
11199 (void) GF_FLUSH(f->next);
11200 (*f->next->f)(f->next, GF_EOD);
11202 else if(flg == GF_RESET){
11203 dprint((9, "-- gf_reset line_test\n"));
11204 f->f1 = 0; /* state */
11205 f->n = 0L; /* line number */
11206 f->f2 = LINE_TEST_BLOCK; /* size of alloc'd line */
11207 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11210 f->linep = p;
11215 * function called from the outside to operate on accumulated line.
11217 void *
11218 gf_line_test_opt(linetest_t test_f, void *local)
11220 LINETEST_S *ltp;
11222 ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11223 memset(ltp, 0, sizeof(LINETEST_S));
11224 ltp->f = test_f;
11225 ltp->local = local;
11226 return((void *) ltp);
11231 LT_INS_S **
11232 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11234 *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11235 if(((*ins)->len = n) > 0)
11236 strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11237 else
11238 (*ins)->text = NULL;
11240 (*ins)->where = p;
11241 (*ins)->next = NULL;
11242 return(&(*ins)->next);
11246 void
11247 gf_line_test_free_ins(LT_INS_S **ins)
11249 if(ins && *ins){
11250 if((*ins)->next)
11251 gf_line_test_free_ins(&(*ins)->next);
11253 if((*ins)->text)
11254 fs_give((void **) &(*ins)->text);
11256 fs_give((void **) ins);
11262 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11263 * with editorial comment
11266 typedef struct _preped_s {
11267 prepedtest_t f;
11268 char *text;
11269 } PREPED_S;
11273 * gf_prepend_editorial - accumulate filtered text and prepend its
11274 * output with given text
11278 void
11279 gf_prepend_editorial(FILTER_S *f, int flg)
11281 GF_INIT(f, f->next);
11283 if(flg == GF_DATA){
11284 register unsigned char c;
11286 while(GF_GETC(f, c)){
11287 so_writec(c, (STORE_S *) f->data);
11290 GF_END(f, f->next);
11292 else if(flg == GF_EOD){
11293 unsigned char c;
11295 if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11296 char *p = ((PREPED_S *)(f)->opt)->text;
11298 for( ; p && *p; p++)
11299 GF_PUTC(f->next, *p);
11302 so_seek((STORE_S *) f->data, 0L, 0);
11303 while(so_readc(&c, (STORE_S *) f->data)){
11304 GF_PUTC(f->next, c);
11307 so_give((STORE_S **) &f->data);
11308 fs_give((void **) &f->opt);
11309 (void) GF_FLUSH(f->next);
11310 (*f->next->f)(f->next, GF_EOD);
11312 else if(flg == GF_RESET){
11313 dprint((9, "-- gf_reset line_test\n"));
11314 f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11320 * function called from the outside to setup prepending editorial
11321 * to output text
11323 void *
11324 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11326 PREPED_S *pep;
11328 pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11329 memset(pep, 0, sizeof(PREPED_S));
11330 pep->f = test_f;
11331 pep->text = text;
11332 return((void *) pep);
11337 * Network virtual terminal to local newline convention filter
11339 void
11340 gf_nvtnl_local(FILTER_S *f, int flg)
11342 GF_INIT(f, f->next);
11344 if(flg == GF_DATA){
11345 register unsigned char c;
11346 register int state = f->f1;
11348 while(GF_GETC(f, c)){
11349 if(state){
11350 state = 0;
11351 if(c == '\012'){
11352 GF_PUTC(f->next, '\012');
11353 continue;
11355 else
11356 GF_PUTC(f->next, '\015');
11357 /* fall thru to deal with 'c' */
11360 if(c == '\015')
11361 state = 1;
11362 else
11363 GF_PUTC(f->next, c);
11366 f->f1 = state;
11367 GF_END(f, f->next);
11369 else if(flg == GF_EOD){
11370 (void) GF_FLUSH(f->next);
11371 (*f->next->f)(f->next, GF_EOD);
11373 else if(flg == GF_RESET){
11374 dprint((9, "-- gf_reset nvtnl_local\n"));
11375 f->f1 = 0;
11381 * local to network newline convention filter
11383 void
11384 gf_local_nvtnl(FILTER_S *f, int flg)
11386 GF_INIT(f, f->next);
11388 if(flg == GF_DATA){
11389 register unsigned char c;
11391 while(GF_GETC(f, c)){
11392 if(c == '\012'){
11393 GF_PUTC(f->next, '\015');
11394 GF_PUTC(f->next, '\012');
11396 else if(c != '\015') /* do not copy isolated \015 into source */
11397 GF_PUTC(f->next, c);
11400 GF_END(f, f->next);
11402 else if(flg == GF_EOD){
11403 (void) GF_FLUSH(f->next);
11404 (*f->next->f)(f->next, GF_EOD);
11406 else if(GF_RESET){
11407 dprint((9, "-- gf_reset local_nvtnl\n"));
11408 /* no op */
11413 void
11414 free_filter_module_globals(void)
11416 FILTER_S *flt, *fltn = gf_master;
11418 while((flt = fltn) != NULL){ /* free list of old filters */
11419 fltn = flt->next;
11420 fs_give((void **)&flt);