* More changes to make Valgrind happy. Work in progress.
[alpine.git] / pith / filter.c
blobea98a4eab74e49c0850817ed308edb38466e18ed
1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
5 /*
6 * ========================================================================
7 * Copyright 2013-2018 Eduardo Chappa
8 * Copyright 2006-2008 University of Washington
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
20 filter.c
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
30 call another filter).
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
38 TODO:
39 reasonable error handling
41 ====*/
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
65 * Internal prototypes
/* storage-object writer/reader; handed out by gf_set_so_writec() and gf_set_so_readc() */
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
/* FILE * readers/writers; the _locale variants convert from/to the user's locale charset */
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
72 int gf_fwritec(int);
73 int gf_fwritec_locale(int);
74 #ifdef _WINDOWS
75 int gf_freadc_windows(unsigned char *);
76 #endif /* _WINDOWS */
/* PIPE_S * readers/writers */
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
80 int gf_pwritec(int);
81 int gf_pwritec_locale(int);
/* in-memory string readers/writers */
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
85 int gf_swritec(int);
86 int gf_swritec_locale(int);
/* filter-chain plumbing: terminal filter, error exit, and the helpers used by gf_filter() */
87 void gf_terminal(FILTER_S *, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
/* NOTE(review): gf_8bit_put is not defined in the part of the file visible here */
92 void gf_8bit_put(FILTER_S *, int);
97 * System specific options
/* CRLF_NEWLINES is tested by gf_link_filter(), which then skips the gf_nvtnl_local and gf_local_nvtnl filters */
99 #ifdef _WINDOWS
100 #define CRLF_NEWLINES
101 #endif
105 * Hooks for callers to adjust behavior
/* NOTE(review): neither hook is called in the part of the file visible here -- confirm usage further down */
107 char *(*pith_opt_pretty_var_name)(char *);
108 char *(*pith_opt_pretty_feature_name)(char *, int);
112 * pointer to first function in a pipe, and pointer to last filter
/* head of the filter chain: gf_link_filter() appends to it, gf_filter_init() frees it */
114 FILTER_S *gf_master = NULL;
/* output function called by gf_terminal(); installed with gf_set_terminal() */
115 static gf_io_t last_filter;
/* message recorded by gf_error() and returned by gf_pipe() and gf_filter_puts() */
116 static char *gf_error_string;
/* bytes read by gf_pipe() since the last gf_filter_init(); see gf_bytes_piped() */
117 static long gf_byte_count;
/* setjmp() context that gf_error() longjmp()s back to */
118 static jmp_buf gf_error_state;
/*
 * Command values passed as the second argument of every filter function.
 * In the code visible here GF_RESET is sent by gf_link_filter(), GF_DATA
 * by GF_FLUSH and GF_EOD once the input is exhausted.
 * NOTE(review): GF_NOOP and GF_ERROR are not used in the visible part of the file.
 */
121 #define GF_NOOP 0x01 /* flags used by generalized */
122 #define GF_EOD 0x02 /* filters */
123 #define GF_DATA 0x04 /* See filter.c for more */
124 #define GF_ERROR 0x08 /* details */
125 #define GF_RESET 0x10
129 * A list of states used by the various filters. Reused in many filters.
/* plain integer tags; each filter gives them its own meaning */
131 #define DFL 0
132 #define EQUAL 1
133 #define HEX 2
134 #define WSPACE 3
135 #define CCR 4
136 #define CLF 5
137 #define TOKEN 6
138 #define TAG 7
139 #define HANDLE 8
140 #define HDATA 9
141 #define ESC 10
142 #define ESCDOL 11
143 #define ESCPAR 12
144 #define EUC 13
145 #define BOL 14
146 #define FL_QLEV 15
147 #define FL_STF 16
148 #define FL_SIG 17
149 #define STOP_DECODING 18
150 #define SPACECR 19
155 * Macros to reduce function call overhead associated with calling
156 * each filter for each byte filtered, and to minimize filter structure
157 * dereferences. NOTE: "queuein" has to do with putting chars into the
158 * filter structs data queue. So, writing at the queuein offset is
159 * what a filter does to pass processed data out of itself. Ditto for
160 * queueout. This explains the FI --> queueout init stuff below.
/* first slot of a filter's queue, and the slot at which GF_PUTC triggers a flush */
162 #define GF_QUE_START(F) (&(F)->queue[0])
163 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
/*
 * Set the cursor variables from a filter's stored offsets (NULL when
 * the filter pointer is NULL):
 *   ip/eib -- write position and flush limit in the filter being fed
 *   op/eob -- read position and end of data in the filter being drained
 * The _GLO forms do the same through the pointers ipp and eibp.
 */
165 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
166 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
167 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
168 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
169 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
170 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
/* write the cursor positions back into the filter as queue offsets */
172 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
173 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
174 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
/* declares and initializes op, eob (from FI) and ip, eib (from FO); must sit where declarations are legal */
176 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
177 unsigned char *GF_EOB_INIT(FI); \
178 unsigned char *GF_IP_INIT(FO); \
179 unsigned char *GF_EIB_INIT(FO);
/* mark F's queue empty; the whole expression evaluates to 0 */
181 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
182 (F)->queueout = (F)->queuein = 0)
/* save both cursors before leaving a filter */
184 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
/* store ip, call F with GF_DATA, then reload ip and eib; yields 1 when eib is non-NULL afterwards */
186 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
187 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
188 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
189 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
/* append C to F's queue, flushing as soon as ip reaches eib; C is evaluated once, F several times */
191 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
192 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
195 * Introducing the *_GLO macros for use in splitting the big macros out
196 * into functions (wrap_flush, wrap_eol). The reason we need a
197 * separate macro is because of the vars ip, eib, op, and eob, which are
198 * set up locally in a call to GF_INIT. To preserve these variables
199 * in the new functions, we now pass pointers to these four vars. Each
200 * of these new functions expects the presence of pointer vars
201 * ipp, eibp, opp, and eobp.
/* fetch the next queued byte into C and yield 1; when the queue is drained, reset it and yield 0 */
204 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
/*
 * Write TAG_EMBED TAG_FGCOLOR <fg rgb text> TAG_EMBED TAG_BGCOLOR <bg rgb text>
 * to F->next, using color_to_asciirgb() for the color text.  Uses the
 * _GLO put macro, so ipp and eibp must be in scope.
 */
206 #define GF_COLOR_PUTC(F, C) { \
207 char *p; \
208 char cb[RGBLEN+1]; \
209 GF_PUTC_GLO((F)->next, TAG_EMBED); \
210 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
211 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
212 cb[sizeof(cb)-1] = '\0'; \
213 p = cb; \
214 for(; *p; p++) \
215 GF_PUTC_GLO((F)->next, *p); \
216 GF_PUTC_GLO((F)->next, TAG_EMBED); \
217 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
218 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
219 cb[sizeof(cb)-1] = '\0'; \
220 p = cb; \
221 for(; *p; p++) \
222 GF_PUTC_GLO((F)->next, *p); \
226 * Generalized getc and putc routines, provided here so they don't
227 * need to be re-done elsewhere.
231 * pointers to objects to be used by the generic getc and putc
232 * functions
/*
 * State behind the generic readers (gf_in) and writers (gf_out).  It is
 * filled in by gf_set_readc() and gf_set_writec(); only the member that
 * matches the source type is set.
 */
234 static struct gf_io_struct {
/* stdio stream, set for FileStar sources */
235 FILE *file;
/* pipe handle, set for PipeStar sources */
236 PIPE_S *pipe;
/* current position in the in-memory buffer, used for every other source type */
237 char *txtp;
/* bytes left to read from, or room left to write to, txtp */
238 unsigned long n;
/* caller's flags (READ_FROM_LOCALE / WRITE_TO_LOCALE are tested when the function is chosen) */
239 int flags;
/* conversion state carried between calls of the _locale variants */
240 CBUF_S cb;
241 } gf_in, gf_out;
/*
 * Stacks of storage objects for gf_so_readc() and gf_so_writec(); the
 * top entry is the one in use.  Entries are pushed by gf_set_so_*() and
 * popped by gf_clear_so_*().
 */
243 #define GF_SO_STACK struct gf_so_stack
244 static GF_SO_STACK {
245 STORE_S *so;
246 GF_SO_STACK *next;
247 } *gf_so_in, *gf_so_out;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc)
261 return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 gf_so_out->so->src == ExternalText);
268 * setup to use and return a pointer to the generic
269 * getc function
271 void
272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
274 gf_in.n = len;
275 gf_in.flags = flags;
276 gf_in.cb.cbuf[0] = '\0';
277 gf_in.cb.cbufp = gf_in.cb.cbuf;
278 gf_in.cb.cbufend = gf_in.cb.cbuf;
280 if(src == FileStar){
281 gf_in.file = (FILE *)txt;
282 fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 : gf_freadc;
286 #else /* UNIX */
287 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 : gf_freadc;
289 #endif /* UNIX */
291 else if(src == PipeStar){
292 gf_in.pipe = (PIPE_S *)txt;
293 *gc = gf_preadc;
294 *gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 : gf_preadc;
297 else{
298 gf_in.txtp = (char *)txt;
299 *gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 : gf_sreadc;
306 * setup to use and return a pointer to the generic
307 * putc function
309 void
310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
312 gf_out.n = len;
313 gf_out.flags = flags;
314 gf_out.cb.cbuf[0] = '\0';
315 gf_out.cb.cbufp = gf_out.cb.cbuf;
316 gf_out.cb.cbufend = gf_out.cb.cbuf;
318 if(src == FileStar){
319 gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 *pc = gf_fwritec;
322 #else /* UNIX */
323 *pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 : gf_fwritec;
325 #endif /* UNIX */
327 else if(src == PipeStar){
328 gf_out.pipe = (PIPE_S *)txt;
329 *pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 : gf_pwritec;
332 else{
333 gf_out.txtp = (char *)txt;
334 *pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 : gf_swritec;
341 * setup to use and return a pointer to the generic
342 * getc function
344 void
345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
347 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
349 sp->so = so;
350 sp->next = gf_so_in;
351 gf_so_in = sp;
352 *gc = gf_so_readc;
356 void
357 gf_clear_so_readc(STORE_S *so)
359 GF_SO_STACK *sp;
361 if((sp = gf_so_in) != NULL){
362 if(so == sp->so){
363 gf_so_in = gf_so_in->next;
364 fs_give((void **) &sp);
366 else
367 alpine_panic("Programmer botch: Can't unstack store readc");
369 else
370 alpine_panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
376 * putc function
378 void
379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
381 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
383 sp->so = so;
384 sp->next = gf_so_out;
385 gf_so_out = sp;
386 *pc = gf_so_writec;
390 void
391 gf_clear_so_writec(STORE_S *so)
393 GF_SO_STACK *sp;
395 if((sp = gf_so_out) != NULL){
396 if(so == sp->so){
397 gf_so_out = gf_so_out->next;
398 fs_give((void **) &sp);
400 else
401 alpine_panic("Programmer botch: Can't unstack store writec");
403 else
404 alpine_panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
412 gf_so_writec(int c)
414 return(so_writec(c, gf_so_out->so));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c)
424 return(so_readc(c, gf_so_in->so));
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
431 gf_freadc(unsigned char *c)
433 int rv = 0;
435 do {
436 errno = 0;
437 clearerr(gf_in.file);
438 rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439 } while(!rv && ferror(gf_in.file) && errno == EINTR);
441 return(rv);
446 gf_freadc_locale(unsigned char *c)
448 return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
/*
 * gf_freadc_getchar - gf_freadc() with the stream passed as an argument.
 *
 * This is just to make it work with generic_readc_locale(): extraarg is
 * the FILE * to read from.  Returns 1 when a byte was stored in *c, 0 on
 * end of file or error; reads interrupted by a signal are retried.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE *stream = (FILE *) extraarg;
    int   got;

    for(;;){
        errno = 0;
        clearerr(stream);
        got = fread(c, sizeof(unsigned char), (size_t)1, stream);

        /* only an interrupted read is worth another try */
        if(got || !ferror(stream) || errno != EINTR)
          break;
    }

    return(got);
}
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
479 gf_fwritec(int c)
481 unsigned char ch = (unsigned char)c;
482 int rv = 0;
485 rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486 while(!rv && ferror(gf_out.file) && errno == EINTR);
488 return(rv);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c)
499 int rv = 1;
500 int i, outchars;
501 unsigned char obuf[MAX(MB_LEN_MAX,32)];
503 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 for(i = 0; i < outchars; i++)
505 if(gf_fwritec(obuf[i]) != 1){
506 rv = 0;
507 break;
511 return(rv);
515 #ifdef _WINDOWS
517 * Read unicode characters from windows filesystem and return
518 * them as a stream of UTF-8 characters. The stream is assumed
519 * opened so that it will know how to put together the unicode.
521 * (This is totally untested, copied loosely from so_file_readc_windows
522 * which may or may not be appropriate.)
525 gf_freadc_windows(unsigned char *c)
527 int rv = 0;
528 UCS ucs;
530 /* already got some from previous call? */
531 if(gf_in.cb.cbufend > gf_in.cb.cbuf){
532 *c = *gf_in.cb.cbufp;
533 gf_in.cb.cbufp++;
534 rv++;
535 if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
536 gf_in.cb.cbufend = gf_in.cb.cbuf;
537 gf_in.cb.cbufp = gf_in.cb.cbuf;
540 return(rv);
543 if(gf_in.file){
544 /* windows only so second arg is ignored */
545 ucs = read_a_wide_char(gf_in.file, NULL);
546 rv = (ucs == CCONV_EOF) ? 0 : 1;
549 if(rv){
551 * Now we need to convert the UCS character to UTF-8
552 * and dole out the UTF-8 one char at a time.
554 gf_in.cb.cbufend = utf8_put(gf_in.cb.cbuf, (unsigned long) ucs);
555 gf_in.cb.cbufp = gf_in.cb.cbuf;
556 if(gf_in.cb.cbufend > gf_in.cb.cbuf){
557 *c = *gf_in.cb.cbufp;
558 gf_in.cb.cbufp++;
559 if(gf_in.cb.cbufp >= gf_in.cb.cbufend){
560 gf_in.cb.cbufend = gf_in.cb.cbuf;
561 gf_in.cb.cbufp = gf_in.cb.cbuf;
564 else
565 *c = '?';
568 return(rv);
570 #endif /* _WINDOWS */
574 gf_preadc(unsigned char *c)
576 return(pipe_readc(c, gf_in.pipe));
581 gf_preadc_locale(unsigned char *c)
583 return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c, void *extraarg)
593 PIPE_S *pipe;
595 pipe = (PIPE_S *) extraarg;
597 return(pipe_readc(c, pipe));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
607 gf_pwritec(int c)
609 return(pipe_writec(c, gf_out.pipe));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c)
620 int rv = 1;
621 int i, outchars;
622 unsigned char obuf[MAX(MB_LEN_MAX,32)];
624 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 for(i = 0; i < outchars; i++)
626 if(gf_pwritec(obuf[i]) != 1){
627 rv = 0;
628 break;
632 return(rv);
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
639 gf_sreadc(unsigned char *c)
641 return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
646 gf_sreadc_locale(unsigned char *c)
648 return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
/*
 * gf_sreadc_getchar - gf_sreadc() in the shape generic_readc_locale()
 *                     expects.
 *
 * extraarg is ignored and gf_sreadc just uses globals instead.
 * That's ok as long as we don't call it more than once at a time.
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    int got = gf_sreadc(c);

    return(got);
}
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
669 gf_swritec(int c)
671 return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c)
682 int rv = 1;
683 int i, outchars;
684 unsigned char obuf[MAX(MB_LEN_MAX,32)];
686 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 for(i = 0; i < outchars; i++)
688 if(gf_swritec(obuf[i]) != 1){
689 rv = 0;
690 break;
694 return(rv);
699 * output the given string with the given function
702 gf_puts(register char *s, gf_io_t pc)
704 while(*s != '\0')
705 if(!(*pc)((unsigned char)*s++))
706 return(0); /* ERROR putting char ! */
708 return(1);
713 * output the given string with the given function
716 gf_nputs(register char *s, long int n, gf_io_t pc)
718 while(n--)
719 if(!(*pc)((unsigned char)*s++))
720 return(0); /* ERROR putting char ! */
722 return(1);
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * functions use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c,
743 int (*get_a_char)(unsigned char *, void *),
744 void *extraarg,
745 CBUF_S *cb)
747 unsigned long octets_so_far = 0, remaining_octets;
748 unsigned char *inputp;
749 unsigned char ch;
750 UCS ucs;
751 unsigned char inputbuf[20];
752 int rv = 0;
753 int got_one = 0;
755 /* already got some from previous call? */
756 if(cb->cbufend > cb->cbuf){
757 *c = *cb->cbufp;
758 cb->cbufp++;
759 rv++;
760 if(cb->cbufp >= cb->cbufend){
761 cb->cbufend = cb->cbuf;
762 cb->cbufp = cb->cbuf;
765 return(rv);
768 memset(inputbuf, 0, sizeof(inputbuf));
769 if((*get_a_char)(&ch, extraarg) == 0)
770 return(0);
772 inputbuf[octets_so_far++] = ch;
774 while(!got_one){
775 remaining_octets = octets_so_far;
776 inputp = inputbuf;
777 ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 switch(ucs){
779 case CCONV_BADCHAR:
780 return(rv);
782 case CCONV_NEEDMORE:
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far >= sizeof(inputbuf))
790 return(rv);
792 if((*get_a_char)(&ch, extraarg) == 0)
793 return(rv);
795 inputbuf[octets_so_far++] = ch;
796 break;
798 default:
799 /* got a good UCS-4 character */
800 got_one++;
801 break;
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
809 rv++;
810 cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811 cb->cbufp = cb->cbuf;
812 if(cb->cbufend > cb->cbuf){
813 *c = *cb->cbufp;
814 cb->cbufp++;
815 if(cb->cbufp >= cb->cbufend){
816 cb->cbufend = cb->cbuf;
817 cb->cbufp = cb->cbuf;
820 else
821 *c = '?';
823 return(rv);
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
834 void
835 gf_filter_init(void)
837 FILTER_S *flt, *fltn = gf_master;
839 while((flt = fltn) != NULL){ /* free list of old filters */
840 fltn = flt->next;
841 fs_give((void **)&flt);
844 gf_master = NULL;
845 gf_error_string = NULL; /* clear previous errors */
846 gf_byte_count = 0L; /* reset counter */
852 * link the given filter into the filter chain
854 void
855 gf_link_filter(filter_t f, void *data)
857 FILTER_S *new, *tail;
859 #ifdef CRLF_NEWLINES
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865 return;
866 #endif
868 new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869 memset(new, 0, sizeof(FILTER_S));
871 new->f = f; /* set the function pointer */
872 new->opt = data; /* set any optional parameter data */
873 (*f)(new, GF_RESET); /* have it setup initial state */
875 if((tail = gf_master) != NULL){ /* or add it to end of existing */
876 while(tail->next) /* list */
877 tail = tail->next;
879 tail->next = new;
881 else /* attach new struct to list */
882 gf_master = new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
890 void
891 gf_terminal(FILTER_S *f, int flg)
893 if(flg == GF_DATA){
894 GF_INIT(f, f);
896 while(op < eob)
897 if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 gf_error(errno ? error_description(errno) : "Error writing pipe");
900 GF_CH_RESET(f);
902 else if(flg == GF_RESET)
903 errno = 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
911 void
912 gf_set_terminal(gf_io_t f) /* function to set generic filter */
915 last_filter = f;
920 * common function for filter's to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
923 void
924 gf_error(char *s)
926 /* let the user know the error passed in s */
927 gf_error_string = s;
928 longjmp(gf_error_state, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
938 char *
939 gf_pipe(gf_io_t gc, gf_io_t pc)
940 /* how to get a character */
942 unsigned char c;
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string ? gf_error_string : "NULL"));
952 return(gf_error_string); /* */
956 * set and link in the terminal filter
958 gf_set_terminal(pc);
959 gf_link_filter(gf_terminal, NULL);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
968 * objects.
971 GF_INIT(gf_master, gf_master);
973 while((*gc)(&c)){
974 gf_byte_count++;
976 #ifdef _WINDOWS
977 if(!(gf_byte_count & 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled throught the alarm()
980 * mechinism.
982 mswin_yield ();
983 #endif
985 GF_PUTC(gf_master, c & 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master);
993 (*gf_master->f)(gf_master, GF_EOD);
996 dprint((4, "done.\n"));
997 return(NULL); /* everything went OK */
1002 * return the number of bytes piped so far
1004 long
1005 gf_bytes_piped(void)
1007 return(gf_byte_count);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
/*
 * gf_filter -- send the contents of source_so to the command cmd and run
 * what the command writes back through the filter chain to pc.
 *
 * silent adds PIPE_SILENT to the pipe flags; PIPE_RESET is added unless
 * disable_reset is set; pipecb_f is handed to open_system_pipe() and
 * close_system_pipe().
 *
 * The filter chain is rebuilt from scratch: a gf_utf8 filter when
 * ps_global->keyboard_charmap is set and strucmp() finds it different
 * from "UTF-8", then the caller's aux_filters, then the terminal filter
 * writing with pc.
 *
 * When prepend is given it is written to the command first, followed by
 * a newline.  Every byte read from source_so goes through
 * utf8_to_locale() before it is written to the command.
 *
 * Returns NULL on success, otherwise a message describing the failure.
 */
1022 char *
1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 void (*pipecb_f)(PIPE_S *, int, void *))
/* c: byte read from source_so; obuf: its locale-charset form from utf8_to_locale() */
1027 unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028 int flags, outchars, i;
/* errstr: first failure seen, NULL while all is well; buf: one chunk of command output */
1029 char *errstr = NULL, buf[MAILTMPLEN];
1030 PIPE_S *fpipe;
/* conversion state for utf8_to_locale(), private to this call */
1031 CBUF_S cb;
1032 #ifdef NON_BLOCKING_IO
1033 int n;
1034 #endif
1036 dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
/* drop whatever filters an earlier caller left linked */
1038 gf_filter_init();
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1044 if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045 gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
/* the aux_filters array ends at the first entry with a NULL filter */
1047 for( ; aux_filters && aux_filters->filter; aux_filters++)
1048 gf_link_filter(aux_filters->filter, aux_filters->data);
1050 gf_set_terminal(pc);
1051 gf_link_filter(gf_terminal, NULL);
/* start with an empty conversion buffer */
1053 cb.cbuf[0] = '\0';
1054 cb.cbufp = cb.cbuf;
1055 cb.cbufend = cb.cbuf;
1058 * Spawn filter feeding it data, and reading what it writes.
1060 so_seek(source_so, 0L, 0);
1061 flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 | (silent ? PIPE_SILENT : 0)
1063 | (!disable_reset ? PIPE_RESET : 0);
1065 if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
1067 #ifdef NON_BLOCKING_IO
/* this variant alternates between writing input to the command and collecting its output */
1069 if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 errstr = "Can't set up non-blocking IO";
1072 if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 || fputc('\n', fpipe->out.f) == EOF))
1074 errstr = error_description(errno);
1076 while(!errstr){
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 if(!so_readc(&c, source_so)){
1082 fclose(fpipe->out.f);
1083 fpipe->out.f = NULL;
1085 else{
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 for(i = 0; i < outchars && !errstr; i++)
1093 if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 errstr = error_description(errno);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1102 errno = 0;
1103 clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 errstr = gf_filter_puts(buf);
1108 /* then fgets failed! */
1109 if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 if(feof(fpipe->in.f)) /* nothing else interesting! */
1111 break;
1112 else if(ferror(fpipe->in.f)) /* bummer. */
1113 errstr = error_description(errno);
1115 else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 clearerr(fpipe->in.f);
1119 #else /* !NON_BLOCKING_IO */
/* this variant writes all of the input, closes the write side, and only then reads the output */
1121 if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 || pipe_putc('\n', fpipe) == EOF))
1123 errstr = error_description(errno);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr && so_readc(&c, source_so))
1130 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 for(i = 0; i < outchars && !errstr; i++)
1132 if(pipe_putc(obuf[i], fpipe) == EOF)
1133 errstr = error_description(errno);
/* note that a failure here replaces any message already stored in errstr */
1135 if(pipe_close_write(fpipe))
1136 errstr = _("Pipe command returned error.");
1138 while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 errstr = gf_filter_puts(buf);
1141 #endif /* !NON_BLOCKING_IO */
/* the pipe is closed and the chain flushed even after an error; an earlier message is kept */
1143 if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 errstr = _("Pipe command returned error.");
1146 gf_filter_eod();
1148 else
1149 errstr = _("Error setting up pipe command.");
1151 return(errstr);
1156 * gf_filter_puts - write the given string down the filter's pipe
1158 char *
1159 gf_filter_puts(register char *s)
1161 GF_INIT(gf_master, gf_master);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string ? gf_error_string : "NULL"));
1169 return(gf_error_string);
1172 while(*s)
1173 GF_PUTC(gf_master, (*s++) & 0xff);
1175 GF_END(gf_master, gf_master);
1176 return(NULL);
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1184 void
1185 gf_filter_eod(void)
1187 GF_INIT(gf_master, gf_master);
1188 (void) GF_FLUSH(gf_master);
1189 (*gf_master->f)(gf_master, GF_EOD);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1211 * via a vector.)
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1217 * void
1218 * gf_xxx_filter(f, flg)
1219 * FILTER_S *f;
1220 * int flg;
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1249 * - Filters must pass GF_EOD they receive on to the next
1250 * filter in the chain so it has the opportunity to flush
1251 * any buffered data.
1253 * - All filters expect NVT end-of-lines. The idea is to prepend
1254 * or append either the gf_local_nvtnl or gf_nvtnl_local
1255 * os-dependent filters to the data on the appropriate end of the
1256 * pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
/*
 * gf_binary_b64 -- filter that encodes its input as base64 text.
 *
 * GF_DATA:  turns every 3 input bytes into 4 characters from the table v
 *           and writes CRLF after each 45 input bytes.
 * GF_EOD:   writes the character for any leftover bits, pads with '=',
 *           ends a partial line with CRLF, then flushes and passes
 *           GF_EOD on to the next filter.
 * GF_RESET: zeroes the position counter.
 *
 * State kept in the filter struct between calls:
 *   f->n -- input bytes consumed on the current output line (0..44)
 *   f->t -- bits of the previous byte that have not been written yet
 */
1274 void
1275 gf_binary_b64(FILTER_S *f, int flg)
/* the base64 alphabet, indexed by 6-bit value */
1277 static char *v =
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f, f->next);
1281 if(flg == GF_DATA){
1282 register unsigned char c;
/* work on local copies of the state; they are stored back once the queue is drained */
1283 register unsigned char t = f->t;
1284 register long n = f->n;
1286 while(GF_GETC(f, c)){
/* the case labels are the positions on the line where n % 3 is 0, 1 and 2 */
1288 switch(n++){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 case 42: case 45:
1292 GF_PUTC(f->next, v[c >> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t = c << 4; /* remember high 2 bits for next */
1295 break;
/* second byte of a group: the bits kept from byte 1 plus the top 4 bits of this one */
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 case 43:
1300 GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 t = c << 2;
1302 break;
/* third byte of a group: finishes character 3 and supplies all of character 4 */
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 case 44:
1307 GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 GF_PUTC(f->next, v[c & 0x3f]);
1309 break;
1312 if(n == 45){ /* start a new line? */
1313 GF_PUTC(f->next, '\015');
1314 GF_PUTC(f->next, '\012');
1315 n = 0L;
1319 f->n = n;
1320 f->t = t;
1321 GF_END(f, f->next);
1323 else if(flg == GF_EOD){ /* no more data */
1324 switch (f->n % 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1326 break;
/* one leftover byte: write its remaining bits, then two pad characters */
1328 case 1:
1329 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 GF_PUTC(f->next, '='); /* byte 3 */
1331 GF_PUTC(f->next, '='); /* byte 4 */
1332 break;
/* two leftover bytes: write the remaining bits, then one pad character */
1334 case 2:
1335 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 GF_PUTC(f->next, '='); /* byte 4 */
1337 break;
1340 /* end with CRLF */
1341 if(f->n){
1342 GF_PUTC(f->next, '\015');
1343 GF_PUTC(f->next, '\012');
1346 (void) GF_FLUSH(f->next);
1347 (*f->next->f)(f->next, GF_EOD);
1349 else if(flg == GF_RESET){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1351 f->n = 0L;
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1360 void
1361 gf_b64_binary(FILTER_S *f, int flg)
1363 static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f, f->next);
1373 if(flg == GF_DATA){
1374 register unsigned char c;
1375 register unsigned char t = f->t;
1376 register int n = (int) f->n;
1377 register int state = f->f1;
1379 while(GF_GETC(f, c)){
1381 if(state){
1382 state = 0;
1383 if (c != '=') {
1384 gf_error("Illegal '=' in base64 text");
1385 /* NO RETURN */
1389 /* in range, and a valid value? */
1390 if((c & ~0x7f) || (c = v[c]) > 63){
1391 if(c == 64){
1392 switch (n++) { /* check quantum position */
1393 case 2:
1394 state++; /* expect an equal as next char */
1395 break;
1397 case 3:
1398 n = 0L; /* restart quantum */
1399 break;
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1403 /* NO RETURN */
1407 else{
1408 switch (n++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1410 t = c << 2;
1411 break;
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f->next, (t|(c >> 4)));
1415 t = c << 4; /* byte 2: high 4 bits */
1416 break;
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f->next, (t|(c >> 2)));
1420 t = c << 6; /* byte 3: high 2 bits */
1421 break;
1423 case 3:
1424 GF_PUTC(f->next, t | c);
1425 n = 0L; /* reinitialize mechanism */
1426 break;
1431 f->f1 = state;
1432 f->t = t;
1433 f->n = n;
1434 GF_END(f, f->next);
1436 else if(flg == GF_EOD){
1437 (void) GF_FLUSH(f->next);
1438 (*f->next->f)(f->next, GF_EOD);
1440 else if(flg == GF_RESET){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f->n = 0L; /* quantum position */
1443 f->f1 = 0; /* state holder: equal seen? */
1451 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452 * encoding described in rfc1341
#define GF_MAXLINE      80              /* good buffer size */

/*
 * default action for QUOTED-PRINTABLE to 8BIT decoder
 *
 * Expects a local "state" variable in the expanding function.  A space
 * starts a fresh run in the f->line buffer (it is held back until we
 * know whether it is trailing white space), '=' starts an escape, and
 * anything else is passed straight to the next filter.
 */
#define GF_QP_DEFAULT(f, c)     { \
                                    if((c) == ' '){ \
                                        state = WSPACE; \
                                        /* reset white space! */ \
                                        (f)->linep = (f)->line; \
                                        *((f)->linep)++ = ' '; \
                                    } \
                                    else if((c) == '='){ \
                                        state = EQUAL; \
                                    } \
                                    else \
                                      GF_PUTC((f)->next, (c)); \
                                }
1476 * QUOTED-PRINTABLE to 8BIT filter
1478 void
1479 gf_qp_8bit(FILTER_S *f, int flg)
1482 GF_INIT(f, f->next);
1484 if(flg == GF_DATA){
1485 register unsigned char c;
1486 register int state = f->f1;
1488 while(GF_GETC(f, c)){
1490 switch(state){
1491 case DFL : /* default case */
1492 default:
1493 GF_QP_DEFAULT(f, c);
1494 break;
1496 case CCR : /* non-significant space */
1497 state = DFL;
1498 if(c == '\012')
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f, c);
1502 break;
1504 case EQUAL :
1505 if(c == '\015'){ /* "=\015" is a soft EOL */
1506 state = CCR;
1507 break;
1510 if(c == '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f->next, '=');
1512 state = DFL;
1513 break;
1516 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1534 * below, as well.
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state = STOP_DECODING;
1540 GF_PUTC(f->next, '=');
1541 GF_PUTC(f->next, c);
1542 q_status_message(SM_ORDER,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
1546 break;
1549 if (isdigit ((unsigned char)c))
1550 f->t = c - '0';
1551 else
1552 f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1554 f->f2 = c; /* store character in case we have to
1555 back out in !isxdigit below */
1557 state = HEX;
1558 break;
1560 case HEX :
1561 state = DFL;
1562 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1563 state = STOP_DECODING;
1564 GF_PUTC(f->next, '=');
1565 GF_PUTC(f->next, f->f2);
1566 GF_PUTC(f->next, c);
1567 q_status_message(SM_ORDER,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
1571 break;
1574 if (isdigit((unsigned char)c))
1575 c -= '0';
1576 else
1577 c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f->next, c + (f->t << 4));
1580 break;
1582 case WSPACE :
1583 if(c == ' '){ /* toss it in with other spaces */
1584 if(f->linep - f->line < GF_MAXLINE)
1585 *(f->linep)++ = ' ';
1586 break;
1589 state = DFL;
1590 if(c == '\015'){ /* not our white space! */
1591 f->linep = f->line; /* reset buffer */
1592 GF_PUTC(f->next, '\015');
1593 break;
1596 /* the spaces are ours, write 'em */
1597 f->n = f->linep - f->line;
1598 while((f->n)--)
1599 GF_PUTC(f->next, ' ');
1601 GF_QP_DEFAULT(f, c); /* take care of 'c' in default way */
1602 break;
1604 case STOP_DECODING :
1605 GF_PUTC(f->next, c);
1606 break;
1610 f->f1 = state;
1611 GF_END(f, f->next);
1613 else if(flg == GF_EOD){
1614 fs_give((void **)&(f->line));
1615 (void) GF_FLUSH(f->next);
1616 (*f->next->f)(f->next, GF_EOD);
1618 else if(flg == GF_RESET){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1620 f->f1 = DFL;
1621 f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
1628 * USEFUL MACROS TO HELP WITH QP ENCODING
#define QP_MAXL         75      /* 76th place only for continuation */

/*
 * Write a quoted-printable soft line break ("=" followed by CRLF).
 * The callers decide when a break is needed.
 */
#define GF_8BIT_WRAP(f)         { \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, '\015'); \
                                    GF_PUTC((f)->next, '\012'); \
                                }

/*
 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
 * line break if needed.  (f)->n is the length of the current line.
 */
#define GF_8BIT_PUT_QUOTE(f, c) { \
                                    if(((f)->n += 3) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 3;     /* set line count */ \
                                    } \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, HEX_CHAR1(c)); \
                                    GF_PUTC((f)->next, HEX_CHAR2(c)); \
                                }

/*
 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
 * if needed.  A '.' in the first column is written quoted instead.
 */
#define GF_8BIT_PUT(f, c)       { \
                                    if((++((f)->n)) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 1L; \
                                    } \
                                    if((f)->n == 1L && (c) == '.'){ \
                                        GF_8BIT_PUT_QUOTE(f, c); \
                                        (f)->n = 3; \
                                    } \
                                    else \
                                      GF_PUTC((f)->next, (c)); \
                                }

/*
 * default action for 8bit to quoted printable encoder
 *
 * Expects a local "state" variable in the expanding function.  Spaces
 * and CRs are held back (via the state) until the following character
 * is known; control, 8-bit, DEL and '=' characters are written quoted.
 */
#define GF_8BIT_DEFAULT(f, c)   { \
                                    if((c) == ' '){ \
                                        state = WSPACE; \
                                    } \
                                    else if((c) == '\015'){ \
                                        state = CCR; \
                                    } \
                                    else if(iscntrl((c) & 0x7f) || ((c) == 0x7f) \
                                            || ((c) & 0x80) || ((c) == '=')){ \
                                        GF_8BIT_PUT_QUOTE(f, c); \
                                    } \
                                    else{ \
                                        GF_8BIT_PUT(f, c); \
                                    } \
                                }
1693 * 8BIT to QUOTED-PRINTABLE filter
1695 void
1696 gf_8bit_qp(FILTER_S *f, int flg)
1698 short dummy_dots = 0, dummy_dmap = 1;
1699 GF_INIT(f, f->next);
1701 if(flg == GF_DATA){
1702 register unsigned char c;
1703 register int state = f->f1;
1705 while(GF_GETC(f, c)){
1707 /* keep track of "^JFrom " */
1708 Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);
1710 switch(state){
1711 case DFL : /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f, c);
1713 break;
1715 case CCR : /* true line break? */
1716 state = DFL;
1717 if(c == '\012'){
1718 GF_PUTC(f->next, '\015');
1719 GF_PUTC(f->next, '\012');
1720 f->n = 0L;
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f, '\015');
1724 GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
1726 break;
1728 case WSPACE:
1729 state = DFL;
1730 if(c == '\015' || f->t){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f, ' ');
1732 f->t = 0; /* reset From flag */
1734 else
1735 GF_8BIT_PUT(f, ' ');
1737 GF_8BIT_DEFAULT(f, c); /* handle 'c' in the default way */
1738 break;
1742 f->f1 = state;
1743 GF_END(f, f->next);
1745 else if(flg == GF_EOD){
1746 switch(f->f1){
1747 case CCR :
1748 GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
1749 break;
1751 case WSPACE :
1752 GF_8BIT_PUT_QUOTE(f, ' '); /* write the last space */
1753 break;
1756 (void) GF_FLUSH(f->next);
1757 (*f->next->f)(f->next, GF_EOD);
1759 else if(flg == GF_RESET){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f->f1 = DFL; /* state from last character */
1762 f->f2 = 1; /* state of "^NFrom " bitmap */
1763 f->t = 0;
1764 f->n = 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1772 void
1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1775 static unsigned char *conv_table = NULL;
1776 GF_INIT(f, f->next);
1778 if(flg == GF_DATA){
1779 register unsigned char c;
1781 while(GF_GETC(f, c)){
1782 GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1785 GF_END(f, f->next);
1787 else if(flg == GF_EOD){
1788 (void) GF_FLUSH(f->next);
1789 (*f->next->f)(f->next, GF_EOD);
1791 else if(flg == GF_RESET){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
/*
 * Options handed to gf_convert_utf8_charset through f->opt
 */
typedef struct _utf8c_s {
    void *conv_table;           /* Unicode-to-charset translation table */
    int   report_err;           /* nonzero: fail on an untranslatable char */
} UTF8C_S;
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1810 void
1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1813 static unsigned short *conv_table = NULL;
1814 static int report_err = 0;
1815 register int more = f->f2;
1816 register long u = f->n;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f, f->next);
1826 if(flg == GF_DATA){
1827 register unsigned char c;
1829 while(GF_GETC(f, c)){
1830 if(!conv_table){ /* can't do much if no conversion table */
1831 GF_PUTC(f->next, c);
1833 /* UTF-8 continuation? */
1834 else if((c > 0x7f) && (c < 0xc0)){
1835 if(more){
1836 u <<= 6; /* shift current value by 6 bits */
1837 u |= c & 0x3f;
1838 if (!--more){ /* last octet? */
1839 if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1845 c = '?';
1846 if(report_err){
1847 if(f->opt)
1848 fs_give((void **) &f->opt);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1854 /* NO RETURN */
1857 else{
1858 if(u > 0xff){
1859 c = (unsigned char) (u >> 8);
1860 GF_PUTC(f->next, c);
1863 c = (unsigned char) u & 0xff;
1866 GF_PUTC(f->next, c);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f->next, '?');
1873 else{
1874 if(more){ /* incomplete UTF-8 character */
1875 GF_PUTC(f->next, '?');
1876 more = 0;
1878 if(c < 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f->next, c);
1881 else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 u = c & 0x1f; /* first 5 bits of 12 */
1883 more = 1;
1885 else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 u = c & 0x0f; /* first 4 bits of 16 */
1887 more = 2;
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 u = c & 0x07; /* first 3 bits of 20.5 */
1892 more = 3;
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u = c & 0x03; /* first 2 bits of 26 */
1897 more = 4;
1899 else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u = c & 0x03; /* first 2 bits of 26 */
1901 more = 5;
1903 #endif
1904 else{ /* not in Unicode */
1905 GF_PUTC(f->next, '?');
1910 f->f2 = more;
1911 f->n = u;
1912 GF_END(f, f->next);
1914 else if(flg == GF_EOD){
1915 (void) GF_FLUSH(f->next);
1916 if(f->opt)
1917 fs_give((void **) &f->opt);
1919 (*f->next->f)(f->next, GF_EOD);
1921 else if(flg == GF_RESET){
1922 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1923 conv_table = ((UTF8C_S *) f->opt)->conv_table;
1924 report_err = ((UTF8C_S *) f->opt)->report_err;
1925 f->f2 = 0;
1926 f->n = 0L;
1931 void *
1932 gf_convert_utf8_charset_opt(void *table, int report_err)
1934 UTF8C_S *utf8c;
1936 utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1937 utf8c->conv_table = table;
1938 utf8c->report_err = report_err;
1939 return((void *) utf8c);
1944 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1946 * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1947 * or to Shift-JIS (if PC-Pine).
1949 void
1950 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1952 register unsigned char c;
1953 register int state = f->f1;
1956 * f->t lit means we're in middle of decoding a sequence of characters.
1957 * f->f2 keeps track of first character of pair for Shift-JIS.
1958 * f->f1 is the state.
1961 GF_INIT(f, f->next);
1963 if(flg == GF_DATA){
1964 while(GF_GETC(f, c)){
1965 switch(state){
1966 case ESC: /* saw ESC */
1967 if(!f->t && c == '$')
1968 state = ESCDOL;
1969 else if(f->t && c == '(')
1970 state = ESCPAR;
1971 else{
1972 GF_PUTC(f->next, '\033');
1973 GF_PUTC(f->next, c);
1974 state = DFL;
1977 break;
1979 case ESCDOL: /* saw ESC $ */
1980 if(c == 'B' || c == '@'){
1981 state = EUC;
1982 f->t = 1; /* filtering into euc */
1983 f->f2 = -1; /* first character of pair */
1985 else{
1986 GF_PUTC(f->next, '\033');
1987 GF_PUTC(f->next, '$');
1988 GF_PUTC(f->next, c);
1989 state = DFL;
1992 break;
1994 case ESCPAR: /* saw ESC ( */
1995 if(c == 'B' || c == 'J' || c == 'H'){
1996 state = DFL;
1997 f->t = 0; /* done filtering */
1999 else{
2000 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2001 GF_PUTC(f->next, '('); /* escape sequences, which */
2002 GF_PUTC(f->next, c); /* this appears to be. */
2005 break;
2007 case EUC: /* filtering into euc */
2008 if(c == '\033')
2009 state = ESC;
2010 else{
2011 #ifdef _WINDOWS /* Shift-JIS */
2012 c &= 0x7f; /* 8-bit can't win */
2013 if (f->f2 >= 0){ /* second of a pair? */
2014 int rowOffset = (f->f2 < 95) ? 112 : 176;
2015 int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2016 : 126;
2018 GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2019 GF_PUTC(f->next, c + cellOffset);
2020 f->f2 = -1; /* restart */
2022 else if(c > 0x20 && c < 0x7f)
2023 f->f2 = c; /* first of pair */
2024 else{
2025 GF_PUTC(f->next, c); /* write CTL as itself */
2026 f->f2 = -1;
2028 #else /* EUC */
2029 GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2030 #endif
2033 break;
2035 case DFL:
2036 default:
2037 if(c == '\033')
2038 state = ESC;
2039 else
2040 GF_PUTC(f->next, c);
2042 break;
2046 f->f1 = state;
2047 GF_END(f, f->next);
2049 else if(flg == GF_EOD){
2050 switch(state){
2051 case ESC:
2052 GF_PUTC(f->next, '\033');
2053 break;
2055 case ESCDOL:
2056 GF_PUTC(f->next, '\033');
2057 GF_PUTC(f->next, '$');
2058 break;
2060 case ESCPAR:
2061 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2062 GF_PUTC(f->next, '('); /* escape sequences. */
2063 break;
2066 (void) GF_FLUSH(f->next);
2067 (*f->next->f)(f->next, GF_EOD);
2069 else if(flg == GF_RESET){
2070 dprint((9, "-- gf_reset jp_to_euc\n"));
2071 f->f1 = DFL; /* state */
2072 f->t = 0; /* not translating to euc */
2078 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2080 void
2081 gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
2083 #ifdef _WINDOWS
2084 gf_sjis_to_2022_jp(f, flg);
2085 #else
2086 gf_euc_to_2022_jp(f, flg);
2087 #endif
2091 void
2092 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2094 register unsigned char c;
2097 * f->t lit means we've sent the start esc seq but not the end seq.
2098 * f->f2 keeps track of first character of pair for Shift-JIS.
2101 GF_INIT(f, f->next);
2103 if(flg == GF_DATA){
2104 while(GF_GETC(f, c)){
2105 if(f->t){
2106 if(c & 0x80){
2107 GF_PUTC(f->next, c & 0x7f);
2109 else{
2110 GF_PUTC(f->next, '\033');
2111 GF_PUTC(f->next, '(');
2112 GF_PUTC(f->next, 'B');
2113 GF_PUTC(f->next, c);
2114 f->f2 = -1;
2115 f->t = 0;
2118 else{
2119 if(c & 0x80){
2120 GF_PUTC(f->next, '\033');
2121 GF_PUTC(f->next, '$');
2122 GF_PUTC(f->next, 'B');
2123 GF_PUTC(f->next, c & 0x7f);
2124 f->t = 1;
2126 else{
2127 GF_PUTC(f->next, c);
2132 GF_END(f, f->next);
2134 else if(flg == GF_EOD){
2135 if(f->t){
2136 GF_PUTC(f->next, '\033');
2137 GF_PUTC(f->next, '(');
2138 GF_PUTC(f->next, 'B');
2139 f->t = 0;
2140 f->f2 = -1;
2143 (void) GF_FLUSH(f->next);
2144 (*f->next->f)(f->next, GF_EOD);
2146 else if(flg == GF_RESET){
2147 dprint((9, "-- gf_reset euc_to_jp\n"));
2148 f->t = 0;
2149 f->f2 = -1;
2153 void
2154 gf_sjis_to_2022_jp(FILTER_S *f, int flg)
2156 register unsigned char c;
2159 * f->t lit means we've sent the start esc seq but not the end seq.
2160 * f->f2 keeps track of first character of pair for Shift-JIS.
2163 GF_INIT(f, f->next);
2165 if(flg == GF_DATA){
2166 while(GF_GETC(f, c)){
2167 if(f->t){
2168 if(f->f2 >= 0){ /* second of a pair? */
2169 int adjust = c < 159;
2170 int rowOffset = f->f2 < 160 ? 112 : 176;
2171 int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;
2173 GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
2174 GF_PUTC(f->next, c - cellOffset);
2175 f->f2 = -1;
2177 else if(c & 0x80){
2178 f->f2 = c; /* remember first of pair */
2180 else{
2181 GF_PUTC(f->next, '\033');
2182 GF_PUTC(f->next, '(');
2183 GF_PUTC(f->next, 'B');
2184 GF_PUTC(f->next, c);
2185 f->f2 = -1;
2186 f->t = 0;
2189 else{
2190 if(c & 0x80){
2191 GF_PUTC(f->next, '\033');
2192 GF_PUTC(f->next, '$');
2193 GF_PUTC(f->next, 'B');
2194 f->f2 = c;
2195 f->t = 1;
2197 else{
2198 GF_PUTC(f->next, c);
2203 GF_END(f, f->next);
2205 else if(flg == GF_EOD){
2206 if(f->t){
2207 GF_PUTC(f->next, '\033');
2208 GF_PUTC(f->next, '(');
2209 GF_PUTC(f->next, 'B');
2210 f->t = 0;
2211 f->f2 = -1;
2214 (void) GF_FLUSH(f->next);
2215 (*f->next->f)(f->next, GF_EOD);
2217 else if(flg == GF_RESET){
2218 dprint((9, "-- gf_reset sjis_to_jp\n"));
2219 f->t = 0;
2220 f->f2 = -1;
2227 * Various charset to UTF-8 Translation filter
2231 * utf8 conversion options
2233 typedef struct _utf8_s {
2234 CHARSET *charset;
2235 unsigned long ucsc;
2236 } UTF8_S;
#define UTF8_BLOCK      1024
#define UTF8_EOB(f)     ((f)->line + (f)->f2 - 1)

/*
 * Append one octet to the line buffer, growing it by UTF8_BLOCK when
 * full.  Expects locals "p" (write position) and "eobuf" (last usable
 * slot) in the expanding function and re-bases both after a resize.
 */
#define UTF8_ADD(f, c) \
                        { \
                            if(p >= eobuf){ \
                                (f)->f2 += UTF8_BLOCK; \
                                fs_resize((void **)&(f)->line, \
                                          (size_t) (f)->f2 * sizeof(char)); \
                                eobuf = UTF8_EOB(f); \
                                p = eobuf - UTF8_BLOCK; \
                            } \
                            *p++ = (c); \
                        }

/*
 * Convert the collected line (f->line up to "p") and write the result
 * to the next filter.  With no charset in the options, octets with the
 * high bit set become '?'; if the conversion fails, one '?' is written
 * per input octet.
 */
#define GF_UTF8_FLUSH(f) { \
        register unsigned long n; \
        SIZEDTEXT intext, outtext; \
        intext.data = (unsigned char *) (f)->line; \
        intext.size = p - (f)->line; \
        memset(&outtext, 0, sizeof(SIZEDTEXT)); \
        if(!((UTF8_S *) (f)->opt)->charset){ \
            for(n = 0; n < intext.size; n++) \
              GF_PUTC((f)->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
        } \
        else if(utf8_text_cs(&intext, ((UTF8_S *) (f)->opt)->charset, &outtext, NULL, NULL)){ \
            for(n = 0; n < outtext.size; n++) \
              GF_PUTC((f)->next, outtext.data[n]); \
            if(outtext.data && intext.data != outtext.data) \
              fs_give((void **) &outtext.data); \
        } \
        else{ \
            for(n = 0; n < intext.size; n++) \
              GF_PUTC((f)->next, '?'); \
        } \
    }
2275 * gf_utf8 - text in specified charset to to UTF-8 filter
2276 * Process line-at-a-time rather than character
2277 * because ISO-2022-JP. Call utf8_text_cs by hand
2278 * rather than utf8_text to reduce the cost of
2279 * utf8_charset() for each line.
2281 void
2282 gf_utf8(FILTER_S *f, int flg)
2284 register char *p = f->linep;
2285 register char *eobuf = UTF8_EOB(f);
2286 GF_INIT(f, f->next);
2288 if(flg == GF_DATA){
2289 register int state = f->f1;
2290 register unsigned char c;
2292 while(GF_GETC(f, c)){
2294 switch(state){
2295 case CCR :
2296 state = DFL;
2297 if(c == '\012'){
2298 GF_UTF8_FLUSH(f);
2299 p = f->line;
2300 GF_PUTC(f->next, '\015');
2301 GF_PUTC(f->next, '\012');
2303 else{
2304 UTF8_ADD(f, '\015');
2305 UTF8_ADD(f, c);
2308 break;
2310 default :
2311 if(c == '\015'){
2312 state = CCR;
2314 else
2315 UTF8_ADD(f, c);
2319 f->f1 = state;
2320 GF_END(f, f->next);
2322 else if(flg == GF_EOD){
2324 if(p != f->line)
2325 GF_UTF8_FLUSH(f);
2327 fs_give((void **) &f->line);
2328 fs_give((void **) &f->opt);
2329 (void) GF_FLUSH(f->next);
2330 (*f->next->f)(f->next, GF_EOD);
2332 else if(GF_RESET){
2333 dprint((9, "-- gf_reset utf8\n"));
2334 f->f1 = DFL;
2335 f->f2 = UTF8_BLOCK; /* input buffer length */
2336 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2339 f->linep = p;
2343 void *
2344 gf_utf8_opt(char *charset)
2346 UTF8_S *utf8;
2348 utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2350 utf8->charset = (CHARSET *) utf8_charset(charset);
2353 * When we get 8-bit non-ascii characters but it is supposed to
2354 * be ascii we want it to turn into question marks, not
2355 * just behave as if it is UTF-8 which is what happens
2356 * with ascii because there is no translation table.
2357 * So we need to catch the ascii special case here.
2359 if(utf8->charset && utf8->charset->type == CT_ASCII)
2360 utf8->charset = NULL;
2362 return((void *) utf8);
2367 * RICHTEXT-TO-PLAINTEXT filter
2371 * option to be used by rich2plain (NOTE: if this filter is ever
2372 * used more than once in a pipe, all instances will have the same
2373 * option value)
2377 /*----------------------------------------------------------------------
2378 richtext to plaintext filter
2380 Args: f --
2381 flg --
2383 This basically removes all richtext formatting. A cute hack is used
2384 to get bold and underlining to work.
2385 Further work could be done to handle things like centering and right
2386 and left flush, but then it could no longer be done in place. This
2387 operates on text *with* CRLF's.
2389 WARNING: does not wrap lines!
2390 ----*/
2391 void
2392 gf_rich2plain(FILTER_S *f, int flg)
2394 static int rich_bold_on = 0, rich_uline_on = 0;
2396 /* BUG: qoute incoming \255 values */
2397 GF_INIT(f, f->next);
2399 if(flg == GF_DATA){
2400 register unsigned char c;
2401 register int state = f->f1;
2402 register int plain;
2404 plain = f->opt ? (*(int *) f->opt) : 0;
2406 while(GF_GETC(f, c)){
2408 switch(state){
2409 case TOKEN : /* collect a richtext token */
2410 if(c == '>'){ /* what should we do with it? */
2411 state = DFL; /* return to default next time */
2412 *(f->linep) = '\0'; /* cap off token */
2413 if(f->line[0] == 'l' && f->line[1] == 't'){
2414 GF_PUTC(f->next, '<'); /* literal '<' */
2416 else if(f->line[0] == 'n' && f->line[1] == 'l'){
2417 GF_PUTC(f->next, '\015');/* newline! */
2418 GF_PUTC(f->next, '\012');
2420 else if(!strcmp("comment", f->line)){
2421 (f->f2)++;
2423 else if(!strcmp("/comment", f->line)){
2424 f->f2 = 0;
2426 else if(!strcmp("/paragraph", f->line)) {
2427 GF_PUTC(f->next, '\r');
2428 GF_PUTC(f->next, '\n');
2429 GF_PUTC(f->next, '\r');
2430 GF_PUTC(f->next, '\n');
2432 else if(!plain /* gf_rich_plain */){
2433 if(!strcmp(f->line, "bold")) {
2434 GF_PUTC(f->next, TAG_EMBED);
2435 GF_PUTC(f->next, TAG_BOLDON);
2436 rich_bold_on = 1;
2437 } else if(!strcmp(f->line, "/bold")) {
2438 GF_PUTC(f->next, TAG_EMBED);
2439 GF_PUTC(f->next, TAG_BOLDOFF);
2440 rich_bold_on = 0;
2441 } else if(!strcmp(f->line, "italic")) {
2442 GF_PUTC(f->next, TAG_EMBED);
2443 GF_PUTC(f->next, TAG_ULINEON);
2444 rich_uline_on = 1;
2445 } else if(!strcmp(f->line, "/italic")) {
2446 GF_PUTC(f->next, TAG_EMBED);
2447 GF_PUTC(f->next, TAG_ULINEOFF);
2448 rich_uline_on = 0;
2449 } else if(!strcmp(f->line, "underline")) {
2450 GF_PUTC(f->next, TAG_EMBED);
2451 GF_PUTC(f->next, TAG_ULINEON);
2452 rich_uline_on = 1;
2453 } else if(!strcmp(f->line, "/underline")) {
2454 GF_PUTC(f->next, TAG_EMBED);
2455 GF_PUTC(f->next, TAG_ULINEOFF);
2456 rich_uline_on = 0;
2459 /* else we just ignore the token! */
2461 f->linep = f->line; /* reset token buffer */
2463 else{ /* add char to token */
2464 if(f->linep - f->line > 40){
2465 /* What? rfc1341 says 40 char tokens MAX! */
2466 fs_give((void **)&(f->line));
2467 gf_error("Richtext token over 40 characters");
2468 /* NO RETURN */
2471 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2473 break;
2475 case CCR :
2476 state = DFL; /* back to default next time */
2477 if(c == '\012'){ /* treat as single space? */
2478 GF_PUTC(f->next, ' ');
2479 break;
2481 /* fall thru to process c */
2483 case DFL :
2484 default:
2485 if(c == '<')
2486 state = TOKEN;
2487 else if(c == '\015')
2488 state = CCR;
2489 else if(!f->f2) /* not in comment! */
2490 GF_PUTC(f->next, c);
2492 break;
2496 f->f1 = state;
2497 GF_END(f, f->next);
2499 else if(flg == GF_EOD){
2500 if((f->f1 = (f->linep != f->line)) != 0){
2501 /* incomplete token!! */
2502 gf_error("Incomplete token in richtext");
2503 /* NO RETURN */
2506 if(rich_uline_on){
2507 GF_PUTC(f->next, TAG_EMBED);
2508 GF_PUTC(f->next, TAG_ULINEOFF);
2509 rich_uline_on = 0;
2511 if(rich_bold_on){
2512 GF_PUTC(f->next, TAG_EMBED);
2513 GF_PUTC(f->next, TAG_BOLDOFF);
2514 rich_bold_on = 0;
2517 fs_give((void **)&(f->line));
2518 (void) GF_FLUSH(f->next);
2519 (*f->next->f)(f->next, GF_EOD);
2521 else if(flg == GF_RESET){
2522 dprint((9, "-- gf_reset rich2plain\n"));
2523 f->f1 = DFL; /* state */
2524 f->f2 = 0; /* set means we're in a comment */
2525 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
/*
 * function called from the outside to set
 * richtext filter's options
 *
 * Args: plain -- pointer to the flag the filter reads through f->opt
 *
 * Returns: the same pointer as a void *; nothing is copied or allocated,
 *          so the pointed-to int has to stay valid while the filter runs.
 */
void *
gf_rich2plain_opt(int *plain)
{
    return((void *) plain);
}
2543 * ENRICHED-TO-PLAIN text filter
/* flag bits kept in f->f2 by gf_enriched2plain */
#define TEF_QUELL       0x01            /* suppress output */
#define TEF_NOFILL      0x02            /* inside <nofill>: keep line breaks */
2551 /*----------------------------------------------------------------------
2552 enriched text to plain text filter (ala rfc1523)
2554 Args: f -- state and input data
2555 flg --
2557 This basically removes all enriched formatting. A cute hack is used
2558 to get bold and underlining to work.
2560 Further work could be done to handle things like centering and right
2561 and left flush, but then it could no longer be done in place. This
2562 operates on text *with* CRLF's.
2564 WARNING: does not wrap lines!
2565 ----*/
2566 void
2567 gf_enriched2plain(FILTER_S *f, int flg)
2569 static int enr_uline_on = 0, enr_bold_on = 0;
2571 /* BUG: qoute incoming \255 values */
2572 GF_INIT(f, f->next);
2574 if(flg == GF_DATA){
2575 register unsigned char c;
2576 register int state = f->f1;
2577 register int plain;
2579 plain = f->opt ? (*(int *) f->opt) : 0;
2581 while(GF_GETC(f, c)){
2583 switch(state){
2584 case TOKEN : /* collect a richtext token */
2585 if(c == '>'){ /* what should we do with it? */
2586 int off = *f->line == '/';
2587 char *token = f->line + (off ? 1 : 0);
2588 state = DFL;
2589 *f->linep = '\0';
2590 if(!strcmp("param", token)){
2591 if(off)
2592 f->f2 &= ~TEF_QUELL;
2593 else
2594 f->f2 |= TEF_QUELL;
2596 else if(!strcmp("nofill", token)){
2597 if(off)
2598 f->f2 &= ~TEF_NOFILL;
2599 else
2600 f->f2 |= TEF_NOFILL;
2602 else if(!plain /* gf_enriched_plain */){
2603 /* Following is a cute hack or two to get
2604 bold and underline on the screen.
2605 See Putline0n() where these codes are
2606 interpreted */
2607 if(!strcmp("bold", token)) {
2608 GF_PUTC(f->next, TAG_EMBED);
2609 GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2610 enr_bold_on = off ? 0 : 1;
2611 } else if(!strcmp("italic", token)) {
2612 GF_PUTC(f->next, TAG_EMBED);
2613 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2614 enr_uline_on = off ? 0 : 1;
2615 } else if(!strcmp("underline", token)) {
2616 GF_PUTC(f->next, TAG_EMBED);
2617 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2618 enr_uline_on = off ? 0 : 1;
2621 /* else we just ignore the token! */
2623 f->linep = f->line; /* reset token buffer */
2625 else if(c == '<'){ /* literal '<'? */
2626 if(f->linep == f->line){
2627 GF_PUTC(f->next, '<');
2628 state = DFL;
2630 else{
2631 fs_give((void **)&(f->line));
2632 gf_error("Malformed Enriched text: unexpected '<'");
2633 /* NO RETURN */
2636 else{ /* add char to token */
2637 if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2638 fs_give((void **)&(f->line));
2639 gf_error("Malformed Enriched text: token too long");
2640 /* NO RETURN */
2643 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2645 break;
2647 case CCR :
2648 if(c != '\012'){ /* treat as single space? */
2649 state = DFL; /* lone cr? */
2650 f->f2 &= ~TEF_QUELL;
2651 GF_PUTC(f->next, '\015');
2652 goto df;
2655 state = CLF;
2656 break;
2658 case CLF :
2659 if(c == '\015'){ /* treat as single space? */
2660 state = CCR; /* repeat crlf's mean real newlines */
2661 f->f2 |= TEF_QUELL;
2662 GF_PUTC(f->next, '\r');
2663 GF_PUTC(f->next, '\n');
2664 break;
2666 else{
2667 state = DFL;
2668 if(!((f->f2) & TEF_QUELL))
2669 GF_PUTC(f->next, ' ');
2671 f->f2 &= ~TEF_QUELL;
2674 /* fall thru to take care of 'c' */
2676 case DFL :
2677 default :
2678 df :
2679 if(c == '<')
2680 state = TOKEN;
2681 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2682 state = CCR;
2683 else if(!((f->f2) & TEF_QUELL))
2684 GF_PUTC(f->next, c);
2686 break;
2690 f->f1 = state;
2691 GF_END(f, f->next);
2693 else if(flg == GF_EOD){
2694 if((f->f1 = (f->linep != f->line)) != 0){
2695 /* incomplete token!! */
2696 gf_error("Incomplete token in richtext");
2697 /* NO RETURN */
2699 if(enr_uline_on){
2700 GF_PUTC(f->next, TAG_EMBED);
2701 GF_PUTC(f->next, TAG_ULINEOFF);
2702 enr_uline_on = 0;
2704 if(enr_bold_on){
2705 GF_PUTC(f->next, TAG_EMBED);
2706 GF_PUTC(f->next, TAG_BOLDOFF);
2707 enr_bold_on = 0;
2710 /* Make sure we end with a newline so everything gets flushed */
2711 GF_PUTC(f->next, '\015');
2712 GF_PUTC(f->next, '\012');
2714 fs_give((void **)&(f->line));
2716 (void) GF_FLUSH(f->next);
2717 (*f->next->f)(f->next, GF_EOD);
2719 else if(flg == GF_RESET){
2720 dprint((9, "-- gf_reset enriched2plain\n"));
2721 f->f1 = DFL; /* state */
2722 f->f2 = 0; /* set means we're in a comment */
2723 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
/*
 * function called from the outside to set
 * enriched text filter's options
 *
 * Args: plain -- pointer to the flag the filter reads through f->opt
 *
 * Returns: the same pointer as a void *; nothing is copied or allocated,
 *          so the pointed-to int has to stay valid while the filter runs.
 */
void *
gf_enriched2plain_opt(int *plain)
{
    return((void *) plain);
}
2741 * HTML-TO-PLAIN text filter
2745 /* OK, here's the plan:
2747 * a universal output function handles writing chars and worries
2748 * about wrapping.
 * a universal element collector reads chars and collects params
 * and dispatches the appropriate element handler.
2753 * element handlers are stacked. The most recently dispatched gets
2754 * first crack at the incoming character stream. It passes bytes it's
2755 * done with or not interested in to the next
2757 * installs that handler as the current one collecting data...
2759 * stacked handlers take their params from the element collector and
2760 * accept chars or do whatever they need to do. Sort of a vertical
2761 * piping? recursion-like? hmmm.
2763 * at least I think this is how it'll work. tres simple, non?
2769 * Some important constants
/*
 * Buffer limits, entity-collector result codes, special character codes,
 * html_indent() operation selectors, header flags and the RSS nesting limit.
 */
2771 #define HTML_BUF_LEN 2048 /* max scratch buffer length; sizes CLCTR_S.buf */
2772 #define MAX_ENTITY 20 /* maximum length of an entity */
2773 #define MAX_ELEMENT 72 /* maximum length of an element */
2774 #define HTML_MOREDATA 0 /* expect more entity data */
2775 #define HTML_ENTITY 1 /* valid entity collected */
2776 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2777 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2778 #define HTML_LITERAL 0x0400 /* Literal character value; the bit MAKE_LITERAL() sets and IS_LITERAL() tests */
2779 #define HTML_NEWLINE 0x010A /* hard newline; HTML_ISSPACE() counts it as white space */
2780 #define HTML_DOBOLD 0x0400 /* Start Bold display; NOTE(review): same value as HTML_LITERAL -- confirm this is intended */
2781 #define HTML_ID_GET 0 /* indent func: return current val */
2782 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2783 #define HTML_ID_INC 2 /* indent func: increment by val */
2784 #define HTML_HX_CENTER 0x0001
2785 #define HTML_HX_ULINE 0x0002
2786 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2789 /* types of lists that we will support */
/*
 * Every code expands to a (long) cast expression.
 * NOTE(review): presumably long so it can live in one of HANDLER_S's
 * long slots (x, y, z) -- confirm against the list handlers.
 */
2790 #define LIST_DECIMAL (long) 0
2791 #define LIST_ALPHALO (long) 1
2792 #define LIST_ALPHAUP (long) 2
2793 #define LIST_ROMANLO (long) 3
2794 #define LIST_ROMANUP (long) 4
2795 #define LIST_UNKNOWN (long) 10
2798 * Handler data, state information including function that uses it
/*
 * One record per active element handler.  The head of the chain is
 * HANDLERS(f) (i.e. HD(f)->h_stack); HTML_TEXT_OUT() passes each output
 * character to the head record's handler function.
 */
2800 typedef struct handler_s {
2801 FILTER_S *html_data; /* filter this record is attached to */
2802 void *element; /* read back as an ELPROP_S * through the EL() macro */
2803 long x, y, z; /* NOTE(review): meaning is not visible here; looks handler-specific */
2804 void *dp; /* NOTE(review): untyped per-handler data -- ownership not visible here */
2805 unsigned char *s;
2806 struct handler_s *below; /* link to another record; presumably the next one down the stack */
2807 } HANDLER_S;
2810 * Element Property structure
/*
 * One entry of a markup element table (HTML_OPT_S.element_table points at
 * an array of these; see ELEMENTS()).
 */
2812 typedef struct _element_properties {
2813 char *element; /* presumably the element's name as it appears in markup */
2814 size_t len; /* NOTE(review): likely strlen(element) -- confirm */
2815 int (*handler)(HANDLER_S *, int, int); /* called by HTML_TEXT_OUT() as (handler record, character, GF_DATA) */
2816 unsigned blocklevel:1;
2817 unsigned alternate:1;
2818 } ELPROP_S;
2821 * Types used to manage HTML parsing
/* Forward declaration of a file-local (static) routine taking a handler record and an int. */
2823 static void html_handoff(HANDLER_S *, int);
2827 * to help manage line wrapping.
/*
 * Text accumulator.  CENTER_S embeds two of these (line and word), and
 * WRAPPED_LEN() adds up their width fields.
 */
2829 typedef struct _wrap_line {
2830 char *buf; /* buf to collect wrapped text */
2831 int used, /* number of chars in buf */
2832 width, /* text's width as displayed */
2833 len; /* length of allocated buf */
2834 } WRAPLINE_S;
2838 * to help manage centered text
/* Reached through HD(f)->centered; WRAPPED_LEN() treats a NULL pointer as width 0. */
2840 typedef struct _center_s {
2841 WRAPLINE_S line; /* buf used to assemble centered text */
2842 WRAPLINE_S word; /* word being appended to line */
2843 int anchor;
2844 short space;
2845 } CENTER_S;
2849 * Collector data and state information
/*
 * Allocated and zero-filled by NEW_CLCTR() when a '<' is seen, released
 * by FREE_CLCTR() once the token collector reports completion.
 */
2851 typedef struct collector_s {
2852 char buf[HTML_BUF_LEN]; /* buffer to collect data */
2853 int len; /* length of the data in that buffer (HTML_PROC dumps buf[0..len) on a bad tag) */
2854 unsigned end_tag:1; /* collecting a closing tag */
2855 unsigned hit_equal:1; /* collecting right half of attrib */
2856 unsigned mkup_decl:1; /* markup declaration */
2857 unsigned start_comment:1; /* markup declaration comment */
2858 unsigned end_comment:1; /* legit comment format */
2859 unsigned hyphen:1; /* markup hyphen read */
2860 unsigned badform:1; /* malformed markup element */
2861 unsigned overrun:1; /* Overran buf above */
2862 unsigned proc_inst:1; /* XML processing instructions */
2863 unsigned empty:1; /* empty element */
2864 unsigned was_quoted:1; /* basically to catch null string */
2865 char quoted; /* quoted element param value */
2866 char *element; /* element's collected name; freed by FREE_CLCTR() */
2867 PARAMETER *attribs; /* element's collected attributes; ->next list freed by FREE_CLCTR() */
2868 PARAMETER *cur_attrib; /* attribute now being collected */
2869 } CLCTR_S;
2873 * State information for all element handlers
/* Per-filter HTML state; obtained from a FILTER_S with the HD() macro. */
2875 typedef struct html_data {
2876 HANDLER_S *h_stack; /* handler list */
2877 CLCTR_S *el_data; /* element collector data */
2878 CENTER_S *centered; /* struct to manage centered text */
2879 int (*token)(FILTER_S *, int); /* active token collector or NULL: set by NEW_CLCTR(), cleared by FREE_CLCTR(), polled by HTML_PROC() */
2880 char quoted; /* quoted, by either ' or ", text */
2881 short indent_level; /* levels of indentation */
2882 int in_anchor; /* text now being written to anchor */
2883 int blanks; /* Consecutive blank line count */
2884 int wrapcol; /* column to wrap lines on */
2885 int *prefix; /* buffer containing Anchor prefix */
2886 int prefix_used;
2887 long line_bufsize; /* current size of the line buffer; doubled by HTML_LINEP_PUTC() when full */
2888 COLOR_PAIR *color;
2889 struct {
2890 int state; /* embedded data state */
2891 char *color; /* embedded color pointer */
2892 } embedded;
2893 CBUF_S cb; /* utf8->ucs4 conversion state */
2894 unsigned wrapstate:1; /* whether or not to wrap output */
2895 unsigned li_pending:1; /* <LI> next token expected */
2896 unsigned de_pending:1; /* <DT> or <DD> next token expected */
2897 unsigned bold_on:1; /* currently bolding text */
2898 unsigned uline_on:1; /* currently underlining text */
2899 unsigned center:1; /* center output text */
2900 unsigned bitbucket:1; /* Ignore input */
2901 unsigned head:1; /* In doc's HEAD */
2902 unsigned body:1; /* In doc's BODY */
2903 unsigned alt_entity:1; /* use alternative entity values */
2904 unsigned wrote:1; /* anything written yet? */
2905 } HTML_DATA_S;
2909 * HTML filter options
/* Read from a FILTER_S's opt pointer by the accessor macros (WRAP_COLS(), STRIP(), ...). */
2911 typedef struct _html_opts {
2912 char *base; /* Base URL for this html file */
2913 int columns, /* Display columns (excluding margins) */
2914 indent; /* Left margin */
2915 HANDLE_S **handlesp; /* Head of handles */
2916 htmlrisk_t warnrisk_f; /* Nasty link warning call */
2917 ELPROP_S *element_table; /* markup element table */
2918 RSS_FEED_S **feedp; /* hook for RSS feed response */
2919 unsigned strip:1; /* Strip hilite TAGs: HTML_BOLD() and friends emit nothing when set */
2920 unsigned handles_loc:1; /* Local handles requested? */
2921 unsigned showserver:1; /* Display server after anchors */
2922 unsigned outputted:1; /* any */
2923 unsigned no_relative_links:1; /* Disable embedded relative links */
2924 unsigned related_content:1; /* Embedded related content */
2925 unsigned html:1; /* Output content in HTML */
2926 unsigned html_imgs:1; /* Output IMG tags in HTML content */
2927 } HTML_OPT_S;
2932 * Some macros to make life a little easier
/*
 * Option accessors.  X is a filter pointer whose opt member, when set,
 * points at an HTML_OPT_S.  Most of them tolerate a NULL opt and fall
 * back to a default (80 columns, 0 indent, NULL base, flag off).
 */
2934 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2935 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2936 #define HTML_WROTE(X) (HD(X)->wrote)
2937 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
2938 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2939 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2940 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
/* HANDLESP() dereferences opt without a NULL check; DO_HANDLES() is the guarded test. */
2941 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2942 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2943 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2944 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2945 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2946 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
/* ELEMENTS() and RSS_FEED() also dereference opt (and feedp) unguarded -- callers must know opt is set. */
2947 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2948 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
/* A "literal" is the low 8 bits of C with the HTML_LITERAL bit set on top. */
2949 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2950 #define IS_LITERAL(C) (HTML_LITERAL & (C))
/* HD(): filter's private HTML_DATA_S; ED(): its element collector; EL(): a handler record's ELPROP_S. */
2951 #define HD(X) ((HTML_DATA_S *)(X)->data)
2952 #define ED(X) (HD(X)->el_data)
2953 #define EL(X) ((ELPROP_S *) (X)->element)
/* Only values below 0x80 are handed to isspace(); literals are never white space. */
2954 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
2955 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR: give filter X a fresh element collector -- fs_get() a
 * CLCTR_S, zero it, and install html_element_collector as the active
 * token function.
 */
2956 #define NEW_CLCTR(X) { \
2957 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2958 memset(ED(X), 0, sizeof(CLCTR_S)); \
2959 HD(X)->token = html_element_collector; \
/*
 * FREE_CLCTR: tear down filter X's element collector.  Unlinks and frees
 * every PARAMETER on the attribs list (its attribute and value strings
 * first), then the element name, then the CLCTR_S itself, and finally
 * clears the active token function.  ED(X) is dereferenced without a
 * NULL check, so a collector must be present.
 */
2962 #define FREE_CLCTR(X) { \
2963 if(ED(X)->attribs){ \
2964 PARAMETER *p; \
2965 while((p = ED(X)->attribs) != NULL){ \
2966 ED(X)->attribs = ED(X)->attribs->next; \
2967 if(p->attribute) \
2968 fs_give((void **)&p->attribute); \
2969 if(p->value) \
2970 fs_give((void **)&p->value); \
2971 fs_give((void **)&p); \
2974 if(ED(X)->element) \
2975 fs_give((void **) &ED(X)->element); \
2976 fs_give((void **) &ED(X)); \
2977 HD(X)->token = NULL; \
/* Shorthand for the handler stack head and for the bold/underline/center state bits of filter X. */
2979 #define HANDLERS(X) (HD(X)->h_stack)
2980 #define BOLD_BIT(X) (HD(X)->bold_on)
2981 #define ULINE_BIT(X) (HD(X)->uline_on)
2982 #define CENTER_BIT(X) (HD(X)->center)
/*
 * HTML_FLUSH: pass the bytes collected between X->line and X->linep to
 * html_write(), rewind linep to the start of the buffer and zero f2.
 */
2983 #define HTML_FLUSH(X) { \
2984 html_write(X, (X)->line, (X)->linep - (X)->line); \
2985 (X)->linep = (X)->line; \
2986 (X)->f2 = 0L; \
/*
 * HTML_BOLD: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_BOLDON (S non-zero) or TAG_BOLDOFF (S zero) through html_output().
 *
 * S is evaluated exactly once.  The former "if(S) ... else if(!(S))"
 * form evaluated it twice, so an argument with side effects could select
 * neither branch and emit nothing.
 *
 * The expansion is deliberately still a bare "if(...){...}" statement so
 * existing call sites keep compiling unchanged; do not follow a use of
 * it with an "else".
 */
#define	HTML_BOLD(X, S)	if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_BOLDON : TAG_BOLDOFF); \
			}
/*
 * HTML_ULINE: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_ULINEON (S non-zero) or TAG_ULINEOFF (S zero) through html_output().
 *
 * S is evaluated exactly once (the former if/else-if pair tested it
 * twice).  Still expands to a bare "if(...){...}" statement for call-site
 * compatibility.
 */
#define	HTML_ULINE(X, S) \
			if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_ULINEON : TAG_ULINEOFF); \
			}
/*
 * HTML_ITALIC: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_ITALICON (S non-zero) or TAG_ITALICOFF (S zero) through
 * html_output().
 *
 * S is parenthesized and evaluated exactly once (the former if/else-if
 * pair tested it twice, the first time unparenthesized).  Still expands
 * to a bare "if(...){...}" statement for call-site compatibility.
 */
#define	HTML_ITALIC(X, S) \
			if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_ITALICON : TAG_ITALICOFF); \
			}
/*
 * HTML_STRIKE: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_STRIKEON (S non-zero) or TAG_STRIKEOFF (S zero) through
 * html_output().
 *
 * S is parenthesized and evaluated exactly once (the former if/else-if
 * pair tested it twice).  Still expands to a bare "if(...){...}"
 * statement for call-site compatibility.
 */
#define	HTML_STRIKE(X, S) \
			if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_STRIKEON : TAG_STRIKEOFF); \
			}
/*
 * HTML_BIG: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_BIGON (S non-zero) or TAG_BIGOFF (S zero) through html_output().
 *
 * S is parenthesized and evaluated exactly once (the former if/else-if
 * pair tested it twice).  Still expands to a bare "if(...){...}"
 * statement for call-site compatibility.
 */
#define	HTML_BIG(X, S) \
			if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_BIGON : TAG_BIGOFF); \
			}
/*
 * HTML_SMALL: unless STRIP(X) is set, send TAG_EMBED followed by
 * TAG_SMALLON (S non-zero) or TAG_SMALLOFF (S zero) through html_output().
 *
 * S is parenthesized and evaluated exactly once (the former if/else-if
 * pair tested it twice).  Still expands to a bare "if(...){...}"
 * statement for call-site compatibility.
 */
#define	HTML_SMALL(X, S) \
			if(! STRIP(X)){ \
			    html_output((X), TAG_EMBED); \
			    html_output((X), (S) ? TAG_SMALLON : TAG_SMALLOFF); \
			}
/*
 * WRAPPED_LEN: display width of the centered text pending for filter X --
 * the assembled line's width plus the current word's width, plus one
 * separating column when both are non-empty.  Yields 0 when no centering
 * state (HD(X)->centered) exists.
 *
 * The body now uses its parameter; it used to ignore X and silently
 * capture whatever variable named "f" was in scope at the call site.
 * Callers that pass f expand to exactly the same code as before.
 */
#define	WRAPPED_LEN(X)	((HD(X)->centered) \
			   ? (HD(X)->centered->line.width \
				+ HD(X)->centered->word.width \
				+ ((HD(X)->centered->line.width \
				    && HD(X)->centered->word.width) \
				   ? 1 : 0)) \
			   : 0)
/*
 * HTML_DUMP_LIT: push the first L bytes of S through HTML_TEXT() on
 * filter F.  ASCII white space goes through unchanged; every other byte
 * is wrapped with MAKE_LITERAL() first.
 *
 * The expansion declares its own locals i and c, so the S and L
 * arguments must not mention variables with those names.  L is
 * re-evaluated on every loop iteration.
 */
3060 #define HTML_DUMP_LIT(F, S, L) { \
3061 int i, c; \
3062 for(i = 0; i < (L); i++){ \
3063 c = ASCII_ISSPACE((unsigned char)(S)[i]) \
3064 ? (S)[i] \
3065 : MAKE_LITERAL((S)[i]); \
3066 HTML_TEXT(F, c); \
/*
 * HTML_PROC: feed one input character C to filter F.
 *
 *  - While a token collector is installed (HD(F)->token), C is handed to
 *    it.  A non-zero return ends collection and the collector is freed
 *    with FREE_CLCTR().  A negative return additionally replays what was
 *    collected as literal text: "<", the element name if any, the
 *    collected buffer if non-empty, and then C itself.
 *  - Otherwise a '<' starts a new collector via NEW_CLCTR().
 *  - Anything else is ordinary text and goes to HTML_TEXT().
 *
 * NOTE(review): brace nesting is read from the visible lines only;
 * confirm FREE_CLCTR() sits outside the "i < 0" branch.
 */
3069 #define HTML_PROC(F, C) { \
3070 if(HD(F)->token){ \
3071 int i; \
3072 if((i = (*(HD(F)->token))(F, C)) != 0){ \
3073 if(i < 0){ \
3074 HTML_DUMP_LIT(F, "<", 1); \
3075 if(HD(F)->el_data->element){ \
3076 HTML_DUMP_LIT(F, \
3077 HD(F)->el_data->element, \
3078 strlen(HD(F)->el_data->element));\
3080 if(HD(F)->el_data->len){ \
3081 HTML_DUMP_LIT(F, \
3082 HD(F)->el_data->buf, \
3083 HD(F)->el_data->len); \
3085 HTML_TEXT(F, C); \
3087 FREE_CLCTR(F); \
3090 else if((C) == '<'){ \
3091 NEW_CLCTR(F); \
3093 else \
3094 HTML_TEXT(F, C); \
/*
 * HTML_LINEP_PUTC: append C at F->linep.  When the write position is
 * within one byte of HD(F)->line_bufsize, the line buffer is first
 * doubled with fs_resize() and linep is re-derived from the saved offset,
 * since the resized buffer may live at a new address.
 */
3096 #define HTML_LINEP_PUTC(F, C) { \
3097 if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
3098 size_t offset = (F)->linep - (F)->line; \
3099 fs_resize((void **) &(F)->line, \
3100 (HD(F)->line_bufsize * 2) * sizeof(char)); \
3101 HD(F)->line_bufsize *= 2; \
3102 (F)->linep = &(F)->line[offset]; \
3104 *(F)->linep++ = (C); \
/*
 * HTML_TEXT: two-state white-space coalescer keyed on F->f1.
 *
 *  WSPACE - further white space is dropped; the first non-space
 *           character emits a single ' ', switches back to DFL and is
 *           then handled by the DFL case.
 *  DFL    - nothing is emitted while HD(F)->bitbucket is set; white
 *           space switches to WSPACE when wrapping is on
 *           (HD(F)->wrapstate); any other character goes to
 *           HTML_TEXT_OUT().
 */
3106 #define HTML_TEXT(F, C) switch((F)->f1){ \
3107 case WSPACE : \
3108 if(HTML_ISSPACE(C)) /* ignore repeated WS */ \
3109 break; \
3110 HTML_TEXT_OUT(F, ' '); \
3111 (F)->f1 = DFL;/* stop sending chars here */ \
3112 /* fall thru to process 'c' */ \
3113 case DFL: \
3114 if(HD(F)->bitbucket) \
3115 (F)->f1 = DFL; /* no op */ \
3116 else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
3117 (F)->f1 = WSPACE;/* coalesce white space */ \
3118 else HTML_TEXT_OUT(F, C); \
3119 break; \
/*
 * HTML_TEXT_OUT: deliver character C.  If any handler is stacked, the
 * top handler's function is called with (record, C, GF_DATA); otherwise
 * C goes straight to html_output().
 *
 * Expands to an unbraced if/else that already ends in ';'.
 */
3121 #define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */ \
3122 (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
3123 else \
3124 html_output(F, C);
/*
 * HTML_DEBUG_EL: with DEBUG defined, log label S and the collected
 * element name of D at debug level 5, and when debug > 5 also list each
 * collected attribute (with "=value" when a value is present) at level 6.
 * Without DEBUG it expands to nothing.
 */
3125 #ifdef DEBUG
3126 #define HTML_DEBUG_EL(S, D) { \
3127 dprint((5, "-- html %s: %s\n", \
3128 S ? S : "?", \
3129 (D)->element \
3130 ? (D)->element : "NULL")); \
3131 if(debug > 5){ \
3132 PARAMETER *p; \
3133 for(p = (D)->attribs; \
3134 p && p->attribute; \
3135 p = p->next) \
3136 dprint((6, \
3137 " PARM: %s%s%s\n", \
3138 p->attribute \
3139 ? p->attribute : "NULL",\
3140 p->value ? "=" : "", \
3141 p->value ? p->value : ""));\
3144 #else
3145 #define HTML_DEBUG_EL(S, D)
3146 #endif
/*
 * Default location of the pine.info file; a build may predefine
 * SYSTEM_PINE_INFO_PATH to override it.
 */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
#endif

/*
 * CHTML_VAR_EXPAND: map the variable name "PINE_INFO_PATH" to
 * SYSTEM_PINE_INFO_PATH; any other string is yielded unchanged.
 *
 * S is now parenthesized wherever it is used, so low-precedence argument
 * expressions expand as written.  S is still evaluated twice and must
 * not be NULL (it is passed to strcmp()).
 */
#define CHTML_VAR_EXPAND(S)	(!strcmp((S), "PINE_INFO_PATH") \
				 ? SYSTEM_PINE_INFO_PATH : (S))
3155 * Protos for Tag handlers
/*
 * All tag handlers share the ELPROP_S.handler signature.  HTML_TEXT_OUT()
 * calls the active one as (handler record, character, GF_DATA).
 * NOTE(review): the third argument is presumably a GF_* command code in
 * general -- confirm against the handler definitions.
 */
3157 int html_head(HANDLER_S *, int, int);
3158 int html_base(HANDLER_S *, int, int);
3159 int html_title(HANDLER_S *, int, int);
3160 int html_body(HANDLER_S *, int, int);
3161 int html_a(HANDLER_S *, int, int);
3162 int html_br(HANDLER_S *, int, int);
3163 int html_hr(HANDLER_S *, int, int);
3164 int html_p(HANDLER_S *, int, int);
3165 int html_table(HANDLER_S *, int, int);
3166 int html_caption(HANDLER_S *, int, int);
3167 int html_tr(HANDLER_S *, int, int);
3168 int html_td(HANDLER_S *, int, int);
3169 int html_th(HANDLER_S *, int, int);
3170 int html_thead(HANDLER_S *, int, int);
3171 int html_tbody(HANDLER_S *, int, int);
3172 int html_tfoot(HANDLER_S *, int, int);
3173 int html_col(HANDLER_S *, int, int);
3174 int html_colgroup(HANDLER_S *, int, int);
3175 int html_b(HANDLER_S *, int, int);
3176 int html_u(HANDLER_S *, int, int);
3177 int html_i(HANDLER_S *, int, int);
3178 int html_em(HANDLER_S *, int, int);
3179 int html_strong(HANDLER_S *, int, int);
3180 int html_s(HANDLER_S *, int, int);
3181 int html_big(HANDLER_S *, int, int);
3182 int html_small(HANDLER_S *, int, int);
3183 int html_font(HANDLER_S *, int, int);
3184 int html_img(HANDLER_S *, int, int);
3185 int html_map(HANDLER_S *, int, int);
3186 int html_area(HANDLER_S *, int, int);
3187 int html_form(HANDLER_S *, int, int);
3188 int html_input(HANDLER_S *, int, int);
3189 int html_option(HANDLER_S *, int, int);
3190 int html_optgroup(HANDLER_S *, int, int);
3191 int html_button(HANDLER_S *, int, int);
3192 int html_select(HANDLER_S *, int, int);
3193 int html_textarea(HANDLER_S *, int, int);
3194 int html_label(HANDLER_S *, int, int);
3195 int html_fieldset(HANDLER_S *, int, int);
3196 int html_ul(HANDLER_S *, int, int);
3197 int html_ol(HANDLER_S *, int, int);
3198 int html_menu(HANDLER_S *, int, int);
3199 int html_dir(HANDLER_S *, int, int);
3200 int html_li(HANDLER_S *, int, int);
3201 int html_h1(HANDLER_S *, int, int);
3202 int html_h2(HANDLER_S *, int, int);
3203 int html_h3(HANDLER_S *, int, int);
3204 int html_h4(HANDLER_S *, int, int);
3205 int html_h5(HANDLER_S *, int, int);
3206 int html_h6(HANDLER_S *, int, int);
3207 int html_blockquote(HANDLER_S *, int, int);
3208 int html_address(HANDLER_S *, int, int);
3209 int html_pre(HANDLER_S *, int, int);
3210 int html_center(HANDLER_S *, int, int);
3211 int html_div(HANDLER_S *, int, int);
3212 int html_span(HANDLER_S *, int, int);
3213 int html_dl(HANDLER_S *, int, int);
3214 int html_dt(HANDLER_S *, int, int);
3215 int html_dd(HANDLER_S *, int, int);
3216 int html_script(HANDLER_S *, int, int);
3217 int html_applet(HANDLER_S *, int, int);
3218 int html_style(HANDLER_S *, int, int);
3219 int html_kbd(HANDLER_S *, int, int);
3220 int html_dfn(HANDLER_S *, int, int);
3221 int html_var(HANDLER_S *, int, int);
3222 int html_tt(HANDLER_S *, int, int);
3223 int html_samp(HANDLER_S *, int, int);
3224 int html_sub(HANDLER_S *, int, int);
3225 int html_sup(HANDLER_S *, int, int);
3226 int html_cite(HANDLER_S *, int, int);
3227 int html_code(HANDLER_S *, int, int);
3228 int html_ins(HANDLER_S *, int, int);
3229 int html_del(HANDLER_S *, int, int);
3230 int html_abbr(HANDLER_S *, int, int);
3233 * Protos for RSS 2.0 Tag handlers
/* Same (HANDLER_S *, int, int) signature as the HTML tag handlers, so they fit ELPROP_S.handler too. */
3235 int rss_rss(HANDLER_S *, int, int);
3236 int rss_channel(HANDLER_S *, int, int);
3237 int rss_title(HANDLER_S *, int, int);
3238 int rss_image(HANDLER_S *, int, int);
3239 int rss_link(HANDLER_S *, int, int);
3240 int rss_description(HANDLER_S *, int, int);
3241 int rss_ttl(HANDLER_S *, int, int);
3242 int rss_item(HANDLER_S *, int, int);
3245 * Proto's for support routines
/*
 * Helpers used by the macros above: html_element_collector() is the
 * token function NEW_CLCTR() installs, html_output() is the final sink
 * for HTML_TEXT_OUT() and the HTML_BOLD()-style tag macros, and
 * html_write() is what HTML_FLUSH() calls.  None of these is declared
 * static here.
 */
3247 void html_pop(FILTER_S *, ELPROP_S *);
3248 int html_push(FILTER_S *, ELPROP_S *);
3249 int html_element_collector(FILTER_S *, int);
3250 int html_element_flush(CLCTR_S *);
3251 void html_element_comment(FILTER_S *, char *);
3252 void html_element_output(FILTER_S *, int);
3253 int html_entity_collector(FILTER_S *, int, UCS *, char **);
3254 void html_a_prefix(FILTER_S *);
3255 void html_a_finish(HANDLER_S *);
3256 void html_a_output_prefix(FILTER_S *, int);
3257 void html_a_output_info(HANDLER_S *);
3258 void html_a_relative(char *, char *, HANDLE_S *);
3259 int html_href_relative(char *);
3260 int html_indent(FILTER_S *, int, int);
3261 void html_blank(FILTER_S *, int);
3262 void html_newline(FILTER_S *);
3263 void html_output(FILTER_S *, int);
3264 void html_output_string(FILTER_S *, char *);
3265 void html_output_raw_tag(FILTER_S *, char *);
3266 void html_output_normal(FILTER_S *, int, int, int);
3267 void html_output_flush(FILTER_S *);
3268 void html_output_centered(FILTER_S *, int, int, int);
3269 void html_centered_handle(int *, char *, int);
3270 void html_centered_putc(WRAPLINE_S *, int);
3271 void html_centered_flush(FILTER_S *);
3272 void html_centered_flush_line(FILTER_S *);
3273 void html_write_anchor(FILTER_S *, int);
3274 void html_write_newline(FILTER_S *);
3275 void html_write_indent(FILTER_S *, int);
3276 void html_write(FILTER_S *, char *, int);
3277 void html_putc(FILTER_S *, int);
3278 int html_event_attribute(char *);
3279 char *rss_skip_whitespace(char *s);
3280 ELPROP_S *element_properties(FILTER_S *, char *);
3284 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3285 * W3C doc "Additional named entities for HTML"
3287 static struct html_entities {
3288 char *name; /* entity name */
3289 UCS value; /* UCS entity value */
3290 char *plain; /* US-ASCII representation */
3291 } entity_tab[] = {
3292 {"quot", 0x0022}, /* 34 - quotation mark */
3293 {"amp", 0x0026}, /* 38 - ampersand */
3294 {"apos", 0x0027}, /* 39 - apostrophe */
3295 {"lt", 0x003C}, /* 60 - less-than sign */
3296 {"gt", 0x003E}, /* 62 - greater-than sign */
3297 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3298 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3299 {"cent", 0x00A2}, /* 162 - cent sign */
3300 {"pound", 0x00A3}, /* 163 - pound sign */
3301 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3302 {"yen", 0x00A5}, /* 165 - yen sign */
3303 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3304 {"sect", 0x00A7}, /* 167 - section sign */
3305 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3306 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3307 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3308 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3309 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3310 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3311 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3312 {"macr", 0x00AF}, /* 175 - macron */
3313 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3314 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3315 {"sup2", 0x00B2}, /* 178 - superscript two */
3316 {"sup3", 0x00B3}, /* 179 - superscript three */
3317 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3318 {"micro", 0x00B5}, /* 181 - micro sign */
3319 {"para", 0x00B6}, /* 182 - pilcrow sign */
3320 {"middot", 0x00B7}, /* 183 - middle dot */
3321 {"cedil", 0x00B8}, /* 184 - cedilla */
3322 {"sup1", 0x00B9}, /* 185 - superscript one */
3323 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3324 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3325 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3326 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3327 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3328 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3329 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3330 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3331 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3332 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3333 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3334 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3335 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3336 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3337 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3338 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3339 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3340 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3341 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3342 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3343 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3344 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3345 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3346 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3347 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3348 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3349 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3350 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3351 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3352 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3353 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3354 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3355 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3356 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3357 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3358 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3359 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3360 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3361 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3362 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3363 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3364 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3365 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3366 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3367 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3368 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3369 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3370 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3371 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3372 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3373 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3374 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3375 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3376 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3377 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3378 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3379 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3380 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3381 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3382 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3383 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3384 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3385 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3386 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3387 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3388 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3389 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3390 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3391 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3392 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3393 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3394 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3395 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3396 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3397 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3398 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3399 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3400 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3401 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3402 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3403 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3404 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3405 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3406 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3407 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3408 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3409 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3410 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3411 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3412 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3413 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3414 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3415 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3416 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3417 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3418 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3419 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3420 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3421 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3422 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3423 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3424 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3425 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3426 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3427 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3428 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3429 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3430 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3431 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3432 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3433 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3434 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3435 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3436 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3437 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3438 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3439 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3440 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3441 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3442 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3443 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3444 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3445 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3446 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3447 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3448 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3449 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3450 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3451 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3452 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3453 {"ensp", 0x2002}, /* 8194 - en space */
3454 {"emsp", 0x2003}, /* 8195 - em space */
3455 {"thinsp", 0x2009}, /* 8201 - thin space */
3456 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3457 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3458 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3459 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3460 {"ndash", 0x2013}, /* 8211 - en dash */
3461 {"mdash", 0x2014}, /* 8212 - em dash */
3462 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3463 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3464 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3465 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3466 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3467 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3468 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3469 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3470 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3471 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3472 {"dagger", 0x2020}, /* 8224 - dagger */
3473 {"Dagger", 0x2021}, /* 8225 - double dagger */
3474 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3475 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3476 {"permil", 0x2030}, /* 8240 - per mille sign */
3477 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3478 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3479 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3480 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3481 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3482 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3483 {"oline", 0x203E, "-"}, /* 8254 - overline */
3484 {"frasl", 0x2044}, /* 8260 - fraction slash */
3485 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3486 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3487 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3488 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3489 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3490 {"image", 0x2111}, /* 8465 - black-letter capital i */
3491 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3492 {"real", 0x211C}, /* 8476 - black-letter capital r */
3493 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3494 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3495 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3496 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3497 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3498 {"darr", 0x2193}, /* 8595 - downwards arrow */
3499 {"harr", 0x2194}, /* 8596 - left right arrow */
3500 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3501 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3502 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3503 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3504 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3505 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3506 {"forall", 0x2200}, /* 8704 - for all */
3507 {"part", 0x2202}, /* 8706 - partial differential */
3508 {"exist", 0x2203}, /* 8707 - there exists */
3509 {"empty", 0x2205}, /* 8709 - empty set */
3510 {"nabla", 0x2207}, /* 8711 - nabla */
3511 {"isin", 0x2208}, /* 8712 - element of */
3512 {"notin", 0x2209}, /* 8713 - not an element of */
3513 {"ni", 0x220B}, /* 8715 - contains as member */
3514 {"prod", 0x220F}, /* 8719 - n-ary product */
3515 {"sum", 0x2211}, /* 8721 - n-ary summation */
3516 {"minus", 0x2212}, /* 8722 - minus sign */
3517 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3518 {"radic", 0x221A}, /* 8730 - square root */
3519 {"prop", 0x221D}, /* 8733 - proportional to */
3520 {"infin", 0x221E}, /* 8734 - infinity */
3521 {"ang", 0x2220}, /* 8736 - angle */
3522 {"and", 0x2227}, /* 8743 - logical and */
3523 {"or", 0x2228}, /* 8744 - logical or */
3524 {"cap", 0x2229}, /* 8745 - intersection */
3525 {"cup", 0x222A}, /* 8746 - union */
3526 {"int", 0x222B}, /* 8747 - integral */
3527 {"there4", 0x2234}, /* 8756 - therefore */
3528 {"sim", 0x223C}, /* 8764 - tilde operator */
3529 {"cong", 0x2245}, /* 8773 - congruent to */
3530 {"asymp", 0x2248}, /* 8776 - almost equal to */
3531 {"ne", 0x2260}, /* 8800 - not equal to */
3532 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3533 {"le", 0x2264}, /* 8804 - less-than or equal to */
3534 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3535 {"sub", 0x2282}, /* 8834 - subset of */
3536 {"sup", 0x2283}, /* 8835 - superset of */
3537 {"nsub", 0x2284}, /* 8836 - not a subset of */
3538 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3539 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3540 {"oplus", 0x2295}, /* 8853 - circled plus */
3541 {"otimes", 0x2297}, /* 8855 - circled times */
3542 {"perp", 0x22A5}, /* 8869 - up tack */
3543 {"sdot", 0x22C5}, /* 8901 - dot operator */
3544 {"lceil", 0x2308}, /* 8968 - left ceiling */
3545 {"rceil", 0x2309}, /* 8969 - right ceiling */
3546 {"lfloor", 0x230A}, /* 8970 - left floor */
3547 {"rfloor", 0x230B}, /* 8971 - right floor */
3548 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3549 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3550 {"loz", 0x25CA}, /* 9674 - lozenge */
3551 {"spades", 0x2660}, /* 9824 - black spade suit */
3552 {"clubs", 0x2663}, /* 9827 - black club suit */
3553 {"hearts", 0x2665}, /* 9829 - black heart suit */
3554 {"diams", 0x2666} /* 9830 - black diamond suit */
3559 * Table of supported elements and corresponding handlers
3561 static ELPROP_S html_element_table[] = {
3562 {"HTML", 4}, /* HTML ignore if seen? */
3563 {"HEAD", 4, html_head}, /* slurp until <BODY> ? */
3564 {"TITLE", 5, html_title}, /* Document Title */
3565 {"BASE", 4, html_base}, /* HREF base */
3566 {"BODY", 4, html_body}, /* HTML BODY */
3567 {"A", 1, html_a}, /* Anchor */
3568 {"ABBR", 4, html_abbr}, /* Abbreviation */
3569 {"IMG", 3, html_img}, /* Image */
3570 {"MAP", 3, html_map}, /* Image Map */
3571 {"AREA", 4, html_area}, /* Image Map Area */
3572 {"HR", 2, html_hr, 1, 1}, /* Horizontal Rule */
3573 {"BR", 2, html_br, 0, 1}, /* Line Break */
3574 {"P", 1, html_p, 1}, /* Paragraph */
3575 {"OL", 2, html_ol, 1}, /* Ordered List */
3576 {"UL", 2, html_ul, 1}, /* Unordered List */
3577 {"MENU", 4, html_menu}, /* Menu List */
3578 {"DIR", 3, html_dir}, /* Directory List */
3579 {"LI", 2, html_li}, /* ... List Item */
3580 {"DL", 2, html_dl, 1}, /* Definition List */
3581 {"DT", 2, html_dt}, /* ... Def. Term */
3582 {"DD", 2, html_dd}, /* ... Def. Definition */
3583 {"I", 1, html_i}, /* Italic Text */
3584 {"EM", 2, html_em}, /* Typographic Emphasis */
3585 {"STRONG", 6, html_strong}, /* STRONG Typo Emphasis */
3586 {"VAR", 3, html_i}, /* Variable Name */
3587 {"B", 1, html_b}, /* Bold Text */
3588 {"U", 1, html_u}, /* Underline Text */
3589 {"S", 1, html_s}, /* Strike-Through Text */
3590 {"STRIKE", 6, html_s}, /* Strike-Through Text */
3591 {"BIG", 3, html_big}, /* Big Font Text */
3592 {"SMALL", 5, html_small}, /* Small Font Text */
3593 {"FONT", 4, html_font}, /* Font display directives */
3594 {"BLOCKQUOTE", 10, html_blockquote, 1}, /* Blockquote */
3595 {"ADDRESS", 7, html_address, 1}, /* Address */
3596 {"CENTER", 6, html_center}, /* Centered Text v3.2 */
3597 {"DIV", 3, html_div, 1}, /* Document Division 3.2 */
3598 {"SPAN", 4, html_span}, /* Text Span */
3599 {"H1", 2, html_h1, 1}, /* Headings... */
3600 {"H2", 2, html_h2, 1},
3601 {"H3", 2, html_h3,1},
3602 {"H4", 2, html_h4, 1},
3603 {"H5", 2, html_h5, 1},
3604 {"H6", 2, html_h6, 1},
3605 {"PRE", 3, html_pre, 1}, /* Preformatted Text */
3606 {"KBD", 3, html_kbd}, /* Keyboard Input (NO OP) */
3607 {"DFN", 3, html_dfn}, /* Definition (NO OP) */
3608 {"VAR", 3, html_var}, /* Variable (NO OP) */
3609 {"TT", 2, html_tt}, /* Typetype (NO OP) */
3610 {"SAMP", 4, html_samp}, /* Sample Text (NO OP) */
3611 {"CITE", 4, html_cite}, /* Citation (NO OP) */
3612 {"CODE", 4, html_code}, /* Code Text (NO OP) */
3613 {"INS", 3, html_ins}, /* Text Inseted (NO OP) */
3614 {"DEL", 3, html_del}, /* Text Deleted (NO OP) */
3615 {"SUP", 3, html_sup}, /* Text Superscript (NO OP) */
3616 {"SUB", 3, html_sub}, /* Text Superscript (NO OP) */
3617 {"STYLE", 5, html_style}, /* CSS Definitions */
3619 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3621 {"FORM", 4, html_form, 1}, /* form within a document */
3622 {"INPUT", 5, html_input}, /* One input field, options */
3623 {"BUTTON", 6, html_button}, /* Push Button */
3624 {"OPTION", 6, html_option}, /* One option within Select */
3625 {"OPTION", 6, html_optgroup}, /* Option Group Definition */
3626 {"SELECT", 6, html_select}, /* Selection from a set */
3627 {"TEXTAREA", 8, html_textarea}, /* A multi-line input field */
3628 {"LABEL", 5, html_label}, /* Control Label */
3629 {"FIELDSET", 8, html_fieldset, 1}, /* Fieldset Control Group */
3631 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3632 {"SCRIPT", 6, html_script}, /* Embedded scripting statements */
3633 {"APPLET", 6, NULL}, /* Embedded applet statements */
3634 {"OBJECT", 6, NULL}, /* Embedded object statements */
3635 {"LINK", 4, NULL}, /* References to external data */
3636 {"PARAM", 5, NULL}, /* Applet/Object parameters */
3638 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3640 {"TABLE", 5, html_table, 1}, /* Table */
3641 {"CAPTION", 7, html_caption}, /* Table Caption */
3642 {"TR", 2, html_tr}, /* Table Table Row */
3643 {"TD", 2, html_td}, /* Table Table Data */
3644 {"TH", 2, html_th}, /* Table Table Head */
3645 {"THEAD", 5, html_thead}, /* Table Table Head */
3646 {"TBODY", 5, html_tbody}, /* Table Table Body */
3647 {"TFOOT", 5, html_tfoot}, /* Table Table Foot */
3648 {"COL", 3, html_col}, /* Table Column Attibutes */
3649 {"COLGROUP", 8, html_colgroup}, /* Table Column Group Attibutes */
3651 {NULL, 0, NULL}
3656 * Table of supported RSS 2.0 elements
3658 static ELPROP_S rss_element_table[] = {
3659 {"RSS", 3, rss_rss}, /* RSS 2.0 version */
3660 {"CHANNEL", 7, rss_channel}, /* RSS 2.0 Channel */
3661 {"TITLE", 5, rss_title}, /* RSS 2.0 Title */
3662 {"IMAGE", 5, rss_image}, /* RSS 2.0 Channel Image */
3663 {"LINK", 4, rss_link}, /* RSS 2.0 Channel/Item Link */
3664 {"DESCRIPTION", 11, rss_description}, /* RSS 2.0 Channel/Item Description */
3665 {"ITEM", 4, rss_item}, /* RSS 2.0 Channel ITEM */
3666 {"TTL", 3, rss_ttl}, /* RSS 2.0 Item TTL */
3667 {NULL, 0, NULL}
3672 * Initialize the given handler, and add it to the stack if it
3673 * requests it.
3675 * Returns: 1 if handler chose to get pushed on stack
3676 * 0 if handler declined
3679 html_push(FILTER_S *fd, ELPROP_S *ep)
3681 HANDLER_S *new;
3683 new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3684 memset(new, 0, sizeof(HANDLER_S));
3685 new->html_data = fd;
3686 new->element = ep;
3687 if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3688 new->below = HANDLERS(fd);
3689 HANDLERS(fd) = new; /* push */
3690 return(1);
3693 fs_give((void **) &new);
3694 return(0);
3699 * Remove the most recently installed the given handler
3700 * after letting it accept its demise.
3702 void
3703 html_pop(FILTER_S *fd, ELPROP_S *ep)
3705 HANDLER_S *tp;
3707 for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3708 HANDLER_S *tp2;
3710 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3711 /* if no evidence of opening tag, ignore given closing tag */
3712 for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3715 if(!tp2){
3716 dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3717 return;
3720 (void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3721 HANDLERS(fd) = tp->below;
3724 if(tp){
3725 (void) (*EL(tp)->handler)(tp, 0, GF_EOD); /* may adjust handler list */
3726 if(tp != HANDLERS(fd)){
3727 HANDLER_S *p;
3729 for(p = HANDLERS(fd); p->below != tp; p = p->below)
3732 if(p)
3733 p->below = tp->below; /* remove from middle of stack */
3734 /* BUG: else programming botch and we should die */
3736 else
3737 HANDLERS(fd) = tp->below; /* pop */
3739 fs_give((void **)&tp);
3741 else{
3742 /* BUG: should MAKE SURE NOT TO EMIT IT */
3743 dprint((3, "-- html error: end tag without a start: %s", ep->element));
3749 * Deal with data passed a hander in its GF_DATA state
3751 static void
3752 html_handoff(HANDLER_S *hd, int ch)
3754 if(hd->below)
3755 (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3756 else
3757 html_output(hd->html_data, ch);
3762 * HTML <BR> element handler
3765 html_br(HANDLER_S *hd, int ch, int cmd)
3767 if(cmd == GF_RESET){
3768 if(PASS_HTML(hd->html_data)){
3769 html_output_raw_tag(hd->html_data, "br");
3771 else{
3772 html_output(hd->html_data, HTML_NEWLINE);
3776 return(0); /* don't get linked */
3781 * HTML <HR> (Horizontal Rule) element handler
3784 html_hr(HANDLER_S *hd, int ch, int cmd)
3786 if(cmd == GF_RESET){
3787 if(PASS_HTML(hd->html_data)){
3788 html_output_raw_tag(hd->html_data, "hr");
3790 else{
3791 int i, old_wrap, width, align;
3792 PARAMETER *p;
3794 width = WRAP_COLS(hd->html_data);
3795 align = 0;
3796 for(p = HD(hd->html_data)->el_data->attribs;
3797 p && p->attribute;
3798 p = p->next)
3799 if(p->value){
3800 if(!strucmp(p->attribute, "ALIGN")){
3801 if(!strucmp(p->value, "LEFT"))
3802 align = 1;
3803 else if(!strucmp(p->value, "RIGHT"))
3804 align = 2;
3806 else if(!strucmp(p->attribute, "WIDTH")){
3807 char *cp;
3809 width = 0;
3810 for(cp = p->value; *cp; cp++)
3811 if(*cp == '%'){
3812 width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3813 break;
3815 else if(isdigit((unsigned char) *cp))
3816 width = (width * 10) + (*cp - '0');
3818 width = MIN(width, WRAP_COLS(hd->html_data));
3822 html_blank(hd->html_data, 1); /* at least one blank line */
3824 old_wrap = HD(hd->html_data)->wrapstate;
3825 HD(hd->html_data)->wrapstate = 0;
3826 if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3827 && ((align == 0) ? i /= 2 : (align == 2)))
3828 for(; i > 0; i--)
3829 html_output(hd->html_data, ' ');
3831 for(i = 0; i < width; i++)
3832 html_output(hd->html_data, '_');
3834 html_blank(hd->html_data, 1);
3835 HD(hd->html_data)->wrapstate = old_wrap;
3839 return(0); /* don't get linked */
3844 * HTML <P> (paragraph) element handler
3847 html_p(HANDLER_S *hd, int ch, int cmd)
3849 if(cmd == GF_DATA){
3850 html_handoff(hd, ch);
3852 else if(cmd == GF_RESET){
3853 if(PASS_HTML(hd->html_data)){
3854 html_output_raw_tag(hd->html_data, "p");
3856 else{
3857 /* Make sure there's at least 1 blank line */
3858 html_blank(hd->html_data, 1);
3860 /* adjust indent level if needed */
3861 if(HD(hd->html_data)->li_pending){
3862 html_indent(hd->html_data, 4, HTML_ID_INC);
3863 HD(hd->html_data)->li_pending = 0;
3867 else if(cmd == GF_EOD){
3868 if(PASS_HTML(hd->html_data)){
3869 html_output_string(hd->html_data, "</p>");
3871 else{
3872 /* Make sure there's at least 1 blank line */
3873 html_blank(hd->html_data, 1);
3877 return(1); /* GET linked */
3882 * HTML Table <TABLE> (paragraph) table row
3885 html_table(HANDLER_S *hd, int ch, int cmd)
3887 if(cmd == GF_DATA){
3888 if(PASS_HTML(hd->html_data)){
3889 html_handoff(hd, ch);
3892 else if(cmd == GF_RESET){
3893 if(PASS_HTML(hd->html_data)){
3894 html_output_raw_tag(hd->html_data, "table");
3896 else
3897 /* Make sure there's at least 1 blank line */
3898 html_blank(hd->html_data, 0);
3900 else if(cmd == GF_EOD){
3901 if(PASS_HTML(hd->html_data)){
3902 html_output_string(hd->html_data, "</table>");
3904 else
3905 /* Make sure there's at least 1 blank line */
3906 html_blank(hd->html_data, 0);
3908 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3913 * HTML <CAPTION> (Table Caption) element handler
3916 html_caption(HANDLER_S *hd, int ch, int cmd)
3918 if(cmd == GF_DATA){
3919 html_handoff(hd, ch);
3921 else if(cmd == GF_RESET){
3922 if(PASS_HTML(hd->html_data)){
3923 html_output_raw_tag(hd->html_data, "caption");
3925 else{
3926 /* turn ON the centered bit */
3927 CENTER_BIT(hd->html_data) = 1;
3930 else if(cmd == GF_EOD){
3931 if(PASS_HTML(hd->html_data)){
3932 html_output_string(hd->html_data, "</caption>");
3934 else{
3935 /* turn OFF the centered bit */
3936 CENTER_BIT(hd->html_data) = 0;
3940 return(1);
3945 * HTML Table <TR> (paragraph) table row
3948 html_tr(HANDLER_S *hd, int ch, int cmd)
3950 if(cmd == GF_DATA){
3951 if(PASS_HTML(hd->html_data)){
3952 html_handoff(hd, ch);
3955 else if(cmd == GF_RESET){
3956 if(PASS_HTML(hd->html_data)){
3957 html_output_raw_tag(hd->html_data, "tr");
3959 else
3960 /* Make sure there's at least 1 blank line */
3961 html_blank(hd->html_data, 0);
3963 else if(cmd == GF_EOD){
3964 if(PASS_HTML(hd->html_data)){
3965 html_output_string(hd->html_data, "</tr>");
3967 else
3968 /* Make sure there's at least 1 blank line */
3969 html_blank(hd->html_data, 0);
3971 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3976 * HTML Table <TD> (paragraph) table data
3979 html_td(HANDLER_S *hd, int ch, int cmd)
3981 if(cmd == GF_DATA){
3982 if(PASS_HTML(hd->html_data)){
3983 html_handoff(hd, ch);
3986 else if(cmd == GF_RESET){
3987 if(PASS_HTML(hd->html_data)){
3988 html_output_raw_tag(hd->html_data, "td");
3990 else{
3991 PARAMETER *p;
3993 for(p = HD(hd->html_data)->el_data->attribs;
3994 p && p->attribute;
3995 p = p->next)
3996 if(!strucmp(p->attribute, "nowrap")
3997 && (hd->html_data->f2 || hd->html_data->n)){
3998 HTML_DUMP_LIT(hd->html_data, " | ", 3);
3999 break;
4003 else if(cmd == GF_EOD){
4004 if(PASS_HTML(hd->html_data)){
4005 html_output_string(hd->html_data, "</td>");
4009 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4014 * HTML Table <TH> (paragraph) table head
4017 html_th(HANDLER_S *hd, int ch, int cmd)
4019 if(cmd == GF_DATA){
4020 if(PASS_HTML(hd->html_data)){
4021 html_handoff(hd, ch);
4024 else if(cmd == GF_RESET){
4025 if(PASS_HTML(hd->html_data)){
4026 html_output_raw_tag(hd->html_data, "th");
4028 else{
4029 PARAMETER *p;
4031 for(p = HD(hd->html_data)->el_data->attribs;
4032 p && p->attribute;
4033 p = p->next)
4034 if(!strucmp(p->attribute, "nowrap")
4035 && (hd->html_data->f2 || hd->html_data->n)){
4036 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4037 break;
4041 else if(cmd == GF_EOD){
4042 if(PASS_HTML(hd->html_data)){
4043 html_output_string(hd->html_data, "</th>");
4047 return(PASS_HTML(hd->html_data)); /* don't get linked */
4052 * HTML Table <THEAD> table head
4055 html_thead(HANDLER_S *hd, int ch, int cmd)
4057 if(PASS_HTML(hd->html_data)){
4058 if(cmd == GF_DATA){
4059 html_handoff(hd, ch);
4061 else if(cmd == GF_RESET){
4062 html_output_raw_tag(hd->html_data, "thead");
4064 else if(cmd == GF_EOD){
4065 html_output_string(hd->html_data, "</thead>");
4068 return(1); /* GET linked */
4071 return(0); /* don't get linked */
4076 * HTML Table <TBODY> table body
4079 html_tbody(HANDLER_S *hd, int ch, int cmd)
4081 if(PASS_HTML(hd->html_data)){
4082 if(cmd == GF_DATA){
4083 html_handoff(hd, ch);
4085 else if(cmd == GF_RESET){
4086 html_output_raw_tag(hd->html_data, "tbody");
4088 else if(cmd == GF_EOD){
4089 html_output_string(hd->html_data, "</tbody>");
4092 return(1); /* GET linked */
4095 return(0); /* don't get linked */
4100 * HTML Table <TFOOT> table body
4103 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4105 if(PASS_HTML(hd->html_data)){
4106 if(cmd == GF_DATA){
4107 html_handoff(hd, ch);
4109 else if(cmd == GF_RESET){
4110 html_output_raw_tag(hd->html_data, "tfoot");
4112 else if(cmd == GF_EOD){
4113 html_output_string(hd->html_data, "</tfoot>");
4116 return(1); /* GET linked */
4119 return(0); /* don't get linked */
4124 * HTML <COL> (Table Column Attributes) element handler
4127 html_col(HANDLER_S *hd, int ch, int cmd)
4129 if(cmd == GF_RESET){
4130 if(PASS_HTML(hd->html_data)){
4131 html_output_raw_tag(hd->html_data, "col");
4135 return(0); /* don't get linked */
4140 * HTML Table <COLGROUP> table body
4143 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4145 if(PASS_HTML(hd->html_data)){
4146 if(cmd == GF_DATA){
4147 html_handoff(hd, ch);
4149 else if(cmd == GF_RESET){
4150 html_output_raw_tag(hd->html_data, "colgroup");
4152 else if(cmd == GF_EOD){
4153 html_output_string(hd->html_data, "</colgroup>");
4156 return(1); /* GET linked */
4159 return(0); /* don't get linked */
4164 * HTML <I> (italic text) element handler
4167 html_i(HANDLER_S *hd, int ch, int cmd)
4169 if(cmd == GF_DATA){
4170 /* include LITERAL in spaceness test! */
4171 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4172 HTML_ITALIC(hd->html_data, 1);
4173 hd->x = 0;
4176 html_handoff(hd, ch);
4178 else if(cmd == GF_RESET){
4179 hd->x = 1;
4181 else if(cmd == GF_EOD){
4182 if(!hd->x)
4183 HTML_ITALIC(hd->html_data, 0);
4186 return(1); /* get linked */
4191 * HTML <EM> element handler
4194 html_em(HANDLER_S *hd, int ch, int cmd)
4196 if(cmd == GF_DATA){
4197 if(!PASS_HTML(hd->html_data)){
4198 /* include LITERAL in spaceness test! */
4199 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4200 HTML_ITALIC(hd->html_data, 1);
4201 hd->x = 0;
4205 html_handoff(hd, ch);
4207 else if(cmd == GF_RESET){
4208 if(PASS_HTML(hd->html_data)){
4209 html_output_raw_tag(hd->html_data, "em");
4211 else{
4212 hd->x = 1;
4215 else if(cmd == GF_EOD){
4216 if(PASS_HTML(hd->html_data)){
4217 html_output_string(hd->html_data, "</em>");
4219 else{
4220 if(!hd->x)
4221 HTML_ITALIC(hd->html_data, 0);
4225 return(1); /* get linked */
4230 * HTML <STRONG> element handler
4233 html_strong(HANDLER_S *hd, int ch, int cmd)
4235 if(cmd == GF_DATA){
4236 if(!PASS_HTML(hd->html_data)){
4237 /* include LITERAL in spaceness test! */
4238 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4239 HTML_ITALIC(hd->html_data, 1);
4240 hd->x = 0;
4244 html_handoff(hd, ch);
4246 else if(cmd == GF_RESET){
4247 if(PASS_HTML(hd->html_data)){
4248 html_output_raw_tag(hd->html_data, "strong");
4250 else{
4251 hd->x = 1;
4254 else if(cmd == GF_EOD){
4255 if(PASS_HTML(hd->html_data)){
4256 html_output_string(hd->html_data, "</strong>");
4258 else{
4259 if(!hd->x)
4260 HTML_ITALIC(hd->html_data, 0);
4264 return(1); /* get linked */
4269 * HTML <u> (Underline text) element handler
4272 html_u(HANDLER_S *hd, int ch, int cmd)
4274 if(PASS_HTML(hd->html_data)){
4275 if(cmd == GF_DATA){
4276 html_handoff(hd, ch);
4278 else if(cmd == GF_RESET){
4279 html_output_raw_tag(hd->html_data, "u");
4281 else if(cmd == GF_EOD){
4282 html_output_string(hd->html_data, "</u>");
4285 return(1); /* get linked */
4288 return(0); /* do NOT get linked */
4293 * HTML <b> (Bold text) element handler
4296 html_b(HANDLER_S *hd, int ch, int cmd)
4298 if(cmd == GF_DATA){
4299 if(!PASS_HTML(hd->html_data)){
4300 /* include LITERAL in spaceness test! */
4301 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4302 HTML_BOLD(hd->html_data, 1);
4303 hd->x = 0;
4307 html_handoff(hd, ch);
4309 else if(cmd == GF_RESET){
4310 if(PASS_HTML(hd->html_data)){
4311 html_output_raw_tag(hd->html_data, "b");
4313 else{
4314 hd->x = 1;
4317 else if(cmd == GF_EOD){
4318 if(PASS_HTML(hd->html_data)){
4319 html_output_string(hd->html_data, "</b>");
4321 else{
4322 if(!hd->x)
4323 HTML_BOLD(hd->html_data, 0);
4327 return(1); /* get linked */
4332 * HTML <s> (strike-through text) element handler
4335 html_s(HANDLER_S *hd, int ch, int cmd)
4337 if(cmd == GF_DATA){
4338 if(!PASS_HTML(hd->html_data)){
4339 /* include LITERAL in spaceness test! */
4340 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4341 HTML_STRIKE(hd->html_data, 1);
4342 hd->x = 0;
4346 html_handoff(hd, ch);
4348 else if(cmd == GF_RESET){
4349 if(PASS_HTML(hd->html_data)){
4350 html_output_raw_tag(hd->html_data, "s");
4352 else{
4353 hd->x = 1;
4356 else if(cmd == GF_EOD){
4357 if(PASS_HTML(hd->html_data)){
4358 html_output_string(hd->html_data, "</s>");
4360 else{
4361 if(!hd->x)
4362 HTML_STRIKE(hd->html_data, 0);
4366 return(1); /* get linked */
4371 * HTML <big> (BIG text) element handler
4374 html_big(HANDLER_S *hd, int ch, int cmd)
4376 if(cmd == GF_DATA){
4377 /* include LITERAL in spaceness test! */
4378 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4379 HTML_BIG(hd->html_data, 1);
4380 hd->x = 0;
4383 html_handoff(hd, ch);
4385 else if(cmd == GF_RESET){
4386 hd->x = 1;
4388 else if(cmd == GF_EOD){
4389 if(!hd->x)
4390 HTML_BIG(hd->html_data, 0);
4393 return(1); /* get linked */
4398 * HTML <small> (SMALL text) element handler
4401 html_small(HANDLER_S *hd, int ch, int cmd)
4403 if(cmd == GF_DATA){
4404 /* include LITERAL in spaceness test! */
4405 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4406 HTML_SMALL(hd->html_data, 1);
4407 hd->x = 0;
4410 html_handoff(hd, ch);
4412 else if(cmd == GF_RESET){
4413 hd->x = 1;
4415 else if(cmd == GF_EOD){
4416 if(!hd->x)
4417 HTML_SMALL(hd->html_data, 0);
4420 return(1); /* get linked */
4425 * HTML <FONT> element handler
4428 html_font(HANDLER_S *hd, int ch, int cmd)
4430 if(PASS_HTML(hd->html_data)){
4431 if(cmd == GF_DATA){
4432 html_handoff(hd, ch);
4434 else if(cmd == GF_RESET){
4435 html_output_raw_tag(hd->html_data, "font");
4437 else if(cmd == GF_EOD){
4438 html_output_string(hd->html_data, "</font>");
4441 return(1); /* get linked */
4444 return(0);
4449 * HTML <IMG> element handler
4452 html_img(HANDLER_S *hd, int ch, int cmd)
4454 PARAMETER *p;
4455 char *alt = NULL, *src = NULL, *s;
4457 if(cmd == GF_RESET){
4458 if(PASS_HTML(hd->html_data)){
4459 html_output_raw_tag(hd->html_data, "img");
4461 else{
4462 for(p = HD(hd->html_data)->el_data->attribs;
4463 p && p->attribute;
4464 p = p->next)
4465 if(p->value && p->value[0]){
4466 if(!strucmp(p->attribute, "alt"))
4467 alt = p->value;
4468 if(!strucmp(p->attribute, "src"))
4469 src = p->value;
4473 * Multipart/Related Content ID pointer
4474 * ONLY attached messages are recognized
4475 * if we ever decide web bugs aren't a problem
4476 * anymore then we might expand the scope
4478 if(src
4479 && DO_HANDLES(hd->html_data)
4480 && RELATED_OK(hd->html_data)
4481 && struncmp(src, "cid:", 4) == 0){
4482 char buf[32];
4483 int i, n;
4484 HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4486 h->type = IMG;
4487 h->h.img.src = cpystr(src + 4);
4488 h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4490 HTML_TEXT(hd->html_data, TAG_EMBED);
4491 HTML_TEXT(hd->html_data, TAG_HANDLE);
4493 sprintf(buf, "%d", h->key);
4494 n = strlen(buf);
4495 HTML_TEXT(hd->html_data, n);
4496 for(i = 0; i < n; i++){
4497 unsigned int uic = buf[i];
4498 HTML_TEXT(hd->html_data, uic);
4501 return(0);
4503 else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4504 HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4505 HTML_TEXT(hd->html_data, ' ');
4506 return(0);
4508 else if(src
4509 && (s = strrindex(src, '/'))
4510 && *++s != '\0'){
4511 HTML_TEXT(hd->html_data, '[');
4512 HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4513 HTML_TEXT(hd->html_data, ']');
4514 HTML_TEXT(hd->html_data, ' ');
4515 return(0);
4518 /* text filler of last resort */
4519 HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4523 return(0); /* don't get linked */
4528 * HTML <MAP> (Image Map) element handler
4531 html_map(HANDLER_S *hd, int ch, int cmd)
4533 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4534 if(cmd == GF_DATA){
4535 html_handoff(hd, ch);
4537 else if(cmd == GF_RESET){
4538 html_output_raw_tag(hd->html_data, "map");
4540 else if(cmd == GF_EOD){
4541 html_output_string(hd->html_data, "</map>");
4544 return(1);
4547 return(0);
4552 * HTML <AREA> (Image Map Area) element handler
4555 html_area(HANDLER_S *hd, int ch, int cmd)
4557 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4558 if(cmd == GF_DATA){
4559 html_handoff(hd, ch);
4561 else if(cmd == GF_RESET){
4562 html_output_raw_tag(hd->html_data, "area");
4564 else if(cmd == GF_EOD){
4565 html_output_string(hd->html_data, "</area>");
4568 return(1);
4571 return(0);
4576 * HTML <FORM> (Form) element handler
4579 html_form(HANDLER_S *hd, int ch, int cmd)
4581 if(PASS_HTML(hd->html_data)){
4582 if(cmd == GF_DATA){
4583 html_handoff(hd, ch);
4585 else if(cmd == GF_RESET){
4586 PARAMETER **pp;
4588 /* SECURITY: make sure to redirect to new browser instance */
4589 for(pp = &(HD(hd->html_data)->el_data->attribs);
4590 *pp && (*pp)->attribute;
4591 pp = &(*pp)->next)
4592 if(!strucmp((*pp)->attribute, "target")){
4593 if((*pp)->value)
4594 fs_give((void **) &(*pp)->value);
4596 (*pp)->value = cpystr("_blank");
4599 if(!*pp){
4600 *pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4601 memset(*pp, 0, sizeof(PARAMETER));
4602 (*pp)->attribute = cpystr("target");
4603 (*pp)->value = cpystr("_blank");
4606 html_output_raw_tag(hd->html_data, "form");
4608 else if(cmd == GF_EOD){
4609 html_output_string(hd->html_data, "</form>");
4612 else{
4613 if(cmd == GF_RESET){
4614 html_blank(hd->html_data, 0);
4615 HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4616 html_blank(hd->html_data, 0);
4620 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4625 * HTML <INPUT> (Form) element handler
4628 html_input(HANDLER_S *hd, int ch, int cmd)
4630 if(PASS_HTML(hd->html_data)){
4631 if(cmd == GF_RESET){
4632 html_output_raw_tag(hd->html_data, "input");
4636 return(0); /* don't get linked */
4641 * HTML <BUTTON> (Form) element handler
4644 html_button(HANDLER_S *hd, int ch, int cmd)
4646 if(PASS_HTML(hd->html_data)){
4647 if(cmd == GF_DATA){
4648 html_handoff(hd, ch);
4650 else if(cmd == GF_RESET){
4651 html_output_raw_tag(hd->html_data, "button");
4653 else if(cmd == GF_EOD){
4654 html_output_string(hd->html_data, "</button>");
4657 return(1); /* get linked */
4660 return(0);
4665 * HTML <OPTION> (Form) element handler
4668 html_option(HANDLER_S *hd, int ch, int cmd)
4670 if(PASS_HTML(hd->html_data)){
4671 if(cmd == GF_DATA){
4672 html_handoff(hd, ch);
4674 else if(cmd == GF_RESET){
4675 html_output_raw_tag(hd->html_data, "option");
4677 else if(cmd == GF_EOD){
4678 html_output_string(hd->html_data, "</option>");
4681 return(1); /* get linked */
4684 return(0);
4689 * HTML <OPTGROUP> (Form) element handler
4692 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4694 if(PASS_HTML(hd->html_data)){
4695 if(cmd == GF_DATA){
4696 html_handoff(hd, ch);
4698 else if(cmd == GF_RESET){
4699 html_output_raw_tag(hd->html_data, "optgroup");
4701 else if(cmd == GF_EOD){
4702 html_output_string(hd->html_data, "</optgroup>");
4705 return(1); /* get linked */
4708 return(0);
4713 * HTML <SELECT> (Form) element handler
4716 html_select(HANDLER_S *hd, int ch, int cmd)
4718 if(PASS_HTML(hd->html_data)){
4719 if(cmd == GF_DATA){
4720 html_handoff(hd, ch);
4722 else if(cmd == GF_RESET){
4723 html_output_raw_tag(hd->html_data, "select");
4725 else if(cmd == GF_EOD){
4726 html_output_string(hd->html_data, "</select>");
4729 return(1); /* get linked */
4732 return(0);
4737 * HTML <TEXTAREA> (Form) element handler
4740 html_textarea(HANDLER_S *hd, int ch, int cmd)
4742 if(PASS_HTML(hd->html_data)){
4743 if(cmd == GF_DATA){
4744 html_handoff(hd, ch);
4746 else if(cmd == GF_RESET){
4747 html_output_raw_tag(hd->html_data, "textarea");
4749 else if(cmd == GF_EOD){
4750 html_output_string(hd->html_data, "</textarea>");
4753 return(1); /* get linked */
4756 return(0);
4761 * HTML <LABEL> (Form) element handler
4764 html_label(HANDLER_S *hd, int ch, int cmd)
4766 if(PASS_HTML(hd->html_data)){
4767 if(cmd == GF_DATA){
4768 html_handoff(hd, ch);
4770 else if(cmd == GF_RESET){
4771 html_output_raw_tag(hd->html_data, "label");
4773 else if(cmd == GF_EOD){
4774 html_output_string(hd->html_data, "</label>");
4777 return(1); /* get linked */
4780 return(0);
4785 * HTML <FIELDSET> (Form) element handler
4788 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4790 if(PASS_HTML(hd->html_data)){
4791 if(cmd == GF_DATA){
4792 html_handoff(hd, ch);
4794 else if(cmd == GF_RESET){
4795 html_output_raw_tag(hd->html_data, "fieldset");
4797 else if(cmd == GF_EOD){
4798 html_output_string(hd->html_data, "</fieldset>");
4801 return(1); /* get linked */
4804 return(0);
4809 * HTML <HEAD> element handler
4812 html_head(HANDLER_S *hd, int ch, int cmd)
4814 if(cmd == GF_DATA){
4815 html_handoff(hd, ch);
4817 else if(cmd == GF_RESET){
4818 HD(hd->html_data)->head = 1;
4820 else if(cmd == GF_EOD){
4821 HD(hd->html_data)->head = 0;
4824 return(1); /* get linked */
4829 * HTML <BASE> element handler
4832 html_base(HANDLER_S *hd, int ch, int cmd)
4834 if(cmd == GF_RESET){
4835 if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4836 PARAMETER *p;
4838 for(p = HD(hd->html_data)->el_data->attribs;
4839 p && p->attribute && strucmp(p->attribute, "HREF");
4840 p = p->next)
4843 if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4844 ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4848 return(0); /* DON'T get linked */
4853 * HTML <TITLE> element handler
4856 html_title(HANDLER_S *hd, int ch, int cmd)
4858 if(cmd == GF_DATA){
4859 if(hd->x + 1 >= hd->y){
4860 hd->y += 80;
4861 fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4864 hd->s[hd->x++] = (unsigned char) ch;
4866 else if(cmd == GF_RESET){
4867 hd->x = 0L;
4868 hd->y = 80L;
4869 hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4871 else if(cmd == GF_EOD){
4872 /* Down the road we probably want to give these bytes to
4873 * someone...
4875 hd->s[hd->x] = '\0';
4876 fs_give((void **)&hd->s);
4879 return(1); /* get linked */
4884 * HTML <BODY> element handler
4887 html_body(HANDLER_S *hd, int ch, int cmd)
4889 if(cmd == GF_DATA){
4890 html_handoff(hd, ch);
4892 else if(cmd == GF_RESET){
4893 if(PASS_HTML(hd->html_data)){
4894 PARAMETER *p, *tp;
4895 char **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4897 /* modify any attributes in a useful way? */
4898 for(p = HD(hd->html_data)->el_data->attribs;
4899 p && p->attribute;
4900 p = p->next)
4901 if(p->value){
4902 if(!strucmp(p->attribute, "style"))
4903 style = &p->value;
4904 else if(!strucmp(p->attribute, "text"))
4905 text = p->value;
4907 * bgcolor NOT passed since user setting takes precedence
4909 else if(!strucmp(p->attribute, "bgcolor"))
4910 bgcolor = p->value;
4914 /* colors pretty much it */
4915 if(text || bgcolor){
4916 if(!style){
4917 tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4918 memset(tp, 0, sizeof(PARAMETER));
4919 tp->next = HD(hd->html_data)->el_data->attribs;
4920 HD(hd->html_data)->el_data->attribs = tp;
4921 tp->attribute = cpystr("style");
4923 tmp_20k_buf[0] = '\0';
4924 style = &tp->value;
4925 pcs = "%s%s%s%s%s";
4927 else{
4928 snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4929 fs_give((void **) style);
4930 pcs = "; %s%s%s%s%s";
4933 snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4934 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4935 pcs,
4936 (text) ? "color: " : "", (text) ? text : "",
4937 (text && bgcolor) ? ";" : "",
4938 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4939 *style = cpystr(tmp_20k_buf);
4942 html_output_raw_tag(hd->html_data, "div");
4945 HD(hd->html_data)->body = 1;
4947 else if(cmd == GF_EOD){
4948 if(PASS_HTML(hd->html_data)){
4949 html_output_string(hd->html_data, "</div>");
4952 HD(hd->html_data)->body = 0;
4955 return(1); /* get linked */
4960 * HTML <A> (Anchor) element handler
4963 html_a(HANDLER_S *hd, int ch, int cmd)
4965 if(cmd == GF_DATA){
4966 html_handoff(hd, ch);
4968 if(hd->dp) /* remember text within anchor tags */
4969 so_writec(ch, (STORE_S *) hd->dp);
4971 else if(cmd == GF_RESET){
4972 int i, n, x;
4973 char buf[256];
4974 HANDLE_S *h;
4975 PARAMETER *p, *href = NULL, *name = NULL;
4978 * Pending Anchor!?!?
4979 * space insertion/line breaking that's yet to get done...
4981 if(HD(hd->html_data)->prefix){
4982 dprint((2, "-- html error: nested or unterminated anchor\n"));
4983 html_a_finish(hd);
4987 * Look for valid Anchor data vis the filter installer's parms
4988 * (e.g., Only allow references to our internal URLs if asked)
4990 for(p = HD(hd->html_data)->el_data->attribs;
4991 p && p->attribute;
4992 p = p->next)
4993 if(!strucmp(p->attribute, "HREF")
4994 && p->value
4995 && (HANDLES_LOC(hd->html_data)
4996 || struncmp(p->value, "x-alpine-", 9)
4997 || struncmp(p->value, "x-pine-help", 11)
4998 || p->value[0] == '#'))
4999 href = p;
5000 else if(!strucmp(p->attribute, "NAME"))
5001 name = p;
5003 if(DO_HANDLES(hd->html_data) && (href || name)){
5004 h = new_handle(HANDLESP(hd->html_data));
5007 * Enhancement: we might want to get fancier and parse the
5008 * href a bit further such that we can launch images using
5009 * our image viewer, or browse local files or directories
5010 * with our internal tools. Of course, having the jump-off
5011 * point into text/html always be the defined "web-browser",
5012 * just might be the least confusing UI-wise...
5014 h->type = URL;
5016 if(name && name->value)
5017 h->h.url.name = cpystr(name->value);
5020 * Prepare to build embedded prefix...
5022 HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5023 x = 0;
5026 * Is this something that looks like a URL? If not and
5027 * we were giving some "base" string, proceed ala RFC1808...
5029 if(href){
5030 if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5031 html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5033 else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5034 h->h.url.path = cpystr(href->value);
5036 if(pico_usingcolor()){
5037 char *fg = NULL, *bg = NULL, *q;
5039 if(ps_global->VAR_SLCTBL_FORE_COLOR
5040 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5041 ps_global->VAR_NORM_FORE_COLOR))
5042 fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5044 if(ps_global->VAR_SLCTBL_BACK_COLOR
5045 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5046 ps_global->VAR_NORM_BACK_COLOR))
5047 bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5049 if(fg || bg){
5050 COLOR_PAIR *tmp;
5053 * The blacks are just known good colors for testing
5054 * whether the other color is good.
5056 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5057 bg ? bg : colorx(COL_BLACK));
5058 if(pico_is_good_colorpair(tmp)){
5059 q = color_embed(fg, bg);
5061 for(i = 0; q[i]; i++)
5062 HD(hd->html_data)->prefix[x++] = q[i];
5065 if(tmp)
5066 free_color_pair(&tmp);
5069 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5070 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5072 else
5073 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5076 HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5077 HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5079 snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5080 HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5081 for(i = 0; i < n; i++)
5082 HD(hd->html_data)->prefix[x++] = buf[i];
5084 HD(hd->html_data)->prefix_used = x;
5086 hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5089 else if(cmd == GF_EOD){
5090 html_a_finish(hd);
5093 return(1); /* get linked */
5097 void
5098 html_a_prefix(FILTER_S *f)
5100 int *prefix, n;
5102 /* Do this so we don't visit from html_output... */
5103 prefix = HD(f)->prefix;
5104 HD(f)->prefix = NULL;
5106 for(n = 0; n < HD(f)->prefix_used; n++)
5107 html_a_output_prefix(f, prefix[n]);
5109 fs_give((void **) &prefix);
5114 * html_a_finish - house keeping associated with end of link tag
5116 void
5117 html_a_finish(HANDLER_S *hd)
5119 if(DO_HANDLES(hd->html_data)){
5120 if(HD(hd->html_data)->prefix){
5121 if(!PASS_HTML(hd->html_data)){
5122 char *empty_link = "[LINK]";
5123 int i;
5125 html_a_prefix(hd->html_data);
5126 for(i = 0; empty_link[i]; i++)
5127 html_output(hd->html_data, empty_link[i]);
5131 if(pico_usingcolor()){
5132 char *fg = NULL, *bg = NULL, *p;
5133 int i;
5135 if(ps_global->VAR_SLCTBL_FORE_COLOR
5136 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5137 ps_global->VAR_NORM_FORE_COLOR))
5138 fg = ps_global->VAR_NORM_FORE_COLOR;
5140 if(ps_global->VAR_SLCTBL_BACK_COLOR
5141 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5142 ps_global->VAR_NORM_BACK_COLOR))
5143 bg = ps_global->VAR_NORM_BACK_COLOR;
5145 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5146 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5148 if(fg || bg){
5149 COLOR_PAIR *tmp;
5152 * The blacks are just known good colors for testing
5153 * whether the other color is good.
5155 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5156 bg ? bg : colorx(COL_BLACK));
5157 if(pico_is_good_colorpair(tmp)){
5158 p = color_embed(fg, bg);
5160 for(i = 0; p[i]; i++)
5161 html_output(hd->html_data, p[i]);
5164 if(tmp)
5165 free_color_pair(&tmp);
5168 else
5169 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5171 html_output(hd->html_data, TAG_EMBED);
5172 html_output(hd->html_data, TAG_HANDLEOFF);
5174 html_a_output_info(hd);
5180 * html_output_a_prefix - dump Anchor prefix data
5182 void
5183 html_a_output_prefix(FILTER_S *f, int c)
5185 switch(c){
5186 case HTML_DOBOLD :
5187 HTML_BOLD(f, 1);
5188 break;
5190 default :
5191 html_output(f, c);
5192 break;
/*
 * html_a_output_info(hd)
 *
 * Called when an anchor is finished.  Looks up the handle whose key was
 * saved in hd->x, copies its URL path into a scratch string (url) and
 * locates the host part: the text following "://" up to the first slash,
 * question mark or end of string (hn points at it, hl is its length).
 *
 * When a host was found:
 *   - if anchor text was collected in hd->dp and that text itself holds
 *     something URL-like, its host part is compared (ignoring case) with
 *     the target host; a difference bumps "risky"
 *   - a host containing an at-sign or percent sign, or made up only of
 *     digits and dots, bumps "risky"
 *   - if SHOWSERVER is set, the host is written out in square brackets,
 *     wrapped in the meta-message colors when those form a usable pair
 *
 * The remainder of the URL is then scanned for further embedded "://"
 * hosts; an at-sign, percent sign or colon in one of them, or an
 * all-numeric one, also bumps "risky".
 *
 * If anything was flagged and a warnrisk_f callback is installed in the
 * filter options, it is called.  The anchor-text store and the scratch
 * URL copy are released before returning.
 */
5199 * html_a_output_info - dump possibly deceptive link info into text.
5200 * phark the phishers.
5202 void
5203 html_a_output_info(HANDLER_S *hd)
5205 int l, risky = 0, hl = 0, tl;
5206 char *url = NULL, *hn = NULL, *txt;
5207 HANDLE_S *h;
5209 /* find host anchor references */
5210 if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5211 && h->h.url.path != NULL
5212 && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5213 && (hn = srchstr(hn,"://")) != NULL){
/* skip the "://" and measure the host up to the path or query */
5215 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5219 if(hn && hl){
5221 * look over anchor's text to see if there's a
5222 * mismatch between href target and url-ish
5223 * looking text. throw a red flag if so.
5224 * similarly, toss one if the target's referenced
5225 * by a
5227 if(hd->dp){
5228 so_writec('\0', (STORE_S *) hd->dp);
5230 if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5231 && (txt = rfc1738_scan(txt, &tl)) != NULL
5232 && (txt = srchstr(txt,"://")) != NULL){
5234 for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5237 if(tl != hl)
5238 risky++;
5239 else
5240 /* look for non matching text */
5241 for(l = 0; l < tl && l < hl; l++)
5242 if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5243 risky++;
5244 break;
5248 so_give((STORE_S **) &hd->dp);
5251 /* look for literal IP, anything possibly encoded or auth specifier */
5252 if(!risky){
5253 int digits = 1;
5255 for(l = 0; l < hl; l++){
5256 if(hn[l] == '@' || hn[l] == '%'){
5257 risky++;
5258 break;
5260 else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5261 digits = 0;
5264 if(digits)
5265 risky++;
5268 /* Insert text of link's domain */
5269 if(SHOWSERVER(hd->html_data)){
5270 char *q;
5271 COLOR_PAIR *col = NULL, *colnorm = NULL;
5273 html_output(hd->html_data, ' ');
5274 html_output(hd->html_data, '[');
5276 if(pico_usingcolor()
5277 && ps_global->VAR_METAMSG_FORE_COLOR
5278 && ps_global->VAR_METAMSG_BACK_COLOR
5279 && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5280 ps_global->VAR_METAMSG_BACK_COLOR))){
5281 if(!pico_is_good_colorpair(col))
5282 free_color_pair(&col);
5284 if(col){
5285 q = color_embed(col->fg, col->bg);
5287 for(l = 0; q[l]; l++)
5288 html_output(hd->html_data, q[l]);
5292 for(l = 0; l < hl; l++)
5293 html_output(hd->html_data, hn[l]);
/* col still set means the host was colored: switch back to normal colors */
5295 if(col){
5296 if(ps_global->VAR_NORM_FORE_COLOR
5297 && ps_global->VAR_NORM_BACK_COLOR
5298 && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5299 ps_global->VAR_NORM_BACK_COLOR))){
5300 if(!pico_is_good_colorpair(colnorm))
5301 free_color_pair(&colnorm);
5303 if(colnorm){
5304 q = color_embed(colnorm->fg, colnorm->bg);
5305 free_color_pair(&colnorm);
5307 for(l = 0; q[l]; l++)
5308 html_output(hd->html_data, q[l]);
5312 free_color_pair(&col);
5315 html_output(hd->html_data, ']');
5320 * if things look OK so far, make sure nothing within
5321 * the url looks too fishy...
5323 while(!risky && hn
5324 && (hn = rfc1738_scan(hn, &l)) != NULL
5325 && (hn = srchstr(hn,"://")) != NULL){
5326 int digits = 1;
5328 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5330 * auth spec, encoded characters, or possibly non-standard port
5331 * should raise a red flag
5333 if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5334 risky++;
5335 break;
5337 else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5338 digits = 0;
5341 /* dotted-dec/raw-int address should cause suspicion as well */
5342 if(digits)
5343 risky++;
/* tell the installer of the filter, if it asked to be told */
5346 if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5347 (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
/* release the anchor-text store if it was not already given back above */
5349 if(hd->dp)
5350 so_give((STORE_S **) &hd->dp);
/* NOTE(review): url is still NULL when no handle or path was found; presumably fs_give tolerates that -- confirm */
5353 fs_give((void **) &url);
5359 * relative_url - put full url path in h based on base and relative url
5361 void
5362 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5364 size_t len;
5365 char tmp[MAILTMPLEN], *p, *q;
5366 char *scheme = NULL, *net = NULL, *path = NULL,
5367 *parms = NULL, *query = NULL, *frag = NULL,
5368 *base_scheme = NULL, *base_net_loc = NULL,
5369 *base_path = NULL, *base_parms = NULL,
5370 *base_query = NULL, *base_frag = NULL,
5371 *rel_scheme = NULL, *rel_net_loc = NULL,
5372 *rel_path = NULL, *rel_parms = NULL,
5373 *rel_query = NULL, *rel_frag = NULL;
5375 /* Rough parse of base URL */
5376 rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5377 &base_parms, &base_query, &base_frag);
5379 /* Rough parse of this URL */
5380 rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5381 &rel_parms, &rel_query, &rel_frag);
5383 scheme = rel_scheme; /* defaults */
5384 net = rel_net_loc;
5385 path = rel_path;
5386 parms = rel_parms;
5387 query = rel_query;
5388 frag = rel_frag;
5389 if(!scheme && base_scheme){
5390 scheme = base_scheme;
5391 if(!net){
5392 net = base_net_loc;
5393 if(path){
5394 if(*path != '/'){
5395 if(base_path){
5396 for(p = q = base_path; /* Drop base path's tail */
5397 (p = strchr(p, '/'));
5398 q = ++p)
5401 len = q - base_path;
5403 else
5404 len = 0;
5406 if(len + strlen(rel_path) < sizeof(tmp)-1){
5407 if(len)
5408 snprintf(path = tmp, sizeof(tmp), "%.*s", (int) len, base_path);
5410 strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5411 tmp[sizeof(tmp)-1] = '\0';
5413 /* Follow RFC 1808 "Step 6" */
5414 for(p = tmp; (p = strchr(p, '.')); )
5415 switch(*(p+1)){
5417 * a) All occurrences of "./", where "." is a
5418 * complete path segment, are removed.
5420 case '/' :
5421 if(p > tmp)
5422 for(q = p; (*q = *(q+2)) != '\0'; q++)
5424 else
5425 p++;
5427 break;
5430 * b) If the path ends with "." as a
5431 * complete path segment, that "." is
5432 * removed.
5434 case '\0' :
5435 if(p == tmp || *(p-1) == '/')
5436 *p = '\0';
5437 else
5438 p++;
5440 break;
5443 * c) All occurrences of "<segment>/../",
5444 * where <segment> is a complete path
5445 * segment not equal to "..", are removed.
5446 * Removal of these path segments is
5447 * performed iteratively, removing the
5448 * leftmost matching pattern on each
5449 * iteration, until no matching pattern
5450 * remains.
5452 * d) If the path ends with "<segment>/..",
5453 * where <segment> is a complete path
5454 * segment not equal to "..", that
5455 * "<segment>/.." is removed.
5457 case '.' :
5458 if(p > tmp + 1){
5459 for(q = p - 2; q > tmp && *q != '/'; q--)
5462 if(*q == '/')
5463 q++;
5465 if(q + 1 == p /* no "//.." */
5466 || (*q == '.' /* and "../.." */
5467 && *(q+1) == '.'
5468 && *(q+2) == '/')){
5469 p += 2;
5470 break;
5473 switch(*(p+2)){
5474 case '/' :
5475 len = (p - q) + 3;
5476 p = q;
5477 for(; (*q = *(q+len)) != '\0'; q++)
5480 break;
5482 case '\0':
5483 *(p = q) = '\0';
5484 break;
5486 default:
5487 p += 2;
5488 break;
5491 else
5492 p += 2;
5494 break;
5496 default :
5497 p++;
5498 break;
5501 else
5502 path = ""; /* lame. */
5505 else{
5506 path = base_path;
5507 if(!parms){
5508 parms = base_parms;
5509 if(!query)
5510 query = base_query;
5516 len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5517 + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5518 + (query ? strlen(query) : 0) + (frag ? strlen(frag ) : 0) + 8;
5520 h->h.url.path = (char *) fs_get(len * sizeof(char));
5521 snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5522 scheme ? scheme : "", scheme ? ":" : "",
5523 net ? "//" : "", net ? net : "",
5524 (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5525 path ? path : "",
5526 parms ? ";" : "", parms ? parms : "",
5527 query ? "?" : "", query ? query : "",
5528 frag ? "#" : "", frag ? frag : "");
5530 if(base_scheme)
5531 fs_give((void **) &base_scheme);
5533 if(base_net_loc)
5534 fs_give((void **) &base_net_loc);
5536 if(base_path)
5537 fs_give((void **) &base_path);
5539 if(base_parms)
5540 fs_give((void **) &base_parms);
5542 if(base_query)
5543 fs_give((void **) &base_query);
5545 if(base_frag)
5546 fs_give((void **) &base_frag);
5548 if(rel_scheme)
5549 fs_give((void **) &rel_scheme);
5551 if(rel_net_loc)
5552 fs_give((void **) &rel_net_loc);
5554 if(rel_parms)
5555 fs_give((void **) &rel_parms);
5557 if(rel_query)
5558 fs_give((void **) &rel_query);
5560 if(rel_frag)
5561 fs_give((void **) &rel_frag);
5563 if(rel_path)
5564 fs_give((void **) &rel_path);
/*
 * html_href_relative - decide whether an href lacks a scheme
 *
 * url  the href value to examine (may be NULL)
 *
 * Returns FALSE when url starts with something shaped like "scheme:",
 * TRUE otherwise (including a NULL or empty url).  Only the first 32
 * characters are examined.
 *
 * Besides letters, '_' and '-', the characters after the first may also
 * be digits, '+' or '.', so that schemes such as "h323:", "svn+ssh:" or
 * "z39.50s:" are recognized as absolute rather than mistaken for a
 * relative reference.
 */
int
html_href_relative(char *url)
{
    int i;

    if(url)
      for(i = 0; i < 32 && url[i]; i++){
          unsigned char c = (unsigned char) url[i];

          if(c == ':')
            return(FALSE);

          if(!(isalpha(c) || c == '_' || c == '-'
               || (i > 0 && (isdigit(c) || c == '+' || c == '.'))))
            break;
      }

    return(TRUE);
}
5590 * HTML <UL> (Unordered List) element handler
5593 html_ul(HANDLER_S *hd, int ch, int cmd)
5595 if(cmd == GF_DATA){
5596 html_handoff(hd, ch);
5598 else if(cmd == GF_RESET){
5599 if(PASS_HTML(hd->html_data)){
5600 html_output_raw_tag(hd->html_data, "ul");
5602 else{
5603 HD(hd->html_data)->li_pending = 1;
5604 html_blank(hd->html_data, 0);
5607 else if(cmd == GF_EOD){
5608 if(PASS_HTML(hd->html_data)){
5609 html_output_string(hd->html_data, "</ul>");
5611 else{
5612 html_blank(hd->html_data, 0);
5614 if(!HD(hd->html_data)->li_pending)
5615 html_indent(hd->html_data, -4, HTML_ID_INC);
5616 else
5617 HD(hd->html_data)->li_pending = 0;
5621 return(1); /* get linked */
5626 * HTML <OL> (Ordered List) element handler
5629 html_ol(HANDLER_S *hd, int ch, int cmd)
5631 if(cmd == GF_DATA){
5632 html_handoff(hd, ch);
5634 else if(cmd == GF_RESET){
5635 if(PASS_HTML(hd->html_data)){
5636 html_output_raw_tag(hd->html_data, "ol");
5638 else{
5639 PARAMETER *p;
5641 * Signal that we're expecting to see <LI> as our next elemnt
5642 * and set the the initial ordered count.
5644 hd->x = 1L; /* set default */
5645 hd->y = LIST_DECIMAL; /* set default */
5646 for(p = HD(hd->html_data)->el_data->attribs;
5647 p && p->attribute;
5648 p = p->next)
5649 if(p->value){
5650 if(!strucmp(p->attribute, "TYPE")){
5651 if(!strucmp(p->value, "a")) /* alpha, lowercase */
5652 hd->y = LIST_ALPHALO;
5653 else if(!strucmp(p->value, "A")) /* alpha, uppercase */
5654 hd->y = LIST_ALPHAUP;
5655 else if(!strucmp(p->value, "i")) /* roman, lowercase */
5656 hd->y = LIST_ROMANLO;
5657 else if(!strucmp(p->value, "I")) /* roman, uppercase */
5658 hd->y = LIST_ROMANUP;
5659 else if(strucmp(p->value, "1")) /* decimal, the default */
5660 hd->y = LIST_UNKNOWN;
5662 else if(!strucmp(p->attribute, "START"))
5663 hd->x = atol(p->value);
5664 // else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5665 // this is not so simple. The main missing support
5666 // is for the STYLE attribute, but implementing that
5667 // correctly will take time, so will be implemented
5668 // after version 2.21 is released.
5670 HD(hd->html_data)->li_pending = 1;
5671 html_blank(hd->html_data, 0);
5674 else if(cmd == GF_EOD){
5675 if(PASS_HTML(hd->html_data)){
5676 html_output_string(hd->html_data, "</ol>");
5678 else{
5679 html_blank(hd->html_data, 0);
5681 if(!HD(hd->html_data)->li_pending)
5682 html_indent(hd->html_data, -4, HTML_ID_INC);
5683 else
5684 HD(hd->html_data)->li_pending = 0;
5688 return(1); /* get linked */
5693 * HTML <MENU> (Menu List) element handler
5696 html_menu(HANDLER_S *hd, int ch, int cmd)
5698 if(cmd == GF_DATA){
5699 html_handoff(hd, ch);
5701 else if(cmd == GF_RESET){
5702 if(PASS_HTML(hd->html_data)){
5703 html_output_raw_tag(hd->html_data, "menu");
5705 else{
5706 HD(hd->html_data)->li_pending = 1;
5709 else if(cmd == GF_EOD){
5710 if(PASS_HTML(hd->html_data)){
5711 html_output_string(hd->html_data, "</menu>");
5713 else{
5714 html_blank(hd->html_data, 0);
5716 if(!HD(hd->html_data)->li_pending)
5717 html_indent(hd->html_data, -4, HTML_ID_INC);
5718 else
5719 HD(hd->html_data)->li_pending = 0;
5723 return(1); /* get linked */
5728 * HTML <DIR> (Directory List) element handler
5731 html_dir(HANDLER_S *hd, int ch, int cmd)
5733 if(cmd == GF_DATA){
5734 html_handoff(hd, ch);
5736 else if(cmd == GF_RESET){
5737 if(PASS_HTML(hd->html_data)){
5738 html_output_raw_tag(hd->html_data, "dir");
5740 else{
5741 HD(hd->html_data)->li_pending = 1;
5744 else if(cmd == GF_EOD){
5745 if(PASS_HTML(hd->html_data)){
5746 html_output_string(hd->html_data, "</dir>");
5748 else{
5749 html_blank(hd->html_data, 0);
5751 if(!HD(hd->html_data)->li_pending)
5752 html_indent(hd->html_data, -4, HTML_ID_INC);
5753 else
5754 HD(hd->html_data)->li_pending = 0;
5758 return(1); /* get linked */
5763 * HTML <LI> (List Item) element handler
5766 html_li(HANDLER_S *hd, int ch, int cmd)
5768 if(cmd == GF_DATA){
5769 if(PASS_HTML(hd->html_data)){
5770 html_handoff(hd, ch);
5773 else if(cmd == GF_RESET){
5774 HANDLER_S *p, *found = NULL;
5777 * There better be a an unordered list, ordered list,
5778 * Menu or Directory handler installed
5779 * or else we crap out...
5781 for(p = HANDLERS(hd->html_data); p; p = p->below)
5782 if(EL(p)->handler == html_ul
5783 || EL(p)->handler == html_ol
5784 || EL(p)->handler == html_menu
5785 || EL(p)->handler == html_dir){
5786 found = p;
5787 break;
5790 if(found){
5791 if(PASS_HTML(hd->html_data)){
5793 else{
5794 char buf[16], tmp[16], *p;
5795 int wrapstate;
5797 /* Start a new line */
5798 html_blank(hd->html_data, 0);
5800 /* adjust indent level if needed */
5801 if(HD(hd->html_data)->li_pending){
5802 html_indent(hd->html_data, 4, HTML_ID_INC);
5803 HD(hd->html_data)->li_pending = 0;
5806 if(EL(found)->handler == html_ul){
5807 int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5809 strncpy(buf, " ", sizeof(buf));
5810 buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5812 else if(EL(found)->handler == html_ol){
5813 if(found->y == LIST_DECIMAL || found->y == LIST_UNKNOWN)
5814 snprintf(tmp, sizeof(tmp), "%ld", found->x++);
5815 else if(found->y == LIST_ALPHALO)
5816 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'a');
5817 else if(found->y == LIST_ALPHAUP)
5818 convert_decimal_to_alpha(tmp, sizeof(tmp), found->x++, 'A');
5819 else if(found->y == LIST_ROMANLO)
5820 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'i');
5821 else if(found->y == LIST_ROMANUP)
5822 convert_decimal_to_roman(tmp, sizeof(tmp), found->x++, 'I');
5823 snprintf(buf, sizeof(buf), " %s.", tmp);
5824 buf[sizeof(buf)-1] = '\0';
5826 else if(EL(found)->handler == html_menu){
5827 strncpy(buf, " ->", sizeof(buf));
5828 buf[sizeof(buf)-1] = '\0';
5831 html_indent(hd->html_data, -4, HTML_ID_INC);
5833 /* So we don't munge whitespace */
5834 wrapstate = HD(hd->html_data)->wrapstate;
5835 HD(hd->html_data)->wrapstate = 0;
5837 html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5838 for(p = buf; *p; p++)
5839 html_output(hd->html_data, (int) *p);
5840 HD(hd->html_data)->wrapstate = wrapstate;
5841 html_indent(hd->html_data, 4, HTML_ID_INC);
5843 /* else BUG: should really bitch about this */
5846 if(PASS_HTML(hd->html_data)){
5847 html_output_raw_tag(hd->html_data, "li");
5848 return(1); /* get linked */
5851 else if(cmd == GF_EOD){
5852 if(PASS_HTML(hd->html_data)){
5853 html_output_string(hd->html_data, "</li>");
5857 return(PASS_HTML(hd->html_data)); /* DON'T get linked */
5862 * HTML <DL> (Definition List) element handler
5865 html_dl(HANDLER_S *hd, int ch, int cmd)
5867 if(cmd == GF_DATA){
5868 html_handoff(hd, ch);
5870 else if(cmd == GF_RESET){
5871 if(PASS_HTML(hd->html_data)){
5872 html_output_raw_tag(hd->html_data, "dl");
5874 else{
5876 * Set indention level for definition terms and definitions...
5878 hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5879 hd->y = hd->x + 2;
5880 hd->z = hd->y + 4;
5883 else if(cmd == GF_EOD){
5884 if(PASS_HTML(hd->html_data)){
5885 html_output_string(hd->html_data, "</dl>");
5887 else{
5888 html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5889 html_blank(hd->html_data, 1);
5893 return(1); /* get linked */
5898 * HTML <DT> (Definition Term) element handler
5901 html_dt(HANDLER_S *hd, int ch, int cmd)
5903 if(PASS_HTML(hd->html_data)){
5904 if(cmd == GF_DATA){
5905 html_handoff(hd, ch);
5907 else if(cmd == GF_RESET){
5908 html_output_raw_tag(hd->html_data, "dt");
5910 else if(cmd == GF_EOD){
5911 html_output_string(hd->html_data, "</dt>");
5914 return(1); /* get linked */
5917 if(cmd == GF_RESET){
5918 HANDLER_S *p;
5921 * There better be a Definition Handler installed
5922 * or else we crap out...
5924 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5927 if(p){ /* adjust indent level if needed */
5928 html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5929 html_blank(hd->html_data, 1);
5931 /* BUG: else should really bitch about this */
5934 return(0); /* DON'T get linked */
5939 * HTML <DD> (Definition Definition) element handler
5942 html_dd(HANDLER_S *hd, int ch, int cmd)
5944 if(PASS_HTML(hd->html_data)){
5945 if(cmd == GF_DATA){
5946 html_handoff(hd, ch);
5948 else if(cmd == GF_RESET){
5949 html_output_raw_tag(hd->html_data, "dd");
5951 else if(cmd == GF_EOD){
5952 html_output_string(hd->html_data, "</dd>");
5955 return(1); /* get linked */
5958 if(cmd == GF_RESET){
5959 HANDLER_S *p;
5962 * There better be a Definition Handler installed
5963 * or else we crap out...
5965 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5968 if(p){ /* adjust indent level if needed */
5969 html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5970 html_blank(hd->html_data, 0);
5972 /* BUG: should really bitch about this */
5975 return(0); /* DON'T get linked */
5980 * HTML <H1> (Headings 1) element handler.
5982 * Bold, very-large font, CENTERED. One or two blank lines
5983 * above and below. For our silly character cell's that
5984 * means centered and ALL CAPS...
5987 html_h1(HANDLER_S *hd, int ch, int cmd)
5989 if(cmd == GF_DATA){
5990 html_handoff(hd, ch);
5992 else if(cmd == GF_RESET){
5993 if(PASS_HTML(hd->html_data)){
5994 html_output_raw_tag(hd->html_data, "h1");
5996 else{
5997 /* turn ON the centered bit */
5998 CENTER_BIT(hd->html_data) = 1;
6001 else if(cmd == GF_EOD){
6002 if(PASS_HTML(hd->html_data)){
6003 html_output_string(hd->html_data, "</h1>");
6005 else{
6006 /* turn OFF the centered bit, add blank line */
6007 CENTER_BIT(hd->html_data) = 0;
6008 html_blank(hd->html_data, 1);
6012 return(1); /* get linked */
6017 * HTML <H2> (Headings 2) element handler
6020 html_h2(HANDLER_S *hd, int ch, int cmd)
6022 if(cmd == GF_DATA){
6023 if(PASS_HTML(hd->html_data)){
6024 html_handoff(hd, ch);
6026 else{
6027 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6028 HTML_ULINE(hd->html_data, 1);
6029 hd->x ^= HTML_HX_ULINE; /* only once! */
6032 html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
6033 ? toupper((unsigned char) ch) : ch);
6036 else if(cmd == GF_RESET){
6037 if(PASS_HTML(hd->html_data)){
6038 html_output_raw_tag(hd->html_data, "h2");
6040 else{
6042 * Bold, large font, flush-left. One or two blank lines
6043 * above and below.
6045 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6046 hd->x = HTML_HX_CENTER;
6047 else
6048 hd->x = 0;
6050 hd->x |= HTML_HX_ULINE;
6052 CENTER_BIT(hd->html_data) = 0;
6053 hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6054 hd->z = HD(hd->html_data)->wrapcol;
6055 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6056 html_blank(hd->html_data, 1);
6059 else if(cmd == GF_EOD){
6060 if(PASS_HTML(hd->html_data)){
6061 html_output_string(hd->html_data, "</h2>");
6063 else{
6065 * restore previous centering, and indent level
6067 if(!(hd->x & HTML_HX_ULINE))
6068 HTML_ULINE(hd->html_data, 0);
6070 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6071 html_blank(hd->html_data, 1);
6072 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6073 HD(hd->html_data)->wrapcol = hd->z;
6077 return(1); /* get linked */
6082 * HTML <H3> (Headings 3) element handler
6085 html_h3(HANDLER_S *hd, int ch, int cmd)
6087 if(cmd == GF_DATA){
6088 if(!PASS_HTML(hd->html_data)){
6089 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6090 HTML_ULINE(hd->html_data, 1);
6091 hd->x ^= HTML_HX_ULINE; /* only once! */
6095 html_handoff(hd, ch);
6097 else if(cmd == GF_RESET){
6098 if(PASS_HTML(hd->html_data)){
6099 html_output_raw_tag(hd->html_data, "h3");
6101 else{
6103 * Italic, large font, slightly indented from the left
6104 * margin. One or two blank lines above and below.
6106 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6107 hd->x = HTML_HX_CENTER;
6108 else
6109 hd->x = 0;
6111 hd->x |= HTML_HX_ULINE;
6112 CENTER_BIT(hd->html_data) = 0;
6113 hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6114 hd->z = HD(hd->html_data)->wrapcol;
6115 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6116 html_blank(hd->html_data, 1);
6119 else if(cmd == GF_EOD){
6120 if(PASS_HTML(hd->html_data)){
6121 html_output_string(hd->html_data, "</h3>");
6123 else{
6125 * restore previous centering, and indent level
6127 if(!(hd->x & HTML_HX_ULINE))
6128 HTML_ULINE(hd->html_data, 0);
6130 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6131 html_blank(hd->html_data, 1);
6132 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6133 HD(hd->html_data)->wrapcol = hd->z;
6137 return(1); /* get linked */
6142 * HTML <H4> (Headings 4) element handler
6145 html_h4(HANDLER_S *hd, int ch, int cmd)
6147 if(cmd == GF_DATA){
6148 html_handoff(hd, ch);
6150 else if(cmd == GF_RESET){
6151 if(PASS_HTML(hd->html_data)){
6152 html_output_raw_tag(hd->html_data, "h4");
6154 else{
6156 * Bold, normal font, indented more than H3. One blank line
6157 * above and below.
6159 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6160 CENTER_BIT(hd->html_data) = 0;
6161 hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6162 hd->z = HD(hd->html_data)->wrapcol;
6163 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6164 html_blank(hd->html_data, 1);
6167 else if(cmd == GF_EOD){
6168 if(PASS_HTML(hd->html_data)){
6169 html_output_string(hd->html_data, "</h4>");
6171 else{
6173 * restore previous centering, and indent level
6175 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6176 html_blank(hd->html_data, 1);
6177 CENTER_BIT(hd->html_data) = hd->x;
6178 HD(hd->html_data)->wrapcol = hd->z;
6182 return(1); /* get linked */
6187 * HTML <H5> (Headings 5) element handler
6190 html_h5(HANDLER_S *hd, int ch, int cmd)
6192 if(cmd == GF_DATA){
6193 html_handoff(hd, ch);
6195 else if(cmd == GF_RESET){
6196 if(PASS_HTML(hd->html_data)){
6197 html_output_raw_tag(hd->html_data, "h5");
6199 else{
6201 * Italic, normal font, indented as H4. One blank line
6202 * above.
6204 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6205 CENTER_BIT(hd->html_data) = 0;
6206 hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6207 hd->z = HD(hd->html_data)->wrapcol;
6208 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6209 html_blank(hd->html_data, 1);
6212 else if(cmd == GF_EOD){
6213 if(PASS_HTML(hd->html_data)){
6214 html_output_string(hd->html_data, "</h5>");
6216 else{
6218 * restore previous centering, and indent level
6220 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6221 html_blank(hd->html_data, 1);
6222 CENTER_BIT(hd->html_data) = hd->x;
6223 HD(hd->html_data)->wrapcol = hd->z;
6227 return(1); /* get linked */
6232 * HTML <H6> (Headings 6) element handler
6235 html_h6(HANDLER_S *hd, int ch, int cmd)
6237 if(cmd == GF_DATA){
6238 html_handoff(hd, ch);
6240 else if(cmd == GF_RESET){
6241 if(PASS_HTML(hd->html_data)){
6242 html_output_raw_tag(hd->html_data, "h6");
6244 else{
6246 * Bold, indented same as normal text, more than H5. One
6247 * blank line above.
6249 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6250 CENTER_BIT(hd->html_data) = 0;
6251 hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6252 hd->z = HD(hd->html_data)->wrapcol;
6253 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6254 html_blank(hd->html_data, 1);
6257 else if(cmd == GF_EOD){
6258 if(PASS_HTML(hd->html_data)){
6259 html_output_string(hd->html_data, "</h6>");
6261 else{
6263 * restore previous centering, and indent level
6265 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6266 html_blank(hd->html_data, 1);
6267 CENTER_BIT(hd->html_data) = hd->x;
6268 HD(hd->html_data)->wrapcol = hd->z;
6272 return(1); /* get linked */
6277 * HTML <BlockQuote> element handler
6280 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6282 int j;
6283 #define HTML_BQ_INDENT 6
6285 if(cmd == GF_DATA){
6286 html_handoff(hd, ch);
6288 else if(cmd == GF_RESET){
6289 if(PASS_HTML(hd->html_data)){
6290 html_output_raw_tag(hd->html_data, "blockquote");
6292 else{
6294 * A typical rendering might be a slight extra left and
6295 * right indent, and/or italic font. The Blockquote element
6296 * causes a paragraph break, and typically provides space
6297 * above and below the quote.
6299 html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6300 j = HD(hd->html_data)->wrapstate;
6301 HD(hd->html_data)->wrapstate = 0;
6302 html_blank(hd->html_data, 1);
6303 HD(hd->html_data)->wrapstate = j;
6304 HD(hd->html_data)->wrapcol -= HTML_BQ_INDENT;
6307 else if(cmd == GF_EOD){
6308 if(PASS_HTML(hd->html_data)){
6309 html_output_string(hd->html_data, "</blockquote>");
6311 else{
6312 html_blank(hd->html_data, 1);
6314 j = HD(hd->html_data)->wrapstate;
6315 HD(hd->html_data)->wrapstate = 0;
6316 html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6317 HD(hd->html_data)->wrapstate = j;
6318 HD(hd->html_data)->wrapcol += HTML_BQ_INDENT;
6322 return(1); /* get linked */
6327 * HTML <Address> element handler
6330 html_address(HANDLER_S *hd, int ch, int cmd)
6332 int j;
6333 #define HTML_ADD_INDENT 2
6335 if(cmd == GF_DATA){
6336 html_handoff(hd, ch);
6338 else if(cmd == GF_RESET){
6339 if(PASS_HTML(hd->html_data)){
6340 html_output_raw_tag(hd->html_data, "address");
6342 else{
6344 * A typical rendering might be a slight extra left and
6345 * right indent, and/or italic font. The Blockquote element
6346 * causes a paragraph break, and typically provides space
6347 * above and below the quote.
6349 html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6350 j = HD(hd->html_data)->wrapstate;
6351 HD(hd->html_data)->wrapstate = 0;
6352 html_blank(hd->html_data, 1);
6353 HD(hd->html_data)->wrapstate = j;
6356 else if(cmd == GF_EOD){
6357 if(PASS_HTML(hd->html_data)){
6358 html_output_string(hd->html_data, "</address>");
6360 else{
6361 html_blank(hd->html_data, 1);
6363 j = HD(hd->html_data)->wrapstate;
6364 HD(hd->html_data)->wrapstate = 0;
6365 html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6366 HD(hd->html_data)->wrapstate = j;
6370 return(1); /* get linked */
6375 * HTML <PRE> (Preformatted Text) element handler
6378 html_pre(HANDLER_S *hd, int ch, int cmd)
6380 if(cmd == GF_DATA){
6382 * remove CRLF after '>' in element.
6383 * We see CRLF because wrapstate is off.
6385 switch(hd->y){
6386 case 2 :
6387 if(ch == '\012'){
6388 hd->y = 3;
6389 return(1);
6391 else
6392 html_handoff(hd, '\015');
6394 break;
6396 case 1 :
6397 if(ch == '\015'){
6398 hd->y = 2;
6399 return(1);
6402 case 3 :
6403 /* passing tags? replace CRLF with <BR> to make
6404 * sure hard newline survives in the end...
6406 if(PASS_HTML(hd->html_data))
6407 hd->y = 4; /* keep looking for CRLF */
6408 else
6409 hd->y = 0; /* stop looking */
6411 break;
6413 case 4 :
6414 if(ch == '\015'){
6415 hd->y = 5;
6416 return(1);
6419 break;
6421 case 5 :
6422 hd->y = 4;
6423 if(ch == '\012'){
6424 html_output_string(hd->html_data, "<br />");
6425 return(1);
6427 else
6428 html_handoff(hd, '\015'); /* not CRLF, pass raw CR */
6430 break;
6432 default : /* zero case */
6433 break;
6436 html_handoff(hd, ch);
6438 else if(cmd == GF_RESET){
6439 hd->y = 1;
6440 if(PASS_HTML(hd->html_data)){
6441 html_output_raw_tag(hd->html_data, "pre");
6443 else{
6444 if(hd->html_data)
6445 hd->html_data->f1 = DFL; \
6447 html_blank(hd->html_data, 1);
6448 hd->x = HD(hd->html_data)->wrapstate;
6449 HD(hd->html_data)->wrapstate = 0;
6452 else if(cmd == GF_EOD){
6453 if(PASS_HTML(hd->html_data)){
6454 html_output_string(hd->html_data, "</pre>");
6456 else{
6457 HD(hd->html_data)->wrapstate = (hd->x != 0);
6458 html_blank(hd->html_data, 0);
6462 return(1);
6467 * HTML <CENTER> (Centerd Text) element handler
6470 html_center(HANDLER_S *hd, int ch, int cmd)
6472 if(cmd == GF_DATA){
6473 html_handoff(hd, ch);
6475 else if(cmd == GF_RESET){
6476 if(PASS_HTML(hd->html_data)){
6477 html_output_raw_tag(hd->html_data, "center");
6479 else{
6480 /* turn ON the centered bit */
6481 CENTER_BIT(hd->html_data) = 1;
6484 else if(cmd == GF_EOD){
6485 if(PASS_HTML(hd->html_data)){
6486 html_output_string(hd->html_data, "</center>");
6488 else{
6489 /* turn OFF the centered bit */
6490 CENTER_BIT(hd->html_data) = 0;
6494 return(1);
6499 * HTML <DIV> (Document Divisions) element handler
6502 html_div(HANDLER_S *hd, int ch, int cmd)
6504 if(cmd == GF_DATA){
6505 html_handoff(hd, ch);
6507 else if(cmd == GF_RESET){
6508 if(PASS_HTML(hd->html_data)){
6509 html_output_raw_tag(hd->html_data, "div");
6511 else{
6512 PARAMETER *p;
6514 for(p = HD(hd->html_data)->el_data->attribs;
6515 p && p->attribute;
6516 p = p->next)
6517 if(!strucmp(p->attribute, "ALIGN")){
6518 if(p->value){
6519 /* remember previous values */
6520 hd->x = CENTER_BIT(hd->html_data);
6521 hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6523 html_blank(hd->html_data, 0);
6524 CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6525 html_indent(hd->html_data, 0, HTML_ID_SET);
6526 /* NOTE: "RIGHT" not supported yet */
6531 else if(cmd == GF_EOD){
6532 if(PASS_HTML(hd->html_data)){
6533 html_output_string(hd->html_data, "</div>");
6535 else{
6536 /* restore centered bit and indentiousness */
6537 CENTER_BIT(hd->html_data) = hd->y;
6538 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6539 html_blank(hd->html_data, 0);
6543 return(1);
6548 * HTML <SPAN> (Text Span) element handler
6551 html_span(HANDLER_S *hd, int ch, int cmd)
6553 if(PASS_HTML(hd->html_data)){
6554 if(cmd == GF_DATA){
6555 html_handoff(hd, ch);
6557 else if(cmd == GF_RESET){
6558 html_output_raw_tag(hd->html_data, "span");
6560 else if(cmd == GF_EOD){
6561 html_output_string(hd->html_data, "</span>");
6564 return(1);
6567 return(0);
6572 * HTML <KBD> (Text Kbd) element handler
6575 html_kbd(HANDLER_S *hd, int ch, int cmd)
6577 if(PASS_HTML(hd->html_data)){
6578 if(cmd == GF_DATA){
6579 html_handoff(hd, ch);
6581 else if(cmd == GF_RESET){
6582 html_output_raw_tag(hd->html_data, "kbd");
6584 else if(cmd == GF_EOD){
6585 html_output_string(hd->html_data, "</kbd>");
6588 return(1);
6591 return(0);
6596 * HTML <DFN> (Text Definition) element handler
6599 html_dfn(HANDLER_S *hd, int ch, int cmd)
6601 if(PASS_HTML(hd->html_data)){
6602 if(cmd == GF_DATA){
6603 html_handoff(hd, ch);
6605 else if(cmd == GF_RESET){
6606 html_output_raw_tag(hd->html_data, "dfn");
6608 else if(cmd == GF_EOD){
6609 html_output_string(hd->html_data, "</dfn>");
6612 return(1);
6615 return(0);
6620 * HTML <TT> (Text Tt) element handler
6623 html_tt(HANDLER_S *hd, int ch, int cmd)
6625 if(PASS_HTML(hd->html_data)){
6626 if(cmd == GF_DATA){
6627 html_handoff(hd, ch);
6629 else if(cmd == GF_RESET){
6630 html_output_raw_tag(hd->html_data, "tt");
6632 else if(cmd == GF_EOD){
6633 html_output_string(hd->html_data, "</tt>");
6636 return(1);
6639 return(0);
6644 * HTML <VAR> (Text Var) element handler
6647 html_var(HANDLER_S *hd, int ch, int cmd)
6649 if(PASS_HTML(hd->html_data)){
6650 if(cmd == GF_DATA){
6651 html_handoff(hd, ch);
6653 else if(cmd == GF_RESET){
6654 html_output_raw_tag(hd->html_data, "var");
6656 else if(cmd == GF_EOD){
6657 html_output_string(hd->html_data, "</var>");
6660 return(1);
6663 return(0);
6668 * HTML <SAMP> (Text Samp) element handler
6671 html_samp(HANDLER_S *hd, int ch, int cmd)
6673 if(PASS_HTML(hd->html_data)){
6674 if(cmd == GF_DATA){
6675 html_handoff(hd, ch);
6677 else if(cmd == GF_RESET){
6678 html_output_raw_tag(hd->html_data, "samp");
6680 else if(cmd == GF_EOD){
6681 html_output_string(hd->html_data, "</samp>");
6684 return(1);
6687 return(0);
6692 * HTML <SUP> (Text Superscript) element handler
6695 html_sup(HANDLER_S *hd, int ch, int cmd)
6697 if(PASS_HTML(hd->html_data)){
6698 if(cmd == GF_DATA){
6699 html_handoff(hd, ch);
6701 else if(cmd == GF_RESET){
6702 html_output_raw_tag(hd->html_data, "sup");
6704 else if(cmd == GF_EOD){
6705 html_output_string(hd->html_data, "</sup>");
6708 return(1);
6711 return(0);
6716 * HTML <SUB> (Text Subscript) element handler
6719 html_sub(HANDLER_S *hd, int ch, int cmd)
6721 if(PASS_HTML(hd->html_data)){
6722 if(cmd == GF_DATA){
6723 html_handoff(hd, ch);
6725 else if(cmd == GF_RESET){
6726 html_output_raw_tag(hd->html_data, "sub");
6728 else if(cmd == GF_EOD){
6729 html_output_string(hd->html_data, "</sub>");
6732 return(1);
6735 return(0);
6740 * HTML <CITE> (Text Citation) element handler
6743 html_cite(HANDLER_S *hd, int ch, int cmd)
6745 if(PASS_HTML(hd->html_data)){
6746 if(cmd == GF_DATA){
6747 html_handoff(hd, ch);
6749 else if(cmd == GF_RESET){
6750 html_output_raw_tag(hd->html_data, "cite");
6752 else if(cmd == GF_EOD){
6753 html_output_string(hd->html_data, "</cite>");
6756 return(1);
6759 return(0);
6764 * HTML <CODE> (Text Code) element handler
6767 html_code(HANDLER_S *hd, int ch, int cmd)
6769 if(PASS_HTML(hd->html_data)){
6770 if(cmd == GF_DATA){
6771 html_handoff(hd, ch);
6773 else if(cmd == GF_RESET){
6774 html_output_raw_tag(hd->html_data, "code");
6776 else if(cmd == GF_EOD){
6777 html_output_string(hd->html_data, "</code>");
6780 return(1);
6783 return(0);
6788 * HTML <INS> (Text Inserted) element handler
6791 html_ins(HANDLER_S *hd, int ch, int cmd)
6793 if(PASS_HTML(hd->html_data)){
6794 if(cmd == GF_DATA){
6795 html_handoff(hd, ch);
6797 else if(cmd == GF_RESET){
6798 html_output_raw_tag(hd->html_data, "ins");
6800 else if(cmd == GF_EOD){
6801 html_output_string(hd->html_data, "</ins>");
6804 return(1);
6807 return(0);
6812 * HTML <DEL> (Text Deleted) element handler
6815 html_del(HANDLER_S *hd, int ch, int cmd)
6817 if(PASS_HTML(hd->html_data)){
6818 if(cmd == GF_DATA){
6819 html_handoff(hd, ch);
6821 else if(cmd == GF_RESET){
6822 html_output_raw_tag(hd->html_data, "del");
6824 else if(cmd == GF_EOD){
6825 html_output_string(hd->html_data, "</del>");
6828 return(1);
6831 return(0);
6836 * HTML <ABBR> (Text Abbreviation) element handler
6839 html_abbr(HANDLER_S *hd, int ch, int cmd)
6841 if(PASS_HTML(hd->html_data)){
6842 if(cmd == GF_DATA){
6843 html_handoff(hd, ch);
6845 else if(cmd == GF_RESET){
6846 html_output_raw_tag(hd->html_data, "abbr");
6848 else if(cmd == GF_EOD){
6849 html_output_string(hd->html_data, "</abbr>");
6852 return(1);
6855 return(0);
6860 * HTML <SCRIPT> element handler
6863 html_script(HANDLER_S *hd, int ch, int cmd)
6865 /* Link in and drop everything within on the floor */
6866 return(1);
6871 * HTML <APPLET> element handler
6874 html_applet(HANDLER_S *hd, int ch, int cmd)
6876 /* Link in and drop everything within on the floor */
6877 return(1);
6882 * HTML <STYLE> CSS element handler
6885 html_style(HANDLER_S *hd, int ch, int cmd)
6887 static STORE_S *css_stuff ;
6889 if(PASS_HTML(hd->html_data)){
6890 if(cmd == GF_DATA){
6891 /* collect style settings */
6892 so_writec(ch, css_stuff);
6894 else if(cmd == GF_RESET){
6895 if(css_stuff)
6896 so_give(&css_stuff);
6898 css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6900 else if(cmd == GF_EOD){
6902 * TODO: strip anything mischievous and pass on
6905 so_give(&css_stuff);
6909 return(1);
6913 * RSS 2.0 <RSS> version
6916 rss_rss(HANDLER_S *hd, int ch, int cmd)
6918 if(cmd == GF_RESET){
6919 PARAMETER *p;
6921 for(p = HD(hd->html_data)->el_data->attribs;
6922 p && p->attribute;
6923 p = p->next)
6924 if(!strucmp(p->attribute, "VERSION")){
6925 if(p->value && !strucmp(p->value,"2.0"))
6926 return(0); /* do not link in */
6929 gf_error("Incompatible RSS version");
6930 /* NO RETURN */
6933 return(0); /* not linked or error means we never get here */
6937 * RSS 2.0 <CHANNEL>
6940 rss_channel(HANDLER_S *hd, int ch, int cmd)
6942 if(cmd == GF_DATA){
6943 html_handoff(hd, ch);
6945 else if(cmd == GF_RESET){
6946 RSS_FEED_S *feed;
6948 feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6949 memset(feed, 0, sizeof(RSS_FEED_S));
6952 return(1); /* link in */
6956 * RSS 2.0 <TITLE>
6959 rss_title(HANDLER_S *hd, int ch, int cmd)
6961 static STORE_S *title_so;
6963 if(cmd == GF_DATA){
6964 /* collect data */
6965 if(title_so){
6966 so_writec(ch, title_so);
6969 else if(cmd == GF_RESET){
6970 if(RSS_FEED(hd->html_data)){
6971 /* prepare for data */
6972 if(title_so)
6973 so_give(&title_so);
6975 title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6978 else if(cmd == GF_EOD){
6979 if(title_so){
6980 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6981 RSS_ITEM_S *rip;
6983 if(feed){
6984 if((rip = feed->items) != NULL){
6985 for(; rip->next; rip = rip->next)
6988 if(rip->title)
6989 fs_give((void **) &rip->title);
6991 rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6993 else{
6994 if(feed->title)
6995 fs_give((void **) &feed->title);
6997 feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
7001 so_give(&title_so);
7005 return(1); /* link in */
7009 * RSS 2.0 <IMAGE>
7012 rss_image(HANDLER_S *hd, int ch, int cmd)
7014 static STORE_S *img_so;
7016 if(cmd == GF_DATA){
7017 /* collect data */
7018 if(img_so){
7019 so_writec(ch, img_so);
7022 else if(cmd == GF_RESET){
7023 if(RSS_FEED(hd->html_data)){
7024 /* prepare to collect data */
7025 if(img_so)
7026 so_give(&img_so);
7028 img_so = so_get(CharStar, NULL, EDIT_ACCESS);
7031 else if(cmd == GF_EOD){
7032 if(img_so){
7033 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7035 if(feed){
7036 if(feed->image)
7037 fs_give((void **) &feed->image);
7039 feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
7042 so_give(&img_so);
7046 return(1); /* link in */
7050 * RSS 2.0 <LINK>
7053 rss_link(HANDLER_S *hd, int ch, int cmd)
7055 static STORE_S *link_so;
7057 if(cmd == GF_DATA){
7058 /* collect data */
7059 if(link_so){
7060 so_writec(ch, link_so);
7063 else if(cmd == GF_RESET){
7064 if(RSS_FEED(hd->html_data)){
7065 /* prepare to collect data */
7066 if(link_so)
7067 so_give(&link_so);
7069 link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7072 else if(cmd == GF_EOD){
7073 if(link_so){
7074 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7075 RSS_ITEM_S *rip;
7077 if(feed){
7078 if((rip = feed->items) != NULL){
7079 for(; rip->next; rip = rip->next)
7082 if(rip->link)
7083 fs_give((void **) &rip->link);
7085 rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7087 else{
7088 if(feed->link)
7089 fs_give((void **) &feed->link);
7091 feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7095 so_give(&link_so);
7099 return(1); /* link in */
7103 * RSS 2.0 <DESCRIPTION>
7106 rss_description(HANDLER_S *hd, int ch, int cmd)
7108 static STORE_S *desc_so;
7110 if(cmd == GF_DATA){
7111 /* collect data */
7112 if(desc_so){
7113 so_writec(ch, desc_so);
7116 else if(cmd == GF_RESET){
7117 if(RSS_FEED(hd->html_data)){
7118 /* prepare to collect data */
7119 if(desc_so)
7120 so_give(&desc_so);
7122 desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7125 else if(cmd == GF_EOD){
7126 if(desc_so){
7127 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7128 RSS_ITEM_S *rip;
7130 if(feed){
7131 if((rip = feed->items) != NULL){
7132 for(; rip->next; rip = rip->next)
7135 if(rip->description)
7136 fs_give((void **) &rip->description);
7138 rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7140 else{
7141 if(feed->description)
7142 fs_give((void **) &feed->description);
7144 feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7148 so_give(&desc_so);
7152 return(1); /* link in */
7156 * RSS 2.0 <TTL> (in minutes)
7159 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7161 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7163 if(cmd == GF_DATA){
7164 if(isdigit((unsigned char) ch))
7165 feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7167 else if(cmd == GF_RESET){
7168 /* prepare to collect data */
7169 feed->ttl = 0;
7171 else if(cmd == GF_EOD){
7174 return(1); /* link in */
7178 * RSS 2.0 <ITEM>
7181 rss_item(HANDLER_S *hd, int ch, int cmd)
7183 /* BUG: verify no ITEM nesting? */
7184 if(cmd == GF_RESET){
7185 RSS_FEED_S *feed;
7187 if((feed = RSS_FEED(hd->html_data)) != NULL){
7188 RSS_ITEM_S **rip;
7189 int n = 0;
7191 for(rip = &feed->items; *rip; rip = &(*rip)->next)
7192 if(++n > RSS_ITEM_LIMIT)
7193 return(0);
7195 *rip = fs_get(sizeof(RSS_ITEM_S));
7196 memset(*rip, 0, sizeof(RSS_ITEM_S));
7200 return(0); /* don't link in */
/*
 * rss_skip_whitespace - return a pointer to the first character of s
 *                       that is not whitespace (per isspace()), which
 *                       is the terminating NUL if s is all whitespace.
 *
 * The string is not modified and nothing is allocated; the result
 * points into s.  A NULL s yields NULL.
 */
char *
rss_skip_whitespace(char *s)
{
    if(!s){
        return(NULL);
    }

    while(*s && isspace((unsigned char) *s)){
        s++;
    }

    return(s);
}
7215 * return the function associated with the given element name
7217 ELPROP_S *
7218 element_properties(FILTER_S *fd, char *el_name)
7220 register ELPROP_S *el_table = ELEMENTS(fd);
7221 size_t len_name = strlen(el_name);
7223 for(; el_table->element; el_table++)
7224 if(!strucmp(el_name, el_table->element)
7225 || (el_table->alternate
7226 && len_name == el_table->len + 1
7227 && el_name[el_table->len] == '/'
7228 && !struncmp(el_name, el_table->element, el_table->len)))
7229 return(el_table);
7231 return(NULL);
7236 * collect element's name and any attribute/value pairs then
7237 * dispatch to the appropriate handler.
7239 * Returns 1 : got what we wanted
7240 * 0 : we need more data
7241 * -1 : bogus input
7244 html_element_collector(FILTER_S *fd, int ch)
7246 if(ch == '>'){
7247 if(ED(fd)->overrun){
7249 * If problem processing, don't bother doing anything
7250 * internally, just return such that none of what we've
7251 * digested is displayed.
7253 HTML_DEBUG_EL("too long", ED(fd));
7254 return(1); /* Let it go, Jim */
7256 else if(ED(fd)->mkup_decl){
7257 if(ED(fd)->badform){
7258 dprint((2, "-- html error: bad form: %.*s\n",
7259 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7261 * Invalid comment -- make some guesses as
7262 * to whether we should stop with this greater-than...
7264 if(ED(fd)->buf[0] != '-'
7265 || ED(fd)->len < 4
7266 || (ED(fd)->buf[1] == '-'
7267 && ED(fd)->buf[ED(fd)->len - 1] == '-'
7268 && ED(fd)->buf[ED(fd)->len - 2] == '-'))
7269 return(1);
7271 else{
7272 dprint((5, "-- html: OK: %.*s\n",
7273 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7274 if(ED(fd)->start_comment == ED(fd)->end_comment){
7275 if(ED(fd)->len > 10){
7276 ED(fd)->buf[ED(fd)->len - 2] = '\0';
7277 html_element_comment(fd, ED(fd)->buf + 2);
7280 return(1);
7282 /* else keep collecting comment below */
7285 else if(ED(fd)->proc_inst){
7286 return(1); /* return without display... */
7288 else if(!ED(fd)->quoted || ED(fd)->badform){
7289 ELPROP_S *ep;
7292 * We either have the whole thing or all that we could
7293 * salvage from it. Try our best...
7296 if(HD(fd)->bitbucket)
7297 return(1); /* element inside chtml clause! */
7299 if(!ED(fd)->badform && html_element_flush(ED(fd)))
7300 return(1); /* return without display... */
7303 * If we ran into an empty tag or we don't know how to deal
7304 * with it, just go on, ignoring it...
7306 if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
7307 if(ep->handler){
7308 /* dispatch the element's handler */
7309 HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
7310 if(ED(fd)->end_tag){
7311 html_pop(fd, ep); /* remove it's handler */
7313 else{
7314 /* if a block element, pop any open <p>'s */
7315 if(ep->blocklevel){
7316 HANDLER_S *tp;
7318 for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
7319 HTML_DEBUG_EL("Unclosed <P>", ED(fd));
7320 html_pop(fd, EL(tp));
7321 break;
7325 /* enforce table nesting */
7326 if(!strucmp(ep->element, "tr")){
7327 if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
7328 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
7329 if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
7330 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7331 html_pop(fd, EL(HANDLERS(fd)));
7333 else{
7334 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7335 html_push(fd, element_properties(fd, "table"));
7339 else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
7340 if(!HANDLERS(fd)){
7341 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7342 html_push(fd, element_properties(fd, "table"));
7343 html_push(fd, element_properties(fd, "tr"));
7345 else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
7346 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
7347 html_push(fd, element_properties(fd, "tr"));
7349 else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
7350 dprint((2, "-- html error: bad nesting popping <TD>"));
7351 html_pop(fd, EL(HANDLERS(fd)));
7355 /* add it's handler */
7356 if(html_push(fd, ep)){
7357 if(ED(fd)->empty){
7358 /* remove empty element */
7359 html_pop(fd, ep);
7364 else {
7365 HTML_DEBUG_EL("IGNORED", ED(fd));
7368 else{ /* else, empty or unrecognized */
7369 HTML_DEBUG_EL("?", ED(fd));
7372 return(1); /* all done! see, that didn't hurt */
7375 else if(ch == '/' && ED(fd)->element && ED(fd)->len){
7376 ED(fd)->empty = 1;
7378 else
7379 ED(fd)->empty = 0;
7381 if(ED(fd)->mkup_decl){
7382 if((ch &= 0xff) == '-'){
7383 if(ED(fd)->hyphen){
7384 ED(fd)->hyphen = 0;
7385 if(ED(fd)->start_comment)
7386 ED(fd)->end_comment = 1;
7387 else
7388 ED(fd)->start_comment = 1;
7390 else
7391 ED(fd)->hyphen = 1;
7393 else{
7394 if(ED(fd)->end_comment)
7395 ED(fd)->start_comment = ED(fd)->end_comment = 0;
7398 * no "--" after ! or non-whitespace between comments - bad
7400 if(ED(fd)->len < 2 || (!ED(fd)->start_comment
7401 && !ASCII_ISSPACE((unsigned char) ch)))
7402 ED(fd)->badform = 1; /* non-comment! */
7404 ED(fd)->hyphen = 0;
7408 * Remember the comment for possible later processing, if
7409 * it get's too long, remember first and last few chars
7410 * so we know when to terminate (and throw some garbage
7411 * in between when we toss out what's between.
7413 if(ED(fd)->len == HTML_BUF_LEN){
7414 ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
7415 ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
7416 ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
7417 ED(fd)->len = 6;
7420 ED(fd)->buf[(ED(fd)->len)++] = ch;
7421 return(0); /* comments go in the bit bucket */
7423 else if(ED(fd)->overrun || ED(fd)->badform){
7424 return(0); /* swallow char's until next '>' */
7426 else if(!ED(fd)->element && !ED(fd)->len){
7427 if(ch == '/'){ /* validate leading chars */
7428 ED(fd)->end_tag = 1;
7429 return(0);
7431 else if(ch == '!'){
7432 ED(fd)->mkup_decl = 1;
7433 return(0);
7435 else if(ch == '?'){
7436 ED(fd)->proc_inst = 1;
7437 return(0);
7439 else if(!isalpha((unsigned char) ch))
7440 return(-1); /* can't be a tag! */
7442 else if(ch == '\"' || ch == '\''){
7443 if(!ED(fd)->hit_equal){
7444 ED(fd)->badform = 1; /* quote in element name?!? */
7445 return(0);
7448 if(ED(fd)->quoted){
7449 if(ED(fd)->quoted == (char) ch){
7450 /* end of a quoted value */
7451 ED(fd)->quoted = 0;
7452 if(ED(fd)->len && html_element_flush(ED(fd)))
7453 ED(fd)->badform = 1;
7455 return(0); /* continue collecting chars */
7457 /* ELSE fall thru writing other quoting char */
7459 else{
7460 ED(fd)->quoted = (char) ch;
7461 ED(fd)->was_quoted = 1;
7462 return(0); /* need more data */
7466 ch &= 0xff; /* strip any "literal" high bits */
7467 if(ED(fd)->quoted
7468 || isalnum(ch)
7469 || strchr("#-.!", ch)){
7470 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7471 ? HTML_BUF_LEN:MAX_ELEMENT)){
7472 ED(fd)->buf[(ED(fd)->len)++] = ch;
7474 else
7475 ED(fd)->overrun = 1; /* flag it broken */
7477 else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
7478 if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
7479 ED(fd)->badform = 1;
7480 return(0); /* else, we ain't done yet */
7483 if(!ED(fd)->hit_equal)
7484 ED(fd)->hit_equal = (ch == '=');
7486 else if(ch == '/' && ED(fd)->len && !ED(fd)->element){
7487 ELPROP_S *ep;
7488 ep = element_properties(fd, ED(fd)->buf);
7489 if(ep){
7490 if(!ep->alternate)
7491 ED(fd)->badform = 1;
7492 else{
7493 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7494 ? HTML_BUF_LEN:MAX_ELEMENT)){
7495 ED(fd)->buf[(ED(fd)->len)++] = ch; /* add this exception */
7497 else
7498 ED(fd)->overrun = 1;
7501 else
7502 ED(fd)->badform = 1;
7504 else
7505 ED(fd)->badform = 1; /* unrecognized data?? */
7507 return(0); /* keep collecting */
7512 * Element collector found complete string, integrate it and reset
7513 * internal collection buffer.
7515 * Returns zero if element collection buffer flushed, error flag otherwise
7518 html_element_flush(CLCTR_S *el_data)
7520 int rv = 0;
7522 if(el_data->hit_equal){ /* adding a value */
7523 el_data->hit_equal = 0;
7524 if(el_data->cur_attrib){
7525 if(!el_data->cur_attrib->value){
7526 el_data->cur_attrib->value = cpystr(el_data->len
7527 ? el_data->buf : "");
7529 else{
7530 dprint((2, "** element: unexpected value: %.10s...\n",
7531 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7532 rv = 1;
7535 else{
7536 dprint((2, "** element: missing attribute name: %.10s...\n",
7537 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7538 rv = 2;
7541 else if(el_data->len){
7542 if(!el_data->element){
7543 el_data->element = cpystr(el_data->buf);
7545 else{
7546 PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7547 memset(p, 0, sizeof(PARAMETER));
7548 if(el_data->attribs){
7549 el_data->cur_attrib->next = p;
7550 el_data->cur_attrib = p;
7552 else
7553 el_data->attribs = el_data->cur_attrib = p;
7555 p->attribute = cpystr(el_data->buf);
7560 el_data->was_quoted = 0; /* reset collector buf and state */
7561 el_data->len = 0;
7562 memset(el_data->buf, 0, HTML_BUF_LEN);
7563 return(rv); /* report whatever happened above */
7568 * html_element_comment - "Special" comment handling here
7570 void
7571 html_element_comment(FILTER_S *f, char *s)
7573 char *p;
7575 while(*s && ASCII_ISSPACE((unsigned char) *s))
7576 s++;
7579 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7581 if(!struncmp(s, "chtml ", 6)){
7582 s += 6;
7583 if(!struncmp(s, "if ", 3)){
7584 HD(f)->bitbucket = 1; /* default is failure! */
7585 switch(*(s += 3)){
7586 case 'P' :
7587 case 'p' :
7588 if(!struncmp(s + 1, "inemode=", 8)){
7589 if(!strucmp(s = removing_quotes(s + 9), "function_key")
7590 && F_ON(F_USE_FK, ps_global))
7591 HD(f)->bitbucket = 0;
7592 else if(!strucmp(s, "running"))
7593 HD(f)->bitbucket = 0;
7594 else if(!strucmp(s, "PHONE_HOME") && ps_global->phone_home)
7595 HD(f)->bitbucket = 0;
7596 #ifdef _WINDOWS
7597 else if(!strucmp(s, "os_windows"))
7598 HD(f)->bitbucket = 0;
7599 #endif
7602 break;
7604 case '[' : /* test */
7605 if((p = strindex(++s, ']')) != NULL){
7606 *p = '\0'; /* tie off test string */
7607 removing_leading_white_space(s);
7608 removing_trailing_white_space(s);
7609 if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7610 for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7614 HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7615 READ_ACCESS) != 0);
7619 break;
7621 default :
7622 break;
7625 else if(!strucmp(s, "else")){
7626 HD(f)->bitbucket = !HD(f)->bitbucket;
7628 else if(!strucmp(s, "endif")){
7629 /* Clean up after chtml here */
7630 HD(f)->bitbucket = 0;
7633 else if(!HD(f)->bitbucket){
7634 if(!struncmp(s, "#include ", 9)){
7635 char buf[MAILTMPLEN], *bufp;
7636 int len, end_of_line;
7637 FILE *fp;
7639 /* Include the named file */
7640 if(!struncmp(s += 9, "file=", 5)
7641 && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7642 html_element_output(f, HTML_NEWLINE);
7644 while(fgets(buf, sizeof(buf), fp)){
7645 if((len = strlen(buf)) && buf[len-1] == '\n'){
7646 end_of_line = 1;
7647 buf[--len] = '\0';
7649 else
7650 end_of_line = 0;
7652 for(bufp = buf; len; bufp++, len--)
7653 html_element_output(f, (int) *bufp);
7655 if(end_of_line)
7656 html_element_output(f, HTML_NEWLINE);
7659 fclose(fp);
7660 html_element_output(f, HTML_NEWLINE);
7661 HD(f)->blanks = 0;
7662 if(f->f1 == WSPACE)
7663 f->f1 = DFL;
7666 else if(!struncmp(s, "#echo ", 6)){
7667 if(!struncmp(s += 6, "var=", 4)){
7668 char *p, buf[MAILTMPLEN];
7669 ADDRESS *adr;
7670 extern char datestamp[];
7672 if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7673 p = ALPINE_VERSION;
7675 else if(!strcmp(s, "ALPINE_REVISION")){
7676 p = get_alpine_revision_string(buf, sizeof(buf));
7678 else if(!strcmp(s, "C_CLIENT_VERSION")){
7679 p = CCLIENTVERSION;
7681 else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7682 p = datestamp;
7684 else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7685 rfc822_date(p = buf);
7687 else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7688 p = (ps_global->VAR_LOCAL_FULLNAME
7689 && ps_global->VAR_LOCAL_FULLNAME[0])
7690 ? ps_global->VAR_LOCAL_FULLNAME
7691 : "Local Support";
7693 else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7694 p = (ps_global->VAR_LOCAL_ADDRESS
7695 && ps_global->VAR_LOCAL_ADDRESS[0])
7696 ? ps_global->VAR_LOCAL_ADDRESS
7697 : "postmaster";
7698 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7699 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7700 mail_free_address(&adr);
7702 else if(!strcmp(s, "_BUGS_FULLNAME_")){
7703 p = (ps_global->VAR_BUGS_FULLNAME
7704 && ps_global->VAR_BUGS_FULLNAME[0])
7705 ? ps_global->VAR_BUGS_FULLNAME
7706 : "Place to report Alpine Bugs";
7708 else if(!strcmp(s, "_BUGS_ADDRESS_")){
7709 p = (ps_global->VAR_BUGS_ADDRESS
7710 && ps_global->VAR_BUGS_ADDRESS[0])
7711 ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7712 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7713 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7714 mail_free_address(&adr);
7716 else if(!strcmp(s, "CURRENT_DIR")){
7717 getcwd(p = buf, sizeof(buf));
7719 else if(!strcmp(s, "HOME_DIR")){
7720 p = ps_global->home_dir;
7722 else if(!strcmp(s, "PINE_CONF_PATH")){
7723 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7724 p = "/usr/local/lib/pine.conf";
7725 #else
7726 p = SYSTEM_PINERC;
7727 #endif
7729 else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7730 #ifdef SYSTEM_PINERC_FIXED
7731 p = SYSTEM_PINERC_FIXED;
7732 #else
7733 p = "/usr/local/lib/pine.conf.fixed";
7734 #endif
7736 else if(!strcmp(s, "PINE_INFO_PATH")){
7737 p = SYSTEM_PINE_INFO_PATH;
7739 else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7740 p = sysinbox();
7742 else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7743 /* Don't put the leading /tmp/. */
7744 int i, j;
7746 p = sysinbox();
7747 if(p){
7748 for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7749 if(p[i] == '/')
7750 buf[j++] = '\\';
7751 else
7752 buf[j++] = p[i];
7754 buf[j++] = '\0';
7755 p = buf;
7758 else if(!struncmp(s, "VAR_", 4)){
7759 p = s+4;
7760 if(pith_opt_pretty_var_name)
7761 p = (*pith_opt_pretty_var_name)(p);
7763 else if(!struncmp(s, "FEAT_", 5)){
7764 p = s+5;
7765 if(pith_opt_pretty_feature_name)
7766 p = (*pith_opt_pretty_feature_name)(p, -1);
7768 else
7769 p = NULL;
7771 if(p){
7772 if(f->f1 == WSPACE){
7773 html_element_output(f, ' ');
7774 f->f1 = DFL; /* clear it */
7777 while(*p)
7778 html_element_output(f, (int) *p++);
7786 void
7787 html_element_output(FILTER_S *f, int ch)
7789 if(HANDLERS(f))
7790 (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7791 else
7792 html_output(f, ch);
/*
 * True if X is a hexadecimal digit (0-9, a-f, A-F).  X is evaluated
 * exactly once and converted to unsigned char, so it is safe to pass
 * a plain char or an expression with side effects.
 */
#define ISHEX_DIGIT(X)  (isxdigit((unsigned char) (X)) != 0)
7800 * collect html entity and return its UCS value when done.
7802 * Returns HTML_MOREDATA : we need more data
7803 * HTML_ENTITY : entity collected
7804 * HTML_BADVALUE : good data, but no named match or out of range
7805 * HTML_BADDATA : invalid input
7807 * NOTES:
7808 * - entity format is "'&' tag ';'" and represents a literal char
7809 * - named entities are CASE SENSITIVE.
7810 * - numeric char references (where the tag is prefixed with a '#')
7811 * are a char with that numbers value
7812 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7815 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7817 static int len = 0;
7818 static char buf[MAX_ENTITY+2];
7819 int rv, i;
7821 if(len == MAX_ENTITY){
7822 rv = HTML_BADDATA;
7824 else if((len == 0)
7825 ? (isalpha((unsigned char) ch) || ch == '#')
7826 : ((isdigit((unsigned char) ch)
7827 || (len == 1 && (unsigned char) ch == 'x')
7828 || (len == 1 &&(unsigned char) ch == 'X')
7829 || (len > 1 && isxdigit((unsigned char) ch))
7830 || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7831 buf[len++] = ch;
7832 return(HTML_MOREDATA);
7834 else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7835 buf[len] = '\0'; /* got something! */
7836 if(buf[0] == '#'){
7837 if(buf[1] == 'x' || buf[1] == 'X')
7838 *ucs = (UCS) strtoul(&buf[2], NULL, 16);
7839 else
7840 *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7842 if(alt){
7843 *alt = NULL;
7844 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7845 if(entity_tab[i].value == *ucs){
7846 *alt = entity_tab[i].plain;
7847 break;
7851 len = 0;
7852 return(HTML_ENTITY);
7854 else{
7855 rv = HTML_BADVALUE; /* in case of no match */
7856 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7857 if(strcmp(entity_tab[i].name, buf) == 0){
7858 *ucs = entity_tab[i].value;
7859 if(alt)
7860 *alt = entity_tab[i].plain;
7862 len = 0;
7863 return(HTML_ENTITY);
7867 else
7868 rv = HTML_BADDATA; /* bogus input! */
7870 if(alt){
7871 buf[len] = '\0';
7872 *alt = buf;
7875 len = 0;
7876 return(rv);
7880 /*----------------------------------------------------------------------
7881 HTML text to plain text filter
7883 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7884 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7885 formatting.
7887 ----*/
7888 void
7889 gf_html2plain(FILTER_S *f, int flg)
7891 /* BUG: qoute incoming \255 values (see "yuml" above!) */
7892 if(flg == GF_DATA){
7893 register int c;
7894 GF_INIT(f, f->next);
7896 if(!HTML_WROTE(f)){
7897 int ii;
7899 for(ii = HTML_INDENT(f); ii > 0; ii--)
7900 html_putc(f, ' ');
7902 HTML_WROTE(f) = 1;
7905 while(GF_GETC(f, c)){
7907 * First we have to collect any literal entities...
7908 * that is, IF we're not already collecting one
7909 * AND we're not in element's text or, if we are, we're
7910 * not in quoted text. Whew.
7912 if(f->t){
7913 char *alt = NULL;
7914 UCS ucs;
7916 switch(html_entity_collector(f, c, &ucs, &alt)){
7917 case HTML_MOREDATA: /* more data required? */
7918 continue; /* go get another char */
7920 case HTML_BADVALUE :
7921 case HTML_BADDATA :
7922 /* if supplied, process bogus data */
7923 HTML_PROC(f, '&');
7924 for(; *alt; alt++){
7925 unsigned int uic = *alt;
7926 HTML_PROC(f, uic);
7929 if(c == '&' && !HD(f)->quoted){
7930 f->t = '&';
7931 continue;
7933 else
7934 f->t = 0; /* don't come back next time */
7936 break;
7938 default : /* thing to process */
7939 f->t = 0; /* don't come back */
7942 * do something with UCS codepoint. If it's
7943 * not displayable then use the alt version
7944 * otherwise
7945 * cvt UCS to UTF-8 and toss into next filter.
7947 if(ucs > 127 && wcellwidth(ucs) < 0){
7948 if(alt){
7949 for(; *alt; alt++){
7950 c = MAKE_LITERAL(*alt);
7951 HTML_PROC(f, c);
7954 continue;
7956 else
7957 c = MAKE_LITERAL('?');
7959 else{
7960 unsigned char utf8buf[8], *p1, *p2;
7962 p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
7963 for(; p1 < p2; p1++){
7964 c = MAKE_LITERAL(*p1);
7965 HTML_PROC(f, c);
7968 continue;
7971 break;
7974 else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
7975 f->t = '&';
7976 continue;
7980 * then we process whatever we got...
7983 HTML_PROC(f, c);
7986 GF_OP_END(f); /* clean up our input pointers */
7988 else if(flg == GF_EOD){
7989 while(HANDLERS(f)){
7990 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
7991 html_pop(f, EL(HANDLERS(f)));
7994 html_output(f, HTML_NEWLINE);
7995 if(ULINE_BIT(f))
7996 HTML_ULINE(f, ULINE_BIT(f) = 0);
7998 if(BOLD_BIT(f))
7999 HTML_BOLD(f, BOLD_BIT(f) = 0);
8001 HTML_FLUSH(f);
8002 fs_give((void **)&f->line);
8003 if(HD(f)->color)
8004 free_color_pair(&HD(f)->color);
8006 fs_give(&f->data);
8007 if(f->opt){
8008 if(((HTML_OPT_S *)f->opt)->base)
8009 fs_give((void **) &((HTML_OPT_S *)f->opt)->base);
8011 fs_give(&f->opt);
8014 (*f->next->f)(f->next, GF_DATA);
8015 (*f->next->f)(f->next, GF_EOD);
8017 else if(flg == GF_RESET){
8018 dprint((9, "-- gf_reset html2plain\n"));
8019 f->data = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
8020 memset(f->data, 0, sizeof(HTML_DATA_S));
8021 /* start with flowing text */
8022 HD(f)->wrapstate = !PASS_HTML(f);
8023 HD(f)->wrapcol = WRAP_COLS(f);
8024 f->f1 = DFL; /* state */
8025 f->f2 = 0; /* chars in wrap buffer */
8026 f->n = 0L; /* chars on line so far */
8027 f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
8028 HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
8029 HD(f)->alt_entity = (!ps_global->display_charmap
8030 || strucmp(ps_global->display_charmap, "iso-8859-1"));
8031 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8038 * html_indent - do the requested indent level function with appropriate
8039 * flushing and such.
8041 * Returns: indent level prior to set/increment
8044 html_indent(FILTER_S *f, int val, int func)
8046 int old = HD(f)->indent_level;
8048 /* flush pending data at old indent level */
8049 switch(func){
8050 case HTML_ID_INC :
8051 html_output_flush(f);
8052 if((HD(f)->indent_level += val) < 0)
8053 HD(f)->indent_level = 0;
8055 break;
8057 case HTML_ID_SET :
8058 html_output_flush(f);
8059 HD(f)->indent_level = val;
8060 break;
8062 default :
8063 break;
8066 return(old);
8072 * html_blanks - Insert n blank lines into output
8074 void
8075 html_blank(FILTER_S *f, int n)
8077 /* Cap off any flowing text, and then write blank lines */
8078 if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
8079 html_output(f, HTML_NEWLINE);
8081 if(HD(f)->wrapstate)
8082 while(HD(f)->blanks < n) /* blanks inc'd by HTML_NEWLINE */
8083 html_output(f, HTML_NEWLINE);
8089 * html_newline -- insert a newline mindful of embedded tags
8091 void
8092 html_newline(FILTER_S *f)
8094 html_write_newline(f); /* commit an actual newline */
8096 if(f->n){ /* and keep track of blank lines */
8097 HD(f)->blanks = 0;
8098 f->n = 0L;
8100 else
8101 HD(f)->blanks++;
8106 * output the given char, handling any requested wrapping.
8107 * It's understood that all whitespace handed us is written. In other
8108 * words, junk whitespace is weeded out before it's given to us here.
8111 void
8112 html_output(FILTER_S *f, int ch)
8114 UCS uc;
8115 int width;
8116 void (*o_f)(FILTER_S *, int, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8119 * if ch is a control token, just pass it on, else, collect
8120 * utf8-encoded characters to determine width,then feed into
8121 * output routines
8123 if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8124 (*o_f)(f, ch, 1, 0);
8126 else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8127 unsigned char *cp;
8129 for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8130 (*o_f)(f, *cp, width, HD(f)->cb.cbufend - cp);
8131 width = 0; /* only count it once */
8134 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8136 else
8137 HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8138 /* else do nothing until we have a full character */
8142 void
8143 html_output_string(FILTER_S *f, char *s)
8145 for(; *s; s++)
8146 html_output(f, *s);
8150 void
8151 html_output_raw_tag(FILTER_S *f, char *tag)
8153 PARAMETER *p;
8154 char *vp;
8155 int i;
8157 html_output(f, '<');
8158 html_output_string(f, tag);
8159 for(p = HD(f)->el_data->attribs;
8160 p && p->attribute;
8161 p = p->next){
8162 /* SECURITY: no javascript */
8163 /* PRIVACY: no img src without permission */
8164 /* BUGS: no class collisions since <head> ignored */
8165 if(html_event_attribute(p->attribute)
8166 || !strucmp(p->attribute, "class")
8167 || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8168 continue;
8170 /* PRIVACY: sniff out background images */
8171 if(p->value && !PASS_IMAGES(f)){
8172 if(!strucmp(p->attribute, "style")){
8173 if((vp = srchstr(p->value, "background-image")) != NULL){
8174 /* neuter in place */
8175 vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8177 else{
8178 for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8179 if(vp[10] == ' ' || vp[10] == ':')
8180 for(i = 11; vp[i] && vp[i] != ';'; i++)
8181 if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8182 || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8183 vp[0] = 'X';
8186 else if(!strucmp(p->attribute, "background")){
8187 char *ip;
8189 for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8192 if(ip)
8193 continue;
8197 html_output(f, ' ');
8198 html_output_string(f, p->attribute);
8199 if(p->value){
8200 html_output(f, '=');
8201 html_output(f, '\"');
8202 html_output_string(f, p->value);
8203 html_output(f, '\"');
8207 /* append warning to form submission */
8208 if(!strucmp(tag, "form")){
8209 html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8212 if(ED(f)->end_tag){
8213 html_output(f, ' ');
8214 html_output(f, '/');
8217 html_output(f, '>');
8222 html_event_attribute(char *attr)
8224 int i;
8225 static char *events[] = {
8226 "onabort", "onblur", "onchange", "onclick", "ondblclick", "ondragdrop",
8227 "onerror", "onfocus", "onkeydown", "onkeypress", "onkeyup", "onload",
8228 "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmove",
8229 "onreset", "onresize", "onselec", "onsubmit", "onunload"
8232 if((attr[0] == 'o' || attr[0] == 'O') && (attr[1] == 'n' || attr[1] == 'N'))
8233 for(i = 0; i < sizeof(events)/sizeof(events[0]); i++)
8234 if(!strucmp(attr, events[i]))
8235 return(TRUE);
8237 return(FALSE);
8241 void
8242 html_output_normal(FILTER_S *f, int ch, int width, int remaining)
8244 static int written = 0;
8245 static int cwidth;
8247 if(HD(f)->centered){
8248 html_centered_flush(f);
8249 fs_give((void **) &HD(f)->centered->line.buf);
8250 fs_give((void **) &HD(f)->centered->word.buf);
8251 fs_give((void **) &HD(f)->centered);
8254 if(HD(f)->wrapstate){
8255 if(ch == HTML_NEWLINE){ /* hard newline */
8256 html_output_flush(f);
8257 html_newline(f);
8259 else
8260 HD(f)->blanks = 0; /* reset blank line counter */
8262 if(ch == TAG_EMBED){ /* takes up no space */
8263 HD(f)->embedded.state = -5;
8264 HTML_LINEP_PUTC(f, TAG_EMBED);
8266 else if(HD(f)->embedded.state){ /* ditto */
8267 if(HD(f)->embedded.state == -5){
8268 /* looking for specially handled tags following TAG_EMBED */
8269 if(ch == TAG_HANDLE)
8270 HD(f)->embedded.state = -1; /* next ch is length */
8271 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8272 if(!HD(f)->color)
8273 HD(f)->color = new_color_pair(NULL, NULL);
8275 if(ch == TAG_FGCOLOR)
8276 HD(f)->embedded.color = HD(f)->color->fg;
8277 else
8278 HD(f)->embedded.color = HD(f)->color->bg;
8280 HD(f)->embedded.state = RGBLEN;
8282 else
8283 HD(f)->embedded.state = 0; /* non-special */
8285 else if(HD(f)->embedded.state > 0){
8286 /* collecting up an RGBLEN color or length, ignore tags */
8287 (HD(f)->embedded.state)--;
8288 if(HD(f)->embedded.color)
8289 *HD(f)->embedded.color++ = ch;
8291 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8292 *HD(f)->embedded.color = '\0';
8293 HD(f)->embedded.color = NULL;
8296 else if(HD(f)->embedded.state < 0){
8297 HD(f)->embedded.state = ch; /* number of embedded chars */
8299 else{
8300 (HD(f)->embedded.state)--;
8301 if(HD(f)->embedded.color)
8302 *HD(f)->embedded.color++ = ch;
8304 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8305 *HD(f)->embedded.color = '\0';
8306 HD(f)->embedded.color = NULL;
8310 HTML_LINEP_PUTC(f, ch);
8312 else if(HTML_ISSPACE(ch)){
8313 html_output_flush(f);
8315 else{
8316 if(HD(f)->prefix)
8317 html_a_prefix(f);
8319 if(written == 0)
8320 cwidth = width;
8322 if(f->f2 + cwidth + 1 >= WRAP_COLS(f)){
8323 HTML_LINEP_PUTC(f, ch & 0xff);
8324 written++;
8325 if(remaining == 0){
8326 HTML_FLUSH(f);
8327 html_newline(f);
8329 if(HD(f)->in_anchor)
8330 html_write_anchor(f, HD(f)->in_anchor);
8332 else{
8333 HTML_LINEP_PUTC(f, ch & 0xff);
8334 written++;
8337 if(remaining == 0){
8338 written = 0;
8339 f->f2 += cwidth;
8343 else{
8344 if(HD(f)->prefix)
8345 html_a_prefix(f);
8347 html_output_flush(f);
8349 switch(HD(f)->embedded.state){
8350 case 0 :
8351 switch(ch){
8352 default :
8354 * It's difficult to both preserve whitespace and wrap at the
8355 * same time so we'll do a dumb wrap at the edge of the screen.
8356 * Since this shouldn't come up much in real life we'll hope
8357 * it is good enough.
8359 if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
8360 html_newline(f);
8362 f->n += width; /* inc displayed char count */
8363 HD(f)->blanks = 0; /* reset blank line counter */
8364 html_putc(f, ch & 0xff);
8365 break;
8367 case TAG_EMBED : /* takes up no space */
8368 html_putc(f, TAG_EMBED);
8369 HD(f)->embedded.state = -2;
8370 break;
8372 case HTML_NEWLINE : /* newline handling */
8373 if(!f->n)
8374 break;
8376 case '\n' :
8377 html_newline(f);
8379 case '\r' :
8380 break;
8383 break;
8385 case -2 :
8386 HD(f)->embedded.state = 0;
8387 switch(ch){
8388 case TAG_HANDLE :
8389 HD(f)->embedded.state = -1; /* next ch is length */
8390 break;
8392 case TAG_BOLDON :
8393 BOLD_BIT(f) = 1;
8394 break;
8396 case TAG_BOLDOFF :
8397 BOLD_BIT(f) = 0;
8398 break;
8400 case TAG_ULINEON :
8401 ULINE_BIT(f) = 1;
8402 break;
8404 case TAG_ULINEOFF :
8405 ULINE_BIT(f) = 0;
8406 break;
8408 case TAG_FGCOLOR :
8409 if(!HD(f)->color)
8410 HD(f)->color = new_color_pair(NULL, NULL);
8412 HD(f)->embedded.color = HD(f)->color->fg;
8413 HD(f)->embedded.state = 11;
8414 break;
8416 case TAG_BGCOLOR :
8417 if(!HD(f)->color)
8418 HD(f)->color = new_color_pair(NULL, NULL);
8420 HD(f)->embedded.color = HD(f)->color->bg;
8421 HD(f)->embedded.state = 11;
8422 break;
8424 case TAG_HANDLEOFF :
8425 ch = TAG_INVOFF;
8426 HD(f)->in_anchor = 0;
8427 break;
8429 default :
8430 break;
8433 html_putc(f, ch);
8434 break;
8436 case -1 :
8437 HD(f)->embedded.state = ch; /* number of embedded chars */
8438 html_putc(f, ch);
8439 break;
8441 default :
8442 HD(f)->embedded.state--;
8443 if(HD(f)->embedded.color)
8444 *HD(f)->embedded.color++ = ch;
8446 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8447 *HD(f)->embedded.color = '\0';
8448 HD(f)->embedded.color = NULL;
8451 html_putc(f, ch);
8452 break;
8459 * flush any buffered chars waiting for wrapping.
8461 void
8462 html_output_flush(FILTER_S *f)
8464 if(f->f2){
8465 if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8466 html_newline(f); /* wrap? */
8468 if(f->n){ /* text already on the line? */
8469 html_putc(f, ' ');
8470 f->n++; /* increment count */
8472 else{
8473 /* write at start of new line */
8474 html_write_indent(f, HD(f)->indent_level);
8476 if(HD(f)->in_anchor)
8477 html_write_anchor(f, HD(f)->in_anchor);
8480 f->n += f->f2;
8481 HTML_FLUSH(f);
8488 * html_output_centered - managed writing centered text
8490 void
8491 html_output_centered(FILTER_S *f, int ch, int width, int remaining)
8493 static int written;
8494 static int cwidth;
8496 if(!HD(f)->centered){ /* new text? */
8497 html_output_flush(f);
8498 if(f->n) /* start on blank line */
8499 html_newline(f);
8501 HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
8502 memset(HD(f)->centered, 0, sizeof(CENTER_S));
8503 /* and grab a buf to start collecting centered text */
8504 HD(f)->centered->line.len = WRAP_COLS(f);
8505 HD(f)->centered->line.buf = (char *) fs_get(HD(f)->centered->line.len
8506 * sizeof(char));
8507 HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
8508 HD(f)->centered->word.len = 32;
8509 HD(f)->centered->word.buf = (char *) fs_get(HD(f)->centered->word.len
8510 * sizeof(char));
8511 HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
8514 if(ch == HTML_NEWLINE){ /* hard newline */
8515 html_centered_flush(f);
8517 else if(ch == TAG_EMBED){ /* takes up no space */
8518 HD(f)->embedded.state = -5;
8519 html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
8521 else if(HD(f)->embedded.state){
8522 if(HD(f)->embedded.state == -5){
8523 /* looking for specially handled tags following TAG_EMBED */
8524 if(ch == TAG_HANDLE)
8525 HD(f)->embedded.state = -1; /* next ch is length */
8526 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8527 if(!HD(f)->color)
8528 HD(f)->color = new_color_pair(NULL, NULL);
8530 if(ch == TAG_FGCOLOR)
8531 HD(f)->embedded.color = HD(f)->color->fg;
8532 else
8533 HD(f)->embedded.color = HD(f)->color->bg;
8535 HD(f)->embedded.state = RGBLEN;
8537 else
8538 HD(f)->embedded.state = 0; /* non-special */
8540 else if(HD(f)->embedded.state > 0){
8541 /* collecting up an RGBLEN color or length, ignore tags */
8542 (HD(f)->embedded.state)--;
8543 if(HD(f)->embedded.color)
8544 *HD(f)->embedded.color++ = ch;
8546 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8547 *HD(f)->embedded.color = '\0';
8548 HD(f)->embedded.color = NULL;
8551 else if(HD(f)->embedded.state < 0){
8552 HD(f)->embedded.state = ch; /* number of embedded chars */
8554 else{
8555 (HD(f)->embedded.state)--;
8556 if(HD(f)->embedded.color)
8557 *HD(f)->embedded.color++ = ch;
8559 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8560 *HD(f)->embedded.color = '\0';
8561 HD(f)->embedded.color = NULL;
8565 html_centered_putc(&HD(f)->centered->word, ch);
8567 else if(ASCII_ISSPACE((unsigned char) ch)){
8568 if(!HD(f)->centered->space++){ /* end of a word? flush! */
8569 int i;
8571 if(WRAPPED_LEN(f) > HD(f)->wrapcol){
8572 html_centered_flush_line(f);
8573 /* fall thru to put current "word" on blank "line" */
8575 else if(HD(f)->centered->line.width){
8576 /* put space char between line and appended word */
8577 html_centered_putc(&HD(f)->centered->line, ' ');
8578 HD(f)->centered->line.width++;
8581 for(i = 0; i < HD(f)->centered->word.used; i++)
8582 html_centered_putc(&HD(f)->centered->line,
8583 HD(f)->centered->word.buf[i]);
8585 HD(f)->centered->line.width += HD(f)->centered->word.width;
8586 HD(f)->centered->word.used = 0;
8587 HD(f)->centered->word.width = 0;
8590 else{
8591 if(HD(f)->prefix)
8592 html_a_prefix(f);
8594 /* ch is start of next word */
8595 HD(f)->centered->space = 0;
8596 if(HD(f)->centered->word.width >= WRAP_COLS(f))
8597 html_centered_flush(f);
8599 html_centered_putc(&HD(f)->centered->word, ch);
8601 if(written == 0)
8602 cwidth = width;
8604 written++;
8606 if(remaining == 0){
8607 written = 0;
8608 HD(f)->centered->word.width += cwidth;
8615 * html_centered_putc -- add given char to given WRAPLINE_S
8617 void
8618 html_centered_putc(WRAPLINE_S *wp, int ch)
8620 if(wp->used + 1 >= wp->len){
8621 wp->len += 64;
8622 fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8625 wp->buf[wp->used++] = ch;
8631 * html_centered_flush - finish writing any pending centered output
8633 void
8634 html_centered_flush(FILTER_S *f)
8636 int i;
8639 * If word present (what about line?) we need to deal with
8640 * appending it...
8642 if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8643 html_centered_flush_line(f);
8645 if(WRAPPED_LEN(f)){
8646 /* figure out how much to indent */
8647 if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8648 html_write_indent(f, i);
8650 if(HD(f)->centered->anchor)
8651 html_write_anchor(f, HD(f)->centered->anchor);
8653 html_centered_handle(&HD(f)->centered->anchor,
8654 HD(f)->centered->line.buf,
8655 HD(f)->centered->line.used);
8656 html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8658 if(HD(f)->centered->word.used){
8659 if(HD(f)->centered->line.width)
8660 html_putc(f, ' ');
8662 html_centered_handle(&HD(f)->centered->anchor,
8663 HD(f)->centered->word.buf,
8664 HD(f)->centered->word.used);
8665 html_write(f, HD(f)->centered->word.buf,
8666 HD(f)->centered->word.used);
8669 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8670 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8672 else{
8673 if(HD(f)->centered->word.used){
8674 html_write(f, HD(f)->centered->word.buf,
8675 HD(f)->centered->word.used);
8676 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8677 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8679 HD(f)->blanks++; /* advance the blank line counter */
8682 html_newline(f); /* finish the line */
8687 * html_centered_handle - scan the line for embedded handles
8689 void
8690 html_centered_handle(int *h, char *line, int len)
8692 int n;
8694 while(len-- > 0)
8695 if(*line++ == TAG_EMBED && len-- > 0)
8696 switch(*line++){
8697 case TAG_HANDLE :
8698 if((n = *line++) >= --len){
8699 *h = 0;
8700 len -= n;
8701 while(n--)
8702 *h = (*h * 10) + (*line++ - '0');
8704 break;
8706 case TAG_HANDLEOFF :
8707 case TAG_INVOFF :
8708 *h = 0; /* assumption 23,342: inverse off ends tags */
8709 break;
8711 default :
8712 break;
8719 * html_centered_flush_line - flush the centered "line" only
8721 void
8722 html_centered_flush_line(FILTER_S *f)
8724 if(HD(f)->centered->line.used){
8725 int i, j;
8727 /* hide "word" from flush */
8728 i = HD(f)->centered->word.used;
8729 j = HD(f)->centered->word.width;
8730 HD(f)->centered->word.used = 0;
8731 HD(f)->centered->word.width = 0;
8732 html_centered_flush(f);
8734 HD(f)->centered->word.used = i;
8735 HD(f)->centered->word.width = j;
8741 * html_write_indent - write indention mindful of display attributes
8743 void
8744 html_write_indent(FILTER_S *f, int indent)
8746 if(! STRIP(f)){
8747 if(BOLD_BIT(f)){
8748 html_putc(f, TAG_EMBED);
8749 html_putc(f, TAG_BOLDOFF);
8752 if(ULINE_BIT(f)){
8753 html_putc(f, TAG_EMBED);
8754 html_putc(f, TAG_ULINEOFF);
8758 f->n = indent;
8759 while(indent-- > 0)
8760 html_putc(f, ' '); /* indent as needed */
8763 * Resume any previous embedded state
8765 if(! STRIP(f)){
8766 if(BOLD_BIT(f)){
8767 html_putc(f, TAG_EMBED);
8768 html_putc(f, TAG_BOLDON);
8771 if(ULINE_BIT(f)){
8772 html_putc(f, TAG_EMBED);
8773 html_putc(f, TAG_ULINEON);
8782 void
8783 html_write_anchor(FILTER_S *f, int anchor)
8785 char buf[256];
8786 int i;
8788 html_putc(f, TAG_EMBED);
8789 html_putc(f, TAG_HANDLE);
8790 snprintf(buf, sizeof(buf), "%d", anchor);
8791 html_putc(f, (int) strlen(buf));
8793 for(i = 0; buf[i]; i++)
8794 html_putc(f, buf[i]);
8799 * html_write_newline - write a newline mindful of display attributes
8801 void
8802 html_write_newline(FILTER_S *f)
8804 int i;
8806 if(! STRIP(f)){ /* First tie, off any embedded state */
8807 if(HD(f)->in_anchor){
8808 html_putc(f, TAG_EMBED);
8809 html_putc(f, TAG_INVOFF);
8812 if(BOLD_BIT(f)){
8813 html_putc(f, TAG_EMBED);
8814 html_putc(f, TAG_BOLDOFF);
8817 if(ULINE_BIT(f)){
8818 html_putc(f, TAG_EMBED);
8819 html_putc(f, TAG_ULINEOFF);
8822 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8823 char *p;
8824 int i;
8826 p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8827 ps_global->VAR_NORM_BACK_COLOR);
8828 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8829 html_putc(f, p[i]);
8833 html_write(f, "\015\012", 2);
8834 for(i = HTML_INDENT(f); i > 0; i--)
8835 html_putc(f, ' ');
8837 if(! STRIP(f)){ /* First tie, off any embedded state */
8838 if(BOLD_BIT(f)){
8839 html_putc(f, TAG_EMBED);
8840 html_putc(f, TAG_BOLDON);
8843 if(ULINE_BIT(f)){
8844 html_putc(f, TAG_EMBED);
8845 html_putc(f, TAG_ULINEON);
8848 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8849 char *p, *tfg, *tbg;
8850 int i;
8851 COLOR_PAIR *tmp;
8853 tfg = HD(f)->color->fg;
8854 tbg = HD(f)->color->bg;
8855 tmp = new_color_pair(tfg[0] ? tfg
8856 : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8857 tbg[0] ? tbg
8858 : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8859 if(pico_is_good_colorpair(tmp)){
8860 p = color_embed(tfg[0] ? tfg
8861 : ps_global->VAR_NORM_FORE_COLOR,
8862 tbg[0] ? tbg
8863 : ps_global->VAR_NORM_BACK_COLOR);
8864 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8865 html_putc(f, p[i]);
8868 if(tmp)
8869 free_color_pair(&tmp);
8876 * html_write - write given n-length string to next filter
8878 void
8879 html_write(FILTER_S *f, char *s, int n)
8881 GF_INIT(f, f->next);
8883 while(n-- > 0){
8884 /* keep track of attribute state? Not if last char! */
8885 if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
8886 GF_PUTC(f->next, TAG_EMBED);
8887 switch(*++s){
8888 case TAG_BOLDON :
8889 BOLD_BIT(f) = 1;
8890 break;
8891 case TAG_BOLDOFF :
8892 BOLD_BIT(f) = 0;
8893 break;
8894 case TAG_ULINEON :
8895 ULINE_BIT(f) = 1;
8896 break;
8897 case TAG_ULINEOFF :
8898 ULINE_BIT(f) = 0;
8899 break;
8900 case TAG_HANDLEOFF :
8901 HD(f)->in_anchor = 0;
8902 GF_PUTC(f->next, TAG_INVOFF);
8903 s++;
8904 continue;
8905 case TAG_HANDLE :
8906 if(n-- > 0){
8907 int i = *++s;
8909 GF_PUTC(f->next, TAG_HANDLE);
8910 if(i <= n){
8911 int anum = 0;
8912 HANDLE_S *h;
8914 n -= i;
8915 GF_PUTC(f->next, i);
8916 while(1){
8917 anum = (anum * 10) + (*++s - '0');
8918 if(--i)
8919 GF_PUTC(f->next, *s);
8920 else
8921 break;
8924 if(DO_HANDLES(f)
8925 && (h = get_handle(*HANDLESP(f), anum)) != NULL
8926 && (h->type == URL || h->type == Attach)){
8927 HD(f)->in_anchor = anum;
8932 break;
8933 default:
8934 break;
8938 GF_PUTC(f->next, (*s++) & 0xff);
8941 GF_IP_END(f->next); /* clean up next's input pointers */
8946 * html_putc -- actual work of writing to next filter.
8947 * NOTE: Small opt not using full GF_END since our input
8948 * pointers don't need adjusting.
8950 void
8951 html_putc(FILTER_S *f, int ch)
8953 GF_INIT(f, f->next);
8954 GF_PUTC(f->next, ch & 0xff);
8955 GF_IP_END(f->next); /* clean up next's input pointers */
8961 * Only current option is to turn on embedded data stripping for text
8962 * bound to a printer or composer.
8964 void *
8965 gf_html2plain_opt(char *base,
8966 int columns,
8967 int *margin,
8968 HANDLE_S **handlesp,
8969 htmlrisk_t risk_f,
8970 int flags)
8972 HTML_OPT_S *op;
8973 int margin_l, margin_r;
8975 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8977 op->base = cpystr(base);
8978 margin_l = (margin) ? margin[0] : 0;
8979 margin_r = (margin) ? margin[1] : 0;
8980 op->indent = margin_l;
8981 op->columns = columns - (margin_l + margin_r);
8982 op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8983 op->handlesp = handlesp;
8984 op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8985 op->showserver = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8986 op->warnrisk_f = risk_f;
8987 op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8988 op->related_content = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8989 op->html = ((flags & GFHP_HTML) == GFHP_HTML);
8990 op->html_imgs = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8991 op->element_table = html_element_table;
8992 return((void *) op);
8996 void *
8997 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
8999 HTML_OPT_S *op;
9001 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
9002 memset(op, 0, sizeof(HTML_OPT_S));
9004 op->base = cpystr("");
9005 op->element_table = rss_element_table;
9006 *(op->feedp = feedp) = NULL;
9007 return((void *) op);
9010 void
9011 gf_html2plain_rss_free(RSS_FEED_S **feedp)
9013 if(feedp && *feedp){
9014 if((*feedp)->title)
9015 fs_give((void **) &(*feedp)->title);
9017 if((*feedp)->link)
9018 fs_give((void **) &(*feedp)->link);
9020 if((*feedp)->description)
9021 fs_give((void **) &(*feedp)->description);
9023 if((*feedp)->source)
9024 fs_give((void **) &(*feedp)->source);
9026 if((*feedp)->image)
9027 fs_give((void **) &(*feedp)->image);
9029 gf_html2plain_rss_free_items(&((*feedp)->items));
9030 fs_give((void **) feedp);
9034 void
9035 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
9037 if(itemp && *itemp){
9038 if((*itemp)->title)
9039 fs_give((void **) &(*itemp)->title);
9041 if((*itemp)->link)
9042 fs_give((void **) &(*itemp)->link);
9044 if((*itemp)->description)
9045 fs_give((void **) &(*itemp)->description);
9047 if((*itemp)->source)
9048 fs_give((void **) &(*itemp)->source);
9050 gf_html2plain_rss_free_items(&(*itemp)->next);
9051 fs_give((void **) itemp);
9056 /* END OF HTML-TO-PLAIN text filter */
9059 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9060 * from the text stream.
9063 #define MAX_ESC_LEN 5
9066 * the simple filter, removes unknown escape codes from the stream
9068 void
9069 gf_escape_filter(FILTER_S *f, int flg)
9071 register char *p;
9072 GF_INIT(f, f->next);
9074 if(flg == GF_DATA){
9075 register unsigned char c;
9076 register int state = f->f1;
9078 while(GF_GETC(f, c)){
9080 if(state){
9081 if(c == '\033' || f->n == MAX_ESC_LEN){
9082 f->line[f->n] = '\0';
9083 f->n = 0L;
9084 if(!match_escapes(f->line)){
9085 GF_PUTC(f->next, '^');
9086 GF_PUTC(f->next, '[');
9088 else
9089 GF_PUTC(f->next, '\033');
9091 p = f->line;
9092 while(*p)
9093 GF_PUTC(f->next, *p++);
9095 if(c == '\033')
9096 continue;
9097 else
9098 state = 0; /* fall thru */
9100 else{
9101 f->line[f->n++] = c; /* collect */
9102 continue;
9106 if(c == '\033')
9107 state = 1;
9108 else
9109 GF_PUTC(f->next, c);
9112 f->f1 = state;
9113 GF_END(f, f->next);
9115 else if(flg == GF_EOD){
9116 if(f->f1){
9117 if(!match_escapes(f->line)){
9118 GF_PUTC(f->next, '^');
9119 GF_PUTC(f->next, '[');
9121 else
9122 GF_PUTC(f->next, '\033');
9125 for(p = f->line; f->n; f->n--, p++)
9126 GF_PUTC(f->next, *p);
9128 fs_give((void **)&(f->line)); /* free temp line buffer */
9129 (void) GF_FLUSH(f->next);
9130 (*f->next->f)(f->next, GF_EOD);
9132 else if(flg == GF_RESET){
9133 dprint((9, "-- gf_reset escape\n"));
9134 f->f1 = 0;
9135 f->n = 0L;
9136 f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9143 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9144 * corresponding string representations (you know, ^blah and such)...
9148 * the simple filter transforms unknown control characters in the stream
9149 * into harmless strings.
9151 void
9152 gf_control_filter(FILTER_S *f, int flg)
9154 GF_INIT(f, f->next);
9156 if(flg == GF_DATA){
9157 register unsigned char c;
9158 register int filt_only_c0;
9160 filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9162 while(GF_GETC(f, c)){
9164 if(((c < 0x20 || c == 0x7f)
9165 || (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9166 && !(ASCII_ISSPACE((unsigned char) c)
9167 || c == '\016' || c == '\017' || c == '\033')){
9168 GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9169 GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9171 else
9172 GF_PUTC(f->next, c);
9175 GF_END(f, f->next);
9177 else if(flg == GF_EOD){
9178 (void) GF_FLUSH(f->next);
9179 (*f->next->f)(f->next, GF_EOD);
/*
 * function called from the outside to set
 * control filter's option, which says to filter C0 control characters
 * but not C1 control chars. We don't call it at all if we don't want
 * to filter C0 chars either.
 *
 *   Returns: the pointer it was given, as a void *
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = (void *) filt_only_c0;

    return(opt);
}
9198 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9199 * This prevents the possibility of embedding other tags.
9200 * We assume that this filter should only be used for something
9201 * that is eventually writing to a display, which has the special
9202 * knowledge of quoted TAG_EMBEDs.
9204 void
9205 gf_tag_filter(FILTER_S *f, int flg)
9207 GF_INIT(f, f->next);
9209 if(flg == GF_DATA){
9210 register unsigned char c;
9212 while(GF_GETC(f, c)){
9214 if((c & 0xff) == (TAG_EMBED & 0xff)){
9215 GF_PUTC(f->next, TAG_EMBED);
9216 GF_PUTC(f->next, c);
9218 else
9219 GF_PUTC(f->next, c);
9222 GF_END(f, f->next);
9224 else if(flg == GF_EOD){
9225 (void) GF_FLUSH(f->next);
9226 (*f->next->f)(f->next, GF_EOD);
9232 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9233 * specified line width
9237 typedef struct wrap_col_s {
9238 unsigned bold:1;
9239 unsigned uline:1;
9240 unsigned inverse:1;
9241 unsigned tags:1;
9242 unsigned do_indent:1;
9243 unsigned on_comma:1;
9244 unsigned flowed:1;
9245 unsigned delsp:1;
9246 unsigned quoted:1;
9247 unsigned allwsp:1;
9248 unsigned hard_nl:1;
9249 unsigned leave_flowed:1;
9250 unsigned use_color:1;
9251 unsigned hdr_color:1;
9252 unsigned for_compose:1;
9253 unsigned handle_soft_hyphen:1;
9254 unsigned saw_soft_hyphen:1;
9255 unsigned trailing_space:1;
9256 unsigned char utf8buf[7];
9257 unsigned char *utf8bufp;
9258 COLOR_PAIR *color;
9259 STORE_S *spaces;
9260 short embedded,
9261 space_len;
9262 char *lineendp;
9263 int anchor,
9264 prefbrk,
9265 prefbrkn,
9266 quote_depth,
9267 quote_count,
9268 sig,
9269 state,
9270 wrap_col,
9271 wrap_max,
9272 margin_l,
9273 margin_r,
9274 indent;
9275 char special[256];
9276 } WRAP_S;
/*
 * Accessors for the WRAP_S structure that hangs off of a wrap filter's
 * "opt" pointer.  F is a pointer to the FILTER_S in question.  Each
 * accessor expands to a fully parenthesized lvalue (or value) so it can
 * be combined with any operator at the call site.
 */

/* geometry: margins, wrap columns and indent */
#define	WRAP_MARG_L(F)	(((WRAP_S *)(F)->opt)->margin_l)
#define	WRAP_MARG_R(F)	(((WRAP_S *)(F)->opt)->margin_r)
/* usable columns: one cell is held back when leave_flowed is set */
#define	WRAP_COL(F)	(((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_MAX_COL(F)	(((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_INDENT(F)	(((WRAP_S *)(F)->opt)->indent)
#define	WRAP_DO_IND(F)	(((WRAP_S *)(F)->opt)->do_indent)
#define	WRAP_COMMA(F)	(((WRAP_S *)(F)->opt)->on_comma)

/* flowed-text handling */
#define	WRAP_FLOW(F)	(((WRAP_S *)(F)->opt)->flowed)
#define	WRAP_DELSP(F)	(((WRAP_S *)(F)->opt)->delsp)
#define	WRAP_FL_QD(F)	(((WRAP_S *)(F)->opt)->quote_depth)
#define	WRAP_FL_QC(F)	(((WRAP_S *)(F)->opt)->quote_count)
#define	WRAP_FL_SIG(F)	(((WRAP_S *)(F)->opt)->sig)
#define	WRAP_HARD(F)	(((WRAP_S *)(F)->opt)->hard_nl)
#define	WRAP_LV_FLD(F)	(((WRAP_S *)(F)->opt)->leave_flowed)

/* color and composer flags */
#define	WRAP_USE_CLR(F)	(((WRAP_S *)(F)->opt)->use_color)
#define	WRAP_HDR_CLR(F)	(((WRAP_S *)(F)->opt)->hdr_color)
#define	WRAP_FOR_CMPS(F) (((WRAP_S *)(F)->opt)->for_compose)

/* soft hyphen and UTF-8 assembly buffer */
#define	WRAP_HANDLE_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define	WRAP_SAW_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->saw_soft_hyphen)
#define	WRAP_UTF8BUF(F, C) (((WRAP_S *)(F)->opt)->utf8buf[(C)])
#define	WRAP_UTF8BUFP(F) (((WRAP_S *)(F)->opt)->utf8bufp)

/* state machine bookkeeping and text attributes */
#define	WRAP_STATE(F)	(((WRAP_S *)(F)->opt)->state)
#define	WRAP_QUOTED(F)	(((WRAP_S *)(F)->opt)->quoted)
#define	WRAP_TAGS(F)	(((WRAP_S *)(F)->opt)->tags)
#define	WRAP_BOLD(F)	(((WRAP_S *)(F)->opt)->bold)
#define	WRAP_ULINE(F)	(((WRAP_S *)(F)->opt)->uline)
#define	WRAP_INVERSE(F)	(((WRAP_S *)(F)->opt)->inverse)
#define	WRAP_LASTC(F)	(((WRAP_S *)(F)->opt)->lineendp)
#define	WRAP_EMBED(F)	(((WRAP_S *)(F)->opt)->embedded)
#define	WRAP_ANCHOR(F)	(((WRAP_S *)(F)->opt)->anchor)

/* preferred break point: byte offset into the line buffer and its width */
#define	WRAP_PB_OFF(F)	(((WRAP_S *)(F)->opt)->prefbrk)
#define	WRAP_PB_LEN(F)	(((WRAP_S *)(F)->opt)->prefbrkn)

/* pending whitespace */
#define	WRAP_ALLWSP(F)	(((WRAP_S *)(F)->opt)->allwsp)
#define	WRAP_SPC_LEN(F)	(((WRAP_S *)(F)->opt)->space_len)
#define	WRAP_TRL_SPC(F)	(((WRAP_S *)(F)->opt)->trailing_space)

/* per-byte "special character" table, indexed by byte value C */
#define	WRAP_SPEC(F, C)	(((WRAP_S *)(F)->opt)->special[(C)])

#define	WRAP_COLOR(F)	(((WRAP_S *)(F)->opt)->color)
/* true when a color pair exists and its foreground string is non-empty */
#define	WRAP_COLOR_SET(F)  ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define	WRAP_SPACES(F)	(((WRAP_S *)(F)->opt)->spaces)
/*
 * WRAP_PUTC - append byte C to the filter's pending line buffer
 * ((F)->line, write pointer (F)->linep) and add W to the buffered
 * width kept in (F)->f2.
 *
 * WRAP_LASTC(F) marks the last byte of the allocation; when the write
 * pointer reaches it the buffer is grown before the byte is stored.
 * The new size is 2 * (used + 1) rather than 2 * used so that the buffer
 * really grows for every value of "used": with 2 * used a count of 0
 * asked for a zero-byte buffer, and a count of 1 left WRAP_LASTC equal to
 * the write pointer, which then stepped past it and never triggered
 * another resize.
 *
 * F is evaluated several times; C and W are evaluated exactly once.
 * The expansion is a brace block, so use it as a statement of its own.
 */
#define	WRAP_PUTC(F,C,W) { \
		    if((F)->linep == WRAP_LASTC(F)){ \
			size_t wp_used = (F)->linep - (F)->line; \
			size_t wp_size = 2 * (wp_used + 1); \
			fs_resize((void **) &(F)->line, \
				  wp_size * sizeof(char)); \
			(F)->linep = &(F)->line[wp_used]; \
			WRAP_LASTC(F) = &(F)->line[wp_size - 1]; \
		    } \
		    *(F)->linep++ = (C); \
		    (F)->f2 += (W); \
		}
/*
 * WRAP_EMBED_PUTC - store one byte C of an embedded tag.  While nothing
 * is buffered in the line ((F)->f2 is zero) the byte is written to the
 * pending-spaces store; otherwise it is appended to the line buffer with
 * a width of zero.
 */
#define	WRAP_EMBED_PUTC(F,C) { \
		    if(!(F)->f2) \
		      so_writec(C, WRAP_SPACES(F)); \
		    else{ \
			WRAP_PUTC((F), C, 0); \
		    } \
		}
/*
 * WRAP_COLOR_UNSET - forget the remembered color by emptying its
 * foreground string.  Nothing happens when there is no color pair or
 * its foreground string is already empty.
 */
#define	WRAP_COLOR_UNSET(F)	{ \
		    if((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0])) \
		      WRAP_COLOR(F)->fg[0] = '\0'; \
		}
/*
 * Flag bits for the last argument of wrap_flush_s (set by
 * wrap_flush_embed)
 */
#define	WFE_NONE	0x00	/* Nothing special */
#define	WFE_CNT_HANDLE	0x01	/* account for/don't write handles */
9350 int wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9351 int wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
9352 unsigned char **, unsigned char **);
9353 int wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
9354 unsigned char **, unsigned char **, int);
9355 int wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
9356 unsigned char **, unsigned char **);
9357 int wrap_bol(FILTER_S *, int, int, unsigned char **,
9358 unsigned char **, unsigned char **, unsigned char **);
9359 int wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
9360 unsigned char **, unsigned char **);
9363 * the no longer simple filter, breaks lines at end of white space nearest
9364 * to global "gf_wrap_width" in length
9365 * It also supports margins, indents (inverse indenting, really) and
9366 * flowed text (ala RFC 3676)
9369 void
9370 gf_wrap(FILTER_S *f, int flg)
9372 register long i;
9373 GF_INIT(f, f->next);
9376 * f->f1 state
9377 * f->line buffer where next "word" being considered is stored
9378 * f->f2 width in screen cells of f->line stuff
9379 * f->n width in screen cells of the part of this line committed to next
9380 * filter so far
9383 if(flg == GF_DATA){
9384 register unsigned char c;
9385 register int state = f->f1;
9386 int width, full_character;
9388 while(GF_GETC(f, c)){
9390 switch(state){
9391 case CCR : /* CRLF or CR in text ? */
9392 state = BOL; /* either way, handle start */
9394 if(WRAP_FLOW(f)){
9395 /* wrapped line? */
9396 if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9398 * whack trailing space char, but be aware
9399 * of embeds in space buffer. grok them just
9400 * in case they contain a 0x20 value
9402 if(WRAP_DELSP(f)){
9403 char *sb, *sbp, *scp = NULL;
9404 int x;
9406 for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9407 switch(*sbp){
9408 case ' ' :
9409 scp = sbp;
9410 break;
9412 case TAG_EMBED :
9413 sbp++;
9414 switch (*sbp++){
9415 case TAG_HANDLE :
9416 x = (int) *sbp++;
9417 if(strlen(sbp) >= x)
9418 sbp += (x - 1);
9420 break;
9422 case TAG_FGCOLOR :
9423 case TAG_BGCOLOR :
9424 if(strlen(sbp) >= RGBLEN)
9425 sbp += (RGBLEN - 1);
9427 break;
9429 default :
9430 break;
9433 break;
9435 default :
9436 break;
9440 /* replace space buf without trailing space char */
9441 if(scp){
9442 STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9444 *scp++ = '\0';
9445 WRAP_SPC_LEN(f)--;
9446 WRAP_TRL_SPC(f) = 0;
9448 so_puts(ns, sb);
9449 so_puts(ns, scp);
9451 so_give(&WRAP_SPACES(f));
9452 WRAP_SPACES(f) = ns;
9456 else{ /* fixed line */
9457 WRAP_HARD(f) = 1;
9458 wrap_flush(f, &ip, &eib, &op, &eob);
9459 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9462 * When we get to a real end of line, we don't need to
9463 * remember what the special color was anymore because
9464 * we aren't going to be changing back to it. We unset it
9465 * so that we don't keep resetting the color to normal.
9467 WRAP_COLOR_UNSET(f);
9470 if(c == '\012'){ /* get c following LF */
9471 break;
9473 /* else c is first char of new line, fall thru */
9475 else{
9476 wrap_flush(f, &ip, &eib, &op, &eob);
9477 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9478 WRAP_COLOR_UNSET(f); /* see note above */
9479 if(c == '\012'){
9480 break;
9482 /* else fall thru to deal with beginning of line */
9485 case BOL :
9486 if(WRAP_FLOW(f)){
9487 if(c == '>'){
9488 WRAP_FL_QC(f) = 1; /* init it */
9489 state = FL_QLEV; /* go collect it */
9491 else {
9492 /* if EMBEDed, process it and return here */
9493 if(c == (unsigned char) TAG_EMBED){
9494 WRAP_EMBED_PUTC(f, TAG_EMBED);
9495 WRAP_STATE(f) = state;
9496 state = TAG;
9497 continue;
9500 /* quote level change implies new paragraph */
9501 if(WRAP_FL_QD(f)){
9502 WRAP_FL_QD(f) = 0;
9503 if(WRAP_HARD(f) == 0){
9504 WRAP_HARD(f) = 1;
9505 wrap_flush(f, &ip, &eib, &op, &eob);
9506 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9507 WRAP_COLOR_UNSET(f); /* see note above */
9511 if(WRAP_HARD(f)){
9512 wrap_bol(f, 0, 1, &ip, &eib, &op,
9513 &eob); /* write quoting prefix */
9514 WRAP_HARD(f) = 0;
9517 switch (c) {
9518 case '\015' : /* a blank line? */
9519 wrap_flush(f, &ip, &eib, &op, &eob);
9520 state = CCR; /* go collect it */
9521 break;
9523 case ' ' : /* space stuffed */
9524 state = FL_STF; /* just eat it */
9525 break;
9527 case '-' : /* possible sig-dash */
9528 WRAP_FL_SIG(f) = 1; /* init state */
9529 state = FL_SIG; /* go collect it */
9530 break;
9532 default :
9533 state = DFL; /* go back to normal */
9534 goto case_dfl; /* handle c like DFL case */
9538 else{
9539 state = DFL;
9540 if(WRAP_COMMA(f) && c == TAB){
9541 wrap_bol(f, 1, 0, &ip, &eib, &op,
9542 &eob); /* convert to normal indent */
9543 break;
9546 wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9547 goto case_dfl; /* handle c like DFL case */
9550 break;
9552 case FL_QLEV :
9553 if(c == '>'){ /* another level */
9554 WRAP_FL_QC(f)++;
9556 else {
9557 /* if EMBEDed, process it and return here */
9558 if(c == (unsigned char) TAG_EMBED){
9559 WRAP_EMBED_PUTC(f, TAG_EMBED);
9560 WRAP_STATE(f) = state;
9561 state = TAG;
9562 continue;
9565 /* quote level change signals new paragraph */
9566 if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9567 WRAP_FL_QD(f) = WRAP_FL_QC(f);
9568 if(WRAP_HARD(f) == 0){ /* add hard newline */
9569 WRAP_HARD(f) = 1; /* hard newline */
9570 wrap_flush(f, &ip, &eib, &op, &eob);
9571 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9572 WRAP_COLOR_UNSET(f); /* see note above */
9576 if(WRAP_HARD(f)){
9577 wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9578 WRAP_HARD(f) = 0;
9581 switch (c) {
9582 case '\015' : /* a blank line? */
9583 wrap_flush(f, &ip, &eib, &op, &eob);
9584 state = CCR; /* go collect it */
9585 break;
9587 case ' ' : /* space-stuffed! */
9588 state = FL_STF; /* just eat it */
9589 break;
9591 case '-' : /* sig dash? */
9592 WRAP_FL_SIG(f) = 1;
9593 state = FL_SIG;
9594 break;
9596 default : /* something else */
9597 state = DFL;
9598 goto case_dfl; /* handle c like DFL */
9602 break;
9604 case FL_STF : /* space stuffed */
9605 switch (c) {
9606 case '\015' : /* a blank line? */
9607 wrap_flush(f, &ip, &eib, &op, &eob);
9608 state = CCR; /* go collect it */
9609 break;
9611 case (unsigned char) TAG_EMBED : /* process TAG data */
9612 WRAP_EMBED_PUTC(f, TAG_EMBED);
9613 WRAP_STATE(f) = state; /* and return */
9614 state = TAG;
9615 continue;
9617 case '-' : /* sig dash? */
9618 WRAP_FL_SIG(f) = 1;
9619 WRAP_ALLWSP(f) = 0;
9620 state = FL_SIG;
9621 break;
9623 default : /* something else */
9624 state = DFL;
9625 goto case_dfl; /* handle c like DFL */
9628 break;
9630 case FL_SIG : /* sig-dash collector */
9631 switch (WRAP_FL_SIG(f)){ /* possible sig-dash? */
9632 case 1 :
9633 if(c != '-'){ /* not a sigdash */
9634 if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9635 wrap_flush_embed(f, &ip, &eib, &op,
9636 &eob); /* note any embedded*/
9637 wrap_eol(f, 1, &ip, &eib,
9638 &op, &eob); /* plunk down newline */
9639 wrap_bol(f, 1, 1, &ip, &eib,
9640 &op, &eob); /* write any prefix */
9643 WRAP_PUTC(f,'-', 1); /* write what we got */
9645 WRAP_FL_SIG(f) = 0;
9646 state = DFL;
9647 goto case_dfl;
9650 /* don't put anything yet until we know to wrap or not */
9651 WRAP_FL_SIG(f) = 2;
9652 break;
9654 case 2 :
9655 if(c != ' '){ /* not a sigdash */
9656 WRAP_PUTC(f, '-', 1);
9657 if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9658 wrap_flush_embed(f, &ip, &eib, &op,
9659 &eob); /* note any embedded*/
9660 wrap_eol(f, 1, &ip, &eib,
9661 &op, &eob); /* plunk down newline */
9662 wrap_bol(f, 1, 1, &ip, &eib, &op,
9663 &eob); /* write any prefix */
9666 WRAP_PUTC(f,'-', 1); /* write what we got */
9668 WRAP_FL_SIG(f) = 0;
9669 state = DFL;
9670 goto case_dfl;
9673 /* don't put anything yet until we know to wrap or not */
9674 WRAP_FL_SIG(f) = 3;
9675 break;
9677 case 3 :
9678 if(c == '\015'){ /* success! */
9679 /* known sigdash, newline if soft nl */
9680 if(WRAP_SPC_LEN(f)){
9681 wrap_flush(f, &ip, &eib, &op, &eob);
9682 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9683 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9685 WRAP_PUTC(f,'-',1);
9686 WRAP_PUTC(f,'-',1);
9687 WRAP_PUTC(f,' ',1);
9689 state = CCR;
9690 break;
9692 else{
9693 WRAP_FL_SIG(f) = 4; /* possible success */
9696 case 4 :
9697 switch(c){
9698 case (unsigned char) TAG_EMBED :
9700 * At this point we're almost 100% sure that we've got
9701 * a sigdash. Putc it (adding newline if previous
9702 * was a soft nl) so we get it the right color
9703 * before we store this new embedded stuff
9705 if(WRAP_SPC_LEN(f)){
9706 wrap_flush(f, &ip, &eib, &op, &eob);
9707 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9708 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9710 WRAP_PUTC(f,'-',1);
9711 WRAP_PUTC(f,'-',1);
9712 WRAP_PUTC(f,' ',1);
9714 WRAP_FL_SIG(f) = 5;
9715 break;
9717 case '\015' : /* success! */
9719 * We shouldn't get here, but in case we do, we have
9720 * not yet put the sigdash
9722 if(WRAP_SPC_LEN(f)){
9723 wrap_flush(f, &ip, &eib, &op, &eob);
9724 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9725 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9727 WRAP_PUTC(f,'-',1);
9728 WRAP_PUTC(f,'-',1);
9729 WRAP_PUTC(f,' ',1);
9731 state = CCR;
9732 break;
9734 default : /* that's no sigdash! */
9735 /* write what we got but didn't put yet */
9736 WRAP_PUTC(f,'-', 1);
9737 WRAP_PUTC(f,'-', 1);
9738 WRAP_PUTC(f,' ', 1);
9740 WRAP_FL_SIG(f) = 0;
9741 wrap_flush(f, &ip, &eib, &op, &eob);
9742 WRAP_SPC_LEN(f) = 1;
9743 state = DFL; /* set normal state */
9744 goto case_dfl; /* and go do "c" */
9747 break;
9749 case 5 :
9750 WRAP_STATE(f) = FL_SIG; /* come back here */
9751 WRAP_FL_SIG(f) = 6; /* and seek EOL */
9752 WRAP_EMBED_PUTC(f, TAG_EMBED);
9753 state = TAG; /* process embed */
9754 goto case_tag;
9756 case 6 :
9758 * at this point we've already putc the sigdash in case 4
9760 switch(c){
9761 case (unsigned char) TAG_EMBED :
9762 WRAP_FL_SIG(f) = 5;
9763 break;
9765 case '\015' : /* success! */
9766 state = CCR;
9767 break;
9769 default : /* that's no sigdash! */
9771 * probably never reached (fake sigdash with embedded
9772 * stuff) but if this did get reached, then we
9773 * might have accidentally disobeyed a soft nl
9775 WRAP_FL_SIG(f) = 0;
9776 wrap_flush(f, &ip, &eib, &op, &eob);
9777 WRAP_SPC_LEN(f) = 1;
9778 state = DFL; /* set normal state */
9779 goto case_dfl; /* and go do "c" */
9782 break;
9785 default :
9786 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9787 WRAP_FL_SIG(f)));
9788 WRAP_FL_SIG(f) = 0;
9789 state = DFL; /* set normal state */
9790 goto case_dfl; /* and go process "c" */
9793 break;
9795 case_dfl :
9796 case DFL :
9798 * This was just if(WRAP_SPEC(f, c)) before the change to add
9799 * the == 0 test. This isn't quite right, either. We should really
9800 * be looking for special characters in the UCS characters, not
9801 * in the incoming stream of UTF-8. It is not right to
9802 * call this on bytes that are in the middle of a UTF-8 character,
9803 * hence the == 0 test which restricts it to the first byte
9804 * of a character. This isn't right, either, but it's closer.
9805 * Also change the definition of WRAP_SPEC so that isspace only
9806 * matches ascii characters, which will never be in the middle
9807 * of a UTF-8 multi-byte character.
9809 if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9810 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9811 switch(c){
9812 default :
9813 if(WRAP_QUOTED(f))
9814 break;
9816 if(f->f2){ /* any non-lwsp to flush? */
9817 if(WRAP_COMMA(f)){
9818 /* remember our second best break point */
9819 WRAP_PB_OFF(f) = f->linep - f->line;
9820 WRAP_PB_LEN(f) = f->f2;
9821 break;
9823 else
9824 wrap_flush(f, &ip, &eib, &op, &eob);
9827 switch(c){ /* remember separator */
9828 case ' ' :
9829 WRAP_SPC_LEN(f)++;
9830 WRAP_TRL_SPC(f) = 1;
9831 so_writec(' ',WRAP_SPACES(f));
9832 break;
9834 case TAB :
9836 int i = (int) f->n + WRAP_SPC_LEN(f);
9839 WRAP_SPC_LEN(f)++;
9840 while(++i & 0x07);
9842 so_writec(TAB,WRAP_SPACES(f));
9843 WRAP_TRL_SPC(f) = 0;
9846 break;
9848 default : /* some control char? */
9849 WRAP_SPC_LEN(f) += 2;
9850 WRAP_TRL_SPC(f) = 0;
9851 break;
9854 continue;
9856 case '\"' :
9857 WRAP_QUOTED(f) = !WRAP_QUOTED(f);
9858 break;
9860 case '\015' : /* already has newline? */
9861 state = CCR;
9862 continue;
9864 case '\012' : /* bare LF in text? */
9865 wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
9866 wrap_eol(f, 0, &ip, &eib, &op, &eob); /* meant */
9867 wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
9868 continue;
9870 case (unsigned char) TAG_EMBED :
9871 WRAP_EMBED_PUTC(f, TAG_EMBED);
9872 WRAP_STATE(f) = state;
9873 state = TAG;
9874 continue;
9876 case ',' :
9877 if(!WRAP_QUOTED(f)){
9878 /* handle this special case in general code below */
9879 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
9880 && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
9881 break;
9883 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
9884 if(WRAP_ALLWSP(f)) /* if anything visible */
9885 wrap_flush(f, &ip, &eib, &op,
9886 &eob); /* ... blat buf'd chars */
9888 wrap_eol(f, 1, &ip, &eib, &op,
9889 &eob); /* plunk down newline */
9890 wrap_bol(f, 1, 1, &ip, &eib, &op,
9891 &eob); /* write any prefix */
9894 WRAP_PUTC(f, ',', 1); /* put out comma */
9895 wrap_flush(f, &ip, &eib, &op,
9896 &eob); /* write buf'd chars */
9897 continue;
9900 break;
9903 else if(WRAP_HANDLE_SOFT_HYPHEN(f)
9904 && (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
9905 && WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
9907 * This is a soft hyphen. If there is enough space for
9908 * a real hyphen to fit on the line here then we can
9909 * flush everything up to before the soft hyphen,
9910 * and simply remember that we saw a soft hyphen.
9911 * If it turns out that we can't fit the next piece in
9912 * then wrap_eol will append a real hyphen to the line.
9913 * If we can fit another piece in it will be because we've
9914 * reached the next break point. At that point we'll flush
9915 * everything but won't include the unneeded hyphen. We erase
9916 * the fact that we saw this soft hyphen because it have
9917 * become irrelevant.
9919 * If the hyphen is the character that puts us over the edge
9920 * we go through the else case.
9923 /* erase this soft hyphen character from buffer */
9924 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
9926 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9927 if(f->f2) /* any non-lwsp to flush? */
9928 wrap_flush(f, &ip, &eib, &op, &eob);
9930 /* remember that we saw the soft hyphen */
9931 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9933 else{
9935 * Everything up to the hyphen fits, otherwise it
9936 * would have already been flushed the last time
9937 * through the loop. But the hyphen won't fit. So
9938 * we need to go back to the last line break and
9939 * break there instead. Then start a new line with
9940 * the buffered up characters and the soft hyphen.
9942 wrap_flush_embed(f, &ip, &eib, &op, &eob);
9943 wrap_eol(f, 1, &ip, &eib, &op,
9944 &eob); /* plunk down newline */
9945 wrap_bol(f,1,1, &ip, &eib, &op,
9946 &eob); /* write any prefix */
9949 * Now we're in the same situation as we would have
9950 * been above except we're on a new line. Try to
9951 * flush out the characters seen up to the hyphen.
9953 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9954 if(f->f2) /* any non-lwsp to flush? */
9955 wrap_flush(f, &ip, &eib, &op, &eob);
9957 /* remember that we saw the soft hyphen */
9958 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9960 else
9961 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9964 continue;
9967 full_character = 0;
9970 unsigned char *inputp;
9971 unsigned long remaining_octets;
9972 UCS ucs;
9974 if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){ /* always true */
9976 *WRAP_UTF8BUFP(f)++ = c;
9977 remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9978 if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
9979 full_character++;
9980 if(c == TAB){
9981 int i = (int) f->n;
9983 while(i & 0x07)
9984 i++;
9986 width = i - f->n;
9988 else if(c < 0x80 && iscntrl((unsigned char) c))
9989 width = 2;
9990 else
9991 width = 1;
9993 else{
9994 inputp = &WRAP_UTF8BUF(f, 0);
9995 ucs = (UCS) utf8_get(&inputp, &remaining_octets);
9996 switch(ucs){
9997 case U8G_ENDSTRG: /* incomplete character, wait */
9998 case U8G_ENDSTRI: /* incomplete character, wait */
9999 width = 0;
10000 break;
10002 default:
10003 if(ucs & U8G_ERROR || ucs == UBOGON){
10005 * None of these cases is supposed to happen. If it
10006 * does happen then the input stream isn't UTF-8
10007 * so something is wrong. Writechar will treat
10008 * each octet in the input buffer as a separate
10009 * error character and print a '?' for each,
10010 * so the width will be the number of octets.
10012 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10013 full_character++;
10015 else{
10016 /* got a character */
10017 width = wcellwidth(ucs);
10018 full_character++;
10020 if(width < 0){
10022 * This happens when we have a UTF-8 character that
10023 * we aren't able to print in our locale. For example,
10024 * if the locale is setup with the terminal
10025 * expecting ISO-8859-1 characters then there are
10026 * lots of UTF-8 characters that can't be printed.
10027 * Print a '?' instead.
10029 width = 1;
10033 break;
10037 else{
10039 * This cannot happen because an error would have
10040 * happened at least by character #6. So if we get
10041 * here there is a bug in utf8_get().
10043 if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
10044 *WRAP_UTF8BUFP(f)++ = c;
10048 * We could possibly do some more sophisticated
10049 * resynchronization here, but we aren't doing
10050 * anything in Writechar so it wouldn't match up
10051 * with that anyway. Just figure each character will
10052 * end up being printed as a ? character.
10054 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10055 full_character++;
10059 if(WRAP_ALLWSP(f)){
10061 * Nothing is visible yet but the first word may be too long
10062 * all by itself. We need to break early.
10064 if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
10066 * A little reaching behind the curtain here.
10067 * if there's at least a preferable break point, use
10068 * it and stuff what's left back into the wrap buffer.
10069 * The "nwsp" latch is used to skip leading whitespace
10070 * The second half of the test prevents us from wrapping
10071 * at the preferred break point in the case that it
10072 * is so early in the line that it doesn't help.
10073 * That is, the width of the indent is even more than
10074 * the width of the first part before the preferred
10075 * break point. An example would be breaking after
10076 * "To:" when the indent is 4 which is > 3.
10078 if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
10079 char *p1 = f->line + WRAP_PB_OFF(f);
10080 char *p2 = f->linep;
10081 char c2;
10082 int nwsp = 0, left_after_wrap;
10084 left_after_wrap = f->f2 - WRAP_PB_LEN(f);
10086 f->f2 = WRAP_PB_LEN(f);
10087 f->linep = p1;
10089 wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
10091 /* put back rest of characters */
10092 while(p1 < p2){
10093 c2 = *p1++;
10094 if(!(c2 == ' ' || c2 == '\t') || nwsp){
10095 WRAP_PUTC(f, c2, 0);
10096 nwsp = 1;
10098 else
10099 left_after_wrap--; /* wrong if a tab! */
10102 f->f2 = MAX(left_after_wrap, 0);
10104 wrap_eol(f, 1, &ip, &eib, &op,
10105 &eob); /* plunk down newline */
10106 wrap_bol(f,1,1, &ip, &eib, &op,
10107 &eob); /* write any prefix */
10110 * What's this for?
10111 * If we do the less preferable break point at
10112 * the space we don't want to lose the fact that
10113 * we might be able to break at this comma for
10114 * the next one.
10116 if(full_character && c == ','){
10117 WRAP_PUTC(f, c, 1);
10118 wrap_flush(f, &ip, &eib, &op, &eob);
10119 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10122 else{
10123 wrap_flush(f, &ip, &eib, &op, &eob);
10125 wrap_eol(f, 1, &ip, &eib, &op,
10126 &eob); /* plunk down newline */
10127 wrap_bol(f,1,1, &ip, &eib, &op,
10128 &eob); /* write any prefix */
10132 else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10133 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10134 wrap_eol(f, 1, &ip, &eib, &op,
10135 &eob); /* plunk down newline */
10136 wrap_bol(f,1,1, &ip, &eib, &op,
10137 &eob); /* write any prefix */
10141 * Commit entire multibyte UTF-8 character at once
10142 * instead of writing partial characters into the
10143 * buffer.
10145 if(full_character){
10146 unsigned char *q;
10148 for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10149 WRAP_PUTC(f, *q, width);
10150 width = 0;
10153 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10156 break;
10158 case_tag :
10159 case TAG :
10160 WRAP_EMBED_PUTC(f, c);
10161 switch(c){
10162 case TAG_HANDLE :
10163 WRAP_EMBED(f) = -1;
10164 state = HANDLE;
10165 break;
10167 case TAG_FGCOLOR :
10168 case TAG_BGCOLOR :
10169 WRAP_EMBED(f) = RGBLEN;
10170 state = HDATA;
10171 break;
10173 default :
10174 state = WRAP_STATE(f);
10175 break;
10178 break;
10180 case HANDLE :
10181 WRAP_EMBED_PUTC(f, c);
10182 WRAP_EMBED(f) = c;
10183 state = HDATA;
10184 break;
10186 case HDATA :
10187 if(f->f2){
10188 WRAP_PUTC(f, c, 0);
10190 else
10191 so_writec(c, WRAP_SPACES(f));
10193 if(!(WRAP_EMBED(f) -= 1)){
10194 state = WRAP_STATE(f);
10197 break;
10201 f->f1 = state;
10202 GF_END(f, f->next);
10204 else if(flg == GF_EOD){
10205 wrap_flush(f, &ip, &eib, &op, &eob);
10206 if(WRAP_COLOR(f))
10207 free_color_pair(&WRAP_COLOR(f));
10209 fs_give((void **) &f->line); /* free temp line buffer */
10210 so_give(&WRAP_SPACES(f));
10211 fs_give((void **) &f->opt); /* free wrap widths struct */
10212 (void) GF_FLUSH(f->next);
10213 (*f->next->f)(f->next, GF_EOD);
10215 else if(flg == GF_RESET){
10216 dprint((9, "-- gf_reset wrap\n"));
10217 f->f1 = BOL;
10218 f->n = 0L; /* displayed length of line so far */
10219 f->f2 = 0; /* displayed length of buffered chars */
10220 WRAP_HARD(f) = 1; /* starting at beginning of line */
10221 if(! (WRAP_S *) f->opt)
10222 f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
10224 while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10225 WRAP_INDENT(f) /= 2;
10227 f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10228 f->linep = f->line;
10229 WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
10231 for(i = 0; i < 256; i++)
10232 ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10233 || i == '\015'
10234 || i == '\012'
10235 || (i == (unsigned char) TAG_EMBED
10236 && WRAP_TAGS(f))
10237 || (i == ',' && WRAP_COMMA(f)
10238 && !WRAP_QUOTED(f))
10239 || ASCII_ISSPACE(i));
10240 WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10241 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10246 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10247 unsigned char **opp, unsigned char **eobp)
10249 register char *s;
10250 register int n;
10252 s = (char *)so_text(WRAP_SPACES(f));
10253 n = so_tell(WRAP_SPACES(f));
10254 so_seek(WRAP_SPACES(f), 0L, 0);
10255 wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10256 so_truncate(WRAP_SPACES(f), 0L);
10257 WRAP_SPC_LEN(f) = 0;
10258 WRAP_TRL_SPC(f) = 0;
10259 s = f->line;
10260 n = f->linep - f->line;
10261 wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10262 f->f2 = 0;
10263 f->linep = f->line;
10264 WRAP_PB_OFF(f) = 0;
10265 WRAP_PB_LEN(f) = 0;
10267 return 0;
10271 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10273 register char *s;
10274 register int n;
10275 s = (char *)so_text(WRAP_SPACES(f));
10276 n = so_tell(WRAP_SPACES(f));
10277 so_seek(WRAP_SPACES(f), 0L, 0);
10278 wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10279 so_truncate(WRAP_SPACES(f), 0L);
10280 WRAP_SPC_LEN(f) = 0;
10281 WRAP_TRL_SPC(f) = 0;
10283 return 0;
10287 wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
10288 unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
10290 f->n += w;
10292 for(; n > 0; n--,s++){
10293 if(*s == TAG_EMBED){
10294 if(n-- > 0){
10295 switch(*++s){
10296 case TAG_BOLDON :
10297 GF_PUTC_GLO(f->next,TAG_EMBED);
10298 GF_PUTC_GLO(f->next,TAG_BOLDON);
10299 WRAP_BOLD(f) = 1;
10300 break;
10301 case TAG_BOLDOFF :
10302 GF_PUTC_GLO(f->next,TAG_EMBED);
10303 GF_PUTC_GLO(f->next,TAG_BOLDOFF);
10304 WRAP_BOLD(f) = 0;
10305 break;
10306 case TAG_ULINEON :
10307 GF_PUTC_GLO(f->next,TAG_EMBED);
10308 GF_PUTC_GLO(f->next,TAG_ULINEON);
10309 WRAP_ULINE(f) = 1;
10310 break;
10311 case TAG_ULINEOFF :
10312 GF_PUTC_GLO(f->next,TAG_EMBED);
10313 GF_PUTC_GLO(f->next,TAG_ULINEOFF);
10314 WRAP_ULINE(f) = 0;
10315 break;
10316 case TAG_INVOFF :
10317 GF_PUTC_GLO(f->next,TAG_EMBED);
10318 GF_PUTC_GLO(f->next,TAG_INVOFF);
10319 WRAP_ANCHOR(f) = 0;
10320 break;
10321 case TAG_HANDLE :
10322 if((flags & WFE_CNT_HANDLE) == 0)
10323 GF_PUTC_GLO(f->next,TAG_EMBED);
10325 if(n-- > 0){
10326 int i = *++s;
10328 if((flags & WFE_CNT_HANDLE) == 0)
10329 GF_PUTC_GLO(f->next, TAG_HANDLE);
10331 if(i <= n){
10332 n -= i;
10334 if((flags & WFE_CNT_HANDLE) == 0)
10335 GF_PUTC_GLO(f->next, i);
10337 WRAP_ANCHOR(f) = 0;
10338 while(i-- > 0){
10339 WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');
10341 if((flags & WFE_CNT_HANDLE) == 0)
10342 GF_PUTC_GLO(f->next,*s);
10347 break;
10348 case TAG_FGCOLOR :
10349 if(pico_usingcolor() && n >= RGBLEN){
10350 int i;
10351 GF_PUTC_GLO(f->next,TAG_EMBED);
10352 GF_PUTC_GLO(f->next,TAG_FGCOLOR);
10353 if(!WRAP_COLOR(f))
10354 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10355 strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
10356 WRAP_COLOR(f)->fg[RGBLEN]='\0';
10357 i = RGBLEN;
10358 n -= i;
10359 while(i-- > 0)
10360 GF_PUTC_GLO(f->next,
10361 (*++s) & 0xff);
10363 break;
10364 case TAG_BGCOLOR :
10365 if(pico_usingcolor() && n >= RGBLEN){
10366 int i;
10367 GF_PUTC_GLO(f->next,TAG_EMBED);
10368 GF_PUTC_GLO(f->next,TAG_BGCOLOR);
10369 if(!WRAP_COLOR(f))
10370 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10371 strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
10372 WRAP_COLOR(f)->bg[RGBLEN]='\0';
10373 i = RGBLEN;
10374 n -= i;
10375 while(i-- > 0)
10376 GF_PUTC_GLO(f->next,
10377 (*++s) & 0xff);
10379 break;
10380 default :
10381 break;
10385 else if(w){
10387 if(f->n <= WRAP_MAX_COL(f)){
10388 GF_PUTC_GLO(f->next, (*s) & 0xff);
10390 else{
10391 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
10394 WRAP_ALLWSP(f) = 0;
10398 return 0;
10402 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10403 unsigned char **opp, unsigned char **eobp)
10405 if(WRAP_SAW_SOFT_HYPHEN(f)){
10406 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10407 GF_PUTC_GLO(f->next, '-'); /* real hyphen */
10410 if(c && WRAP_LV_FLD(f))
10411 GF_PUTC_GLO(f->next, ' ');
10413 if(WRAP_BOLD(f)){
10414 GF_PUTC_GLO(f->next, TAG_EMBED);
10415 GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10418 if(WRAP_ULINE(f)){
10419 GF_PUTC_GLO(f->next, TAG_EMBED);
10420 GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10423 if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10424 GF_PUTC_GLO(f->next, TAG_EMBED);
10425 GF_PUTC_GLO(f->next, TAG_INVOFF);
10428 if(WRAP_COLOR_SET(f)){
10429 char *p;
10430 char cb[RGBLEN+1];
10431 GF_PUTC_GLO(f->next, TAG_EMBED);
10432 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10433 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10434 cb[sizeof(cb)-1] = '\0';
10435 p = cb;
10436 for(; *p; p++)
10437 GF_PUTC_GLO(f->next, *p);
10438 GF_PUTC_GLO(f->next, TAG_EMBED);
10439 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10440 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10441 cb[sizeof(cb)-1] = '\0';
10442 p = cb;
10443 for(; *p; p++)
10444 GF_PUTC_GLO(f->next, *p);
10447 GF_PUTC_GLO(f->next, '\015');
10448 GF_PUTC_GLO(f->next, '\012');
10449 f->n = 0L;
10450 so_truncate(WRAP_SPACES(f), 0L);
10451 WRAP_SPC_LEN(f) = 0;
10452 WRAP_TRL_SPC(f) = 0;
10454 return 0;
10458 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10459 unsigned char **opp, unsigned char **eobp)
10461 int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10463 if(WRAP_HDR_CLR(f)){
10464 char *p;
10465 char cbuf[RGBLEN+1];
10466 int k;
10468 if((k = WRAP_MARG_L(f)) > 0)
10469 while(k-- > 0){
10470 n--;
10471 f->n++;
10472 GF_PUTC_GLO(f->next, ' ');
10475 GF_PUTC_GLO(f->next, TAG_EMBED);
10476 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10477 strncpy(cbuf,
10478 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10479 sizeof(cbuf));
10480 cbuf[sizeof(cbuf)-1] = '\0';
10481 p = cbuf;
10482 for(; *p; p++)
10483 GF_PUTC_GLO(f->next, *p);
10484 GF_PUTC_GLO(f->next, TAG_EMBED);
10485 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10486 strncpy(cbuf,
10487 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10488 sizeof(cbuf));
10489 cbuf[sizeof(cbuf)-1] = '\0';
10490 p = cbuf;
10491 for(; *p; p++)
10492 GF_PUTC_GLO(f->next, *p);
10495 while(n-- > 0){
10496 f->n++;
10497 GF_PUTC_GLO(f->next, ' ');
10500 WRAP_ALLWSP(f) = 1;
10502 if(q)
10503 wrap_quote_insert(f, ipp, eibp, opp, eobp);
10505 if(WRAP_BOLD(f)){
10506 GF_PUTC_GLO(f->next, TAG_EMBED);
10507 GF_PUTC_GLO(f->next, TAG_BOLDON);
10509 if(WRAP_ULINE(f)){
10510 GF_PUTC_GLO(f->next, TAG_EMBED);
10511 GF_PUTC_GLO(f->next, TAG_ULINEON);
10513 if(WRAP_INVERSE(f)){
10514 GF_PUTC_GLO(f->next, TAG_EMBED);
10515 GF_PUTC_GLO(f->next, TAG_INVON);
10517 if(WRAP_COLOR_SET(f)){
10518 char *p;
10519 if(WRAP_COLOR(f)->fg[0]){
10520 char cb[RGBLEN+1];
10521 GF_PUTC_GLO(f->next, TAG_EMBED);
10522 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10523 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10524 cb[sizeof(cb)-1] = '\0';
10525 p = cb;
10526 for(; *p; p++)
10527 GF_PUTC_GLO(f->next, *p);
10529 if(WRAP_COLOR(f)->bg[0]){
10530 char cb[RGBLEN+1];
10531 GF_PUTC_GLO(f->next, TAG_EMBED);
10532 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10533 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10534 cb[sizeof(cb)-1] = '\0';
10535 p = cb;
10536 for(; *p; p++)
10537 GF_PUTC_GLO(f->next, *p);
10540 if(WRAP_ANCHOR(f)){
10541 char buf[64]; int i;
10542 GF_PUTC_GLO(f->next, TAG_EMBED);
10543 GF_PUTC_GLO(f->next, TAG_HANDLE);
10544 snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10545 GF_PUTC_GLO(f->next, (int) strlen(buf));
10546 for(i = 0; buf[i]; i++)
10547 GF_PUTC_GLO(f->next, buf[i]);
10550 return 0;
10554 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10555 unsigned char **opp, unsigned char **eobp)
10557 int j, i;
10558 COLOR_PAIR *col = NULL;
10559 char *prefix = NULL, *last_prefix = NULL;
10561 if(ps_global->VAR_QUOTE_REPLACE_STRING){
10562 get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10563 if(!prefix && last_prefix){
10564 prefix = last_prefix;
10565 last_prefix = NULL;
10569 for(j = 0; j < WRAP_FL_QD(f); j++){
10570 if(WRAP_USE_CLR(f)){
10571 if((j % 3) == 0
10572 && ps_global->VAR_QUOTE1_FORE_COLOR
10573 && ps_global->VAR_QUOTE1_BACK_COLOR
10574 && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10575 ps_global->VAR_QUOTE1_BACK_COLOR))
10576 && pico_is_good_colorpair(col)){
10577 GF_COLOR_PUTC(f, col);
10579 else if((j % 3) == 1
10580 && ps_global->VAR_QUOTE2_FORE_COLOR
10581 && ps_global->VAR_QUOTE2_BACK_COLOR
10582 && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10583 ps_global->VAR_QUOTE2_BACK_COLOR))
10584 && pico_is_good_colorpair(col)){
10585 GF_COLOR_PUTC(f, col);
10587 else if((j % 3) == 2
10588 && ps_global->VAR_QUOTE3_FORE_COLOR
10589 && ps_global->VAR_QUOTE3_BACK_COLOR
10590 && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10591 ps_global->VAR_QUOTE3_BACK_COLOR))
10592 && pico_is_good_colorpair(col)){
10593 GF_COLOR_PUTC(f, col);
10595 if(col){
10596 free_color_pair(&col);
10597 col = NULL;
10601 if(!WRAP_LV_FLD(f)){
10602 if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10603 for(i = 0; prefix[i]; i++)
10604 GF_PUTC_GLO(f->next, prefix[i]);
10605 f->n += utf8_width(prefix);
10607 else if(ps_global->VAR_REPLY_STRING
10608 && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10609 || !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10610 GF_PUTC_GLO(f->next, '>');
10611 f->n += 1;
10613 else{
10614 GF_PUTC_GLO(f->next, '>');
10615 GF_PUTC_GLO(f->next, ' ');
10616 f->n += 2;
10619 else{
10620 GF_PUTC_GLO(f->next, '>');
10621 f->n += 1;
10624 if(j && WRAP_LV_FLD(f)){
10625 GF_PUTC_GLO(f->next, ' ');
10626 f->n++;
10628 else if(j && last_prefix){
10629 for(i = 0; last_prefix[i]; i++)
10630 GF_PUTC_GLO(f->next, last_prefix[i]);
10631 f->n += utf8_width(last_prefix);
10634 if(prefix)
10635 fs_give((void **)&prefix);
10636 if(last_prefix)
10637 fs_give((void **)&last_prefix);
10639 return 0;
10644 * function called from the outside to set
10645 * wrap filter's width option
10647 void *
10648 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10650 WRAP_S *wrap;
10652 /* NOTE: variables MUST be sanity checked before they get here */
10653 wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10654 memset(wrap, 0, sizeof(WRAP_S));
10655 wrap->wrap_col = width;
10656 wrap->wrap_max = width_max;
10657 wrap->indent = indent;
10658 wrap->margin_l = (margin) ? margin[0] : 0;
10659 wrap->margin_r = (margin) ? margin[1] : 0;
10660 wrap->tags = (GFW_HANDLES & flags) == GFW_HANDLES;
10661 wrap->on_comma = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10662 wrap->flowed = (GFW_FLOWED & flags) == GFW_FLOWED;
10663 wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10664 wrap->delsp = (GFW_DELSP & flags) == GFW_DELSP;
10665 wrap->use_color = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10666 wrap->hdr_color = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10667 wrap->for_compose = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10668 wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10670 return((void *) wrap);
10674 void *
10675 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10677 if(uh){
10678 memset(uh, 0, sizeof(URL_HILITE_S));
10679 uh->handlesp = handlesp;
10680 uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10683 return((void *) uh);
/*
 * State the preflow filter keeps in its opt pointer.
 */
typedef struct preflow_s {
    int quote_depth;    /* quote depth */
    int quote_count;    /* quote count */
    int sig;            /* sig level */
} PREFLOW_S;

/* accessors for the PREFLOW_S hanging off a filter's opt pointer */
#define PF_QD(F)        (((PREFLOW_S *)(F)->opt)->quote_depth)
#define PF_QC(F)        (((PREFLOW_S *)(F)->opt)->quote_count)
#define PF_SIG(F)       (((PREFLOW_S *)(F)->opt)->sig)
10698 * This would normally be handled in gf_wrap. If there is a possibility
10699 * that a url we want to recognize is cut in half by a soft newline we
10700 * want to fix that up by putting the halves back together. We do that
10701 * by deleting the soft newline and putting it all in one line. It will
10702 * still get wrapped later in gf_wrap. It isn't pretty with all the
10703 * goto's, but whatta ya gonna do?
/*
 * gf_preflow - filter that removes soft line breaks ahead of the wrap filter
 *
 * f   - this filter.  f->f1 carries the parser state and f->f2 the pending
 *       line break from one GF_DATA call to the next; f->opt holds the
 *       PREFLOW_S allocated on GF_RESET and released on GF_EOD.
 * flg - GF_DATA, GF_EOD or GF_RESET.
 *
 * pending is 1 after a CR LF and 2 after a space CR LF; the break itself
 * is held back until the start of the next line has been examined.
 *
 * NOTE(review): on GF_EOD nothing held back (a pending line break, or a
 * space seen in the WSPACE state) is written before the next filter is
 * flushed -- confirm that dropping it is intended.
 */
10705 void
10706 gf_preflow(FILTER_S *f, int flg)
10708 GF_INIT(f, f->next);
10710 if(flg == GF_DATA){
10711 register unsigned char c;
10712 register int state = f->f1;
10713 register int pending = f->f2;
10715 while(GF_GETC(f, c)){
10716 switch(state){
/* DFL: ordinary text; space and CR are held back, the rest is copied */
10717 case DFL:
10718 default_case:
10719 switch(c){
10720 case ' ':
10721 state = WSPACE;
10722 break;
10724 case '\015':
10725 state = CCR;
10726 break;
10728 default:
10729 GF_PUTC(f->next, c);
10730 break;
10733 break;
/* CCR: saw CR; LF completes a line break, else a '\012' is written and c re-examined */
10735 case CCR:
10736 switch(c){
10737 case '\012':
10738 pending = 1;
10739 state = BOL;
10740 break;
10742 default:
10743 GF_PUTC(f->next, '\012');
10744 state = DFL;
10745 goto default_case;
10746 break;
10749 break;
/* WSPACE: saw a space; only a following CR keeps it held back */
10751 case WSPACE:
10752 switch(c){
10753 case '\015':
10754 state = SPACECR;
10755 break;
10757 default:
10758 GF_PUTC(f->next, ' ');
10759 state = DFL;
10760 goto default_case;
10761 break;
10764 break;
/* SPACECR: saw space CR; LF makes it a space CR LF break (pending = 2) */
10766 case SPACECR:
10767 switch(c){
10768 case '\012':
10769 pending = 2;
10770 state = BOL;
10771 break;
10773 default:
10774 GF_PUTC(f->next, ' ');
10775 GF_PUTC(f->next, '\012');
10776 state = DFL;
10777 goto default_case;
10778 break;
10781 break;
/*
 * BOL: first character after a line break.  Leading '>' are counted
 * (FL_QLEV), one following space is consumed (FL_STF) and a leading
 * "-- " is recognized (FL_SIG) before the pending break is resolved.
 */
10783 case BOL:
10784 PF_QC(f) = 0;
10785 if(c == '>'){ /* count quote level */
10786 PF_QC(f)++;
10787 state = FL_QLEV;
10789 else{
10790 done_counting_quotes:
10791 if(c == ' '){ /* eat stuffed space */
10792 state = FL_STF;
10793 break;
10796 done_with_stuffed_space:
10797 if(c == '-'){ /* look for signature */
10798 PF_SIG(f) = 1;
10799 state = FL_SIG;
10800 break;
10803 done_with_sig:
/* a space CR LF break is dropped only if the quote count is unchanged and this is not a "-- " CR line */
10804 if(pending == 2){
10805 if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
10806 /* delete pending */
10808 PF_QD(f) = PF_QC(f);
10810 /* suppress quotes, too */
10811 PF_QC(f) = 0;
10813 else{
10815 * This should have been a hard new line
10816 * instead so leave out the trailing space.
10818 GF_PUTC(f->next, '\015');
10819 GF_PUTC(f->next, '\012');
10821 PF_QD(f) = PF_QC(f);
10824 else if(pending == 1){
10825 GF_PUTC(f->next, '\015');
10826 GF_PUTC(f->next, '\012');
10827 PF_QD(f) = PF_QC(f);
10829 else{
10830 PF_QD(f) = PF_QC(f);
10833 pending = 0;
10834 state = DFL;
/* write back the '>' characters that were counted (none if suppressed above) */
10835 while(PF_QC(f)-- > 0)
10836 GF_PUTC(f->next, '>');
/* write back whatever part of "-- " was consumed while looking for a sig */
10838 switch(PF_SIG(f)){
10839 case 0:
10840 default:
10841 break;
10843 case 1:
10844 GF_PUTC(f->next, '-');
10845 break;
10847 case 2:
10848 GF_PUTC(f->next, '-');
10849 GF_PUTC(f->next, '-');
10850 break;
10852 case 3:
10853 case 4:
10854 GF_PUTC(f->next, '-');
10855 GF_PUTC(f->next, '-');
10856 GF_PUTC(f->next, ' ');
10857 break;
10860 PF_SIG(f) = 0;
10861 goto default_case; /* to handle c */
10864 break;
10866 case FL_QLEV: /* count quote level */
10867 if(c == '>')
10868 PF_QC(f)++;
10869 else
10870 goto done_counting_quotes;
10872 break;
10874 case FL_STF: /* eat stuffed space */
10875 goto done_with_stuffed_space;
10876 break;
10878 case FL_SIG: /* deal with sig indicator */
10879 switch(PF_SIG(f)){
10880 case 1: /* saw '-' */
10881 if(c == '-')
10882 PF_SIG(f) = 2;
10883 else
10884 goto done_with_sig;
10886 break;
10888 case 2: /* saw '--' */
10889 if(c == ' ')
10890 PF_SIG(f) = 3;
10891 else
10892 goto done_with_sig;
10894 break;
10896 case 3: /* saw '-- ' */
10897 if(c == '\015')
10898 PF_SIG(f) = 4; /* it really is a sig line */
10900 goto done_with_sig;
10901 break;
10904 break;
/* remember where we are for the next chunk of data */
10908 f->f1 = state;
10909 f->f2 = pending;
10910 GF_END(f, f->next);
10912 else if(flg == GF_EOD){
/* release the PREFLOW_S, then pass end-of-data down the pipe */
10913 fs_give((void **) &f->opt);
10914 (void) GF_FLUSH(f->next);
10915 (*f->next->f)(f->next, GF_EOD);
10917 else if(flg == GF_RESET){
10918 PREFLOW_S *pf;
10920 pf = (PREFLOW_S *) fs_get(sizeof(*pf));
10921 memset(pf, 0, sizeof(*pf));
10922 f->opt = (void *) pf;
10924 f->f1 = BOL; /* state */
10925 f->f2 = 0; /* pending */
10926 PF_QD(f) = 0; /* quote depth */
10927 PF_QC(f) = 0; /* quote count */
10928 PF_SIG(f) = 0; /* sig level */
/*
 * LINE PREFIX FILTER - insert given text at beginning of each
 *			line
 */

/*
 * GF_PREFIX_WRITE - hand the NUL-terminated string s to the next filter
 *
 * Does nothing when s is NULL.  The expansion refers to a variable named
 * f and to GF_PUTC, so both must be usable where the macro is invoked.
 *
 * Wrapped in do { } while(0) so that "GF_PREFIX_WRITE(x);" is a single
 * statement even in an unbraced if/else, and the cursor has a name that
 * will not capture a caller's own "p".
 */
#define GF_PREFIX_WRITE(s)      do {                                    \
                                    register char *gf_pfx_p_;           \
                                    if((gf_pfx_p_ = (s)) != NULL)       \
                                      while(*gf_pfx_p_)                 \
                                        GF_PUTC(f->next, *gf_pfx_p_++); \
                                } while(0)
10950 * the simple filter, prepends each line with the requested prefix.
10951 * if prefix is null, does nothing, and as with all filters, assumes
10952 * NVT end of lines.
10954 void
10955 gf_prefix(FILTER_S *f, int flg)
10957 GF_INIT(f, f->next);
10959 if(flg == GF_DATA){
10960 register unsigned char c;
10961 register int state = f->f1;
10962 register int first = f->f2;
10964 while(GF_GETC(f, c)){
10966 if(first){ /* write initial prefix!! */
10967 first = 0; /* but just once */
10968 GF_PREFIX_WRITE((char *) f->opt);
10972 * State == 0 is the starting state and the usual state.
10973 * State == 1 means we saw a CR and haven't acted on it yet.
10974 * We are looking for a LF to get the CRLF end of line.
10975 * However, we also treat bare CR and bare LF as if they
10976 * were CRLF sequences. What else could it mean in text?
10977 * This filter is only used for text so that is probably
10978 * a reasonable interpretation of the bad input.
10980 if(c == '\015'){ /* CR */
10981 if(state){ /* Treat pending CR as endofline, */
10982 GF_PUTC(f->next, '\015'); /* and remain in saw-a-CR state. */
10983 GF_PUTC(f->next, '\012');
10984 GF_PREFIX_WRITE((char *) f->opt);
10986 else{
10987 state = 1;
10990 else if(c == '\012'){ /* LF */
10991 GF_PUTC(f->next, '\015'); /* Got either a CRLF or a bare LF, */
10992 GF_PUTC(f->next, '\012'); /* treat both as if a CRLF. */
10993 GF_PREFIX_WRITE((char *) f->opt);
10994 state = 0;
10996 else{ /* any other character */
10997 if(state){
10998 GF_PUTC(f->next, '\015'); /* Treat pending CR as endofline. */
10999 GF_PUTC(f->next, '\012');
11000 GF_PREFIX_WRITE((char *) f->opt);
11001 state = 0;
11004 GF_PUTC(f->next, c);
11008 f->f1 = state; /* save state for next chunk of data */
11009 f->f2 = first;
11010 GF_END(f, f->next);
11012 else if(flg == GF_EOD){
11013 (void) GF_FLUSH(f->next);
11014 (*f->next->f)(f->next, GF_EOD);
11016 else if(flg == GF_RESET){
11017 dprint((9, "-- gf_reset prefix\n"));
11018 f->f1 = 0;
11019 f->f2 = 1; /* nothing written yet */
/*
 * gf_prefix_opt - called from the outside to set the prefix filter's
 *                 prefix string
 *
 * The string is not copied; the same pointer is handed back as the
 * filter's option value.
 */
void *
gf_prefix_opt(char *prefix)
{
    void *opt = prefix;

    return(opt);
}
11036 * LINE TEST FILTER - accumulate lines and offer each to the provided
11037 * test function.
/*
 * Option block of the line test filter, built by gf_line_test_opt().
 */
11040 typedef struct _linetest_s {
/* function each accumulated line is offered to */
11041 linetest_t f;
/* caller's pointer, passed to f unchanged on every call */
11042 void *local;
11043 } LINETEST_S;
/*
 * Helpers for gf_line_test.  GF_LINE_TEST_ADD and GF_LINE_TEST_TEST use
 * the variable p (next free byte of the accumulated line) from the place
 * they are expanded; GF_LINE_TEST_ADD also uses eobuf.
 *
 * GF_LINE_TEST_EOB(f)  - address of the last byte of the f->f2 byte
 *                        buffer f->line.
 * GF_LINE_TEST_ADD(f,c)- store c at p.  If p has reached eobuf the buffer
 *                        is first grown by LINE_TEST_BLOCK with
 *                        fs_resize() and eobuf and p are recomputed
 *                        against the resized buffer.
 * GF_LINE_TEST_TEST(F,D)- NUL terminate the line at p and offer it to the
 *                        test function together with the line number
 *                        (F)->n (incremented on each call), the address
 *                        of an insertion list and the caller's local
 *                        pointer; the result is left in D.
 *                        D < 0: the line buffer and option block are
 *                        freed and gf_error() is called.
 *                        D < 2: the line is written to the next filter.
 *                        Where the line pointer equals an insertion's
 *                        "where", a positive len writes that many bytes
 *                        of its text first and a negative len skips
 *                        -len bytes of the line.  Insertions left over
 *                        at the end of the line are written after it and
 *                        the list is released with
 *                        gf_line_test_free_ins().
 */
11046 /* accumulator growth increment */
11047 #define LINE_TEST_BLOCK 1024
11049 #define GF_LINE_TEST_EOB(f) \
11050 ((f)->line + ((f)->f2 - 1))
11052 #define GF_LINE_TEST_ADD(f, c) \
11054 if(p >= eobuf){ \
11055 f->f2 += LINE_TEST_BLOCK; \
11056 fs_resize((void **)&f->line, \
11057 (size_t) f->f2 * sizeof(char)); \
11058 eobuf = GF_LINE_TEST_EOB(f); \
11059 p = eobuf - LINE_TEST_BLOCK; \
11061 *p++ = c; \
11064 #define GF_LINE_TEST_TEST(F, D) \
11066 unsigned char c; \
11067 register char *cp; \
11068 register int l; \
11069 LT_INS_S *ins = NULL, *insp; \
11070 *p = '\0'; \
11071 (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
11072 (F)->line, &ins, \
11073 ((LINETEST_S *) (F)->opt)->local); \
11074 if((D) < 2){ \
11075 if((D) < 0){ \
11076 if((F)->line) \
11077 fs_give((void **) &(F)->line); \
11078 if((F)->opt) \
11079 fs_give((void **) &(F)->opt); \
11080 gf_error(_("translation error")); \
11081 /* NO RETURN */ \
11083 for(insp = ins, cp = (F)->line; cp < p; ){ \
11084 if(insp && cp == insp->where){ \
11085 if(insp->len > 0){ \
11086 for(l = 0; l < insp->len; l++){ \
11087 c = (unsigned char) insp->text[l]; \
11088 GF_PUTC((F)->next, c); \
11090 insp = insp->next; \
11091 continue; \
11092 } else if(insp->len < 0){ \
11093 cp -= insp->len; \
11094 insp = insp->next; \
11095 continue; \
11098 GF_PUTC((F)->next, *cp); \
11099 cp++; \
11101 while(insp){ \
11102 for(l = 0; l < insp->len; l++){ \
11103 c = (unsigned char) insp->text[l]; \
11104 GF_PUTC((F)->next, c); \
11106 insp = insp->next; \
11108 gf_line_test_free_ins(&ins); \
11115 * this simple filter accumulates characters until a newline, offers it
11116 * to the provided test function, and then passes it on. It assumes
11117 * NVT EOLs.
11119 void
11120 gf_line_test(FILTER_S *f, int flg)
11122 register char *p = f->linep;
11123 register char *eobuf = GF_LINE_TEST_EOB(f);
11124 GF_INIT(f, f->next);
11126 if(flg == GF_DATA){
11127 register unsigned char c;
11128 register int state = f->f1;
11130 while(GF_GETC(f, c)){
11132 if(state){
11133 state = 0;
11134 if(c == '\012'){
11135 int done;
11137 GF_LINE_TEST_TEST(f, done);
11139 p = (f)->line;
11141 if(done == 2) /* skip this line! */
11142 continue;
11144 GF_PUTC(f->next, '\015');
11145 GF_PUTC(f->next, '\012');
11147 * if the line tester returns TRUE, it's
11148 * telling us its seen enough and doesn't
11149 * want to see any more. Remove ourself
11150 * from the pipeline...
11152 if(done){
11153 if(gf_master == f){
11154 gf_master = f->next;
11156 else{
11157 FILTER_S *fprev;
11159 for(fprev = gf_master;
11160 fprev && fprev->next != f;
11161 fprev = fprev->next)
11164 if(fprev) /* wha??? */
11165 fprev->next = f->next;
11166 else
11167 continue;
11170 while(GF_GETC(f, c)) /* pass input */
11171 GF_PUTC(f->next, c);
11173 (void) GF_FLUSH(f->next); /* and drain queue */
11174 fs_give((void **)&f->line);
11175 fs_give((void **)&f); /* wax our data */
11176 return;
11178 else
11179 continue;
11181 else /* add CR to buffer */
11182 GF_LINE_TEST_ADD(f, '\015');
11183 } /* fall thru to handle 'c' */
11185 if(c == '\015') /* newline? */
11186 state = 1;
11187 else
11188 GF_LINE_TEST_ADD(f, c);
11191 f->f1 = state;
11192 GF_END(f, f->next);
11194 else if(flg == GF_EOD){
11195 int i;
11197 GF_LINE_TEST_TEST(f, i); /* examine remaining data */
11198 fs_give((void **) &f->line); /* free line buffer */
11199 fs_give((void **) &f->opt); /* free test struct */
11200 (void) GF_FLUSH(f->next);
11201 (*f->next->f)(f->next, GF_EOD);
11203 else if(flg == GF_RESET){
11204 dprint((9, "-- gf_reset line_test\n"));
11205 f->f1 = 0; /* state */
11206 f->n = 0L; /* line number */
11207 f->f2 = LINE_TEST_BLOCK; /* size of alloc'd line */
11208 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11211 f->linep = p;
11216 * function called from the outside to operate on accumulated line.
11218 void *
11219 gf_line_test_opt(linetest_t test_f, void *local)
11221 LINETEST_S *ltp;
11223 ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11224 memset(ltp, 0, sizeof(LINETEST_S));
11225 ltp->f = test_f;
11226 ltp->local = local;
11227 return((void *) ltp);
11232 LT_INS_S **
11233 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11235 *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11236 if(((*ins)->len = n) > 0)
11237 strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11238 else
11239 (*ins)->text = NULL;
11241 (*ins)->where = p;
11242 (*ins)->next = NULL;
11243 return(&(*ins)->next);
11247 void
11248 gf_line_test_free_ins(LT_INS_S **ins)
11250 if(ins && *ins){
11251 if((*ins)->next)
11252 gf_line_test_free_ins(&(*ins)->next);
11254 if((*ins)->text)
11255 fs_give((void **) &(*ins)->text);
11257 fs_give((void **) ins);
11263 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11264 * with editorial comment
/*
 * Option block of the prepend editorial filter, built by
 * gf_prepend_editorial_opt().
 */
11267 typedef struct _preped_s {
/* optional test; the text is written if this is NULL or returns non-zero */
11268 prepedtest_t f;
/* editorial text written ahead of the accumulated output (not copied) */
11269 char *text;
11270 } PREPED_S;
11274 * gf_prepend_editorial - accumulate filtered text and prepend its
11275 * output with given text
11279 void
11280 gf_prepend_editorial(FILTER_S *f, int flg)
11282 GF_INIT(f, f->next);
11284 if(flg == GF_DATA){
11285 register unsigned char c;
11287 while(GF_GETC(f, c)){
11288 so_writec(c, (STORE_S *) f->data);
11291 GF_END(f, f->next);
11293 else if(flg == GF_EOD){
11294 unsigned char c;
11296 if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11297 char *p = ((PREPED_S *)(f)->opt)->text;
11299 for( ; p && *p; p++)
11300 GF_PUTC(f->next, *p);
11303 so_seek((STORE_S *) f->data, 0L, 0);
11304 while(so_readc(&c, (STORE_S *) f->data)){
11305 GF_PUTC(f->next, c);
11308 so_give((STORE_S **) &f->data);
11309 fs_give((void **) &f->opt);
11310 (void) GF_FLUSH(f->next);
11311 (*f->next->f)(f->next, GF_EOD);
11313 else if(flg == GF_RESET){
11314 dprint((9, "-- gf_reset line_test\n"));
11315 f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11321 * function called from the outside to setup prepending editorial
11322 * to output text
11324 void *
11325 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11327 PREPED_S *pep;
11329 pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11330 memset(pep, 0, sizeof(PREPED_S));
11331 pep->f = test_f;
11332 pep->text = text;
11333 return((void *) pep);
11338 * Network virtual terminal to local newline convention filter
11340 void
11341 gf_nvtnl_local(FILTER_S *f, int flg)
11343 GF_INIT(f, f->next);
11345 if(flg == GF_DATA){
11346 register unsigned char c;
11347 register int state = f->f1;
11349 while(GF_GETC(f, c)){
11350 if(state){
11351 state = 0;
11352 if(c == '\012'){
11353 GF_PUTC(f->next, '\012');
11354 continue;
11356 else
11357 GF_PUTC(f->next, '\015');
11358 /* fall thru to deal with 'c' */
11361 if(c == '\015')
11362 state = 1;
11363 else
11364 GF_PUTC(f->next, c);
11367 f->f1 = state;
11368 GF_END(f, f->next);
11370 else if(flg == GF_EOD){
11371 (void) GF_FLUSH(f->next);
11372 (*f->next->f)(f->next, GF_EOD);
11374 else if(flg == GF_RESET){
11375 dprint((9, "-- gf_reset nvtnl_local\n"));
11376 f->f1 = 0;
11382 * local to network newline convention filter
11384 void
11385 gf_local_nvtnl(FILTER_S *f, int flg)
11387 GF_INIT(f, f->next);
11389 if(flg == GF_DATA){
11390 register unsigned char c;
11392 while(GF_GETC(f, c)){
11393 if(c == '\012'){
11394 GF_PUTC(f->next, '\015');
11395 GF_PUTC(f->next, '\012');
11397 else if(c != '\015') /* do not copy isolated \015 into source */
11398 GF_PUTC(f->next, c);
11401 GF_END(f, f->next);
11403 else if(flg == GF_EOD){
11404 (void) GF_FLUSH(f->next);
11405 (*f->next->f)(f->next, GF_EOD);
11407 else if(GF_RESET){
11408 dprint((9, "-- gf_reset local_nvtnl\n"));
11409 /* no op */
/*
 * free_filter_module_globals - free the FILTER_S structures on the list
 * that starts at gf_master.
 *
 * NOTE(review): only the list nodes are handed to fs_give() in the lines
 * visible here, and gf_master itself is not reset in them -- confirm
 * against the rest of the function.
 */
11414 void
11415 free_filter_module_globals(void)
11417 FILTER_S *flt, *fltn = gf_master;
11419 while((flt = fltn) != NULL){ /* free list of old filters */
/* pick up the link first, fs_give() releases the node */
11420 fltn = flt->next;
11421 fs_give((void **)&flt);