* Create help for explaining how encrypted password file support
[alpine.git] / pith / filter.c
blobc60416b158a88336b7f2a3c40f0f2dcb98f3c2a6
1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
5 /*
6 * ========================================================================
7 * Copyright 2006-2008 University of Washington
8 * Copyright 2013-2014 Eduardo Chappa
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
20 filter.c
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
30 call another filter).
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
38 TODO:
39 reasonable error handling
41 ====*/
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
65 * Internal prototypes
/*
 * Forward declarations for the generic character readers/writers and
 * the filter-chain helpers defined further down in this file.  The
 * readers take a pointer to the octet they return; the writers take the
 * octet to write as an int.  The *_getchar variants carry an extra
 * void * argument so they can be handed to generic_readc_locale().
 */
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
72 int gf_fwritec(int);
73 int gf_fwritec_locale(int);
74 #ifdef _WINDOWS
75 int gf_freadc_windows(unsigned char *);
76 #endif /* _WINDOWS */
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
80 int gf_pwritec(int);
81 int gf_pwritec_locale(int);
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
85 int gf_swritec(int);
86 int gf_swritec_locale(int);
87 void gf_terminal(FILTER_S *, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
92 void gf_8bit_put(FILTER_S *, int);
97 * System specific options
99 #ifdef _WINDOWS
100 #define CRLF_NEWLINES
101 #endif
105 * Hooks for callers to adjust behavior
/*
 * Optional callbacks, left unset here.  Nothing in the visible part of
 * this file assigns or calls them; presumably a front end installs them
 * to map variable/feature names to display names -- TODO confirm.
 */
107 char *(*pith_opt_pretty_var_name)(char *);
108 char *(*pith_opt_pretty_feature_name)(char *, int);
112 * pointer to first function in a pipe, and pointer to last filter
/*
 * File-level state shared by the pipe routines:
 *   gf_master       - head of the linked list of filters (see gf_link_filter)
 *   last_filter     - put-char routine the terminal filter writes with
 *                     (set by gf_set_terminal)
 *   gf_error_string - message recorded by gf_error(), returned by gf_pipe()
 *   gf_byte_count   - bytes read by gf_pipe() since gf_filter_init()
 *   gf_error_state  - setjmp context that gf_error() longjmp's back to
 */
114 FILTER_S *gf_master = NULL;
115 static gf_io_t last_filter;
116 static char *gf_error_string;
117 static long gf_byte_count;
118 static jmp_buf gf_error_state;
/*
 * Command flags handed to a filter function as its second argument
 * (GF_DATA, GF_EOD and GF_RESET are the ones used in the code visible
 * here), followed by the state numbers individual filters keep between
 * calls.
 */
121 #define GF_NOOP 0x01 /* flags used by generalized */
122 #define GF_EOD 0x02 /* filters */
123 #define GF_DATA 0x04 /* See filter.c for more */
124 #define GF_ERROR 0x08 /* details */
125 #define GF_RESET 0x10
129 * A list of states used by the various filters. Reused in many filters.
131 #define DFL 0
132 #define EQUAL 1
133 #define HEX 2
134 #define WSPACE 3
135 #define CCR 4
136 #define CLF 5
137 #define TOKEN 6
138 #define TAG 7
139 #define HANDLE 8
140 #define HDATA 9
141 #define ESC 10
142 #define ESCDOL 11
143 #define ESCPAR 12
144 #define EUC 13
145 #define BOL 14
146 #define FL_QLEV 15
147 #define FL_STF 16
148 #define FL_SIG 17
149 #define STOP_DECODING 18
150 #define SPACECR 19
155 * Macros to reduce function call overhead associated with calling
156 * each filter for each byte filtered, and to minimize filter structure
157 * dereferences. NOTE: "queuein" has to do with putting chars into the
158 * filter structs data queue. So, writing at the queuein offset is
159 * what a filter does to pass processed data out of itself. Ditto for
160 * queueout. This explains the FI --> queueout init stuff below.
/*
 * Queue access macros.  GF_INIT(FI, FO) declares four locals in the
 * calling scope: op/eob bracket the unread data in FI's queue, and
 * ip/eib are the write position and last slot of FO's queue.
 * GF_PUTC stores one octet at ip and, once ip reaches eib, calls
 * GF_FLUSH, which saves the offset, runs FO's filter function with
 * GF_DATA and reloads ip/eib.  GF_GETC fetches one octet through op and
 * yields 1, or resets FI's queue offsets and yields 0 when the queue is
 * drained.  GF_END writes the local pointers back as offsets.
 * The *_GLO variants do the same through pointers named ipp/eibp.
 */
162 #define GF_QUE_START(F) (&(F)->queue[0])
163 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
165 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
166 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
167 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
168 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
169 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
170 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
172 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
173 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
174 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
176 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
177 unsigned char *GF_EOB_INIT(FI); \
178 unsigned char *GF_IP_INIT(FO); \
179 unsigned char *GF_EIB_INIT(FO);
181 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
182 (F)->queueout = (F)->queuein = 0)
184 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
186 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
187 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
188 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
189 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
191 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
192 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
195 * Introducing the *_GLO macros for use in splitting the big macros out
196 * into functions (wrap_flush, wrap_eol). The reason we need a
197 * separate macro is because of the vars ip, eib, op, and eob, which are
198 * set up locally in a call to GF_INIT. To preserve these variables
199 * in the new functions, we now pass pointers to these four vars. Each
200 * of these new functions expects the presence of pointer vars
201 * ipp, eibp, opp, and eobp.
204 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
/*
 * GF_COLOR_PUTC(F, C) - write an embedded color change to (F)->next.
 * Emits TAG_EMBED, TAG_FGCOLOR and the text returned by
 * color_to_asciirgb((C)->fg), then TAG_EMBED, TAG_BGCOLOR and the text
 * for (C)->bg.  Each string is copied into a local RGBLEN+1 buffer and
 * explicitly NUL-terminated because strncpy() alone does not guarantee
 * termination.  Uses GF_PUTC_GLO, so ipp/eibp must be in scope.
 * NOTE(review): the line holding the closing brace of the macro body is
 * absent from this copy of the file.
 */
206 #define GF_COLOR_PUTC(F, C) { \
207 char *p; \
208 char cb[RGBLEN+1]; \
209 GF_PUTC_GLO((F)->next, TAG_EMBED); \
210 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
211 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
212 cb[sizeof(cb)-1] = '\0'; \
213 p = cb; \
214 for(; *p; p++) \
215 GF_PUTC_GLO((F)->next, *p); \
216 GF_PUTC_GLO((F)->next, TAG_EMBED); \
217 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
218 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
219 cb[sizeof(cb)-1] = '\0'; \
220 p = cb; \
221 for(; *p; p++) \
222 GF_PUTC_GLO((F)->next, *p); \
226 * Generalized getc and putc routines, provided here so they don't
227 * need to be re-done elsewhere.
231 * pointers to objects to be used by the generic getc and putc
232 * functions
/*
 * gf_in / gf_out hold the object the generic readers / writers operate
 * on.  gf_set_readc() and gf_set_writec() fill in exactly one of
 * file, pipe or txtp depending on the source type, plus:
 *   n     - bytes left (only consulted by the string reader/writer)
 *   flags - caller's flags as passed to gf_set_readc/gf_set_writec
 *   cb    - conversion state used by the *_locale readers/writers
 *
 * gf_so_in / gf_so_out are stacks of storage objects; the top entry is
 * the one gf_so_readc() / gf_so_writec() use.
 */
234 static struct gf_io_struct {
235 FILE *file;
236 PIPE_S *pipe;
237 char *txtp;
238 unsigned long n;
239 int flags;
240 CBUF_S cb;
241 } gf_in, gf_out;
243 #define GF_SO_STACK struct gf_so_stack
244 static GF_SO_STACK {
245 STORE_S *so;
246 GF_SO_STACK *next;
247 } *gf_so_in, *gf_so_out;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc)
261 return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 gf_so_out->so->src == ExternalText);
268 * setup to use and return a pointer to the generic
269 * getc function
271 void
272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
274 gf_in.n = len;
275 gf_in.flags = flags;
276 gf_in.cb.cbuf[0] = '\0';
277 gf_in.cb.cbufp = gf_in.cb.cbuf;
278 gf_in.cb.cbufend = gf_in.cb.cbuf;
280 if(src == FileStar){
281 gf_in.file = (FILE *)txt;
282 fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 : gf_freadc;
286 #else /* UNIX */
287 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 : gf_freadc;
289 #endif /* UNIX */
291 else if(src == PipeStar){
292 gf_in.pipe = (PIPE_S *)txt;
293 *gc = gf_preadc;
294 *gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 : gf_preadc;
297 else{
298 gf_in.txtp = (char *)txt;
299 *gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 : gf_sreadc;
306 * setup to use and return a pointer to the generic
307 * putc function
309 void
310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
312 gf_out.n = len;
313 gf_out.flags = flags;
314 gf_out.cb.cbuf[0] = '\0';
315 gf_out.cb.cbufp = gf_out.cb.cbuf;
316 gf_out.cb.cbufend = gf_out.cb.cbuf;
318 if(src == FileStar){
319 gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 *pc = gf_fwritec;
322 #else /* UNIX */
323 *pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 : gf_fwritec;
325 #endif /* UNIX */
327 else if(src == PipeStar){
328 gf_out.pipe = (PIPE_S *)txt;
329 *pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 : gf_pwritec;
332 else{
333 gf_out.txtp = (char *)txt;
334 *pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 : gf_swritec;
341 * setup to use and return a pointer to the generic
342 * getc function
344 void
345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
347 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
349 sp->so = so;
350 sp->next = gf_so_in;
351 gf_so_in = sp;
352 *gc = gf_so_readc;
356 void
357 gf_clear_so_readc(STORE_S *so)
359 GF_SO_STACK *sp;
361 if((sp = gf_so_in) != NULL){
362 if(so == sp->so){
363 gf_so_in = gf_so_in->next;
364 fs_give((void **) &sp);
366 else
367 panic("Programmer botch: Can't unstack store readc");
369 else
370 panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
376 * putc function
378 void
379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
381 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
383 sp->so = so;
384 sp->next = gf_so_out;
385 gf_so_out = sp;
386 *pc = gf_so_writec;
390 void
391 gf_clear_so_writec(STORE_S *so)
393 GF_SO_STACK *sp;
395 if((sp = gf_so_out) != NULL){
396 if(so == sp->so){
397 gf_so_out = gf_so_out->next;
398 fs_give((void **) &sp);
400 else
401 panic("Programmer botch: Can't unstack store writec");
403 else
404 panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
412 gf_so_writec(int c)
414 return(so_writec(c, gf_so_out->so));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c)
424 return(so_readc(c, gf_so_in->so));
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
431 gf_freadc(unsigned char *c)
433 int rv = 0;
435 do {
436 errno = 0;
437 clearerr(gf_in.file);
438 rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439 } while(!rv && ferror(gf_in.file) && errno == EINTR);
441 return(rv);
446 gf_freadc_locale(unsigned char *c)
448 return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
/*
 * gf_freadc_getchar - single octet reader with the signature
 *                     generic_readc_locale wants.  extraarg is the
 *                     FILE * to read from.  Reads interrupted by a
 *                     signal (EINTR) are tried again.
 *
 * Returns fread()'s item count: 1 if an octet was stored in *c, else 0.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE *stream = (FILE *) extraarg;
    int   got;

    for(;;){
        errno = 0;
        clearerr(stream);
        got = fread(c, sizeof(unsigned char), (size_t)1, stream);

        if(got || !ferror(stream) || errno != EINTR)
          break;
    }

    return(got);
}
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
479 gf_fwritec(int c)
481 unsigned char ch = (unsigned char)c;
482 int rv = 0;
485 rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486 while(!rv && ferror(gf_out.file) && errno == EINTR);
488 return(rv);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c)
499 int rv = 1;
500 int i, outchars;
501 unsigned char obuf[MAX(MB_LEN_MAX,32)];
503 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 for(i = 0; i < outchars; i++)
505 if(gf_fwritec(obuf[i]) != 1){
506 rv = 0;
507 break;
511 return(rv);
#ifdef _WINDOWS
/*
 * Read unicode characters from windows filesystem and return
 * them as a stream of UTF-8 characters. The stream is assumed
 * opened so that it will know how to put together the unicode.
 *
 * (This is totally untested, copied loosely from so_file_readc_windows
 * which may or may not be appropriate.)
 *
 * Returns 1 if an octet was stored in *c, 0 at end of input.
 */
int
gf_freadc_windows(unsigned char *c)
{
    CBUF_S *cb = &gf_in.cb;
    UCS     ucs;

    /* octets left over from the character converted last time? */
    if(cb->cbufend > cb->cbuf){
        *c = *cb->cbufp++;
        if(cb->cbufp >= cb->cbufend)
          cb->cbufend = cb->cbufp = cb->cbuf;

        return(1);
    }

    if(!gf_in.file)
      return(0);

    /* windows only so second arg is ignored */
    ucs = read_a_wide_char(gf_in.file, NULL);
    if(ucs == CCONV_EOF)
      return(0);

    /*
     * Convert the UCS character to UTF-8, return the first octet now
     * and dole out the rest one at a time on the following calls.
     */
    cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
    cb->cbufp   = cb->cbuf;
    if(cb->cbufend > cb->cbuf){
        *c = *cb->cbufp++;
        if(cb->cbufp >= cb->cbufend)
          cb->cbufend = cb->cbufp = cb->cbuf;
    }
    else
      *c = '?';

    return(1);
}
#endif /* _WINDOWS */
574 gf_preadc(unsigned char *c)
576 return(pipe_readc(c, gf_in.pipe));
581 gf_preadc_locale(unsigned char *c)
583 return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c, void *extraarg)
593 PIPE_S *pipe;
595 pipe = (PIPE_S *) extraarg;
597 return(pipe_readc(c, pipe));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
607 gf_pwritec(int c)
609 return(pipe_writec(c, gf_out.pipe));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c)
620 int rv = 1;
621 int i, outchars;
622 unsigned char obuf[MAX(MB_LEN_MAX,32)];
624 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 for(i = 0; i < outchars; i++)
626 if(gf_pwritec(obuf[i]) != 1){
627 rv = 0;
628 break;
632 return(rv);
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
639 gf_sreadc(unsigned char *c)
641 return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
646 gf_sreadc_locale(unsigned char *c)
648 return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
/*
 * gf_sreadc_getchar - single octet reader with the signature
 *                     generic_readc_locale wants.
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    int got;

    /*
     * extraarg is ignored and gf_sreadc just uses globals instead.
     * That's ok as long as we don't call it more than once at a time.
     */
    got = gf_sreadc(c);

    return(got);
}
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
669 gf_swritec(int c)
671 return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c)
682 int rv = 1;
683 int i, outchars;
684 unsigned char obuf[MAX(MB_LEN_MAX,32)];
686 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 for(i = 0; i < outchars; i++)
688 if(gf_swritec(obuf[i]) != 1){
689 rv = 0;
690 break;
694 return(rv);
699 * output the given string with the given function
702 gf_puts(register char *s, gf_io_t pc)
704 while(*s != '\0')
705 if(!(*pc)((unsigned char)*s++))
706 return(0); /* ERROR putting char ! */
708 return(1);
713 * output the given string with the given function
716 gf_nputs(register char *s, long int n, gf_io_t pc)
718 while(n--)
719 if(!(*pc)((unsigned char)*s++))
720 return(0); /* ERROR putting char ! */
722 return(1);
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * functions use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c,
743 int (*get_a_char)(unsigned char *, void *),
744 void *extraarg,
745 CBUF_S *cb)
747 unsigned long octets_so_far = 0, remaining_octets;
748 unsigned char *inputp;
749 unsigned char ch;
750 UCS ucs;
751 unsigned char inputbuf[20];
752 int rv = 0;
753 int got_one = 0;
755 /* already got some from previous call? */
756 if(cb->cbufend > cb->cbuf){
757 *c = *cb->cbufp;
758 cb->cbufp++;
759 rv++;
760 if(cb->cbufp >= cb->cbufend){
761 cb->cbufend = cb->cbuf;
762 cb->cbufp = cb->cbuf;
765 return(rv);
768 memset(inputbuf, 0, sizeof(inputbuf));
769 if((*get_a_char)(&ch, extraarg) == 0)
770 return(0);
772 inputbuf[octets_so_far++] = ch;
774 while(!got_one){
775 remaining_octets = octets_so_far;
776 inputp = inputbuf;
777 ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 switch(ucs){
779 case CCONV_BADCHAR:
780 return(rv);
782 case CCONV_NEEDMORE:
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far >= sizeof(inputbuf))
790 return(rv);
792 if((*get_a_char)(&ch, extraarg) == 0)
793 return(rv);
795 inputbuf[octets_so_far++] = ch;
796 break;
798 default:
799 /* got a good UCS-4 character */
800 got_one++;
801 break;
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
809 rv++;
810 cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811 cb->cbufp = cb->cbuf;
812 if(cb->cbufend > cb->cbuf){
813 *c = *cb->cbufp;
814 cb->cbufp++;
815 if(cb->cbufp >= cb->cbufend){
816 cb->cbufend = cb->cbuf;
817 cb->cbufp = cb->cbuf;
820 else
821 *c = '?';
823 return(rv);
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
834 void
835 gf_filter_init(void)
837 FILTER_S *flt, *fltn = gf_master;
839 while((flt = fltn) != NULL){ /* free list of old filters */
840 fltn = flt->next;
841 fs_give((void **)&flt);
844 gf_master = NULL;
845 gf_error_string = NULL; /* clear previous errors */
846 gf_byte_count = 0L; /* reset counter */
852 * link the given filter into the filter chain
854 void
855 gf_link_filter(filter_t f, void *data)
857 FILTER_S *new, *tail;
859 #ifdef CRLF_NEWLINES
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865 return;
866 #endif
868 new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869 memset(new, 0, sizeof(FILTER_S));
871 new->f = f; /* set the function pointer */
872 new->opt = data; /* set any optional parameter data */
873 (*f)(new, GF_RESET); /* have it setup initial state */
875 if((tail = gf_master) != NULL){ /* or add it to end of existing */
876 while(tail->next) /* list */
877 tail = tail->next;
879 tail->next = new;
881 else /* attach new struct to list */
882 gf_master = new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
890 void
891 gf_terminal(FILTER_S *f, int flg)
893 if(flg == GF_DATA){
894 GF_INIT(f, f);
896 while(op < eob)
897 if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 gf_error(errno ? error_description(errno) : "Error writing pipe");
900 GF_CH_RESET(f);
902 else if(flg == GF_RESET)
903 errno = 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
911 void
912 gf_set_terminal(gf_io_t f) /* function to set generic filter */
915 last_filter = f;
920 * common function for filter's to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
923 void
924 gf_error(char *s)
926 /* let the user know the error passed in s */
927 gf_error_string = s;
928 longjmp(gf_error_state, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
938 char *
939 gf_pipe(gf_io_t gc, gf_io_t pc)
940 /* how to get a character */
942 unsigned char c;
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string ? gf_error_string : "NULL"));
952 return(gf_error_string); /* */
956 * set and link in the terminal filter
958 gf_set_terminal(pc);
959 gf_link_filter(gf_terminal, NULL);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
968 * objects.
971 GF_INIT(gf_master, gf_master);
973 while((*gc)(&c)){
974 gf_byte_count++;
976 #ifdef _WINDOWS
977 if(!(gf_byte_count & 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled throught the alarm()
980 * mechinism.
982 mswin_yield ();
983 #endif
985 GF_PUTC(gf_master, c & 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master);
993 (*gf_master->f)(gf_master, GF_EOD);
996 dprint((4, "done.\n"));
997 return(NULL); /* everything went OK */
1002 * return the number of bytes piped so far
1004 long
1005 gf_bytes_piped(void)
1007 return(gf_byte_count);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
/*
 * gf_filter - run the contents of source_so through the command "cmd"
 * and push what the command writes back down a freshly built filter
 * chain ending in pc.
 *
 * Order of work, as visible below:
 *   1. gf_filter_init() empties the chain; a gf_utf8 filter is linked
 *      first when keyboard_charmap is set and isn't "UTF-8", then the
 *      caller's aux_filters, then gf_terminal writing through pc.
 *   2. source_so is rewound and the command is opened with
 *      open_system_pipe() for both reading and writing.
 *   3. "prepend", if given, is written to the command followed by a
 *      newline; each octet of source_so is then passed through
 *      utf8_to_locale() and the result is written to the command.
 *   4. Lines read back from the command are handed to gf_filter_puts().
 *      With NON_BLOCKING_IO, writing (up to 1024 octets at a time) and
 *      reading are interleaved; otherwise all input is written, the
 *      write side is closed, and only then is the output read.
 *   5. The pipe is closed and gf_filter_eod() flushes the chain.
 *
 * The first error met is remembered in errstr and stops further
 * reading/writing; the pipe is still closed and gf_filter_eod() still
 * runs.  If the pipe can't be opened nothing is sent down the chain.
 * Note that "n" is only declared when NON_BLOCKING_IO is defined.
 */
1022 char *
1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 void (*pipecb_f)(PIPE_S *, int, void *))
1027 unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028 int flags, outchars, i;
1029 char *errstr = NULL, buf[MAILTMPLEN];
1030 PIPE_S *fpipe;
1031 CBUF_S cb;
1032 #ifdef NON_BLOCKING_IO
1033 int n;
1034 #endif
1036 dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
1038 gf_filter_init();
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1044 if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045 gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
1047 for( ; aux_filters && aux_filters->filter; aux_filters++)
1048 gf_link_filter(aux_filters->filter, aux_filters->data);
1050 gf_set_terminal(pc);
1051 gf_link_filter(gf_terminal, NULL);
1053 cb.cbuf[0] = '\0';
1054 cb.cbufp = cb.cbuf;
1055 cb.cbufend = cb.cbuf;
1058 * Spawn filter feeding it data, and reading what it writes.
1060 so_seek(source_so, 0L, 0);
1061 flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 | (silent ? PIPE_SILENT : 0)
1063 | (!disable_reset ? PIPE_RESET : 0);
1065 if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
1067 #ifdef NON_BLOCKING_IO
1069 if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 errstr = "Can't set up non-blocking IO";
1072 if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 || fputc('\n', fpipe->out.f) == EOF))
1074 errstr = error_description(errno);
1076 while(!errstr){
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 if(!so_readc(&c, source_so)){
1082 fclose(fpipe->out.f);
1083 fpipe->out.f = NULL;
1085 else{
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 for(i = 0; i < outchars && !errstr; i++)
1093 if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 errstr = error_description(errno);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1102 errno = 0;
1103 clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 errstr = gf_filter_puts(buf);
1108 /* then fgets failed! */
1109 if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 if(feof(fpipe->in.f)) /* nothing else interesting! */
1111 break;
1112 else if(ferror(fpipe->in.f)) /* bummer. */
1113 errstr = error_description(errno);
1115 else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 clearerr(fpipe->in.f);
1119 #else /* !NON_BLOCKING_IO */
1121 if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 || pipe_putc('\n', fpipe) == EOF))
1123 errstr = error_description(errno);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr && so_readc(&c, source_so))
1130 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 for(i = 0; i < outchars && !errstr; i++)
1132 if(pipe_putc(obuf[i], fpipe) == EOF)
1133 errstr = error_description(errno);
1135 if(pipe_close_write(fpipe))
1136 errstr = _("Pipe command returned error.");
1138 while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 errstr = gf_filter_puts(buf);
1141 #endif /* !NON_BLOCKING_IO */
1143 if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 errstr = _("Pipe command returned error.");
1146 gf_filter_eod();
1148 else
1149 errstr = _("Error setting up pipe command.");
1151 return(errstr);
1156 * gf_filter_puts - write the given string down the filter's pipe
1158 char *
1159 gf_filter_puts(register char *s)
1161 GF_INIT(gf_master, gf_master);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string ? gf_error_string : "NULL"));
1169 return(gf_error_string);
1172 while(*s)
1173 GF_PUTC(gf_master, (*s++) & 0xff);
1175 GF_END(gf_master, gf_master);
1176 return(NULL);
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1184 void
1185 gf_filter_eod(void)
1187 GF_INIT(gf_master, gf_master);
1188 (void) GF_FLUSH(gf_master);
1189 (*gf_master->f)(gf_master, GF_EOD);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1211 * via a vector.)
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1217 * void
1218 * gf_xxx_filter(f, flg)
1219 * FILTER_S *f;
1220 * int flg;
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1249 * - Filter's must pass GF_EOD they receive on to the next
1250 * filter in the chain so it has the opportunity to flush
1251 * any buffered data.
1253 * - All filters expect NVT end-of-lines. The idea is to prepend
1254 * or append either the gf_local_nvtnl or gf_nvtnl_local
1255 * os-dependant filters to the data on the appropriate end of the
1256 * pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
1274 void
1275 gf_binary_b64(FILTER_S *f, int flg)
1277 static char *v =
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f, f->next);
1281 if(flg == GF_DATA){
1282 register unsigned char c;
1283 register unsigned char t = f->t;
1284 register long n = f->n;
1286 while(GF_GETC(f, c)){
1288 switch(n++){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 case 42: case 45:
1292 GF_PUTC(f->next, v[c >> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t = c << 4; /* remember high 2 bits for next */
1295 break;
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 case 43:
1300 GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 t = c << 2;
1302 break;
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 case 44:
1307 GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 GF_PUTC(f->next, v[c & 0x3f]);
1309 break;
1312 if(n == 45){ /* start a new line? */
1313 GF_PUTC(f->next, '\015');
1314 GF_PUTC(f->next, '\012');
1315 n = 0L;
1319 f->n = n;
1320 f->t = t;
1321 GF_END(f, f->next);
1323 else if(flg == GF_EOD){ /* no more data */
1324 switch (f->n % 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1326 break;
1328 case 1:
1329 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 GF_PUTC(f->next, '='); /* byte 3 */
1331 GF_PUTC(f->next, '='); /* byte 4 */
1332 break;
1334 case 2:
1335 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 GF_PUTC(f->next, '='); /* byte 4 */
1337 break;
1340 /* end with CRLF */
1341 if(f->n){
1342 GF_PUTC(f->next, '\015');
1343 GF_PUTC(f->next, '\012');
1346 (void) GF_FLUSH(f->next);
1347 (*f->next->f)(f->next, GF_EOD);
1349 else if(flg == GF_RESET){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1351 f->n = 0L;
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1360 void
1361 gf_b64_binary(FILTER_S *f, int flg)
1363 static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f, f->next);
1373 if(flg == GF_DATA){
1374 register unsigned char c;
1375 register unsigned char t = f->t;
1376 register int n = (int) f->n;
1377 register int state = f->f1;
1379 while(GF_GETC(f, c)){
1381 if(state){
1382 state = 0;
1383 if (c != '=') {
1384 gf_error("Illegal '=' in base64 text");
1385 /* NO RETURN */
1389 /* in range, and a valid value? */
1390 if((c & ~0x7f) || (c = v[c]) > 63){
1391 if(c == 64){
1392 switch (n++) { /* check quantum position */
1393 case 2:
1394 state++; /* expect an equal as next char */
1395 break;
1397 case 3:
1398 n = 0L; /* restart quantum */
1399 break;
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1403 /* NO RETURN */
1407 else{
1408 switch (n++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1410 t = c << 2;
1411 break;
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f->next, (t|(c >> 4)));
1415 t = c << 4; /* byte 2: high 4 bits */
1416 break;
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f->next, (t|(c >> 2)));
1420 t = c << 6; /* byte 3: high 2 bits */
1421 break;
1423 case 3:
1424 GF_PUTC(f->next, t | c);
1425 n = 0L; /* reinitialize mechanism */
1426 break;
1431 f->f1 = state;
1432 f->t = t;
1433 f->n = n;
1434 GF_END(f, f->next);
1436 else if(flg == GF_EOD){
1437 (void) GF_FLUSH(f->next);
1438 (*f->next->f)(f->next, GF_EOD);
1440 else if(flg == GF_RESET){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f->n = 0L; /* quantum position */
1443 f->f1 = 0; /* state holder: equal seen? */
/*
 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
 * encoding described in rfc1341
 */

#define GF_MAXLINE      80              /* good buffer size */

/*
 * default action for QUOTED-PRINTABLE to 8BIT decoder
 *
 * Expects a local "state" variable in the expanding function.  An '='
 * starts an escape, a space starts a run of white space that is held
 * back in the filter's line buffer, and anything else is passed on.
 */
#define GF_QP_DEFAULT(f, c)     { \
                                    if((c) == '='){ \
                                        state = EQUAL; \
                                    } \
                                    else if((c) == ' '){ \
                                        state = WSPACE; \
                                        /* reset white space! */ \
                                        (f)->linep = (f)->line; \
                                        *((f)->linep)++ = ' '; \
                                    } \
                                    else \
                                      GF_PUTC((f)->next, (c)); \
                                }
1476 * QUOTED-PRINTABLE to 8BIT filter
1478 void
1479 gf_qp_8bit(FILTER_S *f, int flg)
1482 GF_INIT(f, f->next);
1484 if(flg == GF_DATA){
1485 register unsigned char c;
1486 register int state = f->f1;
1488 while(GF_GETC(f, c)){
1490 switch(state){
1491 case DFL : /* default case */
1492 default:
1493 GF_QP_DEFAULT(f, c);
1494 break;
1496 case CCR : /* non-significant space */
1497 state = DFL;
1498 if(c == '\012')
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f, c);
1502 break;
1504 case EQUAL :
1505 if(c == '\015'){ /* "=\015" is a soft EOL */
1506 state = CCR;
1507 break;
1510 if(c == '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f->next, '=');
1512 state = DFL;
1513 break;
1516 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1534 * below, as well.
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state = STOP_DECODING;
1540 GF_PUTC(f->next, '=');
1541 GF_PUTC(f->next, c);
1542 q_status_message(SM_ORDER,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
1546 break;
1549 if (isdigit ((unsigned char)c))
1550 f->t = c - '0';
1551 else
1552 f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1554 f->f2 = c; /* store character in case we have to
1555 back out in !isxdigit below */
1557 state = HEX;
1558 break;
1560 case HEX :
1561 state = DFL;
1562 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1563 state = STOP_DECODING;
1564 GF_PUTC(f->next, '=');
1565 GF_PUTC(f->next, f->f2);
1566 GF_PUTC(f->next, c);
1567 q_status_message(SM_ORDER,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
1571 break;
1574 if (isdigit((unsigned char)c))
1575 c -= '0';
1576 else
1577 c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f->next, c + (f->t << 4));
1580 break;
1582 case WSPACE :
1583 if(c == ' '){ /* toss it in with other spaces */
1584 if(f->linep - f->line < GF_MAXLINE)
1585 *(f->linep)++ = ' ';
1586 break;
1589 state = DFL;
1590 if(c == '\015'){ /* not our white space! */
1591 f->linep = f->line; /* reset buffer */
1592 GF_PUTC(f->next, '\015');
1593 break;
1596 /* the spaces are ours, write 'em */
1597 f->n = f->linep - f->line;
1598 while((f->n)--)
1599 GF_PUTC(f->next, ' ');
1601 GF_QP_DEFAULT(f, c); /* take care of 'c' in default way */
1602 break;
1604 case STOP_DECODING :
1605 GF_PUTC(f->next, c);
1606 break;
1610 f->f1 = state;
1611 GF_END(f, f->next);
1613 else if(flg == GF_EOD){
1614 fs_give((void **)&(f->line));
1615 (void) GF_FLUSH(f->next);
1616 (*f->next->f)(f->next, GF_EOD);
1618 else if(flg == GF_RESET){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1620 f->f1 = DFL;
1621 f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
/*
 * USEFUL MACROS TO HELP WITH QP ENCODING
 *
 * All of them count output columns in (f)->n and write to (f)->next.
 * Every macro argument is parenthesized and every body is a braced
 * block, so they are safe with expression arguments and inside
 * unbraced if/else statements.
 */

#define QP_MAXL                 75      /* 76th place only for continuation */

/*
 * Macro to write the soft line break used to wrap long
 * quoted printable lines
 */
#define GF_8BIT_WRAP(f)         { \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, '\015'); \
                                    GF_PUTC((f)->next, '\012'); \
                                }

/*
 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
 * line break if needed.
 */
#define GF_8BIT_PUT_QUOTE(f, c) { \
                                    if(((f)->n += 3) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 3;     /* set line count */ \
                                    } \
                                    GF_PUTC((f)->next, '='); \
                                    GF_PUTC((f)->next, HEX_CHAR1(c)); \
                                    GF_PUTC((f)->next, HEX_CHAR2(c)); \
                                }

/*
 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
 * if needed.  A '.' in the first column is written quoted.
 */
#define GF_8BIT_PUT(f, c)       { \
                                    if((++((f)->n)) > QP_MAXL){ \
                                        GF_8BIT_WRAP(f); \
                                        (f)->n = 1L; \
                                    } \
                                    if((f)->n == 1L && (c) == '.'){ \
                                        GF_8BIT_PUT_QUOTE(f, c); \
                                        (f)->n = 3; \
                                    } \
                                    else{ \
                                        GF_PUTC((f)->next, (c)); \
                                    } \
                                }

/*
 * default action for 8bit to quoted printable encoder
 *
 * Expects a local "state" variable in the expanding function.
 */
#define GF_8BIT_DEFAULT(f, c)   { \
                                    if((c) == ' '){ \
                                        state = WSPACE; \
                                    } \
                                    else if((c) == '\015'){ \
                                        state = CCR; \
                                    } \
                                    else if(iscntrl((c) & 0x7f) || ((c) == 0x7f) \
                                            || ((c) & 0x80) || ((c) == '=')){ \
                                        GF_8BIT_PUT_QUOTE(f, c); \
                                    } \
                                    else{ \
                                        GF_8BIT_PUT(f, c); \
                                    } \
                                }
1693 * 8BIT to QUOTED-PRINTABLE filter
1695 void
1696 gf_8bit_qp(FILTER_S *f, int flg)
1698 short dummy_dots = 0, dummy_dmap = 1;
1699 GF_INIT(f, f->next);
1701 if(flg == GF_DATA){
1702 register unsigned char c;
1703 register int state = f->f1;
1705 while(GF_GETC(f, c)){
1707 /* keep track of "^JFrom " */
1708 Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);
1710 switch(state){
1711 case DFL : /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f, c);
1713 break;
1715 case CCR : /* true line break? */
1716 state = DFL;
1717 if(c == '\012'){
1718 GF_PUTC(f->next, '\015');
1719 GF_PUTC(f->next, '\012');
1720 f->n = 0L;
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f, '\015');
1724 GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
1726 break;
1728 case WSPACE:
1729 state = DFL;
1730 if(c == '\015' || f->t){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f, ' ');
1732 f->t = 0; /* reset From flag */
1734 else
1735 GF_8BIT_PUT(f, ' ');
1737 GF_8BIT_DEFAULT(f, c); /* handle 'c' in the default way */
1738 break;
1742 f->f1 = state;
1743 GF_END(f, f->next);
1745 else if(flg == GF_EOD){
1746 switch(f->f1){
1747 case CCR :
1748 GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
1749 break;
1751 case WSPACE :
1752 GF_8BIT_PUT_QUOTE(f, ' '); /* write the last space */
1753 break;
1756 (void) GF_FLUSH(f->next);
1757 (*f->next->f)(f->next, GF_EOD);
1759 else if(flg == GF_RESET){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f->f1 = DFL; /* state from last character */
1762 f->f2 = 1; /* state of "^NFrom " bitmap */
1763 f->t = 0;
1764 f->n = 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1772 void
1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1775 static unsigned char *conv_table = NULL;
1776 GF_INIT(f, f->next);
1778 if(flg == GF_DATA){
1779 register unsigned char c;
1781 while(GF_GETC(f, c)){
1782 GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1785 GF_END(f, f->next);
1787 else if(flg == GF_EOD){
1788 (void) GF_FLUSH(f->next);
1789 (*f->next->f)(f->next, GF_EOD);
1791 else if(flg == GF_RESET){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
1799 typedef struct _utf8c_s {
1800 void *conv_table;
1801 int report_err;
1802 } UTF8C_S;
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1810 void
1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1813 static unsigned short *conv_table = NULL;
1814 static int report_err = 0;
1815 register int more = f->f2;
1816 register long u = f->n;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f, f->next);
1826 if(flg == GF_DATA){
1827 register unsigned char c;
1829 while(GF_GETC(f, c)){
1830 if(!conv_table){ /* can't do much if no conversion table */
1831 GF_PUTC(f->next, c);
1833 /* UTF-8 continuation? */
1834 else if((c > 0x7f) && (c < 0xc0)){
1835 if(more){
1836 u <<= 6; /* shift current value by 6 bits */
1837 u |= c & 0x3f;
1838 if (!--more){ /* last octet? */
1839 if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1845 c = '?';
1846 if(report_err){
1847 if(f->opt)
1848 fs_give((void **) &f->opt);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1854 /* NO RETURN */
1857 else{
1858 if(u > 0xff){
1859 c = (unsigned char) (u >> 8);
1860 GF_PUTC(f->next, c);
1863 c = (unsigned char) u & 0xff;
1866 GF_PUTC(f->next, c);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f->next, '?');
1873 else{
1874 if(more){ /* incomplete UTF-8 character */
1875 GF_PUTC(f->next, '?');
1876 more = 0;
1878 if(c < 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f->next, c);
1881 else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 u = c & 0x1f; /* first 5 bits of 12 */
1883 more = 1;
1885 else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 u = c & 0x0f; /* first 4 bits of 16 */
1887 more = 2;
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 u = c & 0x07; /* first 3 bits of 20.5 */
1892 more = 3;
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u = c & 0x03; /* first 2 bits of 26 */
1897 more = 4;
1899 else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u = c & 0x03; /* first 2 bits of 26 */
1901 more = 5;
1903 #endif
1904 else{ /* not in Unicode */
1905 GF_PUTC(f->next, '?');
1910 f->f2 = more;
1911 f->n = u;
1912 GF_END(f, f->next);
1914 else if(flg == GF_EOD){
1915 (void) GF_FLUSH(f->next);
1916 if(f->opt)
1917 fs_give((void **) &f->opt);
1919 (*f->next->f)(f->next, GF_EOD);
1921 else if(flg == GF_RESET){
1922 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1923 conv_table = ((UTF8C_S *) f->opt)->conv_table;
1924 report_err = ((UTF8C_S *) f->opt)->report_err;
1925 f->f2 = 0;
1926 f->n = 0L;
1931 void *
1932 gf_convert_utf8_charset_opt(void *table, int report_err)
1934 UTF8C_S *utf8c;
1936 utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1937 utf8c->conv_table = table;
1938 utf8c->report_err = report_err;
1939 return((void *) utf8c);
1944 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1946 * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1947 * or to Shift-JIS (if PC-Pine).
1949 void
1950 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1952 register unsigned char c;
1953 register int state = f->f1;
1956 * f->t lit means we're in middle of decoding a sequence of characters.
1957 * f->f2 keeps track of first character of pair for Shift-JIS.
1958 * f->f1 is the state.
1961 GF_INIT(f, f->next);
1963 if(flg == GF_DATA){
1964 while(GF_GETC(f, c)){
1965 switch(state){
1966 case ESC: /* saw ESC */
1967 if(!f->t && c == '$')
1968 state = ESCDOL;
1969 else if(f->t && c == '(')
1970 state = ESCPAR;
1971 else{
1972 GF_PUTC(f->next, '\033');
1973 GF_PUTC(f->next, c);
1974 state = DFL;
1977 break;
1979 case ESCDOL: /* saw ESC $ */
1980 if(c == 'B' || c == '@'){
1981 state = EUC;
1982 f->t = 1; /* filtering into euc */
1983 f->f2 = -1; /* first character of pair */
1985 else{
1986 GF_PUTC(f->next, '\033');
1987 GF_PUTC(f->next, '$');
1988 GF_PUTC(f->next, c);
1989 state = DFL;
1992 break;
1994 case ESCPAR: /* saw ESC ( */
1995 if(c == 'B' || c == 'J' || c == 'H'){
1996 state = DFL;
1997 f->t = 0; /* done filtering */
1999 else{
2000 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2001 GF_PUTC(f->next, '('); /* escape sequences, which */
2002 GF_PUTC(f->next, c); /* this appears to be. */
2005 break;
2007 case EUC: /* filtering into euc */
2008 if(c == '\033')
2009 state = ESC;
2010 else{
2011 #ifdef _WINDOWS /* Shift-JIS */
2012 c &= 0x7f; /* 8-bit can't win */
2013 if (f->f2 >= 0){ /* second of a pair? */
2014 int rowOffset = (f->f2 < 95) ? 112 : 176;
2015 int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2016 : 126;
2018 GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2019 GF_PUTC(f->next, c + cellOffset);
2020 f->f2 = -1; /* restart */
2022 else if(c > 0x20 && c < 0x7f)
2023 f->f2 = c; /* first of pair */
2024 else{
2025 GF_PUTC(f->next, c); /* write CTL as itself */
2026 f->f2 = -1;
2028 #else /* EUC */
2029 GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2030 #endif
2033 break;
2035 case DFL:
2036 default:
2037 if(c == '\033')
2038 state = ESC;
2039 else
2040 GF_PUTC(f->next, c);
2042 break;
2046 f->f1 = state;
2047 GF_END(f, f->next);
2049 else if(flg == GF_EOD){
2050 switch(state){
2051 case ESC:
2052 GF_PUTC(f->next, '\033');
2053 break;
2055 case ESCDOL:
2056 GF_PUTC(f->next, '\033');
2057 GF_PUTC(f->next, '$');
2058 break;
2060 case ESCPAR:
2061 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2062 GF_PUTC(f->next, '('); /* escape sequences. */
2063 break;
2066 (void) GF_FLUSH(f->next);
2067 (*f->next->f)(f->next, GF_EOD);
2069 else if(flg == GF_RESET){
2070 dprint((9, "-- gf_reset jp_to_euc\n"));
2071 f->f1 = DFL; /* state */
2072 f->t = 0; /* not translating to euc */
2078 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2080 void
2081 gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
2083 #ifdef _WINDOWS
2084 gf_sjis_to_2022_jp(f, flg);
2085 #else
2086 gf_euc_to_2022_jp(f, flg);
2087 #endif
2091 void
2092 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2094 register unsigned char c;
2097 * f->t lit means we've sent the start esc seq but not the end seq.
2098 * f->f2 keeps track of first character of pair for Shift-JIS.
2101 GF_INIT(f, f->next);
2103 if(flg == GF_DATA){
2104 while(GF_GETC(f, c)){
2105 if(f->t){
2106 if(c & 0x80){
2107 GF_PUTC(f->next, c & 0x7f);
2109 else{
2110 GF_PUTC(f->next, '\033');
2111 GF_PUTC(f->next, '(');
2112 GF_PUTC(f->next, 'B');
2113 GF_PUTC(f->next, c);
2114 f->f2 = -1;
2115 f->t = 0;
2118 else{
2119 if(c & 0x80){
2120 GF_PUTC(f->next, '\033');
2121 GF_PUTC(f->next, '$');
2122 GF_PUTC(f->next, 'B');
2123 GF_PUTC(f->next, c & 0x7f);
2124 f->t = 1;
2126 else{
2127 GF_PUTC(f->next, c);
2132 GF_END(f, f->next);
2134 else if(flg == GF_EOD){
2135 if(f->t){
2136 GF_PUTC(f->next, '\033');
2137 GF_PUTC(f->next, '(');
2138 GF_PUTC(f->next, 'B');
2139 f->t = 0;
2140 f->f2 = -1;
2143 (void) GF_FLUSH(f->next);
2144 (*f->next->f)(f->next, GF_EOD);
2146 else if(flg == GF_RESET){
2147 dprint((9, "-- gf_reset euc_to_jp\n"));
2148 f->t = 0;
2149 f->f2 = -1;
2153 void
2154 gf_sjis_to_2022_jp(FILTER_S *f, int flg)
2156 register unsigned char c;
2159 * f->t lit means we've sent the start esc seq but not the end seq.
2160 * f->f2 keeps track of first character of pair for Shift-JIS.
2163 GF_INIT(f, f->next);
2165 if(flg == GF_DATA){
2166 while(GF_GETC(f, c)){
2167 if(f->t){
2168 if(f->f2 >= 0){ /* second of a pair? */
2169 int adjust = c < 159;
2170 int rowOffset = f->f2 < 160 ? 112 : 176;
2171 int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;
2173 GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
2174 GF_PUTC(f->next, c - cellOffset);
2175 f->f2 = -1;
2177 else if(c & 0x80){
2178 f->f2 = c; /* remember first of pair */
2180 else{
2181 GF_PUTC(f->next, '\033');
2182 GF_PUTC(f->next, '(');
2183 GF_PUTC(f->next, 'B');
2184 GF_PUTC(f->next, c);
2185 f->f2 = -1;
2186 f->t = 0;
2189 else{
2190 if(c & 0x80){
2191 GF_PUTC(f->next, '\033');
2192 GF_PUTC(f->next, '$');
2193 GF_PUTC(f->next, 'B');
2194 f->f2 = c;
2195 f->t = 1;
2197 else{
2198 GF_PUTC(f->next, c);
2203 GF_END(f, f->next);
2205 else if(flg == GF_EOD){
2206 if(f->t){
2207 GF_PUTC(f->next, '\033');
2208 GF_PUTC(f->next, '(');
2209 GF_PUTC(f->next, 'B');
2210 f->t = 0;
2211 f->f2 = -1;
2214 (void) GF_FLUSH(f->next);
2215 (*f->next->f)(f->next, GF_EOD);
2217 else if(flg == GF_RESET){
2218 dprint((9, "-- gf_reset sjis_to_jp\n"));
2219 f->t = 0;
2220 f->f2 = -1;
2227 * Various charset to UTF-8 Translation filter
2231 * utf8 conversion options
2233 typedef struct _utf8_s {
2234 CHARSET *charset;
2235 unsigned long ucsc;
2236 } UTF8_S;
2238 #define UTF8_BLOCK 1024
2239 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2240 #define UTF8_ADD(f, c) \
2242 if(p >= eobuf){ \
2243 f->f2 += UTF8_BLOCK; \
2244 fs_resize((void **)&f->line, \
2245 (size_t) f->f2 * sizeof(char)); \
2246 eobuf = UTF8_EOB(f); \
2247 p = eobuf - UTF8_BLOCK; \
2249 *p++ = c; \
2251 #define GF_UTF8_FLUSH(f) { \
2252 register long n; \
2253 SIZEDTEXT intext, outtext; \
2254 intext.data = (unsigned char *) f->line; \
2255 intext.size = p - f->line; \
2256 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2257 if(!((UTF8_S *) f->opt)->charset){ \
2258 for(n = 0; n < intext.size; n++) \
2259 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2261 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2262 for(n = 0; n < outtext.size; n++) \
2263 GF_PUTC(f->next, outtext.data[n]); \
2264 if(outtext.data && intext.data != outtext.data) \
2265 fs_give((void **) &outtext.data); \
2267 else{ \
2268 for(n = 0; n < intext.size; n++) \
2269 GF_PUTC(f->next, '?'); \
2275 * gf_utf8 - text in specified charset to to UTF-8 filter
2276 * Process line-at-a-time rather than character
2277 * because ISO-2022-JP. Call utf8_text_cs by hand
2278 * rather than utf8_text to reduce the cost of
2279 * utf8_charset() for each line.
2281 void
2282 gf_utf8(FILTER_S *f, int flg)
2284 register char *p = f->linep;
2285 register char *eobuf = UTF8_EOB(f);
2286 GF_INIT(f, f->next);
2288 if(flg == GF_DATA){
2289 register int state = f->f1;
2290 register unsigned char c;
2292 while(GF_GETC(f, c)){
2294 switch(state){
2295 case CCR :
2296 state = DFL;
2297 if(c == '\012'){
2298 GF_UTF8_FLUSH(f);
2299 p = f->line;
2300 GF_PUTC(f->next, '\015');
2301 GF_PUTC(f->next, '\012');
2303 else{
2304 UTF8_ADD(f, '\015');
2305 UTF8_ADD(f, c);
2308 break;
2310 default :
2311 if(c == '\015'){
2312 state = CCR;
2314 else
2315 UTF8_ADD(f, c);
2319 f->f1 = state;
2320 GF_END(f, f->next);
2322 else if(flg == GF_EOD){
2324 if(p != f->line)
2325 GF_UTF8_FLUSH(f);
2327 fs_give((void **) &f->line);
2328 fs_give((void **) &f->opt);
2329 (void) GF_FLUSH(f->next);
2330 (*f->next->f)(f->next, GF_EOD);
2332 else if(GF_RESET){
2333 dprint((9, "-- gf_reset utf8\n"));
2334 f->f1 = DFL;
2335 f->f2 = UTF8_BLOCK; /* input buffer length */
2336 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2339 f->linep = p;
2343 void *
2344 gf_utf8_opt(char *charset)
2346 UTF8_S *utf8;
2348 utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2350 utf8->charset = (CHARSET *) utf8_charset(charset);
2353 * When we get 8-bit non-ascii characters but it is supposed to
2354 * be ascii we want it to turn into question marks, not
2355 * just behave as if it is UTF-8 which is what happens
2356 * with ascii because there is no translation table.
2357 * So we need to catch the ascii special case here.
2359 if(utf8->charset && utf8->charset->type == CT_ASCII)
2360 utf8->charset = NULL;
2362 return((void *) utf8);
2367 * RICHTEXT-TO-PLAINTEXT filter
2371 * option to be used by rich2plain (NOTE: if this filter is ever
2372 * used more than once in a pipe, all instances will have the same
2373 * option value)
2377 /*----------------------------------------------------------------------
2378 richtext to plaintext filter
2380 Args: f --
2381 flg --
2383 This basically removes all richtext formatting. A cute hack is used
2384 to get bold and underlining to work.
2385 Further work could be done to handle things like centering and right
2386 and left flush, but then it could no longer be done in place. This
2387 operates on text *with* CRLF's.
2389 WARNING: does not wrap lines!
2390 ----*/
2391 void
2392 gf_rich2plain(FILTER_S *f, int flg)
2394 static int rich_bold_on = 0, rich_uline_on = 0;
2396 /* BUG: qoute incoming \255 values */
2397 GF_INIT(f, f->next);
2399 if(flg == GF_DATA){
2400 register unsigned char c;
2401 register int state = f->f1;
2402 register int plain;
2404 plain = f->opt ? (*(int *) f->opt) : 0;
2406 while(GF_GETC(f, c)){
2408 switch(state){
2409 case TOKEN : /* collect a richtext token */
2410 if(c == '>'){ /* what should we do with it? */
2411 state = DFL; /* return to default next time */
2412 *(f->linep) = '\0'; /* cap off token */
2413 if(f->line[0] == 'l' && f->line[1] == 't'){
2414 GF_PUTC(f->next, '<'); /* literal '<' */
2416 else if(f->line[0] == 'n' && f->line[1] == 'l'){
2417 GF_PUTC(f->next, '\015');/* newline! */
2418 GF_PUTC(f->next, '\012');
2420 else if(!strcmp("comment", f->line)){
2421 (f->f2)++;
2423 else if(!strcmp("/comment", f->line)){
2424 f->f2 = 0;
2426 else if(!strcmp("/paragraph", f->line)) {
2427 GF_PUTC(f->next, '\r');
2428 GF_PUTC(f->next, '\n');
2429 GF_PUTC(f->next, '\r');
2430 GF_PUTC(f->next, '\n');
2432 else if(!plain /* gf_rich_plain */){
2433 if(!strcmp(f->line, "bold")) {
2434 GF_PUTC(f->next, TAG_EMBED);
2435 GF_PUTC(f->next, TAG_BOLDON);
2436 rich_bold_on = 1;
2437 } else if(!strcmp(f->line, "/bold")) {
2438 GF_PUTC(f->next, TAG_EMBED);
2439 GF_PUTC(f->next, TAG_BOLDOFF);
2440 rich_bold_on = 0;
2441 } else if(!strcmp(f->line, "italic")) {
2442 GF_PUTC(f->next, TAG_EMBED);
2443 GF_PUTC(f->next, TAG_ULINEON);
2444 rich_uline_on = 1;
2445 } else if(!strcmp(f->line, "/italic")) {
2446 GF_PUTC(f->next, TAG_EMBED);
2447 GF_PUTC(f->next, TAG_ULINEOFF);
2448 rich_uline_on = 0;
2449 } else if(!strcmp(f->line, "underline")) {
2450 GF_PUTC(f->next, TAG_EMBED);
2451 GF_PUTC(f->next, TAG_ULINEON);
2452 rich_uline_on = 1;
2453 } else if(!strcmp(f->line, "/underline")) {
2454 GF_PUTC(f->next, TAG_EMBED);
2455 GF_PUTC(f->next, TAG_ULINEOFF);
2456 rich_uline_on = 0;
2459 /* else we just ignore the token! */
2461 f->linep = f->line; /* reset token buffer */
2463 else{ /* add char to token */
2464 if(f->linep - f->line > 40){
2465 /* What? rfc1341 says 40 char tokens MAX! */
2466 fs_give((void **)&(f->line));
2467 gf_error("Richtext token over 40 characters");
2468 /* NO RETURN */
2471 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2473 break;
2475 case CCR :
2476 state = DFL; /* back to default next time */
2477 if(c == '\012'){ /* treat as single space? */
2478 GF_PUTC(f->next, ' ');
2479 break;
2481 /* fall thru to process c */
2483 case DFL :
2484 default:
2485 if(c == '<')
2486 state = TOKEN;
2487 else if(c == '\015')
2488 state = CCR;
2489 else if(!f->f2) /* not in comment! */
2490 GF_PUTC(f->next, c);
2492 break;
2496 f->f1 = state;
2497 GF_END(f, f->next);
2499 else if(flg == GF_EOD){
2500 if((f->f1 = (f->linep != f->line)) != 0){
2501 /* incomplete token!! */
2502 gf_error("Incomplete token in richtext");
2503 /* NO RETURN */
2506 if(rich_uline_on){
2507 GF_PUTC(f->next, TAG_EMBED);
2508 GF_PUTC(f->next, TAG_ULINEOFF);
2509 rich_uline_on = 0;
2511 if(rich_bold_on){
2512 GF_PUTC(f->next, TAG_EMBED);
2513 GF_PUTC(f->next, TAG_BOLDOFF);
2514 rich_bold_on = 0;
2517 fs_give((void **)&(f->line));
2518 (void) GF_FLUSH(f->next);
2519 (*f->next->f)(f->next, GF_EOD);
2521 else if(flg == GF_RESET){
2522 dprint((9, "-- gf_reset rich2plain\n"));
2523 f->f1 = DFL; /* state */
2524 f->f2 = 0; /* set means we're in a comment */
2525 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
/*
 * function called from the outside to set
 * richtext filter's options
 *
 * The caller's pointer is handed back unchanged; nothing is copied.
 */
void *
gf_rich2plain_opt(int *plain)
{
    void *opt = (void *) plain;

    return(opt);
}
2543 * ENRICHED-TO-PLAIN text filter
2546 #define TEF_QUELL 0x01
2547 #define TEF_NOFILL 0x02
2551 /*----------------------------------------------------------------------
2552 enriched text to plain text filter (ala rfc1523)
2554 Args: f -- state and input data
2555 flg --
2557 This basically removes all enriched formatting. A cute hack is used
2558 to get bold and underlining to work.
2560 Further work could be done to handle things like centering and right
2561 and left flush, but then it could no longer be done in place. This
2562 operates on text *with* CRLF's.
2564 WARNING: does not wrap lines!
2565 ----*/
2566 void
2567 gf_enriched2plain(FILTER_S *f, int flg)
2569 static int enr_uline_on = 0, enr_bold_on = 0;
2571 /* BUG: qoute incoming \255 values */
2572 GF_INIT(f, f->next);
2574 if(flg == GF_DATA){
2575 register unsigned char c;
2576 register int state = f->f1;
2577 register int plain;
2579 plain = f->opt ? (*(int *) f->opt) : 0;
2581 while(GF_GETC(f, c)){
2583 switch(state){
2584 case TOKEN : /* collect a richtext token */
2585 if(c == '>'){ /* what should we do with it? */
2586 int off = *f->line == '/';
2587 char *token = f->line + (off ? 1 : 0);
2588 state = DFL;
2589 *f->linep = '\0';
2590 if(!strcmp("param", token)){
2591 if(off)
2592 f->f2 &= ~TEF_QUELL;
2593 else
2594 f->f2 |= TEF_QUELL;
2596 else if(!strcmp("nofill", token)){
2597 if(off)
2598 f->f2 &= ~TEF_NOFILL;
2599 else
2600 f->f2 |= TEF_NOFILL;
2602 else if(!plain /* gf_enriched_plain */){
2603 /* Following is a cute hack or two to get
2604 bold and underline on the screen.
2605 See Putline0n() where these codes are
2606 interpreted */
2607 if(!strcmp("bold", token)) {
2608 GF_PUTC(f->next, TAG_EMBED);
2609 GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2610 enr_bold_on = off ? 0 : 1;
2611 } else if(!strcmp("italic", token)) {
2612 GF_PUTC(f->next, TAG_EMBED);
2613 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2614 enr_uline_on = off ? 0 : 1;
2615 } else if(!strcmp("underline", token)) {
2616 GF_PUTC(f->next, TAG_EMBED);
2617 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2618 enr_uline_on = off ? 0 : 1;
2621 /* else we just ignore the token! */
2623 f->linep = f->line; /* reset token buffer */
2625 else if(c == '<'){ /* literal '<'? */
2626 if(f->linep == f->line){
2627 GF_PUTC(f->next, '<');
2628 state = DFL;
2630 else{
2631 fs_give((void **)&(f->line));
2632 gf_error("Malformed Enriched text: unexpected '<'");
2633 /* NO RETURN */
2636 else{ /* add char to token */
2637 if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2638 fs_give((void **)&(f->line));
2639 gf_error("Malformed Enriched text: token too long");
2640 /* NO RETURN */
2643 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2645 break;
2647 case CCR :
2648 if(c != '\012'){ /* treat as single space? */
2649 state = DFL; /* lone cr? */
2650 f->f2 &= ~TEF_QUELL;
2651 GF_PUTC(f->next, '\015');
2652 goto df;
2655 state = CLF;
2656 break;
2658 case CLF :
2659 if(c == '\015'){ /* treat as single space? */
2660 state = CCR; /* repeat crlf's mean real newlines */
2661 f->f2 |= TEF_QUELL;
2662 GF_PUTC(f->next, '\r');
2663 GF_PUTC(f->next, '\n');
2664 break;
2666 else{
2667 state = DFL;
2668 if(!((f->f2) & TEF_QUELL))
2669 GF_PUTC(f->next, ' ');
2671 f->f2 &= ~TEF_QUELL;
2674 /* fall thru to take care of 'c' */
2676 case DFL :
2677 default :
2678 df :
2679 if(c == '<')
2680 state = TOKEN;
2681 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2682 state = CCR;
2683 else if(!((f->f2) & TEF_QUELL))
2684 GF_PUTC(f->next, c);
2686 break;
2690 f->f1 = state;
2691 GF_END(f, f->next);
2693 else if(flg == GF_EOD){
2694 if((f->f1 = (f->linep != f->line)) != 0){
2695 /* incomplete token!! */
2696 gf_error("Incomplete token in richtext");
2697 /* NO RETURN */
2699 if(enr_uline_on){
2700 GF_PUTC(f->next, TAG_EMBED);
2701 GF_PUTC(f->next, TAG_ULINEOFF);
2702 enr_uline_on = 0;
2704 if(enr_bold_on){
2705 GF_PUTC(f->next, TAG_EMBED);
2706 GF_PUTC(f->next, TAG_BOLDOFF);
2707 enr_bold_on = 0;
2710 /* Make sure we end with a newline so everything gets flushed */
2711 GF_PUTC(f->next, '\015');
2712 GF_PUTC(f->next, '\012');
2714 fs_give((void **)&(f->line));
2716 (void) GF_FLUSH(f->next);
2717 (*f->next->f)(f->next, GF_EOD);
2719 else if(flg == GF_RESET){
2720 dprint((9, "-- gf_reset enriched2plain\n"));
2721 f->f1 = DFL; /* state */
2722 f->f2 = 0; /* set means we're in a comment */
2723 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
/*
 * function called from the outside to set
 * enriched text filter's options
 *
 * The caller's pointer is handed back unchanged; nothing is copied.
 */
void *
gf_enriched2plain_opt(int *plain)
{
    void *opt = (void *) plain;

    return(opt);
}
2741 * HTML-TO-PLAIN text filter
2745 /* OK, here's the plan:
2747 * a universal output function handles writing chars and worries
2748 * about wrapping.
2750 * a universal element collector reads chars and collects params
2751 * and dispatches the appropriate element handler.
2753 * element handlers are stacked. The most recently dispatched gets
2754 * first crack at the incoming character stream. It passes bytes it's
2755 * done with or not interested in to the next
2757 * installs that handler as the current one collecting data...
2759 * stacked handlers take their params from the element collector and
2760 * accept chars or do whatever they need to do. Sort of a vertical
2761 * piping? recursion-like? hmmm.
2763 * at least I think this is how it'll work. tres simple, non?
2769 * Some important constants
/*
 * Buffer sizes, entity-collector result codes and flag values.
 *
 * NOTE(review): HTML_LITERAL and HTML_DOBOLD are both 0x0400, and
 * HTML_NEWLINE (0x010A) has the HTML_BADVALUE bit (0x0100) set --
 * confirm these values are never tested against one another.
 */
2771 #define HTML_BUF_LEN 2048 /* max scratch buffer length */
2772 #define MAX_ENTITY 20 /* maximum length of an entity */
2773 #define MAX_ELEMENT 72 /* maximum length of an element */
2774 #define HTML_MOREDATA 0 /* expect more entity data */
2775 #define HTML_ENTITY 1 /* valid entity collected */
2776 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2777 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2778 #define HTML_LITERAL 0x0400 /* Literal character value */
2779 #define HTML_NEWLINE 0x010A /* hard newline */
2780 #define HTML_DOBOLD 0x0400 /* Start Bold display */
2781 #define HTML_ID_GET 0 /* indent func: return current val */
2782 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2783 #define HTML_ID_INC 2 /* indent func: increment by val */
/* NOTE(review): presumably style flags for the H1..H6 handlers -- confirm */
2784 #define HTML_HX_CENTER 0x0001
2785 #define HTML_HX_ULINE 0x0002
2786 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2790 * Handler data, state information including function that uses it
/*
 * One entry in the list of active element handlers (see the h_stack
 * member of HTML_DATA_S and the HANDLERS() macro).
 */
2792 typedef struct handler_s {
2793 FILTER_S *html_data; /* FILTER_S associated with this handler */
2794 void *element; /* read back as an ELPROP_S * by the EL() macro */
2795 long x, y, z; /* NOTE(review): handler-private values; meaning not visible in this chunk */
2796 void *dp; /* NOTE(review): presumably handler-private data -- confirm */
2797 unsigned char *s; /* NOTE(review): purpose not visible in this chunk */
2798 struct handler_s *below; /* link to another handler, presumably the next one down the stack */
2799 } HANDLER_S;
2802 * Element Property structure
/*
 * Describes one markup element: its name, the function that handles
 * it, and a one-bit block-level flag.
 */
2804 typedef struct _element_properties {
2805 char *element; /* element name */
2806 int (*handler)(HANDLER_S *, int, int); /* invoked by HTML_TEXT_OUT as (handler, char, GF_DATA) */
2807 unsigned blocklevel:1; /* NOTE(review): presumably set for block-level elements -- confirm */
2808 } ELPROP_S;
2811 * Types used to manage HTML parsing
/* Forward declaration of a file-local helper; its definition is outside this chunk. */
2813 static void html_handoff(HANDLER_S *, int);
2817 * to help manage line wrapping.
/*
 * A text buffer that tracks three separate quantities: how many chars
 * it holds, how wide that text is when displayed, and how large the
 * allocation is.
 */
2819 typedef struct _wrap_line {
2820 char *buf; /* buf to collect wrapped text */
2821 int used, /* number of chars in buf */
2822 width, /* text's width as displayed */
2823 len; /* length of allocated buf */
2824 } WRAPLINE_S;
2828 * to help manage centered text
/*
 * State for centered output: the line assembled so far and the word
 * currently being collected.  WRAPPED_LEN() adds the two widths, plus
 * one column when both are non-zero.
 */
2830 typedef struct _center_s {
2831 WRAPLINE_S line; /* buf to assemble centered text */
2832 WRAPLINE_S word; /* word being appended to line */
2833 int anchor; /* NOTE(review): purpose not visible in this chunk */
2834 short space; /* NOTE(review): purpose not visible in this chunk */
2835 } CENTER_S;
2839 * Collector data and state information
/*
 * Working state of the element collector.  NEW_CLCTR() allocates and
 * zero-fills one of these; FREE_CLCTR() releases the attribs list, the
 * element name and the structure itself.
 */
2841 typedef struct collector_s {
2842 char buf[HTML_BUF_LEN]; /* buffer to collect data */
2843 int len; /* length of that buffer (HTML_PROC dumps buf[0..len-1]) */
2844 unsigned end_tag:1; /* collecting a closing tag */
2845 unsigned hit_equal:1; /* collecting right half of attrib */
2846 unsigned mkup_decl:1; /* markup declaration */
2847 unsigned start_comment:1; /* markup declaration comment */
2848 unsigned end_comment:1; /* legit comment format */
2849 unsigned hyphen:1; /* markup hyphen read */
2850 unsigned badform:1; /* malformed markup element */
2851 unsigned overrun:1; /* Overran buf above */
2852 unsigned proc_inst:1; /* XML processing instructions */
2853 unsigned empty:1; /* empty element */
2854 unsigned was_quoted:1; /* basically to catch null string */
2855 char quoted; /* quoted element param value */
2856 char *element; /* element's collected name */
2857 PARAMETER *attribs; /* element's collected attributes */
2858 PARAMETER *cur_attrib; /* attribute now being collected */
2859 } CLCTR_S;
2863 * State information for all element handlers
/*
 * Per-filter state shared by all element handlers; reached from a
 * FILTER_S through the HD() macro.
 */
2865 typedef struct html_data {
2866 HANDLER_S *h_stack; /* handler list */
2867 CLCTR_S *el_data; /* element collector data */
2868 CENTER_S *centered; /* struct to manage centered text */
2869 int (*token)(FILTER_S *, int); /* set by NEW_CLCTR, cleared by FREE_CLCTR; HTML_PROC feeds chars to it while non-NULL */
2870 char quoted; /* quoted, by either ' or ", text */
2871 short indent_level; /* levels of indention */
2872 int in_anchor; /* text now being written to anchor */
2873 int blanks; /* Consecutive blank line count */
2874 int wrapcol; /* column to wrap lines on */
2875 int *prefix; /* buffer containing Anchor prefix */
2876 int prefix_used; /* NOTE(review): presumably count of entries used in prefix -- confirm */
2877 long line_bufsize; /* current size of the line buffer */
2878 COLOR_PAIR *color; /* NOTE(review): purpose not visible in this chunk */
2879 struct {
2880 int state; /* embedded data state */
2881 char *color; /* embedded color pointer */
2882 } embedded;
2883 CBUF_S cb; /* utf8->ucs4 conversion state */
2884 unsigned wrapstate:1; /* whether or not to wrap output */
2885 unsigned li_pending:1; /* <LI> next token expected */
2886 unsigned de_pending:1; /* <DT> or <DD> next token expected */
2887 unsigned bold_on:1; /* currently bolding text */
2888 unsigned uline_on:1; /* currently underlining text */
2889 unsigned center:1; /* center output text */
2890 unsigned bitbucket:1; /* Ignore input */
2891 unsigned head:1; /* In doc's HEAD */
2892 unsigned body:1; /* In doc's BODY */
2893 unsigned alt_entity:1; /* use alternative entity values */
2894 unsigned wrote:1; /* anything written yet? */
2895 } HTML_DATA_S;
2899 * HTML filter options
/*
 * Options for the HTML filter.  The accessor macros read it through
 * the filter's opt pointer, cast to HTML_OPT_S *.
 */
2901 typedef struct _html_opts {
2902 char *base; /* Base URL for this html file */
2903 int columns, /* Display columns (excluding margins) */
2904 indent; /* Left margin */
2905 HANDLE_S **handlesp; /* Head of handles */
2906 htmlrisk_t warnrisk_f; /* Nasty link warning call */
2907 ELPROP_S *element_table; /* markup element table */
2908 RSS_FEED_S **feedp; /* hook for RSS feed response */
2909 unsigned strip:1; /* when set, HTML_BOLD and friends emit no highlight tags */
2910 unsigned handles_loc:1; /* Local handles requested? */
2911 unsigned showserver:1; /* Display server after anchors */
2912 unsigned outputted:1; /* any (NOTE(review): original comment appears truncated) */
2913 unsigned no_relative_links:1; /* Disable embedded relative links */
2914 unsigned related_content:1; /* Embedded related content */
2915 unsigned html:1; /* Output content in HTML */
2916 unsigned html_imgs:1; /* Output IMG tags in HTML content */
2917 } HTML_OPT_S;
2922 * Some macros to make life a little easier
/*
 * Accessors taking a FILTER_S pointer X.  Most test (X)->opt first and
 * fall back to a default (80 columns, 0 indent, NULL base, or false)
 * when no options are attached.
 */
2924 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2925 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2926 #define HTML_WROTE(X) (HD(X)->wrote)
2927 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
2928 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2929 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2930 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
/* HANDLESP dereferences opt unchecked; DO_HANDLES is the guarded test */
2931 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2932 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2933 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2934 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2935 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2936 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
/* ELEMENTS and RSS_FEED also dereference opt (and feedp) without a NULL check */
2937 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2938 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
/* MAKE_LITERAL keeps the low 8 bits of C and sets the HTML_LITERAL bit */
2939 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2940 #define IS_LITERAL(C) (HTML_LITERAL & (C))
2941 #define HD(X) ((HTML_DATA_S *)(X)->data)
2942 #define ED(X) (HD(X)->el_data)
2943 #define EL(X) ((ELPROP_S *) (X)->element)
/* only values below 0x80 are ever handed to isspace() */
2944 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
/* literal-tagged values never count as space; HTML_NEWLINE always does */
2945 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
2946 #define NEW_CLCTR(X) { \
2947 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2948 memset(ED(X), 0, sizeof(CLCTR_S)); \
2949 HD(X)->token = html_element_collector; \
2952 #define FREE_CLCTR(X) { \
2953 if(ED(X)->attribs){ \
2954 PARAMETER *p; \
2955 while((p = ED(X)->attribs) != NULL){ \
2956 ED(X)->attribs = ED(X)->attribs->next; \
2957 if(p->attribute) \
2958 fs_give((void **)&p->attribute); \
2959 if(p->value) \
2960 fs_give((void **)&p->value); \
2961 fs_give((void **)&p); \
2964 if(ED(X)->element) \
2965 fs_give((void **) &ED(X)->element); \
2966 fs_give((void **) &ED(X)); \
2967 HD(X)->token = NULL; \
/* Shorthand for fields of the HTML_DATA_S attached to filter X */
2969 #define HANDLERS(X) (HD(X)->h_stack)
2970 #define BOLD_BIT(X) (HD(X)->bold_on)
2971 #define ULINE_BIT(X) (HD(X)->uline_on)
2972 #define CENTER_BIT(X) (HD(X)->center)
2973 #define HTML_FLUSH(X) { \
2974 html_write(X, (X)->line, (X)->linep - (X)->line); \
2975 (X)->linep = (X)->line; \
2976 (X)->f2 = 0L; \
2978 #define HTML_BOLD(X, S) if(! STRIP(X)){ \
2979 if((S)){ \
2980 html_output((X), TAG_EMBED); \
2981 html_output((X), TAG_BOLDON); \
2983 else if(!(S)){ \
2984 html_output((X), TAG_EMBED); \
2985 html_output((X), TAG_BOLDOFF); \
2988 #define HTML_ULINE(X, S) \
2989 if(! STRIP(X)){ \
2990 if((S)){ \
2991 html_output((X), TAG_EMBED); \
2992 html_output((X), TAG_ULINEON); \
2994 else if(!(S)){ \
2995 html_output((X), TAG_EMBED); \
2996 html_output((X), TAG_ULINEOFF); \
2999 #define HTML_ITALIC(X, S) \
3000 if(! STRIP(X)){ \
3001 if(S){ \
3002 html_output((X), TAG_EMBED); \
3003 html_output((X), TAG_ITALICON); \
3005 else if(!(S)){ \
3006 html_output((X), TAG_EMBED); \
3007 html_output((X), TAG_ITALICOFF); \
3010 #define HTML_STRIKE(X, S) \
3011 if(! STRIP(X)){ \
3012 if(S){ \
3013 html_output((X), TAG_EMBED); \
3014 html_output((X), TAG_STRIKEON); \
3016 else if(!(S)){ \
3017 html_output((X), TAG_EMBED); \
3018 html_output((X), TAG_STRIKEOFF); \
3021 #define HTML_BIG(X, S) \
3022 if(! STRIP(X)){ \
3023 if(S){ \
3024 html_output((X), TAG_EMBED); \
3025 html_output((X), TAG_BIGON); \
3027 else if(!(S)){ \
3028 html_output((X), TAG_EMBED); \
3029 html_output((X), TAG_BIGOFF); \
3032 #define HTML_SMALL(X, S) \
3033 if(! STRIP(X)){ \
3034 if(S){ \
3035 html_output((X), TAG_EMBED); \
3036 html_output((X), TAG_SMALLON); \
3038 else if(!(S)){ \
3039 html_output((X), TAG_EMBED); \
3040 html_output((X), TAG_SMALLOFF); \
/*
 * WRAPPED_LEN - display width of the text pending in X's centering
 * state: the assembled line's width plus the current word's width,
 * plus one extra column when both are non-zero (presumably the space
 * that will separate them).  Evaluates to 0 when X has no centering
 * state attached.
 *
 * X is a FILTER_S pointer expression.  It is evaluated several times,
 * so it must be free of side effects.  The expansion refers only to
 * its argument; it no longer depends on a variable named "f" being in
 * scope at the point of use.
 */
#define	WRAPPED_LEN(X)	((HD(X)->centered) \
			   ? (HD(X)->centered->line.width \
			      + HD(X)->centered->word.width \
			      + ((HD(X)->centered->line.width \
				  && HD(X)->centered->word.width) \
				 ? 1 : 0)) \
			   : 0)
3050 #define HTML_DUMP_LIT(F, S, L) { \
3051 int i, c; \
3052 for(i = 0; i < (L); i++){ \
3053 c = ASCII_ISSPACE((unsigned char)(S)[i]) \
3054 ? (S)[i] \
3055 : MAKE_LITERAL((S)[i]); \
3056 HTML_TEXT(F, c); \
3059 #define HTML_PROC(F, C) { \
3060 if(HD(F)->token){ \
3061 int i; \
3062 if((i = (*(HD(F)->token))(F, C)) != 0){ \
3063 if(i < 0){ \
3064 HTML_DUMP_LIT(F, "<", 1); \
3065 if(HD(F)->el_data->element){ \
3066 HTML_DUMP_LIT(F, \
3067 HD(F)->el_data->element, \
3068 strlen(HD(F)->el_data->element));\
3070 if(HD(F)->el_data->len){ \
3071 HTML_DUMP_LIT(F, \
3072 HD(F)->el_data->buf, \
3073 HD(F)->el_data->len); \
3075 HTML_TEXT(F, C); \
3077 FREE_CLCTR(F); \
3080 else if((C) == '<'){ \
3081 NEW_CLCTR(F); \
3083 else \
3084 HTML_TEXT(F, C); \
3086 #define HTML_LINEP_PUTC(F, C) { \
3087 if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
3088 size_t offset = (F)->linep - (F)->line; \
3089 fs_resize((void **) &(F)->line, \
3090 (HD(F)->line_bufsize * 2) * sizeof(char)); \
3091 HD(F)->line_bufsize *= 2; \
3092 (F)->linep = &(F)->line[offset]; \
3094 *(F)->linep++ = (C); \
3096 #define HTML_TEXT(F, C) switch((F)->f1){ \
3097 case WSPACE : \
3098 if(HTML_ISSPACE(C)) /* ignore repeated WS */ \
3099 break; \
3100 HTML_TEXT_OUT(F, ' '); \
3101 (F)->f1 = DFL;/* stop sending chars here */ \
3102 /* fall thru to process 'c' */ \
3103 case DFL: \
3104 if(HD(F)->bitbucket) \
3105 (F)->f1 = DFL; /* no op */ \
3106 else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
3107 (F)->f1 = WSPACE;/* coalesce white space */ \
3108 else HTML_TEXT_OUT(F, C); \
3109 break; \
/*
 * HTML_TEXT_OUT - deliver character C for filter F.  When a handler is
 * stacked (HANDLERS(F) is non-NULL) that handler's function is called
 * with (handler, C, GF_DATA); otherwise C goes to html_output().
 *
 * NOTE(review): this expands to a bare if/else that already ends in
 * ';', so take care when using it inside another un-braced if/else.
 */
3111 #define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */ \
3112 (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
3113 else \
3114 html_output(F, C);
3115 #ifdef DEBUG
3116 #define HTML_DEBUG_EL(S, D) { \
3117 dprint((5, "-- html %s: %s\n", \
3118 S ? S : "?", \
3119 (D)->element \
3120 ? (D)->element : "NULL")); \
3121 if(debug > 5){ \
3122 PARAMETER *p; \
3123 for(p = (D)->attribs; \
3124 p && p->attribute; \
3125 p = p->next) \
3126 dprint((6, \
3127 " PARM: %s%s%s\n", \
3128 p->attribute \
3129 ? p->attribute : "NULL",\
3130 p->value ? "=" : "", \
3131 p->value ? p->value : ""));\
3134 #else
3135 #define HTML_DEBUG_EL(S, D)
3136 #endif
/* Default location of the pine.info file unless the build overrides it */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
#endif
/*
 * CHTML_VAR_EXPAND - map the variable name "PINE_INFO_PATH" to
 * SYSTEM_PINE_INFO_PATH; any other string is returned unchanged.
 *
 * S must be a non-NULL string.  It is evaluated twice, so it must be
 * free of side effects.  The argument is parenthesized so that an
 * expression such as (a ? b : c) expands as intended.
 */
#define CHTML_VAR_EXPAND(S) (!strcmp((S), "PINE_INFO_PATH") \
				? SYSTEM_PINE_INFO_PATH : (S))
3145 * Protos for Tag handlers
/*
 * Every tag handler has the signature int (HANDLER_S *, int, int),
 * which is the type of the handler member of ELPROP_S.
 */
3147 int html_head(HANDLER_S *, int, int);
3148 int html_base(HANDLER_S *, int, int);
3149 int html_title(HANDLER_S *, int, int);
3150 int html_body(HANDLER_S *, int, int);
3151 int html_a(HANDLER_S *, int, int);
3152 int html_br(HANDLER_S *, int, int);
3153 int html_hr(HANDLER_S *, int, int);
3154 int html_p(HANDLER_S *, int, int);
3155 int html_table(HANDLER_S *, int, int);
3156 int html_caption(HANDLER_S *, int, int);
3157 int html_tr(HANDLER_S *, int, int);
3158 int html_td(HANDLER_S *, int, int);
3159 int html_th(HANDLER_S *, int, int);
3160 int html_thead(HANDLER_S *, int, int);
3161 int html_tbody(HANDLER_S *, int, int);
3162 int html_tfoot(HANDLER_S *, int, int);
3163 int html_col(HANDLER_S *, int, int);
3164 int html_colgroup(HANDLER_S *, int, int);
3165 int html_b(HANDLER_S *, int, int);
3166 int html_u(HANDLER_S *, int, int);
3167 int html_i(HANDLER_S *, int, int);
3168 int html_em(HANDLER_S *, int, int);
3169 int html_strong(HANDLER_S *, int, int);
3170 int html_s(HANDLER_S *, int, int);
3171 int html_big(HANDLER_S *, int, int);
3172 int html_small(HANDLER_S *, int, int);
3173 int html_font(HANDLER_S *, int, int);
3174 int html_img(HANDLER_S *, int, int);
3175 int html_map(HANDLER_S *, int, int);
3176 int html_area(HANDLER_S *, int, int);
3177 int html_form(HANDLER_S *, int, int);
3178 int html_input(HANDLER_S *, int, int);
3179 int html_option(HANDLER_S *, int, int);
3180 int html_optgroup(HANDLER_S *, int, int);
3181 int html_button(HANDLER_S *, int, int);
3182 int html_select(HANDLER_S *, int, int);
3183 int html_textarea(HANDLER_S *, int, int);
3184 int html_label(HANDLER_S *, int, int);
3185 int html_fieldset(HANDLER_S *, int, int);
3186 int html_ul(HANDLER_S *, int, int);
3187 int html_ol(HANDLER_S *, int, int);
3188 int html_menu(HANDLER_S *, int, int);
3189 int html_dir(HANDLER_S *, int, int);
3190 int html_li(HANDLER_S *, int, int);
3191 int html_h1(HANDLER_S *, int, int);
3192 int html_h2(HANDLER_S *, int, int);
3193 int html_h3(HANDLER_S *, int, int);
3194 int html_h4(HANDLER_S *, int, int);
3195 int html_h5(HANDLER_S *, int, int);
3196 int html_h6(HANDLER_S *, int, int);
3197 int html_blockquote(HANDLER_S *, int, int);
3198 int html_address(HANDLER_S *, int, int);
3199 int html_pre(HANDLER_S *, int, int);
3200 int html_center(HANDLER_S *, int, int);
3201 int html_div(HANDLER_S *, int, int);
3202 int html_span(HANDLER_S *, int, int);
3203 int html_dl(HANDLER_S *, int, int);
3204 int html_dt(HANDLER_S *, int, int);
3205 int html_dd(HANDLER_S *, int, int);
3206 int html_script(HANDLER_S *, int, int);
3207 int html_applet(HANDLER_S *, int, int);
3208 int html_style(HANDLER_S *, int, int);
3209 int html_kbd(HANDLER_S *, int, int);
3210 int html_dfn(HANDLER_S *, int, int);
3211 int html_var(HANDLER_S *, int, int);
3212 int html_tt(HANDLER_S *, int, int);
3213 int html_samp(HANDLER_S *, int, int);
3214 int html_sub(HANDLER_S *, int, int);
3215 int html_sup(HANDLER_S *, int, int);
3216 int html_cite(HANDLER_S *, int, int);
3217 int html_code(HANDLER_S *, int, int);
3218 int html_ins(HANDLER_S *, int, int);
3219 int html_del(HANDLER_S *, int, int);
3220 int html_abbr(HANDLER_S *, int, int);
3223 * Protos for RSS 2.0 Tag handlers
/* The RSS handlers use the same int (HANDLER_S *, int, int) signature as the HTML tag handlers */
3225 int rss_rss(HANDLER_S *, int, int);
3226 int rss_channel(HANDLER_S *, int, int);
3227 int rss_title(HANDLER_S *, int, int);
3228 int rss_image(HANDLER_S *, int, int);
3229 int rss_link(HANDLER_S *, int, int);
3230 int rss_description(HANDLER_S *, int, int);
3231 int rss_ttl(HANDLER_S *, int, int);
3232 int rss_item(HANDLER_S *, int, int);
3235 * Proto's for support routines
/*
 * Forward declarations only; the definitions are outside this chunk.
 * Of these, the macros above use html_element_collector (installed as
 * the token function by NEW_CLCTR), html_output (HTML_TEXT_OUT and the
 * highlight macros) and html_write (HTML_FLUSH).
 */
3237 void html_pop(FILTER_S *, ELPROP_S *);
3238 int html_push(FILTER_S *, ELPROP_S *);
3239 int html_element_collector(FILTER_S *, int);
3240 int html_element_flush(CLCTR_S *);
3241 void html_element_comment(FILTER_S *, char *);
3242 void html_element_output(FILTER_S *, int);
3243 int html_entity_collector(FILTER_S *, int, UCS *, char **);
3244 void html_a_prefix(FILTER_S *);
3245 void html_a_finish(HANDLER_S *);
3246 void html_a_output_prefix(FILTER_S *, int);
3247 void html_a_output_info(HANDLER_S *);
3248 void html_a_relative(char *, char *, HANDLE_S *);
3249 int html_href_relative(char *);
3250 int html_indent(FILTER_S *, int, int);
3251 void html_blank(FILTER_S *, int);
3252 void html_newline(FILTER_S *);
3253 void html_output(FILTER_S *, int);
3254 void html_output_string(FILTER_S *, char *);
3255 void html_output_raw_tag(FILTER_S *, char *);
3256 void html_output_normal(FILTER_S *, int, int);
3257 void html_output_flush(FILTER_S *);
3258 void html_output_centered(FILTER_S *, int, int);
3259 void html_centered_handle(int *, char *, int);
3260 void html_centered_putc(WRAPLINE_S *, int);
3261 void html_centered_flush(FILTER_S *);
3262 void html_centered_flush_line(FILTER_S *);
3263 void html_write_anchor(FILTER_S *, int);
3264 void html_write_newline(FILTER_S *);
3265 void html_write_indent(FILTER_S *, int);
3266 void html_write(FILTER_S *, char *, int);
3267 void html_putc(FILTER_S *, int);
3268 int html_event_attribute(char *);
3269 char *rss_skip_whitespace(char *s);
3270 ELPROP_S *element_properties(FILTER_S *, char *);
3274 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3275 * W3C doc "Additional named entities for HTML"
3277 static struct html_entities {
3278 char *name; /* entity name */
3279 UCS value; /* UCS entity value */
3280 char *plain; /* US-ASCII representation */
3281 } entity_tab[] = {
3282 {"quot", 0x0022}, /* 34 - quotation mark */
3283 {"amp", 0x0026}, /* 38 - ampersand */
3284 {"apos", 0x0027}, /* 39 - apostrophe */
3285 {"lt", 0x003C}, /* 60 - less-than sign */
3286 {"gt", 0x003E}, /* 62 - greater-than sign */
3287 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3288 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3289 {"cent", 0x00A2}, /* 162 - cent sign */
3290 {"pound", 0x00A3}, /* 163 - pound sign */
3291 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3292 {"yen", 0x00A5}, /* 165 - yen sign */
3293 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3294 {"sect", 0x00A7}, /* 167 - section sign */
3295 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3296 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3297 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3298 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3299 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3300 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3301 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3302 {"macr", 0x00AF}, /* 175 - macron */
3303 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3304 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3305 {"sup2", 0x00B2}, /* 178 - superscript two */
3306 {"sup3", 0x00B3}, /* 179 - superscript three */
3307 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3308 {"micro", 0x00B5}, /* 181 - micro sign */
3309 {"para", 0x00B6}, /* 182 - pilcrow sign */
3310 {"middot", 0x00B7}, /* 183 - middle dot */
3311 {"cedil", 0x00B8}, /* 184 - cedilla */
3312 {"sup1", 0x00B9}, /* 185 - superscript one */
3313 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3314 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3315 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3316 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3317 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3318 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3319 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3320 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3321 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3322 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3323 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3324 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3325 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3326 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3327 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3328 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3329 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3330 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3331 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3332 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3333 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3334 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3335 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3336 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3337 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3338 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3339 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3340 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3341 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3342 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3343 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3344 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3345 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3346 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3347 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3348 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3349 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3350 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3351 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3352 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3353 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3354 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3355 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3356 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3357 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3358 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3359 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3360 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3361 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3362 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3363 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3364 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3365 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3366 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3367 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3368 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3369 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3370 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3371 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3372 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3373 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3374 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3375 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3376 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3377 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3378 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3379 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3380 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3381 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3382 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3383 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3384 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3385 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3386 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3387 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3388 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3389 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3390 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3391 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3392 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3393 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3394 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3395 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3396 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3397 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3398 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3399 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3400 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3401 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3402 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3403 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3404 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3405 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3406 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3407 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3408 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3409 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3410 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3411 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3412 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3413 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3414 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3415 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3416 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3417 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3418 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3419 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3420 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3421 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3422 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3423 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3424 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3425 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3426 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3427 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3428 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3429 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3430 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3431 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3432 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3433 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3434 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3435 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3436 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3437 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3438 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3439 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3440 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3441 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3442 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3443 {"ensp", 0x2002}, /* 8194 - en space */
3444 {"emsp", 0x2003}, /* 8195 - em space */
3445 {"thinsp", 0x2009}, /* 8201 - thin space */
3446 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3447 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3448 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3449 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3450 {"ndash", 0x2013}, /* 8211 - en dash */
3451 {"mdash", 0x2014}, /* 8212 - em dash */
3452 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3453 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3454 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3455 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3456 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3457 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3458 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3459 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3460 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3461 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3462 {"dagger", 0x2020}, /* 8224 - dagger */
3463 {"Dagger", 0x2021}, /* 8225 - double dagger */
3464 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3465 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3466 {"permil", 0x2030}, /* 8240 - per mille sign */
3467 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3468 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3469 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3470 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3471 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3472 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3473 {"oline", 0x203E, "-"}, /* 8254 - overline */
3474 {"frasl", 0x2044}, /* 8260 - fraction slash */
3475 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3476 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3477 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3478 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3479 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3480 {"image", 0x2111}, /* 8465 - black-letter capital i */
3481 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3482 {"real", 0x211C}, /* 8476 - black-letter capital r */
3483 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3484 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3485 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3486 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3487 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3488 {"darr", 0x2193}, /* 8595 - downwards arrow */
3489 {"harr", 0x2194}, /* 8596 - left right arrow */
3490 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3491 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3492 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3493 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3494 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3495 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3496 {"forall", 0x2200}, /* 8704 - for all */
3497 {"part", 0x2202}, /* 8706 - partial differential */
3498 {"exist", 0x2203}, /* 8707 - there exists */
3499 {"empty", 0x2205}, /* 8709 - empty set */
3500 {"nabla", 0x2207}, /* 8711 - nabla */
3501 {"isin", 0x2208}, /* 8712 - element of */
3502 {"notin", 0x2209}, /* 8713 - not an element of */
3503 {"ni", 0x220B}, /* 8715 - contains as member */
3504 {"prod", 0x220F}, /* 8719 - n-ary product */
3505 {"sum", 0x2211}, /* 8721 - n-ary summation */
3506 {"minus", 0x2212}, /* 8722 - minus sign */
3507 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3508 {"radic", 0x221A}, /* 8730 - square root */
3509 {"prop", 0x221D}, /* 8733 - proportional to */
3510 {"infin", 0x221E}, /* 8734 - infinity */
3511 {"ang", 0x2220}, /* 8736 - angle */
3512 {"and", 0x2227}, /* 8743 - logical and */
3513 {"or", 0x2228}, /* 8744 - logical or */
3514 {"cap", 0x2229}, /* 8745 - intersection */
3515 {"cup", 0x222A}, /* 8746 - union */
3516 {"int", 0x222B}, /* 8747 - integral */
3517 {"there4", 0x2234}, /* 8756 - therefore */
3518 {"sim", 0x223C}, /* 8764 - tilde operator */
3519 {"cong", 0x2245}, /* 8773 - congruent to */
3520 {"asymp", 0x2248}, /* 8776 - almost equal to */
3521 {"ne", 0x2260}, /* 8800 - not equal to */
3522 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3523 {"le", 0x2264}, /* 8804 - less-than or equal to */
3524 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3525 {"sub", 0x2282}, /* 8834 - subset of */
3526 {"sup", 0x2283}, /* 8835 - superset of */
3527 {"nsub", 0x2284}, /* 8836 - not a subset of */
3528 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3529 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3530 {"oplus", 0x2295}, /* 8853 - circled plus */
3531 {"otimes", 0x2297}, /* 8855 - circled times */
3532 {"perp", 0x22A5}, /* 8869 - up tack */
3533 {"sdot", 0x22C5}, /* 8901 - dot operator */
3534 {"lceil", 0x2308}, /* 8968 - left ceiling */
3535 {"rceil", 0x2309}, /* 8969 - right ceiling */
3536 {"lfloor", 0x230A}, /* 8970 - left floor */
3537 {"rfloor", 0x230B}, /* 8971 - right floor */
3538 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3539 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3540 {"loz", 0x25CA}, /* 9674 - lozenge */
3541 {"spades", 0x2660}, /* 9824 - black spade suit */
3542 {"clubs", 0x2663}, /* 9827 - black club suit */
3543 {"hearts", 0x2665}, /* 9829 - black heart suit */
3544 {"diams", 0x2666} /* 9830 - black diamond suit */
3549 * Table of supported elements and corresponding handlers
3551 static ELPROP_S html_element_table[] = {
3552 {"HTML"}, /* HTML ignore if seen? */
3553 {"HEAD", html_head}, /* slurp until <BODY> ? */
3554 {"TITLE", html_title}, /* Document Title */
3555 {"BASE", html_base}, /* HREF base */
3556 {"BODY", html_body}, /* HTML BODY */
3557 {"A", html_a}, /* Anchor */
3558 {"ABBR", html_abbr}, /* Abbreviation */
3559 {"IMG", html_img}, /* Image */
3560 {"MAP", html_map}, /* Image Map */
3561 {"AREA", html_area}, /* Image Map Area */
3562 {"HR", html_hr, 1}, /* Horizontal Rule */
3563 {"BR", html_br}, /* Line Break */
3564 {"P", html_p, 1}, /* Paragraph */
3565 {"OL", html_ol, 1}, /* Ordered List */
3566 {"UL", html_ul, 1}, /* Unordered List */
3567 {"MENU", html_menu}, /* Menu List */
3568 {"DIR", html_dir}, /* Directory List */
3569 {"LI", html_li}, /* ... List Item */
3570 {"DL", html_dl, 1}, /* Definition List */
3571 {"DT", html_dt}, /* ... Def. Term */
3572 {"DD", html_dd}, /* ... Def. Definition */
3573 {"I", html_i}, /* Italic Text */
3574 {"EM", html_em}, /* Typographic Emphasis */
3575 {"STRONG", html_strong}, /* STRONG Typo Emphasis */
3576 {"VAR", html_i}, /* Variable Name */
3577 {"B", html_b}, /* Bold Text */
3578 {"U", html_u}, /* Underline Text */
3579 {"S", html_s}, /* Strike-Through Text */
3580 {"STRIKE", html_s}, /* Strike-Through Text */
3581 {"BIG", html_big}, /* Big Font Text */
3582 {"SMALL", html_small}, /* Small Font Text */
3583 {"FONT", html_font}, /* Font display directives */
3584 {"BLOCKQUOTE", html_blockquote, 1}, /* Blockquote */
3585 {"ADDRESS", html_address, 1}, /* Address */
3586 {"CENTER", html_center}, /* Centered Text v3.2 */
3587 {"DIV", html_div, 1}, /* Document Division 3.2 */
3588 {"SPAN", html_span}, /* Text Span */
3589 {"H1", html_h1, 1}, /* Headings... */
3590 {"H2", html_h2, 1},
3591 {"H3", html_h3,1},
3592 {"H4", html_h4, 1},
3593 {"H5", html_h5, 1},
3594 {"H6", html_h6, 1},
3595 {"PRE", html_pre, 1}, /* Preformatted Text */
3596 {"KBD", html_kbd}, /* Keyboard Input (NO OP) */
3597 {"DFN", html_dfn}, /* Definition (NO OP) */
3598 {"VAR", html_var}, /* Variable (NO OP) */
3599 {"TT", html_tt}, /* Typetype (NO OP) */
3600 {"SAMP", html_samp}, /* Sample Text (NO OP) */
3601 {"CITE", html_cite}, /* Citation (NO OP) */
3602 {"CODE", html_code}, /* Code Text (NO OP) */
3603 {"INS", html_ins}, /* Text Inseted (NO OP) */
3604 {"DEL", html_del}, /* Text Deleted (NO OP) */
3605 {"SUP", html_sup}, /* Text Superscript (NO OP) */
3606 {"SUB", html_sub}, /* Text Superscript (NO OP) */
3607 {"STYLE", html_style}, /* CSS Definitions */
3609 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3611 {"FORM", html_form, 1}, /* form within a document */
3612 {"INPUT", html_input}, /* One input field, options */
3613 {"BUTTON", html_button}, /* Push Button */
3614 {"OPTION", html_option}, /* One option within Select */
3615 {"OPTION", html_optgroup}, /* Option Group Definition */
3616 {"SELECT", html_select}, /* Selection from a set */
3617 {"TEXTAREA", html_textarea}, /* A multi-line input field */
3618 {"LABEL", html_label}, /* Control Label */
3619 {"FIELDSET", html_fieldset, 1}, /* Fieldset Control Group */
3621 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3622 {"SCRIPT", html_script}, /* Embedded scripting statements */
3623 {"APPLET", NULL}, /* Embedded applet statements */
3624 {"OBJECT", NULL}, /* Embedded object statements */
3625 {"LINK", NULL}, /* References to external data */
3626 {"PARAM", NULL}, /* Applet/Object parameters */
3628 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3630 {"TABLE", html_table, 1}, /* Table */
3631 {"CAPTION", html_caption}, /* Table Caption */
3632 {"TR", html_tr}, /* Table Table Row */
3633 {"TD", html_td}, /* Table Table Data */
3634 {"TH", html_th}, /* Table Table Head */
3635 {"THEAD", html_thead}, /* Table Table Head */
3636 {"TBODY", html_tbody}, /* Table Table Body */
3637 {"TFOOT", html_tfoot}, /* Table Table Foot */
3638 {"COL", html_col}, /* Table Column Attibutes */
3639 {"COLGROUP", html_colgroup}, /* Table Column Group Attibutes */
3641 {NULL, NULL}
3646 * Table of supported RSS 2.0 elements
3648 static ELPROP_S rss_element_table[] = {
3649 {"RSS", rss_rss}, /* RSS 2.0 version */
3650 {"CHANNEL", rss_channel}, /* RSS 2.0 Channel */
3651 {"TITLE", rss_title}, /* RSS 2.0 Title */
3652 {"IMAGE", rss_image}, /* RSS 2.0 Channel Image */
3653 {"LINK", rss_link}, /* RSS 2.0 Channel/Item Link */
3654 {"DESCRIPTION", rss_description}, /* RSS 2.0 Channel/Item Description */
3655 {"ITEM", rss_item}, /* RSS 2.0 Channel ITEM */
3656 {"TTL", rss_ttl}, /* RSS 2.0 Item TTL */
3657 {NULL, NULL}
3662 * Initialize the given handler, and add it to the stack if it
3663 * requests it.
3665 * Returns: 1 if handler chose to get pushed on stack
3666 * 0 if handler declined
3669 html_push(FILTER_S *fd, ELPROP_S *ep)
3671 HANDLER_S *new;
3673 new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3674 memset(new, 0, sizeof(HANDLER_S));
3675 new->html_data = fd;
3676 new->element = ep;
3677 if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3678 new->below = HANDLERS(fd);
3679 HANDLERS(fd) = new; /* push */
3680 return(1);
3683 fs_give((void **) &new);
3684 return(0);
3689 * Remove the most recently installed the given handler
3690 * after letting it accept its demise.
3692 void
3693 html_pop(FILTER_S *fd, ELPROP_S *ep)
3695 HANDLER_S *tp;
3697 for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3698 HANDLER_S *tp2;
3700 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3701 /* if no evidence of opening tag, ignore given closing tag */
3702 for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3705 if(!tp2){
3706 dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3707 return;
3710 (void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3711 HANDLERS(fd) = tp->below;
3714 if(tp){
3715 (void) (*EL(tp)->handler)(tp, 0, GF_EOD); /* may adjust handler list */
3716 if(tp != HANDLERS(fd)){
3717 HANDLER_S *p;
3719 for(p = HANDLERS(fd); p->below != tp; p = p->below)
3722 if(p)
3723 p->below = tp->below; /* remove from middle of stack */
3724 /* BUG: else programming botch and we should die */
3726 else
3727 HANDLERS(fd) = tp->below; /* pop */
3729 fs_give((void **)&tp);
3731 else{
3732 /* BUG: should MAKE SURE NOT TO EMIT IT */
3733 dprint((3, "-- html error: end tag without a start: %s", ep->element));
3739 * Deal with data passed a hander in its GF_DATA state
3741 static void
3742 html_handoff(HANDLER_S *hd, int ch)
3744 if(hd->below)
3745 (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3746 else
3747 html_output(hd->html_data, ch);
3752 * HTML <BR> element handler
3755 html_br(HANDLER_S *hd, int ch, int cmd)
3757 if(cmd == GF_RESET){
3758 if(PASS_HTML(hd->html_data)){
3759 html_output_raw_tag(hd->html_data, "br");
3761 else{
3762 html_output(hd->html_data, HTML_NEWLINE);
3766 return(0); /* don't get linked */
3771 * HTML <HR> (Horizontal Rule) element handler
3774 html_hr(HANDLER_S *hd, int ch, int cmd)
3776 if(cmd == GF_RESET){
3777 if(PASS_HTML(hd->html_data)){
3778 html_output_raw_tag(hd->html_data, "hr");
3780 else{
3781 int i, old_wrap, width, align;
3782 PARAMETER *p;
3784 width = WRAP_COLS(hd->html_data);
3785 align = 0;
3786 for(p = HD(hd->html_data)->el_data->attribs;
3787 p && p->attribute;
3788 p = p->next)
3789 if(p->value){
3790 if(!strucmp(p->attribute, "ALIGN")){
3791 if(!strucmp(p->value, "LEFT"))
3792 align = 1;
3793 else if(!strucmp(p->value, "RIGHT"))
3794 align = 2;
3796 else if(!strucmp(p->attribute, "WIDTH")){
3797 char *cp;
3799 width = 0;
3800 for(cp = p->value; *cp; cp++)
3801 if(*cp == '%'){
3802 width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3803 break;
3805 else if(isdigit((unsigned char) *cp))
3806 width = (width * 10) + (*cp - '0');
3808 width = MIN(width, WRAP_COLS(hd->html_data));
3812 html_blank(hd->html_data, 1); /* at least one blank line */
3814 old_wrap = HD(hd->html_data)->wrapstate;
3815 HD(hd->html_data)->wrapstate = 0;
3816 if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3817 && ((align == 0) ? i /= 2 : (align == 2)))
3818 for(; i > 0; i--)
3819 html_output(hd->html_data, ' ');
3821 for(i = 0; i < width; i++)
3822 html_output(hd->html_data, '_');
3824 html_blank(hd->html_data, 1);
3825 HD(hd->html_data)->wrapstate = old_wrap;
3829 return(0); /* don't get linked */
3834 * HTML <P> (paragraph) element handler
3837 html_p(HANDLER_S *hd, int ch, int cmd)
3839 if(cmd == GF_DATA){
3840 html_handoff(hd, ch);
3842 else if(cmd == GF_RESET){
3843 if(PASS_HTML(hd->html_data)){
3844 html_output_raw_tag(hd->html_data, "p");
3846 else{
3847 /* Make sure there's at least 1 blank line */
3848 html_blank(hd->html_data, 1);
3850 /* adjust indent level if needed */
3851 if(HD(hd->html_data)->li_pending){
3852 html_indent(hd->html_data, 4, HTML_ID_INC);
3853 HD(hd->html_data)->li_pending = 0;
3857 else if(cmd == GF_EOD){
3858 if(PASS_HTML(hd->html_data)){
3859 html_output_string(hd->html_data, "</p>");
3861 else{
3862 /* Make sure there's at least 1 blank line */
3863 html_blank(hd->html_data, 1);
3867 return(1); /* GET linked */
3872 * HTML Table <TABLE> (paragraph) table row
3875 html_table(HANDLER_S *hd, int ch, int cmd)
3877 if(cmd == GF_DATA){
3878 if(PASS_HTML(hd->html_data)){
3879 html_handoff(hd, ch);
3882 else if(cmd == GF_RESET){
3883 if(PASS_HTML(hd->html_data)){
3884 html_output_raw_tag(hd->html_data, "table");
3886 else
3887 /* Make sure there's at least 1 blank line */
3888 html_blank(hd->html_data, 0);
3890 else if(cmd == GF_EOD){
3891 if(PASS_HTML(hd->html_data)){
3892 html_output_string(hd->html_data, "</table>");
3894 else
3895 /* Make sure there's at least 1 blank line */
3896 html_blank(hd->html_data, 0);
3898 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3903 * HTML <CAPTION> (Table Caption) element handler
3906 html_caption(HANDLER_S *hd, int ch, int cmd)
3908 if(cmd == GF_DATA){
3909 html_handoff(hd, ch);
3911 else if(cmd == GF_RESET){
3912 if(PASS_HTML(hd->html_data)){
3913 html_output_raw_tag(hd->html_data, "caption");
3915 else{
3916 /* turn ON the centered bit */
3917 CENTER_BIT(hd->html_data) = 1;
3920 else if(cmd == GF_EOD){
3921 if(PASS_HTML(hd->html_data)){
3922 html_output_string(hd->html_data, "</caption>");
3924 else{
3925 /* turn OFF the centered bit */
3926 CENTER_BIT(hd->html_data) = 0;
3930 return(1);
3935 * HTML Table <TR> (paragraph) table row
3938 html_tr(HANDLER_S *hd, int ch, int cmd)
3940 if(cmd == GF_DATA){
3941 if(PASS_HTML(hd->html_data)){
3942 html_handoff(hd, ch);
3945 else if(cmd == GF_RESET){
3946 if(PASS_HTML(hd->html_data)){
3947 html_output_raw_tag(hd->html_data, "tr");
3949 else
3950 /* Make sure there's at least 1 blank line */
3951 html_blank(hd->html_data, 0);
3953 else if(cmd == GF_EOD){
3954 if(PASS_HTML(hd->html_data)){
3955 html_output_string(hd->html_data, "</tr>");
3957 else
3958 /* Make sure there's at least 1 blank line */
3959 html_blank(hd->html_data, 0);
3961 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3966 * HTML Table <TD> (paragraph) table data
3969 html_td(HANDLER_S *hd, int ch, int cmd)
3971 if(cmd == GF_DATA){
3972 if(PASS_HTML(hd->html_data)){
3973 html_handoff(hd, ch);
3976 else if(cmd == GF_RESET){
3977 if(PASS_HTML(hd->html_data)){
3978 html_output_raw_tag(hd->html_data, "td");
3980 else{
3981 PARAMETER *p;
3983 for(p = HD(hd->html_data)->el_data->attribs;
3984 p && p->attribute;
3985 p = p->next)
3986 if(!strucmp(p->attribute, "nowrap")
3987 && (hd->html_data->f2 || hd->html_data->n)){
3988 HTML_DUMP_LIT(hd->html_data, " | ", 3);
3989 break;
3993 else if(cmd == GF_EOD){
3994 if(PASS_HTML(hd->html_data)){
3995 html_output_string(hd->html_data, "</td>");
3999 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4004 * HTML Table <TH> (paragraph) table head
4007 html_th(HANDLER_S *hd, int ch, int cmd)
4009 if(cmd == GF_DATA){
4010 if(PASS_HTML(hd->html_data)){
4011 html_handoff(hd, ch);
4014 else if(cmd == GF_RESET){
4015 if(PASS_HTML(hd->html_data)){
4016 html_output_raw_tag(hd->html_data, "th");
4018 else{
4019 PARAMETER *p;
4021 for(p = HD(hd->html_data)->el_data->attribs;
4022 p && p->attribute;
4023 p = p->next)
4024 if(!strucmp(p->attribute, "nowrap")
4025 && (hd->html_data->f2 || hd->html_data->n)){
4026 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4027 break;
4031 else if(cmd == GF_EOD){
4032 if(PASS_HTML(hd->html_data)){
4033 html_output_string(hd->html_data, "</th>");
4037 return(PASS_HTML(hd->html_data)); /* don't get linked */
4042 * HTML Table <THEAD> table head
4045 html_thead(HANDLER_S *hd, int ch, int cmd)
4047 if(PASS_HTML(hd->html_data)){
4048 if(cmd == GF_DATA){
4049 html_handoff(hd, ch);
4051 else if(cmd == GF_RESET){
4052 html_output_raw_tag(hd->html_data, "thead");
4054 else if(cmd == GF_EOD){
4055 html_output_string(hd->html_data, "</thead>");
4058 return(1); /* GET linked */
4061 return(0); /* don't get linked */
4066 * HTML Table <TBODY> table body
4069 html_tbody(HANDLER_S *hd, int ch, int cmd)
4071 if(PASS_HTML(hd->html_data)){
4072 if(cmd == GF_DATA){
4073 html_handoff(hd, ch);
4075 else if(cmd == GF_RESET){
4076 html_output_raw_tag(hd->html_data, "tbody");
4078 else if(cmd == GF_EOD){
4079 html_output_string(hd->html_data, "</tbody>");
4082 return(1); /* GET linked */
4085 return(0); /* don't get linked */
4090 * HTML Table <TFOOT> table body
4093 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4095 if(PASS_HTML(hd->html_data)){
4096 if(cmd == GF_DATA){
4097 html_handoff(hd, ch);
4099 else if(cmd == GF_RESET){
4100 html_output_raw_tag(hd->html_data, "tfoot");
4102 else if(cmd == GF_EOD){
4103 html_output_string(hd->html_data, "</tfoot>");
4106 return(1); /* GET linked */
4109 return(0); /* don't get linked */
4114 * HTML <COL> (Table Column Attributes) element handler
4117 html_col(HANDLER_S *hd, int ch, int cmd)
4119 if(cmd == GF_RESET){
4120 if(PASS_HTML(hd->html_data)){
4121 html_output_raw_tag(hd->html_data, "col");
4125 return(0); /* don't get linked */
4130 * HTML Table <COLGROUP> table body
4133 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4135 if(PASS_HTML(hd->html_data)){
4136 if(cmd == GF_DATA){
4137 html_handoff(hd, ch);
4139 else if(cmd == GF_RESET){
4140 html_output_raw_tag(hd->html_data, "colgroup");
4142 else if(cmd == GF_EOD){
4143 html_output_string(hd->html_data, "</colgroup>");
4146 return(1); /* GET linked */
4149 return(0); /* don't get linked */
4154 * HTML <I> (italic text) element handler
4157 html_i(HANDLER_S *hd, int ch, int cmd)
4159 if(cmd == GF_DATA){
4160 /* include LITERAL in spaceness test! */
4161 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4162 HTML_ITALIC(hd->html_data, 1);
4163 hd->x = 0;
4166 html_handoff(hd, ch);
4168 else if(cmd == GF_RESET){
4169 hd->x = 1;
4171 else if(cmd == GF_EOD){
4172 if(!hd->x)
4173 HTML_ITALIC(hd->html_data, 0);
4176 return(1); /* get linked */
4181 * HTML <EM> element handler
4184 html_em(HANDLER_S *hd, int ch, int cmd)
4186 if(cmd == GF_DATA){
4187 if(!PASS_HTML(hd->html_data)){
4188 /* include LITERAL in spaceness test! */
4189 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4190 HTML_ITALIC(hd->html_data, 1);
4191 hd->x = 0;
4195 html_handoff(hd, ch);
4197 else if(cmd == GF_RESET){
4198 if(PASS_HTML(hd->html_data)){
4199 html_output_raw_tag(hd->html_data, "em");
4201 else{
4202 hd->x = 1;
4205 else if(cmd == GF_EOD){
4206 if(PASS_HTML(hd->html_data)){
4207 html_output_string(hd->html_data, "</em>");
4209 else{
4210 if(!hd->x)
4211 HTML_ITALIC(hd->html_data, 0);
4215 return(1); /* get linked */
4220 * HTML <STRONG> element handler
4223 html_strong(HANDLER_S *hd, int ch, int cmd)
4225 if(cmd == GF_DATA){
4226 if(!PASS_HTML(hd->html_data)){
4227 /* include LITERAL in spaceness test! */
4228 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4229 HTML_ITALIC(hd->html_data, 1);
4230 hd->x = 0;
4234 html_handoff(hd, ch);
4236 else if(cmd == GF_RESET){
4237 if(PASS_HTML(hd->html_data)){
4238 html_output_raw_tag(hd->html_data, "strong");
4240 else{
4241 hd->x = 1;
4244 else if(cmd == GF_EOD){
4245 if(PASS_HTML(hd->html_data)){
4246 html_output_string(hd->html_data, "</strong>");
4248 else{
4249 if(!hd->x)
4250 HTML_ITALIC(hd->html_data, 0);
4254 return(1); /* get linked */
4259 * HTML <u> (Underline text) element handler
4262 html_u(HANDLER_S *hd, int ch, int cmd)
4264 if(PASS_HTML(hd->html_data)){
4265 if(cmd == GF_DATA){
4266 html_handoff(hd, ch);
4268 else if(cmd == GF_RESET){
4269 html_output_raw_tag(hd->html_data, "u");
4271 else if(cmd == GF_EOD){
4272 html_output_string(hd->html_data, "</u>");
4275 return(1); /* get linked */
4278 return(0); /* do NOT get linked */
4283 * HTML <b> (Bold text) element handler
4286 html_b(HANDLER_S *hd, int ch, int cmd)
4288 if(cmd == GF_DATA){
4289 if(!PASS_HTML(hd->html_data)){
4290 /* include LITERAL in spaceness test! */
4291 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4292 HTML_BOLD(hd->html_data, 1);
4293 hd->x = 0;
4297 html_handoff(hd, ch);
4299 else if(cmd == GF_RESET){
4300 if(PASS_HTML(hd->html_data)){
4301 html_output_raw_tag(hd->html_data, "b");
4303 else{
4304 hd->x = 1;
4307 else if(cmd == GF_EOD){
4308 if(PASS_HTML(hd->html_data)){
4309 html_output_string(hd->html_data, "</b>");
4311 else{
4312 if(!hd->x)
4313 HTML_BOLD(hd->html_data, 0);
4317 return(1); /* get linked */
4322 * HTML <s> (strike-through text) element handler
4325 html_s(HANDLER_S *hd, int ch, int cmd)
4327 if(cmd == GF_DATA){
4328 if(!PASS_HTML(hd->html_data)){
4329 /* include LITERAL in spaceness test! */
4330 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4331 HTML_STRIKE(hd->html_data, 1);
4332 hd->x = 0;
4336 html_handoff(hd, ch);
4338 else if(cmd == GF_RESET){
4339 if(PASS_HTML(hd->html_data)){
4340 html_output_raw_tag(hd->html_data, "s");
4342 else{
4343 hd->x = 1;
4346 else if(cmd == GF_EOD){
4347 if(PASS_HTML(hd->html_data)){
4348 html_output_string(hd->html_data, "</s>");
4350 else{
4351 if(!hd->x)
4352 HTML_STRIKE(hd->html_data, 0);
4356 return(1); /* get linked */
4361 * HTML <big> (BIG text) element handler
4364 html_big(HANDLER_S *hd, int ch, int cmd)
4366 if(cmd == GF_DATA){
4367 /* include LITERAL in spaceness test! */
4368 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4369 HTML_BIG(hd->html_data, 1);
4370 hd->x = 0;
4373 html_handoff(hd, ch);
4375 else if(cmd == GF_RESET){
4376 hd->x = 1;
4378 else if(cmd == GF_EOD){
4379 if(!hd->x)
4380 HTML_BIG(hd->html_data, 0);
4383 return(1); /* get linked */
4388 * HTML <small> (SMALL text) element handler
4391 html_small(HANDLER_S *hd, int ch, int cmd)
4393 if(cmd == GF_DATA){
4394 /* include LITERAL in spaceness test! */
4395 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4396 HTML_SMALL(hd->html_data, 1);
4397 hd->x = 0;
4400 html_handoff(hd, ch);
4402 else if(cmd == GF_RESET){
4403 hd->x = 1;
4405 else if(cmd == GF_EOD){
4406 if(!hd->x)
4407 HTML_SMALL(hd->html_data, 0);
4410 return(1); /* get linked */
4415 * HTML <FONT> element handler
4418 html_font(HANDLER_S *hd, int ch, int cmd)
4420 if(PASS_HTML(hd->html_data)){
4421 if(cmd == GF_DATA){
4422 html_handoff(hd, ch);
4424 else if(cmd == GF_RESET){
4425 html_output_raw_tag(hd->html_data, "font");
4427 else if(cmd == GF_EOD){
4428 html_output_string(hd->html_data, "</font>");
4431 return(1); /* get linked */
4434 return(0);
4439 * HTML <IMG> element handler
4442 html_img(HANDLER_S *hd, int ch, int cmd)
4444 PARAMETER *p;
4445 char *alt = NULL, *src = NULL, *s;
4447 if(cmd == GF_RESET){
4448 if(PASS_HTML(hd->html_data)){
4449 html_output_raw_tag(hd->html_data, "img");
4451 else{
4452 for(p = HD(hd->html_data)->el_data->attribs;
4453 p && p->attribute;
4454 p = p->next)
4455 if(p->value && p->value[0]){
4456 if(!strucmp(p->attribute, "alt"))
4457 alt = p->value;
4458 if(!strucmp(p->attribute, "src"))
4459 src = p->value;
4463 * Multipart/Related Content ID pointer
4464 * ONLY attached messages are recognized
4465 * if we ever decide web bugs aren't a problem
4466 * anymore then we might expand the scope
4468 if(src
4469 && DO_HANDLES(hd->html_data)
4470 && RELATED_OK(hd->html_data)
4471 && struncmp(src, "cid:", 4) == 0){
4472 char buf[32];
4473 int i, n;
4474 HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4476 h->type = IMG;
4477 h->h.img.src = cpystr(src + 4);
4478 h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4480 HTML_TEXT(hd->html_data, TAG_EMBED);
4481 HTML_TEXT(hd->html_data, TAG_HANDLE);
4483 sprintf(buf, "%d", h->key);
4484 n = strlen(buf);
4485 HTML_TEXT(hd->html_data, n);
4486 for(i = 0; i < n; i++){
4487 unsigned int uic = buf[i];
4488 HTML_TEXT(hd->html_data, uic);
4491 return(0);
4493 else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4494 HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4495 HTML_TEXT(hd->html_data, ' ');
4496 return(0);
4498 else if(src
4499 && (s = strrindex(src, '/'))
4500 && *++s != '\0'){
4501 HTML_TEXT(hd->html_data, '[');
4502 HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4503 HTML_TEXT(hd->html_data, ']');
4504 HTML_TEXT(hd->html_data, ' ');
4505 return(0);
4508 /* text filler of last resort */
4509 HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4513 return(0); /* don't get linked */
4518 * HTML <MAP> (Image Map) element handler
4521 html_map(HANDLER_S *hd, int ch, int cmd)
4523 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4524 if(cmd == GF_DATA){
4525 html_handoff(hd, ch);
4527 else if(cmd == GF_RESET){
4528 html_output_raw_tag(hd->html_data, "map");
4530 else if(cmd == GF_EOD){
4531 html_output_string(hd->html_data, "</map>");
4534 return(1);
4537 return(0);
4542 * HTML <AREA> (Image Map Area) element handler
4545 html_area(HANDLER_S *hd, int ch, int cmd)
4547 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4548 if(cmd == GF_DATA){
4549 html_handoff(hd, ch);
4551 else if(cmd == GF_RESET){
4552 html_output_raw_tag(hd->html_data, "area");
4554 else if(cmd == GF_EOD){
4555 html_output_string(hd->html_data, "</area>");
4558 return(1);
4561 return(0);
4566 * HTML <FORM> (Form) element handler
4569 html_form(HANDLER_S *hd, int ch, int cmd)
4571 if(PASS_HTML(hd->html_data)){
4572 if(cmd == GF_DATA){
4573 html_handoff(hd, ch);
4575 else if(cmd == GF_RESET){
4576 PARAMETER **pp;
4578 /* SECURITY: make sure to redirect to new browser instance */
4579 for(pp = &(HD(hd->html_data)->el_data->attribs);
4580 *pp && (*pp)->attribute;
4581 pp = &(*pp)->next)
4582 if(!strucmp((*pp)->attribute, "target")){
4583 if((*pp)->value)
4584 fs_give((void **) &(*pp)->value);
4586 (*pp)->value = cpystr("_blank");
4589 if(!*pp){
4590 *pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4591 memset(*pp, 0, sizeof(PARAMETER));
4592 (*pp)->attribute = cpystr("target");
4593 (*pp)->value = cpystr("_blank");
4596 html_output_raw_tag(hd->html_data, "form");
4598 else if(cmd == GF_EOD){
4599 html_output_string(hd->html_data, "</form>");
4602 else{
4603 if(cmd == GF_RESET){
4604 html_blank(hd->html_data, 0);
4605 HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4606 html_blank(hd->html_data, 0);
4610 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4615 * HTML <INPUT> (Form) element handler
4618 html_input(HANDLER_S *hd, int ch, int cmd)
4620 if(PASS_HTML(hd->html_data)){
4621 if(cmd == GF_RESET){
4622 html_output_raw_tag(hd->html_data, "input");
4626 return(0); /* don't get linked */
4631 * HTML <BUTTON> (Form) element handler
4634 html_button(HANDLER_S *hd, int ch, int cmd)
4636 if(PASS_HTML(hd->html_data)){
4637 if(cmd == GF_DATA){
4638 html_handoff(hd, ch);
4640 else if(cmd == GF_RESET){
4641 html_output_raw_tag(hd->html_data, "button");
4643 else if(cmd == GF_EOD){
4644 html_output_string(hd->html_data, "</button>");
4647 return(1); /* get linked */
4650 return(0);
4655 * HTML <OPTION> (Form) element handler
4658 html_option(HANDLER_S *hd, int ch, int cmd)
4660 if(PASS_HTML(hd->html_data)){
4661 if(cmd == GF_DATA){
4662 html_handoff(hd, ch);
4664 else if(cmd == GF_RESET){
4665 html_output_raw_tag(hd->html_data, "option");
4667 else if(cmd == GF_EOD){
4668 html_output_string(hd->html_data, "</option>");
4671 return(1); /* get linked */
4674 return(0);
4679 * HTML <OPTGROUP> (Form) element handler
4682 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4684 if(PASS_HTML(hd->html_data)){
4685 if(cmd == GF_DATA){
4686 html_handoff(hd, ch);
4688 else if(cmd == GF_RESET){
4689 html_output_raw_tag(hd->html_data, "optgroup");
4691 else if(cmd == GF_EOD){
4692 html_output_string(hd->html_data, "</optgroup>");
4695 return(1); /* get linked */
4698 return(0);
4703 * HTML <SELECT> (Form) element handler
4706 html_select(HANDLER_S *hd, int ch, int cmd)
4708 if(PASS_HTML(hd->html_data)){
4709 if(cmd == GF_DATA){
4710 html_handoff(hd, ch);
4712 else if(cmd == GF_RESET){
4713 html_output_raw_tag(hd->html_data, "select");
4715 else if(cmd == GF_EOD){
4716 html_output_string(hd->html_data, "</select>");
4719 return(1); /* get linked */
4722 return(0);
4727 * HTML <TEXTAREA> (Form) element handler
4730 html_textarea(HANDLER_S *hd, int ch, int cmd)
4732 if(PASS_HTML(hd->html_data)){
4733 if(cmd == GF_DATA){
4734 html_handoff(hd, ch);
4736 else if(cmd == GF_RESET){
4737 html_output_raw_tag(hd->html_data, "textarea");
4739 else if(cmd == GF_EOD){
4740 html_output_string(hd->html_data, "</textarea>");
4743 return(1); /* get linked */
4746 return(0);
4751 * HTML <LABEL> (Form) element handler
4754 html_label(HANDLER_S *hd, int ch, int cmd)
4756 if(PASS_HTML(hd->html_data)){
4757 if(cmd == GF_DATA){
4758 html_handoff(hd, ch);
4760 else if(cmd == GF_RESET){
4761 html_output_raw_tag(hd->html_data, "label");
4763 else if(cmd == GF_EOD){
4764 html_output_string(hd->html_data, "</label>");
4767 return(1); /* get linked */
4770 return(0);
4775 * HTML <FIELDSET> (Form) element handler
4778 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4780 if(PASS_HTML(hd->html_data)){
4781 if(cmd == GF_DATA){
4782 html_handoff(hd, ch);
4784 else if(cmd == GF_RESET){
4785 html_output_raw_tag(hd->html_data, "fieldset");
4787 else if(cmd == GF_EOD){
4788 html_output_string(hd->html_data, "</fieldset>");
4791 return(1); /* get linked */
4794 return(0);
4799 * HTML <HEAD> element handler
4802 html_head(HANDLER_S *hd, int ch, int cmd)
4804 if(cmd == GF_DATA){
4805 html_handoff(hd, ch);
4807 else if(cmd == GF_RESET){
4808 HD(hd->html_data)->head = 1;
4810 else if(cmd == GF_EOD){
4811 HD(hd->html_data)->head = 0;
4814 return(1); /* get linked */
4819 * HTML <BASE> element handler
4822 html_base(HANDLER_S *hd, int ch, int cmd)
4824 if(cmd == GF_RESET){
4825 if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4826 PARAMETER *p;
4828 for(p = HD(hd->html_data)->el_data->attribs;
4829 p && p->attribute && strucmp(p->attribute, "HREF");
4830 p = p->next)
4833 if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4834 ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4838 return(0); /* DON'T get linked */
4843 * HTML <TITLE> element handler
4846 html_title(HANDLER_S *hd, int ch, int cmd)
4848 if(cmd == GF_DATA){
4849 if(hd->x + 1 >= hd->y){
4850 hd->y += 80;
4851 fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4854 hd->s[hd->x++] = (unsigned char) ch;
4856 else if(cmd == GF_RESET){
4857 hd->x = 0L;
4858 hd->y = 80L;
4859 hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4861 else if(cmd == GF_EOD){
4862 /* Down the road we probably want to give these bytes to
4863 * someone...
4865 hd->s[hd->x] = '\0';
4866 fs_give((void **)&hd->s);
4869 return(1); /* get linked */
4874 * HTML <BODY> element handler
/*
 * html_body - handler for the BODY element; always returns 1 (get linked).
 *   GF_DATA:  characters are passed along with html_handoff().
 *   GF_RESET: when PASS_HTML is set, the "text" attribute value is folded
 *             into a "style" attribute (created if absent) and the element
 *             is emitted through html_output_raw_tag() as "div"; the
 *             filter's body flag is then set.
 *   GF_EOD:   "</div>" is written when PASS_HTML is set; the body flag is
 *             cleared.
 * NOTE(review): blank and delimiter-only lines are absent from this
 * listing, so it cannot be determined here whether the "bgcolor" branch
 * is live code or sits inside a comment -- confirm against the file.
 */
4877 html_body(HANDLER_S *hd, int ch, int cmd)
4879 if(cmd == GF_DATA){
4880 html_handoff(hd, ch);
4882 else if(cmd == GF_RESET){
4883 if(PASS_HTML(hd->html_data)){
4884 PARAMETER *p, *tp;
4885 char **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4887 /* modify any attributes in a useful way? */
4888 for(p = HD(hd->html_data)->el_data->attribs;
4889 p && p->attribute;
4890 p = p->next)
4891 if(p->value){
4892 if(!strucmp(p->attribute, "style"))
4893 style = &p->value;
4894 else if(!strucmp(p->attribute, "text"))
4895 text = p->value;
4897 * bgcolor NOT passed since user setting takes precedence
4899 else if(!strucmp(p->attribute, "bgcolor"))
4900 bgcolor = p->value;
4904 /* colors pretty much it */
4905 if(text || bgcolor){
4906 if(!style){
/* no style attribute yet: prepend a new one to the attribute list */
4907 tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4908 memset(tp, 0, sizeof(PARAMETER));
4909 tp->next = HD(hd->html_data)->el_data->attribs;
4910 HD(hd->html_data)->el_data->attribs = tp;
4911 tp->attribute = cpystr("style");
4913 tmp_20k_buf[0] = '\0';
4914 style = &tp->value;
4915 pcs = "%s%s%s%s%s";
4917 else{
/* existing style: copy it to the scratch buffer and release the old value; the colors are appended after "; " */
4918 snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4919 fs_give((void **) style);
4920 pcs = "; %s%s%s%s%s";
/* append the color declarations after whatever is already in the scratch buffer */
4923 snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4924 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4925 pcs,
4926 (text) ? "color: " : "", (text) ? text : "",
4927 (text && bgcolor) ? ";" : "",
4928 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4929 *style = cpystr(tmp_20k_buf);
/* the BODY element is passed on under the tag name "div" */
4932 html_output_raw_tag(hd->html_data, "div");
4935 HD(hd->html_data)->body = 1;
4937 else if(cmd == GF_EOD){
4938 if(PASS_HTML(hd->html_data)){
4939 html_output_string(hd->html_data, "</div>");
4942 HD(hd->html_data)->body = 0;
4945 return(1); /* get linked */
4950 * HTML <A> (Anchor) element handler
4953 html_a(HANDLER_S *hd, int ch, int cmd)
4955 if(cmd == GF_DATA){
4956 html_handoff(hd, ch);
4958 if(hd->dp) /* remember text within anchor tags */
4959 so_writec(ch, (STORE_S *) hd->dp);
4961 else if(cmd == GF_RESET){
4962 int i, n, x;
4963 char buf[256];
4964 HANDLE_S *h;
4965 PARAMETER *p, *href = NULL, *name = NULL;
4968 * Pending Anchor!?!?
4969 * space insertion/line breaking that's yet to get done...
4971 if(HD(hd->html_data)->prefix){
4972 dprint((2, "-- html error: nested or unterminated anchor\n"));
4973 html_a_finish(hd);
4977 * Look for valid Anchor data vis the filter installer's parms
4978 * (e.g., Only allow references to our internal URLs if asked)
4980 for(p = HD(hd->html_data)->el_data->attribs;
4981 p && p->attribute;
4982 p = p->next)
4983 if(!strucmp(p->attribute, "HREF")
4984 && p->value
4985 && (HANDLES_LOC(hd->html_data)
4986 || struncmp(p->value, "x-alpine-", 9)
4987 || struncmp(p->value, "x-pine-help", 11)
4988 || p->value[0] == '#'))
4989 href = p;
4990 else if(!strucmp(p->attribute, "NAME"))
4991 name = p;
4993 if(DO_HANDLES(hd->html_data) && (href || name)){
4994 h = new_handle(HANDLESP(hd->html_data));
4997 * Enhancement: we might want to get fancier and parse the
4998 * href a bit further such that we can launch images using
4999 * our image viewer, or browse local files or directories
5000 * with our internal tools. Of course, having the jump-off
5001 * point into text/html always be the defined "web-browser",
5002 * just might be the least confusing UI-wise...
5004 h->type = URL;
5006 if(name && name->value)
5007 h->h.url.name = cpystr(name->value);
5010 * Prepare to build embedded prefix...
5012 HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5013 x = 0;
5016 * Is this something that looks like a URL? If not and
5017 * we were giving some "base" string, proceed ala RFC1808...
5019 if(href){
5020 if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5021 html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5023 else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5024 h->h.url.path = cpystr(href->value);
5026 if(pico_usingcolor()){
5027 char *fg = NULL, *bg = NULL, *q;
5029 if(ps_global->VAR_SLCTBL_FORE_COLOR
5030 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5031 ps_global->VAR_NORM_FORE_COLOR))
5032 fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5034 if(ps_global->VAR_SLCTBL_BACK_COLOR
5035 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5036 ps_global->VAR_NORM_BACK_COLOR))
5037 bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5039 if(fg || bg){
5040 COLOR_PAIR *tmp;
5043 * The blacks are just known good colors for testing
5044 * whether the other color is good.
5046 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5047 bg ? bg : colorx(COL_BLACK));
5048 if(pico_is_good_colorpair(tmp)){
5049 q = color_embed(fg, bg);
5051 for(i = 0; q[i]; i++)
5052 HD(hd->html_data)->prefix[x++] = q[i];
5055 if(tmp)
5056 free_color_pair(&tmp);
5059 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5060 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5062 else
5063 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5066 HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5067 HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5069 snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5070 HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5071 for(i = 0; i < n; i++)
5072 HD(hd->html_data)->prefix[x++] = buf[i];
5074 HD(hd->html_data)->prefix_used = x;
5076 hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5079 else if(cmd == GF_EOD){
5080 html_a_finish(hd);
5083 return(1); /* get linked */
5087 void
5088 html_a_prefix(FILTER_S *f)
5090 int *prefix, n;
5092 /* Do this so we don't visit from html_output... */
5093 prefix = HD(f)->prefix;
5094 HD(f)->prefix = NULL;
5096 for(n = 0; n < HD(f)->prefix_used; n++)
5097 html_a_output_prefix(f, prefix[n]);
5099 fs_give((void **) &prefix);
5104 * html_a_finish - house keeping associated with end of link tag
5106 void
5107 html_a_finish(HANDLER_S *hd)
5109 if(DO_HANDLES(hd->html_data)){
5110 if(HD(hd->html_data)->prefix){
5111 if(!PASS_HTML(hd->html_data)){
5112 char *empty_link = "[LINK]";
5113 int i;
5115 html_a_prefix(hd->html_data);
5116 for(i = 0; empty_link[i]; i++)
5117 html_output(hd->html_data, empty_link[i]);
5121 if(pico_usingcolor()){
5122 char *fg = NULL, *bg = NULL, *p;
5123 int i;
5125 if(ps_global->VAR_SLCTBL_FORE_COLOR
5126 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5127 ps_global->VAR_NORM_FORE_COLOR))
5128 fg = ps_global->VAR_NORM_FORE_COLOR;
5130 if(ps_global->VAR_SLCTBL_BACK_COLOR
5131 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5132 ps_global->VAR_NORM_BACK_COLOR))
5133 bg = ps_global->VAR_NORM_BACK_COLOR;
5135 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5136 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5138 if(fg || bg){
5139 COLOR_PAIR *tmp;
5142 * The blacks are just known good colors for testing
5143 * whether the other color is good.
5145 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5146 bg ? bg : colorx(COL_BLACK));
5147 if(pico_is_good_colorpair(tmp)){
5148 p = color_embed(fg, bg);
5150 for(i = 0; p[i]; i++)
5151 html_output(hd->html_data, p[i]);
5154 if(tmp)
5155 free_color_pair(&tmp);
5158 else
5159 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5161 html_output(hd->html_data, TAG_EMBED);
5162 html_output(hd->html_data, TAG_HANDLEOFF);
5164 html_a_output_info(hd);
5170 * html_output_a_prefix - dump Anchor prefix data
5172 void
5173 html_a_output_prefix(FILTER_S *f, int c)
5175 switch(c){
5176 case HTML_DOBOLD :
5177 HTML_BOLD(f, 1);
5178 break;
5180 default :
5181 html_output(f, c);
5182 break;
5189 * html_a_output_info - dump possibly deceptive link info into text.
5190 * phark the phishers.
5192 void
5193 html_a_output_info(HANDLER_S *hd)
5195 int l, risky = 0, hl = 0, tl;
5196 char *url = NULL, *hn = NULL, *txt;
5197 HANDLE_S *h;
5199 /* find host anchor references */
5200 if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5201 && h->h.url.path != NULL
5202 && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5203 && (hn = srchstr(hn,"://")) != NULL){
5205 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5209 if(hn && hl){
5211 * look over anchor's text to see if there's a
5212 * mismatch between href target and url-ish
5213 * looking text. throw a red flag if so.
5214 * similarly, toss one if the target's referenced
5215 * by a
5217 if(hd->dp){
5218 so_writec('\0', (STORE_S *) hd->dp);
5220 if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5221 && (txt = rfc1738_scan(txt, &tl)) != NULL
5222 && (txt = srchstr(txt,"://")) != NULL){
5224 for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5227 if(tl != hl)
5228 risky++;
5229 else
5230 /* look for non matching text */
5231 for(l = 0; l < tl && l < hl; l++)
5232 if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5233 risky++;
5234 break;
5238 so_give((STORE_S **) &hd->dp);
5241 /* look for literal IP, anything possibly encoded or auth specifier */
5242 if(!risky){
5243 int digits = 1;
5245 for(l = 0; l < hl; l++){
5246 if(hn[l] == '@' || hn[l] == '%'){
5247 risky++;
5248 break;
5250 else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5251 digits = 0;
5254 if(digits)
5255 risky++;
5258 /* Insert text of link's domain */
5259 if(SHOWSERVER(hd->html_data)){
5260 char *q;
5261 COLOR_PAIR *col = NULL, *colnorm = NULL;
5263 html_output(hd->html_data, ' ');
5264 html_output(hd->html_data, '[');
5266 if(pico_usingcolor()
5267 && ps_global->VAR_METAMSG_FORE_COLOR
5268 && ps_global->VAR_METAMSG_BACK_COLOR
5269 && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5270 ps_global->VAR_METAMSG_BACK_COLOR))){
5271 if(!pico_is_good_colorpair(col))
5272 free_color_pair(&col);
5274 if(col){
5275 q = color_embed(col->fg, col->bg);
5277 for(l = 0; q[l]; l++)
5278 html_output(hd->html_data, q[l]);
5282 for(l = 0; l < hl; l++)
5283 html_output(hd->html_data, hn[l]);
5285 if(col){
5286 if(ps_global->VAR_NORM_FORE_COLOR
5287 && ps_global->VAR_NORM_BACK_COLOR
5288 && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5289 ps_global->VAR_NORM_BACK_COLOR))){
5290 if(!pico_is_good_colorpair(colnorm))
5291 free_color_pair(&colnorm);
5293 if(colnorm){
5294 q = color_embed(colnorm->fg, colnorm->bg);
5295 free_color_pair(&colnorm);
5297 for(l = 0; q[l]; l++)
5298 html_output(hd->html_data, q[l]);
5302 free_color_pair(&col);
5305 html_output(hd->html_data, ']');
5310 * if things look OK so far, make sure nothing within
5311 * the url looks too fishy...
5313 while(!risky && hn
5314 && (hn = rfc1738_scan(hn, &l)) != NULL
5315 && (hn = srchstr(hn,"://")) != NULL){
5316 int digits = 1;
5318 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5320 * auth spec, encoded characters, or possibly non-standard port
5321 * should raise a red flag
5323 if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5324 risky++;
5325 break;
5327 else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5328 digits = 0;
5331 /* dotted-dec/raw-int address should cause suspicion as well */
5332 if(digits)
5333 risky++;
5336 if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5337 (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
5339 fs_give((void **) &url);
5345 * relative_url - put full url path in h based on base and relative url
5347 void
5348 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5350 size_t len;
5351 char tmp[MAILTMPLEN], *p, *q;
5352 char *scheme = NULL, *net = NULL, *path = NULL,
5353 *parms = NULL, *query = NULL, *frag = NULL,
5354 *base_scheme = NULL, *base_net_loc = NULL,
5355 *base_path = NULL, *base_parms = NULL,
5356 *base_query = NULL, *base_frag = NULL,
5357 *rel_scheme = NULL, *rel_net_loc = NULL,
5358 *rel_path = NULL, *rel_parms = NULL,
5359 *rel_query = NULL, *rel_frag = NULL;
5361 /* Rough parse of base URL */
5362 rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5363 &base_parms, &base_query, &base_frag);
5365 /* Rough parse of this URL */
5366 rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5367 &rel_parms, &rel_query, &rel_frag);
5369 scheme = rel_scheme; /* defaults */
5370 net = rel_net_loc;
5371 path = rel_path;
5372 parms = rel_parms;
5373 query = rel_query;
5374 frag = rel_frag;
5375 if(!scheme && base_scheme){
5376 scheme = base_scheme;
5377 if(!net){
5378 net = base_net_loc;
5379 if(path){
5380 if(*path != '/'){
5381 if(base_path){
5382 for(p = q = base_path; /* Drop base path's tail */
5383 (p = strchr(p, '/'));
5384 q = ++p)
5387 len = q - base_path;
5389 else
5390 len = 0;
5392 if(len + strlen(rel_path) < sizeof(tmp)-1){
5393 if(len)
5394 snprintf(path = tmp, sizeof(tmp), "%.*s", len, base_path);
5396 strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5397 tmp[sizeof(tmp)-1] = '\0';
5399 /* Follow RFC 1808 "Step 6" */
5400 for(p = tmp; (p = strchr(p, '.')); )
5401 switch(*(p+1)){
5403 * a) All occurrences of "./", where "." is a
5404 * complete path segment, are removed.
5406 case '/' :
5407 if(p > tmp)
5408 for(q = p; (*q = *(q+2)) != '\0'; q++)
5410 else
5411 p++;
5413 break;
5416 * b) If the path ends with "." as a
5417 * complete path segment, that "." is
5418 * removed.
5420 case '\0' :
5421 if(p == tmp || *(p-1) == '/')
5422 *p = '\0';
5423 else
5424 p++;
5426 break;
5429 * c) All occurrences of "<segment>/../",
5430 * where <segment> is a complete path
5431 * segment not equal to "..", are removed.
5432 * Removal of these path segments is
5433 * performed iteratively, removing the
5434 * leftmost matching pattern on each
5435 * iteration, until no matching pattern
5436 * remains.
5438 * d) If the path ends with "<segment>/..",
5439 * where <segment> is a complete path
5440 * segment not equal to "..", that
5441 * "<segment>/.." is removed.
5443 case '.' :
5444 if(p > tmp + 1){
5445 for(q = p - 2; q > tmp && *q != '/'; q--)
5448 if(*q == '/')
5449 q++;
5451 if(q + 1 == p /* no "//.." */
5452 || (*q == '.' /* and "../.." */
5453 && *(q+1) == '.'
5454 && *(q+2) == '/')){
5455 p += 2;
5456 break;
5459 switch(*(p+2)){
5460 case '/' :
5461 len = (p - q) + 3;
5462 p = q;
5463 for(; (*q = *(q+len)) != '\0'; q++)
5466 break;
5468 case '\0':
5469 *(p = q) = '\0';
5470 break;
5472 default:
5473 p += 2;
5474 break;
5477 else
5478 p += 2;
5480 break;
5482 default :
5483 p++;
5484 break;
5487 else
5488 path = ""; /* lame. */
5491 else{
5492 path = base_path;
5493 if(!parms){
5494 parms = base_parms;
5495 if(!query)
5496 query = base_query;
5502 len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5503 + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5504 + (query ? strlen(query) : 0) + (frag ? strlen(frag ) : 0) + 8;
5506 h->h.url.path = (char *) fs_get(len * sizeof(char));
5507 snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5508 scheme ? scheme : "", scheme ? ":" : "",
5509 net ? "//" : "", net ? net : "",
5510 (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5511 path ? path : "",
5512 parms ? ";" : "", parms ? parms : "",
5513 query ? "?" : "", query ? query : "",
5514 frag ? "#" : "", frag ? frag : "");
5516 if(base_scheme)
5517 fs_give((void **) &base_scheme);
5519 if(base_net_loc)
5520 fs_give((void **) &base_net_loc);
5522 if(base_path)
5523 fs_give((void **) &base_path);
5525 if(base_parms)
5526 fs_give((void **) &base_parms);
5528 if(base_query)
5529 fs_give((void **) &base_query);
5531 if(base_frag)
5532 fs_give((void **) &base_frag);
5534 if(rel_scheme)
5535 fs_give((void **) &rel_scheme);
5537 if(rel_net_loc)
5538 fs_give((void **) &rel_net_loc);
5540 if(rel_parms)
5541 fs_give((void **) &rel_parms);
5543 if(rel_query)
5544 fs_give((void **) &rel_query);
5546 if(rel_frag)
5547 fs_give((void **) &rel_frag);
5549 if(rel_path)
5550 fs_give((void **) &rel_path);
5555 * html_href_relative - href
5558 html_href_relative(char *url)
5560 int i;
5562 if(url)
5563 for(i = 0; i < 32 && url[i]; i++)
5564 if(!(isalpha((unsigned char) url[i]) || url[i] == '_' || url[i] == '-')){
5565 if(url[i] == ':')
5566 return(FALSE);
5567 else
5568 break;
5571 return(TRUE);
5576 * HTML <UL> (Unordered List) element handler
5579 html_ul(HANDLER_S *hd, int ch, int cmd)
5581 if(cmd == GF_DATA){
5582 html_handoff(hd, ch);
5584 else if(cmd == GF_RESET){
5585 if(PASS_HTML(hd->html_data)){
5586 html_output_raw_tag(hd->html_data, "ul");
5588 else{
5589 HD(hd->html_data)->li_pending = 1;
5590 html_blank(hd->html_data, 0);
5593 else if(cmd == GF_EOD){
5594 if(PASS_HTML(hd->html_data)){
5595 html_output_string(hd->html_data, "</ul>");
5597 else{
5598 html_blank(hd->html_data, 0);
5600 if(!HD(hd->html_data)->li_pending)
5601 html_indent(hd->html_data, -4, HTML_ID_INC);
5602 else
5603 HD(hd->html_data)->li_pending = 0;
5607 return(1); /* get linked */
5612 * HTML <OL> (Ordered List) element handler
5615 html_ol(HANDLER_S *hd, int ch, int cmd)
5617 if(cmd == GF_DATA){
5618 html_handoff(hd, ch);
5620 else if(cmd == GF_RESET){
5621 if(PASS_HTML(hd->html_data)){
5622 html_output_raw_tag(hd->html_data, "ol");
5624 else{
5626 * Signal that we're expecting to see <LI> as our next elemnt
5627 * and set the the initial ordered count.
5629 HD(hd->html_data)->li_pending = 1;
5630 hd->x = 1L;
5631 html_blank(hd->html_data, 0);
5634 else if(cmd == GF_EOD){
5635 if(PASS_HTML(hd->html_data)){
5636 html_output_string(hd->html_data, "</ol>");
5638 else{
5639 html_blank(hd->html_data, 0);
5641 if(!HD(hd->html_data)->li_pending)
5642 html_indent(hd->html_data, -4, HTML_ID_INC);
5643 else
5644 HD(hd->html_data)->li_pending = 0;
5648 return(1); /* get linked */
5653 * HTML <MENU> (Menu List) element handler
5656 html_menu(HANDLER_S *hd, int ch, int cmd)
5658 if(cmd == GF_DATA){
5659 html_handoff(hd, ch);
5661 else if(cmd == GF_RESET){
5662 if(PASS_HTML(hd->html_data)){
5663 html_output_raw_tag(hd->html_data, "menu");
5665 else{
5666 HD(hd->html_data)->li_pending = 1;
5669 else if(cmd == GF_EOD){
5670 if(PASS_HTML(hd->html_data)){
5671 html_output_string(hd->html_data, "</menu>");
5673 else{
5674 html_blank(hd->html_data, 0);
5676 if(!HD(hd->html_data)->li_pending)
5677 html_indent(hd->html_data, -4, HTML_ID_INC);
5678 else
5679 HD(hd->html_data)->li_pending = 0;
5683 return(1); /* get linked */
5688 * HTML <DIR> (Directory List) element handler
5691 html_dir(HANDLER_S *hd, int ch, int cmd)
5693 if(cmd == GF_DATA){
5694 html_handoff(hd, ch);
5696 else if(cmd == GF_RESET){
5697 if(PASS_HTML(hd->html_data)){
5698 html_output_raw_tag(hd->html_data, "dir");
5700 else{
5701 HD(hd->html_data)->li_pending = 1;
5704 else if(cmd == GF_EOD){
5705 if(PASS_HTML(hd->html_data)){
5706 html_output_string(hd->html_data, "</dir>");
5708 else{
5709 html_blank(hd->html_data, 0);
5711 if(!HD(hd->html_data)->li_pending)
5712 html_indent(hd->html_data, -4, HTML_ID_INC);
5713 else
5714 HD(hd->html_data)->li_pending = 0;
5718 return(1); /* get linked */
5723 * HTML <LI> (List Item) element handler
5726 html_li(HANDLER_S *hd, int ch, int cmd)
5728 if(cmd == GF_DATA){
5729 if(PASS_HTML(hd->html_data)){
5730 html_handoff(hd, ch);
5733 else if(cmd == GF_RESET){
5734 HANDLER_S *p, *found = NULL;
5737 * There better be a an unordered list, ordered list,
5738 * Menu or Directory handler installed
5739 * or else we crap out...
5741 for(p = HANDLERS(hd->html_data); p; p = p->below)
5742 if(EL(p)->handler == html_ul
5743 || EL(p)->handler == html_ol
5744 || EL(p)->handler == html_menu
5745 || EL(p)->handler == html_dir){
5746 found = p;
5747 break;
5750 if(found){
5751 if(PASS_HTML(hd->html_data)){
5753 else{
5754 char buf[8], *p;
5755 int wrapstate;
5757 /* Start a new line */
5758 html_blank(hd->html_data, 0);
5760 /* adjust indent level if needed */
5761 if(HD(hd->html_data)->li_pending){
5762 html_indent(hd->html_data, 4, HTML_ID_INC);
5763 HD(hd->html_data)->li_pending = 0;
5766 if(EL(found)->handler == html_ul){
5767 int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5769 strncpy(buf, " ", sizeof(buf));
5770 buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5772 else if(EL(found)->handler == html_ol)
5773 snprintf(buf, sizeof(buf), "%2ld.", found->x++);
5774 else if(EL(found)->handler == html_menu){
5775 strncpy(buf, " ->", sizeof(buf));
5776 buf[sizeof(buf)-1] = '\0';
5779 html_indent(hd->html_data, -4, HTML_ID_INC);
5781 /* So we don't munge whitespace */
5782 wrapstate = HD(hd->html_data)->wrapstate;
5783 HD(hd->html_data)->wrapstate = 0;
5785 html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5786 for(p = buf; *p; p++)
5787 html_output(hd->html_data, (int) *p);
5789 HD(hd->html_data)->wrapstate = wrapstate;
5790 html_indent(hd->html_data, 4, HTML_ID_INC);
5792 /* else BUG: should really bitch about this */
5795 if(PASS_HTML(hd->html_data)){
5796 html_output_raw_tag(hd->html_data, "li");
5797 return(1); /* get linked */
5800 else if(cmd == GF_EOD){
5801 if(PASS_HTML(hd->html_data)){
5802 html_output_string(hd->html_data, "</li>");
5806 return(PASS_HTML(hd->html_data)); /* DON'T get linked */
5811 * HTML <DL> (Definition List) element handler
5814 html_dl(HANDLER_S *hd, int ch, int cmd)
5816 if(cmd == GF_DATA){
5817 html_handoff(hd, ch);
5819 else if(cmd == GF_RESET){
5820 if(PASS_HTML(hd->html_data)){
5821 html_output_raw_tag(hd->html_data, "dl");
5823 else{
5825 * Set indention level for definition terms and definitions...
5827 hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5828 hd->y = hd->x + 2;
5829 hd->z = hd->y + 4;
5832 else if(cmd == GF_EOD){
5833 if(PASS_HTML(hd->html_data)){
5834 html_output_string(hd->html_data, "</dl>");
5836 else{
5837 html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5838 html_blank(hd->html_data, 1);
5842 return(1); /* get linked */
5847 * HTML <DT> (Definition Term) element handler
5850 html_dt(HANDLER_S *hd, int ch, int cmd)
5852 if(PASS_HTML(hd->html_data)){
5853 if(cmd == GF_DATA){
5854 html_handoff(hd, ch);
5856 else if(cmd == GF_RESET){
5857 html_output_raw_tag(hd->html_data, "dt");
5859 else if(cmd == GF_EOD){
5860 html_output_string(hd->html_data, "</dt>");
5863 return(1); /* get linked */
5866 if(cmd == GF_RESET){
5867 HANDLER_S *p;
5870 * There better be a Definition Handler installed
5871 * or else we crap out...
5873 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5876 if(p){ /* adjust indent level if needed */
5877 html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5878 html_blank(hd->html_data, 1);
5880 /* BUG: else should really bitch about this */
5883 return(0); /* DON'T get linked */
5888 * HTML <DD> (Definition Definition) element handler
5891 html_dd(HANDLER_S *hd, int ch, int cmd)
5893 if(PASS_HTML(hd->html_data)){
5894 if(cmd == GF_DATA){
5895 html_handoff(hd, ch);
5897 else if(cmd == GF_RESET){
5898 html_output_raw_tag(hd->html_data, "dd");
5900 else if(cmd == GF_EOD){
5901 html_output_string(hd->html_data, "</dd>");
5904 return(1); /* get linked */
5907 if(cmd == GF_RESET){
5908 HANDLER_S *p;
5911 * There better be a Definition Handler installed
5912 * or else we crap out...
5914 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5917 if(p){ /* adjust indent level if needed */
5918 html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5919 html_blank(hd->html_data, 0);
5921 /* BUG: should really bitch about this */
5924 return(0); /* DON'T get linked */
5929 * HTML <H1> (Headings 1) element handler.
5931 * Bold, very-large font, CENTERED. One or two blank lines
5932 * above and below. For our silly character cell's that
5933 * means centered and ALL CAPS...
5936 html_h1(HANDLER_S *hd, int ch, int cmd)
5938 if(cmd == GF_DATA){
5939 html_handoff(hd, ch);
5941 else if(cmd == GF_RESET){
5942 if(PASS_HTML(hd->html_data)){
5943 html_output_raw_tag(hd->html_data, "h1");
5945 else{
5946 /* turn ON the centered bit */
5947 CENTER_BIT(hd->html_data) = 1;
5950 else if(cmd == GF_EOD){
5951 if(PASS_HTML(hd->html_data)){
5952 html_output_string(hd->html_data, "</h1>");
5954 else{
5955 /* turn OFF the centered bit, add blank line */
5956 CENTER_BIT(hd->html_data) = 0;
5957 html_blank(hd->html_data, 1);
5961 return(1); /* get linked */
5966 * HTML <H2> (Headings 2) element handler
5969 html_h2(HANDLER_S *hd, int ch, int cmd)
5971 if(cmd == GF_DATA){
5972 if(PASS_HTML(hd->html_data)){
5973 html_handoff(hd, ch);
5975 else{
5976 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
5977 HTML_ULINE(hd->html_data, 1);
5978 hd->x ^= HTML_HX_ULINE; /* only once! */
5981 html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
5982 ? toupper((unsigned char) ch) : ch);
5985 else if(cmd == GF_RESET){
5986 if(PASS_HTML(hd->html_data)){
5987 html_output_raw_tag(hd->html_data, "h2");
5989 else{
5991 * Bold, large font, flush-left. One or two blank lines
5992 * above and below.
5994 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
5995 hd->x = HTML_HX_CENTER;
5996 else
5997 hd->x = 0;
5999 hd->x |= HTML_HX_ULINE;
6001 CENTER_BIT(hd->html_data) = 0;
6002 hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6003 hd->z = HD(hd->html_data)->wrapcol;
6004 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6005 html_blank(hd->html_data, 1);
6008 else if(cmd == GF_EOD){
6009 if(PASS_HTML(hd->html_data)){
6010 html_output_string(hd->html_data, "</h2>");
6012 else{
6014 * restore previous centering, and indent level
6016 if(!(hd->x & HTML_HX_ULINE))
6017 HTML_ULINE(hd->html_data, 0);
6019 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6020 html_blank(hd->html_data, 1);
6021 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6022 HD(hd->html_data)->wrapcol = hd->z;
6026 return(1); /* get linked */
6031 * HTML <H3> (Headings 3) element handler
6034 html_h3(HANDLER_S *hd, int ch, int cmd)
6036 if(cmd == GF_DATA){
6037 if(!PASS_HTML(hd->html_data)){
6038 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6039 HTML_ULINE(hd->html_data, 1);
6040 hd->x ^= HTML_HX_ULINE; /* only once! */
6044 html_handoff(hd, ch);
6046 else if(cmd == GF_RESET){
6047 if(PASS_HTML(hd->html_data)){
6048 html_output_raw_tag(hd->html_data, "h3");
6050 else{
6052 * Italic, large font, slightly indented from the left
6053 * margin. One or two blank lines above and below.
6055 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6056 hd->x = HTML_HX_CENTER;
6057 else
6058 hd->x = 0;
6060 hd->x |= HTML_HX_ULINE;
6061 CENTER_BIT(hd->html_data) = 0;
6062 hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6063 hd->z = HD(hd->html_data)->wrapcol;
6064 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6065 html_blank(hd->html_data, 1);
6068 else if(cmd == GF_EOD){
6069 if(PASS_HTML(hd->html_data)){
6070 html_output_string(hd->html_data, "</h3>");
6072 else{
6074 * restore previous centering, and indent level
6076 if(!(hd->x & HTML_HX_ULINE))
6077 HTML_ULINE(hd->html_data, 0);
6079 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6080 html_blank(hd->html_data, 1);
6081 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6082 HD(hd->html_data)->wrapcol = hd->z;
6086 return(1); /* get linked */
6091 * HTML <H4> (Headings 4) element handler
6094 html_h4(HANDLER_S *hd, int ch, int cmd)
6096 if(cmd == GF_DATA){
6097 html_handoff(hd, ch);
6099 else if(cmd == GF_RESET){
6100 if(PASS_HTML(hd->html_data)){
6101 html_output_raw_tag(hd->html_data, "h4");
6103 else{
6105 * Bold, normal font, indented more than H3. One blank line
6106 * above and below.
6108 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6109 CENTER_BIT(hd->html_data) = 0;
6110 hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6111 hd->z = HD(hd->html_data)->wrapcol;
6112 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6113 html_blank(hd->html_data, 1);
6116 else if(cmd == GF_EOD){
6117 if(PASS_HTML(hd->html_data)){
6118 html_output_string(hd->html_data, "</h4>");
6120 else{
6122 * restore previous centering, and indent level
6124 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6125 html_blank(hd->html_data, 1);
6126 CENTER_BIT(hd->html_data) = hd->x;
6127 HD(hd->html_data)->wrapcol = hd->z;
6131 return(1); /* get linked */
6136 * HTML <H5> (Headings 5) element handler
6139 html_h5(HANDLER_S *hd, int ch, int cmd)
6141 if(cmd == GF_DATA){
6142 html_handoff(hd, ch);
6144 else if(cmd == GF_RESET){
6145 if(PASS_HTML(hd->html_data)){
6146 html_output_raw_tag(hd->html_data, "h5");
6148 else{
6150 * Italic, normal font, indented as H4. One blank line
6151 * above.
6153 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6154 CENTER_BIT(hd->html_data) = 0;
6155 hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6156 hd->z = HD(hd->html_data)->wrapcol;
6157 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6158 html_blank(hd->html_data, 1);
6161 else if(cmd == GF_EOD){
6162 if(PASS_HTML(hd->html_data)){
6163 html_output_string(hd->html_data, "</h5>");
6165 else{
6167 * restore previous centering, and indent level
6169 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6170 html_blank(hd->html_data, 1);
6171 CENTER_BIT(hd->html_data) = hd->x;
6172 HD(hd->html_data)->wrapcol = hd->z;
6176 return(1); /* get linked */
6181 * HTML <H6> (Headings 6) element handler
6184 html_h6(HANDLER_S *hd, int ch, int cmd)
6186 if(cmd == GF_DATA){
6187 html_handoff(hd, ch);
6189 else if(cmd == GF_RESET){
6190 if(PASS_HTML(hd->html_data)){
6191 html_output_raw_tag(hd->html_data, "h6");
6193 else{
6195 * Bold, indented same as normal text, more than H5. One
6196 * blank line above.
6198 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6199 CENTER_BIT(hd->html_data) = 0;
6200 hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6201 hd->z = HD(hd->html_data)->wrapcol;
6202 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6203 html_blank(hd->html_data, 1);
6206 else if(cmd == GF_EOD){
6207 if(PASS_HTML(hd->html_data)){
6208 html_output_string(hd->html_data, "</h6>");
6210 else{
6212 * restore previous centering, and indent level
6214 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6215 html_blank(hd->html_data, 1);
6216 CENTER_BIT(hd->html_data) = hd->x;
6217 HD(hd->html_data)->wrapcol = hd->z;
6221 return(1); /* get linked */
6226 * HTML <BlockQuote> element handler
6229 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6231 int j;
6232 #define HTML_BQ_INDENT 6
6234 if(cmd == GF_DATA){
6235 html_handoff(hd, ch);
6237 else if(cmd == GF_RESET){
6238 if(PASS_HTML(hd->html_data)){
6239 html_output_raw_tag(hd->html_data, "blockquote");
6241 else{
6243 * A typical rendering might be a slight extra left and
6244 * right indent, and/or italic font. The Blockquote element
6245 * causes a paragraph break, and typically provides space
6246 * above and below the quote.
6248 html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6249 j = HD(hd->html_data)->wrapstate;
6250 HD(hd->html_data)->wrapstate = 0;
6251 html_blank(hd->html_data, 1);
6252 HD(hd->html_data)->wrapstate = j;
6253 HD(hd->html_data)->wrapcol -= HTML_BQ_INDENT;
6256 else if(cmd == GF_EOD){
6257 if(PASS_HTML(hd->html_data)){
6258 html_output_string(hd->html_data, "</blockquote>");
6260 else{
6261 html_blank(hd->html_data, 1);
6263 j = HD(hd->html_data)->wrapstate;
6264 HD(hd->html_data)->wrapstate = 0;
6265 html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6266 HD(hd->html_data)->wrapstate = j;
6267 HD(hd->html_data)->wrapcol += HTML_BQ_INDENT;
6271 return(1); /* get linked */
6276 * HTML <Address> element handler
6279 html_address(HANDLER_S *hd, int ch, int cmd)
6281 int j;
6282 #define HTML_ADD_INDENT 2
6284 if(cmd == GF_DATA){
6285 html_handoff(hd, ch);
6287 else if(cmd == GF_RESET){
6288 if(PASS_HTML(hd->html_data)){
6289 html_output_raw_tag(hd->html_data, "address");
6291 else{
6293 * A typical rendering might be a slight extra left and
6294 * right indent, and/or italic font. The Blockquote element
6295 * causes a paragraph break, and typically provides space
6296 * above and below the quote.
6298 html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6299 j = HD(hd->html_data)->wrapstate;
6300 HD(hd->html_data)->wrapstate = 0;
6301 html_blank(hd->html_data, 1);
6302 HD(hd->html_data)->wrapstate = j;
6305 else if(cmd == GF_EOD){
6306 if(PASS_HTML(hd->html_data)){
6307 html_output_string(hd->html_data, "</address>");
6309 else{
6310 html_blank(hd->html_data, 1);
6312 j = HD(hd->html_data)->wrapstate;
6313 HD(hd->html_data)->wrapstate = 0;
6314 html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6315 HD(hd->html_data)->wrapstate = j;
6319 return(1); /* get linked */
6324 * HTML <PRE> (Preformatted Text) element handler
6327 html_pre(HANDLER_S *hd, int ch, int cmd)
6329 if(cmd == GF_DATA){
6331 * remove CRLF after '>' in element.
6332 * We see CRLF because wrapstate is off.
6334 switch(hd->y){
6335 case 2 :
6336 if(ch == '\012'){
6337 hd->y = 3;
6338 return(1);
6340 else
6341 html_handoff(hd, '\015');
6343 break;
6345 case 1 :
6346 if(ch == '\015'){
6347 hd->y = 2;
6348 return(1);
6351 case 3 :
6352 /* passing tags? replace CRLF with <BR> to make
6353 * sure hard newline survives in the end...
6355 if(PASS_HTML(hd->html_data))
6356 hd->y = 4; /* keep looking for CRLF */
6357 else
6358 hd->y = 0; /* stop looking */
6360 break;
6362 case 4 :
6363 if(ch == '\015'){
6364 hd->y = 5;
6365 return(1);
6368 break;
6370 case 5 :
6371 hd->y = 4;
6372 if(ch == '\012'){
6373 html_output_string(hd->html_data, "<br />");
6374 return(1);
6376 else
6377 html_handoff(hd, '\015'); /* not CRLF, pass raw CR */
6379 break;
6381 default : /* zero case */
6382 break;
6385 html_handoff(hd, ch);
6387 else if(cmd == GF_RESET){
6388 hd->y = 1;
6389 if(PASS_HTML(hd->html_data)){
6390 html_output_raw_tag(hd->html_data, "pre");
6392 else{
6393 if(hd->html_data)
6394 hd->html_data->f1 = DFL; \
6396 html_blank(hd->html_data, 1);
6397 hd->x = HD(hd->html_data)->wrapstate;
6398 HD(hd->html_data)->wrapstate = 0;
6401 else if(cmd == GF_EOD){
6402 if(PASS_HTML(hd->html_data)){
6403 html_output_string(hd->html_data, "</pre>");
6405 else{
6406 HD(hd->html_data)->wrapstate = (hd->x != 0);
6407 html_blank(hd->html_data, 0);
6411 return(1);
6416 * HTML <CENTER> (Centerd Text) element handler
6419 html_center(HANDLER_S *hd, int ch, int cmd)
6421 if(cmd == GF_DATA){
6422 html_handoff(hd, ch);
6424 else if(cmd == GF_RESET){
6425 if(PASS_HTML(hd->html_data)){
6426 html_output_raw_tag(hd->html_data, "center");
6428 else{
6429 /* turn ON the centered bit */
6430 CENTER_BIT(hd->html_data) = 1;
6433 else if(cmd == GF_EOD){
6434 if(PASS_HTML(hd->html_data)){
6435 html_output_string(hd->html_data, "</center>");
6437 else{
6438 /* turn OFF the centered bit */
6439 CENTER_BIT(hd->html_data) = 0;
6443 return(1);
6448 * HTML <DIV> (Document Divisions) element handler
6451 html_div(HANDLER_S *hd, int ch, int cmd)
6453 if(cmd == GF_DATA){
6454 html_handoff(hd, ch);
6456 else if(cmd == GF_RESET){
6457 if(PASS_HTML(hd->html_data)){
6458 html_output_raw_tag(hd->html_data, "div");
6460 else{
6461 PARAMETER *p;
6463 for(p = HD(hd->html_data)->el_data->attribs;
6464 p && p->attribute;
6465 p = p->next)
6466 if(!strucmp(p->attribute, "ALIGN")){
6467 if(p->value){
6468 /* remember previous values */
6469 hd->x = CENTER_BIT(hd->html_data);
6470 hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6472 html_blank(hd->html_data, 0);
6473 CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6474 html_indent(hd->html_data, 0, HTML_ID_SET);
6475 /* NOTE: "RIGHT" not supported yet */
6480 else if(cmd == GF_EOD){
6481 if(PASS_HTML(hd->html_data)){
6482 html_output_string(hd->html_data, "</div>");
6484 else{
6485 /* restore centered bit and indentiousness */
6486 CENTER_BIT(hd->html_data) = hd->y;
6487 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6488 html_blank(hd->html_data, 0);
6492 return(1);
6497 * HTML <SPAN> (Text Span) element handler
6500 html_span(HANDLER_S *hd, int ch, int cmd)
6502 if(PASS_HTML(hd->html_data)){
6503 if(cmd == GF_DATA){
6504 html_handoff(hd, ch);
6506 else if(cmd == GF_RESET){
6507 html_output_raw_tag(hd->html_data, "span");
6509 else if(cmd == GF_EOD){
6510 html_output_string(hd->html_data, "</span>");
6513 return(1);
6516 return(0);
6521 * HTML <KBD> (Text Kbd) element handler
6524 html_kbd(HANDLER_S *hd, int ch, int cmd)
6526 if(PASS_HTML(hd->html_data)){
6527 if(cmd == GF_DATA){
6528 html_handoff(hd, ch);
6530 else if(cmd == GF_RESET){
6531 html_output_raw_tag(hd->html_data, "kbd");
6533 else if(cmd == GF_EOD){
6534 html_output_string(hd->html_data, "</kbd>");
6537 return(1);
6540 return(0);
6545 * HTML <DFN> (Text Definition) element handler
6548 html_dfn(HANDLER_S *hd, int ch, int cmd)
6550 if(PASS_HTML(hd->html_data)){
6551 if(cmd == GF_DATA){
6552 html_handoff(hd, ch);
6554 else if(cmd == GF_RESET){
6555 html_output_raw_tag(hd->html_data, "dfn");
6557 else if(cmd == GF_EOD){
6558 html_output_string(hd->html_data, "</dfn>");
6561 return(1);
6564 return(0);
6569 * HTML <TT> (Text Tt) element handler
6572 html_tt(HANDLER_S *hd, int ch, int cmd)
6574 if(PASS_HTML(hd->html_data)){
6575 if(cmd == GF_DATA){
6576 html_handoff(hd, ch);
6578 else if(cmd == GF_RESET){
6579 html_output_raw_tag(hd->html_data, "tt");
6581 else if(cmd == GF_EOD){
6582 html_output_string(hd->html_data, "</tt>");
6585 return(1);
6588 return(0);
6593 * HTML <VAR> (Text Var) element handler
6596 html_var(HANDLER_S *hd, int ch, int cmd)
6598 if(PASS_HTML(hd->html_data)){
6599 if(cmd == GF_DATA){
6600 html_handoff(hd, ch);
6602 else if(cmd == GF_RESET){
6603 html_output_raw_tag(hd->html_data, "var");
6605 else if(cmd == GF_EOD){
6606 html_output_string(hd->html_data, "</var>");
6609 return(1);
6612 return(0);
6617 * HTML <SAMP> (Text Samp) element handler
6620 html_samp(HANDLER_S *hd, int ch, int cmd)
6622 if(PASS_HTML(hd->html_data)){
6623 if(cmd == GF_DATA){
6624 html_handoff(hd, ch);
6626 else if(cmd == GF_RESET){
6627 html_output_raw_tag(hd->html_data, "samp");
6629 else if(cmd == GF_EOD){
6630 html_output_string(hd->html_data, "</samp>");
6633 return(1);
6636 return(0);
6641 * HTML <SUP> (Text Superscript) element handler
6644 html_sup(HANDLER_S *hd, int ch, int cmd)
6646 if(PASS_HTML(hd->html_data)){
6647 if(cmd == GF_DATA){
6648 html_handoff(hd, ch);
6650 else if(cmd == GF_RESET){
6651 html_output_raw_tag(hd->html_data, "sup");
6653 else if(cmd == GF_EOD){
6654 html_output_string(hd->html_data, "</sup>");
6657 return(1);
6660 return(0);
6665 * HTML <SUB> (Text Subscript) element handler
6668 html_sub(HANDLER_S *hd, int ch, int cmd)
6670 if(PASS_HTML(hd->html_data)){
6671 if(cmd == GF_DATA){
6672 html_handoff(hd, ch);
6674 else if(cmd == GF_RESET){
6675 html_output_raw_tag(hd->html_data, "sub");
6677 else if(cmd == GF_EOD){
6678 html_output_string(hd->html_data, "</sub>");
6681 return(1);
6684 return(0);
6689 * HTML <CITE> (Text Citation) element handler
6692 html_cite(HANDLER_S *hd, int ch, int cmd)
6694 if(PASS_HTML(hd->html_data)){
6695 if(cmd == GF_DATA){
6696 html_handoff(hd, ch);
6698 else if(cmd == GF_RESET){
6699 html_output_raw_tag(hd->html_data, "cite");
6701 else if(cmd == GF_EOD){
6702 html_output_string(hd->html_data, "</cite>");
6705 return(1);
6708 return(0);
6713 * HTML <CODE> (Text Code) element handler
6716 html_code(HANDLER_S *hd, int ch, int cmd)
6718 if(PASS_HTML(hd->html_data)){
6719 if(cmd == GF_DATA){
6720 html_handoff(hd, ch);
6722 else if(cmd == GF_RESET){
6723 html_output_raw_tag(hd->html_data, "code");
6725 else if(cmd == GF_EOD){
6726 html_output_string(hd->html_data, "</code>");
6729 return(1);
6732 return(0);
6737 * HTML <INS> (Text Inserted) element handler
6740 html_ins(HANDLER_S *hd, int ch, int cmd)
6742 if(PASS_HTML(hd->html_data)){
6743 if(cmd == GF_DATA){
6744 html_handoff(hd, ch);
6746 else if(cmd == GF_RESET){
6747 html_output_raw_tag(hd->html_data, "ins");
6749 else if(cmd == GF_EOD){
6750 html_output_string(hd->html_data, "</ins>");
6753 return(1);
6756 return(0);
6761 * HTML <DEL> (Text Deleted) element handler
6764 html_del(HANDLER_S *hd, int ch, int cmd)
6766 if(PASS_HTML(hd->html_data)){
6767 if(cmd == GF_DATA){
6768 html_handoff(hd, ch);
6770 else if(cmd == GF_RESET){
6771 html_output_raw_tag(hd->html_data, "del");
6773 else if(cmd == GF_EOD){
6774 html_output_string(hd->html_data, "</del>");
6777 return(1);
6780 return(0);
6785 * HTML <ABBR> (Text Abbreviation) element handler
6788 html_abbr(HANDLER_S *hd, int ch, int cmd)
6790 if(PASS_HTML(hd->html_data)){
6791 if(cmd == GF_DATA){
6792 html_handoff(hd, ch);
6794 else if(cmd == GF_RESET){
6795 html_output_raw_tag(hd->html_data, "abbr");
6797 else if(cmd == GF_EOD){
6798 html_output_string(hd->html_data, "</abbr>");
6801 return(1);
6804 return(0);
6809 * HTML <SCRIPT> element handler
6812 html_script(HANDLER_S *hd, int ch, int cmd)
6814 /* Link in and drop everything within on the floor */
6815 return(1);
6820 * HTML <APPLET> element handler
6823 html_applet(HANDLER_S *hd, int ch, int cmd)
6825 /* Link in and drop everything within on the floor */
6826 return(1);
6831 * HTML <STYLE> CSS element handler
6834 html_style(HANDLER_S *hd, int ch, int cmd)
6836 static STORE_S *css_stuff ;
6838 if(PASS_HTML(hd->html_data)){
6839 if(cmd == GF_DATA){
6840 /* collect style settings */
6841 so_writec(ch, css_stuff);
6843 else if(cmd == GF_RESET){
6844 if(css_stuff)
6845 so_give(&css_stuff);
6847 css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6849 else if(cmd == GF_EOD){
6851 * TODO: strip anything mischievous and pass on
6854 so_give(&css_stuff);
6858 return(1);
6862 * RSS 2.0 <RSS> version
6865 rss_rss(HANDLER_S *hd, int ch, int cmd)
6867 if(cmd == GF_RESET){
6868 PARAMETER *p;
6870 for(p = HD(hd->html_data)->el_data->attribs;
6871 p && p->attribute;
6872 p = p->next)
6873 if(!strucmp(p->attribute, "VERSION")){
6874 if(p->value && !strucmp(p->value,"2.0"))
6875 return(0); /* do not link in */
6878 gf_error("Incompatible RSS version");
6879 /* NO RETURN */
6882 return(0); /* not linked or error means we never get here */
6886 * RSS 2.0 <CHANNEL>
6889 rss_channel(HANDLER_S *hd, int ch, int cmd)
6891 if(cmd == GF_DATA){
6892 html_handoff(hd, ch);
6894 else if(cmd == GF_RESET){
6895 RSS_FEED_S *feed;
6897 feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6898 memset(feed, 0, sizeof(RSS_FEED_S));
6901 return(1); /* link in */
6905 * RSS 2.0 <TITLE>
6908 rss_title(HANDLER_S *hd, int ch, int cmd)
6910 static STORE_S *title_so;
6912 if(cmd == GF_DATA){
6913 /* collect data */
6914 if(title_so){
6915 so_writec(ch, title_so);
6918 else if(cmd == GF_RESET){
6919 if(RSS_FEED(hd->html_data)){
6920 /* prepare for data */
6921 if(title_so)
6922 so_give(&title_so);
6924 title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6927 else if(cmd == GF_EOD){
6928 if(title_so){
6929 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6930 RSS_ITEM_S *rip;
6932 if(feed){
6933 if((rip = feed->items) != NULL){
6934 for(; rip->next; rip = rip->next)
6937 if(rip->title)
6938 fs_give((void **) &rip->title);
6940 rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6942 else{
6943 if(feed->title)
6944 fs_give((void **) &feed->title);
6946 feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6950 so_give(&title_so);
6954 return(1); /* link in */
6958 * RSS 2.0 <IMAGE>
6961 rss_image(HANDLER_S *hd, int ch, int cmd)
6963 static STORE_S *img_so;
6965 if(cmd == GF_DATA){
6966 /* collect data */
6967 if(img_so){
6968 so_writec(ch, img_so);
6971 else if(cmd == GF_RESET){
6972 if(RSS_FEED(hd->html_data)){
6973 /* prepare to collect data */
6974 if(img_so)
6975 so_give(&img_so);
6977 img_so = so_get(CharStar, NULL, EDIT_ACCESS);
6980 else if(cmd == GF_EOD){
6981 if(img_so){
6982 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6984 if(feed){
6985 if(feed->image)
6986 fs_give((void **) &feed->image);
6988 feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
6991 so_give(&img_so);
6995 return(1); /* link in */
6999 * RSS 2.0 <LINK>
7002 rss_link(HANDLER_S *hd, int ch, int cmd)
7004 static STORE_S *link_so;
7006 if(cmd == GF_DATA){
7007 /* collect data */
7008 if(link_so){
7009 so_writec(ch, link_so);
7012 else if(cmd == GF_RESET){
7013 if(RSS_FEED(hd->html_data)){
7014 /* prepare to collect data */
7015 if(link_so)
7016 so_give(&link_so);
7018 link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7021 else if(cmd == GF_EOD){
7022 if(link_so){
7023 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7024 RSS_ITEM_S *rip;
7026 if(feed){
7027 if((rip = feed->items) != NULL){
7028 for(; rip->next; rip = rip->next)
7031 if(rip->link)
7032 fs_give((void **) &rip->link);
7034 rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7036 else{
7037 if(feed->link)
7038 fs_give((void **) &feed->link);
7040 feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7044 so_give(&link_so);
7048 return(1); /* link in */
7052 * RSS 2.0 <DESCRIPTION>
7055 rss_description(HANDLER_S *hd, int ch, int cmd)
7057 static STORE_S *desc_so;
7059 if(cmd == GF_DATA){
7060 /* collect data */
7061 if(desc_so){
7062 so_writec(ch, desc_so);
7065 else if(cmd == GF_RESET){
7066 if(RSS_FEED(hd->html_data)){
7067 /* prepare to collect data */
7068 if(desc_so)
7069 so_give(&desc_so);
7071 desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7074 else if(cmd == GF_EOD){
7075 if(desc_so){
7076 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7077 RSS_ITEM_S *rip;
7079 if(feed){
7080 if((rip = feed->items) != NULL){
7081 for(; rip->next; rip = rip->next)
7084 if(rip->description)
7085 fs_give((void **) &rip->description);
7087 rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7089 else{
7090 if(feed->description)
7091 fs_give((void **) &feed->description);
7093 feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7097 so_give(&desc_so);
7101 return(1); /* link in */
7105 * RSS 2.0 <TTL> (in minutes)
7108 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7110 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7112 if(cmd == GF_DATA){
7113 if(isdigit((unsigned char) ch))
7114 feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7116 else if(cmd == GF_RESET){
7117 /* prepare to collect data */
7118 feed->ttl = 0;
7120 else if(cmd == GF_EOD){
7123 return(1); /* link in */
7127 * RSS 2.0 <ITEM>
7130 rss_item(HANDLER_S *hd, int ch, int cmd)
7132 /* BUG: verify no ITEM nesting? */
7133 if(cmd == GF_RESET){
7134 RSS_FEED_S *feed;
7136 if((feed = RSS_FEED(hd->html_data)) != NULL){
7137 RSS_ITEM_S **rip;
7138 int n = 0;
7140 for(rip = &feed->items; *rip; rip = &(*rip)->next)
7141 if(++n > RSS_ITEM_LIMIT)
7142 return(0);
7144 *rip = fs_get(sizeof(RSS_ITEM_S));
7145 memset(*rip, 0, sizeof(RSS_ITEM_S));
7149 return(0); /* don't link in */
/*
 * rss_skip_whitespace - return a pointer to the first character of s
 *                       that is not whitespace (possibly its
 *                       terminating nul).  s itself is not modified.
 */
char *
rss_skip_whitespace(char *s)
{
    while(*s && isspace((unsigned char) *s))
      s++;

    return(s);
}
7164 * return the function associated with the given element name
7166 ELPROP_S *
7167 element_properties(FILTER_S *fd, char *el_name)
7169 register ELPROP_S *el_table = ELEMENTS(fd);
7171 for(; el_table->element; el_table++)
7172 if(!strucmp(el_name, el_table->element))
7173 return(el_table);
7175 return(NULL);
7180 * collect element's name and any attribute/value pairs then
7181 * dispatch to the appropriate handler.
7183 * Returns 1 : got what we wanted
7184 * 0 : we need more data
7185 * -1 : bogus input
7188 html_element_collector(FILTER_S *fd, int ch)
7190 if(ch == '>'){
7191 if(ED(fd)->overrun){
7193 * If problem processing, don't bother doing anything
7194 * internally, just return such that none of what we've
7195 * digested is displayed.
7197 HTML_DEBUG_EL("too long", ED(fd));
7198 return(1); /* Let it go, Jim */
7200 else if(ED(fd)->mkup_decl){
7201 if(ED(fd)->badform){
7202 dprint((2, "-- html error: bad form: %.*s\n",
7203 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7205 * Invalid comment -- make some guesses as
7206 * to whether we should stop with this greater-than...
7208 if(ED(fd)->buf[0] != '-'
7209 || ED(fd)->len < 4
7210 || (ED(fd)->buf[1] == '-'
7211 && ED(fd)->buf[ED(fd)->len - 1] == '-'
7212 && ED(fd)->buf[ED(fd)->len - 2] == '-'))
7213 return(1);
7215 else{
7216 dprint((5, "-- html: OK: %.*s\n",
7217 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7218 if(ED(fd)->start_comment == ED(fd)->end_comment){
7219 if(ED(fd)->len > 10){
7220 ED(fd)->buf[ED(fd)->len - 2] = '\0';
7221 html_element_comment(fd, ED(fd)->buf + 2);
7224 return(1);
7226 /* else keep collecting comment below */
7229 else if(ED(fd)->proc_inst){
7230 return(1); /* return without display... */
7232 else if(!ED(fd)->quoted || ED(fd)->badform){
7233 ELPROP_S *ep;
7236 * We either have the whole thing or all that we could
7237 * salvage from it. Try our best...
7240 if(HD(fd)->bitbucket)
7241 return(1); /* element inside chtml clause! */
7243 if(!ED(fd)->badform && html_element_flush(ED(fd)))
7244 return(1); /* return without display... */
7247 * If we ran into an empty tag or we don't know how to deal
7248 * with it, just go on, ignoring it...
7250 if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
7251 if(ep->handler){
7252 /* dispatch the element's handler */
7253 HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
7254 if(ED(fd)->end_tag){
7255 html_pop(fd, ep); /* remove it's handler */
7257 else{
7258 /* if a block element, pop any open <p>'s */
7259 if(ep->blocklevel){
7260 HANDLER_S *tp;
7262 for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
7263 HTML_DEBUG_EL("Unclosed <P>", ED(fd));
7264 html_pop(fd, EL(tp));
7265 break;
7269 /* enforce table nesting */
7270 if(!strucmp(ep->element, "tr")){
7271 if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
7272 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
7273 if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
7274 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7275 html_pop(fd, EL(HANDLERS(fd)));
7277 else{
7278 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7279 html_push(fd, element_properties(fd, "table"));
7283 else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
7284 if(!HANDLERS(fd)){
7285 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7286 html_push(fd, element_properties(fd, "table"));
7287 html_push(fd, element_properties(fd, "tr"));
7289 else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
7290 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
7291 html_push(fd, element_properties(fd, "tr"));
7293 else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
7294 dprint((2, "-- html error: bad nesting popping <TD>"));
7295 html_pop(fd, EL(HANDLERS(fd)));
7299 /* add it's handler */
7300 if(html_push(fd, ep)){
7301 if(ED(fd)->empty){
7302 /* remove empty element */
7303 html_pop(fd, ep);
7308 else {
7309 HTML_DEBUG_EL("IGNORED", ED(fd));
7312 else{ /* else, empty or unrecognized */
7313 HTML_DEBUG_EL("?", ED(fd));
7316 return(1); /* all done! see, that didn't hurt */
7319 else if(ch == '/' && ED(fd)->element && ED(fd)->len){
7320 ED(fd)->empty = 1;
7322 else
7323 ED(fd)->empty = 0;
7325 if(ED(fd)->mkup_decl){
7326 if((ch &= 0xff) == '-'){
7327 if(ED(fd)->hyphen){
7328 ED(fd)->hyphen = 0;
7329 if(ED(fd)->start_comment)
7330 ED(fd)->end_comment = 1;
7331 else
7332 ED(fd)->start_comment = 1;
7334 else
7335 ED(fd)->hyphen = 1;
7337 else{
7338 if(ED(fd)->end_comment)
7339 ED(fd)->start_comment = ED(fd)->end_comment = 0;
7342 * no "--" after ! or non-whitespace between comments - bad
7344 if(ED(fd)->len < 2 || (!ED(fd)->start_comment
7345 && !ASCII_ISSPACE((unsigned char) ch)))
7346 ED(fd)->badform = 1; /* non-comment! */
7348 ED(fd)->hyphen = 0;
7352 * Remember the comment for possible later processing, if
7353 * it get's too long, remember first and last few chars
7354 * so we know when to terminate (and throw some garbage
7355 * in between when we toss out what's between.
7357 if(ED(fd)->len == HTML_BUF_LEN){
7358 ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
7359 ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
7360 ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
7361 ED(fd)->len = 6;
7364 ED(fd)->buf[(ED(fd)->len)++] = ch;
7365 return(0); /* comments go in the bit bucket */
7367 else if(ED(fd)->overrun || ED(fd)->badform){
7368 return(0); /* swallow char's until next '>' */
7370 else if(!ED(fd)->element && !ED(fd)->len){
7371 if(ch == '/'){ /* validate leading chars */
7372 ED(fd)->end_tag = 1;
7373 return(0);
7375 else if(ch == '!'){
7376 ED(fd)->mkup_decl = 1;
7377 return(0);
7379 else if(ch == '?'){
7380 ED(fd)->proc_inst = 1;
7381 return(0);
7383 else if(!isalpha((unsigned char) ch))
7384 return(-1); /* can't be a tag! */
7386 else if(ch == '\"' || ch == '\''){
7387 if(!ED(fd)->hit_equal){
7388 ED(fd)->badform = 1; /* quote in element name?!? */
7389 return(0);
7392 if(ED(fd)->quoted){
7393 if(ED(fd)->quoted == (char) ch){
7394 /* end of a quoted value */
7395 ED(fd)->quoted = 0;
7396 if(ED(fd)->len && html_element_flush(ED(fd)))
7397 ED(fd)->badform = 1;
7399 return(0); /* continue collecting chars */
7401 /* ELSE fall thru writing other quoting char */
7403 else{
7404 ED(fd)->quoted = (char) ch;
7405 ED(fd)->was_quoted = 1;
7406 return(0); /* need more data */
7410 ch &= 0xff; /* strip any "literal" high bits */
7411 if(ED(fd)->quoted
7412 || isalnum(ch)
7413 || strchr("#-.!", ch)){
7414 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7415 ? HTML_BUF_LEN:MAX_ELEMENT)){
7416 ED(fd)->buf[(ED(fd)->len)++] = ch;
7418 else
7419 ED(fd)->overrun = 1; /* flag it broken */
7421 else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
7422 if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
7423 ED(fd)->badform = 1;
7424 return(0); /* else, we ain't done yet */
7427 if(!ED(fd)->hit_equal)
7428 ED(fd)->hit_equal = (ch == '=');
7430 else
7431 ED(fd)->badform = 1; /* unrecognized data?? */
7433 return(0); /* keep collecting */
7438 * Element collector found complete string, integrate it and reset
7439 * internal collection buffer.
7441 * Returns zero if element collection buffer flushed, error flag otherwise
7444 html_element_flush(CLCTR_S *el_data)
7446 int rv = 0;
7448 if(el_data->hit_equal){ /* adding a value */
7449 el_data->hit_equal = 0;
7450 if(el_data->cur_attrib){
7451 if(!el_data->cur_attrib->value){
7452 el_data->cur_attrib->value = cpystr(el_data->len
7453 ? el_data->buf : "");
7455 else{
7456 dprint((2, "** element: unexpected value: %.10s...\n",
7457 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7458 rv = 1;
7461 else{
7462 dprint((2, "** element: missing attribute name: %.10s...\n",
7463 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7464 rv = 2;
7467 else if(el_data->len){
7468 if(!el_data->element){
7469 el_data->element = cpystr(el_data->buf);
7471 else{
7472 PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7473 memset(p, 0, sizeof(PARAMETER));
7474 if(el_data->attribs){
7475 el_data->cur_attrib->next = p;
7476 el_data->cur_attrib = p;
7478 else
7479 el_data->attribs = el_data->cur_attrib = p;
7481 p->attribute = cpystr(el_data->buf);
7486 el_data->was_quoted = 0; /* reset collector buf and state */
7487 el_data->len = 0;
7488 memset(el_data->buf, 0, HTML_BUF_LEN);
7489 return(rv); /* report whatever happened above */
7494 * html_element_comment - "Special" comment handling here
7496 void
7497 html_element_comment(FILTER_S *f, char *s)
7499 char *p;
7501 while(*s && ASCII_ISSPACE((unsigned char) *s))
7502 s++;
7505 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7507 if(!struncmp(s, "chtml ", 6)){
7508 s += 6;
7509 if(!struncmp(s, "if ", 3)){
7510 HD(f)->bitbucket = 1; /* default is failure! */
7511 switch(*(s += 3)){
7512 case 'P' :
7513 case 'p' :
7514 if(!struncmp(s + 1, "inemode=", 8)){
7515 if(!strucmp(s = removing_quotes(s + 9), "function_key")
7516 && F_ON(F_USE_FK, ps_global))
7517 HD(f)->bitbucket = 0;
7518 else if(!strucmp(s, "running"))
7519 HD(f)->bitbucket = 0;
7520 else if(!strucmp(s, "phone_home") && ps_global->phone_home)
7521 HD(f)->bitbucket = 0;
7522 #ifdef _WINDOWS
7523 else if(!strucmp(s, "os_windows"))
7524 HD(f)->bitbucket = 0;
7525 #endif
7528 break;
7530 case '[' : /* test */
7531 if((p = strindex(++s, ']')) != NULL){
7532 *p = '\0'; /* tie off test string */
7533 removing_leading_white_space(s);
7534 removing_trailing_white_space(s);
7535 if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7536 for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7540 HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7541 READ_ACCESS) != 0);
7545 break;
7547 default :
7548 break;
7551 else if(!strucmp(s, "else")){
7552 HD(f)->bitbucket = !HD(f)->bitbucket;
7554 else if(!strucmp(s, "endif")){
7555 /* Clean up after chtml here */
7556 HD(f)->bitbucket = 0;
7559 else if(!HD(f)->bitbucket){
7560 if(!struncmp(s, "#include ", 9)){
7561 char buf[MAILTMPLEN], *bufp;
7562 int len, end_of_line;
7563 FILE *fp;
7565 /* Include the named file */
7566 if(!struncmp(s += 9, "file=", 5)
7567 && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7568 html_element_output(f, HTML_NEWLINE);
7570 while(fgets(buf, sizeof(buf), fp)){
7571 if((len = strlen(buf)) && buf[len-1] == '\n'){
7572 end_of_line = 1;
7573 buf[--len] = '\0';
7575 else
7576 end_of_line = 0;
7578 for(bufp = buf; len; bufp++, len--)
7579 html_element_output(f, (int) *bufp);
7581 if(end_of_line)
7582 html_element_output(f, HTML_NEWLINE);
7585 fclose(fp);
7586 html_element_output(f, HTML_NEWLINE);
7587 HD(f)->blanks = 0;
7588 if(f->f1 == WSPACE)
7589 f->f1 = DFL;
7592 else if(!struncmp(s, "#echo ", 6)){
7593 if(!struncmp(s += 6, "var=", 4)){
7594 char *p, buf[MAILTMPLEN];
7595 ADDRESS *adr;
7596 extern char datestamp[];
7598 if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7599 p = ALPINE_VERSION;
7601 else if(!strcmp(s, "ALPINE_REVISION")){
7602 p = get_alpine_revision_string(buf, sizeof(buf));
7604 else if(!strcmp(s, "C_CLIENT_VERSION")){
7605 p = CCLIENTVERSION;
7607 else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7608 p = datestamp;
7610 else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7611 rfc822_date(p = buf);
7613 else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7614 p = (ps_global->VAR_LOCAL_FULLNAME
7615 && ps_global->VAR_LOCAL_FULLNAME[0])
7616 ? ps_global->VAR_LOCAL_FULLNAME
7617 : "Local Support";
7619 else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7620 p = (ps_global->VAR_LOCAL_ADDRESS
7621 && ps_global->VAR_LOCAL_ADDRESS[0])
7622 ? ps_global->VAR_LOCAL_ADDRESS
7623 : "postmaster";
7624 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7625 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7626 mail_free_address(&adr);
7628 else if(!strcmp(s, "_BUGS_FULLNAME_")){
7629 p = (ps_global->VAR_BUGS_FULLNAME
7630 && ps_global->VAR_BUGS_FULLNAME[0])
7631 ? ps_global->VAR_BUGS_FULLNAME
7632 : "Place to report Alpine Bugs";
7634 else if(!strcmp(s, "_BUGS_ADDRESS_")){
7635 p = (ps_global->VAR_BUGS_ADDRESS
7636 && ps_global->VAR_BUGS_ADDRESS[0])
7637 ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7638 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7639 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7640 mail_free_address(&adr);
7642 else if(!strcmp(s, "CURRENT_DIR")){
7643 getcwd(p = buf, sizeof(buf));
7645 else if(!strcmp(s, "HOME_DIR")){
7646 p = ps_global->home_dir;
7648 else if(!strcmp(s, "PINE_CONF_PATH")){
7649 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7650 p = "/usr/local/lib/pine.conf";
7651 #else
7652 p = SYSTEM_PINERC;
7653 #endif
7655 else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7656 #ifdef SYSTEM_PINERC_FIXED
7657 p = SYSTEM_PINERC_FIXED;
7658 #else
7659 p = "/usr/local/lib/pine.conf.fixed";
7660 #endif
7662 else if(!strcmp(s, "PINE_INFO_PATH")){
7663 p = SYSTEM_PINE_INFO_PATH;
7665 else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7666 p = sysinbox();
7668 else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7669 /* Don't put the leading /tmp/. */
7670 int i, j;
7672 p = sysinbox();
7673 if(p){
7674 for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7675 if(p[i] == '/')
7676 buf[j++] = '\\';
7677 else
7678 buf[j++] = p[i];
7680 buf[j++] = '\0';
7681 p = buf;
7684 else if(!struncmp(s, "VAR_", 4)){
7685 p = s+4;
7686 if(pith_opt_pretty_var_name)
7687 p = (*pith_opt_pretty_var_name)(p);
7689 else if(!struncmp(s, "FEAT_", 5)){
7690 p = s+5;
7691 if(pith_opt_pretty_feature_name)
7692 p = (*pith_opt_pretty_feature_name)(p, -1);
7694 else
7695 p = NULL;
7697 if(p){
7698 if(f->f1 == WSPACE){
7699 html_element_output(f, ' ');
7700 f->f1 = DFL; /* clear it */
7703 while(*p)
7704 html_element_output(f, (int) *p++);
7712 void
7713 html_element_output(FILTER_S *f, int ch)
7715 if(HANDLERS(f))
7716 (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7717 else
7718 html_output(f, ch);
7723 * collect html entity and return its UCS value when done.
7725 * Returns HTML_MOREDATA : we need more data
7726 * HTML_ENTITY : entity collected
7727 * HTML_BADVALUE : good data, but no named match or out of range
7728 * HTML_BADDATA : invalid input
7730 * NOTES:
7731 * - entity format is "'&' tag ';'" and represents a literal char
7732 * - named entities are CASE SENSITIVE.
7733 * - numeric char references (where the tag is prefixed with a '#')
7734 * are a char with that numbers value
7735 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7738 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7740 static int len = 0;
7741 static char buf[MAX_ENTITY+2];
7742 int rv, i;
7744 if(len == MAX_ENTITY){
7745 rv = HTML_BADDATA;
7747 else if((len == 0)
7748 ? (isalpha((unsigned char) ch) || ch == '#')
7749 : ((isdigit((unsigned char) ch)
7750 || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7751 buf[len++] = ch;
7752 return(HTML_MOREDATA);
7754 else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7755 buf[len] = '\0'; /* got something! */
7756 if(buf[0] == '#'){
7757 *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7758 if(alt){
7759 *alt = NULL;
7760 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7761 if(entity_tab[i].value == *ucs){
7762 *alt = entity_tab[i].plain;
7763 break;
7767 len = 0;
7768 return(HTML_ENTITY);
7770 else{
7771 rv = HTML_BADVALUE; /* in case of no match */
7772 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7773 if(strcmp(entity_tab[i].name, buf) == 0){
7774 *ucs = entity_tab[i].value;
7775 if(alt)
7776 *alt = entity_tab[i].plain;
7778 len = 0;
7779 return(HTML_ENTITY);
7783 else
7784 rv = HTML_BADDATA; /* bogus input! */
7786 if(alt){
7787 buf[len] = '\0';
7788 *alt = buf;
7791 len = 0;
7792 return(rv);
7796 /*----------------------------------------------------------------------
7797 HTML text to plain text filter
7799 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7800 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7801 formatting.
7803 ----*/
7804 void
7805 gf_html2plain(FILTER_S *f, int flg)
7807 /* BUG: qoute incoming \255 values (see "yuml" above!) */
7808 if(flg == GF_DATA){
7809 register int c;
7810 GF_INIT(f, f->next);
7812 if(!HTML_WROTE(f)){
7813 int ii;
7815 for(ii = HTML_INDENT(f); ii > 0; ii--)
7816 html_putc(f, ' ');
7818 HTML_WROTE(f) = 1;
7821 while(GF_GETC(f, c)){
7823 * First we have to collect any literal entities...
7824 * that is, IF we're not already collecting one
7825 * AND we're not in element's text or, if we are, we're
7826 * not in quoted text. Whew.
7828 if(f->t){
7829 char *alt = NULL;
7830 UCS ucs;
7832 switch(html_entity_collector(f, c, &ucs, &alt)){
7833 case HTML_MOREDATA: /* more data required? */
7834 continue; /* go get another char */
7836 case HTML_BADVALUE :
7837 case HTML_BADDATA :
7838 /* if supplied, process bogus data */
7839 HTML_PROC(f, '&');
7840 for(; *alt; alt++){
7841 unsigned int uic = *alt;
7842 HTML_PROC(f, uic);
7845 if(c == '&' && !HD(f)->quoted){
7846 f->t = '&';
7847 continue;
7849 else
7850 f->t = 0; /* don't come back next time */
7852 break;
7854 default : /* thing to process */
7855 f->t = 0; /* don't come back */
7858 * do something with UCS codepoint. If it's
7859 * not displayable then use the alt version
7860 * otherwise
7861 * cvt UCS to UTF-8 and toss into next filter.
7863 if(ucs > 127 && wcellwidth(ucs) < 0){
7864 if(alt){
7865 for(; *alt; alt++){
7866 c = MAKE_LITERAL(*alt);
7867 HTML_PROC(f, c);
7870 continue;
7872 else
7873 c = MAKE_LITERAL('?');
7875 else{
7876 unsigned char utf8buf[8], *p1, *p2;
7878 p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
7879 for(; p1 < p2; p1++){
7880 c = MAKE_LITERAL(*p1);
7881 HTML_PROC(f, c);
7884 continue;
7887 break;
7890 else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
7891 f->t = '&';
7892 continue;
7896 * then we process whatever we got...
7899 HTML_PROC(f, c);
7902 GF_OP_END(f); /* clean up our input pointers */
7904 else if(flg == GF_EOD){
7905 while(HANDLERS(f)){
7906 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
7907 html_pop(f, EL(HANDLERS(f)));
7910 html_output(f, HTML_NEWLINE);
7911 if(ULINE_BIT(f))
7912 HTML_ULINE(f, ULINE_BIT(f) = 0);
7914 if(BOLD_BIT(f))
7915 HTML_BOLD(f, BOLD_BIT(f) = 0);
7917 HTML_FLUSH(f);
7918 fs_give((void **)&f->line);
7919 if(HD(f)->color)
7920 free_color_pair(&HD(f)->color);
7922 fs_give(&f->data);
7923 if(f->opt){
7924 if(((HTML_OPT_S *)f->opt)->base)
7925 fs_give((void **) &((HTML_OPT_S *)f->opt)->base);
7927 fs_give(&f->opt);
7930 (*f->next->f)(f->next, GF_DATA);
7931 (*f->next->f)(f->next, GF_EOD);
7933 else if(flg == GF_RESET){
7934 dprint((9, "-- gf_reset html2plain\n"));
7935 f->data = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
7936 memset(f->data, 0, sizeof(HTML_DATA_S));
7937 /* start with flowing text */
7938 HD(f)->wrapstate = !PASS_HTML(f);
7939 HD(f)->wrapcol = WRAP_COLS(f);
7940 f->f1 = DFL; /* state */
7941 f->f2 = 0; /* chars in wrap buffer */
7942 f->n = 0L; /* chars on line so far */
7943 f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
7944 HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
7945 HD(f)->alt_entity = (!ps_global->display_charmap
7946 || strucmp(ps_global->display_charmap, "iso-8859-1"));
7947 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
7954 * html_indent - do the requested indent level function with appropriate
7955 * flushing and such.
7957 * Returns: indent level prior to set/increment
7960 html_indent(FILTER_S *f, int val, int func)
7962 int old = HD(f)->indent_level;
7964 /* flush pending data at old indent level */
7965 switch(func){
7966 case HTML_ID_INC :
7967 html_output_flush(f);
7968 if((HD(f)->indent_level += val) < 0)
7969 HD(f)->indent_level = 0;
7971 break;
7973 case HTML_ID_SET :
7974 html_output_flush(f);
7975 HD(f)->indent_level = val;
7976 break;
7978 default :
7979 break;
7982 return(old);
7988 * html_blanks - Insert n blank lines into output
7990 void
7991 html_blank(FILTER_S *f, int n)
7993 /* Cap off any flowing text, and then write blank lines */
7994 if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
7995 html_output(f, HTML_NEWLINE);
7997 if(HD(f)->wrapstate)
7998 while(HD(f)->blanks < n) /* blanks inc'd by HTML_NEWLINE */
7999 html_output(f, HTML_NEWLINE);
8005 * html_newline -- insert a newline mindful of embedded tags
8007 void
8008 html_newline(FILTER_S *f)
8010 html_write_newline(f); /* commit an actual newline */
8012 if(f->n){ /* and keep track of blank lines */
8013 HD(f)->blanks = 0;
8014 f->n = 0L;
8016 else
8017 HD(f)->blanks++;
8022 * output the given char, handling any requested wrapping.
8023 * It's understood that all whitespace handed us is written. In other
8024 * words, junk whitespace is weeded out before it's given to us here.
8027 void
8028 html_output(FILTER_S *f, int ch)
8030 UCS uc;
8031 int width;
8032 void (*o_f)(FILTER_S *, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8035 * if ch is a control token, just pass it on, else, collect
8036 * utf8-encoded characters to determine width,then feed into
8037 * output routines
8039 if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8040 (*o_f)(f, ch, 1);
8042 else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8043 unsigned char *cp;
8045 for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8046 (*o_f)(f, *cp, width);
8047 width = 0; /* only count it once */
8050 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8052 else
8053 HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8054 /* else do nothing until we have a full character */
8058 void
8059 html_output_string(FILTER_S *f, char *s)
8061 for(; *s; s++)
8062 html_output(f, *s);
8066 void
8067 html_output_raw_tag(FILTER_S *f, char *tag)
8069 PARAMETER *p;
8070 char *vp;
8071 int i;
8073 html_output(f, '<');
8074 html_output_string(f, tag);
8075 for(p = HD(f)->el_data->attribs;
8076 p && p->attribute;
8077 p = p->next){
8078 /* SECURITY: no javascript */
8079 /* PRIVACY: no img src without permission */
8080 /* BUGS: no class collisions since <head> ignored */
8081 if(html_event_attribute(p->attribute)
8082 || !strucmp(p->attribute, "class")
8083 || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8084 continue;
8086 /* PRIVACY: sniff out background images */
8087 if(p->value && !PASS_IMAGES(f)){
8088 if(!strucmp(p->attribute, "style")){
8089 if((vp = srchstr(p->value, "background-image")) != NULL){
8090 /* neuter in place */
8091 vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8093 else{
8094 for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8095 if(vp[10] == ' ' || vp[10] == ':')
8096 for(i = 11; vp[i] && vp[i] != ';'; i++)
8097 if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8098 || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8099 vp[0] = 'X';
8102 else if(!strucmp(p->attribute, "background")){
8103 char *ip;
8105 for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8108 if(ip)
8109 continue;
8113 html_output(f, ' ');
8114 html_output_string(f, p->attribute);
8115 if(p->value){
8116 html_output(f, '=');
8117 html_output(f, '\"');
8118 html_output_string(f, p->value);
8119 html_output(f, '\"');
8123 /* append warning to form submission */
8124 if(!strucmp(tag, "form")){
8125 html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8128 if(ED(f)->end_tag){
8129 html_output(f, ' ');
8130 html_output(f, '/');
8133 html_output(f, '>');
/*
 * html_event_attribute - tell whether attr names a script event handler
 *
 *   attr -- NUL-terminated attribute name (must not be NULL)
 *
 * Any name that starts with "on" (in either case) and has at least one
 * more character is treated as an event handler.  The fixed table used
 * before missed "onselect" (it was spelled "onselec") as well as every
 * handler added to HTML since the table was written, and each miss let
 * script through to the rendered page.
 *
 *   Returns: non-zero if attr is an event handler attribute, else 0
 */
int
html_event_attribute(char *attr)
{
    return((attr[0] == 'o' || attr[0] == 'O')
	   && (attr[1] == 'n' || attr[1] == 'N')
	   && attr[2] != '\0');
}
8157 void
8158 html_output_normal(FILTER_S *f, int ch, int width)
8160 if(HD(f)->centered){
8161 html_centered_flush(f);
8162 fs_give((void **) &HD(f)->centered->line.buf);
8163 fs_give((void **) &HD(f)->centered->word.buf);
8164 fs_give((void **) &HD(f)->centered);
8167 if(HD(f)->wrapstate){
8168 if(ch == HTML_NEWLINE){ /* hard newline */
8169 html_output_flush(f);
8170 html_newline(f);
8172 else
8173 HD(f)->blanks = 0; /* reset blank line counter */
8175 if(ch == TAG_EMBED){ /* takes up no space */
8176 HD(f)->embedded.state = -5;
8177 HTML_LINEP_PUTC(f, TAG_EMBED);
8179 else if(HD(f)->embedded.state){ /* ditto */
8180 if(HD(f)->embedded.state == -5){
8181 /* looking for specially handled tags following TAG_EMBED */
8182 if(ch == TAG_HANDLE)
8183 HD(f)->embedded.state = -1; /* next ch is length */
8184 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8185 if(!HD(f)->color)
8186 HD(f)->color = new_color_pair(NULL, NULL);
8188 if(ch == TAG_FGCOLOR)
8189 HD(f)->embedded.color = HD(f)->color->fg;
8190 else
8191 HD(f)->embedded.color = HD(f)->color->bg;
8193 HD(f)->embedded.state = RGBLEN;
8195 else
8196 HD(f)->embedded.state = 0; /* non-special */
8198 else if(HD(f)->embedded.state > 0){
8199 /* collecting up an RGBLEN color or length, ignore tags */
8200 (HD(f)->embedded.state)--;
8201 if(HD(f)->embedded.color)
8202 *HD(f)->embedded.color++ = ch;
8204 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8205 *HD(f)->embedded.color = '\0';
8206 HD(f)->embedded.color = NULL;
8209 else if(HD(f)->embedded.state < 0){
8210 HD(f)->embedded.state = ch; /* number of embedded chars */
8212 else{
8213 (HD(f)->embedded.state)--;
8214 if(HD(f)->embedded.color)
8215 *HD(f)->embedded.color++ = ch;
8217 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8218 *HD(f)->embedded.color = '\0';
8219 HD(f)->embedded.color = NULL;
8223 HTML_LINEP_PUTC(f, ch);
8225 else if(HTML_ISSPACE(ch)){
8226 html_output_flush(f);
8228 else{
8229 if(HD(f)->prefix)
8230 html_a_prefix(f);
8232 if((f->f2 += width) + 1 >= WRAP_COLS(f)){
8233 HTML_LINEP_PUTC(f, ch & 0xff);
8234 HTML_FLUSH(f);
8235 html_newline(f);
8236 if(HD(f)->in_anchor)
8237 html_write_anchor(f, HD(f)->in_anchor);
8239 else
8240 HTML_LINEP_PUTC(f, ch & 0xff);
8243 else{
8244 if(HD(f)->prefix)
8245 html_a_prefix(f);
8247 html_output_flush(f);
8249 switch(HD(f)->embedded.state){
8250 case 0 :
8251 switch(ch){
8252 default :
8254 * It's difficult to both preserve whitespace and wrap at the
8255 * same time so we'll do a dumb wrap at the edge of the screen.
8256 * Since this shouldn't come up much in real life we'll hope
8257 * it is good enough.
8259 if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
8260 html_newline(f);
8262 f->n += width; /* inc displayed char count */
8263 HD(f)->blanks = 0; /* reset blank line counter */
8264 html_putc(f, ch & 0xff);
8265 break;
8267 case TAG_EMBED : /* takes up no space */
8268 html_putc(f, TAG_EMBED);
8269 HD(f)->embedded.state = -2;
8270 break;
8272 case HTML_NEWLINE : /* newline handling */
8273 if(!f->n)
8274 break;
8276 case '\n' :
8277 html_newline(f);
8279 case '\r' :
8280 break;
8283 break;
8285 case -2 :
8286 HD(f)->embedded.state = 0;
8287 switch(ch){
8288 case TAG_HANDLE :
8289 HD(f)->embedded.state = -1; /* next ch is length */
8290 break;
8292 case TAG_BOLDON :
8293 BOLD_BIT(f) = 1;
8294 break;
8296 case TAG_BOLDOFF :
8297 BOLD_BIT(f) = 0;
8298 break;
8300 case TAG_ULINEON :
8301 ULINE_BIT(f) = 1;
8302 break;
8304 case TAG_ULINEOFF :
8305 ULINE_BIT(f) = 0;
8306 break;
8308 case TAG_FGCOLOR :
8309 if(!HD(f)->color)
8310 HD(f)->color = new_color_pair(NULL, NULL);
8312 HD(f)->embedded.color = HD(f)->color->fg;
8313 HD(f)->embedded.state = 11;
8314 break;
8316 case TAG_BGCOLOR :
8317 if(!HD(f)->color)
8318 HD(f)->color = new_color_pair(NULL, NULL);
8320 HD(f)->embedded.color = HD(f)->color->bg;
8321 HD(f)->embedded.state = 11;
8322 break;
8324 case TAG_HANDLEOFF :
8325 ch = TAG_INVOFF;
8326 HD(f)->in_anchor = 0;
8327 break;
8329 default :
8330 break;
8333 html_putc(f, ch);
8334 break;
8336 case -1 :
8337 HD(f)->embedded.state = ch; /* number of embedded chars */
8338 html_putc(f, ch);
8339 break;
8341 default :
8342 HD(f)->embedded.state--;
8343 if(HD(f)->embedded.color)
8344 *HD(f)->embedded.color++ = ch;
8346 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8347 *HD(f)->embedded.color = '\0';
8348 HD(f)->embedded.color = NULL;
8351 html_putc(f, ch);
8352 break;
8359 * flush any buffered chars waiting for wrapping.
8361 void
8362 html_output_flush(FILTER_S *f)
8364 if(f->f2){
8365 if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8366 html_newline(f); /* wrap? */
8368 if(f->n){ /* text already on the line? */
8369 html_putc(f, ' ');
8370 f->n++; /* increment count */
8372 else{
8373 /* write at start of new line */
8374 html_write_indent(f, HD(f)->indent_level);
8376 if(HD(f)->in_anchor)
8377 html_write_anchor(f, HD(f)->in_anchor);
8380 f->n += f->f2;
8381 HTML_FLUSH(f);
8388 * html_output_centered - managed writing centered text
8390 void
8391 html_output_centered(FILTER_S *f, int ch, int width)
8393 if(!HD(f)->centered){ /* new text? */
8394 html_output_flush(f);
8395 if(f->n) /* start on blank line */
8396 html_newline(f);
8398 HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
8399 memset(HD(f)->centered, 0, sizeof(CENTER_S));
8400 /* and grab a buf to start collecting centered text */
8401 HD(f)->centered->line.len = WRAP_COLS(f);
8402 HD(f)->centered->line.buf = (char *) fs_get(HD(f)->centered->line.len
8403 * sizeof(char));
8404 HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
8405 HD(f)->centered->word.len = 32;
8406 HD(f)->centered->word.buf = (char *) fs_get(HD(f)->centered->word.len
8407 * sizeof(char));
8408 HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
8411 if(ch == HTML_NEWLINE){ /* hard newline */
8412 html_centered_flush(f);
8414 else if(ch == TAG_EMBED){ /* takes up no space */
8415 HD(f)->embedded.state = -5;
8416 html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
8418 else if(HD(f)->embedded.state){
8419 if(HD(f)->embedded.state == -5){
8420 /* looking for specially handled tags following TAG_EMBED */
8421 if(ch == TAG_HANDLE)
8422 HD(f)->embedded.state = -1; /* next ch is length */
8423 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8424 if(!HD(f)->color)
8425 HD(f)->color = new_color_pair(NULL, NULL);
8427 if(ch == TAG_FGCOLOR)
8428 HD(f)->embedded.color = HD(f)->color->fg;
8429 else
8430 HD(f)->embedded.color = HD(f)->color->bg;
8432 HD(f)->embedded.state = RGBLEN;
8434 else
8435 HD(f)->embedded.state = 0; /* non-special */
8437 else if(HD(f)->embedded.state > 0){
8438 /* collecting up an RGBLEN color or length, ignore tags */
8439 (HD(f)->embedded.state)--;
8440 if(HD(f)->embedded.color)
8441 *HD(f)->embedded.color++ = ch;
8443 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8444 *HD(f)->embedded.color = '\0';
8445 HD(f)->embedded.color = NULL;
8448 else if(HD(f)->embedded.state < 0){
8449 HD(f)->embedded.state = ch; /* number of embedded chars */
8451 else{
8452 (HD(f)->embedded.state)--;
8453 if(HD(f)->embedded.color)
8454 *HD(f)->embedded.color++ = ch;
8456 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8457 *HD(f)->embedded.color = '\0';
8458 HD(f)->embedded.color = NULL;
8462 html_centered_putc(&HD(f)->centered->word, ch);
8464 else if(ASCII_ISSPACE((unsigned char) ch)){
8465 if(!HD(f)->centered->space++){ /* end of a word? flush! */
8466 int i;
8468 if(WRAPPED_LEN(f) > HD(f)->wrapcol){
8469 html_centered_flush_line(f);
8470 /* fall thru to put current "word" on blank "line" */
8472 else if(HD(f)->centered->line.width){
8473 /* put space char between line and appended word */
8474 html_centered_putc(&HD(f)->centered->line, ' ');
8475 HD(f)->centered->line.width++;
8478 for(i = 0; i < HD(f)->centered->word.used; i++)
8479 html_centered_putc(&HD(f)->centered->line,
8480 HD(f)->centered->word.buf[i]);
8482 HD(f)->centered->line.width += HD(f)->centered->word.width;
8483 HD(f)->centered->word.used = 0;
8484 HD(f)->centered->word.width = 0;
8487 else{
8488 if(HD(f)->prefix)
8489 html_a_prefix(f);
8491 /* ch is start of next word */
8492 HD(f)->centered->space = 0;
8493 if(HD(f)->centered->word.width >= WRAP_COLS(f))
8494 html_centered_flush(f);
8496 html_centered_putc(&HD(f)->centered->word, ch);
8497 HD(f)->centered->word.width++;
8503 * html_centered_putc -- add given char to given WRAPLINE_S
8505 void
8506 html_centered_putc(WRAPLINE_S *wp, int ch)
8508 if(wp->used + 1 >= wp->len){
8509 wp->len += 64;
8510 fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8513 wp->buf[wp->used++] = ch;
8519 * html_centered_flush - finish writing any pending centered output
8521 void
8522 html_centered_flush(FILTER_S *f)
8524 int i;
8527 * If word present (what about line?) we need to deal with
8528 * appending it...
8530 if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8531 html_centered_flush_line(f);
8533 if(WRAPPED_LEN(f)){
8534 /* figure out how much to indent */
8535 if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8536 html_write_indent(f, i);
8538 if(HD(f)->centered->anchor)
8539 html_write_anchor(f, HD(f)->centered->anchor);
8541 html_centered_handle(&HD(f)->centered->anchor,
8542 HD(f)->centered->line.buf,
8543 HD(f)->centered->line.used);
8544 html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8546 if(HD(f)->centered->word.used){
8547 if(HD(f)->centered->line.width)
8548 html_putc(f, ' ');
8550 html_centered_handle(&HD(f)->centered->anchor,
8551 HD(f)->centered->word.buf,
8552 HD(f)->centered->word.used);
8553 html_write(f, HD(f)->centered->word.buf,
8554 HD(f)->centered->word.used);
8557 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8558 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8560 else{
8561 if(HD(f)->centered->word.used){
8562 html_write(f, HD(f)->centered->word.buf,
8563 HD(f)->centered->word.used);
8564 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8565 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8567 HD(f)->blanks++; /* advance the blank line counter */
8570 html_newline(f); /* finish the line */
8575 * html_centered_handle - scan the line for embedded handles
8577 void
8578 html_centered_handle(int *h, char *line, int len)
8580 int n;
8582 while(len-- > 0)
8583 if(*line++ == TAG_EMBED && len-- > 0)
8584 switch(*line++){
8585 case TAG_HANDLE :
8586 if((n = *line++) >= --len){
8587 *h = 0;
8588 len -= n;
8589 while(n--)
8590 *h = (*h * 10) + (*line++ - '0');
8592 break;
8594 case TAG_HANDLEOFF :
8595 case TAG_INVOFF :
8596 *h = 0; /* assumption 23,342: inverse off ends tags */
8597 break;
8599 default :
8600 break;
8607 * html_centered_flush_line - flush the centered "line" only
8609 void
8610 html_centered_flush_line(FILTER_S *f)
8612 if(HD(f)->centered->line.used){
8613 int i, j;
8615 /* hide "word" from flush */
8616 i = HD(f)->centered->word.used;
8617 j = HD(f)->centered->word.width;
8618 HD(f)->centered->word.used = 0;
8619 HD(f)->centered->word.width = 0;
8620 html_centered_flush(f);
8622 HD(f)->centered->word.used = i;
8623 HD(f)->centered->word.width = j;
8629 * html_write_indent - write indention mindful of display attributes
8631 void
8632 html_write_indent(FILTER_S *f, int indent)
8634 if(! STRIP(f)){
8635 if(BOLD_BIT(f)){
8636 html_putc(f, TAG_EMBED);
8637 html_putc(f, TAG_BOLDOFF);
8640 if(ULINE_BIT(f)){
8641 html_putc(f, TAG_EMBED);
8642 html_putc(f, TAG_ULINEOFF);
8646 f->n = indent;
8647 while(indent-- > 0)
8648 html_putc(f, ' '); /* indent as needed */
8651 * Resume any previous embedded state
8653 if(! STRIP(f)){
8654 if(BOLD_BIT(f)){
8655 html_putc(f, TAG_EMBED);
8656 html_putc(f, TAG_BOLDON);
8659 if(ULINE_BIT(f)){
8660 html_putc(f, TAG_EMBED);
8661 html_putc(f, TAG_ULINEON);
8670 void
8671 html_write_anchor(FILTER_S *f, int anchor)
8673 char buf[256];
8674 int i;
8676 html_putc(f, TAG_EMBED);
8677 html_putc(f, TAG_HANDLE);
8678 snprintf(buf, sizeof(buf), "%d", anchor);
8679 html_putc(f, (int) strlen(buf));
8681 for(i = 0; buf[i]; i++)
8682 html_putc(f, buf[i]);
8687 * html_write_newline - write a newline mindful of display attributes
8689 void
8690 html_write_newline(FILTER_S *f)
8692 int i;
8694 if(! STRIP(f)){ /* First tie, off any embedded state */
8695 if(HD(f)->in_anchor){
8696 html_putc(f, TAG_EMBED);
8697 html_putc(f, TAG_INVOFF);
8700 if(BOLD_BIT(f)){
8701 html_putc(f, TAG_EMBED);
8702 html_putc(f, TAG_BOLDOFF);
8705 if(ULINE_BIT(f)){
8706 html_putc(f, TAG_EMBED);
8707 html_putc(f, TAG_ULINEOFF);
8710 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8711 char *p;
8712 int i;
8714 p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8715 ps_global->VAR_NORM_BACK_COLOR);
8716 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8717 html_putc(f, p[i]);
8721 html_write(f, "\015\012", 2);
8722 for(i = HTML_INDENT(f); i > 0; i--)
8723 html_putc(f, ' ');
8725 if(! STRIP(f)){ /* First tie, off any embedded state */
8726 if(BOLD_BIT(f)){
8727 html_putc(f, TAG_EMBED);
8728 html_putc(f, TAG_BOLDON);
8731 if(ULINE_BIT(f)){
8732 html_putc(f, TAG_EMBED);
8733 html_putc(f, TAG_ULINEON);
8736 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8737 char *p, *tfg, *tbg;
8738 int i;
8739 COLOR_PAIR *tmp;
8741 tfg = HD(f)->color->fg;
8742 tbg = HD(f)->color->bg;
8743 tmp = new_color_pair(tfg[0] ? tfg
8744 : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8745 tbg[0] ? tbg
8746 : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8747 if(pico_is_good_colorpair(tmp)){
8748 p = color_embed(tfg[0] ? tfg
8749 : ps_global->VAR_NORM_FORE_COLOR,
8750 tbg[0] ? tbg
8751 : ps_global->VAR_NORM_BACK_COLOR);
8752 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8753 html_putc(f, p[i]);
8756 if(tmp)
8757 free_color_pair(&tmp);
8764 * html_write - write given n-length string to next filter
8766 void
8767 html_write(FILTER_S *f, char *s, int n)
8769 GF_INIT(f, f->next);
8771 while(n-- > 0){
8772 /* keep track of attribute state? Not if last char! */
8773 if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
8774 GF_PUTC(f->next, TAG_EMBED);
8775 switch(*++s){
8776 case TAG_BOLDON :
8777 BOLD_BIT(f) = 1;
8778 break;
8779 case TAG_BOLDOFF :
8780 BOLD_BIT(f) = 0;
8781 break;
8782 case TAG_ULINEON :
8783 ULINE_BIT(f) = 1;
8784 break;
8785 case TAG_ULINEOFF :
8786 ULINE_BIT(f) = 0;
8787 break;
8788 case TAG_HANDLEOFF :
8789 HD(f)->in_anchor = 0;
8790 GF_PUTC(f->next, TAG_INVOFF);
8791 s++;
8792 continue;
8793 case TAG_HANDLE :
8794 if(n-- > 0){
8795 int i = *++s;
8797 GF_PUTC(f->next, TAG_HANDLE);
8798 if(i <= n){
8799 int anum = 0;
8800 HANDLE_S *h;
8802 n -= i;
8803 GF_PUTC(f->next, i);
8804 while(1){
8805 anum = (anum * 10) + (*++s - '0');
8806 if(--i)
8807 GF_PUTC(f->next, *s);
8808 else
8809 break;
8812 if(DO_HANDLES(f)
8813 && (h = get_handle(*HANDLESP(f), anum)) != NULL
8814 && (h->type == URL || h->type == Attach)){
8815 HD(f)->in_anchor = anum;
8820 break;
8821 default:
8822 break;
8826 GF_PUTC(f->next, (*s++) & 0xff);
8829 GF_IP_END(f->next); /* clean up next's input pointers */
8834 * html_putc -- actual work of writing to next filter.
8835 * NOTE: Small opt not using full GF_END since our input
8836 * pointers don't need adjusting.
8838 void
8839 html_putc(FILTER_S *f, int ch)
8841 GF_INIT(f, f->next);
8842 GF_PUTC(f->next, ch & 0xff);
8843 GF_IP_END(f->next); /* clean up next's input pointers */
8849 * Only current option is to turn on embedded data stripping for text
8850 * bound to a printer or composer.
8852 void *
8853 gf_html2plain_opt(char *base,
8854 int columns,
8855 int *margin,
8856 HANDLE_S **handlesp,
8857 htmlrisk_t risk_f,
8858 int flags)
8860 HTML_OPT_S *op;
8861 int margin_l, margin_r;
8863 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8865 op->base = cpystr(base);
8866 margin_l = (margin) ? margin[0] : 0;
8867 margin_r = (margin) ? margin[1] : 0;
8868 op->indent = margin_l;
8869 op->columns = columns - (margin_l + margin_r);
8870 op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8871 op->handlesp = handlesp;
8872 op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8873 op->showserver = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8874 op->warnrisk_f = risk_f;
8875 op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8876 op->related_content = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8877 op->html = ((flags & GFHP_HTML) == GFHP_HTML);
8878 op->html_imgs = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8879 op->element_table = html_element_table;
8880 return((void *) op);
8884 void *
8885 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
8887 HTML_OPT_S *op;
8889 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8890 memset(op, 0, sizeof(HTML_OPT_S));
8892 op->base = cpystr("");
8893 op->element_table = rss_element_table;
8894 *(op->feedp = feedp) = NULL;
8895 return((void *) op);
8898 void
8899 gf_html2plain_rss_free(RSS_FEED_S **feedp)
8901 if(feedp && *feedp){
8902 if((*feedp)->title)
8903 fs_give((void **) &(*feedp)->title);
8905 if((*feedp)->link)
8906 fs_give((void **) &(*feedp)->link);
8908 if((*feedp)->description)
8909 fs_give((void **) &(*feedp)->description);
8911 if((*feedp)->source)
8912 fs_give((void **) &(*feedp)->source);
8914 if((*feedp)->image)
8915 fs_give((void **) &(*feedp)->image);
8917 gf_html2plain_rss_free_items(&((*feedp)->items));
8918 fs_give((void **) feedp);
8922 void
8923 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
8925 if(itemp && *itemp){
8926 if((*itemp)->title)
8927 fs_give((void **) &(*itemp)->title);
8929 if((*itemp)->link)
8930 fs_give((void **) &(*itemp)->link);
8932 if((*itemp)->description)
8933 fs_give((void **) &(*itemp)->description);
8935 if((*itemp)->source)
8936 fs_give((void **) &(*itemp)->source);
8938 gf_html2plain_rss_free_items(&(*itemp)->next);
8939 fs_give((void **) itemp);
8944 /* END OF HTML-TO-PLAIN text filter */
8947 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
8948 * from the text stream.
8951 #define MAX_ESC_LEN 5
8954 * the simple filter, removes unknown escape codes from the stream
8956 void
8957 gf_escape_filter(FILTER_S *f, int flg)
8959 register char *p;
8960 GF_INIT(f, f->next);
8962 if(flg == GF_DATA){
8963 register unsigned char c;
8964 register int state = f->f1;
8966 while(GF_GETC(f, c)){
8968 if(state){
8969 if(c == '\033' || f->n == MAX_ESC_LEN){
8970 f->line[f->n] = '\0';
8971 f->n = 0L;
8972 if(!match_escapes(f->line)){
8973 GF_PUTC(f->next, '^');
8974 GF_PUTC(f->next, '[');
8976 else
8977 GF_PUTC(f->next, '\033');
8979 p = f->line;
8980 while(*p)
8981 GF_PUTC(f->next, *p++);
8983 if(c == '\033')
8984 continue;
8985 else
8986 state = 0; /* fall thru */
8988 else{
8989 f->line[f->n++] = c; /* collect */
8990 continue;
8994 if(c == '\033')
8995 state = 1;
8996 else
8997 GF_PUTC(f->next, c);
9000 f->f1 = state;
9001 GF_END(f, f->next);
9003 else if(flg == GF_EOD){
9004 if(f->f1){
9005 if(!match_escapes(f->line)){
9006 GF_PUTC(f->next, '^');
9007 GF_PUTC(f->next, '[');
9009 else
9010 GF_PUTC(f->next, '\033');
9013 for(p = f->line; f->n; f->n--, p++)
9014 GF_PUTC(f->next, *p);
9016 fs_give((void **)&(f->line)); /* free temp line buffer */
9017 (void) GF_FLUSH(f->next);
9018 (*f->next->f)(f->next, GF_EOD);
9020 else if(flg == GF_RESET){
9021 dprint((9, "-- gf_reset escape\n"));
9022 f->f1 = 0;
9023 f->n = 0L;
9024 f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9031 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9032 * corresponding string representations (you know, ^blah and such)...
9036 * the simple filter transforms unknown control characters in the stream
9037 * into harmless strings.
9039 void
9040 gf_control_filter(FILTER_S *f, int flg)
9042 GF_INIT(f, f->next);
9044 if(flg == GF_DATA){
9045 register unsigned char c;
9046 register int filt_only_c0;
9048 filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9050 while(GF_GETC(f, c)){
9052 if(((c < 0x20 || c == 0x7f)
9053 || (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9054 && !(ASCII_ISSPACE((unsigned char) c)
9055 || c == '\016' || c == '\017' || c == '\033')){
9056 GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9057 GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9059 else
9060 GF_PUTC(f->next, c);
9063 GF_END(f, f->next);
9065 else if(flg == GF_EOD){
9066 (void) GF_FLUSH(f->next);
9067 (*f->next->f)(f->next, GF_EOD);
/*
 * gf_control_filter_opt - function called from the outside to set the
 *			   control filter's option, which says to filter
 *			   C0 control characters but not C1 control
 *			   chars.  It isn't called at all when C0 chars
 *			   aren't to be filtered either.
 *
 *   Returns: the pointer it was given, as a void *
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = (void *) filt_only_c0;

    return(opt);
}
9086 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9087 * This prevents the possibility of embedding other tags.
9088 * We assume that this filter should only be used for something
9089 * that is eventually writing to a display, which has the special
9090 * knowledge of quoted TAG_EMBEDs.
9092 void
9093 gf_tag_filter(FILTER_S *f, int flg)
9095 GF_INIT(f, f->next);
9097 if(flg == GF_DATA){
9098 register unsigned char c;
9100 while(GF_GETC(f, c)){
9102 if((c & 0xff) == (TAG_EMBED & 0xff)){
9103 GF_PUTC(f->next, TAG_EMBED);
9104 GF_PUTC(f->next, c);
9106 else
9107 GF_PUTC(f->next, c);
9110 GF_END(f, f->next);
9112 else if(flg == GF_EOD){
9113 (void) GF_FLUSH(f->next);
9114 (*f->next->f)(f->next, GF_EOD);
/*
 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
 * specified line width
 */


/*
 * State of the line wrap filter, kept behind the filter's "opt"
 * pointer and reached through the WRAP_* accessor macros.  Notes on
 * members say only what the macros and gf_wrap show; the rest is
 * named after what it holds.
 */
typedef struct wrap_col_s {
    unsigned	bold:1;
    unsigned	uline:1;
    unsigned	inverse:1;
    unsigned	tags:1;
    unsigned	do_indent:1;
    unsigned	on_comma:1;
    unsigned	flowed:1;		/* gf_wrap: text is flowed (RFC 3676) */
    unsigned	delsp:1;		/* gf_wrap: whack trailing space of a flowed line */
    unsigned	quoted:1;
    unsigned	allwsp:1;
    unsigned	hard_nl:1;		/* gf_wrap sets this at the end of a fixed line */
    unsigned	leave_flowed:1;		/* costs one column, see WRAP_COL */
    unsigned	use_color:1;
    unsigned	hdr_color:1;
    unsigned	for_compose:1;
    unsigned	handle_soft_hyphen:1;
    unsigned	saw_soft_hyphen:1;
    unsigned	trailing_space:1;
    unsigned char  utf8buf[7];
    unsigned char *utf8bufp;
    COLOR_PAIR *color;			/* counts as set when fg[0] is non-zero */
    STORE_S    *spaces;			/* the "space buffer"; may hold embeds too */
    short	embedded,
		space_len;
    char       *lineendp;		/* WRAP_PUTC grows f->line when linep gets here */
    int		anchor,
		prefbrk,
		prefbrkn,
		quote_depth,
		quote_count,
		sig,
		state,
		wrap_col,		/* see WRAP_COL */
		wrap_max,		/* see WRAP_MAX_COL */
		margin_l,
		margin_r,
		indent;
    char	special[256];
} WRAP_S;
/*
 * Accessors for the WRAP_S hanging off of a filter's "opt" pointer.
 * F is the filter.
 *
 * WRAP_COL and WRAP_MAX_COL are the configured columns less the right
 * margin, and less one more column when flowed text is being left
 * flowed.
 */
#define	WRAP_MARG_L(F)	(((WRAP_S *)(F)->opt)->margin_l)
#define	WRAP_MARG_R(F)	(((WRAP_S *)(F)->opt)->margin_r)
#define	WRAP_COL(F)	(((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_MAX_COL(F)	(((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_INDENT(F)	(((WRAP_S *)(F)->opt)->indent)
#define	WRAP_DO_IND(F)	(((WRAP_S *)(F)->opt)->do_indent)
#define	WRAP_COMMA(F)	(((WRAP_S *)(F)->opt)->on_comma)
#define	WRAP_FLOW(F)	(((WRAP_S *)(F)->opt)->flowed)
#define	WRAP_DELSP(F)	(((WRAP_S *)(F)->opt)->delsp)
#define	WRAP_FL_QD(F)	(((WRAP_S *)(F)->opt)->quote_depth)
#define	WRAP_FL_QC(F)	(((WRAP_S *)(F)->opt)->quote_count)
#define	WRAP_FL_SIG(F)	(((WRAP_S *)(F)->opt)->sig)
#define	WRAP_HARD(F)	(((WRAP_S *)(F)->opt)->hard_nl)
#define	WRAP_LV_FLD(F)	(((WRAP_S *)(F)->opt)->leave_flowed)
#define	WRAP_USE_CLR(F)	(((WRAP_S *)(F)->opt)->use_color)
#define	WRAP_HDR_CLR(F)	(((WRAP_S *)(F)->opt)->hdr_color)
#define	WRAP_FOR_CMPS(F)	(((WRAP_S *)(F)->opt)->for_compose)
#define	WRAP_HANDLE_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define	WRAP_SAW_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->saw_soft_hyphen)
#define	WRAP_UTF8BUF(F, C)	(((WRAP_S *)(F)->opt)->utf8buf[C])
#define	WRAP_UTF8BUFP(F)	(((WRAP_S *)(F)->opt)->utf8bufp)
#define	WRAP_STATE(F)	(((WRAP_S *)(F)->opt)->state)
#define	WRAP_QUOTED(F)	(((WRAP_S *)(F)->opt)->quoted)
#define	WRAP_TAGS(F)	(((WRAP_S *)(F)->opt)->tags)
#define	WRAP_BOLD(F)	(((WRAP_S *)(F)->opt)->bold)
#define	WRAP_ULINE(F)	(((WRAP_S *)(F)->opt)->uline)
#define	WRAP_INVERSE(F)	(((WRAP_S *)(F)->opt)->inverse)
#define	WRAP_LASTC(F)	(((WRAP_S *)(F)->opt)->lineendp)
#define	WRAP_EMBED(F)	(((WRAP_S *)(F)->opt)->embedded)
#define	WRAP_ANCHOR(F)	(((WRAP_S *)(F)->opt)->anchor)
#define	WRAP_PB_OFF(F)	(((WRAP_S *)(F)->opt)->prefbrk)
#define	WRAP_PB_LEN(F)	(((WRAP_S *)(F)->opt)->prefbrkn)
#define	WRAP_ALLWSP(F)	(((WRAP_S *)(F)->opt)->allwsp)
#define	WRAP_SPC_LEN(F)	(((WRAP_S *)(F)->opt)->space_len)
#define	WRAP_TRL_SPC(F)	(((WRAP_S *)(F)->opt)->trailing_space)
#define	WRAP_SPEC(F, C)	((WRAP_S *) (F)->opt)->special[C]
#define	WRAP_COLOR(F)	(((WRAP_S *)(F)->opt)->color)
#define	WRAP_COLOR_SET(F)	((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define	WRAP_SPACES(F)	(((WRAP_S *)(F)->opt)->spaces)

/*
 * WRAP_PUTC - append char C, W screen cells wide, to the word held in
 *	       (F)->line.  When the write pointer has reached WRAP_LASTC
 *	       the buffer is first resized to twice the write offset and
 *	       both pointers are re-based on the new buffer.
 *
 *	       NOTE(review): this and the two macros after it expand to
 *	       a bare { } block rather than do { } while(0), so none of
 *	       them can be the unbraced body of an "if" that has an
 *	       "else".
 */
#define	WRAP_PUTC(F,C,W) {						\
			    if((F)->linep == WRAP_LASTC(F)){		\
				size_t offset = (F)->linep - (F)->line;	\
				fs_resize((void **) &(F)->line,		\
					  (2 * offset) * sizeof(char)); \
				(F)->linep = &(F)->line[offset];	\
				WRAP_LASTC(F) = &(F)->line[2*offset-1];	\
			    }						\
			    *(F)->linep++ = (C);			\
			    (F)->f2 += (W);				\
			}

/*
 * WRAP_EMBED_PUTC - store a char of embedded data (zero width): in the
 *		     word buffer when a word is being collected
 *		     ((F)->f2 non-zero), otherwise in the space buffer.
 */
#define	WRAP_EMBED_PUTC(F,C) {						\
			    if((F)->f2){				\
				WRAP_PUTC((F), C, 0);			\
			    }						\
			    else					\
			      so_writec(C, WRAP_SPACES(F));		\
			}

/*
 * WRAP_COLOR_UNSET - forget the special color by emptying its
 *		      foreground string.
 */
#define	WRAP_COLOR_UNSET(F)	{					\
			    if(WRAP_COLOR_SET(F)){			\
				WRAP_COLOR(F)->fg[0] = '\0';		\
			    }						\
			}
/*
 * wrap_flush_embed flags
 */
#define	WFE_NONE	0	/* Nothing special */
#define	WFE_CNT_HANDLE	1	/* account for/don't write handles */


/*
 * Helpers of the line wrap filter.  In the calls visible in gf_wrap
 * the four trailing "unsigned char **" arguments are the addresses of
 * gf_wrap's own ip, eib, op and eob.
 */
int	wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
int	wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
			 unsigned char **, unsigned char **);
int	wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
		     unsigned char **, unsigned char **, int);
int	wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
		 unsigned char **, unsigned char **);
int	wrap_bol(FILTER_S *, int, int, unsigned char **,
		 unsigned char **, unsigned char **, unsigned char **);
int	wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
			  unsigned char **, unsigned char **);
9251 * the no longer simple filter, breaks lines at end of white space nearest
9252 * to global "gf_wrap_width" in length
9253 * It also supports margins, indents (inverse indenting, really) and
9254 * flowed text (ala RFC 3676)
/*
 * gf_wrap - line-wrapping filter stage.
 *
 * f   - filter instance; per-instance state lives in f->f1, f->f2, f->n,
 *       f->line/f->linep and the WRAP_S structure hung off f->opt
 * flg - one of:
 *   GF_RESET  initialize: state = BOL, zero the width counters, supply
 *             default options via gf_wrap_filter_opt(75, 80, NULL, 0, 0)
 *             when f->opt is unset, halve the indent until it is below
 *             the maximum column, allocate the word buffer, build the
 *             special-character table and create the space buffer
 *   GF_DATA   pull characters with GF_GETC and run them through the
 *             state machine (CCR, BOL, FL_QLEV, FL_STF, FL_SIG, DFL,
 *             TAG, HANDLE, HDATA); the state is saved back in f->f1
 *   GF_EOD    flush what is buffered, release the color pair, the word
 *             buffer, the space buffer and f->opt, then hand GF_EOD to
 *             the next filter
 */
9257 void
9258 gf_wrap(FILTER_S *f, int flg)
9260 register long i;
9261 GF_INIT(f, f->next);
9264 * f->f1 state
9265 * f->line buffer where next "word" being considered is stored
9266 * f->f2 width in screen cells of f->line stuff
9267 * f->n width in screen cells of the part of this line committed to next
9268 * filter so far
/* GF_DATA: consume the pending input one octet at a time */
9271 if(flg == GF_DATA){
9272 register unsigned char c;
9273 register int state = f->f1;
9274 int width, full_character;
9276 while(GF_GETC(f, c)){
9278 switch(state){
/* CCR: a CR was seen earlier; c is the character that follows it */
9279 case CCR : /* CRLF or CR in text ? */
9280 state = BOL; /* either way, handle start */
9282 if(WRAP_FLOW(f)){
9283 /* wrapped line? */
9284 if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9286 * whack trailing space char, but be aware
9287 * of embeds in space buffer. grok them just
9288 * in case they contain a 0x20 value
9290 if(WRAP_DELSP(f)){
9291 char *sb, *sbp, *scp = NULL;
9292 int x;
9294 for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9295 switch(*sbp){
9296 case ' ' :
9297 scp = sbp;
9298 break;
9300 case TAG_EMBED :
9301 sbp++;
9302 switch (*sbp++){
9303 case TAG_HANDLE :
9304 x = (int) *sbp++;
9305 if(strlen(sbp) >= x)
9306 sbp += (x - 1);
9308 break;
9310 case TAG_FGCOLOR :
9311 case TAG_BGCOLOR :
9312 if(strlen(sbp) >= RGBLEN)
9313 sbp += (RGBLEN - 1);
9315 break;
9317 default :
9318 break;
9321 break;
9323 default :
9324 break;
9328 /* replace space buf without trailing space char */
9329 if(scp){
9330 STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9332 *scp++ = '\0';
9333 WRAP_SPC_LEN(f)--;
9334 WRAP_TRL_SPC(f) = 0;
9336 so_puts(ns, sb);
9337 so_puts(ns, scp);
9339 so_give(&WRAP_SPACES(f));
9340 WRAP_SPACES(f) = ns;
9344 else{ /* fixed line */
9345 WRAP_HARD(f) = 1;
9346 wrap_flush(f, &ip, &eib, &op, &eob);
9347 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9350 * When we get to a real end of line, we don't need to
9351 * remember what the special color was anymore because
9352 * we aren't going to be changing back to it. We unset it
9353 * so that we don't keep resetting the color to normal.
9355 WRAP_COLOR_UNSET(f);
9358 if(c == '\012'){ /* get c following LF */
9359 break;
9361 /* else c is first char of new line, fall thru */
9363 else{
9364 wrap_flush(f, &ip, &eib, &op, &eob);
9365 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9366 WRAP_COLOR_UNSET(f); /* see note above */
9367 if(c == '\012'){
9368 break;
9370 /* else fall thru to deal with beginning of line */
9373 case BOL :
9374 if(WRAP_FLOW(f)){
9375 if(c == '>'){
9376 WRAP_FL_QC(f) = 1; /* init it */
9377 state = FL_QLEV; /* go collect it */
9379 else {
9380 /* if EMBEDed, process it and return here */
9381 if(c == (unsigned char) TAG_EMBED){
9382 WRAP_EMBED_PUTC(f, TAG_EMBED);
9383 WRAP_STATE(f) = state;
9384 state = TAG;
9385 continue;
9388 /* quote level change implies new paragraph */
9389 if(WRAP_FL_QD(f)){
9390 WRAP_FL_QD(f) = 0;
9391 if(WRAP_HARD(f) == 0){
9392 WRAP_HARD(f) = 1;
9393 wrap_flush(f, &ip, &eib, &op, &eob);
9394 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9395 WRAP_COLOR_UNSET(f); /* see note above */
9399 if(WRAP_HARD(f)){
9400 wrap_bol(f, 0, 1, &ip, &eib, &op,
9401 &eob); /* write quoting prefix */
9402 WRAP_HARD(f) = 0;
9405 switch (c) {
9406 case '\015' : /* a blank line? */
9407 wrap_flush(f, &ip, &eib, &op, &eob);
9408 state = CCR; /* go collect it */
9409 break;
9411 case ' ' : /* space stuffed */
9412 state = FL_STF; /* just eat it */
9413 break;
9415 case '-' : /* possible sig-dash */
9416 WRAP_FL_SIG(f) = 1; /* init state */
9417 state = FL_SIG; /* go collect it */
9418 break;
9420 default :
9421 state = DFL; /* go back to normal */
9422 goto case_dfl; /* handle c like DFL case */
9426 else{
9427 state = DFL;
9428 if(WRAP_COMMA(f) && c == TAB){
9429 wrap_bol(f, 1, 0, &ip, &eib, &op,
9430 &eob); /* convert to normal indent */
9431 break;
9434 wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9435 goto case_dfl; /* handle c like DFL case */
9438 break;
9440 case FL_QLEV :
9441 if(c == '>'){ /* another level */
9442 WRAP_FL_QC(f)++;
9444 else {
9445 /* if EMBEDed, process it and return here */
9446 if(c == (unsigned char) TAG_EMBED){
9447 WRAP_EMBED_PUTC(f, TAG_EMBED);
9448 WRAP_STATE(f) = state;
9449 state = TAG;
9450 continue;
9453 /* quote level change signals new paragraph */
9454 if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9455 WRAP_FL_QD(f) = WRAP_FL_QC(f);
9456 if(WRAP_HARD(f) == 0){ /* add hard newline */
9457 WRAP_HARD(f) = 1; /* hard newline */
9458 wrap_flush(f, &ip, &eib, &op, &eob);
9459 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9460 WRAP_COLOR_UNSET(f); /* see note above */
9464 if(WRAP_HARD(f)){
9465 wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9466 WRAP_HARD(f) = 0;
9469 switch (c) {
9470 case '\015' : /* a blank line? */
9471 wrap_flush(f, &ip, &eib, &op, &eob);
9472 state = CCR; /* go collect it */
9473 break;
9475 case ' ' : /* space-stuffed! */
9476 state = FL_STF; /* just eat it */
9477 break;
9479 case '-' : /* sig dash? */
9480 WRAP_FL_SIG(f) = 1;
9481 state = FL_SIG;
9482 break;
9484 default : /* something else */
9485 state = DFL;
9486 goto case_dfl; /* handle c like DFL */
9490 break;
9492 case FL_STF : /* space stuffed */
9493 switch (c) {
9494 case '\015' : /* a blank line? */
9495 wrap_flush(f, &ip, &eib, &op, &eob);
9496 state = CCR; /* go collect it */
9497 break;
9499 case (unsigned char) TAG_EMBED : /* process TAG data */
9500 WRAP_EMBED_PUTC(f, TAG_EMBED);
9501 WRAP_STATE(f) = state; /* and return */
9502 state = TAG;
9503 continue;
9505 case '-' : /* sig dash? */
9506 WRAP_FL_SIG(f) = 1;
9507 WRAP_ALLWSP(f) = 0;
9508 state = FL_SIG;
9509 break;
9511 default : /* something else */
9512 state = DFL;
9513 goto case_dfl; /* handle c like DFL */
9516 break;
9518 case FL_SIG : /* sig-dash collector */
9519 switch (WRAP_FL_SIG(f)){ /* possible sig-dash? */
9520 case 1 :
9521 if(c != '-'){ /* not a sigdash */
9522 if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9523 wrap_flush_embed(f, &ip, &eib, &op,
9524 &eob); /* note any embedded*/
9525 wrap_eol(f, 1, &ip, &eib,
9526 &op, &eob); /* plunk down newline */
9527 wrap_bol(f, 1, 1, &ip, &eib,
9528 &op, &eob); /* write any prefix */
9531 WRAP_PUTC(f,'-', 1); /* write what we got */
9533 WRAP_FL_SIG(f) = 0;
9534 state = DFL;
9535 goto case_dfl;
9538 /* don't put anything yet until we know to wrap or not */
9539 WRAP_FL_SIG(f) = 2;
9540 break;
9542 case 2 :
9543 if(c != ' '){ /* not a sigdash */
9544 WRAP_PUTC(f, '-', 1);
9545 if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9546 wrap_flush_embed(f, &ip, &eib, &op,
9547 &eob); /* note any embedded*/
9548 wrap_eol(f, 1, &ip, &eib,
9549 &op, &eob); /* plunk down newline */
9550 wrap_bol(f, 1, 1, &ip, &eib, &op,
9551 &eob); /* write any prefix */
9554 WRAP_PUTC(f,'-', 1); /* write what we got */
9556 WRAP_FL_SIG(f) = 0;
9557 state = DFL;
9558 goto case_dfl;
9561 /* don't put anything yet until we know to wrap or not */
9562 WRAP_FL_SIG(f) = 3;
9563 break;
9565 case 3 :
9566 if(c == '\015'){ /* success! */
9567 /* known sigdash, newline if soft nl */
9568 if(WRAP_SPC_LEN(f)){
9569 wrap_flush(f, &ip, &eib, &op, &eob);
9570 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9571 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9573 WRAP_PUTC(f,'-',1);
9574 WRAP_PUTC(f,'-',1);
9575 WRAP_PUTC(f,' ',1);
9577 state = CCR;
9578 break;
9580 else{
9581 WRAP_FL_SIG(f) = 4; /* possible success */
9584 case 4 :
9585 switch(c){
9586 case (unsigned char) TAG_EMBED :
9588 * At this point we're almost 100% sure that we've got
9589 * a sigdash. Putc it (adding newline if previous
9590 * was a soft nl) so we get it the right color
9591 * before we store this new embedded stuff
9593 if(WRAP_SPC_LEN(f)){
9594 wrap_flush(f, &ip, &eib, &op, &eob);
9595 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9596 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9598 WRAP_PUTC(f,'-',1);
9599 WRAP_PUTC(f,'-',1);
9600 WRAP_PUTC(f,' ',1);
9602 WRAP_FL_SIG(f) = 5;
9603 break;
9605 case '\015' : /* success! */
9607 * We shouldn't get here, but in case we do, we have
9608 * not yet put the sigdash
9610 if(WRAP_SPC_LEN(f)){
9611 wrap_flush(f, &ip, &eib, &op, &eob);
9612 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9613 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9615 WRAP_PUTC(f,'-',1);
9616 WRAP_PUTC(f,'-',1);
9617 WRAP_PUTC(f,' ',1);
9619 state = CCR;
9620 break;
9622 default : /* that's no sigdash! */
9623 /* write what we got but didn't put yet */
9624 WRAP_PUTC(f,'-', 1);
9625 WRAP_PUTC(f,'-', 1);
9626 WRAP_PUTC(f,' ', 1);
9628 WRAP_FL_SIG(f) = 0;
9629 wrap_flush(f, &ip, &eib, &op, &eob);
9630 WRAP_SPC_LEN(f) = 1;
9631 state = DFL; /* set normal state */
9632 goto case_dfl; /* and go do "c" */
9635 break;
9637 case 5 :
9638 WRAP_STATE(f) = FL_SIG; /* come back here */
9639 WRAP_FL_SIG(f) = 6; /* and seek EOL */
9640 WRAP_EMBED_PUTC(f, TAG_EMBED);
9641 state = TAG; /* process embed */
9642 goto case_tag;
9644 case 6 :
9646 * at this point we've already putc the sigdash in case 4
9648 switch(c){
9649 case (unsigned char) TAG_EMBED :
9650 WRAP_FL_SIG(f) = 5;
9651 break;
9653 case '\015' : /* success! */
9654 state = CCR;
9655 break;
9657 default : /* that's no sigdash! */
9659 * probably never reached (fake sigdash with embedded
9660 * stuff) but if this did get reached, then we
9661 * might have accidentally disobeyed a soft nl
9663 WRAP_FL_SIG(f) = 0;
9664 wrap_flush(f, &ip, &eib, &op, &eob);
9665 WRAP_SPC_LEN(f) = 1;
9666 state = DFL; /* set normal state */
9667 goto case_dfl; /* and go do "c" */
9670 break;
9673 default :
9674 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9675 WRAP_FL_SIG(f)));
9676 WRAP_FL_SIG(f) = 0;
9677 state = DFL; /* set normal state */
9678 goto case_dfl; /* and go process "c" */
9681 break;
9683 case_dfl :
9684 case DFL :
9686 * This was just if(WRAP_SPEC(f, c)) before the change to add
9687 * the == 0 test. This isn't quite right, either. We should really
9688 * be looking for special characters in the UCS characters, not
9689 * in the incoming stream of UTF-8. It is not right to
9690 * call this on bytes that are in the middle of a UTF-8 character,
9691 * hence the == 0 test which restricts it to the first byte
9692 * of a character. This isn't right, either, but it's closer.
9693 * Also change the definition of WRAP_SPEC so that isspace only
9694 * matches ascii characters, which will never be in the middle
9695 * of a UTF-8 multi-byte character.
9697 if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9698 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9699 switch(c){
9700 default :
9701 if(WRAP_QUOTED(f))
9702 break;
9704 if(f->f2){ /* any non-lwsp to flush? */
9705 if(WRAP_COMMA(f)){
9706 /* remember our second best break point */
9707 WRAP_PB_OFF(f) = f->linep - f->line;
9708 WRAP_PB_LEN(f) = f->f2;
9709 break;
9711 else
9712 wrap_flush(f, &ip, &eib, &op, &eob);
9715 switch(c){ /* remember separator */
9716 case ' ' :
9717 WRAP_SPC_LEN(f)++;
9718 WRAP_TRL_SPC(f) = 1;
9719 so_writec(' ',WRAP_SPACES(f));
9720 break;
9722 case TAB :
9724 int i = (int) f->n + WRAP_SPC_LEN(f);
9727 WRAP_SPC_LEN(f)++;
9728 while(++i & 0x07);
9730 so_writec(TAB,WRAP_SPACES(f));
9731 WRAP_TRL_SPC(f) = 0;
9734 break;
9736 default : /* some control char? */
9737 WRAP_SPC_LEN(f) += 2;
9738 WRAP_TRL_SPC(f) = 0;
9739 break;
9742 continue;
9744 case '\"' :
9745 WRAP_QUOTED(f) = !WRAP_QUOTED(f);
9746 break;
9748 case '\015' : /* already has newline? */
9749 state = CCR;
9750 continue;
9752 case '\012' : /* bare LF in text? */
9753 wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
9754 wrap_eol(f, 0, &ip, &eib, &op, &eob); /* meant */
9755 wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
9756 continue;
9758 case (unsigned char) TAG_EMBED :
9759 WRAP_EMBED_PUTC(f, TAG_EMBED);
9760 WRAP_STATE(f) = state;
9761 state = TAG;
9762 continue;
9764 case ',' :
9765 if(!WRAP_QUOTED(f)){
9766 /* handle this special case in general code below */
9767 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
9768 && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
9769 break;
9771 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
9772 if(WRAP_ALLWSP(f)) /* if anything visible */
9773 wrap_flush(f, &ip, &eib, &op,
9774 &eob); /* ... blat buf'd chars */
9776 wrap_eol(f, 1, &ip, &eib, &op,
9777 &eob); /* plunk down newline */
9778 wrap_bol(f, 1, 1, &ip, &eib, &op,
9779 &eob); /* write any prefix */
9782 WRAP_PUTC(f, ',', 1); /* put out comma */
9783 wrap_flush(f, &ip, &eib, &op,
9784 &eob); /* write buf'd chars */
9785 continue;
9788 break;
9791 else if(WRAP_HANDLE_SOFT_HYPHEN(f)
9792 && (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
9793 && WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
9795 * This is a soft hyphen. If there is enough space for
9796 * a real hyphen to fit on the line here then we can
9797 * flush everything up to before the soft hyphen,
9798 * and simply remember that we saw a soft hyphen.
9799 * If it turns out that we can't fit the next piece in
9800 * then wrap_eol will append a real hyphen to the line.
9801 * If we can fit another piece in it will be because we've
9802 * reached the next break point. At that point we'll flush
9803 * everything but won't include the unneeded hyphen. We erase
9804 * the fact that we saw this soft hyphen because it have
9805 * become irrelevant.
9807 * If the hyphen is the character that puts us over the edge
9808 * we go through the else case.
9811 /* erase this soft hyphen character from buffer */
9812 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
9814 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9815 if(f->f2) /* any non-lwsp to flush? */
9816 wrap_flush(f, &ip, &eib, &op, &eob);
9818 /* remember that we saw the soft hyphen */
9819 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9821 else{
9823 * Everything up to the hyphen fits, otherwise it
9824 * would have already been flushed the last time
9825 * through the loop. But the hyphen won't fit. So
9826 * we need to go back to the last line break and
9827 * break there instead. Then start a new line with
9828 * the buffered up characters and the soft hyphen.
9830 wrap_flush_embed(f, &ip, &eib, &op, &eob);
9831 wrap_eol(f, 1, &ip, &eib, &op,
9832 &eob); /* plunk down newline */
9833 wrap_bol(f,1,1, &ip, &eib, &op,
9834 &eob); /* write any prefix */
9837 * Now we're in the same situation as we would have
9838 * been above except we're on a new line. Try to
9839 * flush out the characters seen up to the hyphen.
9841 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9842 if(f->f2) /* any non-lwsp to flush? */
9843 wrap_flush(f, &ip, &eib, &op, &eob);
9845 /* remember that we saw the soft hyphen */
9846 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9848 else
9849 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9852 continue;
9855 full_character = 0;
9858 unsigned char *inputp;
9859 unsigned long remaining_octets;
9860 UCS ucs;
9862 if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){ /* always true */
9864 *WRAP_UTF8BUFP(f)++ = c;
9865 remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9866 if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
9867 full_character++;
9868 if(c == TAB){
9869 int i = (int) f->n;
9871 while(i & 0x07)
9872 i++;
9874 width = i - f->n;
9876 else if(c < 0x80 && iscntrl((unsigned char) c))
9877 width = 2;
9878 else
9879 width = 1;
9881 else{
9882 inputp = &WRAP_UTF8BUF(f, 0);
9883 ucs = (UCS) utf8_get(&inputp, &remaining_octets);
9884 switch(ucs){
9885 case U8G_ENDSTRG: /* incomplete character, wait */
9886 case U8G_ENDSTRI: /* incomplete character, wait */
9887 width = 0;
9888 break;
9890 default:
9891 if(ucs & U8G_ERROR || ucs == UBOGON){
9893 * None of these cases is supposed to happen. If it
9894 * does happen then the input stream isn't UTF-8
9895 * so something is wrong. Writechar will treat
9896 * each octet in the input buffer as a separate
9897 * error character and print a '?' for each,
9898 * so the width will be the number of octets.
9900 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9901 full_character++;
9903 else{
9904 /* got a character */
9905 width = wcellwidth(ucs);
9906 full_character++;
9908 if(width < 0){
9910 * This happens when we have a UTF-8 character that
9911 * we aren't able to print in our locale. For example,
9912 * if the locale is setup with the terminal
9913 * expecting ISO-8859-1 characters then there are
9914 * lots of UTF-8 characters that can't be printed.
9915 * Print a '?' instead.
9917 width = 1;
9921 break;
9925 else{
9927 * This cannot happen because an error would have
9928 * happened at least by character #6. So if we get
9929 * here there is a bug in utf8_get().
9931 if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
9932 *WRAP_UTF8BUFP(f)++ = c;
9936 * We could possibly do some more sophisticated
9937 * resynchronization here, but we aren't doing
9938 * anything in Writechar so it wouldn't match up
9939 * with that anyway. Just figure each character will
9940 * end up being printed as a ? character.
9942 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9943 full_character++;
9947 if(WRAP_ALLWSP(f)){
9949 * Nothing is visible yet but the first word may be too long
9950 * all by itself. We need to break early.
9952 if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
9954 * A little reaching behind the curtain here.
9955 * if there's at least a preferable break point, use
9956 * it and stuff what's left back into the wrap buffer.
9957 * The "nwsp" latch is used to skip leading whitespace
9958 * The second half of the test prevents us from wrapping
9959 * at the preferred break point in the case that it
9960 * is so early in the line that it doesn't help.
9961 * That is, the width of the indent is even more than
9962 * the width of the first part before the preferred
9963 * break point. An example would be breaking after
9964 * "To:" when the indent is 4 which is > 3.
9966 if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
9967 char *p1 = f->line + WRAP_PB_OFF(f);
9968 char *p2 = f->linep;
9969 char c2;
9970 int nwsp = 0, left_after_wrap;
9972 left_after_wrap = f->f2 - WRAP_PB_LEN(f);
9974 f->f2 = WRAP_PB_LEN(f);
9975 f->linep = p1;
9977 wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
9979 /* put back rest of characters */
9980 while(p1 < p2){
9981 c2 = *p1++;
9982 if(!(c2 == ' ' || c2 == '\t') || nwsp){
9983 WRAP_PUTC(f, c2, 0);
9984 nwsp = 1;
9986 else
9987 left_after_wrap--; /* wrong if a tab! */
9990 f->f2 = MAX(left_after_wrap, 0);
9992 wrap_eol(f, 1, &ip, &eib, &op,
9993 &eob); /* plunk down newline */
9994 wrap_bol(f,1,1, &ip, &eib, &op,
9995 &eob); /* write any prefix */
9998 * What's this for?
9999 * If we do the less preferable break point at
10000 * the space we don't want to lose the fact that
10001 * we might be able to break at this comma for
10002 * the next one.
10004 if(full_character && c == ','){
10005 WRAP_PUTC(f, c, 1);
10006 wrap_flush(f, &ip, &eib, &op, &eob);
10007 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10010 else{
10011 wrap_flush(f, &ip, &eib, &op, &eob);
10013 wrap_eol(f, 1, &ip, &eib, &op,
10014 &eob); /* plunk down newline */
10015 wrap_bol(f,1,1, &ip, &eib, &op,
10016 &eob); /* write any prefix */
10020 else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10021 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10022 wrap_eol(f, 1, &ip, &eib, &op,
10023 &eob); /* plunk down newline */
10024 wrap_bol(f,1,1, &ip, &eib, &op,
10025 &eob); /* write any prefix */
10029 * Commit entire multibyte UTF-8 character at once
10030 * instead of writing partial characters into the
10031 * buffer.
10033 if(full_character){
10034 unsigned char *q;
10036 for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10037 WRAP_PUTC(f, *q, width);
10038 width = 0;
10041 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10044 break;
10046 case_tag :
10047 case TAG :
10048 WRAP_EMBED_PUTC(f, c);
10049 switch(c){
10050 case TAG_HANDLE :
10051 WRAP_EMBED(f) = -1;
10052 state = HANDLE;
10053 break;
10055 case TAG_FGCOLOR :
10056 case TAG_BGCOLOR :
10057 WRAP_EMBED(f) = RGBLEN;
10058 state = HDATA;
10059 break;
10061 default :
10062 state = WRAP_STATE(f);
10063 break;
10066 break;
10068 case HANDLE :
10069 WRAP_EMBED_PUTC(f, c);
10070 WRAP_EMBED(f) = c;
10071 state = HDATA;
10072 break;
10074 case HDATA :
10075 if(f->f2){
10076 WRAP_PUTC(f, c, 0);
10078 else
10079 so_writec(c, WRAP_SPACES(f));
10081 if(!(WRAP_EMBED(f) -= 1)){
10082 state = WRAP_STATE(f);
10085 break;
10089 f->f1 = state;
10090 GF_END(f, f->next);
10092 else if(flg == GF_EOD){
10093 wrap_flush(f, &ip, &eib, &op, &eob);
10094 if(WRAP_COLOR(f))
10095 free_color_pair(&WRAP_COLOR(f));
10097 fs_give((void **) &f->line); /* free temp line buffer */
10098 so_give(&WRAP_SPACES(f));
10099 fs_give((void **) &f->opt); /* free wrap widths struct */
10100 (void) GF_FLUSH(f->next);
10101 (*f->next->f)(f->next, GF_EOD);
10103 else if(flg == GF_RESET){
10104 dprint((9, "-- gf_reset wrap\n"));
10105 f->f1 = BOL;
10106 f->n = 0L; /* displayed length of line so far */
10107 f->f2 = 0; /* displayed length of buffered chars */
10108 WRAP_HARD(f) = 1; /* starting at beginning of line */
10109 if(! (WRAP_S *) f->opt)
10110 f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
10112 while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10113 WRAP_INDENT(f) /= 2;
10115 f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10116 f->linep = f->line;
10117 WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
10119 for(i = 0; i < 256; i++)
10120 ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10121 || i == '\015'
10122 || i == '\012'
10123 || (i == (unsigned char) TAG_EMBED
10124 && WRAP_TAGS(f))
10125 || (i == ',' && WRAP_COMMA(f)
10126 && !WRAP_QUOTED(f))
10127 || ASCII_ISSPACE(i));
10128 WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10129 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10134 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10135 unsigned char **opp, unsigned char **eobp)
10137 register char *s;
10138 register int n;
10140 s = (char *)so_text(WRAP_SPACES(f));
10141 n = so_tell(WRAP_SPACES(f));
10142 so_seek(WRAP_SPACES(f), 0L, 0);
10143 wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10144 so_truncate(WRAP_SPACES(f), 0L);
10145 WRAP_SPC_LEN(f) = 0;
10146 WRAP_TRL_SPC(f) = 0;
10147 s = f->line;
10148 n = f->linep - f->line;
10149 wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10150 f->f2 = 0;
10151 f->linep = f->line;
10152 WRAP_PB_OFF(f) = 0;
10153 WRAP_PB_LEN(f) = 0;
10155 return 0;
10159 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10161 register char *s;
10162 register int n;
10163 s = (char *)so_text(WRAP_SPACES(f));
10164 n = so_tell(WRAP_SPACES(f));
10165 so_seek(WRAP_SPACES(f), 0L, 0);
10166 wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10167 so_truncate(WRAP_SPACES(f), 0L);
10168 WRAP_SPC_LEN(f) = 0;
10169 WRAP_TRL_SPC(f) = 0;
10171 return 0;
10175 wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
10176 unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
10178 f->n += w;
10180 for(; n > 0; n--,s++){
10181 if(*s == TAG_EMBED){
10182 if(n-- > 0){
10183 switch(*++s){
10184 case TAG_BOLDON :
10185 GF_PUTC_GLO(f->next,TAG_EMBED);
10186 GF_PUTC_GLO(f->next,TAG_BOLDON);
10187 WRAP_BOLD(f) = 1;
10188 break;
10189 case TAG_BOLDOFF :
10190 GF_PUTC_GLO(f->next,TAG_EMBED);
10191 GF_PUTC_GLO(f->next,TAG_BOLDOFF);
10192 WRAP_BOLD(f) = 0;
10193 break;
10194 case TAG_ULINEON :
10195 GF_PUTC_GLO(f->next,TAG_EMBED);
10196 GF_PUTC_GLO(f->next,TAG_ULINEON);
10197 WRAP_ULINE(f) = 1;
10198 break;
10199 case TAG_ULINEOFF :
10200 GF_PUTC_GLO(f->next,TAG_EMBED);
10201 GF_PUTC_GLO(f->next,TAG_ULINEOFF);
10202 WRAP_ULINE(f) = 0;
10203 break;
10204 case TAG_INVOFF :
10205 GF_PUTC_GLO(f->next,TAG_EMBED);
10206 GF_PUTC_GLO(f->next,TAG_INVOFF);
10207 WRAP_ANCHOR(f) = 0;
10208 break;
10209 case TAG_HANDLE :
10210 if((flags & WFE_CNT_HANDLE) == 0)
10211 GF_PUTC_GLO(f->next,TAG_EMBED);
10213 if(n-- > 0){
10214 int i = *++s;
10216 if((flags & WFE_CNT_HANDLE) == 0)
10217 GF_PUTC_GLO(f->next, TAG_HANDLE);
10219 if(i <= n){
10220 n -= i;
10222 if((flags & WFE_CNT_HANDLE) == 0)
10223 GF_PUTC_GLO(f->next, i);
10225 WRAP_ANCHOR(f) = 0;
10226 while(i-- > 0){
10227 WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');
10229 if((flags & WFE_CNT_HANDLE) == 0)
10230 GF_PUTC_GLO(f->next,*s);
10235 break;
10236 case TAG_FGCOLOR :
10237 if(pico_usingcolor() && n >= RGBLEN){
10238 int i;
10239 GF_PUTC_GLO(f->next,TAG_EMBED);
10240 GF_PUTC_GLO(f->next,TAG_FGCOLOR);
10241 if(!WRAP_COLOR(f))
10242 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10243 strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
10244 WRAP_COLOR(f)->fg[RGBLEN]='\0';
10245 i = RGBLEN;
10246 n -= i;
10247 while(i-- > 0)
10248 GF_PUTC_GLO(f->next,
10249 (*++s) & 0xff);
10251 break;
10252 case TAG_BGCOLOR :
10253 if(pico_usingcolor() && n >= RGBLEN){
10254 int i;
10255 GF_PUTC_GLO(f->next,TAG_EMBED);
10256 GF_PUTC_GLO(f->next,TAG_BGCOLOR);
10257 if(!WRAP_COLOR(f))
10258 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10259 strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
10260 WRAP_COLOR(f)->bg[RGBLEN]='\0';
10261 i = RGBLEN;
10262 n -= i;
10263 while(i-- > 0)
10264 GF_PUTC_GLO(f->next,
10265 (*++s) & 0xff);
10267 break;
10268 default :
10269 break;
10273 else if(w){
10275 if(f->n <= WRAP_MAX_COL(f)){
10276 GF_PUTC_GLO(f->next, (*s) & 0xff);
10278 else{
10279 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
10282 WRAP_ALLWSP(f) = 0;
10286 return 0;
10290 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10291 unsigned char **opp, unsigned char **eobp)
10293 if(WRAP_SAW_SOFT_HYPHEN(f)){
10294 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10295 GF_PUTC_GLO(f->next, '-'); /* real hyphen */
10298 if(c && WRAP_LV_FLD(f))
10299 GF_PUTC_GLO(f->next, ' ');
10301 if(WRAP_BOLD(f)){
10302 GF_PUTC_GLO(f->next, TAG_EMBED);
10303 GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10306 if(WRAP_ULINE(f)){
10307 GF_PUTC_GLO(f->next, TAG_EMBED);
10308 GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10311 if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10312 GF_PUTC_GLO(f->next, TAG_EMBED);
10313 GF_PUTC_GLO(f->next, TAG_INVOFF);
10316 if(WRAP_COLOR_SET(f)){
10317 char *p;
10318 char cb[RGBLEN+1];
10319 GF_PUTC_GLO(f->next, TAG_EMBED);
10320 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10321 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10322 cb[sizeof(cb)-1] = '\0';
10323 p = cb;
10324 for(; *p; p++)
10325 GF_PUTC_GLO(f->next, *p);
10326 GF_PUTC_GLO(f->next, TAG_EMBED);
10327 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10328 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10329 cb[sizeof(cb)-1] = '\0';
10330 p = cb;
10331 for(; *p; p++)
10332 GF_PUTC_GLO(f->next, *p);
10335 GF_PUTC_GLO(f->next, '\015');
10336 GF_PUTC_GLO(f->next, '\012');
10337 f->n = 0L;
10338 so_truncate(WRAP_SPACES(f), 0L);
10339 WRAP_SPC_LEN(f) = 0;
10340 WRAP_TRL_SPC(f) = 0;
10342 return 0;
10346 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10347 unsigned char **opp, unsigned char **eobp)
10349 int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10351 if(WRAP_HDR_CLR(f)){
10352 char *p;
10353 char cbuf[RGBLEN+1];
10354 int k;
10356 if((k = WRAP_MARG_L(f)) > 0)
10357 while(k-- > 0){
10358 n--;
10359 f->n++;
10360 GF_PUTC_GLO(f->next, ' ');
10363 GF_PUTC_GLO(f->next, TAG_EMBED);
10364 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10365 strncpy(cbuf,
10366 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10367 sizeof(cbuf));
10368 cbuf[sizeof(cbuf)-1] = '\0';
10369 p = cbuf;
10370 for(; *p; p++)
10371 GF_PUTC_GLO(f->next, *p);
10372 GF_PUTC_GLO(f->next, TAG_EMBED);
10373 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10374 strncpy(cbuf,
10375 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10376 sizeof(cbuf));
10377 cbuf[sizeof(cbuf)-1] = '\0';
10378 p = cbuf;
10379 for(; *p; p++)
10380 GF_PUTC_GLO(f->next, *p);
10383 while(n-- > 0){
10384 f->n++;
10385 GF_PUTC_GLO(f->next, ' ');
10388 WRAP_ALLWSP(f) = 1;
10390 if(q)
10391 wrap_quote_insert(f, ipp, eibp, opp, eobp);
10393 if(WRAP_BOLD(f)){
10394 GF_PUTC_GLO(f->next, TAG_EMBED);
10395 GF_PUTC_GLO(f->next, TAG_BOLDON);
10397 if(WRAP_ULINE(f)){
10398 GF_PUTC_GLO(f->next, TAG_EMBED);
10399 GF_PUTC_GLO(f->next, TAG_ULINEON);
10401 if(WRAP_INVERSE(f)){
10402 GF_PUTC_GLO(f->next, TAG_EMBED);
10403 GF_PUTC_GLO(f->next, TAG_INVON);
10405 if(WRAP_COLOR_SET(f)){
10406 char *p;
10407 if(WRAP_COLOR(f)->fg[0]){
10408 char cb[RGBLEN+1];
10409 GF_PUTC_GLO(f->next, TAG_EMBED);
10410 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10411 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10412 cb[sizeof(cb)-1] = '\0';
10413 p = cb;
10414 for(; *p; p++)
10415 GF_PUTC_GLO(f->next, *p);
10417 if(WRAP_COLOR(f)->bg[0]){
10418 char cb[RGBLEN+1];
10419 GF_PUTC_GLO(f->next, TAG_EMBED);
10420 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10421 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10422 cb[sizeof(cb)-1] = '\0';
10423 p = cb;
10424 for(; *p; p++)
10425 GF_PUTC_GLO(f->next, *p);
10428 if(WRAP_ANCHOR(f)){
10429 char buf[64]; int i;
10430 GF_PUTC_GLO(f->next, TAG_EMBED);
10431 GF_PUTC_GLO(f->next, TAG_HANDLE);
10432 snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10433 GF_PUTC_GLO(f->next, (int) strlen(buf));
10434 for(i = 0; buf[i]; i++)
10435 GF_PUTC_GLO(f->next, buf[i]);
10438 return 0;
10442 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10443 unsigned char **opp, unsigned char **eobp)
10445 int j, i;
10446 COLOR_PAIR *col = NULL;
10447 char *prefix = NULL, *last_prefix = NULL;
10449 if(ps_global->VAR_QUOTE_REPLACE_STRING){
10450 get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10451 if(!prefix && last_prefix){
10452 prefix = last_prefix;
10453 last_prefix = NULL;
10457 for(j = 0; j < WRAP_FL_QD(f); j++){
10458 if(WRAP_USE_CLR(f)){
10459 if((j % 3) == 0
10460 && ps_global->VAR_QUOTE1_FORE_COLOR
10461 && ps_global->VAR_QUOTE1_BACK_COLOR
10462 && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10463 ps_global->VAR_QUOTE1_BACK_COLOR))
10464 && pico_is_good_colorpair(col)){
10465 GF_COLOR_PUTC(f, col);
10467 else if((j % 3) == 1
10468 && ps_global->VAR_QUOTE2_FORE_COLOR
10469 && ps_global->VAR_QUOTE2_BACK_COLOR
10470 && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10471 ps_global->VAR_QUOTE2_BACK_COLOR))
10472 && pico_is_good_colorpair(col)){
10473 GF_COLOR_PUTC(f, col);
10475 else if((j % 3) == 2
10476 && ps_global->VAR_QUOTE3_FORE_COLOR
10477 && ps_global->VAR_QUOTE3_BACK_COLOR
10478 && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10479 ps_global->VAR_QUOTE3_BACK_COLOR))
10480 && pico_is_good_colorpair(col)){
10481 GF_COLOR_PUTC(f, col);
10483 if(col){
10484 free_color_pair(&col);
10485 col = NULL;
10489 if(!WRAP_LV_FLD(f)){
10490 if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10491 for(i = 0; prefix[i]; i++)
10492 GF_PUTC_GLO(f->next, prefix[i]);
10493 f->n += utf8_width(prefix);
10495 else if(ps_global->VAR_REPLY_STRING
10496 && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10497 || !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10498 GF_PUTC_GLO(f->next, '>');
10499 f->n += 1;
10501 else{
10502 GF_PUTC_GLO(f->next, '>');
10503 GF_PUTC_GLO(f->next, ' ');
10504 f->n += 2;
10507 else{
10508 GF_PUTC_GLO(f->next, '>');
10509 f->n += 1;
10512 if(j && WRAP_LV_FLD(f)){
10513 GF_PUTC_GLO(f->next, ' ');
10514 f->n++;
10516 else if(j && last_prefix){
10517 for(i = 0; last_prefix[i]; i++)
10518 GF_PUTC_GLO(f->next, last_prefix[i]);
10519 f->n += utf8_width(last_prefix);
10522 if(prefix)
10523 fs_give((void **)&prefix);
10524 if(last_prefix)
10525 fs_give((void **)&last_prefix);
10527 return 0;
10532 * function called from the outside to set
10533 * wrap filter's width option
10535 void *
10536 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10538 WRAP_S *wrap;
10540 /* NOTE: variables MUST be sanity checked before they get here */
10541 wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10542 memset(wrap, 0, sizeof(WRAP_S));
10543 wrap->wrap_col = width;
10544 wrap->wrap_max = width_max;
10545 wrap->indent = indent;
10546 wrap->margin_l = (margin) ? margin[0] : 0;
10547 wrap->margin_r = (margin) ? margin[1] : 0;
10548 wrap->tags = (GFW_HANDLES & flags) == GFW_HANDLES;
10549 wrap->on_comma = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10550 wrap->flowed = (GFW_FLOWED & flags) == GFW_FLOWED;
10551 wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10552 wrap->delsp = (GFW_DELSP & flags) == GFW_DELSP;
10553 wrap->use_color = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10554 wrap->hdr_color = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10555 wrap->for_compose = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10556 wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10558 return((void *) wrap);
10562 void *
10563 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10565 if(uh){
10566 memset(uh, 0, sizeof(URL_HILITE_S));
10567 uh->handlesp = handlesp;
10568 uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10571 return((void *) uh);
/*
 * State the preflow filter keeps in its filter's opt pointer
 */
typedef struct preflow_s {
    int quote_depth;		/* quote depth */
    int quote_count;		/* quote count */
    int sig;			/* sig level */
} PREFLOW_S;

/* shorthand for the PREFLOW_S fields hanging off filter F's opt pointer */
#define	PF_QD(F)	(((PREFLOW_S *)(F)->opt)->quote_depth)
#define	PF_QC(F)	(((PREFLOW_S *)(F)->opt)->quote_count)
#define	PF_SIG(F)	(((PREFLOW_S *)(F)->opt)->sig)
10586 * This would normally be handled in gf_wrap. If there is a possibility
10587 * that a url we want to recognize is cut in half by a soft newline we
10588 * want to fix that up by putting the halves back together. We do that
10589 * by deleting the soft newline and putting it all in one line. It will
10590 * still get wrapped later in gf_wrap. It isn't pretty with all the
10591 * goto's, but whatta ya gonna do?
10593 void
10594 gf_preflow(FILTER_S *f, int flg)
10596 GF_INIT(f, f->next);
10598 if(flg == GF_DATA){
10599 register unsigned char c;
10600 register int state = f->f1;
10601 register int pending = f->f2;
10603 while(GF_GETC(f, c)){
10604 switch(state){
10605 case DFL:
10606 default_case:
10607 switch(c){
10608 case ' ':
10609 state = WSPACE;
10610 break;
10612 case '\015':
10613 state = CCR;
10614 break;
10616 default:
10617 GF_PUTC(f->next, c);
10618 break;
10621 break;
10623 case CCR:
10624 switch(c){
10625 case '\012':
10626 pending = 1;
10627 state = BOL;
10628 break;
10630 default:
10631 GF_PUTC(f->next, '\012');
10632 state = DFL;
10633 goto default_case;
10634 break;
10637 break;
10639 case WSPACE:
10640 switch(c){
10641 case '\015':
10642 state = SPACECR;
10643 break;
10645 default:
10646 GF_PUTC(f->next, ' ');
10647 state = DFL;
10648 goto default_case;
10649 break;
10652 break;
10654 case SPACECR:
10655 switch(c){
10656 case '\012':
10657 pending = 2;
10658 state = BOL;
10659 break;
10661 default:
10662 GF_PUTC(f->next, ' ');
10663 GF_PUTC(f->next, '\012');
10664 state = DFL;
10665 goto default_case;
10666 break;
10669 break;
10671 case BOL:
10672 PF_QC(f) = 0;
10673 if(c == '>'){ /* count quote level */
10674 PF_QC(f)++;
10675 state = FL_QLEV;
10677 else{
10678 done_counting_quotes:
10679 if(c == ' '){ /* eat stuffed space */
10680 state = FL_STF;
10681 break;
10684 done_with_stuffed_space:
10685 if(c == '-'){ /* look for signature */
10686 PF_SIG(f) = 1;
10687 state = FL_SIG;
10688 break;
10691 done_with_sig:
10692 if(pending == 2){
10693 if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
10694 /* delete pending */
10696 PF_QD(f) = PF_QC(f);
10698 /* suppress quotes, too */
10699 PF_QC(f) = 0;
10701 else{
10703 * This should have been a hard new line
10704 * instead so leave out the trailing space.
10706 GF_PUTC(f->next, '\015');
10707 GF_PUTC(f->next, '\012');
10709 PF_QD(f) = PF_QC(f);
10712 else if(pending == 1){
10713 GF_PUTC(f->next, '\015');
10714 GF_PUTC(f->next, '\012');
10715 PF_QD(f) = PF_QC(f);
10717 else{
10718 PF_QD(f) = PF_QC(f);
10721 pending = 0;
10722 state = DFL;
10723 while(PF_QC(f)-- > 0)
10724 GF_PUTC(f->next, '>');
10726 switch(PF_SIG(f)){
10727 case 0:
10728 default:
10729 break;
10731 case 1:
10732 GF_PUTC(f->next, '-');
10733 break;
10735 case 2:
10736 GF_PUTC(f->next, '-');
10737 GF_PUTC(f->next, '-');
10738 break;
10740 case 3:
10741 case 4:
10742 GF_PUTC(f->next, '-');
10743 GF_PUTC(f->next, '-');
10744 GF_PUTC(f->next, ' ');
10745 break;
10748 PF_SIG(f) = 0;
10749 goto default_case; /* to handle c */
10752 break;
10754 case FL_QLEV: /* count quote level */
10755 if(c == '>')
10756 PF_QC(f)++;
10757 else
10758 goto done_counting_quotes;
10760 break;
10762 case FL_STF: /* eat stuffed space */
10763 goto done_with_stuffed_space;
10764 break;
10766 case FL_SIG: /* deal with sig indicator */
10767 switch(PF_SIG(f)){
10768 case 1: /* saw '-' */
10769 if(c == '-')
10770 PF_SIG(f) = 2;
10771 else
10772 goto done_with_sig;
10774 break;
10776 case 2: /* saw '--' */
10777 if(c == ' ')
10778 PF_SIG(f) = 3;
10779 else
10780 goto done_with_sig;
10782 break;
10784 case 3: /* saw '-- ' */
10785 if(c == '\015')
10786 PF_SIG(f) = 4; /* it really is a sig line */
10788 goto done_with_sig;
10789 break;
10792 break;
10796 f->f1 = state;
10797 f->f2 = pending;
10798 GF_END(f, f->next);
10800 else if(flg == GF_EOD){
10801 fs_give((void **) &f->opt);
10802 (void) GF_FLUSH(f->next);
10803 (*f->next->f)(f->next, GF_EOD);
10805 else if(flg == GF_RESET){
10806 PREFLOW_S *pf;
10808 pf = (PREFLOW_S *) fs_get(sizeof(*pf));
10809 memset(pf, 0, sizeof(*pf));
10810 f->opt = (void *) pf;
10812 f->f1 = BOL; /* state */
10813 f->f2 = 0; /* pending */
10814 PF_QD(f) = 0; /* quote depth */
10815 PF_QC(f) = 0; /* quote count */
10816 PF_SIG(f) = 0; /* sig level */
/*
 * LINE PREFIX FILTER - insert given text at beginning of each
 * line
 */

/*
 * GF_PREFIX_WRITE - write string s, when it isn't NULL, to the next
 * filter one character at a time.  Relies on a filter pointer named
 * "f" being in scope where the macro is expanded.
 */
#define	GF_PREFIX_WRITE(s)	{					\
				    register char *pw;			\
				    for(pw = (s);			\
					pw != NULL && *pw != '\0';	\
					pw++)				\
				      GF_PUTC(f->next, *pw);		\
				}
10838 * the simple filter, prepends each line with the requested prefix.
10839 * if prefix is null, does nothing, and as with all filters, assumes
10840 * NVT end of lines.
10842 void
10843 gf_prefix(FILTER_S *f, int flg)
10845 GF_INIT(f, f->next);
10847 if(flg == GF_DATA){
10848 register unsigned char c;
10849 register int state = f->f1;
10850 register int first = f->f2;
10852 while(GF_GETC(f, c)){
10854 if(first){ /* write initial prefix!! */
10855 first = 0; /* but just once */
10856 GF_PREFIX_WRITE((char *) f->opt);
10860 * State == 0 is the starting state and the usual state.
10861 * State == 1 means we saw a CR and haven't acted on it yet.
10862 * We are looking for a LF to get the CRLF end of line.
10863 * However, we also treat bare CR and bare LF as if they
10864 * were CRLF sequences. What else could it mean in text?
10865 * This filter is only used for text so that is probably
10866 * a reasonable interpretation of the bad input.
10868 if(c == '\015'){ /* CR */
10869 if(state){ /* Treat pending CR as endofline, */
10870 GF_PUTC(f->next, '\015'); /* and remain in saw-a-CR state. */
10871 GF_PUTC(f->next, '\012');
10872 GF_PREFIX_WRITE((char *) f->opt);
10874 else{
10875 state = 1;
10878 else if(c == '\012'){ /* LF */
10879 GF_PUTC(f->next, '\015'); /* Got either a CRLF or a bare LF, */
10880 GF_PUTC(f->next, '\012'); /* treat both as if a CRLF. */
10881 GF_PREFIX_WRITE((char *) f->opt);
10882 state = 0;
10884 else{ /* any other character */
10885 if(state){
10886 GF_PUTC(f->next, '\015'); /* Treat pending CR as endofline. */
10887 GF_PUTC(f->next, '\012');
10888 GF_PREFIX_WRITE((char *) f->opt);
10889 state = 0;
10892 GF_PUTC(f->next, c);
10896 f->f1 = state; /* save state for next chunk of data */
10897 f->f2 = first;
10898 GF_END(f, f->next);
10900 else if(flg == GF_EOD){
10901 (void) GF_FLUSH(f->next);
10902 (*f->next->f)(f->next, GF_EOD);
10904 else if(flg == GF_RESET){
10905 dprint((9, "-- gf_reset prefix\n"));
10906 f->f1 = 0;
10907 f->f2 = 1; /* nothing written yet */
/*
 * gf_prefix_opt - called from the outside to set the prefix filter's
 *		   prefix string
 *
 * The string is not copied; the same pointer comes back as a void *.
 */
void *
gf_prefix_opt(char *prefix)
{
    void *opt = prefix;

    return(opt);
}
10924 * LINE TEST FILTER - accumulate lines and offer each to the provided
10925 * test function.
10928 typedef struct _linetest_s {
10929 linetest_t f;
10930 void *local;
10931 } LINETEST_S;
/* number of bytes the line accumulator grows by */
#define	LINE_TEST_BLOCK	1024

/* last usable byte of the accumulator; f2 holds its allocated size */
#define	GF_LINE_TEST_EOB(f)						\
			((f)->line + ((f)->f2 - 1))

/*
 * Append c to the accumulator, growing it by one block first when it is
 * full.  Expects "p" (next free byte) and "eobuf" (end of buffer) to be
 * locals of the function the macro is expanded in.
 */
#define	GF_LINE_TEST_ADD(f, c)						\
	{								\
	    if(p >= eobuf){						\
		(f)->f2 += LINE_TEST_BLOCK;				\
		fs_resize((void **) &(f)->line,				\
			  (size_t) (f)->f2 * sizeof(char));		\
		eobuf = GF_LINE_TEST_EOB(f);				\
		p     = eobuf - LINE_TEST_BLOCK;			\
	    }								\
	    *p++ = (c);							\
	}
/*
 * GF_LINE_TEST_TEST - offer the accumulated line to the test function
 *		       and write the result to the next filter
 *
 * The line is tied off, then handed to the test function together with
 * its line number, the address of an (initially empty) insertion list
 * and the caller's local data.  The function's answer lands in D:
 *
 *   D <  0	error: the line buffer and option struct are released and
 *		gf_error is called, which does not return
 *   D == 2	nothing at all is written for this line
 *   else	the line is written with the insertion list applied
 *
 * While writing, an insertion with a positive len puts its text in
 * front of the character at "where", one with a negative len skips that
 * many characters of the line.  Insertions still unused at the end of
 * the line are written after it.
 *
 * The insertion list is released on every path, including the D == 2
 * and D < 0 ones, so a test function that built a list before deciding
 * to skip the line or fail can't leak it.
 *
 * Expects "p", pointing just past the accumulated line, to be a local
 * of the function the macro is expanded in.
 */
#define	GF_LINE_TEST_TEST(F, D)						\
	{								\
	    unsigned char  c;						\
	    register char *cp;						\
	    register int   l;						\
	    LT_INS_S	  *ins = NULL, *insp;				\
	    *p = '\0';							\
	    (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++,		\
					   (F)->line, &ins,		\
					   ((LINETEST_S *) (F)->opt)->local); \
	    if((D) < 0){						\
		gf_line_test_free_ins(&ins);				\
		if((F)->line)						\
		  fs_give((void **) &(F)->line);			\
		if((F)->opt)						\
		  fs_give((void **) &(F)->opt);				\
		gf_error(_("translation error"));			\
		/* NO RETURN */						\
	    }								\
	    if((D) < 2){						\
		for(insp = ins, cp = (F)->line; cp < p; ){		\
		    if(insp && cp == insp->where){			\
			if(insp->len > 0){				\
			    for(l = 0; l < insp->len; l++){		\
				c = (unsigned char) insp->text[l];	\
				GF_PUTC((F)->next, c);			\
			    }						\
			    insp = insp->next;				\
			    continue;					\
			} else if(insp->len < 0){			\
			    cp -= insp->len;				\
			    insp = insp->next;				\
			    continue;					\
			}						\
		    }							\
		    GF_PUTC((F)->next, *cp);				\
		    cp++;						\
		}							\
		while(insp){						\
		    for(l = 0; l < insp->len; l++){			\
			c = (unsigned char) insp->text[l];		\
			GF_PUTC((F)->next, c);				\
		    }							\
		    insp = insp->next;					\
		}							\
	    }								\
	    gf_line_test_free_ins(&ins);				\
	}
11003 * this simple filter accumulates characters until a newline, offers it
11004 * to the provided test function, and then passes it on. It assumes
11005 * NVT EOLs.
11007 void
11008 gf_line_test(FILTER_S *f, int flg)
11010 register char *p = f->linep;
11011 register char *eobuf = GF_LINE_TEST_EOB(f);
11012 GF_INIT(f, f->next);
11014 if(flg == GF_DATA){
11015 register unsigned char c;
11016 register int state = f->f1;
11018 while(GF_GETC(f, c)){
11020 if(state){
11021 state = 0;
11022 if(c == '\012'){
11023 int done;
11025 GF_LINE_TEST_TEST(f, done);
11027 p = (f)->line;
11029 if(done == 2) /* skip this line! */
11030 continue;
11032 GF_PUTC(f->next, '\015');
11033 GF_PUTC(f->next, '\012');
11035 * if the line tester returns TRUE, it's
11036 * telling us its seen enough and doesn't
11037 * want to see any more. Remove ourself
11038 * from the pipeline...
11040 if(done){
11041 if(gf_master == f){
11042 gf_master = f->next;
11044 else{
11045 FILTER_S *fprev;
11047 for(fprev = gf_master;
11048 fprev && fprev->next != f;
11049 fprev = fprev->next)
11052 if(fprev) /* wha??? */
11053 fprev->next = f->next;
11054 else
11055 continue;
11058 while(GF_GETC(f, c)) /* pass input */
11059 GF_PUTC(f->next, c);
11061 (void) GF_FLUSH(f->next); /* and drain queue */
11062 fs_give((void **)&f->line);
11063 fs_give((void **)&f); /* wax our data */
11064 return;
11066 else
11067 continue;
11069 else /* add CR to buffer */
11070 GF_LINE_TEST_ADD(f, '\015');
11071 } /* fall thru to handle 'c' */
11073 if(c == '\015') /* newline? */
11074 state = 1;
11075 else
11076 GF_LINE_TEST_ADD(f, c);
11079 f->f1 = state;
11080 GF_END(f, f->next);
11082 else if(flg == GF_EOD){
11083 int i;
11085 GF_LINE_TEST_TEST(f, i); /* examine remaining data */
11086 fs_give((void **) &f->line); /* free line buffer */
11087 fs_give((void **) &f->opt); /* free test struct */
11088 (void) GF_FLUSH(f->next);
11089 (*f->next->f)(f->next, GF_EOD);
11091 else if(flg == GF_RESET){
11092 dprint((9, "-- gf_reset line_test\n"));
11093 f->f1 = 0; /* state */
11094 f->n = 0L; /* line number */
11095 f->f2 = LINE_TEST_BLOCK; /* size of alloc'd line */
11096 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11099 f->linep = p;
11104 * function called from the outside to operate on accumulated line.
11106 void *
11107 gf_line_test_opt(linetest_t test_f, void *local)
11109 LINETEST_S *ltp;
11111 ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11112 memset(ltp, 0, sizeof(LINETEST_S));
11113 ltp->f = test_f;
11114 ltp->local = local;
11115 return((void *) ltp);
11120 LT_INS_S **
11121 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11123 *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11124 if(((*ins)->len = n) > 0)
11125 strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11126 else
11127 (*ins)->text = NULL;
11129 (*ins)->where = p;
11130 (*ins)->next = NULL;
11131 return(&(*ins)->next);
11135 void
11136 gf_line_test_free_ins(LT_INS_S **ins)
11138 if(ins && *ins){
11139 if((*ins)->next)
11140 gf_line_test_free_ins(&(*ins)->next);
11142 if((*ins)->text)
11143 fs_give((void **) &(*ins)->text);
11145 fs_give((void **) ins);
11151 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11152 * with editorial comment
11155 typedef struct _preped_s {
11156 prepedtest_t f;
11157 char *text;
11158 } PREPED_S;
11162 * gf_prepend_editorial - accumulate filtered text and prepend its
11163 * output with given text
11167 void
11168 gf_prepend_editorial(FILTER_S *f, int flg)
11170 GF_INIT(f, f->next);
11172 if(flg == GF_DATA){
11173 register unsigned char c;
11175 while(GF_GETC(f, c)){
11176 so_writec(c, (STORE_S *) f->data);
11179 GF_END(f, f->next);
11181 else if(flg == GF_EOD){
11182 unsigned char c;
11184 if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11185 char *p = ((PREPED_S *)(f)->opt)->text;
11187 for( ; p && *p; p++)
11188 GF_PUTC(f->next, *p);
11191 so_seek((STORE_S *) f->data, 0L, 0);
11192 while(so_readc(&c, (STORE_S *) f->data)){
11193 GF_PUTC(f->next, c);
11196 so_give((STORE_S **) &f->data);
11197 fs_give((void **) &f->opt);
11198 (void) GF_FLUSH(f->next);
11199 (*f->next->f)(f->next, GF_EOD);
11201 else if(flg == GF_RESET){
11202 dprint((9, "-- gf_reset line_test\n"));
11203 f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11209 * function called from the outside to setup prepending editorial
11210 * to output text
11212 void *
11213 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11215 PREPED_S *pep;
11217 pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11218 memset(pep, 0, sizeof(PREPED_S));
11219 pep->f = test_f;
11220 pep->text = text;
11221 return((void *) pep);
11226 * Network virtual terminal to local newline convention filter
11228 void
11229 gf_nvtnl_local(FILTER_S *f, int flg)
11231 GF_INIT(f, f->next);
11233 if(flg == GF_DATA){
11234 register unsigned char c;
11235 register int state = f->f1;
11237 while(GF_GETC(f, c)){
11238 if(state){
11239 state = 0;
11240 if(c == '\012'){
11241 GF_PUTC(f->next, '\012');
11242 continue;
11244 else
11245 GF_PUTC(f->next, '\015');
11246 /* fall thru to deal with 'c' */
11249 if(c == '\015')
11250 state = 1;
11251 else
11252 GF_PUTC(f->next, c);
11255 f->f1 = state;
11256 GF_END(f, f->next);
11258 else if(flg == GF_EOD){
11259 (void) GF_FLUSH(f->next);
11260 (*f->next->f)(f->next, GF_EOD);
11262 else if(flg == GF_RESET){
11263 dprint((9, "-- gf_reset nvtnl_local\n"));
11264 f->f1 = 0;
11270 * local to network newline convention filter
11272 void
11273 gf_local_nvtnl(FILTER_S *f, int flg)
11275 GF_INIT(f, f->next);
11277 if(flg == GF_DATA){
11278 register unsigned char c;
11280 while(GF_GETC(f, c)){
11281 if(c == '\012'){
11282 GF_PUTC(f->next, '\015');
11283 GF_PUTC(f->next, '\012');
11285 else if(c != '\015') /* do not copy isolated \015 into source */
11286 GF_PUTC(f->next, c);
11289 GF_END(f, f->next);
11291 else if(flg == GF_EOD){
11292 (void) GF_FLUSH(f->next);
11293 (*f->next->f)(f->next, GF_EOD);
11295 else if(GF_RESET){
11296 dprint((9, "-- gf_reset local_nvtnl\n"));
11297 /* no op */