1 #if !defined(lint) && !defined(DOS)
2 static char rcsid
[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
6 * ========================================================================
7 * Copyright 2006-2008 University of Washington
8 * Copyright 2013-2016 Eduardo Chappa
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
39 reasonable error handling
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
60 #include "../pico/osdep/mswin.h"
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
73 int gf_fwritec_locale(int);
75 int gf_freadc_windows(unsigned char *);
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
81 int gf_pwritec_locale(int);
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
86 int gf_swritec_locale(int);
87 void gf_terminal(FILTER_S
*, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
92 void gf_8bit_put(FILTER_S
*, int);
97 * System specific options
100 #define CRLF_NEWLINES
105 * Hooks for callers to adjust behavior
107 char *(*pith_opt_pretty_var_name
)(char *);
108 char *(*pith_opt_pretty_feature_name
)(char *, int);
112 * pointer to first function in a pipe, and pointer to last filter
114 FILTER_S
*gf_master
= NULL
;
115 static gf_io_t last_filter
;
116 static char *gf_error_string
;
117 static long gf_byte_count
;
118 static jmp_buf gf_error_state
;
121 #define GF_NOOP 0x01 /* flags used by generalized */
122 #define GF_EOD 0x02 /* filters */
123 #define GF_DATA 0x04 /* See filter.c for more */
124 #define GF_ERROR 0x08 /* details */
125 #define GF_RESET 0x10
129 * A list of states used by the various filters. Reused in many filters.
149 #define STOP_DECODING 18
155 * Macros to reduce function call overhead associated with calling
156 * each filter for each byte filtered, and to minimize filter structure
157 * dereferences. NOTE: "queuein" has to do with putting chars into the
158 * filter structs data queue. So, writing at the queuein offset is
159 * what a filter does to pass processed data out of itself. Ditto for
160 * queueout. This explains the FI --> queueout init stuff below.
/*
 * Queue bounds for filter F: GF_QUE_START is the address of queue[0],
 * GF_QUE_END the address of queue[GF_MAXBUF - 1].
 */
162 #define GF_QUE_START(F) (&(F)->queue[0])
163 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
/*
 * Assignment fragments that load the local queue cursors from filter F,
 * or NULL when F is NULL: ip from queuein, eib from GF_QUE_END(F), op from
 * queueout and eob from queuein.  The _GLO forms assign through the
 * pointers ipp / eibp instead of the locals.  They are deliberately left
 * unparenthesized because GF_INIT uses them as declarators.
 */
165 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
166 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
167 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
168 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
169 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
170 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
/*
 * Write the local cursors back into F as offsets from GF_QUE_START(F):
 * ip (or *ipp for the _GLO form) into queuein, op into queueout.
 */
172 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
173 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
174 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
/*
 * Declares the four unsigned char * locals the queue macros rely on:
 * op and eob are taken from FI's queueout and queuein offsets, ip from
 * FO's queuein offset and eib from GF_QUE_END(FO).  Each one is NULL when
 * the corresponding filter pointer is NULL.
 */
176 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
177 unsigned char *GF_EOB_INIT(FI); \
178 unsigned char *GF_IP_INIT(FO); \
179 unsigned char *GF_EIB_INIT(FO);
/*
 * Rewind F's queue: op and eob go back to GF_QUE_START(F) and both stored
 * offsets are zeroed.  The expression's value is 0, which is what makes
 * GF_GETC evaluate false once the queue has been drained.
 */
181 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
182 (F)->queueout = (F)->queuein = 0)
/* Sync the local cursors back: op into FI's queueout, ip into FO's queuein. */
184 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
/*
 * Hand the queued bytes to filter F: store ip back into F->queuein, call
 * F's function with GF_DATA, then reload ip and eib from F.  Evaluates to
 * 1 when the reloaded end pointer is non-NULL, 0 otherwise.  The _GLO
 * variant performs the same steps through the ipp / eibp pointers.
 */
186 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
187 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
188 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
189 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
/*
 * Append C at ip and advance it; once ip has reached eib the queue is
 * handed to F with GF_FLUSH, otherwise the expression is 1.  Expects ip
 * and eib (ipp and eibp for the _GLO form) to be in scope.  F is expanded
 * more than once on the flush path, so it should be free of side effects.
 */
191 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
192 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
195 * Introducing the *_GLO macros for use in splitting the big macros out
196 * into functions (wrap_flush, wrap_eol). The reason we need a
197 * separate macro is because of the vars ip, eib, op, and eob, which are
198 * set up locally in a call to GF_INIT. To preserve these variables
199 * in the new functions, we now pass pointers to these four vars. Each
200 * of these new functions expects the presence of pointer vars
201 * ipp, eibp, opp, and eobp.
/*
 * Take the next byte at op into C and yield 1 while op is below eob;
 * otherwise reset F's queue with GF_CH_RESET and yield 0.
 */
204 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
206 #define GF_COLOR_PUTC(F, C) { \
209 GF_PUTC_GLO((F)->next, TAG_EMBED); \
210 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
211 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
212 cb[sizeof(cb)-1] = '\0'; \
215 GF_PUTC_GLO((F)->next, *p); \
216 GF_PUTC_GLO((F)->next, TAG_EMBED); \
217 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
218 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
219 cb[sizeof(cb)-1] = '\0'; \
222 GF_PUTC_GLO((F)->next, *p); \
226 * Generalized getc and putc routines. provided here so they don't
227 * need to be re-done elsewhere to
231 * pointers to objects to be used by the generic getc and putc
234 static struct gf_io_struct
{
243 #define GF_SO_STACK struct gf_so_stack
247 } *gf_so_in
, *gf_so_out
;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc
)
261 return(pc
== gf_so_writec
&& gf_so_out
&& gf_so_out
->so
&&
262 gf_so_out
->so
->src
== ExternalText
);
268 * setup to use and return a pointer to the generic
272 gf_set_readc(gf_io_t
*gc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
276 gf_in
.cb
.cbuf
[0] = '\0';
277 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
278 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
281 gf_in
.file
= (FILE *)txt
;
282 fseek(gf_in
.file
, 0L, 0);
284 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_windows
287 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_locale
291 else if(src
== PipeStar
){
292 gf_in
.pipe
= (PIPE_S
*)txt
;
294 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_preadc_locale
298 gf_in
.txtp
= (char *)txt
;
299 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_sreadc_locale
306 * setup to use and return a pointer to the generic
310 gf_set_writec(gf_io_t
*pc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
313 gf_out
.flags
= flags
;
314 gf_out
.cb
.cbuf
[0] = '\0';
315 gf_out
.cb
.cbufp
= gf_out
.cb
.cbuf
;
316 gf_out
.cb
.cbufend
= gf_out
.cb
.cbuf
;
319 gf_out
.file
= (FILE *)txt
;
323 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_fwritec_locale
327 else if(src
== PipeStar
){
328 gf_out
.pipe
= (PIPE_S
*)txt
;
329 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_pwritec_locale
333 gf_out
.txtp
= (char *)txt
;
334 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_swritec_locale
341 * setup to use and return a pointer to the generic
345 gf_set_so_readc(gf_io_t
*gc
, STORE_S
*so
)
347 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
357 gf_clear_so_readc(STORE_S
*so
)
361 if((sp
= gf_so_in
) != NULL
){
363 gf_so_in
= gf_so_in
->next
;
364 fs_give((void **) &sp
);
367 alpine_panic("Programmer botch: Can't unstack store readc");
370 alpine_panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
379 gf_set_so_writec(gf_io_t
*pc
, STORE_S
*so
)
381 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
384 sp
->next
= gf_so_out
;
391 gf_clear_so_writec(STORE_S
*so
)
395 if((sp
= gf_so_out
) != NULL
){
397 gf_so_out
= gf_so_out
->next
;
398 fs_give((void **) &sp
);
401 alpine_panic("Programmer botch: Can't unstack store writec");
404 alpine_panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
414 return(so_writec(c
, gf_so_out
->so
));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c
)
424 return(so_readc(c
, gf_so_in
->so
));
428 /* get a character from a file */
429 /* assumes gf_in struct is filled in */
431 gf_freadc(unsigned char *c
)
437 clearerr(gf_in
.file
);
438 rv
= fread(c
, sizeof(unsigned char), (size_t)1, gf_in
.file
);
439 } while(!rv
&& ferror(gf_in
.file
) && errno
== EINTR
);
446 gf_freadc_locale(unsigned char *c
)
448 return(generic_readc_locale(c
, gf_freadc_getchar
, (void *) gf_in
.file
, &gf_in
.cb
));
453 * This is just to make it work with generic_readc_locale.
456 gf_freadc_getchar(unsigned char *c
, void *extraarg
)
461 file
= (FILE *) extraarg
;
466 rv
= fread(c
, sizeof(unsigned char), (size_t)1, file
);
467 } while(!rv
&& ferror(file
) && errno
== EINTR
);
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
481 unsigned char ch
= (unsigned char)c
;
485 rv
= fwrite(&ch
, sizeof(unsigned char), (size_t)1, gf_out
.file
);
486 while(!rv
&& ferror(gf_out
.file
) && errno
== EINTR
);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c
)
501 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
503 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
504 for(i
= 0; i
< outchars
; i
++)
505 if(gf_fwritec(obuf
[i
]) != 1){
517 * Read unicode characters from windows filesystem and return
518 * them as a stream of UTF-8 characters. The stream is assumed
519 * opened so that it will know how to put together the unicode.
521 * (This is totally untested, copied loosely from so_file_readc_windows
522 * which may or may not be appropriate.)
525 gf_freadc_windows(unsigned char *c
)
530 /* already got some from previous call? */
531 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
532 *c
= *gf_in
.cb
.cbufp
;
535 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
536 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
537 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
544 /* windows only so second arg is ignored */
545 ucs
= read_a_wide_char(gf_in
.file
, NULL
);
546 rv
= (ucs
== CCONV_EOF
) ? 0 : 1;
551 * Now we need to convert the UCS character to UTF-8
552 * and dole out the UTF-8 one char at a time.
554 gf_in
.cb
.cbufend
= utf8_put(gf_in
.cb
.cbuf
, (unsigned long) ucs
);
555 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
556 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
557 *c
= *gf_in
.cb
.cbufp
;
559 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
560 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
561 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
570 #endif /* _WINDOWS */
574 gf_preadc(unsigned char *c
)
576 return(pipe_readc(c
, gf_in
.pipe
));
581 gf_preadc_locale(unsigned char *c
)
583 return(generic_readc_locale(c
, gf_preadc_getchar
, (void *) gf_in
.pipe
, &gf_in
.cb
));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c
, void *extraarg
)
595 pipe
= (PIPE_S
*) extraarg
;
597 return(pipe_readc(c
, pipe
));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
609 return(pipe_writec(c
, gf_out
.pipe
));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c
)
622 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
624 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
625 for(i
= 0; i
< outchars
; i
++)
626 if(gf_pwritec(obuf
[i
]) != 1){
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_in struct is filled in */
639 gf_sreadc(unsigned char *c
)
641 return((gf_in
.n
) ? *c
= *(gf_in
.txtp
)++, gf_in
.n
-- : 0);
646 gf_sreadc_locale(unsigned char *c
)
648 return(generic_readc_locale(c
, gf_sreadc_getchar
, NULL
, &gf_in
.cb
));
653 gf_sreadc_getchar(unsigned char *c
, void *extraarg
)
656 * extraarg is ignored and gf_sreadc just uses globals instead.
657 * That's ok as long as we don't call it more than once at a time.
659 return(gf_sreadc(c
));
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
671 return((gf_out
.n
) ? *(gf_out
.txtp
)++ = c
, gf_out
.n
-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c
)
684 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
686 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
687 for(i
= 0; i
< outchars
; i
++)
688 if(gf_swritec(obuf
[i
]) != 1){
699 * output the given string with the given function
702 gf_puts(register char *s
, gf_io_t pc
)
705 if(!(*pc
)((unsigned char)*s
++))
706 return(0); /* ERROR putting char ! */
713 * output the given string with the given function
716 gf_nputs(register char *s
, long int n
, gf_io_t pc
)
719 if(!(*pc
)((unsigned char)*s
++))
720 return(0); /* ERROR putting char ! */
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * function's use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c
,
743 int (*get_a_char
)(unsigned char *, void *),
747 unsigned long octets_so_far
= 0, remaining_octets
;
748 unsigned char *inputp
;
751 unsigned char inputbuf
[20];
755 /* already got some from previous call? */
756 if(cb
->cbufend
> cb
->cbuf
){
760 if(cb
->cbufp
>= cb
->cbufend
){
761 cb
->cbufend
= cb
->cbuf
;
762 cb
->cbufp
= cb
->cbuf
;
768 memset(inputbuf
, 0, sizeof(inputbuf
));
769 if((*get_a_char
)(&ch
, extraarg
) == 0)
772 inputbuf
[octets_so_far
++] = ch
;
775 remaining_octets
= octets_so_far
;
777 ucs
= mbtow(ps_global
->input_cs
, &inputp
, &remaining_octets
);
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far
>= sizeof(inputbuf
))
792 if((*get_a_char
)(&ch
, extraarg
) == 0)
795 inputbuf
[octets_so_far
++] = ch
;
799 /* got a good UCS-4 character */
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
810 cb
->cbufend
= utf8_put(cb
->cbuf
, (unsigned long) ucs
);
811 cb
->cbufp
= cb
->cbuf
;
812 if(cb
->cbufend
> cb
->cbuf
){
815 if(cb
->cbufp
>= cb
->cbufend
){
816 cb
->cbufend
= cb
->cbuf
;
817 cb
->cbufp
= cb
->cbuf
;
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
837 FILTER_S
*flt
, *fltn
= gf_master
;
839 while((flt
= fltn
) != NULL
){ /* free list of old filters */
841 fs_give((void **)&flt
);
845 gf_error_string
= NULL
; /* clear previous errors */
846 gf_byte_count
= 0L; /* reset counter */
852 * link the given filter into the filter chain
855 gf_link_filter(filter_t f
, void *data
)
857 FILTER_S
*new, *tail
;
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f
== gf_nvtnl_local
|| f
== gf_local_nvtnl
)
868 new = (FILTER_S
*)fs_get(sizeof(FILTER_S
));
869 memset(new, 0, sizeof(FILTER_S
));
871 new->f
= f
; /* set the function pointer */
872 new->opt
= data
; /* set any optional parameter data */
873 (*f
)(new, GF_RESET
); /* have it setup initial state */
875 if((tail
= gf_master
) != NULL
){ /* or add it to end of existing */
876 while(tail
->next
) /* list */
881 else /* attach new struct to list */
882 gf_master
= new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
891 gf_terminal(FILTER_S
*f
, int flg
)
897 if((*last_filter
)(*op
++) <= 0) /* generic terminal filter */
898 gf_error(errno
? error_description(errno
) : "Error writing pipe");
902 else if(flg
== GF_RESET
)
903 errno
= 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
912 gf_set_terminal(gf_io_t f
) /* function to set generic filter */
920 * common function for filters to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
926 /* let the user know the error passed in s */
928 longjmp(gf_error_state
, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
939 gf_pipe(gf_io_t gc
, gf_io_t pc
)
940 /* how to get a character */
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state
)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string
? gf_error_string
: "NULL"));
952 return(gf_error_string
); /* */
956 * set and link in the terminal filter
959 gf_link_filter(gf_terminal
, NULL
);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because it's useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
971 GF_INIT(gf_master
, gf_master
);
977 if(!(gf_byte_count
& 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled through the alarm()
985 GF_PUTC(gf_master
, c
& 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master
);
993 (*gf_master
->f
)(gf_master
, GF_EOD
);
996 dprint((4, "done.\n"));
997 return(NULL
); /* everything went OK */
1002 * return the number of bytes piped so far
1005 gf_bytes_piped(void)
1007 return(gf_byte_count
);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
1023 gf_filter(char *cmd
, char *prepend
, STORE_S
*source_so
, gf_io_t pc
,
1024 FILTLIST_S
*aux_filters
, int silent
, int disable_reset
,
1025 void (*pipecb_f
)(PIPE_S
*, int, void *))
1027 unsigned char c
, obuf
[MAX(MB_LEN_MAX
,32)];
1028 int flags
, outchars
, i
;
1029 char *errstr
= NULL
, buf
[MAILTMPLEN
];
1032 #ifdef NON_BLOCKING_IO
1036 dprint((4, "so_filter: \"%s\"\n", cmd
? cmd
: "?"));
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1044 if(ps_global
->keyboard_charmap
&& strucmp("UTF-8", ps_global
->keyboard_charmap
))
1045 gf_link_filter(gf_utf8
, gf_utf8_opt(ps_global
->keyboard_charmap
));
1047 for( ; aux_filters
&& aux_filters
->filter
; aux_filters
++)
1048 gf_link_filter(aux_filters
->filter
, aux_filters
->data
);
1050 gf_set_terminal(pc
);
1051 gf_link_filter(gf_terminal
, NULL
);
1055 cb
.cbufend
= cb
.cbuf
;
1058 * Spawn filter feeding it data, and reading what it writes.
1060 so_seek(source_so
, 0L, 0);
1061 flags
= PIPE_WRITE
| PIPE_READ
| PIPE_NOSHELL
1062 | (silent
? PIPE_SILENT
: 0)
1063 | (!disable_reset
? PIPE_RESET
: 0);
1065 if((fpipe
= open_system_pipe(cmd
, NULL
, NULL
, flags
, 0, pipecb_f
, pipe_report_error
)) != NULL
){
1067 #ifdef NON_BLOCKING_IO
1069 if(fcntl(fileno(fpipe
->in
.f
), F_SETFL
, NON_BLOCKING_IO
) == -1)
1070 errstr
= "Can't set up non-blocking IO";
1072 if(prepend
&& (fputs(prepend
, fpipe
->out
.f
) == EOF
1073 || fputc('\n', fpipe
->out
.f
) == EOF
))
1074 errstr
= error_description(errno
);
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n
= 0; !errstr
&& fpipe
->out
.f
&& n
< 1024; n
++)
1081 if(!so_readc(&c
, source_so
)){
1082 fclose(fpipe
->out
.f
);
1083 fpipe
->out
.f
= NULL
;
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1092 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1093 if(fputc(obuf
[i
], fpipe
->out
.f
) == EOF
)
1094 errstr
= error_description(errno
);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1103 clearerr(fpipe
->in
.f
); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr
&& fgets(buf
, sizeof(buf
), fpipe
->in
.f
))
1106 errstr
= gf_filter_puts(buf
);
1108 /* then fgets failed! */
1109 if(!errstr
&& !(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)){
1110 if(feof(fpipe
->in
.f
)) /* nothing else interesting! */
1112 else if(ferror(fpipe
->in
.f
)) /* bummer. */
1113 errstr
= error_description(errno
);
1115 else if(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)
1116 clearerr(fpipe
->in
.f
);
1119 #else /* !NON_BLOCKING_IO */
1121 if(prepend
&& (pipe_puts(prepend
, fpipe
) == EOF
1122 || pipe_putc('\n', fpipe
) == EOF
))
1123 errstr
= error_description(errno
);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr
&& so_readc(&c
, source_so
))
1130 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1131 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1132 if(pipe_putc(obuf
[i
], fpipe
) == EOF
)
1133 errstr
= error_description(errno
);
1135 if(pipe_close_write(fpipe
))
1136 errstr
= _("Pipe command returned error.");
1138 while(!errstr
&& pipe_gets(buf
, sizeof(buf
), fpipe
))
1139 errstr
= gf_filter_puts(buf
);
1141 #endif /* !NON_BLOCKING_IO */
1143 if(close_system_pipe(&fpipe
, NULL
, pipecb_f
) && !errstr
)
1144 errstr
= _("Pipe command returned error.");
1149 errstr
= _("Error setting up pipe command.");
1156 * gf_filter_puts - write the given string down the filter's pipe
1159 gf_filter_puts(register char *s
)
1161 GF_INIT(gf_master
, gf_master
);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state
)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string
? gf_error_string
: "NULL"));
1169 return(gf_error_string
);
1173 GF_PUTC(gf_master
, (*s
++) & 0xff);
1175 GF_END(gf_master
, gf_master
);
1181 * gf_filter_eod - flush pending data in the filter's input queue and deliver
1182 * the GF_EOD marker.
1187 GF_INIT(gf_master
, gf_master
);
1188 (void) GF_FLUSH(gf_master
);
1189 (*gf_master
->f
)(gf_master
, GF_EOD
);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1218 * gf_xxx_filter(f, flg)
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1249 * - Filters must pass GF_EOD they receive on to the next
1250 * filter in the chain so it has the opportunity to flush
1251 * any buffered data.
1253 * - All filters expect NVT end-of-lines. The idea is to prepend
1254 * or append either the gf_local_nvtnl or gf_nvtnl_local
1255 * os-dependent filters to the data on the appropriate end of the
1256 * pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
1275 gf_binary_b64(FILTER_S
*f
, int flg
)
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f
, f
->next
);
1282 register unsigned char c
;
1283 register unsigned char t
= f
->t
;
1284 register long n
= f
->n
;
1286 while(GF_GETC(f
, c
)){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1292 GF_PUTC(f
->next
, v
[c
>> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t
= c
<< 4; /* remember high 2 bits for next */
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1300 GF_PUTC(f
->next
, v
[(t
|(c
>>4)) & 0x3f]);
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1307 GF_PUTC(f
->next
, v
[(t
|(c
>> 6)) & 0x3f]);
1308 GF_PUTC(f
->next
, v
[c
& 0x3f]);
1312 if(n
== 45){ /* start a new line? */
1313 GF_PUTC(f
->next
, '\015');
1314 GF_PUTC(f
->next
, '\012');
1323 else if(flg
== GF_EOD
){ /* no more data */
1324 switch (f
->n
% 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1329 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1330 GF_PUTC(f
->next
, '='); /* byte 3 */
1331 GF_PUTC(f
->next
, '='); /* byte 4 */
1335 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1336 GF_PUTC(f
->next
, '='); /* byte 4 */
1342 GF_PUTC(f
->next
, '\015');
1343 GF_PUTC(f
->next
, '\012');
1346 (void) GF_FLUSH(f
->next
);
1347 (*f
->next
->f
)(f
->next
, GF_EOD
);
1349 else if(flg
== GF_RESET
){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1361 gf_b64_binary(FILTER_S
*f
, int flg
)
1363 static char v
[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f
, f
->next
);
1374 register unsigned char c
;
1375 register unsigned char t
= f
->t
;
1376 register int n
= (int) f
->n
;
1377 register int state
= f
->f1
;
1379 while(GF_GETC(f
, c
)){
1384 gf_error("Illegal '=' in base64 text");
1389 /* in range, and a valid value? */
1390 if((c
& ~0x7f) || (c
= v
[c
]) > 63){
1392 switch (n
++) { /* check quantum position */
1394 state
++; /* expect an equal as next char */
1398 n
= 0L; /* restart quantum */
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1408 switch (n
++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f
->next
, (t
|(c
>> 4)));
1415 t
= c
<< 4; /* byte 2: high 4 bits */
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f
->next
, (t
|(c
>> 2)));
1420 t
= c
<< 6; /* byte 3: high 2 bits */
1424 GF_PUTC(f
->next
, t
| c
);
1425 n
= 0L; /* reinitialize mechanism */
1436 else if(flg
== GF_EOD
){
1437 (void) GF_FLUSH(f
->next
);
1438 (*f
->next
->f
)(f
->next
, GF_EOD
);
1440 else if(flg
== GF_RESET
){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f
->n
= 0L; /* quantum position */
1443 f
->f1
= 0; /* state holder: equal seen? */
1451 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452 * encoding described in rfc1341
1455 #define GF_MAXLINE 80 /* good buffer size */
1458 * default action for QUOTED-PRINTABLE to 8BIT decoder
1460 #define GF_QP_DEFAULT(f, c) { \
1463 /* reset white space! */ \
1464 (f)->linep = (f)->line; \
1465 *((f)->linep)++ = ' '; \
1467 else if((c) == '='){ \
1471 GF_PUTC((f)->next, (c)); \
1476 * QUOTED-PRINTABLE to 8BIT filter
/*
 * gf_qp_8bit - decode QUOTED-PRINTABLE input and hand the resulting
 * octets to f->next.
 *
 * f   -- filter state.  f->f1 carries the decoder state between calls,
 *        f->t the value of the first hex digit of an escape, f->f2 that
 *        first hex character itself (so it can be written back verbatim
 *        if the second one turns out not to be hex), and f->line/f->linep
 *        a GF_MAXLINE-char buffer of spaces whose fate is not yet known.
 * flg -- GF_RESET allocates f->line; GF_EOD frees it, flushes f->next and
 *        forwards GF_EOD; on the data path input is read with GF_GETC.
 *
 * An equals sign followed by CR is a soft line break, and a doubled
 * equals sign yields a single one.  A non-hex character inside an escape
 * is not fatal: the state becomes STOP_DECODING, the bytes collected so
 * far are written out as they came in, a status message is queued, and
 * everything after that is copied through without decoding.
 */
1479 gf_qp_8bit(FILTER_S
*f
, int flg
)
1482 GF_INIT(f
, f
->next
);
1485 register unsigned char c
;
1486 register int state
= f
->f1
;
1488 while(GF_GETC(f
, c
)){
1491 case DFL
: /* default case */
1493 GF_QP_DEFAULT(f
, c
);
1496 case CCR
: /* non-significant space */
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f
, c
);
1505 if(c
== '\015'){ /* "=\015" is a soft EOL */
1510 if(c
== '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f
->next
, '=');
1516 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state
= STOP_DECODING
;
1540 GF_PUTC(f
->next
, '=');
1541 GF_PUTC(f
->next
, c
);
1542 q_status_message(SM_ORDER
,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c
, c
));
1549 if (isdigit ((unsigned char)c
))
1552 f
->t
= c
- (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1554 f
->f2
= c
; /* store character in case we have to
1555 back out in !isxdigit below */
1562 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1563 state
= STOP_DECODING
;
1564 GF_PUTC(f
->next
, '=');
1565 GF_PUTC(f
->next
, f
->f2
);
1566 GF_PUTC(f
->next
, c
);
1567 q_status_message(SM_ORDER
,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c
, c
, f
->f2
));
1574 if (isdigit((unsigned char)c
))
1577 c
-= (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f
->next
, c
+ (f
->t
<< 4));
1583 if(c
== ' '){ /* toss it in with other spaces */
1584 if(f
->linep
- f
->line
< GF_MAXLINE
)
1585 *(f
->linep
)++ = ' ';
1590 if(c
== '\015'){ /* not our white space! */
1591 f
->linep
= f
->line
; /* reset buffer */
1592 GF_PUTC(f
->next
, '\015');
1596 /* the spaces are ours, write 'em */
1597 f
->n
= f
->linep
- f
->line
;
1599 GF_PUTC(f
->next
, ' ');
1601 GF_QP_DEFAULT(f
, c
); /* take care of 'c' in default way */
1604 case STOP_DECODING
:
1605 GF_PUTC(f
->next
, c
);
1613 else if(flg
== GF_EOD
){
1614 fs_give((void **)&(f
->line
));
1615 (void) GF_FLUSH(f
->next
);
1616 (*f
->next
->f
)(f
->next
, GF_EOD
);
1618 else if(flg
== GF_RESET
){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1621 f
->linep
= f
->line
= (char *)fs_get(GF_MAXLINE
* sizeof(char));
1628 * USEFUL MACROS TO HELP WITH QP ENCODING
/*
 * QP_MAXL is the line-length limit the encoder macros compare f->n
 * against.  GF_8BIT_WRAP writes a QP soft line break (equals sign, CR,
 * LF) to the next filter; any other bookkeeping the full macro does is
 * not visible in this excerpt.
 */
1631 #define QP_MAXL 75 /* 76th place only for continuation */
1634 * Macro to test and wrap long quoted printable lines
1636 #define GF_8BIT_WRAP(f) { \
1637 GF_PUTC((f)->next, '='); \
1638 GF_PUTC((f)->next, '\015'); \
1639 GF_PUTC((f)->next, '\012'); \
1643 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
1644 * line break if needed.
/*
 * Write octet c as a three-character QP escape: an equals sign followed
 * by HEX_CHAR1(c) and HEX_CHAR2(c).  The line count f->n is advanced by
 * three first; when that exceeds QP_MAXL the count is restarted at 3
 * (the wrap step that goes with it is not visible in this excerpt).
 * c is evaluated more than once.
 */
1646 #define GF_8BIT_PUT_QUOTE(f, c) { \
1647 if(((f)->n += 3) > QP_MAXL){ \
1649 (f)->n = 3; /* set line count */ \
1651 GF_PUTC((f)->next, '='); \
1652 GF_PUTC((f)->next, HEX_CHAR1(c)); \
1653 GF_PUTC((f)->next, HEX_CHAR2(c)); \
1657 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
/*
 * Write an ordinary octet c, counting it in f->n and comparing the count
 * with QP_MAXL.  A period that would be the first character on a line is
 * sent through GF_8BIT_PUT_QUOTE instead of being written literally.
 *
 * NOTE(review): f and c are used without parentheses in this macro, so
 * callers should pass plain identifiers or constants only.
 */
1660 #define GF_8BIT_PUT(f, c) { \
1661 if((++(f->n)) > QP_MAXL){ \
1665 if(f->n == 1L && c == '.'){ \
1666 GF_8BIT_PUT_QUOTE(f, c); \
1670 GF_PUTC(f->next, c); \
1675 * default action for 8bit to quoted printable encoder
/*
 * Default-state classification of one octet for the QP encoder.  Space
 * and CR get their own branches (bodies not visible in this excerpt);
 * control characters, 0x7f, octets with the high bit set and the equals
 * sign go through GF_8BIT_PUT_QUOTE; everything else through GF_8BIT_PUT.
 *
 * NOTE(review): the expansion begins with a bare if rather than an
 * opening brace, and c is unparenthesized in several tests; take care
 * when using it as the body of an unbraced if/else.
 */
1677 #define GF_8BIT_DEFAULT(f, c) if((c) == ' '){ \
1680 else if(c == '\015'){ \
1683 else if(iscntrl(c & 0x7f) || (c == 0x7f) \
1684 || (c & 0x80) || (c == '=')){ \
1685 GF_8BIT_PUT_QUOTE(f, c); \
1688 GF_8BIT_PUT(f, c); \
1693 * 8BIT to QUOTED-PRINTABLE filter
/*
 * gf_8bit_qp - encode 8-bit input as QUOTED-PRINTABLE and pass it to
 * f->next.
 *
 * f   -- filter state.  f->f1 is the state left by the previous
 *        character, f->n the number of characters on the current output
 *        line, and f->t / f->f2 are the flag and bitmap handed to
 *        Find_Froms for every input character.
 * flg -- GF_RESET sets f->f1 to DFL, f->f2 to 1 and f->n to 0; GF_EOD
 *        writes a still-pending CR or space in quoted form, flushes and
 *        forwards GF_EOD; on the data path input is read with GF_GETC.
 *
 * A pending CR is written as a real CRLF in one branch and as a quoted
 * CR followed by default handling of the current character in the other.
 * A pending space is quoted when the current character is CR or when
 * f->t is set (f->t is then cleared); otherwise it is written literally.
 */
1696 gf_8bit_qp(FILTER_S
*f
, int flg
)
1698 short dummy_dots
= 0, dummy_dmap
= 1;
1699 GF_INIT(f
, f
->next
);
1702 register unsigned char c
;
1703 register int state
= f
->f1
;
1705 while(GF_GETC(f
, c
)){
1707 /* keep track of "^JFrom " */
1708 Find_Froms(f
->t
, dummy_dots
, f
->f2
, dummy_dmap
, c
);
1711 case DFL
: /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f
, c
);
1715 case CCR
: /* true line break? */
1718 GF_PUTC(f
->next
, '\015');
1719 GF_PUTC(f
->next
, '\012');
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f
, '\015');
1724 GF_8BIT_DEFAULT(f
, c
); /* and don't forget about c! */
1730 if(c
== '\015' || f
->t
){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f
, ' ');
1732 f
->t
= 0; /* reset From flag */
1735 GF_8BIT_PUT(f
, ' ');
1737 GF_8BIT_DEFAULT(f
, c
); /* handle 'c' in the default way */
1745 else if(flg
== GF_EOD
){
1748 GF_8BIT_PUT_QUOTE(f
, '\015'); /* write the last cr */
1752 GF_8BIT_PUT_QUOTE(f
, ' '); /* write the last space */
1756 (void) GF_FLUSH(f
->next
);
1757 (*f
->next
->f
)(f
->next
, GF_EOD
);
1759 else if(flg
== GF_RESET
){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f
->f1
= DFL
; /* state from last character */
1762 f
->f2
= 1; /* state of "^NFrom " bitmap */
1764 f
->n
= 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
/*
 * gf_convert_8bit_charset - map each input octet through a lookup table.
 *
 * f   -- filter state; f->opt, when non-NULL, is used as an array of
 *        unsigned char indexed by the input octet.
 * flg -- GF_RESET latches f->opt into conv_table; GF_EOD flushes f->next
 *        and forwards GF_EOD; on the data path every octet read with
 *        GF_GETC is written as conv_table[c], or unchanged when there is
 *        no table.
 *
 * NOTE(review): conv_table is function-static, so it is shared by every
 * instance of this filter and holds whatever the latest GF_RESET stored.
 */
1773 gf_convert_8bit_charset(FILTER_S
*f
, int flg
)
1775 static unsigned char *conv_table
= NULL
;
1776 GF_INIT(f
, f
->next
);
1779 register unsigned char c
;
1781 while(GF_GETC(f
, c
)){
1782 GF_PUTC(f
->next
, conv_table
? conv_table
[c
] : c
);
1787 else if(flg
== GF_EOD
){
1788 (void) GF_FLUSH(f
->next
);
1789 (*f
->next
->f
)(f
->next
, GF_EOD
);
1791 else if(flg
== GF_RESET
){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table
= (f
->opt
) ? (unsigned char *) (f
->opt
) : NULL
;
1799 typedef struct _utf8c_s
{
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
/*
 * gf_convert_utf8_charset - convert UTF-8 input to another charset using
 * a table indexed by Unicode code point.
 *
 * f   -- filter state.  f->f2 (more) counts the continuation octets
 *        still expected and f->n (u) accumulates the code point; f->opt
 *        is read as a UTF8C_S holding conv_table and report_err.
 * flg -- GF_RESET copies conv_table and report_err out of f->opt; GF_EOD
 *        flushes, releases f->opt with fs_give and forwards GF_EOD; on
 *        the data path input is read with GF_GETC.
 *
 * With no table, octets pass through unchanged.  Otherwise a completed
 * code point at or above 0xffff, or one the table maps to NOCHAR, takes
 * the not-representable path, which contains an fs_give of f->opt and a
 * gf_error call (presumably taken only when report_err is set; the test
 * is not visible here).  Stray continuation octets, a lead octet
 * arriving while a sequence is incomplete, and lead octets outside the
 * handled ranges each produce a question mark.
 *
 * NOTE(review): conv_table and report_err are function-static and so
 * shared by all instances.  GF_RESET dereferences f->opt without a NULL
 * test.  Two inline range comments look inaccurate for UTF-8: a lead
 * octet below 0xe0 carries 5 of 11 bits, and three-octet sequences begin
 * at U+0800, not U+1000.  In the disabled block, the branch for lead
 * octets below 0xfe reuses the 0x03 mask and comment of the branch above
 * it; a one-bit payload would be expected there -- confirm before
 * enabling.
 */
1811 gf_convert_utf8_charset(FILTER_S
*f
, int flg
)
1813 static unsigned short *conv_table
= NULL
;
1814 static int report_err
= 0;
1815 register int more
= f
->f2
;
1816 register long u
= f
->n
;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f
, f
->next
);
1827 register unsigned char c
;
1829 while(GF_GETC(f
, c
)){
1830 if(!conv_table
){ /* can't do much if no conversion table */
1831 GF_PUTC(f
->next
, c
);
1833 /* UTF-8 continuation? */
1834 else if((c
> 0x7f) && (c
< 0xc0)){
1836 u
<<= 6; /* shift current value by 6 bits */
1838 if (!--more
){ /* last octet? */
1839 if(u
>= 0xffff || (u
= conv_table
[u
]) == NOCHAR
){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1848 fs_give((void **) &f
->opt
);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1859 c
= (unsigned char) (u
>> 8);
1860 GF_PUTC(f
->next
, c
);
1863 c
= (unsigned char) u
& 0xff;
1866 GF_PUTC(f
->next
, c
);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f
->next
, '?');
1874 if(more
){ /* incomplete UTF-8 character */
1875 GF_PUTC(f
->next
, '?');
1878 if(c
< 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f
->next
, c
);
1881 else if(c
< 0xe0){ /* U+0080 - U+07ff */
1882 u
= c
& 0x1f; /* first 5 bits of 12 */
1885 else if(c
< 0xf0){ /* U+1000 - U+ffff */
1886 u
= c
& 0x0f; /* first 4 bits of 16 */
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c
< 0xf8){ /* U+10000 - U+10ffff */
1891 u
= c
& 0x07; /* first 3 bits of 20.5 */
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c
< 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u
= c
& 0x03; /* first 2 bits of 26 */
1899 else if (c
< 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u
= c
& 0x03; /* first 2 bits of 26 */
1904 else{ /* not in Unicode */
1905 GF_PUTC(f
->next
, '?');
1914 else if(flg
== GF_EOD
){
1915 (void) GF_FLUSH(f
->next
);
1917 fs_give((void **) &f
->opt
);
1919 (*f
->next
->f
)(f
->next
, GF_EOD
);
1921 else if(flg
== GF_RESET
){
1922 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1923 conv_table
= ((UTF8C_S
*) f
->opt
)->conv_table
;
1924 report_err
= ((UTF8C_S
*) f
->opt
)->report_err
;
/*
 * gf_convert_utf8_charset_opt - build the option block for
 * gf_convert_utf8_charset.
 *
 * table      -- conversion table, stored as given (not copied).
 * report_err -- stored as given.
 *
 * Returns a UTF8C_S obtained from fs_get, cast to void pointer.
 * gf_convert_utf8_charset releases its f->opt with fs_give, so the
 * result is presumably meant to be handed over as that filter option
 * and not freed by the caller -- confirm against the call sites.
 */
1932 gf_convert_utf8_charset_opt(void *table
, int report_err
)
1936 utf8c
= (UTF8C_S
*) fs_get(sizeof(UTF8C_S
));
1937 utf8c
->conv_table
= table
;
1938 utf8c
->report_err
= report_err
;
1939 return((void *) utf8c
);
1944 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1946 * The routine is called ..._to_euc but it is really to either euc (unix Pine)
1947 * or to Shift-JIS (if PC-Pine).
/*
 * gf_2022_jp_to_euc - rewrite ISO-2022-JP text as 8-bit Japanese and
 * pass it to f->next.
 *
 * f   -- filter state.  f->f1 is the parser state (DFL, ESC, ESCDOL,
 *        ESCPAR and EUC appear below), f->t is set while a double-byte
 *        run is being converted, and f->f2 holds the first byte of a
 *        pair (-1 when none is pending) for the Shift-JIS build.
 * flg -- GF_RESET puts the state back to DFL and clears f->t; GF_EOD
 *        contains writes of ESC, ESC $ and ESC ( (apparently to emit an
 *        escape sequence that was only partly read; the selecting tests
 *        are not visible here), then flushes and forwards GF_EOD; on the
 *        data path input is read with GF_GETC.
 *
 * ESC $ B and ESC $ @ turn conversion on; ESC ( B, ESC ( J and ESC ( H
 * turn it off.  Escape sequences that do not match are copied through
 * unchanged.  Under _WINDOWS byte pairs are repacked with row and cell
 * offsets; the other visible output path sets the high bit on bytes in
 * the range 0x21-0x7e and leaves the rest alone.
 */
1950 gf_2022_jp_to_euc(FILTER_S
*f
, int flg
)
1952 register unsigned char c
;
1953 register int state
= f
->f1
;
1956 * f->t lit means we're in middle of decoding a sequence of characters.
1957 * f->f2 keeps track of first character of pair for Shift-JIS.
1958 * f->f1 is the state.
1961 GF_INIT(f
, f
->next
);
1964 while(GF_GETC(f
, c
)){
1966 case ESC
: /* saw ESC */
1967 if(!f
->t
&& c
== '$')
1969 else if(f
->t
&& c
== '(')
1972 GF_PUTC(f
->next
, '\033');
1973 GF_PUTC(f
->next
, c
);
1979 case ESCDOL
: /* saw ESC $ */
1980 if(c
== 'B' || c
== '@'){
1982 f
->t
= 1; /* filtering into euc */
1983 f
->f2
= -1; /* first character of pair */
1986 GF_PUTC(f
->next
, '\033');
1987 GF_PUTC(f
->next
, '$');
1988 GF_PUTC(f
->next
, c
);
1994 case ESCPAR
: /* saw ESC ( */
1995 if(c
== 'B' || c
== 'J' || c
== 'H'){
1997 f
->t
= 0; /* done filtering */
2000 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
2001 GF_PUTC(f
->next
, '('); /* escape sequences, which */
2002 GF_PUTC(f
->next
, c
); /* this appears to be. */
2007 case EUC
: /* filtering into euc */
2011 #ifdef _WINDOWS /* Shift-JIS */
2012 c
&= 0x7f; /* 8-bit can't win */
2013 if (f
->f2
>= 0){ /* second of a pair? */
2014 int rowOffset
= (f
->f2
< 95) ? 112 : 176;
2015 int cellOffset
= (f
->f2
% 2) ? ((c
> 95) ? 32 : 31)
2018 GF_PUTC(f
->next
, ((f
->f2
+ 1) >> 1) + rowOffset
);
2019 GF_PUTC(f
->next
, c
+ cellOffset
);
2020 f
->f2
= -1; /* restart */
2022 else if(c
> 0x20 && c
< 0x7f)
2023 f
->f2
= c
; /* first of pair */
2025 GF_PUTC(f
->next
, c
); /* write CTL as itself */
2029 GF_PUTC(f
->next
, (c
> 0x20 && c
< 0x7f) ? c
| 0x80 : c
);
2040 GF_PUTC(f
->next
, c
);
2049 else if(flg
== GF_EOD
){
2052 GF_PUTC(f
->next
, '\033');
2056 GF_PUTC(f
->next
, '\033');
2057 GF_PUTC(f
->next
, '$');
2061 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
2062 GF_PUTC(f
->next
, '('); /* escape sequences. */
2066 (void) GF_FLUSH(f
->next
);
2067 (*f
->next
->f
)(f
->next
, GF_EOD
);
2069 else if(flg
== GF_RESET
){
2070 dprint((9, "-- gf_reset jp_to_euc\n"));
2071 f
->f1
= DFL
; /* state */
2072 f
->t
= 0; /* not translating to euc */
2078 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
/*
 * gf_native8bitjapanese_to_2022_jp - forward f and flg to either
 * gf_sjis_to_2022_jp or gf_euc_to_2022_jp.  The selection between the
 * two calls is not visible in this excerpt; presumably it is a
 * compile-time platform test (Shift-JIS on PC, EUC on Unix) -- confirm.
 */
2081 gf_native8bitjapanese_to_2022_jp(FILTER_S
*f
, int flg
)
2084 gf_sjis_to_2022_jp(f
, flg
);
2086 gf_euc_to_2022_jp(f
, flg
);
/*
 * gf_euc_to_2022_jp - convert EUC-encoded Japanese to ISO-2022-JP and
 * pass it to f->next.
 *
 * The visible fragments write ESC $ B ahead of a byte that is output
 * with its high bit cleared, ESC ( B ahead of a byte that is output
 * unchanged, and, on GF_EOD, a closing ESC ( B before the flush and the
 * forwarding of GF_EOD.  Per the note inside the body, f->t records that
 * an opening sequence has been sent without its closing one.  The tests
 * that choose between these paths, and the GF_RESET initialisation, are
 * not part of this excerpt.
 */
2092 gf_euc_to_2022_jp(FILTER_S
*f
, int flg
)
2094 register unsigned char c
;
2097 * f->t lit means we've sent the start esc seq but not the end seq.
2098 * f->f2 keeps track of first character of pair for Shift-JIS.
2101 GF_INIT(f
, f
->next
);
2104 while(GF_GETC(f
, c
)){
2107 GF_PUTC(f
->next
, c
& 0x7f);
2110 GF_PUTC(f
->next
, '\033');
2111 GF_PUTC(f
->next
, '(');
2112 GF_PUTC(f
->next
, 'B');
2113 GF_PUTC(f
->next
, c
);
2120 GF_PUTC(f
->next
, '\033');
2121 GF_PUTC(f
->next
, '$');
2122 GF_PUTC(f
->next
, 'B');
2123 GF_PUTC(f
->next
, c
& 0x7f);
2127 GF_PUTC(f
->next
, c
);
2134 else if(flg
== GF_EOD
){
2136 GF_PUTC(f
->next
, '\033');
2137 GF_PUTC(f
->next
, '(');
2138 GF_PUTC(f
->next
, 'B');
2143 (void) GF_FLUSH(f
->next
);
2144 (*f
->next
->f
)(f
->next
, GF_EOD
);
2146 else if(flg
== GF_RESET
){
2147 dprint((9, "-- gf_reset euc_to_jp\n"));
/*
 * gf_sjis_to_2022_jp - convert Shift-JIS encoded Japanese to ISO-2022-JP
 * and pass it to f->next.
 *
 * f->f2 holds the first byte of a two-byte character (a value of zero or
 * more means one is pending).  When the second byte c arrives the pair
 * is written as ((first - rowOffset) << 1) - adjust followed by
 * c - cellOffset, where adjust is 1 for c below 159, rowOffset is 112
 * for a first byte below 160 and 176 otherwise, and cellOffset is 126
 * when adjust is 0, else 32 for c above 127 and 31 otherwise.
 *
 * The visible fragments also write ESC ( B ahead of a byte output
 * unchanged, ESC $ B when a double-byte run starts, and a closing
 * ESC ( B on GF_EOD before the flush.  The selecting tests and the
 * GF_RESET initialisation are not part of this excerpt.
 */
2154 gf_sjis_to_2022_jp(FILTER_S
*f
, int flg
)
2156 register unsigned char c
;
2159 * f->t lit means we've sent the start esc seq but not the end seq.
2160 * f->f2 keeps track of first character of pair for Shift-JIS.
2163 GF_INIT(f
, f
->next
);
2166 while(GF_GETC(f
, c
)){
2168 if(f
->f2
>= 0){ /* second of a pair? */
2169 int adjust
= c
< 159;
2170 int rowOffset
= f
->f2
< 160 ? 112 : 176;
2171 int cellOffset
= adjust
? (c
> 127 ? 32 : 31) : 126;
2173 GF_PUTC(f
->next
, ((f
->f2
- rowOffset
) << 1) - adjust
);
2174 GF_PUTC(f
->next
, c
- cellOffset
);
2178 f
->f2
= c
; /* remember first of pair */
2181 GF_PUTC(f
->next
, '\033');
2182 GF_PUTC(f
->next
, '(');
2183 GF_PUTC(f
->next
, 'B');
2184 GF_PUTC(f
->next
, c
);
2191 GF_PUTC(f
->next
, '\033');
2192 GF_PUTC(f
->next
, '$');
2193 GF_PUTC(f
->next
, 'B');
2198 GF_PUTC(f
->next
, c
);
2205 else if(flg
== GF_EOD
){
2207 GF_PUTC(f
->next
, '\033');
2208 GF_PUTC(f
->next
, '(');
2209 GF_PUTC(f
->next
, 'B');
2214 (void) GF_FLUSH(f
->next
);
2215 (*f
->next
->f
)(f
->next
, GF_EOD
);
2217 else if(flg
== GF_RESET
){
2218 dprint((9, "-- gf_reset sjis_to_jp\n"));
2227 * Various charset to UTF-8 Translation filter
2231 * utf8 conversion options
2233 typedef struct _utf8_s
{
/*
 * Line-buffer helpers for gf_utf8.  f->f2 is the current size of f->line
 * and UTF8_EOB(f) its last slot.  UTF8_ADD grows the buffer by
 * UTF8_BLOCK with fs_resize and recomputes the caller's local variables
 * eobuf and p, which must therefore be in scope.  GF_UTF8_FLUSH converts
 * the bytes collected between f->line and p: with no charset in the
 * UTF8_S option, bytes with the high bit set become question marks and
 * the rest pass through; otherwise the text goes through utf8_text_cs
 * and the result is written out (and freed when it is a separate
 * buffer); the remaining branch writes one question mark per input byte.
 */
2238 #define UTF8_BLOCK 1024
2239 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2240 #define UTF8_ADD(f, c) \
2243 f->f2 += UTF8_BLOCK; \
2244 fs_resize((void **)&f->line, \
2245 (size_t) f->f2 * sizeof(char)); \
2246 eobuf = UTF8_EOB(f); \
2247 p = eobuf - UTF8_BLOCK; \
2251 #define GF_UTF8_FLUSH(f) { \
2253 SIZEDTEXT intext, outtext; \
2254 intext.data = (unsigned char *) f->line; \
2255 intext.size = p - f->line; \
2256 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2257 if(!((UTF8_S *) f->opt)->charset){ \
2258 for(n = 0; n < intext.size; n++) \
2259 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2261 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2262 for(n = 0; n < outtext.size; n++) \
2263 GF_PUTC(f->next, outtext.data[n]); \
2264 if(outtext.data && intext.data != outtext.data) \
2265 fs_give((void **) &outtext.data); \
2268 for(n = 0; n < intext.size; n++) \
2269 GF_PUTC(f->next, '?'); \
2275 * gf_utf8 - text in specified charset to UTF-8 filter
2276 * Process line-at-a-time rather than character
2277 * because ISO-2022-JP. Call utf8_text_cs by hand
2278 * rather than utf8_text to reduce the cost of
2279 * utf8_charset() for each line.
/*
 * gf_utf8 - collect input a line at a time and convert it to UTF-8.
 *
 * f   -- filter state.  f->line is the growable input buffer, f->f2 its
 *        size, f->linep the saved write position (p locally) and f->f1
 *        the state; f->opt is the option block used by GF_UTF8_FLUSH.
 * flg -- GF_EOD releases f->line and f->opt with fs_give, flushes and
 *        forwards GF_EOD; the reset path sets f->f2 to UTF8_BLOCK and
 *        allocates f->line; on the data path input is read with GF_GETC.
 *
 * The visible data-path fragments write a CRLF to f->next and append a
 * CR to the buffer with UTF8_ADD; the rest of the line handling is not
 * part of this excerpt.
 */
2282 gf_utf8(FILTER_S
*f
, int flg
)
2284 register char *p
= f
->linep
;
2285 register char *eobuf
= UTF8_EOB(f
);
2286 GF_INIT(f
, f
->next
);
2289 register int state
= f
->f1
;
2290 register unsigned char c
;
2292 while(GF_GETC(f
, c
)){
2300 GF_PUTC(f
->next
, '\015');
2301 GF_PUTC(f
->next
, '\012');
2304 UTF8_ADD(f
, '\015');
2322 else if(flg
== GF_EOD
){
2327 fs_give((void **) &f
->line
);
2328 fs_give((void **) &f
->opt
);
2329 (void) GF_FLUSH(f
->next
);
2330 (*f
->next
->f
)(f
->next
, GF_EOD
);
2333 dprint((9, "-- gf_reset utf8\n"));
2335 f
->f2
= UTF8_BLOCK
; /* input buffer length */
2336 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
/*
 * gf_utf8_opt - build the option block for gf_utf8.
 *
 * charset -- charset name, looked up with utf8_charset.
 *
 * Returns a UTF8_S obtained from fs_get, cast to void pointer.  Its
 * charset member is the lookup result, except that a charset of type
 * CT_ASCII is stored as NULL so that the filter turns 8-bit input into
 * question marks instead of passing it through.  gf_utf8 releases its
 * f->opt with fs_give at GF_EOD, so the result is presumably meant to be
 * handed over as that filter option -- confirm against the call sites.
 */
2344 gf_utf8_opt(char *charset
)
2348 utf8
= (UTF8_S
*) fs_get(sizeof(UTF8_S
));
2350 utf8
->charset
= (CHARSET
*) utf8_charset(charset
);
2353 * When we get 8-bit non-ascii characters but it is supposed to
2354 * be ascii we want it to turn into question marks, not
2355 * just behave as if it is UTF-8 which is what happens
2356 * with ascii because there is no translation table.
2357 * So we need to catch the ascii special case here.
2359 if(utf8
->charset
&& utf8
->charset
->type
== CT_ASCII
)
2360 utf8
->charset
= NULL
;
2362 return((void *) utf8
);
2367 * RICHTEXT-TO-PLAINTEXT filter
2371 * option to be used by rich2plain (NOTE: if this filter is ever
2372 * used more than once in a pipe, all instances will have the same
2377 /*----------------------------------------------------------------------
2378 richtext to plaintext filter
2383 This basically removes all richtext formatting. A cute hack is used
2384 to get bold and underlining to work.
2385 Further work could be done to handle things like centering and right
2386 and left flush, but then it could no longer be done in place. This
2387 operates on text *with* CRLF's.
2389 WARNING: does not wrap lines!
/*
 * gf_rich2plain - strip text/richtext markup and pass plain text to
 * f->next.
 *
 * f   -- filter state.  f->f1 is the parser state, f->f2 is non-zero
 *        while inside a comment element, and f->line/f->linep collect
 *        the current token, lower-cased, in a 45-char buffer allocated
 *        at GF_RESET.
 * flg -- GF_RESET initialises the above; GF_EOD reports an unfinished
 *        token through gf_error, contains writes of the underline-off
 *        and bold-off tags, frees f->line, flushes and forwards GF_EOD;
 *        on the data path input is read with GF_GETC.
 *
 * When f->opt points at a non-zero int, emphasis tokens are dropped.
 * Otherwise bold maps to TAG_BOLDON/TAG_BOLDOFF and both italic and
 * underline map to TAG_ULINEON/TAG_ULINEOFF, each preceded by TAG_EMBED.
 * A token starting with lt yields a literal less-than sign, one starting
 * with nl a CRLF, and /paragraph two CRLFs.  A token that grows past the
 * 40-char limit frees f->line and calls gf_error.
 *
 * NOTE(review): rich_bold_on and rich_uline_on are function-static, so
 * they are shared by every instance of this filter.
 */
2392 gf_rich2plain(FILTER_S
*f
, int flg
)
2394 static int rich_bold_on
= 0, rich_uline_on
= 0;
2396 /* BUG: quote incoming \255 values */
2397 GF_INIT(f
, f
->next
);
2400 register unsigned char c
;
2401 register int state
= f
->f1
;
2404 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2406 while(GF_GETC(f
, c
)){
2409 case TOKEN
: /* collect a richtext token */
2410 if(c
== '>'){ /* what should we do with it? */
2411 state
= DFL
; /* return to default next time */
2412 *(f
->linep
) = '\0'; /* cap off token */
2413 if(f
->line
[0] == 'l' && f
->line
[1] == 't'){
2414 GF_PUTC(f
->next
, '<'); /* literal '<' */
2416 else if(f
->line
[0] == 'n' && f
->line
[1] == 'l'){
2417 GF_PUTC(f
->next
, '\015');/* newline! */
2418 GF_PUTC(f
->next
, '\012');
2420 else if(!strcmp("comment", f
->line
)){
2423 else if(!strcmp("/comment", f
->line
)){
2426 else if(!strcmp("/paragraph", f
->line
)) {
2427 GF_PUTC(f
->next
, '\r');
2428 GF_PUTC(f
->next
, '\n');
2429 GF_PUTC(f
->next
, '\r');
2430 GF_PUTC(f
->next
, '\n');
2432 else if(!plain
/* gf_rich_plain */){
2433 if(!strcmp(f
->line
, "bold")) {
2434 GF_PUTC(f
->next
, TAG_EMBED
);
2435 GF_PUTC(f
->next
, TAG_BOLDON
);
2437 } else if(!strcmp(f
->line
, "/bold")) {
2438 GF_PUTC(f
->next
, TAG_EMBED
);
2439 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2441 } else if(!strcmp(f
->line
, "italic")) {
2442 GF_PUTC(f
->next
, TAG_EMBED
);
2443 GF_PUTC(f
->next
, TAG_ULINEON
);
2445 } else if(!strcmp(f
->line
, "/italic")) {
2446 GF_PUTC(f
->next
, TAG_EMBED
);
2447 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2449 } else if(!strcmp(f
->line
, "underline")) {
2450 GF_PUTC(f
->next
, TAG_EMBED
);
2451 GF_PUTC(f
->next
, TAG_ULINEON
);
2453 } else if(!strcmp(f
->line
, "/underline")) {
2454 GF_PUTC(f
->next
, TAG_EMBED
);
2455 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2459 /* else we just ignore the token! */
2461 f
->linep
= f
->line
; /* reset token buffer */
2463 else{ /* add char to token */
2464 if(f
->linep
- f
->line
> 40){
2465 /* What? rfc1341 says 40 char tokens MAX! */
2466 fs_give((void **)&(f
->line
));
2467 gf_error("Richtext token over 40 characters");
2471 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2476 state
= DFL
; /* back to default next time */
2477 if(c
== '\012'){ /* treat as single space? */
2478 GF_PUTC(f
->next
, ' ');
2481 /* fall thru to process c */
2487 else if(c
== '\015')
2489 else if(!f
->f2
) /* not in comment! */
2490 GF_PUTC(f
->next
, c
);
2499 else if(flg
== GF_EOD
){
2500 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2501 /* incomplete token!! */
2502 gf_error("Incomplete token in richtext");
2507 GF_PUTC(f
->next
, TAG_EMBED
);
2508 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2512 GF_PUTC(f
->next
, TAG_EMBED
);
2513 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2517 fs_give((void **)&(f
->line
));
2518 (void) GF_FLUSH(f
->next
);
2519 (*f
->next
->f
)(f
->next
, GF_EOD
);
2521 else if(flg
== GF_RESET
){
2522 dprint((9, "-- gf_reset rich2plain\n"));
2523 f
->f1
= DFL
; /* state */
2524 f
->f2
= 0; /* set means we're in a comment */
2525 f
->linep
= f
->line
= (char *)fs_get(45 * sizeof(char));
2531 * function called from the outside to set
2532 * richtext filter's options
/*
 * gf_rich2plain_opt - wrap the plain flag for use as a filter option.
 *
 * plain -- pointer to an int; returned unchanged, cast to void pointer.
 *
 * Nothing is copied.  gf_rich2plain reads an int through its f->opt on
 * each call, so if this value is what ends up there the pointed-to int
 * has to stay valid while the filter runs -- confirm with the callers.
 */
2535 gf_rich2plain_opt(int *plain
)
2537 return((void *) plain
);
2543 * ENRICHED-TO-PLAIN text filter
/*
 * Bits kept in f->f2 by gf_enriched2plain.  While TEF_QUELL is set the
 * filter does not forward ordinary characters; the param token and the
 * line-break handling clear it.  TEF_NOFILL is set and cleared by the
 * nofill token and changes how a CR is treated.
 */
2546 #define TEF_QUELL 0x01
2547 #define TEF_NOFILL 0x02
2551 /*----------------------------------------------------------------------
2552 enriched text to plain text filter (ala rfc1523)
2554 Args: f -- state and input data
2557 This basically removes all enriched formatting. A cute hack is used
2558 to get bold and underlining to work.
2560 Further work could be done to handle things like centering and right
2561 and left flush, but then it could no longer be done in place. This
2562 operates on text *with* CRLF's.
2564 WARNING: does not wrap lines!
/*
 * gf_enriched2plain - strip text/enriched markup and pass plain text to
 * f->next.
 *
 * f   -- filter state.  f->f1 is the parser state, f->f2 holds the
 *        TEF_QUELL and TEF_NOFILL bits, and f->line/f->linep collect the
 *        current token, lower-cased, in a 65-char buffer allocated at
 *        GF_RESET.
 * flg -- GF_RESET initialises the above; GF_EOD reports an unfinished
 *        token through gf_error, contains writes of the underline-off
 *        and bold-off tags, ends the output with a CRLF, frees f->line,
 *        flushes and forwards GF_EOD; on the data path input is read
 *        with GF_GETC.
 *
 * A token with a leading slash is the closing form of the same name.
 * The param and nofill tokens update the f->f2 bits.  Unless f->opt
 * points at a non-zero int, bold maps to TAG_BOLDON/TAG_BOLDOFF and both
 * italic and underline map to TAG_ULINEON/TAG_ULINEOFF, each preceded by
 * TAG_EMBED.  A second less-than sign right after the first yields a
 * literal one; inside a token it frees f->line and calls gf_error, as
 * does a token that grows past the 60-char limit.
 *
 * NOTE(review): enr_uline_on and enr_bold_on are function-static and so
 * shared by every instance.  The reset-time comment on f->f2 (in a
 * comment) looks copied from gf_rich2plain; here f->f2 is a flag word.
 */
2567 gf_enriched2plain(FILTER_S
*f
, int flg
)
2569 static int enr_uline_on
= 0, enr_bold_on
= 0;
2571 /* BUG: quote incoming \255 values */
2572 GF_INIT(f
, f
->next
);
2575 register unsigned char c
;
2576 register int state
= f
->f1
;
2579 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2581 while(GF_GETC(f
, c
)){
2584 case TOKEN
: /* collect a richtext token */
2585 if(c
== '>'){ /* what should we do with it? */
2586 int off
= *f
->line
== '/';
2587 char *token
= f
->line
+ (off
? 1 : 0);
2590 if(!strcmp("param", token
)){
2592 f
->f2
&= ~TEF_QUELL
;
2596 else if(!strcmp("nofill", token
)){
2598 f
->f2
&= ~TEF_NOFILL
;
2600 f
->f2
|= TEF_NOFILL
;
2602 else if(!plain
/* gf_enriched_plain */){
2603 /* Following is a cute hack or two to get
2604 bold and underline on the screen.
2605 See Putline0n() where these codes are
2607 if(!strcmp("bold", token
)) {
2608 GF_PUTC(f
->next
, TAG_EMBED
);
2609 GF_PUTC(f
->next
, off
? TAG_BOLDOFF
: TAG_BOLDON
);
2610 enr_bold_on
= off
? 0 : 1;
2611 } else if(!strcmp("italic", token
)) {
2612 GF_PUTC(f
->next
, TAG_EMBED
);
2613 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2614 enr_uline_on
= off
? 0 : 1;
2615 } else if(!strcmp("underline", token
)) {
2616 GF_PUTC(f
->next
, TAG_EMBED
);
2617 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2618 enr_uline_on
= off
? 0 : 1;
2621 /* else we just ignore the token! */
2623 f
->linep
= f
->line
; /* reset token buffer */
2625 else if(c
== '<'){ /* literal '<'? */
2626 if(f
->linep
== f
->line
){
2627 GF_PUTC(f
->next
, '<');
2631 fs_give((void **)&(f
->line
));
2632 gf_error("Malformed Enriched text: unexpected '<'");
2636 else{ /* add char to token */
2637 if(f
->linep
- f
->line
> 60){ /* rfc1523 says 60 MAX! */
2638 fs_give((void **)&(f
->line
));
2639 gf_error("Malformed Enriched text: token too long");
2643 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2648 if(c
!= '\012'){ /* treat as single space? */
2649 state
= DFL
; /* lone cr? */
2650 f
->f2
&= ~TEF_QUELL
;
2651 GF_PUTC(f
->next
, '\015');
2659 if(c
== '\015'){ /* treat as single space? */
2660 state
= CCR
; /* repeat crlf's mean real newlines */
2662 GF_PUTC(f
->next
, '\r');
2663 GF_PUTC(f
->next
, '\n');
2668 if(!((f
->f2
) & TEF_QUELL
))
2669 GF_PUTC(f
->next
, ' ');
2671 f
->f2
&= ~TEF_QUELL
;
2674 /* fall thru to take care of 'c' */
2681 else if(c
== '\015' && (!((f
->f2
) & TEF_NOFILL
)))
2683 else if(!((f
->f2
) & TEF_QUELL
))
2684 GF_PUTC(f
->next
, c
);
2693 else if(flg
== GF_EOD
){
2694 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2695 /* incomplete token!! */
2696 gf_error("Incomplete token in richtext");
2700 GF_PUTC(f
->next
, TAG_EMBED
);
2701 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2705 GF_PUTC(f
->next
, TAG_EMBED
);
2706 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2710 /* Make sure we end with a newline so everything gets flushed */
2711 GF_PUTC(f
->next
, '\015');
2712 GF_PUTC(f
->next
, '\012');
2714 fs_give((void **)&(f
->line
));
2716 (void) GF_FLUSH(f
->next
);
2717 (*f
->next
->f
)(f
->next
, GF_EOD
);
2719 else if(flg
== GF_RESET
){
2720 dprint((9, "-- gf_reset enriched2plain\n"));
2721 f
->f1
= DFL
; /* state */
2722 f
->f2
= 0; /* set means we're in a comment */
2723 f
->linep
= f
->line
= (char *)fs_get(65 * sizeof(char));
2729 * function called from the outside to set
2730 * richtext filter's options
/*
 * gf_enriched2plain_opt - wrap the plain flag for use as a filter option.
 *
 * plain -- pointer to an int; returned unchanged, cast to void pointer.
 *
 * Nothing is copied.  gf_enriched2plain reads an int through its f->opt
 * on each call, so if this value is what ends up there the pointed-to
 * int has to stay valid while the filter runs -- confirm with callers.
 */
2733 gf_enriched2plain_opt(int *plain
)
2735 return((void *) plain
);
2741 * HTML-TO-PLAIN text filter
2745 /* OK, here's the plan:
2747 * a universal output function handles writing chars and worries
2750 * a universal element collector reads chars and collects params
2751 * and dispatches the appropriate element handler.
2753 * element handlers are stacked. The most recently dispatched gets
2754 * first crack at the incoming character stream. It passes bytes it's
2755 * done with or not interested in to the next
2757 * installs that handler as the current one collecting data...
2759 * stacked handlers take their params from the element collector and
2760 * accept chars or do whatever they need to do. Sort of a vertical
2761 * piping? recursion-like? hmmm.
2763 * at least I think this is how it'll work. tres simple, non?
2769 * Some important constants
/*
 * Limits, return codes and flag values for the HTML filter.
 *
 * NOTE(review): HTML_LITERAL and HTML_DOBOLD are both 0x0400, so a value
 * tested with IS_LITERAL cannot be told apart from HTML_DOBOLD; confirm
 * the two are never used in the same context.
 */
2771 #define HTML_BUF_LEN 2048 /* max scratch buffer length */
2772 #define MAX_ENTITY 20 /* maximum length of an entity */
2773 #define MAX_ELEMENT 72 /* maximum length of an element */
2774 #define HTML_MOREDATA 0 /* expect more entity data */
2775 #define HTML_ENTITY 1 /* valid entity collected */
2776 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2777 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2778 #define HTML_LITERAL 0x0400 /* Literal character value */
2779 #define HTML_NEWLINE 0x010A /* hard newline */
2780 #define HTML_DOBOLD 0x0400 /* Start Bold display */
2781 #define HTML_ID_GET 0 /* indent func: return current val */
2782 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2783 #define HTML_ID_INC 2 /* indent func: increment by val */
2784 #define HTML_HX_CENTER 0x0001
2785 #define HTML_HX_ULINE 0x0002
2786 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2790 * Handler data, state information including function that uses it
/*
 * One entry on the stack of element handlers.  Of the members, only the
 * back pointer to the owning filter (html_data) and the link to the
 * entry beneath this one (below) are visible in this excerpt.
 */
2792 typedef struct handler_s
{
2793 FILTER_S
*html_data
;
2798 struct handler_s
*below
;
2802 * Element Property structure
/*
 * Per-element properties.  Visible members: handler, a function taking a
 * HANDLER_S pointer and two ints and returning int, plus the one-bit
 * flags blocklevel and alternate.  The remaining members are not part of
 * this excerpt.
 */
2804 typedef struct _element_properties
{
2807 int (*handler
)(HANDLER_S
*, int, int);
2808 unsigned blocklevel
:1;
2809 unsigned alternate
:1;
2813 * Types used to manage HTML parsing
2815 static void html_handoff(HANDLER_S
*, int);
2819 * to help manage line wrapping.
/*
 * A growable text buffer that tracks both how many chars it holds (used)
 * and how wide that text is when displayed (width), next to the
 * allocated size (len).
 */
2821 typedef struct _wrap_line
{
2822 char *buf
; /* buf to collect wrapped text */
2823 int used
, /* number of chars in buf */
2824 width
, /* text's width as displayed */
2825 len
; /* length of allocated buf */
2830 * to help manage centered text
/*
 * State for centered output: the line assembled so far and the word
 * currently being collected, each as a WRAPLINE_S.
 */
2832 typedef struct _center_s
{
2833 WRAPLINE_S line
; /* buf to assemble centered text */
2834 WRAPLINE_S word
; /* word being appended to line */
2841 * Collector data and state information
/*
 * Working state of the element collector: a fixed HTML_BUF_LEN scratch
 * buffer with its fill length, one-bit flags describing what kind of
 * markup is being read and whether it is well formed, the active quote
 * character, and the element name and attribute list gathered so far.
 * NEW_CLCTR allocates and zeroes one; FREE_CLCTR releases it together
 * with element and the attribute list.
 */
2843 typedef struct collector_s
{
2844 char buf
[HTML_BUF_LEN
]; /* buffer to collect data */
2845 int len
; /* length of that buffer */
2846 unsigned end_tag
:1; /* collecting a closing tag */
2847 unsigned hit_equal
:1; /* collecting right half of attrib */
2848 unsigned mkup_decl
:1; /* markup declaration */
2849 unsigned start_comment
:1; /* markup declaration comment */
2850 unsigned end_comment
:1; /* legit comment format */
2851 unsigned hyphen
:1; /* markup hyphen read */
2852 unsigned badform
:1; /* malformed markup element */
2853 unsigned overrun
:1; /* Overran buf above */
2854 unsigned proc_inst
:1; /* XML processing instructions */
2855 unsigned empty
:1; /* empty element */
2856 unsigned was_quoted
:1; /* basically to catch null string */
2857 char quoted
; /* quoted element param value */
2858 char *element
; /* element's collected name */
2859 PARAMETER
*attribs
; /* element's collected attributes */
2860 PARAMETER
*cur_attrib
; /* attribute now being collected */
2865 * State information for all element handlers
/*
 * State shared by all element handlers of one HTML filter instance; the
 * HD(X) macro reads it from (X)->data.  It holds the handler stack, the
 * current element collector, centering and wrapping state, the token
 * callback installed by NEW_CLCTR and cleared by FREE_CLCTR, and one-bit
 * flags for the current rendering mode.
 */
2867 typedef struct html_data
{
2868 HANDLER_S
*h_stack
; /* handler list */
2869 CLCTR_S
*el_data
; /* element collector data */
2870 CENTER_S
*centered
; /* struct to manage centered text */
2871 int (*token
)(FILTER_S
*, int);
2872 char quoted
; /* quoted, by either ' or ", text */
2873 short indent_level
; /* levels of indention */
2874 int in_anchor
; /* text now being written to anchor */
2875 int blanks
; /* Consecutive blank line count */
2876 int wrapcol
; /* column to wrap lines on */
2877 int *prefix
; /* buffer containing Anchor prefix */
2879 long line_bufsize
; /* current size of the line buffer */
2882 int state
; /* embedded data state */
2883 char *color
; /* embedded color pointer */
2885 CBUF_S cb
; /* utf8->ucs4 conversion state */
2886 unsigned wrapstate
:1; /* whether or not to wrap output */
2887 unsigned li_pending
:1; /* <LI> next token expected */
2888 unsigned de_pending
:1; /* <DT> or <DD> next token expected */
2889 unsigned bold_on
:1; /* currently bolding text */
2890 unsigned uline_on
:1; /* currently underlining text */
2891 unsigned center
:1; /* center output text */
2892 unsigned bitbucket
:1; /* Ignore input */
2893 unsigned head
:1; /* In doc's HEAD */
2894 unsigned body
:1; /* In doc's BODY */
2895 unsigned alt_entity
:1; /* use alternative entity values */
2896 unsigned wrote
:1; /* anything written yet? */
2901 * HTML filter options
/*
 * Caller-supplied options for the HTML filter, reached through (X)->opt
 * by the accessor macros that follow (WRAP_COLS, HTML_INDENT, HTML_BASE,
 * STRIP, PASS_HTML and so on).
 */
2903 typedef struct _html_opts
{
2904 char *base
; /* Base URL for this html file */
2905 int columns
, /* Display columns (excluding margins) */
2906 indent
; /* Left margin */
2907 HANDLE_S
**handlesp
; /* Head of handles */
2908 htmlrisk_t warnrisk_f
; /* Nasty link warning call */
2909 ELPROP_S
*element_table
; /* markup element table */
2910 RSS_FEED_S
**feedp
; /* hook for RSS feed response */
2911 unsigned strip
:1; /* Hilite TAGs allowed */
2912 unsigned handles_loc
:1; /* Local handles requested? */
2913 unsigned showserver
:1; /* Display server after anchors */
2914 unsigned outputted
:1; /* any */
2915 unsigned no_relative_links
:1; /* Disable embedded relative links */
2916 unsigned related_content
:1; /* Embedded related content */
2917 unsigned html
:1; /* Output content in HTML */
2918 unsigned html_imgs
:1; /* Output IMG tags in HTML content */
2924 * Some macros to make life a little easier
/*
 * Accessors for the HTML filter.  Most read a field of the HTML_OPT_S at
 * (X)->opt and fall back to a default (80 columns, no indent, NULL base,
 * flag off) when opt is NULL.  HD, ED and EL cast (X)->data, its el_data
 * member and (X)->element.  MAKE_LITERAL tags the low eight bits of a
 * value with HTML_LITERAL and IS_LITERAL tests for that tag;
 * HTML_ISSPACE is true for an untagged HTML_NEWLINE or ASCII whitespace.
 *
 * NOTE(review): HANDLESP, ELEMENTS and RSS_FEED dereference (X)->opt
 * without the NULL test the other accessors make; DO_HANDLES guards
 * HANDLESP, the other two have no guarded form here.
 */
2926 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2927 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2928 #define HTML_WROTE(X) (HD(X)->wrote)
2929 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
2930 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2931 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2932 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
2933 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2934 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2935 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2936 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2937 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2938 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
2939 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2940 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
2941 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2942 #define IS_LITERAL(C) (HTML_LITERAL & (C))
2943 #define HD(X) ((HTML_DATA_S *)(X)->data)
2944 #define ED(X) (HD(X)->el_data)
2945 #define EL(X) ((ELPROP_S *) (X)->element)
2946 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
2947 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR allocates a zeroed CLCTR_S as the filter's element collector
 * and installs html_element_collector as the token callback.
 * FREE_CLCTR walks the attribute list releasing each node (and, in the
 * visible lines, its attribute and value strings), then releases the
 * element name and the collector itself and clears the token callback.
 * Both expand to braced compound statements and evaluate X repeatedly.
 */
2948 #define NEW_CLCTR(X) { \
2949 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2950 memset(ED(X), 0, sizeof(CLCTR_S)); \
2951 HD(X)->token = html_element_collector; \
2954 #define FREE_CLCTR(X) { \
2955 if(ED(X)->attribs){ \
2957 while((p = ED(X)->attribs) != NULL){ \
2958 ED(X)->attribs = ED(X)->attribs->next; \
2960 fs_give((void **)&p->attribute); \
2962 fs_give((void **)&p->value); \
2963 fs_give((void **)&p); \
2966 if(ED(X)->element) \
2967 fs_give((void **) &ED(X)->element); \
2968 fs_give((void **) &ED(X)); \
2969 HD(X)->token = NULL; \
/*
 * HANDLERS, BOLD_BIT, ULINE_BIT and CENTER_BIT name fields of the
 * filter's HTML_DATA_S.  HTML_FLUSH writes the bytes collected between
 * (X)->line and (X)->linep through html_write and rewinds linep.
 * HTML_BOLD, HTML_ULINE, HTML_ITALIC, HTML_STRIKE, HTML_BIG and
 * HTML_SMALL each emit TAG_EMBED followed by the matching ON or OFF tag
 * through html_output; the test on S that picks between the two is not
 * visible in this excerpt.  HTML_BOLD is additionally skipped when
 * STRIP(X) is true.
 */
2971 #define HANDLERS(X) (HD(X)->h_stack)
2972 #define BOLD_BIT(X) (HD(X)->bold_on)
2973 #define ULINE_BIT(X) (HD(X)->uline_on)
2974 #define CENTER_BIT(X) (HD(X)->center)
2975 #define HTML_FLUSH(X) { \
2976 html_write(X, (X)->line, (X)->linep - (X)->line); \
2977 (X)->linep = (X)->line; \
2980 #define HTML_BOLD(X, S) if(! STRIP(X)){ \
2982 html_output((X), TAG_EMBED); \
2983 html_output((X), TAG_BOLDON); \
2986 html_output((X), TAG_EMBED); \
2987 html_output((X), TAG_BOLDOFF); \
2990 #define HTML_ULINE(X, S) \
2993 html_output((X), TAG_EMBED); \
2994 html_output((X), TAG_ULINEON); \
2997 html_output((X), TAG_EMBED); \
2998 html_output((X), TAG_ULINEOFF); \
3001 #define HTML_ITALIC(X, S) \
3004 html_output((X), TAG_EMBED); \
3005 html_output((X), TAG_ITALICON); \
3008 html_output((X), TAG_EMBED); \
3009 html_output((X), TAG_ITALICOFF); \
3012 #define HTML_STRIKE(X, S) \
3015 html_output((X), TAG_EMBED); \
3016 html_output((X), TAG_STRIKEON); \
3019 html_output((X), TAG_EMBED); \
3020 html_output((X), TAG_STRIKEOFF); \
3023 #define HTML_BIG(X, S) \
3026 html_output((X), TAG_EMBED); \
3027 html_output((X), TAG_BIGON); \
3030 html_output((X), TAG_EMBED); \
3031 html_output((X), TAG_BIGOFF); \
3034 #define HTML_SMALL(X, S) \
3037 html_output((X), TAG_EMBED); \
3038 html_output((X), TAG_SMALLON); \
3041 html_output((X), TAG_EMBED); \
3042 html_output((X), TAG_SMALLOFF); \
/*
 * WRAPPED_LEN - width currently held by filter X's centering buffers:
 *               line width plus word width, plus one separating column
 *               when both are non-empty; 0 when nothing is being centered.
 *
 * The original body ignored its parameter and referred to a variable
 * named "f" in the caller's scope; it now uses (X), which is identical
 * for every caller that passes "f".
 *
 * NOTE(review): the two closing lines ("? 1 : 0" and the ": 0" arm) were
 * missing from the damaged listing and are reconstructed.
 */
#define WRAPPED_LEN(X)  ((HD(X)->centered) \
                            ? (HD(X)->centered->line.width \
                                + HD(X)->centered->word.width \
                                + ((HD(X)->centered->line.width \
                                    && HD(X)->centered->word.width) \
                                    ? 1 : 0)) \
                            : 0)
/*
 * HTML_DUMP_LIT - push the L bytes at S through HTML_TEXT on filter F.
 *                 ASCII white space goes through as is; every other byte
 *                 is wrapped with MAKE_LITERAL first.
 *
 * Declares locals "i" and "c" in its own block, so the S and L arguments
 * must not themselves refer to variables with those names.
 *
 * NOTE(review): the local declaration, the white-space arm of the
 * conditional and the HTML_TEXT call were missing from the damaged listing
 * and are reconstructed.
 */
#define HTML_DUMP_LIT(F, S, L)  { \
                            int i, c; \
                            for(i = 0; i < (L); i++){ \
                                c = ASCII_ISSPACE((unsigned char)(S)[i]) \
                                      ? (S)[i] \
                                      : MAKE_LITERAL((S)[i]); \
                                HTML_TEXT(F, c); \
                            } \
                        }

/*
 * HTML_PROC - run character C through filter F's HTML state.
 *
 * While a token function is installed C is handed to it; a nonzero result
 * ends collection.  When C is '<' and no token function is installed, a
 * new element collector is started.
 *
 * NOTE(review): the damaged listing dropped the lines that test for an
 * installed token function, the "i < 0" test that replays the collected
 * text ("<", element name, buffered bytes, then C) as literals, the
 * FREE_CLCTR call and the plain-text fall-through.  They are reconstructed
 * here -- confirm against the upstream file.
 */
#define HTML_PROC(F, C) { \
                            if(HD(F)->token){ \
                                int i; \
                                if((i = (*(HD(F)->token))(F, C)) != 0){ \
                                    if(i < 0){ \
                                        HTML_DUMP_LIT(F, "<", 1); \
                                        if(HD(F)->el_data->element){ \
                                            HTML_DUMP_LIT(F, \
                                             HD(F)->el_data->element, \
                                             strlen(HD(F)->el_data->element));\
                                        } \
                                        if(HD(F)->el_data->len){ \
                                            HTML_DUMP_LIT(F, \
                                                    HD(F)->el_data->buf, \
                                                    HD(F)->el_data->len); \
                                        } \
                                        HTML_TEXT(F, C); \
                                    } \
                                    FREE_CLCTR(F); \
                                } \
                             } \
                             else if((C) == '<'){ \
                                 NEW_CLCTR(F); \
                             } \
                             else \
                               HTML_TEXT(F, C); \
                          }
/*
 * HTML_LINEP_PUTC - append byte C to filter F's line buffer.
 *
 * When the write position is within one byte of line_bufsize the buffer is
 * doubled with fs_resize() first; the write pointer is re-derived from its
 * saved offset because the resize may hand back a different block.
 */
#define HTML_LINEP_PUTC(F, C) { \
                if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
                    size_t offset = (F)->linep - (F)->line; \
                    fs_resize((void **) &(F)->line, \
                              (HD(F)->line_bufsize * 2) * sizeof(char)); \
                    HD(F)->line_bufsize *= 2; \
                    (F)->linep = &(F)->line[offset]; \
                } \
                *(F)->linep++ = (C); \
            }
/*
 * HTML_TEXT - pass text character C through filter F's white-space state
 *             kept in (F)->f1.
 *
 * In the white-space state further white space is ignored; the first
 * other character emits a single ' ' and returns to the default state.
 * In the default state nothing is emitted while bitbucket is set, white
 * space starts coalescing when wrapstate is set, and anything else is
 * sent on with HTML_TEXT_OUT.
 *
 * NOTE(review): the "case" labels and the "break" lines were missing from
 * the damaged listing and are reconstructed from the states the remaining
 * lines assign to (F)->f1.
 */
#define HTML_TEXT(F, C) switch((F)->f1){ \
                             case WSPACE : \
                               if(HTML_ISSPACE(C)) /* ignore repeated WS */  \
                                 break; \
                               HTML_TEXT_OUT(F, ' '); \
                               (F)->f1 = DFL;/* stop sending chars here */ \
                               /* fall thru to process 'c' */ \
                             case DFL: \
                               if(HD(F)->bitbucket) \
                                 (F)->f1 = DFL; /* no op */ \
                               else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
                                 (F)->f1 = WSPACE;/* coalesce white space */ \
                               else HTML_TEXT_OUT(F, C); \
                               break; \
                         }

/*
 * HTML_TEXT_OUT - give C to the handler on top of F's handler stack when
 *                 there is one.
 *
 * NOTE(review): the "else" arm (plain html_output when the stack is empty)
 * was missing from the damaged listing and is reconstructed.
 */
#define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */ \
                              (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
                            else \
                              html_output(F, C);
/*
 * HTML_DEBUG_EL - debug trace of a collected element D, labelled S: the
 *                 element name first, then one "PARM" line per attribute.
 *                 Expands to nothing in the alternate definition.
 *
 * NOTE(review): the damaged listing lost the preprocessor conditional that
 * selects between the two definitions, the label argument, the guard
 * around the attribute loop and the loop's step.  "DEBUG", the debug-level
 * test and the inner dprint level below are reconstructions -- confirm
 * against the upstream file.
 */
#ifdef  DEBUG
#define HTML_DEBUG_EL(S, D)   { \
                                dprint((5, "-- html %s: %s\n", \
                                         S ? S : "?", \
                                         (D)->element \
                                           ? (D)->element : "NULL")); \
                                  if(debug > 5){ \
                                      PARAMETER *p; \
                                      for(p = (D)->attribs; \
                                          p && p->attribute; \
                                          p = p->next) \
                                        dprint((6, \
                                                 " PARM: %s%s%s\n", \
                                                 p->attribute \
                                                   ? p->attribute : "NULL",\
                                                 p->value ? "=" : "", \
                                                 p->value ? p->value : ""));\
                                  } \
                              }
#else
#define HTML_DEBUG_EL(S, D)
#endif
/* default location of pine.info unless the build supplies its own */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH   "/usr/local/lib/pine.info"
#endif
/*
 * CHTML_VAR_EXPAND - map the variable name "PINE_INFO_PATH" to
 *                    SYSTEM_PINE_INFO_PATH; any other string is returned
 *                    unchanged.  S is evaluated twice.
 */
#define CHTML_VAR_EXPAND(S)     (!strcmp((S), "PINE_INFO_PATH") \
                                 ? SYSTEM_PINE_INFO_PATH : (S))
3147 * Protos for Tag handlers
3149 int html_head(HANDLER_S
*, int, int);
3150 int html_base(HANDLER_S
*, int, int);
3151 int html_title(HANDLER_S
*, int, int);
3152 int html_body(HANDLER_S
*, int, int);
3153 int html_a(HANDLER_S
*, int, int);
3154 int html_br(HANDLER_S
*, int, int);
3155 int html_hr(HANDLER_S
*, int, int);
3156 int html_p(HANDLER_S
*, int, int);
3157 int html_table(HANDLER_S
*, int, int);
3158 int html_caption(HANDLER_S
*, int, int);
3159 int html_tr(HANDLER_S
*, int, int);
3160 int html_td(HANDLER_S
*, int, int);
3161 int html_th(HANDLER_S
*, int, int);
3162 int html_thead(HANDLER_S
*, int, int);
3163 int html_tbody(HANDLER_S
*, int, int);
3164 int html_tfoot(HANDLER_S
*, int, int);
3165 int html_col(HANDLER_S
*, int, int);
3166 int html_colgroup(HANDLER_S
*, int, int);
3167 int html_b(HANDLER_S
*, int, int);
3168 int html_u(HANDLER_S
*, int, int);
3169 int html_i(HANDLER_S
*, int, int);
3170 int html_em(HANDLER_S
*, int, int);
3171 int html_strong(HANDLER_S
*, int, int);
3172 int html_s(HANDLER_S
*, int, int);
3173 int html_big(HANDLER_S
*, int, int);
3174 int html_small(HANDLER_S
*, int, int);
3175 int html_font(HANDLER_S
*, int, int);
3176 int html_img(HANDLER_S
*, int, int);
3177 int html_map(HANDLER_S
*, int, int);
3178 int html_area(HANDLER_S
*, int, int);
3179 int html_form(HANDLER_S
*, int, int);
3180 int html_input(HANDLER_S
*, int, int);
3181 int html_option(HANDLER_S
*, int, int);
3182 int html_optgroup(HANDLER_S
*, int, int);
3183 int html_button(HANDLER_S
*, int, int);
3184 int html_select(HANDLER_S
*, int, int);
3185 int html_textarea(HANDLER_S
*, int, int);
3186 int html_label(HANDLER_S
*, int, int);
3187 int html_fieldset(HANDLER_S
*, int, int);
3188 int html_ul(HANDLER_S
*, int, int);
3189 int html_ol(HANDLER_S
*, int, int);
3190 int html_menu(HANDLER_S
*, int, int);
3191 int html_dir(HANDLER_S
*, int, int);
3192 int html_li(HANDLER_S
*, int, int);
3193 int html_h1(HANDLER_S
*, int, int);
3194 int html_h2(HANDLER_S
*, int, int);
3195 int html_h3(HANDLER_S
*, int, int);
3196 int html_h4(HANDLER_S
*, int, int);
3197 int html_h5(HANDLER_S
*, int, int);
3198 int html_h6(HANDLER_S
*, int, int);
3199 int html_blockquote(HANDLER_S
*, int, int);
3200 int html_address(HANDLER_S
*, int, int);
3201 int html_pre(HANDLER_S
*, int, int);
3202 int html_center(HANDLER_S
*, int, int);
3203 int html_div(HANDLER_S
*, int, int);
3204 int html_span(HANDLER_S
*, int, int);
3205 int html_dl(HANDLER_S
*, int, int);
3206 int html_dt(HANDLER_S
*, int, int);
3207 int html_dd(HANDLER_S
*, int, int);
3208 int html_script(HANDLER_S
*, int, int);
3209 int html_applet(HANDLER_S
*, int, int);
3210 int html_style(HANDLER_S
*, int, int);
3211 int html_kbd(HANDLER_S
*, int, int);
3212 int html_dfn(HANDLER_S
*, int, int);
3213 int html_var(HANDLER_S
*, int, int);
3214 int html_tt(HANDLER_S
*, int, int);
3215 int html_samp(HANDLER_S
*, int, int);
3216 int html_sub(HANDLER_S
*, int, int);
3217 int html_sup(HANDLER_S
*, int, int);
3218 int html_cite(HANDLER_S
*, int, int);
3219 int html_code(HANDLER_S
*, int, int);
3220 int html_ins(HANDLER_S
*, int, int);
3221 int html_del(HANDLER_S
*, int, int);
3222 int html_abbr(HANDLER_S
*, int, int);
3225 * Protos for RSS 2.0 Tag handlers
3227 int rss_rss(HANDLER_S
*, int, int);
3228 int rss_channel(HANDLER_S
*, int, int);
3229 int rss_title(HANDLER_S
*, int, int);
3230 int rss_image(HANDLER_S
*, int, int);
3231 int rss_link(HANDLER_S
*, int, int);
3232 int rss_description(HANDLER_S
*, int, int);
3233 int rss_ttl(HANDLER_S
*, int, int);
3234 int rss_item(HANDLER_S
*, int, int);
3237 * Proto's for support routines
3239 void html_pop(FILTER_S
*, ELPROP_S
*);
3240 int html_push(FILTER_S
*, ELPROP_S
*);
3241 int html_element_collector(FILTER_S
*, int);
3242 int html_element_flush(CLCTR_S
*);
3243 void html_element_comment(FILTER_S
*, char *);
3244 void html_element_output(FILTER_S
*, int);
3245 int html_entity_collector(FILTER_S
*, int, UCS
*, char **);
3246 void html_a_prefix(FILTER_S
*);
3247 void html_a_finish(HANDLER_S
*);
3248 void html_a_output_prefix(FILTER_S
*, int);
3249 void html_a_output_info(HANDLER_S
*);
3250 void html_a_relative(char *, char *, HANDLE_S
*);
3251 int html_href_relative(char *);
3252 int html_indent(FILTER_S
*, int, int);
3253 void html_blank(FILTER_S
*, int);
3254 void html_newline(FILTER_S
*);
3255 void html_output(FILTER_S
*, int);
3256 void html_output_string(FILTER_S
*, char *);
3257 void html_output_raw_tag(FILTER_S
*, char *);
3258 void html_output_normal(FILTER_S
*, int, int, int);
3259 void html_output_flush(FILTER_S
*);
3260 void html_output_centered(FILTER_S
*, int, int, int);
3261 void html_centered_handle(int *, char *, int);
3262 void html_centered_putc(WRAPLINE_S
*, int);
3263 void html_centered_flush(FILTER_S
*);
3264 void html_centered_flush_line(FILTER_S
*);
3265 void html_write_anchor(FILTER_S
*, int);
3266 void html_write_newline(FILTER_S
*);
3267 void html_write_indent(FILTER_S
*, int);
3268 void html_write(FILTER_S
*, char *, int);
3269 void html_putc(FILTER_S
*, int);
3270 int html_event_attribute(char *);
3271 char *rss_skip_whitespace(char *s
);
3272 ELPROP_S
*element_properties(FILTER_S
*, char *);
3276 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3277 * W3C doc "Additional named entities for HTML"
3279 static struct html_entities
{
3280 char *name
; /* entity name */
3281 UCS value
; /* UCS entity value */
3282 char *plain
; /* US-ASCII representation */
3284 {"quot", 0x0022}, /* 34 - quotation mark */
3285 {"amp", 0x0026}, /* 38 - ampersand */
3286 {"apos", 0x0027}, /* 39 - apostrophe */
3287 {"lt", 0x003C}, /* 60 - less-than sign */
3288 {"gt", 0x003E}, /* 62 - greater-than sign */
3289 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3290 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3291 {"cent", 0x00A2}, /* 162 - cent sign */
3292 {"pound", 0x00A3}, /* 163 - pound sign */
3293 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3294 {"yen", 0x00A5}, /* 165 - yen sign */
3295 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3296 {"sect", 0x00A7}, /* 167 - section sign */
3297 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3298 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3299 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3300 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3301 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3302 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3303 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3304 {"macr", 0x00AF}, /* 175 - macron */
3305 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3306 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3307 {"sup2", 0x00B2}, /* 178 - superscript two */
3308 {"sup3", 0x00B3}, /* 179 - superscript three */
3309 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3310 {"micro", 0x00B5}, /* 181 - micro sign */
3311 {"para", 0x00B6}, /* 182 - pilcrow sign */
3312 {"middot", 0x00B7}, /* 183 - middle dot */
3313 {"cedil", 0x00B8}, /* 184 - cedilla */
3314 {"sup1", 0x00B9}, /* 185 - superscript one */
3315 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3316 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3317 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3318 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3319 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3320 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3321 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3322 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3323 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3324 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3325 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3326 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3327 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3328 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3329 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3330 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3331 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3332 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3333 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3334 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3335 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3336 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3337 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3338 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3339 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3340 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3341 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3342 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3343 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3344 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3345 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3346 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3347 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3348 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3349 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3350 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3351 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3352 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3353 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3354 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3355 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3356 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3357 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3358 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3359 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3360 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3361 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3362 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3363 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3364 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3365 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3366 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3367 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3368 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3369 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3370 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3371 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3372 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3373 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3374 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3375 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3376 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3377 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3378 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3379 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3380 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3381 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3382 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3383 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3384 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3385 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3386 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3387 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3388 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3389 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3390 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3391 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3392 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3393 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3394 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3395 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3396 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3397 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3398 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3399 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3400 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3401 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3402 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3403 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3404 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3405 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3406 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3407 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3408 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3409 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3410 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3411 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3412 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3413 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3414 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3415 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3416 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3417 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3418 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3419 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3420 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3421 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3422 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3423 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3424 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3425 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3426 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3427 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3428 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3429 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3430 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3431 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3432 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3433 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3434 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3435 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3436 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3437 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3438 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3439 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3440 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3441 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3442 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3443 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3444 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3445 {"ensp", 0x2002}, /* 8194 - en space */
3446 {"emsp", 0x2003}, /* 8195 - em space */
3447 {"thinsp", 0x2009}, /* 8201 - thin space */
3448 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3449 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3450 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3451 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3452 {"ndash", 0x2013}, /* 8211 - en dash */
3453 {"mdash", 0x2014}, /* 8212 - em dash */
3454 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3455 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3456 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3457 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3458 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3459 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3460 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3461 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3462 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3463 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3464 {"dagger", 0x2020}, /* 8224 - dagger */
3465 {"Dagger", 0x2021}, /* 8225 - double dagger */
3466 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3467 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3468 {"permil", 0x2030}, /* 8240 - per mille sign */
3469 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3470 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3471 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3472 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3473 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3474 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3475 {"oline", 0x203E, "-"}, /* 8254 - overline */
3476 {"frasl", 0x2044}, /* 8260 - fraction slash */
3477 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3478 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3479 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3480 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3481 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3482 {"image", 0x2111}, /* 8465 - black-letter capital i */
3483 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3484 {"real", 0x211C}, /* 8476 - black-letter capital r */
3485 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3486 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3487 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3488 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3489 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3490 {"darr", 0x2193}, /* 8595 - downwards arrow */
3491 {"harr", 0x2194}, /* 8596 - left right arrow */
3492 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3493 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3494 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3495 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3496 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3497 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3498 {"forall", 0x2200}, /* 8704 - for all */
3499 {"part", 0x2202}, /* 8706 - partial differential */
3500 {"exist", 0x2203}, /* 8707 - there exists */
3501 {"empty", 0x2205}, /* 8709 - empty set */
3502 {"nabla", 0x2207}, /* 8711 - nabla */
3503 {"isin", 0x2208}, /* 8712 - element of */
3504 {"notin", 0x2209}, /* 8713 - not an element of */
3505 {"ni", 0x220B}, /* 8715 - contains as member */
3506 {"prod", 0x220F}, /* 8719 - n-ary product */
3507 {"sum", 0x2211}, /* 8721 - n-ary summation */
3508 {"minus", 0x2212}, /* 8722 - minus sign */
3509 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3510 {"radic", 0x221A}, /* 8730 - square root */
3511 {"prop", 0x221D}, /* 8733 - proportional to */
3512 {"infin", 0x221E}, /* 8734 - infinity */
3513 {"ang", 0x2220}, /* 8736 - angle */
3514 {"and", 0x2227}, /* 8743 - logical and */
3515 {"or", 0x2228}, /* 8744 - logical or */
3516 {"cap", 0x2229}, /* 8745 - intersection */
3517 {"cup", 0x222A}, /* 8746 - union */
3518 {"int", 0x222B}, /* 8747 - integral */
3519 {"there4", 0x2234}, /* 8756 - therefore */
3520 {"sim", 0x223C}, /* 8764 - tilde operator */
3521 {"cong", 0x2245}, /* 8773 - congruent to */
3522 {"asymp", 0x2248}, /* 8776 - almost equal to */
3523 {"ne", 0x2260}, /* 8800 - not equal to */
3524 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3525 {"le", 0x2264}, /* 8804 - less-than or equal to */
3526 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3527 {"sub", 0x2282}, /* 8834 - subset of */
3528 {"sup", 0x2283}, /* 8835 - superset of */
3529 {"nsub", 0x2284}, /* 8836 - not a subset of */
3530 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3531 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3532 {"oplus", 0x2295}, /* 8853 - circled plus */
3533 {"otimes", 0x2297}, /* 8855 - circled times */
3534 {"perp", 0x22A5}, /* 8869 - up tack */
3535 {"sdot", 0x22C5}, /* 8901 - dot operator */
3536 {"lceil", 0x2308}, /* 8968 - left ceiling */
3537 {"rceil", 0x2309}, /* 8969 - right ceiling */
3538 {"lfloor", 0x230A}, /* 8970 - left floor */
3539 {"rfloor", 0x230B}, /* 8971 - right floor */
3540 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3541 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3542 {"loz", 0x25CA}, /* 9674 - lozenge */
3543 {"spades", 0x2660}, /* 9824 - black spade suit */
3544 {"clubs", 0x2663}, /* 9827 - black club suit */
3545 {"hearts", 0x2665}, /* 9829 - black heart suit */
3546 {"diams", 0x2666} /* 9830 - black diamond suit */
3551 * Table of supported elements and corresponding handlers
3553 static ELPROP_S html_element_table
[] = {
3554 {"HTML", 4}, /* HTML ignore if seen? */
3555 {"HEAD", 4, html_head
}, /* slurp until <BODY> ? */
3556 {"TITLE", 5, html_title
}, /* Document Title */
3557 {"BASE", 4, html_base
}, /* HREF base */
3558 {"BODY", 4, html_body
}, /* HTML BODY */
3559 {"A", 1, html_a
}, /* Anchor */
3560 {"ABBR", 4, html_abbr
}, /* Abbreviation */
3561 {"IMG", 3, html_img
}, /* Image */
3562 {"MAP", 3, html_map
}, /* Image Map */
3563 {"AREA", 4, html_area
}, /* Image Map Area */
3564 {"HR", 2, html_hr
, 1, 1}, /* Horizontal Rule */
3565 {"BR", 2, html_br
, 0, 1}, /* Line Break */
3566 {"P", 1, html_p
, 1}, /* Paragraph */
3567 {"OL", 2, html_ol
, 1}, /* Ordered List */
3568 {"UL", 2, html_ul
, 1}, /* Unordered List */
3569 {"MENU", 4, html_menu
}, /* Menu List */
3570 {"DIR", 3, html_dir
}, /* Directory List */
3571 {"LI", 2, html_li
}, /* ... List Item */
3572 {"DL", 2, html_dl
, 1}, /* Definition List */
3573 {"DT", 2, html_dt
}, /* ... Def. Term */
3574 {"DD", 2, html_dd
}, /* ... Def. Definition */
3575 {"I", 1, html_i
}, /* Italic Text */
3576 {"EM", 2, html_em
}, /* Typographic Emphasis */
3577 {"STRONG", 6, html_strong
}, /* STRONG Typo Emphasis */
3578 {"VAR", 3, html_i
}, /* Variable Name */
3579 {"B", 1, html_b
}, /* Bold Text */
3580 {"U", 1, html_u
}, /* Underline Text */
3581 {"S", 1, html_s
}, /* Strike-Through Text */
3582 {"STRIKE", 6, html_s
}, /* Strike-Through Text */
3583 {"BIG", 3, html_big
}, /* Big Font Text */
3584 {"SMALL", 5, html_small
}, /* Small Font Text */
3585 {"FONT", 4, html_font
}, /* Font display directives */
3586 {"BLOCKQUOTE", 10, html_blockquote
, 1}, /* Blockquote */
3587 {"ADDRESS", 7, html_address
, 1}, /* Address */
3588 {"CENTER", 6, html_center
}, /* Centered Text v3.2 */
3589 {"DIV", 3, html_div
, 1}, /* Document Division 3.2 */
3590 {"SPAN", 4, html_span
}, /* Text Span */
3591 {"H1", 2, html_h1
, 1}, /* Headings... */
3592 {"H2", 2, html_h2
, 1},
3593 {"H3", 2, html_h3
,1},
3594 {"H4", 2, html_h4
, 1},
3595 {"H5", 2, html_h5
, 1},
3596 {"H6", 2, html_h6
, 1},
3597 {"PRE", 3, html_pre
, 1}, /* Preformatted Text */
3598 {"KBD", 3, html_kbd
}, /* Keyboard Input (NO OP) */
3599 {"DFN", 3, html_dfn
}, /* Definition (NO OP) */
3600 {"VAR", 3, html_var
}, /* Variable (NO OP) */
3601 {"TT", 2, html_tt
}, /* Typetype (NO OP) */
3602 {"SAMP", 4, html_samp
}, /* Sample Text (NO OP) */
3603 {"CITE", 4, html_cite
}, /* Citation (NO OP) */
3604 {"CODE", 4, html_code
}, /* Code Text (NO OP) */
3605 {"INS", 3, html_ins
}, /* Text Inseted (NO OP) */
3606 {"DEL", 3, html_del
}, /* Text Deleted (NO OP) */
3607 {"SUP", 3, html_sup
}, /* Text Superscript (NO OP) */
3608 {"SUB", 3, html_sub
}, /* Text Superscript (NO OP) */
3609 {"STYLE", 5, html_style
}, /* CSS Definitions */
3611 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3613 {"FORM", 4, html_form
, 1}, /* form within a document */
3614 {"INPUT", 5, html_input
}, /* One input field, options */
3615 {"BUTTON", 6, html_button
}, /* Push Button */
3616 {"OPTION", 6, html_option
}, /* One option within Select */
3617 {"OPTION", 6, html_optgroup
}, /* Option Group Definition */
3618 {"SELECT", 6, html_select
}, /* Selection from a set */
3619 {"TEXTAREA", 8, html_textarea
}, /* A multi-line input field */
3620 {"LABEL", 5, html_label
}, /* Control Label */
3621 {"FIELDSET", 8, html_fieldset
, 1}, /* Fieldset Control Group */
3623 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3624 {"SCRIPT", 6, html_script
}, /* Embedded scripting statements */
3625 {"APPLET", 6, NULL
}, /* Embedded applet statements */
3626 {"OBJECT", 6, NULL
}, /* Embedded object statements */
3627 {"LINK", 4, NULL
}, /* References to external data */
3628 {"PARAM", 5, NULL
}, /* Applet/Object parameters */
3630 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3632 {"TABLE", 5, html_table
, 1}, /* Table */
3633 {"CAPTION", 7, html_caption
}, /* Table Caption */
3634 {"TR", 2, html_tr
}, /* Table Table Row */
3635 {"TD", 2, html_td
}, /* Table Table Data */
3636 {"TH", 2, html_th
}, /* Table Table Head */
3637 {"THEAD", 5, html_thead
}, /* Table Table Head */
3638 {"TBODY", 5, html_tbody
}, /* Table Table Body */
3639 {"TFOOT", 5, html_tfoot
}, /* Table Table Foot */
3640 {"COL", 3, html_col
}, /* Table Column Attibutes */
3641 {"COLGROUP", 8, html_colgroup
}, /* Table Column Group Attibutes */
3648 * Table of supported RSS 2.0 elements
3650 static ELPROP_S rss_element_table
[] = {
3651 {"RSS", 3, rss_rss
}, /* RSS 2.0 version */
3652 {"CHANNEL", 7, rss_channel
}, /* RSS 2.0 Channel */
3653 {"TITLE", 5, rss_title
}, /* RSS 2.0 Title */
3654 {"IMAGE", 5, rss_image
}, /* RSS 2.0 Channel Image */
3655 {"LINK", 4, rss_link
}, /* RSS 2.0 Channel/Item Link */
3656 {"DESCRIPTION", 11, rss_description
}, /* RSS 2.0 Channel/Item Description */
3657 {"ITEM", 4, rss_item
}, /* RSS 2.0 Channel ITEM */
3658 {"TTL", 3, rss_ttl
}, /* RSS 2.0 Item TTL */
3664 * Initialize the given handler, and add it to the stack if it
3667 * Returns: 1 if handler chose to get pushed on stack
3668 * 0 if handler declined
3671 html_push(FILTER_S
*fd
, ELPROP_S
*ep
)
3675 new = (HANDLER_S
*)fs_get(sizeof(HANDLER_S
));
3676 memset(new, 0, sizeof(HANDLER_S
));
3677 new->html_data
= fd
;
3679 if((*ep
->handler
)(new, 0, GF_RESET
)){ /* stack the handler? */
3680 new->below
= HANDLERS(fd
);
3681 HANDLERS(fd
) = new; /* push */
3685 fs_give((void **) &new);
3691 * Remove the most recently installed the given handler
3692 * after letting it accept its demise.
3695 html_pop(FILTER_S
*fd
, ELPROP_S
*ep
)
3699 for(tp
= HANDLERS(fd
); tp
&& ep
!= EL(tp
); tp
= tp
->below
){
3702 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep
->element
, EL(tp
)->element
));
3703 /* if no evidence of opening tag, ignore given closing tag */
3704 for(tp2
= HANDLERS(fd
); tp2
&& ep
!= EL(tp2
); tp2
= tp2
->below
)
3708 dprint((3, "-- html error: no opening tag for given tag /%s", ep
->element
));
3712 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
);
3713 HANDLERS(fd
) = tp
->below
;
3717 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
); /* may adjust handler list */
3718 if(tp
!= HANDLERS(fd
)){
3721 for(p
= HANDLERS(fd
); p
->below
!= tp
; p
= p
->below
)
3725 p
->below
= tp
->below
; /* remove from middle of stack */
3726 /* BUG: else programming botch and we should die */
3729 HANDLERS(fd
) = tp
->below
; /* pop */
3731 fs_give((void **)&tp
);
3734 /* BUG: should MAKE SURE NOT TO EMIT IT */
3735 dprint((3, "-- html error: end tag without a start: %s", ep
->element
));
3741 * Deal with data passed a hander in its GF_DATA state
3744 html_handoff(HANDLER_S
*hd
, int ch
)
3747 (void) (*EL(hd
->below
)->handler
)(hd
->below
, ch
, GF_DATA
);
3749 html_output(hd
->html_data
, ch
);
3754 * HTML <BR> element handler
3757 html_br(HANDLER_S
*hd
, int ch
, int cmd
)
3759 if(cmd
== GF_RESET
){
3760 if(PASS_HTML(hd
->html_data
)){
3761 html_output_raw_tag(hd
->html_data
, "br");
3764 html_output(hd
->html_data
, HTML_NEWLINE
);
3768 return(0); /* don't get linked */
3773 * HTML <HR> (Horizontal Rule) element handler
3776 html_hr(HANDLER_S
*hd
, int ch
, int cmd
)
3778 if(cmd
== GF_RESET
){
3779 if(PASS_HTML(hd
->html_data
)){
3780 html_output_raw_tag(hd
->html_data
, "hr");
3783 int i
, old_wrap
, width
, align
;
3786 width
= WRAP_COLS(hd
->html_data
);
3788 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3792 if(!strucmp(p
->attribute
, "ALIGN")){
3793 if(!strucmp(p
->value
, "LEFT"))
3795 else if(!strucmp(p
->value
, "RIGHT"))
3798 else if(!strucmp(p
->attribute
, "WIDTH")){
3802 for(cp
= p
->value
; *cp
; cp
++)
3804 width
= (WRAP_COLS(hd
->html_data
)*MIN(100,width
))/100;
3807 else if(isdigit((unsigned char) *cp
))
3808 width
= (width
* 10) + (*cp
- '0');
3810 width
= MIN(width
, WRAP_COLS(hd
->html_data
));
3814 html_blank(hd
->html_data
, 1); /* at least one blank line */
3816 old_wrap
= HD(hd
->html_data
)->wrapstate
;
3817 HD(hd
->html_data
)->wrapstate
= 0;
3818 if((i
= MAX(0, WRAP_COLS(hd
->html_data
) - width
))
3819 && ((align
== 0) ? i
/= 2 : (align
== 2)))
3821 html_output(hd
->html_data
, ' ');
3823 for(i
= 0; i
< width
; i
++)
3824 html_output(hd
->html_data
, '_');
3826 html_blank(hd
->html_data
, 1);
3827 HD(hd
->html_data
)->wrapstate
= old_wrap
;
3831 return(0); /* don't get linked */
3836 * HTML <P> (paragraph) element handler
3839 html_p(HANDLER_S
*hd
, int ch
, int cmd
)
3842 html_handoff(hd
, ch
);
3844 else if(cmd
== GF_RESET
){
3845 if(PASS_HTML(hd
->html_data
)){
3846 html_output_raw_tag(hd
->html_data
, "p");
3849 /* Make sure there's at least 1 blank line */
3850 html_blank(hd
->html_data
, 1);
3852 /* adjust indent level if needed */
3853 if(HD(hd
->html_data
)->li_pending
){
3854 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
3855 HD(hd
->html_data
)->li_pending
= 0;
3859 else if(cmd
== GF_EOD
){
3860 if(PASS_HTML(hd
->html_data
)){
3861 html_output_string(hd
->html_data
, "</p>");
3864 /* Make sure there's at least 1 blank line */
3865 html_blank(hd
->html_data
, 1);
3869 return(1); /* GET linked */
3874 * HTML Table <TABLE> (paragraph) table row
3877 html_table(HANDLER_S
*hd
, int ch
, int cmd
)
3880 if(PASS_HTML(hd
->html_data
)){
3881 html_handoff(hd
, ch
);
3884 else if(cmd
== GF_RESET
){
3885 if(PASS_HTML(hd
->html_data
)){
3886 html_output_raw_tag(hd
->html_data
, "table");
3889 /* Make sure there's at least 1 blank line */
3890 html_blank(hd
->html_data
, 0);
3892 else if(cmd
== GF_EOD
){
3893 if(PASS_HTML(hd
->html_data
)){
3894 html_output_string(hd
->html_data
, "</table>");
3897 /* Make sure there's at least 1 blank line */
3898 html_blank(hd
->html_data
, 0);
3900 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3905 * HTML <CAPTION> (Table Caption) element handler
3908 html_caption(HANDLER_S
*hd
, int ch
, int cmd
)
3911 html_handoff(hd
, ch
);
3913 else if(cmd
== GF_RESET
){
3914 if(PASS_HTML(hd
->html_data
)){
3915 html_output_raw_tag(hd
->html_data
, "caption");
3918 /* turn ON the centered bit */
3919 CENTER_BIT(hd
->html_data
) = 1;
3922 else if(cmd
== GF_EOD
){
3923 if(PASS_HTML(hd
->html_data
)){
3924 html_output_string(hd
->html_data
, "</caption>");
3927 /* turn OFF the centered bit */
3928 CENTER_BIT(hd
->html_data
) = 0;
3937 * HTML Table <TR> (paragraph) table row
3940 html_tr(HANDLER_S
*hd
, int ch
, int cmd
)
3943 if(PASS_HTML(hd
->html_data
)){
3944 html_handoff(hd
, ch
);
3947 else if(cmd
== GF_RESET
){
3948 if(PASS_HTML(hd
->html_data
)){
3949 html_output_raw_tag(hd
->html_data
, "tr");
3952 /* Make sure there's at least 1 blank line */
3953 html_blank(hd
->html_data
, 0);
3955 else if(cmd
== GF_EOD
){
3956 if(PASS_HTML(hd
->html_data
)){
3957 html_output_string(hd
->html_data
, "</tr>");
3960 /* Make sure there's at least 1 blank line */
3961 html_blank(hd
->html_data
, 0);
3963 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3968 * HTML Table <TD> (paragraph) table data
3971 html_td(HANDLER_S
*hd
, int ch
, int cmd
)
3974 if(PASS_HTML(hd
->html_data
)){
3975 html_handoff(hd
, ch
);
3978 else if(cmd
== GF_RESET
){
3979 if(PASS_HTML(hd
->html_data
)){
3980 html_output_raw_tag(hd
->html_data
, "td");
3985 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3988 if(!strucmp(p
->attribute
, "nowrap")
3989 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
3990 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
3995 else if(cmd
== GF_EOD
){
3996 if(PASS_HTML(hd
->html_data
)){
3997 html_output_string(hd
->html_data
, "</td>");
4001 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4006 * HTML Table <TH> (paragraph) table head
4009 html_th(HANDLER_S
*hd
, int ch
, int cmd
)
4012 if(PASS_HTML(hd
->html_data
)){
4013 html_handoff(hd
, ch
);
4016 else if(cmd
== GF_RESET
){
4017 if(PASS_HTML(hd
->html_data
)){
4018 html_output_raw_tag(hd
->html_data
, "th");
4023 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4026 if(!strucmp(p
->attribute
, "nowrap")
4027 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
4028 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
4033 else if(cmd
== GF_EOD
){
4034 if(PASS_HTML(hd
->html_data
)){
4035 html_output_string(hd
->html_data
, "</th>");
4039 return(PASS_HTML(hd
->html_data
)); /* don't get linked */
4044 * HTML Table <THEAD> table head
4047 html_thead(HANDLER_S
*hd
, int ch
, int cmd
)
4049 if(PASS_HTML(hd
->html_data
)){
4051 html_handoff(hd
, ch
);
4053 else if(cmd
== GF_RESET
){
4054 html_output_raw_tag(hd
->html_data
, "thead");
4056 else if(cmd
== GF_EOD
){
4057 html_output_string(hd
->html_data
, "</thead>");
4060 return(1); /* GET linked */
4063 return(0); /* don't get linked */
4068 * HTML Table <TBODY> table body
4071 html_tbody(HANDLER_S
*hd
, int ch
, int cmd
)
4073 if(PASS_HTML(hd
->html_data
)){
4075 html_handoff(hd
, ch
);
4077 else if(cmd
== GF_RESET
){
4078 html_output_raw_tag(hd
->html_data
, "tbody");
4080 else if(cmd
== GF_EOD
){
4081 html_output_string(hd
->html_data
, "</tbody>");
4084 return(1); /* GET linked */
4087 return(0); /* don't get linked */
4092 * HTML Table <TFOOT> table body
4095 html_tfoot(HANDLER_S
*hd
, int ch
, int cmd
)
4097 if(PASS_HTML(hd
->html_data
)){
4099 html_handoff(hd
, ch
);
4101 else if(cmd
== GF_RESET
){
4102 html_output_raw_tag(hd
->html_data
, "tfoot");
4104 else if(cmd
== GF_EOD
){
4105 html_output_string(hd
->html_data
, "</tfoot>");
4108 return(1); /* GET linked */
4111 return(0); /* don't get linked */
4116 * HTML <COL> (Table Column Attributes) element handler
4119 html_col(HANDLER_S
*hd
, int ch
, int cmd
)
4121 if(cmd
== GF_RESET
){
4122 if(PASS_HTML(hd
->html_data
)){
4123 html_output_raw_tag(hd
->html_data
, "col");
4127 return(0); /* don't get linked */
4132 * HTML Table <COLGROUP> table body
4135 html_colgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4137 if(PASS_HTML(hd
->html_data
)){
4139 html_handoff(hd
, ch
);
4141 else if(cmd
== GF_RESET
){
4142 html_output_raw_tag(hd
->html_data
, "colgroup");
4144 else if(cmd
== GF_EOD
){
4145 html_output_string(hd
->html_data
, "</colgroup>");
4148 return(1); /* GET linked */
4151 return(0); /* don't get linked */
4156 * HTML <I> (italic text) element handler
4159 html_i(HANDLER_S
*hd
, int ch
, int cmd
)
4162 /* include LITERAL in spaceness test! */
4163 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4164 HTML_ITALIC(hd
->html_data
, 1);
4168 html_handoff(hd
, ch
);
4170 else if(cmd
== GF_RESET
){
4173 else if(cmd
== GF_EOD
){
4175 HTML_ITALIC(hd
->html_data
, 0);
4178 return(1); /* get linked */
4183 * HTML <EM> element handler
4186 html_em(HANDLER_S
*hd
, int ch
, int cmd
)
4189 if(!PASS_HTML(hd
->html_data
)){
4190 /* include LITERAL in spaceness test! */
4191 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4192 HTML_ITALIC(hd
->html_data
, 1);
4197 html_handoff(hd
, ch
);
4199 else if(cmd
== GF_RESET
){
4200 if(PASS_HTML(hd
->html_data
)){
4201 html_output_raw_tag(hd
->html_data
, "em");
4207 else if(cmd
== GF_EOD
){
4208 if(PASS_HTML(hd
->html_data
)){
4209 html_output_string(hd
->html_data
, "</em>");
4213 HTML_ITALIC(hd
->html_data
, 0);
4217 return(1); /* get linked */
4222 * HTML <STRONG> element handler
4225 html_strong(HANDLER_S
*hd
, int ch
, int cmd
)
4228 if(!PASS_HTML(hd
->html_data
)){
4229 /* include LITERAL in spaceness test! */
4230 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4231 HTML_ITALIC(hd
->html_data
, 1);
4236 html_handoff(hd
, ch
);
4238 else if(cmd
== GF_RESET
){
4239 if(PASS_HTML(hd
->html_data
)){
4240 html_output_raw_tag(hd
->html_data
, "strong");
4246 else if(cmd
== GF_EOD
){
4247 if(PASS_HTML(hd
->html_data
)){
4248 html_output_string(hd
->html_data
, "</strong>");
4252 HTML_ITALIC(hd
->html_data
, 0);
4256 return(1); /* get linked */
4261 * HTML <u> (Underline text) element handler
4264 html_u(HANDLER_S
*hd
, int ch
, int cmd
)
4266 if(PASS_HTML(hd
->html_data
)){
4268 html_handoff(hd
, ch
);
4270 else if(cmd
== GF_RESET
){
4271 html_output_raw_tag(hd
->html_data
, "u");
4273 else if(cmd
== GF_EOD
){
4274 html_output_string(hd
->html_data
, "</u>");
4277 return(1); /* get linked */
4280 return(0); /* do NOT get linked */
4285 * HTML <b> (Bold text) element handler
4288 html_b(HANDLER_S
*hd
, int ch
, int cmd
)
4291 if(!PASS_HTML(hd
->html_data
)){
4292 /* include LITERAL in spaceness test! */
4293 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4294 HTML_BOLD(hd
->html_data
, 1);
4299 html_handoff(hd
, ch
);
4301 else if(cmd
== GF_RESET
){
4302 if(PASS_HTML(hd
->html_data
)){
4303 html_output_raw_tag(hd
->html_data
, "b");
4309 else if(cmd
== GF_EOD
){
4310 if(PASS_HTML(hd
->html_data
)){
4311 html_output_string(hd
->html_data
, "</b>");
4315 HTML_BOLD(hd
->html_data
, 0);
4319 return(1); /* get linked */
4324 * HTML <s> (strike-through text) element handler
4327 html_s(HANDLER_S
*hd
, int ch
, int cmd
)
4330 if(!PASS_HTML(hd
->html_data
)){
4331 /* include LITERAL in spaceness test! */
4332 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4333 HTML_STRIKE(hd
->html_data
, 1);
4338 html_handoff(hd
, ch
);
4340 else if(cmd
== GF_RESET
){
4341 if(PASS_HTML(hd
->html_data
)){
4342 html_output_raw_tag(hd
->html_data
, "s");
4348 else if(cmd
== GF_EOD
){
4349 if(PASS_HTML(hd
->html_data
)){
4350 html_output_string(hd
->html_data
, "</s>");
4354 HTML_STRIKE(hd
->html_data
, 0);
4358 return(1); /* get linked */
4363 * HTML <big> (BIG text) element handler
4366 html_big(HANDLER_S
*hd
, int ch
, int cmd
)
4369 /* include LITERAL in spaceness test! */
4370 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4371 HTML_BIG(hd
->html_data
, 1);
4375 html_handoff(hd
, ch
);
4377 else if(cmd
== GF_RESET
){
4380 else if(cmd
== GF_EOD
){
4382 HTML_BIG(hd
->html_data
, 0);
4385 return(1); /* get linked */
4390 * HTML <small> (SMALL text) element handler
4393 html_small(HANDLER_S
*hd
, int ch
, int cmd
)
4396 /* include LITERAL in spaceness test! */
4397 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4398 HTML_SMALL(hd
->html_data
, 1);
4402 html_handoff(hd
, ch
);
4404 else if(cmd
== GF_RESET
){
4407 else if(cmd
== GF_EOD
){
4409 HTML_SMALL(hd
->html_data
, 0);
4412 return(1); /* get linked */
4417 * HTML <FONT> element handler
4420 html_font(HANDLER_S
*hd
, int ch
, int cmd
)
4422 if(PASS_HTML(hd
->html_data
)){
4424 html_handoff(hd
, ch
);
4426 else if(cmd
== GF_RESET
){
4427 html_output_raw_tag(hd
->html_data
, "font");
4429 else if(cmd
== GF_EOD
){
4430 html_output_string(hd
->html_data
, "</font>");
4433 return(1); /* get linked */
4441 * HTML <IMG> element handler
4444 html_img(HANDLER_S
*hd
, int ch
, int cmd
)
4447 char *alt
= NULL
, *src
= NULL
, *s
;
4449 if(cmd
== GF_RESET
){
4450 if(PASS_HTML(hd
->html_data
)){
4451 html_output_raw_tag(hd
->html_data
, "img");
4454 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4457 if(p
->value
&& p
->value
[0]){
4458 if(!strucmp(p
->attribute
, "alt"))
4460 if(!strucmp(p
->attribute
, "src"))
4465 * Multipart/Related Content ID pointer
4466 * ONLY attached messages are recognized
4467 * if we ever decide web bugs aren't a problem
4468 * anymore then we might expand the scope
4471 && DO_HANDLES(hd
->html_data
)
4472 && RELATED_OK(hd
->html_data
)
4473 && struncmp(src
, "cid:", 4) == 0){
4476 HANDLE_S
*h
= new_handle(HANDLESP(hd
->html_data
));
4479 h
->h
.img
.src
= cpystr(src
+ 4);
4480 h
->h
.img
.alt
= cpystr((alt
) ? alt
: "Attached Image");
4482 HTML_TEXT(hd
->html_data
, TAG_EMBED
);
4483 HTML_TEXT(hd
->html_data
, TAG_HANDLE
);
4485 sprintf(buf
, "%d", h
->key
);
4487 HTML_TEXT(hd
->html_data
, n
);
4488 for(i
= 0; i
< n
; i
++){
4489 unsigned int uic
= buf
[i
];
4490 HTML_TEXT(hd
->html_data
, uic
);
4495 else if(alt
&& strlen(alt
) < 256){ /* arbitrary "reasonable" limit */
4496 HTML_DUMP_LIT(hd
->html_data
, alt
, strlen(alt
));
4497 HTML_TEXT(hd
->html_data
, ' ');
4501 && (s
= strrindex(src
, '/'))
4503 HTML_TEXT(hd
->html_data
, '[');
4504 HTML_DUMP_LIT(hd
->html_data
, s
, strlen(s
));
4505 HTML_TEXT(hd
->html_data
, ']');
4506 HTML_TEXT(hd
->html_data
, ' ');
4510 /* text filler of last resort */
4511 HTML_DUMP_LIT(hd
->html_data
, "[IMAGE] ", 7);
4515 return(0); /* don't get linked */
4520 * HTML <MAP> (Image Map) element handler
4523 html_map(HANDLER_S
*hd
, int ch
, int cmd
)
4525 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4527 html_handoff(hd
, ch
);
4529 else if(cmd
== GF_RESET
){
4530 html_output_raw_tag(hd
->html_data
, "map");
4532 else if(cmd
== GF_EOD
){
4533 html_output_string(hd
->html_data
, "</map>");
4544 * HTML <AREA> (Image Map Area) element handler
4547 html_area(HANDLER_S
*hd
, int ch
, int cmd
)
4549 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4551 html_handoff(hd
, ch
);
4553 else if(cmd
== GF_RESET
){
4554 html_output_raw_tag(hd
->html_data
, "area");
4556 else if(cmd
== GF_EOD
){
4557 html_output_string(hd
->html_data
, "</area>");
4568 * HTML <FORM> (Form) element handler
4571 html_form(HANDLER_S
*hd
, int ch
, int cmd
)
4573 if(PASS_HTML(hd
->html_data
)){
4575 html_handoff(hd
, ch
);
4577 else if(cmd
== GF_RESET
){
4580 /* SECURITY: make sure to redirect to new browser instance */
4581 for(pp
= &(HD(hd
->html_data
)->el_data
->attribs
);
4582 *pp
&& (*pp
)->attribute
;
4584 if(!strucmp((*pp
)->attribute
, "target")){
4586 fs_give((void **) &(*pp
)->value
);
4588 (*pp
)->value
= cpystr("_blank");
4592 *pp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4593 memset(*pp
, 0, sizeof(PARAMETER
));
4594 (*pp
)->attribute
= cpystr("target");
4595 (*pp
)->value
= cpystr("_blank");
4598 html_output_raw_tag(hd
->html_data
, "form");
4600 else if(cmd
== GF_EOD
){
4601 html_output_string(hd
->html_data
, "</form>");
4605 if(cmd
== GF_RESET
){
4606 html_blank(hd
->html_data
, 0);
4607 HTML_DUMP_LIT(hd
->html_data
, "[FORM]", 6);
4608 html_blank(hd
->html_data
, 0);
4612 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4617 * HTML <INPUT> (Form) element handler
4620 html_input(HANDLER_S
*hd
, int ch
, int cmd
)
4622 if(PASS_HTML(hd
->html_data
)){
4623 if(cmd
== GF_RESET
){
4624 html_output_raw_tag(hd
->html_data
, "input");
4628 return(0); /* don't get linked */
4633 * HTML <BUTTON> (Form) element handler
4636 html_button(HANDLER_S
*hd
, int ch
, int cmd
)
4638 if(PASS_HTML(hd
->html_data
)){
4640 html_handoff(hd
, ch
);
4642 else if(cmd
== GF_RESET
){
4643 html_output_raw_tag(hd
->html_data
, "button");
4645 else if(cmd
== GF_EOD
){
4646 html_output_string(hd
->html_data
, "</button>");
4649 return(1); /* get linked */
4657 * HTML <OPTION> (Form) element handler
4660 html_option(HANDLER_S
*hd
, int ch
, int cmd
)
4662 if(PASS_HTML(hd
->html_data
)){
4664 html_handoff(hd
, ch
);
4666 else if(cmd
== GF_RESET
){
4667 html_output_raw_tag(hd
->html_data
, "option");
4669 else if(cmd
== GF_EOD
){
4670 html_output_string(hd
->html_data
, "</option>");
4673 return(1); /* get linked */
4681 * HTML <OPTGROUP> (Form) element handler
4684 html_optgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4686 if(PASS_HTML(hd
->html_data
)){
4688 html_handoff(hd
, ch
);
4690 else if(cmd
== GF_RESET
){
4691 html_output_raw_tag(hd
->html_data
, "optgroup");
4693 else if(cmd
== GF_EOD
){
4694 html_output_string(hd
->html_data
, "</optgroup>");
4697 return(1); /* get linked */
4705 * HTML <SELECT> (Form) element handler
4708 html_select(HANDLER_S
*hd
, int ch
, int cmd
)
4710 if(PASS_HTML(hd
->html_data
)){
4712 html_handoff(hd
, ch
);
4714 else if(cmd
== GF_RESET
){
4715 html_output_raw_tag(hd
->html_data
, "select");
4717 else if(cmd
== GF_EOD
){
4718 html_output_string(hd
->html_data
, "</select>");
4721 return(1); /* get linked */
4729 * HTML <TEXTAREA> (Form) element handler
4732 html_textarea(HANDLER_S
*hd
, int ch
, int cmd
)
4734 if(PASS_HTML(hd
->html_data
)){
4736 html_handoff(hd
, ch
);
4738 else if(cmd
== GF_RESET
){
4739 html_output_raw_tag(hd
->html_data
, "textarea");
4741 else if(cmd
== GF_EOD
){
4742 html_output_string(hd
->html_data
, "</textarea>");
4745 return(1); /* get linked */
4753 * HTML <LABEL> (Form) element handler
4756 html_label(HANDLER_S
*hd
, int ch
, int cmd
)
4758 if(PASS_HTML(hd
->html_data
)){
4760 html_handoff(hd
, ch
);
4762 else if(cmd
== GF_RESET
){
4763 html_output_raw_tag(hd
->html_data
, "label");
4765 else if(cmd
== GF_EOD
){
4766 html_output_string(hd
->html_data
, "</label>");
4769 return(1); /* get linked */
4777 * HTML <FIELDSET> (Form) element handler
4780 html_fieldset(HANDLER_S
*hd
, int ch
, int cmd
)
4782 if(PASS_HTML(hd
->html_data
)){
4784 html_handoff(hd
, ch
);
4786 else if(cmd
== GF_RESET
){
4787 html_output_raw_tag(hd
->html_data
, "fieldset");
4789 else if(cmd
== GF_EOD
){
4790 html_output_string(hd
->html_data
, "</fieldset>");
4793 return(1); /* get linked */
4801 * HTML <HEAD> element handler
4804 html_head(HANDLER_S
*hd
, int ch
, int cmd
)
4807 html_handoff(hd
, ch
);
4809 else if(cmd
== GF_RESET
){
4810 HD(hd
->html_data
)->head
= 1;
4812 else if(cmd
== GF_EOD
){
4813 HD(hd
->html_data
)->head
= 0;
4816 return(1); /* get linked */
4821 * HTML <BASE> element handler
4824 html_base(HANDLER_S
*hd
, int ch
, int cmd
)
4826 if(cmd
== GF_RESET
){
4827 if(HD(hd
->html_data
)->head
&& !HTML_BASE(hd
->html_data
)){
4830 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4831 p
&& p
->attribute
&& strucmp(p
->attribute
, "HREF");
4835 if(p
&& p
->value
&& !((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
)
4836 ((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
= cpystr(p
->value
);
4840 return(0); /* DON'T get linked */
4845 * HTML <TITLE> element handler
4848 html_title(HANDLER_S
*hd
, int ch
, int cmd
)
4851 if(hd
->x
+ 1 >= hd
->y
){
4853 fs_resize((void **)&hd
->s
, (size_t)hd
->y
* sizeof(unsigned char));
4856 hd
->s
[hd
->x
++] = (unsigned char) ch
;
4858 else if(cmd
== GF_RESET
){
4861 hd
->s
= (unsigned char *)fs_get((size_t)hd
->y
* sizeof(unsigned char));
4863 else if(cmd
== GF_EOD
){
4864 /* Down the road we probably want to give these bytes to
4867 hd
->s
[hd
->x
] = '\0';
4868 fs_give((void **)&hd
->s
);
4871 return(1); /* get linked */
4876 * HTML <BODY> element handler
4879 html_body(HANDLER_S
*hd
, int ch
, int cmd
)
4882 html_handoff(hd
, ch
);
4884 else if(cmd
== GF_RESET
){
4885 if(PASS_HTML(hd
->html_data
)){
4887 char **style
= NULL
, *text
= NULL
, *bgcolor
= NULL
, *pcs
;
4889 /* modify any attributes in a useful way? */
4890 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4894 if(!strucmp(p
->attribute
, "style"))
4896 else if(!strucmp(p
->attribute
, "text"))
4899 * bgcolor NOT passed since user setting takes precedence
4901 else if(!strucmp(p->attribute, "bgcolor"))
4906 /* colors pretty much it */
4907 if(text
|| bgcolor
){
4909 tp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4910 memset(tp
, 0, sizeof(PARAMETER
));
4911 tp
->next
= HD(hd
->html_data
)->el_data
->attribs
;
4912 HD(hd
->html_data
)->el_data
->attribs
= tp
;
4913 tp
->attribute
= cpystr("style");
4915 tmp_20k_buf
[0] = '\0';
4920 snprintf(tmp_20k_buf
, SIZEOF_20KBUF
, "%s", *style
);
4921 fs_give((void **) style
);
4922 pcs
= "; %s%s%s%s%s";
4925 snprintf(tmp_20k_buf
+ strlen(tmp_20k_buf
),
4926 SIZEOF_20KBUF
- strlen(tmp_20k_buf
),
4928 (text
) ? "color: " : "", (text
) ? text
: "",
4929 (text
&& bgcolor
) ? ";" : "",
4930 (bgcolor
) ? "background-color: " : "", (bgcolor
) ? bgcolor
: "");
4931 *style
= cpystr(tmp_20k_buf
);
4934 html_output_raw_tag(hd
->html_data
, "div");
4937 HD(hd
->html_data
)->body
= 1;
4939 else if(cmd
== GF_EOD
){
4940 if(PASS_HTML(hd
->html_data
)){
4941 html_output_string(hd
->html_data
, "</div>");
4944 HD(hd
->html_data
)->body
= 0;
4947 return(1); /* get linked */
4952 * HTML <A> (Anchor) element handler
4955 html_a(HANDLER_S
*hd
, int ch
, int cmd
)
4958 html_handoff(hd
, ch
);
4960 if(hd
->dp
) /* remember text within anchor tags */
4961 so_writec(ch
, (STORE_S
*) hd
->dp
);
4963 else if(cmd
== GF_RESET
){
4967 PARAMETER
*p
, *href
= NULL
, *name
= NULL
;
4970 * Pending Anchor!?!?
4971 * space insertion/line breaking that's yet to get done...
4973 if(HD(hd
->html_data
)->prefix
){
4974 dprint((2, "-- html error: nested or unterminated anchor\n"));
4979 * Look for valid Anchor data vis the filter installer's parms
4980 * (e.g., Only allow references to our internal URLs if asked)
4982 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4985 if(!strucmp(p
->attribute
, "HREF")
4987 && (HANDLES_LOC(hd
->html_data
)
4988 || struncmp(p
->value
, "x-alpine-", 9)
4989 || struncmp(p
->value
, "x-pine-help", 11)
4990 || p
->value
[0] == '#'))
4992 else if(!strucmp(p
->attribute
, "NAME"))
4995 if(DO_HANDLES(hd
->html_data
) && (href
|| name
)){
4996 h
= new_handle(HANDLESP(hd
->html_data
));
4999 * Enhancement: we might want to get fancier and parse the
5000 * href a bit further such that we can launch images using
5001 * our image viewer, or browse local files or directories
5002 * with our internal tools. Of course, having the jump-off
5003 * point into text/html always be the defined "web-browser",
5004 * just might be the least confusing UI-wise...
5008 if(name
&& name
->value
)
5009 h
->h
.url
.name
= cpystr(name
->value
);
5012 * Prepare to build embedded prefix...
5014 HD(hd
->html_data
)->prefix
= (int *) fs_get(64 * sizeof(int));
5018 * Is this something that looks like a URL? If not and
5019 * we were giving some "base" string, proceed ala RFC1808...
5022 if(HTML_BASE(hd
->html_data
) && !rfc1738_scan(href
->value
, &n
)){
5023 html_a_relative(HTML_BASE(hd
->html_data
), href
->value
, h
);
5025 else if(!(NO_RELATIVE(hd
->html_data
) && html_href_relative(href
->value
)))
5026 h
->h
.url
.path
= cpystr(href
->value
);
5028 if(pico_usingcolor()){
5029 char *fg
= NULL
, *bg
= NULL
, *q
;
5031 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5032 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5033 ps_global
->VAR_NORM_FORE_COLOR
))
5034 fg
= ps_global
->VAR_SLCTBL_FORE_COLOR
;
5036 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5037 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5038 ps_global
->VAR_NORM_BACK_COLOR
))
5039 bg
= ps_global
->VAR_SLCTBL_BACK_COLOR
;
5045 * The blacks are just known good colors for testing
5046 * whether the other color is good.
5048 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5049 bg
? bg
: colorx(COL_BLACK
));
5050 if(pico_is_good_colorpair(tmp
)){
5051 q
= color_embed(fg
, bg
);
5053 for(i
= 0; q
[i
]; i
++)
5054 HD(hd
->html_data
)->prefix
[x
++] = q
[i
];
5058 free_color_pair(&tmp
);
5061 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5062 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5065 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5068 HD(hd
->html_data
)->prefix
[x
++] = TAG_EMBED
;
5069 HD(hd
->html_data
)->prefix
[x
++] = TAG_HANDLE
;
5071 snprintf(buf
, sizeof(buf
), "%ld", hd
->x
= h
->key
);
5072 HD(hd
->html_data
)->prefix
[x
++] = n
= strlen(buf
);
5073 for(i
= 0; i
< n
; i
++)
5074 HD(hd
->html_data
)->prefix
[x
++] = buf
[i
];
5076 HD(hd
->html_data
)->prefix_used
= x
;
5078 hd
->dp
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
5081 else if(cmd
== GF_EOD
){
5085 return(1); /* get linked */
5090 html_a_prefix(FILTER_S
*f
)
5094 /* Do this so we don't visit from html_output... */
5095 prefix
= HD(f
)->prefix
;
5096 HD(f
)->prefix
= NULL
;
5098 for(n
= 0; n
< HD(f
)->prefix_used
; n
++)
5099 html_a_output_prefix(f
, prefix
[n
]);
5101 fs_give((void **) &prefix
);
5106 * html_a_finish - house keeping associated with end of link tag
5109 html_a_finish(HANDLER_S
*hd
)
5111 if(DO_HANDLES(hd
->html_data
)){
5112 if(HD(hd
->html_data
)->prefix
){
5113 if(!PASS_HTML(hd
->html_data
)){
5114 char *empty_link
= "[LINK]";
5117 html_a_prefix(hd
->html_data
);
5118 for(i
= 0; empty_link
[i
]; i
++)
5119 html_output(hd
->html_data
, empty_link
[i
]);
5123 if(pico_usingcolor()){
5124 char *fg
= NULL
, *bg
= NULL
, *p
;
5127 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5128 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5129 ps_global
->VAR_NORM_FORE_COLOR
))
5130 fg
= ps_global
->VAR_NORM_FORE_COLOR
;
5132 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5133 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5134 ps_global
->VAR_NORM_BACK_COLOR
))
5135 bg
= ps_global
->VAR_NORM_BACK_COLOR
;
5137 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5138 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5144 * The blacks are just known good colors for testing
5145 * whether the other color is good.
5147 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5148 bg
? bg
: colorx(COL_BLACK
));
5149 if(pico_is_good_colorpair(tmp
)){
5150 p
= color_embed(fg
, bg
);
5152 for(i
= 0; p
[i
]; i
++)
5153 html_output(hd
->html_data
, p
[i
]);
5157 free_color_pair(&tmp
);
5161 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5163 html_output(hd
->html_data
, TAG_EMBED
);
5164 html_output(hd
->html_data
, TAG_HANDLEOFF
);
5166 html_a_output_info(hd
);
5172 * html_a_output_prefix - dump Anchor prefix data
5175 html_a_output_prefix(FILTER_S
*f
, int c
)
5191 * html_a_output_info - dump possibly deceptive link info into text.
5192 * phark the phishers.
5195 html_a_output_info(HANDLER_S
*hd
)
5197 int l
, risky
= 0, hl
= 0, tl
;
5198 char *url
= NULL
, *hn
= NULL
, *txt
;
5201 /* find host anchor references */
5202 if((h
= get_handle(*HANDLESP(hd
->html_data
), (int) hd
->x
)) != NULL
5203 && h
->h
.url
.path
!= NULL
5204 && (hn
= rfc1738_scan(rfc1738_str(url
= cpystr(h
->h
.url
.path
)), &l
)) != NULL
5205 && (hn
= srchstr(hn
,"://")) != NULL
){
5207 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++)
5213 * look over anchor's text to see if there's a
5214 * mismatch between href target and url-ish
5215 * looking text. throw a red flag if so.
5216 * similarly, toss one if the target's referenced
5220 so_writec('\0', (STORE_S
*) hd
->dp
);
5222 if((txt
= (char *) so_text((STORE_S
*) hd
->dp
)) != NULL
5223 && (txt
= rfc1738_scan(txt
, &tl
)) != NULL
5224 && (txt
= srchstr(txt
,"://")) != NULL
){
5226 for(txt
+= 3, tl
= 0; txt
[tl
] && txt
[tl
] != '/' && txt
[tl
] != '?'; tl
++)
5232 /* look for non matching text */
5233 for(l
= 0; l
< tl
&& l
< hl
; l
++)
5234 if(tolower((unsigned char) txt
[l
]) != tolower((unsigned char) hn
[l
])){
5240 so_give((STORE_S
**) &hd
->dp
);
5243 /* look for literal IP, anything possibly encoded or auth specifier */
5247 for(l
= 0; l
< hl
; l
++){
5248 if(hn
[l
] == '@' || hn
[l
] == '%'){
5252 else if(!(hn
[l
] == '.' || isdigit((unsigned char) hn
[l
])))
5260 /* Insert text of link's domain */
5261 if(SHOWSERVER(hd
->html_data
)){
5263 COLOR_PAIR
*col
= NULL
, *colnorm
= NULL
;
5265 html_output(hd
->html_data
, ' ');
5266 html_output(hd
->html_data
, '[');
5268 if(pico_usingcolor()
5269 && ps_global
->VAR_METAMSG_FORE_COLOR
5270 && ps_global
->VAR_METAMSG_BACK_COLOR
5271 && (col
= new_color_pair(ps_global
->VAR_METAMSG_FORE_COLOR
,
5272 ps_global
->VAR_METAMSG_BACK_COLOR
))){
5273 if(!pico_is_good_colorpair(col
))
5274 free_color_pair(&col
);
5277 q
= color_embed(col
->fg
, col
->bg
);
5279 for(l
= 0; q
[l
]; l
++)
5280 html_output(hd
->html_data
, q
[l
]);
5284 for(l
= 0; l
< hl
; l
++)
5285 html_output(hd
->html_data
, hn
[l
]);
5288 if(ps_global
->VAR_NORM_FORE_COLOR
5289 && ps_global
->VAR_NORM_BACK_COLOR
5290 && (colnorm
= new_color_pair(ps_global
->VAR_NORM_FORE_COLOR
,
5291 ps_global
->VAR_NORM_BACK_COLOR
))){
5292 if(!pico_is_good_colorpair(colnorm
))
5293 free_color_pair(&colnorm
);
5296 q
= color_embed(colnorm
->fg
, colnorm
->bg
);
5297 free_color_pair(&colnorm
);
5299 for(l
= 0; q
[l
]; l
++)
5300 html_output(hd
->html_data
, q
[l
]);
5304 free_color_pair(&col
);
5307 html_output(hd
->html_data
, ']');
5312 * if things look OK so far, make sure nothing within
5313 * the url looks too fishy...
5316 && (hn
= rfc1738_scan(hn
, &l
)) != NULL
5317 && (hn
= srchstr(hn
,"://")) != NULL
){
5320 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++){
5322 * auth spec, encoded characters, or possibly non-standard port
5323 * should raise a red flag
5325 if(hn
[hl
] == '@' || hn
[hl
] == '%' || hn
[hl
] == ':'){
5329 else if(!(hn
[hl
] == '.' || isdigit((unsigned char) hn
[hl
])))
5333 /* dotted-dec/raw-int address should cause suspicion as well */
5338 if(risky
&& ((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)
5339 (*((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)();
5341 fs_give((void **) &url
);
5347 * relative_url - put full url path in h based on base and relative url
5350 html_a_relative(char *base_url
, char *rel_url
, HANDLE_S
*h
)
5353 char tmp
[MAILTMPLEN
], *p
, *q
;
5354 char *scheme
= NULL
, *net
= NULL
, *path
= NULL
,
5355 *parms
= NULL
, *query
= NULL
, *frag
= NULL
,
5356 *base_scheme
= NULL
, *base_net_loc
= NULL
,
5357 *base_path
= NULL
, *base_parms
= NULL
,
5358 *base_query
= NULL
, *base_frag
= NULL
,
5359 *rel_scheme
= NULL
, *rel_net_loc
= NULL
,
5360 *rel_path
= NULL
, *rel_parms
= NULL
,
5361 *rel_query
= NULL
, *rel_frag
= NULL
;
5363 /* Rough parse of base URL */
5364 rfc1808_tokens(base_url
, &base_scheme
, &base_net_loc
, &base_path
,
5365 &base_parms
, &base_query
, &base_frag
);
5367 /* Rough parse of this URL */
5368 rfc1808_tokens(rel_url
, &rel_scheme
, &rel_net_loc
, &rel_path
,
5369 &rel_parms
, &rel_query
, &rel_frag
);
5371 scheme
= rel_scheme
; /* defaults */
5377 if(!scheme
&& base_scheme
){
5378 scheme
= base_scheme
;
5384 for(p
= q
= base_path
; /* Drop base path's tail */
5385 (p
= strchr(p
, '/'));
5389 len
= q
- base_path
;
5394 if(len
+ strlen(rel_path
) < sizeof(tmp
)-1){
5396 snprintf(path
= tmp
, sizeof(tmp
), "%.*s", (int) len
, base_path
);
5398 strncpy(tmp
+ len
, rel_path
, sizeof(tmp
)-len
);
5399 tmp
[sizeof(tmp
)-1] = '\0';
5401 /* Follow RFC 1808 "Step 6" */
5402 for(p
= tmp
; (p
= strchr(p
, '.')); )
5405 * a) All occurrences of "./", where "." is a
5406 * complete path segment, are removed.
5410 for(q
= p
; (*q
= *(q
+2)) != '\0'; q
++)
5418 * b) If the path ends with "." as a
5419 * complete path segment, that "." is
5423 if(p
== tmp
|| *(p
-1) == '/')
5431 * c) All occurrences of "<segment>/../",
5432 * where <segment> is a complete path
5433 * segment not equal to "..", are removed.
5434 * Removal of these path segments is
5435 * performed iteratively, removing the
5436 * leftmost matching pattern on each
5437 * iteration, until no matching pattern
5440 * d) If the path ends with "<segment>/..",
5441 * where <segment> is a complete path
5442 * segment not equal to "..", that
5443 * "<segment>/.." is removed.
5447 for(q
= p
- 2; q
> tmp
&& *q
!= '/'; q
--)
5453 if(q
+ 1 == p
/* no "//.." */
5454 || (*q
== '.' /* and "../.." */
5465 for(; (*q
= *(q
+len
)) != '\0'; q
++)
5490 path
= ""; /* lame. */
5504 len
= (scheme
? strlen(scheme
) : 0) + (net
? strlen(net
) : 0)
5505 + (path
? strlen(path
) : 0) + (parms
? strlen(parms
) : 0)
5506 + (query
? strlen(query
) : 0) + (frag
? strlen(frag
) : 0) + 8;
5508 h
->h
.url
.path
= (char *) fs_get(len
* sizeof(char));
5509 snprintf(h
->h
.url
.path
, len
, "%s%s%s%s%s%s%s%s%s%s%s%s",
5510 scheme
? scheme
: "", scheme
? ":" : "",
5511 net
? "//" : "", net
? net
: "",
5512 (path
&& *path
== '/') ? "" : ((path
&& net
) ? "/" : ""),
5514 parms
? ";" : "", parms
? parms
: "",
5515 query
? "?" : "", query
? query
: "",
5516 frag
? "#" : "", frag
? frag
: "");
5519 fs_give((void **) &base_scheme
);
5522 fs_give((void **) &base_net_loc
);
5525 fs_give((void **) &base_path
);
5528 fs_give((void **) &base_parms
);
5531 fs_give((void **) &base_query
);
5534 fs_give((void **) &base_frag
);
5537 fs_give((void **) &rel_scheme
);
5540 fs_give((void **) &rel_net_loc
);
5543 fs_give((void **) &rel_parms
);
5546 fs_give((void **) &rel_query
);
5549 fs_give((void **) &rel_frag
);
5552 fs_give((void **) &rel_path
);
5557 * html_href_relative - href
5560 html_href_relative(char *url
)
5565 for(i
= 0; i
< 32 && url
[i
]; i
++)
5566 if(!(isalpha((unsigned char) url
[i
]) || url
[i
] == '_' || url
[i
] == '-')){
5578 * HTML <UL> (Unordered List) element handler
5581 html_ul(HANDLER_S
*hd
, int ch
, int cmd
)
5584 html_handoff(hd
, ch
);
5586 else if(cmd
== GF_RESET
){
5587 if(PASS_HTML(hd
->html_data
)){
5588 html_output_raw_tag(hd
->html_data
, "ul");
5591 HD(hd
->html_data
)->li_pending
= 1;
5592 html_blank(hd
->html_data
, 0);
5595 else if(cmd
== GF_EOD
){
5596 if(PASS_HTML(hd
->html_data
)){
5597 html_output_string(hd
->html_data
, "</ul>");
5600 html_blank(hd
->html_data
, 0);
5602 if(!HD(hd
->html_data
)->li_pending
)
5603 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5605 HD(hd
->html_data
)->li_pending
= 0;
5609 return(1); /* get linked */
5614 * HTML <OL> (Ordered List) element handler
5617 html_ol(HANDLER_S
*hd
, int ch
, int cmd
)
5620 html_handoff(hd
, ch
);
5622 else if(cmd
== GF_RESET
){
5623 if(PASS_HTML(hd
->html_data
)){
5624 html_output_raw_tag(hd
->html_data
, "ol");
5628 * Signal that we're expecting to see <LI> as our next element
5629 * and set the initial ordered count.
5631 HD(hd
->html_data
)->li_pending
= 1;
5633 html_blank(hd
->html_data
, 0);
5636 else if(cmd
== GF_EOD
){
5637 if(PASS_HTML(hd
->html_data
)){
5638 html_output_string(hd
->html_data
, "</ol>");
5641 html_blank(hd
->html_data
, 0);
5643 if(!HD(hd
->html_data
)->li_pending
)
5644 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5646 HD(hd
->html_data
)->li_pending
= 0;
5650 return(1); /* get linked */
5655 * HTML <MENU> (Menu List) element handler
5658 html_menu(HANDLER_S
*hd
, int ch
, int cmd
)
5661 html_handoff(hd
, ch
);
5663 else if(cmd
== GF_RESET
){
5664 if(PASS_HTML(hd
->html_data
)){
5665 html_output_raw_tag(hd
->html_data
, "menu");
5668 HD(hd
->html_data
)->li_pending
= 1;
5671 else if(cmd
== GF_EOD
){
5672 if(PASS_HTML(hd
->html_data
)){
5673 html_output_string(hd
->html_data
, "</menu>");
5676 html_blank(hd
->html_data
, 0);
5678 if(!HD(hd
->html_data
)->li_pending
)
5679 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5681 HD(hd
->html_data
)->li_pending
= 0;
5685 return(1); /* get linked */
5690 * HTML <DIR> (Directory List) element handler
5693 html_dir(HANDLER_S
*hd
, int ch
, int cmd
)
5696 html_handoff(hd
, ch
);
5698 else if(cmd
== GF_RESET
){
5699 if(PASS_HTML(hd
->html_data
)){
5700 html_output_raw_tag(hd
->html_data
, "dir");
5703 HD(hd
->html_data
)->li_pending
= 1;
5706 else if(cmd
== GF_EOD
){
5707 if(PASS_HTML(hd
->html_data
)){
5708 html_output_string(hd
->html_data
, "</dir>");
5711 html_blank(hd
->html_data
, 0);
5713 if(!HD(hd
->html_data
)->li_pending
)
5714 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5716 HD(hd
->html_data
)->li_pending
= 0;
5720 return(1); /* get linked */
5725 * HTML <LI> (List Item) element handler
5728 html_li(HANDLER_S
*hd
, int ch
, int cmd
)
5731 if(PASS_HTML(hd
->html_data
)){
5732 html_handoff(hd
, ch
);
5735 else if(cmd
== GF_RESET
){
5736 HANDLER_S
*p
, *found
= NULL
;
5739 * There better be an unordered list, ordered list,
5740 * Menu or Directory handler installed
5741 * or else we crap out...
5743 for(p
= HANDLERS(hd
->html_data
); p
; p
= p
->below
)
5744 if(EL(p
)->handler
== html_ul
5745 || EL(p
)->handler
== html_ol
5746 || EL(p
)->handler
== html_menu
5747 || EL(p
)->handler
== html_dir
){
5753 if(PASS_HTML(hd
->html_data
)){
5759 /* Start a new line */
5760 html_blank(hd
->html_data
, 0);
5762 /* adjust indent level if needed */
5763 if(HD(hd
->html_data
)->li_pending
){
5764 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5765 HD(hd
->html_data
)->li_pending
= 0;
5768 if(EL(found
)->handler
== html_ul
){
5769 int l
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5771 strncpy(buf
, " ", sizeof(buf
));
5772 buf
[1] = (l
< 5) ? '*' : (l
< 9) ? '+' : (l
< 17) ? 'o' : '#';
5774 else if(EL(found
)->handler
== html_ol
)
5775 snprintf(buf
, sizeof(buf
), "%2ld.", found
->x
++);
5776 else if(EL(found
)->handler
== html_menu
){
5777 strncpy(buf
, " ->", sizeof(buf
));
5778 buf
[sizeof(buf
)-1] = '\0';
5781 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5783 /* So we don't munge whitespace */
5784 wrapstate
= HD(hd
->html_data
)->wrapstate
;
5785 HD(hd
->html_data
)->wrapstate
= 0;
5787 html_write_indent(hd
->html_data
, HD(hd
->html_data
)->indent_level
);
5788 for(p
= buf
; *p
; p
++)
5789 html_output(hd
->html_data
, (int) *p
);
5791 HD(hd
->html_data
)->wrapstate
= wrapstate
;
5792 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5794 /* else BUG: should really bitch about this */
5797 if(PASS_HTML(hd
->html_data
)){
5798 html_output_raw_tag(hd
->html_data
, "li");
5799 return(1); /* get linked */
5802 else if(cmd
== GF_EOD
){
5803 if(PASS_HTML(hd
->html_data
)){
5804 html_output_string(hd
->html_data
, "</li>");
5808 return(PASS_HTML(hd
->html_data
)); /* DON'T get linked */
5813 * HTML <DL> (Definition List) element handler
5816 html_dl(HANDLER_S
*hd
, int ch
, int cmd
)
5819 html_handoff(hd
, ch
);
5821 else if(cmd
== GF_RESET
){
5822 if(PASS_HTML(hd
->html_data
)){
5823 html_output_raw_tag(hd
->html_data
, "dl");
5827 * Set indention level for definition terms and definitions...
5829 hd
->x
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5834 else if(cmd
== GF_EOD
){
5835 if(PASS_HTML(hd
->html_data
)){
5836 html_output_string(hd
->html_data
, "</dl>");
5839 html_indent(hd
->html_data
, (int) hd
->x
, HTML_ID_SET
);
5840 html_blank(hd
->html_data
, 1);
5844 return(1); /* get linked */
5849 * HTML <DT> (Definition Term) element handler
5852 html_dt(HANDLER_S
*hd
, int ch
, int cmd
)
5854 if(PASS_HTML(hd
->html_data
)){
5856 html_handoff(hd
, ch
);
5858 else if(cmd
== GF_RESET
){
5859 html_output_raw_tag(hd
->html_data
, "dt");
5861 else if(cmd
== GF_EOD
){
5862 html_output_string(hd
->html_data
, "</dt>");
5865 return(1); /* get linked */
5868 if(cmd
== GF_RESET
){
5872 * There better be a Definition Handler installed
5873 * or else we crap out...
5875 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5878 if(p
){ /* adjust indent level if needed */
5879 html_indent(hd
->html_data
, (int) p
->y
, HTML_ID_SET
);
5880 html_blank(hd
->html_data
, 1);
5882 /* BUG: else should really bitch about this */
5885 return(0); /* DON'T get linked */
5890 * HTML <DD> (Definition Definition) element handler
5893 html_dd(HANDLER_S
*hd
, int ch
, int cmd
)
5895 if(PASS_HTML(hd
->html_data
)){
5897 html_handoff(hd
, ch
);
5899 else if(cmd
== GF_RESET
){
5900 html_output_raw_tag(hd
->html_data
, "dd");
5902 else if(cmd
== GF_EOD
){
5903 html_output_string(hd
->html_data
, "</dd>");
5906 return(1); /* get linked */
5909 if(cmd
== GF_RESET
){
5913 * There better be a Definition Handler installed
5914 * or else we crap out...
5916 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5919 if(p
){ /* adjust indent level if needed */
5920 html_indent(hd
->html_data
, (int) p
->z
, HTML_ID_SET
);
5921 html_blank(hd
->html_data
, 0);
5923 /* BUG: should really bitch about this */
5926 return(0); /* DON'T get linked */
5931 * HTML <H1> (Headings 1) element handler.
5933 * Bold, very-large font, CENTERED. One or two blank lines
5934 * above and below. For our silly character cells that
5935 * means centered and ALL CAPS...
5938 html_h1(HANDLER_S
*hd
, int ch
, int cmd
)
5941 html_handoff(hd
, ch
);
5943 else if(cmd
== GF_RESET
){
5944 if(PASS_HTML(hd
->html_data
)){
5945 html_output_raw_tag(hd
->html_data
, "h1");
5948 /* turn ON the centered bit */
5949 CENTER_BIT(hd
->html_data
) = 1;
5952 else if(cmd
== GF_EOD
){
5953 if(PASS_HTML(hd
->html_data
)){
5954 html_output_string(hd
->html_data
, "</h1>");
5957 /* turn OFF the centered bit, add blank line */
5958 CENTER_BIT(hd
->html_data
) = 0;
5959 html_blank(hd
->html_data
, 1);
5963 return(1); /* get linked */
5968 * HTML <H2> (Headings 2) element handler
5971 html_h2(HANDLER_S
*hd
, int ch
, int cmd
)
5974 if(PASS_HTML(hd
->html_data
)){
5975 html_handoff(hd
, ch
);
5978 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
5979 HTML_ULINE(hd
->html_data
, 1);
5980 hd
->x
^= HTML_HX_ULINE
; /* only once! */
5983 html_handoff(hd
, (ch
< 128 && islower((unsigned char) ch
))
5984 ? toupper((unsigned char) ch
) : ch
);
5987 else if(cmd
== GF_RESET
){
5988 if(PASS_HTML(hd
->html_data
)){
5989 html_output_raw_tag(hd
->html_data
, "h2");
5993 * Bold, large font, flush-left. One or two blank lines
5996 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
5997 hd
->x
= HTML_HX_CENTER
;
6001 hd
->x
|= HTML_HX_ULINE
;
6003 CENTER_BIT(hd
->html_data
) = 0;
6004 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6005 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6006 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6007 html_blank(hd
->html_data
, 1);
6010 else if(cmd
== GF_EOD
){
6011 if(PASS_HTML(hd
->html_data
)){
6012 html_output_string(hd
->html_data
, "</h2>");
6016 * restore previous centering, and indent level
6018 if(!(hd
->x
& HTML_HX_ULINE
))
6019 HTML_ULINE(hd
->html_data
, 0);
6021 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6022 html_blank(hd
->html_data
, 1);
6023 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6024 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6028 return(1); /* get linked */
6033 * HTML <H3> (Headings 3) element handler
6036 html_h3(HANDLER_S
*hd
, int ch
, int cmd
)
6039 if(!PASS_HTML(hd
->html_data
)){
6040 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
6041 HTML_ULINE(hd
->html_data
, 1);
6042 hd
->x
^= HTML_HX_ULINE
; /* only once! */
6046 html_handoff(hd
, ch
);
6048 else if(cmd
== GF_RESET
){
6049 if(PASS_HTML(hd
->html_data
)){
6050 html_output_raw_tag(hd
->html_data
, "h3");
6054 * Italic, large font, slightly indented from the left
6055 * margin. One or two blank lines above and below.
6057 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
6058 hd
->x
= HTML_HX_CENTER
;
6062 hd
->x
|= HTML_HX_ULINE
;
6063 CENTER_BIT(hd
->html_data
) = 0;
6064 hd
->y
= html_indent(hd
->html_data
, 2, HTML_ID_SET
);
6065 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6066 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6067 html_blank(hd
->html_data
, 1);
6070 else if(cmd
== GF_EOD
){
6071 if(PASS_HTML(hd
->html_data
)){
6072 html_output_string(hd
->html_data
, "</h3>");
6076 * restore previous centering, and indent level
6078 if(!(hd
->x
& HTML_HX_ULINE
))
6079 HTML_ULINE(hd
->html_data
, 0);
6081 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6082 html_blank(hd
->html_data
, 1);
6083 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6084 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6088 return(1); /* get linked */
6093 * HTML <H4> (Headings 4) element handler
6096 html_h4(HANDLER_S
*hd
, int ch
, int cmd
)
6099 html_handoff(hd
, ch
);
6101 else if(cmd
== GF_RESET
){
6102 if(PASS_HTML(hd
->html_data
)){
6103 html_output_raw_tag(hd
->html_data
, "h4");
6107 * Bold, normal font, indented more than H3. One blank line
6110 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6111 CENTER_BIT(hd
->html_data
) = 0;
6112 hd
->y
= html_indent(hd
->html_data
, 4, HTML_ID_SET
);
6113 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6114 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6115 html_blank(hd
->html_data
, 1);
6118 else if(cmd
== GF_EOD
){
6119 if(PASS_HTML(hd
->html_data
)){
6120 html_output_string(hd
->html_data
, "</h4>");
6124 * restore previous centering, and indent level
6126 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6127 html_blank(hd
->html_data
, 1);
6128 CENTER_BIT(hd
->html_data
) = hd
->x
;
6129 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6133 return(1); /* get linked */
6138 * HTML <H5> (Headings 5) element handler
6141 html_h5(HANDLER_S
*hd
, int ch
, int cmd
)
6144 html_handoff(hd
, ch
);
6146 else if(cmd
== GF_RESET
){
6147 if(PASS_HTML(hd
->html_data
)){
6148 html_output_raw_tag(hd
->html_data
, "h5");
6152 * Italic, normal font, indented as H4. One blank line
6155 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6156 CENTER_BIT(hd
->html_data
) = 0;
6157 hd
->y
= html_indent(hd
->html_data
, 6, HTML_ID_SET
);
6158 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6159 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6160 html_blank(hd
->html_data
, 1);
6163 else if(cmd
== GF_EOD
){
6164 if(PASS_HTML(hd
->html_data
)){
6165 html_output_string(hd
->html_data
, "</h5>");
6169 * restore previous centering, and indent level
6171 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6172 html_blank(hd
->html_data
, 1);
6173 CENTER_BIT(hd
->html_data
) = hd
->x
;
6174 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6178 return(1); /* get linked */
6183 * HTML <H6> (Headings 6) element handler
6186 html_h6(HANDLER_S
*hd
, int ch
, int cmd
)
6189 html_handoff(hd
, ch
);
6191 else if(cmd
== GF_RESET
){
6192 if(PASS_HTML(hd
->html_data
)){
6193 html_output_raw_tag(hd
->html_data
, "h6");
6197 * Bold, indented same as normal text, more than H5. One
6200 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6201 CENTER_BIT(hd
->html_data
) = 0;
6202 hd
->y
= html_indent(hd
->html_data
, 8, HTML_ID_SET
);
6203 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6204 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6205 html_blank(hd
->html_data
, 1);
6208 else if(cmd
== GF_EOD
){
6209 if(PASS_HTML(hd
->html_data
)){
6210 html_output_string(hd
->html_data
, "</h6>");
6214 * restore previous centering, and indent level
6216 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6217 html_blank(hd
->html_data
, 1);
6218 CENTER_BIT(hd
->html_data
) = hd
->x
;
6219 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6223 return(1); /* get linked */
6228 * HTML <BlockQuote> element handler
6231 html_blockquote(HANDLER_S
*hd
, int ch
, int cmd
)
6234 #define HTML_BQ_INDENT 6
6237 html_handoff(hd
, ch
);
6239 else if(cmd
== GF_RESET
){
6240 if(PASS_HTML(hd
->html_data
)){
6241 html_output_raw_tag(hd
->html_data
, "blockquote");
6245 * A typical rendering might be a slight extra left and
6246 * right indent, and/or italic font. The Blockquote element
6247 * causes a paragraph break, and typically provides space
6248 * above and below the quote.
6250 html_indent(hd
->html_data
, HTML_BQ_INDENT
, HTML_ID_INC
);
6251 j
= HD(hd
->html_data
)->wrapstate
;
6252 HD(hd
->html_data
)->wrapstate
= 0;
6253 html_blank(hd
->html_data
, 1);
6254 HD(hd
->html_data
)->wrapstate
= j
;
6255 HD(hd
->html_data
)->wrapcol
-= HTML_BQ_INDENT
;
6258 else if(cmd
== GF_EOD
){
6259 if(PASS_HTML(hd
->html_data
)){
6260 html_output_string(hd
->html_data
, "</blockquote>");
6263 html_blank(hd
->html_data
, 1);
6265 j
= HD(hd
->html_data
)->wrapstate
;
6266 HD(hd
->html_data
)->wrapstate
= 0;
6267 html_indent(hd
->html_data
, -(HTML_BQ_INDENT
), HTML_ID_INC
);
6268 HD(hd
->html_data
)->wrapstate
= j
;
6269 HD(hd
->html_data
)->wrapcol
+= HTML_BQ_INDENT
;
6273 return(1); /* get linked */
6278 * HTML <Address> element handler
6281 html_address(HANDLER_S
*hd
, int ch
, int cmd
)
6284 #define HTML_ADD_INDENT 2
6287 html_handoff(hd
, ch
);
6289 else if(cmd
== GF_RESET
){
6290 if(PASS_HTML(hd
->html_data
)){
6291 html_output_raw_tag(hd
->html_data
, "address");
6295 * A typical rendering might be a slight extra left and
6296 * right indent, and/or italic font. The Blockquote element
6297 * causes a paragraph break, and typically provides space
6298 * above and below the quote.
6300 html_indent(hd
->html_data
, HTML_ADD_INDENT
, HTML_ID_INC
);
6301 j
= HD(hd
->html_data
)->wrapstate
;
6302 HD(hd
->html_data
)->wrapstate
= 0;
6303 html_blank(hd
->html_data
, 1);
6304 HD(hd
->html_data
)->wrapstate
= j
;
6307 else if(cmd
== GF_EOD
){
6308 if(PASS_HTML(hd
->html_data
)){
6309 html_output_string(hd
->html_data
, "</address>");
6312 html_blank(hd
->html_data
, 1);
6314 j
= HD(hd
->html_data
)->wrapstate
;
6315 HD(hd
->html_data
)->wrapstate
= 0;
6316 html_indent(hd
->html_data
, -(HTML_ADD_INDENT
), HTML_ID_INC
);
6317 HD(hd
->html_data
)->wrapstate
= j
;
6321 return(1); /* get linked */
6326 * HTML <PRE> (Preformatted Text) element handler
6329 html_pre(HANDLER_S
*hd
, int ch
, int cmd
)
6333 * remove CRLF after '>' in element.
6334 * We see CRLF because wrapstate is off.
6343 html_handoff(hd
, '\015');
6354 /* passing tags? replace CRLF with <BR> to make
6355 * sure hard newline survives in the end...
6357 if(PASS_HTML(hd
->html_data
))
6358 hd
->y
= 4; /* keep looking for CRLF */
6360 hd
->y
= 0; /* stop looking */
6375 html_output_string(hd
->html_data
, "<br />");
6379 html_handoff(hd
, '\015'); /* not CRLF, pass raw CR */
6383 default : /* zero case */
6387 html_handoff(hd
, ch
);
6389 else if(cmd
== GF_RESET
){
6391 if(PASS_HTML(hd
->html_data
)){
6392 html_output_raw_tag(hd
->html_data
, "pre");
6396 hd
->html_data
->f1
= DFL
; \
6398 html_blank(hd
->html_data
, 1);
6399 hd
->x
= HD(hd
->html_data
)->wrapstate
;
6400 HD(hd
->html_data
)->wrapstate
= 0;
6403 else if(cmd
== GF_EOD
){
6404 if(PASS_HTML(hd
->html_data
)){
6405 html_output_string(hd
->html_data
, "</pre>");
6408 HD(hd
->html_data
)->wrapstate
= (hd
->x
!= 0);
6409 html_blank(hd
->html_data
, 0);
6418 * HTML <CENTER> (Centered Text) element handler
6421 html_center(HANDLER_S
*hd
, int ch
, int cmd
)
6424 html_handoff(hd
, ch
);
6426 else if(cmd
== GF_RESET
){
6427 if(PASS_HTML(hd
->html_data
)){
6428 html_output_raw_tag(hd
->html_data
, "center");
6431 /* turn ON the centered bit */
6432 CENTER_BIT(hd
->html_data
) = 1;
6435 else if(cmd
== GF_EOD
){
6436 if(PASS_HTML(hd
->html_data
)){
6437 html_output_string(hd
->html_data
, "</center>");
6440 /* turn OFF the centered bit */
6441 CENTER_BIT(hd
->html_data
) = 0;
6450 * HTML <DIV> (Document Divisions) element handler
6453 html_div(HANDLER_S
*hd
, int ch
, int cmd
)
6456 html_handoff(hd
, ch
);
6458 else if(cmd
== GF_RESET
){
6459 if(PASS_HTML(hd
->html_data
)){
6460 html_output_raw_tag(hd
->html_data
, "div");
6465 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6468 if(!strucmp(p
->attribute
, "ALIGN")){
6470 /* remember previous values */
6471 hd
->x
= CENTER_BIT(hd
->html_data
);
6472 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
6474 html_blank(hd
->html_data
, 0);
6475 CENTER_BIT(hd
->html_data
) = !strucmp(p
->value
, "CENTER");
6476 html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6477 /* NOTE: "RIGHT" not supported yet */
6482 else if(cmd
== GF_EOD
){
6483 if(PASS_HTML(hd
->html_data
)){
6484 html_output_string(hd
->html_data
, "</div>");
6487 /* restore centered bit and indentiousness */
6488 CENTER_BIT(hd
->html_data
) = hd
->y
;
6489 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6490 html_blank(hd
->html_data
, 0);
6499 * HTML <SPAN> (Text Span) element handler
6502 html_span(HANDLER_S
*hd
, int ch
, int cmd
)
6504 if(PASS_HTML(hd
->html_data
)){
6506 html_handoff(hd
, ch
);
6508 else if(cmd
== GF_RESET
){
6509 html_output_raw_tag(hd
->html_data
, "span");
6511 else if(cmd
== GF_EOD
){
6512 html_output_string(hd
->html_data
, "</span>");
6523 * HTML <KBD> (Text Kbd) element handler
6526 html_kbd(HANDLER_S
*hd
, int ch
, int cmd
)
6528 if(PASS_HTML(hd
->html_data
)){
6530 html_handoff(hd
, ch
);
6532 else if(cmd
== GF_RESET
){
6533 html_output_raw_tag(hd
->html_data
, "kbd");
6535 else if(cmd
== GF_EOD
){
6536 html_output_string(hd
->html_data
, "</kbd>");
6547 * HTML <DFN> (Text Definition) element handler
6550 html_dfn(HANDLER_S
*hd
, int ch
, int cmd
)
6552 if(PASS_HTML(hd
->html_data
)){
6554 html_handoff(hd
, ch
);
6556 else if(cmd
== GF_RESET
){
6557 html_output_raw_tag(hd
->html_data
, "dfn");
6559 else if(cmd
== GF_EOD
){
6560 html_output_string(hd
->html_data
, "</dfn>");
6571 * HTML <TT> (Text Tt) element handler
6574 html_tt(HANDLER_S
*hd
, int ch
, int cmd
)
6576 if(PASS_HTML(hd
->html_data
)){
6578 html_handoff(hd
, ch
);
6580 else if(cmd
== GF_RESET
){
6581 html_output_raw_tag(hd
->html_data
, "tt");
6583 else if(cmd
== GF_EOD
){
6584 html_output_string(hd
->html_data
, "</tt>");
6595 * HTML <VAR> (Text Var) element handler
6598 html_var(HANDLER_S
*hd
, int ch
, int cmd
)
6600 if(PASS_HTML(hd
->html_data
)){
6602 html_handoff(hd
, ch
);
6604 else if(cmd
== GF_RESET
){
6605 html_output_raw_tag(hd
->html_data
, "var");
6607 else if(cmd
== GF_EOD
){
6608 html_output_string(hd
->html_data
, "</var>");
6619 * HTML <SAMP> (Text Samp) element handler
6622 html_samp(HANDLER_S
*hd
, int ch
, int cmd
)
6624 if(PASS_HTML(hd
->html_data
)){
6626 html_handoff(hd
, ch
);
6628 else if(cmd
== GF_RESET
){
6629 html_output_raw_tag(hd
->html_data
, "samp");
6631 else if(cmd
== GF_EOD
){
6632 html_output_string(hd
->html_data
, "</samp>");
6643 * HTML <SUP> (Text Superscript) element handler
6646 html_sup(HANDLER_S
*hd
, int ch
, int cmd
)
6648 if(PASS_HTML(hd
->html_data
)){
6650 html_handoff(hd
, ch
);
6652 else if(cmd
== GF_RESET
){
6653 html_output_raw_tag(hd
->html_data
, "sup");
6655 else if(cmd
== GF_EOD
){
6656 html_output_string(hd
->html_data
, "</sup>");
6667 * HTML <SUB> (Text Subscript) element handler
6670 html_sub(HANDLER_S
*hd
, int ch
, int cmd
)
6672 if(PASS_HTML(hd
->html_data
)){
6674 html_handoff(hd
, ch
);
6676 else if(cmd
== GF_RESET
){
6677 html_output_raw_tag(hd
->html_data
, "sub");
6679 else if(cmd
== GF_EOD
){
6680 html_output_string(hd
->html_data
, "</sub>");
6691 * HTML <CITE> (Text Citation) element handler
6694 html_cite(HANDLER_S
*hd
, int ch
, int cmd
)
6696 if(PASS_HTML(hd
->html_data
)){
6698 html_handoff(hd
, ch
);
6700 else if(cmd
== GF_RESET
){
6701 html_output_raw_tag(hd
->html_data
, "cite");
6703 else if(cmd
== GF_EOD
){
6704 html_output_string(hd
->html_data
, "</cite>");
6715 * HTML <CODE> (Text Code) element handler
6718 html_code(HANDLER_S
*hd
, int ch
, int cmd
)
6720 if(PASS_HTML(hd
->html_data
)){
6722 html_handoff(hd
, ch
);
6724 else if(cmd
== GF_RESET
){
6725 html_output_raw_tag(hd
->html_data
, "code");
6727 else if(cmd
== GF_EOD
){
6728 html_output_string(hd
->html_data
, "</code>");
6739 * HTML <INS> (Text Inserted) element handler
6742 html_ins(HANDLER_S
*hd
, int ch
, int cmd
)
6744 if(PASS_HTML(hd
->html_data
)){
6746 html_handoff(hd
, ch
);
6748 else if(cmd
== GF_RESET
){
6749 html_output_raw_tag(hd
->html_data
, "ins");
6751 else if(cmd
== GF_EOD
){
6752 html_output_string(hd
->html_data
, "</ins>");
6763 * HTML <DEL> (Text Deleted) element handler
6766 html_del(HANDLER_S
*hd
, int ch
, int cmd
)
6768 if(PASS_HTML(hd
->html_data
)){
6770 html_handoff(hd
, ch
);
6772 else if(cmd
== GF_RESET
){
6773 html_output_raw_tag(hd
->html_data
, "del");
6775 else if(cmd
== GF_EOD
){
6776 html_output_string(hd
->html_data
, "</del>");
6787 * HTML <ABBR> (Text Abbreviation) element handler
6790 html_abbr(HANDLER_S
*hd
, int ch
, int cmd
)
6792 if(PASS_HTML(hd
->html_data
)){
6794 html_handoff(hd
, ch
);
6796 else if(cmd
== GF_RESET
){
6797 html_output_raw_tag(hd
->html_data
, "abbr");
6799 else if(cmd
== GF_EOD
){
6800 html_output_string(hd
->html_data
, "</abbr>");
6811 * HTML <SCRIPT> element handler
6814 html_script(HANDLER_S
*hd
, int ch
, int cmd
)
6816 /* Link in and drop everything within on the floor */
6822 * HTML <APPLET> element handler
6825 html_applet(HANDLER_S
*hd
, int ch
, int cmd
)
6827 /* Link in and drop everything within on the floor */
6833 * HTML <STYLE> CSS element handler
6836 html_style(HANDLER_S
*hd
, int ch
, int cmd
)
6838 static STORE_S
*css_stuff
;
6840 if(PASS_HTML(hd
->html_data
)){
6842 /* collect style settings */
6843 so_writec(ch
, css_stuff
);
6845 else if(cmd
== GF_RESET
){
6847 so_give(&css_stuff
);
6849 css_stuff
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6851 else if(cmd
== GF_EOD
){
6853 * TODO: strip anything mischievous and pass on
6856 so_give(&css_stuff
);
6864 * RSS 2.0 <RSS> version
6867 rss_rss(HANDLER_S
*hd
, int ch
, int cmd
)
6869 if(cmd
== GF_RESET
){
6872 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6875 if(!strucmp(p
->attribute
, "VERSION")){
6876 if(p
->value
&& !strucmp(p
->value
,"2.0"))
6877 return(0); /* do not link in */
6880 gf_error("Incompatible RSS version");
6884 return(0); /* not linked or error means we never get here */
6891 rss_channel(HANDLER_S
*hd
, int ch
, int cmd
)
6894 html_handoff(hd
, ch
);
6896 else if(cmd
== GF_RESET
){
6899 feed
= RSS_FEED(hd
->html_data
) = fs_get(sizeof(RSS_FEED_S
));
6900 memset(feed
, 0, sizeof(RSS_FEED_S
));
6903 return(1); /* link in */
6910 rss_title(HANDLER_S
*hd
, int ch
, int cmd
)
6912 static STORE_S
*title_so
;
6917 so_writec(ch
, title_so
);
6920 else if(cmd
== GF_RESET
){
6921 if(RSS_FEED(hd
->html_data
)){
6922 /* prepare for data */
6926 title_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6929 else if(cmd
== GF_EOD
){
6931 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
6935 if((rip
= feed
->items
) != NULL
){
6936 for(; rip
->next
; rip
= rip
->next
)
6940 fs_give((void **) &rip
->title
);
6942 rip
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
6946 fs_give((void **) &feed
->title
);
6948 feed
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
6956 return(1); /* link in */
6963 rss_image(HANDLER_S
*hd
, int ch
, int cmd
)
6965 static STORE_S
*img_so
;
6970 so_writec(ch
, img_so
);
6973 else if(cmd
== GF_RESET
){
6974 if(RSS_FEED(hd
->html_data
)){
6975 /* prepare to collect data */
6979 img_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6982 else if(cmd
== GF_EOD
){
6984 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
6988 fs_give((void **) &feed
->image
);
6990 feed
->image
= cpystr(rss_skip_whitespace(so_text(img_so
)));
6997 return(1); /* link in */
7004 rss_link(HANDLER_S
*hd
, int ch
, int cmd
)
7006 static STORE_S
*link_so
;
7011 so_writec(ch
, link_so
);
7014 else if(cmd
== GF_RESET
){
7015 if(RSS_FEED(hd
->html_data
)){
7016 /* prepare to collect data */
7020 link_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7023 else if(cmd
== GF_EOD
){
7025 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7029 if((rip
= feed
->items
) != NULL
){
7030 for(; rip
->next
; rip
= rip
->next
)
7034 fs_give((void **) &rip
->link
);
7036 rip
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7040 fs_give((void **) &feed
->link
);
7042 feed
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7050 return(1); /* link in */
7054 * RSS 2.0 <DESCRIPTION>
7057 rss_description(HANDLER_S
*hd
, int ch
, int cmd
)
7059 static STORE_S
*desc_so
;
7064 so_writec(ch
, desc_so
);
7067 else if(cmd
== GF_RESET
){
7068 if(RSS_FEED(hd
->html_data
)){
7069 /* prepare to collect data */
7073 desc_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7076 else if(cmd
== GF_EOD
){
7078 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7082 if((rip
= feed
->items
) != NULL
){
7083 for(; rip
->next
; rip
= rip
->next
)
7086 if(rip
->description
)
7087 fs_give((void **) &rip
->description
);
7089 rip
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7092 if(feed
->description
)
7093 fs_give((void **) &feed
->description
);
7095 feed
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7103 return(1); /* link in */
7107 * RSS 2.0 <TTL> (in minutes)
7110 rss_ttl(HANDLER_S
*hd
, int ch
, int cmd
)
7112 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7115 if(isdigit((unsigned char) ch
))
7116 feed
->ttl
= ((feed
->ttl
* 10) + (ch
- '0'));
7118 else if(cmd
== GF_RESET
){
7119 /* prepare to collect data */
7122 else if(cmd
== GF_EOD
){
7125 return(1); /* link in */
7132 rss_item(HANDLER_S
*hd
, int ch
, int cmd
)
7134 /* BUG: verify no ITEM nesting? */
7135 if(cmd
== GF_RESET
){
7138 if((feed
= RSS_FEED(hd
->html_data
)) != NULL
){
7142 for(rip
= &feed
->items
; *rip
; rip
= &(*rip
)->next
)
7143 if(++n
> RSS_ITEM_LIMIT
)
7146 *rip
= fs_get(sizeof(RSS_ITEM_S
));
7147 memset(*rip
, 0, sizeof(RSS_ITEM_S
));
7151 return(0); /* don't link in */
/*
 * rss_skip_whitespace - step past leading whitespace in a string.
 *
 *   s - NUL-terminated string, or NULL
 *
 * Returns a pointer into s at its first non-whitespace character (the
 * terminating NUL if s is all whitespace).  Nothing is copied or
 * modified.  A NULL argument is handed back unchanged so callers that
 * pass a possibly-empty store's text straight through do not crash.
 */
char *
rss_skip_whitespace(char *s)
{
    if(!s)
      return(s);

    /* cast keeps isspace() defined for bytes above 0x7f */
    while(*s && isspace((unsigned char) *s))
      s++;

    return(s);
}
7166 * return the function associated with the given element name
7169 element_properties(FILTER_S
*fd
, char *el_name
)
7171 register ELPROP_S
*el_table
= ELEMENTS(fd
);
7172 size_t len_name
= strlen(el_name
);
7174 for(; el_table
->element
; el_table
++)
7175 if(!strucmp(el_name
, el_table
->element
)
7176 || (el_table
->alternate
7177 && len_name
== el_table
->len
+ 1
7178 && el_name
[el_table
->len
] == '/'
7179 && !struncmp(el_name
, el_table
->element
, el_table
->len
)))
7187 * collect element's name and any attribute/value pairs then
7188 * dispatch to the appropriate handler.
7190 * Returns 1 : got what we wanted
7191 * 0 : we need more data
7195 html_element_collector(FILTER_S
*fd
, int ch
)
7198 if(ED(fd
)->overrun
){
7200 * If problem processing, don't bother doing anything
7201 * internally, just return such that none of what we've
7202 * digested is displayed.
7204 HTML_DEBUG_EL("too long", ED(fd
));
7205 return(1); /* Let it go, Jim */
7207 else if(ED(fd
)->mkup_decl
){
7208 if(ED(fd
)->badform
){
7209 dprint((2, "-- html error: bad form: %.*s\n",
7210 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7212 * Invalid comment -- make some guesses as
7213 * to whether we should stop with this greater-than...
7215 if(ED(fd
)->buf
[0] != '-'
7217 || (ED(fd
)->buf
[1] == '-'
7218 && ED(fd
)->buf
[ED(fd
)->len
- 1] == '-'
7219 && ED(fd
)->buf
[ED(fd
)->len
- 2] == '-'))
7223 dprint((5, "-- html: OK: %.*s\n",
7224 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7225 if(ED(fd
)->start_comment
== ED(fd
)->end_comment
){
7226 if(ED(fd
)->len
> 10){
7227 ED(fd
)->buf
[ED(fd
)->len
- 2] = '\0';
7228 html_element_comment(fd
, ED(fd
)->buf
+ 2);
7233 /* else keep collecting comment below */
7236 else if(ED(fd
)->proc_inst
){
7237 return(1); /* return without display... */
7239 else if(!ED(fd
)->quoted
|| ED(fd
)->badform
){
7243 * We either have the whole thing or all that we could
7244 * salvage from it. Try our best...
7247 if(HD(fd
)->bitbucket
)
7248 return(1); /* element inside chtml clause! */
7250 if(!ED(fd
)->badform
&& html_element_flush(ED(fd
)))
7251 return(1); /* return without display... */
7254 * If we ran into an empty tag or we don't know how to deal
7255 * with it, just go on, ignoring it...
7257 if(ED(fd
)->element
&& (ep
= element_properties(fd
, ED(fd
)->element
))){
7259 /* dispatch the element's handler */
7260 HTML_DEBUG_EL(ED(fd
)->end_tag
? "POP" : "PUSH", ED(fd
));
7261 if(ED(fd
)->end_tag
){
7262 html_pop(fd
, ep
); /* remove it's handler */
7265 /* if a block element, pop any open <p>'s */
7269 for(tp
= HANDLERS(fd
); tp
&& EL(tp
)->handler
== html_p
; tp
= tp
->below
){
7270 HTML_DEBUG_EL("Unclosed <P>", ED(fd
));
7271 html_pop(fd
, EL(tp
));
7276 /* enforce table nesting */
7277 if(!strucmp(ep
->element
, "tr")){
7278 if(!HANDLERS(fd
) || (strucmp(EL(HANDLERS(fd
))->element
, "table") && strucmp(EL(HANDLERS(fd
))->element
, "tbody") && strucmp(EL(HANDLERS(fd
))->element
, "thead"))){
7279 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd
)) ? EL(HANDLERS(fd
))->element
: "NO-HANDLERS"));
7280 if(HANDLERS(fd
) && !strucmp(EL(HANDLERS(fd
))->element
,"tr")){
7281 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7282 html_pop(fd
, EL(HANDLERS(fd
)));
7285 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7286 html_push(fd
, element_properties(fd
, "table"));
7290 else if(!strucmp(ep
->element
, "td") || !strucmp(ep
->element
, "th")){
7292 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7293 html_push(fd
, element_properties(fd
, "table"));
7294 html_push(fd
, element_properties(fd
, "tr"));
7296 else if(strucmp(EL(HANDLERS(fd
))->element
, "tr")){
7297 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd
))->element
));
7298 html_push(fd
, element_properties(fd
, "tr"));
7300 else if(!strucmp(EL(HANDLERS(fd
))->element
, "td")){
7301 dprint((2, "-- html error: bad nesting popping <TD>"));
7302 html_pop(fd
, EL(HANDLERS(fd
)));
7306 /* add it's handler */
7307 if(html_push(fd
, ep
)){
7309 /* remove empty element */
7316 HTML_DEBUG_EL("IGNORED", ED(fd
));
7319 else{ /* else, empty or unrecognized */
7320 HTML_DEBUG_EL("?", ED(fd
));
7323 return(1); /* all done! see, that didn't hurt */
7326 else if(ch
== '/' && ED(fd
)->element
&& ED(fd
)->len
){
7332 if(ED(fd
)->mkup_decl
){
7333 if((ch
&= 0xff) == '-'){
7336 if(ED(fd
)->start_comment
)
7337 ED(fd
)->end_comment
= 1;
7339 ED(fd
)->start_comment
= 1;
7345 if(ED(fd
)->end_comment
)
7346 ED(fd
)->start_comment
= ED(fd
)->end_comment
= 0;
7349 * no "--" after ! or non-whitespace between comments - bad
7351 if(ED(fd
)->len
< 2 || (!ED(fd
)->start_comment
7352 && !ASCII_ISSPACE((unsigned char) ch
)))
7353 ED(fd
)->badform
= 1; /* non-comment! */
7359 * Remember the comment for possible later processing, if
7360 * it get's too long, remember first and last few chars
7361 * so we know when to terminate (and throw some garbage
7362 * in between when we toss out what's between.
7364 if(ED(fd
)->len
== HTML_BUF_LEN
){
7365 ED(fd
)->buf
[2] = ED(fd
)->buf
[3] = 'X';
7366 ED(fd
)->buf
[4] = ED(fd
)->buf
[ED(fd
)->len
- 2];
7367 ED(fd
)->buf
[5] = ED(fd
)->buf
[ED(fd
)->len
- 1];
7371 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7372 return(0); /* comments go in the bit bucket */
7374 else if(ED(fd
)->overrun
|| ED(fd
)->badform
){
7375 return(0); /* swallow char's until next '>' */
7377 else if(!ED(fd
)->element
&& !ED(fd
)->len
){
7378 if(ch
== '/'){ /* validate leading chars */
7379 ED(fd
)->end_tag
= 1;
7383 ED(fd
)->mkup_decl
= 1;
7387 ED(fd
)->proc_inst
= 1;
7390 else if(!isalpha((unsigned char) ch
))
7391 return(-1); /* can't be a tag! */
7393 else if(ch
== '\"' || ch
== '\''){
7394 if(!ED(fd
)->hit_equal
){
7395 ED(fd
)->badform
= 1; /* quote in element name?!? */
7400 if(ED(fd
)->quoted
== (char) ch
){
7401 /* end of a quoted value */
7403 if(ED(fd
)->len
&& html_element_flush(ED(fd
)))
7404 ED(fd
)->badform
= 1;
7406 return(0); /* continue collecting chars */
7408 /* ELSE fall thru writing other quoting char */
7411 ED(fd
)->quoted
= (char) ch
;
7412 ED(fd
)->was_quoted
= 1;
7413 return(0); /* need more data */
7417 ch
&= 0xff; /* strip any "literal" high bits */
7420 || strchr("#-.!", ch
)){
7421 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7422 ? HTML_BUF_LEN
:MAX_ELEMENT
)){
7423 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7426 ED(fd
)->overrun
= 1; /* flag it broken */
7428 else if(ASCII_ISSPACE((unsigned char) ch
) || ch
== '='){
7429 if((ED(fd
)->len
|| ED(fd
)->was_quoted
) && html_element_flush(ED(fd
))){
7430 ED(fd
)->badform
= 1;
7431 return(0); /* else, we ain't done yet */
7434 if(!ED(fd
)->hit_equal
)
7435 ED(fd
)->hit_equal
= (ch
== '=');
7437 else if(ch
== '/' && ED(fd
)->len
&& !ED(fd
)->element
){
7439 ep
= element_properties(fd
, ED(fd
)->buf
);
7442 ED(fd
)->badform
= 1;
7444 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7445 ? HTML_BUF_LEN
:MAX_ELEMENT
)){
7446 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
; /* add this exception */
7449 ED(fd
)->overrun
= 1;
7453 ED(fd
)->badform
= 1;
7456 ED(fd
)->badform
= 1; /* unrecognized data?? */
7458 return(0); /* keep collecting */
7463 * Element collector found complete string, integrate it and reset
7464 * internal collection buffer.
7466 * Returns zero if element collection buffer flushed, error flag otherwise
7469 html_element_flush(CLCTR_S
*el_data
)
7473 if(el_data
->hit_equal
){ /* adding a value */
7474 el_data
->hit_equal
= 0;
7475 if(el_data
->cur_attrib
){
7476 if(!el_data
->cur_attrib
->value
){
7477 el_data
->cur_attrib
->value
= cpystr(el_data
->len
7478 ? el_data
->buf
: "");
7481 dprint((2, "** element: unexpected value: %.10s...\n",
7482 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7487 dprint((2, "** element: missing attribute name: %.10s...\n",
7488 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7492 else if(el_data
->len
){
7493 if(!el_data
->element
){
7494 el_data
->element
= cpystr(el_data
->buf
);
7497 PARAMETER
*p
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
7498 memset(p
, 0, sizeof(PARAMETER
));
7499 if(el_data
->attribs
){
7500 el_data
->cur_attrib
->next
= p
;
7501 el_data
->cur_attrib
= p
;
7504 el_data
->attribs
= el_data
->cur_attrib
= p
;
7506 p
->attribute
= cpystr(el_data
->buf
);
7511 el_data
->was_quoted
= 0; /* reset collector buf and state */
7513 memset(el_data
->buf
, 0, HTML_BUF_LEN
);
7514 return(rv
); /* report whatever happened above */
7519 * html_element_comment - "Special" comment handling here
7522 html_element_comment(FILTER_S
*f
, char *s
)
7526 while(*s
&& ASCII_ISSPACE((unsigned char) *s
))
7530 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7532 if(!struncmp(s
, "chtml ", 6)){
7534 if(!struncmp(s
, "if ", 3)){
7535 HD(f
)->bitbucket
= 1; /* default is failure! */
7539 if(!struncmp(s
+ 1, "inemode=", 8)){
7540 if(!strucmp(s
= removing_quotes(s
+ 9), "function_key")
7541 && F_ON(F_USE_FK
, ps_global
))
7542 HD(f
)->bitbucket
= 0;
7543 else if(!strucmp(s
, "running"))
7544 HD(f
)->bitbucket
= 0;
7545 else if(!strucmp(s
, "phone_home") && ps_global
->phone_home
)
7546 HD(f
)->bitbucket
= 0;
7548 else if(!strucmp(s
, "os_windows"))
7549 HD(f
)->bitbucket
= 0;
7555 case '[' : /* test */
7556 if((p
= strindex(++s
, ']')) != NULL
){
7557 *p
= '\0'; /* tie off test string */
7558 removing_leading_white_space(s
);
7559 removing_trailing_white_space(s
);
7560 if(*s
== '-' && *(s
+1) == 'r'){ /* readable file? */
7561 for(s
+= 2; *s
&& ASCII_ISSPACE((unsigned char) *s
); s
++)
7565 HD(f
)->bitbucket
= (can_access(CHTML_VAR_EXPAND(removing_quotes(s
)),
7576 else if(!strucmp(s
, "else")){
7577 HD(f
)->bitbucket
= !HD(f
)->bitbucket
;
7579 else if(!strucmp(s
, "endif")){
7580 /* Clean up after chtml here */
7581 HD(f
)->bitbucket
= 0;
7584 else if(!HD(f
)->bitbucket
){
7585 if(!struncmp(s
, "#include ", 9)){
7586 char buf
[MAILTMPLEN
], *bufp
;
7587 int len
, end_of_line
;
7590 /* Include the named file */
7591 if(!struncmp(s
+= 9, "file=", 5)
7592 && (fp
= our_fopen(CHTML_VAR_EXPAND(removing_quotes(s
+5)), "r"))){
7593 html_element_output(f
, HTML_NEWLINE
);
7595 while(fgets(buf
, sizeof(buf
), fp
)){
7596 if((len
= strlen(buf
)) && buf
[len
-1] == '\n'){
7603 for(bufp
= buf
; len
; bufp
++, len
--)
7604 html_element_output(f
, (int) *bufp
);
7607 html_element_output(f
, HTML_NEWLINE
);
7611 html_element_output(f
, HTML_NEWLINE
);
7617 else if(!struncmp(s
, "#echo ", 6)){
7618 if(!struncmp(s
+= 6, "var=", 4)){
7619 char *p
, buf
[MAILTMPLEN
];
7621 extern char datestamp
[];
7623 if(!strcmp(s
= removing_quotes(s
+ 4), "ALPINE_VERSION")){
7626 else if(!strcmp(s
, "ALPINE_REVISION")){
7627 p
= get_alpine_revision_string(buf
, sizeof(buf
));
7629 else if(!strcmp(s
, "C_CLIENT_VERSION")){
7632 else if(!strcmp(s
, "ALPINE_COMPILE_DATE")){
7635 else if(!strcmp(s
, "ALPINE_TODAYS_DATE")){
7636 rfc822_date(p
= buf
);
7638 else if(!strcmp(s
, "_LOCAL_FULLNAME_")){
7639 p
= (ps_global
->VAR_LOCAL_FULLNAME
7640 && ps_global
->VAR_LOCAL_FULLNAME
[0])
7641 ? ps_global
->VAR_LOCAL_FULLNAME
7644 else if(!strcmp(s
, "_LOCAL_ADDRESS_")){
7645 p
= (ps_global
->VAR_LOCAL_ADDRESS
7646 && ps_global
->VAR_LOCAL_ADDRESS
[0])
7647 ? ps_global
->VAR_LOCAL_ADDRESS
7649 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7650 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7651 mail_free_address(&adr
);
7653 else if(!strcmp(s
, "_BUGS_FULLNAME_")){
7654 p
= (ps_global
->VAR_BUGS_FULLNAME
7655 && ps_global
->VAR_BUGS_FULLNAME
[0])
7656 ? ps_global
->VAR_BUGS_FULLNAME
7657 : "Place to report Alpine Bugs";
7659 else if(!strcmp(s
, "_BUGS_ADDRESS_")){
7660 p
= (ps_global
->VAR_BUGS_ADDRESS
7661 && ps_global
->VAR_BUGS_ADDRESS
[0])
7662 ? ps_global
->VAR_BUGS_ADDRESS
: "postmaster";
7663 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7664 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7665 mail_free_address(&adr
);
7667 else if(!strcmp(s
, "CURRENT_DIR")){
7668 getcwd(p
= buf
, sizeof(buf
));
7670 else if(!strcmp(s
, "HOME_DIR")){
7671 p
= ps_global
->home_dir
;
7673 else if(!strcmp(s
, "PINE_CONF_PATH")){
7674 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7675 p
= "/usr/local/lib/pine.conf";
7680 else if(!strcmp(s
, "PINE_CONF_FIXED_PATH")){
7681 #ifdef SYSTEM_PINERC_FIXED
7682 p
= SYSTEM_PINERC_FIXED
;
7684 p
= "/usr/local/lib/pine.conf.fixed";
7687 else if(!strcmp(s
, "PINE_INFO_PATH")){
7688 p
= SYSTEM_PINE_INFO_PATH
;
7690 else if(!strcmp(s
, "MAIL_SPOOL_PATH")){
7693 else if(!strcmp(s
, "MAIL_SPOOL_LOCK_PATH")){
7694 /* Don't put the leading /tmp/. */
7699 for(j
= 0, i
= 0; p
[i
] && j
< MAILTMPLEN
- 1; i
++){
7709 else if(!struncmp(s
, "VAR_", 4)){
7711 if(pith_opt_pretty_var_name
)
7712 p
= (*pith_opt_pretty_var_name
)(p
);
7714 else if(!struncmp(s
, "FEAT_", 5)){
7716 if(pith_opt_pretty_feature_name
)
7717 p
= (*pith_opt_pretty_feature_name
)(p
, -1);
7723 if(f
->f1
== WSPACE
){
7724 html_element_output(f
, ' ');
7725 f
->f1
= DFL
; /* clear it */
7729 html_element_output(f
, (int) *p
++);
7738 html_element_output(FILTER_S
*f
, int ch
)
7741 (*EL(HANDLERS(f
))->handler
)(HANDLERS(f
), ch
, GF_DATA
);
7746 #define ISHEX_DIGIT(X) (isdigit((X)) || \
7747 ((X) >= 'a' && (X) <= 'f') || \
7748 ((X) >= 'A' && (X) <= 'F'))
7751 * collect html entity and return its UCS value when done.
7753 * Returns HTML_MOREDATA : we need more data
7754 * HTML_ENTITY : entity collected
7755 * HTML_BADVALUE : good data, but no named match or out of range
7756 * HTML_BADDATA : invalid input
7759 * - entity format is "'&' tag ';'" and represents a literal char
7760 * - named entities are CASE SENSITIVE.
7761 * - numeric char references (where the tag is prefixed with a '#')
7762 * are a char with that numbers value
7763 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7766 html_entity_collector(FILTER_S
*f
, int ch
, UCS
*ucs
, char **alt
)
7769 static char buf
[MAX_ENTITY
+2];
7772 if(len
== MAX_ENTITY
){
7776 ? (isalpha((unsigned char) ch
) || ch
== '#')
7777 : ((isdigit((unsigned char) ch
)
7778 || (len
== 1 && (unsigned char) ch
== 'x')
7779 || (len
== 1 &&(unsigned char) ch
== 'X')
7780 || (len
> 1 && isxdigit((unsigned char) ch
))
7781 || (isalpha((unsigned char) ch
) && buf
[0] != '#')))){
7783 return(HTML_MOREDATA
);
7785 else if(ch
== ';' || ASCII_ISSPACE((unsigned char) ch
)){
7786 buf
[len
] = '\0'; /* got something! */
7788 if(buf
[1] == 'x' || buf
[1] == 'X')
7789 *ucs
= (UCS
) strtoul(&buf
[2], NULL
, 16);
7791 *ucs
= (UCS
) strtoul(&buf
[1], NULL
, 10);
7795 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7796 if(entity_tab
[i
].value
== *ucs
){
7797 *alt
= entity_tab
[i
].plain
;
7803 return(HTML_ENTITY
);
7806 rv
= HTML_BADVALUE
; /* in case of no match */
7807 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7808 if(strcmp(entity_tab
[i
].name
, buf
) == 0){
7809 *ucs
= entity_tab
[i
].value
;
7811 *alt
= entity_tab
[i
].plain
;
7814 return(HTML_ENTITY
);
7819 rv
= HTML_BADDATA
; /* bogus input! */
7831 /*----------------------------------------------------------------------
7832 HTML text to plain text filter
7834 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7835 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7840 gf_html2plain(FILTER_S
*f
, int flg
)
7842 /* BUG: qoute incoming \255 values (see "yuml" above!) */
7845 GF_INIT(f
, f
->next
);
7850 for(ii
= HTML_INDENT(f
); ii
> 0; ii
--)
7856 while(GF_GETC(f
, c
)){
7858 * First we have to collect any literal entities...
7859 * that is, IF we're not already collecting one
7860 * AND we're not in element's text or, if we are, we're
7861 * not in quoted text. Whew.
7867 switch(html_entity_collector(f
, c
, &ucs
, &alt
)){
7868 case HTML_MOREDATA
: /* more data required? */
7869 continue; /* go get another char */
7871 case HTML_BADVALUE
:
7873 /* if supplied, process bogus data */
7876 unsigned int uic
= *alt
;
7880 if(c
== '&' && !HD(f
)->quoted
){
7885 f
->t
= 0; /* don't come back next time */
7889 default : /* thing to process */
7890 f
->t
= 0; /* don't come back */
7893 * do something with UCS codepoint. If it's
7894 * not displayable then use the alt version
7896 * cvt UCS to UTF-8 and toss into next filter.
7898 if(ucs
> 127 && wcellwidth(ucs
) < 0){
7901 c
= MAKE_LITERAL(*alt
);
7908 c
= MAKE_LITERAL('?');
7911 unsigned char utf8buf
[8], *p1
, *p2
;
7913 p2
= utf8_put(p1
= (unsigned char *) utf8buf
, (unsigned long) ucs
);
7914 for(; p1
< p2
; p1
++){
7915 c
= MAKE_LITERAL(*p1
);
7925 else if(!PASS_HTML(f
) && c
== '&' && !HD(f
)->quoted
){
7931 * then we process whatever we got...
7937 GF_OP_END(f
); /* clean up our input pointers */
7939 else if(flg
== GF_EOD
){
7941 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f
))->element
));
7942 html_pop(f
, EL(HANDLERS(f
)));
7945 html_output(f
, HTML_NEWLINE
);
7947 HTML_ULINE(f
, ULINE_BIT(f
) = 0);
7950 HTML_BOLD(f
, BOLD_BIT(f
) = 0);
7953 fs_give((void **)&f
->line
);
7955 free_color_pair(&HD(f
)->color
);
7959 if(((HTML_OPT_S
*)f
->opt
)->base
)
7960 fs_give((void **) &((HTML_OPT_S
*)f
->opt
)->base
);
7965 (*f
->next
->f
)(f
->next
, GF_DATA
);
7966 (*f
->next
->f
)(f
->next
, GF_EOD
);
7968 else if(flg
== GF_RESET
){
7969 dprint((9, "-- gf_reset html2plain\n"));
7970 f
->data
= (HTML_DATA_S
*) fs_get(sizeof(HTML_DATA_S
));
7971 memset(f
->data
, 0, sizeof(HTML_DATA_S
));
7972 /* start with flowing text */
7973 HD(f
)->wrapstate
= !PASS_HTML(f
);
7974 HD(f
)->wrapcol
= WRAP_COLS(f
);
7975 f
->f1
= DFL
; /* state */
7976 f
->f2
= 0; /* chars in wrap buffer */
7977 f
->n
= 0L; /* chars on line so far */
7978 f
->linep
= f
->line
= (char *)fs_get(HTML_BUF_LEN
* sizeof(char));
7979 HD(f
)->line_bufsize
= HTML_BUF_LEN
; /* initial bufsize of line */
7980 HD(f
)->alt_entity
= (!ps_global
->display_charmap
7981 || strucmp(ps_global
->display_charmap
, "iso-8859-1"));
7982 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
7989 * html_indent - do the requested indent level function with appropriate
7990 * flushing and such.
7992 * Returns: indent level prior to set/increment
7995 html_indent(FILTER_S
*f
, int val
, int func
)
7997 int old
= HD(f
)->indent_level
;
7999 /* flush pending data at old indent level */
8002 html_output_flush(f
);
8003 if((HD(f
)->indent_level
+= val
) < 0)
8004 HD(f
)->indent_level
= 0;
8009 html_output_flush(f
);
8010 HD(f
)->indent_level
= val
;
8023 * html_blanks - Insert n blank lines into output
8026 html_blank(FILTER_S
*f
, int n
)
8028 /* Cap off any flowing text, and then write blank lines */
8029 if(f
->f2
|| f
->n
|| CENTER_BIT(f
) || HD(f
)->centered
|| WRAPPED_LEN(f
))
8030 html_output(f
, HTML_NEWLINE
);
8032 if(HD(f
)->wrapstate
)
8033 while(HD(f
)->blanks
< n
) /* blanks inc'd by HTML_NEWLINE */
8034 html_output(f
, HTML_NEWLINE
);
8040 * html_newline -- insert a newline mindful of embedded tags
8043 html_newline(FILTER_S
*f
)
8045 html_write_newline(f
); /* commit an actual newline */
8047 if(f
->n
){ /* and keep track of blank lines */
8057 * output the given char, handling any requested wrapping.
8058 * It's understood that all whitespace handed us is written. In other
8059 * words, junk whitespace is weeded out before it's given to us here.
8063 html_output(FILTER_S
*f
, int ch
)
8067 void (*o_f
)(FILTER_S
*, int, int, int) = CENTER_BIT(f
) ? html_output_centered
: html_output_normal
;
8070 * if ch is a control token, just pass it on, else, collect
8071 * utf8-encoded characters to determine width,then feed into
8074 if(ch
== TAG_EMBED
|| HD(f
)->embedded
.state
|| (ch
> 0xff && IS_LITERAL(ch
) == 0)){
8075 (*o_f
)(f
, ch
, 1, 0);
8077 else if(utf8_to_ucs4_oneatatime(ch
& 0xff, &(HD(f
)->cb
), &uc
, &width
)){
8080 for(cp
= HD(f
)->cb
.cbuf
; cp
<= HD(f
)->cb
.cbufend
; cp
++){
8081 (*o_f
)(f
, *cp
, width
, HD(f
)->cb
.cbufend
- cp
);
8082 width
= 0; /* only count it once */
8085 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
8088 HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbufp
;
8089 /* else do nothing until we have a full character */
8094 html_output_string(FILTER_S
*f
, char *s
)
8102 html_output_raw_tag(FILTER_S
*f
, char *tag
)
8108 html_output(f
, '<');
8109 html_output_string(f
, tag
);
8110 for(p
= HD(f
)->el_data
->attribs
;
8113 /* SECURITY: no javascript */
8114 /* PRIVACY: no img src without permission */
8115 /* BUGS: no class collisions since <head> ignored */
8116 if(html_event_attribute(p
->attribute
)
8117 || !strucmp(p
->attribute
, "class")
8118 || (!PASS_IMAGES(f
) && !strucmp(tag
, "img") && !strucmp(p
->attribute
, "src")))
8121 /* PRIVACY: sniff out background images */
8122 if(p
->value
&& !PASS_IMAGES(f
)){
8123 if(!strucmp(p
->attribute
, "style")){
8124 if((vp
= srchstr(p
->value
, "background-image")) != NULL
){
8125 /* neuter in place */
8126 vp
[11] = vp
[12] = vp
[13] = vp
[14] = vp
[15] = 'X';
8129 for(vp
= p
->value
; (vp
= srchstr(vp
, "background")) != NULL
; vp
++)
8130 if(vp
[10] == ' ' || vp
[10] == ':')
8131 for(i
= 11; vp
[i
] && vp
[i
] != ';'; i
++)
8132 if((vp
[i
] == 'u' && vp
[i
+1] == 'r' && vp
[i
+2] == 'l' && vp
[i
+3] == '(')
8133 || vp
[i
] == ':' || vp
[i
] == '/' || vp
[i
] == '.')
8137 else if(!strucmp(p
->attribute
, "background")){
8140 for(ip
= p
->value
; *ip
&& !(*ip
== ':' || *ip
== '/' || *ip
== '.'); ip
++)
8148 html_output(f
, ' ');
8149 html_output_string(f
, p
->attribute
);
8151 html_output(f
, '=');
8152 html_output(f
, '\"');
8153 html_output_string(f
, p
->value
);
8154 html_output(f
, '\"');
8158 /* append warning to form submission */
8159 if(!strucmp(tag
, "form")){
8160 html_output_string(f
, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8164 html_output(f
, ' ');
8165 html_output(f
, '/');
8168 html_output(f
, '>');
8173 html_event_attribute(char *attr
)
8176 static char *events
[] = {
8177 "onabort", "onblur", "onchange", "onclick", "ondblclick", "ondragdrop",
8178 "onerror", "onfocus", "onkeydown", "onkeypress", "onkeyup", "onload",
8179 "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmove",
8180 "onreset", "onresize", "onselec", "onsubmit", "onunload"
8183 if((attr
[0] == 'o' || attr
[0] == 'O') && (attr
[1] == 'n' || attr
[1] == 'N'))
8184 for(i
= 0; i
< sizeof(events
)/sizeof(events
[0]); i
++)
8185 if(!strucmp(attr
, events
[i
]))
8193 html_output_normal(FILTER_S
*f
, int ch
, int width
, int remaining
)
8195 static int written
= 0;
8198 if(HD(f
)->centered
){
8199 html_centered_flush(f
);
8200 fs_give((void **) &HD(f
)->centered
->line
.buf
);
8201 fs_give((void **) &HD(f
)->centered
->word
.buf
);
8202 fs_give((void **) &HD(f
)->centered
);
8205 if(HD(f
)->wrapstate
){
8206 if(ch
== HTML_NEWLINE
){ /* hard newline */
8207 html_output_flush(f
);
8211 HD(f
)->blanks
= 0; /* reset blank line counter */
8213 if(ch
== TAG_EMBED
){ /* takes up no space */
8214 HD(f
)->embedded
.state
= -5;
8215 HTML_LINEP_PUTC(f
, TAG_EMBED
);
8217 else if(HD(f
)->embedded
.state
){ /* ditto */
8218 if(HD(f
)->embedded
.state
== -5){
8219 /* looking for specially handled tags following TAG_EMBED */
8220 if(ch
== TAG_HANDLE
)
8221 HD(f
)->embedded
.state
= -1; /* next ch is length */
8222 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8224 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8226 if(ch
== TAG_FGCOLOR
)
8227 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8229 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8231 HD(f
)->embedded
.state
= RGBLEN
;
8234 HD(f
)->embedded
.state
= 0; /* non-special */
8236 else if(HD(f
)->embedded
.state
> 0){
8237 /* collecting up an RGBLEN color or length, ignore tags */
8238 (HD(f
)->embedded
.state
)--;
8239 if(HD(f
)->embedded
.color
)
8240 *HD(f
)->embedded
.color
++ = ch
;
8242 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8243 *HD(f
)->embedded
.color
= '\0';
8244 HD(f
)->embedded
.color
= NULL
;
8247 else if(HD(f
)->embedded
.state
< 0){
8248 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8251 (HD(f
)->embedded
.state
)--;
8252 if(HD(f
)->embedded
.color
)
8253 *HD(f
)->embedded
.color
++ = ch
;
8255 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8256 *HD(f
)->embedded
.color
= '\0';
8257 HD(f
)->embedded
.color
= NULL
;
8261 HTML_LINEP_PUTC(f
, ch
);
8263 else if(HTML_ISSPACE(ch
)){
8264 html_output_flush(f
);
8273 if(f
->f2
+ cwidth
+ 1 >= WRAP_COLS(f
)){
8274 HTML_LINEP_PUTC(f
, ch
& 0xff);
8280 if(HD(f
)->in_anchor
)
8281 html_write_anchor(f
, HD(f
)->in_anchor
);
8284 HTML_LINEP_PUTC(f
, ch
& 0xff);
8298 html_output_flush(f
);
8300 switch(HD(f
)->embedded
.state
){
8305 * It's difficult to both preserve whitespace and wrap at the
8306 * same time so we'll do a dumb wrap at the edge of the screen.
8307 * Since this shouldn't come up much in real life we'll hope
8308 * it is good enough.
8310 if(!PASS_HTML(f
) && (f
->n
+ width
) > WRAP_COLS(f
))
8313 f
->n
+= width
; /* inc displayed char count */
8314 HD(f
)->blanks
= 0; /* reset blank line counter */
8315 html_putc(f
, ch
& 0xff);
8318 case TAG_EMBED
: /* takes up no space */
8319 html_putc(f
, TAG_EMBED
);
8320 HD(f
)->embedded
.state
= -2;
8323 case HTML_NEWLINE
: /* newline handling */
8337 HD(f
)->embedded
.state
= 0;
8340 HD(f
)->embedded
.state
= -1; /* next ch is length */
8361 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8363 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8364 HD(f
)->embedded
.state
= 11;
8369 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8371 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8372 HD(f
)->embedded
.state
= 11;
8375 case TAG_HANDLEOFF
:
8377 HD(f
)->in_anchor
= 0;
8388 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8393 HD(f
)->embedded
.state
--;
8394 if(HD(f
)->embedded
.color
)
8395 *HD(f
)->embedded
.color
++ = ch
;
8397 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8398 *HD(f
)->embedded
.color
= '\0';
8399 HD(f
)->embedded
.color
= NULL
;
8410 * flush any buffered chars waiting for wrapping.
8413 html_output_flush(FILTER_S
*f
)
8416 if(f
->n
&& ((int) f
->n
) + 1 + f
->f2
> HD(f
)->wrapcol
)
8417 html_newline(f
); /* wrap? */
8419 if(f
->n
){ /* text already on the line? */
8421 f
->n
++; /* increment count */
8424 /* write at start of new line */
8425 html_write_indent(f
, HD(f
)->indent_level
);
8427 if(HD(f
)->in_anchor
)
8428 html_write_anchor(f
, HD(f
)->in_anchor
);
8439 * html_output_centered - managed writing centered text
8442 html_output_centered(FILTER_S
*f
, int ch
, int width
, int remaining
)
8447 if(!HD(f
)->centered
){ /* new text? */
8448 html_output_flush(f
);
8449 if(f
->n
) /* start on blank line */
8452 HD(f
)->centered
= (CENTER_S
*) fs_get(sizeof(CENTER_S
));
8453 memset(HD(f
)->centered
, 0, sizeof(CENTER_S
));
8454 /* and grab a buf to start collecting centered text */
8455 HD(f
)->centered
->line
.len
= WRAP_COLS(f
);
8456 HD(f
)->centered
->line
.buf
= (char *) fs_get(HD(f
)->centered
->line
.len
8458 HD(f
)->centered
->line
.used
= HD(f
)->centered
->line
.width
= 0;
8459 HD(f
)->centered
->word
.len
= 32;
8460 HD(f
)->centered
->word
.buf
= (char *) fs_get(HD(f
)->centered
->word
.len
8462 HD(f
)->centered
->word
.used
= HD(f
)->centered
->word
.width
= 0;
8465 if(ch
== HTML_NEWLINE
){ /* hard newline */
8466 html_centered_flush(f
);
8468 else if(ch
== TAG_EMBED
){ /* takes up no space */
8469 HD(f
)->embedded
.state
= -5;
8470 html_centered_putc(&HD(f
)->centered
->word
, TAG_EMBED
);
8472 else if(HD(f
)->embedded
.state
){
8473 if(HD(f
)->embedded
.state
== -5){
8474 /* looking for specially handled tags following TAG_EMBED */
8475 if(ch
== TAG_HANDLE
)
8476 HD(f
)->embedded
.state
= -1; /* next ch is length */
8477 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8479 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8481 if(ch
== TAG_FGCOLOR
)
8482 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8484 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8486 HD(f
)->embedded
.state
= RGBLEN
;
8489 HD(f
)->embedded
.state
= 0; /* non-special */
8491 else if(HD(f
)->embedded
.state
> 0){
8492 /* collecting up an RGBLEN color or length, ignore tags */
8493 (HD(f
)->embedded
.state
)--;
8494 if(HD(f
)->embedded
.color
)
8495 *HD(f
)->embedded
.color
++ = ch
;
8497 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8498 *HD(f
)->embedded
.color
= '\0';
8499 HD(f
)->embedded
.color
= NULL
;
8502 else if(HD(f
)->embedded
.state
< 0){
8503 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8506 (HD(f
)->embedded
.state
)--;
8507 if(HD(f
)->embedded
.color
)
8508 *HD(f
)->embedded
.color
++ = ch
;
8510 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8511 *HD(f
)->embedded
.color
= '\0';
8512 HD(f
)->embedded
.color
= NULL
;
8516 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8518 else if(ASCII_ISSPACE((unsigned char) ch
)){
8519 if(!HD(f
)->centered
->space
++){ /* end of a word? flush! */
8522 if(WRAPPED_LEN(f
) > HD(f
)->wrapcol
){
8523 html_centered_flush_line(f
);
8524 /* fall thru to put current "word" on blank "line" */
8526 else if(HD(f
)->centered
->line
.width
){
8527 /* put space char between line and appended word */
8528 html_centered_putc(&HD(f
)->centered
->line
, ' ');
8529 HD(f
)->centered
->line
.width
++;
8532 for(i
= 0; i
< HD(f
)->centered
->word
.used
; i
++)
8533 html_centered_putc(&HD(f
)->centered
->line
,
8534 HD(f
)->centered
->word
.buf
[i
]);
8536 HD(f
)->centered
->line
.width
+= HD(f
)->centered
->word
.width
;
8537 HD(f
)->centered
->word
.used
= 0;
8538 HD(f
)->centered
->word
.width
= 0;
8545 /* ch is start of next word */
8546 HD(f
)->centered
->space
= 0;
8547 if(HD(f
)->centered
->word
.width
>= WRAP_COLS(f
))
8548 html_centered_flush(f
);
8550 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8559 HD(f
)->centered
->word
.width
+= cwidth
;
8566 * html_centered_putc -- add given char to given WRAPLINE_S
8569 html_centered_putc(WRAPLINE_S
*wp
, int ch
)
8571 if(wp
->used
+ 1 >= wp
->len
){
8573 fs_resize((void **) &wp
->buf
, wp
->len
* sizeof(char));
8576 wp
->buf
[wp
->used
++] = ch
;
8582 * html_centered_flush - finish writing any pending centered output
8585 html_centered_flush(FILTER_S
*f
)
8590 * If word present (what about line?) we need to deal with
8593 if(HD(f
)->centered
->word
.width
&& WRAPPED_LEN(f
) > HD(f
)->wrapcol
)
8594 html_centered_flush_line(f
);
8597 /* figure out how much to indent */
8598 if((i
= (WRAP_COLS(f
) - WRAPPED_LEN(f
))/2) > 0)
8599 html_write_indent(f
, i
);
8601 if(HD(f
)->centered
->anchor
)
8602 html_write_anchor(f
, HD(f
)->centered
->anchor
);
8604 html_centered_handle(&HD(f
)->centered
->anchor
,
8605 HD(f
)->centered
->line
.buf
,
8606 HD(f
)->centered
->line
.used
);
8607 html_write(f
, HD(f
)->centered
->line
.buf
, HD(f
)->centered
->line
.used
);
8609 if(HD(f
)->centered
->word
.used
){
8610 if(HD(f
)->centered
->line
.width
)
8613 html_centered_handle(&HD(f
)->centered
->anchor
,
8614 HD(f
)->centered
->word
.buf
,
8615 HD(f
)->centered
->word
.used
);
8616 html_write(f
, HD(f
)->centered
->word
.buf
,
8617 HD(f
)->centered
->word
.used
);
8620 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8621 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8624 if(HD(f
)->centered
->word
.used
){
8625 html_write(f
, HD(f
)->centered
->word
.buf
,
8626 HD(f
)->centered
->word
.used
);
8627 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8628 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8630 HD(f
)->blanks
++; /* advance the blank line counter */
8633 html_newline(f
); /* finish the line */
8638 * html_centered_handle - scan the line for embedded handles
8641 html_centered_handle(int *h
, char *line
, int len
)
8646 if(*line
++ == TAG_EMBED
&& len
-- > 0)
8649 if((n
= *line
++) >= --len
){
8653 *h
= (*h
* 10) + (*line
++ - '0');
8657 case TAG_HANDLEOFF
:
8659 *h
= 0; /* assumption 23,342: inverse off ends tags */
8670 * html_centered_flush_line - flush the centered "line" only
8673 html_centered_flush_line(FILTER_S
*f
)
8675 if(HD(f
)->centered
->line
.used
){
8678 /* hide "word" from flush */
8679 i
= HD(f
)->centered
->word
.used
;
8680 j
= HD(f
)->centered
->word
.width
;
8681 HD(f
)->centered
->word
.used
= 0;
8682 HD(f
)->centered
->word
.width
= 0;
8683 html_centered_flush(f
);
8685 HD(f
)->centered
->word
.used
= i
;
8686 HD(f
)->centered
->word
.width
= j
;
8692 * html_write_indent - write indention mindful of display attributes
8695 html_write_indent(FILTER_S
*f
, int indent
)
8699 html_putc(f
, TAG_EMBED
);
8700 html_putc(f
, TAG_BOLDOFF
);
8704 html_putc(f
, TAG_EMBED
);
8705 html_putc(f
, TAG_ULINEOFF
);
8711 html_putc(f
, ' '); /* indent as needed */
8714 * Resume any previous embedded state
8718 html_putc(f
, TAG_EMBED
);
8719 html_putc(f
, TAG_BOLDON
);
8723 html_putc(f
, TAG_EMBED
);
8724 html_putc(f
, TAG_ULINEON
);
8734 html_write_anchor(FILTER_S
*f
, int anchor
)
8739 html_putc(f
, TAG_EMBED
);
8740 html_putc(f
, TAG_HANDLE
);
8741 snprintf(buf
, sizeof(buf
), "%d", anchor
);
8742 html_putc(f
, (int) strlen(buf
));
8744 for(i
= 0; buf
[i
]; i
++)
8745 html_putc(f
, buf
[i
]);
8750 * html_write_newline - write a newline mindful of display attributes
8753 html_write_newline(FILTER_S
*f
)
8757 if(! STRIP(f
)){ /* First tie, off any embedded state */
8758 if(HD(f
)->in_anchor
){
8759 html_putc(f
, TAG_EMBED
);
8760 html_putc(f
, TAG_INVOFF
);
8764 html_putc(f
, TAG_EMBED
);
8765 html_putc(f
, TAG_BOLDOFF
);
8769 html_putc(f
, TAG_EMBED
);
8770 html_putc(f
, TAG_ULINEOFF
);
8773 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8777 p
= color_embed(ps_global
->VAR_NORM_FORE_COLOR
,
8778 ps_global
->VAR_NORM_BACK_COLOR
);
8779 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8784 html_write(f
, "\015\012", 2);
8785 for(i
= HTML_INDENT(f
); i
> 0; i
--)
8788 if(! STRIP(f
)){ /* First tie, off any embedded state */
8790 html_putc(f
, TAG_EMBED
);
8791 html_putc(f
, TAG_BOLDON
);
8795 html_putc(f
, TAG_EMBED
);
8796 html_putc(f
, TAG_ULINEON
);
8799 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8800 char *p
, *tfg
, *tbg
;
8804 tfg
= HD(f
)->color
->fg
;
8805 tbg
= HD(f
)->color
->bg
;
8806 tmp
= new_color_pair(tfg
[0] ? tfg
8807 : color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
),
8809 : color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
));
8810 if(pico_is_good_colorpair(tmp
)){
8811 p
= color_embed(tfg
[0] ? tfg
8812 : ps_global
->VAR_NORM_FORE_COLOR
,
8814 : ps_global
->VAR_NORM_BACK_COLOR
);
8815 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8820 free_color_pair(&tmp
);
8827 * html_write - write given n-length string to next filter
8830 html_write(FILTER_S
*f
, char *s
, int n
)
8832 GF_INIT(f
, f
->next
);
8835 /* keep track of attribute state? Not if last char! */
8836 if(!STRIP(f
) && *s
== TAG_EMBED
&& n
-- > 0){
8837 GF_PUTC(f
->next
, TAG_EMBED
);
8851 case TAG_HANDLEOFF
:
8852 HD(f
)->in_anchor
= 0;
8853 GF_PUTC(f
->next
, TAG_INVOFF
);
8860 GF_PUTC(f
->next
, TAG_HANDLE
);
8866 GF_PUTC(f
->next
, i
);
8868 anum
= (anum
* 10) + (*++s
- '0');
8870 GF_PUTC(f
->next
, *s
);
8876 && (h
= get_handle(*HANDLESP(f
), anum
)) != NULL
8877 && (h
->type
== URL
|| h
->type
== Attach
)){
8878 HD(f
)->in_anchor
= anum
;
8889 GF_PUTC(f
->next
, (*s
++) & 0xff);
8892 GF_IP_END(f
->next
); /* clean up next's input pointers */
8897 * html_putc -- actual work of writing to next filter.
8898 * NOTE: Small opt not using full GF_END since our input
8899 * pointers don't need adjusting.
8902 html_putc(FILTER_S
*f
, int ch
)
8904 GF_INIT(f
, f
->next
);
8905 GF_PUTC(f
->next
, ch
& 0xff);
8906 GF_IP_END(f
->next
); /* clean up next's input pointers */
8912 * Only current option is to turn on embedded data stripping for text
8913 * bound to a printer or composer.
8916 gf_html2plain_opt(char *base
,
8919 HANDLE_S
**handlesp
,
8924 int margin_l
, margin_r
;
8926 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
8928 op
->base
= cpystr(base
);
8929 margin_l
= (margin
) ? margin
[0] : 0;
8930 margin_r
= (margin
) ? margin
[1] : 0;
8931 op
->indent
= margin_l
;
8932 op
->columns
= columns
- (margin_l
+ margin_r
);
8933 op
->strip
= ((flags
& GFHP_STRIPPED
) == GFHP_STRIPPED
);
8934 op
->handlesp
= handlesp
;
8935 op
->handles_loc
= ((flags
& GFHP_LOCAL_HANDLES
) == GFHP_LOCAL_HANDLES
);
8936 op
->showserver
= ((flags
& GFHP_SHOW_SERVER
) == GFHP_SHOW_SERVER
);
8937 op
->warnrisk_f
= risk_f
;
8938 op
->no_relative_links
= ((flags
& GFHP_NO_RELATIVE
) == GFHP_NO_RELATIVE
);
8939 op
->related_content
= ((flags
& GFHP_RELATED_CONTENT
) == GFHP_RELATED_CONTENT
);
8940 op
->html
= ((flags
& GFHP_HTML
) == GFHP_HTML
);
8941 op
->html_imgs
= ((flags
& GFHP_HTML_IMAGES
) == GFHP_HTML_IMAGES
);
8942 op
->element_table
= html_element_table
;
8943 return((void *) op
);
8948 gf_html2plain_rss_opt(RSS_FEED_S
**feedp
, int flags
)
8952 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
8953 memset(op
, 0, sizeof(HTML_OPT_S
));
8955 op
->base
= cpystr("");
8956 op
->element_table
= rss_element_table
;
8957 *(op
->feedp
= feedp
) = NULL
;
8958 return((void *) op
);
8962 gf_html2plain_rss_free(RSS_FEED_S
**feedp
)
8964 if(feedp
&& *feedp
){
8966 fs_give((void **) &(*feedp
)->title
);
8969 fs_give((void **) &(*feedp
)->link
);
8971 if((*feedp
)->description
)
8972 fs_give((void **) &(*feedp
)->description
);
8974 if((*feedp
)->source
)
8975 fs_give((void **) &(*feedp
)->source
);
8978 fs_give((void **) &(*feedp
)->image
);
8980 gf_html2plain_rss_free_items(&((*feedp
)->items
));
8981 fs_give((void **) feedp
);
8986 gf_html2plain_rss_free_items(RSS_ITEM_S
**itemp
)
8988 if(itemp
&& *itemp
){
8990 fs_give((void **) &(*itemp
)->title
);
8993 fs_give((void **) &(*itemp
)->link
);
8995 if((*itemp
)->description
)
8996 fs_give((void **) &(*itemp
)->description
);
8998 if((*itemp
)->source
)
8999 fs_give((void **) &(*itemp
)->source
);
9001 gf_html2plain_rss_free_items(&(*itemp
)->next
);
9002 fs_give((void **) itemp
);
9007 /* END OF HTML-TO-PLAIN text filter */
9010 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9011 * from the text stream.
9014 #define MAX_ESC_LEN 5
9017 * the simple filter, removes unknown escape codes from the stream
9020 gf_escape_filter(FILTER_S
*f
, int flg
)
9023 GF_INIT(f
, f
->next
);
9026 register unsigned char c
;
9027 register int state
= f
->f1
;
9029 while(GF_GETC(f
, c
)){
9032 if(c
== '\033' || f
->n
== MAX_ESC_LEN
){
9033 f
->line
[f
->n
] = '\0';
9035 if(!match_escapes(f
->line
)){
9036 GF_PUTC(f
->next
, '^');
9037 GF_PUTC(f
->next
, '[');
9040 GF_PUTC(f
->next
, '\033');
9044 GF_PUTC(f
->next
, *p
++);
9049 state
= 0; /* fall thru */
9052 f
->line
[f
->n
++] = c
; /* collect */
9060 GF_PUTC(f
->next
, c
);
9066 else if(flg
== GF_EOD
){
9068 if(!match_escapes(f
->line
)){
9069 GF_PUTC(f
->next
, '^');
9070 GF_PUTC(f
->next
, '[');
9073 GF_PUTC(f
->next
, '\033');
9076 for(p
= f
->line
; f
->n
; f
->n
--, p
++)
9077 GF_PUTC(f
->next
, *p
);
9079 fs_give((void **)&(f
->line
)); /* free temp line buffer */
9080 (void) GF_FLUSH(f
->next
);
9081 (*f
->next
->f
)(f
->next
, GF_EOD
);
9083 else if(flg
== GF_RESET
){
9084 dprint((9, "-- gf_reset escape\n"));
9087 f
->linep
= f
->line
= (char *)fs_get((MAX_ESC_LEN
+ 1) * sizeof(char));
9094 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9095 * corresponding string representations (you know, ^blah and such)...
9099 * the simple filter transforms unknown control characters in the stream
9100 * into harmless strings.
9103 gf_control_filter(FILTER_S
*f
, int flg
)
9105 GF_INIT(f
, f
->next
);
9108 register unsigned char c
;
9109 register int filt_only_c0
;
9111 filt_only_c0
= f
->opt
? (*(int *) f
->opt
) : 0;
9113 while(GF_GETC(f
, c
)){
9115 if(((c
< 0x20 || c
== 0x7f)
9116 || (c
>= 0x80 && c
< 0xA0 && !filt_only_c0
))
9117 && !(ASCII_ISSPACE((unsigned char) c
)
9118 || c
== '\016' || c
== '\017' || c
== '\033')){
9119 GF_PUTC(f
->next
, c
>= 0x80 ? '~' : '^');
9120 GF_PUTC(f
->next
, (c
== 0x7f) ? '?' : (c
& 0x1f) + '@');
9123 GF_PUTC(f
->next
, c
);
9128 else if(flg
== GF_EOD
){
9129 (void) GF_FLUSH(f
->next
);
9130 (*f
->next
->f
)(f
->next
, GF_EOD
);
9136 * function called from the outside to set
9137 * control filter's option, which says to filter C0 control characters
9138 * but not C1 control chars. We don't call it at all if we don't want
9139 * to filter C0 chars either.
9142 gf_control_filter_opt(int *filt_only_c0
)
9144 return((void *) filt_only_c0
);
9149 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9150 * This prevents the possibility of embedding other tags.
9151 * We assume that this filter should only be used for something
9152 * that is eventually writing to a display, which has the special
9153 * knowledge of quoted TAG_EMBEDs.
9156 gf_tag_filter(FILTER_S
*f
, int flg
)
9158 GF_INIT(f
, f
->next
);
9161 register unsigned char c
;
9163 while(GF_GETC(f
, c
)){
9165 if((c
& 0xff) == (TAG_EMBED
& 0xff)){
9166 GF_PUTC(f
->next
, TAG_EMBED
);
9167 GF_PUTC(f
->next
, c
);
9170 GF_PUTC(f
->next
, c
);
9175 else if(flg
== GF_EOD
){
9176 (void) GF_FLUSH(f
->next
);
9177 (*f
->next
->f
)(f
->next
, GF_EOD
);
9183 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9184 * specified line width
9188 typedef struct wrap_col_s
{
9193 unsigned do_indent
:1;
9194 unsigned on_comma
:1;
9200 unsigned leave_flowed
:1;
9201 unsigned use_color
:1;
9202 unsigned hdr_color
:1;
9203 unsigned for_compose
:1;
9204 unsigned handle_soft_hyphen
:1;
9205 unsigned saw_soft_hyphen
:1;
9206 unsigned trailing_space
:1;
9207 unsigned char utf8buf
[7];
9208 unsigned char *utf8bufp
;
9229 #define WRAP_MARG_L(F) (((WRAP_S *)(F)->opt)->margin_l)
9230 #define WRAP_MARG_R(F) (((WRAP_S *)(F)->opt)->margin_r)
9231 #define WRAP_COL(F) (((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
9232 #define WRAP_MAX_COL(F) (((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
9233 #define WRAP_INDENT(F) (((WRAP_S *)(F)->opt)->indent)
9234 #define WRAP_DO_IND(F) (((WRAP_S *)(F)->opt)->do_indent)
9235 #define WRAP_COMMA(F) (((WRAP_S *)(F)->opt)->on_comma)
9236 #define WRAP_FLOW(F) (((WRAP_S *)(F)->opt)->flowed)
9237 #define WRAP_DELSP(F) (((WRAP_S *)(F)->opt)->delsp)
9238 #define WRAP_FL_QD(F) (((WRAP_S *)(F)->opt)->quote_depth)
9239 #define WRAP_FL_QC(F) (((WRAP_S *)(F)->opt)->quote_count)
9240 #define WRAP_FL_SIG(F) (((WRAP_S *)(F)->opt)->sig)
9241 #define WRAP_HARD(F) (((WRAP_S *)(F)->opt)->hard_nl)
9242 #define WRAP_LV_FLD(F) (((WRAP_S *)(F)->opt)->leave_flowed)
9243 #define WRAP_USE_CLR(F) (((WRAP_S *)(F)->opt)->use_color)
9244 #define WRAP_HDR_CLR(F) (((WRAP_S *)(F)->opt)->hdr_color)
9245 #define WRAP_FOR_CMPS(F) (((WRAP_S *)(F)->opt)->for_compose)
9246 #define WRAP_HANDLE_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->handle_soft_hyphen)
9247 #define WRAP_SAW_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->saw_soft_hyphen)
9248 #define WRAP_UTF8BUF(F, C) (((WRAP_S *)(F)->opt)->utf8buf[C])
9249 #define WRAP_UTF8BUFP(F) (((WRAP_S *)(F)->opt)->utf8bufp)
9250 #define WRAP_STATE(F) (((WRAP_S *)(F)->opt)->state)
9251 #define WRAP_QUOTED(F) (((WRAP_S *)(F)->opt)->quoted)
9252 #define WRAP_TAGS(F) (((WRAP_S *)(F)->opt)->tags)
9253 #define WRAP_BOLD(F) (((WRAP_S *)(F)->opt)->bold)
9254 #define WRAP_ULINE(F) (((WRAP_S *)(F)->opt)->uline)
9255 #define WRAP_INVERSE(F) (((WRAP_S *)(F)->opt)->inverse)
9256 #define WRAP_LASTC(F) (((WRAP_S *)(F)->opt)->lineendp)
9257 #define WRAP_EMBED(F) (((WRAP_S *)(F)->opt)->embedded)
9258 #define WRAP_ANCHOR(F) (((WRAP_S *)(F)->opt)->anchor)
9259 #define WRAP_PB_OFF(F) (((WRAP_S *)(F)->opt)->prefbrk)
9260 #define WRAP_PB_LEN(F) (((WRAP_S *)(F)->opt)->prefbrkn)
9261 #define WRAP_ALLWSP(F) (((WRAP_S *)(F)->opt)->allwsp)
9262 #define WRAP_SPC_LEN(F) (((WRAP_S *)(F)->opt)->space_len)
9263 #define WRAP_TRL_SPC(F) (((WRAP_S *)(F)->opt)->trailing_space)
9264 #define WRAP_SPEC(F, C) ((WRAP_S *) (F)->opt)->special[C]
9265 #define WRAP_COLOR(F) (((WRAP_S *)(F)->opt)->color)
9266 #define WRAP_COLOR_SET(F) ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
9267 #define WRAP_SPACES(F) (((WRAP_S *)(F)->opt)->spaces)
9268 #define WRAP_PUTC(F,C,W) { \
9269 if((F)->linep == WRAP_LASTC(F)){ \
9270 size_t offset = (F)->linep - (F)->line; \
9271 fs_resize((void **) &(F)->line, \
9272 (2 * offset) * sizeof(char)); \
9273 (F)->linep = &(F)->line[offset]; \
9274 WRAP_LASTC(F) = &(F)->line[2*offset-1]; \
9276 *(F)->linep++ = (C); \
9280 #define WRAP_EMBED_PUTC(F,C) { \
9282 WRAP_PUTC((F), C, 0); \
9285 so_writec(C, WRAP_SPACES(F)); \
9288 #define WRAP_COLOR_UNSET(F) { \
9289 if(WRAP_COLOR_SET(F)){ \
9290 WRAP_COLOR(F)->fg[0] = '\0'; \
9295 * wrap_flush_embed flags
9297 #define WFE_NONE 0 /* Nothing special */
9298 #define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9301 int wrap_flush(FILTER_S
*, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9302 int wrap_flush_embed(FILTER_S
*, unsigned char **, unsigned char **,
9303 unsigned char **, unsigned char **);
9304 int wrap_flush_s(FILTER_S
*,char *, int, int, unsigned char **, unsigned char **,
9305 unsigned char **, unsigned char **, int);
9306 int wrap_eol(FILTER_S
*, int, unsigned char **, unsigned char **,
9307 unsigned char **, unsigned char **);
9308 int wrap_bol(FILTER_S
*, int, int, unsigned char **,
9309 unsigned char **, unsigned char **, unsigned char **);
9310 int wrap_quote_insert(FILTER_S
*, unsigned char **, unsigned char **,
9311 unsigned char **, unsigned char **);
9314 * the no longer simple filter, breaks lines at end of white space nearest
9315 * to global "gf_wrap_width" in length
9316 * It also supports margins, indents (inverse indenting, really) and
9317 * flowed text (ala RFC 3676)
9321 gf_wrap(FILTER_S
*f
, int flg
)
9324 GF_INIT(f
, f
->next
);
9328 * f->line buffer where next "word" being considered is stored
9329 * f->f2 width in screen cells of f->line stuff
9330 * f->n width in screen cells of the part of this line committed to next
9335 register unsigned char c
;
9336 register int state
= f
->f1
;
9337 int width
, full_character
;
9339 while(GF_GETC(f
, c
)){
9342 case CCR
: /* CRLF or CR in text ? */
9343 state
= BOL
; /* either way, handle start */
9347 if(f
->f2
== 0 && WRAP_SPC_LEN(f
) && WRAP_TRL_SPC(f
)){
9349 * whack trailing space char, but be aware
9350 * of embeds in space buffer. grok them just
9351 * in case they contain a 0x20 value
9354 char *sb
, *sbp
, *scp
= NULL
;
9357 for(sb
= sbp
= (char *)so_text(WRAP_SPACES(f
)); *sbp
; sbp
++){
9368 if(strlen(sbp
) >= x
)
9375 if(strlen(sbp
) >= RGBLEN
)
9376 sbp
+= (RGBLEN
- 1);
9391 /* replace space buf without trailing space char */
9393 STORE_S
*ns
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
9397 WRAP_TRL_SPC(f
) = 0;
9402 so_give(&WRAP_SPACES(f
));
9403 WRAP_SPACES(f
) = ns
;
9407 else{ /* fixed line */
9409 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9410 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9413 * When we get to a real end of line, we don't need to
9414 * remember what the special color was anymore because
9415 * we aren't going to be changing back to it. We unset it
9416 * so that we don't keep resetting the color to normal.
9418 WRAP_COLOR_UNSET(f
);
9421 if(c
== '\012'){ /* get c following LF */
9424 /* else c is first char of new line, fall thru */
9427 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9428 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9429 WRAP_COLOR_UNSET(f
); /* see note above */
9433 /* else fall thru to deal with beginning of line */
9439 WRAP_FL_QC(f
) = 1; /* init it */
9440 state
= FL_QLEV
; /* go collect it */
9443 /* if EMBEDed, process it and return here */
9444 if(c
== (unsigned char) TAG_EMBED
){
9445 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9446 WRAP_STATE(f
) = state
;
9451 /* quote level change implies new paragraph */
9454 if(WRAP_HARD(f
) == 0){
9456 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9457 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9458 WRAP_COLOR_UNSET(f
); /* see note above */
9463 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
,
9464 &eob
); /* write quoting prefix */
9469 case '\015' : /* a blank line? */
9470 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9471 state
= CCR
; /* go collect it */
9474 case ' ' : /* space stuffed */
9475 state
= FL_STF
; /* just eat it */
9478 case '-' : /* possible sig-dash */
9479 WRAP_FL_SIG(f
) = 1; /* init state */
9480 state
= FL_SIG
; /* go collect it */
9484 state
= DFL
; /* go back to normal */
9485 goto case_dfl
; /* handle c like DFL case */
9491 if(WRAP_COMMA(f
) && c
== TAB
){
9492 wrap_bol(f
, 1, 0, &ip
, &eib
, &op
,
9493 &eob
); /* convert to normal indent */
9497 wrap_bol(f
,0,0, &ip
, &eib
, &op
, &eob
);
9498 goto case_dfl
; /* handle c like DFL case */
9504 if(c
== '>'){ /* another level */
9508 /* if EMBEDed, process it and return here */
9509 if(c
== (unsigned char) TAG_EMBED
){
9510 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9511 WRAP_STATE(f
) = state
;
9516 /* quote level change signals new paragraph */
9517 if(WRAP_FL_QC(f
) != WRAP_FL_QD(f
)){
9518 WRAP_FL_QD(f
) = WRAP_FL_QC(f
);
9519 if(WRAP_HARD(f
) == 0){ /* add hard newline */
9520 WRAP_HARD(f
) = 1; /* hard newline */
9521 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9522 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9523 WRAP_COLOR_UNSET(f
); /* see note above */
9528 wrap_bol(f
,0,1, &ip
, &eib
, &op
, &eob
);
9533 case '\015' : /* a blank line? */
9534 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9535 state
= CCR
; /* go collect it */
9538 case ' ' : /* space-stuffed! */
9539 state
= FL_STF
; /* just eat it */
9542 case '-' : /* sig dash? */
9547 default : /* something else */
9549 goto case_dfl
; /* handle c like DFL */
9555 case FL_STF
: /* space stuffed */
9557 case '\015' : /* a blank line? */
9558 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9559 state
= CCR
; /* go collect it */
9562 case (unsigned char) TAG_EMBED
: /* process TAG data */
9563 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9564 WRAP_STATE(f
) = state
; /* and return */
9568 case '-' : /* sig dash? */
9574 default : /* something else */
9576 goto case_dfl
; /* handle c like DFL */
9581 case FL_SIG
: /* sig-dash collector */
9582 switch (WRAP_FL_SIG(f
)){ /* possible sig-dash? */
9584 if(c
!= '-'){ /* not a sigdash */
9585 if((f
->n
+ WRAP_SPC_LEN(f
) + 1) > WRAP_COL(f
)){
9586 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9587 &eob
); /* note any embedded*/
9588 wrap_eol(f
, 1, &ip
, &eib
,
9589 &op
, &eob
); /* plunk down newline */
9590 wrap_bol(f
, 1, 1, &ip
, &eib
,
9591 &op
, &eob
); /* write any prefix */
9594 WRAP_PUTC(f
,'-', 1); /* write what we got */
9601 /* don't put anything yet until we know to wrap or not */
9606 if(c
!= ' '){ /* not a sigdash */
9607 WRAP_PUTC(f
, '-', 1);
9608 if((f
->n
+ WRAP_SPC_LEN(f
) + 2) > WRAP_COL(f
)){
9609 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9610 &eob
); /* note any embedded*/
9611 wrap_eol(f
, 1, &ip
, &eib
,
9612 &op
, &eob
); /* plunk down newline */
9613 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
9614 &eob
); /* write any prefix */
9617 WRAP_PUTC(f
,'-', 1); /* write what we got */
9624 /* don't put anything yet until we know to wrap or not */
9629 if(c
== '\015'){ /* success! */
9630 /* known sigdash, newline if soft nl */
9631 if(WRAP_SPC_LEN(f
)){
9632 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9633 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9634 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9644 WRAP_FL_SIG(f
) = 4; /* possible success */
9649 case (unsigned char) TAG_EMBED
:
9651 * At this point we're almost 100% sure that we've got
9652 * a sigdash. Putc it (adding newline if previous
9653 * was a soft nl) so we get it the right color
9654 * before we store this new embedded stuff
9656 if(WRAP_SPC_LEN(f
)){
9657 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9658 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9659 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9668 case '\015' : /* success! */
9670 * We shouldn't get here, but in case we do, we have
9671 * not yet put the sigdash
9673 if(WRAP_SPC_LEN(f
)){
9674 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9675 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9676 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9685 default : /* that's no sigdash! */
9686 /* write what we got but didn't put yet */
9687 WRAP_PUTC(f
,'-', 1);
9688 WRAP_PUTC(f
,'-', 1);
9689 WRAP_PUTC(f
,' ', 1);
9692 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9693 WRAP_SPC_LEN(f
) = 1;
9694 state
= DFL
; /* set normal state */
9695 goto case_dfl
; /* and go do "c" */
9701 WRAP_STATE(f
) = FL_SIG
; /* come back here */
9702 WRAP_FL_SIG(f
) = 6; /* and seek EOL */
9703 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9704 state
= TAG
; /* process embed */
9709 * at this point we've already putc the sigdash in case 4
9712 case (unsigned char) TAG_EMBED
:
9716 case '\015' : /* success! */
9720 default : /* that's no sigdash! */
9722 * probably never reached (fake sigdash with embedded
9723 * stuff) but if this did get reached, then we
9724 * might have accidentally disobeyed a soft nl
9727 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9728 WRAP_SPC_LEN(f
) = 1;
9729 state
= DFL
; /* set normal state */
9730 goto case_dfl
; /* and go do "c" */
9737 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9740 state
= DFL
; /* set normal state */
9741 goto case_dfl
; /* and go process "c" */
9749 * This was just if(WRAP_SPEC(f, c)) before the change to add
9750 * the == 0 test. This isn't quite right, either. We should really
9751 * be looking for special characters in the UCS characters, not
9752 * in the incoming stream of UTF-8. It is not right to
9753 * call this on bytes that are in the middle of a UTF-8 character,
9754 * hence the == 0 test which restricts it to the first byte
9755 * of a character. This isn't right, either, but it's closer.
9756 * Also change the definition of WRAP_SPEC so that isspace only
9757 * matches ascii characters, which will never be in the middle
9758 * of a UTF-8 multi-byte character.
9760 if((WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 0 && WRAP_SPEC(f
, c
)){
9761 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
9767 if(f
->f2
){ /* any non-lwsp to flush? */
9769 /* remember our second best break point */
9770 WRAP_PB_OFF(f
) = f
->linep
- f
->line
;
9771 WRAP_PB_LEN(f
) = f
->f2
;
9775 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9778 switch(c
){ /* remember separator */
9781 WRAP_TRL_SPC(f
) = 1;
9782 so_writec(' ',WRAP_SPACES(f
));
9787 int i
= (int) f
->n
+ WRAP_SPC_LEN(f
);
9793 so_writec(TAB
,WRAP_SPACES(f
));
9794 WRAP_TRL_SPC(f
) = 0;
9799 default : /* some control char? */
9800 WRAP_SPC_LEN(f
) += 2;
9801 WRAP_TRL_SPC(f
) = 0;
9808 WRAP_QUOTED(f
) = !WRAP_QUOTED(f
);
9811 case '\015' : /* already has newline? */
9815 case '\012' : /* bare LF in text? */
9816 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* they must've */
9817 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
); /* meant */
9818 wrap_bol(f
,1,1, &ip
, &eib
, &op
, &eob
); /* newline... */
9821 case (unsigned char) TAG_EMBED
:
9822 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9823 WRAP_STATE(f
) = state
;
9828 if(!WRAP_QUOTED(f
)){
9829 /* handle this special case in general code below */
9830 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_MAX_COL(f
)
9831 && WRAP_ALLWSP(f
) && WRAP_PB_OFF(f
))
9834 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_COL(f
)){
9835 if(WRAP_ALLWSP(f
)) /* if anything visible */
9836 wrap_flush(f
, &ip
, &eib
, &op
,
9837 &eob
); /* ... blat buf'd chars */
9839 wrap_eol(f
, 1, &ip
, &eib
, &op
,
9840 &eob
); /* plunk down newline */
9841 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
9842 &eob
); /* write any prefix */
9845 WRAP_PUTC(f
, ',', 1); /* put out comma */
9846 wrap_flush(f
, &ip
, &eib
, &op
,
9847 &eob
); /* write buf'd chars */
9854 else if(WRAP_HANDLE_SOFT_HYPHEN(f
)
9855 && (WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 1
9856 && WRAP_UTF8BUF(f
, 0) == 0xC2 && c
== 0xAD){
9858 * This is a soft hyphen. If there is enough space for
9859 * a real hyphen to fit on the line here then we can
9860 * flush everything up to before the soft hyphen,
9861 * and simply remember that we saw a soft hyphen.
9862 * If it turns out that we can't fit the next piece in
9863 * then wrap_eol will append a real hyphen to the line.
9864 * If we can fit another piece in it will be because we've
9865 * reached the next break point. At that point we'll flush
9866 * everything but won't include the unneeded hyphen. We erase
9867 * the fact that we saw this soft hyphen because it have
9868 * become irrelevant.
9870 * If the hyphen is the character that puts us over the edge
9871 * we go through the else case.
9874 /* erase this soft hyphen character from buffer */
9875 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
9877 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
9878 if(f
->f2
) /* any non-lwsp to flush? */
9879 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9881 /* remember that we saw the soft hyphen */
9882 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
9886 * Everything up to the hyphen fits, otherwise it
9887 * would have already been flushed the last time
9888 * through the loop. But the hyphen won't fit. So
9889 * we need to go back to the last line break and
9890 * break there instead. Then start a new line with
9891 * the buffered up characters and the soft hyphen.
9893 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
9894 wrap_eol(f
, 1, &ip
, &eib
, &op
,
9895 &eob
); /* plunk down newline */
9896 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
9897 &eob
); /* write any prefix */
9900 * Now we're in the same situation as we would have
9901 * been above except we're on a new line. Try to
9902 * flush out the characters seen up to the hyphen.
9904 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
9905 if(f
->f2
) /* any non-lwsp to flush? */
9906 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9908 /* remember that we saw the soft hyphen */
9909 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
9912 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
9921 unsigned char *inputp
;
9922 unsigned long remaining_octets
;
9925 if(WRAP_UTF8BUFP(f
) < &WRAP_UTF8BUF(f
, 0) + 6){ /* always true */
9927 *WRAP_UTF8BUFP(f
)++ = c
;
9928 remaining_octets
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
9929 if(remaining_octets
== 1 && isascii(WRAP_UTF8BUF(f
, 0))){
9939 else if(c
< 0x80 && iscntrl((unsigned char) c
))
9945 inputp
= &WRAP_UTF8BUF(f
, 0);
9946 ucs
= (UCS
) utf8_get(&inputp
, &remaining_octets
);
9948 case U8G_ENDSTRG
: /* incomplete character, wait */
9949 case U8G_ENDSTRI
: /* incomplete character, wait */
9954 if(ucs
& U8G_ERROR
|| ucs
== UBOGON
){
9956 * None of these cases is supposed to happen. If it
9957 * does happen then the input stream isn't UTF-8
9958 * so something is wrong. Writechar will treat
9959 * each octet in the input buffer as a separate
9960 * error character and print a '?' for each,
9961 * so the width will be the number of octets.
9963 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
9967 /* got a character */
9968 width
= wcellwidth(ucs
);
9973 * This happens when we have a UTF-8 character that
9974 * we aren't able to print in our locale. For example,
9975 * if the locale is setup with the terminal
9976 * expecting ISO-8859-1 characters then there are
9977 * lots of UTF-8 characters that can't be printed.
9978 * Print a '?' instead.
9990 * This cannot happen because an error would have
9991 * happened at least by character #6. So if we get
9992 * here there is a bug in utf8_get().
9994 if(WRAP_UTF8BUFP(f
) == &WRAP_UTF8BUF(f
, 0) + 6){
9995 *WRAP_UTF8BUFP(f
)++ = c
;
9999 * We could possibly do some more sophisticated
10000 * resynchronization here, but we aren't doing
10001 * anything in Writechar so it wouldn't match up
10002 * with that anyway. Just figure each character will
10003 * end up being printed as a ? character.
10005 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10010 if(WRAP_ALLWSP(f
)){
10012 * Nothing is visible yet but the first word may be too long
10013 * all by itself. We need to break early.
10015 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
> WRAP_MAX_COL(f
)){
10017 * A little reaching behind the curtain here.
10018 * if there's at least a preferable break point, use
10019 * it and stuff what's left back into the wrap buffer.
10020 * The "nwsp" latch is used to skip leading whitespace
10021 * The second half of the test prevents us from wrapping
10022 * at the preferred break point in the case that it
10023 * is so early in the line that it doesn't help.
10024 * That is, the width of the indent is even more than
10025 * the width of the first part before the preferred
10026 * break point. An example would be breaking after
10027 * "To:" when the indent is 4 which is > 3.
10029 if(WRAP_PB_OFF(f
) && WRAP_PB_LEN(f
) >= WRAP_INDENT(f
)){
10030 char *p1
= f
->line
+ WRAP_PB_OFF(f
);
10031 char *p2
= f
->linep
;
10033 int nwsp
= 0, left_after_wrap
;
10035 left_after_wrap
= f
->f2
- WRAP_PB_LEN(f
);
10037 f
->f2
= WRAP_PB_LEN(f
);
10040 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* flush shortened buf */
10042 /* put back rest of characters */
10045 if(!(c2
== ' ' || c2
== '\t') || nwsp
){
10046 WRAP_PUTC(f
, c2
, 0);
10050 left_after_wrap
--; /* wrong if a tab! */
10053 f
->f2
= MAX(left_after_wrap
, 0);
10055 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10056 &eob
); /* plunk down newline */
10057 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10058 &eob
); /* write any prefix */
10062 * If we do the less preferable break point at
10063 * the space we don't want to lose the fact that
10064 * we might be able to break at this comma for
10067 if(full_character
&& c
== ','){
10068 WRAP_PUTC(f
, c
, 1);
10069 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10070 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10074 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10076 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10077 &eob
); /* plunk down newline */
10078 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10079 &eob
); /* write any prefix */
10083 else if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
) > WRAP_COL(f
)){
10084 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
10085 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10086 &eob
); /* plunk down newline */
10087 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10088 &eob
); /* write any prefix */
10092 * Commit entire multibyte UTF-8 character at once
10093 * instead of writing partial characters into the
10096 if(full_character
){
10099 for(q
= &WRAP_UTF8BUF(f
, 0); q
< WRAP_UTF8BUFP(f
); q
++){
10100 WRAP_PUTC(f
, *q
, width
);
10104 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10111 WRAP_EMBED_PUTC(f
, c
);
10114 WRAP_EMBED(f
) = -1;
10120 WRAP_EMBED(f
) = RGBLEN
;
10125 state
= WRAP_STATE(f
);
10132 WRAP_EMBED_PUTC(f
, c
);
10139 WRAP_PUTC(f
, c
, 0);
10142 so_writec(c
, WRAP_SPACES(f
));
10144 if(!(WRAP_EMBED(f
) -= 1)){
10145 state
= WRAP_STATE(f
);
10153 GF_END(f
, f
->next
);
10155 else if(flg
== GF_EOD
){
10156 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10158 free_color_pair(&WRAP_COLOR(f
));
10160 fs_give((void **) &f
->line
); /* free temp line buffer */
10161 so_give(&WRAP_SPACES(f
));
10162 fs_give((void **) &f
->opt
); /* free wrap widths struct */
10163 (void) GF_FLUSH(f
->next
);
10164 (*f
->next
->f
)(f
->next
, GF_EOD
);
10166 else if(flg
== GF_RESET
){
10167 dprint((9, "-- gf_reset wrap\n"));
10169 f
->n
= 0L; /* displayed length of line so far */
10170 f
->f2
= 0; /* displayed length of buffered chars */
10171 WRAP_HARD(f
) = 1; /* starting at beginning of line */
10172 if(! (WRAP_S
*) f
->opt
)
10173 f
->opt
= gf_wrap_filter_opt(75, 80, NULL
, 0, 0);
10175 while(WRAP_INDENT(f
) >= WRAP_MAX_COL(f
))
10176 WRAP_INDENT(f
) /= 2;
10178 f
->line
= (char *) fs_get(WRAP_MAX_COL(f
) * sizeof(char));
10179 f
->linep
= f
->line
;
10180 WRAP_LASTC(f
) = &f
->line
[WRAP_MAX_COL(f
) - 1];
10182 for(i
= 0; i
< 256; i
++)
10183 ((WRAP_S
*) f
->opt
)->special
[i
] = ((i
== '\"' && WRAP_COMMA(f
))
10186 || (i
== (unsigned char) TAG_EMBED
10188 || (i
== ',' && WRAP_COMMA(f
)
10189 && !WRAP_QUOTED(f
))
10190 || ASCII_ISSPACE(i
));
10191 WRAP_SPACES(f
) = so_get(CharStar
, NULL
, EDIT_ACCESS
);
10192 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10197 wrap_flush(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10198 unsigned char **opp
, unsigned char **eobp
)
10203 s
= (char *)so_text(WRAP_SPACES(f
));
10204 n
= so_tell(WRAP_SPACES(f
));
10205 so_seek(WRAP_SPACES(f
), 0L, 0);
10206 wrap_flush_s(f
, s
, n
, WRAP_SPC_LEN(f
), ipp
, eibp
, opp
, eobp
, WFE_NONE
);
10207 so_truncate(WRAP_SPACES(f
), 0L);
10208 WRAP_SPC_LEN(f
) = 0;
10209 WRAP_TRL_SPC(f
) = 0;
10211 n
= f
->linep
- f
->line
;
10212 wrap_flush_s(f
, s
, n
, f
->f2
, ipp
, eibp
, opp
, eobp
, WFE_NONE
);
10214 f
->linep
= f
->line
;
10215 WRAP_PB_OFF(f
) = 0;
10216 WRAP_PB_LEN(f
) = 0;
10222 wrap_flush_embed(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
)
10226 s
= (char *)so_text(WRAP_SPACES(f
));
10227 n
= so_tell(WRAP_SPACES(f
));
10228 so_seek(WRAP_SPACES(f
), 0L, 0);
10229 wrap_flush_s(f
, s
, n
, 0, ipp
, eibp
, opp
, eobp
, WFE_CNT_HANDLE
);
10230 so_truncate(WRAP_SPACES(f
), 0L);
10231 WRAP_SPC_LEN(f
) = 0;
10232 WRAP_TRL_SPC(f
) = 0;
10238 wrap_flush_s(FILTER_S
*f
, char *s
, int n
, int w
, unsigned char **ipp
,
10239 unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
, int flags
)
10243 for(; n
> 0; n
--,s
++){
10244 if(*s
== TAG_EMBED
){
10248 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10249 GF_PUTC_GLO(f
->next
,TAG_BOLDON
);
10253 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10254 GF_PUTC_GLO(f
->next
,TAG_BOLDOFF
);
10258 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10259 GF_PUTC_GLO(f
->next
,TAG_ULINEON
);
10262 case TAG_ULINEOFF
:
10263 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10264 GF_PUTC_GLO(f
->next
,TAG_ULINEOFF
);
10268 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10269 GF_PUTC_GLO(f
->next
,TAG_INVOFF
);
10270 WRAP_ANCHOR(f
) = 0;
10273 if((flags
& WFE_CNT_HANDLE
) == 0)
10274 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10279 if((flags
& WFE_CNT_HANDLE
) == 0)
10280 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10285 if((flags
& WFE_CNT_HANDLE
) == 0)
10286 GF_PUTC_GLO(f
->next
, i
);
10288 WRAP_ANCHOR(f
) = 0;
10290 WRAP_ANCHOR(f
) = (WRAP_ANCHOR(f
) * 10) + (*++s
-'0');
10292 if((flags
& WFE_CNT_HANDLE
) == 0)
10293 GF_PUTC_GLO(f
->next
,*s
);
10300 if(pico_usingcolor() && n
>= RGBLEN
){
10302 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10303 GF_PUTC_GLO(f
->next
,TAG_FGCOLOR
);
10305 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
10306 strncpy(WRAP_COLOR(f
)->fg
, s
+1, RGBLEN
);
10307 WRAP_COLOR(f
)->fg
[RGBLEN
]='\0';
10311 GF_PUTC_GLO(f
->next
,
10316 if(pico_usingcolor() && n
>= RGBLEN
){
10318 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10319 GF_PUTC_GLO(f
->next
,TAG_BGCOLOR
);
10321 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
10322 strncpy(WRAP_COLOR(f
)->bg
, s
+1, RGBLEN
);
10323 WRAP_COLOR(f
)->bg
[RGBLEN
]='\0';
10327 GF_PUTC_GLO(f
->next
,
10338 if(f
->n
<= WRAP_MAX_COL(f
)){
10339 GF_PUTC_GLO(f
->next
, (*s
) & 0xff);
10342 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s
) & 0xff));
10345 WRAP_ALLWSP(f
) = 0;
/*
 * wrap_eol - terminate the line currently being written by the wrap filter.
 *
 * f     wrap filter; output goes to f->next
 * c     when non-zero and WRAP_LV_FLD(f) is set, a single space is
 *       written ahead of the line ending
 * ipp, eibp, opp, eobp
 *       buffer cursors; presumably consumed by the GF_PUTC_GLO
 *       macro - TODO confirm against its definition
 *
 * Visible sequence: a pending soft hyphen becomes a real hyphen,
 * TAG_EMBED-prefixed off tags are written for the text attributes,
 * colors are set back to the normal fore/back colors when a color is
 * active, CRLF is written, and the saved-whitespace state is cleared.
 */
10353 wrap_eol(FILTER_S
*f
, int c
, unsigned char **ipp
, unsigned char **eibp
,
10354 unsigned char **opp
, unsigned char **eobp
)
/* a soft hyphen seen at the break point is emitted as a visible hyphen */
10356 if(WRAP_SAW_SOFT_HYPHEN(f
)){
10357 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
10358 GF_PUTC_GLO(f
->next
, '-'); /* real hyphen */
10361 if(c
&& WRAP_LV_FLD(f
))
10362 GF_PUTC_GLO(f
->next
, ' ');
/* NOTE(review): presumably only written while bold is active - confirm */
10365 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10366 GF_PUTC_GLO(f
->next
, TAG_BOLDOFF
);
/* NOTE(review): presumably only written while underline is active - confirm */
10370 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10371 GF_PUTC_GLO(f
->next
, TAG_ULINEOFF
);
/* an open anchor is closed with the same inverse-off tag as inverse text */
10374 if(WRAP_INVERSE(f
) || WRAP_ANCHOR(f
)){
10375 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10376 GF_PUTC_GLO(f
->next
, TAG_INVOFF
);
/* with a color active, switch back to the normal foreground/background */
10379 if(WRAP_COLOR_SET(f
)){
10382 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10383 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10384 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
), sizeof(cb
));
10385 cb
[sizeof(cb
)-1] = '\0';
10388 GF_PUTC_GLO(f
->next
, *p
);
10389 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10390 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10391 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
), sizeof(cb
));
10392 cb
[sizeof(cb
)-1] = '\0';
10395 GF_PUTC_GLO(f
->next
, *p
);
/* the line ending itself: CR then LF */
10398 GF_PUTC_GLO(f
->next
, '\015');
10399 GF_PUTC_GLO(f
->next
, '\012');
/* discard any whitespace saved for this line */
10401 so_truncate(WRAP_SPACES(f
), 0L);
10402 WRAP_SPC_LEN(f
) = 0;
10403 WRAP_TRL_SPC(f
) = 0;
/*
 * wrap_bol - write whatever has to precede the text of a new output line.
 *
 * f     wrap filter; output goes to f->next
 * ivar  when non-zero, WRAP_INDENT(f) is added to the left margin
 * q     NOTE(review): presumably selects whether wrap_quote_insert
 *       is called for this line - confirm
 * ipp, eibp, opp, eobp
 *       buffer cursors, handed on unchanged to wrap_quote_insert
 *
 * Visible sequence: the left margin is written as spaces (in the
 * header-color case the header fore/back colors are written as
 * well), WRAP_ALLWSP is set, quote marks are inserted, and then
 * every attribute still active (bold, underline, inverse, color,
 * anchor handle) is re-issued as a TAG_EMBED sequence.
 */
10409 wrap_bol(FILTER_S
*f
, int ivar
, int q
, unsigned char **ipp
, unsigned char **eibp
,
10410 unsigned char **opp
, unsigned char **eobp
)
/* total number of leading columns: left margin plus optional indent */
10412 int n
= WRAP_MARG_L(f
) + (ivar
? WRAP_INDENT(f
) : 0);
10414 if(WRAP_HDR_CLR(f
)){
10416 char cbuf
[RGBLEN
+1];
10419 if((k
= WRAP_MARG_L(f
)) > 0)
10423 GF_PUTC_GLO(f
->next
, ' ');
10426 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10427 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10429 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_FORE_COLOR
),
10431 cbuf
[sizeof(cbuf
)-1] = '\0';
10434 GF_PUTC_GLO(f
->next
, *p
);
10435 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10436 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10438 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_BACK_COLOR
),
10440 cbuf
[sizeof(cbuf
)-1] = '\0';
10443 GF_PUTC_GLO(f
->next
, *p
);
10448 GF_PUTC_GLO(f
->next
, ' ');
/* only whitespace has been written on this line so far */
10451 WRAP_ALLWSP(f
) = 1;
10454 wrap_quote_insert(f
, ipp
, eibp
, opp
, eobp
);
/* NOTE(review): presumably guarded by the bold state - confirm */
10457 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10458 GF_PUTC_GLO(f
->next
, TAG_BOLDON
);
/* NOTE(review): presumably guarded by the underline state - confirm */
10461 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10462 GF_PUTC_GLO(f
->next
, TAG_ULINEON
);
10464 if(WRAP_INVERSE(f
)){
10465 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10466 GF_PUTC_GLO(f
->next
, TAG_INVON
);
/* re-issue the active color; fg and bg are each written only when set */
10468 if(WRAP_COLOR_SET(f
)){
10470 if(WRAP_COLOR(f
)->fg
[0]){
10472 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10473 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10474 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->fg
), sizeof(cb
));
10475 cb
[sizeof(cb
)-1] = '\0';
10478 GF_PUTC_GLO(f
->next
, *p
);
10480 if(WRAP_COLOR(f
)->bg
[0]){
10482 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10483 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10484 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->bg
), sizeof(cb
));
10485 cb
[sizeof(cb
)-1] = '\0';
10488 GF_PUTC_GLO(f
->next
, *p
);
/*
 * re-open the anchor: TAG_EMBED, TAG_HANDLE, one byte holding the
 * digit count, then the decimal digits of the anchor number
 */
10491 if(WRAP_ANCHOR(f
)){
10492 char buf
[64]; int i
;
10493 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10494 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10495 snprintf(buf
, sizeof(buf
), "%d", WRAP_ANCHOR(f
));
10496 GF_PUTC_GLO(f
->next
, (int) strlen(buf
));
10497 for(i
= 0; buf
[i
]; i
++)
10498 GF_PUTC_GLO(f
->next
, buf
[i
]);
/*
 * wrap_quote_insert - write the quote marks for the current line.
 *
 * f     wrap filter; WRAP_FL_QD(f) gives the number of quote levels
 * ipp, eibp, opp, eobp
 *       buffer cursors; presumably consumed by the GF_PUTC_GLO and
 *       GF_COLOR_PUTC macros - TODO confirm
 *
 * One mark is written per quote level.  With WRAP_USE_CLR(f) set the
 * level picks one of the three configured quote color pairs by
 * (level % 3), and the pair is only used when both colors are
 * configured and pico_is_good_colorpair accepts it.  The mark is the
 * prefix taken from VAR_QUOTE_REPLACE_STRING when that applies,
 * otherwise a plain > with or without a following space.  f->n is
 * advanced by the display width of any replacement prefix written.
 */
10505 wrap_quote_insert(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10506 unsigned char **opp
, unsigned char **eobp
)
10509 COLOR_PAIR
*col
= NULL
;
10510 char *prefix
= NULL
, *last_prefix
= NULL
;
/* split the replacement string; a lone second half becomes the prefix */
10512 if(ps_global
->VAR_QUOTE_REPLACE_STRING
){
10513 get_pair(ps_global
->VAR_QUOTE_REPLACE_STRING
, &prefix
, &last_prefix
, 0, 0);
10514 if(!prefix
&& last_prefix
){
10515 prefix
= last_prefix
;
10516 last_prefix
= NULL
;
10520 for(j
= 0; j
< WRAP_FL_QD(f
); j
++){
10521 if(WRAP_USE_CLR(f
)){
10523 && ps_global
->VAR_QUOTE1_FORE_COLOR
10524 && ps_global
->VAR_QUOTE1_BACK_COLOR
10525 && (col
= new_color_pair(ps_global
->VAR_QUOTE1_FORE_COLOR
,
10526 ps_global
->VAR_QUOTE1_BACK_COLOR
))
10527 && pico_is_good_colorpair(col
)){
10528 GF_COLOR_PUTC(f
, col
);
10530 else if((j
% 3) == 1
10531 && ps_global
->VAR_QUOTE2_FORE_COLOR
10532 && ps_global
->VAR_QUOTE2_BACK_COLOR
10533 && (col
= new_color_pair(ps_global
->VAR_QUOTE2_FORE_COLOR
,
10534 ps_global
->VAR_QUOTE2_BACK_COLOR
))
10535 && pico_is_good_colorpair(col
)){
10536 GF_COLOR_PUTC(f
, col
);
10538 else if((j
% 3) == 2
10539 && ps_global
->VAR_QUOTE3_FORE_COLOR
10540 && ps_global
->VAR_QUOTE3_BACK_COLOR
10541 && (col
= new_color_pair(ps_global
->VAR_QUOTE3_FORE_COLOR
,
10542 ps_global
->VAR_QUOTE3_BACK_COLOR
))
10543 && pico_is_good_colorpair(col
)){
10544 GF_COLOR_PUTC(f
, col
);
/* the pair allocated in the test above is released on every pass */
10547 free_color_pair(&col
);
10552 if(!WRAP_LV_FLD(f
)){
10553 if(!WRAP_FOR_CMPS(f
) && ps_global
->VAR_QUOTE_REPLACE_STRING
&& prefix
){
10554 for(i
= 0; prefix
[i
]; i
++)
10555 GF_PUTC_GLO(f
->next
, prefix
[i
]);
10556 f
->n
+= utf8_width(prefix
);
/* a reply string of exactly > (bare or double-quoted) means no space after the mark */
10558 else if(ps_global
->VAR_REPLY_STRING
10559 && (!strcmp(ps_global
->VAR_REPLY_STRING
, ">")
10560 || !strcmp(ps_global
->VAR_REPLY_STRING
, "\">\""))){
10561 GF_PUTC_GLO(f
->next
, '>');
10565 GF_PUTC_GLO(f
->next
, '>');
10566 GF_PUTC_GLO(f
->next
, ' ');
10571 GF_PUTC_GLO(f
->next
, '>');
/* with at least one mark written, add the separator that follows them */
10575 if(j
&& WRAP_LV_FLD(f
)){
10576 GF_PUTC_GLO(f
->next
, ' ');
10579 else if(j
&& last_prefix
){
10580 for(i
= 0; last_prefix
[i
]; i
++)
10581 GF_PUTC_GLO(f
->next
, last_prefix
[i
]);
10582 f
->n
+= utf8_width(last_prefix
);
/* both halves were allocated by get_pair and are released here */
10586 fs_give((void **)&prefix
);
10588 fs_give((void **)&last_prefix
);
10595 * function called from the outside to set
10596 * wrap filter's width option
/*
 * gf_wrap_filter_opt - build the option block for the wrap filter.
 *
 * width      stored as wrap_col
 * width_max  stored as wrap_max
 * margin     optional two element array: [0] is the left margin and
 *            [1] the right margin; NULL means both are 0
 * indent     stored as indent
 * flags      GFW_* bits; each bit tested below becomes a 0/1 field
 *
 * Returns a WRAP_S obtained from fs_get and zeroed before the fields
 * are filled in, cast to void *.  Nothing in this function releases
 * it.  No argument is range checked here (see the NOTE in the body).
 */
10599 gf_wrap_filter_opt(int width
, int width_max
, int *margin
, int indent
, int flags
)
10603 /* NOTE: variables MUST be sanity checked before they get here */
10604 wrap
= (WRAP_S
*) fs_get(sizeof(WRAP_S
));
10605 memset(wrap
, 0, sizeof(WRAP_S
));
10606 wrap
->wrap_col
= width
;
10607 wrap
->wrap_max
= width_max
;
10608 wrap
->indent
= indent
;
10609 wrap
->margin_l
= (margin
) ? margin
[0] : 0;
10610 wrap
->margin_r
= (margin
) ? margin
[1] : 0;
/* every flag is compared against its full mask, so each field is 0 or 1 */
10611 wrap
->tags
= (GFW_HANDLES
& flags
) == GFW_HANDLES
;
10612 wrap
->on_comma
= (GFW_ONCOMMA
& flags
) == GFW_ONCOMMA
;
10613 wrap
->flowed
= (GFW_FLOWED
& flags
) == GFW_FLOWED
;
10614 wrap
->leave_flowed
= (GFW_FLOW_RESULT
& flags
) == GFW_FLOW_RESULT
;
10615 wrap
->delsp
= (GFW_DELSP
& flags
) == GFW_DELSP
;
10616 wrap
->use_color
= (GFW_USECOLOR
& flags
) == GFW_USECOLOR
;
10617 wrap
->hdr_color
= (GFW_HDRCOLOR
& flags
) == GFW_HDRCOLOR
;
10618 wrap
->for_compose
= (GFW_FORCOMPOSE
& flags
) == GFW_FORCOMPOSE
;
10619 wrap
->handle_soft_hyphen
= (GFW_SOFTHYPHEN
& flags
) == GFW_SOFTHYPHEN
;
10621 return((void *) wrap
);
/*
 * gf_url_hilite_opt - fill in a caller supplied URL_HILITE_S.
 *
 * uh        structure to initialize; it is zeroed first
 * handlesp  stored as uh->handlesp
 * flags     URH_* bits; URH_HDRCOLOR sets uh->hdr_color to 1
 *
 * Returns uh cast to void *.  Nothing is allocated here.
 * NOTE(review): whether a NULL uh is tolerated cannot be told from
 * the visible lines - confirm before passing NULL.
 */
10626 gf_url_hilite_opt(URL_HILITE_S
*uh
, HANDLE_S
**handlesp
, int flags
)
10629 memset(uh
, 0, sizeof(URL_HILITE_S
));
10630 uh
->handlesp
= handlesp
;
10631 uh
->hdr_color
= (URH_HDRCOLOR
& flags
) == URH_HDRCOLOR
;
10634 return((void *) uh
);
/*
 * Accessors for the per-filter state of gf_preflow.  Each one casts
 * (F)->opt to PREFLOW_S * and names a single member:
 *   PF_QD   quote_depth
 *   PF_QC   quote_count
 *   PF_SIG  sig
 * All three expand to lvalues and are assigned through in gf_preflow.
 */
10638 #define PF_QD(F) (((PREFLOW_S *)(F)->opt)->quote_depth)
10639 #define PF_QC(F) (((PREFLOW_S *)(F)->opt)->quote_count)
10640 #define PF_SIG(F) (((PREFLOW_S *)(F)->opt)->sig)
/* NOTE(review): presumably the structure typedef-ed as PREFLOW_S - confirm */
10642 typedef struct preflow_s
{
10649 * This would normally be handled in gf_wrap. If there is a possibility
10650 * that a url we want to recognize is cut in half by a soft newline we
10651 * want to fix that up by putting the halves back together. We do that
10652 * by deleting the soft newline and putting it all in one line. It will
10653 * still get wrapped later in gf_wrap. It isn't pretty with all the
10654 * goto's, but whatta ya gonna do?
/*
 * gf_preflow - filter run ahead of gf_wrap that rejoins text split
 * by soft line breaks.
 *
 * f    this filter; output goes to f->next
 * flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA   runs a character state machine.  The state lives in f->f1
 *           and the pending marker in f->f2 between calls.  At the
 *           start of a line it counts > quote marks into PF_QC, eats
 *           one stuffed space and tracks the dash, dash, space
 *           signature separator in PF_SIG.  PF_QD holds the quote
 *           depth of the previous line for comparison.
 *           NOTE(review): this looks like format=flowed (RFC 3676)
 *           handling - confirm.
 * GF_EOD    frees f->opt, flushes f->next and passes GF_EOD on.
 * GF_RESET  allocates and zeroes a PREFLOW_S into f->opt and resets
 *           the state to BOL with nothing pending.
 */
10657 gf_preflow(FILTER_S
*f
, int flg
)
10659 GF_INIT(f
, f
->next
);
10661 if(flg
== GF_DATA
){
10662 register unsigned char c
;
/* state and pending carry over from the previous chunk of data */
10663 register int state
= f
->f1
;
10664 register int pending
= f
->f2
;
10666 while(GF_GETC(f
, c
)){
10680 GF_PUTC(f
->next
, c
);
10694 GF_PUTC(f
->next
, '\012');
10709 GF_PUTC(f
->next
, ' ');
10725 GF_PUTC(f
->next
, ' ');
10726 GF_PUTC(f
->next
, '\012');
10736 if(c
== '>'){ /* count quote level */
10741 done_counting_quotes
:
10742 if(c
== ' '){ /* eat stuffed space */
10747 done_with_stuffed_space
:
10748 if(c
== '-'){ /* look for signature */
/* same quote level as the line before and no complete signature line seen */
10756 if(PF_QD(f
) == PF_QC(f
) && PF_SIG(f
) < 4){
10757 /* delete pending */
10759 PF_QD(f
) = PF_QC(f
);
10761 /* suppress quotes, too */
10766 * This should have been a hard new line
10767 * instead so leave out the trailing space.
10769 GF_PUTC(f
->next
, '\015');
10770 GF_PUTC(f
->next
, '\012');
10772 PF_QD(f
) = PF_QC(f
);
10775 else if(pending
== 1){
10776 GF_PUTC(f
->next
, '\015');
10777 GF_PUTC(f
->next
, '\012');
10778 PF_QD(f
) = PF_QC(f
);
10781 PF_QD(f
) = PF_QC(f
);
/* write the counted quote marks back out; this consumes PF_QC */
10786 while(PF_QC(f
)-- > 0)
10787 GF_PUTC(f
->next
, '>');
10795 GF_PUTC(f
->next
, '-');
10799 GF_PUTC(f
->next
, '-');
10800 GF_PUTC(f
->next
, '-');
10805 GF_PUTC(f
->next
, '-');
10806 GF_PUTC(f
->next
, '-');
10807 GF_PUTC(f
->next
, ' ');
10812 goto default_case
; /* to handle c */
/* the cases below re-enter the beginning-of-line code at the matching label */
10817 case FL_QLEV
: /* count quote level */
10821 goto done_counting_quotes
;
10825 case FL_STF
: /* eat stuffed space */
10826 goto done_with_stuffed_space
;
10829 case FL_SIG
: /* deal with sig indicator */
10831 case 1: /* saw '-' */
10835 goto done_with_sig
;
10839 case 2: /* saw '--' */
10843 goto done_with_sig
;
10847 case 3: /* saw '-- ' */
10849 PF_SIG(f
) = 4; /* it really is a sig line */
10851 goto done_with_sig
;
10861 GF_END(f
, f
->next
);
/* end of data: drop our state block, then drain and notify the next filter */
10863 else if(flg
== GF_EOD
){
10864 fs_give((void **) &f
->opt
);
10865 (void) GF_FLUSH(f
->next
);
10866 (*f
->next
->f
)(f
->next
, GF_EOD
);
/* reset: fresh zeroed state block, start at beginning of line */
10868 else if(flg
== GF_RESET
){
10871 pf
= (PREFLOW_S
*) fs_get(sizeof(*pf
));
10872 memset(pf
, 0, sizeof(*pf
));
10873 f
->opt
= (void *) pf
;
10875 f
->f1
= BOL
; /* state */
10876 f
->f2
= 0; /* pending */
10877 PF_QD(f
) = 0; /* quote depth */
10878 PF_QC(f
) = 0; /* quote count */
10879 PF_SIG(f
) = 0; /* sig level */
10887 * LINE PREFIX FILTER - insert given text at beginning of each
/*
 * GF_PREFIX_WRITE(s) - write the characters of string s to f->next
 * with GF_PUTC.  Nothing is written when s is NULL.  The macro uses
 * a variable named f from the expanding scope (it is not a macro
 * parameter) and declares its own local p inside a brace block.
 * NOTE(review): presumably stops at the terminating NUL - confirm.
 */
10892 #define GF_PREFIX_WRITE(s) { \
10893 register char *p; \
10894 if((p = (s)) != NULL) \
10896 GF_PUTC(f->next, *p++); \
10901 * the simple filter, prepends each line with the requested prefix.
10902 * if prefix is null, does nothing, and as with all filters, assumes
10903 * NVT end of lines.
/*
 * gf_prefix - filter that writes the string in f->opt at the start
 * of the output and again after every end of line it emits.
 *
 * f    this filter; f->opt is the prefix (char *), f->f1 holds the
 *      saw-a-CR state between calls, f->f2 is the not-yet-written flag
 * flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA   CRLF, bare CR and bare LF are all written out as CRLF
 *           followed by the prefix; every other character is copied.
 * GF_EOD    flushes f->next and passes GF_EOD on.
 * GF_RESET  sets f->f2 to 1 so the first data character triggers the
 *           initial prefix.
 */
10906 gf_prefix(FILTER_S
*f
, int flg
)
10908 GF_INIT(f
, f
->next
);
10910 if(flg
== GF_DATA
){
10911 register unsigned char c
;
10912 register int state
= f
->f1
;
10913 register int first
= f
->f2
;
10915 while(GF_GETC(f
, c
)){
10917 if(first
){ /* write initial prefix!! */
10918 first
= 0; /* but just once */
10919 GF_PREFIX_WRITE((char *) f
->opt
);
10923 * State == 0 is the starting state and the usual state.
10924 * State == 1 means we saw a CR and haven't acted on it yet.
10925 * We are looking for a LF to get the CRLF end of line.
10926 * However, we also treat bare CR and bare LF as if they
10927 * were CRLF sequences. What else could it mean in text?
10928 * This filter is only used for text so that is probably
10929 * a reasonable interpretation of the bad input.
10931 if(c
== '\015'){ /* CR */
10932 if(state
){ /* Treat pending CR as endofline, */
10933 GF_PUTC(f
->next
, '\015'); /* and remain in saw-a-CR state. */
10934 GF_PUTC(f
->next
, '\012');
10935 GF_PREFIX_WRITE((char *) f
->opt
);
10941 else if(c
== '\012'){ /* LF */
10942 GF_PUTC(f
->next
, '\015'); /* Got either a CRLF or a bare LF, */
10943 GF_PUTC(f
->next
, '\012'); /* treat both as if a CRLF. */
10944 GF_PREFIX_WRITE((char *) f
->opt
);
10947 else{ /* any other character */
10949 GF_PUTC(f
->next
, '\015'); /* Treat pending CR as endofline. */
10950 GF_PUTC(f
->next
, '\012');
10951 GF_PREFIX_WRITE((char *) f
->opt
);
10955 GF_PUTC(f
->next
, c
);
10959 f
->f1
= state
; /* save state for next chunk of data */
10961 GF_END(f
, f
->next
);
10963 else if(flg
== GF_EOD
){
10964 (void) GF_FLUSH(f
->next
);
10965 (*f
->next
->f
)(f
->next
, GF_EOD
);
10967 else if(flg
== GF_RESET
){
10968 dprint((9, "-- gf_reset prefix\n"));
10970 f
->f2
= 1; /* nothing written yet */
10976 * function called from the outside to set
10977 * prefix filter's prefix string
/*
 * gf_prefix_opt - turn a prefix string into the option pointer that
 * gf_prefix reads from f->opt.
 *
 * prefix  string to prepend to each line; may be NULL
 *
 * Returns prefix itself cast to void *.  No copy is made, so the
 * returned pointer aliases the storage the caller passed in.
 */
10980 gf_prefix_opt(char *prefix
)
10982 return((void *) prefix
);
10987 * LINE TEST FILTER - accumulate lines and offer each to the provided
/*
 * Support definitions for gf_line_test.
 *
 * LINE_TEST_BLOCK      initial size of the line accumulator and the
 *                      amount it grows by.
 * GF_LINE_TEST_EOB(f)  address of the last byte of the accumulator,
 *                      i.e. f->line plus (f->f2 - 1); f->f2 is the
 *                      allocated size.
 * GF_LINE_TEST_ADD     growth path: adds LINE_TEST_BLOCK to f->f2,
 *                      resizes f->line with fs_resize, and recomputes
 *                      the locals eobuf and p of the expanding
 *                      function, because the buffer may have moved.
 * GF_LINE_TEST_TEST    calls the test function held in the LINETEST_S
 *                      at (F)->opt, passing (F)->n post-incremented
 *                      and the local pointer among its arguments,
 *                      and stores the result in D.  The error path
 *                      frees the line buffer and the option block and
 *                      reports a translation error through gf_error.
 *                      Otherwise the line is written to (F)->next
 *                      with the text of each LT_INS_S insert written
 *                      where cp reaches insp->where; inserts left
 *                      over after the line are written last and the
 *                      insert list is freed.
 *                      NOTE(review): a negative insert length
 *                      presumably deletes characters - confirm.
 *
 * The macros use the locals p, eobuf, c and l of the expanding scope.
 */
10991 typedef struct _linetest_s
{
10997 /* accumulator growth increment */
10998 #define LINE_TEST_BLOCK 1024
11000 #define GF_LINE_TEST_EOB(f) \
11001 ((f)->line + ((f)->f2 - 1))
11003 #define GF_LINE_TEST_ADD(f, c) \
11006 f->f2 += LINE_TEST_BLOCK; \
11007 fs_resize((void **)&f->line, \
11008 (size_t) f->f2 * sizeof(char)); \
11009 eobuf = GF_LINE_TEST_EOB(f); \
11010 p = eobuf - LINE_TEST_BLOCK; \
11015 #define GF_LINE_TEST_TEST(F, D) \
11018 register char *cp; \
11020 LT_INS_S *ins = NULL, *insp; \
11022 (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
11024 ((LINETEST_S *) (F)->opt)->local); \
11028 fs_give((void **) &(F)->line); \
11030 fs_give((void **) &(F)->opt); \
11031 gf_error(_("translation error")); \
11034 for(insp = ins, cp = (F)->line; cp < p; ){ \
11035 if(insp && cp == insp->where){ \
11036 if(insp->len > 0){ \
11037 for(l = 0; l < insp->len; l++){ \
11038 c = (unsigned char) insp->text[l]; \
11039 GF_PUTC((F)->next, c); \
11041 insp = insp->next; \
11043 } else if(insp->len < 0){ \
11045 insp = insp->next; \
11049 GF_PUTC((F)->next, *cp); \
11053 for(l = 0; l < insp->len; l++){ \
11054 c = (unsigned char) insp->text[l]; \
11055 GF_PUTC((F)->next, c); \
11057 insp = insp->next; \
11059 gf_line_test_free_ins(&ins); \
11066 * this simple filter accumulates characters until a newline, offers it
11067 * to the provided test function, and then passes it on. It assumes
/*
 * gf_line_test - collect input a line at a time, hand each line to
 * the test function configured through gf_line_test_opt, and write
 * the (possibly edited) line to the next filter.
 *
 * f    this filter; f->line is the accumulator, f->f2 its allocated
 *      size, f->n the line counter, f->f1 the parser state and
 *      f->opt the LINETEST_S
 * flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA   A test result of 2 skips the line.  Any other true result
 *           unlinks this filter from the chain that starts at
 *           gf_master, copies the rest of the input straight
 *           through, flushes, and frees both f->line and f itself.
 * GF_EOD    tests whatever is still in the accumulator, frees f->line
 *           and f->opt, flushes f->next and passes GF_EOD on.
 * GF_RESET  clears state and line counter and allocates a
 *           LINE_TEST_BLOCK byte accumulator.
 */
11071 gf_line_test(FILTER_S
*f
, int flg
)
/* p is the write position in the accumulator, eobuf its last byte */
11073 register char *p
= f
->linep
;
11074 register char *eobuf
= GF_LINE_TEST_EOB(f
);
11075 GF_INIT(f
, f
->next
);
11077 if(flg
== GF_DATA
){
11078 register unsigned char c
;
11079 register int state
= f
->f1
;
11081 while(GF_GETC(f
, c
)){
11088 GF_LINE_TEST_TEST(f
, done
);
11092 if(done
== 2) /* skip this line! */
11095 GF_PUTC(f
->next
, '\015');
11096 GF_PUTC(f
->next
, '\012');
11098 * if the line tester returns TRUE, it's
11099 * telling us its seen enough and doesn't
11100 * want to see any more. Remove ourself
11101 * from the pipeline...
11104 if(gf_master
== f
){
11105 gf_master
= f
->next
;
11110 for(fprev
= gf_master
;
11111 fprev
&& fprev
->next
!= f
;
11112 fprev
= fprev
->next
)
11115 if(fprev
) /* wha??? */
11116 fprev
->next
= f
->next
;
11121 while(GF_GETC(f
, c
)) /* pass input */
11122 GF_PUTC(f
->next
, c
);
11124 (void) GF_FLUSH(f
->next
); /* and drain queue */
11125 fs_give((void **)&f
->line
);
11126 fs_give((void **)&f
); /* wax our data */
11132 else /* add CR to buffer */
11133 GF_LINE_TEST_ADD(f
, '\015');
11134 } /* fall thru to handle 'c' */
11136 if(c
== '\015') /* newline? */
11139 GF_LINE_TEST_ADD(f
, c
);
11143 GF_END(f
, f
->next
);
11145 else if(flg
== GF_EOD
){
11148 GF_LINE_TEST_TEST(f
, i
); /* examine remaining data */
11149 fs_give((void **) &f
->line
); /* free line buffer */
11150 fs_give((void **) &f
->opt
); /* free test struct */
11151 (void) GF_FLUSH(f
->next
);
11152 (*f
->next
->f
)(f
->next
, GF_EOD
);
11154 else if(flg
== GF_RESET
){
11155 dprint((9, "-- gf_reset line_test\n"));
11156 f
->f1
= 0; /* state */
11157 f
->n
= 0L; /* line number */
11158 f
->f2
= LINE_TEST_BLOCK
; /* size of alloc'd line */
11159 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
11167 * function called from the outside to operate on accumulated line.
/*
 * gf_line_test_opt - build the option block for gf_line_test.
 *
 * test_f  function to be called with each accumulated line
 *         NOTE(review): presumably stored in the f member that
 *         GF_LINE_TEST_TEST calls through - confirm
 * local   opaque pointer stored as ltp->local and handed back to the
 *         test function on every call
 *
 * Returns a LINETEST_S obtained from fs_get and zeroed, cast to
 * void *.  gf_line_test frees it through f->opt at GF_EOD.
 */
11170 gf_line_test_opt(linetest_t test_f
, void *local
)
11174 ltp
= (LINETEST_S
*) fs_get(sizeof(LINETEST_S
));
11175 memset(ltp
, 0, sizeof(LINETEST_S
));
11177 ltp
->local
= local
;
11178 return((void *) ltp
);
/*
 * gf_line_test_new_ins - allocate one insert record and store it in *ins.
 *
 * ins  address of the link to fill in
 * p    NOTE(review): presumably the position in the line that the
 *      record applies to, the where member compared against in
 *      GF_LINE_TEST_TEST - confirm
 * s    text to insert; only read when n is greater than 0
 * n    stored as the record length; when positive it is also the
 *      number of bytes copied from s
 *
 * Returns the address of the next link of the new record, so a
 * caller can append a further record by passing the result back in.
 */
11184 gf_line_test_new_ins(LT_INS_S
**ins
, char *p
, char *s
, int n
)
11186 *ins
= (LT_INS_S
*) fs_get(sizeof(LT_INS_S
));
/*
 * the copy is exactly n bytes into an n byte buffer, so text is not
 * NUL terminated; the consumers visible in this file stop at len
 */
11187 if(((*ins
)->len
= n
) > 0)
11188 strncpy((*ins
)->text
= (char *) fs_get(n
* sizeof(char)), s
, n
);
11190 (*ins
)->text
= NULL
;
11193 (*ins
)->next
= NULL
;
11194 return(&(*ins
)->next
);
/*
 * gf_line_test_free_ins - release a list of insert records.
 *
 * ins  address of the head link
 *
 * Recurses on the next link first, then frees the text of the record
 * and the record itself with fs_give.
 * NOTE(review): fs_give presumably also sets the freed pointer to
 * NULL, which would leave *ins cleared - confirm.
 */
11199 gf_line_test_free_ins(LT_INS_S
**ins
)
11203 gf_line_test_free_ins(&(*ins
)->next
);
11206 fs_give((void **) &(*ins
)->text
);
11208 fs_give((void **) ins
);
11214 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11215 * with editorial comment
/*
 * Option block for gf_prepend_editorial.
 * NOTE(review): presumably typedef-ed as PREPED_S, whose f (optional
 * test function) and text (editorial string) members are the ones
 * gf_prepend_editorial reads - confirm.
 */
11218 typedef struct _preped_s
{
11225 * gf_prepend_editorial - accumulate filtered text and prepend its
11226 * output with given text
/*
 * gf_prepend_editorial - hold back all filtered text until end of
 * data, then write an optional editorial string followed by the
 * held text.
 *
 * f    this filter; f->data is the STORE_S holding the text and
 *      f->opt the PREPED_S with the test function and the string
 * flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA   every input character is appended to the store; nothing
 *           is passed on yet.
 * GF_EOD    the editorial text is written when no test function is
 *           set or when the test function returns non-zero.  The
 *           store is then rewound and copied to f->next, the store
 *           and option block are released, and GF_EOD is passed on.
 * GF_RESET  creates the store.
 */
11231 gf_prepend_editorial(FILTER_S
*f
, int flg
)
11233 GF_INIT(f
, f
->next
);
11235 if(flg
== GF_DATA
){
11236 register unsigned char c
;
11238 while(GF_GETC(f
, c
)){
11239 so_writec(c
, (STORE_S
*) f
->data
);
11242 GF_END(f
, f
->next
);
11244 else if(flg
== GF_EOD
){
/* a missing test function means the text is always prepended */
11247 if(!((PREPED_S
*)(f
)->opt
)->f
|| (*((PREPED_S
*)(f
)->opt
)->f
)()){
11248 char *p
= ((PREPED_S
*)(f
)->opt
)->text
;
11250 for( ; p
&& *p
; p
++)
11251 GF_PUTC(f
->next
, *p
);
/* rewind the store and replay everything that was held back */
11254 so_seek((STORE_S
*) f
->data
, 0L, 0);
11255 while(so_readc(&c
, (STORE_S
*) f
->data
)){
11256 GF_PUTC(f
->next
, c
);
11259 so_give((STORE_S
**) &f
->data
);
11260 fs_give((void **) &f
->opt
);
11261 (void) GF_FLUSH(f
->next
);
11262 (*f
->next
->f
)(f
->next
, GF_EOD
);
11264 else if(flg
== GF_RESET
){
/* NOTE(review): the message names line_test, not this filter - looks like a copy/paste slip, confirm */
11265 dprint((9, "-- gf_reset line_test\n"));
11266 f
->data
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
11272 * function called from the outside to setup prepending editorial
/*
 * gf_prepend_editorial_opt - build the option block for
 * gf_prepend_editorial.
 *
 * test_f  optional function deciding at end of data whether the
 *         editorial text is written
 * text    the editorial text
 *
 * Returns a PREPED_S obtained from fs_get and zeroed, cast to
 * void *.  gf_prepend_editorial frees it through f->opt at GF_EOD.
 * NOTE(review): test_f and text are presumably stored in the f and
 * text members that gf_prepend_editorial reads - confirm.
 */
11276 gf_prepend_editorial_opt(prepedtest_t test_f
, char *text
)
11280 pep
= (PREPED_S
*) fs_get(sizeof(PREPED_S
));
11281 memset(pep
, 0, sizeof(PREPED_S
));
11284 return((void *) pep
);
11289 * Network virtual terminal to local newline convention filter
/*
 * gf_nvtnl_local - convert network (CRLF) line endings to the local
 * newline convention.
 *
 * f    this filter; f->f1 carries the parser state between calls
 * flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA   in the visible lines an LF is written for a line ending
 *           and a CR is written out literally before falling through
 *           to handle the current character.
 *           NOTE(review): presumably the CR is held back until the
 *           next character shows whether it starts a CRLF - confirm.
 * GF_EOD    flushes f->next and passes GF_EOD on.
 * GF_RESET  only the debug trace is visible here.
 */
11292 gf_nvtnl_local(FILTER_S
*f
, int flg
)
11294 GF_INIT(f
, f
->next
);
11296 if(flg
== GF_DATA
){
11297 register unsigned char c
;
11298 register int state
= f
->f1
;
11300 while(GF_GETC(f
, c
)){
11304 GF_PUTC(f
->next
, '\012');
11308 GF_PUTC(f
->next
, '\015');
11309 /* fall thru to deal with 'c' */
11315 GF_PUTC(f
->next
, c
);
11319 GF_END(f
, f
->next
);
11321 else if(flg
== GF_EOD
){
11322 (void) GF_FLUSH(f
->next
);
11323 (*f
->next
->f
)(f
->next
, GF_EOD
);
11325 else if(flg
== GF_RESET
){
11326 dprint((9, "-- gf_reset nvtnl_local\n"));
11333 * local to network newline convention filter
11336 gf_local_nvtnl(FILTER_S
*f
, int flg
)
11338 GF_INIT(f
, f
->next
);
11340 if(flg
== GF_DATA
){
11341 register unsigned char c
;
11343 while(GF_GETC(f
, c
)){
11345 GF_PUTC(f
->next
, '\015');
11346 GF_PUTC(f
->next
, '\012');
11348 else if(c
!= '\015') /* do not copy isolated \015 into source */
11349 GF_PUTC(f
->next
, c
);
11352 GF_END(f
, f
->next
);
11354 else if(flg
== GF_EOD
){
11355 (void) GF_FLUSH(f
->next
);
11356 (*f
->next
->f
)(f
->next
, GF_EOD
);
11359 dprint((9, "-- gf_reset local_nvtnl\n"));