1 #if !defined(lint) && !defined(DOS)
2 static char rcsid
[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
6 * ========================================================================
7 * Copyright 2013-2017 Eduardo Chappa
8 * Copyright 2006-2008 University of Washington
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
39 reasonable error handling
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
60 #include "../pico/osdep/mswin.h"
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
73 int gf_fwritec_locale(int);
75 int gf_freadc_windows(unsigned char *);
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
81 int gf_pwritec_locale(int);
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
86 int gf_swritec_locale(int);
87 void gf_terminal(FILTER_S
*, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
92 void gf_8bit_put(FILTER_S
*, int);
97 * System specific options
100 #define CRLF_NEWLINES
105 * Hooks for callers to adjust behavior
107 char *(*pith_opt_pretty_var_name
)(char *);
108 char *(*pith_opt_pretty_feature_name
)(char *, int);
112 * pointer to first function in a pipe, and pointer to last filter
114 FILTER_S
*gf_master
= NULL
;
115 static gf_io_t last_filter
;
116 static char *gf_error_string
;
117 static long gf_byte_count
;
118 static jmp_buf gf_error_state
;
121 #define GF_NOOP 0x01 /* flags used by generalized */
122 #define GF_EOD 0x02 /* filters */
123 #define GF_DATA 0x04 /* See filter.c for more */
124 #define GF_ERROR 0x08 /* details */
125 #define GF_RESET 0x10
129 * A list of states used by the various filters. Reused in many filters.
149 #define STOP_DECODING 18
155 * Macros to reduce function call overhead associated with calling
156 * each filter for each byte filtered, and to minimize filter structure
157 * dereferences. NOTE: "queuein" has to do with putting chars into the
158 * filter structs data queue. So, writing at the queuein offset is
159 * what a filter does to pass processed data out of itself. Ditto for
160 * queueout. This explains the FI --> queueout init stuff below.
/*
 * Queue bounds for filter F: GF_QUE_START is the address of queue[0],
 * GF_QUE_END the address of queue[GF_MAXBUF - 1].
 */
162 #define GF_QUE_START(F) (&(F)->queue[0])
163 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
/*
 * Cursor setup.  Each macro assigns one of the names ip, eib, op, eob
 * (which must be in scope at the point of use); every one yields NULL
 * when F is NULL.
 *   ip  -- write cursor, placed at F's queuein offset
 *   eib -- write limit, GF_QUE_END(F)
 *   op  -- read cursor, placed at F's queueout offset
 *   eob -- read limit, placed at F's queuein offset
 * The _GLO variants store through the pointers ipp / eibp instead of
 * assigning ip / eib directly.
 */
165 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
166 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
167 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
168 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
169 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
170 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
/*
 * Write a cursor back into F as an offset from the start of its queue.
 * These dereference F unconditionally (no NULL check, unlike the
 * *_INIT macros above).
 */
172 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
173 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
174 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
/*
 * Declares the four unsigned char * locals: op and eob are taken from
 * FI, ip and eib from FO.  Because it expands to declarations it has
 * to appear where declarations are permitted.
 */
176 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
177 unsigned char *GF_EOB_INIT(FI); \
178 unsigned char *GF_IP_INIT(FO); \
179 unsigned char *GF_EIB_INIT(FO);
/*
 * Rewind F's queue: op and eob return to the queue start and both
 * stored offsets become 0.  The expression's value is 0 (the final
 * comma operand is the assignment of 0).
 */
181 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
182 (F)->queueout = (F)->queuein = 0)
/* Save the read cursor into FI and the write cursor into FO. */
184 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
/*
 * Hand queued data to F: store the write cursor, call F's function with
 * GF_DATA, then reload ip and eib from F.  Yields 1 when the reloaded
 * eib is non-NULL, 0 otherwise.  F is evaluated several times, so it
 * should be an expression without side effects.  GF_FLUSH_GLO is the
 * same operation on *ipp / *eibp.
 */
186 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
187 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
188 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
189 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
/*
 * Store C at the write cursor and advance it; once the cursor reaches
 * eib the queue is flushed to F and the flush result is the value,
 * otherwise the value is 1.  GF_PUTC_GLO does the same via *ipp / *eibp.
 */
191 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
192 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
195 * Introducing the *_GLO macros for use in splitting the big macros out
196 * into functions (wrap_flush, wrap_eol). The reason we need a
197 * separate macro is because of the vars ip, eib, op, and eob, which are
198 * set up locally in a call to GF_INIT. To preserve these variables
199 * in the new functions, we now pass pointers to these four vars. Each
200 * of these new functions expects the presence of pointer vars
201 * ipp, eibp, opp, and eobp.
/*
 * Fetch the next queued byte into C.  While op is below eob the byte at
 * op is copied to C, op advances, and the value is 1.  Once the queue is
 * drained, GF_CH_RESET(F) rewinds it and the value is 0, which ends a
 * while(GF_GETC(...)) loop.
 */
204 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
206 #define GF_COLOR_PUTC(F, C) { \
209 GF_PUTC_GLO((F)->next, TAG_EMBED); \
210 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
211 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
212 cb[sizeof(cb)-1] = '\0'; \
215 GF_PUTC_GLO((F)->next, *p); \
216 GF_PUTC_GLO((F)->next, TAG_EMBED); \
217 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
218 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
219 cb[sizeof(cb)-1] = '\0'; \
222 GF_PUTC_GLO((F)->next, *p); \
226 * Generalized getc and putc routines. provided here so they don't
227 * need to be re-done elsewhere to
231 * pointers to objects to be used by the generic getc and putc
234 static struct gf_io_struct
{
243 #define GF_SO_STACK struct gf_so_stack
247 } *gf_so_in
, *gf_so_out
;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALRM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc
)
261 return(pc
== gf_so_writec
&& gf_so_out
&& gf_so_out
->so
&&
262 gf_so_out
->so
->src
== ExternalText
);
268 * setup to use and return a pointer to the generic
272 gf_set_readc(gf_io_t
*gc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
276 gf_in
.cb
.cbuf
[0] = '\0';
277 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
278 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
281 gf_in
.file
= (FILE *)txt
;
282 fseek(gf_in
.file
, 0L, 0);
284 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_windows
287 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_locale
291 else if(src
== PipeStar
){
292 gf_in
.pipe
= (PIPE_S
*)txt
;
294 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_preadc_locale
298 gf_in
.txtp
= (char *)txt
;
299 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_sreadc_locale
306 * setup to use and return a pointer to the generic
310 gf_set_writec(gf_io_t
*pc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
313 gf_out
.flags
= flags
;
314 gf_out
.cb
.cbuf
[0] = '\0';
315 gf_out
.cb
.cbufp
= gf_out
.cb
.cbuf
;
316 gf_out
.cb
.cbufend
= gf_out
.cb
.cbuf
;
319 gf_out
.file
= (FILE *)txt
;
323 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_fwritec_locale
327 else if(src
== PipeStar
){
328 gf_out
.pipe
= (PIPE_S
*)txt
;
329 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_pwritec_locale
333 gf_out
.txtp
= (char *)txt
;
334 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_swritec_locale
341 * setup to use and return a pointer to the generic
345 gf_set_so_readc(gf_io_t
*gc
, STORE_S
*so
)
347 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
357 gf_clear_so_readc(STORE_S
*so
)
361 if((sp
= gf_so_in
) != NULL
){
363 gf_so_in
= gf_so_in
->next
;
364 fs_give((void **) &sp
);
367 alpine_panic("Programmer botch: Can't unstack store readc");
370 alpine_panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
379 gf_set_so_writec(gf_io_t
*pc
, STORE_S
*so
)
381 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
384 sp
->next
= gf_so_out
;
391 gf_clear_so_writec(STORE_S
*so
)
395 if((sp
= gf_so_out
) != NULL
){
397 gf_so_out
= gf_so_out
->next
;
398 fs_give((void **) &sp
);
401 alpine_panic("Programmer botch: Can't unstack store writec");
404 alpine_panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
414 return(so_writec(c
, gf_so_out
->so
));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c
)
424 return(so_readc(c
, gf_so_in
->so
));
428 /* get a character from a file */
429 /* assumes gf_in struct is filled in */
431 gf_freadc(unsigned char *c
)
437 clearerr(gf_in
.file
);
438 rv
= fread(c
, sizeof(unsigned char), (size_t)1, gf_in
.file
);
439 } while(!rv
&& ferror(gf_in
.file
) && errno
== EINTR
);
446 gf_freadc_locale(unsigned char *c
)
448 return(generic_readc_locale(c
, gf_freadc_getchar
, (void *) gf_in
.file
, &gf_in
.cb
));
453 * This is just to make it work with generic_readc_locale.
456 gf_freadc_getchar(unsigned char *c
, void *extraarg
)
461 file
= (FILE *) extraarg
;
466 rv
= fread(c
, sizeof(unsigned char), (size_t)1, file
);
467 } while(!rv
&& ferror(file
) && errno
== EINTR
);
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
481 unsigned char ch
= (unsigned char)c
;
485 rv
= fwrite(&ch
, sizeof(unsigned char), (size_t)1, gf_out
.file
);
486 while(!rv
&& ferror(gf_out
.file
) && errno
== EINTR
);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c
)
501 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
503 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
504 for(i
= 0; i
< outchars
; i
++)
505 if(gf_fwritec(obuf
[i
]) != 1){
517 * Read unicode characters from windows filesystem and return
518 * them as a stream of UTF-8 characters. The stream is assumed
519 * opened so that it will know how to put together the unicode.
521 * (This is totally untested, copied loosely from so_file_readc_windows
522 * which may or may not be appropriate.)
525 gf_freadc_windows(unsigned char *c
)
530 /* already got some from previous call? */
531 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
532 *c
= *gf_in
.cb
.cbufp
;
535 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
536 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
537 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
544 /* windows only so second arg is ignored */
545 ucs
= read_a_wide_char(gf_in
.file
, NULL
);
546 rv
= (ucs
== CCONV_EOF
) ? 0 : 1;
551 * Now we need to convert the UCS character to UTF-8
552 * and dole out the UTF-8 one char at a time.
554 gf_in
.cb
.cbufend
= utf8_put(gf_in
.cb
.cbuf
, (unsigned long) ucs
);
555 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
556 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
557 *c
= *gf_in
.cb
.cbufp
;
559 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
560 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
561 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
570 #endif /* _WINDOWS */
574 gf_preadc(unsigned char *c
)
576 return(pipe_readc(c
, gf_in
.pipe
));
581 gf_preadc_locale(unsigned char *c
)
583 return(generic_readc_locale(c
, gf_preadc_getchar
, (void *) gf_in
.pipe
, &gf_in
.cb
));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c
, void *extraarg
)
595 pipe
= (PIPE_S
*) extraarg
;
597 return(pipe_readc(c
, pipe
));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
609 return(pipe_writec(c
, gf_out
.pipe
));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c
)
622 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
624 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
625 for(i
= 0; i
< outchars
; i
++)
626 if(gf_pwritec(obuf
[i
]) != 1){
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_in struct is filled in */
639 gf_sreadc(unsigned char *c
)
641 return((gf_in
.n
) ? *c
= *(gf_in
.txtp
)++, gf_in
.n
-- : 0);
646 gf_sreadc_locale(unsigned char *c
)
648 return(generic_readc_locale(c
, gf_sreadc_getchar
, NULL
, &gf_in
.cb
));
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    /*
     * Adapter giving gf_sreadc the signature generic_readc_locale
     * expects.  extraarg is unused: gf_sreadc works from the gf_in
     * globals, which is fine as long as only one such read sequence
     * is in progress at a time.
     */
    (void) extraarg;

    return(gf_sreadc(c));
}
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
671 return((gf_out
.n
) ? *(gf_out
.txtp
)++ = c
, gf_out
.n
-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c
)
684 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
686 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
687 for(i
= 0; i
< outchars
; i
++)
688 if(gf_swritec(obuf
[i
]) != 1){
699 * output the given string with the given function
702 gf_puts(register char *s
, gf_io_t pc
)
705 if(!(*pc
)((unsigned char)*s
++))
706 return(0); /* ERROR putting char ! */
713 * output the given string with the given function
716 gf_nputs(register char *s
, long int n
, gf_io_t pc
)
719 if(!(*pc
)((unsigned char)*s
++))
720 return(0); /* ERROR putting char ! */
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * function's use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c
,
743 int (*get_a_char
)(unsigned char *, void *),
747 unsigned long octets_so_far
= 0, remaining_octets
;
748 unsigned char *inputp
;
751 unsigned char inputbuf
[20];
755 /* already got some from previous call? */
756 if(cb
->cbufend
> cb
->cbuf
){
760 if(cb
->cbufp
>= cb
->cbufend
){
761 cb
->cbufend
= cb
->cbuf
;
762 cb
->cbufp
= cb
->cbuf
;
768 memset(inputbuf
, 0, sizeof(inputbuf
));
769 if((*get_a_char
)(&ch
, extraarg
) == 0)
772 inputbuf
[octets_so_far
++] = ch
;
775 remaining_octets
= octets_so_far
;
777 ucs
= mbtow(ps_global
->input_cs
, &inputp
, &remaining_octets
);
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far
>= sizeof(inputbuf
))
792 if((*get_a_char
)(&ch
, extraarg
) == 0)
795 inputbuf
[octets_so_far
++] = ch
;
799 /* got a good UCS-4 character */
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
810 cb
->cbufend
= utf8_put(cb
->cbuf
, (unsigned long) ucs
);
811 cb
->cbufp
= cb
->cbuf
;
812 if(cb
->cbufend
> cb
->cbuf
){
815 if(cb
->cbufp
>= cb
->cbufend
){
816 cb
->cbufend
= cb
->cbuf
;
817 cb
->cbufp
= cb
->cbuf
;
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
837 FILTER_S
*flt
, *fltn
= gf_master
;
839 while((flt
= fltn
) != NULL
){ /* free list of old filters */
841 fs_give((void **)&flt
);
845 gf_error_string
= NULL
; /* clear previous errors */
846 gf_byte_count
= 0L; /* reset counter */
852 * link the given filter into the filter chain
855 gf_link_filter(filter_t f
, void *data
)
857 FILTER_S
*new, *tail
;
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f
== gf_nvtnl_local
|| f
== gf_local_nvtnl
)
868 new = (FILTER_S
*)fs_get(sizeof(FILTER_S
));
869 memset(new, 0, sizeof(FILTER_S
));
871 new->f
= f
; /* set the function pointer */
872 new->opt
= data
; /* set any optional parameter data */
873 (*f
)(new, GF_RESET
); /* have it setup initial state */
875 if((tail
= gf_master
) != NULL
){ /* or add it to end of existing */
876 while(tail
->next
) /* list */
881 else /* attach new struct to list */
882 gf_master
= new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
891 gf_terminal(FILTER_S
*f
, int flg
)
897 if((*last_filter
)(*op
++) <= 0) /* generic terminal filter */
898 gf_error(errno
? error_description(errno
) : "Error writing pipe");
902 else if(flg
== GF_RESET
)
903 errno
= 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
912 gf_set_terminal(gf_io_t f
) /* function to set generic filter */
920 * common function for filters to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
926 /* let the user know the error passed in s */
928 longjmp(gf_error_state
, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
939 gf_pipe(gf_io_t gc
, gf_io_t pc
)
940 /* how to get a character */
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state
)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string
? gf_error_string
: "NULL"));
952 return(gf_error_string
); /* */
956 * set and link in the terminal filter
959 gf_link_filter(gf_terminal
, NULL
);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
971 GF_INIT(gf_master
, gf_master
);
977 if(!(gf_byte_count
& 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled through the alarm()
985 GF_PUTC(gf_master
, c
& 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master
);
993 (*gf_master
->f
)(gf_master
, GF_EOD
);
996 dprint((4, "done.\n"));
997 return(NULL
); /* everything went OK */
1002 * return the number of bytes piped so far
1005 gf_bytes_piped(void)
1007 return(gf_byte_count
);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
1023 gf_filter(char *cmd
, char *prepend
, STORE_S
*source_so
, gf_io_t pc
,
1024 FILTLIST_S
*aux_filters
, int silent
, int disable_reset
,
1025 void (*pipecb_f
)(PIPE_S
*, int, void *))
1027 unsigned char c
, obuf
[MAX(MB_LEN_MAX
,32)];
1028 int flags
, outchars
, i
;
1029 char *errstr
= NULL
, buf
[MAILTMPLEN
];
1032 #ifdef NON_BLOCKING_IO
1036 dprint((4, "so_filter: \"%s\"\n", cmd
? cmd
: "?"));
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
1044 if(ps_global
->keyboard_charmap
&& strucmp("UTF-8", ps_global
->keyboard_charmap
))
1045 gf_link_filter(gf_utf8
, gf_utf8_opt(ps_global
->keyboard_charmap
));
1047 for( ; aux_filters
&& aux_filters
->filter
; aux_filters
++)
1048 gf_link_filter(aux_filters
->filter
, aux_filters
->data
);
1050 gf_set_terminal(pc
);
1051 gf_link_filter(gf_terminal
, NULL
);
1055 cb
.cbufend
= cb
.cbuf
;
1058 * Spawn filter feeding it data, and reading what it writes.
1060 so_seek(source_so
, 0L, 0);
1061 flags
= PIPE_WRITE
| PIPE_READ
| PIPE_NOSHELL
1062 | (silent
? PIPE_SILENT
: 0)
1063 | (!disable_reset
? PIPE_RESET
: 0);
1065 if((fpipe
= open_system_pipe(cmd
, NULL
, NULL
, flags
, 0, pipecb_f
, pipe_report_error
)) != NULL
){
1067 #ifdef NON_BLOCKING_IO
1069 if(fcntl(fileno(fpipe
->in
.f
), F_SETFL
, NON_BLOCKING_IO
) == -1)
1070 errstr
= "Can't set up non-blocking IO";
1072 if(prepend
&& (fputs(prepend
, fpipe
->out
.f
) == EOF
1073 || fputc('\n', fpipe
->out
.f
) == EOF
))
1074 errstr
= error_description(errno
);
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n
= 0; !errstr
&& fpipe
->out
.f
&& n
< 1024; n
++)
1081 if(!so_readc(&c
, source_so
)){
1082 fclose(fpipe
->out
.f
);
1083 fpipe
->out
.f
= NULL
;
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1092 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1093 if(fputc(obuf
[i
], fpipe
->out
.f
) == EOF
)
1094 errstr
= error_description(errno
);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1103 clearerr(fpipe
->in
.f
); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr
&& fgets(buf
, sizeof(buf
), fpipe
->in
.f
))
1106 errstr
= gf_filter_puts(buf
);
1108 /* then fgets failed! */
1109 if(!errstr
&& !(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)){
1110 if(feof(fpipe
->in
.f
)) /* nothing else interesting! */
1112 else if(ferror(fpipe
->in
.f
)) /* bummer. */
1113 errstr
= error_description(errno
);
1115 else if(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)
1116 clearerr(fpipe
->in
.f
);
1119 #else /* !NON_BLOCKING_IO */
1121 if(prepend
&& (pipe_puts(prepend
, fpipe
) == EOF
1122 || pipe_putc('\n', fpipe
) == EOF
))
1123 errstr
= error_description(errno
);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr
&& so_readc(&c
, source_so
))
1130 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1131 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1132 if(pipe_putc(obuf
[i
], fpipe
) == EOF
)
1133 errstr
= error_description(errno
);
1135 if(pipe_close_write(fpipe
))
1136 errstr
= _("Pipe command returned error.");
1138 while(!errstr
&& pipe_gets(buf
, sizeof(buf
), fpipe
))
1139 errstr
= gf_filter_puts(buf
);
1141 #endif /* !NON_BLOCKING_IO */
1143 if(close_system_pipe(&fpipe
, NULL
, pipecb_f
) && !errstr
)
1144 errstr
= _("Pipe command returned error.");
1149 errstr
= _("Error setting up pipe command.");
1156 * gf_filter_puts - write the given string down the filter's pipe
1159 gf_filter_puts(register char *s
)
1161 GF_INIT(gf_master
, gf_master
);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state
)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string
? gf_error_string
: "NULL"));
1169 return(gf_error_string
);
1173 GF_PUTC(gf_master
, (*s
++) & 0xff);
1175 GF_END(gf_master
, gf_master
);
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1187 GF_INIT(gf_master
, gf_master
);
1188 (void) GF_FLUSH(gf_master
);
1189 (*gf_master
->f
)(gf_master
, GF_EOD
);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1218 * gf_xxx_filter(f, flg)
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1249 * - Filters must pass any GF_EOD they receive on to the next
1250 * filter in the chain so it has the opportunity to flush
1251 * any buffered data.
1253 * - All filters expect NVT end-of-lines. The idea is to prepend
1254 * or append either the gf_local_nvtnl or gf_nvtnl_local
1255 * OS-dependent filters to the data on the appropriate end of the
1256 * pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
1275 gf_binary_b64(FILTER_S
*f
, int flg
)
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f
, f
->next
);
1282 register unsigned char c
;
1283 register unsigned char t
= f
->t
;
1284 register long n
= f
->n
;
1286 while(GF_GETC(f
, c
)){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1292 GF_PUTC(f
->next
, v
[c
>> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t
= c
<< 4; /* remember high 2 bits for next */
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1300 GF_PUTC(f
->next
, v
[(t
|(c
>>4)) & 0x3f]);
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1307 GF_PUTC(f
->next
, v
[(t
|(c
>> 6)) & 0x3f]);
1308 GF_PUTC(f
->next
, v
[c
& 0x3f]);
1312 if(n
== 45){ /* start a new line? */
1313 GF_PUTC(f
->next
, '\015');
1314 GF_PUTC(f
->next
, '\012');
1323 else if(flg
== GF_EOD
){ /* no more data */
1324 switch (f
->n
% 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1329 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1330 GF_PUTC(f
->next
, '='); /* byte 3 */
1331 GF_PUTC(f
->next
, '='); /* byte 4 */
1335 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1336 GF_PUTC(f
->next
, '='); /* byte 4 */
1342 GF_PUTC(f
->next
, '\015');
1343 GF_PUTC(f
->next
, '\012');
1346 (void) GF_FLUSH(f
->next
);
1347 (*f
->next
->f
)(f
->next
, GF_EOD
);
1349 else if(flg
== GF_RESET
){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1361 gf_b64_binary(FILTER_S
*f
, int flg
)
1363 static char v
[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f
, f
->next
);
1374 register unsigned char c
;
1375 register unsigned char t
= f
->t
;
1376 register int n
= (int) f
->n
;
1377 register int state
= f
->f1
;
1379 while(GF_GETC(f
, c
)){
1384 gf_error("Illegal '=' in base64 text");
1389 /* in range, and a valid value? */
1390 if((c
& ~0x7f) || (c
= v
[c
]) > 63){
1392 switch (n
++) { /* check quantum position */
1394 state
++; /* expect an equal as next char */
1398 n
= 0L; /* restart quantum */
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1408 switch (n
++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f
->next
, (t
|(c
>> 4)));
1415 t
= c
<< 4; /* byte 2: high 4 bits */
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f
->next
, (t
|(c
>> 2)));
1420 t
= c
<< 6; /* byte 3: high 2 bits */
1424 GF_PUTC(f
->next
, t
| c
);
1425 n
= 0L; /* reinitialize mechanism */
1436 else if(flg
== GF_EOD
){
1437 (void) GF_FLUSH(f
->next
);
1438 (*f
->next
->f
)(f
->next
, GF_EOD
);
1440 else if(flg
== GF_RESET
){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f
->n
= 0L; /* quantum position */
1443 f
->f1
= 0; /* state holder: equal seen? */
1451 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452 * encoding described in rfc1341
1455 #define GF_MAXLINE 80 /* good buffer size */
1458 * default action for QUOTED-PRINTABLE to 8BIT decoder
1460 #define GF_QP_DEFAULT(f, c) { \
1463 /* reset white space! */ \
1464 (f)->linep = (f)->line; \
1465 *((f)->linep)++ = ' '; \
1467 else if((c) == '='){ \
1471 GF_PUTC((f)->next, (c)); \
1476 * QUOTED-PRINTABLE to 8BIT filter
1479 gf_qp_8bit(FILTER_S
*f
, int flg
)
1482 GF_INIT(f
, f
->next
);
1485 register unsigned char c
;
1486 register int state
= f
->f1
;
1488 while(GF_GETC(f
, c
)){
1491 case DFL
: /* default case */
1493 GF_QP_DEFAULT(f
, c
);
1496 case CCR
: /* non-significant space */
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f
, c
);
1505 if(c
== '\015'){ /* "=\015" is a soft EOL */
1510 if(c
== '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f
->next
, '=');
1516 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state
= STOP_DECODING
;
1540 GF_PUTC(f
->next
, '=');
1541 GF_PUTC(f
->next
, c
);
1542 q_status_message(SM_ORDER
,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c
, c
));
1549 if (isdigit ((unsigned char)c
))
1552 f
->t
= c
- (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1554 f
->f2
= c
; /* store character in case we have to
1555 back out in !isxdigit below */
1562 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1563 state
= STOP_DECODING
;
1564 GF_PUTC(f
->next
, '=');
1565 GF_PUTC(f
->next
, f
->f2
);
1566 GF_PUTC(f
->next
, c
);
1567 q_status_message(SM_ORDER
,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c
, c
, f
->f2
));
1574 if (isdigit((unsigned char)c
))
1577 c
-= (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f
->next
, c
+ (f
->t
<< 4));
1583 if(c
== ' '){ /* toss it in with other spaces */
1584 if(f
->linep
- f
->line
< GF_MAXLINE
)
1585 *(f
->linep
)++ = ' ';
1590 if(c
== '\015'){ /* not our white space! */
1591 f
->linep
= f
->line
; /* reset buffer */
1592 GF_PUTC(f
->next
, '\015');
1596 /* the spaces are ours, write 'em */
1597 f
->n
= f
->linep
- f
->line
;
1599 GF_PUTC(f
->next
, ' ');
1601 GF_QP_DEFAULT(f
, c
); /* take care of 'c' in default way */
1604 case STOP_DECODING
:
1605 GF_PUTC(f
->next
, c
);
1613 else if(flg
== GF_EOD
){
1614 fs_give((void **)&(f
->line
));
1615 (void) GF_FLUSH(f
->next
);
1616 (*f
->next
->f
)(f
->next
, GF_EOD
);
1618 else if(flg
== GF_RESET
){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1621 f
->linep
= f
->line
= (char *)fs_get(GF_MAXLINE
* sizeof(char));
1628 * USEFUL MACROS TO HELP WITH QP ENCODING
1631 #define QP_MAXL 75 /* 76th place only for continuation */
1634 * Macro to test and wrap long quoted printable lines
1636 #define GF_8BIT_WRAP(f) { \
1637 GF_PUTC((f)->next, '='); \
1638 GF_PUTC((f)->next, '\015'); \
1639 GF_PUTC((f)->next, '\012'); \
1643 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
1644 * line break if needed.
1646 #define GF_8BIT_PUT_QUOTE(f, c) { \
1647 if(((f)->n += 3) > QP_MAXL){ \
1649 (f)->n = 3; /* set line count */ \
1651 GF_PUTC((f)->next, '='); \
1652 GF_PUTC((f)->next, HEX_CHAR1(c)); \
1653 GF_PUTC((f)->next, HEX_CHAR2(c)); \
1657 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
1660 #define GF_8BIT_PUT(f, c) { \
1661 if((++(f->n)) > QP_MAXL){ \
1665 if(f->n == 1L && c == '.'){ \
1666 GF_8BIT_PUT_QUOTE(f, c); \
1670 GF_PUTC(f->next, c); \
1675 * default action for 8bit to quoted printable encoder
1677 #define GF_8BIT_DEFAULT(f, c) if((c) == ' '){ \
1680 else if(c == '\015'){ \
1683 else if(iscntrl(c & 0x7f) || (c == 0x7f) \
1684 || (c & 0x80) || (c == '=')){ \
1685 GF_8BIT_PUT_QUOTE(f, c); \
1688 GF_8BIT_PUT(f, c); \
1693 * 8BIT to QUOTED-PRINTABLE filter
1696 gf_8bit_qp(FILTER_S
*f
, int flg
)
1698 short dummy_dots
= 0, dummy_dmap
= 1;
1699 GF_INIT(f
, f
->next
);
1702 register unsigned char c
;
1703 register int state
= f
->f1
;
1705 while(GF_GETC(f
, c
)){
1707 /* keep track of "^JFrom " */
1708 Find_Froms(f
->t
, dummy_dots
, f
->f2
, dummy_dmap
, c
);
1711 case DFL
: /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f
, c
);
1715 case CCR
: /* true line break? */
1718 GF_PUTC(f
->next
, '\015');
1719 GF_PUTC(f
->next
, '\012');
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f
, '\015');
1724 GF_8BIT_DEFAULT(f
, c
); /* and don't forget about c! */
1730 if(c
== '\015' || f
->t
){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f
, ' ');
1732 f
->t
= 0; /* reset From flag */
1735 GF_8BIT_PUT(f
, ' ');
1737 GF_8BIT_DEFAULT(f
, c
); /* handle 'c' in the default way */
1745 else if(flg
== GF_EOD
){
1748 GF_8BIT_PUT_QUOTE(f
, '\015'); /* write the last cr */
1752 GF_8BIT_PUT_QUOTE(f
, ' '); /* write the last space */
1756 (void) GF_FLUSH(f
->next
);
1757 (*f
->next
->f
)(f
->next
, GF_EOD
);
1759 else if(flg
== GF_RESET
){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f
->f1
= DFL
; /* state from last character */
1762 f
->f2
= 1; /* state of "^NFrom " bitmap */
1764 f
->n
= 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1773 gf_convert_8bit_charset(FILTER_S
*f
, int flg
)
1775 static unsigned char *conv_table
= NULL
;
1776 GF_INIT(f
, f
->next
);
1779 register unsigned char c
;
1781 while(GF_GETC(f
, c
)){
1782 GF_PUTC(f
->next
, conv_table
? conv_table
[c
] : c
);
1787 else if(flg
== GF_EOD
){
1788 (void) GF_FLUSH(f
->next
);
1789 (*f
->next
->f
)(f
->next
, GF_EOD
);
1791 else if(flg
== GF_RESET
){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table
= (f
->opt
) ? (unsigned char *) (f
->opt
) : NULL
;
1799 typedef struct _utf8c_s
{
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1811 gf_convert_utf8_charset(FILTER_S
*f
, int flg
)
1813 static unsigned short *conv_table
= NULL
;
1814 static int report_err
= 0;
1815 register int more
= f
->f2
;
1816 register long u
= f
->n
;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f
, f
->next
);
1827 register unsigned char c
;
1829 while(GF_GETC(f
, c
)){
1830 if(!conv_table
){ /* can't do much if no conversion table */
1831 GF_PUTC(f
->next
, c
);
1833 /* UTF-8 continuation? */
1834 else if((c
> 0x7f) && (c
< 0xc0)){
1836 u
<<= 6; /* shift current value by 6 bits */
1838 if (!--more
){ /* last octet? */
1839 if(u
>= 0xffff || (u
= conv_table
[u
]) == NOCHAR
){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1848 fs_give((void **) &f
->opt
);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1859 c
= (unsigned char) (u
>> 8);
1860 GF_PUTC(f
->next
, c
);
1863 c
= (unsigned char) u
& 0xff;
1866 GF_PUTC(f
->next
, c
);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f
->next
, '?');
1874 if(more
){ /* incomplete UTF-8 character */
1875 GF_PUTC(f
->next
, '?');
1878 if(c
< 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f
->next
, c
);
1881 else if(c
< 0xe0){ /* U+0080 - U+07ff */
1882 u
= c
& 0x1f; /* first 5 bits of 12 */
1885 else if(c
< 0xf0){ /* U+1000 - U+ffff */
1886 u
= c
& 0x0f; /* first 4 bits of 16 */
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c
< 0xf8){ /* U+10000 - U+10ffff */
1891 u
= c
& 0x07; /* first 3 bits of 20.5 */
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c
< 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u
= c
& 0x03; /* first 2 bits of 26 */
1899 else if (c
< 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u
= c
& 0x03; /* first 2 bits of 26 */
1904 else{ /* not in Unicode */
1905 GF_PUTC(f
->next
, '?');
1914 else if(flg
== GF_EOD
){
1915 (void) GF_FLUSH(f
->next
);
1917 fs_give((void **) &f
->opt
);
1919 (*f
->next
->f
)(f
->next
, GF_EOD
);
1921 else if(flg
== GF_RESET
){
1922 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1923 conv_table
= ((UTF8C_S
*) f
->opt
)->conv_table
;
1924 report_err
= ((UTF8C_S
*) f
->opt
)->report_err
;
1932 gf_convert_utf8_charset_opt(void *table
, int report_err
)
1936 utf8c
= (UTF8C_S
*) fs_get(sizeof(UTF8C_S
));
1937 utf8c
->conv_table
= table
;
1938 utf8c
->report_err
= report_err
;
1939 return((void *) utf8c
);
1944 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1946 * The routine is called ..._to_euc, but it really converts to either EUC (Unix Pine)
1947 * or to Shift-JIS (PC-Pine).
1950 gf_2022_jp_to_euc(FILTER_S
*f
, int flg
)
1952 register unsigned char c
;
1953 register int state
= f
->f1
;
1956 * f->t lit means we're in middle of decoding a sequence of characters.
1957 * f->f2 keeps track of first character of pair for Shift-JIS.
1958 * f->f1 is the state.
1961 GF_INIT(f
, f
->next
);
1964 while(GF_GETC(f
, c
)){
1966 case ESC
: /* saw ESC */
1967 if(!f
->t
&& c
== '$')
1969 else if(f
->t
&& c
== '(')
1972 GF_PUTC(f
->next
, '\033');
1973 GF_PUTC(f
->next
, c
);
1979 case ESCDOL
: /* saw ESC $ */
1980 if(c
== 'B' || c
== '@'){
1982 f
->t
= 1; /* filtering into euc */
1983 f
->f2
= -1; /* first character of pair */
1986 GF_PUTC(f
->next
, '\033');
1987 GF_PUTC(f
->next
, '$');
1988 GF_PUTC(f
->next
, c
);
1994 case ESCPAR
: /* saw ESC ( */
1995 if(c
== 'B' || c
== 'J' || c
== 'H'){
1997 f
->t
= 0; /* done filtering */
2000 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
2001 GF_PUTC(f
->next
, '('); /* escape sequences, which */
2002 GF_PUTC(f
->next
, c
); /* this appears to be. */
2007 case EUC
: /* filtering into euc */
2011 #ifdef _WINDOWS /* Shift-JIS */
2012 c
&= 0x7f; /* 8-bit can't win */
2013 if (f
->f2
>= 0){ /* second of a pair? */
2014 int rowOffset
= (f
->f2
< 95) ? 112 : 176;
2015 int cellOffset
= (f
->f2
% 2) ? ((c
> 95) ? 32 : 31)
2018 GF_PUTC(f
->next
, ((f
->f2
+ 1) >> 1) + rowOffset
);
2019 GF_PUTC(f
->next
, c
+ cellOffset
);
2020 f
->f2
= -1; /* restart */
2022 else if(c
> 0x20 && c
< 0x7f)
2023 f
->f2
= c
; /* first of pair */
2025 GF_PUTC(f
->next
, c
); /* write CTL as itself */
2029 GF_PUTC(f
->next
, (c
> 0x20 && c
< 0x7f) ? c
| 0x80 : c
);
2040 GF_PUTC(f
->next
, c
);
2049 else if(flg
== GF_EOD
){
2052 GF_PUTC(f
->next
, '\033');
2056 GF_PUTC(f
->next
, '\033');
2057 GF_PUTC(f
->next
, '$');
2061 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
2062 GF_PUTC(f
->next
, '('); /* escape sequences. */
2066 (void) GF_FLUSH(f
->next
);
2067 (*f
->next
->f
)(f
->next
, GF_EOD
);
2069 else if(flg
== GF_RESET
){
2070 dprint((9, "-- gf_reset jp_to_euc\n"));
2071 f
->f1
= DFL
; /* state */
2072 f
->t
= 0; /* not translating to euc */
2078 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2081 gf_native8bitjapanese_to_2022_jp(FILTER_S
*f
, int flg
)
2084 gf_sjis_to_2022_jp(f
, flg
);
2086 gf_euc_to_2022_jp(f
, flg
);
2092 gf_euc_to_2022_jp(FILTER_S
*f
, int flg
)
2094 register unsigned char c
;
2097 * f->t lit means we've sent the start esc seq but not the end seq.
2098 * f->f2 keeps track of first character of pair for Shift-JIS.
2101 GF_INIT(f
, f
->next
);
2104 while(GF_GETC(f
, c
)){
2107 GF_PUTC(f
->next
, c
& 0x7f);
2110 GF_PUTC(f
->next
, '\033');
2111 GF_PUTC(f
->next
, '(');
2112 GF_PUTC(f
->next
, 'B');
2113 GF_PUTC(f
->next
, c
);
2120 GF_PUTC(f
->next
, '\033');
2121 GF_PUTC(f
->next
, '$');
2122 GF_PUTC(f
->next
, 'B');
2123 GF_PUTC(f
->next
, c
& 0x7f);
2127 GF_PUTC(f
->next
, c
);
2134 else if(flg
== GF_EOD
){
2136 GF_PUTC(f
->next
, '\033');
2137 GF_PUTC(f
->next
, '(');
2138 GF_PUTC(f
->next
, 'B');
2143 (void) GF_FLUSH(f
->next
);
2144 (*f
->next
->f
)(f
->next
, GF_EOD
);
2146 else if(flg
== GF_RESET
){
2147 dprint((9, "-- gf_reset euc_to_jp\n"));
2154 gf_sjis_to_2022_jp(FILTER_S
*f
, int flg
)
2156 register unsigned char c
;
2159 * f->t lit means we've sent the start esc seq but not the end seq.
2160 * f->f2 keeps track of first character of pair for Shift-JIS.
2163 GF_INIT(f
, f
->next
);
2166 while(GF_GETC(f
, c
)){
2168 if(f
->f2
>= 0){ /* second of a pair? */
2169 int adjust
= c
< 159;
2170 int rowOffset
= f
->f2
< 160 ? 112 : 176;
2171 int cellOffset
= adjust
? (c
> 127 ? 32 : 31) : 126;
2173 GF_PUTC(f
->next
, ((f
->f2
- rowOffset
) << 1) - adjust
);
2174 GF_PUTC(f
->next
, c
- cellOffset
);
2178 f
->f2
= c
; /* remember first of pair */
2181 GF_PUTC(f
->next
, '\033');
2182 GF_PUTC(f
->next
, '(');
2183 GF_PUTC(f
->next
, 'B');
2184 GF_PUTC(f
->next
, c
);
2191 GF_PUTC(f
->next
, '\033');
2192 GF_PUTC(f
->next
, '$');
2193 GF_PUTC(f
->next
, 'B');
2198 GF_PUTC(f
->next
, c
);
2205 else if(flg
== GF_EOD
){
2207 GF_PUTC(f
->next
, '\033');
2208 GF_PUTC(f
->next
, '(');
2209 GF_PUTC(f
->next
, 'B');
2214 (void) GF_FLUSH(f
->next
);
2215 (*f
->next
->f
)(f
->next
, GF_EOD
);
2217 else if(flg
== GF_RESET
){
2218 dprint((9, "-- gf_reset sjis_to_jp\n"));
2227 * Various charset to UTF-8 Translation filter
2231 * utf8 conversion options
2233 typedef struct _utf8_s
{
/*
 * UTF8_BLOCK is the size step of the gf_utf8 line buffer: the buffer length
 * kept in f->f2 starts at UTF8_BLOCK and is increased by UTF8_BLOCK when it
 * is resized.
 * UTF8_EOB(f) yields the address of the last byte of a buffer of f->f2 bytes
 * that begins at f->line.
 */
2238 #define UTF8_BLOCK 1024
2239 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2240 #define UTF8_ADD(f, c) \
2243 f->f2 += UTF8_BLOCK; \
2244 fs_resize((void **)&f->line, \
2245 (size_t) f->f2 * sizeof(char)); \
2246 eobuf = UTF8_EOB(f); \
2247 p = eobuf - UTF8_BLOCK; \
2251 #define GF_UTF8_FLUSH(f) { \
2253 SIZEDTEXT intext, outtext; \
2254 intext.data = (unsigned char *) f->line; \
2255 intext.size = p - f->line; \
2256 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2257 if(!((UTF8_S *) f->opt)->charset){ \
2258 for(n = 0; n < intext.size; n++) \
2259 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2261 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2262 for(n = 0; n < outtext.size; n++) \
2263 GF_PUTC(f->next, outtext.data[n]); \
2264 if(outtext.data && intext.data != outtext.data) \
2265 fs_give((void **) &outtext.data); \
2268 for(n = 0; n < intext.size; n++) \
2269 GF_PUTC(f->next, '?'); \
2275 * gf_utf8 - text in specified charset to UTF-8 filter
2276 * Process line-at-a-time rather than character
2277 * because ISO-2022-JP. Call utf8_text_cs by hand
2278 * rather than utf8_text to reduce the cost of
2279 * utf8_charset() for each line.
2282 gf_utf8(FILTER_S
*f
, int flg
)
2284 register char *p
= f
->linep
;
2285 register char *eobuf
= UTF8_EOB(f
);
2286 GF_INIT(f
, f
->next
);
2289 register int state
= f
->f1
;
2290 register unsigned char c
;
2292 while(GF_GETC(f
, c
)){
2300 GF_PUTC(f
->next
, '\015');
2301 GF_PUTC(f
->next
, '\012');
2304 UTF8_ADD(f
, '\015');
2322 else if(flg
== GF_EOD
){
2327 fs_give((void **) &f
->line
);
2328 fs_give((void **) &f
->opt
);
2329 (void) GF_FLUSH(f
->next
);
2330 (*f
->next
->f
)(f
->next
, GF_EOD
);
2333 dprint((9, "-- gf_reset utf8\n"));
2335 f
->f2
= UTF8_BLOCK
; /* input buffer length */
2336 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
2344 gf_utf8_opt(char *charset
)
2348 utf8
= (UTF8_S
*) fs_get(sizeof(UTF8_S
));
2350 utf8
->charset
= (CHARSET
*) utf8_charset(charset
);
2353 * When we get 8-bit non-ascii characters but it is supposed to
2354 * be ascii we want it to turn into question marks, not
2355 * just behave as if it is UTF-8 which is what happens
2356 * with ascii because there is no translation table.
2357 * So we need to catch the ascii special case here.
2359 if(utf8
->charset
&& utf8
->charset
->type
== CT_ASCII
)
2360 utf8
->charset
= NULL
;
2362 return((void *) utf8
);
2367 * RICHTEXT-TO-PLAINTEXT filter
2371 * option to be used by rich2plain (NOTE: if this filter is ever
2372 * used more than once in a pipe, all instances will have the same
2377 /*----------------------------------------------------------------------
2378 richtext to plaintext filter
2383 This basically removes all richtext formatting. A cute hack is used
2384 to get bold and underlining to work.
2385 Further work could be done to handle things like centering and right
2386 and left flush, but then it could no longer be done in place. This
2387 operates on text *with* CRLF's.
2389 WARNING: does not wrap lines!
2392 gf_rich2plain(FILTER_S
*f
, int flg
)
2394 static int rich_bold_on
= 0, rich_uline_on
= 0;
2396 /* BUG: quote incoming \255 values */
2397 GF_INIT(f
, f
->next
);
2400 register unsigned char c
;
2401 register int state
= f
->f1
;
2404 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2406 while(GF_GETC(f
, c
)){
2409 case TOKEN
: /* collect a richtext token */
2410 if(c
== '>'){ /* what should we do with it? */
2411 state
= DFL
; /* return to default next time */
2412 *(f
->linep
) = '\0'; /* cap off token */
2413 if(f
->line
[0] == 'l' && f
->line
[1] == 't'){
2414 GF_PUTC(f
->next
, '<'); /* literal '<' */
2416 else if(f
->line
[0] == 'n' && f
->line
[1] == 'l'){
2417 GF_PUTC(f
->next
, '\015');/* newline! */
2418 GF_PUTC(f
->next
, '\012');
2420 else if(!strcmp("comment", f
->line
)){
2423 else if(!strcmp("/comment", f
->line
)){
2426 else if(!strcmp("/paragraph", f
->line
)) {
2427 GF_PUTC(f
->next
, '\r');
2428 GF_PUTC(f
->next
, '\n');
2429 GF_PUTC(f
->next
, '\r');
2430 GF_PUTC(f
->next
, '\n');
2432 else if(!plain
/* gf_rich_plain */){
2433 if(!strcmp(f
->line
, "bold")) {
2434 GF_PUTC(f
->next
, TAG_EMBED
);
2435 GF_PUTC(f
->next
, TAG_BOLDON
);
2437 } else if(!strcmp(f
->line
, "/bold")) {
2438 GF_PUTC(f
->next
, TAG_EMBED
);
2439 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2441 } else if(!strcmp(f
->line
, "italic")) {
2442 GF_PUTC(f
->next
, TAG_EMBED
);
2443 GF_PUTC(f
->next
, TAG_ULINEON
);
2445 } else if(!strcmp(f
->line
, "/italic")) {
2446 GF_PUTC(f
->next
, TAG_EMBED
);
2447 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2449 } else if(!strcmp(f
->line
, "underline")) {
2450 GF_PUTC(f
->next
, TAG_EMBED
);
2451 GF_PUTC(f
->next
, TAG_ULINEON
);
2453 } else if(!strcmp(f
->line
, "/underline")) {
2454 GF_PUTC(f
->next
, TAG_EMBED
);
2455 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2459 /* else we just ignore the token! */
2461 f
->linep
= f
->line
; /* reset token buffer */
2463 else{ /* add char to token */
2464 if(f
->linep
- f
->line
> 40){
2465 /* What? rfc1341 says 40 char tokens MAX! */
2466 fs_give((void **)&(f
->line
));
2467 gf_error("Richtext token over 40 characters");
2471 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2476 state
= DFL
; /* back to default next time */
2477 if(c
== '\012'){ /* treat as single space? */
2478 GF_PUTC(f
->next
, ' ');
2481 /* fall thru to process c */
2487 else if(c
== '\015')
2489 else if(!f
->f2
) /* not in comment! */
2490 GF_PUTC(f
->next
, c
);
2499 else if(flg
== GF_EOD
){
2500 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2501 /* incomplete token!! */
2502 gf_error("Incomplete token in richtext");
2507 GF_PUTC(f
->next
, TAG_EMBED
);
2508 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2512 GF_PUTC(f
->next
, TAG_EMBED
);
2513 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2517 fs_give((void **)&(f
->line
));
2518 (void) GF_FLUSH(f
->next
);
2519 (*f
->next
->f
)(f
->next
, GF_EOD
);
2521 else if(flg
== GF_RESET
){
2522 dprint((9, "-- gf_reset rich2plain\n"));
2523 f
->f1
= DFL
; /* state */
2524 f
->f2
= 0; /* set means we're in a comment */
2525 f
->linep
= f
->line
= (char *)fs_get(45 * sizeof(char));
2531 * function called from the outside to set
2532 * richtext filter's options
2535 gf_rich2plain_opt(int *plain
)
2537 return((void *) plain
);
2543 * ENRICHED-TO-PLAIN text filter
2546 #define TEF_QUELL 0x01
2547 #define TEF_NOFILL 0x02
2551 /*----------------------------------------------------------------------
2552 enriched text to plain text filter (ala rfc1523)
2554 Args: f -- state and input data
2557 This basically removes all enriched formatting. A cute hack is used
2558 to get bold and underlining to work.
2560 Further work could be done to handle things like centering and right
2561 and left flush, but then it could no longer be done in place. This
2562 operates on text *with* CRLF's.
2564 WARNING: does not wrap lines!
2567 gf_enriched2plain(FILTER_S
*f
, int flg
)
2569 static int enr_uline_on
= 0, enr_bold_on
= 0;
2571 /* BUG: quote incoming \255 values */
2572 GF_INIT(f
, f
->next
);
2575 register unsigned char c
;
2576 register int state
= f
->f1
;
2579 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2581 while(GF_GETC(f
, c
)){
2584 case TOKEN
: /* collect a richtext token */
2585 if(c
== '>'){ /* what should we do with it? */
2586 int off
= *f
->line
== '/';
2587 char *token
= f
->line
+ (off
? 1 : 0);
2590 if(!strcmp("param", token
)){
2592 f
->f2
&= ~TEF_QUELL
;
2596 else if(!strcmp("nofill", token
)){
2598 f
->f2
&= ~TEF_NOFILL
;
2600 f
->f2
|= TEF_NOFILL
;
2602 else if(!plain
/* gf_enriched_plain */){
2603 /* Following is a cute hack or two to get
2604 bold and underline on the screen.
2605 See Putline0n() where these codes are
2607 if(!strcmp("bold", token
)) {
2608 GF_PUTC(f
->next
, TAG_EMBED
);
2609 GF_PUTC(f
->next
, off
? TAG_BOLDOFF
: TAG_BOLDON
);
2610 enr_bold_on
= off
? 0 : 1;
2611 } else if(!strcmp("italic", token
)) {
2612 GF_PUTC(f
->next
, TAG_EMBED
);
2613 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2614 enr_uline_on
= off
? 0 : 1;
2615 } else if(!strcmp("underline", token
)) {
2616 GF_PUTC(f
->next
, TAG_EMBED
);
2617 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2618 enr_uline_on
= off
? 0 : 1;
2621 /* else we just ignore the token! */
2623 f
->linep
= f
->line
; /* reset token buffer */
2625 else if(c
== '<'){ /* literal '<'? */
2626 if(f
->linep
== f
->line
){
2627 GF_PUTC(f
->next
, '<');
2631 fs_give((void **)&(f
->line
));
2632 gf_error("Malformed Enriched text: unexpected '<'");
2636 else{ /* add char to token */
2637 if(f
->linep
- f
->line
> 60){ /* rfc1523 says 60 MAX! */
2638 fs_give((void **)&(f
->line
));
2639 gf_error("Malformed Enriched text: token too long");
2643 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2648 if(c
!= '\012'){ /* treat as single space? */
2649 state
= DFL
; /* lone cr? */
2650 f
->f2
&= ~TEF_QUELL
;
2651 GF_PUTC(f
->next
, '\015');
2659 if(c
== '\015'){ /* treat as single space? */
2660 state
= CCR
; /* repeat crlf's mean real newlines */
2662 GF_PUTC(f
->next
, '\r');
2663 GF_PUTC(f
->next
, '\n');
2668 if(!((f
->f2
) & TEF_QUELL
))
2669 GF_PUTC(f
->next
, ' ');
2671 f
->f2
&= ~TEF_QUELL
;
2674 /* fall thru to take care of 'c' */
2681 else if(c
== '\015' && (!((f
->f2
) & TEF_NOFILL
)))
2683 else if(!((f
->f2
) & TEF_QUELL
))
2684 GF_PUTC(f
->next
, c
);
2693 else if(flg
== GF_EOD
){
2694 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2695 /* incomplete token!! */
2696 gf_error("Incomplete token in richtext");
2700 GF_PUTC(f
->next
, TAG_EMBED
);
2701 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2705 GF_PUTC(f
->next
, TAG_EMBED
);
2706 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2710 /* Make sure we end with a newline so everything gets flushed */
2711 GF_PUTC(f
->next
, '\015');
2712 GF_PUTC(f
->next
, '\012');
2714 fs_give((void **)&(f
->line
));
2716 (void) GF_FLUSH(f
->next
);
2717 (*f
->next
->f
)(f
->next
, GF_EOD
);
2719 else if(flg
== GF_RESET
){
2720 dprint((9, "-- gf_reset enriched2plain\n"));
2721 f
->f1
= DFL
; /* state */
2722 f
->f2
= 0; /* set means we're in a comment */
2723 f
->linep
= f
->line
= (char *)fs_get(65 * sizeof(char));
2729 * function called from the outside to set
2730 * richtext filter's options
2733 gf_enriched2plain_opt(int *plain
)
2735 return((void *) plain
);
2741 * HTML-TO-PLAIN text filter
2745 /* OK, here's the plan:
2747 * a universal output function handles writing chars and worries
2750 * a universal element collector reads chars and collects params
2751 * and dispatches the appropriate element handler.
2753 * element handlers are stacked. The most recently dispatched gets
2754 * first crack at the incoming character stream. It passes bytes it's
2755 * done with or not interested in to the next
2757 * installs that handler as the current one collecting data...
2759 * stacked handlers take their params from the element collector and
2760 * accept chars or do whatever they need to do. Sort of a vertical
2761 * piping? recursion-like? hmmm.
2763 * at least I think this is how it'll work. tres simple, non?
2769 * Some important constants
/*
 * Buffer limits, entity status values and flag constants for the HTML filter.
 *
 * NOTE(review): HTML_DOBOLD and HTML_LITERAL are both defined as 0x0400, and
 * HTML_NEWLINE (0x010A) includes the HTML_BADVALUE bit (0x0100). Confirm that
 * these pairs are never tested against the same value.
 */
2771 #define HTML_BUF_LEN 2048 /* max scratch buffer length */
2772 #define MAX_ENTITY 20 /* maximum length of an entity */
2773 #define MAX_ELEMENT 72 /* maximum length of an element */
2774 #define HTML_MOREDATA 0 /* expect more entity data */
2775 #define HTML_ENTITY 1 /* valid entity collected */
2776 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2777 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2778 #define HTML_LITERAL 0x0400 /* Literal character value */
2779 #define HTML_NEWLINE 0x010A /* hard newline */
2780 #define HTML_DOBOLD 0x0400 /* Start Bold display */
2781 #define HTML_ID_GET 0 /* indent func: return current val */
2782 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2783 #define HTML_ID_INC 2 /* indent func: increment by val */
2784 #define HTML_HX_CENTER 0x0001
2785 #define HTML_HX_ULINE 0x0002
2786 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2789 /* types of lists that we will support */
2790 #define LIST_DECIMAL (long) 0
2791 #define LIST_ALPHALO (long) 1
2792 #define LIST_ALPHAUP (long) 2
2793 #define LIST_ROMANLO (long) 3
2794 #define LIST_ROMANUP (long) 4
2795 #define LIST_UNKNOWN (long) 10
2798 * Handler data, state information including function that uses it
2800 typedef struct handler_s
{
2801 FILTER_S
*html_data
;
2806 struct handler_s
*below
;
2810 * Element Property structure
2812 typedef struct _element_properties
{
2815 int (*handler
)(HANDLER_S
*, int, int);
2816 unsigned blocklevel
:1;
2817 unsigned alternate
:1;
2821 * Types used to manage HTML parsing
2823 static void html_handoff(HANDLER_S
*, int);
2827 * to help manage line wrapping.
2829 typedef struct _wrap_line
{
2830 char *buf
; /* buf to collect wrapped text */
2831 int used
, /* number of chars in buf */
2832 width
, /* text's width as displayed */
2833 len
; /* length of allocated buf */
2838 * to help manage centered text
2840 typedef struct _center_s
{
2841 WRAPLINE_S line
; /* buf to assembled centered text */
2842 WRAPLINE_S word
; /* word being to append to Line */
2849 * Collector data and state information
2851 typedef struct collector_s
{
2852 char buf
[HTML_BUF_LEN
]; /* buffer to collect data */
2853 int len
; /* length of that buffer */
2854 unsigned end_tag
:1; /* collecting a closing tag */
2855 unsigned hit_equal
:1; /* collecting right half of attrib */
2856 unsigned mkup_decl
:1; /* markup declaration */
2857 unsigned start_comment
:1; /* markup declaration comment */
2858 unsigned end_comment
:1; /* legit comment format */
2859 unsigned hyphen
:1; /* markup hyphen read */
2860 unsigned badform
:1; /* malformed markup element */
2861 unsigned overrun
:1; /* Overran buf above */
2862 unsigned proc_inst
:1; /* XML processing instructions */
2863 unsigned empty
:1; /* empty element */
2864 unsigned was_quoted
:1; /* basically to catch null string */
2865 char quoted
; /* quoted element param value */
2866 char *element
; /* element's collected name */
2867 PARAMETER
*attribs
; /* element's collected attributes */
2868 PARAMETER
*cur_attrib
; /* attribute now being collected */
2873 * State information for all element handlers
2875 typedef struct html_data
{
2876 HANDLER_S
*h_stack
; /* handler list */
2877 CLCTR_S
*el_data
; /* element collector data */
2878 CENTER_S
*centered
; /* struct to manage centered text */
2879 int (*token
)(FILTER_S
*, int);
2880 char quoted
; /* quoted, by either ' or ", text */
2881 short indent_level
; /* levels of indention */
2882 int in_anchor
; /* text now being written to anchor */
2883 int blanks
; /* Consecutive blank line count */
2884 int wrapcol
; /* column to wrap lines on */
2885 int *prefix
; /* buffer containing Anchor prefix */
2887 long line_bufsize
; /* current size of the line buffer */
2890 int state
; /* embedded data state */
2891 char *color
; /* embedded color pointer */
2893 CBUF_S cb
; /* utf8->ucs4 conversion state */
2894 unsigned wrapstate
:1; /* whether or not to wrap output */
2895 unsigned li_pending
:1; /* <LI> next token expected */
2896 unsigned de_pending
:1; /* <DT> or <DD> next token expected */
2897 unsigned bold_on
:1; /* currently bolding text */
2898 unsigned uline_on
:1; /* currently underlining text */
2899 unsigned center
:1; /* center output text */
2900 unsigned bitbucket
:1; /* Ignore input */
2901 unsigned head
:1; /* In doc's HEAD */
2902 unsigned body
:1; /* In doc's BODY */
2903 unsigned alt_entity
:1; /* use alternative entity values */
2904 unsigned wrote
:1; /* anything witten yet? */
2909 * HTML filter options
2911 typedef struct _html_opts
{
2912 char *base
; /* Base URL for this html file */
2913 int columns
, /* Display columns (excluding margins) */
2914 indent
; /* Left margin */
2915 HANDLE_S
**handlesp
; /* Head of handles */
2916 htmlrisk_t warnrisk_f
; /* Nasty link warning call */
2917 ELPROP_S
*element_table
; /* markup element table */
2918 RSS_FEED_S
**feedp
; /* hook for RSS feed response */
2919 unsigned strip
:1; /* Hilite TAGs allowed */
2920 unsigned handles_loc
:1; /* Local handles requested? */
2921 unsigned showserver
:1; /* Display server after anchors */
2922 unsigned outputted
:1; /* any */
2923 unsigned no_relative_links
:1; /* Disable embeded relative links */
2924 unsigned related_content
:1; /* Embeded related content */
2925 unsigned html
:1; /* Output content in HTML */
2926 unsigned html_imgs
:1; /* Output IMG tags in HTML content */
2932 * Some macros to make life a little easier
/*
 * Accessors for the HTML filter's options and per-filter state.
 * In the option macros X is a filter whose "opt" member, when non-NULL, is
 * read as an HTML_OPT_S.
 */
/* Option values with a fallback when no options are attached:
 * 80 columns, indent 0, and a NULL base. */
2934 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2935 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2936 #define HTML_WROTE(X) (HD(X)->wrote)
2937 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
/* Boolean option tests: false when no options are attached. */
2938 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2939 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2940 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
/* HANDLESP dereferences (X)->opt without a NULL check; DO_HANDLES is the
 * guarded form. */
2941 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2942 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2943 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2944 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2945 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2946 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
/* ELEMENTS and RSS_FEED also dereference (X)->opt (and, for RSS_FEED, feedp)
 * without a NULL check, so callers must know they are set. */
2947 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2948 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
/* MAKE_LITERAL keeps the low 8 bits of C and sets the HTML_LITERAL bit;
 * IS_LITERAL is non-zero when that bit is set. */
2949 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2950 #define IS_LITERAL(C) (HTML_LITERAL & (C))
/* HD: the filter's "data" member viewed as HTML_DATA_S; ED: its element
 * collector; EL: X's "element" member viewed as ELPROP_S. */
2951 #define HD(X) ((HTML_DATA_S *)(X)->data)
2952 #define ED(X) (HD(X)->el_data)
2953 #define EL(X) ((ELPROP_S *) (X)->element)
/* ASCII_ISSPACE only consults isspace() for values below 0x80.
 * HTML_ISSPACE is true for a non-literal value that is either HTML_NEWLINE
 * or ASCII white space. Both macros evaluate C more than once. */
2954 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
2955 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
2956 #define NEW_CLCTR(X) { \
2957 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2958 memset(ED(X), 0, sizeof(CLCTR_S)); \
2959 HD(X)->token = html_element_collector; \
2962 #define FREE_CLCTR(X) { \
2963 if(ED(X)->attribs){ \
2965 while((p = ED(X)->attribs) != NULL){ \
2966 ED(X)->attribs = ED(X)->attribs->next; \
2968 fs_give((void **)&p->attribute); \
2970 fs_give((void **)&p->value); \
2971 fs_give((void **)&p); \
2974 if(ED(X)->element) \
2975 fs_give((void **) &ED(X)->element); \
2976 fs_give((void **) &ED(X)); \
2977 HD(X)->token = NULL; \
/* Shorthand for fields of the filter's HTML_DATA_S (see HD): the handler
 * stack and the bold, underline and center state bits. */
2979 #define HANDLERS(X) (HD(X)->h_stack)
2980 #define BOLD_BIT(X) (HD(X)->bold_on)
2981 #define ULINE_BIT(X) (HD(X)->uline_on)
2982 #define CENTER_BIT(X) (HD(X)->center)
2983 #define HTML_FLUSH(X) { \
2984 html_write(X, (X)->line, (X)->linep - (X)->line); \
2985 (X)->linep = (X)->line; \
2988 #define HTML_BOLD(X, S) if(! STRIP(X)){ \
2990 html_output((X), TAG_EMBED); \
2991 html_output((X), TAG_BOLDON); \
2994 html_output((X), TAG_EMBED); \
2995 html_output((X), TAG_BOLDOFF); \
2998 #define HTML_ULINE(X, S) \
3001 html_output((X), TAG_EMBED); \
3002 html_output((X), TAG_ULINEON); \
3005 html_output((X), TAG_EMBED); \
3006 html_output((X), TAG_ULINEOFF); \
3009 #define HTML_ITALIC(X, S) \
3012 html_output((X), TAG_EMBED); \
3013 html_output((X), TAG_ITALICON); \
3016 html_output((X), TAG_EMBED); \
3017 html_output((X), TAG_ITALICOFF); \
3020 #define HTML_STRIKE(X, S) \
3023 html_output((X), TAG_EMBED); \
3024 html_output((X), TAG_STRIKEON); \
3027 html_output((X), TAG_EMBED); \
3028 html_output((X), TAG_STRIKEOFF); \
3031 #define HTML_BIG(X, S) \
3034 html_output((X), TAG_EMBED); \
3035 html_output((X), TAG_BIGON); \
3038 html_output((X), TAG_EMBED); \
3039 html_output((X), TAG_BIGOFF); \
3042 #define HTML_SMALL(X, S) \
3045 html_output((X), TAG_EMBED); \
3046 html_output((X), TAG_SMALLON); \
3049 html_output((X), TAG_EMBED); \
3050 html_output((X), TAG_SMALLOFF); \
/*
 * Width of the pending centered text: line width plus word width, plus
 * one separating column when both are non-empty; 0 when the filter has
 * no centered data.  Uses its own argument rather than relying on a
 * variable named "f" being in scope at the point of use.
 */
#define WRAPPED_LEN(X)  ((HD(X)->centered)                              \
                            ? (HD(X)->centered->line.width              \
                                + HD(X)->centered->word.width           \
                                + ((HD(X)->centered->line.width         \
                                    && HD(X)->centered->word.width)     \
                                   ? 1 : 0))                            \
                            : 0)
/*
 * Feed L bytes of S through HTML_TEXT, passing ASCII white space as is
 * and tagging every other byte as a literal.
 */
#define HTML_DUMP_LIT(F, S, L)  {                                       \
                                   int i, c;                            \
                                   for(i = 0; i < (L); i++){            \
                                       c = ASCII_ISSPACE((unsigned char)(S)[i]) \
                                             ? (S)[i]                   \
                                             : MAKE_LITERAL((S)[i]);    \
                                       HTML_TEXT(F, c);                 \
                                   }                                    \
                                 }

/*
 * Per-character dispatch.  While a token collector is installed the
 * character goes to it; a non-zero return ends collection, and a
 * negative one first replays "<", the collected element name and the
 * collected buffer as literal text followed by C.  Otherwise '<' starts
 * a new collector and anything else is ordinary text.
 */
#define HTML_PROC(F, C) {                                               \
                            if(HD(F)->token){                           \
                                int i;                                  \
                                if((i = (*(HD(F)->token))(F, C)) != 0){ \
                                    if(i < 0){                          \
                                        HTML_DUMP_LIT(F, "<", 1);       \
                                        if(HD(F)->el_data->element){    \
                                            HTML_DUMP_LIT(F,            \
                                             HD(F)->el_data->element,   \
                                             strlen(HD(F)->el_data->element));\
                                        }                               \
                                        if(HD(F)->el_data->len){        \
                                            HTML_DUMP_LIT(F,            \
                                                    HD(F)->el_data->buf, \
                                                    HD(F)->el_data->len); \
                                        }                               \
                                        HTML_TEXT(F, C);                \
                                    }                                   \
                                    FREE_CLCTR(F);                      \
                                }                                       \
                             }                                          \
                             else if((C) == '<'){                       \
                                 NEW_CLCTR(F);                          \
                             }                                          \
                             else                                       \
                               HTML_TEXT(F, C);                         \
                          }

/*
 * Append C to the line buffer, doubling the buffer (and re-deriving
 * linep from its saved offset) when fewer than two free slots remain.
 */
#define HTML_LINEP_PUTC(F, C) {                                         \
                   if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
                       size_t offset = (F)->linep - (F)->line;          \
                       fs_resize((void **) &(F)->line,                  \
                                 (HD(F)->line_bufsize * 2) * sizeof(char)); \
                       HD(F)->line_bufsize *= 2;                        \
                       (F)->linep = &(F)->line[offset];                 \
                   }                                                    \
                   *(F)->linep++ = (C);                                 \
               }

/*
 * Text state machine keyed on (F)->f1.  In WSPACE a run of white space
 * is swallowed and a single ' ' is emitted once a non-space arrives; in
 * DFL characters are dropped while bitbucket is set, white space enters
 * WSPACE while wrapping, and everything else is sent on.
 */
#define HTML_TEXT(F, C) switch((F)->f1){                                \
                             case WSPACE :                              \
                               if(HTML_ISSPACE(C)) /* ignore repeated WS */  \
                                 break;                                 \
                               HTML_TEXT_OUT(F, ' ');                   \
                               (F)->f1 = DFL;/* stop sending chars here */ \
                               /* fall thru to process 'c' */           \
                             case DFL:                                  \
                               if(HD(F)->bitbucket)                     \
                                 (F)->f1 = DFL; /* no op */             \
                               else if(HTML_ISSPACE(C) && HD(F)->wrapstate)     \
                                 (F)->f1 = WSPACE;/* coalesce white space */ \
                               else HTML_TEXT_OUT(F, C);                \
                               break;                                   \
                         }

/* Hand C to the topmost handler when one is stacked, else output it */
#define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */    \
                              (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
                            else                                        \
                              html_output(F, C);
#ifdef  DEBUG
/* Trace an element and, at higher debug levels, each of its attributes */
#define HTML_DEBUG_EL(S, D)   {                                         \
                                    dprint((5, "-- html %s: %s\n",      \
                                            S ? S : "?",                \
                                            (D)->element                \
                                                 ? (D)->element : "NULL")); \
                                    if(debug > 5){                      \
                                        PARAMETER *p;                   \
                                        for(p = (D)->attribs;           \
                                            p && p->attribute;          \
                                            p = p->next)                \
                                          dprint((6,                    \
                                                  " PARM: %s%s%s\n",    \
                                                  p->attribute          \
                                                    ? p->attribute : "NULL",\
                                                  p->value ? "=" : "",  \
                                                  p->value ? p->value : ""));\
                                    }                                   \
                              }
#else
#define HTML_DEBUG_EL(S, D)
#endif

#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
#endif
/* Map the name "PINE_INFO_PATH" to its path; any other name is returned as is */
#define CHTML_VAR_EXPAND(S) (!strcmp((S), "PINE_INFO_PATH") \
                             ? SYSTEM_PINE_INFO_PATH : (S))
3155 * Protos for Tag handlers
3157 int html_head(HANDLER_S
*, int, int);
3158 int html_base(HANDLER_S
*, int, int);
3159 int html_title(HANDLER_S
*, int, int);
3160 int html_body(HANDLER_S
*, int, int);
3161 int html_a(HANDLER_S
*, int, int);
3162 int html_br(HANDLER_S
*, int, int);
3163 int html_hr(HANDLER_S
*, int, int);
3164 int html_p(HANDLER_S
*, int, int);
3165 int html_table(HANDLER_S
*, int, int);
3166 int html_caption(HANDLER_S
*, int, int);
3167 int html_tr(HANDLER_S
*, int, int);
3168 int html_td(HANDLER_S
*, int, int);
3169 int html_th(HANDLER_S
*, int, int);
3170 int html_thead(HANDLER_S
*, int, int);
3171 int html_tbody(HANDLER_S
*, int, int);
3172 int html_tfoot(HANDLER_S
*, int, int);
3173 int html_col(HANDLER_S
*, int, int);
3174 int html_colgroup(HANDLER_S
*, int, int);
3175 int html_b(HANDLER_S
*, int, int);
3176 int html_u(HANDLER_S
*, int, int);
3177 int html_i(HANDLER_S
*, int, int);
3178 int html_em(HANDLER_S
*, int, int);
3179 int html_strong(HANDLER_S
*, int, int);
3180 int html_s(HANDLER_S
*, int, int);
3181 int html_big(HANDLER_S
*, int, int);
3182 int html_small(HANDLER_S
*, int, int);
3183 int html_font(HANDLER_S
*, int, int);
3184 int html_img(HANDLER_S
*, int, int);
3185 int html_map(HANDLER_S
*, int, int);
3186 int html_area(HANDLER_S
*, int, int);
3187 int html_form(HANDLER_S
*, int, int);
3188 int html_input(HANDLER_S
*, int, int);
3189 int html_option(HANDLER_S
*, int, int);
3190 int html_optgroup(HANDLER_S
*, int, int);
3191 int html_button(HANDLER_S
*, int, int);
3192 int html_select(HANDLER_S
*, int, int);
3193 int html_textarea(HANDLER_S
*, int, int);
3194 int html_label(HANDLER_S
*, int, int);
3195 int html_fieldset(HANDLER_S
*, int, int);
3196 int html_ul(HANDLER_S
*, int, int);
3197 int html_ol(HANDLER_S
*, int, int);
3198 int html_menu(HANDLER_S
*, int, int);
3199 int html_dir(HANDLER_S
*, int, int);
3200 int html_li(HANDLER_S
*, int, int);
3201 int html_h1(HANDLER_S
*, int, int);
3202 int html_h2(HANDLER_S
*, int, int);
3203 int html_h3(HANDLER_S
*, int, int);
3204 int html_h4(HANDLER_S
*, int, int);
3205 int html_h5(HANDLER_S
*, int, int);
3206 int html_h6(HANDLER_S
*, int, int);
3207 int html_blockquote(HANDLER_S
*, int, int);
3208 int html_address(HANDLER_S
*, int, int);
3209 int html_pre(HANDLER_S
*, int, int);
3210 int html_center(HANDLER_S
*, int, int);
3211 int html_div(HANDLER_S
*, int, int);
3212 int html_span(HANDLER_S
*, int, int);
3213 int html_dl(HANDLER_S
*, int, int);
3214 int html_dt(HANDLER_S
*, int, int);
3215 int html_dd(HANDLER_S
*, int, int);
3216 int html_script(HANDLER_S
*, int, int);
3217 int html_applet(HANDLER_S
*, int, int);
3218 int html_style(HANDLER_S
*, int, int);
3219 int html_kbd(HANDLER_S
*, int, int);
3220 int html_dfn(HANDLER_S
*, int, int);
3221 int html_var(HANDLER_S
*, int, int);
3222 int html_tt(HANDLER_S
*, int, int);
3223 int html_samp(HANDLER_S
*, int, int);
3224 int html_sub(HANDLER_S
*, int, int);
3225 int html_sup(HANDLER_S
*, int, int);
3226 int html_cite(HANDLER_S
*, int, int);
3227 int html_code(HANDLER_S
*, int, int);
3228 int html_ins(HANDLER_S
*, int, int);
3229 int html_del(HANDLER_S
*, int, int);
3230 int html_abbr(HANDLER_S
*, int, int);
3233 * Protos for RSS 2.0 Tag handlers
3235 int rss_rss(HANDLER_S
*, int, int);
3236 int rss_channel(HANDLER_S
*, int, int);
3237 int rss_title(HANDLER_S
*, int, int);
3238 int rss_image(HANDLER_S
*, int, int);
3239 int rss_link(HANDLER_S
*, int, int);
3240 int rss_description(HANDLER_S
*, int, int);
3241 int rss_ttl(HANDLER_S
*, int, int);
3242 int rss_item(HANDLER_S
*, int, int);
3245 * Proto's for support routines
3247 void html_pop(FILTER_S
*, ELPROP_S
*);
3248 int html_push(FILTER_S
*, ELPROP_S
*);
3249 int html_element_collector(FILTER_S
*, int);
3250 int html_element_flush(CLCTR_S
*);
3251 void html_element_comment(FILTER_S
*, char *);
3252 void html_element_output(FILTER_S
*, int);
3253 int html_entity_collector(FILTER_S
*, int, UCS
*, char **);
3254 void html_a_prefix(FILTER_S
*);
3255 void html_a_finish(HANDLER_S
*);
3256 void html_a_output_prefix(FILTER_S
*, int);
3257 void html_a_output_info(HANDLER_S
*);
3258 void html_a_relative(char *, char *, HANDLE_S
*);
3259 int html_href_relative(char *);
3260 int html_indent(FILTER_S
*, int, int);
3261 void html_blank(FILTER_S
*, int);
3262 void html_newline(FILTER_S
*);
3263 void html_output(FILTER_S
*, int);
3264 void html_output_string(FILTER_S
*, char *);
3265 void html_output_raw_tag(FILTER_S
*, char *);
3266 void html_output_normal(FILTER_S
*, int, int, int);
3267 void html_output_flush(FILTER_S
*);
3268 void html_output_centered(FILTER_S
*, int, int, int);
3269 void html_centered_handle(int *, char *, int);
3270 void html_centered_putc(WRAPLINE_S
*, int);
3271 void html_centered_flush(FILTER_S
*);
3272 void html_centered_flush_line(FILTER_S
*);
3273 void html_write_anchor(FILTER_S
*, int);
3274 void html_write_newline(FILTER_S
*);
3275 void html_write_indent(FILTER_S
*, int);
3276 void html_write(FILTER_S
*, char *, int);
3277 void html_putc(FILTER_S
*, int);
3278 int html_event_attribute(char *);
3279 char *rss_skip_whitespace(char *s
);
3280 ELPROP_S
*element_properties(FILTER_S
*, char *);
3284 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3285 * W3C doc "Additional named entities for HTML"
3287 static struct html_entities
{
3288 char *name
; /* entity name */
3289 UCS value
; /* UCS entity value */
3290 char *plain
; /* US-ASCII representation */
3292 {"quot", 0x0022}, /* 34 - quotation mark */
3293 {"amp", 0x0026}, /* 38 - ampersand */
3294 {"apos", 0x0027}, /* 39 - apostrophe */
3295 {"lt", 0x003C}, /* 60 - less-than sign */
3296 {"gt", 0x003E}, /* 62 - greater-than sign */
3297 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3298 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3299 {"cent", 0x00A2}, /* 162 - cent sign */
3300 {"pound", 0x00A3}, /* 163 - pound sign */
3301 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3302 {"yen", 0x00A5}, /* 165 - yen sign */
3303 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3304 {"sect", 0x00A7}, /* 167 - section sign */
3305 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3306 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3307 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3308 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3309 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3310 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3311 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3312 {"macr", 0x00AF}, /* 175 - macron */
3313 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3314 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3315 {"sup2", 0x00B2}, /* 178 - superscript two */
3316 {"sup3", 0x00B3}, /* 179 - superscript three */
3317 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3318 {"micro", 0x00B5}, /* 181 - micro sign */
3319 {"para", 0x00B6}, /* 182 - pilcrow sign */
3320 {"middot", 0x00B7}, /* 183 - middle dot */
3321 {"cedil", 0x00B8}, /* 184 - cedilla */
3322 {"sup1", 0x00B9}, /* 185 - superscript one */
3323 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3324 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3325 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3326 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3327 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3328 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3329 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3330 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3331 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3332 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3333 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3334 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3335 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3336 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3337 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3338 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3339 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3340 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3341 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3342 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3343 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3344 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3345 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3346 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3347 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3348 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3349 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3350 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3351 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3352 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3353 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3354 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3355 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3356 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3357 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3358 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3359 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3360 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3361 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3362 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3363 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3364 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3365 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3366 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3367 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3368 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3369 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3370 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3371 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3372 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3373 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3374 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3375 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3376 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3377 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3378 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3379 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3380 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3381 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3382 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3383 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3384 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3385 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3386 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3387 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3388 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3389 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3390 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3391 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3392 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3393 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3394 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3395 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3396 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3397 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3398 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3399 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3400 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3401 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3402 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3403 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3404 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3405 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3406 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3407 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3408 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3409 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3410 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3411 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3412 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3413 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3414 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3415 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3416 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3417 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3418 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3419 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3420 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3421 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3422 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3423 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3424 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3425 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3426 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3427 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3428 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3429 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3430 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3431 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3432 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3433 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3434 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3435 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3436 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3437 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3438 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3439 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3440 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3441 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3442 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3443 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3444 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3445 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3446 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3447 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3448 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3449 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3450 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3451 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3452 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3453 {"ensp", 0x2002}, /* 8194 - en space */
3454 {"emsp", 0x2003}, /* 8195 - em space */
3455 {"thinsp", 0x2009}, /* 8201 - thin space */
3456 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3457 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3458 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3459 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3460 {"ndash", 0x2013}, /* 8211 - en dash */
3461 {"mdash", 0x2014}, /* 8212 - em dash */
3462 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3463 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3464 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3465 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3466 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3467 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3468 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3469 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3470 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3471 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3472 {"dagger", 0x2020}, /* 8224 - dagger */
3473 {"Dagger", 0x2021}, /* 8225 - double dagger */
3474 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3475 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3476 {"permil", 0x2030}, /* 8240 - per mille sign */
3477 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3478 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3479 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3480 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3481 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3482 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3483 {"oline", 0x203E, "-"}, /* 8254 - overline */
3484 {"frasl", 0x2044}, /* 8260 - fraction slash */
3485 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3486 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3487 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3488 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3489 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3490 {"image", 0x2111}, /* 8465 - black-letter capital i */
3491 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3492 {"real", 0x211C}, /* 8476 - black-letter capital r */
3493 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3494 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3495 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3496 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3497 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3498 {"darr", 0x2193}, /* 8595 - downwards arrow */
3499 {"harr", 0x2194}, /* 8596 - left right arrow */
3500 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3501 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3502 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3503 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3504 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3505 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3506 {"forall", 0x2200}, /* 8704 - for all */
3507 {"part", 0x2202}, /* 8706 - partial differential */
3508 {"exist", 0x2203}, /* 8707 - there exists */
3509 {"empty", 0x2205}, /* 8709 - empty set */
3510 {"nabla", 0x2207}, /* 8711 - nabla */
3511 {"isin", 0x2208}, /* 8712 - element of */
3512 {"notin", 0x2209}, /* 8713 - not an element of */
3513 {"ni", 0x220B}, /* 8715 - contains as member */
3514 {"prod", 0x220F}, /* 8719 - n-ary product */
3515 {"sum", 0x2211}, /* 8721 - n-ary summation */
3516 {"minus", 0x2212}, /* 8722 - minus sign */
3517 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3518 {"radic", 0x221A}, /* 8730 - square root */
3519 {"prop", 0x221D}, /* 8733 - proportional to */
3520 {"infin", 0x221E}, /* 8734 - infinity */
3521 {"ang", 0x2220}, /* 8736 - angle */
3522 {"and", 0x2227}, /* 8743 - logical and */
3523 {"or", 0x2228}, /* 8744 - logical or */
3524 {"cap", 0x2229}, /* 8745 - intersection */
3525 {"cup", 0x222A}, /* 8746 - union */
3526 {"int", 0x222B}, /* 8747 - integral */
3527 {"there4", 0x2234}, /* 8756 - therefore */
3528 {"sim", 0x223C}, /* 8764 - tilde operator */
3529 {"cong", 0x2245}, /* 8773 - congruent to */
3530 {"asymp", 0x2248}, /* 8776 - almost equal to */
3531 {"ne", 0x2260}, /* 8800 - not equal to */
3532 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3533 {"le", 0x2264}, /* 8804 - less-than or equal to */
3534 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3535 {"sub", 0x2282}, /* 8834 - subset of */
3536 {"sup", 0x2283}, /* 8835 - superset of */
3537 {"nsub", 0x2284}, /* 8836 - not a subset of */
3538 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3539 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3540 {"oplus", 0x2295}, /* 8853 - circled plus */
3541 {"otimes", 0x2297}, /* 8855 - circled times */
3542 {"perp", 0x22A5}, /* 8869 - up tack */
3543 {"sdot", 0x22C5}, /* 8901 - dot operator */
3544 {"lceil", 0x2308}, /* 8968 - left ceiling */
3545 {"rceil", 0x2309}, /* 8969 - right ceiling */
3546 {"lfloor", 0x230A}, /* 8970 - left floor */
3547 {"rfloor", 0x230B}, /* 8971 - right floor */
3548 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3549 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3550 {"loz", 0x25CA}, /* 9674 - lozenge */
3551 {"spades", 0x2660}, /* 9824 - black spade suit */
3552 {"clubs", 0x2663}, /* 9827 - black club suit */
3553 {"hearts", 0x2665}, /* 9829 - black heart suit */
3554 {"diams", 0x2666} /* 9830 - black diamond suit */
3559 * Table of supported elements and corresponding handlers
3561 static ELPROP_S html_element_table
[] = {
3562 {"HTML", 4}, /* HTML ignore if seen? */
3563 {"HEAD", 4, html_head
}, /* slurp until <BODY> ? */
3564 {"TITLE", 5, html_title
}, /* Document Title */
3565 {"BASE", 4, html_base
}, /* HREF base */
3566 {"BODY", 4, html_body
}, /* HTML BODY */
3567 {"A", 1, html_a
}, /* Anchor */
3568 {"ABBR", 4, html_abbr
}, /* Abbreviation */
3569 {"IMG", 3, html_img
}, /* Image */
3570 {"MAP", 3, html_map
}, /* Image Map */
3571 {"AREA", 4, html_area
}, /* Image Map Area */
3572 {"HR", 2, html_hr
, 1, 1}, /* Horizontal Rule */
3573 {"BR", 2, html_br
, 0, 1}, /* Line Break */
3574 {"P", 1, html_p
, 1}, /* Paragraph */
3575 {"OL", 2, html_ol
, 1}, /* Ordered List */
3576 {"UL", 2, html_ul
, 1}, /* Unordered List */
3577 {"MENU", 4, html_menu
}, /* Menu List */
3578 {"DIR", 3, html_dir
}, /* Directory List */
3579 {"LI", 2, html_li
}, /* ... List Item */
3580 {"DL", 2, html_dl
, 1}, /* Definition List */
3581 {"DT", 2, html_dt
}, /* ... Def. Term */
3582 {"DD", 2, html_dd
}, /* ... Def. Definition */
3583 {"I", 1, html_i
}, /* Italic Text */
3584 {"EM", 2, html_em
}, /* Typographic Emphasis */
3585 {"STRONG", 6, html_strong
}, /* STRONG Typo Emphasis */
3586 {"VAR", 3, html_i
}, /* Variable Name */
3587 {"B", 1, html_b
}, /* Bold Text */
3588 {"U", 1, html_u
}, /* Underline Text */
3589 {"S", 1, html_s
}, /* Strike-Through Text */
3590 {"STRIKE", 6, html_s
}, /* Strike-Through Text */
3591 {"BIG", 3, html_big
}, /* Big Font Text */
3592 {"SMALL", 5, html_small
}, /* Small Font Text */
3593 {"FONT", 4, html_font
}, /* Font display directives */
3594 {"BLOCKQUOTE", 10, html_blockquote
, 1}, /* Blockquote */
3595 {"ADDRESS", 7, html_address
, 1}, /* Address */
3596 {"CENTER", 6, html_center
}, /* Centered Text v3.2 */
3597 {"DIV", 3, html_div
, 1}, /* Document Division 3.2 */
3598 {"SPAN", 4, html_span
}, /* Text Span */
3599 {"H1", 2, html_h1
, 1}, /* Headings... */
3600 {"H2", 2, html_h2
, 1},
3601 {"H3", 2, html_h3
,1},
3602 {"H4", 2, html_h4
, 1},
3603 {"H5", 2, html_h5
, 1},
3604 {"H6", 2, html_h6
, 1},
3605 {"PRE", 3, html_pre
, 1}, /* Preformatted Text */
3606 {"KBD", 3, html_kbd
}, /* Keyboard Input (NO OP) */
3607 {"DFN", 3, html_dfn
}, /* Definition (NO OP) */
3608 {"VAR", 3, html_var
}, /* Variable (NO OP) */
3609 {"TT", 2, html_tt
}, /* Typetype (NO OP) */
3610 {"SAMP", 4, html_samp
}, /* Sample Text (NO OP) */
3611 {"CITE", 4, html_cite
}, /* Citation (NO OP) */
3612 {"CODE", 4, html_code
}, /* Code Text (NO OP) */
3613 {"INS", 3, html_ins
}, /* Text Inseted (NO OP) */
3614 {"DEL", 3, html_del
}, /* Text Deleted (NO OP) */
3615 {"SUP", 3, html_sup
}, /* Text Superscript (NO OP) */
3616 {"SUB", 3, html_sub
}, /* Text Superscript (NO OP) */
3617 {"STYLE", 5, html_style
}, /* CSS Definitions */
3619 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3621 {"FORM", 4, html_form
, 1}, /* form within a document */
3622 {"INPUT", 5, html_input
}, /* One input field, options */
3623 {"BUTTON", 6, html_button
}, /* Push Button */
3624 {"OPTION", 6, html_option
}, /* One option within Select */
3625 {"OPTION", 6, html_optgroup
}, /* Option Group Definition */
3626 {"SELECT", 6, html_select
}, /* Selection from a set */
3627 {"TEXTAREA", 8, html_textarea
}, /* A multi-line input field */
3628 {"LABEL", 5, html_label
}, /* Control Label */
3629 {"FIELDSET", 8, html_fieldset
, 1}, /* Fieldset Control Group */
3631 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3632 {"SCRIPT", 6, html_script
}, /* Embedded scripting statements */
3633 {"APPLET", 6, NULL
}, /* Embedded applet statements */
3634 {"OBJECT", 6, NULL
}, /* Embedded object statements */
3635 {"LINK", 4, NULL
}, /* References to external data */
3636 {"PARAM", 5, NULL
}, /* Applet/Object parameters */
3638 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3640 {"TABLE", 5, html_table
, 1}, /* Table */
3641 {"CAPTION", 7, html_caption
}, /* Table Caption */
3642 {"TR", 2, html_tr
}, /* Table Table Row */
3643 {"TD", 2, html_td
}, /* Table Table Data */
3644 {"TH", 2, html_th
}, /* Table Table Head */
3645 {"THEAD", 5, html_thead
}, /* Table Table Head */
3646 {"TBODY", 5, html_tbody
}, /* Table Table Body */
3647 {"TFOOT", 5, html_tfoot
}, /* Table Table Foot */
3648 {"COL", 3, html_col
}, /* Table Column Attibutes */
3649 {"COLGROUP", 8, html_colgroup
}, /* Table Column Group Attibutes */
3656 * Table of supported RSS 2.0 elements
3658 static ELPROP_S rss_element_table
[] = {
3659 {"RSS", 3, rss_rss
}, /* RSS 2.0 version */
3660 {"CHANNEL", 7, rss_channel
}, /* RSS 2.0 Channel */
3661 {"TITLE", 5, rss_title
}, /* RSS 2.0 Title */
3662 {"IMAGE", 5, rss_image
}, /* RSS 2.0 Channel Image */
3663 {"LINK", 4, rss_link
}, /* RSS 2.0 Channel/Item Link */
3664 {"DESCRIPTION", 11, rss_description
}, /* RSS 2.0 Channel/Item Description */
3665 {"ITEM", 4, rss_item
}, /* RSS 2.0 Channel ITEM */
3666 {"TTL", 3, rss_ttl
}, /* RSS 2.0 Item TTL */
3672 * Initialize the given handler, and add it to the stack if it
3675 * Returns: 1 if handler chose to get pushed on stack
3676 * 0 if handler declined
3679 html_push(FILTER_S
*fd
, ELPROP_S
*ep
)
3683 new = (HANDLER_S
*)fs_get(sizeof(HANDLER_S
));
3684 memset(new, 0, sizeof(HANDLER_S
));
3685 new->html_data
= fd
;
3687 if((*ep
->handler
)(new, 0, GF_RESET
)){ /* stack the handler? */
3688 new->below
= HANDLERS(fd
);
3689 HANDLERS(fd
) = new; /* push */
3693 fs_give((void **) &new);
3699 * Remove the most recently installed the given handler
3700 * after letting it accept its demise.
3703 html_pop(FILTER_S
*fd
, ELPROP_S
*ep
)
3707 for(tp
= HANDLERS(fd
); tp
&& ep
!= EL(tp
); tp
= tp
->below
){
3710 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep
->element
, EL(tp
)->element
));
3711 /* if no evidence of opening tag, ignore given closing tag */
3712 for(tp2
= HANDLERS(fd
); tp2
&& ep
!= EL(tp2
); tp2
= tp2
->below
)
3716 dprint((3, "-- html error: no opening tag for given tag /%s", ep
->element
));
3720 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
);
3721 HANDLERS(fd
) = tp
->below
;
3725 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
); /* may adjust handler list */
3726 if(tp
!= HANDLERS(fd
)){
3729 for(p
= HANDLERS(fd
); p
->below
!= tp
; p
= p
->below
)
3733 p
->below
= tp
->below
; /* remove from middle of stack */
3734 /* BUG: else programming botch and we should die */
3737 HANDLERS(fd
) = tp
->below
; /* pop */
3739 fs_give((void **)&tp
);
3742 /* BUG: should MAKE SURE NOT TO EMIT IT */
3743 dprint((3, "-- html error: end tag without a start: %s", ep
->element
));
3749 * Deal with data passed a hander in its GF_DATA state
3752 html_handoff(HANDLER_S
*hd
, int ch
)
3755 (void) (*EL(hd
->below
)->handler
)(hd
->below
, ch
, GF_DATA
);
3757 html_output(hd
->html_data
, ch
);
3762 * HTML <BR> element handler
3765 html_br(HANDLER_S
*hd
, int ch
, int cmd
)
3767 if(cmd
== GF_RESET
){
3768 if(PASS_HTML(hd
->html_data
)){
3769 html_output_raw_tag(hd
->html_data
, "br");
3772 html_output(hd
->html_data
, HTML_NEWLINE
);
3776 return(0); /* don't get linked */
3781 * HTML <HR> (Horizontal Rule) element handler
3784 html_hr(HANDLER_S
*hd
, int ch
, int cmd
)
3786 if(cmd
== GF_RESET
){
3787 if(PASS_HTML(hd
->html_data
)){
3788 html_output_raw_tag(hd
->html_data
, "hr");
3791 int i
, old_wrap
, width
, align
;
3794 width
= WRAP_COLS(hd
->html_data
);
3796 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3800 if(!strucmp(p
->attribute
, "ALIGN")){
3801 if(!strucmp(p
->value
, "LEFT"))
3803 else if(!strucmp(p
->value
, "RIGHT"))
3806 else if(!strucmp(p
->attribute
, "WIDTH")){
3810 for(cp
= p
->value
; *cp
; cp
++)
3812 width
= (WRAP_COLS(hd
->html_data
)*MIN(100,width
))/100;
3815 else if(isdigit((unsigned char) *cp
))
3816 width
= (width
* 10) + (*cp
- '0');
3818 width
= MIN(width
, WRAP_COLS(hd
->html_data
));
3822 html_blank(hd
->html_data
, 1); /* at least one blank line */
3824 old_wrap
= HD(hd
->html_data
)->wrapstate
;
3825 HD(hd
->html_data
)->wrapstate
= 0;
3826 if((i
= MAX(0, WRAP_COLS(hd
->html_data
) - width
))
3827 && ((align
== 0) ? i
/= 2 : (align
== 2)))
3829 html_output(hd
->html_data
, ' ');
3831 for(i
= 0; i
< width
; i
++)
3832 html_output(hd
->html_data
, '_');
3834 html_blank(hd
->html_data
, 1);
3835 HD(hd
->html_data
)->wrapstate
= old_wrap
;
3839 return(0); /* don't get linked */
3844 * HTML <P> (paragraph) element handler
3847 html_p(HANDLER_S
*hd
, int ch
, int cmd
)
3850 html_handoff(hd
, ch
);
3852 else if(cmd
== GF_RESET
){
3853 if(PASS_HTML(hd
->html_data
)){
3854 html_output_raw_tag(hd
->html_data
, "p");
3857 /* Make sure there's at least 1 blank line */
3858 html_blank(hd
->html_data
, 1);
3860 /* adjust indent level if needed */
3861 if(HD(hd
->html_data
)->li_pending
){
3862 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
3863 HD(hd
->html_data
)->li_pending
= 0;
3867 else if(cmd
== GF_EOD
){
3868 if(PASS_HTML(hd
->html_data
)){
3869 html_output_string(hd
->html_data
, "</p>");
3872 /* Make sure there's at least 1 blank line */
3873 html_blank(hd
->html_data
, 1);
3877 return(1); /* GET linked */
3882 * HTML <TABLE> (Table) element handler
3885 html_table(HANDLER_S
*hd
, int ch
, int cmd
)
3888 if(PASS_HTML(hd
->html_data
)){
3889 html_handoff(hd
, ch
);
3892 else if(cmd
== GF_RESET
){
3893 if(PASS_HTML(hd
->html_data
)){
3894 html_output_raw_tag(hd
->html_data
, "table");
3897 /* Make sure there's at least 1 blank line */
3898 html_blank(hd
->html_data
, 0);
3900 else if(cmd
== GF_EOD
){
3901 if(PASS_HTML(hd
->html_data
)){
3902 html_output_string(hd
->html_data
, "</table>");
3905 /* Make sure there's at least 1 blank line */
3906 html_blank(hd
->html_data
, 0);
3908 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3913 * HTML <CAPTION> (Table Caption) element handler
3916 html_caption(HANDLER_S
*hd
, int ch
, int cmd
)
3919 html_handoff(hd
, ch
);
3921 else if(cmd
== GF_RESET
){
3922 if(PASS_HTML(hd
->html_data
)){
3923 html_output_raw_tag(hd
->html_data
, "caption");
3926 /* turn ON the centered bit */
3927 CENTER_BIT(hd
->html_data
) = 1;
3930 else if(cmd
== GF_EOD
){
3931 if(PASS_HTML(hd
->html_data
)){
3932 html_output_string(hd
->html_data
, "</caption>");
3935 /* turn OFF the centered bit */
3936 CENTER_BIT(hd
->html_data
) = 0;
3945 * HTML <TR> (Table Row) element handler
3948 html_tr(HANDLER_S
*hd
, int ch
, int cmd
)
3951 if(PASS_HTML(hd
->html_data
)){
3952 html_handoff(hd
, ch
);
3955 else if(cmd
== GF_RESET
){
3956 if(PASS_HTML(hd
->html_data
)){
3957 html_output_raw_tag(hd
->html_data
, "tr");
3960 /* Make sure there's at least 1 blank line */
3961 html_blank(hd
->html_data
, 0);
3963 else if(cmd
== GF_EOD
){
3964 if(PASS_HTML(hd
->html_data
)){
3965 html_output_string(hd
->html_data
, "</tr>");
3968 /* Make sure there's at least 1 blank line */
3969 html_blank(hd
->html_data
, 0);
3971 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3976 * HTML <TD> (Table Data) element handler
3979 html_td(HANDLER_S
*hd
, int ch
, int cmd
)
3982 if(PASS_HTML(hd
->html_data
)){
3983 html_handoff(hd
, ch
);
3986 else if(cmd
== GF_RESET
){
3987 if(PASS_HTML(hd
->html_data
)){
3988 html_output_raw_tag(hd
->html_data
, "td");
3993 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3996 if(!strucmp(p
->attribute
, "nowrap")
3997 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
3998 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
4003 else if(cmd
== GF_EOD
){
4004 if(PASS_HTML(hd
->html_data
)){
4005 html_output_string(hd
->html_data
, "</td>");
4009 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4014 * HTML <TH> (Table Header) element handler
4017 html_th(HANDLER_S
*hd
, int ch
, int cmd
)
4020 if(PASS_HTML(hd
->html_data
)){
4021 html_handoff(hd
, ch
);
4024 else if(cmd
== GF_RESET
){
4025 if(PASS_HTML(hd
->html_data
)){
4026 html_output_raw_tag(hd
->html_data
, "th");
4031 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4034 if(!strucmp(p
->attribute
, "nowrap")
4035 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
4036 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
4041 else if(cmd
== GF_EOD
){
4042 if(PASS_HTML(hd
->html_data
)){
4043 html_output_string(hd
->html_data
, "</th>");
4047 return(PASS_HTML(hd
->html_data
)); /* don't get linked */
4052 * HTML Table <THEAD> table head
4055 html_thead(HANDLER_S
*hd
, int ch
, int cmd
)
4057 if(PASS_HTML(hd
->html_data
)){
4059 html_handoff(hd
, ch
);
4061 else if(cmd
== GF_RESET
){
4062 html_output_raw_tag(hd
->html_data
, "thead");
4064 else if(cmd
== GF_EOD
){
4065 html_output_string(hd
->html_data
, "</thead>");
4068 return(1); /* GET linked */
4071 return(0); /* don't get linked */
4076 * HTML Table <TBODY> table body
4079 html_tbody(HANDLER_S
*hd
, int ch
, int cmd
)
4081 if(PASS_HTML(hd
->html_data
)){
4083 html_handoff(hd
, ch
);
4085 else if(cmd
== GF_RESET
){
4086 html_output_raw_tag(hd
->html_data
, "tbody");
4088 else if(cmd
== GF_EOD
){
4089 html_output_string(hd
->html_data
, "</tbody>");
4092 return(1); /* GET linked */
4095 return(0); /* don't get linked */
4100 * HTML Table <TFOOT> table foot
4103 html_tfoot(HANDLER_S
*hd
, int ch
, int cmd
)
4105 if(PASS_HTML(hd
->html_data
)){
4107 html_handoff(hd
, ch
);
4109 else if(cmd
== GF_RESET
){
4110 html_output_raw_tag(hd
->html_data
, "tfoot");
4112 else if(cmd
== GF_EOD
){
4113 html_output_string(hd
->html_data
, "</tfoot>");
4116 return(1); /* GET linked */
4119 return(0); /* don't get linked */
4124 * HTML <COL> (Table Column Attributes) element handler
4127 html_col(HANDLER_S
*hd
, int ch
, int cmd
)
4129 if(cmd
== GF_RESET
){
4130 if(PASS_HTML(hd
->html_data
)){
4131 html_output_raw_tag(hd
->html_data
, "col");
4135 return(0); /* don't get linked */
4140 * HTML Table <COLGROUP> column group
4143 html_colgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4145 if(PASS_HTML(hd
->html_data
)){
4147 html_handoff(hd
, ch
);
4149 else if(cmd
== GF_RESET
){
4150 html_output_raw_tag(hd
->html_data
, "colgroup");
4152 else if(cmd
== GF_EOD
){
4153 html_output_string(hd
->html_data
, "</colgroup>");
4156 return(1); /* GET linked */
4159 return(0); /* don't get linked */
4164 * HTML <I> (italic text) element handler
4167 html_i(HANDLER_S
*hd
, int ch
, int cmd
)
4170 /* include LITERAL in spaceness test! */
4171 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4172 HTML_ITALIC(hd
->html_data
, 1);
4176 html_handoff(hd
, ch
);
4178 else if(cmd
== GF_RESET
){
4181 else if(cmd
== GF_EOD
){
4183 HTML_ITALIC(hd
->html_data
, 0);
4186 return(1); /* get linked */
4191 * HTML <EM> element handler
4194 html_em(HANDLER_S
*hd
, int ch
, int cmd
)
4197 if(!PASS_HTML(hd
->html_data
)){
4198 /* include LITERAL in spaceness test! */
4199 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4200 HTML_ITALIC(hd
->html_data
, 1);
4205 html_handoff(hd
, ch
);
4207 else if(cmd
== GF_RESET
){
4208 if(PASS_HTML(hd
->html_data
)){
4209 html_output_raw_tag(hd
->html_data
, "em");
4215 else if(cmd
== GF_EOD
){
4216 if(PASS_HTML(hd
->html_data
)){
4217 html_output_string(hd
->html_data
, "</em>");
4221 HTML_ITALIC(hd
->html_data
, 0);
4225 return(1); /* get linked */
4230 * HTML <STRONG> element handler
4233 html_strong(HANDLER_S
*hd
, int ch
, int cmd
)
4236 if(!PASS_HTML(hd
->html_data
)){
4237 /* include LITERAL in spaceness test! */
4238 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4239 HTML_ITALIC(hd
->html_data
, 1);
4244 html_handoff(hd
, ch
);
4246 else if(cmd
== GF_RESET
){
4247 if(PASS_HTML(hd
->html_data
)){
4248 html_output_raw_tag(hd
->html_data
, "strong");
4254 else if(cmd
== GF_EOD
){
4255 if(PASS_HTML(hd
->html_data
)){
4256 html_output_string(hd
->html_data
, "</strong>");
4260 HTML_ITALIC(hd
->html_data
, 0);
4264 return(1); /* get linked */
4269 * HTML <u> (Underline text) element handler
4272 html_u(HANDLER_S
*hd
, int ch
, int cmd
)
4274 if(PASS_HTML(hd
->html_data
)){
4276 html_handoff(hd
, ch
);
4278 else if(cmd
== GF_RESET
){
4279 html_output_raw_tag(hd
->html_data
, "u");
4281 else if(cmd
== GF_EOD
){
4282 html_output_string(hd
->html_data
, "</u>");
4285 return(1); /* get linked */
4288 return(0); /* do NOT get linked */
4293 * HTML <b> (Bold text) element handler
4296 html_b(HANDLER_S
*hd
, int ch
, int cmd
)
4299 if(!PASS_HTML(hd
->html_data
)){
4300 /* include LITERAL in spaceness test! */
4301 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4302 HTML_BOLD(hd
->html_data
, 1);
4307 html_handoff(hd
, ch
);
4309 else if(cmd
== GF_RESET
){
4310 if(PASS_HTML(hd
->html_data
)){
4311 html_output_raw_tag(hd
->html_data
, "b");
4317 else if(cmd
== GF_EOD
){
4318 if(PASS_HTML(hd
->html_data
)){
4319 html_output_string(hd
->html_data
, "</b>");
4323 HTML_BOLD(hd
->html_data
, 0);
4327 return(1); /* get linked */
4332 * HTML <s> (strike-through text) element handler
4335 html_s(HANDLER_S
*hd
, int ch
, int cmd
)
4338 if(!PASS_HTML(hd
->html_data
)){
4339 /* include LITERAL in spaceness test! */
4340 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4341 HTML_STRIKE(hd
->html_data
, 1);
4346 html_handoff(hd
, ch
);
4348 else if(cmd
== GF_RESET
){
4349 if(PASS_HTML(hd
->html_data
)){
4350 html_output_raw_tag(hd
->html_data
, "s");
4356 else if(cmd
== GF_EOD
){
4357 if(PASS_HTML(hd
->html_data
)){
4358 html_output_string(hd
->html_data
, "</s>");
4362 HTML_STRIKE(hd
->html_data
, 0);
4366 return(1); /* get linked */
4371 * HTML <big> (BIG text) element handler
4374 html_big(HANDLER_S
*hd
, int ch
, int cmd
)
4377 /* include LITERAL in spaceness test! */
4378 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4379 HTML_BIG(hd
->html_data
, 1);
4383 html_handoff(hd
, ch
);
4385 else if(cmd
== GF_RESET
){
4388 else if(cmd
== GF_EOD
){
4390 HTML_BIG(hd
->html_data
, 0);
4393 return(1); /* get linked */
4398 * HTML <small> (SMALL text) element handler
4401 html_small(HANDLER_S
*hd
, int ch
, int cmd
)
4404 /* include LITERAL in spaceness test! */
4405 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4406 HTML_SMALL(hd
->html_data
, 1);
4410 html_handoff(hd
, ch
);
4412 else if(cmd
== GF_RESET
){
4415 else if(cmd
== GF_EOD
){
4417 HTML_SMALL(hd
->html_data
, 0);
4420 return(1); /* get linked */
4425 * HTML <FONT> element handler
4428 html_font(HANDLER_S
*hd
, int ch
, int cmd
)
4430 if(PASS_HTML(hd
->html_data
)){
4432 html_handoff(hd
, ch
);
4434 else if(cmd
== GF_RESET
){
4435 html_output_raw_tag(hd
->html_data
, "font");
4437 else if(cmd
== GF_EOD
){
4438 html_output_string(hd
->html_data
, "</font>");
4441 return(1); /* get linked */
4449 * HTML <IMG> element handler
4452 html_img(HANDLER_S
*hd
, int ch
, int cmd
)
4455 char *alt
= NULL
, *src
= NULL
, *s
;
4457 if(cmd
== GF_RESET
){
4458 if(PASS_HTML(hd
->html_data
)){
4459 html_output_raw_tag(hd
->html_data
, "img");
4462 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4465 if(p
->value
&& p
->value
[0]){
4466 if(!strucmp(p
->attribute
, "alt"))
4468 if(!strucmp(p
->attribute
, "src"))
4473 * Multipart/Related Content ID pointer
4474 * ONLY attached messages are recognized
4475 * if we ever decide web bugs aren't a problem
4476 * anymore then we might expand the scope
4479 && DO_HANDLES(hd
->html_data
)
4480 && RELATED_OK(hd
->html_data
)
4481 && struncmp(src
, "cid:", 4) == 0){
4484 HANDLE_S
*h
= new_handle(HANDLESP(hd
->html_data
));
4487 h
->h
.img
.src
= cpystr(src
+ 4);
4488 h
->h
.img
.alt
= cpystr((alt
) ? alt
: "Attached Image");
4490 HTML_TEXT(hd
->html_data
, TAG_EMBED
);
4491 HTML_TEXT(hd
->html_data
, TAG_HANDLE
);
4493 sprintf(buf
, "%d", h
->key
);
4495 HTML_TEXT(hd
->html_data
, n
);
4496 for(i
= 0; i
< n
; i
++){
4497 unsigned int uic
= buf
[i
];
4498 HTML_TEXT(hd
->html_data
, uic
);
4503 else if(alt
&& strlen(alt
) < 256){ /* arbitrary "reasonable" limit */
4504 HTML_DUMP_LIT(hd
->html_data
, alt
, strlen(alt
));
4505 HTML_TEXT(hd
->html_data
, ' ');
4509 && (s
= strrindex(src
, '/'))
4511 HTML_TEXT(hd
->html_data
, '[');
4512 HTML_DUMP_LIT(hd
->html_data
, s
, strlen(s
));
4513 HTML_TEXT(hd
->html_data
, ']');
4514 HTML_TEXT(hd
->html_data
, ' ');
4518 /* text filler of last resort */
4519 HTML_DUMP_LIT(hd
->html_data
, "[IMAGE] ", 7);
4523 return(0); /* don't get linked */
4528 * HTML <MAP> (Image Map) element handler
4531 html_map(HANDLER_S
*hd
, int ch
, int cmd
)
4533 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4535 html_handoff(hd
, ch
);
4537 else if(cmd
== GF_RESET
){
4538 html_output_raw_tag(hd
->html_data
, "map");
4540 else if(cmd
== GF_EOD
){
4541 html_output_string(hd
->html_data
, "</map>");
4552 * HTML <AREA> (Image Map Area) element handler
4555 html_area(HANDLER_S
*hd
, int ch
, int cmd
)
4557 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4559 html_handoff(hd
, ch
);
4561 else if(cmd
== GF_RESET
){
4562 html_output_raw_tag(hd
->html_data
, "area");
4564 else if(cmd
== GF_EOD
){
4565 html_output_string(hd
->html_data
, "</area>");
4576 * HTML <FORM> (Form) element handler
4579 html_form(HANDLER_S
*hd
, int ch
, int cmd
)
4581 if(PASS_HTML(hd
->html_data
)){
4583 html_handoff(hd
, ch
);
4585 else if(cmd
== GF_RESET
){
4588 /* SECURITY: make sure to redirect to new browser instance */
4589 for(pp
= &(HD(hd
->html_data
)->el_data
->attribs
);
4590 *pp
&& (*pp
)->attribute
;
4592 if(!strucmp((*pp
)->attribute
, "target")){
4594 fs_give((void **) &(*pp
)->value
);
4596 (*pp
)->value
= cpystr("_blank");
4600 *pp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4601 memset(*pp
, 0, sizeof(PARAMETER
));
4602 (*pp
)->attribute
= cpystr("target");
4603 (*pp
)->value
= cpystr("_blank");
4606 html_output_raw_tag(hd
->html_data
, "form");
4608 else if(cmd
== GF_EOD
){
4609 html_output_string(hd
->html_data
, "</form>");
4613 if(cmd
== GF_RESET
){
4614 html_blank(hd
->html_data
, 0);
4615 HTML_DUMP_LIT(hd
->html_data
, "[FORM]", 6);
4616 html_blank(hd
->html_data
, 0);
4620 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4625 * HTML <INPUT> (Form) element handler
4628 html_input(HANDLER_S
*hd
, int ch
, int cmd
)
4630 if(PASS_HTML(hd
->html_data
)){
4631 if(cmd
== GF_RESET
){
4632 html_output_raw_tag(hd
->html_data
, "input");
4636 return(0); /* don't get linked */
4641 * HTML <BUTTON> (Form) element handler
4644 html_button(HANDLER_S
*hd
, int ch
, int cmd
)
4646 if(PASS_HTML(hd
->html_data
)){
4648 html_handoff(hd
, ch
);
4650 else if(cmd
== GF_RESET
){
4651 html_output_raw_tag(hd
->html_data
, "button");
4653 else if(cmd
== GF_EOD
){
4654 html_output_string(hd
->html_data
, "</button>");
4657 return(1); /* get linked */
4665 * HTML <OPTION> (Form) element handler
4668 html_option(HANDLER_S
*hd
, int ch
, int cmd
)
4670 if(PASS_HTML(hd
->html_data
)){
4672 html_handoff(hd
, ch
);
4674 else if(cmd
== GF_RESET
){
4675 html_output_raw_tag(hd
->html_data
, "option");
4677 else if(cmd
== GF_EOD
){
4678 html_output_string(hd
->html_data
, "</option>");
4681 return(1); /* get linked */
4689 * HTML <OPTGROUP> (Form) element handler
4692 html_optgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4694 if(PASS_HTML(hd
->html_data
)){
4696 html_handoff(hd
, ch
);
4698 else if(cmd
== GF_RESET
){
4699 html_output_raw_tag(hd
->html_data
, "optgroup");
4701 else if(cmd
== GF_EOD
){
4702 html_output_string(hd
->html_data
, "</optgroup>");
4705 return(1); /* get linked */
4713 * HTML <SELECT> (Form) element handler
4716 html_select(HANDLER_S
*hd
, int ch
, int cmd
)
4718 if(PASS_HTML(hd
->html_data
)){
4720 html_handoff(hd
, ch
);
4722 else if(cmd
== GF_RESET
){
4723 html_output_raw_tag(hd
->html_data
, "select");
4725 else if(cmd
== GF_EOD
){
4726 html_output_string(hd
->html_data
, "</select>");
4729 return(1); /* get linked */
4737 * HTML <TEXTAREA> (Form) element handler
4740 html_textarea(HANDLER_S
*hd
, int ch
, int cmd
)
4742 if(PASS_HTML(hd
->html_data
)){
4744 html_handoff(hd
, ch
);
4746 else if(cmd
== GF_RESET
){
4747 html_output_raw_tag(hd
->html_data
, "textarea");
4749 else if(cmd
== GF_EOD
){
4750 html_output_string(hd
->html_data
, "</textarea>");
4753 return(1); /* get linked */
4761 * HTML <LABEL> (Form) element handler
4764 html_label(HANDLER_S
*hd
, int ch
, int cmd
)
4766 if(PASS_HTML(hd
->html_data
)){
4768 html_handoff(hd
, ch
);
4770 else if(cmd
== GF_RESET
){
4771 html_output_raw_tag(hd
->html_data
, "label");
4773 else if(cmd
== GF_EOD
){
4774 html_output_string(hd
->html_data
, "</label>");
4777 return(1); /* get linked */
4785 * HTML <FIELDSET> (Form) element handler
4788 html_fieldset(HANDLER_S
*hd
, int ch
, int cmd
)
4790 if(PASS_HTML(hd
->html_data
)){
4792 html_handoff(hd
, ch
);
4794 else if(cmd
== GF_RESET
){
4795 html_output_raw_tag(hd
->html_data
, "fieldset");
4797 else if(cmd
== GF_EOD
){
4798 html_output_string(hd
->html_data
, "</fieldset>");
4801 return(1); /* get linked */
4809 * HTML <HEAD> element handler
4812 html_head(HANDLER_S
*hd
, int ch
, int cmd
)
4815 html_handoff(hd
, ch
);
4817 else if(cmd
== GF_RESET
){
4818 HD(hd
->html_data
)->head
= 1;
4820 else if(cmd
== GF_EOD
){
4821 HD(hd
->html_data
)->head
= 0;
4824 return(1); /* get linked */
4829 * HTML <BASE> element handler
4832 html_base(HANDLER_S
*hd
, int ch
, int cmd
)
4834 if(cmd
== GF_RESET
){
4835 if(HD(hd
->html_data
)->head
&& !HTML_BASE(hd
->html_data
)){
4838 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4839 p
&& p
->attribute
&& strucmp(p
->attribute
, "HREF");
4843 if(p
&& p
->value
&& !((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
)
4844 ((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
= cpystr(p
->value
);
4848 return(0); /* DON'T get linked */
4853 * HTML <TITLE> element handler
4856 html_title(HANDLER_S
*hd
, int ch
, int cmd
)
4859 if(hd
->x
+ 1 >= hd
->y
){
4861 fs_resize((void **)&hd
->s
, (size_t)hd
->y
* sizeof(unsigned char));
4864 hd
->s
[hd
->x
++] = (unsigned char) ch
;
4866 else if(cmd
== GF_RESET
){
4869 hd
->s
= (unsigned char *)fs_get((size_t)hd
->y
* sizeof(unsigned char));
4871 else if(cmd
== GF_EOD
){
4872 /* Down the road we probably want to give these bytes to
4875 hd
->s
[hd
->x
] = '\0';
4876 fs_give((void **)&hd
->s
);
4879 return(1); /* get linked */
4884 * HTML <BODY> element handler
4887 html_body(HANDLER_S
*hd
, int ch
, int cmd
)
4890 html_handoff(hd
, ch
);
4892 else if(cmd
== GF_RESET
){
4893 if(PASS_HTML(hd
->html_data
)){
4895 char **style
= NULL
, *text
= NULL
, *bgcolor
= NULL
, *pcs
;
4897 /* modify any attributes in a useful way? */
4898 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4902 if(!strucmp(p
->attribute
, "style"))
4904 else if(!strucmp(p
->attribute
, "text"))
4907 * bgcolor NOT passed since user setting takes precedence
4909 else if(!strucmp(p->attribute, "bgcolor"))
4914 /* colors pretty much it */
4915 if(text
|| bgcolor
){
4917 tp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4918 memset(tp
, 0, sizeof(PARAMETER
));
4919 tp
->next
= HD(hd
->html_data
)->el_data
->attribs
;
4920 HD(hd
->html_data
)->el_data
->attribs
= tp
;
4921 tp
->attribute
= cpystr("style");
4923 tmp_20k_buf
[0] = '\0';
4928 snprintf(tmp_20k_buf
, SIZEOF_20KBUF
, "%s", *style
);
4929 fs_give((void **) style
);
4930 pcs
= "; %s%s%s%s%s";
4933 snprintf(tmp_20k_buf
+ strlen(tmp_20k_buf
),
4934 SIZEOF_20KBUF
- strlen(tmp_20k_buf
),
4936 (text
) ? "color: " : "", (text
) ? text
: "",
4937 (text
&& bgcolor
) ? ";" : "",
4938 (bgcolor
) ? "background-color: " : "", (bgcolor
) ? bgcolor
: "");
4939 *style
= cpystr(tmp_20k_buf
);
4942 html_output_raw_tag(hd
->html_data
, "div");
4945 HD(hd
->html_data
)->body
= 1;
4947 else if(cmd
== GF_EOD
){
4948 if(PASS_HTML(hd
->html_data
)){
4949 html_output_string(hd
->html_data
, "</div>");
4952 HD(hd
->html_data
)->body
= 0;
4955 return(1); /* get linked */
4960 * HTML <A> (Anchor) element handler
4963 html_a(HANDLER_S
*hd
, int ch
, int cmd
)
4966 html_handoff(hd
, ch
);
4968 if(hd
->dp
) /* remember text within anchor tags */
4969 so_writec(ch
, (STORE_S
*) hd
->dp
);
4971 else if(cmd
== GF_RESET
){
4975 PARAMETER
*p
, *href
= NULL
, *name
= NULL
;
4978 * Pending Anchor!?!?
4979 * space insertion/line breaking that's yet to get done...
4981 if(HD(hd
->html_data
)->prefix
){
4982 dprint((2, "-- html error: nested or unterminated anchor\n"));
4987 * Look for valid Anchor data vis the filter installer's parms
4988 * (e.g., Only allow references to our internal URLs if asked)
4990 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4993 if(!strucmp(p
->attribute
, "HREF")
4995 && (HANDLES_LOC(hd
->html_data
)
4996 || struncmp(p
->value
, "x-alpine-", 9)
4997 || struncmp(p
->value
, "x-pine-help", 11)
4998 || p
->value
[0] == '#'))
5000 else if(!strucmp(p
->attribute
, "NAME"))
5003 if(DO_HANDLES(hd
->html_data
) && (href
|| name
)){
5004 h
= new_handle(HANDLESP(hd
->html_data
));
5007 * Enhancement: we might want to get fancier and parse the
5008 * href a bit further such that we can launch images using
5009 * our image viewer, or browse local files or directories
5010 * with our internal tools. Of course, having the jump-off
5011 * point into text/html always be the defined "web-browser",
5012 * just might be the least confusing UI-wise...
5016 if(name
&& name
->value
)
5017 h
->h
.url
.name
= cpystr(name
->value
);
5020 * Prepare to build embedded prefix...
5022 HD(hd
->html_data
)->prefix
= (int *) fs_get(64 * sizeof(int));
5026 * Is this something that looks like a URL? If not and
5027 * we were given some "base" string, proceed ala RFC1808...
5030 if(HTML_BASE(hd
->html_data
) && !rfc1738_scan(href
->value
, &n
)){
5031 html_a_relative(HTML_BASE(hd
->html_data
), href
->value
, h
);
5033 else if(!(NO_RELATIVE(hd
->html_data
) && html_href_relative(href
->value
)))
5034 h
->h
.url
.path
= cpystr(href
->value
);
5036 if(pico_usingcolor()){
5037 char *fg
= NULL
, *bg
= NULL
, *q
;
5039 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5040 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5041 ps_global
->VAR_NORM_FORE_COLOR
))
5042 fg
= ps_global
->VAR_SLCTBL_FORE_COLOR
;
5044 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5045 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5046 ps_global
->VAR_NORM_BACK_COLOR
))
5047 bg
= ps_global
->VAR_SLCTBL_BACK_COLOR
;
5053 * The blacks are just known good colors for testing
5054 * whether the other color is good.
5056 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5057 bg
? bg
: colorx(COL_BLACK
));
5058 if(pico_is_good_colorpair(tmp
)){
5059 q
= color_embed(fg
, bg
);
5061 for(i
= 0; q
[i
]; i
++)
5062 HD(hd
->html_data
)->prefix
[x
++] = q
[i
];
5066 free_color_pair(&tmp
);
5069 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5070 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5073 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5076 HD(hd
->html_data
)->prefix
[x
++] = TAG_EMBED
;
5077 HD(hd
->html_data
)->prefix
[x
++] = TAG_HANDLE
;
5079 snprintf(buf
, sizeof(buf
), "%ld", hd
->x
= h
->key
);
5080 HD(hd
->html_data
)->prefix
[x
++] = n
= strlen(buf
);
5081 for(i
= 0; i
< n
; i
++)
5082 HD(hd
->html_data
)->prefix
[x
++] = buf
[i
];
5084 HD(hd
->html_data
)->prefix_used
= x
;
5086 hd
->dp
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
5089 else if(cmd
== GF_EOD
){
5093 return(1); /* get linked */
5098 html_a_prefix(FILTER_S
*f
)
5102 /* Do this so we don't visit from html_output... */
5103 prefix
= HD(f
)->prefix
;
5104 HD(f
)->prefix
= NULL
;
5106 for(n
= 0; n
< HD(f
)->prefix_used
; n
++)
5107 html_a_output_prefix(f
, prefix
[n
]);
5109 fs_give((void **) &prefix
);
5114 * html_a_finish - housekeeping associated with end of link tag
5117 html_a_finish(HANDLER_S
*hd
)
5119 if(DO_HANDLES(hd
->html_data
)){
5120 if(HD(hd
->html_data
)->prefix
){
5121 if(!PASS_HTML(hd
->html_data
)){
5122 char *empty_link
= "[LINK]";
5125 html_a_prefix(hd
->html_data
);
5126 for(i
= 0; empty_link
[i
]; i
++)
5127 html_output(hd
->html_data
, empty_link
[i
]);
5131 if(pico_usingcolor()){
5132 char *fg
= NULL
, *bg
= NULL
, *p
;
5135 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5136 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5137 ps_global
->VAR_NORM_FORE_COLOR
))
5138 fg
= ps_global
->VAR_NORM_FORE_COLOR
;
5140 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5141 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5142 ps_global
->VAR_NORM_BACK_COLOR
))
5143 bg
= ps_global
->VAR_NORM_BACK_COLOR
;
5145 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5146 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5152 * The blacks are just known good colors for testing
5153 * whether the other color is good.
5155 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5156 bg
? bg
: colorx(COL_BLACK
));
5157 if(pico_is_good_colorpair(tmp
)){
5158 p
= color_embed(fg
, bg
);
5160 for(i
= 0; p
[i
]; i
++)
5161 html_output(hd
->html_data
, p
[i
]);
5165 free_color_pair(&tmp
);
5169 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5171 html_output(hd
->html_data
, TAG_EMBED
);
5172 html_output(hd
->html_data
, TAG_HANDLEOFF
);
5174 html_a_output_info(hd
);
5180 * html_a_output_prefix - dump Anchor prefix data
5183 html_a_output_prefix(FILTER_S
*f
, int c
)
5199 * html_a_output_info - dump possibly deceptive link info into text.
5200 * phark the phishers.
5203 html_a_output_info(HANDLER_S
*hd
)
5205 int l
, risky
= 0, hl
= 0, tl
;
5206 char *url
= NULL
, *hn
= NULL
, *txt
;
5209 /* find host anchor references */
5210 if((h
= get_handle(*HANDLESP(hd
->html_data
), (int) hd
->x
)) != NULL
5211 && h
->h
.url
.path
!= NULL
5212 && (hn
= rfc1738_scan(rfc1738_str(url
= cpystr(h
->h
.url
.path
)), &l
)) != NULL
5213 && (hn
= srchstr(hn
,"://")) != NULL
){
5215 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++)
5221 * look over anchor's text to see if there's a
5222 * mismatch between href target and url-ish
5223 * looking text. throw a red flag if so.
5224 * similarly, toss one if the target's referenced
5228 so_writec('\0', (STORE_S
*) hd
->dp
);
5230 if((txt
= (char *) so_text((STORE_S
*) hd
->dp
)) != NULL
5231 && (txt
= rfc1738_scan(txt
, &tl
)) != NULL
5232 && (txt
= srchstr(txt
,"://")) != NULL
){
5234 for(txt
+= 3, tl
= 0; txt
[tl
] && txt
[tl
] != '/' && txt
[tl
] != '?'; tl
++)
5240 /* look for non matching text */
5241 for(l
= 0; l
< tl
&& l
< hl
; l
++)
5242 if(tolower((unsigned char) txt
[l
]) != tolower((unsigned char) hn
[l
])){
5248 so_give((STORE_S
**) &hd
->dp
);
5251 /* look for literal IP, anything possibly encoded or auth specifier */
5255 for(l
= 0; l
< hl
; l
++){
5256 if(hn
[l
] == '@' || hn
[l
] == '%'){
5260 else if(!(hn
[l
] == '.' || isdigit((unsigned char) hn
[l
])))
5268 /* Insert text of link's domain */
5269 if(SHOWSERVER(hd
->html_data
)){
5271 COLOR_PAIR
*col
= NULL
, *colnorm
= NULL
;
5273 html_output(hd
->html_data
, ' ');
5274 html_output(hd
->html_data
, '[');
5276 if(pico_usingcolor()
5277 && ps_global
->VAR_METAMSG_FORE_COLOR
5278 && ps_global
->VAR_METAMSG_BACK_COLOR
5279 && (col
= new_color_pair(ps_global
->VAR_METAMSG_FORE_COLOR
,
5280 ps_global
->VAR_METAMSG_BACK_COLOR
))){
5281 if(!pico_is_good_colorpair(col
))
5282 free_color_pair(&col
);
5285 q
= color_embed(col
->fg
, col
->bg
);
5287 for(l
= 0; q
[l
]; l
++)
5288 html_output(hd
->html_data
, q
[l
]);
5292 for(l
= 0; l
< hl
; l
++)
5293 html_output(hd
->html_data
, hn
[l
]);
5296 if(ps_global
->VAR_NORM_FORE_COLOR
5297 && ps_global
->VAR_NORM_BACK_COLOR
5298 && (colnorm
= new_color_pair(ps_global
->VAR_NORM_FORE_COLOR
,
5299 ps_global
->VAR_NORM_BACK_COLOR
))){
5300 if(!pico_is_good_colorpair(colnorm
))
5301 free_color_pair(&colnorm
);
5304 q
= color_embed(colnorm
->fg
, colnorm
->bg
);
5305 free_color_pair(&colnorm
);
5307 for(l
= 0; q
[l
]; l
++)
5308 html_output(hd
->html_data
, q
[l
]);
5312 free_color_pair(&col
);
5315 html_output(hd
->html_data
, ']');
5320 * if things look OK so far, make sure nothing within
5321 * the url looks too fishy...
5324 && (hn
= rfc1738_scan(hn
, &l
)) != NULL
5325 && (hn
= srchstr(hn
,"://")) != NULL
){
5328 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++){
5330 * auth spec, encoded characters, or possibly non-standard port
5331 * should raise a red flag
5333 if(hn
[hl
] == '@' || hn
[hl
] == '%' || hn
[hl
] == ':'){
5337 else if(!(hn
[hl
] == '.' || isdigit((unsigned char) hn
[hl
])))
5341 /* dotted-dec/raw-int address should cause suspicion as well */
5346 if(risky
&& ((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)
5347 (*((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)();
5349 fs_give((void **) &url
);
5355 * html_a_relative - put full url path in h based on base and relative url
5358 html_a_relative(char *base_url
, char *rel_url
, HANDLE_S
*h
)
5361 char tmp
[MAILTMPLEN
], *p
, *q
;
5362 char *scheme
= NULL
, *net
= NULL
, *path
= NULL
,
5363 *parms
= NULL
, *query
= NULL
, *frag
= NULL
,
5364 *base_scheme
= NULL
, *base_net_loc
= NULL
,
5365 *base_path
= NULL
, *base_parms
= NULL
,
5366 *base_query
= NULL
, *base_frag
= NULL
,
5367 *rel_scheme
= NULL
, *rel_net_loc
= NULL
,
5368 *rel_path
= NULL
, *rel_parms
= NULL
,
5369 *rel_query
= NULL
, *rel_frag
= NULL
;
5371 /* Rough parse of base URL */
5372 rfc1808_tokens(base_url
, &base_scheme
, &base_net_loc
, &base_path
,
5373 &base_parms
, &base_query
, &base_frag
);
5375 /* Rough parse of this URL */
5376 rfc1808_tokens(rel_url
, &rel_scheme
, &rel_net_loc
, &rel_path
,
5377 &rel_parms
, &rel_query
, &rel_frag
);
5379 scheme
= rel_scheme
; /* defaults */
5385 if(!scheme
&& base_scheme
){
5386 scheme
= base_scheme
;
5392 for(p
= q
= base_path
; /* Drop base path's tail */
5393 (p
= strchr(p
, '/'));
5397 len
= q
- base_path
;
5402 if(len
+ strlen(rel_path
) < sizeof(tmp
)-1){
5404 snprintf(path
= tmp
, sizeof(tmp
), "%.*s", (int) len
, base_path
);
5406 strncpy(tmp
+ len
, rel_path
, sizeof(tmp
)-len
);
5407 tmp
[sizeof(tmp
)-1] = '\0';
5409 /* Follow RFC 1808 "Step 6" */
5410 for(p
= tmp
; (p
= strchr(p
, '.')); )
5413 * a) All occurrences of "./", where "." is a
5414 * complete path segment, are removed.
5418 for(q
= p
; (*q
= *(q
+2)) != '\0'; q
++)
5426 * b) If the path ends with "." as a
5427 * complete path segment, that "." is
5431 if(p
== tmp
|| *(p
-1) == '/')
5439 * c) All occurrences of "<segment>/../",
5440 * where <segment> is a complete path
5441 * segment not equal to "..", are removed.
5442 * Removal of these path segments is
5443 * performed iteratively, removing the
5444 * leftmost matching pattern on each
5445 * iteration, until no matching pattern
5448 * d) If the path ends with "<segment>/..",
5449 * where <segment> is a complete path
5450 * segment not equal to "..", that
5451 * "<segment>/.." is removed.
5455 for(q
= p
- 2; q
> tmp
&& *q
!= '/'; q
--)
5461 if(q
+ 1 == p
/* no "//.." */
5462 || (*q
== '.' /* and "../.." */
5473 for(; (*q
= *(q
+len
)) != '\0'; q
++)
5498 path
= ""; /* lame. */
5512 len
= (scheme
? strlen(scheme
) : 0) + (net
? strlen(net
) : 0)
5513 + (path
? strlen(path
) : 0) + (parms
? strlen(parms
) : 0)
5514 + (query
? strlen(query
) : 0) + (frag
? strlen(frag
) : 0) + 8;
5516 h
->h
.url
.path
= (char *) fs_get(len
* sizeof(char));
5517 snprintf(h
->h
.url
.path
, len
, "%s%s%s%s%s%s%s%s%s%s%s%s",
5518 scheme
? scheme
: "", scheme
? ":" : "",
5519 net
? "//" : "", net
? net
: "",
5520 (path
&& *path
== '/') ? "" : ((path
&& net
) ? "/" : ""),
5522 parms
? ";" : "", parms
? parms
: "",
5523 query
? "?" : "", query
? query
: "",
5524 frag
? "#" : "", frag
? frag
: "");
5527 fs_give((void **) &base_scheme
);
5530 fs_give((void **) &base_net_loc
);
5533 fs_give((void **) &base_path
);
5536 fs_give((void **) &base_parms
);
5539 fs_give((void **) &base_query
);
5542 fs_give((void **) &base_frag
);
5545 fs_give((void **) &rel_scheme
);
5548 fs_give((void **) &rel_net_loc
);
5551 fs_give((void **) &rel_parms
);
5554 fs_give((void **) &rel_query
);
5557 fs_give((void **) &rel_frag
);
5560 fs_give((void **) &rel_path
);
5565 * html_href_relative - href
/*
 * html_href_relative: examines at most the first 32 characters of url,
 * stopping at the first one that is not a letter, '_' or '-'.
 * NOTE(review): what happens at that character (and the return value) is
 * not visible in this listing -- confirm against the full source.
 */
5568 html_href_relative(char *url
)
5573 for(i
= 0; i
< 32 && url
[i
]; i
++)
5574 if(!(isalpha((unsigned char) url
[i
]) || url
[i
] == '_' || url
[i
] == '-')){
5586 * HTML <UL> (Unordered List) element handler
/*
 * html_ul: <UL> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "ul" tag when PASS_HTML is set, marks
 * li_pending and calls html_blank().  GF_EOD: emits "</ul>" when passing
 * HTML, calls html_blank(), backs the indent out by 4 if li_pending was
 * already consumed, then clears li_pending.  Returns 1 (handler is linked).
 * NOTE(review): some original lines (branch heads/braces) are missing from
 * this listing, so the exact nesting of the calls cannot be confirmed here.
 */
5589 html_ul(HANDLER_S
*hd
, int ch
, int cmd
)
5592 html_handoff(hd
, ch
);
5594 else if(cmd
== GF_RESET
){
5595 if(PASS_HTML(hd
->html_data
)){
5596 html_output_raw_tag(hd
->html_data
, "ul");
5599 HD(hd
->html_data
)->li_pending
= 1;
5600 html_blank(hd
->html_data
, 0);
5603 else if(cmd
== GF_EOD
){
5604 if(PASS_HTML(hd
->html_data
)){
5605 html_output_string(hd
->html_data
, "</ul>");
5608 html_blank(hd
->html_data
, 0);
5610 if(!HD(hd
->html_data
)->li_pending
)
5611 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5613 HD(hd
->html_data
)->li_pending
= 0;
5617 return(1); /* get linked */
5622 * HTML <OL> (Ordered List) element handler
/*
 * html_ol: <OL> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "ol" tag when PASS_HTML is set; sets the defaults
 * hd->x = 1 (next item number) and hd->y = LIST_DECIMAL (numbering style),
 * then walks the element's attributes: TYPE selects the style, START sets
 * hd->x via atol().  Finally marks li_pending and calls html_blank().
 * GF_EOD mirrors html_ul: emits "</ol>", html_blank(), undoes the 4-column
 * indent if li_pending was consumed, clears li_pending.  Returns 1.
 * NOTE(review): if strucmp() is case-insensitive (presumably, given how it
 * is used for attribute names), the "A" and "I" branches can never be
 * reached because "a"/"i" match first -- confirm.
 * NOTE(review): the attribute loop's condition/step lines are missing from
 * this listing.
 */
5625 html_ol(HANDLER_S
*hd
, int ch
, int cmd
)
5628 html_handoff(hd
, ch
);
5630 else if(cmd
== GF_RESET
){
5631 if(PASS_HTML(hd
->html_data
)){
5632 html_output_raw_tag(hd
->html_data
, "ol");
5637 * Signal that we're expecting to see <LI> as our next elemnt
5638 * and set the the initial ordered count.
5640 hd
->x
= 1L; /* set default */
5641 hd
->y
= LIST_DECIMAL
; /* set default */
5642 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
5646 if(!strucmp(p
->attribute
, "TYPE")){
5647 if(!strucmp(p
->value
, "a")) /* alpha, lowercase */
5648 hd
->y
= LIST_ALPHALO
;
5649 else if(!strucmp(p
->value
, "A")) /* alpha, uppercase */
5650 hd
->y
= LIST_ALPHAUP
;
5651 else if(!strucmp(p
->value
, "i")) /* roman, lowercase */
5652 hd
->y
= LIST_ROMANLO
;
5653 else if(!strucmp(p
->value
, "I")) /* roman, uppercase */
5654 hd
->y
= LIST_ROMANUP
;
5655 else if(strucmp(p
->value
, "1")) /* decimal, the default */
5656 hd
->y
= LIST_UNKNOWN
;
5658 else if(!strucmp(p
->attribute
, "START"))
5659 hd
->x
= atol(p
->value
);
5660 // else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5661 // this is not so simple. The main missing support
5662 // is for the STYLE attribute, but implementing that
5663 // correctly will take time, so will be implemented
5664 // after version 2.21 is released.
5666 HD(hd
->html_data
)->li_pending
= 1;
5667 html_blank(hd
->html_data
, 0);
5670 else if(cmd
== GF_EOD
){
5671 if(PASS_HTML(hd
->html_data
)){
5672 html_output_string(hd
->html_data
, "</ol>");
5675 html_blank(hd
->html_data
, 0);
5677 if(!HD(hd
->html_data
)->li_pending
)
5678 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5680 HD(hd
->html_data
)->li_pending
= 0;
5684 return(1); /* get linked */
5689 * HTML <MENU> (Menu List) element handler
/*
 * html_menu: <MENU> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "menu" tag when PASS_HTML is set and marks
 * li_pending.  GF_EOD: emits "</menu>" when passing HTML, calls
 * html_blank(), undoes the 4-column indent if li_pending was consumed,
 * and clears li_pending.  Returns 1 (handler is linked).
 * NOTE(review): branch heads/braces are missing from this listing.
 */
5692 html_menu(HANDLER_S
*hd
, int ch
, int cmd
)
5695 html_handoff(hd
, ch
);
5697 else if(cmd
== GF_RESET
){
5698 if(PASS_HTML(hd
->html_data
)){
5699 html_output_raw_tag(hd
->html_data
, "menu");
5702 HD(hd
->html_data
)->li_pending
= 1;
5705 else if(cmd
== GF_EOD
){
5706 if(PASS_HTML(hd
->html_data
)){
5707 html_output_string(hd
->html_data
, "</menu>");
5710 html_blank(hd
->html_data
, 0);
5712 if(!HD(hd
->html_data
)->li_pending
)
5713 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5715 HD(hd
->html_data
)->li_pending
= 0;
5719 return(1); /* get linked */
5724 * HTML <DIR> (Directory List) element handler
/*
 * html_dir: <DIR> handler; same visible shape as html_menu.
 * Character data goes to html_handoff().  GF_RESET: emits the raw "dir"
 * tag when PASS_HTML is set and marks li_pending.  GF_EOD: emits "</dir>"
 * when passing HTML, calls html_blank(), undoes the 4-column indent if
 * li_pending was consumed, and clears li_pending.  Returns 1.
 * NOTE(review): branch heads/braces are missing from this listing.
 */
5727 html_dir(HANDLER_S
*hd
, int ch
, int cmd
)
5730 html_handoff(hd
, ch
);
5732 else if(cmd
== GF_RESET
){
5733 if(PASS_HTML(hd
->html_data
)){
5734 html_output_raw_tag(hd
->html_data
, "dir");
5737 HD(hd
->html_data
)->li_pending
= 1;
5740 else if(cmd
== GF_EOD
){
5741 if(PASS_HTML(hd
->html_data
)){
5742 html_output_string(hd
->html_data
, "</dir>");
5745 html_blank(hd
->html_data
, 0);
5747 if(!HD(hd
->html_data
)->li_pending
)
5748 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5750 HD(hd
->html_data
)->li_pending
= 0;
5754 return(1); /* get linked */
5759 * HTML <LI> (List Item) element handler
/*
 * html_li: <LI> handler.
 * When passing HTML, data is handed off and "li" / "</li>" are emitted on
 * GF_RESET / GF_EOD.  Otherwise GF_RESET searches the handler stack
 * (HANDLERS .. ->below) for an enclosing html_ul/html_ol/html_menu/html_dir
 * handler ("found") and renders a bullet for it:
 *   - UL:   " X" where X is '*', '+', 'o' or '#' chosen by indent depth
 *   - OL:   " <n>." using found->x (post-incremented) formatted per
 *           found->y (decimal, alpha or roman)
 *   - MENU: " ->"
 * The bullet is written with wrapstate temporarily cleared, between an
 * html_indent(-4) and a matching html_indent(+4), so it is drawn in the
 * list's left margin.  The final return is PASS_HTML(...), i.e. the
 * handler is linked only when passing HTML through.
 * NOTE(review): buf and tmp are 16 bytes; " %s." of a 15-char tmp would be
 * truncated by snprintf (safe, but the '.' could be lost).
 * NOTE(review): several original lines are missing from this listing
 * (e.g. how "found" is assigned and what the DIR case writes to buf).
 */
5762 html_li(HANDLER_S
*hd
, int ch
, int cmd
)
5765 if(PASS_HTML(hd
->html_data
)){
5766 html_handoff(hd
, ch
);
5769 else if(cmd
== GF_RESET
){
5770 HANDLER_S
*p
, *found
= NULL
;
5773 * There better be a an unordered list, ordered list,
5774 * Menu or Directory handler installed
5775 * or else we crap out...
5777 for(p
= HANDLERS(hd
->html_data
); p
; p
= p
->below
)
5778 if(EL(p
)->handler
== html_ul
5779 || EL(p
)->handler
== html_ol
5780 || EL(p
)->handler
== html_menu
5781 || EL(p
)->handler
== html_dir
){
5787 if(PASS_HTML(hd
->html_data
)){
5790 char buf
[16], tmp
[16], *p
;
5793 /* Start a new line */
5794 html_blank(hd
->html_data
, 0);
5796 /* adjust indent level if needed */
5797 if(HD(hd
->html_data
)->li_pending
){
5798 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5799 HD(hd
->html_data
)->li_pending
= 0;
5802 if(EL(found
)->handler
== html_ul
){
5803 int l
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5805 strncpy(buf
, " ", sizeof(buf
));
5806 buf
[1] = (l
< 5) ? '*' : (l
< 9) ? '+' : (l
< 17) ? 'o' : '#';
5808 else if(EL(found
)->handler
== html_ol
){
5809 if(found
->y
== LIST_DECIMAL
|| found
->y
== LIST_UNKNOWN
)
5810 snprintf(tmp
, sizeof(tmp
), "%ld", found
->x
++);
5811 else if(found
->y
== LIST_ALPHALO
)
5812 convert_decimal_to_alpha(tmp
, sizeof(tmp
), found
->x
++, 'a');
5813 else if(found
->y
== LIST_ALPHAUP
)
5814 convert_decimal_to_alpha(tmp
, sizeof(tmp
), found
->x
++, 'A');
5815 else if(found
->y
== LIST_ROMANLO
)
5816 convert_decimal_to_roman(tmp
, sizeof(tmp
), found
->x
++, 'i');
5817 else if(found
->y
== LIST_ROMANUP
)
5818 convert_decimal_to_roman(tmp
, sizeof(tmp
), found
->x
++, 'I');
5819 snprintf(buf
, sizeof(buf
), " %s.", tmp
);
5820 buf
[sizeof(buf
)-1] = '\0';
5822 else if(EL(found
)->handler
== html_menu
){
5823 strncpy(buf
, " ->", sizeof(buf
));
5824 buf
[sizeof(buf
)-1] = '\0';
5827 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5829 /* So we don't munge whitespace */
5830 wrapstate
= HD(hd
->html_data
)->wrapstate
;
5831 HD(hd
->html_data
)->wrapstate
= 0;
5833 html_write_indent(hd
->html_data
, HD(hd
->html_data
)->indent_level
);
5834 for(p
= buf
; *p
; p
++)
5835 html_output(hd
->html_data
, (int) *p
);
5836 HD(hd
->html_data
)->wrapstate
= wrapstate
;
5837 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5839 /* else BUG: should really bitch about this */
5842 if(PASS_HTML(hd
->html_data
)){
5843 html_output_raw_tag(hd
->html_data
, "li");
5844 return(1); /* get linked */
5847 else if(cmd
== GF_EOD
){
5848 if(PASS_HTML(hd
->html_data
)){
5849 html_output_string(hd
->html_data
, "</li>");
5853 return(PASS_HTML(hd
->html_data
)); /* DON'T get linked */
5858 * HTML <DL> (Definition List) element handler
/*
 * html_dl: <DL> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "dl" tag when PASS_HTML is set; records the
 * current indent (html_indent(..., 0, HTML_ID_GET)) in hd->x.
 * GF_EOD: emits "</dl>" when passing HTML, restores the indent saved in
 * hd->x and calls html_blank(..., 1).  Returns 1 (handler is linked).
 * NOTE(review): html_dt/html_dd read this handler's y and z fields; the
 * lines that set them are not visible in this listing -- confirm.
 */
5861 html_dl(HANDLER_S
*hd
, int ch
, int cmd
)
5864 html_handoff(hd
, ch
);
5866 else if(cmd
== GF_RESET
){
5867 if(PASS_HTML(hd
->html_data
)){
5868 html_output_raw_tag(hd
->html_data
, "dl");
5872 * Set indention level for definition terms and definitions...
5874 hd
->x
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5879 else if(cmd
== GF_EOD
){
5880 if(PASS_HTML(hd
->html_data
)){
5881 html_output_string(hd
->html_data
, "</dl>");
5884 html_indent(hd
->html_data
, (int) hd
->x
, HTML_ID_SET
);
5885 html_blank(hd
->html_data
, 1);
5889 return(1); /* get linked */
5894 * HTML <DT> (Definition Term) element handler
/*
 * html_dt: <DT> handler.
 * Passing HTML: data is handed off, "dt" / "</dt>" are emitted on
 * GF_RESET / GF_EOD, and 1 is returned (handler is linked).
 * Otherwise, on GF_RESET it walks the handler stack for the nearest
 * html_dl handler; if one is found the indent is set to that handler's y
 * value and html_blank(..., 1) is called.  Returns 0 (not linked).
 * A missing <DL> is silently ignored (see the BUG remark below).
 */
5897 html_dt(HANDLER_S
*hd
, int ch
, int cmd
)
5899 if(PASS_HTML(hd
->html_data
)){
5901 html_handoff(hd
, ch
);
5903 else if(cmd
== GF_RESET
){
5904 html_output_raw_tag(hd
->html_data
, "dt");
5906 else if(cmd
== GF_EOD
){
5907 html_output_string(hd
->html_data
, "</dt>");
5910 return(1); /* get linked */
5913 if(cmd
== GF_RESET
){
5917 * There better be a Definition Handler installed
5918 * or else we crap out...
5920 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5923 if(p
){ /* adjust indent level if needed */
5924 html_indent(hd
->html_data
, (int) p
->y
, HTML_ID_SET
);
5925 html_blank(hd
->html_data
, 1);
5927 /* BUG: else should really bitch about this */
5930 return(0); /* DON'T get linked */
5935 * HTML <DD> (Definition Definition) element handler
/*
 * html_dd: <DD> handler; same visible shape as html_dt.
 * Passing HTML: data is handed off, "dd" / "</dd>" are emitted on
 * GF_RESET / GF_EOD, and 1 is returned (handler is linked).
 * Otherwise, on GF_RESET it finds the nearest html_dl handler on the
 * stack, sets the indent to that handler's z value and calls
 * html_blank(..., 0).  Returns 0 (not linked).
 */
5938 html_dd(HANDLER_S
*hd
, int ch
, int cmd
)
5940 if(PASS_HTML(hd
->html_data
)){
5942 html_handoff(hd
, ch
);
5944 else if(cmd
== GF_RESET
){
5945 html_output_raw_tag(hd
->html_data
, "dd");
5947 else if(cmd
== GF_EOD
){
5948 html_output_string(hd
->html_data
, "</dd>");
5951 return(1); /* get linked */
5954 if(cmd
== GF_RESET
){
5958 * There better be a Definition Handler installed
5959 * or else we crap out...
5961 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5964 if(p
){ /* adjust indent level if needed */
5965 html_indent(hd
->html_data
, (int) p
->z
, HTML_ID_SET
);
5966 html_blank(hd
->html_data
, 0);
5968 /* BUG: should really bitch about this */
5971 return(0); /* DON'T get linked */
5976 * HTML <H1> (Headings 1) element handler.
5978 * Bold, very-large font, CENTERED. One or two blank lines
5979 * above and below. For our silly character cells that
5980 * means centered and ALL CAPS...
/*
 * html_h1: <H1> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "h1" tag when PASS_HTML is set; turns
 * CENTER_BIT on.  GF_EOD: emits "</h1>" when passing HTML; turns
 * CENTER_BIT off and calls html_blank(..., 1).  Returns 1.
 * NOTE(review): CENTER_BIT is cleared unconditionally rather than restored
 * to a saved value (compare html_h4) -- presumably intentional; confirm.
 */
5983 html_h1(HANDLER_S
*hd
, int ch
, int cmd
)
5986 html_handoff(hd
, ch
);
5988 else if(cmd
== GF_RESET
){
5989 if(PASS_HTML(hd
->html_data
)){
5990 html_output_raw_tag(hd
->html_data
, "h1");
5993 /* turn ON the centered bit */
5994 CENTER_BIT(hd
->html_data
) = 1;
5997 else if(cmd
== GF_EOD
){
5998 if(PASS_HTML(hd
->html_data
)){
5999 html_output_string(hd
->html_data
, "</h1>");
6002 /* turn OFF the centered bit, add blank line */
6003 CENTER_BIT(hd
->html_data
) = 0;
6004 html_blank(hd
->html_data
, 1);
6008 return(1); /* get linked */
6013 * HTML <H2> (Headings 2) element handler
/*
 * html_h2: <H2> handler.
 * Data: passed through unchanged when PASS_HTML is set; otherwise
 * underlining is switched on at the first non-space character (tracked by
 * the HTML_HX_ULINE flag in hd->x) and lowercase ASCII is upper-cased
 * before hand-off.
 * GF_RESET (not passing HTML): remembers whether centering was active
 * (HTML_HX_CENTER in hd->x), sets HTML_HX_ULINE, clears CENTER_BIT, sets
 * the indent to 0 keeping html_indent()'s return value in hd->y, saves
 * wrapcol in hd->z and narrows it to WRAP_COLS - 8, then html_blank().
 * GF_EOD: turns underline off if it was turned on, restores indent,
 * centering and wrapcol from hd->y / hd->x / hd->z.  Returns 1.
 * NOTE(review): html_indent()'s return value for HTML_ID_SET is assumed
 * to be the previous indent -- confirm.
 */
6016 html_h2(HANDLER_S
*hd
, int ch
, int cmd
)
6019 if(PASS_HTML(hd
->html_data
)){
6020 html_handoff(hd
, ch
);
6023 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
6024 HTML_ULINE(hd
->html_data
, 1);
6025 hd
->x
^= HTML_HX_ULINE
; /* only once! */
6028 html_handoff(hd
, (ch
< 128 && islower((unsigned char) ch
))
6029 ? toupper((unsigned char) ch
) : ch
);
6032 else if(cmd
== GF_RESET
){
6033 if(PASS_HTML(hd
->html_data
)){
6034 html_output_raw_tag(hd
->html_data
, "h2");
6038 * Bold, large font, flush-left. One or two blank lines
6041 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
6042 hd
->x
= HTML_HX_CENTER
;
6046 hd
->x
|= HTML_HX_ULINE
;
6048 CENTER_BIT(hd
->html_data
) = 0;
6049 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6050 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6051 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6052 html_blank(hd
->html_data
, 1);
6055 else if(cmd
== GF_EOD
){
6056 if(PASS_HTML(hd
->html_data
)){
6057 html_output_string(hd
->html_data
, "</h2>");
6061 * restore previous centering, and indent level
6063 if(!(hd
->x
& HTML_HX_ULINE
))
6064 HTML_ULINE(hd
->html_data
, 0);
6066 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6067 html_blank(hd
->html_data
, 1);
6068 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6069 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6073 return(1); /* get linked */
6078 * HTML <H3> (Headings 3) element handler
/*
 * html_h3: <H3> handler.
 * Data: when not passing HTML, underlining is switched on at the first
 * non-space character (HTML_HX_ULINE flag in hd->x); the character is
 * then handed off unchanged (no upper-casing, unlike html_h2).
 * GF_RESET (not passing HTML): remembers centering in hd->x, sets
 * HTML_HX_ULINE, clears CENTER_BIT, sets the indent to 2 keeping
 * html_indent()'s return value in hd->y, saves wrapcol in hd->z and
 * narrows it to WRAP_COLS - 8, then html_blank().
 * GF_EOD: turns underline off if it was turned on, restores indent,
 * centering and wrapcol.  Returns 1 (handler is linked).
 */
6081 html_h3(HANDLER_S
*hd
, int ch
, int cmd
)
6084 if(!PASS_HTML(hd
->html_data
)){
6085 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
6086 HTML_ULINE(hd
->html_data
, 1);
6087 hd
->x
^= HTML_HX_ULINE
; /* only once! */
6091 html_handoff(hd
, ch
);
6093 else if(cmd
== GF_RESET
){
6094 if(PASS_HTML(hd
->html_data
)){
6095 html_output_raw_tag(hd
->html_data
, "h3");
6099 * Italic, large font, slightly indented from the left
6100 * margin. One or two blank lines above and below.
6102 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
6103 hd
->x
= HTML_HX_CENTER
;
6107 hd
->x
|= HTML_HX_ULINE
;
6108 CENTER_BIT(hd
->html_data
) = 0;
6109 hd
->y
= html_indent(hd
->html_data
, 2, HTML_ID_SET
);
6110 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6111 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6112 html_blank(hd
->html_data
, 1);
6115 else if(cmd
== GF_EOD
){
6116 if(PASS_HTML(hd
->html_data
)){
6117 html_output_string(hd
->html_data
, "</h3>");
6121 * restore previous centering, and indent level
6123 if(!(hd
->x
& HTML_HX_ULINE
))
6124 HTML_ULINE(hd
->html_data
, 0);
6126 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6127 html_blank(hd
->html_data
, 1);
6128 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6129 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6133 return(1); /* get linked */
6138 * HTML <H4> (Headings 4) element handler
/*
 * html_h4: <H4> handler.  Character data goes to html_handoff().
 * GF_RESET (not passing HTML): saves CENTER_BIT in hd->x and clears it,
 * sets the indent to 4 keeping html_indent()'s return value in hd->y,
 * saves wrapcol in hd->z and narrows it to WRAP_COLS - 8, html_blank().
 * GF_EOD: restores indent (hd->y), CENTER_BIT (hd->x) and wrapcol (hd->z)
 * after an html_blank().  Returns 1 (handler is linked).
 */
6141 html_h4(HANDLER_S
*hd
, int ch
, int cmd
)
6144 html_handoff(hd
, ch
);
6146 else if(cmd
== GF_RESET
){
6147 if(PASS_HTML(hd
->html_data
)){
6148 html_output_raw_tag(hd
->html_data
, "h4");
6152 * Bold, normal font, indented more than H3. One blank line
6155 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6156 CENTER_BIT(hd
->html_data
) = 0;
6157 hd
->y
= html_indent(hd
->html_data
, 4, HTML_ID_SET
);
6158 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6159 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6160 html_blank(hd
->html_data
, 1);
6163 else if(cmd
== GF_EOD
){
6164 if(PASS_HTML(hd
->html_data
)){
6165 html_output_string(hd
->html_data
, "</h4>");
6169 * restore previous centering, and indent level
6171 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6172 html_blank(hd
->html_data
, 1);
6173 CENTER_BIT(hd
->html_data
) = hd
->x
;
6174 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6178 return(1); /* get linked */
6183 * HTML <H5> (Headings 5) element handler
/*
 * html_h5: <H5> handler; identical in visible shape to html_h4 except the
 * tag names and an indent of 6.  Saves CENTER_BIT / indent / wrapcol in
 * hd->x / hd->y / hd->z on GF_RESET and restores them on GF_EOD.
 * Returns 1 (handler is linked).
 */
6186 html_h5(HANDLER_S
*hd
, int ch
, int cmd
)
6189 html_handoff(hd
, ch
);
6191 else if(cmd
== GF_RESET
){
6192 if(PASS_HTML(hd
->html_data
)){
6193 html_output_raw_tag(hd
->html_data
, "h5");
6197 * Italic, normal font, indented as H4. One blank line
6200 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6201 CENTER_BIT(hd
->html_data
) = 0;
6202 hd
->y
= html_indent(hd
->html_data
, 6, HTML_ID_SET
);
6203 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6204 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6205 html_blank(hd
->html_data
, 1);
6208 else if(cmd
== GF_EOD
){
6209 if(PASS_HTML(hd
->html_data
)){
6210 html_output_string(hd
->html_data
, "</h5>");
6214 * restore previous centering, and indent level
6216 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6217 html_blank(hd
->html_data
, 1);
6218 CENTER_BIT(hd
->html_data
) = hd
->x
;
6219 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6223 return(1); /* get linked */
6228 * HTML <H6> (Headings 6) element handler
/*
 * html_h6: <H6> handler; identical in visible shape to html_h4 except the
 * tag names and an indent of 8.  Saves CENTER_BIT / indent / wrapcol in
 * hd->x / hd->y / hd->z on GF_RESET and restores them on GF_EOD.
 * Returns 1 (handler is linked).
 */
6231 html_h6(HANDLER_S
*hd
, int ch
, int cmd
)
6234 html_handoff(hd
, ch
);
6236 else if(cmd
== GF_RESET
){
6237 if(PASS_HTML(hd
->html_data
)){
6238 html_output_raw_tag(hd
->html_data
, "h6");
6242 * Bold, indented same as normal text, more than H5. One
6245 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6246 CENTER_BIT(hd
->html_data
) = 0;
6247 hd
->y
= html_indent(hd
->html_data
, 8, HTML_ID_SET
);
6248 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6249 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6250 html_blank(hd
->html_data
, 1);
6253 else if(cmd
== GF_EOD
){
6254 if(PASS_HTML(hd
->html_data
)){
6255 html_output_string(hd
->html_data
, "</h6>");
6259 * restore previous centering, and indent level
6261 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6262 html_blank(hd
->html_data
, 1);
6263 CENTER_BIT(hd
->html_data
) = hd
->x
;
6264 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6268 return(1); /* get linked */
6273 * HTML <BlockQuote> element handler
/*
 * html_blockquote: <BLOCKQUOTE> handler.  Character data goes to
 * html_handoff().
 * GF_RESET (not passing HTML): increases the indent by HTML_BQ_INDENT (6),
 * emits a blank line with wrapstate temporarily cleared, and reduces
 * wrapcol by the same amount so both margins move in.
 * GF_EOD: emits a blank line, removes the extra indent with wrapstate
 * temporarily cleared, and widens wrapcol again.  Returns 1.
 * NOTE(review): the declaration of the local "j" is not visible in this
 * listing.
 */
6276 html_blockquote(HANDLER_S
*hd
, int ch
, int cmd
)
6279 #define HTML_BQ_INDENT 6
6282 html_handoff(hd
, ch
);
6284 else if(cmd
== GF_RESET
){
6285 if(PASS_HTML(hd
->html_data
)){
6286 html_output_raw_tag(hd
->html_data
, "blockquote");
6290 * A typical rendering might be a slight extra left and
6291 * right indent, and/or italic font. The Blockquote element
6292 * causes a paragraph break, and typically provides space
6293 * above and below the quote.
6295 html_indent(hd
->html_data
, HTML_BQ_INDENT
, HTML_ID_INC
);
6296 j
= HD(hd
->html_data
)->wrapstate
;
6297 HD(hd
->html_data
)->wrapstate
= 0;
6298 html_blank(hd
->html_data
, 1);
6299 HD(hd
->html_data
)->wrapstate
= j
;
6300 HD(hd
->html_data
)->wrapcol
-= HTML_BQ_INDENT
;
6303 else if(cmd
== GF_EOD
){
6304 if(PASS_HTML(hd
->html_data
)){
6305 html_output_string(hd
->html_data
, "</blockquote>");
6308 html_blank(hd
->html_data
, 1);
6310 j
= HD(hd
->html_data
)->wrapstate
;
6311 HD(hd
->html_data
)->wrapstate
= 0;
6312 html_indent(hd
->html_data
, -(HTML_BQ_INDENT
), HTML_ID_INC
);
6313 HD(hd
->html_data
)->wrapstate
= j
;
6314 HD(hd
->html_data
)->wrapcol
+= HTML_BQ_INDENT
;
6318 return(1); /* get linked */
6323 * HTML <Address> element handler
/*
 * html_address: <ADDRESS> handler.  Character data goes to html_handoff().
 * GF_RESET (not passing HTML): increases the indent by HTML_ADD_INDENT (2)
 * and emits a blank line with wrapstate temporarily cleared.
 * GF_EOD: emits a blank line and removes the extra indent, again with
 * wrapstate temporarily cleared.  Unlike html_blockquote, wrapcol is not
 * adjusted.  Returns 1 (handler is linked).
 * NOTE(review): the inner rendering remark below still talks about
 * "Blockquote"; it appears to have been copied from html_blockquote.
 */
6326 html_address(HANDLER_S
*hd
, int ch
, int cmd
)
6329 #define HTML_ADD_INDENT 2
6332 html_handoff(hd
, ch
);
6334 else if(cmd
== GF_RESET
){
6335 if(PASS_HTML(hd
->html_data
)){
6336 html_output_raw_tag(hd
->html_data
, "address");
6340 * A typical rendering might be a slight extra left and
6341 * right indent, and/or italic font. The Blockquote element
6342 * causes a paragraph break, and typically provides space
6343 * above and below the quote.
6345 html_indent(hd
->html_data
, HTML_ADD_INDENT
, HTML_ID_INC
);
6346 j
= HD(hd
->html_data
)->wrapstate
;
6347 HD(hd
->html_data
)->wrapstate
= 0;
6348 html_blank(hd
->html_data
, 1);
6349 HD(hd
->html_data
)->wrapstate
= j
;
6352 else if(cmd
== GF_EOD
){
6353 if(PASS_HTML(hd
->html_data
)){
6354 html_output_string(hd
->html_data
, "</address>");
6357 html_blank(hd
->html_data
, 1);
6359 j
= HD(hd
->html_data
)->wrapstate
;
6360 HD(hd
->html_data
)->wrapstate
= 0;
6361 html_indent(hd
->html_data
, -(HTML_ADD_INDENT
), HTML_ID_INC
);
6362 HD(hd
->html_data
)->wrapstate
= j
;
6366 return(1); /* get linked */
6371 * HTML <PRE> (Preformatted Text) element handler
/*
 * html_pre: <PRE> handler.
 * Data handling is a small state machine kept in hd->y that watches for
 * CR/LF: per the remarks below it drops a CRLF that directly follows the
 * opening tag and, when passing HTML, replaces later CRLFs with "<br />"
 * so hard newlines survive; a CR not followed by LF is passed through raw.
 * GF_RESET: emits the raw "pre" tag when passing HTML, calls html_blank(),
 * saves wrapstate in hd->x and turns wrapping off.
 * GF_EOD: emits "</pre>" when passing HTML, restores wrapstate from hd->x
 * and calls html_blank().
 * NOTE(review): the switch/case lines of the state machine and the final
 * return are missing from this listing; the state values cannot be
 * confirmed here.
 */
6374 html_pre(HANDLER_S
*hd
, int ch
, int cmd
)
6378 * remove CRLF after '>' in element.
6379 * We see CRLF because wrapstate is off.
6388 html_handoff(hd
, '\015');
6399 /* passing tags? replace CRLF with <BR> to make
6400 * sure hard newline survives in the end...
6402 if(PASS_HTML(hd
->html_data
))
6403 hd
->y
= 4; /* keep looking for CRLF */
6405 hd
->y
= 0; /* stop looking */
6420 html_output_string(hd
->html_data
, "<br />");
6424 html_handoff(hd
, '\015'); /* not CRLF, pass raw CR */
6428 default : /* zero case */
6432 html_handoff(hd
, ch
);
6434 else if(cmd
== GF_RESET
){
6436 if(PASS_HTML(hd
->html_data
)){
6437 html_output_raw_tag(hd
->html_data
, "pre");
6441 hd
->html_data
->f1
= DFL
; \
6443 html_blank(hd
->html_data
, 1);
6444 hd
->x
= HD(hd
->html_data
)->wrapstate
;
6445 HD(hd
->html_data
)->wrapstate
= 0;
6448 else if(cmd
== GF_EOD
){
6449 if(PASS_HTML(hd
->html_data
)){
6450 html_output_string(hd
->html_data
, "</pre>");
6453 HD(hd
->html_data
)->wrapstate
= (hd
->x
!= 0);
6454 html_blank(hd
->html_data
, 0);
6463 * HTML <CENTER> (Centered Text) element handler
/*
 * html_center: <CENTER> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "center" tag when passing HTML; turns
 * CENTER_BIT on.  GF_EOD: emits "</center>" when passing HTML; turns
 * CENTER_BIT off.
 * NOTE(review): CENTER_BIT is cleared rather than restored, so a <CENTER>
 * nested inside another centered region would end centering early --
 * confirm whether that matters.  The return statement is not visible in
 * this listing.
 */
6466 html_center(HANDLER_S
*hd
, int ch
, int cmd
)
6469 html_handoff(hd
, ch
);
6471 else if(cmd
== GF_RESET
){
6472 if(PASS_HTML(hd
->html_data
)){
6473 html_output_raw_tag(hd
->html_data
, "center");
6476 /* turn ON the centered bit */
6477 CENTER_BIT(hd
->html_data
) = 1;
6480 else if(cmd
== GF_EOD
){
6481 if(PASS_HTML(hd
->html_data
)){
6482 html_output_string(hd
->html_data
, "</center>");
6485 /* turn OFF the centered bit */
6486 CENTER_BIT(hd
->html_data
) = 0;
6495 * HTML <DIV> (Document Divisions) element handler
/*
 * html_div: <DIV> handler.  Character data goes to html_handoff().
 * GF_RESET: emits the raw "div" tag when passing HTML; otherwise, for an
 * ALIGN attribute, saves the current centered bit in hd->x and the current
 * indent in hd->y, starts a new line, enables centering only when the
 * value is "CENTER", and resets the indent to 0.
 * GF_EOD: emits "</div>" when passing HTML; otherwise restores the
 * centered bit from hd->x and the indent from hd->y.
 * FIX: the centered bit used to be restored from hd->y (the saved indent)
 * instead of hd->x where GF_RESET saved it.
 * NOTE(review): some original lines (loop condition, braces, return) are
 * missing from this listing.
 */
6498 html_div(HANDLER_S
*hd
, int ch
, int cmd
)
6501 html_handoff(hd
, ch
);
6503 else if(cmd
== GF_RESET
){
6504 if(PASS_HTML(hd
->html_data
)){
6505 html_output_raw_tag(hd
->html_data
, "div");
6510 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6513 if(!strucmp(p
->attribute
, "ALIGN")){
6515 /* remember previous values */
6516 hd
->x
= CENTER_BIT(hd
->html_data
);
6517 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
6519 html_blank(hd
->html_data
, 0);
6520 CENTER_BIT(hd
->html_data
) = !strucmp(p
->value
, "CENTER");
6521 html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6522 /* NOTE: "RIGHT" not supported yet */
6527 else if(cmd
== GF_EOD
){
6528 if(PASS_HTML(hd
->html_data
)){
6529 html_output_string(hd
->html_data
, "</div>");
6532 /* restore centered bit and indentiousness */
6533 CENTER_BIT(hd
->html_data
) = hd
->x
;
6534 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6535 html_blank(hd
->html_data
, 0);
6544 * HTML <SPAN> (Text Span) element handler
/*
 * html_span: <SPAN> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "span" tag and GF_EOD emits "</span>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6547 html_span(HANDLER_S
*hd
, int ch
, int cmd
)
6549 if(PASS_HTML(hd
->html_data
)){
6551 html_handoff(hd
, ch
);
6553 else if(cmd
== GF_RESET
){
6554 html_output_raw_tag(hd
->html_data
, "span");
6556 else if(cmd
== GF_EOD
){
6557 html_output_string(hd
->html_data
, "</span>");
6568 * HTML <KBD> (Text Kbd) element handler
/*
 * html_kbd: <KBD> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "kbd" tag and GF_EOD emits "</kbd>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6571 html_kbd(HANDLER_S
*hd
, int ch
, int cmd
)
6573 if(PASS_HTML(hd
->html_data
)){
6575 html_handoff(hd
, ch
);
6577 else if(cmd
== GF_RESET
){
6578 html_output_raw_tag(hd
->html_data
, "kbd");
6580 else if(cmd
== GF_EOD
){
6581 html_output_string(hd
->html_data
, "</kbd>");
6592 * HTML <DFN> (Text Definition) element handler
/*
 * html_dfn: <DFN> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "dfn" tag and GF_EOD emits "</dfn>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6595 html_dfn(HANDLER_S
*hd
, int ch
, int cmd
)
6597 if(PASS_HTML(hd
->html_data
)){
6599 html_handoff(hd
, ch
);
6601 else if(cmd
== GF_RESET
){
6602 html_output_raw_tag(hd
->html_data
, "dfn");
6604 else if(cmd
== GF_EOD
){
6605 html_output_string(hd
->html_data
, "</dfn>");
6616 * HTML <TT> (Text Tt) element handler
/*
 * html_tt: <TT> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "tt" tag and GF_EOD emits "</tt>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6619 html_tt(HANDLER_S
*hd
, int ch
, int cmd
)
6621 if(PASS_HTML(hd
->html_data
)){
6623 html_handoff(hd
, ch
);
6625 else if(cmd
== GF_RESET
){
6626 html_output_raw_tag(hd
->html_data
, "tt");
6628 else if(cmd
== GF_EOD
){
6629 html_output_string(hd
->html_data
, "</tt>");
6640 * HTML <VAR> (Text Var) element handler
/*
 * html_var: <VAR> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "var" tag and GF_EOD emits "</var>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6643 html_var(HANDLER_S
*hd
, int ch
, int cmd
)
6645 if(PASS_HTML(hd
->html_data
)){
6647 html_handoff(hd
, ch
);
6649 else if(cmd
== GF_RESET
){
6650 html_output_raw_tag(hd
->html_data
, "var");
6652 else if(cmd
== GF_EOD
){
6653 html_output_string(hd
->html_data
, "</var>");
6664 * HTML <SAMP> (Text Samp) element handler
/*
 * html_samp: <SAMP> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "samp" tag and GF_EOD emits "</samp>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6667 html_samp(HANDLER_S
*hd
, int ch
, int cmd
)
6669 if(PASS_HTML(hd
->html_data
)){
6671 html_handoff(hd
, ch
);
6673 else if(cmd
== GF_RESET
){
6674 html_output_raw_tag(hd
->html_data
, "samp");
6676 else if(cmd
== GF_EOD
){
6677 html_output_string(hd
->html_data
, "</samp>");
6688 * HTML <SUP> (Text Superscript) element handler
/*
 * html_sup: <SUP> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "sup" tag and GF_EOD emits "</sup>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6691 html_sup(HANDLER_S
*hd
, int ch
, int cmd
)
6693 if(PASS_HTML(hd
->html_data
)){
6695 html_handoff(hd
, ch
);
6697 else if(cmd
== GF_RESET
){
6698 html_output_raw_tag(hd
->html_data
, "sup");
6700 else if(cmd
== GF_EOD
){
6701 html_output_string(hd
->html_data
, "</sup>");
6712 * HTML <SUB> (Text Subscript) element handler
/*
 * html_sub: <SUB> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "sub" tag and GF_EOD emits "</sub>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6715 html_sub(HANDLER_S
*hd
, int ch
, int cmd
)
6717 if(PASS_HTML(hd
->html_data
)){
6719 html_handoff(hd
, ch
);
6721 else if(cmd
== GF_RESET
){
6722 html_output_raw_tag(hd
->html_data
, "sub");
6724 else if(cmd
== GF_EOD
){
6725 html_output_string(hd
->html_data
, "</sub>");
6736 * HTML <CITE> (Text Citation) element handler
/*
 * html_cite: <CITE> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "cite" tag and GF_EOD emits "</cite>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6739 html_cite(HANDLER_S
*hd
, int ch
, int cmd
)
6741 if(PASS_HTML(hd
->html_data
)){
6743 html_handoff(hd
, ch
);
6745 else if(cmd
== GF_RESET
){
6746 html_output_raw_tag(hd
->html_data
, "cite");
6748 else if(cmd
== GF_EOD
){
6749 html_output_string(hd
->html_data
, "</cite>");
6760 * HTML <CODE> (Text Code) element handler
/*
 * html_code: <CODE> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "code" tag and GF_EOD emits "</code>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6763 html_code(HANDLER_S
*hd
, int ch
, int cmd
)
6765 if(PASS_HTML(hd
->html_data
)){
6767 html_handoff(hd
, ch
);
6769 else if(cmd
== GF_RESET
){
6770 html_output_raw_tag(hd
->html_data
, "code");
6772 else if(cmd
== GF_EOD
){
6773 html_output_string(hd
->html_data
, "</code>");
6784 * HTML <INS> (Text Inserted) element handler
/*
 * html_ins: <INS> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "ins" tag and GF_EOD emits "</ins>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6787 html_ins(HANDLER_S
*hd
, int ch
, int cmd
)
6789 if(PASS_HTML(hd
->html_data
)){
6791 html_handoff(hd
, ch
);
6793 else if(cmd
== GF_RESET
){
6794 html_output_raw_tag(hd
->html_data
, "ins");
6796 else if(cmd
== GF_EOD
){
6797 html_output_string(hd
->html_data
, "</ins>");
6808 * HTML <DEL> (Text Deleted) element handler
/*
 * html_del: <DEL> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "del" tag and GF_EOD emits "</del>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6811 html_del(HANDLER_S
*hd
, int ch
, int cmd
)
6813 if(PASS_HTML(hd
->html_data
)){
6815 html_handoff(hd
, ch
);
6817 else if(cmd
== GF_RESET
){
6818 html_output_raw_tag(hd
->html_data
, "del");
6820 else if(cmd
== GF_EOD
){
6821 html_output_string(hd
->html_data
, "</del>");
6832 * HTML <ABBR> (Text Abbreviation) element handler
/*
 * html_abbr: <ABBR> handler.  Only acts when PASS_HTML is set: data is
 * handed off, GF_RESET emits the raw "abbr" tag and GF_EOD emits "</abbr>".
 * NOTE(review): the non-pass-through path and return are not visible here.
 */
6835 html_abbr(HANDLER_S
*hd
, int ch
, int cmd
)
6837 if(PASS_HTML(hd
->html_data
)){
6839 html_handoff(hd
, ch
);
6841 else if(cmd
== GF_RESET
){
6842 html_output_raw_tag(hd
->html_data
, "abbr");
6844 else if(cmd
== GF_EOD
){
6845 html_output_string(hd
->html_data
, "</abbr>");
6856 * HTML <SCRIPT> element handler
/*
 * html_script: <SCRIPT> handler.  Per the remark below, the handler is
 * linked in and everything inside the element is discarded.
 * NOTE(review): the body's statements are not visible in this listing.
 */
6859 html_script(HANDLER_S
*hd
, int ch
, int cmd
)
6861 /* Link in and drop everything within on the floor */
6867 * HTML <APPLET> element handler
/*
 * html_applet: <APPLET> handler.  Per the remark below, the handler is
 * linked in and everything inside the element is discarded.
 * NOTE(review): the body's statements are not visible in this listing.
 */
6870 html_applet(HANDLER_S
*hd
, int ch
, int cmd
)
6872 /* Link in and drop everything within on the floor */
6878 * HTML <STYLE> CSS element handler
/*
 * html_style: <STYLE> handler.  When passing HTML it accumulates the
 * element's characters in the function-static store css_stuff:
 * GF_RESET releases any previous store and allocates a fresh CharStar
 * store; data bytes are appended with so_writec(); GF_EOD releases the
 * store (the collected CSS is not yet forwarded -- see the TODO below).
 * NOTE(review): css_stuff is static, so the function is not reentrant;
 * the guards around so_give()/so_writec() are not visible in this listing.
 */
6881 html_style(HANDLER_S
*hd
, int ch
, int cmd
)
6883 static STORE_S
*css_stuff
;
6885 if(PASS_HTML(hd
->html_data
)){
6887 /* collect style settings */
6888 so_writec(ch
, css_stuff
);
6890 else if(cmd
== GF_RESET
){
6892 so_give(&css_stuff
);
6894 css_stuff
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6896 else if(cmd
== GF_EOD
){
6898 * TODO: strip anything mischievous and pass on
6901 so_give(&css_stuff
);
6909 * RSS 2.0 <RSS> version
/*
 * rss_rss: <RSS> element handler.  On GF_RESET it scans the element's
 * attributes for VERSION; a value of "2.0" returns 0 (handler not linked).
 * Otherwise gf_error("Incompatible RSS version") is raised.
 * NOTE(review): the attribute loop's condition/step lines are missing from
 * this listing; the trailing remark implies gf_error() does not return.
 */
6912 rss_rss(HANDLER_S
*hd
, int ch
, int cmd
)
6914 if(cmd
== GF_RESET
){
6917 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6920 if(!strucmp(p
->attribute
, "VERSION")){
6921 if(p
->value
&& !strucmp(p
->value
,"2.0"))
6922 return(0); /* do not link in */
6925 gf_error("Incompatible RSS version");
6929 return(0); /* not linked or error means we never get here */
/*
 * rss_channel: <CHANNEL> handler.  Character data goes to html_handoff().
 * On GF_RESET it allocates a zero-filled RSS_FEED_S with fs_get()/memset()
 * and stores it in RSS_FEED(hd->html_data).  Returns 1 (handler is linked).
 * NOTE(review): any previously stored feed is overwritten here; whether it
 * is freed first is not visible in this listing.
 */
6936 rss_channel(HANDLER_S
*hd
, int ch
, int cmd
)
6939 html_handoff(hd
, ch
);
6941 else if(cmd
== GF_RESET
){
6944 feed
= RSS_FEED(hd
->html_data
) = fs_get(sizeof(RSS_FEED_S
));
6945 memset(feed
, 0, sizeof(RSS_FEED_S
));
6948 return(1); /* link in */
/*
 * rss_title: <TITLE> handler.  Collects the element's characters in the
 * function-static store title_so (allocated on GF_RESET when a feed
 * exists).  On GF_EOD the collected text, with leading whitespace skipped,
 * is copied with cpystr() into the title of the last item on feed->items
 * if there is one, otherwise into feed->title; an existing title is
 * released with fs_give() first.  Returns 1 (handler is linked).
 * NOTE(review): the guards around these statements and the release of
 * title_so are not visible in this listing.
 */
6955 rss_title(HANDLER_S
*hd
, int ch
, int cmd
)
6957 static STORE_S
*title_so
;
6962 so_writec(ch
, title_so
);
6965 else if(cmd
== GF_RESET
){
6966 if(RSS_FEED(hd
->html_data
)){
6967 /* prepare for data */
6971 title_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6974 else if(cmd
== GF_EOD
){
6976 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
6980 if((rip
= feed
->items
) != NULL
){
6981 for(; rip
->next
; rip
= rip
->next
)
6985 fs_give((void **) &rip
->title
);
6987 rip
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
6991 fs_give((void **) &feed
->title
);
6993 feed
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
7001 return(1); /* link in */
/*
 * rss_image: <IMAGE> handler.  Collects the element's characters in the
 * function-static store img_so (allocated on GF_RESET when a feed exists).
 * On GF_EOD the collected text, with leading whitespace skipped, replaces
 * feed->image (old value released with fs_give()).  Returns 1.
 * NOTE(review): the guards around these statements and the release of
 * img_so are not visible in this listing.
 */
7008 rss_image(HANDLER_S
*hd
, int ch
, int cmd
)
7010 static STORE_S
*img_so
;
7015 so_writec(ch
, img_so
);
7018 else if(cmd
== GF_RESET
){
7019 if(RSS_FEED(hd
->html_data
)){
7020 /* prepare to collect data */
7024 img_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7027 else if(cmd
== GF_EOD
){
7029 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7033 fs_give((void **) &feed
->image
);
7035 feed
->image
= cpystr(rss_skip_whitespace(so_text(img_so
)));
7042 return(1); /* link in */
/*
 * rss_link: <LINK> handler.  Collects the element's characters in the
 * function-static store link_so (allocated on GF_RESET when a feed
 * exists).  On GF_EOD the collected text, with leading whitespace skipped,
 * is stored as the link of the last item on feed->items if there is one,
 * otherwise as feed->link; the old value is released with fs_give().
 * Returns 1 (handler is linked).
 * NOTE(review): the guards around these statements and the release of
 * link_so are not visible in this listing.
 */
7049 rss_link(HANDLER_S
*hd
, int ch
, int cmd
)
7051 static STORE_S
*link_so
;
7056 so_writec(ch
, link_so
);
7059 else if(cmd
== GF_RESET
){
7060 if(RSS_FEED(hd
->html_data
)){
7061 /* prepare to collect data */
7065 link_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7068 else if(cmd
== GF_EOD
){
7070 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7074 if((rip
= feed
->items
) != NULL
){
7075 for(; rip
->next
; rip
= rip
->next
)
7079 fs_give((void **) &rip
->link
);
7081 rip
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7085 fs_give((void **) &feed
->link
);
7087 feed
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7095 return(1); /* link in */
7099 * RSS 2.0 <DESCRIPTION>
/*
 * rss_description: <DESCRIPTION> handler.  Collects the element's
 * characters in the function-static store desc_so (allocated on GF_RESET
 * when a feed exists).  On GF_EOD the collected text, with leading
 * whitespace skipped, is stored as the description of the last item on
 * feed->items if there is one, otherwise as feed->description; an existing
 * description is released with fs_give() first.  Returns 1.
 * NOTE(review): the outer guards and the release of desc_so are not
 * visible in this listing.
 */
7102 rss_description(HANDLER_S
*hd
, int ch
, int cmd
)
7104 static STORE_S
*desc_so
;
7109 so_writec(ch
, desc_so
);
7112 else if(cmd
== GF_RESET
){
7113 if(RSS_FEED(hd
->html_data
)){
7114 /* prepare to collect data */
7118 desc_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7121 else if(cmd
== GF_EOD
){
7123 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7127 if((rip
= feed
->items
) != NULL
){
7128 for(; rip
->next
; rip
= rip
->next
)
7131 if(rip
->description
)
7132 fs_give((void **) &rip
->description
);
7134 rip
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7137 if(feed
->description
)
7138 fs_give((void **) &feed
->description
);
7140 feed
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7148 return(1); /* link in */
7152 * RSS 2.0 <TTL> (in minutes)
/*
 * rss_ttl: <TTL> handler.  Each digit character received is folded into
 * feed->ttl as a decimal number (ttl = ttl * 10 + digit); non-digits are
 * ignored.  Returns 1 (handler is linked).
 * NOTE(review): feed comes from RSS_FEED() and no NULL check is visible
 * before feed->ttl is touched (other RSS handlers test RSS_FEED first);
 * the accumulation has no overflow bound -- confirm against full source.
 */
7155 rss_ttl(HANDLER_S
*hd
, int ch
, int cmd
)
7157 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7160 if(isdigit((unsigned char) ch
))
7161 feed
->ttl
= ((feed
->ttl
* 10) + (ch
- '0'));
7163 else if(cmd
== GF_RESET
){
7164 /* prepare to collect data */
7167 else if(cmd
== GF_EOD
){
7170 return(1); /* link in */
/*
 * rss_item: <ITEM> handler.  On GF_RESET, when a feed exists, it walks to
 * the end of feed->items (counting entries against RSS_ITEM_LIMIT) and
 * appends a new zero-filled RSS_ITEM_S there.  Returns 0 (not linked), so
 * the item's child elements are dispatched to their own handlers, which
 * fill in the last item.
 * NOTE(review): what happens when the limit is exceeded is not visible in
 * this listing.
 */
7177 rss_item(HANDLER_S
*hd
, int ch
, int cmd
)
7179 /* BUG: verify no ITEM nesting? */
7180 if(cmd
== GF_RESET
){
7183 if((feed
= RSS_FEED(hd
->html_data
)) != NULL
){
7187 for(rip
= &feed
->items
; *rip
; rip
= &(*rip
)->next
)
7188 if(++n
> RSS_ITEM_LIMIT
)
7191 *rip
= fs_get(sizeof(RSS_ITEM_S
));
7192 memset(*rip
, 0, sizeof(RSS_ITEM_S
));
7196 return(0); /* don't link in */
/*
 * rss_skip_whitespace: advances s past leading characters for which
 * isspace() is true, stopping at the first other character or the
 * terminating NUL.  Callers pass the result straight to cpystr(), so it
 * presumably returns the advanced pointer (return line not visible here).
 */
7201 rss_skip_whitespace(char *s
)
7203 for(; *s
&& isspace((unsigned char) *s
); s
++)
7211 * return the function associated with the given element name
/*
 * element_properties: linear search of the filter's element table
 * (ELEMENTS(fd), terminated by an entry with a NULL element name) for
 * el_name.  An entry matches when strucmp() reports equality, or -- for
 * entries flagged "alternate" -- when el_name is the entry's name followed
 * by a single trailing '/' (the self-closing spelling, e.g. "br/").
 * NOTE(review): the statement executed on a match and the not-found return
 * are not visible in this listing; callers test the result for non-NULL.
 */
7214 element_properties(FILTER_S
*fd
, char *el_name
)
7216 register ELPROP_S
*el_table
= ELEMENTS(fd
);
7217 size_t len_name
= strlen(el_name
);
7219 for(; el_table
->element
; el_table
++)
7220 if(!strucmp(el_name
, el_table
->element
)
7221 || (el_table
->alternate
7222 && len_name
== el_table
->len
+ 1
7223 && el_name
[el_table
->len
] == '/'
7224 && !struncmp(el_name
, el_table
->element
, el_table
->len
)))
7232 * collect element's name and any attribute/value pairs then
7233 * dispatch to the appropriate handler.
7235 * Returns 1 : got what we wanted
7236 * 0 : we need more data
7240 html_element_collector(FILTER_S
*fd
, int ch
)
7243 if(ED(fd
)->overrun
){
7245 * If problem processing, don't bother doing anything
7246 * internally, just return such that none of what we've
7247 * digested is displayed.
7249 HTML_DEBUG_EL("too long", ED(fd
));
7250 return(1); /* Let it go, Jim */
7252 else if(ED(fd
)->mkup_decl
){
7253 if(ED(fd
)->badform
){
7254 dprint((2, "-- html error: bad form: %.*s\n",
7255 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7257 * Invalid comment -- make some guesses as
7258 * to whether we should stop with this greater-than...
7260 if(ED(fd
)->buf
[0] != '-'
7262 || (ED(fd
)->buf
[1] == '-'
7263 && ED(fd
)->buf
[ED(fd
)->len
- 1] == '-'
7264 && ED(fd
)->buf
[ED(fd
)->len
- 2] == '-'))
7268 dprint((5, "-- html: OK: %.*s\n",
7269 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7270 if(ED(fd
)->start_comment
== ED(fd
)->end_comment
){
7271 if(ED(fd
)->len
> 10){
7272 ED(fd
)->buf
[ED(fd
)->len
- 2] = '\0';
7273 html_element_comment(fd
, ED(fd
)->buf
+ 2);
7278 /* else keep collecting comment below */
7281 else if(ED(fd
)->proc_inst
){
7282 return(1); /* return without display... */
7284 else if(!ED(fd
)->quoted
|| ED(fd
)->badform
){
7288 * We either have the whole thing or all that we could
7289 * salvage from it. Try our best...
7292 if(HD(fd
)->bitbucket
)
7293 return(1); /* element inside chtml clause! */
7295 if(!ED(fd
)->badform
&& html_element_flush(ED(fd
)))
7296 return(1); /* return without display... */
7299 * If we ran into an empty tag or we don't know how to deal
7300 * with it, just go on, ignoring it...
7302 if(ED(fd
)->element
&& (ep
= element_properties(fd
, ED(fd
)->element
))){
7304 /* dispatch the element's handler */
7305 HTML_DEBUG_EL(ED(fd
)->end_tag
? "POP" : "PUSH", ED(fd
));
7306 if(ED(fd
)->end_tag
){
7307 html_pop(fd
, ep
); /* remove it's handler */
7310 /* if a block element, pop any open <p>'s */
7314 for(tp
= HANDLERS(fd
); tp
&& EL(tp
)->handler
== html_p
; tp
= tp
->below
){
7315 HTML_DEBUG_EL("Unclosed <P>", ED(fd
));
7316 html_pop(fd
, EL(tp
));
7321 /* enforce table nesting */
7322 if(!strucmp(ep
->element
, "tr")){
7323 if(!HANDLERS(fd
) || (strucmp(EL(HANDLERS(fd
))->element
, "table") && strucmp(EL(HANDLERS(fd
))->element
, "tbody") && strucmp(EL(HANDLERS(fd
))->element
, "thead"))){
7324 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd
)) ? EL(HANDLERS(fd
))->element
: "NO-HANDLERS"));
7325 if(HANDLERS(fd
) && !strucmp(EL(HANDLERS(fd
))->element
,"tr")){
7326 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7327 html_pop(fd
, EL(HANDLERS(fd
)));
7330 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7331 html_push(fd
, element_properties(fd
, "table"));
7335 else if(!strucmp(ep
->element
, "td") || !strucmp(ep
->element
, "th")){
7337 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7338 html_push(fd
, element_properties(fd
, "table"));
7339 html_push(fd
, element_properties(fd
, "tr"));
7341 else if(strucmp(EL(HANDLERS(fd
))->element
, "tr")){
7342 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd
))->element
));
7343 html_push(fd
, element_properties(fd
, "tr"));
7345 else if(!strucmp(EL(HANDLERS(fd
))->element
, "td")){
7346 dprint((2, "-- html error: bad nesting popping <TD>"));
7347 html_pop(fd
, EL(HANDLERS(fd
)));
7351 /* add it's handler */
7352 if(html_push(fd
, ep
)){
7354 /* remove empty element */
7361 HTML_DEBUG_EL("IGNORED", ED(fd
));
7364 else{ /* else, empty or unrecognized */
7365 HTML_DEBUG_EL("?", ED(fd
));
7368 return(1); /* all done! see, that didn't hurt */
7371 else if(ch
== '/' && ED(fd
)->element
&& ED(fd
)->len
){
7377 if(ED(fd
)->mkup_decl
){
7378 if((ch
&= 0xff) == '-'){
7381 if(ED(fd
)->start_comment
)
7382 ED(fd
)->end_comment
= 1;
7384 ED(fd
)->start_comment
= 1;
7390 if(ED(fd
)->end_comment
)
7391 ED(fd
)->start_comment
= ED(fd
)->end_comment
= 0;
7394 * no "--" after ! or non-whitespace between comments - bad
7396 if(ED(fd
)->len
< 2 || (!ED(fd
)->start_comment
7397 && !ASCII_ISSPACE((unsigned char) ch
)))
7398 ED(fd
)->badform
= 1; /* non-comment! */
7404 * Remember the comment for possible later processing; if
7405 * it gets too long, remember the first and last few chars
7406 * so we know when to terminate (and throw some garbage
7407 * in between when we toss out what's in the middle).
7409 if(ED(fd
)->len
== HTML_BUF_LEN
){
7410 ED(fd
)->buf
[2] = ED(fd
)->buf
[3] = 'X';
7411 ED(fd
)->buf
[4] = ED(fd
)->buf
[ED(fd
)->len
- 2];
7412 ED(fd
)->buf
[5] = ED(fd
)->buf
[ED(fd
)->len
- 1];
7416 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7417 return(0); /* comments go in the bit bucket */
7419 else if(ED(fd
)->overrun
|| ED(fd
)->badform
){
7420 return(0); /* swallow char's until next '>' */
7422 else if(!ED(fd
)->element
&& !ED(fd
)->len
){
7423 if(ch
== '/'){ /* validate leading chars */
7424 ED(fd
)->end_tag
= 1;
7428 ED(fd
)->mkup_decl
= 1;
7432 ED(fd
)->proc_inst
= 1;
7435 else if(!isalpha((unsigned char) ch
))
7436 return(-1); /* can't be a tag! */
7438 else if(ch
== '\"' || ch
== '\''){
7439 if(!ED(fd
)->hit_equal
){
7440 ED(fd
)->badform
= 1; /* quote in element name?!? */
7445 if(ED(fd
)->quoted
== (char) ch
){
7446 /* end of a quoted value */
7448 if(ED(fd
)->len
&& html_element_flush(ED(fd
)))
7449 ED(fd
)->badform
= 1;
7451 return(0); /* continue collecting chars */
7453 /* ELSE fall thru writing other quoting char */
7456 ED(fd
)->quoted
= (char) ch
;
7457 ED(fd
)->was_quoted
= 1;
7458 return(0); /* need more data */
7462 ch
&= 0xff; /* strip any "literal" high bits */
7465 || strchr("#-.!", ch
)){
7466 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7467 ? HTML_BUF_LEN
:MAX_ELEMENT
)){
7468 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7471 ED(fd
)->overrun
= 1; /* flag it broken */
7473 else if(ASCII_ISSPACE((unsigned char) ch
) || ch
== '='){
7474 if((ED(fd
)->len
|| ED(fd
)->was_quoted
) && html_element_flush(ED(fd
))){
7475 ED(fd
)->badform
= 1;
7476 return(0); /* else, we ain't done yet */
7479 if(!ED(fd
)->hit_equal
)
7480 ED(fd
)->hit_equal
= (ch
== '=');
7482 else if(ch
== '/' && ED(fd
)->len
&& !ED(fd
)->element
){
7484 ep
= element_properties(fd
, ED(fd
)->buf
);
7487 ED(fd
)->badform
= 1;
7489 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7490 ? HTML_BUF_LEN
:MAX_ELEMENT
)){
7491 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
; /* add this exception */
7494 ED(fd
)->overrun
= 1;
7498 ED(fd
)->badform
= 1;
7501 ED(fd
)->badform
= 1; /* unrecognized data?? */
7503 return(0); /* keep collecting */
7508 * Element collector found complete string, integrate it and reset
7509 * internal collection buffer.
7511 * Returns zero if element collection buffer flushed, error flag otherwise
7514 html_element_flush(CLCTR_S
*el_data
)
7518 if(el_data
->hit_equal
){ /* adding a value */
7519 el_data
->hit_equal
= 0;
7520 if(el_data
->cur_attrib
){
7521 if(!el_data
->cur_attrib
->value
){
7522 el_data
->cur_attrib
->value
= cpystr(el_data
->len
7523 ? el_data
->buf
: "");
7526 dprint((2, "** element: unexpected value: %.10s...\n",
7527 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7532 dprint((2, "** element: missing attribute name: %.10s...\n",
7533 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7537 else if(el_data
->len
){
7538 if(!el_data
->element
){
7539 el_data
->element
= cpystr(el_data
->buf
);
7542 PARAMETER
*p
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
7543 memset(p
, 0, sizeof(PARAMETER
));
7544 if(el_data
->attribs
){
7545 el_data
->cur_attrib
->next
= p
;
7546 el_data
->cur_attrib
= p
;
7549 el_data
->attribs
= el_data
->cur_attrib
= p
;
7551 p
->attribute
= cpystr(el_data
->buf
);
7556 el_data
->was_quoted
= 0; /* reset collector buf and state */
7558 memset(el_data
->buf
, 0, HTML_BUF_LEN
);
7559 return(rv
); /* report whatever happened above */
7564 * html_element_comment - "Special" comment handling here
7567 html_element_comment(FILTER_S
*f
, char *s
)
7571 while(*s
&& ASCII_ISSPACE((unsigned char) *s
))
7575 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7577 if(!struncmp(s
, "chtml ", 6)){
7579 if(!struncmp(s
, "if ", 3)){
7580 HD(f
)->bitbucket
= 1; /* default is failure! */
7584 if(!struncmp(s
+ 1, "inemode=", 8)){
7585 if(!strucmp(s
= removing_quotes(s
+ 9), "function_key")
7586 && F_ON(F_USE_FK
, ps_global
))
7587 HD(f
)->bitbucket
= 0;
7588 else if(!strucmp(s
, "running"))
7589 HD(f
)->bitbucket
= 0;
7590 else if(!strucmp(s
, "PHONE_HOME") && ps_global
->phone_home
)
7591 HD(f
)->bitbucket
= 0;
7593 else if(!strucmp(s
, "os_windows"))
7594 HD(f
)->bitbucket
= 0;
7600 case '[' : /* test */
7601 if((p
= strindex(++s
, ']')) != NULL
){
7602 *p
= '\0'; /* tie off test string */
7603 removing_leading_white_space(s
);
7604 removing_trailing_white_space(s
);
7605 if(*s
== '-' && *(s
+1) == 'r'){ /* readable file? */
7606 for(s
+= 2; *s
&& ASCII_ISSPACE((unsigned char) *s
); s
++)
7610 HD(f
)->bitbucket
= (can_access(CHTML_VAR_EXPAND(removing_quotes(s
)),
7621 else if(!strucmp(s
, "else")){
7622 HD(f
)->bitbucket
= !HD(f
)->bitbucket
;
7624 else if(!strucmp(s
, "endif")){
7625 /* Clean up after chtml here */
7626 HD(f
)->bitbucket
= 0;
7629 else if(!HD(f
)->bitbucket
){
7630 if(!struncmp(s
, "#include ", 9)){
7631 char buf
[MAILTMPLEN
], *bufp
;
7632 int len
, end_of_line
;
7635 /* Include the named file */
7636 if(!struncmp(s
+= 9, "file=", 5)
7637 && (fp
= our_fopen(CHTML_VAR_EXPAND(removing_quotes(s
+5)), "r"))){
7638 html_element_output(f
, HTML_NEWLINE
);
7640 while(fgets(buf
, sizeof(buf
), fp
)){
7641 if((len
= strlen(buf
)) && buf
[len
-1] == '\n'){
7648 for(bufp
= buf
; len
; bufp
++, len
--)
7649 html_element_output(f
, (int) *bufp
);
7652 html_element_output(f
, HTML_NEWLINE
);
7656 html_element_output(f
, HTML_NEWLINE
);
7662 else if(!struncmp(s
, "#echo ", 6)){
7663 if(!struncmp(s
+= 6, "var=", 4)){
7664 char *p
, buf
[MAILTMPLEN
];
7666 extern char datestamp
[];
7668 if(!strcmp(s
= removing_quotes(s
+ 4), "ALPINE_VERSION")){
7671 else if(!strcmp(s
, "ALPINE_REVISION")){
7672 p
= get_alpine_revision_string(buf
, sizeof(buf
));
7674 else if(!strcmp(s
, "C_CLIENT_VERSION")){
7677 else if(!strcmp(s
, "ALPINE_COMPILE_DATE")){
7680 else if(!strcmp(s
, "ALPINE_TODAYS_DATE")){
7681 rfc822_date(p
= buf
);
7683 else if(!strcmp(s
, "_LOCAL_FULLNAME_")){
7684 p
= (ps_global
->VAR_LOCAL_FULLNAME
7685 && ps_global
->VAR_LOCAL_FULLNAME
[0])
7686 ? ps_global
->VAR_LOCAL_FULLNAME
7689 else if(!strcmp(s
, "_LOCAL_ADDRESS_")){
7690 p
= (ps_global
->VAR_LOCAL_ADDRESS
7691 && ps_global
->VAR_LOCAL_ADDRESS
[0])
7692 ? ps_global
->VAR_LOCAL_ADDRESS
7694 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7695 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7696 mail_free_address(&adr
);
7698 else if(!strcmp(s
, "_BUGS_FULLNAME_")){
7699 p
= (ps_global
->VAR_BUGS_FULLNAME
7700 && ps_global
->VAR_BUGS_FULLNAME
[0])
7701 ? ps_global
->VAR_BUGS_FULLNAME
7702 : "Place to report Alpine Bugs";
7704 else if(!strcmp(s
, "_BUGS_ADDRESS_")){
7705 p
= (ps_global
->VAR_BUGS_ADDRESS
7706 && ps_global
->VAR_BUGS_ADDRESS
[0])
7707 ? ps_global
->VAR_BUGS_ADDRESS
: "postmaster";
7708 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7709 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7710 mail_free_address(&adr
);
7712 else if(!strcmp(s
, "CURRENT_DIR")){
7713 getcwd(p
= buf
, sizeof(buf
));
7715 else if(!strcmp(s
, "HOME_DIR")){
7716 p
= ps_global
->home_dir
;
7718 else if(!strcmp(s
, "PINE_CONF_PATH")){
7719 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7720 p
= "/usr/local/lib/pine.conf";
7725 else if(!strcmp(s
, "PINE_CONF_FIXED_PATH")){
7726 #ifdef SYSTEM_PINERC_FIXED
7727 p
= SYSTEM_PINERC_FIXED
;
7729 p
= "/usr/local/lib/pine.conf.fixed";
7732 else if(!strcmp(s
, "PINE_INFO_PATH")){
7733 p
= SYSTEM_PINE_INFO_PATH
;
7735 else if(!strcmp(s
, "MAIL_SPOOL_PATH")){
7738 else if(!strcmp(s
, "MAIL_SPOOL_LOCK_PATH")){
7739 /* Don't put the leading /tmp/. */
7744 for(j
= 0, i
= 0; p
[i
] && j
< MAILTMPLEN
- 1; i
++){
7754 else if(!struncmp(s
, "VAR_", 4)){
7756 if(pith_opt_pretty_var_name
)
7757 p
= (*pith_opt_pretty_var_name
)(p
);
7759 else if(!struncmp(s
, "FEAT_", 5)){
7761 if(pith_opt_pretty_feature_name
)
7762 p
= (*pith_opt_pretty_feature_name
)(p
, -1);
7768 if(f
->f1
== WSPACE
){
7769 html_element_output(f
, ' ');
7770 f
->f1
= DFL
; /* clear it */
7774 html_element_output(f
, (int) *p
++);
7783 html_element_output(FILTER_S
*f
, int ch
)
7786 (*EL(HANDLERS(f
))->handler
)(HANDLERS(f
), ch
, GF_DATA
);
7791 #define ISHEX_DIGIT(X) (isdigit((X)) || \
7792 ((X) >= 'a' && (X) <= 'f') || \
7793 ((X) >= 'A' && (X) <= 'F'))
7796 * collect html entity and return its UCS value when done.
7798 * Returns HTML_MOREDATA : we need more data
7799 * HTML_ENTITY : entity collected
7800 * HTML_BADVALUE : good data, but no named match or out of range
7801 * HTML_BADDATA : invalid input
7804 * - entity format is "'&' tag ';'" and represents a literal char
7805 * - named entities are CASE SENSITIVE.
7806 * - numeric char references (where the tag is prefixed with a '#')
7807 * are a char with that number's value
7808 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7811 html_entity_collector(FILTER_S
*f
, int ch
, UCS
*ucs
, char **alt
)
7814 static char buf
[MAX_ENTITY
+2];
7817 if(len
== MAX_ENTITY
){
7821 ? (isalpha((unsigned char) ch
) || ch
== '#')
7822 : ((isdigit((unsigned char) ch
)
7823 || (len
== 1 && (unsigned char) ch
== 'x')
7824 || (len
== 1 &&(unsigned char) ch
== 'X')
7825 || (len
> 1 && isxdigit((unsigned char) ch
))
7826 || (isalpha((unsigned char) ch
) && buf
[0] != '#')))){
7828 return(HTML_MOREDATA
);
7830 else if(ch
== ';' || ASCII_ISSPACE((unsigned char) ch
)){
7831 buf
[len
] = '\0'; /* got something! */
7833 if(buf
[1] == 'x' || buf
[1] == 'X')
7834 *ucs
= (UCS
) strtoul(&buf
[2], NULL
, 16);
7836 *ucs
= (UCS
) strtoul(&buf
[1], NULL
, 10);
7840 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7841 if(entity_tab
[i
].value
== *ucs
){
7842 *alt
= entity_tab
[i
].plain
;
7848 return(HTML_ENTITY
);
7851 rv
= HTML_BADVALUE
; /* in case of no match */
7852 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7853 if(strcmp(entity_tab
[i
].name
, buf
) == 0){
7854 *ucs
= entity_tab
[i
].value
;
7856 *alt
= entity_tab
[i
].plain
;
7859 return(HTML_ENTITY
);
7864 rv
= HTML_BADDATA
; /* bogus input! */
7876 /*----------------------------------------------------------------------
7877 HTML text to plain text filter
7879 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7880 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7885 gf_html2plain(FILTER_S
*f
, int flg
)
7887 /* BUG: quote incoming \255 values (see "yuml" above!) */
7890 GF_INIT(f
, f
->next
);
7895 for(ii
= HTML_INDENT(f
); ii
> 0; ii
--)
7901 while(GF_GETC(f
, c
)){
7903 * First we have to collect any literal entities...
7904 * that is, IF we're not already collecting one
7905 * AND we're not in element's text or, if we are, we're
7906 * not in quoted text. Whew.
7912 switch(html_entity_collector(f
, c
, &ucs
, &alt
)){
7913 case HTML_MOREDATA
: /* more data required? */
7914 continue; /* go get another char */
7916 case HTML_BADVALUE
:
7918 /* if supplied, process bogus data */
7921 unsigned int uic
= *alt
;
7925 if(c
== '&' && !HD(f
)->quoted
){
7930 f
->t
= 0; /* don't come back next time */
7934 default : /* thing to process */
7935 f
->t
= 0; /* don't come back */
7938 * do something with UCS codepoint. If it's
7939 * not displayable then use the alt version
7941 * cvt UCS to UTF-8 and toss into next filter.
7943 if(ucs
> 127 && wcellwidth(ucs
) < 0){
7946 c
= MAKE_LITERAL(*alt
);
7953 c
= MAKE_LITERAL('?');
7956 unsigned char utf8buf
[8], *p1
, *p2
;
7958 p2
= utf8_put(p1
= (unsigned char *) utf8buf
, (unsigned long) ucs
);
7959 for(; p1
< p2
; p1
++){
7960 c
= MAKE_LITERAL(*p1
);
7970 else if(!PASS_HTML(f
) && c
== '&' && !HD(f
)->quoted
){
7976 * then we process whatever we got...
7982 GF_OP_END(f
); /* clean up our input pointers */
7984 else if(flg
== GF_EOD
){
7986 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f
))->element
));
7987 html_pop(f
, EL(HANDLERS(f
)));
7990 html_output(f
, HTML_NEWLINE
);
7992 HTML_ULINE(f
, ULINE_BIT(f
) = 0);
7995 HTML_BOLD(f
, BOLD_BIT(f
) = 0);
7998 fs_give((void **)&f
->line
);
8000 free_color_pair(&HD(f
)->color
);
8004 if(((HTML_OPT_S
*)f
->opt
)->base
)
8005 fs_give((void **) &((HTML_OPT_S
*)f
->opt
)->base
);
8010 (*f
->next
->f
)(f
->next
, GF_DATA
);
8011 (*f
->next
->f
)(f
->next
, GF_EOD
);
8013 else if(flg
== GF_RESET
){
8014 dprint((9, "-- gf_reset html2plain\n"));
8015 f
->data
= (HTML_DATA_S
*) fs_get(sizeof(HTML_DATA_S
));
8016 memset(f
->data
, 0, sizeof(HTML_DATA_S
));
8017 /* start with flowing text */
8018 HD(f
)->wrapstate
= !PASS_HTML(f
);
8019 HD(f
)->wrapcol
= WRAP_COLS(f
);
8020 f
->f1
= DFL
; /* state */
8021 f
->f2
= 0; /* chars in wrap buffer */
8022 f
->n
= 0L; /* chars on line so far */
8023 f
->linep
= f
->line
= (char *)fs_get(HTML_BUF_LEN
* sizeof(char));
8024 HD(f
)->line_bufsize
= HTML_BUF_LEN
; /* initial bufsize of line */
8025 HD(f
)->alt_entity
= (!ps_global
->display_charmap
8026 || strucmp(ps_global
->display_charmap
, "iso-8859-1"));
8027 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
8034 * html_indent - do the requested indent level function with appropriate
8035 * flushing and such.
8037 * Returns: indent level prior to set/increment
8040 html_indent(FILTER_S
*f
, int val
, int func
)
8042 int old
= HD(f
)->indent_level
;
8044 /* flush pending data at old indent level */
8047 html_output_flush(f
);
8048 if((HD(f
)->indent_level
+= val
) < 0)
8049 HD(f
)->indent_level
= 0;
8054 html_output_flush(f
);
8055 HD(f
)->indent_level
= val
;
8068 * html_blanks - Insert n blank lines into output
8071 html_blank(FILTER_S
*f
, int n
)
8073 /* Cap off any flowing text, and then write blank lines */
8074 if(f
->f2
|| f
->n
|| CENTER_BIT(f
) || HD(f
)->centered
|| WRAPPED_LEN(f
))
8075 html_output(f
, HTML_NEWLINE
);
8077 if(HD(f
)->wrapstate
)
8078 while(HD(f
)->blanks
< n
) /* blanks inc'd by HTML_NEWLINE */
8079 html_output(f
, HTML_NEWLINE
);
8085 * html_newline -- insert a newline mindful of embedded tags
8088 html_newline(FILTER_S
*f
)
8090 html_write_newline(f
); /* commit an actual newline */
8092 if(f
->n
){ /* and keep track of blank lines */
8102 * output the given char, handling any requested wrapping.
8103 * It's understood that all whitespace handed us is written. In other
8104 * words, junk whitespace is weeded out before it's given to us here.
8108 html_output(FILTER_S
*f
, int ch
)
8112 void (*o_f
)(FILTER_S
*, int, int, int) = CENTER_BIT(f
) ? html_output_centered
: html_output_normal
;
8115 * if ch is a control token, just pass it on, else, collect
8116 * utf8-encoded characters to determine width,then feed into
8119 if(ch
== TAG_EMBED
|| HD(f
)->embedded
.state
|| (ch
> 0xff && IS_LITERAL(ch
) == 0)){
8120 (*o_f
)(f
, ch
, 1, 0);
8122 else if(utf8_to_ucs4_oneatatime(ch
& 0xff, &(HD(f
)->cb
), &uc
, &width
)){
8125 for(cp
= HD(f
)->cb
.cbuf
; cp
<= HD(f
)->cb
.cbufend
; cp
++){
8126 (*o_f
)(f
, *cp
, width
, HD(f
)->cb
.cbufend
- cp
);
8127 width
= 0; /* only count it once */
8130 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
8133 HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbufp
;
8134 /* else do nothing until we have a full character */
8139 html_output_string(FILTER_S
*f
, char *s
)
8147 html_output_raw_tag(FILTER_S
*f
, char *tag
)
8153 html_output(f
, '<');
8154 html_output_string(f
, tag
);
8155 for(p
= HD(f
)->el_data
->attribs
;
8158 /* SECURITY: no javascript */
8159 /* PRIVACY: no img src without permission */
8160 /* BUGS: no class collisions since <head> ignored */
8161 if(html_event_attribute(p
->attribute
)
8162 || !strucmp(p
->attribute
, "class")
8163 || (!PASS_IMAGES(f
) && !strucmp(tag
, "img") && !strucmp(p
->attribute
, "src")))
8166 /* PRIVACY: sniff out background images */
8167 if(p
->value
&& !PASS_IMAGES(f
)){
8168 if(!strucmp(p
->attribute
, "style")){
8169 if((vp
= srchstr(p
->value
, "background-image")) != NULL
){
8170 /* neuter in place */
8171 vp
[11] = vp
[12] = vp
[13] = vp
[14] = vp
[15] = 'X';
8174 for(vp
= p
->value
; (vp
= srchstr(vp
, "background")) != NULL
; vp
++)
8175 if(vp
[10] == ' ' || vp
[10] == ':')
8176 for(i
= 11; vp
[i
] && vp
[i
] != ';'; i
++)
8177 if((vp
[i
] == 'u' && vp
[i
+1] == 'r' && vp
[i
+2] == 'l' && vp
[i
+3] == '(')
8178 || vp
[i
] == ':' || vp
[i
] == '/' || vp
[i
] == '.')
8182 else if(!strucmp(p
->attribute
, "background")){
8185 for(ip
= p
->value
; *ip
&& !(*ip
== ':' || *ip
== '/' || *ip
== '.'); ip
++)
8193 html_output(f
, ' ');
8194 html_output_string(f
, p
->attribute
);
8196 html_output(f
, '=');
8197 html_output(f
, '\"');
8198 html_output_string(f
, p
->value
);
8199 html_output(f
, '\"');
8203 /* append warning to form submission */
8204 if(!strucmp(tag
, "form")){
8205 html_output_string(f
, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8209 html_output(f
, ' ');
8210 html_output(f
, '/');
8213 html_output(f
, '>');
8218 html_event_attribute(char *attr
)
8221 static char *events
[] = {
8222 "onabort", "onblur", "onchange", "onclick", "ondblclick", "ondragdrop",
8223 "onerror", "onfocus", "onkeydown", "onkeypress", "onkeyup", "onload",
8224 "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmove",
8225 "onreset", "onresize", "onselec", "onsubmit", "onunload"
8228 if((attr
[0] == 'o' || attr
[0] == 'O') && (attr
[1] == 'n' || attr
[1] == 'N'))
8229 for(i
= 0; i
< sizeof(events
)/sizeof(events
[0]); i
++)
8230 if(!strucmp(attr
, events
[i
]))
8238 html_output_normal(FILTER_S
*f
, int ch
, int width
, int remaining
)
8240 static int written
= 0;
8243 if(HD(f
)->centered
){
8244 html_centered_flush(f
);
8245 fs_give((void **) &HD(f
)->centered
->line
.buf
);
8246 fs_give((void **) &HD(f
)->centered
->word
.buf
);
8247 fs_give((void **) &HD(f
)->centered
);
8250 if(HD(f
)->wrapstate
){
8251 if(ch
== HTML_NEWLINE
){ /* hard newline */
8252 html_output_flush(f
);
8256 HD(f
)->blanks
= 0; /* reset blank line counter */
8258 if(ch
== TAG_EMBED
){ /* takes up no space */
8259 HD(f
)->embedded
.state
= -5;
8260 HTML_LINEP_PUTC(f
, TAG_EMBED
);
8262 else if(HD(f
)->embedded
.state
){ /* ditto */
8263 if(HD(f
)->embedded
.state
== -5){
8264 /* looking for specially handled tags following TAG_EMBED */
8265 if(ch
== TAG_HANDLE
)
8266 HD(f
)->embedded
.state
= -1; /* next ch is length */
8267 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8269 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8271 if(ch
== TAG_FGCOLOR
)
8272 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8274 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8276 HD(f
)->embedded
.state
= RGBLEN
;
8279 HD(f
)->embedded
.state
= 0; /* non-special */
8281 else if(HD(f
)->embedded
.state
> 0){
8282 /* collecting up an RGBLEN color or length, ignore tags */
8283 (HD(f
)->embedded
.state
)--;
8284 if(HD(f
)->embedded
.color
)
8285 *HD(f
)->embedded
.color
++ = ch
;
8287 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8288 *HD(f
)->embedded
.color
= '\0';
8289 HD(f
)->embedded
.color
= NULL
;
8292 else if(HD(f
)->embedded
.state
< 0){
8293 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8296 (HD(f
)->embedded
.state
)--;
8297 if(HD(f
)->embedded
.color
)
8298 *HD(f
)->embedded
.color
++ = ch
;
8300 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8301 *HD(f
)->embedded
.color
= '\0';
8302 HD(f
)->embedded
.color
= NULL
;
8306 HTML_LINEP_PUTC(f
, ch
);
8308 else if(HTML_ISSPACE(ch
)){
8309 html_output_flush(f
);
8318 if(f
->f2
+ cwidth
+ 1 >= WRAP_COLS(f
)){
8319 HTML_LINEP_PUTC(f
, ch
& 0xff);
8325 if(HD(f
)->in_anchor
)
8326 html_write_anchor(f
, HD(f
)->in_anchor
);
8329 HTML_LINEP_PUTC(f
, ch
& 0xff);
8343 html_output_flush(f
);
8345 switch(HD(f
)->embedded
.state
){
8350 * It's difficult to both preserve whitespace and wrap at the
8351 * same time so we'll do a dumb wrap at the edge of the screen.
8352 * Since this shouldn't come up much in real life we'll hope
8353 * it is good enough.
8355 if(!PASS_HTML(f
) && (f
->n
+ width
) > WRAP_COLS(f
))
8358 f
->n
+= width
; /* inc displayed char count */
8359 HD(f
)->blanks
= 0; /* reset blank line counter */
8360 html_putc(f
, ch
& 0xff);
8363 case TAG_EMBED
: /* takes up no space */
8364 html_putc(f
, TAG_EMBED
);
8365 HD(f
)->embedded
.state
= -2;
8368 case HTML_NEWLINE
: /* newline handling */
8382 HD(f
)->embedded
.state
= 0;
8385 HD(f
)->embedded
.state
= -1; /* next ch is length */
8406 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8408 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8409 HD(f
)->embedded
.state
= 11;
8414 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8416 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8417 HD(f
)->embedded
.state
= 11;
8420 case TAG_HANDLEOFF
:
8422 HD(f
)->in_anchor
= 0;
8433 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8438 HD(f
)->embedded
.state
--;
8439 if(HD(f
)->embedded
.color
)
8440 *HD(f
)->embedded
.color
++ = ch
;
8442 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8443 *HD(f
)->embedded
.color
= '\0';
8444 HD(f
)->embedded
.color
= NULL
;
8455 * flush any buffered chars waiting for wrapping.
8458 html_output_flush(FILTER_S
*f
)
8461 if(f
->n
&& ((int) f
->n
) + 1 + f
->f2
> HD(f
)->wrapcol
)
8462 html_newline(f
); /* wrap? */
8464 if(f
->n
){ /* text already on the line? */
8466 f
->n
++; /* increment count */
8469 /* write at start of new line */
8470 html_write_indent(f
, HD(f
)->indent_level
);
8472 if(HD(f
)->in_anchor
)
8473 html_write_anchor(f
, HD(f
)->in_anchor
);
8484 * html_output_centered - manage writing of centered text
8487 html_output_centered(FILTER_S
*f
, int ch
, int width
, int remaining
)
8492 if(!HD(f
)->centered
){ /* new text? */
8493 html_output_flush(f
);
8494 if(f
->n
) /* start on blank line */
8497 HD(f
)->centered
= (CENTER_S
*) fs_get(sizeof(CENTER_S
));
8498 memset(HD(f
)->centered
, 0, sizeof(CENTER_S
));
8499 /* and grab a buf to start collecting centered text */
8500 HD(f
)->centered
->line
.len
= WRAP_COLS(f
);
8501 HD(f
)->centered
->line
.buf
= (char *) fs_get(HD(f
)->centered
->line
.len
8503 HD(f
)->centered
->line
.used
= HD(f
)->centered
->line
.width
= 0;
8504 HD(f
)->centered
->word
.len
= 32;
8505 HD(f
)->centered
->word
.buf
= (char *) fs_get(HD(f
)->centered
->word
.len
8507 HD(f
)->centered
->word
.used
= HD(f
)->centered
->word
.width
= 0;
8510 if(ch
== HTML_NEWLINE
){ /* hard newline */
8511 html_centered_flush(f
);
8513 else if(ch
== TAG_EMBED
){ /* takes up no space */
8514 HD(f
)->embedded
.state
= -5;
8515 html_centered_putc(&HD(f
)->centered
->word
, TAG_EMBED
);
8517 else if(HD(f
)->embedded
.state
){
8518 if(HD(f
)->embedded
.state
== -5){
8519 /* looking for specially handled tags following TAG_EMBED */
8520 if(ch
== TAG_HANDLE
)
8521 HD(f
)->embedded
.state
= -1; /* next ch is length */
8522 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8524 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8526 if(ch
== TAG_FGCOLOR
)
8527 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8529 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8531 HD(f
)->embedded
.state
= RGBLEN
;
8534 HD(f
)->embedded
.state
= 0; /* non-special */
8536 else if(HD(f
)->embedded
.state
> 0){
8537 /* collecting up an RGBLEN color or length, ignore tags */
8538 (HD(f
)->embedded
.state
)--;
8539 if(HD(f
)->embedded
.color
)
8540 *HD(f
)->embedded
.color
++ = ch
;
8542 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8543 *HD(f
)->embedded
.color
= '\0';
8544 HD(f
)->embedded
.color
= NULL
;
8547 else if(HD(f
)->embedded
.state
< 0){
8548 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8551 (HD(f
)->embedded
.state
)--;
8552 if(HD(f
)->embedded
.color
)
8553 *HD(f
)->embedded
.color
++ = ch
;
8555 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8556 *HD(f
)->embedded
.color
= '\0';
8557 HD(f
)->embedded
.color
= NULL
;
8561 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8563 else if(ASCII_ISSPACE((unsigned char) ch
)){
8564 if(!HD(f
)->centered
->space
++){ /* end of a word? flush! */
8567 if(WRAPPED_LEN(f
) > HD(f
)->wrapcol
){
8568 html_centered_flush_line(f
);
8569 /* fall thru to put current "word" on blank "line" */
8571 else if(HD(f
)->centered
->line
.width
){
8572 /* put space char between line and appended word */
8573 html_centered_putc(&HD(f
)->centered
->line
, ' ');
8574 HD(f
)->centered
->line
.width
++;
8577 for(i
= 0; i
< HD(f
)->centered
->word
.used
; i
++)
8578 html_centered_putc(&HD(f
)->centered
->line
,
8579 HD(f
)->centered
->word
.buf
[i
]);
8581 HD(f
)->centered
->line
.width
+= HD(f
)->centered
->word
.width
;
8582 HD(f
)->centered
->word
.used
= 0;
8583 HD(f
)->centered
->word
.width
= 0;
8590 /* ch is start of next word */
8591 HD(f
)->centered
->space
= 0;
8592 if(HD(f
)->centered
->word
.width
>= WRAP_COLS(f
))
8593 html_centered_flush(f
);
8595 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8604 HD(f
)->centered
->word
.width
+= cwidth
;
8611 * html_centered_putc -- add given char to given WRAPLINE_S
8614 html_centered_putc(WRAPLINE_S
*wp
, int ch
)
8616 if(wp
->used
+ 1 >= wp
->len
){
8618 fs_resize((void **) &wp
->buf
, wp
->len
* sizeof(char));
8621 wp
->buf
[wp
->used
++] = ch
;
8627 * html_centered_flush - finish writing any pending centered output
8630 html_centered_flush(FILTER_S
*f
)
8635 * If word present (what about line?) we need to deal with
8638 if(HD(f
)->centered
->word
.width
&& WRAPPED_LEN(f
) > HD(f
)->wrapcol
)
8639 html_centered_flush_line(f
);
8642 /* figure out how much to indent */
8643 if((i
= (WRAP_COLS(f
) - WRAPPED_LEN(f
))/2) > 0)
8644 html_write_indent(f
, i
);
8646 if(HD(f
)->centered
->anchor
)
8647 html_write_anchor(f
, HD(f
)->centered
->anchor
);
8649 html_centered_handle(&HD(f
)->centered
->anchor
,
8650 HD(f
)->centered
->line
.buf
,
8651 HD(f
)->centered
->line
.used
);
8652 html_write(f
, HD(f
)->centered
->line
.buf
, HD(f
)->centered
->line
.used
);
8654 if(HD(f
)->centered
->word
.used
){
8655 if(HD(f
)->centered
->line
.width
)
8658 html_centered_handle(&HD(f
)->centered
->anchor
,
8659 HD(f
)->centered
->word
.buf
,
8660 HD(f
)->centered
->word
.used
);
8661 html_write(f
, HD(f
)->centered
->word
.buf
,
8662 HD(f
)->centered
->word
.used
);
8665 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8666 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8669 if(HD(f
)->centered
->word
.used
){
8670 html_write(f
, HD(f
)->centered
->word
.buf
,
8671 HD(f
)->centered
->word
.used
);
8672 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8673 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8675 HD(f
)->blanks
++; /* advance the blank line counter */
8678 html_newline(f
); /* finish the line */
8683 * html_centered_handle - scan the line for embedded handles
8686 html_centered_handle(int *h
, char *line
, int len
)
8691 if(*line
++ == TAG_EMBED
&& len
-- > 0)
8694 if((n
= *line
++) >= --len
){
8698 *h
= (*h
* 10) + (*line
++ - '0');
8702 case TAG_HANDLEOFF
:
8704 *h
= 0; /* assumption 23,342: inverse off ends tags */
8715 * html_centered_flush_line - flush the centered "line" only
8718 html_centered_flush_line(FILTER_S
*f
)
8720 if(HD(f
)->centered
->line
.used
){
8723 /* hide "word" from flush */
8724 i
= HD(f
)->centered
->word
.used
;
8725 j
= HD(f
)->centered
->word
.width
;
8726 HD(f
)->centered
->word
.used
= 0;
8727 HD(f
)->centered
->word
.width
= 0;
8728 html_centered_flush(f
);
8730 HD(f
)->centered
->word
.used
= i
;
8731 HD(f
)->centered
->word
.width
= j
;
8737 * html_write_indent - write indentation mindful of display attributes
8740 html_write_indent(FILTER_S
*f
, int indent
)
8744 html_putc(f
, TAG_EMBED
);
8745 html_putc(f
, TAG_BOLDOFF
);
8749 html_putc(f
, TAG_EMBED
);
8750 html_putc(f
, TAG_ULINEOFF
);
8756 html_putc(f
, ' '); /* indent as needed */
8759 * Resume any previous embedded state
8763 html_putc(f
, TAG_EMBED
);
8764 html_putc(f
, TAG_BOLDON
);
8768 html_putc(f
, TAG_EMBED
);
8769 html_putc(f
, TAG_ULINEON
);
8779 html_write_anchor(FILTER_S
*f
, int anchor
)
8784 html_putc(f
, TAG_EMBED
);
8785 html_putc(f
, TAG_HANDLE
);
8786 snprintf(buf
, sizeof(buf
), "%d", anchor
);
8787 html_putc(f
, (int) strlen(buf
));
8789 for(i
= 0; buf
[i
]; i
++)
8790 html_putc(f
, buf
[i
]);
8795 * html_write_newline - write a newline mindful of display attributes
8798 html_write_newline(FILTER_S
*f
)
8802 if(! STRIP(f
)){ /* First tie, off any embedded state */
8803 if(HD(f
)->in_anchor
){
8804 html_putc(f
, TAG_EMBED
);
8805 html_putc(f
, TAG_INVOFF
);
8809 html_putc(f
, TAG_EMBED
);
8810 html_putc(f
, TAG_BOLDOFF
);
8814 html_putc(f
, TAG_EMBED
);
8815 html_putc(f
, TAG_ULINEOFF
);
8818 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8822 p
= color_embed(ps_global
->VAR_NORM_FORE_COLOR
,
8823 ps_global
->VAR_NORM_BACK_COLOR
);
8824 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8829 html_write(f
, "\015\012", 2);
8830 for(i
= HTML_INDENT(f
); i
> 0; i
--)
8833 if(! STRIP(f
)){ /* First tie, off any embedded state */
8835 html_putc(f
, TAG_EMBED
);
8836 html_putc(f
, TAG_BOLDON
);
8840 html_putc(f
, TAG_EMBED
);
8841 html_putc(f
, TAG_ULINEON
);
8844 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8845 char *p
, *tfg
, *tbg
;
8849 tfg
= HD(f
)->color
->fg
;
8850 tbg
= HD(f
)->color
->bg
;
8851 tmp
= new_color_pair(tfg
[0] ? tfg
8852 : color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
),
8854 : color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
));
8855 if(pico_is_good_colorpair(tmp
)){
8856 p
= color_embed(tfg
[0] ? tfg
8857 : ps_global
->VAR_NORM_FORE_COLOR
,
8859 : ps_global
->VAR_NORM_BACK_COLOR
);
8860 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8865 free_color_pair(&tmp
);
8872 * html_write - write given n-length string to next filter
/*
 * Writes the n bytes at s to the next filter.  When embedded data is
 * not being stripped, TAG_EMBED sequences are inspected on the way
 * through so the anchor state in HD(f) stays in step with what was
 * written.
 *
 * NOTE(review): the loop header, the switch and several case labels
 * are not present in this view of the function; the notes below cover
 * only the statements that are visible.
 */
8875 html_write(FILTER_S
*f
, char *s
, int n
)
8877 GF_INIT(f
, f
->next
);
8880 /* keep track of attribute state? Not if last char! */
/* the n-- in the test accounts for the TAG_EMBED byte itself */
8881 if(!STRIP(f
) && *s
== TAG_EMBED
&& n
-- > 0){
8882 GF_PUTC(f
->next
, TAG_EMBED
);
/* end of a handle: no longer inside an anchor, inverse goes off */
8896 case TAG_HANDLEOFF
:
8897 HD(f
)->in_anchor
= 0;
8898 GF_PUTC(f
->next
, TAG_INVOFF
);
8905 GF_PUTC(f
->next
, TAG_HANDLE
);
8911 GF_PUTC(f
->next
, i
);
/* fold the next decimal digit byte into the handle number */
8913 anum
= (anum
* 10) + (*++s
- '0');
8915 GF_PUTC(f
->next
, *s
);
/* only URL and Attach handles are recorded as the current anchor */
8921 && (h
= get_handle(*HANDLESP(f
), anum
)) != NULL
8922 && (h
->type
== URL
|| h
->type
== Attach
)){
8923 HD(f
)->in_anchor
= anum
;
/* the byte currently under s goes out as a single octet */
8934 GF_PUTC(f
->next
, (*s
++) & 0xff);
8937 GF_IP_END(f
->next
); /* clean up next's input pointers */
8942 * html_putc -- actual work of writing to next filter.
8943 * NOTE: Small opt not using full GF_END since our input
8944 * pointers don't need adjusting.
8947 html_putc(FILTER_S
*f
, int ch
)
8949 GF_INIT(f
, f
->next
);
8950 GF_PUTC(f
->next
, ch
& 0xff);
8951 GF_IP_END(f
->next
); /* clean up next's input pointers */
8957 * Only current option is to turn on embedded data stripping for text
8958 * bound to a printer or composer.
8961 gf_html2plain_opt(char *base
,
8964 HANDLE_S
**handlesp
,
8969 int margin_l
, margin_r
;
8971 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
8973 op
->base
= cpystr(base
);
8974 margin_l
= (margin
) ? margin
[0] : 0;
8975 margin_r
= (margin
) ? margin
[1] : 0;
8976 op
->indent
= margin_l
;
8977 op
->columns
= columns
- (margin_l
+ margin_r
);
8978 op
->strip
= ((flags
& GFHP_STRIPPED
) == GFHP_STRIPPED
);
8979 op
->handlesp
= handlesp
;
8980 op
->handles_loc
= ((flags
& GFHP_LOCAL_HANDLES
) == GFHP_LOCAL_HANDLES
);
8981 op
->showserver
= ((flags
& GFHP_SHOW_SERVER
) == GFHP_SHOW_SERVER
);
8982 op
->warnrisk_f
= risk_f
;
8983 op
->no_relative_links
= ((flags
& GFHP_NO_RELATIVE
) == GFHP_NO_RELATIVE
);
8984 op
->related_content
= ((flags
& GFHP_RELATED_CONTENT
) == GFHP_RELATED_CONTENT
);
8985 op
->html
= ((flags
& GFHP_HTML
) == GFHP_HTML
);
8986 op
->html_imgs
= ((flags
& GFHP_HTML_IMAGES
) == GFHP_HTML_IMAGES
);
8987 op
->element_table
= html_element_table
;
8988 return((void *) op
);
8993 gf_html2plain_rss_opt(RSS_FEED_S
**feedp
, int flags
)
8997 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
8998 memset(op
, 0, sizeof(HTML_OPT_S
));
9000 op
->base
= cpystr("");
9001 op
->element_table
= rss_element_table
;
9002 *(op
->feedp
= feedp
) = NULL
;
9003 return((void *) op
);
9007 gf_html2plain_rss_free(RSS_FEED_S
**feedp
)
9009 if(feedp
&& *feedp
){
9011 fs_give((void **) &(*feedp
)->title
);
9014 fs_give((void **) &(*feedp
)->link
);
9016 if((*feedp
)->description
)
9017 fs_give((void **) &(*feedp
)->description
);
9019 if((*feedp
)->source
)
9020 fs_give((void **) &(*feedp
)->source
);
9023 fs_give((void **) &(*feedp
)->image
);
9025 gf_html2plain_rss_free_items(&((*feedp
)->items
));
9026 fs_give((void **) feedp
);
9031 gf_html2plain_rss_free_items(RSS_ITEM_S
**itemp
)
9033 if(itemp
&& *itemp
){
9035 fs_give((void **) &(*itemp
)->title
);
9038 fs_give((void **) &(*itemp
)->link
);
9040 if((*itemp
)->description
)
9041 fs_give((void **) &(*itemp
)->description
);
9043 if((*itemp
)->source
)
9044 fs_give((void **) &(*itemp
)->source
);
9046 gf_html2plain_rss_free_items(&(*itemp
)->next
);
9047 fs_give((void **) itemp
);
9052 /* END OF HTML-TO-PLAIN text filter */
9055 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9056 * from the text stream.
9059 #define MAX_ESC_LEN 5
9062 * the simple filter, removes unknown escape codes from the stream
9065 gf_escape_filter(FILTER_S
*f
, int flg
)
9068 GF_INIT(f
, f
->next
);
9071 register unsigned char c
;
9072 register int state
= f
->f1
;
9074 while(GF_GETC(f
, c
)){
9077 if(c
== '\033' || f
->n
== MAX_ESC_LEN
){
9078 f
->line
[f
->n
] = '\0';
9080 if(!match_escapes(f
->line
)){
9081 GF_PUTC(f
->next
, '^');
9082 GF_PUTC(f
->next
, '[');
9085 GF_PUTC(f
->next
, '\033');
9089 GF_PUTC(f
->next
, *p
++);
9094 state
= 0; /* fall thru */
9097 f
->line
[f
->n
++] = c
; /* collect */
9105 GF_PUTC(f
->next
, c
);
9111 else if(flg
== GF_EOD
){
9113 if(!match_escapes(f
->line
)){
9114 GF_PUTC(f
->next
, '^');
9115 GF_PUTC(f
->next
, '[');
9118 GF_PUTC(f
->next
, '\033');
9121 for(p
= f
->line
; f
->n
; f
->n
--, p
++)
9122 GF_PUTC(f
->next
, *p
);
9124 fs_give((void **)&(f
->line
)); /* free temp line buffer */
9125 (void) GF_FLUSH(f
->next
);
9126 (*f
->next
->f
)(f
->next
, GF_EOD
);
9128 else if(flg
== GF_RESET
){
9129 dprint((9, "-- gf_reset escape\n"));
9132 f
->linep
= f
->line
= (char *)fs_get((MAX_ESC_LEN
+ 1) * sizeof(char));
9139 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9140 * corresponding string representations (you know, ^blah and such)...
9144 * the simple filter transforms unknown control characters in the stream
9145 * into harmless strings.
9148 gf_control_filter(FILTER_S
*f
, int flg
)
9150 GF_INIT(f
, f
->next
);
9153 register unsigned char c
;
9154 register int filt_only_c0
;
9156 filt_only_c0
= f
->opt
? (*(int *) f
->opt
) : 0;
9158 while(GF_GETC(f
, c
)){
9160 if(((c
< 0x20 || c
== 0x7f)
9161 || (c
>= 0x80 && c
< 0xA0 && !filt_only_c0
))
9162 && !(ASCII_ISSPACE((unsigned char) c
)
9163 || c
== '\016' || c
== '\017' || c
== '\033')){
9164 GF_PUTC(f
->next
, c
>= 0x80 ? '~' : '^');
9165 GF_PUTC(f
->next
, (c
== 0x7f) ? '?' : (c
& 0x1f) + '@');
9168 GF_PUTC(f
->next
, c
);
9173 else if(flg
== GF_EOD
){
9174 (void) GF_FLUSH(f
->next
);
9175 (*f
->next
->f
)(f
->next
, GF_EOD
);
/*
 * function called from the outside to set
 * control filter's option, which says to filter C0 control characters
 * but not C1 control chars. We don't call it at all if we don't want
 * to filter C0 chars either.
 *
 * The pointer is handed back unchanged as the filter's opaque option;
 * nothing is copied, so the int it points to must outlive the filter.
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opaque = (void *) filt_only_c0;

    return(opaque);
}
9194 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9195 * This prevents the possibility of embedding other tags.
9196 * We assume that this filter should only be used for something
9197 * that is eventually writing to a display, which has the special
9198 * knowledge of quoted TAG_EMBEDs.
9201 gf_tag_filter(FILTER_S
*f
, int flg
)
9203 GF_INIT(f
, f
->next
);
9206 register unsigned char c
;
9208 while(GF_GETC(f
, c
)){
9210 if((c
& 0xff) == (TAG_EMBED
& 0xff)){
9211 GF_PUTC(f
->next
, TAG_EMBED
);
9212 GF_PUTC(f
->next
, c
);
9215 GF_PUTC(f
->next
, c
);
9220 else if(flg
== GF_EOD
){
9221 (void) GF_FLUSH(f
->next
);
9222 (*f
->next
->f
)(f
->next
, GF_EOD
);
9228 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9229 * specified line width
/*
 * State kept by the line-wrap filter.  The WRAP_* accessor macros that
 * follow read these members through the filter's opt pointer.
 * NOTE(review): only part of the member list, and not the closing
 * typedef name, is present in this view -- presumably WRAP_S; confirm.
 */
9233 typedef struct wrap_col_s
{
9238 unsigned do_indent
:1;
9239 unsigned on_comma
:1;
9245 unsigned leave_flowed
:1;
9246 unsigned use_color
:1;
9247 unsigned hdr_color
:1;
9248 unsigned for_compose
:1;
9249 unsigned handle_soft_hyphen
:1;
9250 unsigned saw_soft_hyphen
:1;
9251 unsigned trailing_space
:1;
/* octets of the UTF-8 character currently being assembled by gf_wrap */
9252 unsigned char utf8buf
[7];
/* next free slot in utf8buf; equal to &utf8buf[0] when nothing is pending */
9253 unsigned char *utf8bufp
;
/*
 * Accessors for the wrap filter's state, which hangs off the filter's
 * opt pointer as a WRAP_S.  Each expands to an lvalue unless noted.
 * WRAP_COL and WRAP_MAX_COL are computed values: the configured column
 * less the right margin, less one more when flowed text is left flowed.
 */
#define	WRAP_MARG_L(F)	(((WRAP_S *)(F)->opt)->margin_l)
#define	WRAP_MARG_R(F)	(((WRAP_S *)(F)->opt)->margin_r)
#define	WRAP_COL(F)	(((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_MAX_COL(F)	(((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_INDENT(F)	(((WRAP_S *)(F)->opt)->indent)
#define	WRAP_DO_IND(F)	(((WRAP_S *)(F)->opt)->do_indent)
#define	WRAP_COMMA(F)	(((WRAP_S *)(F)->opt)->on_comma)
#define	WRAP_FLOW(F)	(((WRAP_S *)(F)->opt)->flowed)
#define	WRAP_DELSP(F)	(((WRAP_S *)(F)->opt)->delsp)
#define	WRAP_FL_QD(F)	(((WRAP_S *)(F)->opt)->quote_depth)
#define	WRAP_FL_QC(F)	(((WRAP_S *)(F)->opt)->quote_count)
#define	WRAP_FL_SIG(F)	(((WRAP_S *)(F)->opt)->sig)
#define	WRAP_HARD(F)	(((WRAP_S *)(F)->opt)->hard_nl)
#define	WRAP_LV_FLD(F)	(((WRAP_S *)(F)->opt)->leave_flowed)
#define	WRAP_USE_CLR(F)	(((WRAP_S *)(F)->opt)->use_color)
#define	WRAP_HDR_CLR(F)	(((WRAP_S *)(F)->opt)->hdr_color)
#define	WRAP_FOR_CMPS(F)	(((WRAP_S *)(F)->opt)->for_compose)
#define	WRAP_HANDLE_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define	WRAP_SAW_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->saw_soft_hyphen)
#define	WRAP_UTF8BUF(F, C)	(((WRAP_S *)(F)->opt)->utf8buf[C])
#define	WRAP_UTF8BUFP(F)	(((WRAP_S *)(F)->opt)->utf8bufp)
#define	WRAP_STATE(F)	(((WRAP_S *)(F)->opt)->state)
#define	WRAP_QUOTED(F)	(((WRAP_S *)(F)->opt)->quoted)
#define	WRAP_TAGS(F)	(((WRAP_S *)(F)->opt)->tags)
#define	WRAP_BOLD(F)	(((WRAP_S *)(F)->opt)->bold)
#define	WRAP_ULINE(F)	(((WRAP_S *)(F)->opt)->uline)
#define	WRAP_INVERSE(F)	(((WRAP_S *)(F)->opt)->inverse)
#define	WRAP_LASTC(F)	(((WRAP_S *)(F)->opt)->lineendp)
#define	WRAP_EMBED(F)	(((WRAP_S *)(F)->opt)->embedded)
#define	WRAP_ANCHOR(F)	(((WRAP_S *)(F)->opt)->anchor)
#define	WRAP_PB_OFF(F)	(((WRAP_S *)(F)->opt)->prefbrk)
#define	WRAP_PB_LEN(F)	(((WRAP_S *)(F)->opt)->prefbrkn)
#define	WRAP_ALLWSP(F)	(((WRAP_S *)(F)->opt)->allwsp)
#define	WRAP_SPC_LEN(F)	(((WRAP_S *)(F)->opt)->space_len)
#define	WRAP_TRL_SPC(F)	(((WRAP_S *)(F)->opt)->trailing_space)
/* fully parenthesized like its siblings so it is safe in any expression */
#define	WRAP_SPEC(F, C)	(((WRAP_S *)(F)->opt)->special[(C)])
#define	WRAP_COLOR(F)	(((WRAP_S *)(F)->opt)->color)
/* true when a color pair is recorded and its foreground is non-empty */
#define	WRAP_COLOR_SET(F)  ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define	WRAP_SPACES(F)	(((WRAP_S *)(F)->opt)->spaces)
/*
 * WRAP_PUTC - append octet C to the pending word in (F)->line and add
 *	       W screen cells to the pending width (F)->f2.
 *
 * When the write position reaches the last slot the buffer is doubled
 * first.  The new size is based on the real buffer size (offset + 1)
 * so that even a one or two byte buffer actually grows; doubling the
 * offset alone left such a buffer the same size, or empty.
 */
#define	WRAP_PUTC(F,C,W) {						\
		if((F)->linep == WRAP_LASTC(F)){			\
		    size_t offset = (F)->linep - (F)->line;		\
		    size_t newlen = 2 * (offset + 1);			\
		    fs_resize((void **) &(F)->line,			\
			      newlen * sizeof(char));			\
		    (F)->linep = &(F)->line[offset];			\
		    WRAP_LASTC(F) = &(F)->line[newlen - 1];		\
		}							\
		*(F)->linep++ = (C);					\
		(F)->f2 += (W);						\
	    }

/*
 * WRAP_EMBED_PUTC - store a zero-width embedded-data octet: in the word
 *		     buffer when a word is pending, otherwise in the
 *		     buffered-spaces store.
 */
#define	WRAP_EMBED_PUTC(F,C) {						\
		if((F)->f2){						\
		    WRAP_PUTC((F), C, 0);				\
		}							\
		else							\
		  so_writec(C, WRAP_SPACES(F));				\
	    }

/*
 * WRAP_COLOR_UNSET - forget the remembered foreground color, if any
 */
#define	WRAP_COLOR_UNSET(F)	{					\
		if(WRAP_COLOR_SET(F)){					\
		    WRAP_COLOR(F)->fg[0] = '\0';			\
		}							\
	    }
9340 * wrap_flush_embed flags
9342 #define WFE_NONE 0 /* Nothing special */
9343 #define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9346 int wrap_flush(FILTER_S
*, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9347 int wrap_flush_embed(FILTER_S
*, unsigned char **, unsigned char **,
9348 unsigned char **, unsigned char **);
9349 int wrap_flush_s(FILTER_S
*,char *, int, int, unsigned char **, unsigned char **,
9350 unsigned char **, unsigned char **, int);
9351 int wrap_eol(FILTER_S
*, int, unsigned char **, unsigned char **,
9352 unsigned char **, unsigned char **);
9353 int wrap_bol(FILTER_S
*, int, int, unsigned char **,
9354 unsigned char **, unsigned char **, unsigned char **);
9355 int wrap_quote_insert(FILTER_S
*, unsigned char **, unsigned char **,
9356 unsigned char **, unsigned char **);
9359 * the no longer simple filter, breaks lines at end of white space nearest
9360 * to global "gf_wrap_width" in length
9361 * It also supports margins, indents (inverse indenting, really) and
9362 * flowed text (ala RFC 3676)
9366 gf_wrap(FILTER_S
*f
, int flg
)
9369 GF_INIT(f
, f
->next
);
9373 * f->line buffer where next "word" being considered is stored
9374 * f->f2 width in screen cells of f->line stuff
9375 * f->n width in screen cells of the part of this line committed to next
9380 register unsigned char c
;
9381 register int state
= f
->f1
;
9382 int width
, full_character
;
9384 while(GF_GETC(f
, c
)){
9387 case CCR
: /* CRLF or CR in text ? */
9388 state
= BOL
; /* either way, handle start */
9392 if(f
->f2
== 0 && WRAP_SPC_LEN(f
) && WRAP_TRL_SPC(f
)){
9394 * whack trailing space char, but be aware
9395 * of embeds in space buffer. grok them just
9396 * in case they contain a 0x20 value
9399 char *sb
, *sbp
, *scp
= NULL
;
9402 for(sb
= sbp
= (char *)so_text(WRAP_SPACES(f
)); *sbp
; sbp
++){
9413 if(strlen(sbp
) >= x
)
9420 if(strlen(sbp
) >= RGBLEN
)
9421 sbp
+= (RGBLEN
- 1);
9436 /* replace space buf without trailing space char */
9438 STORE_S
*ns
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
9442 WRAP_TRL_SPC(f
) = 0;
9447 so_give(&WRAP_SPACES(f
));
9448 WRAP_SPACES(f
) = ns
;
9452 else{ /* fixed line */
9454 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9455 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9458 * When we get to a real end of line, we don't need to
9459 * remember what the special color was anymore because
9460 * we aren't going to be changing back to it. We unset it
9461 * so that we don't keep resetting the color to normal.
9463 WRAP_COLOR_UNSET(f
);
9466 if(c
== '\012'){ /* get c following LF */
9469 /* else c is first char of new line, fall thru */
9472 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9473 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9474 WRAP_COLOR_UNSET(f
); /* see note above */
9478 /* else fall thru to deal with beginning of line */
9484 WRAP_FL_QC(f
) = 1; /* init it */
9485 state
= FL_QLEV
; /* go collect it */
9488 /* if EMBEDed, process it and return here */
9489 if(c
== (unsigned char) TAG_EMBED
){
9490 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9491 WRAP_STATE(f
) = state
;
9496 /* quote level change implies new paragraph */
9499 if(WRAP_HARD(f
) == 0){
9501 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9502 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9503 WRAP_COLOR_UNSET(f
); /* see note above */
9508 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
,
9509 &eob
); /* write quoting prefix */
9514 case '\015' : /* a blank line? */
9515 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9516 state
= CCR
; /* go collect it */
9519 case ' ' : /* space stuffed */
9520 state
= FL_STF
; /* just eat it */
9523 case '-' : /* possible sig-dash */
9524 WRAP_FL_SIG(f
) = 1; /* init state */
9525 state
= FL_SIG
; /* go collect it */
9529 state
= DFL
; /* go back to normal */
9530 goto case_dfl
; /* handle c like DFL case */
9536 if(WRAP_COMMA(f
) && c
== TAB
){
9537 wrap_bol(f
, 1, 0, &ip
, &eib
, &op
,
9538 &eob
); /* convert to normal indent */
9542 wrap_bol(f
,0,0, &ip
, &eib
, &op
, &eob
);
9543 goto case_dfl
; /* handle c like DFL case */
9549 if(c
== '>'){ /* another level */
9553 /* if EMBEDed, process it and return here */
9554 if(c
== (unsigned char) TAG_EMBED
){
9555 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9556 WRAP_STATE(f
) = state
;
9561 /* quote level change signals new paragraph */
9562 if(WRAP_FL_QC(f
) != WRAP_FL_QD(f
)){
9563 WRAP_FL_QD(f
) = WRAP_FL_QC(f
);
9564 if(WRAP_HARD(f
) == 0){ /* add hard newline */
9565 WRAP_HARD(f
) = 1; /* hard newline */
9566 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9567 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9568 WRAP_COLOR_UNSET(f
); /* see note above */
9573 wrap_bol(f
,0,1, &ip
, &eib
, &op
, &eob
);
9578 case '\015' : /* a blank line? */
9579 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9580 state
= CCR
; /* go collect it */
9583 case ' ' : /* space-stuffed! */
9584 state
= FL_STF
; /* just eat it */
9587 case '-' : /* sig dash? */
9592 default : /* something else */
9594 goto case_dfl
; /* handle c like DFL */
9600 case FL_STF
: /* space stuffed */
9602 case '\015' : /* a blank line? */
9603 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9604 state
= CCR
; /* go collect it */
9607 case (unsigned char) TAG_EMBED
: /* process TAG data */
9608 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9609 WRAP_STATE(f
) = state
; /* and return */
9613 case '-' : /* sig dash? */
9619 default : /* something else */
9621 goto case_dfl
; /* handle c like DFL */
9626 case FL_SIG
: /* sig-dash collector */
9627 switch (WRAP_FL_SIG(f
)){ /* possible sig-dash? */
9629 if(c
!= '-'){ /* not a sigdash */
9630 if((f
->n
+ WRAP_SPC_LEN(f
) + 1) > WRAP_COL(f
)){
9631 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9632 &eob
); /* note any embedded*/
9633 wrap_eol(f
, 1, &ip
, &eib
,
9634 &op
, &eob
); /* plunk down newline */
9635 wrap_bol(f
, 1, 1, &ip
, &eib
,
9636 &op
, &eob
); /* write any prefix */
9639 WRAP_PUTC(f
,'-', 1); /* write what we got */
9646 /* don't put anything yet until we know to wrap or not */
9651 if(c
!= ' '){ /* not a sigdash */
9652 WRAP_PUTC(f
, '-', 1);
9653 if((f
->n
+ WRAP_SPC_LEN(f
) + 2) > WRAP_COL(f
)){
9654 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9655 &eob
); /* note any embedded*/
9656 wrap_eol(f
, 1, &ip
, &eib
,
9657 &op
, &eob
); /* plunk down newline */
9658 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
9659 &eob
); /* write any prefix */
9662 WRAP_PUTC(f
,'-', 1); /* write what we got */
9669 /* don't put anything yet until we know to wrap or not */
9674 if(c
== '\015'){ /* success! */
9675 /* known sigdash, newline if soft nl */
9676 if(WRAP_SPC_LEN(f
)){
9677 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9678 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9679 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9689 WRAP_FL_SIG(f
) = 4; /* possible success */
9694 case (unsigned char) TAG_EMBED
:
9696 * At this point we're almost 100% sure that we've got
9697 * a sigdash. Putc it (adding newline if previous
9698 * was a soft nl) so we get it the right color
9699 * before we store this new embedded stuff
9701 if(WRAP_SPC_LEN(f
)){
9702 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9703 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9704 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9713 case '\015' : /* success! */
9715 * We shouldn't get here, but in case we do, we have
9716 * not yet put the sigdash
9718 if(WRAP_SPC_LEN(f
)){
9719 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9720 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9721 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9730 default : /* that's no sigdash! */
9731 /* write what we got but didn't put yet */
9732 WRAP_PUTC(f
,'-', 1);
9733 WRAP_PUTC(f
,'-', 1);
9734 WRAP_PUTC(f
,' ', 1);
9737 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9738 WRAP_SPC_LEN(f
) = 1;
9739 state
= DFL
; /* set normal state */
9740 goto case_dfl
; /* and go do "c" */
9746 WRAP_STATE(f
) = FL_SIG
; /* come back here */
9747 WRAP_FL_SIG(f
) = 6; /* and seek EOL */
9748 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9749 state
= TAG
; /* process embed */
9754 * at this point we've already putc the sigdash in case 4
9757 case (unsigned char) TAG_EMBED
:
9761 case '\015' : /* success! */
9765 default : /* that's no sigdash! */
9767 * probably never reached (fake sigdash with embedded
9768 * stuff) but if this did get reached, then we
9769 * might have accidentally disobeyed a soft nl
9772 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9773 WRAP_SPC_LEN(f
) = 1;
9774 state
= DFL
; /* set normal state */
9775 goto case_dfl
; /* and go do "c" */
9782 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9785 state
= DFL
; /* set normal state */
9786 goto case_dfl
; /* and go process "c" */
9794 * This was just if(WRAP_SPEC(f, c)) before the change to add
9795 * the == 0 test. This isn't quite right, either. We should really
9796 * be looking for special characters in the UCS characters, not
9797 * in the incoming stream of UTF-8. It is not right to
9798 * call this on bytes that are in the middle of a UTF-8 character,
9799 * hence the == 0 test which restricts it to the first byte
9800 * of a character. This isn't right, either, but it's closer.
9801 * Also change the definition of WRAP_SPEC so that isspace only
9802 * matches ascii characters, which will never be in the middle
9803 * of a UTF-8 multi-byte character.
9805 if((WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 0 && WRAP_SPEC(f
, c
)){
9806 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
9812 if(f
->f2
){ /* any non-lwsp to flush? */
9814 /* remember our second best break point */
9815 WRAP_PB_OFF(f
) = f
->linep
- f
->line
;
9816 WRAP_PB_LEN(f
) = f
->f2
;
9820 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9823 switch(c
){ /* remember separator */
9826 WRAP_TRL_SPC(f
) = 1;
9827 so_writec(' ',WRAP_SPACES(f
));
9832 int i
= (int) f
->n
+ WRAP_SPC_LEN(f
);
9838 so_writec(TAB
,WRAP_SPACES(f
));
9839 WRAP_TRL_SPC(f
) = 0;
9844 default : /* some control char? */
9845 WRAP_SPC_LEN(f
) += 2;
9846 WRAP_TRL_SPC(f
) = 0;
9853 WRAP_QUOTED(f
) = !WRAP_QUOTED(f
);
9856 case '\015' : /* already has newline? */
9860 case '\012' : /* bare LF in text? */
9861 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* they must've */
9862 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
); /* meant */
9863 wrap_bol(f
,1,1, &ip
, &eib
, &op
, &eob
); /* newline... */
9866 case (unsigned char) TAG_EMBED
:
9867 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9868 WRAP_STATE(f
) = state
;
9873 if(!WRAP_QUOTED(f
)){
9874 /* handle this special case in general code below */
9875 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_MAX_COL(f
)
9876 && WRAP_ALLWSP(f
) && WRAP_PB_OFF(f
))
9879 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_COL(f
)){
9880 if(WRAP_ALLWSP(f
)) /* if anything visible */
9881 wrap_flush(f
, &ip
, &eib
, &op
,
9882 &eob
); /* ... blat buf'd chars */
9884 wrap_eol(f
, 1, &ip
, &eib
, &op
,
9885 &eob
); /* plunk down newline */
9886 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
9887 &eob
); /* write any prefix */
9890 WRAP_PUTC(f
, ',', 1); /* put out comma */
9891 wrap_flush(f
, &ip
, &eib
, &op
,
9892 &eob
); /* write buf'd chars */
9899 else if(WRAP_HANDLE_SOFT_HYPHEN(f
)
9900 && (WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 1
9901 && WRAP_UTF8BUF(f
, 0) == 0xC2 && c
== 0xAD){
9903 * This is a soft hyphen. If there is enough space for
9904 * a real hyphen to fit on the line here then we can
9905 * flush everything up to before the soft hyphen,
9906 * and simply remember that we saw a soft hyphen.
9907 * If it turns out that we can't fit the next piece in
9908 * then wrap_eol will append a real hyphen to the line.
9909 * If we can fit another piece in it will be because we've
9910 * reached the next break point. At that point we'll flush
9911 * everything but won't include the unneeded hyphen. We erase
9912 * the fact that we saw this soft hyphen because it has
9913 * become irrelevant.
9915 * If the hyphen is the character that puts us over the edge
9916 * we go through the else case.
9919 /* erase this soft hyphen character from buffer */
9920 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
9922 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
9923 if(f
->f2
) /* any non-lwsp to flush? */
9924 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9926 /* remember that we saw the soft hyphen */
9927 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
9931 * Everything up to the hyphen fits, otherwise it
9932 * would have already been flushed the last time
9933 * through the loop. But the hyphen won't fit. So
9934 * we need to go back to the last line break and
9935 * break there instead. Then start a new line with
9936 * the buffered up characters and the soft hyphen.
9938 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
9939 wrap_eol(f
, 1, &ip
, &eib
, &op
,
9940 &eob
); /* plunk down newline */
9941 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
9942 &eob
); /* write any prefix */
9945 * Now we're in the same situation as we would have
9946 * been above except we're on a new line. Try to
9947 * flush out the characters seen up to the hyphen.
9949 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
9950 if(f
->f2
) /* any non-lwsp to flush? */
9951 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9953 /* remember that we saw the soft hyphen */
9954 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
9957 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
9966 unsigned char *inputp
;
9967 unsigned long remaining_octets
;
9970 if(WRAP_UTF8BUFP(f
) < &WRAP_UTF8BUF(f
, 0) + 6){ /* always true */
9972 *WRAP_UTF8BUFP(f
)++ = c
;
9973 remaining_octets
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
9974 if(remaining_octets
== 1 && isascii(WRAP_UTF8BUF(f
, 0))){
9984 else if(c
< 0x80 && iscntrl((unsigned char) c
))
9990 inputp
= &WRAP_UTF8BUF(f
, 0);
9991 ucs
= (UCS
) utf8_get(&inputp
, &remaining_octets
);
9993 case U8G_ENDSTRG
: /* incomplete character, wait */
9994 case U8G_ENDSTRI
: /* incomplete character, wait */
9999 if(ucs
& U8G_ERROR
|| ucs
== UBOGON
){
10001 * None of these cases is supposed to happen. If it
10002 * does happen then the input stream isn't UTF-8
10003 * so something is wrong. Writechar will treat
10004 * each octet in the input buffer as a separate
10005 * error character and print a '?' for each,
10006 * so the width will be the number of octets.
10008 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10012 /* got a character */
10013 width
= wcellwidth(ucs
);
10018 * This happens when we have a UTF-8 character that
10019 * we aren't able to print in our locale. For example,
10020 * if the locale is setup with the terminal
10021 * expecting ISO-8859-1 characters then there are
10022 * lots of UTF-8 characters that can't be printed.
10023 * Print a '?' instead.
10035 * This cannot happen because an error would have
10036 * happened at least by character #6. So if we get
10037 * here there is a bug in utf8_get().
10039 if(WRAP_UTF8BUFP(f
) == &WRAP_UTF8BUF(f
, 0) + 6){
10040 *WRAP_UTF8BUFP(f
)++ = c
;
10044 * We could possibly do some more sophisticated
10045 * resynchronization here, but we aren't doing
10046 * anything in Writechar so it wouldn't match up
10047 * with that anyway. Just figure each character will
10048 * end up being printed as a ? character.
10050 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10055 if(WRAP_ALLWSP(f
)){
10057 * Nothing is visible yet but the first word may be too long
10058 * all by itself. We need to break early.
10060 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
> WRAP_MAX_COL(f
)){
10062 * A little reaching behind the curtain here.
10063 * if there's at least a preferable break point, use
10064 * it and stuff what's left back into the wrap buffer.
10065 * The "nwsp" latch is used to skip leading whitespace
10066 * The second half of the test prevents us from wrapping
10067 * at the preferred break point in the case that it
10068 * is so early in the line that it doesn't help.
10069 * That is, the width of the indent is even more than
10070 * the width of the first part before the preferred
10071 * break point. An example would be breaking after
10072 * "To:" when the indent is 4 which is > 3.
10074 if(WRAP_PB_OFF(f
) && WRAP_PB_LEN(f
) >= WRAP_INDENT(f
)){
10075 char *p1
= f
->line
+ WRAP_PB_OFF(f
);
10076 char *p2
= f
->linep
;
10078 int nwsp
= 0, left_after_wrap
;
10080 left_after_wrap
= f
->f2
- WRAP_PB_LEN(f
);
10082 f
->f2
= WRAP_PB_LEN(f
);
10085 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* flush shortened buf */
10087 /* put back rest of characters */
10090 if(!(c2
== ' ' || c2
== '\t') || nwsp
){
10091 WRAP_PUTC(f
, c2
, 0);
10095 left_after_wrap
--; /* wrong if a tab! */
10098 f
->f2
= MAX(left_after_wrap
, 0);
10100 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10101 &eob
); /* plunk down newline */
10102 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10103 &eob
); /* write any prefix */
10107 * If we do the less preferable break point at
10108 * the space we don't want to lose the fact that
10109 * we might be able to break at this comma for
10112 if(full_character
&& c
== ','){
10113 WRAP_PUTC(f
, c
, 1);
10114 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10115 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10119 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10121 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10122 &eob
); /* plunk down newline */
10123 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10124 &eob
); /* write any prefix */
10128 else if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
) > WRAP_COL(f
)){
10129 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
10130 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10131 &eob
); /* plunk down newline */
10132 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10133 &eob
); /* write any prefix */
10137 * Commit entire multibyte UTF-8 character at once
10138 * instead of writing partial characters into the
10141 if(full_character
){
10144 for(q
= &WRAP_UTF8BUF(f
, 0); q
< WRAP_UTF8BUFP(f
); q
++){
10145 WRAP_PUTC(f
, *q
, width
);
10149 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10156 WRAP_EMBED_PUTC(f
, c
);
10159 WRAP_EMBED(f
) = -1;
10165 WRAP_EMBED(f
) = RGBLEN
;
10170 state
= WRAP_STATE(f
);
10177 WRAP_EMBED_PUTC(f
, c
);
10184 WRAP_PUTC(f
, c
, 0);
10187 so_writec(c
, WRAP_SPACES(f
));
10189 if(!(WRAP_EMBED(f
) -= 1)){
10190 state
= WRAP_STATE(f
);
10198 GF_END(f
, f
->next
);
10200 else if(flg
== GF_EOD
){
10201 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10203 free_color_pair(&WRAP_COLOR(f
));
10205 fs_give((void **) &f
->line
); /* free temp line buffer */
10206 so_give(&WRAP_SPACES(f
));
10207 fs_give((void **) &f
->opt
); /* free wrap widths struct */
10208 (void) GF_FLUSH(f
->next
);
10209 (*f
->next
->f
)(f
->next
, GF_EOD
);
10211 else if(flg
== GF_RESET
){
10212 dprint((9, "-- gf_reset wrap\n"));
10214 f
->n
= 0L; /* displayed length of line so far */
10215 f
->f2
= 0; /* displayed length of buffered chars */
10216 WRAP_HARD(f
) = 1; /* starting at beginning of line; NOTE(review): this dereferences f->opt before the NULL check on the next line */
10217 if(! (WRAP_S
*) f
->opt
)
10218 f
->opt
= gf_wrap_filter_opt(75, 80, NULL
, 0, 0);
10220 while(WRAP_INDENT(f
) >= WRAP_MAX_COL(f
))
10221 WRAP_INDENT(f
) /= 2;
10223 f
->line
= (char *) fs_get(WRAP_MAX_COL(f
) * sizeof(char));
10224 f
->linep
= f
->line
;
10225 WRAP_LASTC(f
) = &f
->line
[WRAP_MAX_COL(f
) - 1];
10227 for(i
= 0; i
< 256; i
++)
10228 ((WRAP_S
*) f
->opt
)->special
[i
] = ((i
== '\"' && WRAP_COMMA(f
))
10231 || (i
== (unsigned char) TAG_EMBED
10233 || (i
== ',' && WRAP_COMMA(f
)
10234 && !WRAP_QUOTED(f
))
10235 || ASCII_ISSPACE(i
));
10236 WRAP_SPACES(f
) = so_get(CharStar
, NULL
, EDIT_ACCESS
);
10237 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10242 wrap_flush(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10243 unsigned char **opp
, unsigned char **eobp
)
10248 s
= (char *)so_text(WRAP_SPACES(f
));
10249 n
= so_tell(WRAP_SPACES(f
));
10250 so_seek(WRAP_SPACES(f
), 0L, 0);
10251 wrap_flush_s(f
, s
, n
, WRAP_SPC_LEN(f
), ipp
, eibp
, opp
, eobp
, WFE_NONE
);
10252 so_truncate(WRAP_SPACES(f
), 0L);
10253 WRAP_SPC_LEN(f
) = 0;
10254 WRAP_TRL_SPC(f
) = 0;
10256 n
= f
->linep
- f
->line
;
10257 wrap_flush_s(f
, s
, n
, f
->f2
, ipp
, eibp
, opp
, eobp
, WFE_NONE
);
10259 f
->linep
= f
->line
;
10260 WRAP_PB_OFF(f
) = 0;
10261 WRAP_PB_LEN(f
) = 0;
10267 wrap_flush_embed(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
)
10271 s
= (char *)so_text(WRAP_SPACES(f
));
10272 n
= so_tell(WRAP_SPACES(f
));
10273 so_seek(WRAP_SPACES(f
), 0L, 0);
10274 wrap_flush_s(f
, s
, n
, 0, ipp
, eibp
, opp
, eobp
, WFE_CNT_HANDLE
);
10275 so_truncate(WRAP_SPACES(f
), 0L);
10276 WRAP_SPC_LEN(f
) = 0;
10277 WRAP_TRL_SPC(f
) = 0;
/*
 * Hands the n bytes at s to the next filter, interpreting TAG_EMBED
 * sequences on the way so the wrap filter's record of the current
 * anchor and color stays in step with what was written.
 *
 * flags is WFE_NONE or WFE_CNT_HANDLE; with WFE_CNT_HANDLE set the
 * bytes of a handle tag are skipped rather than written (see the
 * "(flags & WFE_CNT_HANDLE) == 0" guards below).
 *
 * NOTE(review): the switch, most case labels and the use of the width
 * argument w are not present in this view; the notes below describe
 * only the statements that are visible.
 */
10283 wrap_flush_s(FILTER_S
*f
, char *s
, int n
, int w
, unsigned char **ipp
,
10284 unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
, int flags
)
10288 for(; n
> 0; n
--,s
++){
10289 if(*s
== TAG_EMBED
){
/* attribute tags are passed along as the two byte pair they came in as */
10293 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10294 GF_PUTC_GLO(f
->next
,TAG_BOLDON
);
10298 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10299 GF_PUTC_GLO(f
->next
,TAG_BOLDOFF
);
10303 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10304 GF_PUTC_GLO(f
->next
,TAG_ULINEON
);
10307 case TAG_ULINEOFF
:
10308 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10309 GF_PUTC_GLO(f
->next
,TAG_ULINEOFF
);
/* inverse off also means we are no longer inside an anchor */
10313 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10314 GF_PUTC_GLO(f
->next
,TAG_INVOFF
);
10315 WRAP_ANCHOR(f
) = 0;
/* handle tag: each byte is written only when WFE_CNT_HANDLE is clear */
10318 if((flags
& WFE_CNT_HANDLE
) == 0)
10319 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10324 if((flags
& WFE_CNT_HANDLE
) == 0)
10325 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10330 if((flags
& WFE_CNT_HANDLE
) == 0)
10331 GF_PUTC_GLO(f
->next
, i
);
/* rebuild the handle number from its decimal digit bytes */
10333 WRAP_ANCHOR(f
) = 0;
10335 WRAP_ANCHOR(f
) = (WRAP_ANCHOR(f
) * 10) + (*++s
-'0');
10337 if((flags
& WFE_CNT_HANDLE
) == 0)
10338 GF_PUTC_GLO(f
->next
,*s
);
/*
 * foreground color: acted on only when color is in use and at least
 * RGBLEN bytes remain; the RGB text is also copied into
 * WRAP_COLOR(f)->fg and NUL terminated
 */
10345 if(pico_usingcolor() && n
>= RGBLEN
){
10347 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10348 GF_PUTC_GLO(f
->next
,TAG_FGCOLOR
);
10350 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
10351 strncpy(WRAP_COLOR(f
)->fg
, s
+1, RGBLEN
);
10352 WRAP_COLOR(f
)->fg
[RGBLEN
]='\0';
10356 GF_PUTC_GLO(f
->next
,
/* background color: same treatment, recorded in WRAP_COLOR(f)->bg */
10361 if(pico_usingcolor() && n
>= RGBLEN
){
10363 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10364 GF_PUTC_GLO(f
->next
,TAG_BGCOLOR
);
10366 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
10367 strncpy(WRAP_COLOR(f
)->bg
, s
+1, RGBLEN
);
10368 WRAP_COLOR(f
)->bg
[RGBLEN
]='\0';
10372 GF_PUTC_GLO(f
->next
,
/* a byte is written only while f->n is within WRAP_MAX_COL; otherwise it is logged as an overrun */
10383 if(f
->n
<= WRAP_MAX_COL(f
)){
10384 GF_PUTC_GLO(f
->next
, (*s
) & 0xff);
10387 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s
) & 0xff));
10390 WRAP_ALLWSP(f
) = 0;
/*
 * wrap_eol - finish the current output line.
 *
 * Visible steps, in order:
 *   - a pending soft hyphen (WRAP_SAW_SOFT_HYPHEN) is written as a real
 *     '-' and the flag is cleared;
 *   - when c is non-zero and WRAP_LV_FLD(f) is set, a ' ' is written
 *     before the line break;
 *   - bold / underline / inverse "off" tags are written (inverse-off is
 *     guarded by WRAP_INVERSE(f) || WRAP_ANCHOR(f); the guards for the
 *     other two are not visible here);
 *   - when WRAP_COLOR_SET(f), foreground and background color tags are
 *     written using VAR_NORM_FORE_COLOR / VAR_NORM_BACK_COLOR;
 *   - CR LF is written;
 *   - the WRAP_SPACES store is truncated and its counters are zeroed.
 *
 * ipp/eibp/opp/eobp are not referenced directly in the visible lines
 * (presumably used by the GF_PUTC_GLO macro -- confirm).
 */
10398 wrap_eol(FILTER_S
*f
, int c
, unsigned char **ipp
, unsigned char **eibp
,
10399 unsigned char **opp
, unsigned char **eobp
)
10401 if(WRAP_SAW_SOFT_HYPHEN(f
)){
10402 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
10403 GF_PUTC_GLO(f
->next
, '-'); /* real hyphen */
10406 if(c
&& WRAP_LV_FLD(f
))
10407 GF_PUTC_GLO(f
->next
, ' ');
/* switch attributes off before the line break */
10410 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10411 GF_PUTC_GLO(f
->next
, TAG_BOLDOFF
);
10415 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10416 GF_PUTC_GLO(f
->next
, TAG_ULINEOFF
);
10419 if(WRAP_INVERSE(f
) || WRAP_ANCHOR(f
)){
10420 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10421 GF_PUTC_GLO(f
->next
, TAG_INVOFF
);
/* a color is active: write the normal fore/back colors */
10424 if(WRAP_COLOR_SET(f
)){
10427 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10428 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10429 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
), sizeof(cb
));
/* strncpy may leave cb unterminated, so terminate explicitly */
10430 cb
[sizeof(cb
)-1] = '\0';
10433 GF_PUTC_GLO(f
->next
, *p
);
10434 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10435 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10436 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
), sizeof(cb
));
10437 cb
[sizeof(cb
)-1] = '\0';
10440 GF_PUTC_GLO(f
->next
, *p
);
/* the line break itself: CR LF */
10443 GF_PUTC_GLO(f
->next
, '\015');
10444 GF_PUTC_GLO(f
->next
, '\012');
/* any saved whitespace is dropped at end of line */
10446 so_truncate(WRAP_SPACES(f
), 0L);
10447 WRAP_SPC_LEN(f
) = 0;
10448 WRAP_TRL_SPC(f
) = 0;
/*
 * wrap_bol - start a new output line.
 *
 *   ivar  when non-zero, WRAP_INDENT(f) is added to the left margin
 *   q     not referenced in the lines visible here (presumably selects
 *         whether wrap_quote_insert() is called -- confirm)
 *
 * Visible steps:
 *   - n = WRAP_MARG_L(f) plus the optional indent;
 *   - when WRAP_HDR_CLR(f) is set, spaces are written for the left
 *     margin, followed by foreground/background color tags built from
 *     VAR_HEADER_GENERAL_FORE_COLOR / VAR_HEADER_GENERAL_BACK_COLOR;
 *   - spaces are written and WRAP_ALLWSP(f) is set to 1;
 *   - wrap_quote_insert() writes the quote prefix;
 *   - bold / underline / inverse "on" tags are written (only the
 *     inverse guard, WRAP_INVERSE(f), is visible);
 *   - when WRAP_COLOR_SET(f), the remembered fg and/or bg color is
 *     written again;
 *   - when WRAP_ANCHOR(f) is non-zero, a handle is written as
 *     TAG_EMBED, TAG_HANDLE, a length byte, then the decimal digits of
 *     the anchor number.
 */
10454 wrap_bol(FILTER_S
*f
, int ivar
, int q
, unsigned char **ipp
, unsigned char **eibp
,
10455 unsigned char **opp
, unsigned char **eobp
)
10457 int n
= WRAP_MARG_L(f
) + (ivar
? WRAP_INDENT(f
) : 0);
/* header coloring: margin first, then the header color tags */
10459 if(WRAP_HDR_CLR(f
)){
10461 char cbuf
[RGBLEN
+1];
10464 if((k
= WRAP_MARG_L(f
)) > 0)
10468 GF_PUTC_GLO(f
->next
, ' ');
10471 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10472 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10474 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_FORE_COLOR
),
10476 cbuf
[sizeof(cbuf
)-1] = '\0';
10479 GF_PUTC_GLO(f
->next
, *p
);
10480 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10481 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10483 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_BACK_COLOR
),
10485 cbuf
[sizeof(cbuf
)-1] = '\0';
10488 GF_PUTC_GLO(f
->next
, *p
);
10493 GF_PUTC_GLO(f
->next
, ' ');
/* nothing but whitespace has been written on this line so far */
10496 WRAP_ALLWSP(f
) = 1;
10499 wrap_quote_insert(f
, ipp
, eibp
, opp
, eobp
);
/* re-assert attributes on the new line */
10502 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10503 GF_PUTC_GLO(f
->next
, TAG_BOLDON
);
10506 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10507 GF_PUTC_GLO(f
->next
, TAG_ULINEON
);
10509 if(WRAP_INVERSE(f
)){
10510 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10511 GF_PUTC_GLO(f
->next
, TAG_INVON
);
/* re-assert the remembered color; fg and bg are handled independently */
10513 if(WRAP_COLOR_SET(f
)){
10515 if(WRAP_COLOR(f
)->fg
[0]){
10517 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10518 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10519 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->fg
), sizeof(cb
));
10520 cb
[sizeof(cb
)-1] = '\0';
10523 GF_PUTC_GLO(f
->next
, *p
);
10525 if(WRAP_COLOR(f
)->bg
[0]){
10527 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10528 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10529 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->bg
), sizeof(cb
));
10530 cb
[sizeof(cb
)-1] = '\0';
10533 GF_PUTC_GLO(f
->next
, *p
);
/* an anchor is open: write its handle as tag, length byte, digits */
10536 if(WRAP_ANCHOR(f
)){
10537 char buf
[64]; int i
;
10538 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10539 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10540 snprintf(buf
, sizeof(buf
), "%d", WRAP_ANCHOR(f
));
10541 GF_PUTC_GLO(f
->next
, (int) strlen(buf
));
10542 for(i
= 0; buf
[i
]; i
++)
10543 GF_PUTC_GLO(f
->next
, buf
[i
]);
/*
 * wrap_quote_insert - write the quote prefix for the current line,
 * once per quote level (j runs from 0 up to WRAP_FL_QD(f)).
 *
 * Prefix text:
 *   - VAR_QUOTE_REPLACE_STRING, when set, is split by get_pair() into
 *     `prefix` and `last_prefix`; if only the second part comes back it
 *     is used as `prefix`.
 *   - When WRAP_LV_FLD(f) is not set and the filter is not running for
 *     the composer (WRAP_FOR_CMPS), `prefix` is written per level and
 *     f->n is advanced by its utf8_width().
 *   - Otherwise '>' (with or without a following ' ') is written; the
 *     visible test compares VAR_REPLY_STRING with ">" and "\">\"".
 *   - After the levels, a ' ' or `last_prefix` is written when j is
 *     non-zero.
 *
 * Color: when WRAP_USE_CLR(f) is set, the QUOTE1/QUOTE2/QUOTE3 color
 * pair is chosen by j % 3 and written with GF_COLOR_PUTC; the pair is
 * released with free_color_pair().
 *
 * `prefix` and `last_prefix` are released with fs_give() at the end.
 * NOTE(review): the guards around those fs_give() calls and around
 * free_color_pair() are not visible in this listing.
 */
10550 wrap_quote_insert(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10551 unsigned char **opp
, unsigned char **eobp
)
10554 COLOR_PAIR
*col
= NULL
;
10555 char *prefix
= NULL
, *last_prefix
= NULL
;
10557 if(ps_global
->VAR_QUOTE_REPLACE_STRING
){
10558 get_pair(ps_global
->VAR_QUOTE_REPLACE_STRING
, &prefix
, &last_prefix
, 0, 0);
/* only one part came back: treat it as the per-level prefix */
10559 if(!prefix
&& last_prefix
){
10560 prefix
= last_prefix
;
10561 last_prefix
= NULL
;
/* one iteration per quote level */
10565 for(j
= 0; j
< WRAP_FL_QD(f
); j
++){
10566 if(WRAP_USE_CLR(f
)){
10568 && ps_global
->VAR_QUOTE1_FORE_COLOR
10569 && ps_global
->VAR_QUOTE1_BACK_COLOR
10570 && (col
= new_color_pair(ps_global
->VAR_QUOTE1_FORE_COLOR
,
10571 ps_global
->VAR_QUOTE1_BACK_COLOR
))
10572 && pico_is_good_colorpair(col
)){
10573 GF_COLOR_PUTC(f
, col
);
10575 else if((j
% 3) == 1
10576 && ps_global
->VAR_QUOTE2_FORE_COLOR
10577 && ps_global
->VAR_QUOTE2_BACK_COLOR
10578 && (col
= new_color_pair(ps_global
->VAR_QUOTE2_FORE_COLOR
,
10579 ps_global
->VAR_QUOTE2_BACK_COLOR
))
10580 && pico_is_good_colorpair(col
)){
10581 GF_COLOR_PUTC(f
, col
);
10583 else if((j
% 3) == 2
10584 && ps_global
->VAR_QUOTE3_FORE_COLOR
10585 && ps_global
->VAR_QUOTE3_BACK_COLOR
10586 && (col
= new_color_pair(ps_global
->VAR_QUOTE3_FORE_COLOR
,
10587 ps_global
->VAR_QUOTE3_BACK_COLOR
))
10588 && pico_is_good_colorpair(col
)){
10589 GF_COLOR_PUTC(f
, col
);
10592 free_color_pair(&col
);
10597 if(!WRAP_LV_FLD(f
)){
/* user-configured replacement prefix; width is counted in f->n */
10598 if(!WRAP_FOR_CMPS(f
) && ps_global
->VAR_QUOTE_REPLACE_STRING
&& prefix
){
10599 for(i
= 0; prefix
[i
]; i
++)
10600 GF_PUTC_GLO(f
->next
, prefix
[i
]);
10601 f
->n
+= utf8_width(prefix
);
10603 else if(ps_global
->VAR_REPLY_STRING
10604 && (!strcmp(ps_global
->VAR_REPLY_STRING
, ">")
10605 || !strcmp(ps_global
->VAR_REPLY_STRING
, "\">\""))){
10606 GF_PUTC_GLO(f
->next
, '>');
10610 GF_PUTC_GLO(f
->next
, '>');
10611 GF_PUTC_GLO(f
->next
, ' ');
10616 GF_PUTC_GLO(f
->next
, '>');
/* after the last level: separator between quote marks and text */
10620 if(j
&& WRAP_LV_FLD(f
)){
10621 GF_PUTC_GLO(f
->next
, ' ');
10624 else if(j
&& last_prefix
){
10625 for(i
= 0; last_prefix
[i
]; i
++)
10626 GF_PUTC_GLO(f
->next
, last_prefix
[i
]);
10627 f
->n
+= utf8_width(last_prefix
);
/* release the strings obtained from get_pair() */
10631 fs_give((void **)&prefix
);
10633 fs_give((void **)&last_prefix
);
10640 * function called from the outside to set
10641 * wrap filter's width option
/*
 * gf_wrap_filter_opt - build the option block for the wrap filter.
 *
 *   width      stored as wrap_col
 *   width_max  stored as wrap_max
 *   margin     NULL, or an array whose [0] and [1] elements become the
 *              left and right margins; NULL gives 0 for both
 *   indent     stored as indent
 *   flags      GFW_* bits; each bit is turned into a 0/1 field
 *
 * Returns a freshly fs_get()-allocated, zero-filled WRAP_S cast to
 * void *.  No range checking is done here (see the NOTE below).
 * NOTE(review): who releases the returned block is not visible in this
 * function.
 */
10644 gf_wrap_filter_opt(int width
, int width_max
, int *margin
, int indent
, int flags
)
10648 /* NOTE: variables MUST be sanity checked before they get here */
10649 wrap
= (WRAP_S
*) fs_get(sizeof(WRAP_S
));
10650 memset(wrap
, 0, sizeof(WRAP_S
));
10651 wrap
->wrap_col
= width
;
10652 wrap
->wrap_max
= width_max
;
10653 wrap
->indent
= indent
;
10654 wrap
->margin_l
= (margin
) ? margin
[0] : 0;
10655 wrap
->margin_r
= (margin
) ? margin
[1] : 0;
/* each option is 1 only when every bit of its GFW_* mask is set in flags */
10656 wrap
->tags
= (GFW_HANDLES
& flags
) == GFW_HANDLES
;
10657 wrap
->on_comma
= (GFW_ONCOMMA
& flags
) == GFW_ONCOMMA
;
10658 wrap
->flowed
= (GFW_FLOWED
& flags
) == GFW_FLOWED
;
10659 wrap
->leave_flowed
= (GFW_FLOW_RESULT
& flags
) == GFW_FLOW_RESULT
;
10660 wrap
->delsp
= (GFW_DELSP
& flags
) == GFW_DELSP
;
10661 wrap
->use_color
= (GFW_USECOLOR
& flags
) == GFW_USECOLOR
;
10662 wrap
->hdr_color
= (GFW_HDRCOLOR
& flags
) == GFW_HDRCOLOR
;
10663 wrap
->for_compose
= (GFW_FORCOMPOSE
& flags
) == GFW_FORCOMPOSE
;
10664 wrap
->handle_soft_hyphen
= (GFW_SOFTHYPHEN
& flags
) == GFW_SOFTHYPHEN
;
10666 return((void *) wrap
);
/*
 * gf_url_hilite_opt - fill in a caller-supplied URL_HILITE_S.
 *
 * The structure is zeroed, `handlesp` is stored in it, and hdr_color
 * is set to 1 when the URH_HDRCOLOR bit(s) are set in flags.  The same
 * pointer is returned cast to void *; nothing is allocated in the
 * visible lines, so `uh` has to outlive whatever uses the returned
 * pointer.
 * NOTE(review): original lines 10677-10678 are not visible in this
 * listing.
 */
10671 gf_url_hilite_opt(URL_HILITE_S
*uh
, HANDLE_S
**handlesp
, int flags
)
10674 memset(uh
, 0, sizeof(URL_HILITE_S
));
10675 uh
->handlesp
= handlesp
;
10676 uh
->hdr_color
= (URH_HDRCOLOR
& flags
) == URH_HDRCOLOR
;
10679 return((void *) uh
);
/*
 * Accessors for the preflow filter's private state.  Each macro casts
 * F->opt to PREFLOW_S * and selects one field: quote_depth,
 * quote_count or sig.  The body of the struct itself is not visible in
 * this listing.
 */
10683 #define PF_QD(F) (((PREFLOW_S *)(F)->opt)->quote_depth)
10684 #define PF_QC(F) (((PREFLOW_S *)(F)->opt)->quote_count)
10685 #define PF_SIG(F) (((PREFLOW_S *)(F)->opt)->sig)
10687 typedef struct preflow_s
{
10694 * This would normally be handled in gf_wrap. If there is a possibility
10695 * that a url we want to recognize is cut in half by a soft newline we
10696 * want to fix that up by putting the halves back together. We do that
10697 * by deleting the soft newline and putting it all in one line. It will
10698 * still get wrapped later in gf_wrap. It isn't pretty with all the
10699 * goto's, but whatta ya gonna do?
/*
 * gf_preflow - filter that runs ahead of gf_wrap on flowed text and
 * rejoins lines split by a soft newline (see the comment preceding this
 * function for the motivation).
 *
 * flg selects the operation:
 *   GF_DATA   process buffered input.  `state` is loaded from f->f1 and
 *             `pending` from f->f2.  Visible pieces of the state
 *             machine: counting leading '>' (quote level, PF_QC),
 *             eating one stuffed space, and recognizing the signature
 *             separator one byte at a time (PF_SIG 1 = '-', 2 = '--',
 *             3 = '-- ', 4 = confirmed sig line).  PF_QD holds the
 *             quote depth of the previous line and is updated from
 *             PF_QC.
 *   GF_EOD    free f->opt, flush the next filter and pass GF_EOD on.
 *   GF_RESET  allocate a zeroed PREFLOW_S into f->opt, set f->f1 = BOL
 *             and f->f2 = 0.
 *
 * NOTE(review): the switch/case skeleton and most branch conditions
 * are not visible in this listing, so the exact circumstances under
 * which each GF_PUTC below fires cannot be stated from here.
 */
10702 gf_preflow(FILTER_S
*f
, int flg
)
10704 GF_INIT(f
, f
->next
);
10706 if(flg
== GF_DATA
){
10707 register unsigned char c
;
10708 register int state
= f
->f1
;
10709 register int pending
= f
->f2
;
10711 while(GF_GETC(f
, c
)){
10725 GF_PUTC(f
->next
, c
);
10739 GF_PUTC(f
->next
, '\012');
10754 GF_PUTC(f
->next
, ' ');
10770 GF_PUTC(f
->next
, ' ');
10771 GF_PUTC(f
->next
, '\012');
10781 if(c
== '>'){ /* count quote level */
10786 done_counting_quotes
:
10787 if(c
== ' '){ /* eat stuffed space */
10792 done_with_stuffed_space
:
10793 if(c
== '-'){ /* look for signature */
10801 if(PF_QD(f
) == PF_QC(f
) && PF_SIG(f
) < 4){
10802 /* delete pending */
10804 PF_QD(f
) = PF_QC(f
);
10806 /* suppress quotes, too */
10811 * This should have been a hard new line
10812 * instead so leave out the trailing space.
10814 GF_PUTC(f
->next
, '\015');
10815 GF_PUTC(f
->next
, '\012');
10817 PF_QD(f
) = PF_QC(f
);
10820 else if(pending
== 1){
10821 GF_PUTC(f
->next
, '\015');
10822 GF_PUTC(f
->next
, '\012');
10823 PF_QD(f
) = PF_QC(f
);
10826 PF_QD(f
) = PF_QC(f
);
/* write one '>' per counted quote level; PF_QC is consumed by the loop */
10831 while(PF_QC(f
)-- > 0)
10832 GF_PUTC(f
->next
, '>');
10840 GF_PUTC(f
->next
, '-');
10844 GF_PUTC(f
->next
, '-');
10845 GF_PUTC(f
->next
, '-');
10850 GF_PUTC(f
->next
, '-');
10851 GF_PUTC(f
->next
, '-');
10852 GF_PUTC(f
->next
, ' ');
10857 goto default_case
; /* to handle c */
10862 case FL_QLEV
: /* count quote level */
10866 goto done_counting_quotes
;
10870 case FL_STF
: /* eat stuffed space */
10871 goto done_with_stuffed_space
;
10874 case FL_SIG
: /* deal with sig indicator */
10876 case 1: /* saw '-' */
10880 goto done_with_sig
;
10884 case 2: /* saw '--' */
10888 goto done_with_sig
;
10892 case 3: /* saw '-- ' */
10894 PF_SIG(f
) = 4; /* it really is a sig line */
10896 goto done_with_sig
;
10906 GF_END(f
, f
->next
);
/* end of data: release private state, then drain and notify downstream */
10908 else if(flg
== GF_EOD
){
10909 fs_give((void **) &f
->opt
);
10910 (void) GF_FLUSH(f
->next
);
10911 (*f
->next
->f
)(f
->next
, GF_EOD
);
/* reset: fresh zeroed private state */
10913 else if(flg
== GF_RESET
){
10916 pf
= (PREFLOW_S
*) fs_get(sizeof(*pf
));
10917 memset(pf
, 0, sizeof(*pf
));
10918 f
->opt
= (void *) pf
;
10920 f
->f1
= BOL
; /* state */
10921 f
->f2
= 0; /* pending */
10922 PF_QD(f
) = 0; /* quote depth */
10923 PF_QC(f
) = 0; /* quote count */
10924 PF_SIG(f
) = 0; /* sig level */
10932 * LINE PREFIX FILTER - insert given text at beginning of each
/*
 * GF_PREFIX_WRITE(s) - write the string s to f->next one character at
 * a time with GF_PUTC; does nothing when s is NULL.  The expansion
 * refers to `f` directly, so a FILTER_S *f must be in scope wherever
 * the macro is used.
 */
10937 #define GF_PREFIX_WRITE(s) { \
10938 register char *p; \
10939 if((p = (s)) != NULL) \
10941 GF_PUTC(f->next, *p++); \
10946 * The simple filter: prepends each line with the requested prefix.
10947 * If the prefix is NULL, nothing is prepended. As with all filters, it
10948 * assumes NVT (CRLF) end of lines.
/*
 * gf_prefix - line prefix filter.  f->opt is used as the prefix string
 * (char *).
 *
 * flg selects the operation:
 *   GF_DATA   copy input to f->next.  The prefix is written once before
 *             the first byte (`first`, kept in f->f2) and again after
 *             every end of line.  Every end of line is written as CR LF:
 *             CRLF, a bare CR and a bare LF are all treated alike (see
 *             the state description inside the loop).  `state` is saved
 *             back to f->f1 between calls.
 *   GF_EOD    flush the next filter and pass GF_EOD on.
 *   GF_RESET  f->f2 = 1, i.e. nothing written yet.
 *
 * NOTE(review): the write-back of `first` to f->f2 and the reset of
 * f->f1 are not visible in this listing -- confirm in the full source.
 */
10951 gf_prefix(FILTER_S
*f
, int flg
)
10953 GF_INIT(f
, f
->next
);
10955 if(flg
== GF_DATA
){
10956 register unsigned char c
;
10957 register int state
= f
->f1
;
10958 register int first
= f
->f2
;
10960 while(GF_GETC(f
, c
)){
10962 if(first
){ /* write initial prefix!! */
10963 first
= 0; /* but just once */
10964 GF_PREFIX_WRITE((char *) f
->opt
);
10968 * State == 0 is the starting state and the usual state.
10969 * State == 1 means we saw a CR and haven't acted on it yet.
10970 * We are looking for a LF to get the CRLF end of line.
10971 * However, we also treat bare CR and bare LF as if they
10972 * were CRLF sequences. What else could it mean in text?
10973 * This filter is only used for text so that is probably
10974 * a reasonable interpretation of the bad input.
10976 if(c
== '\015'){ /* CR */
10977 if(state
){ /* Treat pending CR as endofline, */
10978 GF_PUTC(f
->next
, '\015'); /* and remain in saw-a-CR state. */
10979 GF_PUTC(f
->next
, '\012');
10980 GF_PREFIX_WRITE((char *) f
->opt
);
10986 else if(c
== '\012'){ /* LF */
10987 GF_PUTC(f
->next
, '\015'); /* Got either a CRLF or a bare LF, */
10988 GF_PUTC(f
->next
, '\012'); /* treat both as if a CRLF. */
10989 GF_PREFIX_WRITE((char *) f
->opt
);
10992 else{ /* any other character */
10994 GF_PUTC(f
->next
, '\015'); /* Treat pending CR as endofline. */
10995 GF_PUTC(f
->next
, '\012');
10996 GF_PREFIX_WRITE((char *) f
->opt
);
11000 GF_PUTC(f
->next
, c
);
11004 f
->f1
= state
; /* save state for next chunk of data */
11006 GF_END(f
, f
->next
);
/* end of data: drain and notify downstream */
11008 else if(flg
== GF_EOD
){
11009 (void) GF_FLUSH(f
->next
);
11010 (*f
->next
->f
)(f
->next
, GF_EOD
);
11012 else if(flg
== GF_RESET
){
11013 dprint((9, "-- gf_reset prefix\n"));
11015 f
->f2
= 1; /* nothing written yet */
11021 * function called from the outside to set
11022 * prefix filter's prefix string
/*
 * gf_prefix_opt - package the prefix string for gf_prefix.  The pointer
 * is returned as-is (cast to void *); the string is not copied, so it
 * has to stay valid for as long as the filter uses it.
 */
11025 gf_prefix_opt(char *prefix
)
11027 return((void *) prefix
);
11032 * LINE TEST FILTER - accumulate lines and offer each to the provided
/*
 * Private state and helper macros for the line test filter.
 *
 * LINETEST_S (body not visible here) is reached through F->opt; the
 * macros below use its `f` member (the test callback) and its `local`
 * member (passed through to the callback).
 *
 * LINE_TEST_BLOCK     size by which the line accumulator grows.
 * GF_LINE_TEST_EOB    address of the last byte of the accumulator:
 *                     f->line + f->f2 - 1, f->f2 being the allocated
 *                     size.
 * GF_LINE_TEST_ADD    append c to the accumulator; the visible lines
 *                     grow the buffer by LINE_TEST_BLOCK with
 *                     fs_resize() and recompute the local `eobuf` and
 *                     `p`, so both must be in scope at the call site.
 * GF_LINE_TEST_TEST   call the test callback with the line number
 *                     ((F)->n, post-incremented) and store its result
 *                     in D; then copy the accumulated line to
 *                     (F)->next, splicing in text from the LT_INS_S
 *                     list `ins` wherever cp reaches insp->where, and
 *                     finally free the list with gf_line_test_free_ins().
 *                     One visible path frees (F)->line and (F)->opt and
 *                     reports "translation error" through gf_error();
 *                     its condition is not visible in this listing.
 */
11036 typedef struct _linetest_s
{
11042 /* accumulator growth increment */
11043 #define LINE_TEST_BLOCK 1024
11045 #define GF_LINE_TEST_EOB(f) \
11046 ((f)->line + ((f)->f2 - 1))
11048 #define GF_LINE_TEST_ADD(f, c) \
11051 f->f2 += LINE_TEST_BLOCK; \
11052 fs_resize((void **)&f->line, \
11053 (size_t) f->f2 * sizeof(char)); \
11054 eobuf = GF_LINE_TEST_EOB(f); \
11055 p = eobuf - LINE_TEST_BLOCK; \
11060 #define GF_LINE_TEST_TEST(F, D) \
11063 register char *cp; \
11065 LT_INS_S *ins = NULL, *insp; \
11067 (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
11069 ((LINETEST_S *) (F)->opt)->local); \
11073 fs_give((void **) &(F)->line); \
11075 fs_give((void **) &(F)->opt); \
11076 gf_error(_("translation error")); \
11079 for(insp = ins, cp = (F)->line; cp < p; ){ \
11080 if(insp && cp == insp->where){ \
11081 if(insp->len > 0){ \
11082 for(l = 0; l < insp->len; l++){ \
11083 c = (unsigned char) insp->text[l]; \
11084 GF_PUTC((F)->next, c); \
11086 insp = insp->next; \
11088 } else if(insp->len < 0){ \
11090 insp = insp->next; \
11094 GF_PUTC((F)->next, *cp); \
11098 for(l = 0; l < insp->len; l++){ \
11099 c = (unsigned char) insp->text[l]; \
11100 GF_PUTC((F)->next, c); \
11102 insp = insp->next; \
11104 gf_line_test_free_ins(&ins); \
11111 * this simple filter accumulates characters until a newline, offers it
11112 * to the provided test function, and then passes it on. It assumes
/*
 * gf_line_test - accumulate input a line at a time, offer each line to
 * the test callback (through GF_LINE_TEST_TEST) and pass it on.
 *
 * flg selects the operation:
 *   GF_DATA   bytes are appended to f->line with GF_LINE_TEST_ADD.  At
 *             end of line the callback result `done` is examined:
 *               2          skip this line;
 *               otherwise  the line is followed by CR LF on output;
 *               TRUE       the tester has seen enough: this filter
 *                          unlinks itself from the gf_master chain,
 *                          copies the rest of its input straight to
 *                          f->next, flushes, and frees f->line and f.
 *   GF_EOD    test whatever is left in the accumulator, free f->line
 *             and f->opt, flush the next filter and pass GF_EOD on.
 *   GF_RESET  state 0, line number 0, and a fresh accumulator of
 *             LINE_TEST_BLOCK bytes.
 *
 * NOTE(review): on the self-removal path `f` itself is released with
 * fs_give(); the lines that follow are not visible in this listing --
 * confirm that nothing touches f (e.g. GF_END) afterwards.
 * NOTE(review): f->opt is not visibly freed on that path either.
 */
11116 gf_line_test(FILTER_S
*f
, int flg
)
11118 register char *p
= f
->linep
;
11119 register char *eobuf
= GF_LINE_TEST_EOB(f
);
11120 GF_INIT(f
, f
->next
);
11122 if(flg
== GF_DATA
){
11123 register unsigned char c
;
11124 register int state
= f
->f1
;
11126 while(GF_GETC(f
, c
)){
11133 GF_LINE_TEST_TEST(f
, done
);
11137 if(done
== 2) /* skip this line! */
11140 GF_PUTC(f
->next
, '\015');
11141 GF_PUTC(f
->next
, '\012');
11143 * if the line tester returns TRUE, it's
11144 * telling us its seen enough and doesn't
11145 * want to see any more. Remove ourself
11146 * from the pipeline...
11149 if(gf_master
== f
){
11150 gf_master
= f
->next
;
11155 for(fprev
= gf_master
;
11156 fprev
&& fprev
->next
!= f
;
11157 fprev
= fprev
->next
)
11160 if(fprev
) /* wha??? */
11161 fprev
->next
= f
->next
;
11166 while(GF_GETC(f
, c
)) /* pass input */
11167 GF_PUTC(f
->next
, c
);
11169 (void) GF_FLUSH(f
->next
); /* and drain queue */
11170 fs_give((void **)&f
->line
);
11171 fs_give((void **)&f
); /* wax our data */
11177 else /* add CR to buffer */
11178 GF_LINE_TEST_ADD(f
, '\015');
11179 } /* fall thru to handle 'c' */
11181 if(c
== '\015') /* newline? */
11184 GF_LINE_TEST_ADD(f
, c
);
11188 GF_END(f
, f
->next
);
11190 else if(flg
== GF_EOD
){
11193 GF_LINE_TEST_TEST(f
, i
); /* examine remaining data */
11194 fs_give((void **) &f
->line
); /* free line buffer */
11195 fs_give((void **) &f
->opt
); /* free test struct */
11196 (void) GF_FLUSH(f
->next
);
11197 (*f
->next
->f
)(f
->next
, GF_EOD
);
11199 else if(flg
== GF_RESET
){
11200 dprint((9, "-- gf_reset line_test\n"));
11201 f
->f1
= 0; /* state */
11202 f
->n
= 0L; /* line number */
11203 f
->f2
= LINE_TEST_BLOCK
; /* size of alloc'd line */
11204 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
11212 * function called from the outside to operate on accumulated line.
/*
 * gf_line_test_opt - build the option block for gf_line_test.
 *
 * Allocates a zero-filled LINETEST_S with fs_get(), stores `local` in
 * it and returns it cast to void *.  gf_line_test releases it with
 * fs_give(&f->opt) at GF_EOD.
 * NOTE(review): the line that stores test_f (original line 11221) is
 * not visible in this listing; presumably it goes into the `f` member
 * that GF_LINE_TEST_TEST calls -- confirm.
 */
11215 gf_line_test_opt(linetest_t test_f
, void *local
)
11219 ltp
= (LINETEST_S
*) fs_get(sizeof(LINETEST_S
));
11220 memset(ltp
, 0, sizeof(LINETEST_S
));
11222 ltp
->local
= local
;
11223 return((void *) ltp
);
/*
 * gf_line_test_new_ins - allocate a new insertion record into *ins.
 *
 *   ins  receives the new LT_INS_S
 *   p    not referenced in the lines visible here (presumably stored as
 *        the record's `where` position, which GF_LINE_TEST_TEST
 *        compares against -- confirm)
 *   s,n  text to insert and its length; n is stored as len
 *
 * When n > 0 an n-byte buffer is allocated and n bytes of s are copied
 * into it; in the visible lines no terminating NUL is added, and
 * GF_LINE_TEST_TEST reads the text by length.  When n <= 0 text is set
 * to NULL (one of the two visible branches; the `else` line itself is
 * not shown).
 *
 * Returns the address of the new record's `next` link, so that calls
 * can be chained to build a list in order.
 */
11229 gf_line_test_new_ins(LT_INS_S
**ins
, char *p
, char *s
, int n
)
11231 *ins
= (LT_INS_S
*) fs_get(sizeof(LT_INS_S
));
11232 if(((*ins
)->len
= n
) > 0)
11233 strncpy((*ins
)->text
= (char *) fs_get(n
* sizeof(char)), s
, n
);
11235 (*ins
)->text
= NULL
;
11238 (*ins
)->next
= NULL
;
11239 return(&(*ins
)->next
);
/*
 * gf_line_test_free_ins - release a list of insertion records.
 *
 * Recursive: the rest of the list ((*ins)->next) is released first,
 * then this record's text, then the record itself, each with fs_give().
 * NOTE(review): the NULL guards around these calls are not visible in
 * this listing.
 */
11244 gf_line_test_free_ins(LT_INS_S
**ins
)
11248 gf_line_test_free_ins(&(*ins
)->next
);
11251 fs_give((void **) &(*ins
)->text
);
11253 fs_give((void **) ins
);
11259 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11260 * with editorial comment
/*
 * Private state of the prepend-editorial filter (PREPED_S).  The body
 * is not visible in this listing; gf_prepend_editorial reads an `f`
 * member (test callback) and a `text` member (string to prepend).
 */
11263 typedef struct _preped_s
{
11270 * gf_prepend_editorial - accumulate filtered text and prepend its
11271 * output with given text
/*
 * gf_prepend_editorial - hold back all filtered text until end of
 * data, then write it out, optionally preceded by an editorial string.
 *
 * flg selects the operation:
 *   GF_DATA   every input byte is appended to the STORE_S kept in
 *             f->data; nothing is passed on yet.
 *   GF_EOD    if the PREPED_S in f->opt has no test callback, or the
 *             callback returns non-zero, its `text` is written to
 *             f->next first.  The store is then rewound and copied to
 *             f->next byte by byte.  The store and f->opt are released,
 *             the next filter is flushed and GF_EOD is passed on.
 *   GF_RESET  create the backing store (CharStar, EDIT_ACCESS).
 *
 * NOTE(review): the debug message in the GF_RESET branch reads
 * "-- gf_reset line_test", which looks copied from gf_line_test.
 * NOTE(review): the result of so_get() is not checked in the visible
 * lines.
 */
11276 gf_prepend_editorial(FILTER_S
*f
, int flg
)
11278 GF_INIT(f
, f
->next
);
11280 if(flg
== GF_DATA
){
11281 register unsigned char c
;
/* accumulate only; output happens at GF_EOD */
11283 while(GF_GETC(f
, c
)){
11284 so_writec(c
, (STORE_S
*) f
->data
);
11287 GF_END(f
, f
->next
);
11289 else if(flg
== GF_EOD
){
/* no test callback, or the callback says yes: write the editorial text */
11292 if(!((PREPED_S
*)(f
)->opt
)->f
|| (*((PREPED_S
*)(f
)->opt
)->f
)()){
11293 char *p
= ((PREPED_S
*)(f
)->opt
)->text
;
11295 for( ; p
&& *p
; p
++)
11296 GF_PUTC(f
->next
, *p
);
/* replay the accumulated text from the start of the store */
11299 so_seek((STORE_S
*) f
->data
, 0L, 0);
11300 while(so_readc(&c
, (STORE_S
*) f
->data
)){
11301 GF_PUTC(f
->next
, c
);
11304 so_give((STORE_S
**) &f
->data
);
11305 fs_give((void **) &f
->opt
);
11306 (void) GF_FLUSH(f
->next
);
11307 (*f
->next
->f
)(f
->next
, GF_EOD
);
11309 else if(flg
== GF_RESET
){
11310 dprint((9, "-- gf_reset line_test\n"));
11311 f
->data
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
11317 * function called from the outside to set up prepending editorial
/*
 * gf_prepend_editorial_opt - build the option block for
 * gf_prepend_editorial.
 *
 * Allocates a zero-filled PREPED_S with fs_get() and returns it cast to
 * void *.  gf_prepend_editorial releases it with fs_give(&f->opt) at
 * GF_EOD.
 * NOTE(review): the lines that store test_f and text (original lines
 * 11327-11328) are not visible in this listing; presumably they go into
 * the `f` and `text` members read by gf_prepend_editorial -- confirm,
 * including whether text is copied or only referenced.
 */
11321 gf_prepend_editorial_opt(prepedtest_t test_f
, char *text
)
11325 pep
= (PREPED_S
*) fs_get(sizeof(PREPED_S
));
11326 memset(pep
, 0, sizeof(PREPED_S
));
11329 return((void *) pep
);
11334 * Network virtual terminal to local newline convention filter
/*
 * gf_nvtnl_local - convert network (NVT) newlines to the local
 * convention.
 *
 * flg selects the operation:
 *   GF_DATA   `state` is loaded from f->f1.  In the visible lines one
 *             branch writes '\012', another writes a held-back '\015'
 *             and then falls through to handle the current byte, and
 *             the general case passes c on unchanged.
 *   GF_EOD    flush the next filter and pass GF_EOD on.
 *   GF_RESET  only a debug message is visible.
 *
 * NOTE(review): the branch conditions and the state updates are not
 * visible in this listing, so the exact CR/LF rules cannot be stated
 * from here.
 */
11337 gf_nvtnl_local(FILTER_S
*f
, int flg
)
11339 GF_INIT(f
, f
->next
);
11341 if(flg
== GF_DATA
){
11342 register unsigned char c
;
11343 register int state
= f
->f1
;
11345 while(GF_GETC(f
, c
)){
11349 GF_PUTC(f
->next
, '\012');
11353 GF_PUTC(f
->next
, '\015');
11354 /* fall thru to deal with 'c' */
11360 GF_PUTC(f
->next
, c
);
11364 GF_END(f
, f
->next
);
/* end of data: drain and notify downstream */
11366 else if(flg
== GF_EOD
){
11367 (void) GF_FLUSH(f
->next
);
11368 (*f
->next
->f
)(f
->next
, GF_EOD
);
11370 else if(flg
== GF_RESET
){
11371 dprint((9, "-- gf_reset nvtnl_local\n"));
11378 * local to network newline convention filter
11381 gf_local_nvtnl(FILTER_S
*f
, int flg
)
11383 GF_INIT(f
, f
->next
);
11385 if(flg
== GF_DATA
){
11386 register unsigned char c
;
11388 while(GF_GETC(f
, c
)){
11390 GF_PUTC(f
->next
, '\015');
11391 GF_PUTC(f
->next
, '\012');
11393 else if(c
!= '\015') /* do not copy isolated \015 into source */
11394 GF_PUTC(f
->next
, c
);
11397 GF_END(f
, f
->next
);
11399 else if(flg
== GF_EOD
){
11400 (void) GF_FLUSH(f
->next
);
11401 (*f
->next
->f
)(f
->next
, GF_EOD
);
11404 dprint((9, "-- gf_reset local_nvtnl\n"));