2 * ========================================================================
3 * Copyright 2013-2022 Eduardo Chappa
4 * Copyright 2006-2008 University of Washington
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * ========================================================================
15 /*======================================================================
18 This code provides a generalized, flexible way to allow
19 piping of data thru filters. Each filter is passed a structure
20 that it will use to hold its static data while it operates on
21 the stream of characters that are passed to it. After processing
22 it will either return or call the next filter in
23 the pipe with any character (or characters) it has ready to go. This
24 means some terminal type of filter has to be the last in the
25 chain (i.e., one that writes the passed char someplace, but doesn't
28 See below for more details.
30 The motivation is to handle MIME decoding, richtext conversion,
31 iso_code stripping and anything else that may come down the
32 pike (e.g., PEM) in an elegant fashion. mikes (920811)
35 reasonable error handling
40 #include "../pith/headers.h"
41 #include "../pith/filter.h"
42 #include "../pith/conf.h"
43 #include "../pith/store.h"
44 #include "../pith/color.h"
45 #include "../pith/escapes.h"
46 #include "../pith/pipe.h"
47 #include "../pith/status.h"
48 #include "../pith/string.h"
49 #include "../pith/util.h"
50 #include "../pith/url.h"
51 #include "../pith/init.h"
52 #include "../pith/help.h"
53 #include "../pico/keydefs.h"
56 #include "../pico/osdep/mswin.h"
63 int gf_so_writec(int);
64 int gf_so_readc(unsigned char *);
65 int gf_freadc(unsigned char *);
66 int gf_freadc_locale(unsigned char *);
67 int gf_freadc_getchar(unsigned char *, void *);
69 int gf_fwritec_locale(int);
71 int gf_freadc_windows(unsigned char *);
73 int gf_preadc(unsigned char *);
74 int gf_preadc_locale(unsigned char *);
75 int gf_preadc_getchar(unsigned char *, void *);
77 int gf_pwritec_locale(int);
78 int gf_sreadc(unsigned char *);
79 int gf_sreadc_locale(unsigned char *);
80 int gf_sreadc_getchar(unsigned char *, void *);
82 int gf_swritec_locale(int);
83 void gf_terminal(FILTER_S
*, int);
84 void gf_error(char *);
85 char *gf_filter_puts(char *);
86 void gf_filter_eod(void);
88 void gf_8bit_put(FILTER_S
*, int);
93 * System specific options
101 * Hooks for callers to adjust behavior
103 char *(*pith_opt_pretty_var_name
)(char *);
104 char *(*pith_opt_pretty_feature_name
)(char *, int);
108 * pointer to first function in a pipe, and pointer to last filter
110 FILTER_S
*gf_master
= NULL
;
111 static gf_io_t last_filter
;
112 static char *gf_error_string
;
113 static long gf_byte_count
;
114 static jmp_buf gf_error_state
;
117 #define GF_NOOP 0x01 /* flags used by generalized */
118 #define GF_EOD 0x02 /* filters */
119 #define GF_DATA 0x04 /* See filter.c for more */
120 #define GF_ERROR 0x08 /* details */
121 #define GF_RESET 0x10
125 * A list of states used by the various filters. Reused in many filters.
145 #define STOP_DECODING 18
151 * Macros to reduce function call overhead associated with calling
152 * each filter for each byte filtered, and to minimize filter structure
153 * dereferences. NOTE: "queuein" has to do with putting chars into the
154 * filter structs data queue. So, writing at the queuein offset is
155 * what a filter does to pass processed data out of itself. Ditto for
156 * queueout. This explains the FI --> queueout init stuff below.
/* Address of the first slot of filter F's queue. */
158 #define GF_QUE_START(F) (&(F)->queue[0])
/* Address of the last slot (index GF_MAXBUF - 1) of filter F's queue. */
159 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
/*
 * Cursor initializers.  Each assigns to a cursor variable that must already
 * be in scope (ip, eib, op, eob -- or, for the _GLO forms, the pointers-to-
 * cursors ipp and eibp).  A NULL filter yields a NULL cursor.
 */
/* ip: write cursor, positioned at F's queuein offset. */
161 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
/* Same as GF_IP_INIT, but stores through the pointer ipp. */
162 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
/* eib: write limit, set to the last slot of F's queue. */
163 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
/* Same as GF_EIB_INIT, but stores through the pointer eibp. */
164 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
/* op: read cursor, positioned at F's queueout offset. */
165 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
/* eob: read limit, positioned at F's queuein offset. */
166 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
/* Write the ip cursor back into F as a queuein offset. */
168 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
/* Same as GF_IP_END, but reads the cursor through ipp. */
169 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
/* Write the op cursor back into F as a queueout offset. */
170 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
/*
 * Declare and initialize all four cursors: op/eob read from filter FI,
 * ip/eib write into filter FO.  Expands to declarations, so it has to
 * appear where declarations are allowed.
 */
172 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
173 unsigned char *GF_EOB_INIT(FI); \
174 unsigned char *GF_IP_INIT(FO); \
175 unsigned char *GF_EIB_INIT(FO);
/*
 * Rewind F's queue: op and eob go back to the queue start and both stored
 * offsets are zeroed.  The value of the whole expression is 0.
 */
177 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
178 (F)->queueout = (F)->queuein = 0)
/* Store the read cursor back into FI and the write cursor back into FO. */
180 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
/*
 * Hand F everything queued so far: save ip as queuein, call F's filter
 * function with GF_DATA, then reload ip and eib from F.  The result is 1
 * when the reloaded eib is non-NULL, 0 otherwise.
 */
182 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
183 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
/* Same as GF_FLUSH, but works on the cursors through ipp and eibp. */
184 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
185 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
/*
 * Append byte C at ip and advance.  Once ip reaches eib (the last slot,
 * which is therefore never written) the queue is flushed to F; otherwise
 * the expression is 1.
 */
187 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
/* Same as GF_PUTC, but works on the cursors through ipp and eibp. */
188 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
191 * Introducing the *_GLO macros for use in splitting the big macros out
192 * into functions (wrap_flush, wrap_eol). The reason we need a
193 * separate macro is because of the vars ip, eib, op, and eob, which are
194 * set up locally in a call to GF_INIT. To preserve these variables
195 * in the new functions, we now pass pointers to these four vars. Each
196 * of these new functions expects the presence of pointer vars
197 * ipp, eibp, opp, and eobp.
200 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
202 #define GF_COLOR_PUTC(F, C) { \
205 GF_PUTC_GLO((F)->next, TAG_EMBED); \
206 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
207 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
208 cb[sizeof(cb)-1] = '\0'; \
211 GF_PUTC_GLO((F)->next, *p); \
212 GF_PUTC_GLO((F)->next, TAG_EMBED); \
213 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
214 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
215 cb[sizeof(cb)-1] = '\0'; \
218 GF_PUTC_GLO((F)->next, *p); \
222 * Generalized getc and putc routines. provided here so they don't
223 * need to be re-done elsewhere to
227 * pointers to objects to be used by the generic getc and putc
230 static struct gf_io_struct
{
239 #define GF_SO_STACK struct gf_so_stack
243 } *gf_so_in
, *gf_so_out
;
248 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
250 * The purpose of this routine is so that we can avoid setting SIGALRM
251 * when writing into a PicoText object, because that type of object uses
252 * unprotected malloc/free/realloc, which can't be interrupted.
255 pc_is_picotext(gf_io_t pc
)
257 return(pc
== gf_so_writec
&& gf_so_out
&& gf_so_out
->so
&&
258 gf_so_out
->so
->src
== ExternalText
);
264 * setup to use and return a pointer to the generic
268 gf_set_readc(gf_io_t
*gc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
272 gf_in
.cb
.cbuf
[0] = '\0';
273 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
274 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
277 gf_in
.file
= (FILE *)txt
;
278 fseek(gf_in
.file
, 0L, 0);
280 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_windows
283 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_freadc_locale
287 else if(src
== PipeStar
){
288 gf_in
.pipe
= (PIPE_S
*)txt
;
290 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_preadc_locale
294 gf_in
.txtp
= (char *)txt
;
295 *gc
= (flags
& READ_FROM_LOCALE
) ? gf_sreadc_locale
302 * setup to use and return a pointer to the generic
306 gf_set_writec(gf_io_t
*pc
, void *txt
, long unsigned int len
, SourceType src
, int flags
)
309 gf_out
.flags
= flags
;
310 gf_out
.cb
.cbuf
[0] = '\0';
311 gf_out
.cb
.cbufp
= gf_out
.cb
.cbuf
;
312 gf_out
.cb
.cbufend
= gf_out
.cb
.cbuf
;
315 gf_out
.file
= (FILE *)txt
;
319 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_fwritec_locale
323 else if(src
== PipeStar
){
324 gf_out
.pipe
= (PIPE_S
*)txt
;
325 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_pwritec_locale
329 gf_out
.txtp
= (char *)txt
;
330 *pc
= (flags
& WRITE_TO_LOCALE
) ? gf_swritec_locale
337 * setup to use and return a pointer to the generic
341 gf_set_so_readc(gf_io_t
*gc
, STORE_S
*so
)
343 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
353 gf_clear_so_readc(STORE_S
*so
)
357 if((sp
= gf_so_in
) != NULL
){
359 gf_so_in
= gf_so_in
->next
;
360 fs_give((void **) &sp
);
363 alpine_panic("Programmer botch: Can't unstack store readc");
366 alpine_panic("Programmer botch: NULL store clearing store readc");
371 * setup to use and return a pointer to the generic
375 gf_set_so_writec(gf_io_t
*pc
, STORE_S
*so
)
377 GF_SO_STACK
*sp
= (GF_SO_STACK
*) fs_get(sizeof(GF_SO_STACK
));
380 sp
->next
= gf_so_out
;
387 gf_clear_so_writec(STORE_S
*so
)
391 if((sp
= gf_so_out
) != NULL
){
393 gf_so_out
= gf_so_out
->next
;
394 fs_give((void **) &sp
);
397 alpine_panic("Programmer botch: Can't unstack store writec");
400 alpine_panic("Programmer botch: NULL store clearing store writec");
405 * put the character to the object previously defined
410 return(so_writec(c
, gf_so_out
->so
));
415 * get a character from an object previously defined
418 gf_so_readc(unsigned char *c
)
420 return(so_readc(c
, gf_so_in
->so
));
424 /* get a character from a file */
425 /* assumes gf_in struct is filled in */
427 gf_freadc(unsigned char *c
)
433 clearerr(gf_in
.file
);
434 rv
= fread(c
, sizeof(unsigned char), (size_t)1, gf_in
.file
);
435 } while(!rv
&& ferror(gf_in
.file
) && errno
== EINTR
);
442 gf_freadc_locale(unsigned char *c
)
444 return(generic_readc_locale(c
, gf_freadc_getchar
, (void *) gf_in
.file
, &gf_in
.cb
));
449 * This is just to make it work with generic_readc_locale.
452 gf_freadc_getchar(unsigned char *c
, void *extraarg
)
457 file
= (FILE *) extraarg
;
462 rv
= fread(c
, sizeof(unsigned char), (size_t)1, file
);
463 } while(!rv
&& ferror(file
) && errno
== EINTR
);
470 * Put a character to a file.
471 * Assumes gf_out struct is filled in.
472 * Returns 1 on success, <= 0 on failure.
477 unsigned char ch
= (unsigned char)c
;
481 rv
= fwrite(&ch
, sizeof(unsigned char), (size_t)1, gf_out
.file
);
482 while(!rv
&& ferror(gf_out
.file
) && errno
== EINTR
);
489 * The locale version converts from UTF-8 to user's locale charset
490 * before writing the characters.
493 gf_fwritec_locale(int c
)
497 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
499 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
500 for(i
= 0; i
< outchars
; i
++)
501 if(gf_fwritec(obuf
[i
]) != 1){
513 * Read unicode characters from windows filesystem and return
514 * them as a stream of UTF-8 characters. The stream is assumed
515 * opened so that it will know how to put together the unicode.
517 * (This is totally untested, copied loosely from so_file_readc_windows
518 * which may or may not be appropriate.)
521 gf_freadc_windows(unsigned char *c
)
526 /* already got some from previous call? */
527 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
528 *c
= *gf_in
.cb
.cbufp
;
531 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
532 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
533 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
540 /* windows only so second arg is ignored */
541 ucs
= read_a_wide_char(gf_in
.file
, NULL
);
542 rv
= (ucs
== CCONV_EOF
) ? 0 : 1;
547 * Now we need to convert the UCS character to UTF-8
548 * and dole out the UTF-8 one char at a time.
550 gf_in
.cb
.cbufend
= utf8_put(gf_in
.cb
.cbuf
, (unsigned long) ucs
);
551 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
552 if(gf_in
.cb
.cbufend
> gf_in
.cb
.cbuf
){
553 *c
= *gf_in
.cb
.cbufp
;
555 if(gf_in
.cb
.cbufp
>= gf_in
.cb
.cbufend
){
556 gf_in
.cb
.cbufend
= gf_in
.cb
.cbuf
;
557 gf_in
.cb
.cbufp
= gf_in
.cb
.cbuf
;
566 #endif /* _WINDOWS */
570 gf_preadc(unsigned char *c
)
572 return(pipe_readc(c
, gf_in
.pipe
));
577 gf_preadc_locale(unsigned char *c
)
579 return(generic_readc_locale(c
, gf_preadc_getchar
, (void *) gf_in
.pipe
, &gf_in
.cb
));
584 * This is just to make it work with generic_readc_locale.
587 gf_preadc_getchar(unsigned char *c
, void *extraarg
)
591 pipe
= (PIPE_S
*) extraarg
;
593 return(pipe_readc(c
, pipe
));
598 * Put a character to a pipe.
599 * Assumes gf_out struct is filled in.
600 * Returns 1 on success, <= 0 on failure.
605 return(pipe_writec(c
, gf_out
.pipe
));
610 * The locale version converts from UTF-8 to user's locale charset
611 * before writing the characters.
614 gf_pwritec_locale(int c
)
618 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
620 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
621 for(i
= 0; i
< outchars
; i
++)
622 if(gf_pwritec(obuf
[i
]) != 1){
632 /* get a character from a string, return nonzero if things OK */
633 /* assumes gf_in struct is filled in */
635 gf_sreadc(unsigned char *c
)
637 return((gf_in
.n
) ? *c
= *(gf_in
.txtp
)++, gf_in
.n
-- : 0);
642 gf_sreadc_locale(unsigned char *c
)
644 return(generic_readc_locale(c
, gf_sreadc_getchar
, NULL
, &gf_in
.cb
));
649 gf_sreadc_getchar(unsigned char *c
, void *extraarg
)
652 * extraarg is ignored and gf_sreadc just uses globals instead.
653 * That's ok as long as we don't call it more than once at a time.
655 return(gf_sreadc(c
));
660 * Put a character to a string.
661 * Assumes gf_out struct is filled in.
662 * Returns 1 on success, <= 0 on failure.
667 return((gf_out
.n
) ? *(gf_out
.txtp
)++ = c
, gf_out
.n
-- : 0);
672 * The locale version converts from UTF-8 to user's locale charset
673 * before writing the characters.
676 gf_swritec_locale(int c
)
680 unsigned char obuf
[MAX(MB_LEN_MAX
,32)];
682 if((outchars
= utf8_to_locale(c
, &gf_out
.cb
, obuf
, sizeof(obuf
))) != 0){
683 for(i
= 0; i
< outchars
; i
++)
684 if(gf_swritec(obuf
[i
]) != 1){
695 * output the given string with the given function
698 gf_puts(register char *s
, gf_io_t pc
)
701 if(!(*pc
)((unsigned char)*s
++))
702 return(0); /* ERROR putting char ! */
709 * output the given string with the given function
712 gf_nputs(register char *s
, long int n
, gf_io_t pc
)
715 if(!(*pc
)((unsigned char)*s
++))
716 return(0); /* ERROR putting char ! */
723 * Read a stream of multi-byte characters from the
724 * user's locale charset and return a stream of
725 * UTF-8 characters, one at a time. The input characters
726 * are obtained by using the get_a_char function.
728 * Args c -- the returned octet
729 * get_a_char -- function to get a single octet of the multibyte
730 * character. The first arg of that function is the
731 * returned value and the second arg is for the
732 * function's use. The second arg is replaced with
733 * extraarg when it is called.
734 * extraarg -- The second arg to get_a_char.
735 * cb -- Storage area for state between calls to this func.
738 generic_readc_locale(unsigned char *c
,
739 int (*get_a_char
)(unsigned char *, void *),
743 unsigned long octets_so_far
= 0, remaining_octets
;
744 unsigned char *inputp
;
747 unsigned char inputbuf
[20];
751 /* already got some from previous call? */
752 if(cb
->cbufend
> cb
->cbuf
){
756 if(cb
->cbufp
>= cb
->cbufend
){
757 cb
->cbufend
= cb
->cbuf
;
758 cb
->cbufp
= cb
->cbuf
;
764 memset(inputbuf
, 0, sizeof(inputbuf
));
765 if((*get_a_char
)(&ch
, extraarg
) == 0)
768 inputbuf
[octets_so_far
++] = ch
;
771 remaining_octets
= octets_so_far
;
773 ucs
= mbtow(ps_global
->input_cs
, &inputp
, &remaining_octets
);
780 * Do we need to do something with the characters we've
781 * collected that don't form a valid UCS character?
782 * Probably need to try discarding them one at a time
783 * from the front instead of just throwing them all out.
785 if(octets_so_far
>= sizeof(inputbuf
))
788 if((*get_a_char
)(&ch
, extraarg
) == 0)
791 inputbuf
[octets_so_far
++] = ch
;
795 /* got a good UCS-4 character */
802 * Now we need to convert the UCS character to UTF-8
803 * and dole out the UTF-8 one char at a time.
806 cb
->cbufend
= utf8_put(cb
->cbuf
, (unsigned long) ucs
);
807 cb
->cbufp
= cb
->cbuf
;
808 if(cb
->cbufend
> cb
->cbuf
){
811 if(cb
->cbufp
>= cb
->cbufend
){
812 cb
->cbufend
= cb
->cbuf
;
813 cb
->cbufp
= cb
->cbuf
;
824 * Start of generalized filter routines
828 * initializing function to make sure list of filters is empty.
833 FILTER_S
*flt
, *fltn
= gf_master
;
835 while((flt
= fltn
) != NULL
){ /* free list of old filters */
837 fs_give((void **)&flt
);
841 gf_error_string
= NULL
; /* clear previous errors */
842 gf_byte_count
= 0L; /* reset counter */
848 * link the given filter into the filter chain
851 gf_link_filter(filter_t f
, void *data
)
853 FILTER_S
*new, *tail
;
857 * If the system's native EOL convention is CRLF, then there's no
858 * point in passing data thru a filter that's not doing anything
860 if(f
== gf_nvtnl_local
|| f
== gf_local_nvtnl
)
864 new = (FILTER_S
*)fs_get(sizeof(FILTER_S
));
865 memset(new, 0, sizeof(FILTER_S
));
867 new->f
= f
; /* set the function pointer */
868 new->opt
= data
; /* set any optional parameter data */
869 (*f
)(new, GF_RESET
); /* have it setup initial state */
871 if((tail
= gf_master
) != NULL
){ /* or add it to end of existing */
872 while(tail
->next
) /* list */
877 else /* attach new struct to list */
878 gf_master
= new; /* start a new list */
883 * terminal filter, doesn't call any other filters, typically just does
884 * something with the output
887 gf_terminal(FILTER_S
*f
, int flg
)
893 if((*last_filter
)(*op
++) <= 0) /* generic terminal filter */
894 gf_error(errno
? error_description(errno
) : "Error writing pipe");
898 else if(flg
== GF_RESET
)
899 errno
= 0; /* prepare for problems */
904 * set some outside gf_io_t function to the terminal function
905 * for example: a function to write a char to a file or into a buffer
908 gf_set_terminal(gf_io_t f
) /* function to set generic filter */
916 * common function for filters to make it known that an error
917 * has occurred. Jumps back to gf_pipe with error message.
922 /* let the user know the error passed in s */
924 longjmp(gf_error_state
, 1);
929 * The routine that shoves each byte through the chain of
930 * filters. It sets up error handling, and the terminal function.
931 * Then loops getting bytes with the given function, and passing
932 * it on to the first filter in the chain.
935 gf_pipe(gf_io_t gc
, gf_io_t pc
)
936 /* how to get a character */
940 dprint((4, "-- gf_pipe: "));
943 * set up for any errors a filter may encounter
945 if(setjmp(gf_error_state
)){
946 dprint((4, "ERROR: %s\n",
947 gf_error_string
? gf_error_string
: "NULL"));
948 return(gf_error_string
); /* */
952 * set and link in the terminal filter
955 gf_link_filter(gf_terminal
, NULL
);
958 * while there are chars to process, send them thru the pipe.
959 * NOTE: it's necessary to enclose the loop below in a block
960 * as the GF_INIT macro calls some automatic var's into
961 * existence. It can't be placed at the start of gf_pipe
962 * because it's useful for us to be called without filters loaded
963 * when we're just being used to copy bytes between storage
967 GF_INIT(gf_master
, gf_master
);
973 if(!(gf_byte_count
& 0x3ff))
974 /* Under windows we yield to allow event processing.
975 * Progress display is handled through the alarm()
981 GF_PUTC(gf_master
, c
& 0xff);
985 * toss an end-of-data marker down the pipe to give filters
986 * that have any buffered data the opportunity to dump it
988 (void) GF_FLUSH(gf_master
);
989 (*gf_master
->f
)(gf_master
, GF_EOD
);
992 dprint((4, "done.\n"));
993 return(NULL
); /* everything went OK */
998 * return the number of bytes piped so far
1001 gf_bytes_piped(void)
1003 return(gf_byte_count
);
1008 * filter the given input with the given command
1010 * Args: cmd -- command string to execute
1011 * prepend -- string to prepend to filtered input
1012 * source_so -- storage object containing data to be filtered
1013 * pc -- function to write filtered output with
1014 * aux_filters -- additional filters to pass data thru after "cmd"
1016 * Returns: NULL on success, reason for failure (not alloc'd!) on error
1019 gf_filter(char *cmd
, char *prepend
, STORE_S
*source_so
, gf_io_t pc
,
1020 FILTLIST_S
*aux_filters
, int silent
, int disable_reset
,
1021 void (*pipecb_f
)(PIPE_S
*, int, void *))
1023 unsigned char c
, obuf
[MAX(MB_LEN_MAX
,32)];
1024 int flags
, outchars
, i
;
1025 char *errstr
= NULL
, buf
[MAILTMPLEN
];
1028 #ifdef NON_BLOCKING_IO
1032 dprint((4, "so_filter: \"%s\"\n", cmd
? cmd
: "?"));
1037 * After coming back from user's pipe command we need to convert
1038 * the output from the pipe back to UTF-8.
1040 if(ps_global
->keyboard_charmap
&& strucmp("UTF-8", ps_global
->keyboard_charmap
))
1041 gf_link_filter(gf_utf8
, gf_utf8_opt(ps_global
->keyboard_charmap
));
1043 for( ; aux_filters
&& aux_filters
->filter
; aux_filters
++)
1044 gf_link_filter(aux_filters
->filter
, aux_filters
->data
);
1046 gf_set_terminal(pc
);
1047 gf_link_filter(gf_terminal
, NULL
);
1051 cb
.cbufend
= cb
.cbuf
;
1054 * Spawn filter feeding it data, and reading what it writes.
1056 so_seek(source_so
, 0L, 0);
1057 flags
= PIPE_WRITE
| PIPE_READ
| PIPE_NOSHELL
1058 | (silent
? PIPE_SILENT
: 0)
1059 | (!disable_reset
? PIPE_RESET
: 0);
1061 if((fpipe
= open_system_pipe(cmd
, NULL
, NULL
, flags
, 0, pipecb_f
, pipe_report_error
)) != NULL
){
1063 #ifdef NON_BLOCKING_IO
1065 if(fcntl(fileno(fpipe
->in
.f
), F_SETFL
, NON_BLOCKING_IO
) == -1)
1066 errstr
= "Can't set up non-blocking IO";
1068 if(prepend
&& (fputs(prepend
, fpipe
->out
.f
) == EOF
1069 || fputc('\n', fpipe
->out
.f
) == EOF
))
1070 errstr
= error_description(errno
);
1073 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1074 * isn't ubiquitous ;).
1076 for(n
= 0; !errstr
&& fpipe
->out
.f
&& n
< 1024; n
++)
1077 if(!so_readc(&c
, source_so
)){
1078 fclose(fpipe
->out
.f
);
1079 fpipe
->out
.f
= NULL
;
1083 * Got a UTF-8 character from source_so.
1084 * We need to convert it to the user's locale charset
1085 * and then send the result to the pipe.
1087 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1088 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1089 if(fputc(obuf
[i
], fpipe
->out
.f
) == EOF
)
1090 errstr
= error_description(errno
);
1094 * Note: We clear errno here and test below, before ferror,
1095 * because *some* stdio implementations consider
1096 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1099 clearerr(fpipe
->in
.f
); /* fix from <cananian@cananian.mit.edu> */
1101 while(!errstr
&& fgets(buf
, sizeof(buf
), fpipe
->in
.f
))
1102 errstr
= gf_filter_puts(buf
);
1104 /* then fgets failed! */
1105 if(!errstr
&& !(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)){
1106 if(feof(fpipe
->in
.f
)) /* nothing else interesting! */
1108 else if(ferror(fpipe
->in
.f
)) /* bummer. */
1109 errstr
= error_description(errno
);
1111 else if(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)
1112 clearerr(fpipe
->in
.f
);
1115 #else /* !NON_BLOCKING_IO */
1117 if(prepend
&& (pipe_puts(prepend
, fpipe
) == EOF
1118 || pipe_putc('\n', fpipe
) == EOF
))
1119 errstr
= error_description(errno
);
1122 * Well, do the best we can, and hope the pipe we're writing
1123 * doesn't fill up before we start reading...
1125 while(!errstr
&& so_readc(&c
, source_so
))
1126 if((outchars
= utf8_to_locale((int) c
, &cb
, obuf
, sizeof(obuf
))) != 0)
1127 for(i
= 0; i
< outchars
&& !errstr
; i
++)
1128 if(pipe_putc(obuf
[i
], fpipe
) == EOF
)
1129 errstr
= error_description(errno
);
1131 if(pipe_close_write(fpipe
))
1132 errstr
= _("Pipe command returned error.");
1134 while(!errstr
&& pipe_gets(buf
, sizeof(buf
), fpipe
))
1135 errstr
= gf_filter_puts(buf
);
1137 #endif /* !NON_BLOCKING_IO */
1139 if(close_system_pipe(&fpipe
, NULL
, pipecb_f
) && !errstr
)
1140 errstr
= _("Pipe command returned error.");
1145 errstr
= _("Error setting up pipe command.");
1152 * gf_filter_puts - write the given string down the filter's pipe
1155 gf_filter_puts(register char *s
)
1157 GF_INIT(gf_master
, gf_master
);
1160 * set up for any errors a filter may encounter
1162 if(setjmp(gf_error_state
)){
1163 dprint((4, "ERROR: gf_filter_puts: %s\n",
1164 gf_error_string
? gf_error_string
: "NULL"));
1165 return(gf_error_string
);
1169 GF_PUTC(gf_master
, (*s
++) & 0xff);
1171 GF_END(gf_master
, gf_master
);
1177 * gf_filter_eod - flush pending data in the filter's input queue and deliver
1178 * the GF_EOD marker.
1183 GF_INIT(gf_master
, gf_master
);
1184 (void) GF_FLUSH(gf_master
);
1185 (*gf_master
->f
)(gf_master
, GF_EOD
);
1190 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1192 * Filters MUST use the specified interface (pointer to filter
1193 * structure, the unsigned character buffer in that struct, and a
1194 * cmd flag), and pass each resulting octet to the next filter in the
1195 * chain. Only the terminal filter need not call another filter.
1196 * As a result, filters share a pretty general structure.
1197 * Typically three main conditionals separate initialization from
1198 * data from end-of-data command processing.
1200 * Lastly, being character-at-a-time, they're a little more complex
1201 * to write than filters operating on buffers because some state
1202 * must typically be kept between characters. However, for a
1203 * little bit of complexity here, much convenience is gained later
1204 * as they can be arbitrarily chained together at run time and
1205 * consume few resources (especially memory or disk) as they work.
1206 * (NOTE 951005: even less cpu now that data between filters is passed
1209 * A few notes about implementing filters:
1211 * - A generic filter template looks like:
1214 * gf_xxx_filter(f, flg)
1218 * GF_INIT(f, f->next); // def's var's to speed queue drain
1220 * if(flg == GF_DATA){
1221 * register unsigned char c;
1223 * while(GF_GETC(f, c)){ // macro taking data off input queue
1224 * // operate on c and pass it on here
1225 * GF_PUTC(f->next, c); // macro writing output queue
1228 * GF_END(f, f->next); // macro to sync pointers/offsets
1229 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1231 * else if(flg == GF_EOD){
1232 * // process any buffered data here and pass it on
1233 * GF_FLUSH(f->next); // flush pending data to next filter
1234 * (*f->next->f)(f->next, GF_EOD);
1236 * else if(flg == GF_RESET){
1237 * // initialize any data in the struct here
1241 * - Any free storage allocated during initialization (typically tied
1242 * to the "line" pointer in FILTER_S) is the filter's responsibility
1243 * to clean up when the GF_EOD command comes through.
1245 * - Filters must pass any GF_EOD they receive on to the next
1246 * filter in the chain so it has the opportunity to flush
1247 * any buffered data.
1249 * - All filters expect NVT end-of-lines. The idea is to prepend
1250 * or append either the gf_local_nvtnl or gf_nvtnl_local
1251 * os-dependent filters to the data on the appropriate end of the
1252 * pipe for the task at hand.
1254 * - NOTE: As of 951004, filters no longer take their input as a single
1255 * char argument, but rather get data to operate on via a vector
1256 * representing the input queue in the FILTER_S structure.
1263 * BASE64 TO BINARY encoding and decoding routines below
1268 * BINARY to BASE64 filter (encoding described in rfc1341)
1271 gf_binary_b64(FILTER_S
*f
, int flg
)
1274 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1275 GF_INIT(f
, f
->next
);
1278 register unsigned char c
;
1279 register unsigned char t
= f
->t
;
1280 register long n
= f
->n
;
1282 while(GF_GETC(f
, c
)){
1285 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1286 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1288 GF_PUTC(f
->next
, v
[c
>> 2]);
1289 /* byte 1: high 6 bits (1) */
1290 t
= c
<< 4; /* remember high 2 bits for next */
1293 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1294 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1296 GF_PUTC(f
->next
, v
[(t
|(c
>>4)) & 0x3f]);
1300 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1301 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1303 GF_PUTC(f
->next
, v
[(t
|(c
>> 6)) & 0x3f]);
1304 GF_PUTC(f
->next
, v
[c
& 0x3f]);
1308 if(n
== 45){ /* start a new line? */
1309 GF_PUTC(f
->next
, '\015');
1310 GF_PUTC(f
->next
, '\012');
1319 else if(flg
== GF_EOD
){ /* no more data */
1320 switch (f
->n
% 3) { /* handle trailing bytes */
1321 case 0: /* no trailing bytes */
1325 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1326 GF_PUTC(f
->next
, '='); /* byte 3 */
1327 GF_PUTC(f
->next
, '='); /* byte 4 */
1331 GF_PUTC(f
->next
, v
[(f
->t
) & 0x3f]);
1332 GF_PUTC(f
->next
, '='); /* byte 4 */
1338 GF_PUTC(f
->next
, '\015');
1339 GF_PUTC(f
->next
, '\012');
1342 (void) GF_FLUSH(f
->next
);
1343 (*f
->next
->f
)(f
->next
, GF_EOD
);
1345 else if(flg
== GF_RESET
){
1346 dprint((9, "-- gf_reset binary_b64\n"));
1354 * BASE64 to BINARY filter (encoding described in rfc1341)
1357 gf_b64_binary(FILTER_S
*f
, int flg
)
1359 static char v
[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1360 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1361 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1362 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1363 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1364 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1365 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1366 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1367 GF_INIT(f
, f
->next
);
1370 register unsigned char c
;
1371 register unsigned char t
= f
->t
;
1372 register int n
= (int) f
->n
;
1373 register int state
= f
->f1
;
1375 while(GF_GETC(f
, c
)){
1380 gf_error("Illegal '=' in base64 text");
1385 /* in range, and a valid value? */
1386 if((c
& ~0x7f) || (c
= v
[c
]) > 63){
1388 switch (n
++) { /* check quantum position */
1390 state
++; /* expect an equal as next char */
1394 n
= 0L; /* restart quantum */
1397 default: /* impossible quantum position */
1398 gf_error("Internal base64 decoder error");
1404 switch (n
++) { /* install based on quantum position */
1405 case 0: /* byte 1: high 6 bits */
1409 case 1: /* byte 1: low 2 bits */
1410 GF_PUTC(f
->next
, (t
|(c
>> 4)));
1411 t
= c
<< 4; /* byte 2: high 4 bits */
1414 case 2: /* byte 2: low 4 bits */
1415 GF_PUTC(f
->next
, (t
|(c
>> 2)));
1416 t
= c
<< 6; /* byte 3: high 2 bits */
1420 GF_PUTC(f
->next
, t
| c
);
1421 n
= 0L; /* reinitialize mechanism */
1432 else if(flg
== GF_EOD
){
1433 (void) GF_FLUSH(f
->next
);
1434 (*f
->next
->f
)(f
->next
, GF_EOD
);
1436 else if(flg
== GF_RESET
){
1437 dprint((9, "-- gf_reset b64_binary\n"));
1438 f
->n
= 0L; /* quantum position */
1439 f
->f1
= 0; /* state holder: equal seen? */
1447 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1448 * encoding described in rfc1341
1451 #define GF_MAXLINE 80 /* good buffer size */
1454 * default action for QUOTED-PRINTABLE to 8BIT decoder
1456 #define GF_QP_DEFAULT(f, c) { \
1459 /* reset white space! */ \
1460 (f)->linep = (f)->line; \
1461 *((f)->linep)++ = ' '; \
1463 else if((c) == '='){ \
1467 GF_PUTC((f)->next, (c)); \
1472 * QUOTED-PRINTABLE to 8BIT filter
/*
 * gf_qp_8bit - QUOTED-PRINTABLE to 8BIT decoding filter.
 *
 * Args: f   -- filter state and input data
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->f1           decoder state, copied into `state' on entry
 *   f->t            numeric value of the first hex digit after '='
 *   f->f2           the first hex character itself, kept so it can be
 *                   written back out if the second one is not hex
 *   f->line, linep  buffer collecting spaces; GF_MAXLINE chars
 *                   obtained with fs_get on GF_RESET and released
 *                   with fs_give on GF_EOD
 *   f->n            number of buffered spaces (linep - line) to be
 *                   written to the next filter
 *
 * A non-hex character following '=' (or following "=<hex>") is not
 * fatal: the raw characters are passed through, a status warning is
 * queued, and `state' becomes STOP_DECODING, after which every input
 * character is copied to the next filter unchanged.
 *
 * On GF_EOD the next filter is flushed and then called with GF_EOD.
 */
1475 gf_qp_8bit(FILTER_S
*f
, int flg
)
1478 GF_INIT(f
, f
->next
);
1481 register unsigned char c
;
1482 register int state
= f
->f1
;
1484 while(GF_GETC(f
, c
)){
1487 case DFL
: /* default case */
1489 GF_QP_DEFAULT(f
, c
);
1492 case CCR
: /* non-significant space */
1495 continue; /* go on to next char */
1497 GF_QP_DEFAULT(f
, c
);
1501 if(c
== '\015'){ /* "=\015" is a soft EOL */
1506 if(c
== '='){ /* compatibility clause for old guys */
1507 GF_PUTC(f
->next
, '=');
1512 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1514 * First character after '=' not a hex digit.
1515 * This ain't right, but we're going to treat it as
1516 * plain old text instead of an '=' followed by hex.
1517 * In other words, they forgot to encode the '='.
1518 * Before 4.60 we just bailed with an error here, but now
1519 * we keep going as long as we are just displaying
1520 * the result (and not saving it or something).
1522 * Wait! The users don't like that. They want to be able
1523 * to use it even if it might be wrong. So just plow
1524 * ahead even if displaying.
1526 * Better have this be a constant string so that if we
1527 * get multiple instances of it in a single message we
1528 * can avoid the too many error messages problem. It
1529 * better be the same message as the one a few lines
1532 * Turn off decoding after encountering such an error and
1533 * just dump the rest of the text as is.
1535 state
= STOP_DECODING
;
1536 GF_PUTC(f
->next
, '=');
1537 GF_PUTC(f
->next
, c
);
1538 q_status_message(SM_ORDER
,3,3,
1539 _("Warning: Non-hexadecimal character in QP encoding!"));
1541 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c
, c
));
1545 if (isdigit ((unsigned char)c
))
1548 f
->t
= c
- (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1550 f
->f2
= c
; /* store character in case we have to
1551 back out in !isxdigit below */
1558 if(!isxdigit((unsigned char)c
)){ /* must be hex! */
1559 state
= STOP_DECODING
;
1560 GF_PUTC(f
->next
, '=');
1561 GF_PUTC(f
->next
, f
->f2
);
1562 GF_PUTC(f
->next
, c
);
1563 q_status_message(SM_ORDER
,3,3,
1564 _("Warning: Non-hexadecimal character in QP encoding!"));
1566 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c
, c
, f
->f2
));
1570 if (isdigit((unsigned char)c
))
1573 c
-= (isupper((unsigned char)c
) ? 'A' - 10 : 'a' - 10);
1575 GF_PUTC(f
->next
, c
+ (f
->t
<< 4));
1579 if(c
== ' '){ /* toss it in with other spaces */
1580 if(f
->linep
- f
->line
< GF_MAXLINE
)
1581 *(f
->linep
)++ = ' ';
1586 if(c
== '\015'){ /* not our white space! */
1587 f
->linep
= f
->line
; /* reset buffer */
1588 GF_PUTC(f
->next
, '\015');
1592 /* the spaces are ours, write 'em */
1593 f
->n
= f
->linep
- f
->line
;
1595 GF_PUTC(f
->next
, ' ');
1597 GF_QP_DEFAULT(f
, c
); /* take care of 'c' in default way */
1600 case STOP_DECODING
:
1601 GF_PUTC(f
->next
, c
);
1609 else if(flg
== GF_EOD
){
1610 fs_give((void **)&(f
->line
));
1611 (void) GF_FLUSH(f
->next
);
1612 (*f
->next
->f
)(f
->next
, GF_EOD
);
1614 else if(flg
== GF_RESET
){
1615 dprint((9, "-- gf_reset qp_8bit\n"));
1617 f
->linep
= f
->line
= (char *)fs_get(GF_MAXLINE
* sizeof(char));
1624 * USEFUL MACROS TO HELP WITH QP ENCODING
/*
 * Helper macros for the 8BIT to QUOTED-PRINTABLE encoder.  All of
 * them use f->n as the running length of the current output line.
 *
 *   QP_MAXL            longest run of encoded text allowed on a line
 *   GF_8BIT_WRAP       writes '=', CR, LF to the next filter (a soft
 *                      line break); any test that precedes these
 *                      writes is not visible here
 *   GF_8BIT_PUT_QUOTE  adds 3 to f->n, sets it back to 3 when that
 *                      exceeds QP_MAXL, then writes '=' followed by
 *                      HEX_CHAR1(c) and HEX_CHAR2(c)
 *   GF_8BIT_PUT        bumps f->n and compares it with QP_MAXL; a '.'
 *                      in the first column is written quoted, any
 *                      other octet is written as is
 *   GF_8BIT_DEFAULT    tests for ' ' and CR (the bodies of those two
 *                      branches are not visible here); quotes control
 *                      characters, 0x7f, octets with the high bit
 *                      set, and '='; everything else goes through
 *                      GF_8BIT_PUT
 *
 * NOTE(review): GF_8BIT_PUT and parts of GF_8BIT_DEFAULT use their
 * arguments without parentheses, and GF_8BIT_DEFAULT evaluates c more
 * than once, so arguments should be plain identifiers.
 */
1627 #define QP_MAXL 75 /* 76th place only for continuation */
1630 * Macro to test and wrap long quoted printable lines
1632 #define GF_8BIT_WRAP(f) { \
1633 GF_PUTC((f)->next, '='); \
1634 GF_PUTC((f)->next, '\015'); \
1635 GF_PUTC((f)->next, '\012'); \
1639 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
1640 * line break if needed.
1642 #define GF_8BIT_PUT_QUOTE(f, c) { \
1643 if(((f)->n += 3) > QP_MAXL){ \
1645 (f)->n = 3; /* set line count */ \
1647 GF_PUTC((f)->next, '='); \
1648 GF_PUTC((f)->next, HEX_CHAR1(c)); \
1649 GF_PUTC((f)->next, HEX_CHAR2(c)); \
1653 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
1656 #define GF_8BIT_PUT(f, c) { \
1657 if((++(f->n)) > QP_MAXL){ \
1661 if(f->n == 1L && c == '.'){ \
1662 GF_8BIT_PUT_QUOTE(f, c); \
1666 GF_PUTC(f->next, c); \
1671 * default action for 8bit to quoted printable encoder
1673 #define GF_8BIT_DEFAULT(f, c) if((c) == ' '){ \
1676 else if(c == '\015'){ \
1679 else if(iscntrl(c & 0x7f) || (c == 0x7f) \
1680 || (c & 0x80) || (c == '=')){ \
1681 GF_8BIT_PUT_QUOTE(f, c); \
1684 GF_8BIT_PUT(f, c); \
1689 * 8BIT to QUOTED-PRINTABLE filter
/*
 * gf_8bit_qp - 8BIT to QUOTED-PRINTABLE encoding filter.
 *
 * Args: f   -- filter state and input data
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->f1  state carried over from the last character (DFL on reset)
 *   f->t   handed to Find_Froms; when set, a pending space is written
 *          quoted and the flag is cleared
 *   f->f2  handed to Find_Froms as its bitmap (1 on reset)
 *   f->n   number of chars in the current output line (0 on reset)
 *
 * dummy_dots and dummy_dmap exist only to fill the remaining
 * Find_Froms arguments.
 *
 * After a CR, a true line break is written as CR LF; otherwise the CR
 * is written quoted and c is handled by GF_8BIT_DEFAULT.  A pending
 * space is written quoted when it is followed by CR.  On GF_EOD a
 * trailing CR or space is written quoted (the tests selecting them
 * are not visible here), the next filter is flushed and then called
 * with GF_EOD.
 */
1692 gf_8bit_qp(FILTER_S
*f
, int flg
)
1694 short dummy_dots
= 0, dummy_dmap
= 1;
1695 GF_INIT(f
, f
->next
);
1698 register unsigned char c
;
1699 register int state
= f
->f1
;
1701 while(GF_GETC(f
, c
)){
1703 /* keep track of "^JFrom " */
1704 Find_Froms(f
->t
, dummy_dots
, f
->f2
, dummy_dmap
, c
);
1707 case DFL
: /* handle ordinary case */
1708 GF_8BIT_DEFAULT(f
, c
);
1711 case CCR
: /* true line break? */
1714 GF_PUTC(f
->next
, '\015');
1715 GF_PUTC(f
->next
, '\012');
1718 else{ /* nope, quote the CR */
1719 GF_8BIT_PUT_QUOTE(f
, '\015');
1720 GF_8BIT_DEFAULT(f
, c
); /* and don't forget about c! */
1726 if(c
== '\015' || f
->t
){ /* handle the space */
1727 GF_8BIT_PUT_QUOTE(f
, ' ');
1728 f
->t
= 0; /* reset From flag */
1731 GF_8BIT_PUT(f
, ' ');
1733 GF_8BIT_DEFAULT(f
, c
); /* handle 'c' in the default way */
1741 else if(flg
== GF_EOD
){
1744 GF_8BIT_PUT_QUOTE(f
, '\015'); /* write the last cr */
1748 GF_8BIT_PUT_QUOTE(f
, ' '); /* write the last space */
1752 (void) GF_FLUSH(f
->next
);
1753 (*f
->next
->f
)(f
->next
, GF_EOD
);
1755 else if(flg
== GF_RESET
){
1756 dprint((9, "-- gf_reset 8bit_qp\n"));
1757 f
->f1
= DFL
; /* state from last character */
1758 f
->f2
= 1; /* state of "^NFrom " bitmap */
1760 f
->n
= 0L; /* number of chars in current line */
1765 * This filter converts characters in one character set (the character
1766 * set of a message, for example) to another (the user's character set).
/*
 * gf_convert_8bit_charset - map each input octet through a lookup
 * table.
 *
 * Args: f   -- filter state and input data; f->opt, when set, is the
 *              table, indexed by the input octet
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * With no table installed, octets pass through unchanged.  On GF_EOD
 * the next filter is flushed and then called with GF_EOD.
 *
 * conv_table is function-static and is loaded from f->opt on
 * GF_RESET, so every use of this filter shares the table most
 * recently installed.
 * NOTE(review): the table is presumably expected to hold 256 entries,
 * since it is indexed by an unsigned char -- confirm with the caller.
 */
1769 gf_convert_8bit_charset(FILTER_S
*f
, int flg
)
1771 static unsigned char *conv_table
= NULL
;
1772 GF_INIT(f
, f
->next
);
1775 register unsigned char c
;
1777 while(GF_GETC(f
, c
)){
1778 GF_PUTC(f
->next
, conv_table
? conv_table
[c
] : c
);
1783 else if(flg
== GF_EOD
){
1784 (void) GF_FLUSH(f
->next
);
1785 (*f
->next
->f
)(f
->next
, GF_EOD
);
1787 else if(flg
== GF_RESET
){
1788 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1789 conv_table
= (f
->opt
) ? (unsigned char *) (f
->opt
) : NULL
;
/*
 * Options for the UTF-8 to 8/16-bit charset conversion filter.
 * NOTE(review): the members are not visible here; presumably this is
 * the UTF8C_S type whose conv_table and report_err members are read
 * by gf_convert_utf8_charset -- confirm.
 */
1795 typedef struct _utf8c_s
{
1802 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1803 * Characters missing from the destination set, and invalid UTF-8 sequences,
1804 * will be converted to "?".
/*
 * gf_convert_utf8_charset - convert UTF-8 input to an 8-bit or
 * 16-bit character set by table lookup.
 *
 * Args: f   -- filter state and input data; f->opt is a UTF8C_S
 *              (see gf_convert_utf8_charset_opt)
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->f2  copied into `more' on entry
 *   f->n   copied into `u' on entry
 *
 * Per octet:
 *   - with no conversion table, the octet passes through unchanged
 *   - a continuation octet (0x80..0xbf) shifts u left by six bits;
 *     when it completes a character, u is looked up in conv_table.
 *     Values of 0xffff and above, and entries equal to NOCHAR, are
 *     treated as not representable
 *   - a continuation octet when none is expected, and a lead octet
 *     arriving while a character is incomplete, each produce '?'
 *   - a lead octet loads its payload bits into u
 *
 * On the not-representable path f->opt is released and gf_error is
 * raised with "translation error".
 * NOTE(review): presumably this happens only when report_err is set,
 * with '?' substituted otherwise -- the test is not visible here.
 *
 * conv_table and report_err are function-static and are loaded from
 * f->opt on GF_RESET, so every use of this filter shares the values
 * most recently installed.  On GF_EOD the next filter is flushed,
 * f->opt is released, and the next filter is called with GF_EOD.
 *
 * NOTE(review): two inline range remarks look inexact for UTF-8: a
 * two-octet sequence carries 11 bits in total, and three-octet
 * sequences start at U+0800.
 */
1807 gf_convert_utf8_charset(FILTER_S
*f
, int flg
)
1809 static unsigned short *conv_table
= NULL
;
1810 static int report_err
= 0;
1811 register int more
= f
->f2
;
1812 register long u
= f
->n
;
1815 * "more" is the number of subsequent octets needed to complete a character,
1816 * it is stored in f->f2.
1817 * "u" is the accumulated Unicode character, it is stored in f->n
1820 GF_INIT(f
, f
->next
);
1823 register unsigned char c
;
1825 while(GF_GETC(f
, c
)){
1826 if(!conv_table
){ /* can't do much if no conversion table */
1827 GF_PUTC(f
->next
, c
);
1829 /* UTF-8 continuation? */
1830 else if((c
> 0x7f) && (c
< 0xc0)){
1832 u
<<= 6; /* shift current value by 6 bits */
1834 if (!--more
){ /* last octet? */
1835 if(u
>= 0xffff || (u
= conv_table
[u
]) == NOCHAR
){
1837 * non-BMP character or a UTF-8 character
1838 * which is not representable in the
1839 * charset we're converting to.
1844 fs_give((void **) &f
->opt
);
1846 /* TRANSLATORS: error while translating from one
1847 character set to another, for example from UTF-8
1848 to ISO-2022-JP or something like that. */
1849 gf_error(_("translation error"));
1855 c
= (unsigned char) (u
>> 8);
1856 GF_PUTC(f
->next
, c
);
1859 c
= (unsigned char) u
& 0xff;
1862 GF_PUTC(f
->next
, c
);
1865 else{ /* continuation when not in progress */
1866 GF_PUTC(f
->next
, '?');
1870 if(more
){ /* incomplete UTF-8 character */
1871 GF_PUTC(f
->next
, '?');
1874 if(c
< 0x80){ /* U+0000 - U+007f */
1875 GF_PUTC(f
->next
, c
);
1877 else if(c
< 0xe0){ /* U+0080 - U+07ff */
1878 u
= c
& 0x1f; /* first 5 bits of 12 */
1881 else if(c
< 0xf0){ /* U+1000 - U+ffff */
1882 u
= c
& 0x0f; /* first 4 bits of 16 */
1885 /* in case we ever support non-BMP Unicode */
1886 else if (c
< 0xf8){ /* U+10000 - U+10ffff */
1887 u
= c
& 0x07; /* first 3 bits of 20.5 */
1890 #if 0 /* ISO 10646 not in Unicode */
1891 else if (c
< 0xfc){ /* ISO 10646 20000 - 3ffffff */
1892 u
= c
& 0x03; /* first 2 bits of 26 */
1895 else if (c
< 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1896 u
= c
& 0x03; /* first 2 bits of 26 */
1900 else{ /* not in Unicode */
1901 GF_PUTC(f
->next
, '?');
1910 else if(flg
== GF_EOD
){
1911 (void) GF_FLUSH(f
->next
);
1913 fs_give((void **) &f
->opt
);
1914 (*f
->next
->f
)(f
->next
, GF_EOD
);
1916 else if(flg
== GF_RESET
){
1917 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1918 conv_table
= ((UTF8C_S
*) f
->opt
)->conv_table
;
1919 report_err
= ((UTF8C_S
*) f
->opt
)->report_err
;
/*
 * gf_convert_utf8_charset_opt - build the option block for
 * gf_convert_utf8_charset.
 *
 * Args: table      -- conversion table, stored as is (not copied)
 *       report_err -- stored as is
 *
 * Returns a UTF8C_S obtained with fs_get, cast to void *.
 * NOTE(review): presumably the result is installed as the filter's
 * opt, in which case gf_convert_utf8_charset releases it with
 * fs_give -- confirm at the call site.
 */
1927 gf_convert_utf8_charset_opt(void *table
, int report_err
)
1931 utf8c
= (UTF8C_S
*) fs_get(sizeof(UTF8C_S
));
1932 utf8c
->conv_table
= table
;
1933 utf8c
->report_err
= report_err
;
1934 return((void *) utf8c
);
1939 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1941 * The routine is called ..._to_euc but it is really to either euc (unix Pine)
1942 * or to Shift-JIS (if PC-Pine).
/*
 * gf_2022_jp_to_euc - convert ISO-2022-JP input to EUC, or to
 * Shift-JIS when built with _WINDOWS defined.
 *
 * Args: f   -- filter state and input data
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Escape sequences recognized by the visible code:
 *   ESC $ B, ESC $ @          start converting (f->t set to 1,
 *                             f->f2 set to -1)
 *   ESC ( B, ESC ( J, ESC ( H stop converting (f->t set to 0)
 * Any other sequence is written to the next filter as received.
 *
 * While converting:
 *   - the Shift-JIS code clears the high bit, remembers the first
 *     octet of a pair in f->f2, and writes both converted octets once
 *     the second arrives; octets outside 0x21..0x7e are written as is
 *   - the other visible write sets the high bit on octets in
 *     0x21..0x7e and leaves the rest alone
 *
 * On GF_EOD the characters of a partly read escape sequence are
 * written out (the tests selecting which are not visible here), the
 * next filter is flushed and then called with GF_EOD.
 */
1945 gf_2022_jp_to_euc(FILTER_S
*f
, int flg
)
1947 register unsigned char c
;
1948 register int state
= f
->f1
;
1951 * f->t lit means we're in middle of decoding a sequence of characters.
1952 * f->f2 keeps track of first character of pair for Shift-JIS.
1953 * f->f1 is the state.
1956 GF_INIT(f
, f
->next
);
1959 while(GF_GETC(f
, c
)){
1961 case ESC
: /* saw ESC */
1962 if(!f
->t
&& c
== '$')
1964 else if(f
->t
&& c
== '(')
1967 GF_PUTC(f
->next
, '\033');
1968 GF_PUTC(f
->next
, c
);
1974 case ESCDOL
: /* saw ESC $ */
1975 if(c
== 'B' || c
== '@'){
1977 f
->t
= 1; /* filtering into euc */
1978 f
->f2
= -1; /* first character of pair */
1981 GF_PUTC(f
->next
, '\033');
1982 GF_PUTC(f
->next
, '$');
1983 GF_PUTC(f
->next
, c
);
1989 case ESCPAR
: /* saw ESC ( */
1990 if(c
== 'B' || c
== 'J' || c
== 'H'){
1992 f
->t
= 0; /* done filtering */
1995 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
1996 GF_PUTC(f
->next
, '('); /* escape sequences, which */
1997 GF_PUTC(f
->next
, c
); /* this appears to be. */
2002 case EUC
: /* filtering into euc */
2006 #ifdef _WINDOWS /* Shift-JIS */
2007 c
&= 0x7f; /* 8-bit can't win */
2008 if (f
->f2
>= 0){ /* second of a pair? */
2009 int rowOffset
= (f
->f2
< 95) ? 112 : 176;
2010 int cellOffset
= (f
->f2
% 2) ? ((c
> 95) ? 32 : 31)
2013 GF_PUTC(f
->next
, ((f
->f2
+ 1) >> 1) + rowOffset
);
2014 GF_PUTC(f
->next
, c
+ cellOffset
);
2015 f
->f2
= -1; /* restart */
2017 else if(c
> 0x20 && c
< 0x7f)
2018 f
->f2
= c
; /* first of pair */
2020 GF_PUTC(f
->next
, c
); /* write CTL as itself */
2024 GF_PUTC(f
->next
, (c
> 0x20 && c
< 0x7f) ? c
| 0x80 : c
);
2035 GF_PUTC(f
->next
, c
);
2044 else if(flg
== GF_EOD
){
2047 GF_PUTC(f
->next
, '\033');
2051 GF_PUTC(f
->next
, '\033');
2052 GF_PUTC(f
->next
, '$');
2056 GF_PUTC(f
->next
, '\033'); /* Don't set hibit for */
2057 GF_PUTC(f
->next
, '('); /* escape sequences. */
2061 (void) GF_FLUSH(f
->next
);
2062 (*f
->next
->f
)(f
->next
, GF_EOD
);
2064 else if(flg
== GF_RESET
){
2065 dprint((9, "-- gf_reset jp_to_euc\n"));
2066 f
->f1
= DFL
; /* state */
2067 f
->t
= 0; /* not translating to euc */
2073 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
/*
 * gf_native8bitjapanese_to_2022_jp - convert the platform's 8-bit
 * Japanese encoding to ISO-2022-JP by handing f and flg unchanged to
 * gf_sjis_to_2022_jp or gf_euc_to_2022_jp.
 * NOTE(review): the selection between the two calls is not visible
 * here; presumably a _WINDOWS conditional, as in gf_2022_jp_to_euc.
 */
2076 gf_native8bitjapanese_to_2022_jp(FILTER_S
*f
, int flg
)
2079 gf_sjis_to_2022_jp(f
, flg
);
2081 gf_euc_to_2022_jp(f
, flg
);
/*
 * gf_euc_to_2022_jp - convert EUC input to ISO-2022-JP.
 *
 * Args: f   -- filter state and input data
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * The visible writes fall into four groups:
 *   - c with the high bit cleared
 *   - ESC ( B followed by c
 *   - ESC $ B followed by c with the high bit cleared
 *   - c unchanged
 * NOTE(review): the tests choosing between them are not visible here;
 * presumably they depend on f->t and on the high bit of c.
 *
 * On GF_EOD, ESC ( B is written (its guard is not visible here), the
 * next filter is flushed and then called with GF_EOD.
 */
2087 gf_euc_to_2022_jp(FILTER_S
*f
, int flg
)
2089 register unsigned char c
;
2092 * f->t lit means we've sent the start esc seq but not the end seq.
2093 * f->f2 keeps track of first character of pair for Shift-JIS.
2096 GF_INIT(f
, f
->next
);
2099 while(GF_GETC(f
, c
)){
2102 GF_PUTC(f
->next
, c
& 0x7f);
2105 GF_PUTC(f
->next
, '\033');
2106 GF_PUTC(f
->next
, '(');
2107 GF_PUTC(f
->next
, 'B');
2108 GF_PUTC(f
->next
, c
);
2115 GF_PUTC(f
->next
, '\033');
2116 GF_PUTC(f
->next
, '$');
2117 GF_PUTC(f
->next
, 'B');
2118 GF_PUTC(f
->next
, c
& 0x7f);
2122 GF_PUTC(f
->next
, c
);
2129 else if(flg
== GF_EOD
){
2131 GF_PUTC(f
->next
, '\033');
2132 GF_PUTC(f
->next
, '(');
2133 GF_PUTC(f
->next
, 'B');
2138 (void) GF_FLUSH(f
->next
);
2139 (*f
->next
->f
)(f
->next
, GF_EOD
);
2141 else if(flg
== GF_RESET
){
2142 dprint((9, "-- gf_reset euc_to_jp\n"));
/*
 * gf_sjis_to_2022_jp - convert Shift-JIS input to ISO-2022-JP.
 *
 * Args: f   -- filter state and input data
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * When f->f2 holds the first octet of a pair (f->f2 >= 0), the
 * second octet c completes it and two octets are written:
 *     ((f->f2 - rowOffset) << 1) - adjust
 *     c - cellOffset
 * where adjust is 1 for c below 159, rowOffset is 112 for a first
 * octet below 160 and 176 otherwise, and cellOffset is 31 or 32 when
 * adjust is set (32 for c above 127) and 126 when it is not.
 *
 * The remaining visible writes are ESC ( B followed by c, the
 * sequence ESC $ B, and c unchanged.
 * NOTE(review): the tests choosing between them are not visible here.
 *
 * On GF_EOD, ESC ( B is written (its guard is not visible here), the
 * next filter is flushed and then called with GF_EOD.
 */
2149 gf_sjis_to_2022_jp(FILTER_S
*f
, int flg
)
2151 register unsigned char c
;
2154 * f->t lit means we've sent the start esc seq but not the end seq.
2155 * f->f2 keeps track of first character of pair for Shift-JIS.
2158 GF_INIT(f
, f
->next
);
2161 while(GF_GETC(f
, c
)){
2163 if(f
->f2
>= 0){ /* second of a pair? */
2164 int adjust
= c
< 159;
2165 int rowOffset
= f
->f2
< 160 ? 112 : 176;
2166 int cellOffset
= adjust
? (c
> 127 ? 32 : 31) : 126;
2168 GF_PUTC(f
->next
, ((f
->f2
- rowOffset
) << 1) - adjust
);
2169 GF_PUTC(f
->next
, c
- cellOffset
);
2173 f
->f2
= c
; /* remember first of pair */
2176 GF_PUTC(f
->next
, '\033');
2177 GF_PUTC(f
->next
, '(');
2178 GF_PUTC(f
->next
, 'B');
2179 GF_PUTC(f
->next
, c
);
2186 GF_PUTC(f
->next
, '\033');
2187 GF_PUTC(f
->next
, '$');
2188 GF_PUTC(f
->next
, 'B');
2193 GF_PUTC(f
->next
, c
);
2200 else if(flg
== GF_EOD
){
2202 GF_PUTC(f
->next
, '\033');
2203 GF_PUTC(f
->next
, '(');
2204 GF_PUTC(f
->next
, 'B');
2209 (void) GF_FLUSH(f
->next
);
2210 (*f
->next
->f
)(f
->next
, GF_EOD
);
2212 else if(flg
== GF_RESET
){
2213 dprint((9, "-- gf_reset sjis_to_jp\n"));
2222 * Various charset to UTF-8 Translation filter
2226 * utf8 conversion options
/*
 * Options for the charset to UTF-8 filter.
 * NOTE(review): the members are not visible here; presumably this is
 * the UTF8_S type whose charset member is set by gf_utf8_opt and read
 * by GF_UTF8_FLUSH -- confirm.
 */
2228 typedef struct _utf8_s
{
/*
 * Line-buffer helpers for gf_utf8.  They rely on the caller having
 * locals named p (write position in f->line) and eobuf (end of the
 * buffer), and GF_UTF8_FLUSH also uses a counter named n.
 *
 *   UTF8_BLOCK     amount by which the line buffer grows
 *   UTF8_EOB(f)    address of the last char of f->line, whose
 *                  allocated length is kept in f->f2
 *   UTF8_ADD(f,c)  the visible part grows f->line by UTF8_BLOCK with
 *                  fs_resize, then recomputes eobuf and puts p back
 *                  at the same offset in the resized buffer
 *   GF_UTF8_FLUSH  converts the buffered text (f->line up to p) and
 *                  writes it to the next filter:
 *                    - no charset in the options: octets with the
 *                      high bit set are written as '?', the rest
 *                      unchanged
 *                    - utf8_text_cs succeeds: its output is written,
 *                      and released with fs_give when it is a
 *                      separate buffer from the input
 *                    - otherwise: one '?' per input octet
 */
2233 #define UTF8_BLOCK 1024
2234 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2235 #define UTF8_ADD(f, c) \
2238 f->f2 += UTF8_BLOCK; \
2239 fs_resize((void **)&f->line, \
2240 (size_t) f->f2 * sizeof(char)); \
2241 eobuf = UTF8_EOB(f); \
2242 p = eobuf - UTF8_BLOCK; \
2246 #define GF_UTF8_FLUSH(f) { \
2248 SIZEDTEXT intext, outtext; \
2249 intext.data = (unsigned char *) f->line; \
2250 intext.size = p - f->line; \
2251 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2252 if(!((UTF8_S *) f->opt)->charset){ \
2253 for(n = 0; n < intext.size; n++) \
2254 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2256 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2257 for(n = 0; n < outtext.size; n++) \
2258 GF_PUTC(f->next, outtext.data[n]); \
2259 if(outtext.data && intext.data != outtext.data) \
2260 fs_give((void **) &outtext.data); \
2263 for(n = 0; n < intext.size; n++) \
2264 GF_PUTC(f->next, '?'); \
2270 * gf_utf8 - text in specified charset to UTF-8 filter
2271 * Process line-at-a-time rather than character
2272 * because ISO-2022-JP. Call utf8_text_cs by hand
2273 * rather than utf8_text to reduce the cost of
2274 * utf8_charset() for each line.
/*
 * gf_utf8 - convert text in the charset named by the filter options
 * to UTF-8.
 *
 * Args: f   -- filter state and input data; f->opt is the block
 *              built by gf_utf8_opt
 *       flg -- GF_EOD selects the visible end-of-data branch; the
 *              GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->line   line buffer, UTF8_BLOCK chars obtained with fs_get in
 *             the reset code
 *   f->linep  saved write position, copied into p on entry
 *   f->f2     allocated length of f->line
 *   f->f1     state, copied into `state' on entry
 *
 * The locals p and eobuf are the ones the UTF8_ADD macro expects.
 * The loop writes CR LF to the next filter and adds a CR to the line
 * buffer with UTF8_ADD.
 * NOTE(review): the conditions around those statements, and where the
 * buffered line is converted, are not visible here.
 *
 * On GF_EOD both f->line and f->opt are released with fs_give, the
 * next filter is flushed and then called with GF_EOD.
 */
2277 gf_utf8(FILTER_S
*f
, int flg
)
2279 register char *p
= f
->linep
;
2280 register char *eobuf
= UTF8_EOB(f
);
2281 GF_INIT(f
, f
->next
);
2284 register int state
= f
->f1
;
2285 register unsigned char c
;
2287 while(GF_GETC(f
, c
)){
2295 GF_PUTC(f
->next
, '\015');
2296 GF_PUTC(f
->next
, '\012');
2299 UTF8_ADD(f
, '\015');
2317 else if(flg
== GF_EOD
){
2322 fs_give((void **) &f
->line
);
2323 fs_give((void **) &f
->opt
);
2324 (void) GF_FLUSH(f
->next
);
2325 (*f
->next
->f
)(f
->next
, GF_EOD
);
2328 dprint((9, "-- gf_reset utf8\n"));
2330 f
->f2
= UTF8_BLOCK
; /* input buffer length */
2331 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
/*
 * gf_utf8_opt - build the option block for gf_utf8.
 *
 * Args: charset -- name of the source character set, resolved with
 *                  utf8_charset()
 *
 * Returns a UTF8_S obtained with fs_get, cast to void *.  Its charset
 * member is the resolved CHARSET, or NULL when the charset is of type
 * CT_ASCII; GF_UTF8_FLUSH turns octets with the high bit set into '?'
 * when charset is NULL.
 * NOTE(review): presumably charset is also NULL when utf8_charset()
 * does not recognize the name -- confirm.
 */
2339 gf_utf8_opt(char *charset
)
2343 utf8
= (UTF8_S
*) fs_get(sizeof(UTF8_S
));
2345 utf8
->charset
= (CHARSET
*) utf8_charset(charset
);
2348 * When we get 8-bit non-ascii characters but it is supposed to
2349 * be ascii we want it to turn into question marks, not
2350 * just behave as if it is UTF-8 which is what happens
2351 * with ascii because there is no translation table.
2352 * So we need to catch the ascii special case here.
2354 if(utf8
->charset
&& utf8
->charset
->type
== CT_ASCII
)
2355 utf8
->charset
= NULL
;
2357 return((void *) utf8
);
2362 * RICHTEXT-TO-PLAINTEXT filter
2366 * option to be used by rich2plain (NOTE: if this filter is ever
2367 * used more than once in a pipe, all instances will have the same
2372 /*----------------------------------------------------------------------
2373 richtext to plaintext filter
2378 This basically removes all richtext formatting. A cute hack is used
2379 to get bold and underlining to work.
2380 Further work could be done to handle things like centering and right
2381 and left flush, but then it could no longer be done in place. This
2382 operates on text *with* CRLF's.
2384 WARNING: does not wrap lines!
/*
 * gf_rich2plain - strip richtext markup, leaving plain text.
 *
 * Args: f   -- filter state and input data; f->opt, when set, points
 *              at an int which is read into `plain'
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->f1           state (DFL on reset), copied into `state'
 *   f->f2           set while inside a comment; text is written only
 *                   while it is clear
 *   f->line, linep  token buffer, 45 chars obtained with fs_get on
 *                   GF_RESET and released on GF_EOD
 *
 * Token characters are collected in lower case until '>' is seen:
 *   lt                       writes a literal '<'
 *   nl                       writes CR LF
 *   comment, /comment        handling not visible here
 *   /paragraph               writes CR LF twice
 *   bold, italic, underline  (and their closing forms) write
 *                            TAG_EMBED plus the matching on/off tag,
 *                            unless `plain' is set; italic is shown
 *                            as underline
 * Other tokens are ignored.  A token longer than the limit releases
 * f->line and raises gf_error.
 *
 * On GF_EOD an unfinished token raises gf_error; TAG_ULINEOFF and
 * TAG_BOLDOFF sequences are written (their guards are not visible
 * here), f->line is released, the next filter is flushed and then
 * called with GF_EOD.
 *
 * rich_bold_on and rich_uline_on are function-static, so they are
 * shared by every use of this filter.
 */
2387 gf_rich2plain(FILTER_S
*f
, int flg
)
2389 static int rich_bold_on
= 0, rich_uline_on
= 0;
2391 /* BUG: quote incoming \255 values */
2392 GF_INIT(f
, f
->next
);
2395 register unsigned char c
;
2396 register int state
= f
->f1
;
2399 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2401 while(GF_GETC(f
, c
)){
2404 case TOKEN
: /* collect a richtext token */
2405 if(c
== '>'){ /* what should we do with it? */
2406 state
= DFL
; /* return to default next time */
2407 *(f
->linep
) = '\0'; /* cap off token */
2408 if(f
->line
[0] == 'l' && f
->line
[1] == 't'){
2409 GF_PUTC(f
->next
, '<'); /* literal '<' */
2411 else if(f
->line
[0] == 'n' && f
->line
[1] == 'l'){
2412 GF_PUTC(f
->next
, '\015');/* newline! */
2413 GF_PUTC(f
->next
, '\012');
2415 else if(!strcmp("comment", f
->line
)){
2418 else if(!strcmp("/comment", f
->line
)){
2421 else if(!strcmp("/paragraph", f
->line
)) {
2422 GF_PUTC(f
->next
, '\r');
2423 GF_PUTC(f
->next
, '\n');
2424 GF_PUTC(f
->next
, '\r');
2425 GF_PUTC(f
->next
, '\n');
2427 else if(!plain
/* gf_rich_plain */){
2428 if(!strcmp(f
->line
, "bold")) {
2429 GF_PUTC(f
->next
, TAG_EMBED
);
2430 GF_PUTC(f
->next
, TAG_BOLDON
);
2432 } else if(!strcmp(f
->line
, "/bold")) {
2433 GF_PUTC(f
->next
, TAG_EMBED
);
2434 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2436 } else if(!strcmp(f
->line
, "italic")) {
2437 GF_PUTC(f
->next
, TAG_EMBED
);
2438 GF_PUTC(f
->next
, TAG_ULINEON
);
2440 } else if(!strcmp(f
->line
, "/italic")) {
2441 GF_PUTC(f
->next
, TAG_EMBED
);
2442 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2444 } else if(!strcmp(f
->line
, "underline")) {
2445 GF_PUTC(f
->next
, TAG_EMBED
);
2446 GF_PUTC(f
->next
, TAG_ULINEON
);
2448 } else if(!strcmp(f
->line
, "/underline")) {
2449 GF_PUTC(f
->next
, TAG_EMBED
);
2450 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2454 /* else we just ignore the token! */
2456 f
->linep
= f
->line
; /* reset token buffer */
2458 else{ /* add char to token */
2459 if(f
->linep
- f
->line
> 40){
2460 /* What? rfc1341 says 40 char tokens MAX! */
2461 fs_give((void **)&(f
->line
));
2462 gf_error("Richtext token over 40 characters");
2466 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2471 state
= DFL
; /* back to default next time */
2472 if(c
== '\012'){ /* treat as single space? */
2473 GF_PUTC(f
->next
, ' ');
2476 /* fall thru to process c */
2482 else if(c
== '\015')
2484 else if(!f
->f2
) /* not in comment! */
2485 GF_PUTC(f
->next
, c
);
2494 else if(flg
== GF_EOD
){
2495 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2496 /* incomplete token!! */
2497 gf_error("Incomplete token in richtext");
2502 GF_PUTC(f
->next
, TAG_EMBED
);
2503 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2507 GF_PUTC(f
->next
, TAG_EMBED
);
2508 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2512 fs_give((void **)&(f
->line
));
2513 (void) GF_FLUSH(f
->next
);
2514 (*f
->next
->f
)(f
->next
, GF_EOD
);
2516 else if(flg
== GF_RESET
){
2517 dprint((9, "-- gf_reset rich2plain\n"));
2518 f
->f1
= DFL
; /* state */
2519 f
->f2
= 0; /* set means we're in a comment */
2520 f
->linep
= f
->line
= (char *)fs_get(45 * sizeof(char));
2526 * function called from the outside to set
2527 * richtext filter's options
/*
 * gf_rich2plain_opt - package the richtext filter's option.
 *
 * Args: plain -- pointer to the caller's int flag
 *
 * Returns the same pointer cast to void *; nothing is copied or
 * allocated, so the int it points at must outlive the filter run.
 * gf_rich2plain dereferences it through f->opt.
 */
2530 gf_rich2plain_opt(int *plain
)
2532 return((void *) plain
);
2538 * ENRICHED-TO-PLAIN text filter
/*
 * Flag bits kept in f->f2 by gf_enriched2plain:
 *   TEF_QUELL   while set, text and the space standing in for a line
 *               break are not written
 *   TEF_NOFILL  while set, CR is not given the line-folding treatment
 */
2541 #define TEF_QUELL 0x01
2542 #define TEF_NOFILL 0x02
2546 /*----------------------------------------------------------------------
2547 enriched text to plain text filter (ala rfc1523)
2549 Args: f -- state and input data
2552 This basically removes all enriched formatting. A cute hack is used
2553 to get bold and underlining to work.
2555 Further work could be done to handle things like centering and right
2556 and left flush, but then it could no longer be done in place. This
2557 operates on text *with* CRLF's.
2559 WARNING: does not wrap lines!
/*
 * gf_enriched2plain - strip text/enriched markup, leaving plain text.
 *
 * Args: f   -- filter state and input data; f->opt, when set, points
 *              at an int which is read into `plain'
 *       flg -- GF_EOD and GF_RESET select the two trailing branches;
 *              the GF_GETC loop handles ordinary data
 *
 * Filter fields used by the visible code:
 *   f->f1           state (DFL on reset), copied into `state'
 *   f->f2           TEF_QUELL / TEF_NOFILL flag bits (the "in a
 *                   comment" remark on the reset line appears to be
 *                   carried over from gf_rich2plain)
 *   f->line, linep  token buffer, 65 chars obtained with fs_get on
 *                   GF_RESET and released on GF_EOD
 *
 * Token characters are collected in lower case until '>' is seen.  A
 * leading '/' marks a closing token (`off'):
 *   param                    clears TEF_QUELL in one branch; the
 *                            other branch is not visible here
 *   nofill                   sets or clears TEF_NOFILL
 *   bold, italic, underline  write TAG_EMBED plus the on or off tag
 *                            and record the result in enr_bold_on or
 *                            enr_uline_on, unless `plain' is set;
 *                            italic is shown as underline
 * Other tokens are ignored.  A '<' directly after the opening '<' is
 * a literal '<'; a '<' later in a token, or a token longer than the
 * limit, releases f->line and raises gf_error.
 *
 * Outside tokens, repeated CR LF pairs are written as real newlines
 * and a single one becomes a space unless TEF_QUELL is set.
 *
 * On GF_EOD an unfinished token raises gf_error; TAG_ULINEOFF and
 * TAG_BOLDOFF sequences are written (their guards are not visible
 * here), then CR LF, f->line is released, the next filter is flushed
 * and then called with GF_EOD.
 *
 * enr_uline_on and enr_bold_on are function-static, so they are
 * shared by every use of this filter.
 */
2562 gf_enriched2plain(FILTER_S
*f
, int flg
)
2564 static int enr_uline_on
= 0, enr_bold_on
= 0;
2566 /* BUG: quote incoming \255 values */
2567 GF_INIT(f
, f
->next
);
2570 register unsigned char c
;
2571 register int state
= f
->f1
;
2574 plain
= f
->opt
? (*(int *) f
->opt
) : 0;
2576 while(GF_GETC(f
, c
)){
2579 case TOKEN
: /* collect a richtext token */
2580 if(c
== '>'){ /* what should we do with it? */
2581 int off
= *f
->line
== '/';
2582 char *token
= f
->line
+ (off
? 1 : 0);
2585 if(!strcmp("param", token
)){
2587 f
->f2
&= ~TEF_QUELL
;
2591 else if(!strcmp("nofill", token
)){
2593 f
->f2
&= ~TEF_NOFILL
;
2595 f
->f2
|= TEF_NOFILL
;
2597 else if(!plain
/* gf_enriched_plain */){
2598 /* Following is a cute hack or two to get
2599 bold and underline on the screen.
2600 See Putline0n() where these codes are
2602 if(!strcmp("bold", token
)) {
2603 GF_PUTC(f
->next
, TAG_EMBED
);
2604 GF_PUTC(f
->next
, off
? TAG_BOLDOFF
: TAG_BOLDON
);
2605 enr_bold_on
= off
? 0 : 1;
2606 } else if(!strcmp("italic", token
)) {
2607 GF_PUTC(f
->next
, TAG_EMBED
);
2608 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2609 enr_uline_on
= off
? 0 : 1;
2610 } else if(!strcmp("underline", token
)) {
2611 GF_PUTC(f
->next
, TAG_EMBED
);
2612 GF_PUTC(f
->next
, off
? TAG_ULINEOFF
: TAG_ULINEON
);
2613 enr_uline_on
= off
? 0 : 1;
2616 /* else we just ignore the token! */
2618 f
->linep
= f
->line
; /* reset token buffer */
2620 else if(c
== '<'){ /* literal '<'? */
2621 if(f
->linep
== f
->line
){
2622 GF_PUTC(f
->next
, '<');
2626 fs_give((void **)&(f
->line
));
2627 gf_error("Malformed Enriched text: unexpected '<'");
2631 else{ /* add char to token */
2632 if(f
->linep
- f
->line
> 60){ /* rfc1523 says 60 MAX! */
2633 fs_give((void **)&(f
->line
));
2634 gf_error("Malformed Enriched text: token too long");
2638 *(f
->linep
)++ = isupper((unsigned char)c
) ? c
-'A'+'a' : c
;
2643 if(c
!= '\012'){ /* treat as single space? */
2644 state
= DFL
; /* lone cr? */
2645 f
->f2
&= ~TEF_QUELL
;
2646 GF_PUTC(f
->next
, '\015');
2654 if(c
== '\015'){ /* treat as single space? */
2655 state
= CCR
; /* repeat crlf's mean real newlines */
2657 GF_PUTC(f
->next
, '\r');
2658 GF_PUTC(f
->next
, '\n');
2663 if(!((f
->f2
) & TEF_QUELL
))
2664 GF_PUTC(f
->next
, ' ');
2666 f
->f2
&= ~TEF_QUELL
;
2669 /* fall thru to take care of 'c' */
2676 else if(c
== '\015' && (!((f
->f2
) & TEF_NOFILL
)))
2678 else if(!((f
->f2
) & TEF_QUELL
))
2679 GF_PUTC(f
->next
, c
);
2688 else if(flg
== GF_EOD
){
2689 if((f
->f1
= (f
->linep
!= f
->line
)) != 0){
2690 /* incomplete token!! */
2691 gf_error("Incomplete token in richtext");
2695 GF_PUTC(f
->next
, TAG_EMBED
);
2696 GF_PUTC(f
->next
, TAG_ULINEOFF
);
2700 GF_PUTC(f
->next
, TAG_EMBED
);
2701 GF_PUTC(f
->next
, TAG_BOLDOFF
);
2705 /* Make sure we end with a newline so everything gets flushed */
2706 GF_PUTC(f
->next
, '\015');
2707 GF_PUTC(f
->next
, '\012');
2709 fs_give((void **)&(f
->line
));
2711 (void) GF_FLUSH(f
->next
);
2712 (*f
->next
->f
)(f
->next
, GF_EOD
);
2714 else if(flg
== GF_RESET
){
2715 dprint((9, "-- gf_reset enriched2plain\n"));
2716 f
->f1
= DFL
; /* state */
2717 f
->f2
= 0; /* set means we're in a comment */
2718 f
->linep
= f
->line
= (char *)fs_get(65 * sizeof(char));
2724 * function called from the outside to set
2725 * enriched text filter's options
/*
 * gf_enriched2plain_opt - package the enriched-text filter's option.
 *
 * Args: plain -- pointer to the caller's int flag
 *
 * Returns the same pointer cast to void *; nothing is copied or
 * allocated, so the int it points at must outlive the filter run.
 * gf_enriched2plain dereferences it through f->opt.
 */
2728 gf_enriched2plain_opt(int *plain
)
2730 return((void *) plain
);
2736 * HTML-TO-PLAIN text filter
2740 /* OK, here's the plan:
2742 * a universal output function handles writing chars and worries
2745 * a universal element collector reads chars and collects params
2746 * and dispatches the appropriate element handler.
2748 * element handlers are stacked. The most recently dispatched gets
2749 * first crack at the incoming character stream. It passes bytes it's
2750 * done with or not interested in to the next
2752 * installs that handler as the current one collecting data...
2754 * stacked handlers take their params from the element collector and
2755 * accept chars or do whatever they need to do. Sort of a vertical
2756 * piping? recursion-like? hmmm.
2758 * at least I think this is how it'll work. tres simple, non?
2764 * Some important constants
/*
 * Constants for the HTML-to-plain-text filter: buffer and name length
 * limits, entity collector result codes, the special character codes
 * used on the output side, selectors for the indent function, header
 * style bits, the RSS item limit, and the supported list numbering
 * styles.
 *
 * NOTE(review): HTML_LITERAL and HTML_DOBOLD share the value 0x0400,
 * so one cannot be told from the other by value alone.
 */
2766 #define HTML_BUF_LEN 2048 /* max scratch buffer length */
2767 #define MAX_ENTITY 20 /* maximum length of an entity */
2768 #define MAX_ELEMENT 72 /* maximum length of an element */
2769 #define HTML_MOREDATA 0 /* expect more entity data */
2770 #define HTML_ENTITY 1 /* valid entity collected */
2771 #define HTML_BADVALUE 0x0100 /* good data, but bad entity value */
2772 #define HTML_BADDATA 0x0200 /* bad data found looking for entity */
2773 #define HTML_LITERAL 0x0400 /* Literal character value */
2774 #define HTML_NEWLINE 0x010A /* hard newline */
2775 #define HTML_DOBOLD 0x0400 /* Start Bold display */
2776 #define HTML_ID_GET 0 /* indent func: return current val */
2777 #define HTML_ID_SET 1 /* indent func: set to absolute val */
2778 #define HTML_ID_INC 2 /* indent func: increment by val */
2779 #define HTML_HX_CENTER 0x0001
2780 #define HTML_HX_ULINE 0x0002
2781 #define RSS_ITEM_LIMIT 20 /* RSS 2.0 ITEM depth limit */
2784 /* types of lists that we will support */
2785 #define LIST_DECIMAL (long) 0
2786 #define LIST_ALPHALO (long) 1
2787 #define LIST_ALPHAUP (long) 2
2788 #define LIST_ROMANLO (long) 3
2789 #define LIST_ROMANUP (long) 4
2790 #define LIST_UNKNOWN (long) 10
2793 * Handler data, state information including function that uses it
/*
 * One entry on the stack of HTML element handlers.  html_data points
 * at the filter the handler works for; below points at another
 * handler_s, presumably the next handler down the stack.
 * NOTE(review): further members are not visible here.
 */
2795 typedef struct handler_s
{
2796 FILTER_S
*html_data
;
2801 struct handler_s
*below
;
2805 * Element Property structure
/*
 * Properties of one markup element: the handler function to call
 * (taking a HANDLER_S pointer and two ints) and two one-bit flags,
 * blocklevel and alternate.
 * NOTE(review): further members, and the exact meaning of the two
 * flags, are not visible here.
 */
2807 typedef struct _element_properties
{
2810 int (*handler
)(HANDLER_S
*, int, int);
2811 unsigned blocklevel
:1;
2812 unsigned alternate
:1;
2816 * Types used to manage HTML parsing
/* forward declaration; file-local, takes a handler and an int */
2818 static void html_handoff(HANDLER_S
*, int);
2822 * to help manage line wrapping.
/*
 * Buffer used while wrapping a line.  used counts stored chars while
 * width counts displayed columns; the two are tracked separately.
 */
2824 typedef struct _wrap_line
{
2825 char *buf
; /* buf to collect wrapped text */
2826 int used
, /* number of chars in buf */
2827 width
, /* text's width as displayed */
2828 len
; /* length of allocated buf */
2833 * to help manage centered text
/*
 * State for centered output: the line assembled so far and the word
 * currently being collected, each held in a WRAPLINE_S.
 * NOTE(review): further members are not visible here.
 */
2835 typedef struct _center_s
{
2836 WRAPLINE_S line
; /* buf to assemble centered text */
2837 WRAPLINE_S word
; /* word being appended to line */
2844 * Collector data and state information
/*
 * Working state of the element collector: the scratch buffer and its
 * sizes, one-bit flags describing what is being parsed, the quote
 * character in effect, and the element name and attribute list
 * collected so far.
 */
2846 typedef struct collector_s
{
2847 char *buf
; /* buffer to collect data */
2848 unsigned long bufsize
; /* size of buffer to collect data */
2849 int len
; /* length of that buffer */
2850 unsigned unquoted_data
:1; /* parameter is not quoted... */
2851 unsigned end_tag
:1; /* collecting a closing tag */
2852 unsigned hit_equal
:1; /* collecting right half of attrib */
2853 unsigned mkup_decl
:1; /* markup declaration */
2854 unsigned start_comment
:1; /* markup declaration comment */
2855 unsigned end_comment
:1; /* legit comment format */
2856 unsigned hyphen
:1; /* markup hyphen read */
2857 unsigned badform
:1; /* malformed markup element */
2858 unsigned overrun
:1; /* Overran buf above */
2859 unsigned proc_inst
:1; /* XML processing instructions */
2860 unsigned empty
:1; /* empty element */
2861 unsigned was_quoted
:1; /* basically to catch null string */
2862 char quoted
; /* quoted element param value */
2863 char *element
; /* element's collected name */
2864 PARAMETER
*attribs
; /* element's collected attributes */
2865 PARAMETER
*cur_attrib
; /* attribute now being collected */
2870 * State information for all element handlers
/*
 * State shared by all HTML element handlers; reached from a filter
 * through the HD() macro.  It holds the handler stack, the element
 * collector and centering state, the current token function, output
 * layout values, and one-bit flags for the current text attributes
 * and document position.
 */
2872 typedef struct html_data
{
2873 HANDLER_S
*h_stack
; /* handler list */
2874 CLCTR_S
*el_data
; /* element collector data */
2875 CENTER_S
*centered
; /* struct to manage centered text */
2876 int (*token
)(FILTER_S
*, int);
2877 char quoted
; /* quoted, by either ' or ", text */
2878 short indent_level
; /* levels of indention */
2879 int in_anchor
; /* text now being written to anchor */
2880 int blanks
; /* Consecutive blank line count */
2881 int wrapcol
; /* column to wrap lines on */
2882 int *prefix
; /* buffer containing Anchor prefix */
2884 long line_bufsize
; /* current size of the line buffer */
2887 int state
; /* embedded data state */
2888 char *color
; /* embedded color pointer */
2890 CBUF_S cb
; /* utf8->ucs4 conversion state */
2891 unsigned wrapstate
:1; /* whether or not to wrap output */
2892 unsigned li_pending
:1; /* <LI> next token expected */
2893 unsigned de_pending
:1; /* <DT> or <DD> next token expected */
2894 unsigned bold_on
:1; /* currently bolding text */
2895 unsigned uline_on
:1; /* currently underlining text */
2896 unsigned center
:1; /* center output text */
2897 unsigned bitbucket
:1; /* Ignore input */
2898 unsigned head
:1; /* In doc's HEAD */
2899 unsigned body
:1; /* In doc's BODY */
2900 unsigned alt_entity
:1; /* use alternative entity values */
2901 unsigned wrote
:1; /* anything written yet? */
2906 * HTML filter options
/*
 * Options for the HTML filter, installed as the filter's opt and read
 * through the accessor macros (WRAP_COLS, HTML_INDENT, HTML_BASE,
 * STRIP, PASS_HTML, HANDLESP, ELEMENTS, RSS_FEED and friends).
 */
2908 typedef struct _html_opts
{
2909 char *base
; /* Base URL for this html file */
2910 int columns
, /* Display columns (excluding margins) */
2911 indent
; /* Left margin */
2912 HANDLE_S
**handlesp
; /* Head of handles */
2913 htmlrisk_t warnrisk_f
; /* Nasty link warning call */
2914 ELPROP_S
*element_table
; /* markup element table */
2915 RSS_FEED_S
**feedp
; /* hook for RSS feed response */
2916 unsigned strip
:1; /* Hilite TAGs allowed */
2917 unsigned handles_loc
:1; /* Local handles requested? */
2918 unsigned showserver
:1; /* Display server after anchors */
2919 unsigned outputted
:1; /* any */
2920 unsigned no_relative_links
:1; /* Disable embedded relative links */
2921 unsigned related_content
:1; /* Embedded related content */
2922 unsigned html
:1; /* Output content in HTML */
2923 unsigned html_imgs
:1; /* Output IMG tags in HTML content */
2929 * Some macros to make life a little easier
/*
 * Convenience macros for the HTML filter.  X (or F) is the FILTER_S
 * pointer.
 *
 *   - Option accessors (WRAP_COLS through RELATED_OK) read members of
 *     the HTML_OPT_S behind (X)->opt and supply a fallback when opt
 *     is NULL.  HANDLESP, ELEMENTS and RSS_FEED dereference opt
 *     without that check, so they need opt to be set; DO_HANDLES does
 *     the check before it uses HANDLESP.
 *   - HD, ED and EL cast to the filter's HTML_DATA_S, its element
 *     collector, and a handler's ELPROP_S.
 *   - MAKE_LITERAL / IS_LITERAL tag a character value with
 *     HTML_LITERAL; HTML_ISSPACE is true only for untagged values.
 *   - NEW_CLCTR allocates and zeroes a collector and its
 *     HTML_BUF_LEN buffer and installs html_element_collector as the
 *     token function; FREE_CLCTR releases the attribute list, the
 *     element name and the collector, and clears the token function.
 *   - HTML_FLUSH writes the pending line buffer with html_write and
 *     rewinds linep.
 *   - HTML_BOLD and the other style macros send TAG_EMBED followed by
 *     the matching on or off tag through html_output.
 *   - HTML_DUMP_LIT and HTML_PROC feed characters to the current
 *     token function; parts of their bodies are not visible here.
 *
 * NOTE(review): WRAPPED_LEN ignores its parameter and refers to a
 * variable named f, so it only works where such a variable is in
 * scope.
 */
2931 #define WRAP_COLS(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->columns : 80)
2932 #define HTML_INDENT(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->indent : 0)
2933 #define HTML_WROTE(X) (HD(X)->wrote)
2934 #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL)
2935 #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
2936 #define PASS_HTML(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
2937 #define PASS_IMAGES(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
2938 #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp)
2939 #define DO_HANDLES(X) ((X)->opt && HANDLESP(X))
2940 #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
2941 #define SHOWSERVER(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
2942 #define NO_RELATIVE(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
2943 #define RELATED_OK(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
2944 #define ELEMENTS(X) (((HTML_OPT_S *)(X)->opt)->element_table)
2945 #define RSS_FEED(X) (*(((HTML_OPT_S *)(X)->opt)->feedp))
2946 #define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
2947 #define IS_LITERAL(C) (HTML_LITERAL & (C))
2948 #define HD(X) ((HTML_DATA_S *)(X)->data)
2949 #define ED(X) (HD(X)->el_data)
2950 #define EL(X) ((ELPROP_S *) (X)->element)
2951 #define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
2952 #define HTML_ISSPACE(C) (IS_LITERAL(C) == 0 && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
2953 #define NEW_CLCTR(X) { \
2954 ED(X) = (CLCTR_S *)fs_get(sizeof(CLCTR_S)); \
2955 memset(ED(X), 0, sizeof(CLCTR_S)); \
2956 ED(X)->buf = memset((void *) fs_get(ED(X)->bufsize = HTML_BUF_LEN), 0, HTML_BUF_LEN); \
2957 HD(X)->token = html_element_collector; \
2960 #define FREE_CLCTR(X) { \
2961 if(ED(X)->attribs){ \
2963 while((p = ED(X)->attribs) != NULL){ \
2964 ED(X)->attribs = ED(X)->attribs->next; \
2966 fs_give((void **)&p->attribute); \
2968 fs_give((void **)&p->value); \
2969 fs_give((void **)&p); \
2972 if(ED(X)->element) \
2973 fs_give((void **) &ED(X)->element); \
2974 fs_give((void **) &ED(X)); \
2975 HD(X)->token = NULL; \
2977 #define HANDLERS(X) (HD(X)->h_stack)
2978 #define BOLD_BIT(X) (HD(X)->bold_on)
2979 #define ULINE_BIT(X) (HD(X)->uline_on)
2980 #define CENTER_BIT(X) (HD(X)->center)
2981 #define HTML_FLUSH(X) { \
2982 html_write(X, (X)->line, (X)->linep - (X)->line); \
2983 (X)->linep = (X)->line; \
2986 #define HTML_BOLD(X, S) if(! STRIP(X)){ \
2988 html_output((X), TAG_EMBED); \
2989 html_output((X), TAG_BOLDON); \
2992 html_output((X), TAG_EMBED); \
2993 html_output((X), TAG_BOLDOFF); \
2996 #define HTML_ULINE(X, S) \
2999 html_output((X), TAG_EMBED); \
3000 html_output((X), TAG_ULINEON); \
3003 html_output((X), TAG_EMBED); \
3004 html_output((X), TAG_ULINEOFF); \
3007 #define HTML_ITALIC(X, S) \
3010 html_output((X), TAG_EMBED); \
3011 html_output((X), TAG_ITALICON); \
3014 html_output((X), TAG_EMBED); \
3015 html_output((X), TAG_ITALICOFF); \
3018 #define HTML_STRIKE(X, S) \
3021 html_output((X), TAG_EMBED); \
3022 html_output((X), TAG_STRIKEON); \
3025 html_output((X), TAG_EMBED); \
3026 html_output((X), TAG_STRIKEOFF); \
3029 #define HTML_BIG(X, S) \
3032 html_output((X), TAG_EMBED); \
3033 html_output((X), TAG_BIGON); \
3036 html_output((X), TAG_EMBED); \
3037 html_output((X), TAG_BIGOFF); \
3040 #define HTML_SMALL(X, S) \
3043 html_output((X), TAG_EMBED); \
3044 html_output((X), TAG_SMALLON); \
3047 html_output((X), TAG_EMBED); \
3048 html_output((X), TAG_SMALLOFF); \
3051 #define WRAPPED_LEN(X) ((HD(f)->centered) \
3052 ? (HD(f)->centered->line.width \
3053 + HD(f)->centered->word.width \
3054 + ((HD(f)->centered->line.width \
3055 && HD(f)->centered->word.width) \
3058 #define HTML_DUMP_LIT(F, S, L) { \
3060 for(i = 0; i < (L); i++){ \
3061 c = ASCII_ISSPACE((unsigned char)(S)[i]) \
3063 : MAKE_LITERAL((S)[i]); \
3067 #define HTML_PROC(F, C) { \
3070 if((i = (*(HD(F)->token))(F, C)) != 0){ \
3072 HTML_DUMP_LIT(F, "<", 1); \
3073 if(HD(F)->el_data->element){ \
3075 HD(F)->el_data->element, \
3076 strlen(HD(F)->el_data->element));\
3078 if(HD(F)->el_data->len){ \
3080 HD(F)->el_data->buf, \
3081 HD(F)->el_data->len); \
3088 else if((C) == '<'){ \
3094 #define HTML_LINEP_PUTC(F, C) { \
3095 if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
3096 size_t offset = (F)->linep - (F)->line; \
3097 fs_resize((void **) &(F)->line, \
3098 (HD(F)->line_bufsize * 2) * sizeof(char)); \
3099 HD(F)->line_bufsize *= 2; \
3100 (F)->linep = &(F)->line[offset]; \
3102 *(F)->linep++ = (C); \
3104 #define HTML_TEXT(F, C) switch((F)->f1){ \
3106 if(HTML_ISSPACE(C)) /* ignore repeated WS */ \
3108 HTML_TEXT_OUT(F, ' '); \
3109 (F)->f1 = DFL;/* stop sending chars here */ \
3110 /* fall thru to process 'c' */ \
3112 if(HD(F)->bitbucket) \
3113 (F)->f1 = DFL; /* no op */ \
3114 else if(HTML_ISSPACE(C) && HD(F)->wrapstate) \
3115 (F)->f1 = WSPACE;/* coalesce white space */ \
3116 else HTML_TEXT_OUT(F, C); \
3119 #define HTML_TEXT_OUT(F, C) if(HANDLERS(F)) /* let handlers see C */ \
3120 (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA); \
3124 #define HTML_DEBUG_EL(S, D) { \
3125 dprint((5, "-- html %s: %s\n", \
3128 ? (D)->element : "NULL")); \
3131 for(p = (D)->attribs; \
3132 p && p->attribute; \
3135 " PARM: %s%s%s\n", \
3137 ? p->attribute : "NULL",\
3138 p->value ? "=" : "", \
3139 p->value ? p->value : ""));\
3143 #define HTML_DEBUG_EL(S, D)
3146 #ifndef SYSTEM_PINE_INFO_PATH
3147 #define SYSTEM_PINE_INFO_PATH "/usr/local/lib/pine.info"
3149 #define CHTML_VAR_EXPAND(S) (!strcmp(S, "PINE_INFO_PATH") \
3150 ? SYSTEM_PINE_INFO_PATH : S)
3153 * Protos for Tag handlers
3155 int html_head(HANDLER_S
*, int, int);
3156 int html_base(HANDLER_S
*, int, int);
3157 int html_title(HANDLER_S
*, int, int);
3158 int html_body(HANDLER_S
*, int, int);
3159 int html_a(HANDLER_S
*, int, int);
3160 int html_br(HANDLER_S
*, int, int);
3161 int html_hr(HANDLER_S
*, int, int);
3162 int html_p(HANDLER_S
*, int, int);
3163 int html_table(HANDLER_S
*, int, int);
3164 int html_caption(HANDLER_S
*, int, int);
3165 int html_tr(HANDLER_S
*, int, int);
3166 int html_td(HANDLER_S
*, int, int);
3167 int html_th(HANDLER_S
*, int, int);
3168 int html_thead(HANDLER_S
*, int, int);
3169 int html_tbody(HANDLER_S
*, int, int);
3170 int html_tfoot(HANDLER_S
*, int, int);
3171 int html_col(HANDLER_S
*, int, int);
3172 int html_colgroup(HANDLER_S
*, int, int);
3173 int html_b(HANDLER_S
*, int, int);
3174 int html_u(HANDLER_S
*, int, int);
3175 int html_i(HANDLER_S
*, int, int);
3176 int html_em(HANDLER_S
*, int, int);
3177 int html_strong(HANDLER_S
*, int, int);
3178 int html_s(HANDLER_S
*, int, int);
3179 int html_big(HANDLER_S
*, int, int);
3180 int html_small(HANDLER_S
*, int, int);
3181 int html_font(HANDLER_S
*, int, int);
3182 int html_img(HANDLER_S
*, int, int);
3183 int html_map(HANDLER_S
*, int, int);
3184 int html_area(HANDLER_S
*, int, int);
3185 int html_form(HANDLER_S
*, int, int);
3186 int html_input(HANDLER_S
*, int, int);
3187 int html_option(HANDLER_S
*, int, int);
3188 int html_optgroup(HANDLER_S
*, int, int);
3189 int html_button(HANDLER_S
*, int, int);
3190 int html_select(HANDLER_S
*, int, int);
3191 int html_textarea(HANDLER_S
*, int, int);
3192 int html_label(HANDLER_S
*, int, int);
3193 int html_fieldset(HANDLER_S
*, int, int);
3194 int html_ul(HANDLER_S
*, int, int);
3195 int html_ol(HANDLER_S
*, int, int);
3196 int html_menu(HANDLER_S
*, int, int);
3197 int html_dir(HANDLER_S
*, int, int);
3198 int html_li(HANDLER_S
*, int, int);
3199 int html_h1(HANDLER_S
*, int, int);
3200 int html_h2(HANDLER_S
*, int, int);
3201 int html_h3(HANDLER_S
*, int, int);
3202 int html_h4(HANDLER_S
*, int, int);
3203 int html_h5(HANDLER_S
*, int, int);
3204 int html_h6(HANDLER_S
*, int, int);
3205 int html_blockquote(HANDLER_S
*, int, int);
3206 int html_address(HANDLER_S
*, int, int);
3207 int html_pre(HANDLER_S
*, int, int);
3208 int html_center(HANDLER_S
*, int, int);
3209 int html_div(HANDLER_S
*, int, int);
3210 int html_span(HANDLER_S
*, int, int);
3211 int html_dl(HANDLER_S
*, int, int);
3212 int html_dt(HANDLER_S
*, int, int);
3213 int html_dd(HANDLER_S
*, int, int);
3214 int html_script(HANDLER_S
*, int, int);
3215 int html_applet(HANDLER_S
*, int, int);
3216 int html_style(HANDLER_S
*, int, int);
3217 int html_kbd(HANDLER_S
*, int, int);
3218 int html_dfn(HANDLER_S
*, int, int);
3219 int html_var(HANDLER_S
*, int, int);
3220 int html_tt(HANDLER_S
*, int, int);
3221 int html_samp(HANDLER_S
*, int, int);
3222 int html_sub(HANDLER_S
*, int, int);
3223 int html_sup(HANDLER_S
*, int, int);
3224 int html_cite(HANDLER_S
*, int, int);
3225 int html_code(HANDLER_S
*, int, int);
3226 int html_ins(HANDLER_S
*, int, int);
3227 int html_del(HANDLER_S
*, int, int);
3228 int html_abbr(HANDLER_S
*, int, int);
3229 char *img_tempfile_name(char *, long, int *);
3232 * Protos for RSS 2.0 Tag handlers
3234 int rss_rss(HANDLER_S
*, int, int);
3235 int rss_channel(HANDLER_S
*, int, int);
3236 int rss_title(HANDLER_S
*, int, int);
3237 int rss_image(HANDLER_S
*, int, int);
3238 int rss_link(HANDLER_S
*, int, int);
3239 int rss_description(HANDLER_S
*, int, int);
3240 int rss_ttl(HANDLER_S
*, int, int);
3241 int rss_item(HANDLER_S
*, int, int);
3244 * Proto's for support routines
3246 void html_pop(FILTER_S
*, ELPROP_S
*);
3247 int html_push(FILTER_S
*, ELPROP_S
*);
3248 int html_element_collector(FILTER_S
*, int);
3249 int html_element_flush(CLCTR_S
*);
3250 void html_element_comment(FILTER_S
*, char *);
3251 void html_element_output(FILTER_S
*, int);
3252 int html_entity_collector(FILTER_S
*, int, UCS
*, char **);
3253 void html_a_prefix(FILTER_S
*);
3254 void html_a_finish(HANDLER_S
*);
3255 void html_a_output_prefix(FILTER_S
*, int);
3256 void html_a_output_info(HANDLER_S
*);
3257 void html_a_relative(char *, char *, HANDLE_S
*);
3258 int html_href_relative(char *);
3259 int html_indent(FILTER_S
*, int, int);
3260 void html_blank(FILTER_S
*, int);
3261 void html_newline(FILTER_S
*);
3262 void html_output(FILTER_S
*, int);
3263 void html_output_string(FILTER_S
*, char *);
3264 void html_output_raw_tag(FILTER_S
*, char *);
3265 void html_output_normal(FILTER_S
*, int, int, int);
3266 void html_output_flush(FILTER_S
*);
3267 void html_output_centered(FILTER_S
*, int, int, int);
3268 void html_centered_handle(int *, char *, int);
3269 void html_centered_putc(WRAPLINE_S
*, int);
3270 void html_centered_flush(FILTER_S
*);
3271 void html_centered_flush_line(FILTER_S
*);
3272 void html_write_anchor(FILTER_S
*, int);
3273 void html_write_newline(FILTER_S
*);
3274 void html_write_indent(FILTER_S
*, int);
3275 void html_write(FILTER_S
*, char *, int);
3276 void html_putc(FILTER_S
*, int);
3277 int html_event_attribute(char *);
3278 char *rss_skip_whitespace(char *s
);
3279 ELPROP_S
*element_properties(FILTER_S
*, char *);
3283 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3284 * W3C doc "Additional named entities for HTML"
3286 static struct html_entities
{
3287 char *name
; /* entity name */
3288 UCS value
; /* UCS entity value */
3289 char *plain
; /* US-ASCII representation */
3291 {"quot", 0x0022}, /* 34 - quotation mark */
3292 {"amp", 0x0026}, /* 38 - ampersand */
3293 {"apos", 0x0027}, /* 39 - apostrophe */
3294 {"lt", 0x003C}, /* 60 - less-than sign */
3295 {"gt", 0x003E}, /* 62 - greater-than sign */
3296 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3297 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3298 {"cent", 0x00A2}, /* 162 - cent sign */
3299 {"pound", 0x00A3}, /* 163 - pound sign */
3300 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3301 {"yen", 0x00A5}, /* 165 - yen sign */
3302 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3303 {"sect", 0x00A7}, /* 167 - section sign */
3304 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3305 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3306 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3307 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3308 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3309 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3310 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3311 {"macr", 0x00AF}, /* 175 - macron */
3312 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3313 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3314 {"sup2", 0x00B2}, /* 178 - superscript two */
3315 {"sup3", 0x00B3}, /* 179 - superscript three */
3316 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3317 {"micro", 0x00B5}, /* 181 - micro sign */
3318 {"para", 0x00B6}, /* 182 - pilcrow sign */
3319 {"middot", 0x00B7}, /* 183 - middle dot */
3320 {"cedil", 0x00B8}, /* 184 - cedilla */
3321 {"sup1", 0x00B9}, /* 185 - superscript one */
3322 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3323 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3324 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3325 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3326 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3327 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3328 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3329 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3330 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3331 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3332 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3333 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3334 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3335 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3336 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3337 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3338 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3339 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3340 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3341 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3342 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3343 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3344 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3345 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3346 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3347 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3348 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3349 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3350 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3351 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3352 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3353 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3354 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3355 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3356 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3357 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3358 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3359 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3360 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3361 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3362 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3363 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3364 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3365 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3366 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3367 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3368 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3369 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3370 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3371 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3372 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3373 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3374 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3375 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3376 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3377 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3378 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3379 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3380 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3381 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3382 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3383 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3384 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3385 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3386 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3387 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3388 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3389 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3390 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3391 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3392 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3393 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3394 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3395 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3396 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3397 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3398 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3399 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3400 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3401 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3402 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3403 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3404 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3405 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3406 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3407 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3408 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3409 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3410 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3411 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3412 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3413 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3414 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3415 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3416 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3417 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3418 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3419 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3420 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3421 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3422 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3423 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3424 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3425 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3426 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3427 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3428 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3429 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3430 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3431 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3432 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3433 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3434 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3435 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3436 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3437 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3438 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3439 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3440 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3441 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3442 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3443 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3444 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3445 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3446 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3447 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3448 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3449 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3450 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3451 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3452 {"ensp", 0x2002}, /* 8194 - en space */
3453 {"emsp", 0x2003}, /* 8195 - em space */
3454 {"thinsp", 0x2009}, /* 8201 - thin space */
3455 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3456 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3457 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3458 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3459 {"ndash", 0x2013}, /* 8211 - en dash */
3460 {"mdash", 0x2014}, /* 8212 - em dash */
3461 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3462 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3463 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3464 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3465 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3466 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3467 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3468 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3469 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3470 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3471 {"dagger", 0x2020}, /* 8224 - dagger */
3472 {"Dagger", 0x2021}, /* 8225 - double dagger */
3473 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3474 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3475 {"permil", 0x2030}, /* 8240 - per mille sign */
3476 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3477 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3478 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3479 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3480 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3481 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3482 {"oline", 0x203E, "-"}, /* 8254 - overline */
3483 {"frasl", 0x2044}, /* 8260 - fraction slash */
3484 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3485 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3486 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3487 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3488 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3489 {"image", 0x2111}, /* 8465 - black-letter capital i */
3490 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3491 {"real", 0x211C}, /* 8476 - black-letter capital r */
3492 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3493 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3494 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3495 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3496 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3497 {"darr", 0x2193}, /* 8595 - downwards arrow */
3498 {"harr", 0x2194}, /* 8596 - left right arrow */
3499 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3500 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3501 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3502 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3503 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3504 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3505 {"forall", 0x2200}, /* 8704 - for all */
3506 {"part", 0x2202}, /* 8706 - partial differential */
3507 {"exist", 0x2203}, /* 8707 - there exists */
3508 {"empty", 0x2205}, /* 8709 - empty set */
3509 {"nabla", 0x2207}, /* 8711 - nabla */
3510 {"isin", 0x2208}, /* 8712 - element of */
3511 {"notin", 0x2209}, /* 8713 - not an element of */
3512 {"ni", 0x220B}, /* 8715 - contains as member */
3513 {"prod", 0x220F}, /* 8719 - n-ary product */
3514 {"sum", 0x2211}, /* 8721 - n-ary summation */
3515 {"minus", 0x2212}, /* 8722 - minus sign */
3516 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3517 {"radic", 0x221A}, /* 8730 - square root */
3518 {"prop", 0x221D}, /* 8733 - proportional to */
3519 {"infin", 0x221E}, /* 8734 - infinity */
3520 {"ang", 0x2220}, /* 8736 - angle */
3521 {"and", 0x2227}, /* 8743 - logical and */
3522 {"or", 0x2228}, /* 8744 - logical or */
3523 {"cap", 0x2229}, /* 8745 - intersection */
3524 {"cup", 0x222A}, /* 8746 - union */
3525 {"int", 0x222B}, /* 8747 - integral */
3526 {"there4", 0x2234}, /* 8756 - therefore */
3527 {"sim", 0x223C}, /* 8764 - tilde operator */
3528 {"cong", 0x2245}, /* 8773 - congruent to */
3529 {"asymp", 0x2248}, /* 8776 - almost equal to */
3530 {"ne", 0x2260}, /* 8800 - not equal to */
3531 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3532 {"le", 0x2264}, /* 8804 - less-than or equal to */
3533 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3534 {"sub", 0x2282}, /* 8834 - subset of */
3535 {"sup", 0x2283}, /* 8835 - superset of */
3536 {"nsub", 0x2284}, /* 8836 - not a subset of */
3537 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3538 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3539 {"oplus", 0x2295}, /* 8853 - circled plus */
3540 {"otimes", 0x2297}, /* 8855 - circled times */
3541 {"perp", 0x22A5}, /* 8869 - up tack */
3542 {"sdot", 0x22C5}, /* 8901 - dot operator */
3543 {"lceil", 0x2308}, /* 8968 - left ceiling */
3544 {"rceil", 0x2309}, /* 8969 - right ceiling */
3545 {"lfloor", 0x230A}, /* 8970 - left floor */
3546 {"rfloor", 0x230B}, /* 8971 - right floor */
3547 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3548 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3549 {"loz", 0x25CA}, /* 9674 - lozenge */
3550 {"spades", 0x2660}, /* 9824 - black spade suit */
3551 {"clubs", 0x2663}, /* 9827 - black club suit */
3552 {"hearts", 0x2665}, /* 9829 - black heart suit */
3553 {"diams", 0x2666} /* 9830 - black diamond suit */
3558 * Table of supported elements and corresponding handlers
3560 static ELPROP_S html_element_table
[] = {
3561 {"HTML", 4}, /* HTML ignore if seen? */
3562 {"HEAD", 4, html_head
}, /* slurp until <BODY> ? */
3563 {"TITLE", 5, html_title
}, /* Document Title */
3564 {"BASE", 4, html_base
}, /* HREF base */
3565 {"BODY", 4, html_body
}, /* HTML BODY */
3566 {"A", 1, html_a
}, /* Anchor */
3567 {"ABBR", 4, html_abbr
}, /* Abbreviation */
3568 {"IMG", 3, html_img
}, /* Image */
3569 {"MAP", 3, html_map
}, /* Image Map */
3570 {"AREA", 4, html_area
}, /* Image Map Area */
3571 {"HR", 2, html_hr
, 1, 1}, /* Horizontal Rule */
3572 {"BR", 2, html_br
, 0, 1}, /* Line Break */
3573 {"P", 1, html_p
, 1}, /* Paragraph */
3574 {"OL", 2, html_ol
, 1}, /* Ordered List */
3575 {"UL", 2, html_ul
, 1}, /* Unordered List */
3576 {"MENU", 4, html_menu
}, /* Menu List */
3577 {"DIR", 3, html_dir
}, /* Directory List */
3578 {"LI", 2, html_li
}, /* ... List Item */
3579 {"DL", 2, html_dl
, 1}, /* Definition List */
3580 {"DT", 2, html_dt
}, /* ... Def. Term */
3581 {"DD", 2, html_dd
}, /* ... Def. Definition */
3582 {"I", 1, html_i
}, /* Italic Text */
3583 {"EM", 2, html_em
}, /* Typographic Emphasis */
3584 {"STRONG", 6, html_strong
}, /* STRONG Typo Emphasis */
3585 {"VAR", 3, html_i
}, /* Variable Name */
3586 {"B", 1, html_b
}, /* Bold Text */
3587 {"U", 1, html_u
}, /* Underline Text */
3588 {"S", 1, html_s
}, /* Strike-Through Text */
3589 {"STRIKE", 6, html_s
}, /* Strike-Through Text */
3590 {"BIG", 3, html_big
}, /* Big Font Text */
3591 {"SMALL", 5, html_small
}, /* Small Font Text */
3592 {"FONT", 4, html_font
}, /* Font display directives */
3593 {"BLOCKQUOTE", 10, html_blockquote
, 1}, /* Blockquote */
3594 {"ADDRESS", 7, html_address
, 1}, /* Address */
3595 {"CENTER", 6, html_center
}, /* Centered Text v3.2 */
3596 {"DIV", 3, html_div
, 1}, /* Document Division 3.2 */
3597 {"SPAN", 4, html_span
}, /* Text Span */
3598 {"H1", 2, html_h1
, 1}, /* Headings... */
3599 {"H2", 2, html_h2
, 1},
3600 {"H3", 2, html_h3
,1},
3601 {"H4", 2, html_h4
, 1},
3602 {"H5", 2, html_h5
, 1},
3603 {"H6", 2, html_h6
, 1},
3604 {"PRE", 3, html_pre
, 1}, /* Preformatted Text */
3605 {"KBD", 3, html_kbd
}, /* Keyboard Input (NO OP) */
3606 {"DFN", 3, html_dfn
}, /* Definition (NO OP) */
3607 {"VAR", 3, html_var
}, /* Variable (NO OP) */
3608 {"TT", 2, html_tt
}, /* Typetype (NO OP) */
3609 {"SAMP", 4, html_samp
}, /* Sample Text (NO OP) */
3610 {"CITE", 4, html_cite
}, /* Citation (NO OP) */
3611 {"CODE", 4, html_code
}, /* Code Text (NO OP) */
3612 {"INS", 3, html_ins
}, /* Text Inserted (NO OP) */
3613 {"DEL", 3, html_del
}, /* Text Deleted (NO OP) */
3614 {"SUP", 3, html_sup
}, /* Text Superscript (NO OP) */
3615 {"SUB", 3, html_sub
}, /* Text Superscript (NO OP) */
3616 {"STYLE", 5, html_style
}, /* CSS Definitions */
3618 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3620 {"FORM", 4, html_form
, 1}, /* form within a document */
3621 {"INPUT", 5, html_input
}, /* One input field, options */
3622 {"BUTTON", 6, html_button
}, /* Push Button */
3623 {"OPTION", 6, html_option
}, /* One option within Select */
3624 {"OPTION", 6, html_optgroup
}, /* Option Group Definition */
3625 {"SELECT", 6, html_select
}, /* Selection from a set */
3626 {"TEXTAREA", 8, html_textarea
}, /* A multi-line input field */
3627 {"LABEL", 5, html_label
}, /* Control Label */
3628 {"FIELDSET", 8, html_fieldset
, 1}, /* Fieldset Control Group */
3630 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3631 {"SCRIPT", 6, html_script
}, /* Embedded scripting statements */
3632 {"APPLET", 6, NULL
}, /* Embedded applet statements */
3633 {"OBJECT", 6, NULL
}, /* Embedded object statements */
3634 {"LINK", 4, NULL
}, /* References to external data */
3635 {"PARAM", 5, NULL
}, /* Applet/Object parameters */
3637 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3639 {"TABLE", 5, html_table
, 1}, /* Table */
3640 {"CAPTION", 7, html_caption
}, /* Table Caption */
3641 {"TR", 2, html_tr
}, /* Table Table Row */
3642 {"TD", 2, html_td
}, /* Table Table Data */
3643 {"TH", 2, html_th
}, /* Table Table Head */
3644 {"THEAD", 5, html_thead
}, /* Table Table Head */
3645 {"TBODY", 5, html_tbody
}, /* Table Table Body */
3646 {"TFOOT", 5, html_tfoot
}, /* Table Table Foot */
3647 {"COL", 3, html_col
}, /* Table Column Attributes */
3648 {"COLGROUP", 8, html_colgroup
}, /* Table Column Group Attributes */
3655 * Table of supported RSS 2.0 elements
3657 static ELPROP_S rss_element_table
[] = {
3658 {"RSS", 3, rss_rss
}, /* RSS 2.0 version */
3659 {"CHANNEL", 7, rss_channel
}, /* RSS 2.0 Channel */
3660 {"TITLE", 5, rss_title
}, /* RSS 2.0 Title */
3661 {"IMAGE", 5, rss_image
}, /* RSS 2.0 Channel Image */
3662 {"LINK", 4, rss_link
}, /* RSS 2.0 Channel/Item Link */
3663 {"DESCRIPTION", 11, rss_description
}, /* RSS 2.0 Channel/Item Description */
3664 {"ITEM", 4, rss_item
}, /* RSS 2.0 Channel ITEM */
3665 {"TTL", 3, rss_ttl
}, /* RSS 2.0 Item TTL */
3671 * Initialize the given handler, and add it to the stack if it
3674 * Returns: 1 if handler chose to get pushed on stack
3675 * 0 if handler declined
3678 html_push(FILTER_S
*fd
, ELPROP_S
*ep
)
3682 new = (HANDLER_S
*)fs_get(sizeof(HANDLER_S
));
3683 memset(new, 0, sizeof(HANDLER_S
));
3684 new->html_data
= fd
;
3686 if((*ep
->handler
)(new, 0, GF_RESET
)){ /* stack the handler? */
3687 new->below
= HANDLERS(fd
);
3688 HANDLERS(fd
) = new; /* push */
3692 fs_give((void **) &new);
3698 * Remove the most recently installed the given handler
3699 * after letting it accept its demise.
3702 html_pop(FILTER_S
*fd
, ELPROP_S
*ep
)
3706 for(tp
= HANDLERS(fd
); tp
&& ep
!= EL(tp
); tp
= tp
->below
){
3709 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep
->element
, EL(tp
)->element
));
3710 /* if no evidence of opening tag, ignore given closing tag */
3711 for(tp2
= HANDLERS(fd
); tp2
&& ep
!= EL(tp2
); tp2
= tp2
->below
)
3715 dprint((3, "-- html error: no opening tag for given tag /%s", ep
->element
));
3719 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
);
3720 HANDLERS(fd
) = tp
->below
;
3724 (void) (*EL(tp
)->handler
)(tp
, 0, GF_EOD
); /* may adjust handler list */
3725 if(tp
!= HANDLERS(fd
)){
3728 for(p
= HANDLERS(fd
); p
->below
!= tp
; p
= p
->below
)
3732 p
->below
= tp
->below
; /* remove from middle of stack */
3733 /* BUG: else programming botch and we should die */
3736 HANDLERS(fd
) = tp
->below
; /* pop */
3738 fs_give((void **)&tp
);
3741 /* BUG: should MAKE SURE NOT TO EMIT IT */
3742 dprint((3, "-- html error: end tag without a start: %s", ep
->element
));
3748 * Deal with data passed a handler in its GF_DATA state
3751 html_handoff(HANDLER_S
*hd
, int ch
)
3754 (void) (*EL(hd
->below
)->handler
)(hd
->below
, ch
, GF_DATA
);
3756 html_output(hd
->html_data
, ch
);
3761 * HTML <BR> element handler
3764 html_br(HANDLER_S
*hd
, int ch
, int cmd
)
3766 if(cmd
== GF_RESET
){
3767 if(PASS_HTML(hd
->html_data
)){
3768 html_output_raw_tag(hd
->html_data
, "br");
3771 html_output(hd
->html_data
, HTML_NEWLINE
);
3775 return(0); /* don't get linked */
3780 * HTML <HR> (Horizontal Rule) element handler
3783 html_hr(HANDLER_S
*hd
, int ch
, int cmd
)
3785 if(cmd
== GF_RESET
){
3786 if(PASS_HTML(hd
->html_data
)){
3787 html_output_raw_tag(hd
->html_data
, "hr");
3790 int i
, old_wrap
, width
, align
;
3793 width
= WRAP_COLS(hd
->html_data
);
3795 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3799 if(!strucmp(p
->attribute
, "ALIGN")){
3800 if(!strucmp(p
->value
, "LEFT"))
3802 else if(!strucmp(p
->value
, "RIGHT"))
3805 else if(!strucmp(p
->attribute
, "WIDTH")){
3809 for(cp
= p
->value
; *cp
; cp
++)
3811 width
= (WRAP_COLS(hd
->html_data
)*MIN(100,width
))/100;
3814 else if(isdigit((unsigned char) *cp
))
3815 width
= (width
* 10) + (*cp
- '0');
3817 width
= MIN(width
, WRAP_COLS(hd
->html_data
));
3821 html_blank(hd
->html_data
, 1); /* at least one blank line */
3823 old_wrap
= HD(hd
->html_data
)->wrapstate
;
3824 HD(hd
->html_data
)->wrapstate
= 0;
3825 if((i
= MAX(0, WRAP_COLS(hd
->html_data
) - width
))
3826 && ((align
== 0) ? i
/= 2 : (align
== 2)))
3828 html_output(hd
->html_data
, ' ');
3830 for(i
= 0; i
< width
; i
++)
3831 html_output(hd
->html_data
, '_');
3833 html_blank(hd
->html_data
, 1);
3834 HD(hd
->html_data
)->wrapstate
= old_wrap
;
3838 return(0); /* don't get linked */
3843 * HTML <P> (paragraph) element handler
3846 html_p(HANDLER_S
*hd
, int ch
, int cmd
)
3849 html_handoff(hd
, ch
);
3851 else if(cmd
== GF_RESET
){
3852 if(PASS_HTML(hd
->html_data
)){
3853 html_output_raw_tag(hd
->html_data
, "p");
3856 /* Make sure there's at least 1 blank line */
3857 html_blank(hd
->html_data
, 1);
3859 /* adjust indent level if needed */
3860 if(HD(hd
->html_data
)->li_pending
){
3861 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
3862 HD(hd
->html_data
)->li_pending
= 0;
3866 else if(cmd
== GF_EOD
){
3867 if(PASS_HTML(hd
->html_data
)){
3868 html_output_string(hd
->html_data
, "</p>");
3871 /* Make sure there's at least 1 blank line */
3872 html_blank(hd
->html_data
, 1);
3876 return(1); /* GET linked */
3881 * HTML <TABLE> (Table) element handler
3884 html_table(HANDLER_S
*hd
, int ch
, int cmd
)
3887 if(PASS_HTML(hd
->html_data
)){
3888 html_handoff(hd
, ch
);
3891 else if(cmd
== GF_RESET
){
3892 if(PASS_HTML(hd
->html_data
)){
3893 html_output_raw_tag(hd
->html_data
, "table");
3896 /* Make sure there's at least 1 blank line */
3897 html_blank(hd
->html_data
, 0);
3899 else if(cmd
== GF_EOD
){
3900 if(PASS_HTML(hd
->html_data
)){
3901 html_output_string(hd
->html_data
, "</table>");
3904 /* Make sure there's at least 1 blank line */
3905 html_blank(hd
->html_data
, 0);
3907 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3912 * HTML <CAPTION> (Table Caption) element handler
3915 html_caption(HANDLER_S
*hd
, int ch
, int cmd
)
3918 html_handoff(hd
, ch
);
3920 else if(cmd
== GF_RESET
){
3921 if(PASS_HTML(hd
->html_data
)){
3922 html_output_raw_tag(hd
->html_data
, "caption");
3925 /* turn ON the centered bit */
3926 CENTER_BIT(hd
->html_data
) = 1;
3929 else if(cmd
== GF_EOD
){
3930 if(PASS_HTML(hd
->html_data
)){
3931 html_output_string(hd
->html_data
, "</caption>");
3934 /* turn OFF the centered bit */
3935 CENTER_BIT(hd
->html_data
) = 0;
3944 * HTML <TR> (Table Row) element handler
3947 html_tr(HANDLER_S
*hd
, int ch
, int cmd
)
3950 if(PASS_HTML(hd
->html_data
)){
3951 html_handoff(hd
, ch
);
3954 else if(cmd
== GF_RESET
){
3955 if(PASS_HTML(hd
->html_data
)){
3956 html_output_raw_tag(hd
->html_data
, "tr");
3959 /* Make sure there's at least 1 blank line */
3960 html_blank(hd
->html_data
, 0);
3962 else if(cmd
== GF_EOD
){
3963 if(PASS_HTML(hd
->html_data
)){
3964 html_output_string(hd
->html_data
, "</tr>");
3967 /* Make sure there's at least 1 blank line */
3968 html_blank(hd
->html_data
, 0);
3970 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
3975 * HTML <TD> (Table Data cell) element handler
3978 html_td(HANDLER_S
*hd
, int ch
, int cmd
)
3981 if(PASS_HTML(hd
->html_data
)){
3982 html_handoff(hd
, ch
);
3985 else if(cmd
== GF_RESET
){
3986 if(PASS_HTML(hd
->html_data
)){
3987 html_output_raw_tag(hd
->html_data
, "td");
3992 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
3995 if(!strucmp(p
->attribute
, "nowrap")
3996 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
3997 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
4002 else if(cmd
== GF_EOD
){
4003 if(PASS_HTML(hd
->html_data
)){
4004 html_output_string(hd
->html_data
, "</td>");
4008 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4013 * HTML <TH> (Table Header cell) element handler
4016 html_th(HANDLER_S
*hd
, int ch
, int cmd
)
4019 if(PASS_HTML(hd
->html_data
)){
4020 html_handoff(hd
, ch
);
4023 else if(cmd
== GF_RESET
){
4024 if(PASS_HTML(hd
->html_data
)){
4025 html_output_raw_tag(hd
->html_data
, "th");
4030 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4033 if(!strucmp(p
->attribute
, "nowrap")
4034 && (hd
->html_data
->f2
|| hd
->html_data
->n
)){
4035 HTML_DUMP_LIT(hd
->html_data
, " | ", 3);
4040 else if(cmd
== GF_EOD
){
4041 if(PASS_HTML(hd
->html_data
)){
4042 html_output_string(hd
->html_data
, "</th>");
4046 return(PASS_HTML(hd
->html_data
)); /* don't get linked */
4051 * HTML Table <THEAD> table head
4054 html_thead(HANDLER_S
*hd
, int ch
, int cmd
)
4056 if(PASS_HTML(hd
->html_data
)){
4058 html_handoff(hd
, ch
);
4060 else if(cmd
== GF_RESET
){
4061 html_output_raw_tag(hd
->html_data
, "thead");
4063 else if(cmd
== GF_EOD
){
4064 html_output_string(hd
->html_data
, "</thead>");
4067 return(1); /* GET linked */
4070 return(0); /* don't get linked */
4075 * HTML Table <TBODY> table body
4078 html_tbody(HANDLER_S
*hd
, int ch
, int cmd
)
4080 if(PASS_HTML(hd
->html_data
)){
4082 html_handoff(hd
, ch
);
4084 else if(cmd
== GF_RESET
){
4085 html_output_raw_tag(hd
->html_data
, "tbody");
4087 else if(cmd
== GF_EOD
){
4088 html_output_string(hd
->html_data
, "</tbody>");
4091 return(1); /* GET linked */
4094 return(0); /* don't get linked */
4099 * HTML Table <TFOOT> table foot
4102 html_tfoot(HANDLER_S
*hd
, int ch
, int cmd
)
4104 if(PASS_HTML(hd
->html_data
)){
4106 html_handoff(hd
, ch
);
4108 else if(cmd
== GF_RESET
){
4109 html_output_raw_tag(hd
->html_data
, "tfoot");
4111 else if(cmd
== GF_EOD
){
4112 html_output_string(hd
->html_data
, "</tfoot>");
4115 return(1); /* GET linked */
4118 return(0); /* don't get linked */
4123 * HTML <COL> (Table Column Attributes) element handler
4126 html_col(HANDLER_S
*hd
, int ch
, int cmd
)
4128 if(cmd
== GF_RESET
){
4129 if(PASS_HTML(hd
->html_data
)){
4130 html_output_raw_tag(hd
->html_data
, "col");
4134 return(0); /* don't get linked */
4139 * HTML Table <COLGROUP> column group
4142 html_colgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4144 if(PASS_HTML(hd
->html_data
)){
4146 html_handoff(hd
, ch
);
4148 else if(cmd
== GF_RESET
){
4149 html_output_raw_tag(hd
->html_data
, "colgroup");
4151 else if(cmd
== GF_EOD
){
4152 html_output_string(hd
->html_data
, "</colgroup>");
4155 return(1); /* GET linked */
4158 return(0); /* don't get linked */
4163 * HTML <I> (italic text) element handler
4166 html_i(HANDLER_S
*hd
, int ch
, int cmd
)
4169 /* include LITERAL in spaceness test! */
4170 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4171 HTML_ITALIC(hd
->html_data
, 1);
4175 html_handoff(hd
, ch
);
4177 else if(cmd
== GF_RESET
){
4180 else if(cmd
== GF_EOD
){
4182 HTML_ITALIC(hd
->html_data
, 0);
4185 return(1); /* get linked */
4190 * HTML <EM> element handler
4193 html_em(HANDLER_S
*hd
, int ch
, int cmd
)
4196 if(!PASS_HTML(hd
->html_data
)){
4197 /* include LITERAL in spaceness test! */
4198 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4199 HTML_ITALIC(hd
->html_data
, 1);
4204 html_handoff(hd
, ch
);
4206 else if(cmd
== GF_RESET
){
4207 if(PASS_HTML(hd
->html_data
)){
4208 html_output_raw_tag(hd
->html_data
, "em");
4214 else if(cmd
== GF_EOD
){
4215 if(PASS_HTML(hd
->html_data
)){
4216 html_output_string(hd
->html_data
, "</em>");
4220 HTML_ITALIC(hd
->html_data
, 0);
4224 return(1); /* get linked */
4229 * HTML <STRONG> element handler
4232 html_strong(HANDLER_S
*hd
, int ch
, int cmd
)
4235 if(!PASS_HTML(hd
->html_data
)){
4236 /* include LITERAL in spaceness test! */
4237 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4238 HTML_ITALIC(hd
->html_data
, 1);
4243 html_handoff(hd
, ch
);
4245 else if(cmd
== GF_RESET
){
4246 if(PASS_HTML(hd
->html_data
)){
4247 html_output_raw_tag(hd
->html_data
, "strong");
4253 else if(cmd
== GF_EOD
){
4254 if(PASS_HTML(hd
->html_data
)){
4255 html_output_string(hd
->html_data
, "</strong>");
4259 HTML_ITALIC(hd
->html_data
, 0);
4263 return(1); /* get linked */
4268 * HTML <u> (Underline text) element handler
4271 html_u(HANDLER_S
*hd
, int ch
, int cmd
)
4273 if(PASS_HTML(hd
->html_data
)){
4275 html_handoff(hd
, ch
);
4277 else if(cmd
== GF_RESET
){
4278 html_output_raw_tag(hd
->html_data
, "u");
4280 else if(cmd
== GF_EOD
){
4281 html_output_string(hd
->html_data
, "</u>");
4284 return(1); /* get linked */
4287 return(0); /* do NOT get linked */
4292 * HTML <b> (Bold text) element handler
4295 html_b(HANDLER_S
*hd
, int ch
, int cmd
)
4298 if(!PASS_HTML(hd
->html_data
)){
4299 /* include LITERAL in spaceness test! */
4300 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4301 HTML_BOLD(hd
->html_data
, 1);
4306 html_handoff(hd
, ch
);
4308 else if(cmd
== GF_RESET
){
4309 if(PASS_HTML(hd
->html_data
)){
4310 html_output_raw_tag(hd
->html_data
, "b");
4316 else if(cmd
== GF_EOD
){
4317 if(PASS_HTML(hd
->html_data
)){
4318 html_output_string(hd
->html_data
, "</b>");
4322 HTML_BOLD(hd
->html_data
, 0);
4326 return(1); /* get linked */
4331 * HTML <s> (strike-through text) element handler
4334 html_s(HANDLER_S
*hd
, int ch
, int cmd
)
4337 if(!PASS_HTML(hd
->html_data
)){
4338 /* include LITERAL in spaceness test! */
4339 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4340 HTML_STRIKE(hd
->html_data
, 1);
4345 html_handoff(hd
, ch
);
4347 else if(cmd
== GF_RESET
){
4348 if(PASS_HTML(hd
->html_data
)){
4349 html_output_raw_tag(hd
->html_data
, "s");
4355 else if(cmd
== GF_EOD
){
4356 if(PASS_HTML(hd
->html_data
)){
4357 html_output_string(hd
->html_data
, "</s>");
4361 HTML_STRIKE(hd
->html_data
, 0);
4365 return(1); /* get linked */
4370 * HTML <big> (BIG text) element handler
4373 html_big(HANDLER_S
*hd
, int ch
, int cmd
)
4376 /* include LITERAL in spaceness test! */
4377 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4378 HTML_BIG(hd
->html_data
, 1);
4382 html_handoff(hd
, ch
);
4384 else if(cmd
== GF_RESET
){
4387 else if(cmd
== GF_EOD
){
4389 HTML_BIG(hd
->html_data
, 0);
4392 return(1); /* get linked */
4397 * HTML <small> (SMALL text) element handler
4400 html_small(HANDLER_S
*hd
, int ch
, int cmd
)
4403 /* include LITERAL in spaceness test! */
4404 if(hd
->x
&& !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
4405 HTML_SMALL(hd
->html_data
, 1);
4409 html_handoff(hd
, ch
);
4411 else if(cmd
== GF_RESET
){
4414 else if(cmd
== GF_EOD
){
4416 HTML_SMALL(hd
->html_data
, 0);
4419 return(1); /* get linked */
4424 * HTML <FONT> element handler
4427 html_font(HANDLER_S
*hd
, int ch
, int cmd
)
4429 if(PASS_HTML(hd
->html_data
)){
4431 html_handoff(hd
, ch
);
4433 else if(cmd
== GF_RESET
){
4434 html_output_raw_tag(hd
->html_data
, "font");
4436 else if(cmd
== GF_EOD
){
4437 html_output_string(hd
->html_data
, "</font>");
4440 return(1); /* get linked */
4448 * HTML <IMG> element handler
4451 html_img(HANDLER_S
*hd
, int ch
, int cmd
)
4454 char *alt
= NULL
, *src
= NULL
, *s
;
4456 if(cmd
== GF_RESET
){
4457 if(PASS_HTML(hd
->html_data
)){
4458 html_output_raw_tag(hd
->html_data
, "img");
4461 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4464 if(p
->value
&& p
->value
[0]){
4465 if(!strucmp(p
->attribute
, "alt"))
4467 if(!strucmp(p
->attribute
, "src"))
4472 * Multipart/Related Content ID pointer
4473 * ONLY attached messages are recognized
4474 * if we ever decide web bugs aren't a problem
4475 * anymore then we might expand the scope
4477 if(src
&& DO_HANDLES(hd
->html_data
)){
4478 if(RELATED_OK(hd
->html_data
)
4479 && struncmp(src
, "cid:", 4) == 0){
4482 HANDLE_S
*h
= new_handle(HANDLESP(hd
->html_data
));
4485 h
->h
.img
.src
= cpystr(src
+ 4);
4486 h
->h
.img
.alt
= cpystr((alt
) ? alt
: "Attached Image");
4488 HTML_TEXT(hd
->html_data
, TAG_EMBED
);
4489 HTML_TEXT(hd
->html_data
, TAG_HANDLE
);
4491 sprintf(buf
, "%d", h
->key
);
4493 HTML_TEXT(hd
->html_data
, n
);
4494 for(i
= 0; i
< n
; i
++){
4495 unsigned int uic
= buf
[i
];
4496 HTML_TEXT(hd
->html_data
, uic
);
4501 else if(struncmp(src
, "data:", 5) == 0){
4504 HANDLE_S
*h
= new_handle(HANDLESP(hd
->html_data
));
4507 h
->h
.img
.src
= cpystr(src
+ 5);
4508 h
->h
.img
.alt
= cpystr((alt
) ? alt
: "Embedded Image");
4510 HTML_TEXT(hd
->html_data
, TAG_EMBED
);
4511 HTML_TEXT(hd
->html_data
, TAG_HANDLE
);
4513 sprintf(buf
, "%d", h
->key
);
4515 HTML_TEXT(hd
->html_data
, n
);
4516 for(i
= 0; i
< n
; i
++){
4517 unsigned int uic
= buf
[i
];
4518 HTML_TEXT(hd
->html_data
, uic
);
4522 else if(alt
&& strlen(alt
) < 256){ /* arbitrary "reasonable" limit */
4523 HTML_DUMP_LIT(hd
->html_data
, alt
, strlen(alt
));
4524 HTML_TEXT(hd
->html_data
, ' ');
4528 && (s
= strrindex(src
, '/'))
4530 HTML_TEXT(hd
->html_data
, '[');
4531 HTML_DUMP_LIT(hd
->html_data
, s
, strlen(s
));
4532 HTML_TEXT(hd
->html_data
, ']');
4533 HTML_TEXT(hd
->html_data
, ' ');
4537 /* text filler of last resort */
4538 HTML_DUMP_LIT(hd
->html_data
, "[IMAGE] ", 7);
4542 return(0); /* don't get linked */
4547 * HTML <MAP> (Image Map) element handler
4550 html_map(HANDLER_S
*hd
, int ch
, int cmd
)
4552 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4554 html_handoff(hd
, ch
);
4556 else if(cmd
== GF_RESET
){
4557 html_output_raw_tag(hd
->html_data
, "map");
4559 else if(cmd
== GF_EOD
){
4560 html_output_string(hd
->html_data
, "</map>");
4571 * HTML <AREA> (Image Map Area) element handler
4574 html_area(HANDLER_S
*hd
, int ch
, int cmd
)
4576 if(PASS_HTML(hd
->html_data
) && PASS_IMAGES(hd
->html_data
)){
4578 html_handoff(hd
, ch
);
4580 else if(cmd
== GF_RESET
){
4581 html_output_raw_tag(hd
->html_data
, "area");
4583 else if(cmd
== GF_EOD
){
4584 html_output_string(hd
->html_data
, "</area>");
4595 * HTML <FORM> (Form) element handler
4598 html_form(HANDLER_S
*hd
, int ch
, int cmd
)
4600 if(PASS_HTML(hd
->html_data
)){
4602 html_handoff(hd
, ch
);
4604 else if(cmd
== GF_RESET
){
4607 /* SECURITY: make sure to redirect to new browser instance */
4608 for(pp
= &(HD(hd
->html_data
)->el_data
->attribs
);
4609 *pp
&& (*pp
)->attribute
;
4611 if(!strucmp((*pp
)->attribute
, "target")){
4613 fs_give((void **) &(*pp
)->value
);
4615 (*pp
)->value
= cpystr("_blank");
4619 *pp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4620 memset(*pp
, 0, sizeof(PARAMETER
));
4621 (*pp
)->attribute
= cpystr("target");
4622 (*pp
)->value
= cpystr("_blank");
4625 html_output_raw_tag(hd
->html_data
, "form");
4627 else if(cmd
== GF_EOD
){
4628 html_output_string(hd
->html_data
, "</form>");
4632 if(cmd
== GF_RESET
){
4633 html_blank(hd
->html_data
, 0);
4634 HTML_DUMP_LIT(hd
->html_data
, "[FORM]", 6);
4635 html_blank(hd
->html_data
, 0);
4639 return(PASS_HTML(hd
->html_data
)); /* maybe get linked */
4644 * HTML <INPUT> (Form) element handler
4647 html_input(HANDLER_S
*hd
, int ch
, int cmd
)
4649 if(PASS_HTML(hd
->html_data
)){
4650 if(cmd
== GF_RESET
){
4651 html_output_raw_tag(hd
->html_data
, "input");
4655 return(0); /* don't get linked */
4660 * HTML <BUTTON> (Form) element handler
4663 html_button(HANDLER_S
*hd
, int ch
, int cmd
)
4665 if(PASS_HTML(hd
->html_data
)){
4667 html_handoff(hd
, ch
);
4669 else if(cmd
== GF_RESET
){
4670 html_output_raw_tag(hd
->html_data
, "button");
4672 else if(cmd
== GF_EOD
){
4673 html_output_string(hd
->html_data
, "</button>");
4676 return(1); /* get linked */
4684 * HTML <OPTION> (Form) element handler
4687 html_option(HANDLER_S
*hd
, int ch
, int cmd
)
4689 if(PASS_HTML(hd
->html_data
)){
4691 html_handoff(hd
, ch
);
4693 else if(cmd
== GF_RESET
){
4694 html_output_raw_tag(hd
->html_data
, "option");
4696 else if(cmd
== GF_EOD
){
4697 html_output_string(hd
->html_data
, "</option>");
4700 return(1); /* get linked */
4708 * HTML <OPTGROUP> (Form) element handler
4711 html_optgroup(HANDLER_S
*hd
, int ch
, int cmd
)
4713 if(PASS_HTML(hd
->html_data
)){
4715 html_handoff(hd
, ch
);
4717 else if(cmd
== GF_RESET
){
4718 html_output_raw_tag(hd
->html_data
, "optgroup");
4720 else if(cmd
== GF_EOD
){
4721 html_output_string(hd
->html_data
, "</optgroup>");
4724 return(1); /* get linked */
4732 * HTML <SELECT> (Form) element handler
4735 html_select(HANDLER_S
*hd
, int ch
, int cmd
)
4737 if(PASS_HTML(hd
->html_data
)){
4739 html_handoff(hd
, ch
);
4741 else if(cmd
== GF_RESET
){
4742 html_output_raw_tag(hd
->html_data
, "select");
4744 else if(cmd
== GF_EOD
){
4745 html_output_string(hd
->html_data
, "</select>");
4748 return(1); /* get linked */
4756 * HTML <TEXTAREA> (Form) element handler
4759 html_textarea(HANDLER_S
*hd
, int ch
, int cmd
)
4761 if(PASS_HTML(hd
->html_data
)){
4763 html_handoff(hd
, ch
);
4765 else if(cmd
== GF_RESET
){
4766 html_output_raw_tag(hd
->html_data
, "textarea");
4768 else if(cmd
== GF_EOD
){
4769 html_output_string(hd
->html_data
, "</textarea>");
4772 return(1); /* get linked */
4780 * HTML <LABEL> (Form) element handler
4783 html_label(HANDLER_S
*hd
, int ch
, int cmd
)
4785 if(PASS_HTML(hd
->html_data
)){
4787 html_handoff(hd
, ch
);
4789 else if(cmd
== GF_RESET
){
4790 html_output_raw_tag(hd
->html_data
, "label");
4792 else if(cmd
== GF_EOD
){
4793 html_output_string(hd
->html_data
, "</label>");
4796 return(1); /* get linked */
4804 * HTML <FIELDSET> (Form) element handler
4807 html_fieldset(HANDLER_S
*hd
, int ch
, int cmd
)
4809 if(PASS_HTML(hd
->html_data
)){
4811 html_handoff(hd
, ch
);
4813 else if(cmd
== GF_RESET
){
4814 html_output_raw_tag(hd
->html_data
, "fieldset");
4816 else if(cmd
== GF_EOD
){
4817 html_output_string(hd
->html_data
, "</fieldset>");
4820 return(1); /* get linked */
4828 * HTML <HEAD> element handler
4831 html_head(HANDLER_S
*hd
, int ch
, int cmd
)
4834 html_handoff(hd
, ch
);
4836 else if(cmd
== GF_RESET
){
4837 HD(hd
->html_data
)->head
= 1;
4839 else if(cmd
== GF_EOD
){
4840 HD(hd
->html_data
)->head
= 0;
4843 return(1); /* get linked */
4848 * HTML <BASE> element handler
4851 html_base(HANDLER_S
*hd
, int ch
, int cmd
)
4853 if(cmd
== GF_RESET
){
4854 if(HD(hd
->html_data
)->head
&& !HTML_BASE(hd
->html_data
)){
4857 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4858 p
&& p
->attribute
&& strucmp(p
->attribute
, "HREF");
4862 if(p
&& p
->value
&& !((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
)
4863 ((HTML_OPT_S
*)(hd
->html_data
)->opt
)->base
= cpystr(p
->value
);
4867 return(0); /* DON'T get linked */
4872 * HTML <TITLE> element handler
4875 html_title(HANDLER_S
*hd
, int ch
, int cmd
)
4878 if(hd
->x
+ 1 >= hd
->y
){
4880 fs_resize((void **)&hd
->s
, (size_t)hd
->y
* sizeof(unsigned char));
4883 hd
->s
[hd
->x
++] = (unsigned char) ch
;
4885 else if(cmd
== GF_RESET
){
4888 hd
->s
= (unsigned char *)fs_get((size_t)hd
->y
* sizeof(unsigned char));
4890 else if(cmd
== GF_EOD
){
4891 /* Down the road we probably want to give these bytes to
4894 hd
->s
[hd
->x
] = '\0';
4895 fs_give((void **)&hd
->s
);
4898 return(1); /* get linked */
4903 * HTML <BODY> element handler
4906 html_body(HANDLER_S
*hd
, int ch
, int cmd
)
4909 html_handoff(hd
, ch
);
4911 else if(cmd
== GF_RESET
){
4912 if(PASS_HTML(hd
->html_data
)){
4914 char **style
= NULL
, *text
= NULL
, *bgcolor
= NULL
, *pcs
;
4916 /* modify any attributes in a useful way? */
4917 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
4921 if(!strucmp(p
->attribute
, "style"))
4923 else if(!strucmp(p
->attribute
, "text"))
4926 * bgcolor NOT passed since user setting takes precedence
4928 else if(!strucmp(p->attribute, "bgcolor"))
4933 /* colors pretty much it */
4934 if(text
|| bgcolor
){
4936 tp
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
4937 memset(tp
, 0, sizeof(PARAMETER
));
4938 tp
->next
= HD(hd
->html_data
)->el_data
->attribs
;
4939 HD(hd
->html_data
)->el_data
->attribs
= tp
;
4940 tp
->attribute
= cpystr("style");
4942 tmp_20k_buf
[0] = '\0';
4947 snprintf(tmp_20k_buf
, SIZEOF_20KBUF
, "%s", *style
);
4948 fs_give((void **) style
);
4949 pcs
= "; %s%s%s%s%s";
4952 snprintf(tmp_20k_buf
+ strlen(tmp_20k_buf
),
4953 SIZEOF_20KBUF
- strlen(tmp_20k_buf
),
4955 (text
) ? "color: " : "", (text
) ? text
: "",
4956 (text
&& bgcolor
) ? ";" : "",
4957 (bgcolor
) ? "background-color: " : "", (bgcolor
) ? bgcolor
: "");
4958 *style
= cpystr(tmp_20k_buf
);
4961 html_output_raw_tag(hd
->html_data
, "div");
4964 HD(hd
->html_data
)->body
= 1;
4966 else if(cmd
== GF_EOD
){
4967 if(PASS_HTML(hd
->html_data
)){
4968 html_output_string(hd
->html_data
, "</div>");
4971 HD(hd
->html_data
)->body
= 0;
4974 return(1); /* get linked */
4979 * HTML <A> (Anchor) element handler
4982 html_a(HANDLER_S
*hd
, int ch
, int cmd
)
4985 html_handoff(hd
, ch
);
4987 if(hd
->dp
) /* remember text within anchor tags */
4988 so_writec(ch
, (STORE_S
*) hd
->dp
);
4990 else if(cmd
== GF_RESET
){
4994 PARAMETER
*p
, *href
= NULL
, *name
= NULL
;
4997 * Pending Anchor!?!?
4998 * space insertion/line breaking that's yet to get done...
5000 if(HD(hd
->html_data
)->prefix
){
5001 dprint((2, "-- html error: nested or unterminated anchor\n"));
5006 * Look for valid Anchor data vis the filter installer's parms
5007 * (e.g., Only allow references to our internal URLs if asked)
5009 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
5012 if(!strucmp(p
->attribute
, "HREF")
5014 && (HANDLES_LOC(hd
->html_data
)
5015 || struncmp(p
->value
, "x-alpine-", 9)
5016 || struncmp(p
->value
, "x-pine-help", 11)
5017 || p
->value
[0] == '#'))
5019 else if(!strucmp(p
->attribute
, "NAME"))
5022 if(DO_HANDLES(hd
->html_data
) && (href
|| name
)){
5023 h
= new_handle(HANDLESP(hd
->html_data
));
5026 * Enhancement: we might want to get fancier and parse the
5027 * href a bit further such that we can launch images using
5028 * our image viewer, or browse local files or directories
5029 * with our internal tools. Of course, having the jump-off
5030 * point into text/html always be the defined "web-browser",
5031 * just might be the least confusing UI-wise...
5035 if(name
&& name
->value
)
5036 h
->h
.url
.name
= cpystr(name
->value
);
5039 * Prepare to build embedded prefix...
5041 HD(hd
->html_data
)->prefix
= (int *) fs_get(64 * sizeof(int));
5045 * Is this something that looks like a URL? If not and
5046 * we were giving some "base" string, proceed ala RFC1808...
5050 removing_leading_and_trailing_white_space(href
->value
);
5051 if(HTML_BASE(hd
->html_data
) && !rfc1738_scan(href
->value
, &n
)){
5052 html_a_relative(HTML_BASE(hd
->html_data
), href
->value
, h
);
5054 else if(!(NO_RELATIVE(hd
->html_data
) && html_href_relative(href
->value
)))
5055 h
->h
.url
.path
= cpystr(href
->value
);
5057 if(pico_usingcolor()){
5058 char *fg
= NULL
, *bg
= NULL
, *q
;
5060 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5061 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5062 ps_global
->VAR_NORM_FORE_COLOR
))
5063 fg
= ps_global
->VAR_SLCTBL_FORE_COLOR
;
5065 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5066 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5067 ps_global
->VAR_NORM_BACK_COLOR
))
5068 bg
= ps_global
->VAR_SLCTBL_BACK_COLOR
;
5074 * The blacks are just known good colors for testing
5075 * whether the other color is good.
5077 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5078 bg
? bg
: colorx(COL_BLACK
));
5079 if(pico_is_good_colorpair(tmp
)){
5080 q
= color_embed(fg
, bg
);
5082 for(i
= 0; q
[i
]; i
++)
5083 HD(hd
->html_data
)->prefix
[x
++] = q
[i
];
5087 free_color_pair(&tmp
);
5090 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5091 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5094 HD(hd
->html_data
)->prefix
[x
++] = HTML_DOBOLD
;
5097 HD(hd
->html_data
)->prefix
[x
++] = TAG_EMBED
;
5098 HD(hd
->html_data
)->prefix
[x
++] = TAG_HANDLE
;
5100 snprintf(buf
, sizeof(buf
), "%ld", hd
->x
= h
->key
);
5101 HD(hd
->html_data
)->prefix
[x
++] = n
= strlen(buf
);
5102 for(i
= 0; i
< n
; i
++)
5103 HD(hd
->html_data
)->prefix
[x
++] = buf
[i
];
5105 HD(hd
->html_data
)->prefix_used
= x
;
5107 hd
->dp
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
5110 else if(cmd
== GF_EOD
){
5114 return(1); /* get linked */
5119 html_a_prefix(FILTER_S
*f
)
5123 /* Do this so we don't visit from html_output... */
5124 prefix
= HD(f
)->prefix
;
5125 HD(f
)->prefix
= NULL
;
5127 for(n
= 0; n
< HD(f
)->prefix_used
; n
++)
5128 html_a_output_prefix(f
, prefix
[n
]);
5130 fs_give((void **) &prefix
);
5135 * html_a_finish - house keeping associated with end of link tag
5138 html_a_finish(HANDLER_S
*hd
)
5140 if(DO_HANDLES(hd
->html_data
)){
5141 if(HD(hd
->html_data
)->prefix
){
5142 if(!PASS_HTML(hd
->html_data
)){
5143 char *empty_link
= "[LINK]";
5146 html_a_prefix(hd
->html_data
);
5147 for(i
= 0; empty_link
[i
]; i
++)
5148 html_output(hd
->html_data
, empty_link
[i
]);
5152 if(pico_usingcolor()){
5153 char *fg
= NULL
, *bg
= NULL
, *p
;
5156 if(ps_global
->VAR_SLCTBL_FORE_COLOR
5157 && colorcmp(ps_global
->VAR_SLCTBL_FORE_COLOR
,
5158 ps_global
->VAR_NORM_FORE_COLOR
))
5159 fg
= ps_global
->VAR_NORM_FORE_COLOR
;
5161 if(ps_global
->VAR_SLCTBL_BACK_COLOR
5162 && colorcmp(ps_global
->VAR_SLCTBL_BACK_COLOR
,
5163 ps_global
->VAR_NORM_BACK_COLOR
))
5164 bg
= ps_global
->VAR_NORM_BACK_COLOR
;
5166 if(F_OFF(F_SLCTBL_ITEM_NOBOLD
, ps_global
))
5167 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5173 * The blacks are just known good colors for testing
5174 * whether the other color is good.
5176 tmp
= new_color_pair(fg
? fg
: colorx(COL_BLACK
),
5177 bg
? bg
: colorx(COL_BLACK
));
5178 if(pico_is_good_colorpair(tmp
)){
5179 p
= color_embed(fg
, bg
);
5181 for(i
= 0; p
[i
]; i
++)
5182 html_output(hd
->html_data
, p
[i
]);
5186 free_color_pair(&tmp
);
5190 HTML_BOLD(hd
->html_data
, 0); /* turn OFF bold */
5192 html_output(hd
->html_data
, TAG_EMBED
);
5193 html_output(hd
->html_data
, TAG_HANDLEOFF
);
5195 html_a_output_info(hd
);
5201 * html_a_output_prefix - dump Anchor prefix data
5204 html_a_output_prefix(FILTER_S
*f
, int c
)
5220 * html_a_output_info - dump possibly deceptive link info into text.
5221 * phark the phishers.
5224 html_a_output_info(HANDLER_S
*hd
)
5226 int l
, risky
= 0, hl
= 0, tl
;
5227 char *url
= NULL
, *hn
= NULL
, *txt
;
5230 /* find host anchor references */
5231 if((h
= get_handle(*HANDLESP(hd
->html_data
), (int) hd
->x
)) != NULL
5232 && h
->h
.url
.path
!= NULL
5233 && (hn
= rfc1738_scan(rfc1738_str(url
= cpystr(h
->h
.url
.path
)), &l
)) != NULL
5234 && (hn
= srchstr(hn
,"://")) != NULL
){
5236 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++)
5242 * look over anchor's text to see if there's a
5243 * mismatch between href target and url-ish
5244 * looking text. throw a red flag if so.
5245 * similarly, toss one if the target's referenced
5249 so_writec('\0', (STORE_S
*) hd
->dp
);
5251 if((txt
= (char *) so_text((STORE_S
*) hd
->dp
)) != NULL
5252 && (txt
= rfc1738_scan(txt
, &tl
)) != NULL
5253 && (txt
= srchstr(txt
,"://")) != NULL
){
5255 for(txt
+= 3, tl
= 0; txt
[tl
] && txt
[tl
] != '/' && txt
[tl
] != '?'; tl
++)
5261 /* look for non matching text */
5262 for(l
= 0; l
< tl
&& l
< hl
; l
++)
5263 if(tolower((unsigned char) txt
[l
]) != tolower((unsigned char) hn
[l
])){
5269 so_give((STORE_S
**) &hd
->dp
);
5272 /* look for literal IP, anything possibly encoded or auth specifier */
5276 for(l
= 0; l
< hl
; l
++){
5277 if(hn
[l
] == '@' || hn
[l
] == '%'){
5281 else if(!(hn
[l
] == '.' || isdigit((unsigned char) hn
[l
])))
5289 /* Insert text of link's domain */
5290 if(SHOWSERVER(hd
->html_data
)){
5292 COLOR_PAIR
*col
= NULL
, *colnorm
= NULL
;
5294 html_output(hd
->html_data
, ' ');
5295 html_output(hd
->html_data
, '[');
5297 if(pico_usingcolor()
5298 && ps_global
->VAR_METAMSG_FORE_COLOR
5299 && ps_global
->VAR_METAMSG_BACK_COLOR
5300 && (col
= new_color_pair(ps_global
->VAR_METAMSG_FORE_COLOR
,
5301 ps_global
->VAR_METAMSG_BACK_COLOR
))){
5302 if(!pico_is_good_colorpair(col
))
5303 free_color_pair(&col
);
5306 q
= color_embed(col
->fg
, col
->bg
);
5308 for(l
= 0; q
[l
]; l
++)
5309 html_output(hd
->html_data
, q
[l
]);
5313 for(l
= 0; l
< hl
; l
++)
5314 html_output(hd
->html_data
, hn
[l
]);
5317 if(ps_global
->VAR_NORM_FORE_COLOR
5318 && ps_global
->VAR_NORM_BACK_COLOR
5319 && (colnorm
= new_color_pair(ps_global
->VAR_NORM_FORE_COLOR
,
5320 ps_global
->VAR_NORM_BACK_COLOR
))){
5321 if(!pico_is_good_colorpair(colnorm
))
5322 free_color_pair(&colnorm
);
5325 q
= color_embed(colnorm
->fg
, colnorm
->bg
);
5326 free_color_pair(&colnorm
);
5328 for(l
= 0; q
[l
]; l
++)
5329 html_output(hd
->html_data
, q
[l
]);
5333 free_color_pair(&col
);
5336 html_output(hd
->html_data
, ']');
5341 * if things look OK so far, make sure nothing within
5342 * the url looks too fishy...
5345 && (hn
= rfc1738_scan(hn
, &l
)) != NULL
5346 && (hn
= srchstr(hn
,"://")) != NULL
){
5349 for(hn
+= 3, hl
= 0; hn
[hl
] && hn
[hl
] != '/' && hn
[hl
] != '?'; hl
++){
5351 * auth spec, encoded characters, or possibly non-standard port
5352 * should raise a red flag
5354 if(hn
[hl
] == '@' || hn
[hl
] == '%' || hn
[hl
] == ':'){
5358 else if(!(hn
[hl
] == '.' || isdigit((unsigned char) hn
[hl
])))
5362 /* dotted-dec/raw-int address should cause suspicion as well */
5367 if(risky
&& ((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)
5368 (*((HTML_OPT_S
*) hd
->html_data
->opt
)->warnrisk_f
)();
5371 so_give((STORE_S
**) &hd
->dp
);
5374 fs_give((void **) &url
);
5380 * html_a_relative - put full url path in h based on base and relative url
5383 html_a_relative(char *base_url
, char *rel_url
, HANDLE_S
*h
)
5386 char tmp
[MAILTMPLEN
], *p
, *q
;
5387 char *scheme
= NULL
, *net
= NULL
, *path
= NULL
,
5388 *parms
= NULL
, *query
= NULL
, *frag
= NULL
,
5389 *base_scheme
= NULL
, *base_net_loc
= NULL
,
5390 *base_path
= NULL
, *base_parms
= NULL
,
5391 *base_query
= NULL
, *base_frag
= NULL
,
5392 *rel_scheme
= NULL
, *rel_net_loc
= NULL
,
5393 *rel_path
= NULL
, *rel_parms
= NULL
,
5394 *rel_query
= NULL
, *rel_frag
= NULL
;
5396 /* Rough parse of base URL */
5397 rfc1808_tokens(base_url
, &base_scheme
, &base_net_loc
, &base_path
,
5398 &base_parms
, &base_query
, &base_frag
);
5400 /* Rough parse of this URL */
5401 rfc1808_tokens(rel_url
, &rel_scheme
, &rel_net_loc
, &rel_path
,
5402 &rel_parms
, &rel_query
, &rel_frag
);
5404 scheme
= rel_scheme
; /* defaults */
5410 if(!scheme
&& base_scheme
){
5411 scheme
= base_scheme
;
5417 for(p
= q
= base_path
; /* Drop base path's tail */
5418 (p
= strchr(p
, '/'));
5422 len
= q
- base_path
;
5427 if(len
+ strlen(rel_path
) < sizeof(tmp
)-1){
5429 snprintf(path
= tmp
, sizeof(tmp
), "%.*s", (int) len
, base_path
);
5431 strncpy(tmp
+ len
, rel_path
, sizeof(tmp
)-len
);
5432 tmp
[sizeof(tmp
)-1] = '\0';
5434 /* Follow RFC 1808 "Step 6" */
5435 for(p
= tmp
; (p
= strchr(p
, '.')); )
5438 * a) All occurrences of "./", where "." is a
5439 * complete path segment, are removed.
5443 for(q
= p
; (*q
= *(q
+2)) != '\0'; q
++)
5451 * b) If the path ends with "." as a
5452 * complete path segment, that "." is
5456 if(p
== tmp
|| *(p
-1) == '/')
5464 * c) All occurrences of "<segment>/../",
5465 * where <segment> is a complete path
5466 * segment not equal to "..", are removed.
5467 * Removal of these path segments is
5468 * performed iteratively, removing the
5469 * leftmost matching pattern on each
5470 * iteration, until no matching pattern
5473 * d) If the path ends with "<segment>/..",
5474 * where <segment> is a complete path
5475 * segment not equal to "..", that
5476 * "<segment>/.." is removed.
5480 for(q
= p
- 2; q
> tmp
&& *q
!= '/'; q
--)
5486 if(q
+ 1 == p
/* no "//.." */
5487 || (*q
== '.' /* and "../.." */
5498 for(; (*q
= *(q
+len
)) != '\0'; q
++)
5523 path
= ""; /* lame. */
5537 len
= (scheme
? strlen(scheme
) : 0) + (net
? strlen(net
) : 0)
5538 + (path
? strlen(path
) : 0) + (parms
? strlen(parms
) : 0)
5539 + (query
? strlen(query
) : 0) + (frag
? strlen(frag
) : 0) + 8;
5541 h
->h
.url
.path
= (char *) fs_get(len
* sizeof(char));
5542 snprintf(h
->h
.url
.path
, len
, "%s%s%s%s%s%s%s%s%s%s%s%s",
5543 scheme
? scheme
: "", scheme
? ":" : "",
5544 net
? "//" : "", net
? net
: "",
5545 (path
&& *path
== '/') ? "" : ((path
&& net
) ? "/" : ""),
5547 parms
? ";" : "", parms
? parms
: "",
5548 query
? "?" : "", query
? query
: "",
5549 frag
? "#" : "", frag
? frag
: "");
5552 fs_give((void **) &base_scheme
);
5555 fs_give((void **) &base_net_loc
);
5558 fs_give((void **) &base_path
);
5561 fs_give((void **) &base_parms
);
5564 fs_give((void **) &base_query
);
5567 fs_give((void **) &base_frag
);
5570 fs_give((void **) &rel_scheme
);
5573 fs_give((void **) &rel_net_loc
);
5576 fs_give((void **) &rel_parms
);
5579 fs_give((void **) &rel_query
);
5582 fs_give((void **) &rel_frag
);
5585 fs_give((void **) &rel_path
);
5590 * html_href_relative - href
/*
 * html_href_relative - inspect the leading characters of url.
 *
 * The visible loop walks at most the first 32 characters of url and
 * tests each one for "not a letter, '_' or '-'", i.e. it locates the
 * first character that ends a run of such characters.
 *
 * NOTE(review): the embedded numbering jumps from 5593 to 5598 and the
 * listing stops at 5599, so the declarations, what is done with that
 * first non-matching character, and the return statements are not
 * visible here -- confirm them against the full file.
 */
5593 html_href_relative(char *url
)
5598 for(i
= 0; i
< 32 && url
[i
]; i
++)
5599 if(!(isalpha((unsigned char) url
[i
]) || url
[i
] == '_' || url
[i
] == '-')){
5611 * HTML <UL> (Unordered List) element handler
5614 html_ul(HANDLER_S
*hd
, int ch
, int cmd
)
5617 html_handoff(hd
, ch
);
5619 else if(cmd
== GF_RESET
){
5620 if(PASS_HTML(hd
->html_data
)){
5621 html_output_raw_tag(hd
->html_data
, "ul");
5624 HD(hd
->html_data
)->li_pending
= 1;
5625 html_blank(hd
->html_data
, 0);
5628 else if(cmd
== GF_EOD
){
5629 if(PASS_HTML(hd
->html_data
)){
5630 html_output_string(hd
->html_data
, "</ul>");
5633 html_blank(hd
->html_data
, 0);
5635 if(!HD(hd
->html_data
)->li_pending
)
5636 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5638 HD(hd
->html_data
)->li_pending
= 0;
5642 return(1); /* get linked */
5647 * HTML <OL> (Ordered List) element handler
5650 html_ol(HANDLER_S
*hd
, int ch
, int cmd
)
5653 html_handoff(hd
, ch
);
5655 else if(cmd
== GF_RESET
){
5656 if(PASS_HTML(hd
->html_data
)){
5657 html_output_raw_tag(hd
->html_data
, "ol");
5662 * Signal that we're expecting to see <LI> as our next element
5663 * and set the the initial ordered count.
5665 hd
->x
= 1L; /* set default */
5666 hd
->y
= LIST_DECIMAL
; /* set default */
5667 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
5671 if(!strucmp(p
->attribute
, "TYPE")){
5672 if(!strucmp(p
->value
, "a")) /* alpha, lowercase */
5673 hd
->y
= LIST_ALPHALO
;
5674 else if(!strucmp(p
->value
, "A")) /* alpha, uppercase */
5675 hd
->y
= LIST_ALPHAUP
;
5676 else if(!strucmp(p
->value
, "i")) /* roman, lowercase */
5677 hd
->y
= LIST_ROMANLO
;
5678 else if(!strucmp(p
->value
, "I")) /* roman, uppercase */
5679 hd
->y
= LIST_ROMANUP
;
5680 else if(strucmp(p
->value
, "1")) /* decimal, the default */
5681 hd
->y
= LIST_UNKNOWN
;
5683 else if(!strucmp(p
->attribute
, "START"))
5684 hd
->x
= atol(p
->value
);
5685 // else ADD SUPPORT FOR OTHER ATTRIBUTES... LATER
5686 // this is not so simple. The main missing support
5687 // is for the STYLE attribute, but implementing that
5688 // correctly will take time, so will be implemented
5689 // after version 2.21 is released.
5691 HD(hd
->html_data
)->li_pending
= 1;
5692 html_blank(hd
->html_data
, 0);
5695 else if(cmd
== GF_EOD
){
5696 if(PASS_HTML(hd
->html_data
)){
5697 html_output_string(hd
->html_data
, "</ol>");
5700 html_blank(hd
->html_data
, 0);
5702 if(!HD(hd
->html_data
)->li_pending
)
5703 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5705 HD(hd
->html_data
)->li_pending
= 0;
5709 return(1); /* get linked */
5714 * HTML <MENU> (Menu List) element handler
5717 html_menu(HANDLER_S
*hd
, int ch
, int cmd
)
5720 html_handoff(hd
, ch
);
5722 else if(cmd
== GF_RESET
){
5723 if(PASS_HTML(hd
->html_data
)){
5724 html_output_raw_tag(hd
->html_data
, "menu");
5727 HD(hd
->html_data
)->li_pending
= 1;
5730 else if(cmd
== GF_EOD
){
5731 if(PASS_HTML(hd
->html_data
)){
5732 html_output_string(hd
->html_data
, "</menu>");
5735 html_blank(hd
->html_data
, 0);
5737 if(!HD(hd
->html_data
)->li_pending
)
5738 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5740 HD(hd
->html_data
)->li_pending
= 0;
5744 return(1); /* get linked */
5749 * HTML <DIR> (Directory List) element handler
5752 html_dir(HANDLER_S
*hd
, int ch
, int cmd
)
5755 html_handoff(hd
, ch
);
5757 else if(cmd
== GF_RESET
){
5758 if(PASS_HTML(hd
->html_data
)){
5759 html_output_raw_tag(hd
->html_data
, "dir");
5762 HD(hd
->html_data
)->li_pending
= 1;
5765 else if(cmd
== GF_EOD
){
5766 if(PASS_HTML(hd
->html_data
)){
5767 html_output_string(hd
->html_data
, "</dir>");
5770 html_blank(hd
->html_data
, 0);
5772 if(!HD(hd
->html_data
)->li_pending
)
5773 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5775 HD(hd
->html_data
)->li_pending
= 0;
5779 return(1); /* get linked */
5784 * HTML <LI> (List Item) element handler
5787 html_li(HANDLER_S
*hd
, int ch
, int cmd
)
5790 if(PASS_HTML(hd
->html_data
)){
5791 html_handoff(hd
, ch
);
5794 else if(cmd
== GF_RESET
){
5795 HANDLER_S
*p
, *found
= NULL
;
5798 * There better be a an unordered list, ordered list,
5799 * Menu or Directory handler installed
5800 * or else we crap out...
5802 for(p
= HANDLERS(hd
->html_data
); p
; p
= p
->below
)
5803 if(EL(p
)->handler
== html_ul
5804 || EL(p
)->handler
== html_ol
5805 || EL(p
)->handler
== html_menu
5806 || EL(p
)->handler
== html_dir
){
5812 if(PASS_HTML(hd
->html_data
)){
5815 char buf
[20], tmp
[16], *p
;
5818 /* Start a new line */
5819 html_blank(hd
->html_data
, 0);
5821 /* adjust indent level if needed */
5822 if(HD(hd
->html_data
)->li_pending
){
5823 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5824 HD(hd
->html_data
)->li_pending
= 0;
5827 if(EL(found
)->handler
== html_ul
){
5828 int l
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5830 strncpy(buf
, " ", sizeof(buf
));
5831 buf
[1] = (l
< 5) ? '*' : (l
< 9) ? '+' : (l
< 17) ? 'o' : '#';
5833 else if(EL(found
)->handler
== html_ol
){
5834 if(found
->y
== LIST_DECIMAL
|| found
->y
== LIST_UNKNOWN
)
5835 snprintf(tmp
, sizeof(tmp
), "%ld", found
->x
++);
5836 else if(found
->y
== LIST_ALPHALO
)
5837 convert_decimal_to_alpha(tmp
, sizeof(tmp
), found
->x
++, 'a');
5838 else if(found
->y
== LIST_ALPHAUP
)
5839 convert_decimal_to_alpha(tmp
, sizeof(tmp
), found
->x
++, 'A');
5840 else if(found
->y
== LIST_ROMANLO
)
5841 convert_decimal_to_roman(tmp
, sizeof(tmp
), found
->x
++, 'i');
5842 else if(found
->y
== LIST_ROMANUP
)
5843 convert_decimal_to_roman(tmp
, sizeof(tmp
), found
->x
++, 'I');
5844 snprintf(buf
, sizeof(buf
), " %s.", tmp
);
5845 buf
[sizeof(buf
)-1] = '\0';
5847 else if(EL(found
)->handler
== html_menu
){
5848 strncpy(buf
, " ->", sizeof(buf
));
5849 buf
[sizeof(buf
)-1] = '\0';
5852 html_indent(hd
->html_data
, -4, HTML_ID_INC
);
5854 /* So we don't munge whitespace */
5855 wrapstate
= HD(hd
->html_data
)->wrapstate
;
5856 HD(hd
->html_data
)->wrapstate
= 0;
5858 html_write_indent(hd
->html_data
, HD(hd
->html_data
)->indent_level
);
5859 for(p
= buf
; *p
; p
++)
5860 html_output(hd
->html_data
, (int) *p
);
5861 HD(hd
->html_data
)->wrapstate
= wrapstate
;
5862 html_indent(hd
->html_data
, 4, HTML_ID_INC
);
5864 /* else BUG: should really bitch about this */
5867 if(PASS_HTML(hd
->html_data
)){
5868 html_output_raw_tag(hd
->html_data
, "li");
5869 return(1); /* get linked */
5872 else if(cmd
== GF_EOD
){
5873 if(PASS_HTML(hd
->html_data
)){
5874 html_output_string(hd
->html_data
, "</li>");
5878 return(PASS_HTML(hd
->html_data
)); /* DON'T get linked */
5883 * HTML <DL> (Definition List) element handler
/*
 * html_dl - handler for the <DL> element.
 *
 * Visible behavior: character data goes to html_handoff(); when HTML is
 * being passed through, the tag and "</dl>" are re-emitted; otherwise
 * the indent in effect at the start of the list is read with
 * HTML_ID_GET into hd->x, and at GF_EOD that indent is put back with
 * HTML_ID_SET followed by html_blank(..., 1).  Returns 1 (get linked).
 *
 * NOTE(review): html_dt and html_dd read the y and z fields of this
 * handler for their indent levels, but the embedded numbering jumps
 * from 5899 to 5904, so the statements that set them are not visible
 * in this listing -- confirm against the full file.
 */
5886 html_dl(HANDLER_S
*hd
, int ch
, int cmd
)
5889 html_handoff(hd
, ch
);
5891 else if(cmd
== GF_RESET
){
5892 if(PASS_HTML(hd
->html_data
)){
5893 html_output_raw_tag(hd
->html_data
, "dl");
5897 * Set indention level for definition terms and definitions...
5899 hd
->x
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
5904 else if(cmd
== GF_EOD
){
5905 if(PASS_HTML(hd
->html_data
)){
5906 html_output_string(hd
->html_data
, "</dl>");
5909 html_indent(hd
->html_data
, (int) hd
->x
, HTML_ID_SET
);
5910 html_blank(hd
->html_data
, 1);
5914 return(1); /* get linked */
5919 * HTML <DT> (Definition Term) element handler
5922 html_dt(HANDLER_S
*hd
, int ch
, int cmd
)
5924 if(PASS_HTML(hd
->html_data
)){
5926 html_handoff(hd
, ch
);
5928 else if(cmd
== GF_RESET
){
5929 html_output_raw_tag(hd
->html_data
, "dt");
5931 else if(cmd
== GF_EOD
){
5932 html_output_string(hd
->html_data
, "</dt>");
5935 return(1); /* get linked */
5938 if(cmd
== GF_RESET
){
5942 * There better be a Definition Handler installed
5943 * or else we crap out...
5945 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5948 if(p
){ /* adjust indent level if needed */
5949 html_indent(hd
->html_data
, (int) p
->y
, HTML_ID_SET
);
5950 html_blank(hd
->html_data
, 1);
5952 /* BUG: else should really bitch about this */
5955 return(0); /* DON'T get linked */
5960 * HTML <DD> (Definition Definition) element handler
5963 html_dd(HANDLER_S
*hd
, int ch
, int cmd
)
5965 if(PASS_HTML(hd
->html_data
)){
5967 html_handoff(hd
, ch
);
5969 else if(cmd
== GF_RESET
){
5970 html_output_raw_tag(hd
->html_data
, "dd");
5972 else if(cmd
== GF_EOD
){
5973 html_output_string(hd
->html_data
, "</dd>");
5976 return(1); /* get linked */
5979 if(cmd
== GF_RESET
){
5983 * There better be a Definition Handler installed
5984 * or else we crap out...
5986 for(p
= HANDLERS(hd
->html_data
); p
&& EL(p
)->handler
!= html_dl
; p
= p
->below
)
5989 if(p
){ /* adjust indent level if needed */
5990 html_indent(hd
->html_data
, (int) p
->z
, HTML_ID_SET
);
5991 html_blank(hd
->html_data
, 0);
5993 /* BUG: should really bitch about this */
5996 return(0); /* DON'T get linked */
6001 * HTML <H1> (Headings 1) element handler.
6003 * Bold, very-large font, CENTERED. One or two blank lines
6004 * above and below. For our silly character cell's that
6005 * means centered and ALL CAPS...
6008 html_h1(HANDLER_S
*hd
, int ch
, int cmd
)
6011 html_handoff(hd
, ch
);
6013 else if(cmd
== GF_RESET
){
6014 if(PASS_HTML(hd
->html_data
)){
6015 html_output_raw_tag(hd
->html_data
, "h1");
6018 /* turn ON the centered bit */
6019 CENTER_BIT(hd
->html_data
) = 1;
6022 else if(cmd
== GF_EOD
){
6023 if(PASS_HTML(hd
->html_data
)){
6024 html_output_string(hd
->html_data
, "</h1>");
6027 /* turn OFF the centered bit, add blank line */
6028 CENTER_BIT(hd
->html_data
) = 0;
6029 html_blank(hd
->html_data
, 1);
6033 return(1); /* get linked */
6038 * HTML <H2> (Headings 2) element handler
6041 html_h2(HANDLER_S
*hd
, int ch
, int cmd
)
6044 if(PASS_HTML(hd
->html_data
)){
6045 html_handoff(hd
, ch
);
6048 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
6049 HTML_ULINE(hd
->html_data
, 1);
6050 hd
->x
^= HTML_HX_ULINE
; /* only once! */
6053 html_handoff(hd
, (ch
< 128 && islower((unsigned char) ch
))
6054 ? toupper((unsigned char) ch
) : ch
);
6057 else if(cmd
== GF_RESET
){
6058 if(PASS_HTML(hd
->html_data
)){
6059 html_output_raw_tag(hd
->html_data
, "h2");
6063 * Bold, large font, flush-left. One or two blank lines
6066 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
6067 hd
->x
= HTML_HX_CENTER
;
6071 hd
->x
|= HTML_HX_ULINE
;
6073 CENTER_BIT(hd
->html_data
) = 0;
6074 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6075 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6076 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6077 html_blank(hd
->html_data
, 1);
6080 else if(cmd
== GF_EOD
){
6081 if(PASS_HTML(hd
->html_data
)){
6082 html_output_string(hd
->html_data
, "</h2>");
6086 * restore previous centering, and indent level
6088 if(!(hd
->x
& HTML_HX_ULINE
))
6089 HTML_ULINE(hd
->html_data
, 0);
6091 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6092 html_blank(hd
->html_data
, 1);
6093 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6094 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6098 return(1); /* get linked */
6103 * HTML <H3> (Headings 3) element handler
6106 html_h3(HANDLER_S
*hd
, int ch
, int cmd
)
6109 if(!PASS_HTML(hd
->html_data
)){
6110 if((hd
->x
& HTML_HX_ULINE
) && !ASCII_ISSPACE((unsigned char) (ch
& 0xff))){
6111 HTML_ULINE(hd
->html_data
, 1);
6112 hd
->x
^= HTML_HX_ULINE
; /* only once! */
6116 html_handoff(hd
, ch
);
6118 else if(cmd
== GF_RESET
){
6119 if(PASS_HTML(hd
->html_data
)){
6120 html_output_raw_tag(hd
->html_data
, "h3");
6124 * Italic, large font, slightly indented from the left
6125 * margin. One or two blank lines above and below.
6127 if(CENTER_BIT(hd
->html_data
)) /* stop centering for now */
6128 hd
->x
= HTML_HX_CENTER
;
6132 hd
->x
|= HTML_HX_ULINE
;
6133 CENTER_BIT(hd
->html_data
) = 0;
6134 hd
->y
= html_indent(hd
->html_data
, 2, HTML_ID_SET
);
6135 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6136 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6137 html_blank(hd
->html_data
, 1);
6140 else if(cmd
== GF_EOD
){
6141 if(PASS_HTML(hd
->html_data
)){
6142 html_output_string(hd
->html_data
, "</h3>");
6146 * restore previous centering, and indent level
6148 if(!(hd
->x
& HTML_HX_ULINE
))
6149 HTML_ULINE(hd
->html_data
, 0);
6151 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6152 html_blank(hd
->html_data
, 1);
6153 CENTER_BIT(hd
->html_data
) = (hd
->x
& HTML_HX_CENTER
) != 0;
6154 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6158 return(1); /* get linked */
6163 * HTML <H4> (Headings 4) element handler
6166 html_h4(HANDLER_S
*hd
, int ch
, int cmd
)
6169 html_handoff(hd
, ch
);
6171 else if(cmd
== GF_RESET
){
6172 if(PASS_HTML(hd
->html_data
)){
6173 html_output_raw_tag(hd
->html_data
, "h4");
6177 * Bold, normal font, indented more than H3. One blank line
6180 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6181 CENTER_BIT(hd
->html_data
) = 0;
6182 hd
->y
= html_indent(hd
->html_data
, 4, HTML_ID_SET
);
6183 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6184 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6185 html_blank(hd
->html_data
, 1);
6188 else if(cmd
== GF_EOD
){
6189 if(PASS_HTML(hd
->html_data
)){
6190 html_output_string(hd
->html_data
, "</h4>");
6194 * restore previous centering, and indent level
6196 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6197 html_blank(hd
->html_data
, 1);
6198 CENTER_BIT(hd
->html_data
) = hd
->x
;
6199 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6203 return(1); /* get linked */
6208 * HTML <H5> (Headings 5) element handler
6211 html_h5(HANDLER_S
*hd
, int ch
, int cmd
)
6214 html_handoff(hd
, ch
);
6216 else if(cmd
== GF_RESET
){
6217 if(PASS_HTML(hd
->html_data
)){
6218 html_output_raw_tag(hd
->html_data
, "h5");
6222 * Italic, normal font, indented as H4. One blank line
6225 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6226 CENTER_BIT(hd
->html_data
) = 0;
6227 hd
->y
= html_indent(hd
->html_data
, 6, HTML_ID_SET
);
6228 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6229 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6230 html_blank(hd
->html_data
, 1);
6233 else if(cmd
== GF_EOD
){
6234 if(PASS_HTML(hd
->html_data
)){
6235 html_output_string(hd
->html_data
, "</h5>");
6239 * restore previous centering, and indent level
6241 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6242 html_blank(hd
->html_data
, 1);
6243 CENTER_BIT(hd
->html_data
) = hd
->x
;
6244 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6248 return(1); /* get linked */
6253 * HTML <H6> (Headings 6) element handler
6256 html_h6(HANDLER_S
*hd
, int ch
, int cmd
)
6259 html_handoff(hd
, ch
);
6261 else if(cmd
== GF_RESET
){
6262 if(PASS_HTML(hd
->html_data
)){
6263 html_output_raw_tag(hd
->html_data
, "h6");
6267 * Bold, indented same as normal text, more than H5. One
6270 hd
->x
= CENTER_BIT(hd
->html_data
); /* stop centering for now */
6271 CENTER_BIT(hd
->html_data
) = 0;
6272 hd
->y
= html_indent(hd
->html_data
, 8, HTML_ID_SET
);
6273 hd
->z
= HD(hd
->html_data
)->wrapcol
;
6274 HD(hd
->html_data
)->wrapcol
= WRAP_COLS(hd
->html_data
) - 8;
6275 html_blank(hd
->html_data
, 1);
6278 else if(cmd
== GF_EOD
){
6279 if(PASS_HTML(hd
->html_data
)){
6280 html_output_string(hd
->html_data
, "</h6>");
6284 * restore previous centering, and indent level
6286 html_indent(hd
->html_data
, (int) hd
->y
, HTML_ID_SET
);
6287 html_blank(hd
->html_data
, 1);
6288 CENTER_BIT(hd
->html_data
) = hd
->x
;
6289 HD(hd
->html_data
)->wrapcol
= hd
->z
;
6293 return(1); /* get linked */
6298 * HTML <BlockQuote> element handler
6301 html_blockquote(HANDLER_S
*hd
, int ch
, int cmd
)
6304 #define HTML_BQ_INDENT 6
6307 html_handoff(hd
, ch
);
6309 else if(cmd
== GF_RESET
){
6310 if(PASS_HTML(hd
->html_data
)){
6311 html_output_raw_tag(hd
->html_data
, "blockquote");
6315 * A typical rendering might be a slight extra left and
6316 * right indent, and/or italic font. The Blockquote element
6317 * causes a paragraph break, and typically provides space
6318 * above and below the quote.
6320 html_indent(hd
->html_data
, HTML_BQ_INDENT
, HTML_ID_INC
);
6321 j
= HD(hd
->html_data
)->wrapstate
;
6322 HD(hd
->html_data
)->wrapstate
= 0;
6323 html_blank(hd
->html_data
, 1);
6324 HD(hd
->html_data
)->wrapstate
= j
;
6325 HD(hd
->html_data
)->wrapcol
-= HTML_BQ_INDENT
;
6328 else if(cmd
== GF_EOD
){
6329 if(PASS_HTML(hd
->html_data
)){
6330 html_output_string(hd
->html_data
, "</blockquote>");
6333 html_blank(hd
->html_data
, 1);
6335 j
= HD(hd
->html_data
)->wrapstate
;
6336 HD(hd
->html_data
)->wrapstate
= 0;
6337 html_indent(hd
->html_data
, -(HTML_BQ_INDENT
), HTML_ID_INC
);
6338 HD(hd
->html_data
)->wrapstate
= j
;
6339 HD(hd
->html_data
)->wrapcol
+= HTML_BQ_INDENT
;
6343 return(1); /* get linked */
6348 * HTML <Address> element handler
6351 html_address(HANDLER_S
*hd
, int ch
, int cmd
)
6354 #define HTML_ADD_INDENT 2
6357 html_handoff(hd
, ch
);
6359 else if(cmd
== GF_RESET
){
6360 if(PASS_HTML(hd
->html_data
)){
6361 html_output_raw_tag(hd
->html_data
, "address");
6365 * A typical rendering might be a slight extra left and
6366 * right indent, and/or italic font. The Blockquote element
6367 * causes a paragraph break, and typically provides space
6368 * above and below the quote.
6370 html_indent(hd
->html_data
, HTML_ADD_INDENT
, HTML_ID_INC
);
6371 j
= HD(hd
->html_data
)->wrapstate
;
6372 HD(hd
->html_data
)->wrapstate
= 0;
6373 html_blank(hd
->html_data
, 1);
6374 HD(hd
->html_data
)->wrapstate
= j
;
6377 else if(cmd
== GF_EOD
){
6378 if(PASS_HTML(hd
->html_data
)){
6379 html_output_string(hd
->html_data
, "</address>");
6382 html_blank(hd
->html_data
, 1);
6384 j
= HD(hd
->html_data
)->wrapstate
;
6385 HD(hd
->html_data
)->wrapstate
= 0;
6386 html_indent(hd
->html_data
, -(HTML_ADD_INDENT
), HTML_ID_INC
);
6387 HD(hd
->html_data
)->wrapstate
= j
;
6391 return(1); /* get linked */
6396 * HTML <PRE> (Preformatted Text) element handler
/*
 * html_pre - handler for the <PRE> element.
 *
 * Visible behavior:
 *  - data: hd->y is used as a small state value while looking for CR/LF
 *    pairs.  A CR that turns out not to start a CRLF is handed off raw
 *    ('\015'); when HTML is being passed through the state is set to 4
 *    ("keep looking") and a CRLF is replaced by "<br />", otherwise the
 *    state is set to 0 ("stop looking").  Other data goes to
 *    html_handoff() unchanged.
 *  - start: when passing HTML the tag is re-emitted; otherwise f1 of
 *    the filter is set to DFL, a blank line is written, the current
 *    wrapstate is saved in hd->x and wrapping is switched off.
 *  - end: when passing HTML "</pre>" is written; otherwise wrapstate is
 *    put back from hd->x and html_blank(..., 0) is called.
 *
 * NOTE(review): the embedded numbering skips most of the state machine
 * (e.g. 6404 -> 6413, 6430 -> 6445), so the case labels, the tests on
 * ch and the return statements are not visible here; treat the state
 * description above as partial and confirm against the full file.
 * NOTE(review): the f1 assignment ends in a stray backslash, which
 * splices the following line onto it -- looks like a leftover; confirm.
 */
6399 html_pre(HANDLER_S
*hd
, int ch
, int cmd
)
6403 * remove CRLF after '>' in element.
6404 * We see CRLF because wrapstate is off.
6413 html_handoff(hd
, '\015');
6424 /* passing tags? replace CRLF with <BR> to make
6425 * sure hard newline survives in the end...
6427 if(PASS_HTML(hd
->html_data
))
6428 hd
->y
= 4; /* keep looking for CRLF */
6430 hd
->y
= 0; /* stop looking */
6445 html_output_string(hd
->html_data
, "<br />");
6449 html_handoff(hd
, '\015'); /* not CRLF, pass raw CR */
6453 default : /* zero case */
6457 html_handoff(hd
, ch
);
6459 else if(cmd
== GF_RESET
){
6461 if(PASS_HTML(hd
->html_data
)){
6462 html_output_raw_tag(hd
->html_data
, "pre");
6466 hd
->html_data
->f1
= DFL
; \
6468 html_blank(hd
->html_data
, 1);
6469 hd
->x
= HD(hd
->html_data
)->wrapstate
;
6470 HD(hd
->html_data
)->wrapstate
= 0;
6473 else if(cmd
== GF_EOD
){
6474 if(PASS_HTML(hd
->html_data
)){
6475 html_output_string(hd
->html_data
, "</pre>");
6478 HD(hd
->html_data
)->wrapstate
= (hd
->x
!= 0);
6479 html_blank(hd
->html_data
, 0);
6488 * HTML <CENTER> (Centered Text) element handler
6491 html_center(HANDLER_S
*hd
, int ch
, int cmd
)
6494 html_handoff(hd
, ch
);
6496 else if(cmd
== GF_RESET
){
6497 if(PASS_HTML(hd
->html_data
)){
6498 html_output_raw_tag(hd
->html_data
, "center");
6501 /* turn ON the centered bit */
6502 CENTER_BIT(hd
->html_data
) = 1;
6505 else if(cmd
== GF_EOD
){
6506 if(PASS_HTML(hd
->html_data
)){
6507 html_output_string(hd
->html_data
, "</center>");
6510 /* turn OFF the centered bit */
6511 CENTER_BIT(hd
->html_data
) = 0;
6520 * HTML <DIV> (Document Divisions) element handler
6523 html_div(HANDLER_S
*hd
, int ch
, int cmd
)
6526 html_handoff(hd
, ch
);
6528 else if(cmd
== GF_RESET
){
6529 if(PASS_HTML(hd
->html_data
)){
6530 html_output_raw_tag(hd
->html_data
, "div");
6535 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6538 if(!strucmp(p
->attribute
, "ALIGN")){
6540 /* remember previous values */
6541 hd
->x
= CENTER_BIT(hd
->html_data
);
6542 hd
->y
= html_indent(hd
->html_data
, 0, HTML_ID_GET
);
6544 html_blank(hd
->html_data
, 0);
6545 CENTER_BIT(hd
->html_data
) = !strucmp(p
->value
, "CENTER");
6546 html_indent(hd
->html_data
, 0, HTML_ID_SET
);
6547 /* NOTE: "RIGHT" not supported yet */
6552 else if(cmd
== GF_EOD
){
6553 if(PASS_HTML(hd
->html_data
)){
6554 html_output_string(hd
->html_data
, "</div>");
6557 /* restore centered bit and indentiousness */
6558 CENTER_BIT(hd
->html_data
) = hd
->y
;
6559 html_indent(hd
->html_data
, hd
->y
, HTML_ID_SET
);
6560 html_blank(hd
->html_data
, 0);
6569 * HTML <SPAN> (Text Span) element handler
6572 html_span(HANDLER_S
*hd
, int ch
, int cmd
)
6574 if(PASS_HTML(hd
->html_data
)){
6576 html_handoff(hd
, ch
);
6578 else if(cmd
== GF_RESET
){
6579 html_output_raw_tag(hd
->html_data
, "span");
6581 else if(cmd
== GF_EOD
){
6582 html_output_string(hd
->html_data
, "</span>");
6593 * HTML <KBD> (Text Kbd) element handler
6596 html_kbd(HANDLER_S
*hd
, int ch
, int cmd
)
6598 if(PASS_HTML(hd
->html_data
)){
6600 html_handoff(hd
, ch
);
6602 else if(cmd
== GF_RESET
){
6603 html_output_raw_tag(hd
->html_data
, "kbd");
6605 else if(cmd
== GF_EOD
){
6606 html_output_string(hd
->html_data
, "</kbd>");
6617 * HTML <DFN> (Text Definition) element handler
6620 html_dfn(HANDLER_S
*hd
, int ch
, int cmd
)
6622 if(PASS_HTML(hd
->html_data
)){
6624 html_handoff(hd
, ch
);
6626 else if(cmd
== GF_RESET
){
6627 html_output_raw_tag(hd
->html_data
, "dfn");
6629 else if(cmd
== GF_EOD
){
6630 html_output_string(hd
->html_data
, "</dfn>");
6641 * HTML <TT> (Text Tt) element handler
6644 html_tt(HANDLER_S
*hd
, int ch
, int cmd
)
6646 if(PASS_HTML(hd
->html_data
)){
6648 html_handoff(hd
, ch
);
6650 else if(cmd
== GF_RESET
){
6651 html_output_raw_tag(hd
->html_data
, "tt");
6653 else if(cmd
== GF_EOD
){
6654 html_output_string(hd
->html_data
, "</tt>");
6665 * HTML <VAR> (Text Var) element handler
6668 html_var(HANDLER_S
*hd
, int ch
, int cmd
)
6670 if(PASS_HTML(hd
->html_data
)){
6672 html_handoff(hd
, ch
);
6674 else if(cmd
== GF_RESET
){
6675 html_output_raw_tag(hd
->html_data
, "var");
6677 else if(cmd
== GF_EOD
){
6678 html_output_string(hd
->html_data
, "</var>");
6689 * HTML <SAMP> (Text Samp) element handler
6692 html_samp(HANDLER_S
*hd
, int ch
, int cmd
)
6694 if(PASS_HTML(hd
->html_data
)){
6696 html_handoff(hd
, ch
);
6698 else if(cmd
== GF_RESET
){
6699 html_output_raw_tag(hd
->html_data
, "samp");
6701 else if(cmd
== GF_EOD
){
6702 html_output_string(hd
->html_data
, "</samp>");
6713 * HTML <SUP> (Text Superscript) element handler
6716 html_sup(HANDLER_S
*hd
, int ch
, int cmd
)
6718 if(PASS_HTML(hd
->html_data
)){
6720 html_handoff(hd
, ch
);
6722 else if(cmd
== GF_RESET
){
6723 html_output_raw_tag(hd
->html_data
, "sup");
6725 else if(cmd
== GF_EOD
){
6726 html_output_string(hd
->html_data
, "</sup>");
6737 * HTML <SUB> (Text Subscript) element handler
6740 html_sub(HANDLER_S
*hd
, int ch
, int cmd
)
6742 if(PASS_HTML(hd
->html_data
)){
6744 html_handoff(hd
, ch
);
6746 else if(cmd
== GF_RESET
){
6747 html_output_raw_tag(hd
->html_data
, "sub");
6749 else if(cmd
== GF_EOD
){
6750 html_output_string(hd
->html_data
, "</sub>");
6761 * HTML <CITE> (Text Citation) element handler
6764 html_cite(HANDLER_S
*hd
, int ch
, int cmd
)
6766 if(PASS_HTML(hd
->html_data
)){
6768 html_handoff(hd
, ch
);
6770 else if(cmd
== GF_RESET
){
6771 html_output_raw_tag(hd
->html_data
, "cite");
6773 else if(cmd
== GF_EOD
){
6774 html_output_string(hd
->html_data
, "</cite>");
6785 * HTML <CODE> (Text Code) element handler
6788 html_code(HANDLER_S
*hd
, int ch
, int cmd
)
6790 if(PASS_HTML(hd
->html_data
)){
6792 html_handoff(hd
, ch
);
6794 else if(cmd
== GF_RESET
){
6795 html_output_raw_tag(hd
->html_data
, "code");
6797 else if(cmd
== GF_EOD
){
6798 html_output_string(hd
->html_data
, "</code>");
6809 * HTML <INS> (Text Inserted) element handler
6812 html_ins(HANDLER_S
*hd
, int ch
, int cmd
)
6814 if(PASS_HTML(hd
->html_data
)){
6816 html_handoff(hd
, ch
);
6818 else if(cmd
== GF_RESET
){
6819 html_output_raw_tag(hd
->html_data
, "ins");
6821 else if(cmd
== GF_EOD
){
6822 html_output_string(hd
->html_data
, "</ins>");
6833 * HTML <DEL> (Text Deleted) element handler
6836 html_del(HANDLER_S
*hd
, int ch
, int cmd
)
6838 if(PASS_HTML(hd
->html_data
)){
6840 html_handoff(hd
, ch
);
6842 else if(cmd
== GF_RESET
){
6843 html_output_raw_tag(hd
->html_data
, "del");
6845 else if(cmd
== GF_EOD
){
6846 html_output_string(hd
->html_data
, "</del>");
6857 * HTML <ABBR> (Text Abbreviation) element handler
6860 html_abbr(HANDLER_S
*hd
, int ch
, int cmd
)
6862 if(PASS_HTML(hd
->html_data
)){
6864 html_handoff(hd
, ch
);
6866 else if(cmd
== GF_RESET
){
6867 html_output_raw_tag(hd
->html_data
, "abbr");
6869 else if(cmd
== GF_EOD
){
6870 html_output_string(hd
->html_data
, "</abbr>");
6881 * HTML <SCRIPT> element handler
/*
 * html_script - handler for the <SCRIPT> element.
 *
 * Per the comment in the body, the handler is linked in so that
 * everything inside the element is discarded rather than displayed.
 *
 * NOTE(review): the statements of the body are not visible in this
 * listing; confirm the return value against the full file.
 */
6884 html_script(HANDLER_S
*hd
, int ch
, int cmd
)
6886 /* Link in and drop everything within on the floor */
6892 * HTML <APPLET> element handler
/*
 * html_applet - handler for the <APPLET> element.
 *
 * Per the comment in the body, the handler is linked in so that
 * everything inside the element is discarded rather than displayed.
 *
 * NOTE(review): the statements of the body are not visible in this
 * listing; confirm the return value against the full file.
 */
6895 html_applet(HANDLER_S
*hd
, int ch
, int cmd
)
6897 /* Link in and drop everything within on the floor */
6903 * HTML <STYLE> CSS element handler
/*
 * html_style - handler for the <STYLE> element.
 *
 * Visible behavior, under a PASS_HTML() test: characters are appended
 * to the function-static store css_stuff; at the start of the element
 * the store is given back with so_give() and a fresh CharStar store is
 * obtained; at the end of the element the store is given back again
 * (the collected text is not yet passed on -- see the TODO).
 *
 * css_stuff is static, so it is shared by every invocation of this
 * handler.
 *
 * NOTE(review): the embedded numbering skips lines (e.g. 6910 -> 6912,
 * 6915 -> 6917), so any guards around these calls and the return
 * value are not visible in this listing.
 */
6906 html_style(HANDLER_S
*hd
, int ch
, int cmd
)
6908 static STORE_S
*css_stuff
;
6910 if(PASS_HTML(hd
->html_data
)){
6912 /* collect style settings */
6913 so_writec(ch
, css_stuff
);
6915 else if(cmd
== GF_RESET
){
6917 so_give(&css_stuff
);
6919 css_stuff
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6921 else if(cmd
== GF_EOD
){
6923 * TODO: strip anything mischievous and pass on
6926 so_give(&css_stuff
);
6934 * RSS 2.0 <RSS> version
/*
 * rss_rss - handler for the RSS 2.0 <RSS> element (version check).
 *
 * At the start of the element the attribute list is walked looking for
 * VERSION.  A value of "2.0" returns 0 (do not link in); otherwise
 * gf_error("Incompatible RSS version") is called.  Per the trailing
 * comment, the final return is not expected to be reached after that.
 *
 * NOTE(review): the loop's condition/step and the placement of the
 * gf_error() call relative to the loop are not visible in this listing
 * (numbering skips 6942 -> 6945 and 6947 -> 6950); confirm whether a
 * missing VERSION attribute is also treated as an error.
 */
6937 rss_rss(HANDLER_S
*hd
, int ch
, int cmd
)
6939 if(cmd
== GF_RESET
){
6942 for(p
= HD(hd
->html_data
)->el_data
->attribs
;
6945 if(!strucmp(p
->attribute
, "VERSION")){
6946 if(p
->value
&& !strucmp(p
->value
,"2.0"))
6947 return(0); /* do not link in */
6950 gf_error("Incompatible RSS version");
6954 return(0); /* not linked or error means we never get here */
6961 rss_channel(HANDLER_S
*hd
, int ch
, int cmd
)
6964 html_handoff(hd
, ch
);
6966 else if(cmd
== GF_RESET
){
6969 feed
= RSS_FEED(hd
->html_data
) = fs_get(sizeof(RSS_FEED_S
));
6970 memset(feed
, 0, sizeof(RSS_FEED_S
));
6973 return(1); /* link in */
/*
 * rss_title - handler for the RSS 2.0 <TITLE> element.
 *
 * Visible behavior: characters are appended to the function-static
 * store title_so.  At the start of the element, when the filter has an
 * RSS feed, a fresh CharStar store is obtained.  At the end of the
 * element the collected text, with leading whitespace skipped and
 * copied with cpystr(), replaces the title of the LAST entry of
 * feed->items when there are items; the assignment to feed->title that
 * follows is presumably the no-items arm.  Returns 1 (link in).
 *
 * title_so is static, so it is shared by every invocation.
 *
 * NOTE(review): the embedded numbering skips lines throughout (e.g.
 * 6982 -> 6987, 7006 -> 7010, 7018 -> 7026), so guards on title_so /
 * feed, the else arm, and the release of the store are not visible in
 * this listing; confirm against the full file.
 */
6980 rss_title(HANDLER_S
*hd
, int ch
, int cmd
)
6982 static STORE_S
*title_so
;
6987 so_writec(ch
, title_so
);
6990 else if(cmd
== GF_RESET
){
6991 if(RSS_FEED(hd
->html_data
)){
6992 /* prepare for data */
6996 title_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
6999 else if(cmd
== GF_EOD
){
7001 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7005 if((rip
= feed
->items
) != NULL
){
7006 for(; rip
->next
; rip
= rip
->next
)
7010 fs_give((void **) &rip
->title
);
7012 rip
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
7016 fs_give((void **) &feed
->title
);
7018 feed
->title
= cpystr(rss_skip_whitespace(so_text(title_so
)));
7026 return(1); /* link in */
/*
 * rss_image - handler for the RSS 2.0 <IMAGE> element.
 *
 * Visible behavior: characters are appended to the function-static
 * store img_so.  At the start of the element, when the filter has an
 * RSS feed, a fresh CharStar store is obtained.  At the end of the
 * element any previous feed->image is released and replaced by a
 * cpystr() copy of the collected text with leading whitespace skipped.
 * Returns 1 (link in).
 *
 * img_so is static, so it is shared by every invocation.
 *
 * NOTE(review): the embedded numbering skips lines (e.g. 7035 -> 7040,
 * 7054 -> 7058, 7060 -> 7067), so guards on img_so / feed and the
 * release of the store are not visible in this listing.
 */
7033 rss_image(HANDLER_S
*hd
, int ch
, int cmd
)
7035 static STORE_S
*img_so
;
7040 so_writec(ch
, img_so
);
7043 else if(cmd
== GF_RESET
){
7044 if(RSS_FEED(hd
->html_data
)){
7045 /* prepare to collect data */
7049 img_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7052 else if(cmd
== GF_EOD
){
7054 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7058 fs_give((void **) &feed
->image
);
7060 feed
->image
= cpystr(rss_skip_whitespace(so_text(img_so
)));
7067 return(1); /* link in */
/*
 * rss_link - handler for the RSS 2.0 <LINK> element.
 *
 * Visible behavior: characters are appended to the function-static
 * store link_so.  At the start of the element, when the filter has an
 * RSS feed, a fresh CharStar store is obtained.  At the end of the
 * element the collected text, with leading whitespace skipped and
 * copied with cpystr(), replaces the link of the LAST entry of
 * feed->items when there are items; the assignment to feed->link that
 * follows is presumably the no-items arm.  Returns 1 (link in).
 *
 * link_so is static, so it is shared by every invocation.
 *
 * NOTE(review): the embedded numbering skips lines throughout (e.g.
 * 7076 -> 7081, 7100 -> 7104, 7112 -> 7120), so guards on link_so /
 * feed, the else arm, and the release of the store are not visible in
 * this listing; confirm against the full file.
 */
7074 rss_link(HANDLER_S
*hd
, int ch
, int cmd
)
7076 static STORE_S
*link_so
;
7081 so_writec(ch
, link_so
);
7084 else if(cmd
== GF_RESET
){
7085 if(RSS_FEED(hd
->html_data
)){
7086 /* prepare to collect data */
7090 link_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7093 else if(cmd
== GF_EOD
){
7095 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7099 if((rip
= feed
->items
) != NULL
){
7100 for(; rip
->next
; rip
= rip
->next
)
7104 fs_give((void **) &rip
->link
);
7106 rip
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7110 fs_give((void **) &feed
->link
);
7112 feed
->link
= cpystr(rss_skip_whitespace(so_text(link_so
)));
7120 return(1); /* link in */
7124 * RSS 2.0 <DESCRIPTION>
/*
 * rss_description - handler for the RSS 2.0 <DESCRIPTION> element.
 *
 * Visible behavior: characters are appended to the function-static
 * store desc_so.  At the start of the element, when the filter has an
 * RSS feed, a fresh CharStar store is obtained.  At the end of the
 * element the collected text, with leading whitespace skipped and
 * copied with cpystr(), replaces the description of the LAST entry of
 * feed->items when there are items (an existing description is
 * released first); the assignment to feed->description that follows is
 * presumably the no-items arm.  Returns 1 (link in).
 *
 * desc_so is static, so it is shared by every invocation.
 *
 * NOTE(review): the embedded numbering skips lines (e.g. 7129 -> 7134,
 * 7159 -> 7162, 7165 -> 7173), so guards on desc_so / feed, the else
 * arm, and the release of the store are not visible in this listing.
 */
7127 rss_description(HANDLER_S
*hd
, int ch
, int cmd
)
7129 static STORE_S
*desc_so
;
7134 so_writec(ch
, desc_so
);
7137 else if(cmd
== GF_RESET
){
7138 if(RSS_FEED(hd
->html_data
)){
7139 /* prepare to collect data */
7143 desc_so
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
7146 else if(cmd
== GF_EOD
){
7148 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7152 if((rip
= feed
->items
) != NULL
){
7153 for(; rip
->next
; rip
= rip
->next
)
7156 if(rip
->description
)
7157 fs_give((void **) &rip
->description
);
7159 rip
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7162 if(feed
->description
)
7163 fs_give((void **) &feed
->description
);
7165 feed
->description
= cpystr(rss_skip_whitespace(so_text(desc_so
)));
7173 return(1); /* link in */
7177 * RSS 2.0 <TTL> (in minutes)
7180 rss_ttl(HANDLER_S
*hd
, int ch
, int cmd
)
7182 RSS_FEED_S
*feed
= RSS_FEED(hd
->html_data
);
7185 if(isdigit((unsigned char) ch
))
7186 feed
->ttl
= ((feed
->ttl
* 10) + (ch
- '0'));
7188 else if(cmd
== GF_RESET
){
7189 /* prepare to collect data */
7192 else if(cmd
== GF_EOD
){
7195 return(1); /* link in */
/*
 * rss_item - handler for the RSS 2.0 <ITEM> element.
 *
 * Visible behavior: at the start of the element, when the filter has
 * an RSS feed, the feed's item list is walked to its end (counting
 * entries against RSS_ITEM_LIMIT) and a zero-filled RSS_ITEM_S is
 * allocated with fs_get() and linked in at the tail.  Returns 0 (don't
 * link in).
 *
 * NOTE(review): the statement executed when the count exceeds
 * RSS_ITEM_LIMIT (original lines 7214-7215) and the local declarations
 * are not visible in this listing; confirm against the full file.
 */
7202 rss_item(HANDLER_S
*hd
, int ch
, int cmd
)
7204 /* BUG: verify no ITEM nesting? */
7205 if(cmd
== GF_RESET
){
7208 if((feed
= RSS_FEED(hd
->html_data
)) != NULL
){
7212 for(rip
= &feed
->items
; *rip
; rip
= &(*rip
)->next
)
7213 if(++n
> RSS_ITEM_LIMIT
)
7216 *rip
= fs_get(sizeof(RSS_ITEM_S
));
7217 memset(*rip
, 0, sizeof(RSS_ITEM_S
));
7221 return(0); /* don't link in */
/*
 * rss_skip_whitespace - return a pointer to the first character of s
 * that is not whitespace (per isspace()), or to the terminating NUL if
 * s is all whitespace.  s itself is not modified.
 */
char *
rss_skip_whitespace(char *s)
{
    while (*s != '\0' && isspace((unsigned char) *s))
        s++;

    return s;
}
7236 * return the function associated with the given element name
7239 element_properties(FILTER_S
*fd
, char *el_name
)
7241 register ELPROP_S
*el_table
= ELEMENTS(fd
);
7242 size_t len_name
= strlen(el_name
);
7244 for(; el_table
->element
; el_table
++)
7245 if(!strucmp(el_name
, el_table
->element
)
7246 || (el_table
->alternate
7247 && len_name
== el_table
->len
+ 1
7248 && el_name
[el_table
->len
] == '/'
7249 && !struncmp(el_name
, el_table
->element
, el_table
->len
)))
7257 * collect element's name and any attribute/value pairs then
7258 * dispatch to the appropriate handler.
7260 * Returns 1 : got what we wanted
7261 * 0 : we need more data
/*
 * html_element_collector - accumulate one markup tag, a character at a
 * time, in the collector ED(fd), and act on it once it is complete.
 *
 * Visible return values: 1 when collection is finished (element
 * dispatched, ignored or discarded), 0 while more characters are
 * needed, and -1 when the first character shows this can't be a tag.
 *
 * NOTE(review): many lines of this function are absent from this
 * listing; the summary above covers only what is visible here.
 */
7265 html_element_collector(FILTER_S
*fd
, int ch
)
7268 if(ED(fd
)->overrun
){
7270 * If problem processing, don't bother doing anything
7271 * internally, just return such that none of what we've
7272 * digested is displayed.
7274 HTML_DEBUG_EL("too long", ED(fd
));
7275 return(1); /* Let it go, Jim */
7277 else if(ED(fd
)->mkup_decl
){
7278 if(ED(fd
)->badform
){
7279 dprint((2, "-- html error: bad form: %.*s\n",
7280 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7282 * Invalid comment -- make some guesses as
7283 * to whether we should stop with this greater-than...
7285 if(ED(fd
)->buf
[0] != '-'
7287 || (ED(fd
)->buf
[1] == '-'
7288 && ED(fd
)->buf
[ED(fd
)->len
- 1] == '-'
7289 && ED(fd
)->buf
[ED(fd
)->len
- 2] == '-'))
7293 dprint((5, "-- html: OK: %.*s\n",
7294 ED(fd
)->len
, ED(fd
)->buf
? ED(fd
)->buf
: "?"));
7295 if(ED(fd
)->start_comment
== ED(fd
)->end_comment
){
7296 if(ED(fd
)->len
> 10){
7297 ED(fd
)->buf
[ED(fd
)->len
- 2] = '\0';
7298 html_element_comment(fd
, ED(fd
)->buf
+ 2);
7303 /* else keep collecting comment below */
7306 else if(ED(fd
)->proc_inst
){
7307 return(1); /* return without display... */
7309 else if(!ED(fd
)->quoted
|| ED(fd
)->badform
){
7313 * We either have the whole thing or all that we could
7314 * salvage from it. Try our best...
7317 if(HD(fd
)->bitbucket
)
7318 return(1); /* element inside chtml clause! */
7320 if(!ED(fd
)->badform
&& html_element_flush(ED(fd
)))
7321 return(1); /* return without display... */
7324 * If we ran into an empty tag or we don't know how to deal
7325 * with it, just go on, ignoring it...
7327 if(ED(fd
)->element
&& (ep
= element_properties(fd
, ED(fd
)->element
))){
7329 /* dispatch the element's handler */
7330 HTML_DEBUG_EL(ED(fd
)->end_tag
? "POP" : "PUSH", ED(fd
));
7331 if(ED(fd
)->end_tag
){
7332 html_pop(fd
, ep
); /* remove it's handler */
7335 /* if a block element, pop any open <p>'s */
7339 for(tp
= HANDLERS(fd
); tp
&& EL(tp
)->handler
== html_p
; tp
= tp
->below
){
7340 HTML_DEBUG_EL("Unclosed <P>", ED(fd
));
7341 html_pop(fd
, EL(tp
));
7346 /* enforce table nesting */
7347 if(!strucmp(ep
->element
, "tr")){
7348 if(!HANDLERS(fd
) || (strucmp(EL(HANDLERS(fd
))->element
, "table") && strucmp(EL(HANDLERS(fd
))->element
, "tbody") && strucmp(EL(HANDLERS(fd
))->element
, "thead"))){
7349 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd
)) ? EL(HANDLERS(fd
))->element
: "NO-HANDLERS"));
7350 if(HANDLERS(fd
) && !strucmp(EL(HANDLERS(fd
))->element
,"tr")){
7351 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7352 html_pop(fd
, EL(HANDLERS(fd
)));
7355 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7356 html_push(fd
, element_properties(fd
, "table"));
7360 else if(!strucmp(ep
->element
, "td") || !strucmp(ep
->element
, "th")){
7362 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7363 html_push(fd
, element_properties(fd
, "table"));
7364 html_push(fd
, element_properties(fd
, "tr"));
7366 else if(strucmp(EL(HANDLERS(fd
))->element
, "tr")){
7367 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd
))->element
));
7368 html_push(fd
, element_properties(fd
, "tr"));
7370 else if(!strucmp(EL(HANDLERS(fd
))->element
, "td")){
7371 dprint((2, "-- html error: bad nesting popping <TD>"));
7372 html_pop(fd
, EL(HANDLERS(fd
)));
7376 /* add it's handler */
7377 if(html_push(fd
, ep
)){
7379 /* remove empty element */
7386 HTML_DEBUG_EL("IGNORED", ED(fd
));
7389 else{ /* else, empty or unrecognized */
7390 HTML_DEBUG_EL("?", ED(fd
));
7393 return(1); /* all done! see, that didn't hurt */
7396 else if(ch
== '/' && ED(fd
)->element
&& ED(fd
)->len
){
7402 if(ED(fd
)->mkup_decl
){
7403 if((ch
&= 0xff) == '-'){
7406 if(ED(fd
)->start_comment
)
7407 ED(fd
)->end_comment
= 1;
7409 ED(fd
)->start_comment
= 1;
7415 if(ED(fd
)->end_comment
)
7416 ED(fd
)->start_comment
= ED(fd
)->end_comment
= 0;
7419 * no "--" after ! or non-whitespace between comments - bad
7421 if(ED(fd
)->len
< 2 || (!ED(fd
)->start_comment
7422 && !ASCII_ISSPACE((unsigned char) ch
)))
7423 ED(fd
)->badform
= 1; /* non-comment! */
7429 * Remember the comment for possible later processing, if
7430 * it gets too long, remember first and last few chars
7431 * so we know when to terminate (and throw some garbage
7432 * in between when we toss out what's between.
7434 if(ED(fd
)->len
== HTML_BUF_LEN
){
7435 ED(fd
)->buf
[2] = ED(fd
)->buf
[3] = 'X';
7436 ED(fd
)->buf
[4] = ED(fd
)->buf
[ED(fd
)->len
- 2];
7437 ED(fd
)->buf
[5] = ED(fd
)->buf
[ED(fd
)->len
- 1];
7441 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7442 return(0); /* comments go in the bit bucket */
7444 else if(ED(fd
)->overrun
|| ED(fd
)->badform
){
7445 return(0); /* swallow char's until next '>' */
7447 else if(!ED(fd
)->element
&& !ED(fd
)->len
){
7448 if(ch
== '/'){ /* validate leading chars */
7449 ED(fd
)->end_tag
= 1;
7453 ED(fd
)->mkup_decl
= 1;
7457 ED(fd
)->proc_inst
= 1;
7460 else if(!isalpha((unsigned char) ch
))
7461 return(-1); /* can't be a tag! */
7463 else if(ch
== '\"' || ch
== '\''){
7464 if(!ED(fd
)->hit_equal
){
7465 ED(fd
)->badform
= 1; /* quote in element name?!? */
7470 if(ED(fd
)->quoted
== (char) ch
){
7471 /* end of a quoted value */
7473 if(ED(fd
)->len
&& html_element_flush(ED(fd
)))
7474 ED(fd
)->badform
= 1;
7476 return(0); /* continue collecting chars */
7478 /* ELSE fall thru writing other quoting char */
7481 ED(fd
)->quoted
= (char) ch
;
7482 ED(fd
)->was_quoted
= 1;
7483 return(0); /* need more data */
7486 else if (ASCII_ISSPACE((unsigned char) ch
))
7487 ED(fd
)->unquoted_data
= 0;
7488 else if (ED(fd
)->hit_equal
)
7489 ED(fd
)->unquoted_data
= 1;
7491 ch
&= 0xff; /* strip any "literal" high bits */
7493 || ED(fd
)->unquoted_data
7495 || strchr("#-.!", ch
)){
7496 if(ED(fd
)->len
>= ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7497 ? ED(fd
)->bufsize
:MAX_ELEMENT
)){
7498 unsigned long i
, bufsize
= ED(fd
)->bufsize
;
7499 ED(fd
)->bufsize
= ED(fd
)->len
+ HTML_BUF_LEN
;
7500 fs_resize((void **) &ED(fd
)->buf
, ED(fd
)->bufsize
);
7501 memset(&ED(fd
)->buf
[bufsize
], '\0', ED(fd
)->bufsize
- bufsize
);
7503 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7504 ? ED(fd
)->bufsize
:MAX_ELEMENT
)){
7505 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
;
7508 ED(fd
)->overrun
= 1; /* flag it broken */
7510 else if(ASCII_ISSPACE((unsigned char) ch
) || ch
== '='){
7511 if((ED(fd
)->len
|| ED(fd
)->was_quoted
) && html_element_flush(ED(fd
))){
7512 ED(fd
)->badform
= 1;
7513 return(0); /* else, we ain't done yet */
7516 if(!ED(fd
)->hit_equal
)
7517 ED(fd
)->hit_equal
= (ch
== '=');
7519 else if(ch
== '/' && ED(fd
)->len
&& !ED(fd
)->element
){
7521 ep
= element_properties(fd
, ED(fd
)->buf
);
7524 ED(fd
)->badform
= 1;
7526 if(ED(fd
)->len
>= ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7527 ? ED(fd
)->bufsize
:MAX_ELEMENT
)){
7528 unsigned long bufsize
= ED(fd
)->bufsize
;
7529 ED(fd
)->bufsize
= ED(fd
)->len
+ HTML_BUF_LEN
;
7530 fs_resize((void **) &ED(fd
)->buf
, ED(fd
)->bufsize
);
7531 memset(&ED(fd
)->buf
[bufsize
], '\0', ED(fd
)->bufsize
- bufsize
);
7533 if(ED(fd
)->len
< ((ED(fd
)->element
|| !ED(fd
)->hit_equal
)
7534 ? ED(fd
)->bufsize
:MAX_ELEMENT
)){
7535 ED(fd
)->buf
[(ED(fd
)->len
)++] = ch
; /* add this exception */
7538 ED(fd
)->overrun
= 1;
7542 ED(fd
)->badform
= 1;
7545 ED(fd
)->badform
= 1; /* unrecognized data?? */
7547 return(0); /* keep collecting */
7552 * Element collector found complete string, integrate it and reset
7553 * internal collection buffer.
7555 * Returns zero if element collection buffer flushed, error flag otherwise
/*
 * html_element_flush - fold the text gathered in el_data->buf into the
 * element being built, then release the collection buffer.
 *
 * Visible behavior: if an '=' had been seen, the text becomes the value
 * of the current attribute (only when that attribute has no value yet;
 * otherwise a debug message is logged).  Without an '=', the first
 * flushed string becomes the element name and later ones each start a
 * new PARAMETER appended to the el_data->attribs list.  Afterwards
 * was_quoted is cleared, buf is given back and bufsize is set to 0.
 *
 * Returns rv.  NOTE(review): the lines that set rv are not part of
 * this listing -- confirm against the full file.
 */
7558 html_element_flush(CLCTR_S
*el_data
)
7562 if(el_data
->hit_equal
){ /* adding a value */
7563 el_data
->hit_equal
= 0;
7564 if(el_data
->cur_attrib
){
7565 if(!el_data
->cur_attrib
->value
){
7566 el_data
->cur_attrib
->value
= cpystr(el_data
->len
7567 ? el_data
->buf
: "");
7570 dprint((2, "** element: unexpected value: %.10s...\n",
7571 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7576 dprint((2, "** element: missing attribute name: %.10s...\n",
7577 (el_data
->len
&& el_data
->buf
) ? el_data
->buf
: "\"\""));
7581 else if(el_data
->len
){
7582 if(!el_data
->element
){
7583 el_data
->element
= cpystr(el_data
->buf
);
7586 PARAMETER
*p
= (PARAMETER
*)fs_get(sizeof(PARAMETER
));
7587 memset(p
, 0, sizeof(PARAMETER
));
7588 if(el_data
->attribs
){
7589 el_data
->cur_attrib
->next
= p
;
7590 el_data
->cur_attrib
= p
;
7593 el_data
->attribs
= el_data
->cur_attrib
= p
;
7595 p
->attribute
= cpystr(el_data
->buf
);
7600 el_data
->was_quoted
= 0; /* reset collector buf and state */
7602 fs_give((void **) &el_data
->buf
);
7603 el_data
->bufsize
= 0;
7604 return(rv
); /* report whatever happened above */
7609 * html_element_comment - "Special" comment handling here
/*
 * html_element_comment - act on the text of a comment, s.
 *
 * Visible behavior: after skipping leading whitespace, text starting
 * with "chtml " is treated as a conditional-HTML directive ("if",
 * "else", "endif") that sets, inverts or clears HD(f)->bitbucket.
 * While not in the bit bucket, "#include file=..." copies the named
 * file to the output line by line and "#echo var=..." writes out the
 * value found for the named variable.
 *
 * NOTE(review): a number of lines are absent from this listing; check
 * the full source before relying on the details.
 */
7612 html_element_comment(FILTER_S
*f
, char *s
)
7616 while(*s
&& ASCII_ISSPACE((unsigned char) *s
))
7620 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7622 if(!struncmp(s
, "chtml ", 6)){
7624 if(!struncmp(s
, "if ", 3)){
7625 HD(f
)->bitbucket
= 1; /* default is failure! */
7629 if(!struncmp(s
+ 1, "inemode=", 8)){
7630 if(!strucmp(s
= removing_quotes(s
+ 9), "function_key")
7631 && F_ON(F_USE_FK
, ps_global
))
7632 HD(f
)->bitbucket
= 0;
7633 else if(!strucmp(s
, "running"))
7634 HD(f
)->bitbucket
= 0;
7636 else if(!strucmp(s
, "os_windows"))
7637 HD(f
)->bitbucket
= 0;
7639 else if(!strucmp(s
, "os_osx") || !strucmp(s
, "os_unix_and_osx"))
7640 HD(f
)->bitbucket
= 0;
7642 else if(!strucmp(s
, "os_unix") || !strucmp(s
, "os_unix_and_osx"))
7643 HD(f
)->bitbucket
= 0;
7649 case '[' : /* test */
7650 if((p
= strindex(++s
, ']')) != NULL
){
7651 *p
= '\0'; /* tie off test string */
7652 removing_leading_white_space(s
);
7653 removing_trailing_white_space(s
);
7654 if(*s
== '-' && *(s
+1) == 'r'){ /* readable file? */
7655 for(s
+= 2; *s
&& ASCII_ISSPACE((unsigned char) *s
); s
++)
7659 HD(f
)->bitbucket
= (can_access(CHTML_VAR_EXPAND(removing_quotes(s
)),
7670 else if(!strucmp(s
, "else")){
7671 HD(f
)->bitbucket
= !HD(f
)->bitbucket
;
7673 else if(!strucmp(s
, "endif")){
7674 /* Clean up after chtml here */
7675 HD(f
)->bitbucket
= 0;
7678 else if(!HD(f
)->bitbucket
){
7679 if(!struncmp(s
, "#include ", 9)){
7680 char buf
[MAILTMPLEN
], *bufp
;
7681 int len
, end_of_line
;
7684 /* Include the named file */
7685 if(!struncmp(s
+= 9, "file=", 5)
7686 && (fp
= our_fopen(CHTML_VAR_EXPAND(removing_quotes(s
+5)), "r"))){
7687 html_element_output(f
, HTML_NEWLINE
);
7689 while(fgets(buf
, sizeof(buf
), fp
)){
7690 if((len
= strlen(buf
)) && buf
[len
-1] == '\n'){
7697 for(bufp
= buf
; len
; bufp
++, len
--)
7698 html_element_output(f
, (int) *bufp
);
7701 html_element_output(f
, HTML_NEWLINE
);
7705 html_element_output(f
, HTML_NEWLINE
);
7711 else if(!struncmp(s
, "#echo ", 6)){
7712 if(!struncmp(s
+= 6, "var=", 4)){
7713 char *p
, buf
[MAILTMPLEN
];
7715 extern char datestamp
[];
7717 if(!strcmp(s
= removing_quotes(s
+ 4), "ALPINE_VERSION")){
7720 else if(!strcmp(s
, "ALPINE_REVISION")){
7721 p
= get_alpine_revision_string(buf
, sizeof(buf
));
7723 else if(!strcmp(s
, "C_CLIENT_VERSION")){
7727 else if(!strcmp(s
, "PASSWORD_FILE")){
7731 else if(!strcmp(s
, "ALPINE_COMPILE_DATE")){
7734 else if(!strcmp(s
, "ALPINE_TODAYS_DATE")){
7735 rfc822_date(p
= buf
);
7737 else if(!strcmp(s
, "_LOCAL_FULLNAME_")){
7738 p
= (ps_global
->VAR_LOCAL_FULLNAME
7739 && ps_global
->VAR_LOCAL_FULLNAME
[0])
7740 ? ps_global
->VAR_LOCAL_FULLNAME
7743 else if(!strcmp(s
, "_LOCAL_ADDRESS_")){
7744 p
= (ps_global
->VAR_LOCAL_ADDRESS
7745 && ps_global
->VAR_LOCAL_ADDRESS
[0])
7746 ? ps_global
->VAR_LOCAL_ADDRESS
7748 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7749 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7750 mail_free_address(&adr
);
7752 else if(!strcmp(s
, "_BUGS_FULLNAME_")){
7753 p
= (ps_global
->VAR_BUGS_FULLNAME
7754 && ps_global
->VAR_BUGS_FULLNAME
[0])
7755 ? ps_global
->VAR_BUGS_FULLNAME
7756 : "Place to report Alpine Bugs";
7758 else if(!strcmp(s
, "_BUGS_ADDRESS_")){
7759 p
= (ps_global
->VAR_BUGS_ADDRESS
7760 && ps_global
->VAR_BUGS_ADDRESS
[0])
7761 ? ps_global
->VAR_BUGS_ADDRESS
: "postmaster";
7762 adr
= rfc822_parse_mailbox(&p
, ps_global
->maildomain
);
7763 snprintf(p
= buf
, sizeof(buf
), "%s@%s", adr
->mailbox
, adr
->host
);
7764 mail_free_address(&adr
);
7766 else if(!strcmp(s
, "CURRENT_DIR")){
7767 getcwd(p
= buf
, sizeof(buf
));
7769 else if(!strcmp(s
, "HOME_DIR")){
7770 p
= ps_global
->home_dir
;
7772 else if(!strcmp(s
, "PINE_CONF_PATH")){
7773 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7774 p
= "/usr/local/lib/pine.conf";
7779 else if(!strcmp(s
, "PINE_CONF_FIXED_PATH")){
7780 #ifdef SYSTEM_PINERC_FIXED
7781 p
= SYSTEM_PINERC_FIXED
;
7783 p
= "/usr/local/lib/pine.conf.fixed";
7786 else if(!strcmp(s
, "PINE_INFO_PATH")){
7787 p
= SYSTEM_PINE_INFO_PATH
;
7789 else if(!strcmp(s
, "MAIL_SPOOL_PATH")){
7792 else if(!strcmp(s
, "MAIL_SPOOL_LOCK_PATH")){
7793 /* Don't put the leading /tmp/. */
7798 for(j
= 0, i
= 0; p
[i
] && j
< MAILTMPLEN
- 1; i
++){
7808 else if(!struncmp(s
, "VAR_", 4)){
7810 if(pith_opt_pretty_var_name
)
7811 p
= (*pith_opt_pretty_var_name
)(p
);
7813 else if(!struncmp(s
, "FEAT_", 5)){
7815 if(pith_opt_pretty_feature_name
)
7816 p
= (*pith_opt_pretty_feature_name
)(p
, -1);
7822 if(f
->f1
== WSPACE
){
7823 html_element_output(f
, ' ');
7824 f
->f1
= DFL
; /* clear it */
7828 html_element_output(f
, (int) *p
++);
/*
 * html_element_output - hand ch to the handler at the top of the
 * handler stack, HANDLERS(f), as a GF_DATA call.
 *
 * NOTE(review): only part of the body is visible in this listing; what
 * happens when no handler is installed isn't shown.
 */
7837 html_element_output(FILTER_S
*f
, int ch
)
7840 (*EL(HANDLERS(f
))->handler
)(HANDLERS(f
), ch
, GF_DATA
);
7846 * collect html entity and return its UCS value when done.
7848 * Returns HTML_MOREDATA : we need more data
7849 * HTML_ENTITY : entity collected
7850 * HTML_BADVALUE : good data, but no named match or out of range
7851 * HTML_BADDATA : invalid input
7854 * - entity format is "'&' tag ';'" and represents a literal char
7855 * - named entities are CASE SENSITIVE.
7856 * - numeric char references (where the tag is prefixed with a '#')
7857 * are a char with that numbers value
7858 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
/*
 * html_entity_collector - gather the characters of an entity into a
 * static buffer and translate it once a ';' or whitespace ends it.
 *
 * Visible behavior: numeric references are parsed with strtoul(), in
 * base 16 when the second buffered character is 'x' or 'X' and in
 * base 10 otherwise; other names are looked up in entity_tab with
 * strcmp().  On a match *ucs receives the value and *alt the table's
 * plain-text form.  Visible results are HTML_MOREDATA, HTML_ENTITY,
 * HTML_BADVALUE and HTML_BADDATA.
 *
 * NOTE(review): parts of the function (declarations, the test that
 * separates numeric from named entities, the final return) are missing
 * from this listing.
 */
7861 html_entity_collector(FILTER_S
*f
, int ch
, UCS
*ucs
, char **alt
)
7864 static char buf
[MAX_ENTITY
+2];
7867 if(len
== MAX_ENTITY
){
7871 ? (isalpha((unsigned char) ch
) || ch
== '#')
7872 : ((isdigit((unsigned char) ch
)
7873 || (len
== 1 && (unsigned char) ch
== 'x')
7874 || (len
== 1 &&(unsigned char) ch
== 'X')
7875 || (len
> 1 && isxdigit((unsigned char) ch
))
7876 || (isalpha((unsigned char) ch
) && buf
[0] != '#')))){
7878 return(HTML_MOREDATA
);
7880 else if(ch
== ';' || ASCII_ISSPACE((unsigned char) ch
)){
7881 buf
[len
] = '\0'; /* got something! */
7883 if(buf
[1] == 'x' || buf
[1] == 'X')
7884 *ucs
= (UCS
) strtoul(&buf
[2], NULL
, 16);
7886 *ucs
= (UCS
) strtoul(&buf
[1], NULL
, 10);
7890 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7891 if(entity_tab
[i
].value
== *ucs
){
7892 *alt
= entity_tab
[i
].plain
;
7898 return(HTML_ENTITY
);
7901 rv
= HTML_BADVALUE
; /* in case of no match */
7902 for(i
= 0; i
< sizeof(entity_tab
)/sizeof(struct html_entities
); i
++)
7903 if(strcmp(entity_tab
[i
].name
, buf
) == 0){
7904 *ucs
= entity_tab
[i
].value
;
7906 *alt
= entity_tab
[i
].plain
;
7909 return(HTML_ENTITY
);
7914 rv
= HTML_BADDATA
; /* bogus input! */
7926 /*----------------------------------------------------------------------
7927 HTML text to plain text filter
7929 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7930 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
/*
 * gf_html2plain - filter entry point; flg selects the operation.
 *
 * Visible behavior: the data branch reads characters with GF_GETC,
 * running them through html_entity_collector() and passing the
 * resulting code points along as UTF-8 (or a substitute when the code
 * point isn't displayable).  GF_EOD pops handlers that are still open,
 * writes a final newline, releases the filter's buffers and forwards
 * GF_DATA and GF_EOD to the next filter.  GF_RESET allocates and
 * initializes the HTML_DATA_S state and the line buffer.
 *
 * NOTE(review): many lines are absent from this listing.
 */
7935 gf_html2plain(FILTER_S
*f
, int flg
)
7937 /* BUG: quote incoming \255 values (see "yuml" above!) */
7940 GF_INIT(f
, f
->next
);
7945 for(ii
= HTML_INDENT(f
); ii
> 0; ii
--)
7951 while(GF_GETC(f
, c
)){
7953 * First we have to collect any literal entities...
7954 * that is, IF we're not already collecting one
7955 * AND we're not in element's text or, if we are, we're
7956 * not in quoted text. Whew.
7962 switch(html_entity_collector(f
, c
, &ucs
, &alt
)){
7963 case HTML_MOREDATA
: /* more data required? */
7964 continue; /* go get another char */
7966 case HTML_BADVALUE
:
7968 /* if supplied, process bogus data */
7971 unsigned int uic
= *alt
;
7975 if(c
== '&' && !HD(f
)->quoted
){
7980 f
->t
= 0; /* don't come back next time */
7984 default : /* thing to process */
7985 f
->t
= 0; /* don't come back */
7988 * do something with UCS codepoint. If it's
7989 * not displayable then use the alt version
7991 * cvt UCS to UTF-8 and toss into next filter.
7993 if(ucs
> 127 && wcellwidth(ucs
) < 0){
7996 c
= MAKE_LITERAL(*alt
);
8003 c
= MAKE_LITERAL('?');
8006 unsigned char utf8buf
[8], *p1
, *p2
;
8008 p2
= utf8_put(p1
= (unsigned char *) utf8buf
, (unsigned long) ucs
);
8009 for(; p1
< p2
; p1
++){
8010 c
= MAKE_LITERAL(*p1
);
8020 else if(!PASS_HTML(f
) && c
== '&' && !HD(f
)->quoted
){
8026 * then we process whatever we got...
8032 GF_OP_END(f
); /* clean up our input pointers */
8034 else if(flg
== GF_EOD
){
8036 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f
))->element
));
8037 html_pop(f
, EL(HANDLERS(f
)));
8040 html_output(f
, HTML_NEWLINE
);
8042 HTML_ULINE(f
, ULINE_BIT(f
) = 0);
8045 HTML_BOLD(f
, BOLD_BIT(f
) = 0);
8048 fs_give((void **)&f
->line
);
8050 free_color_pair(&HD(f
)->color
);
8054 if(((HTML_OPT_S
*)f
->opt
)->base
)
8055 fs_give((void **) &((HTML_OPT_S
*)f
->opt
)->base
);
8060 (*f
->next
->f
)(f
->next
, GF_DATA
);
8061 (*f
->next
->f
)(f
->next
, GF_EOD
);
8063 else if(flg
== GF_RESET
){
8064 dprint((9, "-- gf_reset html2plain\n"));
8065 f
->data
= (HTML_DATA_S
*) fs_get(sizeof(HTML_DATA_S
));
8066 memset(f
->data
, 0, sizeof(HTML_DATA_S
));
8067 /* start with flowing text */
8068 HD(f
)->wrapstate
= !PASS_HTML(f
);
8069 HD(f
)->wrapcol
= WRAP_COLS(f
);
8070 f
->f1
= DFL
; /* state */
8071 f
->f2
= 0; /* chars in wrap buffer */
8072 f
->n
= 0L; /* chars on line so far */
8073 f
->linep
= f
->line
= (char *)fs_get(HTML_BUF_LEN
* sizeof(char));
8074 HD(f
)->line_bufsize
= HTML_BUF_LEN
; /* initial bufsize of line */
8075 HD(f
)->alt_entity
= (!ps_global
->display_charmap
8076 || strucmp(ps_global
->display_charmap
, "iso-8859-1"));
8077 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
8084 * html_indent - do the requested indent level function with appropriate
8085 * flushing and such.
8087 * Returns: indent level prior to set/increment
/*
 * html_indent - change the indent level as directed by func.
 *
 * Visible behavior: one branch flushes pending output and adds val to
 * the indent level, clamping the result at zero; another flushes and
 * sets the level to val.  The level in effect on entry is saved in
 * "old".
 *
 * NOTE(review): the lines that select on func and the return statement
 * are not in this listing.
 */
8090 html_indent(FILTER_S
*f
, int val
, int func
)
8092 int old
= HD(f
)->indent_level
;
8094 /* flush pending data at old indent level */
8097 html_output_flush(f
);
8098 if((HD(f
)->indent_level
+= val
) < 0)
8099 HD(f
)->indent_level
= 0;
8104 html_output_flush(f
);
8105 HD(f
)->indent_level
= val
;
8118 * html_blanks - Insert n blank lines into output
8121 html_blank(FILTER_S
*f
, int n
)
8123 /* Cap off any flowing text, and then write blank lines */
8124 if(f
->f2
|| f
->n
|| CENTER_BIT(f
) || HD(f
)->centered
|| WRAPPED_LEN(f
))
8125 html_output(f
, HTML_NEWLINE
);
8127 if(HD(f
)->wrapstate
)
8128 while(HD(f
)->blanks
< n
) /* blanks inc'd by HTML_NEWLINE */
8129 html_output(f
, HTML_NEWLINE
);
8135 * html_newline -- insert a newline mindful of embedded tags
/*
 * html_newline - write a newline by way of html_write_newline(), then
 * update the blank line bookkeeping depending on whether anything had
 * been written on the line (f->n).
 *
 * NOTE(review): the statements under the f->n test are missing from
 * this listing.
 */
8138 html_newline(FILTER_S
*f
)
8140 html_write_newline(f
); /* commit an actual newline */
8142 if(f
->n
){ /* and keep track of blank lines */
8152 * output the given char, handling any requested wrapping.
8153 * It's understood that all whitespace handed us is written. In other
8154 * words, junk whitespace is weeded out before it's given to us here.
/*
 * html_output - route ch to the centered or the normal output routine,
 * chosen by CENTER_BIT(f).
 *
 * Visible behavior: control tokens (TAG_EMBED, anything arriving while
 * an embedded sequence is in progress, non-literal values above 0xff)
 * are passed straight through with a width of 1.  Other bytes are fed
 * to utf8_to_ucs4_oneatatime(); once it reports a result the buffered
 * bytes are emitted, the width being attached to the first byte only,
 * and the buffer pointers are reset.
 *
 * NOTE(review): some lines are absent from this listing.
 */
8158 html_output(FILTER_S
*f
, int ch
)
8162 void (*o_f
)(FILTER_S
*, int, int, int) = CENTER_BIT(f
) ? html_output_centered
: html_output_normal
;
8165 * if ch is a control token, just pass it on, else, collect
8166 * utf8-encoded characters to determine width,then feed into
8169 if(ch
== TAG_EMBED
|| HD(f
)->embedded
.state
|| (ch
> 0xff && IS_LITERAL(ch
) == 0)){
8170 (*o_f
)(f
, ch
, 1, 0);
8172 else if(utf8_to_ucs4_oneatatime(ch
& 0xff, &(HD(f
)->cb
), &uc
, &width
)){
8175 for(cp
= HD(f
)->cb
.cbuf
; cp
<= HD(f
)->cb
.cbufend
; cp
++){
8176 (*o_f
)(f
, *cp
, width
, HD(f
)->cb
.cbufend
- cp
);
8177 width
= 0; /* only count it once */
8180 HD(f
)->cb
.cbufp
= HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbuf
;
8183 HD(f
)->cb
.cbufend
= HD(f
)->cb
.cbufp
;
8184 /* else do nothing until we have a full character */
/*
 * html_output_string - NOTE(review): only the signature is visible in
 * this listing.  Callers seen here pass a tag name, an attribute name
 * or value, or a literal string, presumably to be written to filter f
 * -- confirm against the full file.
 */
8189 html_output_string(FILTER_S
*f
, char *s
)
/*
 * html_output_raw_tag - write the current element back out as markup:
 * '<', the tag name, its attributes, and '>'.
 *
 * Visible attribute screening: event handler attributes, "class", and
 * (unless PASS_IMAGES) the "src" of an img are singled out; when images
 * aren't being passed, "style" and "background" values are scanned for
 * background image references.  For "form" an onsubmit confirmation is
 * appended.
 *
 * NOTE(review): lines are missing from this listing (for instance what
 * is done with a singled-out attribute) -- confirm against the full file.
 */
8197 html_output_raw_tag(FILTER_S
*f
, char *tag
)
8203 html_output(f
, '<');
8204 html_output_string(f
, tag
);
8205 for(p
= HD(f
)->el_data
->attribs
;
8208 /* SECURITY: no javascript */
8209 /* PRIVACY: no img src without permission */
8210 /* BUGS: no class collisions since <head> ignored */
8211 if(html_event_attribute(p
->attribute
)
8212 || !strucmp(p
->attribute
, "class")
8213 || (!PASS_IMAGES(f
) && !strucmp(tag
, "img") && !strucmp(p
->attribute
, "src")))
8216 /* PRIVACY: sniff out background images */
8217 if(p
->value
&& !PASS_IMAGES(f
)){
8218 if(!strucmp(p
->attribute
, "style")){
8219 if((vp
= srchstr(p
->value
, "background-image")) != NULL
){
8220 /* neuter in place */
8221 vp
[11] = vp
[12] = vp
[13] = vp
[14] = vp
[15] = 'X';
8224 for(vp
= p
->value
; (vp
= srchstr(vp
, "background")) != NULL
; vp
++)
8225 if(vp
[10] == ' ' || vp
[10] == ':')
8226 for(i
= 11; vp
[i
] && vp
[i
] != ';'; i
++)
8227 if((vp
[i
] == 'u' && vp
[i
+1] == 'r' && vp
[i
+2] == 'l' && vp
[i
+3] == '(')
8228 || vp
[i
] == ':' || vp
[i
] == '/' || vp
[i
] == '.')
8232 else if(!strucmp(p
->attribute
, "background")){
8235 for(ip
= p
->value
; *ip
&& !(*ip
== ':' || *ip
== '/' || *ip
== '.'); ip
++)
8243 html_output(f
, ' ');
8244 html_output_string(f
, p
->attribute
);
8246 html_output(f
, '=');
8247 html_output(f
, '\"');
8248 html_output_string(f
, p
->value
);
8249 html_output(f
, '\"');
8253 /* append warning to form submission */
8254 if(!strucmp(tag
, "form")){
8255 html_output_string(f
, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8259 html_output(f
, ' ');
8260 html_output(f
, '/');
8263 html_output(f
, '>');
/*
 * html_event_attribute - tell whether attr names a script event handler
 *
 * The raw tag writer uses this to pick out attributes that carry
 * script ("SECURITY: no javascript").  The name is compared with
 * strucmp() against the table below.
 *
 * Returns non-zero if attr is one of the listed event attributes,
 * zero otherwise.
 */
int
html_event_attribute(char *attr)
{
    size_t i;
    static char *events[] = {
	"onabort",     "onblur",      "onchange",   "onclick",     "ondblclick", "ondragdrop",
	"onerror",     "onfocus",     "onkeydown",  "onkeypress",  "onkeyup",    "onload",
	"onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup",  "onmove",
	/* "onselect" used to be misspelled "onselec" and so never matched */
	"onreset",     "onresize",    "onselect",   "onsubmit",    "onunload"
    };

    /* cheap reject: every name in the table starts with "on" */
    if((attr[0] == 'o' || attr[0] == 'O') && (attr[1] == 'n' || attr[1] == 'N'))
      for(i = 0; i < sizeof(events)/sizeof(events[0]); i++)
	if(!strucmp(attr, events[i]))
	  return(1);

    return(0);
}
/*
 * html_output_normal - output routine for text that isn't centered.
 *
 * Visible behavior: any pending centered text is flushed and its
 * buffers released first.  With wrapstate set, characters are
 * collected with HTML_LINEP_PUTC, embedded TAG_EMBED sequences being
 * tracked in HD(f)->embedded.state, and whitespace or a hard newline
 * triggers html_output_flush().  Otherwise characters go out through
 * html_putc() with a plain wrap at WRAP_COLS.
 *
 * NOTE(review): many lines are absent from this listing.
 */
8288 html_output_normal(FILTER_S
*f
, int ch
, int width
, int remaining
)
8290 static int written
= 0;
8293 if(HD(f
)->centered
){
8294 html_centered_flush(f
);
8295 fs_give((void **) &HD(f
)->centered
->line
.buf
);
8296 fs_give((void **) &HD(f
)->centered
->word
.buf
);
8297 fs_give((void **) &HD(f
)->centered
);
8300 if(HD(f
)->wrapstate
){
8301 if(ch
== HTML_NEWLINE
){ /* hard newline */
8302 html_output_flush(f
);
8306 HD(f
)->blanks
= 0; /* reset blank line counter */
8308 if(ch
== TAG_EMBED
){ /* takes up no space */
8309 HD(f
)->embedded
.state
= -5;
8310 HTML_LINEP_PUTC(f
, TAG_EMBED
);
8312 else if(HD(f
)->embedded
.state
){ /* ditto */
8313 if(HD(f
)->embedded
.state
== -5){
8314 /* looking for specially handled tags following TAG_EMBED */
8315 if(ch
== TAG_HANDLE
)
8316 HD(f
)->embedded
.state
= -1; /* next ch is length */
8317 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8319 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8321 if(ch
== TAG_FGCOLOR
)
8322 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8324 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8326 HD(f
)->embedded
.state
= RGBLEN
;
8329 HD(f
)->embedded
.state
= 0; /* non-special */
8331 else if(HD(f
)->embedded
.state
> 0){
8332 /* collecting up an RGBLEN color or length, ignore tags */
8333 (HD(f
)->embedded
.state
)--;
8334 if(HD(f
)->embedded
.color
)
8335 *HD(f
)->embedded
.color
++ = ch
;
8337 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8338 *HD(f
)->embedded
.color
= '\0';
8339 HD(f
)->embedded
.color
= NULL
;
8342 else if(HD(f
)->embedded
.state
< 0){
8343 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8346 (HD(f
)->embedded
.state
)--;
8347 if(HD(f
)->embedded
.color
)
8348 *HD(f
)->embedded
.color
++ = ch
;
8350 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8351 *HD(f
)->embedded
.color
= '\0';
8352 HD(f
)->embedded
.color
= NULL
;
8356 HTML_LINEP_PUTC(f
, ch
);
8358 else if(HTML_ISSPACE(ch
)){
8359 html_output_flush(f
);
8368 if(f
->f2
+ cwidth
+ 1 >= WRAP_COLS(f
)){
8369 HTML_LINEP_PUTC(f
, ch
& 0xff);
8375 if(HD(f
)->in_anchor
)
8376 html_write_anchor(f
, HD(f
)->in_anchor
);
8379 HTML_LINEP_PUTC(f
, ch
& 0xff);
8393 html_output_flush(f
);
8395 switch(HD(f
)->embedded
.state
){
8400 * It's difficult to both preserve whitespace and wrap at the
8401 * same time so we'll do a dumb wrap at the edge of the screen.
8402 * Since this shouldn't come up much in real life we'll hope
8403 * it is good enough.
8405 if(!PASS_HTML(f
) && (f
->n
+ width
) > WRAP_COLS(f
))
8408 f
->n
+= width
; /* inc displayed char count */
8409 HD(f
)->blanks
= 0; /* reset blank line counter */
8410 html_putc(f
, ch
& 0xff);
8413 case TAG_EMBED
: /* takes up no space */
8414 html_putc(f
, TAG_EMBED
);
8415 HD(f
)->embedded
.state
= -2;
8418 case HTML_NEWLINE
: /* newline handling */
8432 HD(f
)->embedded
.state
= 0;
8435 HD(f
)->embedded
.state
= -1; /* next ch is length */
8456 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8458 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8459 HD(f
)->embedded
.state
= 11;
8464 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8466 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8467 HD(f
)->embedded
.state
= 11;
8470 case TAG_HANDLEOFF
:
8472 HD(f
)->in_anchor
= 0;
8483 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8488 HD(f
)->embedded
.state
--;
8489 if(HD(f
)->embedded
.color
)
8490 *HD(f
)->embedded
.color
++ = ch
;
8492 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8493 *HD(f
)->embedded
.color
= '\0';
8494 HD(f
)->embedded
.color
= NULL
;
8505 * flush any buffered chars waiting for wrapping.
/*
 * html_output_flush - write out the characters held back for wrapping.
 *
 * Visible behavior: a new line is started first when the text already
 * on the line (f->n) plus a separator and the f->f2 buffered chars
 * would pass HD(f)->wrapcol.  At the start of a line the indent is
 * written and any anchor in effect is written again.
 *
 * NOTE(review): several lines are absent from this listing.
 */
8508 html_output_flush(FILTER_S
*f
)
8511 if(f
->n
&& ((int) f
->n
) + 1 + f
->f2
> HD(f
)->wrapcol
)
8512 html_newline(f
); /* wrap? */
8514 if(f
->n
){ /* text already on the line? */
8516 f
->n
++; /* increment count */
8519 /* write at start of new line */
8520 html_write_indent(f
, HD(f
)->indent_level
);
8522 if(HD(f
)->in_anchor
)
8523 html_write_anchor(f
, HD(f
)->in_anchor
);
8534 * html_output_centered - managed writing centered text
/*
 * html_output_centered - output routine for centered text.
 *
 * Visible behavior: on first use it allocates the CENTER_S state along
 * with a "line" buffer and a "word" buffer.  Characters build up the
 * current word; the first whitespace after a word moves the word onto
 * the line (flushing the line first when the result would pass
 * wrapcol), and a hard newline flushes everything.  TAG_EMBED
 * sequences are stored in the word and tracked in
 * HD(f)->embedded.state.
 *
 * NOTE(review): many lines are absent from this listing.
 */
8537 html_output_centered(FILTER_S
*f
, int ch
, int width
, int remaining
)
8542 if(!HD(f
)->centered
){ /* new text? */
8543 html_output_flush(f
);
8544 if(f
->n
) /* start on blank line */
8547 HD(f
)->centered
= (CENTER_S
*) fs_get(sizeof(CENTER_S
));
8548 memset(HD(f
)->centered
, 0, sizeof(CENTER_S
));
8549 /* and grab a buf to start collecting centered text */
8550 HD(f
)->centered
->line
.len
= WRAP_COLS(f
);
8551 HD(f
)->centered
->line
.buf
= (char *) fs_get(HD(f
)->centered
->line
.len
8553 HD(f
)->centered
->line
.used
= HD(f
)->centered
->line
.width
= 0;
8554 HD(f
)->centered
->word
.len
= 32;
8555 HD(f
)->centered
->word
.buf
= (char *) fs_get(HD(f
)->centered
->word
.len
8557 HD(f
)->centered
->word
.used
= HD(f
)->centered
->word
.width
= 0;
8560 if(ch
== HTML_NEWLINE
){ /* hard newline */
8561 html_centered_flush(f
);
8563 else if(ch
== TAG_EMBED
){ /* takes up no space */
8564 HD(f
)->embedded
.state
= -5;
8565 html_centered_putc(&HD(f
)->centered
->word
, TAG_EMBED
);
8567 else if(HD(f
)->embedded
.state
){
8568 if(HD(f
)->embedded
.state
== -5){
8569 /* looking for specially handled tags following TAG_EMBED */
8570 if(ch
== TAG_HANDLE
)
8571 HD(f
)->embedded
.state
= -1; /* next ch is length */
8572 else if(ch
== TAG_FGCOLOR
|| ch
== TAG_BGCOLOR
){
8574 HD(f
)->color
= new_color_pair(NULL
, NULL
);
8576 if(ch
== TAG_FGCOLOR
)
8577 HD(f
)->embedded
.color
= HD(f
)->color
->fg
;
8579 HD(f
)->embedded
.color
= HD(f
)->color
->bg
;
8581 HD(f
)->embedded
.state
= RGBLEN
;
8584 HD(f
)->embedded
.state
= 0; /* non-special */
8586 else if(HD(f
)->embedded
.state
> 0){
8587 /* collecting up an RGBLEN color or length, ignore tags */
8588 (HD(f
)->embedded
.state
)--;
8589 if(HD(f
)->embedded
.color
)
8590 *HD(f
)->embedded
.color
++ = ch
;
8592 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8593 *HD(f
)->embedded
.color
= '\0';
8594 HD(f
)->embedded
.color
= NULL
;
8597 else if(HD(f
)->embedded
.state
< 0){
8598 HD(f
)->embedded
.state
= ch
; /* number of embedded chars */
8601 (HD(f
)->embedded
.state
)--;
8602 if(HD(f
)->embedded
.color
)
8603 *HD(f
)->embedded
.color
++ = ch
;
8605 if(HD(f
)->embedded
.state
== 0 && HD(f
)->embedded
.color
){
8606 *HD(f
)->embedded
.color
= '\0';
8607 HD(f
)->embedded
.color
= NULL
;
8611 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8613 else if(ASCII_ISSPACE((unsigned char) ch
)){
8614 if(!HD(f
)->centered
->space
++){ /* end of a word? flush! */
8617 if(WRAPPED_LEN(f
) > HD(f
)->wrapcol
){
8618 html_centered_flush_line(f
);
8619 /* fall thru to put current "word" on blank "line" */
8621 else if(HD(f
)->centered
->line
.width
){
8622 /* put space char between line and appended word */
8623 html_centered_putc(&HD(f
)->centered
->line
, ' ');
8624 HD(f
)->centered
->line
.width
++;
8627 for(i
= 0; i
< HD(f
)->centered
->word
.used
; i
++)
8628 html_centered_putc(&HD(f
)->centered
->line
,
8629 HD(f
)->centered
->word
.buf
[i
]);
8631 HD(f
)->centered
->line
.width
+= HD(f
)->centered
->word
.width
;
8632 HD(f
)->centered
->word
.used
= 0;
8633 HD(f
)->centered
->word
.width
= 0;
8640 /* ch is start of next word */
8641 HD(f
)->centered
->space
= 0;
8642 if(HD(f
)->centered
->word
.width
>= WRAP_COLS(f
))
8643 html_centered_flush(f
);
8645 html_centered_putc(&HD(f
)->centered
->word
, ch
);
8654 HD(f
)->centered
->word
.width
+= cwidth
;
8661 * html_centered_putc -- add given char to given WRAPLINE_S
/*
 * html_centered_putc - append ch to wp->buf, resizing the buffer with
 * fs_resize() when the next character would reach wp->len.
 *
 * NOTE(review): the line that enlarges wp->len before the resize is
 * not part of this listing.
 */
8664 html_centered_putc(WRAPLINE_S
*wp
, int ch
)
8666 if(wp
->used
+ 1 >= wp
->len
){
8668 fs_resize((void **) &wp
->buf
, wp
->len
* sizeof(char));
8671 wp
->buf
[wp
->used
++] = ch
;
8677 * html_centered_flush - finish writing any pending centered output
/*
 * html_centered_flush - write out whatever centered text is pending.
 *
 * Visible behavior: when the pending word would make the line pass
 * wrapcol, the line alone is flushed first.  Then an indent of half
 * the unused columns is written, followed by the collected line and
 * the pending word (anchors are tracked through
 * html_centered_handle()); both buffers are reset and the line is
 * finished with html_newline().
 *
 * NOTE(review): lines are absent from this listing.
 */
8680 html_centered_flush(FILTER_S
*f
)
8685 * If word present (what about line?) we need to deal with
8688 if(HD(f
)->centered
->word
.width
&& WRAPPED_LEN(f
) > HD(f
)->wrapcol
)
8689 html_centered_flush_line(f
);
8692 /* figure out how much to indent */
8693 if((i
= (WRAP_COLS(f
) - WRAPPED_LEN(f
))/2) > 0)
8694 html_write_indent(f
, i
);
8696 if(HD(f
)->centered
->anchor
)
8697 html_write_anchor(f
, HD(f
)->centered
->anchor
);
8699 html_centered_handle(&HD(f
)->centered
->anchor
,
8700 HD(f
)->centered
->line
.buf
,
8701 HD(f
)->centered
->line
.used
);
8702 html_write(f
, HD(f
)->centered
->line
.buf
, HD(f
)->centered
->line
.used
);
8704 if(HD(f
)->centered
->word
.used
){
8705 if(HD(f
)->centered
->line
.width
)
8708 html_centered_handle(&HD(f
)->centered
->anchor
,
8709 HD(f
)->centered
->word
.buf
,
8710 HD(f
)->centered
->word
.used
);
8711 html_write(f
, HD(f
)->centered
->word
.buf
,
8712 HD(f
)->centered
->word
.used
);
8715 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8716 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8719 if(HD(f
)->centered
->word
.used
){
8720 html_write(f
, HD(f
)->centered
->word
.buf
,
8721 HD(f
)->centered
->word
.used
);
8722 HD(f
)->centered
->line
.used
= HD(f
)->centered
->word
.used
= 0;
8723 HD(f
)->centered
->line
.width
= HD(f
)->centered
->word
.width
= 0;
8725 HD(f
)->blanks
++; /* advance the blank line counter */
8728 html_newline(f
); /* finish the line */
8733 * html_centered_handle - scan the line for embedded handles
8736 html_centered_handle(int *h
, char *line
, int len
)
8741 if(*line
++ == TAG_EMBED
&& len
-- > 0)
8744 if((n
= *line
++) >= --len
){
8748 *h
= (*h
* 10) + (*line
++ - '0');
8752 case TAG_HANDLEOFF
:
8754 *h
= 0; /* assumption 23,342: inverse off ends tags */
8765 * html_centered_flush_line - flush the centered "line" only
8768 html_centered_flush_line(FILTER_S
*f
)
8770 if(HD(f
)->centered
->line
.used
){
8773 /* hide "word" from flush */
8774 i
= HD(f
)->centered
->word
.used
;
8775 j
= HD(f
)->centered
->word
.width
;
8776 HD(f
)->centered
->word
.used
= 0;
8777 HD(f
)->centered
->word
.width
= 0;
8778 html_centered_flush(f
);
8780 HD(f
)->centered
->word
.used
= i
;
8781 HD(f
)->centered
->word
.width
= j
;
8787 * html_write_indent - write indentation mindful of display attributes
8790 html_write_indent(FILTER_S
*f
, int indent
)
8794 html_putc(f
, TAG_EMBED
);
8795 html_putc(f
, TAG_BOLDOFF
);
8799 html_putc(f
, TAG_EMBED
);
8800 html_putc(f
, TAG_ULINEOFF
);
8806 html_putc(f
, ' '); /* indent as needed */
8809 * Resume any previous embedded state
8813 html_putc(f
, TAG_EMBED
);
8814 html_putc(f
, TAG_BOLDON
);
8818 html_putc(f
, TAG_EMBED
);
8819 html_putc(f
, TAG_ULINEON
);
8829 html_write_anchor(FILTER_S
*f
, int anchor
)
8834 html_putc(f
, TAG_EMBED
);
8835 html_putc(f
, TAG_HANDLE
);
8836 snprintf(buf
, sizeof(buf
), "%d", anchor
);
8837 html_putc(f
, (int) strlen(buf
));
8839 for(i
= 0; buf
[i
]; i
++)
8840 html_putc(f
, buf
[i
]);
8845 * html_write_newline - write a newline mindful of display attributes
8848 html_write_newline(FILTER_S
*f
)
8852 if(! STRIP(f
)){ /* First tie, off any embedded state */
8853 if(HD(f
)->in_anchor
){
8854 html_putc(f
, TAG_EMBED
);
8855 html_putc(f
, TAG_INVOFF
);
8859 html_putc(f
, TAG_EMBED
);
8860 html_putc(f
, TAG_BOLDOFF
);
8864 html_putc(f
, TAG_EMBED
);
8865 html_putc(f
, TAG_ULINEOFF
);
8868 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8872 p
= color_embed(ps_global
->VAR_NORM_FORE_COLOR
,
8873 ps_global
->VAR_NORM_BACK_COLOR
);
8874 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8879 html_write(f
, "\015\012", 2);
8880 for(i
= HTML_INDENT(f
); i
> 0; i
--)
8883 if(! STRIP(f
)){ /* First tie, off any embedded state */
8885 html_putc(f
, TAG_EMBED
);
8886 html_putc(f
, TAG_BOLDON
);
8890 html_putc(f
, TAG_EMBED
);
8891 html_putc(f
, TAG_ULINEON
);
8894 if(HD(f
)->color
&& (HD(f
)->color
->fg
[0] || HD(f
)->color
->bg
[0])){
8895 char *p
, *tfg
, *tbg
;
8899 tfg
= HD(f
)->color
->fg
;
8900 tbg
= HD(f
)->color
->bg
;
8901 tmp
= new_color_pair(tfg
[0] ? tfg
8902 : color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
),
8904 : color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
));
8905 if(pico_is_good_colorpair(tmp
)){
8906 p
= color_embed(tfg
[0] ? tfg
8907 : ps_global
->VAR_NORM_FORE_COLOR
,
8909 : ps_global
->VAR_NORM_BACK_COLOR
);
8910 for(i
= 0; i
< 2 * (RGBLEN
+ 2); i
++)
8915 free_color_pair(&tmp
);
8922 * html_write - write given n-length string to next filter
8925 html_write(FILTER_S
*f
, char *s
, int n
)
8927 GF_INIT(f
, f
->next
);
8930 /* keep track of attribute state? Not if last char! */
8931 if(!STRIP(f
) && *s
== TAG_EMBED
&& n
-- > 0){
8932 GF_PUTC(f
->next
, TAG_EMBED
);
8946 case TAG_HANDLEOFF
:
8947 HD(f
)->in_anchor
= 0;
8948 GF_PUTC(f
->next
, TAG_INVOFF
);
8955 GF_PUTC(f
->next
, TAG_HANDLE
);
8961 GF_PUTC(f
->next
, i
);
8963 anum
= (anum
* 10) + (*++s
- '0');
8965 GF_PUTC(f
->next
, *s
);
8971 && (h
= get_handle(*HANDLESP(f
), anum
)) != NULL
8972 && (h
->type
== URL
|| h
->type
== Attach
)){
8973 HD(f
)->in_anchor
= anum
;
8984 GF_PUTC(f
->next
, (*s
++) & 0xff);
8987 GF_IP_END(f
->next
); /* clean up next's input pointers */
8992 * html_putc -- actual work of writing to next filter.
8993 * NOTE: Small opt not using full GF_END since our input
8994 * pointers don't need adjusting.
8997 html_putc(FILTER_S
*f
, int ch
)
8999 GF_INIT(f
, f
->next
);
9000 GF_PUTC(f
->next
, ch
& 0xff);
9001 GF_IP_END(f
->next
); /* clean up next's input pointers */
9007 * Only current option is to turn on embedded data stripping for text
9008 * bound to a printer or composer.
9011 gf_html2plain_opt(char *base
,
9014 HANDLE_S
**handlesp
,
9019 int margin_l
, margin_r
;
9021 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
9023 op
->base
= cpystr(base
);
9024 margin_l
= (margin
) ? margin
[0] : 0;
9025 margin_r
= (margin
) ? margin
[1] : 0;
9026 op
->indent
= margin_l
;
9027 op
->columns
= columns
- (margin_l
+ margin_r
);
9028 op
->strip
= ((flags
& GFHP_STRIPPED
) == GFHP_STRIPPED
);
9029 op
->handlesp
= handlesp
;
9030 op
->handles_loc
= ((flags
& GFHP_LOCAL_HANDLES
) == GFHP_LOCAL_HANDLES
);
9031 op
->showserver
= ((flags
& GFHP_SHOW_SERVER
) == GFHP_SHOW_SERVER
);
9032 op
->warnrisk_f
= risk_f
;
9033 op
->no_relative_links
= ((flags
& GFHP_NO_RELATIVE
) == GFHP_NO_RELATIVE
);
9034 op
->related_content
= ((flags
& GFHP_RELATED_CONTENT
) == GFHP_RELATED_CONTENT
);
9035 op
->html
= ((flags
& GFHP_HTML
) == GFHP_HTML
);
9036 op
->html_imgs
= ((flags
& GFHP_HTML_IMAGES
) == GFHP_HTML_IMAGES
);
9037 op
->element_table
= html_element_table
;
9038 return((void *) op
);
9043 gf_html2plain_rss_opt(RSS_FEED_S
**feedp
, int flags
)
9047 op
= (HTML_OPT_S
*) fs_get(sizeof(HTML_OPT_S
));
9048 memset(op
, 0, sizeof(HTML_OPT_S
));
9050 op
->base
= cpystr("");
9051 op
->element_table
= rss_element_table
;
9052 *(op
->feedp
= feedp
) = NULL
;
9053 return((void *) op
);
9057 gf_html2plain_rss_free(RSS_FEED_S
**feedp
)
9059 if(feedp
&& *feedp
){
9061 fs_give((void **) &(*feedp
)->title
);
9064 fs_give((void **) &(*feedp
)->link
);
9066 if((*feedp
)->description
)
9067 fs_give((void **) &(*feedp
)->description
);
9069 if((*feedp
)->source
)
9070 fs_give((void **) &(*feedp
)->source
);
9073 fs_give((void **) &(*feedp
)->image
);
9075 gf_html2plain_rss_free_items(&((*feedp
)->items
));
9076 fs_give((void **) feedp
);
9081 gf_html2plain_rss_free_items(RSS_ITEM_S
**itemp
)
9083 if(itemp
&& *itemp
){
9085 fs_give((void **) &(*itemp
)->title
);
9088 fs_give((void **) &(*itemp
)->link
);
9090 if((*itemp
)->description
)
9091 fs_give((void **) &(*itemp
)->description
);
9093 if((*itemp
)->source
)
9094 fs_give((void **) &(*itemp
)->source
);
9096 gf_html2plain_rss_free_items(&(*itemp
)->next
);
9097 fs_give((void **) itemp
);
9101 #define CID_NONE 0x00
9102 #define CID_DATA 0x01
9103 #define IMG_DATA 0x10
9106 img_tempfile_name(char *line
, long n
, int *flagp
)
9110 char *s
, *t
= NULL
, c
;
9119 if (line
[0] == '\"')
9121 if (n
- f2
> 4 && !struncmp(line
+f2
, "data:", 5))
9123 else if (n
- f2
> 3){
9124 if (!struncmp(line
+f2
, "cid:", 4)){
9127 s
= fs_get((n
- f2
+ 4)*sizeof(char));
9128 sprintf(s
, "<%s", line
+f2
);
9129 if (s
[strlen(s
)-1] == '\"')
9130 s
[strlen(s
)-1] = '>';
9136 /* find the tmpdir where all these files will be saved to */
9138 for(i
= 0; ps_global
->atmts
[i
].tmpdir
== NULL
&& ps_global
->atmts
[i
].description
!= NULL
; i
++);
9139 t
= ps_global
->atmts
[i
].description
? ps_global
->atmts
[i
].tmpdir
: NULL
;
9142 /* now we need to look for s in the list of attachments */
9143 for (i
= 0, found
= 0; found
== 0 && ps_global
->atmts
[i
].description
!= NULL
; i
++)
9144 if (ps_global
->atmts
[i
].body
9145 && ps_global
->atmts
[i
].body
->type
== TYPEIMAGE
9146 && strcmp(ps_global
->atmts
[i
].body
->id
, s
) == 0){
9151 fs_give((void **) &s
);
9152 if(found
&& ps_global
->atmts
[i
].cid_tmpfile
== NULL
){
9154 if (ps_global
->atmts
[i
].cid_tmpfile
== NULL
){
9155 for(param
= ps_global
->atmts
[i
].body
->parameter
; param
; param
= param
->next
){
9156 if (!strucmp(param
->attribute
, "NAME")){
9157 strncpy(imgfile
, param
->value
, sizeof(imgfile
));
9158 imgfile
[sizeof(imgfile
)-1] = '\0';
9159 extp
= strrchr(imgfile
, '.');
9163 ps_global
->atmts
[i
].cid_tmpfile
= temp_nam_ext(t
, "tmp-img-", extp
);
9166 if(found
&& ps_global
->atmts
[i
].cid_tmpfile
!= NULL
)
9167 s
= strstr(ps_global
->atmts
[i
].cid_tmpfile
, "tmp-img-");
9175 #define COLLECT(X, C) { \
9176 if((X)->n == buflen){ \
9177 fs_resize((void **) &((X)->line), buflen + 1024); \
9178 (X)->linep = (X)->line + buflen; \
9181 *((X)->linep)++ = (C); \
9182 (X)->n = (X)->linep - (X)->line; \
9185 #define RESET_FILTER(X) { \
9186 (X)->linep = (X)->line; \
9191 gf_html_cid2file(FILTER_S
*f
, int cmd
)
9194 register unsigned char c
;
9195 static long buflen
= 0L;
9197 GF_INIT(f
, f
->next
);
9200 register int state
= f
->f1
;
9202 while(GF_GETC(f
, c
)){
9204 if(state
== 0){ /* look for "<img " */
9205 if (c
== '<') f
->f2
= 1;
9207 if (f
->f2
== 1 && (c
== 'i' || c
== 'I')) f
->f2
= 2;
9208 else if (f
->f2
== 2 && (c
== 'm' || c
== 'M')) f
->f2
= 3;
9209 else if (f
->f2
== 3 && (c
== 'g' || c
== 'G')) f
->f2
= 4;
9210 else if (f
->f2
== 4 && ASCII_ISSPACE(c
)){ f
->f2
= 0; state
= 1; }
9214 else if(state
== 1){ /* look for "src=" */
9215 if (c
== 's' || c
== 'S') f
->f2
= 1;
9216 else if (f
->f2
== 1 && (c
== 'r' || c
== 'R')) f
->f2
= 2;
9217 else if (f
->f2
== 2 && (c
== 'c' || c
== 'C')) f
->f2
= 3;
9218 else if (f
->f2
== 3 && c
== '='){ GF_PUTC(f
->next
, c
); state
= 2; }
9219 else if (f
->f2
== 3 && !ASCII_ISSPACE(c
)) f
->f2
= 0;
9222 else if (state
== 2){ /* collect all data */
9223 if(ASCII_ISSPACE(c
) || c
== '>'){
9226 char *s
= img_tempfile_name(f
->line
, f
->n
, &flag
);
9227 if(flag
& CID_DATA
){
9230 for(; *s
!= '\0'; s
++)
9234 GF_PUTC(f
->next
, '\"');
9235 if((flag
& (CID_DATA
| IMG_DATA
)) || f
->t
){
9236 for(p
= f
->line
; f
->n
; f
->n
--, p
++){
9237 if(*p
== '\"') continue;
9238 GF_PUTC(f
->next
, *p
);
9242 GF_PUTC(f
->next
, '\"');
9243 /* no need to write "c" right now to the stream. It will be written below */
9244 state
= ASCII_ISSPACE(c
) ? 1 : 0;
9247 else COLLECT(f
, c
); /* collect this data */
9252 GF_PUTC(f
->next
, c
);
9258 else if(cmd
== GF_EOD
){
9260 char *s
= img_tempfile_name(f
->line
, f
->n
, &f
->f2
);
9261 GF_PUTC(f
->next
, '\"');
9263 for(p
= s
; *p
; p
++){
9264 if(*p
== '\"') continue;
9265 GF_PUTC(f
->next
, *p
);
9268 GF_PUTC(f
->next
, '\"');
9269 GF_PUTC(f
->next
, '>');
9273 fs_give((void **)&(f
->line
)); /* free temp line buffer */
9274 (void) GF_FLUSH(f
->next
);
9275 (*f
->next
->f
)(f
->next
, GF_EOD
);
9277 else if(cmd
== GF_RESET
){
9278 dprint((9, "-- gf_reset cid2file\n"));
9279 f
->n
= 0L; /* number of bytes in buffer */
9280 f
->f1
= 0; /* state */
9281 f
->f2
= 0; /* total number of bytes read that match pattern */
9282 f
->t
= *(char *)f
->opt
;
9286 /* END OF HTML-TO-PLAIN text filter */
9289 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9290 * from the text stream.
9293 #define MAX_ESC_LEN 5
9296 * the simple filter, removes unknown escape codes from the stream
9299 gf_escape_filter(FILTER_S
*f
, int flg
)
9302 GF_INIT(f
, f
->next
);
9305 register unsigned char c
;
9306 register int state
= f
->f1
;
9308 while(GF_GETC(f
, c
)){
9311 if(c
== '\033' || f
->n
== MAX_ESC_LEN
){
9312 f
->line
[f
->n
] = '\0';
9314 if(!match_escapes(f
->line
)){
9315 GF_PUTC(f
->next
, '^');
9316 GF_PUTC(f
->next
, '[');
9319 GF_PUTC(f
->next
, '\033');
9323 GF_PUTC(f
->next
, *p
++);
9328 state
= 0; /* fall thru */
9331 f
->line
[f
->n
++] = c
; /* collect */
9339 GF_PUTC(f
->next
, c
);
9345 else if(flg
== GF_EOD
){
9347 if(!match_escapes(f
->line
)){
9348 GF_PUTC(f
->next
, '^');
9349 GF_PUTC(f
->next
, '[');
9352 GF_PUTC(f
->next
, '\033');
9355 for(p
= f
->line
; f
->n
; f
->n
--, p
++)
9356 GF_PUTC(f
->next
, *p
);
9358 fs_give((void **)&(f
->line
)); /* free temp line buffer */
9359 (void) GF_FLUSH(f
->next
);
9360 (*f
->next
->f
)(f
->next
, GF_EOD
);
9362 else if(flg
== GF_RESET
){
9363 dprint((9, "-- gf_reset escape\n"));
9366 f
->linep
= f
->line
= (char *)fs_get((MAX_ESC_LEN
+ 1) * sizeof(char));
9373 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9374 * corresponding string representations (you know, ^blah and such)...
9378 * the simple filter transforms unknown control characters in the stream
9379 * into harmless strings.
9382 gf_control_filter(FILTER_S
*f
, int flg
)
9384 GF_INIT(f
, f
->next
);
9387 register unsigned char c
;
9388 register int filt_only_c0
;
9390 filt_only_c0
= f
->opt
? (*(int *) f
->opt
) : 0;
9392 while(GF_GETC(f
, c
)){
9394 if(((c
< 0x20 || c
== 0x7f)
9395 || (c
>= 0x80 && c
< 0xA0 && !filt_only_c0
))
9396 && !(ASCII_ISSPACE((unsigned char) c
)
9397 || c
== '\016' || c
== '\017' || c
== '\033')){
9398 GF_PUTC(f
->next
, c
>= 0x80 ? '~' : '^');
9399 GF_PUTC(f
->next
, (c
== 0x7f) ? '?' : (c
& 0x1f) + '@');
9402 GF_PUTC(f
->next
, c
);
9407 else if(flg
== GF_EOD
){
9408 (void) GF_FLUSH(f
->next
);
9409 (*f
->next
->f
)(f
->next
, GF_EOD
);
9415 * function called from the outside to set
9416 * control filter's option, which says to filter C0 control characters
9417 * but not C1 control chars. We don't call it at all if we don't want
9418 * to filter C0 chars either.
9421 gf_control_filter_opt(int *filt_only_c0
)
9423 return((void *) filt_only_c0
);
9428 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9429 * This prevents the possibility of embedding other tags.
9430 * We assume that this filter should only be used for something
9431 * that is eventually writing to a display, which has the special
9432 * knowledge of quoted TAG_EMBEDs.
9435 gf_tag_filter(FILTER_S
*f
, int flg
)
9437 GF_INIT(f
, f
->next
);
9440 register unsigned char c
;
9442 while(GF_GETC(f
, c
)){
9444 if((c
& 0xff) == (TAG_EMBED
& 0xff)){
9445 GF_PUTC(f
->next
, TAG_EMBED
);
9446 GF_PUTC(f
->next
, c
);
9449 GF_PUTC(f
->next
, c
);
9454 else if(flg
== GF_EOD
){
9455 (void) GF_FLUSH(f
->next
);
9456 (*f
->next
->f
)(f
->next
, GF_EOD
);
9462 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9463 * specified line width
9467 typedef struct wrap_col_s
{
9472 unsigned do_indent
:1;
9473 unsigned on_comma
:1;
9479 unsigned leave_flowed
:1;
9480 unsigned use_color
:1;
9481 unsigned hdr_color
:1;
9482 unsigned for_compose
:1;
9483 unsigned handle_soft_hyphen
:1;
9484 unsigned saw_soft_hyphen
:1;
9485 unsigned trailing_space
:1;
9486 unsigned char utf8buf
[7];
9487 unsigned char *utf8bufp
;
9508 #define WRAP_MARG_L(F) (((WRAP_S *)(F)->opt)->margin_l)
9509 #define WRAP_MARG_R(F) (((WRAP_S *)(F)->opt)->margin_r)
9510 #define WRAP_COL(F) (((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
9511 #define WRAP_MAX_COL(F) (((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
9512 #define WRAP_INDENT(F) (((WRAP_S *)(F)->opt)->indent)
9513 #define WRAP_DO_IND(F) (((WRAP_S *)(F)->opt)->do_indent)
9514 #define WRAP_COMMA(F) (((WRAP_S *)(F)->opt)->on_comma)
9515 #define WRAP_FLOW(F) (((WRAP_S *)(F)->opt)->flowed)
9516 #define WRAP_DELSP(F) (((WRAP_S *)(F)->opt)->delsp)
9517 #define WRAP_FL_QD(F) (((WRAP_S *)(F)->opt)->quote_depth)
9518 #define WRAP_FL_QC(F) (((WRAP_S *)(F)->opt)->quote_count)
9519 #define WRAP_FL_SIG(F) (((WRAP_S *)(F)->opt)->sig)
9520 #define WRAP_HARD(F) (((WRAP_S *)(F)->opt)->hard_nl)
9521 #define WRAP_LV_FLD(F) (((WRAP_S *)(F)->opt)->leave_flowed)
9522 #define WRAP_USE_CLR(F) (((WRAP_S *)(F)->opt)->use_color)
9523 #define WRAP_HDR_CLR(F) (((WRAP_S *)(F)->opt)->hdr_color)
9524 #define WRAP_FOR_CMPS(F) (((WRAP_S *)(F)->opt)->for_compose)
9525 #define WRAP_HANDLE_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->handle_soft_hyphen)
9526 #define WRAP_SAW_SOFT_HYPHEN(F) (((WRAP_S *)(F)->opt)->saw_soft_hyphen)
9527 #define WRAP_UTF8BUF(F, C) (((WRAP_S *)(F)->opt)->utf8buf[C])
9528 #define WRAP_UTF8BUFP(F) (((WRAP_S *)(F)->opt)->utf8bufp)
9529 #define WRAP_STATE(F) (((WRAP_S *)(F)->opt)->state)
9530 #define WRAP_QUOTED(F) (((WRAP_S *)(F)->opt)->quoted)
9531 #define WRAP_TAGS(F) (((WRAP_S *)(F)->opt)->tags)
9532 #define WRAP_BOLD(F) (((WRAP_S *)(F)->opt)->bold)
9533 #define WRAP_ULINE(F) (((WRAP_S *)(F)->opt)->uline)
9534 #define WRAP_INVERSE(F) (((WRAP_S *)(F)->opt)->inverse)
9535 #define WRAP_LASTC(F) (((WRAP_S *)(F)->opt)->lineendp)
9536 #define WRAP_EMBED(F) (((WRAP_S *)(F)->opt)->embedded)
9537 #define WRAP_ANCHOR(F) (((WRAP_S *)(F)->opt)->anchor)
9538 #define WRAP_PB_OFF(F) (((WRAP_S *)(F)->opt)->prefbrk)
9539 #define WRAP_PB_LEN(F) (((WRAP_S *)(F)->opt)->prefbrkn)
9540 #define WRAP_ALLWSP(F) (((WRAP_S *)(F)->opt)->allwsp)
9541 #define WRAP_SPC_LEN(F) (((WRAP_S *)(F)->opt)->space_len)
9542 #define WRAP_TRL_SPC(F) (((WRAP_S *)(F)->opt)->trailing_space)
9543 #define WRAP_SPEC(F, C) ((WRAP_S *) (F)->opt)->special[C]
9544 #define WRAP_COLOR(F) (((WRAP_S *)(F)->opt)->color)
9545 #define WRAP_COLOR_SET(F) ((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
9546 #define WRAP_SPACES(F) (((WRAP_S *)(F)->opt)->spaces)
9547 #define WRAP_PUTC(F,C,W) { \
9548 if((F)->linep == WRAP_LASTC(F)){ \
9549 size_t offset = (F)->linep - (F)->line; \
9550 fs_resize((void **) &(F)->line, \
9551 (2 * offset) * sizeof(char)); \
9552 (F)->linep = &(F)->line[offset]; \
9553 WRAP_LASTC(F) = &(F)->line[2*offset-1]; \
9555 *(F)->linep++ = (C); \
9559 #define WRAP_EMBED_PUTC(F,C) { \
9561 WRAP_PUTC((F), C, 0); \
9564 so_writec(C, WRAP_SPACES(F)); \
9567 #define WRAP_COLOR_UNSET(F) { \
9568 if(WRAP_COLOR_SET(F)){ \
9569 WRAP_COLOR(F)->fg[0] = '\0'; \
9574 * wrap_flush_embed flags
9576 #define WFE_NONE 0 /* Nothing special */
9577 #define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9580 int wrap_flush(FILTER_S
*, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9581 int wrap_flush_embed(FILTER_S
*, unsigned char **, unsigned char **,
9582 unsigned char **, unsigned char **);
9583 int wrap_flush_s(FILTER_S
*,char *, int, int, unsigned char **, unsigned char **,
9584 unsigned char **, unsigned char **, int);
9585 int wrap_eol(FILTER_S
*, int, unsigned char **, unsigned char **,
9586 unsigned char **, unsigned char **);
9587 int wrap_bol(FILTER_S
*, int, int, unsigned char **,
9588 unsigned char **, unsigned char **, unsigned char **);
9589 int wrap_quote_insert(FILTER_S
*, unsigned char **, unsigned char **,
9590 unsigned char **, unsigned char **);
9593 * the no longer simple filter, breaks lines at end of white space nearest
9594 * to global "gf_wrap_width" in length
9595 * It also supports margins, indents (inverse indenting, really) and
9596 * flowed text (ala RFC 3676)
9600 gf_wrap(FILTER_S
*f
, int flg
)
9603 GF_INIT(f
, f
->next
);
9607 * f->line buffer where next "word" being considered is stored
9608 * f->f2 width in screen cells of f->line stuff
9609 * f->n width in screen cells of the part of this line committed to next
9614 register unsigned char c
;
9615 register int state
= f
->f1
;
9616 int width
, full_character
;
9618 while(GF_GETC(f
, c
)){
9621 case CCR
: /* CRLF or CR in text ? */
9622 state
= BOL
; /* either way, handle start */
9626 if(f
->f2
== 0 && WRAP_SPC_LEN(f
) && WRAP_TRL_SPC(f
)){
9628 * whack trailing space char, but be aware
9629 * of embeds in space buffer. grok them just
9630 * in case they contain a 0x20 value
9633 char *sb
, *sbp
, *scp
= NULL
;
9636 for(sb
= sbp
= (char *)so_text(WRAP_SPACES(f
)); *sbp
; sbp
++){
9647 if(strlen(sbp
) >= x
)
9654 if(strlen(sbp
) >= RGBLEN
)
9655 sbp
+= (RGBLEN
- 1);
9670 /* replace space buf without trailing space char */
9672 STORE_S
*ns
= so_get(CharStar
, NULL
, EDIT_ACCESS
);
9676 WRAP_TRL_SPC(f
) = 0;
9681 so_give(&WRAP_SPACES(f
));
9682 WRAP_SPACES(f
) = ns
;
9686 else{ /* fixed line */
9688 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9689 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9692 * When we get to a real end of line, we don't need to
9693 * remember what the special color was anymore because
9694 * we aren't going to be changing back to it. We unset it
9695 * so that we don't keep resetting the color to normal.
9697 WRAP_COLOR_UNSET(f
);
9700 if(c
== '\012'){ /* get c following LF */
9703 /* else c is first char of new line, fall thru */
9706 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9707 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9708 WRAP_COLOR_UNSET(f
); /* see note above */
9712 /* else fall thru to deal with beginning of line */
9718 WRAP_FL_QC(f
) = 1; /* init it */
9719 state
= FL_QLEV
; /* go collect it */
9722 /* if EMBEDed, process it and return here */
9723 if(c
== (unsigned char) TAG_EMBED
){
9724 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9725 WRAP_STATE(f
) = state
;
9730 /* quote level change implies new paragraph */
9733 if(WRAP_HARD(f
) == 0){
9735 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9736 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9737 WRAP_COLOR_UNSET(f
); /* see note above */
9742 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
,
9743 &eob
); /* write quoting prefix */
9748 case '\015' : /* a blank line? */
9749 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9750 state
= CCR
; /* go collect it */
9753 case ' ' : /* space stuffed */
9754 state
= FL_STF
; /* just eat it */
9757 case '-' : /* possible sig-dash */
9758 WRAP_FL_SIG(f
) = 1; /* init state */
9759 state
= FL_SIG
; /* go collect it */
9763 state
= DFL
; /* go back to normal */
9764 goto case_dfl
; /* handle c like DFL case */
9770 if(WRAP_COMMA(f
) && c
== TAB
){
9771 wrap_bol(f
, 1, 0, &ip
, &eib
, &op
,
9772 &eob
); /* convert to normal indent */
9776 wrap_bol(f
,0,0, &ip
, &eib
, &op
, &eob
);
9777 goto case_dfl
; /* handle c like DFL case */
9783 if(c
== '>'){ /* another level */
9787 /* if EMBEDed, process it and return here */
9788 if(c
== (unsigned char) TAG_EMBED
){
9789 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9790 WRAP_STATE(f
) = state
;
9795 /* quote level change signals new paragraph */
9796 if(WRAP_FL_QC(f
) != WRAP_FL_QD(f
)){
9797 WRAP_FL_QD(f
) = WRAP_FL_QC(f
);
9798 if(WRAP_HARD(f
) == 0){ /* add hard newline */
9799 WRAP_HARD(f
) = 1; /* hard newline */
9800 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9801 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9802 WRAP_COLOR_UNSET(f
); /* see note above */
9807 wrap_bol(f
,0,1, &ip
, &eib
, &op
, &eob
);
9812 case '\015' : /* a blank line? */
9813 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9814 state
= CCR
; /* go collect it */
9817 case ' ' : /* space-stuffed! */
9818 state
= FL_STF
; /* just eat it */
9821 case '-' : /* sig dash? */
9826 default : /* something else */
9828 goto case_dfl
; /* handle c like DFL */
9834 case FL_STF
: /* space stuffed */
9836 case '\015' : /* a blank line? */
9837 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9838 state
= CCR
; /* go collect it */
9841 case (unsigned char) TAG_EMBED
: /* process TAG data */
9842 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9843 WRAP_STATE(f
) = state
; /* and return */
9847 case '-' : /* sig dash? */
9853 default : /* something else */
9855 goto case_dfl
; /* handle c like DFL */
9860 case FL_SIG
: /* sig-dash collector */
9861 switch (WRAP_FL_SIG(f
)){ /* possible sig-dash? */
9863 if(c
!= '-'){ /* not a sigdash */
9864 if((f
->n
+ WRAP_SPC_LEN(f
) + 1) > WRAP_COL(f
)){
9865 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9866 &eob
); /* note any embedded*/
9867 wrap_eol(f
, 1, &ip
, &eib
,
9868 &op
, &eob
); /* plunk down newline */
9869 wrap_bol(f
, 1, 1, &ip
, &eib
,
9870 &op
, &eob
); /* write any prefix */
9873 WRAP_PUTC(f
,'-', 1); /* write what we got */
9880 /* don't put anything yet until we know to wrap or not */
9885 if(c
!= ' '){ /* not a sigdash */
9886 WRAP_PUTC(f
, '-', 1);
9887 if((f
->n
+ WRAP_SPC_LEN(f
) + 2) > WRAP_COL(f
)){
9888 wrap_flush_embed(f
, &ip
, &eib
, &op
,
9889 &eob
); /* note any embedded*/
9890 wrap_eol(f
, 1, &ip
, &eib
,
9891 &op
, &eob
); /* plunk down newline */
9892 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
9893 &eob
); /* write any prefix */
9896 WRAP_PUTC(f
,'-', 1); /* write what we got */
9903 /* don't put anything yet until we know to wrap or not */
9908 if(c
== '\015'){ /* success! */
9909 /* known sigdash, newline if soft nl */
9910 if(WRAP_SPC_LEN(f
)){
9911 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9912 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9913 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9923 WRAP_FL_SIG(f
) = 4; /* possible success */
9928 case (unsigned char) TAG_EMBED
:
9930 * At this point we're almost 100% sure that we've got
9931 * a sigdash. Putc it (adding newline if previous
9932 * was a soft nl) so we get it the right color
9933 * before we store this new embedded stuff
9935 if(WRAP_SPC_LEN(f
)){
9936 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9937 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9938 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9947 case '\015' : /* success! */
9949 * We shouldn't get here, but in case we do, we have
9950 * not yet put the sigdash
9952 if(WRAP_SPC_LEN(f
)){
9953 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9954 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
);
9955 wrap_bol(f
, 0, 1, &ip
, &eib
, &op
, &eob
);
9964 default : /* that's no sigdash! */
9965 /* write what we got but didn't put yet */
9966 WRAP_PUTC(f
,'-', 1);
9967 WRAP_PUTC(f
,'-', 1);
9968 WRAP_PUTC(f
,' ', 1);
9971 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
9972 WRAP_SPC_LEN(f
) = 1;
9973 state
= DFL
; /* set normal state */
9974 goto case_dfl
; /* and go do "c" */
9980 WRAP_STATE(f
) = FL_SIG
; /* come back here */
9981 WRAP_FL_SIG(f
) = 6; /* and seek EOL */
9982 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
9983 state
= TAG
; /* process embed */
9988 * at this point we've already putc the sigdash in case 4
9991 case (unsigned char) TAG_EMBED
:
9995 case '\015' : /* success! */
9999 default : /* that's no sigdash! */
10001 * probably never reached (fake sigdash with embedded
10002 * stuff) but if this did get reached, then we
10003 * might have accidentally disobeyed a soft nl
10005 WRAP_FL_SIG(f
) = 0;
10006 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10007 WRAP_SPC_LEN(f
) = 1;
10008 state
= DFL
; /* set normal state */
10009 goto case_dfl
; /* and go do "c" */
10016 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
10018 WRAP_FL_SIG(f
) = 0;
10019 state
= DFL
; /* set normal state */
10020 goto case_dfl
; /* and go process "c" */
10028 * This was just if(WRAP_SPEC(f, c)) before the change to add
10029 * the == 0 test. This isn't quite right, either. We should really
10030 * be looking for special characters in the UCS characters, not
10031 * in the incoming stream of UTF-8. It is not right to
10032 * call this on bytes that are in the middle of a UTF-8 character,
10033 * hence the == 0 test which restricts it to the first byte
10034 * of a character. This isn't right, either, but it's closer.
10035 * Also change the definition of WRAP_SPEC so that isspace only
10036 * matches ascii characters, which will never be in the middle
10037 * of a UTF-8 multi-byte character.
10039 if((WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 0 && WRAP_SPEC(f
, c
)){
10040 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
10046 if(f
->f2
){ /* any non-lwsp to flush? */
10048 /* remember our second best break point */
10049 WRAP_PB_OFF(f
) = f
->linep
- f
->line
;
10050 WRAP_PB_LEN(f
) = f
->f2
;
10054 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10057 switch(c
){ /* remember separator */
10060 WRAP_TRL_SPC(f
) = 1;
10061 so_writec(' ',WRAP_SPACES(f
));
10066 int i
= (int) f
->n
+ WRAP_SPC_LEN(f
);
10072 so_writec(TAB
,WRAP_SPACES(f
));
10073 WRAP_TRL_SPC(f
) = 0;
10078 default : /* some control char? */
10079 WRAP_SPC_LEN(f
) += 2;
10080 WRAP_TRL_SPC(f
) = 0;
10087 WRAP_QUOTED(f
) = !WRAP_QUOTED(f
);
10090 case '\015' : /* already has newline? */
10094 case '\012' : /* bare LF in text? */
10095 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* they must've */
10096 wrap_eol(f
, 0, &ip
, &eib
, &op
, &eob
); /* meant */
10097 wrap_bol(f
,1,1, &ip
, &eib
, &op
, &eob
); /* newline... */
10100 case (unsigned char) TAG_EMBED
:
10101 WRAP_EMBED_PUTC(f
, TAG_EMBED
);
10102 WRAP_STATE(f
) = state
;
10107 if(!WRAP_QUOTED(f
)){
10108 /* handle this special case in general code below */
10109 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_MAX_COL(f
)
10110 && WRAP_ALLWSP(f
) && WRAP_PB_OFF(f
))
10113 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1 > WRAP_COL(f
)){
10114 if(WRAP_ALLWSP(f
)) /* if anything visible */
10115 wrap_flush(f
, &ip
, &eib
, &op
,
10116 &eob
); /* ... blat buf'd chars */
10118 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10119 &eob
); /* plunk down newline */
10120 wrap_bol(f
, 1, 1, &ip
, &eib
, &op
,
10121 &eob
); /* write any prefix */
10124 WRAP_PUTC(f
, ',', 1); /* put out comma */
10125 wrap_flush(f
, &ip
, &eib
, &op
,
10126 &eob
); /* write buf'd chars */
10133 else if(WRAP_HANDLE_SOFT_HYPHEN(f
)
10134 && (WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0)) == 1
10135 && WRAP_UTF8BUF(f
, 0) == 0xC2 && c
== 0xAD){
10137 * This is a soft hyphen. If there is enough space for
10138 * a real hyphen to fit on the line here then we can
10139 * flush everything up to before the soft hyphen,
10140 * and simply remember that we saw a soft hyphen.
10141 * If it turns out that we can't fit the next piece in
10142 * then wrap_eol will append a real hyphen to the line.
10143 * If we can fit another piece in it will be because we've
10144 * reached the next break point. At that point we'll flush
10145 * everything but won't include the unneeded hyphen. We erase
10146 * the fact that we saw this soft hyphen because it has
10147 * become irrelevant.
10149 * If the hyphen is the character that puts us over the edge
10150 * we go through the else case.
10153 /* erase this soft hyphen character from buffer */
10154 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10156 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
10157 if(f
->f2
) /* any non-lwsp to flush? */
10158 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10160 /* remember that we saw the soft hyphen */
10161 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
10165 * Everything up to the hyphen fits, otherwise it
10166 * would have already been flushed the last time
10167 * through the loop. But the hyphen won't fit. So
10168 * we need to go back to the last line break and
10169 * break there instead. Then start a new line with
10170 * the buffered up characters and the soft hyphen.
10172 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
10173 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10174 &eob
); /* plunk down newline */
10175 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10176 &eob
); /* write any prefix */
10179 * Now we're in the same situation as we would have
10180 * been above except we're on a new line. Try to
10181 * flush out the characters seen up to the hyphen.
10183 if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ 1) <= WRAP_COL(f
)){
10184 if(f
->f2
) /* any non-lwsp to flush? */
10185 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10187 /* remember that we saw the soft hyphen */
10188 WRAP_SAW_SOFT_HYPHEN(f
) = 1;
10191 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
10197 full_character
= 0;
10200 unsigned char *inputp
;
10201 unsigned long remaining_octets
;
10204 if(WRAP_UTF8BUFP(f
) < &WRAP_UTF8BUF(f
, 0) + 6){ /* always true */
10206 *WRAP_UTF8BUFP(f
)++ = c
;
10207 remaining_octets
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10208 if(remaining_octets
== 1 && isascii(WRAP_UTF8BUF(f
, 0))){
10211 int i
= (int) f
->n
;
10218 else if(c
< 0x80 && iscntrl((unsigned char) c
))
10224 inputp
= &WRAP_UTF8BUF(f
, 0);
10225 ucs
= (UCS
) utf8_get(&inputp
, &remaining_octets
);
10227 case U8G_ENDSTRG
: /* incomplete character, wait */
10228 case U8G_ENDSTRI
: /* incomplete character, wait */
10233 if(ucs
& U8G_ERROR
|| ucs
== UBOGON
){
10235 * None of these cases is supposed to happen. If it
10236 * does happen then the input stream isn't UTF-8
10237 * so something is wrong. Writechar will treat
10238 * each octet in the input buffer as a separate
10239 * error character and print a '?' for each,
10240 * so the width will be the number of octets.
10242 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10246 /* got a character */
10247 width
= wcellwidth(ucs
);
10252 * This happens when we have a UTF-8 character that
10253 * we aren't able to print in our locale. For example,
10254 * if the locale is set up with the terminal
10255 * expecting ISO-8859-1 characters then there are
10256 * lots of UTF-8 characters that can't be printed.
10257 * Print a '?' instead.
10269 * This cannot happen because an error would have
10270 * happened at least by character #6. So if we get
10271 * here there is a bug in utf8_get().
10273 if(WRAP_UTF8BUFP(f
) == &WRAP_UTF8BUF(f
, 0) + 6){
10274 *WRAP_UTF8BUFP(f
)++ = c
;
10278 * We could possibly do some more sophisticated
10279 * resynchronization here, but we aren't doing
10280 * anything in Writechar so it wouldn't match up
10281 * with that anyway. Just figure each character will
10282 * end up being printed as a ? character.
10284 width
= WRAP_UTF8BUFP(f
) - &WRAP_UTF8BUF(f
, 0);
10289 if(WRAP_ALLWSP(f
)){
10291 * Nothing is visible yet but the first word may be too long
10292 * all by itself. We need to break early.
10294 if(f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
> WRAP_MAX_COL(f
)){
10296 * A little reaching behind the curtain here.
10297 * if there's at least a preferable break point, use
10298 * it and stuff what's left back into the wrap buffer.
10299 * The "nwsp" latch is used to skip leading whitespace
10300 * The second half of the test prevents us from wrapping
10301 * at the preferred break point in the case that it
10302 * is so early in the line that it doesn't help.
10303 * That is, the width of the indent is even more than
10304 * the width of the first part before the preferred
10305 * break point. An example would be breaking after
10306 * "To:" when the indent is 4 which is > 3.
10308 if(WRAP_PB_OFF(f
) && WRAP_PB_LEN(f
) >= WRAP_INDENT(f
)){
10309 char *p1
= f
->line
+ WRAP_PB_OFF(f
);
10310 char *p2
= f
->linep
;
10312 int nwsp
= 0, left_after_wrap
;
10314 left_after_wrap
= f
->f2
- WRAP_PB_LEN(f
);
10316 f
->f2
= WRAP_PB_LEN(f
);
10319 wrap_flush(f
, &ip
, &eib
, &op
, &eob
); /* flush shortened buf */
10321 /* put back rest of characters */
10324 if(!(c2
== ' ' || c2
== '\t') || nwsp
){
10325 WRAP_PUTC(f
, c2
, 0);
10329 left_after_wrap
--; /* wrong if a tab! */
10332 f
->f2
= MAX(left_after_wrap
, 0);
10334 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10335 &eob
); /* plunk down newline */
10336 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10337 &eob
); /* write any prefix */
10341 * If we do the less preferable break point at
10342 * the space we don't want to lose the fact that
10343 * we might be able to break at this comma for
10346 if(full_character
&& c
== ','){
10347 WRAP_PUTC(f
, c
, 1);
10348 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10349 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10353 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10355 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10356 &eob
); /* plunk down newline */
10357 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10358 &eob
); /* write any prefix */
10362 else if((f
->n
+ WRAP_SPC_LEN(f
) + f
->f2
+ width
) > WRAP_COL(f
)){
10363 wrap_flush_embed(f
, &ip
, &eib
, &op
, &eob
);
10364 wrap_eol(f
, 1, &ip
, &eib
, &op
,
10365 &eob
); /* plunk down newline */
10366 wrap_bol(f
,1,1, &ip
, &eib
, &op
,
10367 &eob
); /* write any prefix */
10371 * Commit entire multibyte UTF-8 character at once
10372 * instead of writing partial characters into the
10375 if(full_character
){
10378 for(q
= &WRAP_UTF8BUF(f
, 0); q
< WRAP_UTF8BUFP(f
); q
++){
10379 WRAP_PUTC(f
, *q
, width
);
10383 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
10390 WRAP_EMBED_PUTC(f
, c
);
10393 WRAP_EMBED(f
) = -1;
10399 WRAP_EMBED(f
) = RGBLEN
;
10404 state
= WRAP_STATE(f
);
10411 WRAP_EMBED_PUTC(f
, c
);
10418 WRAP_PUTC(f
, c
, 0);
10421 so_writec(c
, WRAP_SPACES(f
));
10423 if(!(WRAP_EMBED(f
) -= 1)){
10424 state
= WRAP_STATE(f
);
10432 GF_END(f
, f
->next
);
10434 else if(flg
== GF_EOD
){
10435 wrap_flush(f
, &ip
, &eib
, &op
, &eob
);
10437 free_color_pair(&WRAP_COLOR(f
));
10439 fs_give((void **) &f
->line
); /* free temp line buffer */
10440 so_give(&WRAP_SPACES(f
));
10441 fs_give((void **) &f
->opt
); /* free wrap widths struct */
10442 (void) GF_FLUSH(f
->next
);
10443 (*f
->next
->f
)(f
->next
, GF_EOD
);
10445 else if(flg
== GF_RESET
){
10446 dprint((9, "-- gf_reset wrap\n"));
10448 f
->n
= 0L; /* displayed length of line so far */
10449 f
->f2
= 0; /* displayed length of buffered chars */
10450 WRAP_HARD(f
) = 1; /* starting at beginning of line */
10451 if(! (WRAP_S
*) f
->opt
)
10452 f
->opt
= gf_wrap_filter_opt(75, 80, NULL
, 0, 0);
10454 while(WRAP_INDENT(f
) >= WRAP_MAX_COL(f
))
10455 WRAP_INDENT(f
) /= 2;
10457 f
->line
= (char *) fs_get(WRAP_MAX_COL(f
) * sizeof(char));
10458 f
->linep
= f
->line
;
10459 WRAP_LASTC(f
) = &f
->line
[WRAP_MAX_COL(f
) - 1];
10461 for(i
= 0; i
< 256; i
++)
10462 ((WRAP_S
*) f
->opt
)->special
[i
] = ((i
== '\"' && WRAP_COMMA(f
))
10465 || (i
== (unsigned char) TAG_EMBED
10467 || (i
== ',' && WRAP_COMMA(f
)
10468 && !WRAP_QUOTED(f
))
10469 || ASCII_ISSPACE(i
));
10470 WRAP_SPACES(f
) = so_get(CharStar
, NULL
, EDIT_ACCESS
);
10471 WRAP_UTF8BUFP(f
) = &WRAP_UTF8BUF(f
, 0);
/*
 * wrap_flush - hand everything the wrap filter has buffered to the next
 *              filter and reset the buffers.
 *
 * Visible behaviour: the buffered whitespace held in WRAP_SPACES(f) is
 * passed to wrap_flush_s() with width WRAP_SPC_LEN(f), the whitespace
 * store is truncated and its counters zeroed; then the bytes between
 * f->line and f->linep are passed to wrap_flush_s() with width f->f2,
 * f->linep is rewound and the preferred-break bookkeeping
 * (WRAP_PB_OFF / WRAP_PB_LEN) is cleared.
 *
 * ipp/eibp/opp/eobp are forwarded untouched to wrap_flush_s().
 *
 * NOTE(review): this listing omits several original lines (local
 * declarations, and whatever happens between the numbered lines shown).
 * In particular nothing visible re-points "s" before the second
 * wrap_flush_s() call - confirm against the complete source.
 */
10476 wrap_flush(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10477 unsigned char **opp
, unsigned char **eobp
)
/* step 1: flush the pending whitespace store */
10482 s
= (char *)so_text(WRAP_SPACES(f
));
10483 n
= so_tell(WRAP_SPACES(f
));
10484 so_seek(WRAP_SPACES(f
), 0L, 0);
10485 wrap_flush_s(f
, s
, n
, WRAP_SPC_LEN(f
), ipp
, eibp
, opp
, eobp
, WFE_NONE
);
10486 so_truncate(WRAP_SPACES(f
), 0L);
10487 WRAP_SPC_LEN(f
) = 0;
10488 WRAP_TRL_SPC(f
) = 0;
/* step 2: flush the buffered line text (n = number of buffered bytes) */
10490 n
= f
->linep
- f
->line
;
10491 wrap_flush_s(f
, s
, n
, f
->f2
, ipp
, eibp
, opp
, eobp
, WFE_NONE
);
/* rewind the line buffer and forget any preferred break point */
10493 f
->linep
= f
->line
;
10494 WRAP_PB_OFF(f
) = 0;
10495 WRAP_PB_LEN(f
) = 0;
/*
 * wrap_flush_embed - flush only the buffered whitespace store.
 *
 * The contents of WRAP_SPACES(f) are passed to wrap_flush_s() with a
 * width of 0 and the WFE_CNT_HANDLE flag, after which the store is
 * truncated and the space counters are zeroed.  Unlike wrap_flush(),
 * the visible lines do not touch f->line / f->linep.
 *
 * In wrap_flush_s() the WFE_CNT_HANDLE flag suppresses output of the
 * handle bytes (see the "(flags & WFE_CNT_HANDLE) == 0" tests there).
 */
10501 wrap_flush_embed(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
)
10505 s
= (char *)so_text(WRAP_SPACES(f
));
10506 n
= so_tell(WRAP_SPACES(f
));
10507 so_seek(WRAP_SPACES(f
), 0L, 0);
10508 wrap_flush_s(f
, s
, n
, 0, ipp
, eibp
, opp
, eobp
, WFE_CNT_HANDLE
);
10509 so_truncate(WRAP_SPACES(f
), 0L);
10510 WRAP_SPC_LEN(f
) = 0;
10511 WRAP_TRL_SPC(f
) = 0;
/*
 * wrap_flush_s - write n bytes starting at s to the next filter.
 *
 * Parameters:
 *   f      wrap filter instance; output goes to f->next via GF_PUTC_GLO
 *   s, n   buffer and byte count to emit
 *   w      not referenced in the visible lines (presumably the display
 *          width of the buffer - TODO confirm against the full source)
 *   ipp..eobp  caller's filter buffer pointers, needed by GF_PUTC_GLO
 *   flags  WFE_NONE or WFE_CNT_HANDLE; with WFE_CNT_HANDLE set, the
 *          bytes of an embedded handle are consumed but not emitted
 *
 * A byte equal to TAG_EMBED introduces an embedded tag.  The visible
 * cases re-emit bold/underline/inverse on/off pairs, track the current
 * anchor number in WRAP_ANCHOR(f), and for colour tags copy RGBLEN
 * bytes into WRAP_COLOR(f)->fg or ->bg.  Ordinary bytes are emitted
 * only while f->n <= WRAP_MAX_COL(f); otherwise an OVERRUN debug line
 * is logged.
 *
 * NOTE(review): many original lines (the switch header, case labels,
 * loop bodies) are missing from this listing, so the control flow
 * between the numbered fragments below cannot be verified here.
 */
10517 wrap_flush_s(FILTER_S
*f
, char *s
, int n
, int w
, unsigned char **ipp
,
10518 unsigned char **eibp
, unsigned char **opp
, unsigned char **eobp
, int flags
)
10522 for(; n
> 0; n
--,s
++){
10523 if(*s
== TAG_EMBED
){
10527 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10528 GF_PUTC_GLO(f
->next
,TAG_BOLDON
);
10532 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10533 GF_PUTC_GLO(f
->next
,TAG_BOLDOFF
);
10537 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10538 GF_PUTC_GLO(f
->next
,TAG_ULINEON
);
10541 case TAG_ULINEOFF
:
10542 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10543 GF_PUTC_GLO(f
->next
,TAG_ULINEOFF
);
/* inverse-off also clears the remembered anchor number */
10547 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10548 GF_PUTC_GLO(f
->next
,TAG_INVOFF
);
10549 WRAP_ANCHOR(f
) = 0;
/* handle tag: each output byte is skipped when WFE_CNT_HANDLE is set */
10552 if((flags
& WFE_CNT_HANDLE
) == 0)
10553 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10558 if((flags
& WFE_CNT_HANDLE
) == 0)
10559 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10564 if((flags
& WFE_CNT_HANDLE
) == 0)
10565 GF_PUTC_GLO(f
->next
, i
);
/* rebuild the decimal anchor number digit by digit from the buffer */
10567 WRAP_ANCHOR(f
) = 0;
10569 WRAP_ANCHOR(f
) = (WRAP_ANCHOR(f
) * 10) + (*++s
-'0');
10571 if((flags
& WFE_CNT_HANDLE
) == 0)
10572 GF_PUTC_GLO(f
->next
,*s
);
/* foreground colour: needs colour support and at least RGBLEN bytes left */
10579 if(pico_usingcolor() && n
>= RGBLEN
){
10581 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10582 GF_PUTC_GLO(f
->next
,TAG_FGCOLOR
);
10584 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
/* strncpy does not terminate; the next statement adds the NUL explicitly */
10585 strncpy(WRAP_COLOR(f
)->fg
, s
+1, RGBLEN
);
10586 WRAP_COLOR(f
)->fg
[RGBLEN
]='\0';
10590 GF_PUTC_GLO(f
->next
,
/* background colour: same pattern as the foreground case above */
10595 if(pico_usingcolor() && n
>= RGBLEN
){
10597 GF_PUTC_GLO(f
->next
,TAG_EMBED
);
10598 GF_PUTC_GLO(f
->next
,TAG_BGCOLOR
);
10600 WRAP_COLOR(f
)=new_color_pair(NULL
,NULL
);
10601 strncpy(WRAP_COLOR(f
)->bg
, s
+1, RGBLEN
);
10602 WRAP_COLOR(f
)->bg
[RGBLEN
]='\0';
10606 GF_PUTC_GLO(f
->next
,
/* ordinary byte: emit unless the line already exceeds the hard maximum */
10617 if(f
->n
<= WRAP_MAX_COL(f
)){
10618 GF_PUTC_GLO(f
->next
, (*s
) & 0xff);
10621 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s
) & 0xff));
/* something was written, so the line is no longer all whitespace */
10624 WRAP_ALLWSP(f
) = 0;
/*
 * wrap_eol - terminate the current output line.
 *
 * Parameters:
 *   f   wrap filter instance; output goes to f->next
 *   c   when non-zero and WRAP_LV_FLD(f) is set, a space is written
 *       before the line end
 *   ipp..eobp  caller's filter buffer pointers, needed by GF_PUTC_GLO
 *
 * Visible sequence: a pending soft hyphen is turned into a real '-';
 * attribute-off tags (bold, underline, inverse) are written; if a
 * colour is set, the normal foreground and background colours are
 * written as embedded colour tags; then CR LF is emitted and the
 * whitespace store and its counters are reset.
 *
 * NOTE(review): the conditions guarding the bold/underline fragments
 * and the loops that walk "p" over "cb" are not visible in this
 * listing.
 */
10632 wrap_eol(FILTER_S
*f
, int c
, unsigned char **ipp
, unsigned char **eibp
,
10633 unsigned char **opp
, unsigned char **eobp
)
10635 if(WRAP_SAW_SOFT_HYPHEN(f
)){
10636 WRAP_SAW_SOFT_HYPHEN(f
) = 0;
10637 GF_PUTC_GLO(f
->next
, '-'); /* real hyphen */
10640 if(c
&& WRAP_LV_FLD(f
))
10641 GF_PUTC_GLO(f
->next
, ' ');
10644 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10645 GF_PUTC_GLO(f
->next
, TAG_BOLDOFF
);
10649 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10650 GF_PUTC_GLO(f
->next
, TAG_ULINEOFF
);
/* an open anchor is closed with the same inverse-off tag */
10653 if(WRAP_INVERSE(f
) || WRAP_ANCHOR(f
)){
10654 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10655 GF_PUTC_GLO(f
->next
, TAG_INVOFF
);
/* restore the normal colours before the line break */
10658 if(WRAP_COLOR_SET(f
)){
10661 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10662 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10663 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_FORE_COLOR
), sizeof(cb
));
10664 cb
[sizeof(cb
)-1] = '\0';
10667 GF_PUTC_GLO(f
->next
, *p
);
10668 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10669 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10670 strncpy(cb
, color_to_asciirgb(ps_global
->VAR_NORM_BACK_COLOR
), sizeof(cb
));
10671 cb
[sizeof(cb
)-1] = '\0';
10674 GF_PUTC_GLO(f
->next
, *p
);
/* the line break itself: CR LF */
10677 GF_PUTC_GLO(f
->next
, '\015');
10678 GF_PUTC_GLO(f
->next
, '\012');
/* buffered whitespace is discarded at the end of a line */
10680 so_truncate(WRAP_SPACES(f
), 0L);
10681 WRAP_SPC_LEN(f
) = 0;
10682 WRAP_TRL_SPC(f
) = 0;
/*
 * wrap_bol - write whatever must appear at the beginning of a new
 *            output line.
 *
 * Parameters:
 *   f     wrap filter instance; output goes to f->next
 *   ivar  when non-zero, WRAP_INDENT(f) is added to the left margin
 *         to form the leading space count n
 *   q     not referenced in the visible lines (presumably selects
 *         whether wrap_quote_insert() is called - TODO confirm)
 *   ipp..eobp  caller's filter buffer pointers, needed by GF_PUTC_GLO
 *
 * Visible sequence: optional header colouring (WRAP_HDR_CLR) using the
 * general header fore/back colours, leading spaces, WRAP_ALLWSP set to
 * 1, the quote prefix via wrap_quote_insert(), and then the text
 * attributes that were active are switched back on: bold, underline,
 * inverse, the current colour pair, and the current anchor handle.
 *
 * NOTE(review): several guarding conditions and loops are missing from
 * this listing; comments below describe only what is shown.
 */
10688 wrap_bol(FILTER_S
*f
, int ivar
, int q
, unsigned char **ipp
, unsigned char **eibp
,
10689 unsigned char **opp
, unsigned char **eobp
)
/* n = left margin, plus the indent when ivar is set */
10691 int n
= WRAP_MARG_L(f
) + (ivar
? WRAP_INDENT(f
) : 0);
10693 if(WRAP_HDR_CLR(f
)){
10695 char cbuf
[RGBLEN
+1];
10698 if((k
= WRAP_MARG_L(f
)) > 0)
10702 GF_PUTC_GLO(f
->next
, ' ');
10705 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10706 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10708 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_FORE_COLOR
),
10710 cbuf
[sizeof(cbuf
)-1] = '\0';
10713 GF_PUTC_GLO(f
->next
, *p
);
10714 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10715 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10717 color_to_asciirgb(ps_global
->VAR_HEADER_GENERAL_BACK_COLOR
),
10719 cbuf
[sizeof(cbuf
)-1] = '\0';
10722 GF_PUTC_GLO(f
->next
, *p
);
10727 GF_PUTC_GLO(f
->next
, ' ');
/* a fresh line has nothing visible on it yet */
10730 WRAP_ALLWSP(f
) = 1;
10733 wrap_quote_insert(f
, ipp
, eibp
, opp
, eobp
);
/* re-establish the attributes that were in force at the line break */
10736 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10737 GF_PUTC_GLO(f
->next
, TAG_BOLDON
);
10740 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10741 GF_PUTC_GLO(f
->next
, TAG_ULINEON
);
10743 if(WRAP_INVERSE(f
)){
10744 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10745 GF_PUTC_GLO(f
->next
, TAG_INVON
);
10747 if(WRAP_COLOR_SET(f
)){
/* only the colour components that are non-empty are re-emitted */
10749 if(WRAP_COLOR(f
)->fg
[0]){
10751 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10752 GF_PUTC_GLO(f
->next
, TAG_FGCOLOR
);
10753 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->fg
), sizeof(cb
));
10754 cb
[sizeof(cb
)-1] = '\0';
10757 GF_PUTC_GLO(f
->next
, *p
);
10759 if(WRAP_COLOR(f
)->bg
[0]){
10761 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10762 GF_PUTC_GLO(f
->next
, TAG_BGCOLOR
);
10763 strncpy(cb
, color_to_asciirgb(WRAP_COLOR(f
)->bg
), sizeof(cb
));
10764 cb
[sizeof(cb
)-1] = '\0';
10767 GF_PUTC_GLO(f
->next
, *p
);
/*
 * Re-open the anchor: TAG_EMBED, TAG_HANDLE, a length byte, then the
 * anchor number as decimal digits.
 */
10770 if(WRAP_ANCHOR(f
)){
10771 char buf
[64]; int i
;
10772 GF_PUTC_GLO(f
->next
, TAG_EMBED
);
10773 GF_PUTC_GLO(f
->next
, TAG_HANDLE
);
10774 snprintf(buf
, sizeof(buf
), "%d", WRAP_ANCHOR(f
));
10775 GF_PUTC_GLO(f
->next
, (int) strlen(buf
));
10776 for(i
= 0; buf
[i
]; i
++)
10777 GF_PUTC_GLO(f
->next
, buf
[i
]);
/*
 * wrap_quote_insert - write the quote prefix for the current line.
 *
 * One prefix is written per quote level (loop bound WRAP_FL_QD(f)).
 * If VAR_QUOTE_REPLACE_STRING is set, get_pair() splits it into
 * "prefix" and "last_prefix"; when only last_prefix comes back it is
 * used as prefix.  With WRAP_USE_CLR(f) the quote level selects, by
 * j modulo 3, one of the QUOTE1/QUOTE2/QUOTE3 colour pairs, which is
 * emitted only if both colours are configured and the pair passes
 * pico_is_good_colorpair().  For each level either the replacement
 * prefix or a '>' (optionally followed by a space) is written, and
 * f->n is advanced by the display width of any replacement text.
 * Both prefix strings are released with fs_give() at the end.
 *
 * NOTE(review): the first branch of the colour test (presumably
 * "(j % 3) == 0") and several else-branches are missing from this
 * listing - confirm against the complete source.
 */
10784 wrap_quote_insert(FILTER_S
*f
, unsigned char **ipp
, unsigned char **eibp
,
10785 unsigned char **opp
, unsigned char **eobp
)
10788 COLOR_PAIR
*col
= NULL
;
10789 char *prefix
= NULL
, *last_prefix
= NULL
;
10791 if(ps_global
->VAR_QUOTE_REPLACE_STRING
){
10792 get_pair(ps_global
->VAR_QUOTE_REPLACE_STRING
, &prefix
, &last_prefix
, 0, 0);
/* a lone value comes back as last_prefix; treat it as the prefix */
10793 if(!prefix
&& last_prefix
){
10794 prefix
= last_prefix
;
10795 last_prefix
= NULL
;
/* one pass per quote level */
10799 for(j
= 0; j
< WRAP_FL_QD(f
); j
++){
10800 if(WRAP_USE_CLR(f
)){
10802 && ps_global
->VAR_QUOTE1_FORE_COLOR
10803 && ps_global
->VAR_QUOTE1_BACK_COLOR
10804 && (col
= new_color_pair(ps_global
->VAR_QUOTE1_FORE_COLOR
,
10805 ps_global
->VAR_QUOTE1_BACK_COLOR
))
10806 && pico_is_good_colorpair(col
)){
10807 GF_COLOR_PUTC(f
, col
);
10809 else if((j
% 3) == 1
10810 && ps_global
->VAR_QUOTE2_FORE_COLOR
10811 && ps_global
->VAR_QUOTE2_BACK_COLOR
10812 && (col
= new_color_pair(ps_global
->VAR_QUOTE2_FORE_COLOR
,
10813 ps_global
->VAR_QUOTE2_BACK_COLOR
))
10814 && pico_is_good_colorpair(col
)){
10815 GF_COLOR_PUTC(f
, col
);
10817 else if((j
% 3) == 2
10818 && ps_global
->VAR_QUOTE3_FORE_COLOR
10819 && ps_global
->VAR_QUOTE3_BACK_COLOR
10820 && (col
= new_color_pair(ps_global
->VAR_QUOTE3_FORE_COLOR
,
10821 ps_global
->VAR_QUOTE3_BACK_COLOR
))
10822 && pico_is_good_colorpair(col
)){
10823 GF_COLOR_PUTC(f
, col
);
/* the temporary colour pair is released again */
10826 free_color_pair(&col
);
10831 if(!WRAP_LV_FLD(f
)){
/* replacement prefix is used only when not formatting for the composer */
10832 if(!WRAP_FOR_CMPS(f
) && ps_global
->VAR_QUOTE_REPLACE_STRING
&& prefix
){
10833 for(i
= 0; prefix
[i
]; i
++)
10834 GF_PUTC_GLO(f
->next
, prefix
[i
]);
10835 f
->n
+= utf8_width(prefix
);
/* reply string is exactly > (bare or quoted): write a bare '>' */
10837 else if(ps_global
->VAR_REPLY_STRING
10838 && (!strcmp(ps_global
->VAR_REPLY_STRING
, ">")
10839 || !strcmp(ps_global
->VAR_REPLY_STRING
, "\">\""))){
10840 GF_PUTC_GLO(f
->next
, '>');
10844 GF_PUTC_GLO(f
->next
, '>');
10845 GF_PUTC_GLO(f
->next
, ' ');
10850 GF_PUTC_GLO(f
->next
, '>');
/* after the last level: a separating space, or the trailing prefix text */
10854 if(j
&& WRAP_LV_FLD(f
)){
10855 GF_PUTC_GLO(f
->next
, ' ');
10858 else if(j
&& last_prefix
){
10859 for(i
= 0; last_prefix
[i
]; i
++)
10860 GF_PUTC_GLO(f
->next
, last_prefix
[i
]);
10861 f
->n
+= utf8_width(last_prefix
);
10865 fs_give((void **)&prefix
);
10867 fs_give((void **)&last_prefix
);
10874 * function called from the outside to set
10875 * wrap filter's width option
/*
 * gf_wrap_filter_opt - build the option structure for the wrap filter.
 *
 * Parameters:
 *   width      stored as wrap_col
 *   width_max  stored as wrap_max
 *   margin     optional two-element array: [0] left margin, [1] right
 *              margin; NULL means both margins are 0
 *   indent     stored as indent
 *   flags      bitwise OR of GFW_* values; each one sets the matching
 *              boolean field of the structure
 *
 * Returns a newly allocated, zero-initialised WRAP_S (via fs_get) cast
 * to void *.  The wrap filter releases it with fs_give on f->opt at
 * end of data.  No argument validation is done here (see the NOTE
 * inside the function).
 */
10878 gf_wrap_filter_opt(int width
, int width_max
, int *margin
, int indent
, int flags
)
10882 /* NOTE: variables MUST be sanity checked before they get here */
10883 wrap
= (WRAP_S
*) fs_get(sizeof(WRAP_S
));
10884 memset(wrap
, 0, sizeof(WRAP_S
));
10885 wrap
->wrap_col
= width
;
10886 wrap
->wrap_max
= width_max
;
10887 wrap
->indent
= indent
;
10888 wrap
->margin_l
= (margin
) ? margin
[0] : 0;
10889 wrap
->margin_r
= (margin
) ? margin
[1] : 0;
/* each flag test yields 0 or 1 for the corresponding field */
10890 wrap
->tags
= (GFW_HANDLES
& flags
) == GFW_HANDLES
;
10891 wrap
->on_comma
= (GFW_ONCOMMA
& flags
) == GFW_ONCOMMA
;
10892 wrap
->flowed
= (GFW_FLOWED
& flags
) == GFW_FLOWED
;
10893 wrap
->leave_flowed
= (GFW_FLOW_RESULT
& flags
) == GFW_FLOW_RESULT
;
10894 wrap
->delsp
= (GFW_DELSP
& flags
) == GFW_DELSP
;
10895 wrap
->use_color
= (GFW_USECOLOR
& flags
) == GFW_USECOLOR
;
10896 wrap
->hdr_color
= (GFW_HDRCOLOR
& flags
) == GFW_HDRCOLOR
;
10897 wrap
->for_compose
= (GFW_FORCOMPOSE
& flags
) == GFW_FORCOMPOSE
;
10898 wrap
->handle_soft_hyphen
= (GFW_SOFTHYPHEN
& flags
) == GFW_SOFTHYPHEN
;
10900 return((void *) wrap
);
/*
 * gf_url_hilite_opt - initialise a caller-supplied URL_HILITE_S.
 *
 * Parameters:
 *   uh        structure to fill in; it is zeroed first
 *   handlesp  stored in uh->handlesp
 *   flags     URH_HDRCOLOR sets uh->hdr_color
 *
 * Returns uh cast to void *.  Nothing is allocated here, so the
 * structure remains owned by the caller.
 *
 * NOTE(review): lines are missing around the assignments in this
 * listing; whether uh is checked for NULL first is not visible.
 */
10905 gf_url_hilite_opt(URL_HILITE_S
*uh
, HANDLE_S
**handlesp
, int flags
)
10908 memset(uh
, 0, sizeof(URL_HILITE_S
));
10909 uh
->handlesp
= handlesp
;
10910 uh
->hdr_color
= (URH_HDRCOLOR
& flags
) == URH_HDRCOLOR
;
10913 return((void *) uh
);
/*
 * Accessors for the preflow filter state kept in F->opt (a PREFLOW_S):
 * quote_depth, quote_count and sig.  gf_preflow() initialises all three
 * to 0 on GF_RESET.
 */
10917 #define PF_QD(F) (((PREFLOW_S *)(F)->opt)->quote_depth)
10918 #define PF_QC(F) (((PREFLOW_S *)(F)->opt)->quote_count)
10919 #define PF_SIG(F) (((PREFLOW_S *)(F)->opt)->sig)
10921 typedef struct preflow_s
{
10928 * This would normally be handled in gf_wrap. If there is a possibility
10929 * that a url we want to recognize is cut in half by a soft newline we
10930 * want to fix that up by putting the halves back together. We do that
10931 * by deleting the soft newline and putting it all in one line. It will
10932 * still get wrapped later in gf_wrap. It isn't pretty with all the
10933 * goto's, but whatta ya gonna do?
/*
 * gf_preflow - filter run ahead of gf_wrap on flowed text.
 *
 * Parameters:
 *   f    filter instance; f->f1 holds the state, f->f2 the "pending"
 *        value, f->opt a PREFLOW_S (quote depth, quote count, sig
 *        progress)
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA:  reads characters with GF_GETC and forwards them with
 *           GF_PUTC.  At the start of a line it counts leading '>'
 *           characters, eats one stuffed space, and watches for a
 *           signature separator (PF_SIG reaches 4 when the line
 *           "really is a sig line").
 * GF_EOD:   frees f->opt, flushes the next filter and passes GF_EOD on.
 * GF_RESET: allocates and zeroes a PREFLOW_S and starts in state BOL.
 *
 * NOTE(review): most of the state machine (switch and case labels,
 * state transitions, the done_with_sig label itself) is missing from
 * this listing.  The comments below describe only the fragments shown.
 */
10936 gf_preflow(FILTER_S
*f
, int flg
)
10938 GF_INIT(f
, f
->next
);
10940 if(flg
== GF_DATA
){
10941 register unsigned char c
;
/* state and pending are cached in locals for the duration of the chunk */
10942 register int state
= f
->f1
;
10943 register int pending
= f
->f2
;
10945 while(GF_GETC(f
, c
)){
10959 GF_PUTC(f
->next
, c
);
10973 GF_PUTC(f
->next
, '\012');
10988 GF_PUTC(f
->next
, ' ');
11004 GF_PUTC(f
->next
, ' ');
11005 GF_PUTC(f
->next
, '\012');
11015 if(c
== '>'){ /* count quote level */
11020 done_counting_quotes
:
11021 if(c
== ' '){ /* eat stuffed space */
11026 done_with_stuffed_space
:
11027 if(c
== '-'){ /* look for signature */
/* same quote level as the previous line and not a completed sig line */
11035 if(PF_QD(f
) == PF_QC(f
) && PF_SIG(f
) < 4){
11036 /* delete pending */
11038 PF_QD(f
) = PF_QC(f
);
11040 /* suppress quotes, too */
11045 * This should have been a hard new line
11046 * instead so leave out the trailing space.
11048 GF_PUTC(f
->next
, '\015');
11049 GF_PUTC(f
->next
, '\012');
11051 PF_QD(f
) = PF_QC(f
);
11054 else if(pending
== 1){
11055 GF_PUTC(f
->next
, '\015');
11056 GF_PUTC(f
->next
, '\012');
11057 PF_QD(f
) = PF_QC(f
);
11060 PF_QD(f
) = PF_QC(f
);
/* write back the quote characters that were counted; leaves PF_QC at -1 */
11065 while(PF_QC(f
)-- > 0)
11066 GF_PUTC(f
->next
, '>');
/* re-emit the characters consumed while probing for a signature line */
11074 GF_PUTC(f
->next
, '-');
11078 GF_PUTC(f
->next
, '-');
11079 GF_PUTC(f
->next
, '-');
11084 GF_PUTC(f
->next
, '-');
11085 GF_PUTC(f
->next
, '-');
11086 GF_PUTC(f
->next
, ' ');
11091 goto default_case
; /* to handle c */
/* resumption points when a chunk boundary fell inside line-start parsing */
11096 case FL_QLEV
: /* count quote level */
11100 goto done_counting_quotes
;
11104 case FL_STF
: /* eat stuffed space */
11105 goto done_with_stuffed_space
;
11108 case FL_SIG
: /* deal with sig indicator */
11110 case 1: /* saw '-' */
11114 goto done_with_sig
;
11118 case 2: /* saw '--' */
11122 goto done_with_sig
;
11126 case 3: /* saw '-- ' */
11128 PF_SIG(f
) = 4; /* it really is a sig line */
11130 goto done_with_sig
;
11140 GF_END(f
, f
->next
);
11142 else if(flg
== GF_EOD
){
/* release the PREFLOW_S allocated on reset, then propagate end of data */
11143 fs_give((void **) &f
->opt
);
11144 (void) GF_FLUSH(f
->next
);
11145 (*f
->next
->f
)(f
->next
, GF_EOD
);
11147 else if(flg
== GF_RESET
){
11150 pf
= (PREFLOW_S
*) fs_get(sizeof(*pf
));
11151 memset(pf
, 0, sizeof(*pf
));
11152 f
->opt
= (void *) pf
;
11154 f
->f1
= BOL
; /* state */
11155 f
->f2
= 0; /* pending */
11156 PF_QD(f
) = 0; /* quote depth */
11157 PF_QC(f
) = 0; /* quote count */
11158 PF_SIG(f
) = 0; /* sig level */
11166 * LINE PREFIX FILTER - insert given text at beginning of each
/*
 * GF_PREFIX_WRITE(s) - write the string s, if it is not NULL, to the
 * next filter one character at a time with GF_PUTC.  The macro relies
 * on a variable named "f" being in scope at the point of use.
 * NOTE(review): the loop header and closing lines of the macro are not
 * part of this listing.
 */
11171 #define GF_PREFIX_WRITE(s) { \
11172 register char *p; \
11173 if((p = (s)) != NULL) \
11175 GF_PUTC(f->next, *p++); \
11180 * the simple filter, prepends each line with the requested prefix.
11181 * if prefix is null, does nothing, and as with all filters, assumes
11182 * NVT end of lines.
/*
 * gf_prefix - prepend the string in f->opt to every line of output.
 *
 * Parameters:
 *   f    filter instance; f->opt is the prefix string (see
 *        gf_prefix_opt), f->f1 holds the CR state and f->f2 is
 *        non-zero until the first prefix has been written
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA:  writes the prefix once before the first character, then
 *           after every line end.  CRLF, bare CR and bare LF are all
 *           normalised to CRLF on output.
 * GF_EOD:   flushes the next filter and passes GF_EOD on.  f->opt is
 *           not freed in the visible lines.
 * GF_RESET: sets f->f2 to 1 ("nothing written yet").
 *
 * NOTE(review): the assignments that change "state" and the write-back
 * of "first" to f->f2 are not visible in this listing.
 */
11185 gf_prefix(FILTER_S
*f
, int flg
)
11187 GF_INIT(f
, f
->next
);
11189 if(flg
== GF_DATA
){
11190 register unsigned char c
;
11191 register int state
= f
->f1
;
11192 register int first
= f
->f2
;
11194 while(GF_GETC(f
, c
)){
11196 if(first
){ /* write initial prefix!! */
11197 first
= 0; /* but just once */
11198 GF_PREFIX_WRITE((char *) f
->opt
);
11202 * State == 0 is the starting state and the usual state.
11203 * State == 1 means we saw a CR and haven't acted on it yet.
11204 * We are looking for a LF to get the CRLF end of line.
11205 * However, we also treat bare CR and bare LF as if they
11206 * were CRLF sequences. What else could it mean in text?
11207 * This filter is only used for text so that is probably
11208 * a reasonable interpretation of the bad input.
11210 if(c
== '\015'){ /* CR */
11211 if(state
){ /* Treat pending CR as endofline, */
11212 GF_PUTC(f
->next
, '\015'); /* and remain in saw-a-CR state. */
11213 GF_PUTC(f
->next
, '\012');
11214 GF_PREFIX_WRITE((char *) f
->opt
);
11220 else if(c
== '\012'){ /* LF */
11221 GF_PUTC(f
->next
, '\015'); /* Got either a CRLF or a bare LF, */
11222 GF_PUTC(f
->next
, '\012'); /* treat both as if a CRLF. */
11223 GF_PREFIX_WRITE((char *) f
->opt
);
11226 else{ /* any other character */
11228 GF_PUTC(f
->next
, '\015'); /* Treat pending CR as endofline. */
11229 GF_PUTC(f
->next
, '\012');
11230 GF_PREFIX_WRITE((char *) f
->opt
);
11234 GF_PUTC(f
->next
, c
);
11238 f
->f1
= state
; /* save state for next chunk of data */
11240 GF_END(f
, f
->next
);
11242 else if(flg
== GF_EOD
){
11243 (void) GF_FLUSH(f
->next
);
11244 (*f
->next
->f
)(f
->next
, GF_EOD
);
11246 else if(flg
== GF_RESET
){
11247 dprint((9, "-- gf_reset prefix\n"));
11249 f
->f2
= 1; /* nothing written yet */
11255 * function called from the outside to set
11256 * prefix filter's prefix string
/*
 * gf_prefix_opt - package the prefix string for gf_prefix.
 *
 * Returns the prefix pointer itself cast to void *; no copy is made,
 * so the string must stay valid for as long as the filter runs.
 */
11259 gf_prefix_opt(char *prefix
)
11261 return((void *) prefix
);
11266 * LINE TEST FILTER - accumulate lines and offer each to the provided
11270 typedef struct _linetest_s
{
11276 /* accumulator growth increment */
/*
 * Helper macros for gf_line_test.
 *
 *   LINE_TEST_BLOCK       number of bytes by which the line
 *                         accumulator grows.
 *   GF_LINE_TEST_EOB(f)   address of the last byte of the accumulator
 *                         (f->line plus f->f2 - 1).
 *   GF_LINE_TEST_ADD(f,c) append c; the visible part grows the buffer
 *                         by LINE_TEST_BLOCK with fs_resize and
 *                         re-derives "eobuf" and "p" from the new
 *                         base.  Relies on locals "p" and "eobuf".
 *   GF_LINE_TEST_TEST(F,D) call the test function stored in F->opt,
 *                         store its result in D, then write the line
 *                         to the next filter, applying the LT_INS_S
 *                         list returned by the test (len > 0 inserts
 *                         text at "where"; the len < 0 branch is
 *                         only partly visible).  The list is freed
 *                         with gf_line_test_free_ins.  On failure the
 *                         buffers are freed and gf_error is called.
 *
 * NOTE(review): many continuation lines of these macros are missing
 * from this listing, so the exact conditions are not verifiable here.
 */
11277 #define LINE_TEST_BLOCK 1024
11279 #define GF_LINE_TEST_EOB(f) \
11280 ((f)->line + ((f)->f2 - 1))
11282 #define GF_LINE_TEST_ADD(f, c) \
11285 f->f2 += LINE_TEST_BLOCK; \
11286 fs_resize((void **)&f->line, \
11287 (size_t) f->f2 * sizeof(char)); \
11288 eobuf = GF_LINE_TEST_EOB(f); \
11289 p = eobuf - LINE_TEST_BLOCK; \
11294 #define GF_LINE_TEST_TEST(F, D) \
11297 register char *cp; \
11299 LT_INS_S *ins = NULL, *insp; \
11301 (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
11303 ((LINETEST_S *) (F)->opt)->local); \
11307 fs_give((void **) &(F)->line); \
11309 fs_give((void **) &(F)->opt); \
11310 gf_error(_("translation error")); \
11313 for(insp = ins, cp = (F)->line; cp < p; ){ \
11314 if(insp && cp == insp->where){ \
11315 if(insp->len > 0){ \
11316 for(l = 0; l < insp->len; l++){ \
11317 c = (unsigned char) insp->text[l]; \
11318 GF_PUTC((F)->next, c); \
11320 insp = insp->next; \
11322 } else if(insp->len < 0){ \
11324 insp = insp->next; \
11328 GF_PUTC((F)->next, *cp); \
11332 for(l = 0; l < insp->len; l++){ \
11333 c = (unsigned char) insp->text[l]; \
11334 GF_PUTC((F)->next, c); \
11336 insp = insp->next; \
11338 gf_line_test_free_ins(&ins); \
11345 * this simple filter accumulates characters until a newline, offers it
11346 * to the provided test function, and then passes it on. It assumes
/*
 * gf_line_test - accumulate input a line at a time and offer each
 *                line to the test function configured in f->opt.
 *
 * Parameters:
 *   f    filter instance; f->line / f->linep hold the accumulator,
 *        f->f2 its allocated size, f->f1 the CR state and f->n the
 *        line number handed to the test function
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA:  characters are appended with GF_LINE_TEST_ADD; at a line
 *           end GF_LINE_TEST_TEST runs the test and writes the line.
 *           A result of 2 means "skip this line".  When the tester
 *           reports it has seen enough, this filter unlinks itself
 *           from the gf_master chain, passes the rest of the input
 *           through unchanged, flushes, and frees its line buffer and
 *           its own FILTER_S.
 * GF_EOD:   tests whatever is left in the accumulator, frees the line
 *           buffer and f->opt, and passes GF_EOD on.
 * GF_RESET: clears state and line number and allocates a
 *           LINE_TEST_BLOCK-byte accumulator.
 *
 * NOTE(review): the tests on "state" and "done", and what follows the
 * self-removal (f has been freed by then), are not visible in this
 * listing - confirm against the complete source.
 */
11350 gf_line_test(FILTER_S
*f
, int flg
)
/* p and eobuf are the locals the GF_LINE_TEST_* macros operate on */
11352 register char *p
= f
->linep
;
11353 register char *eobuf
= GF_LINE_TEST_EOB(f
);
11354 GF_INIT(f
, f
->next
);
11356 if(flg
== GF_DATA
){
11357 register unsigned char c
;
11358 register int state
= f
->f1
;
11360 while(GF_GETC(f
, c
)){
11367 GF_LINE_TEST_TEST(f
, done
);
11371 if(done
== 2) /* skip this line! */
11374 GF_PUTC(f
->next
, '\015');
11375 GF_PUTC(f
->next
, '\012');
11377 * if the line tester returns TRUE, it's
11378 * telling us its seen enough and doesn't
11379 * want to see any more. Remove ourself
11380 * from the pipeline...
/* unlink: either we are the head of the chain or we find our predecessor */
11383 if(gf_master
== f
){
11384 gf_master
= f
->next
;
11389 for(fprev
= gf_master
;
11390 fprev
&& fprev
->next
!= f
;
11391 fprev
= fprev
->next
)
11394 if(fprev
) /* wha??? */
11395 fprev
->next
= f
->next
;
11400 while(GF_GETC(f
, c
)) /* pass input */
11401 GF_PUTC(f
->next
, c
);
11403 (void) GF_FLUSH(f
->next
); /* and drain queue */
11404 fs_give((void **)&f
->line
);
11405 fs_give((void **)&f
); /* wax our data */
11411 else /* add CR to buffer */
11412 GF_LINE_TEST_ADD(f
, '\015');
11413 } /* fall thru to handle 'c' */
11415 if(c
== '\015') /* newline? */
11418 GF_LINE_TEST_ADD(f
, c
);
11422 GF_END(f
, f
->next
);
11424 else if(flg
== GF_EOD
){
11427 GF_LINE_TEST_TEST(f
, i
); /* examine remaining data */
11428 fs_give((void **) &f
->line
); /* free line buffer */
11429 fs_give((void **) &f
->opt
); /* free test struct */
11430 (void) GF_FLUSH(f
->next
);
11431 (*f
->next
->f
)(f
->next
, GF_EOD
);
11433 else if(flg
== GF_RESET
){
11434 dprint((9, "-- gf_reset line_test\n"));
11435 f
->f1
= 0; /* state */
11436 f
->n
= 0L; /* line number */
11437 f
->f2
= LINE_TEST_BLOCK
; /* size of alloc'd line */
11438 f
->line
= p
= (char *) fs_get(f
->f2
* sizeof(char));
11446 * function called from the outside to operate on accumulated line.
/*
 * gf_line_test_opt - build the option structure for gf_line_test.
 *
 * Parameters:
 *   test_f  line test callback
 *   local   opaque pointer stored in ltp->local and handed back to
 *           the callback by GF_LINE_TEST_TEST
 *
 * Returns a newly allocated, zeroed LINETEST_S cast to void *.
 * gf_line_test frees it with fs_give on f->opt.
 *
 * NOTE(review): the statement that stores test_f in the structure is
 * not visible in this listing.
 */
11449 gf_line_test_opt(linetest_t test_f
, void *local
)
11453 ltp
= (LINETEST_S
*) fs_get(sizeof(LINETEST_S
));
11454 memset(ltp
, 0, sizeof(LINETEST_S
));
11456 ltp
->local
= local
;
11457 return((void *) ltp
);
/*
 * gf_line_test_new_ins - allocate one insertion record and link it at
 *                        *ins.
 *
 * Parameters:
 *   ins  where to store the pointer to the new LT_INS_S
 *   p    position in the line the record applies to
 *        NOTE(review): the statement storing p is not visible in this
 *        listing
 *   s,n  text and length; for n > 0 a private n-byte copy of s is
 *        made, otherwise text is NULL
 *
 * Returns the address of the new record's next pointer, so that calls
 * can be chained to build a list.
 *
 * NOTE(review): the copy is exactly n bytes with no room for a
 * terminating NUL, so text must be treated as a counted buffer.  The
 * GF_LINE_TEST_TEST consumer iterates by len.
 */
11463 gf_line_test_new_ins(LT_INS_S
**ins
, char *p
, char *s
, int n
)
11465 *ins
= (LT_INS_S
*) fs_get(sizeof(LT_INS_S
));
/* len is recorded first; only a positive length gets a text copy */
11466 if(((*ins
)->len
= n
) > 0)
11467 strncpy((*ins
)->text
= (char *) fs_get(n
* sizeof(char)), s
, n
);
11469 (*ins
)->text
= NULL
;
11472 (*ins
)->next
= NULL
;
11473 return(&(*ins
)->next
);
/*
 * gf_line_test_free_ins - free a list of insertion records.
 *
 * Recurses on the next pointer first, then releases the record's text
 * and the record itself with fs_give.
 * NOTE(review): the guarding NULL tests are not visible in this
 * listing.
 */
11478 gf_line_test_free_ins(LT_INS_S
**ins
)
11482 gf_line_test_free_ins(&(*ins
)->next
);
11485 fs_give((void **) &(*ins
)->text
);
11487 fs_give((void **) ins
);
11493 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11494 * with editorial comment
11497 typedef struct _preped_s
{
11504 * gf_prepend_editorial - accumulate filtered text and prepend its
11505 * output with given text
/*
 * gf_prepend_editorial - buffer all input and, at end of data, write
 *                        it out optionally preceded by editorial text.
 *
 * Parameters:
 *   f    filter instance; f->data is a STORE_S used as the buffer and
 *        f->opt a PREPED_S holding a test callback and the text
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA:  every character is written to the store; nothing is
 *           passed downstream yet.
 * GF_EOD:   if no test callback is set, or the callback returns
 *           non-zero, the editorial text is written first; then the
 *           buffered data is replayed from the start.  The store and
 *           f->opt are released and GF_EOD is passed on.
 * GF_RESET: creates the store.
 *
 * NOTE(review): the debug message in the reset branch says
 * "line_test", which looks copy-pasted from gf_line_test - confirm
 * and correct if so.
 */
11510 gf_prepend_editorial(FILTER_S
*f
, int flg
)
11512 GF_INIT(f
, f
->next
);
11514 if(flg
== GF_DATA
){
11515 register unsigned char c
;
11517 while(GF_GETC(f
, c
)){
11518 so_writec(c
, (STORE_S
*) f
->data
);
11521 GF_END(f
, f
->next
);
11523 else if(flg
== GF_EOD
){
/* a missing test callback means the text is always prepended */
11526 if(!((PREPED_S
*)(f
)->opt
)->f
|| (*((PREPED_S
*)(f
)->opt
)->f
)()){
11527 char *p
= ((PREPED_S
*)(f
)->opt
)->text
;
11529 for( ; p
&& *p
; p
++)
11530 GF_PUTC(f
->next
, *p
);
/* rewind the store and replay everything that was buffered */
11533 so_seek((STORE_S
*) f
->data
, 0L, 0);
11534 while(so_readc(&c
, (STORE_S
*) f
->data
)){
11535 GF_PUTC(f
->next
, c
);
11538 so_give((STORE_S
**) &f
->data
);
11539 fs_give((void **) &f
->opt
);
11540 (void) GF_FLUSH(f
->next
);
11541 (*f
->next
->f
)(f
->next
, GF_EOD
);
11543 else if(flg
== GF_RESET
){
11544 dprint((9, "-- gf_reset line_test\n"));
11545 f
->data
= (void *) so_get(CharStar
, NULL
, EDIT_ACCESS
);
11551 * function called from the outside to setup prepending editorial
/*
 * gf_prepend_editorial_opt - build the option structure for
 *                            gf_prepend_editorial.
 *
 * Returns a newly allocated, zeroed PREPED_S cast to void *.  The
 * filter frees it with fs_give on f->opt at end of data.
 *
 * NOTE(review): the statements that store test_f and text in the
 * structure are not visible in this listing.
 */
11555 gf_prepend_editorial_opt(prepedtest_t test_f
, char *text
)
11559 pep
= (PREPED_S
*) fs_get(sizeof(PREPED_S
));
11560 memset(pep
, 0, sizeof(PREPED_S
));
11563 return((void *) pep
);
11568 * Network virtual terminal to local newline convention filter
/*
 * gf_nvtnl_local - convert network (CRLF) line ends to the local
 *                  newline convention.
 *
 * Parameters:
 *   f    filter instance; f->f1 holds the state between chunks
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_EOD flushes the next filter and passes GF_EOD on; GF_RESET only
 * logs a debug line in the visible code.
 *
 * NOTE(review): the tests that choose between the three GF_PUTC calls
 * in the data loop are missing from this listing.  Presumably a CR
 * followed by LF yields a single LF and a CR followed by anything
 * else is passed through - confirm against the complete source.
 */
11571 gf_nvtnl_local(FILTER_S
*f
, int flg
)
11573 GF_INIT(f
, f
->next
);
11575 if(flg
== GF_DATA
){
11576 register unsigned char c
;
11577 register int state
= f
->f1
;
11579 while(GF_GETC(f
, c
)){
11583 GF_PUTC(f
->next
, '\012');
11587 GF_PUTC(f
->next
, '\015');
11588 /* fall thru to deal with 'c' */
11594 GF_PUTC(f
->next
, c
);
11598 GF_END(f
, f
->next
);
11600 else if(flg
== GF_EOD
){
11601 (void) GF_FLUSH(f
->next
);
11602 (*f
->next
->f
)(f
->next
, GF_EOD
);
11604 else if(flg
== GF_RESET
){
11605 dprint((9, "-- gf_reset nvtnl_local\n"));
11612 * local to network newline convention filter
/*
 * gf_local_nvtnl - convert local line ends to the network (CRLF)
 *                  convention.
 *
 * Parameters:
 *   f    filter instance
 *   flg  GF_DATA, GF_EOD or GF_RESET
 *
 * GF_DATA:  one branch writes CR LF; every other character except a
 *           CR is copied through, so isolated CRs are dropped.
 * GF_EOD:   flushes the next filter and passes GF_EOD on.
 * The last visible fragment only logs a reset debug line.
 *
 * NOTE(review): the condition selecting the CR LF branch (presumably
 * c == LF) is missing from this listing - confirm.
 */
11615 gf_local_nvtnl(FILTER_S
*f
, int flg
)
11617 GF_INIT(f
, f
->next
);
11619 if(flg
== GF_DATA
){
11620 register unsigned char c
;
11622 while(GF_GETC(f
, c
)){
11624 GF_PUTC(f
->next
, '\015');
11625 GF_PUTC(f
->next
, '\012');
11627 else if(c
!= '\015') /* do not copy isolated \015 into source */
11628 GF_PUTC(f
->next
, c
);
11631 GF_END(f
, f
->next
);
11633 else if(flg
== GF_EOD
){
11634 (void) GF_FLUSH(f
->next
);
11635 (*f
->next
->f
)(f
->next
, GF_EOD
);
11638 dprint((9, "-- gf_reset local_nvtnl\n"));
11645 free_filter_module_globals(void)
11647 FILTER_S
*flt
, *fltn
= gf_master
;
11649 while((flt
= fltn
) != NULL
){ /* free list of old filters */
11651 fs_give((void **)&flt
);