* Further improvements to color code to remove a bug that makes Pico
[alpine.git] / pith / filter.c
blob5136d8daf71c3d382a9c62867b11076295949d49
1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filter.c 1266 2009-07-14 18:39:12Z hubert@u.washington.edu $";
3 #endif
5 /*
6 * ========================================================================
7 * Copyright 2006-2008 University of Washington
8 * Copyright 2013-2016 Eduardo Chappa
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
14 * http://www.apache.org/licenses/LICENSE-2.0
16 * ========================================================================
19 /*======================================================================
20 filter.c
22 This code provides a generalized, flexible way to allow
23 piping of data thru filters. Each filter is passed a structure
24 that it will use to hold its static data while it operates on
25 the stream of characters that are passed to it. After processing
26 it will either return or call the next filter in
27 the pipe with any character (or characters) it has ready to go. This
28 means some terminal type of filter has to be the last in the
29 chain (i.e., one that writes the passed char someplace, but doesn't
30 call another filter).
32 See below for more details.
34 The motivation is to handle MIME decoding, richtext conversion,
35 iso_code stripping and anything else that may come down the
36 pike (e.g., PEM) in an elegant fashion. mikes (920811)
38 TODO:
39 reasonable error handling
41 ====*/
44 #include "../pith/headers.h"
45 #include "../pith/filter.h"
46 #include "../pith/conf.h"
47 #include "../pith/store.h"
48 #include "../pith/color.h"
49 #include "../pith/escapes.h"
50 #include "../pith/pipe.h"
51 #include "../pith/status.h"
52 #include "../pith/string.h"
53 #include "../pith/util.h"
54 #include "../pith/url.h"
55 #include "../pith/init.h"
56 #include "../pith/help.h"
57 #include "../pico/keydefs.h"
59 #ifdef _WINDOWS
60 #include "../pico/osdep/mswin.h"
61 #endif
65 * Internal prototypes
/*
 * Forward declarations for routines defined later in this file.
 * The "readc" routines take the address of the octet to fill in and the
 * "writec" routines take the octet to write.
 */
/* storage-object reader and writer */
67 int gf_so_writec(int);
68 int gf_so_readc(unsigned char *);
/* FILE * readers and writers (use gf_in.file / gf_out.file) */
69 int gf_freadc(unsigned char *);
70 int gf_freadc_locale(unsigned char *);
71 int gf_freadc_getchar(unsigned char *, void *);
72 int gf_fwritec(int);
73 int gf_fwritec_locale(int);
74 #ifdef _WINDOWS
75 int gf_freadc_windows(unsigned char *);
76 #endif /* _WINDOWS */
/* pipe readers and writers (use gf_in.pipe / gf_out.pipe) */
77 int gf_preadc(unsigned char *);
78 int gf_preadc_locale(unsigned char *);
79 int gf_preadc_getchar(unsigned char *, void *);
80 int gf_pwritec(int);
81 int gf_pwritec_locale(int);
/* in-memory string readers and writers (use gf_in.txtp / gf_out.txtp) */
82 int gf_sreadc(unsigned char *);
83 int gf_sreadc_locale(unsigned char *);
84 int gf_sreadc_getchar(unsigned char *, void *);
85 int gf_swritec(int);
86 int gf_swritec_locale(int);
/* filter chain plumbing */
87 void gf_terminal(FILTER_S *, int);
88 void gf_error(char *);
89 char *gf_filter_puts(char *);
90 void gf_filter_eod(void);
/* NOTE(review): gf_8bit_put is not defined in the part of the file visible here */
92 void gf_8bit_put(FILTER_S *, int);
97 * System specific options
99 #ifdef _WINDOWS
100 #define CRLF_NEWLINES
101 #endif
105 * Hooks for callers to adjust behavior
107 char *(*pith_opt_pretty_var_name)(char *);
108 char *(*pith_opt_pretty_feature_name)(char *, int);
112 * pointer to first function in a pipe, and pointer to last filter
/* head of the current filter chain; filters are appended by gf_link_filter() */
114 FILTER_S *gf_master = NULL;
/* writer called by gf_terminal() for each octet leaving the chain */
115 static gf_io_t last_filter;
/* message handed to gf_error(); returned by gf_pipe()/gf_filter_puts() */
116 static char *gf_error_string;
/* octets read by gf_pipe() since the last gf_filter_init() */
117 static long gf_byte_count;
/* where gf_error() longjmp's to; set by gf_pipe() and gf_filter_puts() */
118 static jmp_buf gf_error_state;
/*
 * Command flags handed to a filter function as its second argument.
 * In the code visible here GF_RESET is sent once when a filter is linked,
 * GF_DATA whenever its input queue has been filled, and GF_EOD after the
 * last data has been flushed.
 */
121 #define GF_NOOP 0x01 /* flags used by generalized */
122 #define GF_EOD 0x02 /* filters */
123 #define GF_DATA 0x04 /* See filter.c for more */
124 #define GF_ERROR 0x08 /* details */
125 #define GF_RESET 0x10
129 * A list of states used by the various filters. Reused in many filters.
/*
 * State numbers shared by the filters' state machines.  Each filter
 * gives its own meaning to the states it uses; only the numeric values
 * are common.
 */
131 #define DFL 0
132 #define EQUAL 1
133 #define HEX 2
134 #define WSPACE 3
135 #define CCR 4
136 #define CLF 5
137 #define TOKEN 6
138 #define TAG 7
139 #define HANDLE 8
140 #define HDATA 9
141 #define ESC 10
142 #define ESCDOL 11
143 #define ESCPAR 12
144 #define EUC 13
145 #define BOL 14
146 #define FL_QLEV 15
147 #define FL_STF 16
148 #define FL_SIG 17
149 #define STOP_DECODING 18
150 #define SPACECR 19
155 * Macros to reduce function call overhead associated with calling
156 * each filter for each byte filtered, and to minimize filter structure
157 * dereferences. NOTE: "queuein" has to do with putting chars into the
158 * filter structs data queue. So, writing at the queuein offset is
159 * what a filter does to pass processed data out of itself. Ditto for
160 * queueout. This explains the FI --> queueout init stuff below.
/* address of the first slot, and of the last slot, of filter F's queue */
162 #define GF_QUE_START(F) (&(F)->queue[0])
163 #define GF_QUE_END(F) (&(F)->queue[GF_MAXBUF - 1])
/*
 * Load the local cursors from filter F (NULL when F is NULL):
 *   ip  - where the next octet written into F's queue goes
 *   eib - last slot of F's queue
 *   op  - next octet to take out of F's queue
 *   eob - one past the last octet queued in F
 * The _GLO forms do the same through the pointers ipp and eibp.
 */
165 #define GF_IP_INIT(F) ip = (F) ? &(F)->queue[(F)->queuein] : NULL
166 #define GF_IP_INIT_GLO(F) (*ipp) = (F) ? &(F)->queue[(F)->queuein] : NULL
167 #define GF_EIB_INIT(F) eib = (F) ? GF_QUE_END(F) : NULL
168 #define GF_EIB_INIT_GLO(F) (*eibp) = (F) ? GF_QUE_END(F) : NULL
169 #define GF_OP_INIT(F) op = (F) ? &(F)->queue[(F)->queueout] : NULL
170 #define GF_EOB_INIT(F) eob = (F) ? &(F)->queue[(F)->queuein] : NULL
/* store the local cursors back into F's queuein/queueout offsets */
172 #define GF_IP_END(F) (F)->queuein = ip - GF_QUE_START(F)
173 #define GF_IP_END_GLO(F) (F)->queuein = (unsigned char *)(*ipp) - (unsigned char *)GF_QUE_START(F)
174 #define GF_OP_END(F) (F)->queueout = op - GF_QUE_START(F)
/*
 * Declare and load all four cursors: op/eob drain filter FI's queue,
 * ip/eib fill filter FO's queue.  Must appear where declarations are
 * allowed.
 */
176 #define GF_INIT(FI, FO) unsigned char *GF_OP_INIT(FI); \
177 unsigned char *GF_EOB_INIT(FI); \
178 unsigned char *GF_IP_INIT(FO); \
179 unsigned char *GF_EIB_INIT(FO);
/* mark F's queue empty; the expression's value is 0 */
181 #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \
182 (F)->queueout = (F)->queuein = 0)
/* sync both queues' offsets with the local cursors */
184 #define GF_END(FI, FO) (GF_OP_END(FI), GF_IP_END(FO))
/*
 * Record how much has been queued in F, call F's function with GF_DATA,
 * then reload ip and eib.  Evaluates to 1 when eib ends up non-NULL.
 */
186 #define GF_FLUSH(F) ((GF_IP_END(F), (*(F)->f)((F), GF_DATA), \
187 GF_IP_INIT(F), GF_EIB_INIT(F)) ? 1 : 0)
188 #define GF_FLUSH_GLO(F) ((GF_IP_END_GLO(F), (*(F)->f)((F), GF_DATA), \
189 GF_IP_INIT_GLO(F), GF_EIB_INIT_GLO(F)) ? 1 : 0)
/* append octet C to F's queue, flushing F once ip has reached eib */
191 #define GF_PUTC(F, C) ((int)(*ip++ = (C), (ip >= eib) ? GF_FLUSH(F) : 1))
192 #define GF_PUTC_GLO(F, C) ((int)(*(*ipp)++ = (C), ((*ipp) >= (*eibp)) ? GF_FLUSH_GLO(F) : 1))
195 * Introducing the *_GLO macros for use in splitting the big macros out
196 * into functions (wrap_flush, wrap_eol). The reason we need a
197 * separate macro is because of the vars ip, eib, op, and eob, which are
198 * set up locally in a call to GF_INIT. To preserve these variables
199 * in the new functions, we now pass pointers to these four vars. Each
200 * of these new functions expects the presence of pointer vars
201 * ipp, eibp, opp, and eobp.
/*
 * Take the next octet out of F's queue into C and evaluate to 1; when the
 * queue is drained, reset it and evaluate to 0.
 */
204 #define GF_GETC(F, C) ((op < eob) ? (((C) = *op++), 1) : GF_CH_RESET(F))
/*
 * Write an embedded color change to the filter after F: TAG_EMBED,
 * TAG_FGCOLOR and the ascii rgb form of (C)->fg, followed by TAG_EMBED,
 * TAG_BGCOLOR and the ascii rgb form of (C)->bg.  Uses the _GLO put
 * macro, so ipp and eibp must be in scope where this is expanded.
 */
206 #define GF_COLOR_PUTC(F, C) { \
207 char *p; \
208 char cb[RGBLEN+1]; \
209 GF_PUTC_GLO((F)->next, TAG_EMBED); \
210 GF_PUTC_GLO((F)->next, TAG_FGCOLOR); \
211 strncpy(cb, color_to_asciirgb((C)->fg), sizeof(cb)); \
212 cb[sizeof(cb)-1] = '\0'; \
213 p = cb; \
214 for(; *p; p++) \
215 GF_PUTC_GLO((F)->next, *p); \
216 GF_PUTC_GLO((F)->next, TAG_EMBED); \
217 GF_PUTC_GLO((F)->next, TAG_BGCOLOR); \
218 strncpy(cb, color_to_asciirgb((C)->bg), sizeof(cb)); \
219 cb[sizeof(cb)-1] = '\0'; \
220 p = cb; \
221 for(; *p; p++) \
222 GF_PUTC_GLO((F)->next, *p); \
226 * Generalized getc and putc routines, provided here so they don't
227 * need to be re-done elsewhere.
231 * pointers to objects to be used by the generic getc and putc
232 * functions
/*
 * State for the generic readers (gf_in) and writers (gf_out); filled in
 * by gf_set_readc() and gf_set_writec().
 */
234 static struct gf_io_struct {
235 FILE *file; /* used when the source/sink is FileStar */
236 PIPE_S *pipe; /* used when the source/sink is PipeStar */
237 char *txtp; /* next position in an in-memory buffer */
238 unsigned long n; /* octets left in that buffer */
239 int flags; /* flags passed to gf_set_readc/gf_set_writec */
240 CBUF_S cb; /* carry-over state for charset conversion */
241 } gf_in, gf_out;
/*
 * Stacks of storage objects used by gf_so_readc()/gf_so_writec(); the
 * top entry is the one read from or written to.
 */
243 #define GF_SO_STACK struct gf_so_stack
244 static GF_SO_STACK {
245 STORE_S *so;
246 GF_SO_STACK *next;
247 } *gf_so_in, *gf_so_out;
252 * Returns 1 if pc will write into a PicoText object, 0 otherwise.
254 * The purpose of this routine is so that we can avoid setting SIGALARM
255 * when writing into a PicoText object, because that type of object uses
256 * unprotected malloc/free/realloc, which can't be interrupted.
259 pc_is_picotext(gf_io_t pc)
261 return(pc == gf_so_writec && gf_so_out && gf_so_out->so &&
262 gf_so_out->so->src == ExternalText);
268 * setup to use and return a pointer to the generic
269 * getc function
271 void
272 gf_set_readc(gf_io_t *gc, void *txt, long unsigned int len, SourceType src, int flags)
274 gf_in.n = len;
275 gf_in.flags = flags;
276 gf_in.cb.cbuf[0] = '\0';
277 gf_in.cb.cbufp = gf_in.cb.cbuf;
278 gf_in.cb.cbufend = gf_in.cb.cbuf;
280 if(src == FileStar){
281 gf_in.file = (FILE *)txt;
282 fseek(gf_in.file, 0L, 0);
283 #ifdef _WINDOWS
284 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_windows
285 : gf_freadc;
286 #else /* UNIX */
287 *gc = (flags & READ_FROM_LOCALE) ? gf_freadc_locale
288 : gf_freadc;
289 #endif /* UNIX */
291 else if(src == PipeStar){
292 gf_in.pipe = (PIPE_S *)txt;
293 *gc = gf_preadc;
294 *gc = (flags & READ_FROM_LOCALE) ? gf_preadc_locale
295 : gf_preadc;
297 else{
298 gf_in.txtp = (char *)txt;
299 *gc = (flags & READ_FROM_LOCALE) ? gf_sreadc_locale
300 : gf_sreadc;
306 * setup to use and return a pointer to the generic
307 * putc function
309 void
310 gf_set_writec(gf_io_t *pc, void *txt, long unsigned int len, SourceType src, int flags)
312 gf_out.n = len;
313 gf_out.flags = flags;
314 gf_out.cb.cbuf[0] = '\0';
315 gf_out.cb.cbufp = gf_out.cb.cbuf;
316 gf_out.cb.cbufend = gf_out.cb.cbuf;
318 if(src == FileStar){
319 gf_out.file = (FILE *)txt;
320 #ifdef _WINDOWS
321 *pc = gf_fwritec;
322 #else /* UNIX */
323 *pc = (flags & WRITE_TO_LOCALE) ? gf_fwritec_locale
324 : gf_fwritec;
325 #endif /* UNIX */
327 else if(src == PipeStar){
328 gf_out.pipe = (PIPE_S *)txt;
329 *pc = (flags & WRITE_TO_LOCALE) ? gf_pwritec_locale
330 : gf_pwritec;
332 else{
333 gf_out.txtp = (char *)txt;
334 *pc = (flags & WRITE_TO_LOCALE) ? gf_swritec_locale
335 : gf_swritec;
341 * setup to use and return a pointer to the generic
342 * getc function
344 void
345 gf_set_so_readc(gf_io_t *gc, STORE_S *so)
347 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
349 sp->so = so;
350 sp->next = gf_so_in;
351 gf_so_in = sp;
352 *gc = gf_so_readc;
356 void
357 gf_clear_so_readc(STORE_S *so)
359 GF_SO_STACK *sp;
361 if((sp = gf_so_in) != NULL){
362 if(so == sp->so){
363 gf_so_in = gf_so_in->next;
364 fs_give((void **) &sp);
366 else
367 alpine_panic("Programmer botch: Can't unstack store readc");
369 else
370 alpine_panic("Programmer botch: NULL store clearing store readc");
375 * setup to use and return a pointer to the generic
376 * putc function
378 void
379 gf_set_so_writec(gf_io_t *pc, STORE_S *so)
381 GF_SO_STACK *sp = (GF_SO_STACK *) fs_get(sizeof(GF_SO_STACK));
383 sp->so = so;
384 sp->next = gf_so_out;
385 gf_so_out = sp;
386 *pc = gf_so_writec;
390 void
391 gf_clear_so_writec(STORE_S *so)
393 GF_SO_STACK *sp;
395 if((sp = gf_so_out) != NULL){
396 if(so == sp->so){
397 gf_so_out = gf_so_out->next;
398 fs_give((void **) &sp);
400 else
401 alpine_panic("Programmer botch: Can't unstack store writec");
403 else
404 alpine_panic("Programmer botch: NULL store clearing store writec");
409 * put the character to the object previously defined
412 gf_so_writec(int c)
414 return(so_writec(c, gf_so_out->so));
419 * get a character from an object previously defined
422 gf_so_readc(unsigned char *c)
424 return(so_readc(c, gf_so_in->so));
428 /* get a character from a file */
429 /* assumes gf_out struct is filled in */
431 gf_freadc(unsigned char *c)
433 int rv = 0;
435 do {
436 errno = 0;
437 clearerr(gf_in.file);
438 rv = fread(c, sizeof(unsigned char), (size_t)1, gf_in.file);
439 } while(!rv && ferror(gf_in.file) && errno == EINTR);
441 return(rv);
446 gf_freadc_locale(unsigned char *c)
448 return(generic_readc_locale(c, gf_freadc_getchar, (void *) gf_in.file, &gf_in.cb));
/*
 * gf_freadc_getchar - read one octet from the FILE * passed in extraarg.
 *
 * This exists so file input can be used with generic_readc_locale.
 * A read that fails with EINTR is tried again.
 *
 * Returns the fread() item count: 1 when an octet was read, else 0.
 */
int
gf_freadc_getchar(unsigned char *c, void *extraarg)
{
    FILE *stream = (FILE *) extraarg;
    int   got;

    for(;;){
	errno = 0;
	clearerr(stream);
	got = fread(c, sizeof(unsigned char), (size_t)1, stream);

	/* done unless nothing was read because of an interrupt */
	if(got || !ferror(stream) || errno != EINTR)
	  break;
    }

    return(got);
}
474 * Put a character to a file.
475 * Assumes gf_out struct is filled in.
476 * Returns 1 on success, <= 0 on failure.
479 gf_fwritec(int c)
481 unsigned char ch = (unsigned char)c;
482 int rv = 0;
485 rv = fwrite(&ch, sizeof(unsigned char), (size_t)1, gf_out.file);
486 while(!rv && ferror(gf_out.file) && errno == EINTR);
488 return(rv);
493 * The locale version converts from UTF-8 to user's locale charset
494 * before writing the characters.
497 gf_fwritec_locale(int c)
499 int rv = 1;
500 int i, outchars;
501 unsigned char obuf[MAX(MB_LEN_MAX,32)];
503 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
504 for(i = 0; i < outchars; i++)
505 if(gf_fwritec(obuf[i]) != 1){
506 rv = 0;
507 break;
511 return(rv);
#ifdef _WINDOWS
/*
 * gf_freadc_windows - read unicode characters from the windows
 *                     filesystem and return them as a stream of UTF-8
 *                     octets, one per call.
 *
 * The stream is assumed to have been opened so that it knows how to put
 * the unicode together.
 *
 * (This is totally untested, copied loosely from so_file_readc_windows
 * which may or may not be appropriate.)
 *
 * Returns 1 when an octet was stored in *c, 0 at end of input.
 */
int
gf_freadc_windows(unsigned char *c)
{
    CBUF_S *cb = &gf_in.cb;
    UCS     ucs;

    /* octets of the previous character still waiting to be handed out? */
    if(cb->cbufend > cb->cbuf){
	*c = *cb->cbufp++;
	if(cb->cbufp >= cb->cbufend){
	    cb->cbufend = cb->cbuf;
	    cb->cbufp   = cb->cbuf;
	}

	return(1);
    }

    if(!gf_in.file)
      return(0);

    /* windows only so second arg is ignored */
    ucs = read_a_wide_char(gf_in.file, NULL);
    if(ucs == CCONV_EOF)
      return(0);

    /*
     * Convert the UCS character to UTF-8, return its first octet and
     * keep the rest for the following calls.
     */
    cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
    cb->cbufp   = cb->cbuf;
    if(cb->cbufend > cb->cbuf){
	*c = *cb->cbufp++;
	if(cb->cbufp >= cb->cbufend){
	    cb->cbufend = cb->cbuf;
	    cb->cbufp   = cb->cbuf;
	}
    }
    else
      *c = '?';

    return(1);
}
#endif /* _WINDOWS */
574 gf_preadc(unsigned char *c)
576 return(pipe_readc(c, gf_in.pipe));
581 gf_preadc_locale(unsigned char *c)
583 return(generic_readc_locale(c, gf_preadc_getchar, (void *) gf_in.pipe, &gf_in.cb));
588 * This is just to make it work with generic_readc_locale.
591 gf_preadc_getchar(unsigned char *c, void *extraarg)
593 PIPE_S *pipe;
595 pipe = (PIPE_S *) extraarg;
597 return(pipe_readc(c, pipe));
602 * Put a character to a pipe.
603 * Assumes gf_out struct is filled in.
604 * Returns 1 on success, <= 0 on failure.
607 gf_pwritec(int c)
609 return(pipe_writec(c, gf_out.pipe));
614 * The locale version converts from UTF-8 to user's locale charset
615 * before writing the characters.
618 gf_pwritec_locale(int c)
620 int rv = 1;
621 int i, outchars;
622 unsigned char obuf[MAX(MB_LEN_MAX,32)];
624 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
625 for(i = 0; i < outchars; i++)
626 if(gf_pwritec(obuf[i]) != 1){
627 rv = 0;
628 break;
632 return(rv);
636 /* get a character from a string, return nonzero if things OK */
637 /* assumes gf_out struct is filled in */
639 gf_sreadc(unsigned char *c)
641 return((gf_in.n) ? *c = *(gf_in.txtp)++, gf_in.n-- : 0);
646 gf_sreadc_locale(unsigned char *c)
648 return(generic_readc_locale(c, gf_sreadc_getchar, NULL, &gf_in.cb));
/*
 * gf_sreadc_getchar - adapter that lets gf_sreadc be used with
 *                     generic_readc_locale.
 *
 * extraarg is ignored and gf_sreadc just uses globals instead.
 * That's ok as long as we don't call it more than once at a time.
 */
int
gf_sreadc_getchar(unsigned char *c, void *extraarg)
{
    int got = gf_sreadc(c);

    return(got);
}
664 * Put a character to a string.
665 * Assumes gf_out struct is filled in.
666 * Returns 1 on success, <= 0 on failure.
669 gf_swritec(int c)
671 return((gf_out.n) ? *(gf_out.txtp)++ = c, gf_out.n-- : 0);
676 * The locale version converts from UTF-8 to user's locale charset
677 * before writing the characters.
680 gf_swritec_locale(int c)
682 int rv = 1;
683 int i, outchars;
684 unsigned char obuf[MAX(MB_LEN_MAX,32)];
686 if((outchars = utf8_to_locale(c, &gf_out.cb, obuf, sizeof(obuf))) != 0){
687 for(i = 0; i < outchars; i++)
688 if(gf_swritec(obuf[i]) != 1){
689 rv = 0;
690 break;
694 return(rv);
699 * output the given string with the given function
702 gf_puts(register char *s, gf_io_t pc)
704 while(*s != '\0')
705 if(!(*pc)((unsigned char)*s++))
706 return(0); /* ERROR putting char ! */
708 return(1);
713 * output the given string with the given function
716 gf_nputs(register char *s, long int n, gf_io_t pc)
718 while(n--)
719 if(!(*pc)((unsigned char)*s++))
720 return(0); /* ERROR putting char ! */
722 return(1);
727 * Read a stream of multi-byte characters from the
728 * user's locale charset and return a stream of
729 * UTF-8 characters, one at a time. The input characters
730 * are obtained by using the get_a_char function.
732 * Args c -- the returned octet
733 * get_a_char -- function to get a single octet of the multibyte
734 * character. The first arg of that function is the
735 * returned value and the second arg is for the
736 * functions use. The second arg is replaced with
737 * extraarg when it is called.
738 * extraarg -- The second arg to get_a_char.
739 * cb -- Storage area for state between calls to this func.
742 generic_readc_locale(unsigned char *c,
743 int (*get_a_char)(unsigned char *, void *),
744 void *extraarg,
745 CBUF_S *cb)
747 unsigned long octets_so_far = 0, remaining_octets;
748 unsigned char *inputp;
749 unsigned char ch;
750 UCS ucs;
751 unsigned char inputbuf[20];
752 int rv = 0;
753 int got_one = 0;
755 /* already got some from previous call? */
756 if(cb->cbufend > cb->cbuf){
757 *c = *cb->cbufp;
758 cb->cbufp++;
759 rv++;
760 if(cb->cbufp >= cb->cbufend){
761 cb->cbufend = cb->cbuf;
762 cb->cbufp = cb->cbuf;
765 return(rv);
768 memset(inputbuf, 0, sizeof(inputbuf));
769 if((*get_a_char)(&ch, extraarg) == 0)
770 return(0);
772 inputbuf[octets_so_far++] = ch;
774 while(!got_one){
775 remaining_octets = octets_so_far;
776 inputp = inputbuf;
777 ucs = mbtow(ps_global->input_cs, &inputp, &remaining_octets);
778 switch(ucs){
779 case CCONV_BADCHAR:
780 return(rv);
782 case CCONV_NEEDMORE:
784 * Do we need to do something with the characters we've
785 * collected that don't form a valid UCS character?
786 * Probably need to try discarding them one at a time
787 * from the front instead of just throwing them all out.
789 if(octets_so_far >= sizeof(inputbuf))
790 return(rv);
792 if((*get_a_char)(&ch, extraarg) == 0)
793 return(rv);
795 inputbuf[octets_so_far++] = ch;
796 break;
798 default:
799 /* got a good UCS-4 character */
800 got_one++;
801 break;
806 * Now we need to convert the UCS character to UTF-8
807 * and dole out the UTF-8 one char at a time.
809 rv++;
810 cb->cbufend = utf8_put(cb->cbuf, (unsigned long) ucs);
811 cb->cbufp = cb->cbuf;
812 if(cb->cbufend > cb->cbuf){
813 *c = *cb->cbufp;
814 cb->cbufp++;
815 if(cb->cbufp >= cb->cbufend){
816 cb->cbufend = cb->cbuf;
817 cb->cbufp = cb->cbuf;
820 else
821 *c = '?';
823 return(rv);
828 * Start of generalized filter routines
832 * initializing function to make sure list of filters is empty.
834 void
835 gf_filter_init(void)
837 FILTER_S *flt, *fltn = gf_master;
839 while((flt = fltn) != NULL){ /* free list of old filters */
840 fltn = flt->next;
841 fs_give((void **)&flt);
844 gf_master = NULL;
845 gf_error_string = NULL; /* clear previous errors */
846 gf_byte_count = 0L; /* reset counter */
852 * link the given filter into the filter chain
854 void
855 gf_link_filter(filter_t f, void *data)
857 FILTER_S *new, *tail;
859 #ifdef CRLF_NEWLINES
861 * If the system's native EOL convention is CRLF, then there's no
862 * point in passing data thru a filter that's not doing anything
864 if(f == gf_nvtnl_local || f == gf_local_nvtnl)
865 return;
866 #endif
868 new = (FILTER_S *)fs_get(sizeof(FILTER_S));
869 memset(new, 0, sizeof(FILTER_S));
871 new->f = f; /* set the function pointer */
872 new->opt = data; /* set any optional parameter data */
873 (*f)(new, GF_RESET); /* have it setup initial state */
875 if((tail = gf_master) != NULL){ /* or add it to end of existing */
876 while(tail->next) /* list */
877 tail = tail->next;
879 tail->next = new;
881 else /* attach new struct to list */
882 gf_master = new; /* start a new list */
887 * terminal filter, doesn't call any other filters, typically just does
888 * something with the output
890 void
891 gf_terminal(FILTER_S *f, int flg)
893 if(flg == GF_DATA){
894 GF_INIT(f, f);
896 while(op < eob)
897 if((*last_filter)(*op++) <= 0) /* generic terminal filter */
898 gf_error(errno ? error_description(errno) : "Error writing pipe");
900 GF_CH_RESET(f);
902 else if(flg == GF_RESET)
903 errno = 0; /* prepare for problems */
908 * set some outside gf_io_t function to the terminal function
909 * for example: a function to write a char to a file or into a buffer
911 void
912 gf_set_terminal(gf_io_t f) /* function to set generic filter */
915 last_filter = f;
920 * common function for filter's to make it known that an error
921 * has occurred. Jumps back to gf_pipe with error message.
923 void
924 gf_error(char *s)
926 /* let the user know the error passed in s */
927 gf_error_string = s;
928 longjmp(gf_error_state, 1);
933 * The routine that shoves each byte through the chain of
934 * filters. It sets up error handling, and the terminal function.
935 * Then loops getting bytes with the given function, and passing
936 * it on to the first filter in the chain.
938 char *
939 gf_pipe(gf_io_t gc, gf_io_t pc)
940 /* how to get a character */
942 unsigned char c;
944 dprint((4, "-- gf_pipe: "));
947 * set up for any errors a filter may encounter
949 if(setjmp(gf_error_state)){
950 dprint((4, "ERROR: %s\n",
951 gf_error_string ? gf_error_string : "NULL"));
952 return(gf_error_string); /* */
956 * set and link in the terminal filter
958 gf_set_terminal(pc);
959 gf_link_filter(gf_terminal, NULL);
962 * while there are chars to process, send them thru the pipe.
963 * NOTE: it's necessary to enclose the loop below in a block
964 * as the GF_INIT macro calls some automatic var's into
965 * existence. It can't be placed at the start of gf_pipe
966 * because its useful for us to be called without filters loaded
967 * when we're just being used to copy bytes between storage
968 * objects.
971 GF_INIT(gf_master, gf_master);
973 while((*gc)(&c)){
974 gf_byte_count++;
976 #ifdef _WINDOWS
977 if(!(gf_byte_count & 0x3ff))
978 /* Under windows we yield to allow event processing.
979 * Progress display is handled throught the alarm()
980 * mechinism.
982 mswin_yield ();
983 #endif
985 GF_PUTC(gf_master, c & 0xff);
989 * toss an end-of-data marker down the pipe to give filters
990 * that have any buffered data the opportunity to dump it
992 (void) GF_FLUSH(gf_master);
993 (*gf_master->f)(gf_master, GF_EOD);
996 dprint((4, "done.\n"));
997 return(NULL); /* everything went OK */
1002 * return the number of bytes piped so far
1004 long
1005 gf_bytes_piped(void)
1007 return(gf_byte_count);
1012 * filter the given input with the given command
1014 * Args: cmd -- command string to execute
1015 * prepend -- string to prepend to filtered input
1016 * source_so -- storage object containing data to be filtered
1017 * pc -- function to write filtered output with
1018 * aux_filters -- additional filters to pass data thru after "cmd"
1020 * Returns: NULL on success, reason for failure (not alloc'd!) on error
/*
 * gf_filter: feed the text in source_so (converted from UTF-8 with
 * utf8_to_locale) to the command cmd, and push what the command writes
 * through the filter chain built here, ending in pc.
 * silent adds PIPE_SILENT to the pipe flags, disable_reset leaves out
 * PIPE_RESET, and pipecb_f is handed on to open_system_pipe and
 * close_system_pipe.
 */
1022 char *
1023 gf_filter(char *cmd, char *prepend, STORE_S *source_so, gf_io_t pc,
1024 FILTLIST_S *aux_filters, int silent, int disable_reset,
1025 void (*pipecb_f)(PIPE_S *, int, void *))
/* obuf receives the locale-converted octets for one input octet */
1027 unsigned char c, obuf[MAX(MB_LEN_MAX,32)];
1028 int flags, outchars, i;
/* buf holds one chunk of the command's output at a time */
1029 char *errstr = NULL, buf[MAILTMPLEN];
1030 PIPE_S *fpipe;
/* conversion state carried between utf8_to_locale calls */
1031 CBUF_S cb;
1032 #ifdef NON_BLOCKING_IO
1033 int n;
1034 #endif
1036 dprint((4, "so_filter: \"%s\"\n", cmd ? cmd : "?"));
/* start from an empty filter chain */
1038 gf_filter_init();
1041 * After coming back from user's pipe command we need to convert
1042 * the output from the pipe back to UTF-8.
/* only linked in when the keyboard charmap is set and isn't UTF-8 */
1044 if(ps_global->keyboard_charmap && strucmp("UTF-8", ps_global->keyboard_charmap))
1045 gf_link_filter(gf_utf8, gf_utf8_opt(ps_global->keyboard_charmap));
/* caller's filters come next; the list ends at an entry with no filter */
1047 for( ; aux_filters && aux_filters->filter; aux_filters++)
1048 gf_link_filter(aux_filters->filter, aux_filters->data);
/* pc gets whatever leaves the chain */
1050 gf_set_terminal(pc);
1051 gf_link_filter(gf_terminal, NULL);
1053 cb.cbuf[0] = '\0';
1054 cb.cbufp = cb.cbuf;
1055 cb.cbufend = cb.cbuf;
1058 * Spawn filter feeding it data, and reading what it writes.
/* rewind the source before reading it */
1060 so_seek(source_so, 0L, 0);
1061 flags = PIPE_WRITE | PIPE_READ | PIPE_NOSHELL
1062 | (silent ? PIPE_SILENT : 0)
1063 | (!disable_reset ? PIPE_RESET : 0);
1065 if((fpipe = open_system_pipe(cmd, NULL, NULL, flags, 0, pipecb_f, pipe_report_error)) != NULL){
/*
 * Non-blocking variant: alternate between writing up to 1024 source
 * octets to the command and reading whatever output is available.
 */
1067 #ifdef NON_BLOCKING_IO
1069 if(fcntl(fileno(fpipe->in.f), F_SETFL, NON_BLOCKING_IO) == -1)
1070 errstr = "Can't set up non-blocking IO";
/* the prepend string, when given, goes to the command first, on its own line */
1072 if(prepend && (fputs(prepend, fpipe->out.f) == EOF
1073 || fputc('\n', fpipe->out.f) == EOF))
1074 errstr = error_description(errno);
1076 while(!errstr){
1077 /* if the pipe can't hold a K we're sunk (too bad PIPE_MAX
1078 * isn't ubiquitous ;).
1080 for(n = 0; !errstr && fpipe->out.f && n < 1024; n++)
1081 if(!so_readc(&c, source_so)){
1082 fclose(fpipe->out.f);
1083 fpipe->out.f = NULL;
1085 else{
1087 * Got a UTF-8 character from source_so.
1088 * We need to convert it to the user's locale charset
1089 * and then send the result to the pipe.
1091 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1092 for(i = 0; i < outchars && !errstr; i++)
1093 if(fputc(obuf[i], fpipe->out.f) == EOF)
1094 errstr = error_description(errno);
1098 * Note: We clear errno here and test below, before ferror,
1099 * because *some* stdio implementations consider
1100 * EAGAIN and EWOULDBLOCK equivalent to EOF...
1102 errno = 0;
1103 clearerr(fpipe->in.f); /* fix from <cananian@cananian.mit.edu> */
1105 while(!errstr && fgets(buf, sizeof(buf), fpipe->in.f))
1106 errstr = gf_filter_puts(buf);
1108 /* then fgets failed! */
1109 if(!errstr && !(errno == EAGAIN || errno == EWOULDBLOCK)){
1110 if(feof(fpipe->in.f)) /* nothing else interesting! */
1111 break;
1112 else if(ferror(fpipe->in.f)) /* bummer. */
1113 errstr = error_description(errno);
1115 else if(errno == EAGAIN || errno == EWOULDBLOCK)
1116 clearerr(fpipe->in.f);
/*
 * Blocking variant: write everything to the command, close the write
 * side, then read all of its output.
 */
1119 #else /* !NON_BLOCKING_IO */
1121 if(prepend && (pipe_puts(prepend, fpipe) == EOF
1122 || pipe_putc('\n', fpipe) == EOF))
1123 errstr = error_description(errno);
1126 * Well, do the best we can, and hope the pipe we're writing
1127 * doesn't fill up before we start reading...
1129 while(!errstr && so_readc(&c, source_so))
1130 if((outchars = utf8_to_locale((int) c, &cb, obuf, sizeof(obuf))) != 0)
1131 for(i = 0; i < outchars && !errstr; i++)
1132 if(pipe_putc(obuf[i], fpipe) == EOF)
1133 errstr = error_description(errno);
1135 if(pipe_close_write(fpipe))
1136 errstr = _("Pipe command returned error.");
/* each chunk of output goes down the filter chain */
1138 while(!errstr && pipe_gets(buf, sizeof(buf), fpipe))
1139 errstr = gf_filter_puts(buf);
1141 #endif /* !NON_BLOCKING_IO */
/* an error already recorded wins over the close status */
1143 if(close_system_pipe(&fpipe, NULL, pipecb_f) && !errstr)
1144 errstr = _("Pipe command returned error.");
/* flush the chain and send GF_EOD, even when errstr is set */
1146 gf_filter_eod();
1148 else
1149 errstr = _("Error setting up pipe command.");
1151 return(errstr);
1156 * gf_filter_puts - write the given string down the filter's pipe
1158 char *
1159 gf_filter_puts(register char *s)
1161 GF_INIT(gf_master, gf_master);
1164 * set up for any errors a filter may encounter
1166 if(setjmp(gf_error_state)){
1167 dprint((4, "ERROR: gf_filter_puts: %s\n",
1168 gf_error_string ? gf_error_string : "NULL"));
1169 return(gf_error_string);
1172 while(*s)
1173 GF_PUTC(gf_master, (*s++) & 0xff);
1175 GF_END(gf_master, gf_master);
1176 return(NULL);
1181 * gf_filter_eod - flush pending data filter's input queue and deliver
1182 * the GF_EOD marker.
1184 void
1185 gf_filter_eod(void)
1187 GF_INIT(gf_master, gf_master);
1188 (void) GF_FLUSH(gf_master);
1189 (*gf_master->f)(gf_master, GF_EOD);
1194 * END OF PIPE SUPPORT ROUTINES, BEGINNING OF FILTERS
1196 * Filters MUST use the specified interface (pointer to filter
1197 * structure, the unsigned character buffer in that struct, and a
1198 * cmd flag), and pass each resulting octet to the next filter in the
1199 * chain. Only the terminal filter need not call another filter.
1200 * As a result, filters share a pretty general structure.
1201 * Typically three main conditionals separate initialization from
1202 * data from end-of-data command processing.
1204 * Lastly, being character-at-a-time, they're a little more complex
1205 * to write than filters operating on buffers because some state
1206 * must typically be kept between characters. However, for a
1207 * little bit of complexity here, much convenience is gained later
1208 * as they can be arbitrarily chained together at run time and
1209 * consume few resources (especially memory or disk) as they work.
1210 * (NOTE 951005: even less cpu now that data between filters is passed
1211 * via a vector.)
1213 * A few notes about implementing filters:
1215 * - A generic filter template looks like:
1217 * void
1218 * gf_xxx_filter(f, flg)
1219 * FILTER_S *f;
1220 * int flg;
1222 * GF_INIT(f, f->next); // def's var's to speed queue drain
1224 * if(flg == GF_DATA){
1225 * register unsigned char c;
1227 * while(GF_GETC(f, c)){ // macro taking data off input queue
1228 * // operate on c and pass it on here
1229 * GF_PUTC(f->next, c); // macro writing output queue
1232 * GF_END(f, f->next); // macro to sync pointers/offsets
1233 * //WARNING: DO NOT RETURN BEFORE ALL INCOMING DATA'S PROCESSED
1235 * else if(flg == GF_EOD){
1236 * // process any buffered data here and pass it on
1237 * GF_FLUSH(f->next); // flush pending data to next filter
1238 * (*f->next->f)(f->next, GF_EOD);
1240 * else if(flg == GF_RESET){
1241 * // initialize any data in the struct here
1245 * - Any free storage allocated during initialization (typically tied
1246 * to the "line" pointer in FILTER_S) is the filter's responsibility
1247 * to clean up when the GF_EOD command comes through.
1249 * - Filters must pass any GF_EOD they receive on to the next
1250 * filter in the chain so it has the opportunity to flush
1251 * any buffered data.
1253 * - All filters expect NVT end-of-lines. The idea is to prepend
1254 * or append either the gf_local_nvtnl or gf_nvtnl_local
1255 * os-dependent filters to the data on the appropriate end of the
1256 * pipe for the task at hand.
1258 * - NOTE: As of 951004, filters no longer take their input as a single
1259 * char argument, but rather get data to operate on via a vector
1260 * representing the input queue in the FILTER_S structure.
1267 * BASE64 TO BINARY encoding and decoding routines below
1272 * BINARY to BASE64 filter (encoding described in rfc1341)
1274 void
1275 gf_binary_b64(FILTER_S *f, int flg)
1277 static char *v =
1278 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1279 GF_INIT(f, f->next);
1281 if(flg == GF_DATA){
1282 register unsigned char c;
1283 register unsigned char t = f->t;
1284 register long n = f->n;
1286 while(GF_GETC(f, c)){
1288 switch(n++){
1289 case 0 : case 3 : case 6 : case 9 : case 12: case 15: case 18:
1290 case 21: case 24: case 27: case 30: case 33: case 36: case 39:
1291 case 42: case 45:
1292 GF_PUTC(f->next, v[c >> 2]);
1293 /* byte 1: high 6 bits (1) */
1294 t = c << 4; /* remember high 2 bits for next */
1295 break;
1297 case 1 : case 4 : case 7 : case 10: case 13: case 16: case 19:
1298 case 22: case 25: case 28: case 31: case 34: case 37: case 40:
1299 case 43:
1300 GF_PUTC(f->next, v[(t|(c>>4)) & 0x3f]);
1301 t = c << 2;
1302 break;
1304 case 2 : case 5 : case 8 : case 11: case 14: case 17: case 20:
1305 case 23: case 26: case 29: case 32: case 35: case 38: case 41:
1306 case 44:
1307 GF_PUTC(f->next, v[(t|(c >> 6)) & 0x3f]);
1308 GF_PUTC(f->next, v[c & 0x3f]);
1309 break;
1312 if(n == 45){ /* start a new line? */
1313 GF_PUTC(f->next, '\015');
1314 GF_PUTC(f->next, '\012');
1315 n = 0L;
1319 f->n = n;
1320 f->t = t;
1321 GF_END(f, f->next);
1323 else if(flg == GF_EOD){ /* no more data */
1324 switch (f->n % 3) { /* handle trailing bytes */
1325 case 0: /* no trailing bytes */
1326 break;
1328 case 1:
1329 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1330 GF_PUTC(f->next, '='); /* byte 3 */
1331 GF_PUTC(f->next, '='); /* byte 4 */
1332 break;
1334 case 2:
1335 GF_PUTC(f->next, v[(f->t) & 0x3f]);
1336 GF_PUTC(f->next, '='); /* byte 4 */
1337 break;
1340 /* end with CRLF */
1341 if(f->n){
1342 GF_PUTC(f->next, '\015');
1343 GF_PUTC(f->next, '\012');
1346 (void) GF_FLUSH(f->next);
1347 (*f->next->f)(f->next, GF_EOD);
1349 else if(flg == GF_RESET){
1350 dprint((9, "-- gf_reset binary_b64\n"));
1351 f->n = 0L;
1358 * BASE64 to BINARY filter (encoding described in rfc1341)
1360 void
1361 gf_b64_binary(FILTER_S *f, int flg)
1363 static char v[] = {65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1364 65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,
1365 65,65,65,65,65,65,65,65,65,65,65,62,65,65,65,63,
1366 52,53,54,55,56,57,58,59,60,61,65,65,65,64,65,65,
1367 65, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
1368 15,16,17,18,19,20,21,22,23,24,25,65,65,65,65,65,
1369 65,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
1370 41,42,43,44,45,46,47,48,49,50,51,65,65,65,65,65};
1371 GF_INIT(f, f->next);
1373 if(flg == GF_DATA){
1374 register unsigned char c;
1375 register unsigned char t = f->t;
1376 register int n = (int) f->n;
1377 register int state = f->f1;
1379 while(GF_GETC(f, c)){
1381 if(state){
1382 state = 0;
1383 if (c != '=') {
1384 gf_error("Illegal '=' in base64 text");
1385 /* NO RETURN */
1389 /* in range, and a valid value? */
1390 if((c & ~0x7f) || (c = v[c]) > 63){
1391 if(c == 64){
1392 switch (n++) { /* check quantum position */
1393 case 2:
1394 state++; /* expect an equal as next char */
1395 break;
1397 case 3:
1398 n = 0L; /* restart quantum */
1399 break;
1401 default: /* impossible quantum position */
1402 gf_error("Internal base64 decoder error");
1403 /* NO RETURN */
1407 else{
1408 switch (n++) { /* install based on quantum position */
1409 case 0: /* byte 1: high 6 bits */
1410 t = c << 2;
1411 break;
1413 case 1: /* byte 1: low 2 bits */
1414 GF_PUTC(f->next, (t|(c >> 4)));
1415 t = c << 4; /* byte 2: high 4 bits */
1416 break;
1418 case 2: /* byte 2: low 4 bits */
1419 GF_PUTC(f->next, (t|(c >> 2)));
1420 t = c << 6; /* byte 3: high 2 bits */
1421 break;
1423 case 3:
1424 GF_PUTC(f->next, t | c);
1425 n = 0L; /* reinitialize mechanism */
1426 break;
1431 f->f1 = state;
1432 f->t = t;
1433 f->n = n;
1434 GF_END(f, f->next);
1436 else if(flg == GF_EOD){
1437 (void) GF_FLUSH(f->next);
1438 (*f->next->f)(f->next, GF_EOD);
1440 else if(flg == GF_RESET){
1441 dprint((9, "-- gf_reset b64_binary\n"));
1442 f->n = 0L; /* quantum position */
1443 f->f1 = 0; /* state holder: equal seen? */
1451 * QUOTED-PRINTABLE ENCODING AND DECODING filters below.
1452 * encoding described in rfc1341
#define	GF_MAXLINE	80		/* size of the white-space hold buffer */

/*
 * Default handling of one character in the QUOTED-PRINTABLE to 8BIT
 * decoder.  Relies on a variable named "state" in the caller's scope:
 *   '='    moves to the EQUAL state
 *   ' '    moves to WSPACE and restarts the held spaces in (f)->line
 *   other  is written straight through to the next filter
 */
#define	GF_QP_DEFAULT(f, c)	{ \
				    if((c) == '='){ \
					state = EQUAL; \
				    } \
				    else if((c) == ' '){ \
					state = WSPACE; \
					/* restart the held white space */ \
					(f)->linep = (f)->line; \
					*((f)->linep)++ = ' '; \
				    } \
				    else \
				      GF_PUTC((f)->next, (c)); \
				}
1476 * QUOTED-PRINTABLE to 8BIT filter
1478 void
1479 gf_qp_8bit(FILTER_S *f, int flg)
1482 GF_INIT(f, f->next);
1484 if(flg == GF_DATA){
1485 register unsigned char c;
1486 register int state = f->f1;
1488 while(GF_GETC(f, c)){
1490 switch(state){
1491 case DFL : /* default case */
1492 default:
1493 GF_QP_DEFAULT(f, c);
1494 break;
1496 case CCR : /* non-significant space */
1497 state = DFL;
1498 if(c == '\012')
1499 continue; /* go on to next char */
1501 GF_QP_DEFAULT(f, c);
1502 break;
1504 case EQUAL :
1505 if(c == '\015'){ /* "=\015" is a soft EOL */
1506 state = CCR;
1507 break;
1510 if(c == '='){ /* compatibility clause for old guys */
1511 GF_PUTC(f->next, '=');
1512 state = DFL;
1513 break;
1516 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1518 * First character after '=' not a hex digit.
1519 * This ain't right, but we're going to treat it as
1520 * plain old text instead of an '=' followed by hex.
1521 * In other words, they forgot to encode the '='.
1522 * Before 4.60 we just bailed with an error here, but now
1523 * we keep going as long as we are just displaying
1524 * the result (and not saving it or something).
1526 * Wait! The users don't like that. They want to be able
1527 * to use it even if it might be wrong. So just plow
1528 * ahead even if displaying.
1530 * Better have this be a constant string so that if we
1531 * get multiple instances of it in a single message we
1532 * can avoid the too many error messages problem. It
1533 * better be the same message as the one a few lines
1534 * below, as well.
1536 * Turn off decoding after encountering such an error and
1537 * just dump the rest of the text as is.
1539 state = STOP_DECODING;
1540 GF_PUTC(f->next, '=');
1541 GF_PUTC(f->next, c);
1542 q_status_message(SM_ORDER,3,3,
1543 _("Warning: Non-hexadecimal character in QP encoding!"));
1545 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =\n", c, c));
1546 break;
1549 if (isdigit ((unsigned char)c))
1550 f->t = c - '0';
1551 else
1552 f->t = c - (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1554 f->f2 = c; /* store character in case we have to
1555 back out in !isxdigit below */
1557 state = HEX;
1558 break;
1560 case HEX :
1561 state = DFL;
1562 if(!isxdigit((unsigned char)c)){ /* must be hex! */
1563 state = STOP_DECODING;
1564 GF_PUTC(f->next, '=');
1565 GF_PUTC(f->next, f->f2);
1566 GF_PUTC(f->next, c);
1567 q_status_message(SM_ORDER,3,3,
1568 _("Warning: Non-hexadecimal character in QP encoding!"));
1570 dprint((2, "gf_qp_8bit: warning: non-hex char in QP encoding: char \"%c\" (%d) follows =%c\n", c, c, f->f2));
1571 break;
1574 if (isdigit((unsigned char)c))
1575 c -= '0';
1576 else
1577 c -= (isupper((unsigned char)c) ? 'A' - 10 : 'a' - 10);
1579 GF_PUTC(f->next, c + (f->t << 4));
1580 break;
1582 case WSPACE :
1583 if(c == ' '){ /* toss it in with other spaces */
1584 if(f->linep - f->line < GF_MAXLINE)
1585 *(f->linep)++ = ' ';
1586 break;
1589 state = DFL;
1590 if(c == '\015'){ /* not our white space! */
1591 f->linep = f->line; /* reset buffer */
1592 GF_PUTC(f->next, '\015');
1593 break;
1596 /* the spaces are ours, write 'em */
1597 f->n = f->linep - f->line;
1598 while((f->n)--)
1599 GF_PUTC(f->next, ' ');
1601 GF_QP_DEFAULT(f, c); /* take care of 'c' in default way */
1602 break;
1604 case STOP_DECODING :
1605 GF_PUTC(f->next, c);
1606 break;
1610 f->f1 = state;
1611 GF_END(f, f->next);
1613 else if(flg == GF_EOD){
1614 fs_give((void **)&(f->line));
1615 (void) GF_FLUSH(f->next);
1616 (*f->next->f)(f->next, GF_EOD);
1618 else if(flg == GF_RESET){
1619 dprint((9, "-- gf_reset qp_8bit\n"));
1620 f->f1 = DFL;
1621 f->linep = f->line = (char *)fs_get(GF_MAXLINE * sizeof(char));
1628 * USEFUL MACROS TO HELP WITH QP ENCODING
#define	QP_MAXL		75	/* 76th place only for continuation */

/*
 * Write a QUOTED-PRINTABLE soft line break ("=" CRLF)
 */
#define	GF_8BIT_WRAP(f)		{ \
				    GF_PUTC((f)->next, '='); \
				    GF_PUTC((f)->next, '\015'); \
				    GF_PUTC((f)->next, '\012'); \
				}

/*
 * write a quoted octet in QUOTED-PRINTABLE encoding, adding soft
 * line break if needed.  (f)->n is the length of the current line.
 */
#define	GF_8BIT_PUT_QUOTE(f, c)	{ \
				    if(((f)->n += 3) > QP_MAXL){ \
					GF_8BIT_WRAP(f); \
					(f)->n = 3;	/* set line count */ \
				    } \
				    GF_PUTC((f)->next, '='); \
				    GF_PUTC((f)->next, HEX_CHAR1(c)); \
				    GF_PUTC((f)->next, HEX_CHAR2(c)); \
				}

/*
 * just write an ordinary octet in QUOTED-PRINTABLE, wrapping line
 * if needed.  A '.' in the first column is quoted instead of written
 * literally.  Macro arguments are parenthesized so that any expression
 * may safely be passed for f and c.
 */
#define	GF_8BIT_PUT(f, c)	{ \
				    if((++((f)->n)) > QP_MAXL){ \
					GF_8BIT_WRAP(f); \
					(f)->n = 1L; \
				    } \
				    if((f)->n == 1L && (c) == '.'){ \
					GF_8BIT_PUT_QUOTE(f, c); \
					(f)->n = 3; \
				    } \
				    else \
				      GF_PUTC((f)->next, (c)); \
				}

/*
 * default action for 8bit to quoted printable encoder.  Relies on a
 * variable named "state" in the caller's scope.  NOTE: this expands to
 * a bare if/else chain, so only use it as a complete statement inside
 * braces.
 */
#define	GF_8BIT_DEFAULT(f, c)	if((c) == ' '){ \
				    state = WSPACE; \
				} \
				else if((c) == '\015'){ \
				    state = CCR; \
				} \
				else if(iscntrl((c) & 0x7f) || ((c) == 0x7f) \
					|| ((c) & 0x80) || ((c) == '=')){ \
				    GF_8BIT_PUT_QUOTE(f, c); \
				} \
				else{ \
				    GF_8BIT_PUT(f, c); \
				}
1693 * 8BIT to QUOTED-PRINTABLE filter
1695 void
1696 gf_8bit_qp(FILTER_S *f, int flg)
1698 short dummy_dots = 0, dummy_dmap = 1;
1699 GF_INIT(f, f->next);
1701 if(flg == GF_DATA){
1702 register unsigned char c;
1703 register int state = f->f1;
1705 while(GF_GETC(f, c)){
1707 /* keep track of "^JFrom " */
1708 Find_Froms(f->t, dummy_dots, f->f2, dummy_dmap, c);
1710 switch(state){
1711 case DFL : /* handle ordinary case */
1712 GF_8BIT_DEFAULT(f, c);
1713 break;
1715 case CCR : /* true line break? */
1716 state = DFL;
1717 if(c == '\012'){
1718 GF_PUTC(f->next, '\015');
1719 GF_PUTC(f->next, '\012');
1720 f->n = 0L;
1722 else{ /* nope, quote the CR */
1723 GF_8BIT_PUT_QUOTE(f, '\015');
1724 GF_8BIT_DEFAULT(f, c); /* and don't forget about c! */
1726 break;
1728 case WSPACE:
1729 state = DFL;
1730 if(c == '\015' || f->t){ /* handle the space */
1731 GF_8BIT_PUT_QUOTE(f, ' ');
1732 f->t = 0; /* reset From flag */
1734 else
1735 GF_8BIT_PUT(f, ' ');
1737 GF_8BIT_DEFAULT(f, c); /* handle 'c' in the default way */
1738 break;
1742 f->f1 = state;
1743 GF_END(f, f->next);
1745 else if(flg == GF_EOD){
1746 switch(f->f1){
1747 case CCR :
1748 GF_8BIT_PUT_QUOTE(f, '\015'); /* write the last cr */
1749 break;
1751 case WSPACE :
1752 GF_8BIT_PUT_QUOTE(f, ' '); /* write the last space */
1753 break;
1756 (void) GF_FLUSH(f->next);
1757 (*f->next->f)(f->next, GF_EOD);
1759 else if(flg == GF_RESET){
1760 dprint((9, "-- gf_reset 8bit_qp\n"));
1761 f->f1 = DFL; /* state from last character */
1762 f->f2 = 1; /* state of "^NFrom " bitmap */
1763 f->t = 0;
1764 f->n = 0L; /* number of chars in current line */
1769 * This filter converts characters in one character set (the character
1770 * set of a message, for example) to another (the user's character set).
1772 void
1773 gf_convert_8bit_charset(FILTER_S *f, int flg)
1775 static unsigned char *conv_table = NULL;
1776 GF_INIT(f, f->next);
1778 if(flg == GF_DATA){
1779 register unsigned char c;
1781 while(GF_GETC(f, c)){
1782 GF_PUTC(f->next, conv_table ? conv_table[c] : c);
1785 GF_END(f, f->next);
1787 else if(flg == GF_EOD){
1788 (void) GF_FLUSH(f->next);
1789 (*f->next->f)(f->next, GF_EOD);
1791 else if(flg == GF_RESET){
1792 dprint((9, "-- gf_reset convert_8bit_charset\n"));
1793 conv_table = (f->opt) ? (unsigned char *) (f->opt) : NULL;
/*
 * Options for gf_convert_utf8_charset, built by
 * gf_convert_utf8_charset_opt and handed over through f->opt.
 */
typedef struct _utf8c_s {
    void *conv_table;		/* conversion table, indexed by code point */
    int   report_err;		/* set: untranslatable char is an error    */
} UTF8C_S;
1806 * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset.
1807 * Characters missing from the destination set, and invalid UTF-8 sequences,
1808 * will be converted to "?".
1810 void
1811 gf_convert_utf8_charset(FILTER_S *f, int flg)
1813 static unsigned short *conv_table = NULL;
1814 static int report_err = 0;
1815 register int more = f->f2;
1816 register long u = f->n;
1819 * "more" is the number of subsequent octets needed to complete a character,
1820 * it is stored in f->f2.
1821 * "u" is the accumulated Unicode character, it is stored in f->n
1824 GF_INIT(f, f->next);
1826 if(flg == GF_DATA){
1827 register unsigned char c;
1829 while(GF_GETC(f, c)){
1830 if(!conv_table){ /* can't do much if no conversion table */
1831 GF_PUTC(f->next, c);
1833 /* UTF-8 continuation? */
1834 else if((c > 0x7f) && (c < 0xc0)){
1835 if(more){
1836 u <<= 6; /* shift current value by 6 bits */
1837 u |= c & 0x3f;
1838 if (!--more){ /* last octet? */
1839 if(u >= 0xffff || (u = conv_table[u]) == NOCHAR){
1841 * non-BMP character or a UTF-8 character
1842 * which is not representable in the
1843 * charset we're converting to.
1845 c = '?';
1846 if(report_err){
1847 if(f->opt)
1848 fs_give((void **) &f->opt);
1850 /* TRANSLATORS: error while translating from one
1851 character set to another, for example from UTF-8
1852 to ISO-2022-JP or something like that. */
1853 gf_error(_("translation error"));
1854 /* NO RETURN */
1857 else{
1858 if(u > 0xff){
1859 c = (unsigned char) (u >> 8);
1860 GF_PUTC(f->next, c);
1863 c = (unsigned char) u & 0xff;
1866 GF_PUTC(f->next, c);
1869 else{ /* continuation when not in progress */
1870 GF_PUTC(f->next, '?');
1873 else{
1874 if(more){ /* incomplete UTF-8 character */
1875 GF_PUTC(f->next, '?');
1876 more = 0;
1878 if(c < 0x80){ /* U+0000 - U+007f */
1879 GF_PUTC(f->next, c);
1881 else if(c < 0xe0){ /* U+0080 - U+07ff */
1882 u = c & 0x1f; /* first 5 bits of 12 */
1883 more = 1;
1885 else if(c < 0xf0){ /* U+1000 - U+ffff */
1886 u = c & 0x0f; /* first 4 bits of 16 */
1887 more = 2;
1889 /* in case we ever support non-BMP Unicode */
1890 else if (c < 0xf8){ /* U+10000 - U+10ffff */
1891 u = c & 0x07; /* first 3 bits of 20.5 */
1892 more = 3;
1894 #if 0 /* ISO 10646 not in Unicode */
1895 else if (c < 0xfc){ /* ISO 10646 20000 - 3ffffff */
1896 u = c & 0x03; /* first 2 bits of 26 */
1897 more = 4;
1899 else if (c < 0xfe){ /* ISO 10646 4000000 - 7fffffff */
1900 u = c & 0x03; /* first 2 bits of 26 */
1901 more = 5;
1903 #endif
1904 else{ /* not in Unicode */
1905 GF_PUTC(f->next, '?');
1910 f->f2 = more;
1911 f->n = u;
1912 GF_END(f, f->next);
1914 else if(flg == GF_EOD){
1915 (void) GF_FLUSH(f->next);
1916 if(f->opt)
1917 fs_give((void **) &f->opt);
1919 (*f->next->f)(f->next, GF_EOD);
1921 else if(flg == GF_RESET){
1922 dprint((9, "-- gf_reset convert_utf8_charset\n"));
1923 conv_table = ((UTF8C_S *) f->opt)->conv_table;
1924 report_err = ((UTF8C_S *) f->opt)->report_err;
1925 f->f2 = 0;
1926 f->n = 0L;
1931 void *
1932 gf_convert_utf8_charset_opt(void *table, int report_err)
1934 UTF8C_S *utf8c;
1936 utf8c = (UTF8C_S *) fs_get(sizeof(UTF8C_S));
1937 utf8c->conv_table = table;
1938 utf8c->report_err = report_err;
1939 return((void *) utf8c);
1944 * ISO-2022-JP to EUC (on Unix) or Shift-JIS (on PC) filter
1946 * The routine is call ..._to_euc but it is really to either euc (unix Pine)
1947 * or to Shift-JIS (if PC-Pine).
1949 void
1950 gf_2022_jp_to_euc(FILTER_S *f, int flg)
1952 register unsigned char c;
1953 register int state = f->f1;
1956 * f->t lit means we're in middle of decoding a sequence of characters.
1957 * f->f2 keeps track of first character of pair for Shift-JIS.
1958 * f->f1 is the state.
1961 GF_INIT(f, f->next);
1963 if(flg == GF_DATA){
1964 while(GF_GETC(f, c)){
1965 switch(state){
1966 case ESC: /* saw ESC */
1967 if(!f->t && c == '$')
1968 state = ESCDOL;
1969 else if(f->t && c == '(')
1970 state = ESCPAR;
1971 else{
1972 GF_PUTC(f->next, '\033');
1973 GF_PUTC(f->next, c);
1974 state = DFL;
1977 break;
1979 case ESCDOL: /* saw ESC $ */
1980 if(c == 'B' || c == '@'){
1981 state = EUC;
1982 f->t = 1; /* filtering into euc */
1983 f->f2 = -1; /* first character of pair */
1985 else{
1986 GF_PUTC(f->next, '\033');
1987 GF_PUTC(f->next, '$');
1988 GF_PUTC(f->next, c);
1989 state = DFL;
1992 break;
1994 case ESCPAR: /* saw ESC ( */
1995 if(c == 'B' || c == 'J' || c == 'H'){
1996 state = DFL;
1997 f->t = 0; /* done filtering */
1999 else{
2000 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2001 GF_PUTC(f->next, '('); /* escape sequences, which */
2002 GF_PUTC(f->next, c); /* this appears to be. */
2005 break;
2007 case EUC: /* filtering into euc */
2008 if(c == '\033')
2009 state = ESC;
2010 else{
2011 #ifdef _WINDOWS /* Shift-JIS */
2012 c &= 0x7f; /* 8-bit can't win */
2013 if (f->f2 >= 0){ /* second of a pair? */
2014 int rowOffset = (f->f2 < 95) ? 112 : 176;
2015 int cellOffset = (f->f2 % 2) ? ((c > 95) ? 32 : 31)
2016 : 126;
2018 GF_PUTC(f->next, ((f->f2 + 1) >> 1) + rowOffset);
2019 GF_PUTC(f->next, c + cellOffset);
2020 f->f2 = -1; /* restart */
2022 else if(c > 0x20 && c < 0x7f)
2023 f->f2 = c; /* first of pair */
2024 else{
2025 GF_PUTC(f->next, c); /* write CTL as itself */
2026 f->f2 = -1;
2028 #else /* EUC */
2029 GF_PUTC(f->next, (c > 0x20 && c < 0x7f) ? c | 0x80 : c);
2030 #endif
2033 break;
2035 case DFL:
2036 default:
2037 if(c == '\033')
2038 state = ESC;
2039 else
2040 GF_PUTC(f->next, c);
2042 break;
2046 f->f1 = state;
2047 GF_END(f, f->next);
2049 else if(flg == GF_EOD){
2050 switch(state){
2051 case ESC:
2052 GF_PUTC(f->next, '\033');
2053 break;
2055 case ESCDOL:
2056 GF_PUTC(f->next, '\033');
2057 GF_PUTC(f->next, '$');
2058 break;
2060 case ESCPAR:
2061 GF_PUTC(f->next, '\033'); /* Don't set hibit for */
2062 GF_PUTC(f->next, '('); /* escape sequences. */
2063 break;
2066 (void) GF_FLUSH(f->next);
2067 (*f->next->f)(f->next, GF_EOD);
2069 else if(flg == GF_RESET){
2070 dprint((9, "-- gf_reset jp_to_euc\n"));
2071 f->f1 = DFL; /* state */
2072 f->t = 0; /* not translating to euc */
2078 * EUC (on Unix) or Shift-JIS (on PC) to ISO-2022-JP filter
2080 void
2081 gf_native8bitjapanese_to_2022_jp(FILTER_S *f, int flg)
2083 #ifdef _WINDOWS
2084 gf_sjis_to_2022_jp(f, flg);
2085 #else
2086 gf_euc_to_2022_jp(f, flg);
2087 #endif
2091 void
2092 gf_euc_to_2022_jp(FILTER_S *f, int flg)
2094 register unsigned char c;
2097 * f->t lit means we've sent the start esc seq but not the end seq.
2098 * f->f2 keeps track of first character of pair for Shift-JIS.
2101 GF_INIT(f, f->next);
2103 if(flg == GF_DATA){
2104 while(GF_GETC(f, c)){
2105 if(f->t){
2106 if(c & 0x80){
2107 GF_PUTC(f->next, c & 0x7f);
2109 else{
2110 GF_PUTC(f->next, '\033');
2111 GF_PUTC(f->next, '(');
2112 GF_PUTC(f->next, 'B');
2113 GF_PUTC(f->next, c);
2114 f->f2 = -1;
2115 f->t = 0;
2118 else{
2119 if(c & 0x80){
2120 GF_PUTC(f->next, '\033');
2121 GF_PUTC(f->next, '$');
2122 GF_PUTC(f->next, 'B');
2123 GF_PUTC(f->next, c & 0x7f);
2124 f->t = 1;
2126 else{
2127 GF_PUTC(f->next, c);
2132 GF_END(f, f->next);
2134 else if(flg == GF_EOD){
2135 if(f->t){
2136 GF_PUTC(f->next, '\033');
2137 GF_PUTC(f->next, '(');
2138 GF_PUTC(f->next, 'B');
2139 f->t = 0;
2140 f->f2 = -1;
2143 (void) GF_FLUSH(f->next);
2144 (*f->next->f)(f->next, GF_EOD);
2146 else if(flg == GF_RESET){
2147 dprint((9, "-- gf_reset euc_to_jp\n"));
2148 f->t = 0;
2149 f->f2 = -1;
2153 void
2154 gf_sjis_to_2022_jp(FILTER_S *f, int flg)
2156 register unsigned char c;
2159 * f->t lit means we've sent the start esc seq but not the end seq.
2160 * f->f2 keeps track of first character of pair for Shift-JIS.
2163 GF_INIT(f, f->next);
2165 if(flg == GF_DATA){
2166 while(GF_GETC(f, c)){
2167 if(f->t){
2168 if(f->f2 >= 0){ /* second of a pair? */
2169 int adjust = c < 159;
2170 int rowOffset = f->f2 < 160 ? 112 : 176;
2171 int cellOffset = adjust ? (c > 127 ? 32 : 31) : 126;
2173 GF_PUTC(f->next, ((f->f2 - rowOffset) << 1) - adjust);
2174 GF_PUTC(f->next, c - cellOffset);
2175 f->f2 = -1;
2177 else if(c & 0x80){
2178 f->f2 = c; /* remember first of pair */
2180 else{
2181 GF_PUTC(f->next, '\033');
2182 GF_PUTC(f->next, '(');
2183 GF_PUTC(f->next, 'B');
2184 GF_PUTC(f->next, c);
2185 f->f2 = -1;
2186 f->t = 0;
2189 else{
2190 if(c & 0x80){
2191 GF_PUTC(f->next, '\033');
2192 GF_PUTC(f->next, '$');
2193 GF_PUTC(f->next, 'B');
2194 f->f2 = c;
2195 f->t = 1;
2197 else{
2198 GF_PUTC(f->next, c);
2203 GF_END(f, f->next);
2205 else if(flg == GF_EOD){
2206 if(f->t){
2207 GF_PUTC(f->next, '\033');
2208 GF_PUTC(f->next, '(');
2209 GF_PUTC(f->next, 'B');
2210 f->t = 0;
2211 f->f2 = -1;
2214 (void) GF_FLUSH(f->next);
2215 (*f->next->f)(f->next, GF_EOD);
2217 else if(flg == GF_RESET){
2218 dprint((9, "-- gf_reset sjis_to_jp\n"));
2219 f->t = 0;
2220 f->f2 = -1;
2227 * Various charset to UTF-8 Translation filter
2231 * utf8 conversion options
2233 typedef struct _utf8_s {
2234 CHARSET *charset;
2235 unsigned long ucsc;
2236 } UTF8_S;
2238 #define UTF8_BLOCK 1024
2239 #define UTF8_EOB(f) ((f)->line + (f)->f2 - 1)
2240 #define UTF8_ADD(f, c) \
2242 if(p >= eobuf){ \
2243 f->f2 += UTF8_BLOCK; \
2244 fs_resize((void **)&f->line, \
2245 (size_t) f->f2 * sizeof(char)); \
2246 eobuf = UTF8_EOB(f); \
2247 p = eobuf - UTF8_BLOCK; \
2249 *p++ = c; \
2251 #define GF_UTF8_FLUSH(f) { \
2252 register long n; \
2253 SIZEDTEXT intext, outtext; \
2254 intext.data = (unsigned char *) f->line; \
2255 intext.size = p - f->line; \
2256 memset(&outtext, 0, sizeof(SIZEDTEXT)); \
2257 if(!((UTF8_S *) f->opt)->charset){ \
2258 for(n = 0; n < intext.size; n++) \
2259 GF_PUTC(f->next, (intext.data[n] & 0x80) ? '?' : intext.data[n]); \
2261 else if(utf8_text_cs(&intext, ((UTF8_S *) f->opt)->charset, &outtext, NULL, NULL)){ \
2262 for(n = 0; n < outtext.size; n++) \
2263 GF_PUTC(f->next, outtext.data[n]); \
2264 if(outtext.data && intext.data != outtext.data) \
2265 fs_give((void **) &outtext.data); \
2267 else{ \
2268 for(n = 0; n < intext.size; n++) \
2269 GF_PUTC(f->next, '?'); \
2275 * gf_utf8 - text in specified charset to to UTF-8 filter
2276 * Process line-at-a-time rather than character
2277 * because ISO-2022-JP. Call utf8_text_cs by hand
2278 * rather than utf8_text to reduce the cost of
2279 * utf8_charset() for each line.
2281 void
2282 gf_utf8(FILTER_S *f, int flg)
2284 register char *p = f->linep;
2285 register char *eobuf = UTF8_EOB(f);
2286 GF_INIT(f, f->next);
2288 if(flg == GF_DATA){
2289 register int state = f->f1;
2290 register unsigned char c;
2292 while(GF_GETC(f, c)){
2294 switch(state){
2295 case CCR :
2296 state = DFL;
2297 if(c == '\012'){
2298 GF_UTF8_FLUSH(f);
2299 p = f->line;
2300 GF_PUTC(f->next, '\015');
2301 GF_PUTC(f->next, '\012');
2303 else{
2304 UTF8_ADD(f, '\015');
2305 UTF8_ADD(f, c);
2308 break;
2310 default :
2311 if(c == '\015'){
2312 state = CCR;
2314 else
2315 UTF8_ADD(f, c);
2319 f->f1 = state;
2320 GF_END(f, f->next);
2322 else if(flg == GF_EOD){
2324 if(p != f->line)
2325 GF_UTF8_FLUSH(f);
2327 fs_give((void **) &f->line);
2328 fs_give((void **) &f->opt);
2329 (void) GF_FLUSH(f->next);
2330 (*f->next->f)(f->next, GF_EOD);
2332 else if(GF_RESET){
2333 dprint((9, "-- gf_reset utf8\n"));
2334 f->f1 = DFL;
2335 f->f2 = UTF8_BLOCK; /* input buffer length */
2336 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
2339 f->linep = p;
2343 void *
2344 gf_utf8_opt(char *charset)
2346 UTF8_S *utf8;
2348 utf8 = (UTF8_S *) fs_get(sizeof(UTF8_S));
2350 utf8->charset = (CHARSET *) utf8_charset(charset);
2353 * When we get 8-bit non-ascii characters but it is supposed to
2354 * be ascii we want it to turn into question marks, not
2355 * just behave as if it is UTF-8 which is what happens
2356 * with ascii because there is no translation table.
2357 * So we need to catch the ascii special case here.
2359 if(utf8->charset && utf8->charset->type == CT_ASCII)
2360 utf8->charset = NULL;
2362 return((void *) utf8);
2367 * RICHTEXT-TO-PLAINTEXT filter
2371 * option to be used by rich2plain (NOTE: if this filter is ever
2372 * used more than once in a pipe, all instances will have the same
2373 * option value)
2377 /*----------------------------------------------------------------------
2378 richtext to plaintext filter
2380 Args: f --
2381 flg --
2383 This basically removes all richtext formatting. A cute hack is used
2384 to get bold and underlining to work.
2385 Further work could be done to handle things like centering and right
2386 and left flush, but then it could no longer be done in place. This
2387 operates on text *with* CRLF's.
2389 WARNING: does not wrap lines!
2390 ----*/
2391 void
2392 gf_rich2plain(FILTER_S *f, int flg)
2394 static int rich_bold_on = 0, rich_uline_on = 0;
2396 /* BUG: qoute incoming \255 values */
2397 GF_INIT(f, f->next);
2399 if(flg == GF_DATA){
2400 register unsigned char c;
2401 register int state = f->f1;
2402 register int plain;
2404 plain = f->opt ? (*(int *) f->opt) : 0;
2406 while(GF_GETC(f, c)){
2408 switch(state){
2409 case TOKEN : /* collect a richtext token */
2410 if(c == '>'){ /* what should we do with it? */
2411 state = DFL; /* return to default next time */
2412 *(f->linep) = '\0'; /* cap off token */
2413 if(f->line[0] == 'l' && f->line[1] == 't'){
2414 GF_PUTC(f->next, '<'); /* literal '<' */
2416 else if(f->line[0] == 'n' && f->line[1] == 'l'){
2417 GF_PUTC(f->next, '\015');/* newline! */
2418 GF_PUTC(f->next, '\012');
2420 else if(!strcmp("comment", f->line)){
2421 (f->f2)++;
2423 else if(!strcmp("/comment", f->line)){
2424 f->f2 = 0;
2426 else if(!strcmp("/paragraph", f->line)) {
2427 GF_PUTC(f->next, '\r');
2428 GF_PUTC(f->next, '\n');
2429 GF_PUTC(f->next, '\r');
2430 GF_PUTC(f->next, '\n');
2432 else if(!plain /* gf_rich_plain */){
2433 if(!strcmp(f->line, "bold")) {
2434 GF_PUTC(f->next, TAG_EMBED);
2435 GF_PUTC(f->next, TAG_BOLDON);
2436 rich_bold_on = 1;
2437 } else if(!strcmp(f->line, "/bold")) {
2438 GF_PUTC(f->next, TAG_EMBED);
2439 GF_PUTC(f->next, TAG_BOLDOFF);
2440 rich_bold_on = 0;
2441 } else if(!strcmp(f->line, "italic")) {
2442 GF_PUTC(f->next, TAG_EMBED);
2443 GF_PUTC(f->next, TAG_ULINEON);
2444 rich_uline_on = 1;
2445 } else if(!strcmp(f->line, "/italic")) {
2446 GF_PUTC(f->next, TAG_EMBED);
2447 GF_PUTC(f->next, TAG_ULINEOFF);
2448 rich_uline_on = 0;
2449 } else if(!strcmp(f->line, "underline")) {
2450 GF_PUTC(f->next, TAG_EMBED);
2451 GF_PUTC(f->next, TAG_ULINEON);
2452 rich_uline_on = 1;
2453 } else if(!strcmp(f->line, "/underline")) {
2454 GF_PUTC(f->next, TAG_EMBED);
2455 GF_PUTC(f->next, TAG_ULINEOFF);
2456 rich_uline_on = 0;
2459 /* else we just ignore the token! */
2461 f->linep = f->line; /* reset token buffer */
2463 else{ /* add char to token */
2464 if(f->linep - f->line > 40){
2465 /* What? rfc1341 says 40 char tokens MAX! */
2466 fs_give((void **)&(f->line));
2467 gf_error("Richtext token over 40 characters");
2468 /* NO RETURN */
2471 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2473 break;
2475 case CCR :
2476 state = DFL; /* back to default next time */
2477 if(c == '\012'){ /* treat as single space? */
2478 GF_PUTC(f->next, ' ');
2479 break;
2481 /* fall thru to process c */
2483 case DFL :
2484 default:
2485 if(c == '<')
2486 state = TOKEN;
2487 else if(c == '\015')
2488 state = CCR;
2489 else if(!f->f2) /* not in comment! */
2490 GF_PUTC(f->next, c);
2492 break;
2496 f->f1 = state;
2497 GF_END(f, f->next);
2499 else if(flg == GF_EOD){
2500 if((f->f1 = (f->linep != f->line)) != 0){
2501 /* incomplete token!! */
2502 gf_error("Incomplete token in richtext");
2503 /* NO RETURN */
2506 if(rich_uline_on){
2507 GF_PUTC(f->next, TAG_EMBED);
2508 GF_PUTC(f->next, TAG_ULINEOFF);
2509 rich_uline_on = 0;
2511 if(rich_bold_on){
2512 GF_PUTC(f->next, TAG_EMBED);
2513 GF_PUTC(f->next, TAG_BOLDOFF);
2514 rich_bold_on = 0;
2517 fs_give((void **)&(f->line));
2518 (void) GF_FLUSH(f->next);
2519 (*f->next->f)(f->next, GF_EOD);
2521 else if(flg == GF_RESET){
2522 dprint((9, "-- gf_reset rich2plain\n"));
2523 f->f1 = DFL; /* state */
2524 f->f2 = 0; /* set means we're in a comment */
2525 f->linep = f->line = (char *)fs_get(45 * sizeof(char));
/*
 * function called from the outside to set
 * richtext filter's options: the caller's int pointer is simply
 * handed back as the opaque option value.
 */
void *
gf_rich2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2543 * ENRICHED-TO-PLAIN text filter
2546 #define TEF_QUELL 0x01
2547 #define TEF_NOFILL 0x02
2551 /*----------------------------------------------------------------------
2552 enriched text to plain text filter (ala rfc1523)
2554 Args: f -- state and input data
2555 flg --
2557 This basically removes all enriched formatting. A cute hack is used
2558 to get bold and underlining to work.
2560 Further work could be done to handle things like centering and right
2561 and left flush, but then it could no longer be done in place. This
2562 operates on text *with* CRLF's.
2564 WARNING: does not wrap lines!
2565 ----*/
2566 void
2567 gf_enriched2plain(FILTER_S *f, int flg)
2569 static int enr_uline_on = 0, enr_bold_on = 0;
2571 /* BUG: qoute incoming \255 values */
2572 GF_INIT(f, f->next);
2574 if(flg == GF_DATA){
2575 register unsigned char c;
2576 register int state = f->f1;
2577 register int plain;
2579 plain = f->opt ? (*(int *) f->opt) : 0;
2581 while(GF_GETC(f, c)){
2583 switch(state){
2584 case TOKEN : /* collect a richtext token */
2585 if(c == '>'){ /* what should we do with it? */
2586 int off = *f->line == '/';
2587 char *token = f->line + (off ? 1 : 0);
2588 state = DFL;
2589 *f->linep = '\0';
2590 if(!strcmp("param", token)){
2591 if(off)
2592 f->f2 &= ~TEF_QUELL;
2593 else
2594 f->f2 |= TEF_QUELL;
2596 else if(!strcmp("nofill", token)){
2597 if(off)
2598 f->f2 &= ~TEF_NOFILL;
2599 else
2600 f->f2 |= TEF_NOFILL;
2602 else if(!plain /* gf_enriched_plain */){
2603 /* Following is a cute hack or two to get
2604 bold and underline on the screen.
2605 See Putline0n() where these codes are
2606 interpreted */
2607 if(!strcmp("bold", token)) {
2608 GF_PUTC(f->next, TAG_EMBED);
2609 GF_PUTC(f->next, off ? TAG_BOLDOFF : TAG_BOLDON);
2610 enr_bold_on = off ? 0 : 1;
2611 } else if(!strcmp("italic", token)) {
2612 GF_PUTC(f->next, TAG_EMBED);
2613 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2614 enr_uline_on = off ? 0 : 1;
2615 } else if(!strcmp("underline", token)) {
2616 GF_PUTC(f->next, TAG_EMBED);
2617 GF_PUTC(f->next, off ? TAG_ULINEOFF : TAG_ULINEON);
2618 enr_uline_on = off ? 0 : 1;
2621 /* else we just ignore the token! */
2623 f->linep = f->line; /* reset token buffer */
2625 else if(c == '<'){ /* literal '<'? */
2626 if(f->linep == f->line){
2627 GF_PUTC(f->next, '<');
2628 state = DFL;
2630 else{
2631 fs_give((void **)&(f->line));
2632 gf_error("Malformed Enriched text: unexpected '<'");
2633 /* NO RETURN */
2636 else{ /* add char to token */
2637 if(f->linep - f->line > 60){ /* rfc1523 says 60 MAX! */
2638 fs_give((void **)&(f->line));
2639 gf_error("Malformed Enriched text: token too long");
2640 /* NO RETURN */
2643 *(f->linep)++ = isupper((unsigned char)c) ? c-'A'+'a' : c;
2645 break;
2647 case CCR :
2648 if(c != '\012'){ /* treat as single space? */
2649 state = DFL; /* lone cr? */
2650 f->f2 &= ~TEF_QUELL;
2651 GF_PUTC(f->next, '\015');
2652 goto df;
2655 state = CLF;
2656 break;
2658 case CLF :
2659 if(c == '\015'){ /* treat as single space? */
2660 state = CCR; /* repeat crlf's mean real newlines */
2661 f->f2 |= TEF_QUELL;
2662 GF_PUTC(f->next, '\r');
2663 GF_PUTC(f->next, '\n');
2664 break;
2666 else{
2667 state = DFL;
2668 if(!((f->f2) & TEF_QUELL))
2669 GF_PUTC(f->next, ' ');
2671 f->f2 &= ~TEF_QUELL;
2674 /* fall thru to take care of 'c' */
2676 case DFL :
2677 default :
2678 df :
2679 if(c == '<')
2680 state = TOKEN;
2681 else if(c == '\015' && (!((f->f2) & TEF_NOFILL)))
2682 state = CCR;
2683 else if(!((f->f2) & TEF_QUELL))
2684 GF_PUTC(f->next, c);
2686 break;
2690 f->f1 = state;
2691 GF_END(f, f->next);
2693 else if(flg == GF_EOD){
2694 if((f->f1 = (f->linep != f->line)) != 0){
2695 /* incomplete token!! */
2696 gf_error("Incomplete token in richtext");
2697 /* NO RETURN */
2699 if(enr_uline_on){
2700 GF_PUTC(f->next, TAG_EMBED);
2701 GF_PUTC(f->next, TAG_ULINEOFF);
2702 enr_uline_on = 0;
2704 if(enr_bold_on){
2705 GF_PUTC(f->next, TAG_EMBED);
2706 GF_PUTC(f->next, TAG_BOLDOFF);
2707 enr_bold_on = 0;
2710 /* Make sure we end with a newline so everything gets flushed */
2711 GF_PUTC(f->next, '\015');
2712 GF_PUTC(f->next, '\012');
2714 fs_give((void **)&(f->line));
2716 (void) GF_FLUSH(f->next);
2717 (*f->next->f)(f->next, GF_EOD);
2719 else if(flg == GF_RESET){
2720 dprint((9, "-- gf_reset enriched2plain\n"));
2721 f->f1 = DFL; /* state */
2722 f->f2 = 0; /* set means we're in a comment */
2723 f->linep = f->line = (char *)fs_get(65 * sizeof(char));
/*
 * function called from the outside to set
 * enriched text filter's options: the caller's int pointer is simply
 * handed back as the opaque option value.
 */
void *
gf_enriched2plain_opt(int *plain)
{
    void *opt = plain;

    return(opt);
}
2741 * HTML-TO-PLAIN text filter
2745 /* OK, here's the plan:
2747 * a universal output function handles writing chars and worries
2748 * about wrapping.
2750 * a universal element collector reads chars and collects params
2751 * and dispatches the appropriate element handler.
2753 * element handlers are stacked. The most recently dispatched gets
2754 * first crack at the incoming character stream. It passes bytes it's
2755 * done with or not interested in to the next
2757 * installs that handler as the current one collecting data...
2759 * stacked handlers take their params from the element collector and
2760 * accept chars or do whatever they need to do. Sort of a vertical
2761 * piping? recursion-like? hmmm.
2763 * at least I think this is how it'll work. tres simple, non?
/*
 * Constants shared by the HTML-to-plain-text code
 */

/* size and depth limits */
#define HTML_BUF_LEN    2048            /* max scratch buffer length */
#define MAX_ENTITY      20              /* longest entity accepted */
#define MAX_ELEMENT     72              /* longest element accepted */
#define RSS_ITEM_LIMIT  20              /* RSS 2.0 ITEM depth limit */

/* results of looking for an entity */
#define HTML_MOREDATA   0               /* more entity data expected */
#define HTML_ENTITY     1               /* a valid entity was collected */
#define HTML_BADVALUE   0x0100          /* data was fine, entity value was not */
#define HTML_BADDATA    0x0200          /* bad data met while collecting */

/* out-of-band character values and flags */
#define HTML_LITERAL    0x0400          /* marks a literal character value */
#define HTML_NEWLINE    0x010A          /* hard newline */
/*
 * NOTE(review): HTML_DOBOLD has the same value as HTML_LITERAL; confirm
 * the two are never tested against the same datum.
 */
#define HTML_DOBOLD     0x0400          /* start bold display */

/* commands understood by the indent function */
#define HTML_ID_GET     0               /* return current value */
#define HTML_ID_SET     1               /* set to an absolute value */
#define HTML_ID_INC     2               /* increment by a value */

/* NOTE(review): presumably option bits for the heading handlers -- confirm */
#define HTML_HX_CENTER  0x0001
#define HTML_HX_ULINE   0x0002
2790 * Handler data, state information including function that uses it
2792 typedef struct handler_s {
2793 FILTER_S *html_data;
2794 void *element;
2795 long x, y, z;
2796 void *dp;
2797 unsigned char *s;
2798 struct handler_s *below;
2799 } HANDLER_S;
2802 * Element Property structure
2804 typedef struct _element_properties {
2805 char *element;
2806 size_t len;
2807 int (*handler)(HANDLER_S *, int, int);
2808 unsigned blocklevel:1;
2809 unsigned alternate:1;
2810 } ELPROP_S;
2813 * Types used to manage HTML parsing
2815 static void html_handoff(HANDLER_S *, int);
/*
 * Buffer bookkeeping used to help manage line wrapping
 */
typedef struct _wrap_line {
    char *buf;          /* where wrapped text is collected */
    int   used;         /* number of chars held in buf */
    int   width;        /* width of the text as displayed */
    int   len;          /* allocated length of buf */
} WRAPLINE_S;
2830 * to help manage centered text
2832 typedef struct _center_s {
2833 WRAPLINE_S line; /* buf to assembled centered text */
2834 WRAPLINE_S word; /* word being to append to Line */
2835 int anchor;
2836 short space;
2837 } CENTER_S;
2841 * Collector data and state information
2843 typedef struct collector_s {
2844 char buf[HTML_BUF_LEN]; /* buffer to collect data */
2845 int len; /* length of that buffer */
2846 unsigned end_tag:1; /* collecting a closing tag */
2847 unsigned hit_equal:1; /* collecting right half of attrib */
2848 unsigned mkup_decl:1; /* markup declaration */
2849 unsigned start_comment:1; /* markup declaration comment */
2850 unsigned end_comment:1; /* legit comment format */
2851 unsigned hyphen:1; /* markup hyphen read */
2852 unsigned badform:1; /* malformed markup element */
2853 unsigned overrun:1; /* Overran buf above */
2854 unsigned proc_inst:1; /* XML processing instructions */
2855 unsigned empty:1; /* empty element */
2856 unsigned was_quoted:1; /* basically to catch null string */
2857 char quoted; /* quoted element param value */
2858 char *element; /* element's collected name */
2859 PARAMETER *attribs; /* element's collected attributes */
2860 PARAMETER *cur_attrib; /* attribute now being collected */
2861 } CLCTR_S;
2865 * State information for all element handlers
2867 typedef struct html_data {
2868 HANDLER_S *h_stack; /* handler list */
2869 CLCTR_S *el_data; /* element collector data */
2870 CENTER_S *centered; /* struct to manage centered text */
2871 int (*token)(FILTER_S *, int);
2872 char quoted; /* quoted, by either ' or ", text */
2873 short indent_level; /* levels of indention */
2874 int in_anchor; /* text now being written to anchor */
2875 int blanks; /* Consecutive blank line count */
2876 int wrapcol; /* column to wrap lines on */
2877 int *prefix; /* buffer containing Anchor prefix */
2878 int prefix_used;
2879 long line_bufsize; /* current size of the line buffer */
2880 COLOR_PAIR *color;
2881 struct {
2882 int state; /* embedded data state */
2883 char *color; /* embedded color pointer */
2884 } embedded;
2885 CBUF_S cb; /* utf8->ucs4 conversion state */
2886 unsigned wrapstate:1; /* whether or not to wrap output */
2887 unsigned li_pending:1; /* <LI> next token expected */
2888 unsigned de_pending:1; /* <DT> or <DD> next token expected */
2889 unsigned bold_on:1; /* currently bolding text */
2890 unsigned uline_on:1; /* currently underlining text */
2891 unsigned center:1; /* center output text */
2892 unsigned bitbucket:1; /* Ignore input */
2893 unsigned head:1; /* In doc's HEAD */
2894 unsigned body:1; /* In doc's BODY */
2895 unsigned alt_entity:1; /* use alternative entity values */
2896 unsigned wrote:1; /* anything witten yet? */
2897 } HTML_DATA_S;
2901 * HTML filter options
2903 typedef struct _html_opts {
2904 char *base; /* Base URL for this html file */
2905 int columns, /* Display columns (excluding margins) */
2906 indent; /* Left margin */
2907 HANDLE_S **handlesp; /* Head of handles */
2908 htmlrisk_t warnrisk_f; /* Nasty link warning call */
2909 ELPROP_S *element_table; /* markup element table */
2910 RSS_FEED_S **feedp; /* hook for RSS feed response */
2911 unsigned strip:1; /* Hilite TAGs allowed */
2912 unsigned handles_loc:1; /* Local handles requested? */
2913 unsigned showserver:1; /* Display server after anchors */
2914 unsigned outputted:1; /* any */
2915 unsigned no_relative_links:1; /* Disable embeded relative links */
2916 unsigned related_content:1; /* Embeded related content */
2917 unsigned html:1; /* Output content in HTML */
2918 unsigned html_imgs:1; /* Output IMG tags in HTML content */
2919 } HTML_OPT_S;
/*
 * Some macros to make life a little easier
 *
 * X is always a FILTER_S pointer.  Macros that test (X)->opt first are
 * safe to use on a filter that was given no options; ELEMENTS(),
 * HANDLESP() and RSS_FEED() are not, and require that options exist.
 */

/* option values, each with a fallback used when there are no options */
#define WRAP_COLS(X)    (!(X)->opt ? 80 : ((HTML_OPT_S *)(X)->opt)->columns)
#define HTML_INDENT(X)  (!(X)->opt ? 0 : ((HTML_OPT_S *)(X)->opt)->indent)
#define HTML_WROTE(X)   (HD(X)->wrote)
#define HTML_BASE(X)    (!(X)->opt ? NULL : ((HTML_OPT_S *)(X)->opt)->base)

/* option flags, all false when there are no options */
#define STRIP(X)        ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip)
#define PASS_HTML(X)    ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html)
#define PASS_IMAGES(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->html_imgs)
#define HANDLESP(X)     (((HTML_OPT_S *)(X)->opt)->handlesp)
#define DO_HANDLES(X)   ((X)->opt && HANDLESP(X))
#define HANDLES_LOC(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc)
#define SHOWSERVER(X)   ((X)->opt && ((HTML_OPT_S *)(X)->opt)->showserver)
#define NO_RELATIVE(X)  ((X)->opt && ((HTML_OPT_S *)(X)->opt)->no_relative_links)
#define RELATED_OK(X)   ((X)->opt && ((HTML_OPT_S *)(X)->opt)->related_content)
#define ELEMENTS(X)     (((HTML_OPT_S *)(X)->opt)->element_table)
#define RSS_FEED(X)     (*(((HTML_OPT_S *)(X)->opt)->feedp))

/* a literal is the low byte of C tagged with the HTML_LITERAL bit */
#define MAKE_LITERAL(C) (HTML_LITERAL | ((C) & 0xff))
#define IS_LITERAL(C)   ((C) & HTML_LITERAL)

/* shorthand for reaching the filter's and a handler's private data */
#define HD(X)           ((HTML_DATA_S *)(X)->data)
#define ED(X)           (HD(X)->el_data)
#define EL(X)           ((ELPROP_S *) (X)->element)

/* white space tests; values of 0x80 and up are never white space */
#define ASCII_ISSPACE(C) ((C) < 0x80 && isspace((unsigned char) (C)))
#define HTML_ISSPACE(C) (!IS_LITERAL(C) && ((C) == HTML_NEWLINE || ASCII_ISSPACE(C)))
/*
 * NEW_CLCTR - give filter X a fresh, zeroed element collector and make
 *	       html_element_collector the routine that receives input
 */
#define NEW_CLCTR(X)    {                                               \
                           ED(X) = (CLCTR_S *) fs_get(sizeof(*ED(X)));  \
                           memset(ED(X), 0, sizeof(*ED(X)));            \
                           HD(X)->token = html_element_collector;       \
                         }
/*
 * FREE_CLCTR - release filter X's element collector: each collected
 *		attribute (its name and value included), the element
 *		name and the collector itself, then clear the token
 *		routine so input is no longer sent to a collector
 */
#define FREE_CLCTR(X)   {                                               \
                           PARAMETER *attr_;                            \
                           while((attr_ = ED(X)->attribs) != NULL){     \
                               ED(X)->attribs = attr_->next;            \
                               if(attr_->attribute)                     \
                                 fs_give((void **) &attr_->attribute);  \
                               if(attr_->value)                         \
                                 fs_give((void **) &attr_->value);      \
                               fs_give((void **) &attr_);               \
                           }                                            \
                           if(ED(X)->element)                           \
                             fs_give((void **) &ED(X)->element);        \
                           fs_give((void **) &ED(X));                   \
                           HD(X)->token = NULL;                         \
                         }
/* shorthand for fields of filter X's HTML_DATA_S */
#define HANDLERS(X)     (HD(X)->h_stack)        /* handler list */
#define BOLD_BIT(X)     (HD(X)->bold_on)        /* currently bolding */
#define ULINE_BIT(X)    (HD(X)->uline_on)       /* currently underlining */
#define CENTER_BIT(X)   (HD(X)->center)         /* centering output */
/*
 * HTML_FLUSH - pass whatever filter X has collected between line and
 *		linep to html_write, then start the line over and zero f2
 */
#define HTML_FLUSH(X)   {                                               \
                           html_write(X, (X)->line,                     \
                                      (X)->linep - (X)->line);          \
                           (X)->linep = (X)->line;                      \
                           (X)->f2    = 0L;                             \
                         }
/*
 * Text attribute toggles
 *
 * Each macro writes TAG_EMBED followed by the attribute's "on" tag when
 * S is true, or its "off" tag when S is false, through html_output.
 * Nothing at all is written when STRIP(X) is set.
 *
 * S is evaluated exactly once.  (These used to re-test !(S) on the
 * "off" path, which was redundant and evaluated the argument twice.)
 *
 * Each expands to a bare "if" statement, so do not use one as the
 * unbraced body of an if that has an else.
 */
#define HTML_BOLD(X, S)                                                 \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_BOLDON);           \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_BOLDOFF);          \
                            }                                           \
                        }
#define HTML_ULINE(X, S)                                                \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_ULINEON);          \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_ULINEOFF);         \
                            }                                           \
                        }
#define HTML_ITALIC(X, S)                                               \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_ITALICON);         \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_ITALICOFF);        \
                            }                                           \
                        }
#define HTML_STRIKE(X, S)                                               \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_STRIKEON);         \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_STRIKEOFF);        \
                            }                                           \
                        }
#define HTML_BIG(X, S)                                                  \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_BIGON);            \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_BIGOFF);           \
                            }                                           \
                        }
#define HTML_SMALL(X, S)                                                \
                        if(! STRIP(X)){                                 \
                            if(S){                                      \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_SMALLON);          \
                            }                                           \
                            else{                                       \
                                html_output((X), TAG_EMBED);            \
                                html_output((X), TAG_SMALLOFF);         \
                            }                                           \
                        }
/*
 * WRAPPED_LEN - displayed width of the centered text filter X is
 *		 holding: the width of the pending line plus the width
 *		 of the pending word, plus one for the space that would
 *		 separate them when both are non-empty.  Zero when no
 *		 centered text is being collected.
 *
 * The argument is now honored; the macro used to ignore X and reach
 * for a variable named "f" at the point of use.
 */
#define WRAPPED_LEN(X)  ((HD(X)->centered)                              \
                            ? (HD(X)->centered->line.width              \
                                + HD(X)->centered->word.width           \
                                + ((HD(X)->centered->line.width         \
                                    && HD(X)->centered->word.width)     \
                                    ? 1 : 0))                           \
                            : 0)
/*
 * HTML_DUMP_LIT - send the L bytes at S through HTML_TEXT for filter F.
 *		   ASCII white space goes through as is; every other
 *		   byte is first tagged with MAKE_LITERAL.
 */
#define HTML_DUMP_LIT(F, S, L)  {                                           \
                                   int lit_i_, lit_c_;                      \
                                   for(lit_i_ = 0; lit_i_ < (L); lit_i_++){ \
                                       if(ASCII_ISSPACE((unsigned char)(S)[lit_i_])) \
                                         lit_c_ = (S)[lit_i_];              \
                                       else                                 \
                                         lit_c_ = MAKE_LITERAL((S)[lit_i_]); \
                                       HTML_TEXT(F, lit_c_);                \
                                   }                                        \
                                 }
/*
 * HTML_PROC - process one incoming character C for filter F
 *
 * While a token routine is installed, C is handed to it.  A zero
 * return leaves things as they are.  A non-zero return ends the
 * collection and the collector is freed; if the return was negative,
 * what had been collected ('<', the element name, any buffered data)
 * and C itself are first written out as literal text.
 *
 * With no token routine installed, '<' starts a new collector and
 * anything else is treated as text.
 */
#define HTML_PROC(F, C) {                                                   \
                           if(HD(F)->token){                                \
                               int tok_rv_ = (*(HD(F)->token))(F, C);       \
                               if(tok_rv_ != 0){                            \
                                   if(tok_rv_ < 0){                         \
                                       HTML_DUMP_LIT(F, "<", 1);            \
                                       if(HD(F)->el_data->element){         \
                                           HTML_DUMP_LIT(F,                 \
                                            HD(F)->el_data->element,        \
                                            strlen(HD(F)->el_data->element));\
                                       }                                    \
                                       if(HD(F)->el_data->len){             \
                                           HTML_DUMP_LIT(F,                 \
                                                    HD(F)->el_data->buf,    \
                                                    HD(F)->el_data->len);   \
                                       }                                    \
                                       HTML_TEXT(F, C);                     \
                                   }                                        \
                                   FREE_CLCTR(F);                           \
                               }                                            \
                           }                                                \
                           else if((C) == '<'){                             \
                               NEW_CLCTR(F);                                \
                           }                                                \
                           else{                                            \
                               HTML_TEXT(F, C);                             \
                           }                                                \
                         }
/*
 * HTML_LINEP_PUTC - append C to filter F's line buffer.  When all but
 *		     one of the buffer's line_bufsize bytes are in use,
 *		     the buffer is first doubled in size and linep is
 *		     re-seated in the resized buffer.
 */
#define HTML_LINEP_PUTC(F, C) {                                             \
                   if((F)->linep - (F)->line >= (HD(F)->line_bufsize - 1)){ \
                       size_t used_ = (F)->linep - (F)->line;               \
                       fs_resize((void **) &(F)->line,                      \
                                 (HD(F)->line_bufsize * 2) * sizeof(char)); \
                       HD(F)->line_bufsize *= 2;                            \
                       (F)->linep = (F)->line + used_;                      \
                   }                                                        \
                   *(F)->linep++ = (C);                                     \
               }
/*
 * HTML_TEXT - deliver text character C for filter F, using F's f1 as
 *	       the state
 *
 *   WSPACE: further white space is dropped.  Anything else first
 *	     causes a single ' ' to go out, then is handled as in DFL.
 *   DFL:    nothing happens while the bitbucket flag is set.  Otherwise
 *	     white space, when wrapping is on, only switches the state
 *	     to WSPACE, and everything else goes to HTML_TEXT_OUT.
 */
#define HTML_TEXT(F, C) switch((F)->f1){                                    \
                             case WSPACE :                                  \
                               if(HTML_ISSPACE(C)){                         \
                                   break;       /* repeated WS, ignore */   \
                               }                                            \
                               HTML_TEXT_OUT(F, ' ');                       \
                               (F)->f1 = DFL;   /* done coalescing */       \
                               /* fall thru to process C itself */          \
                             case DFL:                                      \
                               if(HD(F)->bitbucket){                        \
                                   (F)->f1 = DFL;       /* no op */         \
                               }                                            \
                               else if(HTML_ISSPACE(C) && HD(F)->wrapstate){ \
                                   (F)->f1 = WSPACE;    /* start coalescing */ \
                               }                                            \
                               else{                                        \
                                   HTML_TEXT_OUT(F, C);                     \
                               }                                            \
                               break;                                       \
                         }
/*
 * HTML_TEXT_OUT - with no handler stacked, C goes straight to
 *		   html_output; otherwise the handler on top of the
 *		   stack is called with C and GF_DATA.
 *
 * NOTE: expands to an if/else statement that carries its own
 * trailing semicolon.
 */
#define HTML_TEXT_OUT(F, C) if(!HANDLERS(F))                                \
                              html_output(F, C);                            \
                            else    /* let the top handler see C */         \
                              (*EL(HANDLERS(F))->handler)(HANDLERS(F),(C),GF_DATA);
/*
 * HTML_DEBUG_EL - in DEBUG builds, log label S with the element name
 *		   collected in D (a CLCTR_S pointer) at debug level 5
 *		   and, when debug is above 5, each collected attribute
 *		   with its value at level 6.  Expands to nothing
 *		   otherwise.
 */
#ifdef DEBUG
#define HTML_DEBUG_EL(S, D)   {                                             \
                                 PARAMETER *dbg_p_;                         \
                                 dprint((5, "-- html %s: %s\n",             \
                                            S ? S : "?",                    \
                                            (D)->element                    \
                                                 ? (D)->element : "NULL")); \
                                 if(debug > 5){                             \
                                     dbg_p_ = (D)->attribs;                 \
                                     while(dbg_p_ && dbg_p_->attribute){    \
                                         dprint((6,                         \
                                                  " PARM: %s%s%s\n",        \
                                                  dbg_p_->attribute         \
                                                    ? dbg_p_->attribute : "NULL",\
                                                  dbg_p_->value ? "=" : "", \
                                                  dbg_p_->value ? dbg_p_->value : ""));\
                                         dbg_p_ = dbg_p_->next;             \
                                     }                                      \
                                 }                                          \
                               }
#else
#define HTML_DEBUG_EL(S, D)
#endif
/* where the pine.info file lives unless the build says otherwise */
#ifndef SYSTEM_PINE_INFO_PATH
#define SYSTEM_PINE_INFO_PATH   "/usr/local/lib/pine.info"
#endif

/*
 * CHTML_VAR_EXPAND - the name "PINE_INFO_PATH" expands to the
 *		      pine.info path above; any other string is
 *		      returned as is
 */
#define CHTML_VAR_EXPAND(S)     (strcmp((S), "PINE_INFO_PATH") == 0     \
                                  ? SYSTEM_PINE_INFO_PATH : (S))
3147 * Protos for Tag handlers
3149 int html_head(HANDLER_S *, int, int);
3150 int html_base(HANDLER_S *, int, int);
3151 int html_title(HANDLER_S *, int, int);
3152 int html_body(HANDLER_S *, int, int);
3153 int html_a(HANDLER_S *, int, int);
3154 int html_br(HANDLER_S *, int, int);
3155 int html_hr(HANDLER_S *, int, int);
3156 int html_p(HANDLER_S *, int, int);
3157 int html_table(HANDLER_S *, int, int);
3158 int html_caption(HANDLER_S *, int, int);
3159 int html_tr(HANDLER_S *, int, int);
3160 int html_td(HANDLER_S *, int, int);
3161 int html_th(HANDLER_S *, int, int);
3162 int html_thead(HANDLER_S *, int, int);
3163 int html_tbody(HANDLER_S *, int, int);
3164 int html_tfoot(HANDLER_S *, int, int);
3165 int html_col(HANDLER_S *, int, int);
3166 int html_colgroup(HANDLER_S *, int, int);
3167 int html_b(HANDLER_S *, int, int);
3168 int html_u(HANDLER_S *, int, int);
3169 int html_i(HANDLER_S *, int, int);
3170 int html_em(HANDLER_S *, int, int);
3171 int html_strong(HANDLER_S *, int, int);
3172 int html_s(HANDLER_S *, int, int);
3173 int html_big(HANDLER_S *, int, int);
3174 int html_small(HANDLER_S *, int, int);
3175 int html_font(HANDLER_S *, int, int);
3176 int html_img(HANDLER_S *, int, int);
3177 int html_map(HANDLER_S *, int, int);
3178 int html_area(HANDLER_S *, int, int);
3179 int html_form(HANDLER_S *, int, int);
3180 int html_input(HANDLER_S *, int, int);
3181 int html_option(HANDLER_S *, int, int);
3182 int html_optgroup(HANDLER_S *, int, int);
3183 int html_button(HANDLER_S *, int, int);
3184 int html_select(HANDLER_S *, int, int);
3185 int html_textarea(HANDLER_S *, int, int);
3186 int html_label(HANDLER_S *, int, int);
3187 int html_fieldset(HANDLER_S *, int, int);
3188 int html_ul(HANDLER_S *, int, int);
3189 int html_ol(HANDLER_S *, int, int);
3190 int html_menu(HANDLER_S *, int, int);
3191 int html_dir(HANDLER_S *, int, int);
3192 int html_li(HANDLER_S *, int, int);
3193 int html_h1(HANDLER_S *, int, int);
3194 int html_h2(HANDLER_S *, int, int);
3195 int html_h3(HANDLER_S *, int, int);
3196 int html_h4(HANDLER_S *, int, int);
3197 int html_h5(HANDLER_S *, int, int);
3198 int html_h6(HANDLER_S *, int, int);
3199 int html_blockquote(HANDLER_S *, int, int);
3200 int html_address(HANDLER_S *, int, int);
3201 int html_pre(HANDLER_S *, int, int);
3202 int html_center(HANDLER_S *, int, int);
3203 int html_div(HANDLER_S *, int, int);
3204 int html_span(HANDLER_S *, int, int);
3205 int html_dl(HANDLER_S *, int, int);
3206 int html_dt(HANDLER_S *, int, int);
3207 int html_dd(HANDLER_S *, int, int);
3208 int html_script(HANDLER_S *, int, int);
3209 int html_applet(HANDLER_S *, int, int);
3210 int html_style(HANDLER_S *, int, int);
3211 int html_kbd(HANDLER_S *, int, int);
3212 int html_dfn(HANDLER_S *, int, int);
3213 int html_var(HANDLER_S *, int, int);
3214 int html_tt(HANDLER_S *, int, int);
3215 int html_samp(HANDLER_S *, int, int);
3216 int html_sub(HANDLER_S *, int, int);
3217 int html_sup(HANDLER_S *, int, int);
3218 int html_cite(HANDLER_S *, int, int);
3219 int html_code(HANDLER_S *, int, int);
3220 int html_ins(HANDLER_S *, int, int);
3221 int html_del(HANDLER_S *, int, int);
3222 int html_abbr(HANDLER_S *, int, int);
3225 * Protos for RSS 2.0 Tag handlers
3227 int rss_rss(HANDLER_S *, int, int);
3228 int rss_channel(HANDLER_S *, int, int);
3229 int rss_title(HANDLER_S *, int, int);
3230 int rss_image(HANDLER_S *, int, int);
3231 int rss_link(HANDLER_S *, int, int);
3232 int rss_description(HANDLER_S *, int, int);
3233 int rss_ttl(HANDLER_S *, int, int);
3234 int rss_item(HANDLER_S *, int, int);
3237 * Proto's for support routines
3239 void html_pop(FILTER_S *, ELPROP_S *);
3240 int html_push(FILTER_S *, ELPROP_S *);
3241 int html_element_collector(FILTER_S *, int);
3242 int html_element_flush(CLCTR_S *);
3243 void html_element_comment(FILTER_S *, char *);
3244 void html_element_output(FILTER_S *, int);
3245 int html_entity_collector(FILTER_S *, int, UCS *, char **);
3246 void html_a_prefix(FILTER_S *);
3247 void html_a_finish(HANDLER_S *);
3248 void html_a_output_prefix(FILTER_S *, int);
3249 void html_a_output_info(HANDLER_S *);
3250 void html_a_relative(char *, char *, HANDLE_S *);
3251 int html_href_relative(char *);
3252 int html_indent(FILTER_S *, int, int);
3253 void html_blank(FILTER_S *, int);
3254 void html_newline(FILTER_S *);
3255 void html_output(FILTER_S *, int);
3256 void html_output_string(FILTER_S *, char *);
3257 void html_output_raw_tag(FILTER_S *, char *);
3258 void html_output_normal(FILTER_S *, int, int, int);
3259 void html_output_flush(FILTER_S *);
3260 void html_output_centered(FILTER_S *, int, int, int);
3261 void html_centered_handle(int *, char *, int);
3262 void html_centered_putc(WRAPLINE_S *, int);
3263 void html_centered_flush(FILTER_S *);
3264 void html_centered_flush_line(FILTER_S *);
3265 void html_write_anchor(FILTER_S *, int);
3266 void html_write_newline(FILTER_S *);
3267 void html_write_indent(FILTER_S *, int);
3268 void html_write(FILTER_S *, char *, int);
3269 void html_putc(FILTER_S *, int);
3270 int html_event_attribute(char *);
3271 char *rss_skip_whitespace(char *s);
3272 ELPROP_S *element_properties(FILTER_S *, char *);
3276 * Named entity table -- most from HTML 2.0 (rfc1866) plus some from
3277 * W3C doc "Additional named entities for HTML"
3279 static struct html_entities {
3280 char *name; /* entity name */
3281 UCS value; /* UCS entity value */
3282 char *plain; /* US-ASCII representation */
3283 } entity_tab[] = {
3284 {"quot", 0x0022}, /* 34 - quotation mark */
3285 {"amp", 0x0026}, /* 38 - ampersand */
3286 {"apos", 0x0027}, /* 39 - apostrophe */
3287 {"lt", 0x003C}, /* 60 - less-than sign */
3288 {"gt", 0x003E}, /* 62 - greater-than sign */
3289 {"nbsp", 0x00A0, " "}, /* 160 - no-break space */
3290 {"iexcl", 0x00A1}, /* 161 - inverted exclamation mark */
3291 {"cent", 0x00A2}, /* 162 - cent sign */
3292 {"pound", 0x00A3}, /* 163 - pound sign */
3293 {"curren", 0x00A4, "CUR"}, /* 164 - currency sign */
3294 {"yen", 0x00A5}, /* 165 - yen sign */
3295 {"brvbar", 0x00A6, "|"}, /* 166 - broken bar */
3296 {"sect", 0x00A7}, /* 167 - section sign */
3297 {"uml", 0x00A8, "\""}, /* 168 - diaeresis */
3298 {"copy", 0x00A9, "(C)"}, /* 169 - copyright sign */
3299 {"ordf", 0x00AA, "a"}, /* 170 - feminine ordinal indicator */
3300 {"laquo", 0x00AB, "<<"}, /* 171 - left-pointing double angle quotation mark */
3301 {"not", 0x00AC, "NOT"}, /* 172 - not sign */
3302 {"shy", 0x00AD, "-"}, /* 173 - soft hyphen */
3303 {"reg", 0x00AE, "(R)"}, /* 174 - registered sign */
3304 {"macr", 0x00AF}, /* 175 - macron */
3305 {"deg", 0x00B0, "DEG"}, /* 176 - degree sign */
3306 {"plusmn", 0x00B1, "+/-"}, /* 177 - plus-minus sign */
3307 {"sup2", 0x00B2}, /* 178 - superscript two */
3308 {"sup3", 0x00B3}, /* 179 - superscript three */
3309 {"acute", 0x00B4, "'"}, /* 180 - acute accent */
3310 {"micro", 0x00B5}, /* 181 - micro sign */
3311 {"para", 0x00B6}, /* 182 - pilcrow sign */
3312 {"middot", 0x00B7}, /* 183 - middle dot */
3313 {"cedil", 0x00B8}, /* 184 - cedilla */
3314 {"sup1", 0x00B9}, /* 185 - superscript one */
3315 {"ordm", 0x00BA, "o"}, /* 186 - masculine ordinal indicator */
3316 {"raquo", 0x00BB, ">>"}, /* 187 - right-pointing double angle quotation mark */
3317 {"frac14", 0x00BC, " 1/4"}, /* 188 - vulgar fraction one quarter */
3318 {"frac12", 0x00BD, " 1/2"}, /* 189 - vulgar fraction one half */
3319 {"frac34", 0x00BE, " 3/4"}, /* 190 - vulgar fraction three quarters */
3320 {"iquest", 0x00BF}, /* 191 - inverted question mark */
3321 {"Agrave", 0x00C0, "A"}, /* 192 - latin capital letter a with grave */
3322 {"Aacute", 0x00C1, "A"}, /* 193 - latin capital letter a with acute */
3323 {"Acirc", 0x00C2, "A"}, /* 194 - latin capital letter a with circumflex */
3324 {"Atilde", 0x00C3, "A"}, /* 195 - latin capital letter a with tilde */
3325 {"Auml", 0x00C4, "AE"}, /* 196 - latin capital letter a with diaeresis */
3326 {"Aring", 0x00C5, "A"}, /* 197 - latin capital letter a with ring above */
3327 {"AElig", 0x00C6, "AE"}, /* 198 - latin capital letter ae */
3328 {"Ccedil", 0x00C7, "C"}, /* 199 - latin capital letter c with cedilla */
3329 {"Egrave", 0x00C8, "E"}, /* 200 - latin capital letter e with grave */
3330 {"Eacute", 0x00C9, "E"}, /* 201 - latin capital letter e with acute */
3331 {"Ecirc", 0x00CA, "E"}, /* 202 - latin capital letter e with circumflex */
3332 {"Euml", 0x00CB, "E"}, /* 203 - latin capital letter e with diaeresis */
3333 {"Igrave", 0x00CC, "I"}, /* 204 - latin capital letter i with grave */
3334 {"Iacute", 0x00CD, "I"}, /* 205 - latin capital letter i with acute */
3335 {"Icirc", 0x00CE, "I"}, /* 206 - latin capital letter i with circumflex */
3336 {"Iuml", 0x00CF, "I"}, /* 207 - latin capital letter i with diaeresis */
3337 {"ETH", 0x00D0, "DH"}, /* 208 - latin capital letter eth */
3338 {"Ntilde", 0x00D1, "N"}, /* 209 - latin capital letter n with tilde */
3339 {"Ograve", 0x00D2, "O"}, /* 210 - latin capital letter o with grave */
3340 {"Oacute", 0x00D3, "O"}, /* 211 - latin capital letter o with acute */
3341 {"Ocirc", 0x00D4, "O"}, /* 212 - latin capital letter o with circumflex */
3342 {"Otilde", 0x00D5, "O"}, /* 213 - latin capital letter o with tilde */
3343 {"Ouml", 0x00D6, "O"}, /* 214 - latin capital letter o with diaeresis */
3344 {"times", 0x00D7, "x"}, /* 215 - multiplication sign */
3345 {"Oslash", 0x00D8, "O"}, /* 216 - latin capital letter o with stroke */
3346 {"Ugrave", 0x00D9, "U"}, /* 217 - latin capital letter u with grave */
3347 {"Uacute", 0x00DA, "U"}, /* 218 - latin capital letter u with acute */
3348 {"Ucirc", 0x00DB, "U"}, /* 219 - latin capital letter u with circumflex */
3349 {"Uuml", 0x00DC, "UE"}, /* 220 - latin capital letter u with diaeresis */
3350 {"Yacute", 0x00DD, "Y"}, /* 221 - latin capital letter y with acute */
3351 {"THORN", 0x00DE, "P"}, /* 222 - latin capital letter thorn */
3352 {"szlig", 0x00DF, "ss"}, /* 223 - latin small letter sharp s (German <a href="/wiki/Eszett" title="Eszett">Eszett</a>) */
3353 {"agrave", 0x00E0, "a"}, /* 224 - latin small letter a with grave */
3354 {"aacute", 0x00E1, "a"}, /* 225 - latin small letter a with acute */
3355 {"acirc", 0x00E2, "a"}, /* 226 - latin small letter a with circumflex */
3356 {"atilde", 0x00E3, "a"}, /* 227 - latin small letter a with tilde */
3357 {"auml", 0x00E4, "ae"}, /* 228 - latin small letter a with diaeresis */
3358 {"aring", 0x00E5, "a"}, /* 229 - latin small letter a with ring above */
3359 {"aelig", 0x00E6, "ae"}, /* 230 - latin lowercase ligature ae */
3360 {"ccedil", 0x00E7, "c"}, /* 231 - latin small letter c with cedilla */
3361 {"egrave", 0x00E8, "e"}, /* 232 - latin small letter e with grave */
3362 {"eacute", 0x00E9, "e"}, /* 233 - latin small letter e with acute */
3363 {"ecirc", 0x00EA, "e"}, /* 234 - latin small letter e with circumflex */
3364 {"euml", 0x00EB, "e"}, /* 235 - latin small letter e with diaeresis */
3365 {"igrave", 0x00EC, "i"}, /* 236 - latin small letter i with grave */
3366 {"iacute", 0x00ED, "i"}, /* 237 - latin small letter i with acute */
3367 {"icirc", 0x00EE, "i"}, /* 238 - latin small letter i with circumflex */
3368 {"iuml", 0x00EF, "i"}, /* 239 - latin small letter i with diaeresis */
3369 {"eth", 0x00F0, "dh"}, /* 240 - latin small letter eth */
3370 {"ntilde", 0x00F1, "n"}, /* 241 - latin small letter n with tilde */
3371 {"ograve", 0x00F2, "o"}, /* 242 - latin small letter o with grave */
3372 {"oacute", 0x00F3, "o"}, /* 243 - latin small letter o with acute */
3373 {"ocirc", 0x00F4, "o"}, /* 244 - latin small letter o with circumflex */
3374 {"otilde", 0x00F5, "o"}, /* 245 - latin small letter o with tilde */
3375 {"ouml", 0x00F6, "oe"}, /* 246 - latin small letter o with diaeresis */
3376 {"divide", 0x00F7, "/"}, /* 247 - division sign */
3377 {"oslash", 0x00F8, "o"}, /* 248 - latin small letter o with stroke */
3378 {"ugrave", 0x00F9, "u"}, /* 249 - latin small letter u with grave */
3379 {"uacute", 0x00FA, "u"}, /* 250 - latin small letter u with acute */
3380 {"ucirc", 0x00FB, "u"}, /* 251 - latin small letter u with circumflex */
3381 {"uuml", 0x00FC, "ue"}, /* 252 - latin small letter u with diaeresis */
3382 {"yacute", 0x00FD, "y"}, /* 253 - latin small letter y with acute */
3383 {"thorn", 0x00FE, "p"}, /* 254 - latin small letter thorn */
3384 {"yuml", 0x00FF, "y"}, /* 255 - latin small letter y with diaeresis */
3385 {"OElig", 0x0152, "OE"}, /* 338 - latin capital ligature oe */
3386 {"oelig", 0x0153, "oe"}, /* 339 - latin small ligature oe */
3387 {"Scaron", 0x0160, "S"}, /* 352 - latin capital letter s with caron */
3388 {"scaron", 0x0161, "s"}, /* 353 - latin small letter s with caron */
3389 {"Yuml", 0x0178, "Y"}, /* 376 - latin capital letter y with diaeresis */
3390 {"fnof", 0x0192, "f"}, /* 402 - latin small letter f with hook */
3391 {"circ", 0x02C6}, /* 710 - modifier letter circumflex accent */
3392 {"tilde", 0x02DC, "~"}, /* 732 - small tilde */
3393 {"Alpha", 0x0391}, /* 913 - greek capital letter alpha */
3394 {"Beta", 0x0392}, /* 914 - greek capital letter beta */
3395 {"Gamma", 0x0393}, /* 915 - greek capital letter gamma */
3396 {"Delta", 0x0394}, /* 916 - greek capital letter delta */
3397 {"Epsilon", 0x0395}, /* 917 - greek capital letter epsilon */
3398 {"Zeta", 0x0396}, /* 918 - greek capital letter zeta */
3399 {"Eta", 0x0397}, /* 919 - greek capital letter eta */
3400 {"Theta", 0x0398}, /* 920 - greek capital letter theta */
3401 {"Iota", 0x0399}, /* 921 - greek capital letter iota */
3402 {"Kappa", 0x039A}, /* 922 - greek capital letter kappa */
3403 {"Lambda", 0x039B}, /* 923 - greek capital letter lamda */
3404 {"Mu", 0x039C}, /* 924 - greek capital letter mu */
3405 {"Nu", 0x039D}, /* 925 - greek capital letter nu */
3406 {"Xi", 0x039E}, /* 926 - greek capital letter xi */
3407 {"Omicron", 0x039F}, /* 927 - greek capital letter omicron */
3408 {"Pi", 0x03A0}, /* 928 - greek capital letter pi */
3409 {"Rho", 0x03A1}, /* 929 - greek capital letter rho */
3410 {"Sigma", 0x03A3}, /* 931 - greek capital letter sigma */
3411 {"Tau", 0x03A4}, /* 932 - greek capital letter tau */
3412 {"Upsilon", 0x03A5}, /* 933 - greek capital letter upsilon */
3413 {"Phi", 0x03A6}, /* 934 - greek capital letter phi */
3414 {"Chi", 0x03A7}, /* 935 - greek capital letter chi */
3415 {"Psi", 0x03A8}, /* 936 - greek capital letter psi */
3416 {"Omega", 0x03A9}, /* 937 - greek capital letter omega */
3417 {"alpha", 0x03B1}, /* 945 - greek small letter alpha */
3418 {"beta", 0x03B2}, /* 946 - greek small letter beta */
3419 {"gamma", 0x03B3}, /* 947 - greek small letter gamma */
3420 {"delta", 0x03B4}, /* 948 - greek small letter delta */
3421 {"epsilon", 0x03B5}, /* 949 - greek small letter epsilon */
3422 {"zeta", 0x03B6}, /* 950 - greek small letter zeta */
3423 {"eta", 0x03B7}, /* 951 - greek small letter eta */
3424 {"theta", 0x03B8}, /* 952 - greek small letter theta */
3425 {"iota", 0x03B9}, /* 953 - greek small letter iota */
3426 {"kappa", 0x03BA}, /* 954 - greek small letter kappa */
3427 {"lambda", 0x03BB}, /* 955 - greek small letter lamda */
3428 {"mu", 0x03BC}, /* 956 - greek small letter mu */
3429 {"nu", 0x03BD}, /* 957 - greek small letter nu */
3430 {"xi", 0x03BE}, /* 958 - greek small letter xi */
3431 {"omicron", 0x03BF}, /* 959 - greek small letter omicron */
3432 {"pi", 0x03C0}, /* 960 - greek small letter pi */
3433 {"rho", 0x03C1}, /* 961 - greek small letter rho */
3434 {"sigmaf", 0x03C2}, /* 962 - greek small letter final sigma */
3435 {"sigma", 0x03C3}, /* 963 - greek small letter sigma */
3436 {"tau", 0x03C4}, /* 964 - greek small letter tau */
3437 {"upsilon", 0x03C5}, /* 965 - greek small letter upsilon */
3438 {"phi", 0x03C6}, /* 966 - greek small letter phi */
3439 {"chi", 0x03C7}, /* 967 - greek small letter chi */
3440 {"psi", 0x03C8}, /* 968 - greek small letter psi */
3441 {"omega", 0x03C9}, /* 969 - greek small letter omega */
3442 {"thetasym", 0x03D1}, /* 977 - greek theta symbol */
3443 {"upsih", 0x03D2}, /* 978 - greek upsilon with hook symbol */
3444 {"piv", 0x03D6}, /* 982 - greek pi symbol */
3445 {"ensp", 0x2002}, /* 8194 - en space */
3446 {"emsp", 0x2003}, /* 8195 - em space */
3447 {"thinsp", 0x2009}, /* 8201 - thin space */
3448 {"zwnj", 0x200C}, /* 8204 - zero width non-joiner */
3449 {"zwj", 0x200D}, /* 8205 - zero width joiner */
3450 {"lrm", 0x200E}, /* 8206 - left-to-right mark */
3451 {"rlm", 0x200F}, /* 8207 - right-to-left mark */
3452 {"ndash", 0x2013}, /* 8211 - en dash */
3453 {"mdash", 0x2014}, /* 8212 - em dash */
3454 {"#8213", 0x2015, "--"}, /* 2015 - horizontal bar */
3455 {"#8214", 0x2016, "||"}, /* 2016 - double vertical line */
3456 {"#8215", 0x2017, "__"}, /* 2017 - double low line */
3457 {"lsquo", 0x2018}, /* 8216 - left single quotation mark */
3458 {"rsquo", 0x2019}, /* 8217 - right single quotation mark */
3459 {"sbquo", 0x201A}, /* 8218 - single low-9 quotation mark */
3460 {"ldquo", 0x201C}, /* 8220 - left double quotation mark */
3461 {"rdquo", 0x201D}, /* 8221 - right double quotation mark */
3462 {"bdquo", 0x201E, ",,"}, /* 8222 - double low-9 quotation mark */
3463 {"#8223", 0x201F, "``"}, /* 201F - double high reversed-9 quotation mark */
3464 {"dagger", 0x2020}, /* 8224 - dagger */
3465 {"Dagger", 0x2021}, /* 8225 - double dagger */
3466 {"bull", 0x2022, "*"}, /* 8226 - bullet */
3467 {"hellip", 0x2026}, /* 8230 - horizontal ellipsis */
3468 {"permil", 0x2030}, /* 8240 - per mille sign */
3469 {"prime", 0x2032, "\'"}, /* 8242 - prime */
3470 {"Prime", 0x2033, "\'\'"}, /* 8243 - double prime */
3471 {"#8244", 0x2034, "\'\'\'"}, /* 2034 - triple prime */
3472 {"lsaquo", 0x2039}, /* 8249 - single left-pointing angle quotation mark */
3473 {"rsaquo", 0x203A}, /* 8250 - single right-pointing angle quotation mark */
3474 {"#8252", 0x203C, "!!"}, /* 203C - double exclamation mark */
3475 {"oline", 0x203E, "-"}, /* 8254 - overline */
3476 {"frasl", 0x2044}, /* 8260 - fraction slash */
3477 {"#8263", 0x2047, "??"}, /* 2047 - double question mark */
3478 {"#8264", 0x2048, "?!"}, /* 2048 - question exclamation mark */
3479 {"#8265", 0x2049, "!?"}, /* 2049 - exclamation question mark */
3480 {"#8279", 0x2057, "\'\'\'\'"}, /* 2057 - quad prime */
3481 {"euro", 0x20AC, "EUR"}, /* 8364 - euro sign */
3482 {"image", 0x2111}, /* 8465 - black-letter capital i */
3483 {"weierp", 0x2118}, /* 8472 - script capital p (<a href="/wiki/Weierstrass" title="Weierstrass">Weierstrass</a> p) */
3484 {"real", 0x211C}, /* 8476 - black-letter capital r */
3485 {"trade", 0x2122, "[tm]"}, /* 8482 - trademark sign */
3486 {"alefsym", 0x2135}, /* 8501 - alef symbol */
3487 {"larr", 0x2190}, /* 8592 - leftwards arrow */
3488 {"uarr", 0x2191}, /* 8593 - upwards arrow */
3489 {"rarr", 0x2192}, /* 8594 - rightwards arrow */
3490 {"darr", 0x2193}, /* 8595 - downwards arrow */
3491 {"harr", 0x2194}, /* 8596 - left right arrow */
3492 {"crarr", 0x21B5}, /* 8629 - downwards arrow with corner leftwards */
3493 {"lArr", 0x21D0}, /* 8656 - leftwards double arrow */
3494 {"uArr", 0x21D1}, /* 8657 - upwards double arrow */
3495 {"rArr", 0x21D2}, /* 8658 - rightwards double arrow */
3496 {"dArr", 0x21D3}, /* 8659 - downwards double arrow */
3497 {"hArr", 0x21D4}, /* 8660 - left right double arrow */
3498 {"forall", 0x2200}, /* 8704 - for all */
3499 {"part", 0x2202}, /* 8706 - partial differential */
3500 {"exist", 0x2203}, /* 8707 - there exists */
3501 {"empty", 0x2205}, /* 8709 - empty set */
3502 {"nabla", 0x2207}, /* 8711 - nabla */
3503 {"isin", 0x2208}, /* 8712 - element of */
3504 {"notin", 0x2209}, /* 8713 - not an element of */
3505 {"ni", 0x220B}, /* 8715 - contains as member */
3506 {"prod", 0x220F}, /* 8719 - n-ary product */
3507 {"sum", 0x2211}, /* 8721 - n-ary summation */
3508 {"minus", 0x2212}, /* 8722 - minus sign */
3509 {"lowast", 0x2217}, /* 8727 - asterisk operator */
3510 {"radic", 0x221A}, /* 8730 - square root */
3511 {"prop", 0x221D}, /* 8733 - proportional to */
3512 {"infin", 0x221E}, /* 8734 - infinity */
3513 {"ang", 0x2220}, /* 8736 - angle */
3514 {"and", 0x2227}, /* 8743 - logical and */
3515 {"or", 0x2228}, /* 8744 - logical or */
3516 {"cap", 0x2229}, /* 8745 - intersection */
3517 {"cup", 0x222A}, /* 8746 - union */
3518 {"int", 0x222B}, /* 8747 - integral */
3519 {"there4", 0x2234}, /* 8756 - therefore */
3520 {"sim", 0x223C}, /* 8764 - tilde operator */
3521 {"cong", 0x2245}, /* 8773 - congruent to */
3522 {"asymp", 0x2248}, /* 8776 - almost equal to */
3523 {"ne", 0x2260}, /* 8800 - not equal to */
3524 {"equiv", 0x2261}, /* 8801 - identical to (equivalent to) */
3525 {"le", 0x2264}, /* 8804 - less-than or equal to */
3526 {"ge", 0x2265}, /* 8805 - greater-than or equal to */
3527 {"sub", 0x2282}, /* 8834 - subset of */
3528 {"sup", 0x2283}, /* 8835 - superset of */
3529 {"nsub", 0x2284}, /* 8836 - not a subset of */
3530 {"sube", 0x2286}, /* 8838 - subset of or equal to */
3531 {"supe", 0x2287}, /* 8839 - superset of or equal to */
3532 {"oplus", 0x2295}, /* 8853 - circled plus */
3533 {"otimes", 0x2297}, /* 8855 - circled times */
3534 {"perp", 0x22A5}, /* 8869 - up tack */
3535 {"sdot", 0x22C5}, /* 8901 - dot operator */
3536 {"lceil", 0x2308}, /* 8968 - left ceiling */
3537 {"rceil", 0x2309}, /* 8969 - right ceiling */
3538 {"lfloor", 0x230A}, /* 8970 - left floor */
3539 {"rfloor", 0x230B}, /* 8971 - right floor */
3540 {"lang", 0x2329}, /* 9001 - left-pointing angle bracket */
3541 {"rang", 0x232A}, /* 9002 - right-pointing angle bracket */
3542 {"loz", 0x25CA}, /* 9674 - lozenge */
3543 {"spades", 0x2660}, /* 9824 - black spade suit */
3544 {"clubs", 0x2663}, /* 9827 - black club suit */
3545 {"hearts", 0x2665}, /* 9829 - black heart suit */
3546 {"diams", 0x2666} /* 9830 - black diamond suit */
3551 * Table of supported elements and corresponding handlers
3553 static ELPROP_S html_element_table[] = {
3554 {"HTML", 4}, /* HTML ignore if seen? */
3555 {"HEAD", 4, html_head}, /* slurp until <BODY> ? */
3556 {"TITLE", 5, html_title}, /* Document Title */
3557 {"BASE", 4, html_base}, /* HREF base */
3558 {"BODY", 4, html_body}, /* HTML BODY */
3559 {"A", 1, html_a}, /* Anchor */
3560 {"ABBR", 4, html_abbr}, /* Abbreviation */
3561 {"IMG", 3, html_img}, /* Image */
3562 {"MAP", 3, html_map}, /* Image Map */
3563 {"AREA", 4, html_area}, /* Image Map Area */
3564 {"HR", 2, html_hr, 1, 1}, /* Horizontal Rule */
3565 {"BR", 2, html_br, 0, 1}, /* Line Break */
3566 {"P", 1, html_p, 1}, /* Paragraph */
3567 {"OL", 2, html_ol, 1}, /* Ordered List */
3568 {"UL", 2, html_ul, 1}, /* Unordered List */
3569 {"MENU", 4, html_menu}, /* Menu List */
3570 {"DIR", 3, html_dir}, /* Directory List */
3571 {"LI", 2, html_li}, /* ... List Item */
3572 {"DL", 2, html_dl, 1}, /* Definition List */
3573 {"DT", 2, html_dt}, /* ... Def. Term */
3574 {"DD", 2, html_dd}, /* ... Def. Definition */
3575 {"I", 1, html_i}, /* Italic Text */
3576 {"EM", 2, html_em}, /* Typographic Emphasis */
3577 {"STRONG", 6, html_strong}, /* STRONG Typo Emphasis */
3578 {"VAR", 3, html_i}, /* Variable Name */
3579 {"B", 1, html_b}, /* Bold Text */
3580 {"U", 1, html_u}, /* Underline Text */
3581 {"S", 1, html_s}, /* Strike-Through Text */
3582 {"STRIKE", 6, html_s}, /* Strike-Through Text */
3583 {"BIG", 3, html_big}, /* Big Font Text */
3584 {"SMALL", 5, html_small}, /* Small Font Text */
3585 {"FONT", 4, html_font}, /* Font display directives */
3586 {"BLOCKQUOTE", 10, html_blockquote, 1}, /* Blockquote */
3587 {"ADDRESS", 7, html_address, 1}, /* Address */
3588 {"CENTER", 6, html_center}, /* Centered Text v3.2 */
3589 {"DIV", 3, html_div, 1}, /* Document Division 3.2 */
3590 {"SPAN", 4, html_span}, /* Text Span */
3591 {"H1", 2, html_h1, 1}, /* Headings... */
3592 {"H2", 2, html_h2, 1},
3593 {"H3", 2, html_h3,1},
3594 {"H4", 2, html_h4, 1},
3595 {"H5", 2, html_h5, 1},
3596 {"H6", 2, html_h6, 1},
3597 {"PRE", 3, html_pre, 1}, /* Preformatted Text */
3598 {"KBD", 3, html_kbd}, /* Keyboard Input (NO OP) */
3599 {"DFN", 3, html_dfn}, /* Definition (NO OP) */
3600 {"VAR", 3, html_var}, /* Variable (NO OP) */
3601 {"TT", 2, html_tt}, /* Typetype (NO OP) */
3602 {"SAMP", 4, html_samp}, /* Sample Text (NO OP) */
3603 {"CITE", 4, html_cite}, /* Citation (NO OP) */
3604 {"CODE", 4, html_code}, /* Code Text (NO OP) */
3605 {"INS", 3, html_ins}, /* Text Inseted (NO OP) */
3606 {"DEL", 3, html_del}, /* Text Deleted (NO OP) */
3607 {"SUP", 3, html_sup}, /* Text Superscript (NO OP) */
3608 {"SUB", 3, html_sub}, /* Text Superscript (NO OP) */
3609 {"STYLE", 5, html_style}, /* CSS Definitions */
3611 /*----- Handlers below UNIMPLEMENTED (and won't until later) -----*/
3613 {"FORM", 4, html_form, 1}, /* form within a document */
3614 {"INPUT", 5, html_input}, /* One input field, options */
3615 {"BUTTON", 6, html_button}, /* Push Button */
3616 {"OPTION", 6, html_option}, /* One option within Select */
3617 {"OPTION", 6, html_optgroup}, /* Option Group Definition */
3618 {"SELECT", 6, html_select}, /* Selection from a set */
3619 {"TEXTAREA", 8, html_textarea}, /* A multi-line input field */
3620 {"LABEL", 5, html_label}, /* Control Label */
3621 {"FIELDSET", 8, html_fieldset, 1}, /* Fieldset Control Group */
3623 /*----- Handlers below NEVER TO BE IMPLEMENTED -----*/
3624 {"SCRIPT", 6, html_script}, /* Embedded scripting statements */
3625 {"APPLET", 6, NULL}, /* Embedded applet statements */
3626 {"OBJECT", 6, NULL}, /* Embedded object statements */
3627 {"LINK", 4, NULL}, /* References to external data */
3628 {"PARAM", 5, NULL}, /* Applet/Object parameters */
3630 /*----- Handlers below provide limited support for RFC 1942 Tables -----*/
3632 {"TABLE", 5, html_table, 1}, /* Table */
3633 {"CAPTION", 7, html_caption}, /* Table Caption */
3634 {"TR", 2, html_tr}, /* Table Table Row */
3635 {"TD", 2, html_td}, /* Table Table Data */
3636 {"TH", 2, html_th}, /* Table Table Head */
3637 {"THEAD", 5, html_thead}, /* Table Table Head */
3638 {"TBODY", 5, html_tbody}, /* Table Table Body */
3639 {"TFOOT", 5, html_tfoot}, /* Table Table Foot */
3640 {"COL", 3, html_col}, /* Table Column Attibutes */
3641 {"COLGROUP", 8, html_colgroup}, /* Table Column Group Attibutes */
3643 {NULL, 0, NULL}
3648 * Table of supported RSS 2.0 elements
3650 static ELPROP_S rss_element_table[] = {
3651 {"RSS", 3, rss_rss}, /* RSS 2.0 version */
3652 {"CHANNEL", 7, rss_channel}, /* RSS 2.0 Channel */
3653 {"TITLE", 5, rss_title}, /* RSS 2.0 Title */
3654 {"IMAGE", 5, rss_image}, /* RSS 2.0 Channel Image */
3655 {"LINK", 4, rss_link}, /* RSS 2.0 Channel/Item Link */
3656 {"DESCRIPTION", 11, rss_description}, /* RSS 2.0 Channel/Item Description */
3657 {"ITEM", 4, rss_item}, /* RSS 2.0 Channel ITEM */
3658 {"TTL", 3, rss_ttl}, /* RSS 2.0 Item TTL */
3659 {NULL, 0, NULL}
3664 * Initialize the given handler, and add it to the stack if it
3665 * requests it.
3667 * Returns: 1 if handler chose to get pushed on stack
3668 * 0 if handler declined
3671 html_push(FILTER_S *fd, ELPROP_S *ep)
3673 HANDLER_S *new;
3675 new = (HANDLER_S *)fs_get(sizeof(HANDLER_S));
3676 memset(new, 0, sizeof(HANDLER_S));
3677 new->html_data = fd;
3678 new->element = ep;
3679 if((*ep->handler)(new, 0, GF_RESET)){ /* stack the handler? */
3680 new->below = HANDLERS(fd);
3681 HANDLERS(fd) = new; /* push */
3682 return(1);
3685 fs_give((void **) &new);
3686 return(0);
3691 * Remove the most recently installed the given handler
3692 * after letting it accept its demise.
3694 void
3695 html_pop(FILTER_S *fd, ELPROP_S *ep)
3697 HANDLER_S *tp;
3699 for(tp = HANDLERS(fd); tp && ep != EL(tp); tp = tp->below){
3700 HANDLER_S *tp2;
3702 dprint((3, "-- html error: bad nesting: given /%s expected /%s", ep->element, EL(tp)->element));
3703 /* if no evidence of opening tag, ignore given closing tag */
3704 for(tp2 = HANDLERS(fd); tp2 && ep != EL(tp2); tp2 = tp2->below)
3707 if(!tp2){
3708 dprint((3, "-- html error: no opening tag for given tag /%s", ep->element));
3709 return;
3712 (void) (*EL(tp)->handler)(tp, 0, GF_EOD);
3713 HANDLERS(fd) = tp->below;
3716 if(tp){
3717 (void) (*EL(tp)->handler)(tp, 0, GF_EOD); /* may adjust handler list */
3718 if(tp != HANDLERS(fd)){
3719 HANDLER_S *p;
3721 for(p = HANDLERS(fd); p->below != tp; p = p->below)
3724 if(p)
3725 p->below = tp->below; /* remove from middle of stack */
3726 /* BUG: else programming botch and we should die */
3728 else
3729 HANDLERS(fd) = tp->below; /* pop */
3731 fs_give((void **)&tp);
3733 else{
3734 /* BUG: should MAKE SURE NOT TO EMIT IT */
3735 dprint((3, "-- html error: end tag without a start: %s", ep->element));
3741 * Deal with data passed a hander in its GF_DATA state
3743 static void
3744 html_handoff(HANDLER_S *hd, int ch)
3746 if(hd->below)
3747 (void) (*EL(hd->below)->handler)(hd->below, ch, GF_DATA);
3748 else
3749 html_output(hd->html_data, ch);
3754 * HTML <BR> element handler
3757 html_br(HANDLER_S *hd, int ch, int cmd)
3759 if(cmd == GF_RESET){
3760 if(PASS_HTML(hd->html_data)){
3761 html_output_raw_tag(hd->html_data, "br");
3763 else{
3764 html_output(hd->html_data, HTML_NEWLINE);
3768 return(0); /* don't get linked */
3773 * HTML <HR> (Horizontal Rule) element handler
3776 html_hr(HANDLER_S *hd, int ch, int cmd)
3778 if(cmd == GF_RESET){
3779 if(PASS_HTML(hd->html_data)){
3780 html_output_raw_tag(hd->html_data, "hr");
3782 else{
3783 int i, old_wrap, width, align;
3784 PARAMETER *p;
3786 width = WRAP_COLS(hd->html_data);
3787 align = 0;
3788 for(p = HD(hd->html_data)->el_data->attribs;
3789 p && p->attribute;
3790 p = p->next)
3791 if(p->value){
3792 if(!strucmp(p->attribute, "ALIGN")){
3793 if(!strucmp(p->value, "LEFT"))
3794 align = 1;
3795 else if(!strucmp(p->value, "RIGHT"))
3796 align = 2;
3798 else if(!strucmp(p->attribute, "WIDTH")){
3799 char *cp;
3801 width = 0;
3802 for(cp = p->value; *cp; cp++)
3803 if(*cp == '%'){
3804 width = (WRAP_COLS(hd->html_data)*MIN(100,width))/100;
3805 break;
3807 else if(isdigit((unsigned char) *cp))
3808 width = (width * 10) + (*cp - '0');
3810 width = MIN(width, WRAP_COLS(hd->html_data));
3814 html_blank(hd->html_data, 1); /* at least one blank line */
3816 old_wrap = HD(hd->html_data)->wrapstate;
3817 HD(hd->html_data)->wrapstate = 0;
3818 if((i = MAX(0, WRAP_COLS(hd->html_data) - width))
3819 && ((align == 0) ? i /= 2 : (align == 2)))
3820 for(; i > 0; i--)
3821 html_output(hd->html_data, ' ');
3823 for(i = 0; i < width; i++)
3824 html_output(hd->html_data, '_');
3826 html_blank(hd->html_data, 1);
3827 HD(hd->html_data)->wrapstate = old_wrap;
3831 return(0); /* don't get linked */
3836 * HTML <P> (paragraph) element handler
3839 html_p(HANDLER_S *hd, int ch, int cmd)
3841 if(cmd == GF_DATA){
3842 html_handoff(hd, ch);
3844 else if(cmd == GF_RESET){
3845 if(PASS_HTML(hd->html_data)){
3846 html_output_raw_tag(hd->html_data, "p");
3848 else{
3849 /* Make sure there's at least 1 blank line */
3850 html_blank(hd->html_data, 1);
3852 /* adjust indent level if needed */
3853 if(HD(hd->html_data)->li_pending){
3854 html_indent(hd->html_data, 4, HTML_ID_INC);
3855 HD(hd->html_data)->li_pending = 0;
3859 else if(cmd == GF_EOD){
3860 if(PASS_HTML(hd->html_data)){
3861 html_output_string(hd->html_data, "</p>");
3863 else{
3864 /* Make sure there's at least 1 blank line */
3865 html_blank(hd->html_data, 1);
3869 return(1); /* GET linked */
3874 * HTML Table <TABLE> (paragraph) table row
3877 html_table(HANDLER_S *hd, int ch, int cmd)
3879 if(cmd == GF_DATA){
3880 if(PASS_HTML(hd->html_data)){
3881 html_handoff(hd, ch);
3884 else if(cmd == GF_RESET){
3885 if(PASS_HTML(hd->html_data)){
3886 html_output_raw_tag(hd->html_data, "table");
3888 else
3889 /* Make sure there's at least 1 blank line */
3890 html_blank(hd->html_data, 0);
3892 else if(cmd == GF_EOD){
3893 if(PASS_HTML(hd->html_data)){
3894 html_output_string(hd->html_data, "</table>");
3896 else
3897 /* Make sure there's at least 1 blank line */
3898 html_blank(hd->html_data, 0);
3900 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3905 * HTML <CAPTION> (Table Caption) element handler
3908 html_caption(HANDLER_S *hd, int ch, int cmd)
3910 if(cmd == GF_DATA){
3911 html_handoff(hd, ch);
3913 else if(cmd == GF_RESET){
3914 if(PASS_HTML(hd->html_data)){
3915 html_output_raw_tag(hd->html_data, "caption");
3917 else{
3918 /* turn ON the centered bit */
3919 CENTER_BIT(hd->html_data) = 1;
3922 else if(cmd == GF_EOD){
3923 if(PASS_HTML(hd->html_data)){
3924 html_output_string(hd->html_data, "</caption>");
3926 else{
3927 /* turn OFF the centered bit */
3928 CENTER_BIT(hd->html_data) = 0;
3932 return(1);
3937 * HTML Table <TR> (paragraph) table row
3940 html_tr(HANDLER_S *hd, int ch, int cmd)
3942 if(cmd == GF_DATA){
3943 if(PASS_HTML(hd->html_data)){
3944 html_handoff(hd, ch);
3947 else if(cmd == GF_RESET){
3948 if(PASS_HTML(hd->html_data)){
3949 html_output_raw_tag(hd->html_data, "tr");
3951 else
3952 /* Make sure there's at least 1 blank line */
3953 html_blank(hd->html_data, 0);
3955 else if(cmd == GF_EOD){
3956 if(PASS_HTML(hd->html_data)){
3957 html_output_string(hd->html_data, "</tr>");
3959 else
3960 /* Make sure there's at least 1 blank line */
3961 html_blank(hd->html_data, 0);
3963 return(PASS_HTML(hd->html_data)); /* maybe get linked */
3968 * HTML Table <TD> (paragraph) table data
3971 html_td(HANDLER_S *hd, int ch, int cmd)
3973 if(cmd == GF_DATA){
3974 if(PASS_HTML(hd->html_data)){
3975 html_handoff(hd, ch);
3978 else if(cmd == GF_RESET){
3979 if(PASS_HTML(hd->html_data)){
3980 html_output_raw_tag(hd->html_data, "td");
3982 else{
3983 PARAMETER *p;
3985 for(p = HD(hd->html_data)->el_data->attribs;
3986 p && p->attribute;
3987 p = p->next)
3988 if(!strucmp(p->attribute, "nowrap")
3989 && (hd->html_data->f2 || hd->html_data->n)){
3990 HTML_DUMP_LIT(hd->html_data, " | ", 3);
3991 break;
3995 else if(cmd == GF_EOD){
3996 if(PASS_HTML(hd->html_data)){
3997 html_output_string(hd->html_data, "</td>");
4001 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4006 * HTML Table <TH> (paragraph) table head
4009 html_th(HANDLER_S *hd, int ch, int cmd)
4011 if(cmd == GF_DATA){
4012 if(PASS_HTML(hd->html_data)){
4013 html_handoff(hd, ch);
4016 else if(cmd == GF_RESET){
4017 if(PASS_HTML(hd->html_data)){
4018 html_output_raw_tag(hd->html_data, "th");
4020 else{
4021 PARAMETER *p;
4023 for(p = HD(hd->html_data)->el_data->attribs;
4024 p && p->attribute;
4025 p = p->next)
4026 if(!strucmp(p->attribute, "nowrap")
4027 && (hd->html_data->f2 || hd->html_data->n)){
4028 HTML_DUMP_LIT(hd->html_data, " | ", 3);
4029 break;
4033 else if(cmd == GF_EOD){
4034 if(PASS_HTML(hd->html_data)){
4035 html_output_string(hd->html_data, "</th>");
4039 return(PASS_HTML(hd->html_data)); /* don't get linked */
4044 * HTML Table <THEAD> table head
4047 html_thead(HANDLER_S *hd, int ch, int cmd)
4049 if(PASS_HTML(hd->html_data)){
4050 if(cmd == GF_DATA){
4051 html_handoff(hd, ch);
4053 else if(cmd == GF_RESET){
4054 html_output_raw_tag(hd->html_data, "thead");
4056 else if(cmd == GF_EOD){
4057 html_output_string(hd->html_data, "</thead>");
4060 return(1); /* GET linked */
4063 return(0); /* don't get linked */
4068 * HTML Table <TBODY> table body
4071 html_tbody(HANDLER_S *hd, int ch, int cmd)
4073 if(PASS_HTML(hd->html_data)){
4074 if(cmd == GF_DATA){
4075 html_handoff(hd, ch);
4077 else if(cmd == GF_RESET){
4078 html_output_raw_tag(hd->html_data, "tbody");
4080 else if(cmd == GF_EOD){
4081 html_output_string(hd->html_data, "</tbody>");
4084 return(1); /* GET linked */
4087 return(0); /* don't get linked */
4092 * HTML Table <TFOOT> table body
4095 html_tfoot(HANDLER_S *hd, int ch, int cmd)
4097 if(PASS_HTML(hd->html_data)){
4098 if(cmd == GF_DATA){
4099 html_handoff(hd, ch);
4101 else if(cmd == GF_RESET){
4102 html_output_raw_tag(hd->html_data, "tfoot");
4104 else if(cmd == GF_EOD){
4105 html_output_string(hd->html_data, "</tfoot>");
4108 return(1); /* GET linked */
4111 return(0); /* don't get linked */
4116 * HTML <COL> (Table Column Attributes) element handler
4119 html_col(HANDLER_S *hd, int ch, int cmd)
4121 if(cmd == GF_RESET){
4122 if(PASS_HTML(hd->html_data)){
4123 html_output_raw_tag(hd->html_data, "col");
4127 return(0); /* don't get linked */
4132 * HTML Table <COLGROUP> table body
4135 html_colgroup(HANDLER_S *hd, int ch, int cmd)
4137 if(PASS_HTML(hd->html_data)){
4138 if(cmd == GF_DATA){
4139 html_handoff(hd, ch);
4141 else if(cmd == GF_RESET){
4142 html_output_raw_tag(hd->html_data, "colgroup");
4144 else if(cmd == GF_EOD){
4145 html_output_string(hd->html_data, "</colgroup>");
4148 return(1); /* GET linked */
4151 return(0); /* don't get linked */
4156 * HTML <I> (italic text) element handler
4159 html_i(HANDLER_S *hd, int ch, int cmd)
4161 if(cmd == GF_DATA){
4162 /* include LITERAL in spaceness test! */
4163 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4164 HTML_ITALIC(hd->html_data, 1);
4165 hd->x = 0;
4168 html_handoff(hd, ch);
4170 else if(cmd == GF_RESET){
4171 hd->x = 1;
4173 else if(cmd == GF_EOD){
4174 if(!hd->x)
4175 HTML_ITALIC(hd->html_data, 0);
4178 return(1); /* get linked */
4183 * HTML <EM> element handler
4186 html_em(HANDLER_S *hd, int ch, int cmd)
4188 if(cmd == GF_DATA){
4189 if(!PASS_HTML(hd->html_data)){
4190 /* include LITERAL in spaceness test! */
4191 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4192 HTML_ITALIC(hd->html_data, 1);
4193 hd->x = 0;
4197 html_handoff(hd, ch);
4199 else if(cmd == GF_RESET){
4200 if(PASS_HTML(hd->html_data)){
4201 html_output_raw_tag(hd->html_data, "em");
4203 else{
4204 hd->x = 1;
4207 else if(cmd == GF_EOD){
4208 if(PASS_HTML(hd->html_data)){
4209 html_output_string(hd->html_data, "</em>");
4211 else{
4212 if(!hd->x)
4213 HTML_ITALIC(hd->html_data, 0);
4217 return(1); /* get linked */
4222 * HTML <STRONG> element handler
4225 html_strong(HANDLER_S *hd, int ch, int cmd)
4227 if(cmd == GF_DATA){
4228 if(!PASS_HTML(hd->html_data)){
4229 /* include LITERAL in spaceness test! */
4230 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4231 HTML_ITALIC(hd->html_data, 1);
4232 hd->x = 0;
4236 html_handoff(hd, ch);
4238 else if(cmd == GF_RESET){
4239 if(PASS_HTML(hd->html_data)){
4240 html_output_raw_tag(hd->html_data, "strong");
4242 else{
4243 hd->x = 1;
4246 else if(cmd == GF_EOD){
4247 if(PASS_HTML(hd->html_data)){
4248 html_output_string(hd->html_data, "</strong>");
4250 else{
4251 if(!hd->x)
4252 HTML_ITALIC(hd->html_data, 0);
4256 return(1); /* get linked */
4261 * HTML <u> (Underline text) element handler
4264 html_u(HANDLER_S *hd, int ch, int cmd)
4266 if(PASS_HTML(hd->html_data)){
4267 if(cmd == GF_DATA){
4268 html_handoff(hd, ch);
4270 else if(cmd == GF_RESET){
4271 html_output_raw_tag(hd->html_data, "u");
4273 else if(cmd == GF_EOD){
4274 html_output_string(hd->html_data, "</u>");
4277 return(1); /* get linked */
4280 return(0); /* do NOT get linked */
4285 * HTML <b> (Bold text) element handler
4288 html_b(HANDLER_S *hd, int ch, int cmd)
4290 if(cmd == GF_DATA){
4291 if(!PASS_HTML(hd->html_data)){
4292 /* include LITERAL in spaceness test! */
4293 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4294 HTML_BOLD(hd->html_data, 1);
4295 hd->x = 0;
4299 html_handoff(hd, ch);
4301 else if(cmd == GF_RESET){
4302 if(PASS_HTML(hd->html_data)){
4303 html_output_raw_tag(hd->html_data, "b");
4305 else{
4306 hd->x = 1;
4309 else if(cmd == GF_EOD){
4310 if(PASS_HTML(hd->html_data)){
4311 html_output_string(hd->html_data, "</b>");
4313 else{
4314 if(!hd->x)
4315 HTML_BOLD(hd->html_data, 0);
4319 return(1); /* get linked */
4324 * HTML <s> (strike-through text) element handler
4327 html_s(HANDLER_S *hd, int ch, int cmd)
4329 if(cmd == GF_DATA){
4330 if(!PASS_HTML(hd->html_data)){
4331 /* include LITERAL in spaceness test! */
4332 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4333 HTML_STRIKE(hd->html_data, 1);
4334 hd->x = 0;
4338 html_handoff(hd, ch);
4340 else if(cmd == GF_RESET){
4341 if(PASS_HTML(hd->html_data)){
4342 html_output_raw_tag(hd->html_data, "s");
4344 else{
4345 hd->x = 1;
4348 else if(cmd == GF_EOD){
4349 if(PASS_HTML(hd->html_data)){
4350 html_output_string(hd->html_data, "</s>");
4352 else{
4353 if(!hd->x)
4354 HTML_STRIKE(hd->html_data, 0);
4358 return(1); /* get linked */
4363 * HTML <big> (BIG text) element handler
4366 html_big(HANDLER_S *hd, int ch, int cmd)
4368 if(cmd == GF_DATA){
4369 /* include LITERAL in spaceness test! */
4370 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4371 HTML_BIG(hd->html_data, 1);
4372 hd->x = 0;
4375 html_handoff(hd, ch);
4377 else if(cmd == GF_RESET){
4378 hd->x = 1;
4380 else if(cmd == GF_EOD){
4381 if(!hd->x)
4382 HTML_BIG(hd->html_data, 0);
4385 return(1); /* get linked */
4390 * HTML <small> (SMALL text) element handler
4393 html_small(HANDLER_S *hd, int ch, int cmd)
4395 if(cmd == GF_DATA){
4396 /* include LITERAL in spaceness test! */
4397 if(hd->x && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
4398 HTML_SMALL(hd->html_data, 1);
4399 hd->x = 0;
4402 html_handoff(hd, ch);
4404 else if(cmd == GF_RESET){
4405 hd->x = 1;
4407 else if(cmd == GF_EOD){
4408 if(!hd->x)
4409 HTML_SMALL(hd->html_data, 0);
4412 return(1); /* get linked */
4417 * HTML <FONT> element handler
4420 html_font(HANDLER_S *hd, int ch, int cmd)
4422 if(PASS_HTML(hd->html_data)){
4423 if(cmd == GF_DATA){
4424 html_handoff(hd, ch);
4426 else if(cmd == GF_RESET){
4427 html_output_raw_tag(hd->html_data, "font");
4429 else if(cmd == GF_EOD){
4430 html_output_string(hd->html_data, "</font>");
4433 return(1); /* get linked */
4436 return(0);
4441 * HTML <IMG> element handler
4444 html_img(HANDLER_S *hd, int ch, int cmd)
4446 PARAMETER *p;
4447 char *alt = NULL, *src = NULL, *s;
4449 if(cmd == GF_RESET){
4450 if(PASS_HTML(hd->html_data)){
4451 html_output_raw_tag(hd->html_data, "img");
4453 else{
4454 for(p = HD(hd->html_data)->el_data->attribs;
4455 p && p->attribute;
4456 p = p->next)
4457 if(p->value && p->value[0]){
4458 if(!strucmp(p->attribute, "alt"))
4459 alt = p->value;
4460 if(!strucmp(p->attribute, "src"))
4461 src = p->value;
4465 * Multipart/Related Content ID pointer
4466 * ONLY attached messages are recognized
4467 * if we ever decide web bugs aren't a problem
4468 * anymore then we might expand the scope
4470 if(src
4471 && DO_HANDLES(hd->html_data)
4472 && RELATED_OK(hd->html_data)
4473 && struncmp(src, "cid:", 4) == 0){
4474 char buf[32];
4475 int i, n;
4476 HANDLE_S *h = new_handle(HANDLESP(hd->html_data));
4478 h->type = IMG;
4479 h->h.img.src = cpystr(src + 4);
4480 h->h.img.alt = cpystr((alt) ? alt : "Attached Image");
4482 HTML_TEXT(hd->html_data, TAG_EMBED);
4483 HTML_TEXT(hd->html_data, TAG_HANDLE);
4485 sprintf(buf, "%d", h->key);
4486 n = strlen(buf);
4487 HTML_TEXT(hd->html_data, n);
4488 for(i = 0; i < n; i++){
4489 unsigned int uic = buf[i];
4490 HTML_TEXT(hd->html_data, uic);
4493 return(0);
4495 else if(alt && strlen(alt) < 256){ /* arbitrary "reasonable" limit */
4496 HTML_DUMP_LIT(hd->html_data, alt, strlen(alt));
4497 HTML_TEXT(hd->html_data, ' ');
4498 return(0);
4500 else if(src
4501 && (s = strrindex(src, '/'))
4502 && *++s != '\0'){
4503 HTML_TEXT(hd->html_data, '[');
4504 HTML_DUMP_LIT(hd->html_data, s, strlen(s));
4505 HTML_TEXT(hd->html_data, ']');
4506 HTML_TEXT(hd->html_data, ' ');
4507 return(0);
4510 /* text filler of last resort */
4511 HTML_DUMP_LIT(hd->html_data, "[IMAGE] ", 7);
4515 return(0); /* don't get linked */
4520 * HTML <MAP> (Image Map) element handler
4523 html_map(HANDLER_S *hd, int ch, int cmd)
4525 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4526 if(cmd == GF_DATA){
4527 html_handoff(hd, ch);
4529 else if(cmd == GF_RESET){
4530 html_output_raw_tag(hd->html_data, "map");
4532 else if(cmd == GF_EOD){
4533 html_output_string(hd->html_data, "</map>");
4536 return(1);
4539 return(0);
4544 * HTML <AREA> (Image Map Area) element handler
4547 html_area(HANDLER_S *hd, int ch, int cmd)
4549 if(PASS_HTML(hd->html_data) && PASS_IMAGES(hd->html_data)){
4550 if(cmd == GF_DATA){
4551 html_handoff(hd, ch);
4553 else if(cmd == GF_RESET){
4554 html_output_raw_tag(hd->html_data, "area");
4556 else if(cmd == GF_EOD){
4557 html_output_string(hd->html_data, "</area>");
4560 return(1);
4563 return(0);
4568 * HTML <FORM> (Form) element handler
4571 html_form(HANDLER_S *hd, int ch, int cmd)
4573 if(PASS_HTML(hd->html_data)){
4574 if(cmd == GF_DATA){
4575 html_handoff(hd, ch);
4577 else if(cmd == GF_RESET){
4578 PARAMETER **pp;
4580 /* SECURITY: make sure to redirect to new browser instance */
4581 for(pp = &(HD(hd->html_data)->el_data->attribs);
4582 *pp && (*pp)->attribute;
4583 pp = &(*pp)->next)
4584 if(!strucmp((*pp)->attribute, "target")){
4585 if((*pp)->value)
4586 fs_give((void **) &(*pp)->value);
4588 (*pp)->value = cpystr("_blank");
4591 if(!*pp){
4592 *pp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4593 memset(*pp, 0, sizeof(PARAMETER));
4594 (*pp)->attribute = cpystr("target");
4595 (*pp)->value = cpystr("_blank");
4598 html_output_raw_tag(hd->html_data, "form");
4600 else if(cmd == GF_EOD){
4601 html_output_string(hd->html_data, "</form>");
4604 else{
4605 if(cmd == GF_RESET){
4606 html_blank(hd->html_data, 0);
4607 HTML_DUMP_LIT(hd->html_data, "[FORM]", 6);
4608 html_blank(hd->html_data, 0);
4612 return(PASS_HTML(hd->html_data)); /* maybe get linked */
4617 * HTML <INPUT> (Form) element handler
4620 html_input(HANDLER_S *hd, int ch, int cmd)
4622 if(PASS_HTML(hd->html_data)){
4623 if(cmd == GF_RESET){
4624 html_output_raw_tag(hd->html_data, "input");
4628 return(0); /* don't get linked */
4633 * HTML <BUTTON> (Form) element handler
4636 html_button(HANDLER_S *hd, int ch, int cmd)
4638 if(PASS_HTML(hd->html_data)){
4639 if(cmd == GF_DATA){
4640 html_handoff(hd, ch);
4642 else if(cmd == GF_RESET){
4643 html_output_raw_tag(hd->html_data, "button");
4645 else if(cmd == GF_EOD){
4646 html_output_string(hd->html_data, "</button>");
4649 return(1); /* get linked */
4652 return(0);
4657 * HTML <OPTION> (Form) element handler
4660 html_option(HANDLER_S *hd, int ch, int cmd)
4662 if(PASS_HTML(hd->html_data)){
4663 if(cmd == GF_DATA){
4664 html_handoff(hd, ch);
4666 else if(cmd == GF_RESET){
4667 html_output_raw_tag(hd->html_data, "option");
4669 else if(cmd == GF_EOD){
4670 html_output_string(hd->html_data, "</option>");
4673 return(1); /* get linked */
4676 return(0);
4681 * HTML <OPTGROUP> (Form) element handler
4684 html_optgroup(HANDLER_S *hd, int ch, int cmd)
4686 if(PASS_HTML(hd->html_data)){
4687 if(cmd == GF_DATA){
4688 html_handoff(hd, ch);
4690 else if(cmd == GF_RESET){
4691 html_output_raw_tag(hd->html_data, "optgroup");
4693 else if(cmd == GF_EOD){
4694 html_output_string(hd->html_data, "</optgroup>");
4697 return(1); /* get linked */
4700 return(0);
4705 * HTML <SELECT> (Form) element handler
4708 html_select(HANDLER_S *hd, int ch, int cmd)
4710 if(PASS_HTML(hd->html_data)){
4711 if(cmd == GF_DATA){
4712 html_handoff(hd, ch);
4714 else if(cmd == GF_RESET){
4715 html_output_raw_tag(hd->html_data, "select");
4717 else if(cmd == GF_EOD){
4718 html_output_string(hd->html_data, "</select>");
4721 return(1); /* get linked */
4724 return(0);
4729 * HTML <TEXTAREA> (Form) element handler
4732 html_textarea(HANDLER_S *hd, int ch, int cmd)
4734 if(PASS_HTML(hd->html_data)){
4735 if(cmd == GF_DATA){
4736 html_handoff(hd, ch);
4738 else if(cmd == GF_RESET){
4739 html_output_raw_tag(hd->html_data, "textarea");
4741 else if(cmd == GF_EOD){
4742 html_output_string(hd->html_data, "</textarea>");
4745 return(1); /* get linked */
4748 return(0);
4753 * HTML <LABEL> (Form) element handler
4756 html_label(HANDLER_S *hd, int ch, int cmd)
4758 if(PASS_HTML(hd->html_data)){
4759 if(cmd == GF_DATA){
4760 html_handoff(hd, ch);
4762 else if(cmd == GF_RESET){
4763 html_output_raw_tag(hd->html_data, "label");
4765 else if(cmd == GF_EOD){
4766 html_output_string(hd->html_data, "</label>");
4769 return(1); /* get linked */
4772 return(0);
4777 * HTML <FIELDSET> (Form) element handler
4780 html_fieldset(HANDLER_S *hd, int ch, int cmd)
4782 if(PASS_HTML(hd->html_data)){
4783 if(cmd == GF_DATA){
4784 html_handoff(hd, ch);
4786 else if(cmd == GF_RESET){
4787 html_output_raw_tag(hd->html_data, "fieldset");
4789 else if(cmd == GF_EOD){
4790 html_output_string(hd->html_data, "</fieldset>");
4793 return(1); /* get linked */
4796 return(0);
4801 * HTML <HEAD> element handler
4804 html_head(HANDLER_S *hd, int ch, int cmd)
4806 if(cmd == GF_DATA){
4807 html_handoff(hd, ch);
4809 else if(cmd == GF_RESET){
4810 HD(hd->html_data)->head = 1;
4812 else if(cmd == GF_EOD){
4813 HD(hd->html_data)->head = 0;
4816 return(1); /* get linked */
4821 * HTML <BASE> element handler
4824 html_base(HANDLER_S *hd, int ch, int cmd)
4826 if(cmd == GF_RESET){
4827 if(HD(hd->html_data)->head && !HTML_BASE(hd->html_data)){
4828 PARAMETER *p;
4830 for(p = HD(hd->html_data)->el_data->attribs;
4831 p && p->attribute && strucmp(p->attribute, "HREF");
4832 p = p->next)
4835 if(p && p->value && !((HTML_OPT_S *)(hd->html_data)->opt)->base)
4836 ((HTML_OPT_S *)(hd->html_data)->opt)->base = cpystr(p->value);
4840 return(0); /* DON'T get linked */
4845 * HTML <TITLE> element handler
4848 html_title(HANDLER_S *hd, int ch, int cmd)
4850 if(cmd == GF_DATA){
4851 if(hd->x + 1 >= hd->y){
4852 hd->y += 80;
4853 fs_resize((void **)&hd->s, (size_t)hd->y * sizeof(unsigned char));
4856 hd->s[hd->x++] = (unsigned char) ch;
4858 else if(cmd == GF_RESET){
4859 hd->x = 0L;
4860 hd->y = 80L;
4861 hd->s = (unsigned char *)fs_get((size_t)hd->y * sizeof(unsigned char));
4863 else if(cmd == GF_EOD){
4864 /* Down the road we probably want to give these bytes to
4865 * someone...
4867 hd->s[hd->x] = '\0';
4868 fs_give((void **)&hd->s);
4871 return(1); /* get linked */
4876 * HTML <BODY> element handler
4879 html_body(HANDLER_S *hd, int ch, int cmd)
4881 if(cmd == GF_DATA){
4882 html_handoff(hd, ch);
4884 else if(cmd == GF_RESET){
4885 if(PASS_HTML(hd->html_data)){
4886 PARAMETER *p, *tp;
4887 char **style = NULL, *text = NULL, *bgcolor = NULL, *pcs;
4889 /* modify any attributes in a useful way? */
4890 for(p = HD(hd->html_data)->el_data->attribs;
4891 p && p->attribute;
4892 p = p->next)
4893 if(p->value){
4894 if(!strucmp(p->attribute, "style"))
4895 style = &p->value;
4896 else if(!strucmp(p->attribute, "text"))
4897 text = p->value;
4899 * bgcolor NOT passed since user setting takes precedence
4901 else if(!strucmp(p->attribute, "bgcolor"))
4902 bgcolor = p->value;
4906 /* colors pretty much it */
4907 if(text || bgcolor){
4908 if(!style){
4909 tp = (PARAMETER *)fs_get(sizeof(PARAMETER));
4910 memset(tp, 0, sizeof(PARAMETER));
4911 tp->next = HD(hd->html_data)->el_data->attribs;
4912 HD(hd->html_data)->el_data->attribs = tp;
4913 tp->attribute = cpystr("style");
4915 tmp_20k_buf[0] = '\0';
4916 style = &tp->value;
4917 pcs = "%s%s%s%s%s";
4919 else{
4920 snprintf(tmp_20k_buf, SIZEOF_20KBUF, "%s", *style);
4921 fs_give((void **) style);
4922 pcs = "; %s%s%s%s%s";
4925 snprintf(tmp_20k_buf + strlen(tmp_20k_buf),
4926 SIZEOF_20KBUF - strlen(tmp_20k_buf),
4927 pcs,
4928 (text) ? "color: " : "", (text) ? text : "",
4929 (text && bgcolor) ? ";" : "",
4930 (bgcolor) ? "background-color: " : "", (bgcolor) ? bgcolor : "");
4931 *style = cpystr(tmp_20k_buf);
4934 html_output_raw_tag(hd->html_data, "div");
4937 HD(hd->html_data)->body = 1;
4939 else if(cmd == GF_EOD){
4940 if(PASS_HTML(hd->html_data)){
4941 html_output_string(hd->html_data, "</div>");
4944 HD(hd->html_data)->body = 0;
4947 return(1); /* get linked */
4952 * HTML <A> (Anchor) element handler
4955 html_a(HANDLER_S *hd, int ch, int cmd)
4957 if(cmd == GF_DATA){
4958 html_handoff(hd, ch);
4960 if(hd->dp) /* remember text within anchor tags */
4961 so_writec(ch, (STORE_S *) hd->dp);
4963 else if(cmd == GF_RESET){
4964 int i, n, x;
4965 char buf[256];
4966 HANDLE_S *h;
4967 PARAMETER *p, *href = NULL, *name = NULL;
4970 * Pending Anchor!?!?
4971 * space insertion/line breaking that's yet to get done...
4973 if(HD(hd->html_data)->prefix){
4974 dprint((2, "-- html error: nested or unterminated anchor\n"));
4975 html_a_finish(hd);
4979 * Look for valid Anchor data vis the filter installer's parms
4980 * (e.g., Only allow references to our internal URLs if asked)
4982 for(p = HD(hd->html_data)->el_data->attribs;
4983 p && p->attribute;
4984 p = p->next)
4985 if(!strucmp(p->attribute, "HREF")
4986 && p->value
4987 && (HANDLES_LOC(hd->html_data)
4988 || struncmp(p->value, "x-alpine-", 9)
4989 || struncmp(p->value, "x-pine-help", 11)
4990 || p->value[0] == '#'))
4991 href = p;
4992 else if(!strucmp(p->attribute, "NAME"))
4993 name = p;
4995 if(DO_HANDLES(hd->html_data) && (href || name)){
4996 h = new_handle(HANDLESP(hd->html_data));
4999 * Enhancement: we might want to get fancier and parse the
5000 * href a bit further such that we can launch images using
5001 * our image viewer, or browse local files or directories
5002 * with our internal tools. Of course, having the jump-off
5003 * point into text/html always be the defined "web-browser",
5004 * just might be the least confusing UI-wise...
5006 h->type = URL;
5008 if(name && name->value)
5009 h->h.url.name = cpystr(name->value);
5012 * Prepare to build embedded prefix...
5014 HD(hd->html_data)->prefix = (int *) fs_get(64 * sizeof(int));
5015 x = 0;
5018 * Is this something that looks like a URL? If not and
5019 * we were giving some "base" string, proceed ala RFC1808...
5021 if(href){
5022 if(HTML_BASE(hd->html_data) && !rfc1738_scan(href->value, &n)){
5023 html_a_relative(HTML_BASE(hd->html_data), href->value, h);
5025 else if(!(NO_RELATIVE(hd->html_data) && html_href_relative(href->value)))
5026 h->h.url.path = cpystr(href->value);
5028 if(pico_usingcolor()){
5029 char *fg = NULL, *bg = NULL, *q;
5031 if(ps_global->VAR_SLCTBL_FORE_COLOR
5032 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5033 ps_global->VAR_NORM_FORE_COLOR))
5034 fg = ps_global->VAR_SLCTBL_FORE_COLOR;
5036 if(ps_global->VAR_SLCTBL_BACK_COLOR
5037 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5038 ps_global->VAR_NORM_BACK_COLOR))
5039 bg = ps_global->VAR_SLCTBL_BACK_COLOR;
5041 if(fg || bg){
5042 COLOR_PAIR *tmp;
5045 * The blacks are just known good colors for testing
5046 * whether the other color is good.
5048 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5049 bg ? bg : colorx(COL_BLACK));
5050 if(pico_is_good_colorpair(tmp)){
5051 q = color_embed(fg, bg);
5053 for(i = 0; q[i]; i++)
5054 HD(hd->html_data)->prefix[x++] = q[i];
5057 if(tmp)
5058 free_color_pair(&tmp);
5061 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5062 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5064 else
5065 HD(hd->html_data)->prefix[x++] = HTML_DOBOLD;
5068 HD(hd->html_data)->prefix[x++] = TAG_EMBED;
5069 HD(hd->html_data)->prefix[x++] = TAG_HANDLE;
5071 snprintf(buf, sizeof(buf), "%ld", hd->x = h->key);
5072 HD(hd->html_data)->prefix[x++] = n = strlen(buf);
5073 for(i = 0; i < n; i++)
5074 HD(hd->html_data)->prefix[x++] = buf[i];
5076 HD(hd->html_data)->prefix_used = x;
5078 hd->dp = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
5081 else if(cmd == GF_EOD){
5082 html_a_finish(hd);
5085 return(1); /* get linked */
5089 void
5090 html_a_prefix(FILTER_S *f)
5092 int *prefix, n;
5094 /* Do this so we don't visit from html_output... */
5095 prefix = HD(f)->prefix;
5096 HD(f)->prefix = NULL;
5098 for(n = 0; n < HD(f)->prefix_used; n++)
5099 html_a_output_prefix(f, prefix[n]);
5101 fs_give((void **) &prefix);
5106 * html_a_finish - house keeping associated with end of link tag
5108 void
5109 html_a_finish(HANDLER_S *hd)
5111 if(DO_HANDLES(hd->html_data)){
5112 if(HD(hd->html_data)->prefix){
5113 if(!PASS_HTML(hd->html_data)){
5114 char *empty_link = "[LINK]";
5115 int i;
5117 html_a_prefix(hd->html_data);
5118 for(i = 0; empty_link[i]; i++)
5119 html_output(hd->html_data, empty_link[i]);
5123 if(pico_usingcolor()){
5124 char *fg = NULL, *bg = NULL, *p;
5125 int i;
5127 if(ps_global->VAR_SLCTBL_FORE_COLOR
5128 && colorcmp(ps_global->VAR_SLCTBL_FORE_COLOR,
5129 ps_global->VAR_NORM_FORE_COLOR))
5130 fg = ps_global->VAR_NORM_FORE_COLOR;
5132 if(ps_global->VAR_SLCTBL_BACK_COLOR
5133 && colorcmp(ps_global->VAR_SLCTBL_BACK_COLOR,
5134 ps_global->VAR_NORM_BACK_COLOR))
5135 bg = ps_global->VAR_NORM_BACK_COLOR;
5137 if(F_OFF(F_SLCTBL_ITEM_NOBOLD, ps_global))
5138 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5140 if(fg || bg){
5141 COLOR_PAIR *tmp;
5144 * The blacks are just known good colors for testing
5145 * whether the other color is good.
5147 tmp = new_color_pair(fg ? fg : colorx(COL_BLACK),
5148 bg ? bg : colorx(COL_BLACK));
5149 if(pico_is_good_colorpair(tmp)){
5150 p = color_embed(fg, bg);
5152 for(i = 0; p[i]; i++)
5153 html_output(hd->html_data, p[i]);
5156 if(tmp)
5157 free_color_pair(&tmp);
5160 else
5161 HTML_BOLD(hd->html_data, 0); /* turn OFF bold */
5163 html_output(hd->html_data, TAG_EMBED);
5164 html_output(hd->html_data, TAG_HANDLEOFF);
5166 html_a_output_info(hd);
5172 * html_output_a_prefix - dump Anchor prefix data
5174 void
5175 html_a_output_prefix(FILTER_S *f, int c)
5177 switch(c){
5178 case HTML_DOBOLD :
5179 HTML_BOLD(f, 1);
5180 break;
5182 default :
5183 html_output(f, c);
5184 break;
5191 * html_a_output_info - dump possibly deceptive link info into text.
5192 * phark the phishers.
5194 void
5195 html_a_output_info(HANDLER_S *hd)
5197 int l, risky = 0, hl = 0, tl;
5198 char *url = NULL, *hn = NULL, *txt;
5199 HANDLE_S *h;
5201 /* find host anchor references */
5202 if((h = get_handle(*HANDLESP(hd->html_data), (int) hd->x)) != NULL
5203 && h->h.url.path != NULL
5204 && (hn = rfc1738_scan(rfc1738_str(url = cpystr(h->h.url.path)), &l)) != NULL
5205 && (hn = srchstr(hn,"://")) != NULL){
5207 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++)
5211 if(hn && hl){
5213 * look over anchor's text to see if there's a
5214 * mismatch between href target and url-ish
5215 * looking text. throw a red flag if so.
5216 * similarly, toss one if the target's referenced
5217 * by a
5219 if(hd->dp){
5220 so_writec('\0', (STORE_S *) hd->dp);
5222 if((txt = (char *) so_text((STORE_S *) hd->dp)) != NULL
5223 && (txt = rfc1738_scan(txt, &tl)) != NULL
5224 && (txt = srchstr(txt,"://")) != NULL){
5226 for(txt += 3, tl = 0; txt[tl] && txt[tl] != '/' && txt[tl] != '?'; tl++)
5229 if(tl != hl)
5230 risky++;
5231 else
5232 /* look for non matching text */
5233 for(l = 0; l < tl && l < hl; l++)
5234 if(tolower((unsigned char) txt[l]) != tolower((unsigned char) hn[l])){
5235 risky++;
5236 break;
5240 so_give((STORE_S **) &hd->dp);
5243 /* look for literal IP, anything possibly encoded or auth specifier */
5244 if(!risky){
5245 int digits = 1;
5247 for(l = 0; l < hl; l++){
5248 if(hn[l] == '@' || hn[l] == '%'){
5249 risky++;
5250 break;
5252 else if(!(hn[l] == '.' || isdigit((unsigned char) hn[l])))
5253 digits = 0;
5256 if(digits)
5257 risky++;
5260 /* Insert text of link's domain */
5261 if(SHOWSERVER(hd->html_data)){
5262 char *q;
5263 COLOR_PAIR *col = NULL, *colnorm = NULL;
5265 html_output(hd->html_data, ' ');
5266 html_output(hd->html_data, '[');
5268 if(pico_usingcolor()
5269 && ps_global->VAR_METAMSG_FORE_COLOR
5270 && ps_global->VAR_METAMSG_BACK_COLOR
5271 && (col = new_color_pair(ps_global->VAR_METAMSG_FORE_COLOR,
5272 ps_global->VAR_METAMSG_BACK_COLOR))){
5273 if(!pico_is_good_colorpair(col))
5274 free_color_pair(&col);
5276 if(col){
5277 q = color_embed(col->fg, col->bg);
5279 for(l = 0; q[l]; l++)
5280 html_output(hd->html_data, q[l]);
5284 for(l = 0; l < hl; l++)
5285 html_output(hd->html_data, hn[l]);
5287 if(col){
5288 if(ps_global->VAR_NORM_FORE_COLOR
5289 && ps_global->VAR_NORM_BACK_COLOR
5290 && (colnorm = new_color_pair(ps_global->VAR_NORM_FORE_COLOR,
5291 ps_global->VAR_NORM_BACK_COLOR))){
5292 if(!pico_is_good_colorpair(colnorm))
5293 free_color_pair(&colnorm);
5295 if(colnorm){
5296 q = color_embed(colnorm->fg, colnorm->bg);
5297 free_color_pair(&colnorm);
5299 for(l = 0; q[l]; l++)
5300 html_output(hd->html_data, q[l]);
5304 free_color_pair(&col);
5307 html_output(hd->html_data, ']');
5312 * if things look OK so far, make sure nothing within
5313 * the url looks too fishy...
5315 while(!risky && hn
5316 && (hn = rfc1738_scan(hn, &l)) != NULL
5317 && (hn = srchstr(hn,"://")) != NULL){
5318 int digits = 1;
5320 for(hn += 3, hl = 0; hn[hl] && hn[hl] != '/' && hn[hl] != '?'; hl++){
5322 * auth spec, encoded characters, or possibly non-standard port
5323 * should raise a red flag
5325 if(hn[hl] == '@' || hn[hl] == '%' || hn[hl] == ':'){
5326 risky++;
5327 break;
5329 else if(!(hn[hl] == '.' || isdigit((unsigned char) hn[hl])))
5330 digits = 0;
5333 /* dotted-dec/raw-int address should cause suspicion as well */
5334 if(digits)
5335 risky++;
5338 if(risky && ((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)
5339 (*((HTML_OPT_S *) hd->html_data->opt)->warnrisk_f)();
5341 fs_give((void **) &url);
5347 * relative_url - put full url path in h based on base and relative url
5349 void
5350 html_a_relative(char *base_url, char *rel_url, HANDLE_S *h)
5352 size_t len;
5353 char tmp[MAILTMPLEN], *p, *q;
5354 char *scheme = NULL, *net = NULL, *path = NULL,
5355 *parms = NULL, *query = NULL, *frag = NULL,
5356 *base_scheme = NULL, *base_net_loc = NULL,
5357 *base_path = NULL, *base_parms = NULL,
5358 *base_query = NULL, *base_frag = NULL,
5359 *rel_scheme = NULL, *rel_net_loc = NULL,
5360 *rel_path = NULL, *rel_parms = NULL,
5361 *rel_query = NULL, *rel_frag = NULL;
5363 /* Rough parse of base URL */
5364 rfc1808_tokens(base_url, &base_scheme, &base_net_loc, &base_path,
5365 &base_parms, &base_query, &base_frag);
5367 /* Rough parse of this URL */
5368 rfc1808_tokens(rel_url, &rel_scheme, &rel_net_loc, &rel_path,
5369 &rel_parms, &rel_query, &rel_frag);
5371 scheme = rel_scheme; /* defaults */
5372 net = rel_net_loc;
5373 path = rel_path;
5374 parms = rel_parms;
5375 query = rel_query;
5376 frag = rel_frag;
5377 if(!scheme && base_scheme){
5378 scheme = base_scheme;
5379 if(!net){
5380 net = base_net_loc;
5381 if(path){
5382 if(*path != '/'){
5383 if(base_path){
5384 for(p = q = base_path; /* Drop base path's tail */
5385 (p = strchr(p, '/'));
5386 q = ++p)
5389 len = q - base_path;
5391 else
5392 len = 0;
5394 if(len + strlen(rel_path) < sizeof(tmp)-1){
5395 if(len)
5396 snprintf(path = tmp, sizeof(tmp), "%.*s", (int) len, base_path);
5398 strncpy(tmp + len, rel_path, sizeof(tmp)-len);
5399 tmp[sizeof(tmp)-1] = '\0';
5401 /* Follow RFC 1808 "Step 6" */
5402 for(p = tmp; (p = strchr(p, '.')); )
5403 switch(*(p+1)){
5405 * a) All occurrences of "./", where "." is a
5406 * complete path segment, are removed.
5408 case '/' :
5409 if(p > tmp)
5410 for(q = p; (*q = *(q+2)) != '\0'; q++)
5412 else
5413 p++;
5415 break;
5418 * b) If the path ends with "." as a
5419 * complete path segment, that "." is
5420 * removed.
5422 case '\0' :
5423 if(p == tmp || *(p-1) == '/')
5424 *p = '\0';
5425 else
5426 p++;
5428 break;
5431 * c) All occurrences of "<segment>/../",
5432 * where <segment> is a complete path
5433 * segment not equal to "..", are removed.
5434 * Removal of these path segments is
5435 * performed iteratively, removing the
5436 * leftmost matching pattern on each
5437 * iteration, until no matching pattern
5438 * remains.
5440 * d) If the path ends with "<segment>/..",
5441 * where <segment> is a complete path
5442 * segment not equal to "..", that
5443 * "<segment>/.." is removed.
5445 case '.' :
5446 if(p > tmp + 1){
5447 for(q = p - 2; q > tmp && *q != '/'; q--)
5450 if(*q == '/')
5451 q++;
5453 if(q + 1 == p /* no "//.." */
5454 || (*q == '.' /* and "../.." */
5455 && *(q+1) == '.'
5456 && *(q+2) == '/')){
5457 p += 2;
5458 break;
5461 switch(*(p+2)){
5462 case '/' :
5463 len = (p - q) + 3;
5464 p = q;
5465 for(; (*q = *(q+len)) != '\0'; q++)
5468 break;
5470 case '\0':
5471 *(p = q) = '\0';
5472 break;
5474 default:
5475 p += 2;
5476 break;
5479 else
5480 p += 2;
5482 break;
5484 default :
5485 p++;
5486 break;
5489 else
5490 path = ""; /* lame. */
5493 else{
5494 path = base_path;
5495 if(!parms){
5496 parms = base_parms;
5497 if(!query)
5498 query = base_query;
5504 len = (scheme ? strlen(scheme) : 0) + (net ? strlen(net) : 0)
5505 + (path ? strlen(path) : 0) + (parms ? strlen(parms) : 0)
5506 + (query ? strlen(query) : 0) + (frag ? strlen(frag ) : 0) + 8;
5508 h->h.url.path = (char *) fs_get(len * sizeof(char));
5509 snprintf(h->h.url.path, len, "%s%s%s%s%s%s%s%s%s%s%s%s",
5510 scheme ? scheme : "", scheme ? ":" : "",
5511 net ? "//" : "", net ? net : "",
5512 (path && *path == '/') ? "" : ((path && net) ? "/" : ""),
5513 path ? path : "",
5514 parms ? ";" : "", parms ? parms : "",
5515 query ? "?" : "", query ? query : "",
5516 frag ? "#" : "", frag ? frag : "");
5518 if(base_scheme)
5519 fs_give((void **) &base_scheme);
5521 if(base_net_loc)
5522 fs_give((void **) &base_net_loc);
5524 if(base_path)
5525 fs_give((void **) &base_path);
5527 if(base_parms)
5528 fs_give((void **) &base_parms);
5530 if(base_query)
5531 fs_give((void **) &base_query);
5533 if(base_frag)
5534 fs_give((void **) &base_frag);
5536 if(rel_scheme)
5537 fs_give((void **) &rel_scheme);
5539 if(rel_net_loc)
5540 fs_give((void **) &rel_net_loc);
5542 if(rel_parms)
5543 fs_give((void **) &rel_parms);
5545 if(rel_query)
5546 fs_give((void **) &rel_query);
5548 if(rel_frag)
5549 fs_give((void **) &rel_frag);
5551 if(rel_path)
5552 fs_give((void **) &rel_path);
/*
 * html_href_relative - tell whether an href looks relative
 *
 * Scans at most the first 32 characters.  A run of letters, '_' and '-'
 * followed directly by ':' is taken as a scheme and FALSE is returned.
 * Anything else (including a NULL or empty url) yields TRUE.
 */
int
html_href_relative(char *url)
{
    char *s;
    int   left;

    if(!url)
      return(TRUE);

    for(s = url, left = 32; left > 0 && *s; s++, left--){
        if(isalpha((unsigned char) *s) || *s == '_' || *s == '-')
          continue;                     /* still inside a possible scheme */

        /* first non-scheme character decides */
        return((*s == ':') ? FALSE : TRUE);
    }

    return(TRUE);
}
5578 * HTML <UL> (Unordered List) element handler
5581 html_ul(HANDLER_S *hd, int ch, int cmd)
5583 if(cmd == GF_DATA){
5584 html_handoff(hd, ch);
5586 else if(cmd == GF_RESET){
5587 if(PASS_HTML(hd->html_data)){
5588 html_output_raw_tag(hd->html_data, "ul");
5590 else{
5591 HD(hd->html_data)->li_pending = 1;
5592 html_blank(hd->html_data, 0);
5595 else if(cmd == GF_EOD){
5596 if(PASS_HTML(hd->html_data)){
5597 html_output_string(hd->html_data, "</ul>");
5599 else{
5600 html_blank(hd->html_data, 0);
5602 if(!HD(hd->html_data)->li_pending)
5603 html_indent(hd->html_data, -4, HTML_ID_INC);
5604 else
5605 HD(hd->html_data)->li_pending = 0;
5609 return(1); /* get linked */
5614 * HTML <OL> (Ordered List) element handler
5617 html_ol(HANDLER_S *hd, int ch, int cmd)
5619 if(cmd == GF_DATA){
5620 html_handoff(hd, ch);
5622 else if(cmd == GF_RESET){
5623 if(PASS_HTML(hd->html_data)){
5624 html_output_raw_tag(hd->html_data, "ol");
5626 else{
5628 * Signal that we're expecting to see <LI> as our next elemnt
5629 * and set the the initial ordered count.
5631 HD(hd->html_data)->li_pending = 1;
5632 hd->x = 1L;
5633 html_blank(hd->html_data, 0);
5636 else if(cmd == GF_EOD){
5637 if(PASS_HTML(hd->html_data)){
5638 html_output_string(hd->html_data, "</ol>");
5640 else{
5641 html_blank(hd->html_data, 0);
5643 if(!HD(hd->html_data)->li_pending)
5644 html_indent(hd->html_data, -4, HTML_ID_INC);
5645 else
5646 HD(hd->html_data)->li_pending = 0;
5650 return(1); /* get linked */
5655 * HTML <MENU> (Menu List) element handler
5658 html_menu(HANDLER_S *hd, int ch, int cmd)
5660 if(cmd == GF_DATA){
5661 html_handoff(hd, ch);
5663 else if(cmd == GF_RESET){
5664 if(PASS_HTML(hd->html_data)){
5665 html_output_raw_tag(hd->html_data, "menu");
5667 else{
5668 HD(hd->html_data)->li_pending = 1;
5671 else if(cmd == GF_EOD){
5672 if(PASS_HTML(hd->html_data)){
5673 html_output_string(hd->html_data, "</menu>");
5675 else{
5676 html_blank(hd->html_data, 0);
5678 if(!HD(hd->html_data)->li_pending)
5679 html_indent(hd->html_data, -4, HTML_ID_INC);
5680 else
5681 HD(hd->html_data)->li_pending = 0;
5685 return(1); /* get linked */
5690 * HTML <DIR> (Directory List) element handler
5693 html_dir(HANDLER_S *hd, int ch, int cmd)
5695 if(cmd == GF_DATA){
5696 html_handoff(hd, ch);
5698 else if(cmd == GF_RESET){
5699 if(PASS_HTML(hd->html_data)){
5700 html_output_raw_tag(hd->html_data, "dir");
5702 else{
5703 HD(hd->html_data)->li_pending = 1;
5706 else if(cmd == GF_EOD){
5707 if(PASS_HTML(hd->html_data)){
5708 html_output_string(hd->html_data, "</dir>");
5710 else{
5711 html_blank(hd->html_data, 0);
5713 if(!HD(hd->html_data)->li_pending)
5714 html_indent(hd->html_data, -4, HTML_ID_INC);
5715 else
5716 HD(hd->html_data)->li_pending = 0;
5720 return(1); /* get linked */
5725 * HTML <LI> (List Item) element handler
5728 html_li(HANDLER_S *hd, int ch, int cmd)
5730 if(cmd == GF_DATA){
5731 if(PASS_HTML(hd->html_data)){
5732 html_handoff(hd, ch);
5735 else if(cmd == GF_RESET){
5736 HANDLER_S *p, *found = NULL;
5739 * There better be a an unordered list, ordered list,
5740 * Menu or Directory handler installed
5741 * or else we crap out...
5743 for(p = HANDLERS(hd->html_data); p; p = p->below)
5744 if(EL(p)->handler == html_ul
5745 || EL(p)->handler == html_ol
5746 || EL(p)->handler == html_menu
5747 || EL(p)->handler == html_dir){
5748 found = p;
5749 break;
5752 if(found){
5753 if(PASS_HTML(hd->html_data)){
5755 else{
5756 char buf[8], *p;
5757 int wrapstate;
5759 /* Start a new line */
5760 html_blank(hd->html_data, 0);
5762 /* adjust indent level if needed */
5763 if(HD(hd->html_data)->li_pending){
5764 html_indent(hd->html_data, 4, HTML_ID_INC);
5765 HD(hd->html_data)->li_pending = 0;
5768 if(EL(found)->handler == html_ul){
5769 int l = html_indent(hd->html_data, 0, HTML_ID_GET);
5771 strncpy(buf, " ", sizeof(buf));
5772 buf[1] = (l < 5) ? '*' : (l < 9) ? '+' : (l < 17) ? 'o' : '#';
5774 else if(EL(found)->handler == html_ol)
5775 snprintf(buf, sizeof(buf), "%2ld.", found->x++);
5776 else if(EL(found)->handler == html_menu){
5777 strncpy(buf, " ->", sizeof(buf));
5778 buf[sizeof(buf)-1] = '\0';
5781 html_indent(hd->html_data, -4, HTML_ID_INC);
5783 /* So we don't munge whitespace */
5784 wrapstate = HD(hd->html_data)->wrapstate;
5785 HD(hd->html_data)->wrapstate = 0;
5787 html_write_indent(hd->html_data, HD(hd->html_data)->indent_level);
5788 for(p = buf; *p; p++)
5789 html_output(hd->html_data, (int) *p);
5791 HD(hd->html_data)->wrapstate = wrapstate;
5792 html_indent(hd->html_data, 4, HTML_ID_INC);
5794 /* else BUG: should really bitch about this */
5797 if(PASS_HTML(hd->html_data)){
5798 html_output_raw_tag(hd->html_data, "li");
5799 return(1); /* get linked */
5802 else if(cmd == GF_EOD){
5803 if(PASS_HTML(hd->html_data)){
5804 html_output_string(hd->html_data, "</li>");
5808 return(PASS_HTML(hd->html_data)); /* DON'T get linked */
5813 * HTML <DL> (Definition List) element handler
5816 html_dl(HANDLER_S *hd, int ch, int cmd)
5818 if(cmd == GF_DATA){
5819 html_handoff(hd, ch);
5821 else if(cmd == GF_RESET){
5822 if(PASS_HTML(hd->html_data)){
5823 html_output_raw_tag(hd->html_data, "dl");
5825 else{
5827 * Set indention level for definition terms and definitions...
5829 hd->x = html_indent(hd->html_data, 0, HTML_ID_GET);
5830 hd->y = hd->x + 2;
5831 hd->z = hd->y + 4;
5834 else if(cmd == GF_EOD){
5835 if(PASS_HTML(hd->html_data)){
5836 html_output_string(hd->html_data, "</dl>");
5838 else{
5839 html_indent(hd->html_data, (int) hd->x, HTML_ID_SET);
5840 html_blank(hd->html_data, 1);
5844 return(1); /* get linked */
5849 * HTML <DT> (Definition Term) element handler
5852 html_dt(HANDLER_S *hd, int ch, int cmd)
5854 if(PASS_HTML(hd->html_data)){
5855 if(cmd == GF_DATA){
5856 html_handoff(hd, ch);
5858 else if(cmd == GF_RESET){
5859 html_output_raw_tag(hd->html_data, "dt");
5861 else if(cmd == GF_EOD){
5862 html_output_string(hd->html_data, "</dt>");
5865 return(1); /* get linked */
5868 if(cmd == GF_RESET){
5869 HANDLER_S *p;
5872 * There better be a Definition Handler installed
5873 * or else we crap out...
5875 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5878 if(p){ /* adjust indent level if needed */
5879 html_indent(hd->html_data, (int) p->y, HTML_ID_SET);
5880 html_blank(hd->html_data, 1);
5882 /* BUG: else should really bitch about this */
5885 return(0); /* DON'T get linked */
5890 * HTML <DD> (Definition Definition) element handler
5893 html_dd(HANDLER_S *hd, int ch, int cmd)
5895 if(PASS_HTML(hd->html_data)){
5896 if(cmd == GF_DATA){
5897 html_handoff(hd, ch);
5899 else if(cmd == GF_RESET){
5900 html_output_raw_tag(hd->html_data, "dd");
5902 else if(cmd == GF_EOD){
5903 html_output_string(hd->html_data, "</dd>");
5906 return(1); /* get linked */
5909 if(cmd == GF_RESET){
5910 HANDLER_S *p;
5913 * There better be a Definition Handler installed
5914 * or else we crap out...
5916 for(p = HANDLERS(hd->html_data); p && EL(p)->handler != html_dl; p = p->below)
5919 if(p){ /* adjust indent level if needed */
5920 html_indent(hd->html_data, (int) p->z, HTML_ID_SET);
5921 html_blank(hd->html_data, 0);
5923 /* BUG: should really bitch about this */
5926 return(0); /* DON'T get linked */
5931 * HTML <H1> (Headings 1) element handler.
5933 * Bold, very-large font, CENTERED. One or two blank lines
5934 * above and below. For our silly character cell's that
5935 * means centered and ALL CAPS...
5938 html_h1(HANDLER_S *hd, int ch, int cmd)
5940 if(cmd == GF_DATA){
5941 html_handoff(hd, ch);
5943 else if(cmd == GF_RESET){
5944 if(PASS_HTML(hd->html_data)){
5945 html_output_raw_tag(hd->html_data, "h1");
5947 else{
5948 /* turn ON the centered bit */
5949 CENTER_BIT(hd->html_data) = 1;
5952 else if(cmd == GF_EOD){
5953 if(PASS_HTML(hd->html_data)){
5954 html_output_string(hd->html_data, "</h1>");
5956 else{
5957 /* turn OFF the centered bit, add blank line */
5958 CENTER_BIT(hd->html_data) = 0;
5959 html_blank(hd->html_data, 1);
5963 return(1); /* get linked */
5968 * HTML <H2> (Headings 2) element handler
5971 html_h2(HANDLER_S *hd, int ch, int cmd)
5973 if(cmd == GF_DATA){
5974 if(PASS_HTML(hd->html_data)){
5975 html_handoff(hd, ch);
5977 else{
5978 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
5979 HTML_ULINE(hd->html_data, 1);
5980 hd->x ^= HTML_HX_ULINE; /* only once! */
5983 html_handoff(hd, (ch < 128 && islower((unsigned char) ch))
5984 ? toupper((unsigned char) ch) : ch);
5987 else if(cmd == GF_RESET){
5988 if(PASS_HTML(hd->html_data)){
5989 html_output_raw_tag(hd->html_data, "h2");
5991 else{
5993 * Bold, large font, flush-left. One or two blank lines
5994 * above and below.
5996 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
5997 hd->x = HTML_HX_CENTER;
5998 else
5999 hd->x = 0;
6001 hd->x |= HTML_HX_ULINE;
6003 CENTER_BIT(hd->html_data) = 0;
6004 hd->y = html_indent(hd->html_data, 0, HTML_ID_SET);
6005 hd->z = HD(hd->html_data)->wrapcol;
6006 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6007 html_blank(hd->html_data, 1);
6010 else if(cmd == GF_EOD){
6011 if(PASS_HTML(hd->html_data)){
6012 html_output_string(hd->html_data, "</h2>");
6014 else{
6016 * restore previous centering, and indent level
6018 if(!(hd->x & HTML_HX_ULINE))
6019 HTML_ULINE(hd->html_data, 0);
6021 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6022 html_blank(hd->html_data, 1);
6023 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6024 HD(hd->html_data)->wrapcol = hd->z;
6028 return(1); /* get linked */
6033 * HTML <H3> (Headings 3) element handler
6036 html_h3(HANDLER_S *hd, int ch, int cmd)
6038 if(cmd == GF_DATA){
6039 if(!PASS_HTML(hd->html_data)){
6040 if((hd->x & HTML_HX_ULINE) && !ASCII_ISSPACE((unsigned char) (ch & 0xff))){
6041 HTML_ULINE(hd->html_data, 1);
6042 hd->x ^= HTML_HX_ULINE; /* only once! */
6046 html_handoff(hd, ch);
6048 else if(cmd == GF_RESET){
6049 if(PASS_HTML(hd->html_data)){
6050 html_output_raw_tag(hd->html_data, "h3");
6052 else{
6054 * Italic, large font, slightly indented from the left
6055 * margin. One or two blank lines above and below.
6057 if(CENTER_BIT(hd->html_data)) /* stop centering for now */
6058 hd->x = HTML_HX_CENTER;
6059 else
6060 hd->x = 0;
6062 hd->x |= HTML_HX_ULINE;
6063 CENTER_BIT(hd->html_data) = 0;
6064 hd->y = html_indent(hd->html_data, 2, HTML_ID_SET);
6065 hd->z = HD(hd->html_data)->wrapcol;
6066 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6067 html_blank(hd->html_data, 1);
6070 else if(cmd == GF_EOD){
6071 if(PASS_HTML(hd->html_data)){
6072 html_output_string(hd->html_data, "</h3>");
6074 else{
6076 * restore previous centering, and indent level
6078 if(!(hd->x & HTML_HX_ULINE))
6079 HTML_ULINE(hd->html_data, 0);
6081 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6082 html_blank(hd->html_data, 1);
6083 CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0;
6084 HD(hd->html_data)->wrapcol = hd->z;
6088 return(1); /* get linked */
6093 * HTML <H4> (Headings 4) element handler
6096 html_h4(HANDLER_S *hd, int ch, int cmd)
6098 if(cmd == GF_DATA){
6099 html_handoff(hd, ch);
6101 else if(cmd == GF_RESET){
6102 if(PASS_HTML(hd->html_data)){
6103 html_output_raw_tag(hd->html_data, "h4");
6105 else{
6107 * Bold, normal font, indented more than H3. One blank line
6108 * above and below.
6110 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6111 CENTER_BIT(hd->html_data) = 0;
6112 hd->y = html_indent(hd->html_data, 4, HTML_ID_SET);
6113 hd->z = HD(hd->html_data)->wrapcol;
6114 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6115 html_blank(hd->html_data, 1);
6118 else if(cmd == GF_EOD){
6119 if(PASS_HTML(hd->html_data)){
6120 html_output_string(hd->html_data, "</h4>");
6122 else{
6124 * restore previous centering, and indent level
6126 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6127 html_blank(hd->html_data, 1);
6128 CENTER_BIT(hd->html_data) = hd->x;
6129 HD(hd->html_data)->wrapcol = hd->z;
6133 return(1); /* get linked */
6138 * HTML <H5> (Headings 5) element handler
6141 html_h5(HANDLER_S *hd, int ch, int cmd)
6143 if(cmd == GF_DATA){
6144 html_handoff(hd, ch);
6146 else if(cmd == GF_RESET){
6147 if(PASS_HTML(hd->html_data)){
6148 html_output_raw_tag(hd->html_data, "h5");
6150 else{
6152 * Italic, normal font, indented as H4. One blank line
6153 * above.
6155 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6156 CENTER_BIT(hd->html_data) = 0;
6157 hd->y = html_indent(hd->html_data, 6, HTML_ID_SET);
6158 hd->z = HD(hd->html_data)->wrapcol;
6159 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6160 html_blank(hd->html_data, 1);
6163 else if(cmd == GF_EOD){
6164 if(PASS_HTML(hd->html_data)){
6165 html_output_string(hd->html_data, "</h5>");
6167 else{
6169 * restore previous centering, and indent level
6171 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6172 html_blank(hd->html_data, 1);
6173 CENTER_BIT(hd->html_data) = hd->x;
6174 HD(hd->html_data)->wrapcol = hd->z;
6178 return(1); /* get linked */
6183 * HTML <H6> (Headings 6) element handler
6186 html_h6(HANDLER_S *hd, int ch, int cmd)
6188 if(cmd == GF_DATA){
6189 html_handoff(hd, ch);
6191 else if(cmd == GF_RESET){
6192 if(PASS_HTML(hd->html_data)){
6193 html_output_raw_tag(hd->html_data, "h6");
6195 else{
6197 * Bold, indented same as normal text, more than H5. One
6198 * blank line above.
6200 hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */
6201 CENTER_BIT(hd->html_data) = 0;
6202 hd->y = html_indent(hd->html_data, 8, HTML_ID_SET);
6203 hd->z = HD(hd->html_data)->wrapcol;
6204 HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8;
6205 html_blank(hd->html_data, 1);
6208 else if(cmd == GF_EOD){
6209 if(PASS_HTML(hd->html_data)){
6210 html_output_string(hd->html_data, "</h6>");
6212 else{
6214 * restore previous centering, and indent level
6216 html_indent(hd->html_data, (int) hd->y, HTML_ID_SET);
6217 html_blank(hd->html_data, 1);
6218 CENTER_BIT(hd->html_data) = hd->x;
6219 HD(hd->html_data)->wrapcol = hd->z;
6223 return(1); /* get linked */
6228 * HTML <BlockQuote> element handler
6231 html_blockquote(HANDLER_S *hd, int ch, int cmd)
6233 int j;
6234 #define HTML_BQ_INDENT 6
6236 if(cmd == GF_DATA){
6237 html_handoff(hd, ch);
6239 else if(cmd == GF_RESET){
6240 if(PASS_HTML(hd->html_data)){
6241 html_output_raw_tag(hd->html_data, "blockquote");
6243 else{
6245 * A typical rendering might be a slight extra left and
6246 * right indent, and/or italic font. The Blockquote element
6247 * causes a paragraph break, and typically provides space
6248 * above and below the quote.
6250 html_indent(hd->html_data, HTML_BQ_INDENT, HTML_ID_INC);
6251 j = HD(hd->html_data)->wrapstate;
6252 HD(hd->html_data)->wrapstate = 0;
6253 html_blank(hd->html_data, 1);
6254 HD(hd->html_data)->wrapstate = j;
6255 HD(hd->html_data)->wrapcol -= HTML_BQ_INDENT;
6258 else if(cmd == GF_EOD){
6259 if(PASS_HTML(hd->html_data)){
6260 html_output_string(hd->html_data, "</blockquote>");
6262 else{
6263 html_blank(hd->html_data, 1);
6265 j = HD(hd->html_data)->wrapstate;
6266 HD(hd->html_data)->wrapstate = 0;
6267 html_indent(hd->html_data, -(HTML_BQ_INDENT), HTML_ID_INC);
6268 HD(hd->html_data)->wrapstate = j;
6269 HD(hd->html_data)->wrapcol += HTML_BQ_INDENT;
6273 return(1); /* get linked */
6278 * HTML <Address> element handler
6281 html_address(HANDLER_S *hd, int ch, int cmd)
6283 int j;
6284 #define HTML_ADD_INDENT 2
6286 if(cmd == GF_DATA){
6287 html_handoff(hd, ch);
6289 else if(cmd == GF_RESET){
6290 if(PASS_HTML(hd->html_data)){
6291 html_output_raw_tag(hd->html_data, "address");
6293 else{
6295 * A typical rendering might be a slight extra left and
6296 * right indent, and/or italic font. The Blockquote element
6297 * causes a paragraph break, and typically provides space
6298 * above and below the quote.
6300 html_indent(hd->html_data, HTML_ADD_INDENT, HTML_ID_INC);
6301 j = HD(hd->html_data)->wrapstate;
6302 HD(hd->html_data)->wrapstate = 0;
6303 html_blank(hd->html_data, 1);
6304 HD(hd->html_data)->wrapstate = j;
6307 else if(cmd == GF_EOD){
6308 if(PASS_HTML(hd->html_data)){
6309 html_output_string(hd->html_data, "</address>");
6311 else{
6312 html_blank(hd->html_data, 1);
6314 j = HD(hd->html_data)->wrapstate;
6315 HD(hd->html_data)->wrapstate = 0;
6316 html_indent(hd->html_data, -(HTML_ADD_INDENT), HTML_ID_INC);
6317 HD(hd->html_data)->wrapstate = j;
6321 return(1); /* get linked */
6326 * HTML <PRE> (Preformatted Text) element handler
6329 html_pre(HANDLER_S *hd, int ch, int cmd)
6331 if(cmd == GF_DATA){
6333 * remove CRLF after '>' in element.
6334 * We see CRLF because wrapstate is off.
6336 switch(hd->y){
6337 case 2 :
6338 if(ch == '\012'){
6339 hd->y = 3;
6340 return(1);
6342 else
6343 html_handoff(hd, '\015');
6345 break;
6347 case 1 :
6348 if(ch == '\015'){
6349 hd->y = 2;
6350 return(1);
6353 case 3 :
6354 /* passing tags? replace CRLF with <BR> to make
6355 * sure hard newline survives in the end...
6357 if(PASS_HTML(hd->html_data))
6358 hd->y = 4; /* keep looking for CRLF */
6359 else
6360 hd->y = 0; /* stop looking */
6362 break;
6364 case 4 :
6365 if(ch == '\015'){
6366 hd->y = 5;
6367 return(1);
6370 break;
6372 case 5 :
6373 hd->y = 4;
6374 if(ch == '\012'){
6375 html_output_string(hd->html_data, "<br />");
6376 return(1);
6378 else
6379 html_handoff(hd, '\015'); /* not CRLF, pass raw CR */
6381 break;
6383 default : /* zero case */
6384 break;
6387 html_handoff(hd, ch);
6389 else if(cmd == GF_RESET){
6390 hd->y = 1;
6391 if(PASS_HTML(hd->html_data)){
6392 html_output_raw_tag(hd->html_data, "pre");
6394 else{
6395 if(hd->html_data)
6396 hd->html_data->f1 = DFL; \
6398 html_blank(hd->html_data, 1);
6399 hd->x = HD(hd->html_data)->wrapstate;
6400 HD(hd->html_data)->wrapstate = 0;
6403 else if(cmd == GF_EOD){
6404 if(PASS_HTML(hd->html_data)){
6405 html_output_string(hd->html_data, "</pre>");
6407 else{
6408 HD(hd->html_data)->wrapstate = (hd->x != 0);
6409 html_blank(hd->html_data, 0);
6413 return(1);
6418 * HTML <CENTER> (Centerd Text) element handler
6421 html_center(HANDLER_S *hd, int ch, int cmd)
6423 if(cmd == GF_DATA){
6424 html_handoff(hd, ch);
6426 else if(cmd == GF_RESET){
6427 if(PASS_HTML(hd->html_data)){
6428 html_output_raw_tag(hd->html_data, "center");
6430 else{
6431 /* turn ON the centered bit */
6432 CENTER_BIT(hd->html_data) = 1;
6435 else if(cmd == GF_EOD){
6436 if(PASS_HTML(hd->html_data)){
6437 html_output_string(hd->html_data, "</center>");
6439 else{
6440 /* turn OFF the centered bit */
6441 CENTER_BIT(hd->html_data) = 0;
6445 return(1);
6450 * HTML <DIV> (Document Divisions) element handler
6453 html_div(HANDLER_S *hd, int ch, int cmd)
6455 if(cmd == GF_DATA){
6456 html_handoff(hd, ch);
6458 else if(cmd == GF_RESET){
6459 if(PASS_HTML(hd->html_data)){
6460 html_output_raw_tag(hd->html_data, "div");
6462 else{
6463 PARAMETER *p;
6465 for(p = HD(hd->html_data)->el_data->attribs;
6466 p && p->attribute;
6467 p = p->next)
6468 if(!strucmp(p->attribute, "ALIGN")){
6469 if(p->value){
6470 /* remember previous values */
6471 hd->x = CENTER_BIT(hd->html_data);
6472 hd->y = html_indent(hd->html_data, 0, HTML_ID_GET);
6474 html_blank(hd->html_data, 0);
6475 CENTER_BIT(hd->html_data) = !strucmp(p->value, "CENTER");
6476 html_indent(hd->html_data, 0, HTML_ID_SET);
6477 /* NOTE: "RIGHT" not supported yet */
6482 else if(cmd == GF_EOD){
6483 if(PASS_HTML(hd->html_data)){
6484 html_output_string(hd->html_data, "</div>");
6486 else{
6487 /* restore centered bit and indentiousness */
6488 CENTER_BIT(hd->html_data) = hd->y;
6489 html_indent(hd->html_data, hd->y, HTML_ID_SET);
6490 html_blank(hd->html_data, 0);
6494 return(1);
6499 * HTML <SPAN> (Text Span) element handler
6502 html_span(HANDLER_S *hd, int ch, int cmd)
6504 if(PASS_HTML(hd->html_data)){
6505 if(cmd == GF_DATA){
6506 html_handoff(hd, ch);
6508 else if(cmd == GF_RESET){
6509 html_output_raw_tag(hd->html_data, "span");
6511 else if(cmd == GF_EOD){
6512 html_output_string(hd->html_data, "</span>");
6515 return(1);
6518 return(0);
6523 * HTML <KBD> (Text Kbd) element handler
6526 html_kbd(HANDLER_S *hd, int ch, int cmd)
6528 if(PASS_HTML(hd->html_data)){
6529 if(cmd == GF_DATA){
6530 html_handoff(hd, ch);
6532 else if(cmd == GF_RESET){
6533 html_output_raw_tag(hd->html_data, "kbd");
6535 else if(cmd == GF_EOD){
6536 html_output_string(hd->html_data, "</kbd>");
6539 return(1);
6542 return(0);
6547 * HTML <DFN> (Text Definition) element handler
6550 html_dfn(HANDLER_S *hd, int ch, int cmd)
6552 if(PASS_HTML(hd->html_data)){
6553 if(cmd == GF_DATA){
6554 html_handoff(hd, ch);
6556 else if(cmd == GF_RESET){
6557 html_output_raw_tag(hd->html_data, "dfn");
6559 else if(cmd == GF_EOD){
6560 html_output_string(hd->html_data, "</dfn>");
6563 return(1);
6566 return(0);
6571 * HTML <TT> (Text Tt) element handler
6574 html_tt(HANDLER_S *hd, int ch, int cmd)
6576 if(PASS_HTML(hd->html_data)){
6577 if(cmd == GF_DATA){
6578 html_handoff(hd, ch);
6580 else if(cmd == GF_RESET){
6581 html_output_raw_tag(hd->html_data, "tt");
6583 else if(cmd == GF_EOD){
6584 html_output_string(hd->html_data, "</tt>");
6587 return(1);
6590 return(0);
6595 * HTML <VAR> (Text Var) element handler
6598 html_var(HANDLER_S *hd, int ch, int cmd)
6600 if(PASS_HTML(hd->html_data)){
6601 if(cmd == GF_DATA){
6602 html_handoff(hd, ch);
6604 else if(cmd == GF_RESET){
6605 html_output_raw_tag(hd->html_data, "var");
6607 else if(cmd == GF_EOD){
6608 html_output_string(hd->html_data, "</var>");
6611 return(1);
6614 return(0);
6619 * HTML <SAMP> (Text Samp) element handler
6622 html_samp(HANDLER_S *hd, int ch, int cmd)
6624 if(PASS_HTML(hd->html_data)){
6625 if(cmd == GF_DATA){
6626 html_handoff(hd, ch);
6628 else if(cmd == GF_RESET){
6629 html_output_raw_tag(hd->html_data, "samp");
6631 else if(cmd == GF_EOD){
6632 html_output_string(hd->html_data, "</samp>");
6635 return(1);
6638 return(0);
6643 * HTML <SUP> (Text Superscript) element handler
6646 html_sup(HANDLER_S *hd, int ch, int cmd)
6648 if(PASS_HTML(hd->html_data)){
6649 if(cmd == GF_DATA){
6650 html_handoff(hd, ch);
6652 else if(cmd == GF_RESET){
6653 html_output_raw_tag(hd->html_data, "sup");
6655 else if(cmd == GF_EOD){
6656 html_output_string(hd->html_data, "</sup>");
6659 return(1);
6662 return(0);
6667 * HTML <SUB> (Text Subscript) element handler
6670 html_sub(HANDLER_S *hd, int ch, int cmd)
6672 if(PASS_HTML(hd->html_data)){
6673 if(cmd == GF_DATA){
6674 html_handoff(hd, ch);
6676 else if(cmd == GF_RESET){
6677 html_output_raw_tag(hd->html_data, "sub");
6679 else if(cmd == GF_EOD){
6680 html_output_string(hd->html_data, "</sub>");
6683 return(1);
6686 return(0);
6691 * HTML <CITE> (Text Citation) element handler
6694 html_cite(HANDLER_S *hd, int ch, int cmd)
6696 if(PASS_HTML(hd->html_data)){
6697 if(cmd == GF_DATA){
6698 html_handoff(hd, ch);
6700 else if(cmd == GF_RESET){
6701 html_output_raw_tag(hd->html_data, "cite");
6703 else if(cmd == GF_EOD){
6704 html_output_string(hd->html_data, "</cite>");
6707 return(1);
6710 return(0);
6715 * HTML <CODE> (Text Code) element handler
6718 html_code(HANDLER_S *hd, int ch, int cmd)
6720 if(PASS_HTML(hd->html_data)){
6721 if(cmd == GF_DATA){
6722 html_handoff(hd, ch);
6724 else if(cmd == GF_RESET){
6725 html_output_raw_tag(hd->html_data, "code");
6727 else if(cmd == GF_EOD){
6728 html_output_string(hd->html_data, "</code>");
6731 return(1);
6734 return(0);
6739 * HTML <INS> (Text Inserted) element handler
6742 html_ins(HANDLER_S *hd, int ch, int cmd)
6744 if(PASS_HTML(hd->html_data)){
6745 if(cmd == GF_DATA){
6746 html_handoff(hd, ch);
6748 else if(cmd == GF_RESET){
6749 html_output_raw_tag(hd->html_data, "ins");
6751 else if(cmd == GF_EOD){
6752 html_output_string(hd->html_data, "</ins>");
6755 return(1);
6758 return(0);
6763 * HTML <DEL> (Text Deleted) element handler
6766 html_del(HANDLER_S *hd, int ch, int cmd)
6768 if(PASS_HTML(hd->html_data)){
6769 if(cmd == GF_DATA){
6770 html_handoff(hd, ch);
6772 else if(cmd == GF_RESET){
6773 html_output_raw_tag(hd->html_data, "del");
6775 else if(cmd == GF_EOD){
6776 html_output_string(hd->html_data, "</del>");
6779 return(1);
6782 return(0);
6787 * HTML <ABBR> (Text Abbreviation) element handler
6790 html_abbr(HANDLER_S *hd, int ch, int cmd)
6792 if(PASS_HTML(hd->html_data)){
6793 if(cmd == GF_DATA){
6794 html_handoff(hd, ch);
6796 else if(cmd == GF_RESET){
6797 html_output_raw_tag(hd->html_data, "abbr");
6799 else if(cmd == GF_EOD){
6800 html_output_string(hd->html_data, "</abbr>");
6803 return(1);
6806 return(0);
6811 * HTML <SCRIPT> element handler
6814 html_script(HANDLER_S *hd, int ch, int cmd)
6816 /* Link in and drop everything within on the floor */
6817 return(1);
6822 * HTML <APPLET> element handler
6825 html_applet(HANDLER_S *hd, int ch, int cmd)
6827 /* Link in and drop everything within on the floor */
6828 return(1);
6833 * HTML <STYLE> CSS element handler
6836 html_style(HANDLER_S *hd, int ch, int cmd)
6838 static STORE_S *css_stuff ;
6840 if(PASS_HTML(hd->html_data)){
6841 if(cmd == GF_DATA){
6842 /* collect style settings */
6843 so_writec(ch, css_stuff);
6845 else if(cmd == GF_RESET){
6846 if(css_stuff)
6847 so_give(&css_stuff);
6849 css_stuff = so_get(CharStar, NULL, EDIT_ACCESS);
6851 else if(cmd == GF_EOD){
6853 * TODO: strip anything mischievous and pass on
6856 so_give(&css_stuff);
6860 return(1);
6864 * RSS 2.0 <RSS> version
6867 rss_rss(HANDLER_S *hd, int ch, int cmd)
6869 if(cmd == GF_RESET){
6870 PARAMETER *p;
6872 for(p = HD(hd->html_data)->el_data->attribs;
6873 p && p->attribute;
6874 p = p->next)
6875 if(!strucmp(p->attribute, "VERSION")){
6876 if(p->value && !strucmp(p->value,"2.0"))
6877 return(0); /* do not link in */
6880 gf_error("Incompatible RSS version");
6881 /* NO RETURN */
6884 return(0); /* not linked or error means we never get here */
6888 * RSS 2.0 <CHANNEL>
6891 rss_channel(HANDLER_S *hd, int ch, int cmd)
6893 if(cmd == GF_DATA){
6894 html_handoff(hd, ch);
6896 else if(cmd == GF_RESET){
6897 RSS_FEED_S *feed;
6899 feed = RSS_FEED(hd->html_data) = fs_get(sizeof(RSS_FEED_S));
6900 memset(feed, 0, sizeof(RSS_FEED_S));
6903 return(1); /* link in */
6907 * RSS 2.0 <TITLE>
6910 rss_title(HANDLER_S *hd, int ch, int cmd)
6912 static STORE_S *title_so;
6914 if(cmd == GF_DATA){
6915 /* collect data */
6916 if(title_so){
6917 so_writec(ch, title_so);
6920 else if(cmd == GF_RESET){
6921 if(RSS_FEED(hd->html_data)){
6922 /* prepare for data */
6923 if(title_so)
6924 so_give(&title_so);
6926 title_so = so_get(CharStar, NULL, EDIT_ACCESS);
6929 else if(cmd == GF_EOD){
6930 if(title_so){
6931 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6932 RSS_ITEM_S *rip;
6934 if(feed){
6935 if((rip = feed->items) != NULL){
6936 for(; rip->next; rip = rip->next)
6939 if(rip->title)
6940 fs_give((void **) &rip->title);
6942 rip->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6944 else{
6945 if(feed->title)
6946 fs_give((void **) &feed->title);
6948 feed->title = cpystr(rss_skip_whitespace(so_text(title_so)));
6952 so_give(&title_so);
6956 return(1); /* link in */
6960 * RSS 2.0 <IMAGE>
6963 rss_image(HANDLER_S *hd, int ch, int cmd)
6965 static STORE_S *img_so;
6967 if(cmd == GF_DATA){
6968 /* collect data */
6969 if(img_so){
6970 so_writec(ch, img_so);
6973 else if(cmd == GF_RESET){
6974 if(RSS_FEED(hd->html_data)){
6975 /* prepare to collect data */
6976 if(img_so)
6977 so_give(&img_so);
6979 img_so = so_get(CharStar, NULL, EDIT_ACCESS);
6982 else if(cmd == GF_EOD){
6983 if(img_so){
6984 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
6986 if(feed){
6987 if(feed->image)
6988 fs_give((void **) &feed->image);
6990 feed->image = cpystr(rss_skip_whitespace(so_text(img_so)));
6993 so_give(&img_so);
6997 return(1); /* link in */
7001 * RSS 2.0 <LINK>
7004 rss_link(HANDLER_S *hd, int ch, int cmd)
7006 static STORE_S *link_so;
7008 if(cmd == GF_DATA){
7009 /* collect data */
7010 if(link_so){
7011 so_writec(ch, link_so);
7014 else if(cmd == GF_RESET){
7015 if(RSS_FEED(hd->html_data)){
7016 /* prepare to collect data */
7017 if(link_so)
7018 so_give(&link_so);
7020 link_so = so_get(CharStar, NULL, EDIT_ACCESS);
7023 else if(cmd == GF_EOD){
7024 if(link_so){
7025 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7026 RSS_ITEM_S *rip;
7028 if(feed){
7029 if((rip = feed->items) != NULL){
7030 for(; rip->next; rip = rip->next)
7033 if(rip->link)
7034 fs_give((void **) &rip->link);
7036 rip->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7038 else{
7039 if(feed->link)
7040 fs_give((void **) &feed->link);
7042 feed->link = cpystr(rss_skip_whitespace(so_text(link_so)));
7046 so_give(&link_so);
7050 return(1); /* link in */
7054 * RSS 2.0 <DESCRIPTION>
7057 rss_description(HANDLER_S *hd, int ch, int cmd)
7059 static STORE_S *desc_so;
7061 if(cmd == GF_DATA){
7062 /* collect data */
7063 if(desc_so){
7064 so_writec(ch, desc_so);
7067 else if(cmd == GF_RESET){
7068 if(RSS_FEED(hd->html_data)){
7069 /* prepare to collect data */
7070 if(desc_so)
7071 so_give(&desc_so);
7073 desc_so = so_get(CharStar, NULL, EDIT_ACCESS);
7076 else if(cmd == GF_EOD){
7077 if(desc_so){
7078 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7079 RSS_ITEM_S *rip;
7081 if(feed){
7082 if((rip = feed->items) != NULL){
7083 for(; rip->next; rip = rip->next)
7086 if(rip->description)
7087 fs_give((void **) &rip->description);
7089 rip->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7091 else{
7092 if(feed->description)
7093 fs_give((void **) &feed->description);
7095 feed->description = cpystr(rss_skip_whitespace(so_text(desc_so)));
7099 so_give(&desc_so);
7103 return(1); /* link in */
7107 * RSS 2.0 <TTL> (in minutes)
7110 rss_ttl(HANDLER_S *hd, int ch, int cmd)
7112 RSS_FEED_S *feed = RSS_FEED(hd->html_data);
7114 if(cmd == GF_DATA){
7115 if(isdigit((unsigned char) ch))
7116 feed->ttl = ((feed->ttl * 10) + (ch - '0'));
7118 else if(cmd == GF_RESET){
7119 /* prepare to collect data */
7120 feed->ttl = 0;
7122 else if(cmd == GF_EOD){
7125 return(1); /* link in */
7129 * RSS 2.0 <ITEM>
7132 rss_item(HANDLER_S *hd, int ch, int cmd)
7134 /* BUG: verify no ITEM nesting? */
7135 if(cmd == GF_RESET){
7136 RSS_FEED_S *feed;
7138 if((feed = RSS_FEED(hd->html_data)) != NULL){
7139 RSS_ITEM_S **rip;
7140 int n = 0;
7142 for(rip = &feed->items; *rip; rip = &(*rip)->next)
7143 if(++n > RSS_ITEM_LIMIT)
7144 return(0);
7146 *rip = fs_get(sizeof(RSS_ITEM_S));
7147 memset(*rip, 0, sizeof(RSS_ITEM_S));
7151 return(0); /* don't link in */
/*
 * rss_skip_whitespace - return a pointer to the first character of s
 *                       that is not whitespace (possibly its
 *                       terminating NUL).  s itself is not modified.
 */
char *
rss_skip_whitespace(char *s)
{
    while(*s != '\0' && isspace((unsigned char) *s))
      s++;

    return(s);
}
7166 * return the function associated with the given element name
7168 ELPROP_S *
7169 element_properties(FILTER_S *fd, char *el_name)
7171 register ELPROP_S *el_table = ELEMENTS(fd);
7172 size_t len_name = strlen(el_name);
7174 for(; el_table->element; el_table++)
7175 if(!strucmp(el_name, el_table->element)
7176 || (el_table->alternate
7177 && len_name == el_table->len + 1
7178 && el_name[el_table->len] == '/'
7179 && !struncmp(el_name, el_table->element, el_table->len)))
7180 return(el_table);
7182 return(NULL);
7187 * collect element's name and any attribute/value pairs then
7188 * dispatch to the appropriate handler.
7190 * Returns 1 : got what we wanted
7191 * 0 : we need more data
7192 * -1 : bogus input
7195 html_element_collector(FILTER_S *fd, int ch)
7197 if(ch == '>'){
7198 if(ED(fd)->overrun){
7200 * If problem processing, don't bother doing anything
7201 * internally, just return such that none of what we've
7202 * digested is displayed.
7204 HTML_DEBUG_EL("too long", ED(fd));
7205 return(1); /* Let it go, Jim */
7207 else if(ED(fd)->mkup_decl){
7208 if(ED(fd)->badform){
7209 dprint((2, "-- html error: bad form: %.*s\n",
7210 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7212 * Invalid comment -- make some guesses as
7213 * to whether we should stop with this greater-than...
7215 if(ED(fd)->buf[0] != '-'
7216 || ED(fd)->len < 4
7217 || (ED(fd)->buf[1] == '-'
7218 && ED(fd)->buf[ED(fd)->len - 1] == '-'
7219 && ED(fd)->buf[ED(fd)->len - 2] == '-'))
7220 return(1);
7222 else{
7223 dprint((5, "-- html: OK: %.*s\n",
7224 ED(fd)->len, ED(fd)->buf ? ED(fd)->buf : "?"));
7225 if(ED(fd)->start_comment == ED(fd)->end_comment){
7226 if(ED(fd)->len > 10){
7227 ED(fd)->buf[ED(fd)->len - 2] = '\0';
7228 html_element_comment(fd, ED(fd)->buf + 2);
7231 return(1);
7233 /* else keep collecting comment below */
7236 else if(ED(fd)->proc_inst){
7237 return(1); /* return without display... */
7239 else if(!ED(fd)->quoted || ED(fd)->badform){
7240 ELPROP_S *ep;
7243 * We either have the whole thing or all that we could
7244 * salvage from it. Try our best...
7247 if(HD(fd)->bitbucket)
7248 return(1); /* element inside chtml clause! */
7250 if(!ED(fd)->badform && html_element_flush(ED(fd)))
7251 return(1); /* return without display... */
7254 * If we ran into an empty tag or we don't know how to deal
7255 * with it, just go on, ignoring it...
7257 if(ED(fd)->element && (ep = element_properties(fd, ED(fd)->element))){
7258 if(ep->handler){
7259 /* dispatch the element's handler */
7260 HTML_DEBUG_EL(ED(fd)->end_tag ? "POP" : "PUSH", ED(fd));
7261 if(ED(fd)->end_tag){
7262 html_pop(fd, ep); /* remove it's handler */
7264 else{
7265 /* if a block element, pop any open <p>'s */
7266 if(ep->blocklevel){
7267 HANDLER_S *tp;
7269 for(tp = HANDLERS(fd); tp && EL(tp)->handler == html_p; tp = tp->below){
7270 HTML_DEBUG_EL("Unclosed <P>", ED(fd));
7271 html_pop(fd, EL(tp));
7272 break;
7276 /* enforce table nesting */
7277 if(!strucmp(ep->element, "tr")){
7278 if(!HANDLERS(fd) || (strucmp(EL(HANDLERS(fd))->element, "table") && strucmp(EL(HANDLERS(fd))->element, "tbody") && strucmp(EL(HANDLERS(fd))->element, "thead"))){
7279 dprint((2, "-- html error: bad nesting for <TR>, GOT %s\n", (HANDLERS(fd)) ? EL(HANDLERS(fd))->element : "NO-HANDLERS"));
7280 if(HANDLERS(fd) && !strucmp(EL(HANDLERS(fd))->element,"tr")){
7281 dprint((2, "-- html error: bad nesting popping previous <TR>"));
7282 html_pop(fd, EL(HANDLERS(fd)));
7284 else{
7285 dprint((2, "-- html error: bad nesting pusing <TABLE>"));
7286 html_push(fd, element_properties(fd, "table"));
7290 else if(!strucmp(ep->element, "td") || !strucmp(ep->element, "th")){
7291 if(!HANDLERS(fd)){
7292 dprint((2, "-- html error: bad nesting: NO HANDLERS before <TD>"));
7293 html_push(fd, element_properties(fd, "table"));
7294 html_push(fd, element_properties(fd, "tr"));
7296 else if(strucmp(EL(HANDLERS(fd))->element, "tr")){
7297 dprint((2, "-- html error: bad nesting for <TD>, GOT %s\n", EL(HANDLERS(fd))->element));
7298 html_push(fd, element_properties(fd, "tr"));
7300 else if(!strucmp(EL(HANDLERS(fd))->element, "td")){
7301 dprint((2, "-- html error: bad nesting popping <TD>"));
7302 html_pop(fd, EL(HANDLERS(fd)));
7306 /* add it's handler */
7307 if(html_push(fd, ep)){
7308 if(ED(fd)->empty){
7309 /* remove empty element */
7310 html_pop(fd, ep);
7315 else {
7316 HTML_DEBUG_EL("IGNORED", ED(fd));
7319 else{ /* else, empty or unrecognized */
7320 HTML_DEBUG_EL("?", ED(fd));
7323 return(1); /* all done! see, that didn't hurt */
7326 else if(ch == '/' && ED(fd)->element && ED(fd)->len){
7327 ED(fd)->empty = 1;
7329 else
7330 ED(fd)->empty = 0;
7332 if(ED(fd)->mkup_decl){
7333 if((ch &= 0xff) == '-'){
7334 if(ED(fd)->hyphen){
7335 ED(fd)->hyphen = 0;
7336 if(ED(fd)->start_comment)
7337 ED(fd)->end_comment = 1;
7338 else
7339 ED(fd)->start_comment = 1;
7341 else
7342 ED(fd)->hyphen = 1;
7344 else{
7345 if(ED(fd)->end_comment)
7346 ED(fd)->start_comment = ED(fd)->end_comment = 0;
7349 * no "--" after ! or non-whitespace between comments - bad
7351 if(ED(fd)->len < 2 || (!ED(fd)->start_comment
7352 && !ASCII_ISSPACE((unsigned char) ch)))
7353 ED(fd)->badform = 1; /* non-comment! */
7355 ED(fd)->hyphen = 0;
7359 * Remember the comment for possible later processing, if
7360 * it get's too long, remember first and last few chars
7361 * so we know when to terminate (and throw some garbage
7362 * in between when we toss out what's between.
7364 if(ED(fd)->len == HTML_BUF_LEN){
7365 ED(fd)->buf[2] = ED(fd)->buf[3] = 'X';
7366 ED(fd)->buf[4] = ED(fd)->buf[ED(fd)->len - 2];
7367 ED(fd)->buf[5] = ED(fd)->buf[ED(fd)->len - 1];
7368 ED(fd)->len = 6;
7371 ED(fd)->buf[(ED(fd)->len)++] = ch;
7372 return(0); /* comments go in the bit bucket */
7374 else if(ED(fd)->overrun || ED(fd)->badform){
7375 return(0); /* swallow char's until next '>' */
7377 else if(!ED(fd)->element && !ED(fd)->len){
7378 if(ch == '/'){ /* validate leading chars */
7379 ED(fd)->end_tag = 1;
7380 return(0);
7382 else if(ch == '!'){
7383 ED(fd)->mkup_decl = 1;
7384 return(0);
7386 else if(ch == '?'){
7387 ED(fd)->proc_inst = 1;
7388 return(0);
7390 else if(!isalpha((unsigned char) ch))
7391 return(-1); /* can't be a tag! */
7393 else if(ch == '\"' || ch == '\''){
7394 if(!ED(fd)->hit_equal){
7395 ED(fd)->badform = 1; /* quote in element name?!? */
7396 return(0);
7399 if(ED(fd)->quoted){
7400 if(ED(fd)->quoted == (char) ch){
7401 /* end of a quoted value */
7402 ED(fd)->quoted = 0;
7403 if(ED(fd)->len && html_element_flush(ED(fd)))
7404 ED(fd)->badform = 1;
7406 return(0); /* continue collecting chars */
7408 /* ELSE fall thru writing other quoting char */
7410 else{
7411 ED(fd)->quoted = (char) ch;
7412 ED(fd)->was_quoted = 1;
7413 return(0); /* need more data */
7417 ch &= 0xff; /* strip any "literal" high bits */
7418 if(ED(fd)->quoted
7419 || isalnum(ch)
7420 || strchr("#-.!", ch)){
7421 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7422 ? HTML_BUF_LEN:MAX_ELEMENT)){
7423 ED(fd)->buf[(ED(fd)->len)++] = ch;
7425 else
7426 ED(fd)->overrun = 1; /* flag it broken */
7428 else if(ASCII_ISSPACE((unsigned char) ch) || ch == '='){
7429 if((ED(fd)->len || ED(fd)->was_quoted) && html_element_flush(ED(fd))){
7430 ED(fd)->badform = 1;
7431 return(0); /* else, we ain't done yet */
7434 if(!ED(fd)->hit_equal)
7435 ED(fd)->hit_equal = (ch == '=');
7437 else if(ch == '/' && ED(fd)->len && !ED(fd)->element){
7438 ELPROP_S *ep;
7439 ep = element_properties(fd, ED(fd)->buf);
7440 if(ep){
7441 if(!ep->alternate)
7442 ED(fd)->badform = 1;
7443 else{
7444 if(ED(fd)->len < ((ED(fd)->element || !ED(fd)->hit_equal)
7445 ? HTML_BUF_LEN:MAX_ELEMENT)){
7446 ED(fd)->buf[(ED(fd)->len)++] = ch; /* add this exception */
7448 else
7449 ED(fd)->overrun = 1;
7452 else
7453 ED(fd)->badform = 1;
7455 else
7456 ED(fd)->badform = 1; /* unrecognized data?? */
7458 return(0); /* keep collecting */
7463 * Element collector found complete string, integrate it and reset
7464 * internal collection buffer.
7466 * Returns zero if element collection buffer flushed, error flag otherwise
7469 html_element_flush(CLCTR_S *el_data)
7471 int rv = 0;
7473 if(el_data->hit_equal){ /* adding a value */
7474 el_data->hit_equal = 0;
7475 if(el_data->cur_attrib){
7476 if(!el_data->cur_attrib->value){
7477 el_data->cur_attrib->value = cpystr(el_data->len
7478 ? el_data->buf : "");
7480 else{
7481 dprint((2, "** element: unexpected value: %.10s...\n",
7482 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7483 rv = 1;
7486 else{
7487 dprint((2, "** element: missing attribute name: %.10s...\n",
7488 (el_data->len && el_data->buf) ? el_data->buf : "\"\""));
7489 rv = 2;
7492 else if(el_data->len){
7493 if(!el_data->element){
7494 el_data->element = cpystr(el_data->buf);
7496 else{
7497 PARAMETER *p = (PARAMETER *)fs_get(sizeof(PARAMETER));
7498 memset(p, 0, sizeof(PARAMETER));
7499 if(el_data->attribs){
7500 el_data->cur_attrib->next = p;
7501 el_data->cur_attrib = p;
7503 else
7504 el_data->attribs = el_data->cur_attrib = p;
7506 p->attribute = cpystr(el_data->buf);
7511 el_data->was_quoted = 0; /* reset collector buf and state */
7512 el_data->len = 0;
7513 memset(el_data->buf, 0, HTML_BUF_LEN);
7514 return(rv); /* report whatever happened above */
7519 * html_element_comment - "Special" comment handling here
7521 void
7522 html_element_comment(FILTER_S *f, char *s)
7524 char *p;
7526 while(*s && ASCII_ISSPACE((unsigned char) *s))
7527 s++;
7530 * WARNING: "!--chtml" denotes "Conditional HTML", a UW-ism.
7532 if(!struncmp(s, "chtml ", 6)){
7533 s += 6;
7534 if(!struncmp(s, "if ", 3)){
7535 HD(f)->bitbucket = 1; /* default is failure! */
7536 switch(*(s += 3)){
7537 case 'P' :
7538 case 'p' :
7539 if(!struncmp(s + 1, "inemode=", 8)){
7540 if(!strucmp(s = removing_quotes(s + 9), "function_key")
7541 && F_ON(F_USE_FK, ps_global))
7542 HD(f)->bitbucket = 0;
7543 else if(!strucmp(s, "running"))
7544 HD(f)->bitbucket = 0;
7545 else if(!strucmp(s, "phone_home") && ps_global->phone_home)
7546 HD(f)->bitbucket = 0;
7547 #ifdef _WINDOWS
7548 else if(!strucmp(s, "os_windows"))
7549 HD(f)->bitbucket = 0;
7550 #endif
7553 break;
7555 case '[' : /* test */
7556 if((p = strindex(++s, ']')) != NULL){
7557 *p = '\0'; /* tie off test string */
7558 removing_leading_white_space(s);
7559 removing_trailing_white_space(s);
7560 if(*s == '-' && *(s+1) == 'r'){ /* readable file? */
7561 for(s += 2; *s && ASCII_ISSPACE((unsigned char) *s); s++)
7565 HD(f)->bitbucket = (can_access(CHTML_VAR_EXPAND(removing_quotes(s)),
7566 READ_ACCESS) != 0);
7570 break;
7572 default :
7573 break;
7576 else if(!strucmp(s, "else")){
7577 HD(f)->bitbucket = !HD(f)->bitbucket;
7579 else if(!strucmp(s, "endif")){
7580 /* Clean up after chtml here */
7581 HD(f)->bitbucket = 0;
7584 else if(!HD(f)->bitbucket){
7585 if(!struncmp(s, "#include ", 9)){
7586 char buf[MAILTMPLEN], *bufp;
7587 int len, end_of_line;
7588 FILE *fp;
7590 /* Include the named file */
7591 if(!struncmp(s += 9, "file=", 5)
7592 && (fp = our_fopen(CHTML_VAR_EXPAND(removing_quotes(s+5)), "r"))){
7593 html_element_output(f, HTML_NEWLINE);
7595 while(fgets(buf, sizeof(buf), fp)){
7596 if((len = strlen(buf)) && buf[len-1] == '\n'){
7597 end_of_line = 1;
7598 buf[--len] = '\0';
7600 else
7601 end_of_line = 0;
7603 for(bufp = buf; len; bufp++, len--)
7604 html_element_output(f, (int) *bufp);
7606 if(end_of_line)
7607 html_element_output(f, HTML_NEWLINE);
7610 fclose(fp);
7611 html_element_output(f, HTML_NEWLINE);
7612 HD(f)->blanks = 0;
7613 if(f->f1 == WSPACE)
7614 f->f1 = DFL;
7617 else if(!struncmp(s, "#echo ", 6)){
7618 if(!struncmp(s += 6, "var=", 4)){
7619 char *p, buf[MAILTMPLEN];
7620 ADDRESS *adr;
7621 extern char datestamp[];
7623 if(!strcmp(s = removing_quotes(s + 4), "ALPINE_VERSION")){
7624 p = ALPINE_VERSION;
7626 else if(!strcmp(s, "ALPINE_REVISION")){
7627 p = get_alpine_revision_string(buf, sizeof(buf));
7629 else if(!strcmp(s, "C_CLIENT_VERSION")){
7630 p = CCLIENTVERSION;
7632 else if(!strcmp(s, "ALPINE_COMPILE_DATE")){
7633 p = datestamp;
7635 else if(!strcmp(s, "ALPINE_TODAYS_DATE")){
7636 rfc822_date(p = buf);
7638 else if(!strcmp(s, "_LOCAL_FULLNAME_")){
7639 p = (ps_global->VAR_LOCAL_FULLNAME
7640 && ps_global->VAR_LOCAL_FULLNAME[0])
7641 ? ps_global->VAR_LOCAL_FULLNAME
7642 : "Local Support";
7644 else if(!strcmp(s, "_LOCAL_ADDRESS_")){
7645 p = (ps_global->VAR_LOCAL_ADDRESS
7646 && ps_global->VAR_LOCAL_ADDRESS[0])
7647 ? ps_global->VAR_LOCAL_ADDRESS
7648 : "postmaster";
7649 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7650 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7651 mail_free_address(&adr);
7653 else if(!strcmp(s, "_BUGS_FULLNAME_")){
7654 p = (ps_global->VAR_BUGS_FULLNAME
7655 && ps_global->VAR_BUGS_FULLNAME[0])
7656 ? ps_global->VAR_BUGS_FULLNAME
7657 : "Place to report Alpine Bugs";
7659 else if(!strcmp(s, "_BUGS_ADDRESS_")){
7660 p = (ps_global->VAR_BUGS_ADDRESS
7661 && ps_global->VAR_BUGS_ADDRESS[0])
7662 ? ps_global->VAR_BUGS_ADDRESS : "postmaster";
7663 adr = rfc822_parse_mailbox(&p, ps_global->maildomain);
7664 snprintf(p = buf, sizeof(buf), "%s@%s", adr->mailbox, adr->host);
7665 mail_free_address(&adr);
7667 else if(!strcmp(s, "CURRENT_DIR")){
7668 getcwd(p = buf, sizeof(buf));
7670 else if(!strcmp(s, "HOME_DIR")){
7671 p = ps_global->home_dir;
7673 else if(!strcmp(s, "PINE_CONF_PATH")){
7674 #if defined(_WINDOWS) || !defined(SYSTEM_PINERC)
7675 p = "/usr/local/lib/pine.conf";
7676 #else
7677 p = SYSTEM_PINERC;
7678 #endif
7680 else if(!strcmp(s, "PINE_CONF_FIXED_PATH")){
7681 #ifdef SYSTEM_PINERC_FIXED
7682 p = SYSTEM_PINERC_FIXED;
7683 #else
7684 p = "/usr/local/lib/pine.conf.fixed";
7685 #endif
7687 else if(!strcmp(s, "PINE_INFO_PATH")){
7688 p = SYSTEM_PINE_INFO_PATH;
7690 else if(!strcmp(s, "MAIL_SPOOL_PATH")){
7691 p = sysinbox();
7693 else if(!strcmp(s, "MAIL_SPOOL_LOCK_PATH")){
7694 /* Don't put the leading /tmp/. */
7695 int i, j;
7697 p = sysinbox();
7698 if(p){
7699 for(j = 0, i = 0; p[i] && j < MAILTMPLEN - 1; i++){
7700 if(p[i] == '/')
7701 buf[j++] = '\\';
7702 else
7703 buf[j++] = p[i];
7705 buf[j++] = '\0';
7706 p = buf;
7709 else if(!struncmp(s, "VAR_", 4)){
7710 p = s+4;
7711 if(pith_opt_pretty_var_name)
7712 p = (*pith_opt_pretty_var_name)(p);
7714 else if(!struncmp(s, "FEAT_", 5)){
7715 p = s+5;
7716 if(pith_opt_pretty_feature_name)
7717 p = (*pith_opt_pretty_feature_name)(p, -1);
7719 else
7720 p = NULL;
7722 if(p){
7723 if(f->f1 == WSPACE){
7724 html_element_output(f, ' ');
7725 f->f1 = DFL; /* clear it */
7728 while(*p)
7729 html_element_output(f, (int) *p++);
7737 void
7738 html_element_output(FILTER_S *f, int ch)
7740 if(HANDLERS(f))
7741 (*EL(HANDLERS(f))->handler)(HANDLERS(f), ch, GF_DATA);
7742 else
7743 html_output(f, ch);
/* non-zero if X is one of 0-9, a-f or A-F (note: X is evaluated more than once) */
#define ISHEX_DIGIT(X)                          \
        (isdigit((X))                           \
         || ((X) >= 'a' && (X) <= 'f')          \
         || ((X) >= 'A' && (X) <= 'F'))
7751 * collect html entity and return its UCS value when done.
7753 * Returns HTML_MOREDATA : we need more data
7754 * HTML_ENTITY : entity collected
7755 * HTML_BADVALUE : good data, but no named match or out of range
7756 * HTML_BADDATA : invalid input
7758 * NOTES:
7759 * - entity format is "'&' tag ';'" and represents a literal char
7760 * - named entities are CASE SENSITIVE.
7761 * - numeric char references (where the tag is prefixed with a '#')
7762 * are a char with that numbers value
7763 * - numeric vals are 0-255 except for the ranges: 0-8, 11-31, 127-159.
7766 html_entity_collector(FILTER_S *f, int ch, UCS *ucs, char **alt)
7768 static int len = 0;
7769 static char buf[MAX_ENTITY+2];
7770 int rv, i;
7772 if(len == MAX_ENTITY){
7773 rv = HTML_BADDATA;
7775 else if((len == 0)
7776 ? (isalpha((unsigned char) ch) || ch == '#')
7777 : ((isdigit((unsigned char) ch)
7778 || (len == 1 && (unsigned char) ch == 'x')
7779 || (len == 1 &&(unsigned char) ch == 'X')
7780 || (len > 1 && isxdigit((unsigned char) ch))
7781 || (isalpha((unsigned char) ch) && buf[0] != '#')))){
7782 buf[len++] = ch;
7783 return(HTML_MOREDATA);
7785 else if(ch == ';' || ASCII_ISSPACE((unsigned char) ch)){
7786 buf[len] = '\0'; /* got something! */
7787 if(buf[0] == '#'){
7788 if(buf[1] == 'x' || buf[1] == 'X')
7789 *ucs = (UCS) strtoul(&buf[2], NULL, 16);
7790 else
7791 *ucs = (UCS) strtoul(&buf[1], NULL, 10);
7793 if(alt){
7794 *alt = NULL;
7795 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7796 if(entity_tab[i].value == *ucs){
7797 *alt = entity_tab[i].plain;
7798 break;
7802 len = 0;
7803 return(HTML_ENTITY);
7805 else{
7806 rv = HTML_BADVALUE; /* in case of no match */
7807 for(i = 0; i < sizeof(entity_tab)/sizeof(struct html_entities); i++)
7808 if(strcmp(entity_tab[i].name, buf) == 0){
7809 *ucs = entity_tab[i].value;
7810 if(alt)
7811 *alt = entity_tab[i].plain;
7813 len = 0;
7814 return(HTML_ENTITY);
7818 else
7819 rv = HTML_BADDATA; /* bogus input! */
7821 if(alt){
7822 buf[len] = '\0';
7823 *alt = buf;
7826 len = 0;
7827 return(rv);
7831 /*----------------------------------------------------------------------
7832 HTML text to plain text filter
7834 This basically tries to do the best it can with HTML 2.0 (RFC1866)
7835 with bits of RFC 1942 (plus some HTML 3.2 thrown in as well) text
7836 formatting.
7838 ----*/
7839 void
7840 gf_html2plain(FILTER_S *f, int flg)
7842 /* BUG: qoute incoming \255 values (see "yuml" above!) */
7843 if(flg == GF_DATA){
7844 register int c;
7845 GF_INIT(f, f->next);
7847 if(!HTML_WROTE(f)){
7848 int ii;
7850 for(ii = HTML_INDENT(f); ii > 0; ii--)
7851 html_putc(f, ' ');
7853 HTML_WROTE(f) = 1;
7856 while(GF_GETC(f, c)){
7858 * First we have to collect any literal entities...
7859 * that is, IF we're not already collecting one
7860 * AND we're not in element's text or, if we are, we're
7861 * not in quoted text. Whew.
7863 if(f->t){
7864 char *alt = NULL;
7865 UCS ucs;
7867 switch(html_entity_collector(f, c, &ucs, &alt)){
7868 case HTML_MOREDATA: /* more data required? */
7869 continue; /* go get another char */
7871 case HTML_BADVALUE :
7872 case HTML_BADDATA :
7873 /* if supplied, process bogus data */
7874 HTML_PROC(f, '&');
7875 for(; *alt; alt++){
7876 unsigned int uic = *alt;
7877 HTML_PROC(f, uic);
7880 if(c == '&' && !HD(f)->quoted){
7881 f->t = '&';
7882 continue;
7884 else
7885 f->t = 0; /* don't come back next time */
7887 break;
7889 default : /* thing to process */
7890 f->t = 0; /* don't come back */
7893 * do something with UCS codepoint. If it's
7894 * not displayable then use the alt version
7895 * otherwise
7896 * cvt UCS to UTF-8 and toss into next filter.
7898 if(ucs > 127 && wcellwidth(ucs) < 0){
7899 if(alt){
7900 for(; *alt; alt++){
7901 c = MAKE_LITERAL(*alt);
7902 HTML_PROC(f, c);
7905 continue;
7907 else
7908 c = MAKE_LITERAL('?');
7910 else{
7911 unsigned char utf8buf[8], *p1, *p2;
7913 p2 = utf8_put(p1 = (unsigned char *) utf8buf, (unsigned long) ucs);
7914 for(; p1 < p2; p1++){
7915 c = MAKE_LITERAL(*p1);
7916 HTML_PROC(f, c);
7919 continue;
7922 break;
7925 else if(!PASS_HTML(f) && c == '&' && !HD(f)->quoted){
7926 f->t = '&';
7927 continue;
7931 * then we process whatever we got...
7934 HTML_PROC(f, c);
7937 GF_OP_END(f); /* clean up our input pointers */
7939 else if(flg == GF_EOD){
7940 while(HANDLERS(f)){
7941 dprint((2, "-- html error: no closing tag for %s",EL(HANDLERS(f))->element));
7942 html_pop(f, EL(HANDLERS(f)));
7945 html_output(f, HTML_NEWLINE);
7946 if(ULINE_BIT(f))
7947 HTML_ULINE(f, ULINE_BIT(f) = 0);
7949 if(BOLD_BIT(f))
7950 HTML_BOLD(f, BOLD_BIT(f) = 0);
7952 HTML_FLUSH(f);
7953 fs_give((void **)&f->line);
7954 if(HD(f)->color)
7955 free_color_pair(&HD(f)->color);
7957 fs_give(&f->data);
7958 if(f->opt){
7959 if(((HTML_OPT_S *)f->opt)->base)
7960 fs_give((void **) &((HTML_OPT_S *)f->opt)->base);
7962 fs_give(&f->opt);
7965 (*f->next->f)(f->next, GF_DATA);
7966 (*f->next->f)(f->next, GF_EOD);
7968 else if(flg == GF_RESET){
7969 dprint((9, "-- gf_reset html2plain\n"));
7970 f->data = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S));
7971 memset(f->data, 0, sizeof(HTML_DATA_S));
7972 /* start with flowing text */
7973 HD(f)->wrapstate = !PASS_HTML(f);
7974 HD(f)->wrapcol = WRAP_COLS(f);
7975 f->f1 = DFL; /* state */
7976 f->f2 = 0; /* chars in wrap buffer */
7977 f->n = 0L; /* chars on line so far */
7978 f->linep = f->line = (char *)fs_get(HTML_BUF_LEN * sizeof(char));
7979 HD(f)->line_bufsize = HTML_BUF_LEN; /* initial bufsize of line */
7980 HD(f)->alt_entity = (!ps_global->display_charmap
7981 || strucmp(ps_global->display_charmap, "iso-8859-1"));
7982 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
7989 * html_indent - do the requested indent level function with appropriate
7990 * flushing and such.
7992 * Returns: indent level prior to set/increment
7995 html_indent(FILTER_S *f, int val, int func)
7997 int old = HD(f)->indent_level;
7999 /* flush pending data at old indent level */
8000 switch(func){
8001 case HTML_ID_INC :
8002 html_output_flush(f);
8003 if((HD(f)->indent_level += val) < 0)
8004 HD(f)->indent_level = 0;
8006 break;
8008 case HTML_ID_SET :
8009 html_output_flush(f);
8010 HD(f)->indent_level = val;
8011 break;
8013 default :
8014 break;
8017 return(old);
8023 * html_blanks - Insert n blank lines into output
8025 void
8026 html_blank(FILTER_S *f, int n)
8028 /* Cap off any flowing text, and then write blank lines */
8029 if(f->f2 || f->n || CENTER_BIT(f) || HD(f)->centered || WRAPPED_LEN(f))
8030 html_output(f, HTML_NEWLINE);
8032 if(HD(f)->wrapstate)
8033 while(HD(f)->blanks < n) /* blanks inc'd by HTML_NEWLINE */
8034 html_output(f, HTML_NEWLINE);
8040 * html_newline -- insert a newline mindful of embedded tags
8042 void
8043 html_newline(FILTER_S *f)
8045 html_write_newline(f); /* commit an actual newline */
8047 if(f->n){ /* and keep track of blank lines */
8048 HD(f)->blanks = 0;
8049 f->n = 0L;
8051 else
8052 HD(f)->blanks++;
8057 * output the given char, handling any requested wrapping.
8058 * It's understood that all whitespace handed us is written. In other
8059 * words, junk whitespace is weeded out before it's given to us here.
8062 void
8063 html_output(FILTER_S *f, int ch)
8065 UCS uc;
8066 int width;
8067 void (*o_f)(FILTER_S *, int, int, int) = CENTER_BIT(f) ? html_output_centered : html_output_normal;
8070 * if ch is a control token, just pass it on, else, collect
8071 * utf8-encoded characters to determine width,then feed into
8072 * output routines
8074 if(ch == TAG_EMBED || HD(f)->embedded.state || (ch > 0xff && IS_LITERAL(ch) == 0)){
8075 (*o_f)(f, ch, 1, 0);
8077 else if(utf8_to_ucs4_oneatatime(ch & 0xff, &(HD(f)->cb), &uc, &width)){
8078 unsigned char *cp;
8080 for(cp = HD(f)->cb.cbuf; cp <= HD(f)->cb.cbufend; cp++){
8081 (*o_f)(f, *cp, width, HD(f)->cb.cbufend - cp);
8082 width = 0; /* only count it once */
8085 HD(f)->cb.cbufp = HD(f)->cb.cbufend = HD(f)->cb.cbuf;
8087 else
8088 HD(f)->cb.cbufend = HD(f)->cb.cbufp;
8089 /* else do nothing until we have a full character */
8093 void
8094 html_output_string(FILTER_S *f, char *s)
8096 for(; *s; s++)
8097 html_output(f, *s);
8101 void
8102 html_output_raw_tag(FILTER_S *f, char *tag)
8104 PARAMETER *p;
8105 char *vp;
8106 int i;
8108 html_output(f, '<');
8109 html_output_string(f, tag);
8110 for(p = HD(f)->el_data->attribs;
8111 p && p->attribute;
8112 p = p->next){
8113 /* SECURITY: no javascript */
8114 /* PRIVACY: no img src without permission */
8115 /* BUGS: no class collisions since <head> ignored */
8116 if(html_event_attribute(p->attribute)
8117 || !strucmp(p->attribute, "class")
8118 || (!PASS_IMAGES(f) && !strucmp(tag, "img") && !strucmp(p->attribute, "src")))
8119 continue;
8121 /* PRIVACY: sniff out background images */
8122 if(p->value && !PASS_IMAGES(f)){
8123 if(!strucmp(p->attribute, "style")){
8124 if((vp = srchstr(p->value, "background-image")) != NULL){
8125 /* neuter in place */
8126 vp[11] = vp[12] = vp[13] = vp[14] = vp[15] = 'X';
8128 else{
8129 for(vp = p->value; (vp = srchstr(vp, "background")) != NULL; vp++)
8130 if(vp[10] == ' ' || vp[10] == ':')
8131 for(i = 11; vp[i] && vp[i] != ';'; i++)
8132 if((vp[i] == 'u' && vp[i+1] == 'r' && vp[i+2] == 'l' && vp[i+3] == '(')
8133 || vp[i] == ':' || vp[i] == '/' || vp[i] == '.')
8134 vp[0] = 'X';
8137 else if(!strucmp(p->attribute, "background")){
8138 char *ip;
8140 for(ip = p->value; *ip && !(*ip == ':' || *ip == '/' || *ip == '.'); ip++)
8143 if(ip)
8144 continue;
8148 html_output(f, ' ');
8149 html_output_string(f, p->attribute);
8150 if(p->value){
8151 html_output(f, '=');
8152 html_output(f, '\"');
8153 html_output_string(f, p->value);
8154 html_output(f, '\"');
8158 /* append warning to form submission */
8159 if(!strucmp(tag, "form")){
8160 html_output_string(f, " onsubmit=\"return window.confirm('This form is submitting information to an outside server.\\nAre you sure?');\"");
8163 if(ED(f)->end_tag){
8164 html_output(f, ' ');
8165 html_output(f, '/');
8168 html_output(f, '>');
/*
 * html_event_attribute - tell whether the named attribute is an HTML
 *                        event handler, i.e. one whose value a browser
 *                        would run as script.
 *
 *   attr -- attribute name (any case); NULL is treated as "not an event"
 *
 * Every event handler attribute starts with "on", so the test is made
 * on that prefix rather than against a fixed list of names.  The list
 * used previously spelled "onselect" as "onselec" and had no entry for
 * newer handlers, all of which were therefore passed through.
 *
 * Returns: 1 if attr is "on" followed by at least one more character,
 *          0 otherwise
 */
int
html_event_attribute(char *attr)
{
    return(attr
           && (attr[0] == 'o' || attr[0] == 'O')
           && (attr[1] == 'n' || attr[1] == 'N')
           && attr[2] != '\0');
}
8192 void
8193 html_output_normal(FILTER_S *f, int ch, int width, int remaining)
8195 static int written = 0;
8196 static int cwidth;
8198 if(HD(f)->centered){
8199 html_centered_flush(f);
8200 fs_give((void **) &HD(f)->centered->line.buf);
8201 fs_give((void **) &HD(f)->centered->word.buf);
8202 fs_give((void **) &HD(f)->centered);
8205 if(HD(f)->wrapstate){
8206 if(ch == HTML_NEWLINE){ /* hard newline */
8207 html_output_flush(f);
8208 html_newline(f);
8210 else
8211 HD(f)->blanks = 0; /* reset blank line counter */
8213 if(ch == TAG_EMBED){ /* takes up no space */
8214 HD(f)->embedded.state = -5;
8215 HTML_LINEP_PUTC(f, TAG_EMBED);
8217 else if(HD(f)->embedded.state){ /* ditto */
8218 if(HD(f)->embedded.state == -5){
8219 /* looking for specially handled tags following TAG_EMBED */
8220 if(ch == TAG_HANDLE)
8221 HD(f)->embedded.state = -1; /* next ch is length */
8222 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8223 if(!HD(f)->color)
8224 HD(f)->color = new_color_pair(NULL, NULL);
8226 if(ch == TAG_FGCOLOR)
8227 HD(f)->embedded.color = HD(f)->color->fg;
8228 else
8229 HD(f)->embedded.color = HD(f)->color->bg;
8231 HD(f)->embedded.state = RGBLEN;
8233 else
8234 HD(f)->embedded.state = 0; /* non-special */
8236 else if(HD(f)->embedded.state > 0){
8237 /* collecting up an RGBLEN color or length, ignore tags */
8238 (HD(f)->embedded.state)--;
8239 if(HD(f)->embedded.color)
8240 *HD(f)->embedded.color++ = ch;
8242 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8243 *HD(f)->embedded.color = '\0';
8244 HD(f)->embedded.color = NULL;
8247 else if(HD(f)->embedded.state < 0){
8248 HD(f)->embedded.state = ch; /* number of embedded chars */
8250 else{
8251 (HD(f)->embedded.state)--;
8252 if(HD(f)->embedded.color)
8253 *HD(f)->embedded.color++ = ch;
8255 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8256 *HD(f)->embedded.color = '\0';
8257 HD(f)->embedded.color = NULL;
8261 HTML_LINEP_PUTC(f, ch);
8263 else if(HTML_ISSPACE(ch)){
8264 html_output_flush(f);
8266 else{
8267 if(HD(f)->prefix)
8268 html_a_prefix(f);
8270 if(written == 0)
8271 cwidth = width;
8273 if(f->f2 + cwidth + 1 >= WRAP_COLS(f)){
8274 HTML_LINEP_PUTC(f, ch & 0xff);
8275 written++;
8276 if(remaining == 0){
8277 HTML_FLUSH(f);
8278 html_newline(f);
8280 if(HD(f)->in_anchor)
8281 html_write_anchor(f, HD(f)->in_anchor);
8283 else{
8284 HTML_LINEP_PUTC(f, ch & 0xff);
8285 written++;
8288 if(remaining == 0){
8289 written = 0;
8290 f->f2 += cwidth;
8294 else{
8295 if(HD(f)->prefix)
8296 html_a_prefix(f);
8298 html_output_flush(f);
8300 switch(HD(f)->embedded.state){
8301 case 0 :
8302 switch(ch){
8303 default :
8305 * It's difficult to both preserve whitespace and wrap at the
8306 * same time so we'll do a dumb wrap at the edge of the screen.
8307 * Since this shouldn't come up much in real life we'll hope
8308 * it is good enough.
8310 if(!PASS_HTML(f) && (f->n + width) > WRAP_COLS(f))
8311 html_newline(f);
8313 f->n += width; /* inc displayed char count */
8314 HD(f)->blanks = 0; /* reset blank line counter */
8315 html_putc(f, ch & 0xff);
8316 break;
8318 case TAG_EMBED : /* takes up no space */
8319 html_putc(f, TAG_EMBED);
8320 HD(f)->embedded.state = -2;
8321 break;
8323 case HTML_NEWLINE : /* newline handling */
8324 if(!f->n)
8325 break;
8327 case '\n' :
8328 html_newline(f);
8330 case '\r' :
8331 break;
8334 break;
8336 case -2 :
8337 HD(f)->embedded.state = 0;
8338 switch(ch){
8339 case TAG_HANDLE :
8340 HD(f)->embedded.state = -1; /* next ch is length */
8341 break;
8343 case TAG_BOLDON :
8344 BOLD_BIT(f) = 1;
8345 break;
8347 case TAG_BOLDOFF :
8348 BOLD_BIT(f) = 0;
8349 break;
8351 case TAG_ULINEON :
8352 ULINE_BIT(f) = 1;
8353 break;
8355 case TAG_ULINEOFF :
8356 ULINE_BIT(f) = 0;
8357 break;
8359 case TAG_FGCOLOR :
8360 if(!HD(f)->color)
8361 HD(f)->color = new_color_pair(NULL, NULL);
8363 HD(f)->embedded.color = HD(f)->color->fg;
8364 HD(f)->embedded.state = 11;
8365 break;
8367 case TAG_BGCOLOR :
8368 if(!HD(f)->color)
8369 HD(f)->color = new_color_pair(NULL, NULL);
8371 HD(f)->embedded.color = HD(f)->color->bg;
8372 HD(f)->embedded.state = 11;
8373 break;
8375 case TAG_HANDLEOFF :
8376 ch = TAG_INVOFF;
8377 HD(f)->in_anchor = 0;
8378 break;
8380 default :
8381 break;
8384 html_putc(f, ch);
8385 break;
8387 case -1 :
8388 HD(f)->embedded.state = ch; /* number of embedded chars */
8389 html_putc(f, ch);
8390 break;
8392 default :
8393 HD(f)->embedded.state--;
8394 if(HD(f)->embedded.color)
8395 *HD(f)->embedded.color++ = ch;
8397 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8398 *HD(f)->embedded.color = '\0';
8399 HD(f)->embedded.color = NULL;
8402 html_putc(f, ch);
8403 break;
8410 * flush any buffered chars waiting for wrapping.
8412 void
8413 html_output_flush(FILTER_S *f)
8415 if(f->f2){
8416 if(f->n && ((int) f->n) + 1 + f->f2 > HD(f)->wrapcol)
8417 html_newline(f); /* wrap? */
8419 if(f->n){ /* text already on the line? */
8420 html_putc(f, ' ');
8421 f->n++; /* increment count */
8423 else{
8424 /* write at start of new line */
8425 html_write_indent(f, HD(f)->indent_level);
8427 if(HD(f)->in_anchor)
8428 html_write_anchor(f, HD(f)->in_anchor);
8431 f->n += f->f2;
8432 HTML_FLUSH(f);
8439 * html_output_centered - managed writing centered text
8441 void
8442 html_output_centered(FILTER_S *f, int ch, int width, int remaining)
8444 static int written;
8445 static int cwidth;
8447 if(!HD(f)->centered){ /* new text? */
8448 html_output_flush(f);
8449 if(f->n) /* start on blank line */
8450 html_newline(f);
8452 HD(f)->centered = (CENTER_S *) fs_get(sizeof(CENTER_S));
8453 memset(HD(f)->centered, 0, sizeof(CENTER_S));
8454 /* and grab a buf to start collecting centered text */
8455 HD(f)->centered->line.len = WRAP_COLS(f);
8456 HD(f)->centered->line.buf = (char *) fs_get(HD(f)->centered->line.len
8457 * sizeof(char));
8458 HD(f)->centered->line.used = HD(f)->centered->line.width = 0;
8459 HD(f)->centered->word.len = 32;
8460 HD(f)->centered->word.buf = (char *) fs_get(HD(f)->centered->word.len
8461 * sizeof(char));
8462 HD(f)->centered->word.used = HD(f)->centered->word.width = 0;
8465 if(ch == HTML_NEWLINE){ /* hard newline */
8466 html_centered_flush(f);
8468 else if(ch == TAG_EMBED){ /* takes up no space */
8469 HD(f)->embedded.state = -5;
8470 html_centered_putc(&HD(f)->centered->word, TAG_EMBED);
8472 else if(HD(f)->embedded.state){
8473 if(HD(f)->embedded.state == -5){
8474 /* looking for specially handled tags following TAG_EMBED */
8475 if(ch == TAG_HANDLE)
8476 HD(f)->embedded.state = -1; /* next ch is length */
8477 else if(ch == TAG_FGCOLOR || ch == TAG_BGCOLOR){
8478 if(!HD(f)->color)
8479 HD(f)->color = new_color_pair(NULL, NULL);
8481 if(ch == TAG_FGCOLOR)
8482 HD(f)->embedded.color = HD(f)->color->fg;
8483 else
8484 HD(f)->embedded.color = HD(f)->color->bg;
8486 HD(f)->embedded.state = RGBLEN;
8488 else
8489 HD(f)->embedded.state = 0; /* non-special */
8491 else if(HD(f)->embedded.state > 0){
8492 /* collecting up an RGBLEN color or length, ignore tags */
8493 (HD(f)->embedded.state)--;
8494 if(HD(f)->embedded.color)
8495 *HD(f)->embedded.color++ = ch;
8497 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8498 *HD(f)->embedded.color = '\0';
8499 HD(f)->embedded.color = NULL;
8502 else if(HD(f)->embedded.state < 0){
8503 HD(f)->embedded.state = ch; /* number of embedded chars */
8505 else{
8506 (HD(f)->embedded.state)--;
8507 if(HD(f)->embedded.color)
8508 *HD(f)->embedded.color++ = ch;
8510 if(HD(f)->embedded.state == 0 && HD(f)->embedded.color){
8511 *HD(f)->embedded.color = '\0';
8512 HD(f)->embedded.color = NULL;
8516 html_centered_putc(&HD(f)->centered->word, ch);
8518 else if(ASCII_ISSPACE((unsigned char) ch)){
8519 if(!HD(f)->centered->space++){ /* end of a word? flush! */
8520 int i;
8522 if(WRAPPED_LEN(f) > HD(f)->wrapcol){
8523 html_centered_flush_line(f);
8524 /* fall thru to put current "word" on blank "line" */
8526 else if(HD(f)->centered->line.width){
8527 /* put space char between line and appended word */
8528 html_centered_putc(&HD(f)->centered->line, ' ');
8529 HD(f)->centered->line.width++;
8532 for(i = 0; i < HD(f)->centered->word.used; i++)
8533 html_centered_putc(&HD(f)->centered->line,
8534 HD(f)->centered->word.buf[i]);
8536 HD(f)->centered->line.width += HD(f)->centered->word.width;
8537 HD(f)->centered->word.used = 0;
8538 HD(f)->centered->word.width = 0;
8541 else{
8542 if(HD(f)->prefix)
8543 html_a_prefix(f);
8545 /* ch is start of next word */
8546 HD(f)->centered->space = 0;
8547 if(HD(f)->centered->word.width >= WRAP_COLS(f))
8548 html_centered_flush(f);
8550 html_centered_putc(&HD(f)->centered->word, ch);
8552 if(written == 0)
8553 cwidth = width;
8555 written++;
8557 if(remaining == 0){
8558 written = 0;
8559 HD(f)->centered->word.width += cwidth;
8566 * html_centered_putc -- add given char to given WRAPLINE_S
8568 void
8569 html_centered_putc(WRAPLINE_S *wp, int ch)
8571 if(wp->used + 1 >= wp->len){
8572 wp->len += 64;
8573 fs_resize((void **) &wp->buf, wp->len * sizeof(char));
8576 wp->buf[wp->used++] = ch;
8582 * html_centered_flush - finish writing any pending centered output
8584 void
8585 html_centered_flush(FILTER_S *f)
8587 int i;
8590 * If word present (what about line?) we need to deal with
8591 * appending it...
8593 if(HD(f)->centered->word.width && WRAPPED_LEN(f) > HD(f)->wrapcol)
8594 html_centered_flush_line(f);
8596 if(WRAPPED_LEN(f)){
8597 /* figure out how much to indent */
8598 if((i = (WRAP_COLS(f) - WRAPPED_LEN(f))/2) > 0)
8599 html_write_indent(f, i);
8601 if(HD(f)->centered->anchor)
8602 html_write_anchor(f, HD(f)->centered->anchor);
8604 html_centered_handle(&HD(f)->centered->anchor,
8605 HD(f)->centered->line.buf,
8606 HD(f)->centered->line.used);
8607 html_write(f, HD(f)->centered->line.buf, HD(f)->centered->line.used);
8609 if(HD(f)->centered->word.used){
8610 if(HD(f)->centered->line.width)
8611 html_putc(f, ' ');
8613 html_centered_handle(&HD(f)->centered->anchor,
8614 HD(f)->centered->word.buf,
8615 HD(f)->centered->word.used);
8616 html_write(f, HD(f)->centered->word.buf,
8617 HD(f)->centered->word.used);
8620 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8621 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8623 else{
8624 if(HD(f)->centered->word.used){
8625 html_write(f, HD(f)->centered->word.buf,
8626 HD(f)->centered->word.used);
8627 HD(f)->centered->line.used = HD(f)->centered->word.used = 0;
8628 HD(f)->centered->line.width = HD(f)->centered->word.width = 0;
8630 HD(f)->blanks++; /* advance the blank line counter */
8633 html_newline(f); /* finish the line */
8638 * html_centered_handle - scan the line for embedded handles
8640 void
8641 html_centered_handle(int *h, char *line, int len)
8643 int n;
8645 while(len-- > 0)
8646 if(*line++ == TAG_EMBED && len-- > 0)
8647 switch(*line++){
8648 case TAG_HANDLE :
8649 if((n = *line++) >= --len){
8650 *h = 0;
8651 len -= n;
8652 while(n--)
8653 *h = (*h * 10) + (*line++ - '0');
8655 break;
8657 case TAG_HANDLEOFF :
8658 case TAG_INVOFF :
8659 *h = 0; /* assumption 23,342: inverse off ends tags */
8660 break;
8662 default :
8663 break;
8670 * html_centered_flush_line - flush the centered "line" only
8672 void
8673 html_centered_flush_line(FILTER_S *f)
8675 if(HD(f)->centered->line.used){
8676 int i, j;
8678 /* hide "word" from flush */
8679 i = HD(f)->centered->word.used;
8680 j = HD(f)->centered->word.width;
8681 HD(f)->centered->word.used = 0;
8682 HD(f)->centered->word.width = 0;
8683 html_centered_flush(f);
8685 HD(f)->centered->word.used = i;
8686 HD(f)->centered->word.width = j;
8692 * html_write_indent - write indention mindful of display attributes
8694 void
8695 html_write_indent(FILTER_S *f, int indent)
8697 if(! STRIP(f)){
8698 if(BOLD_BIT(f)){
8699 html_putc(f, TAG_EMBED);
8700 html_putc(f, TAG_BOLDOFF);
8703 if(ULINE_BIT(f)){
8704 html_putc(f, TAG_EMBED);
8705 html_putc(f, TAG_ULINEOFF);
8709 f->n = indent;
8710 while(indent-- > 0)
8711 html_putc(f, ' '); /* indent as needed */
8714 * Resume any previous embedded state
8716 if(! STRIP(f)){
8717 if(BOLD_BIT(f)){
8718 html_putc(f, TAG_EMBED);
8719 html_putc(f, TAG_BOLDON);
8722 if(ULINE_BIT(f)){
8723 html_putc(f, TAG_EMBED);
8724 html_putc(f, TAG_ULINEON);
8733 void
8734 html_write_anchor(FILTER_S *f, int anchor)
8736 char buf[256];
8737 int i;
8739 html_putc(f, TAG_EMBED);
8740 html_putc(f, TAG_HANDLE);
8741 snprintf(buf, sizeof(buf), "%d", anchor);
8742 html_putc(f, (int) strlen(buf));
8744 for(i = 0; buf[i]; i++)
8745 html_putc(f, buf[i]);
8750 * html_write_newline - write a newline mindful of display attributes
8752 void
8753 html_write_newline(FILTER_S *f)
8755 int i;
8757 if(! STRIP(f)){ /* First tie, off any embedded state */
8758 if(HD(f)->in_anchor){
8759 html_putc(f, TAG_EMBED);
8760 html_putc(f, TAG_INVOFF);
8763 if(BOLD_BIT(f)){
8764 html_putc(f, TAG_EMBED);
8765 html_putc(f, TAG_BOLDOFF);
8768 if(ULINE_BIT(f)){
8769 html_putc(f, TAG_EMBED);
8770 html_putc(f, TAG_ULINEOFF);
8773 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8774 char *p;
8775 int i;
8777 p = color_embed(ps_global->VAR_NORM_FORE_COLOR,
8778 ps_global->VAR_NORM_BACK_COLOR);
8779 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8780 html_putc(f, p[i]);
8784 html_write(f, "\015\012", 2);
8785 for(i = HTML_INDENT(f); i > 0; i--)
8786 html_putc(f, ' ');
8788 if(! STRIP(f)){ /* First tie, off any embedded state */
8789 if(BOLD_BIT(f)){
8790 html_putc(f, TAG_EMBED);
8791 html_putc(f, TAG_BOLDON);
8794 if(ULINE_BIT(f)){
8795 html_putc(f, TAG_EMBED);
8796 html_putc(f, TAG_ULINEON);
8799 if(HD(f)->color && (HD(f)->color->fg[0] || HD(f)->color->bg[0])){
8800 char *p, *tfg, *tbg;
8801 int i;
8802 COLOR_PAIR *tmp;
8804 tfg = HD(f)->color->fg;
8805 tbg = HD(f)->color->bg;
8806 tmp = new_color_pair(tfg[0] ? tfg
8807 : color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR),
8808 tbg[0] ? tbg
8809 : color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR));
8810 if(pico_is_good_colorpair(tmp)){
8811 p = color_embed(tfg[0] ? tfg
8812 : ps_global->VAR_NORM_FORE_COLOR,
8813 tbg[0] ? tbg
8814 : ps_global->VAR_NORM_BACK_COLOR);
8815 for(i = 0; i < 2 * (RGBLEN + 2); i++)
8816 html_putc(f, p[i]);
8819 if(tmp)
8820 free_color_pair(&tmp);
8827 * html_write - write given n-length string to next filter
8829 void
8830 html_write(FILTER_S *f, char *s, int n)
8832 GF_INIT(f, f->next);
8834 while(n-- > 0){
8835 /* keep track of attribute state? Not if last char! */
8836 if(!STRIP(f) && *s == TAG_EMBED && n-- > 0){
8837 GF_PUTC(f->next, TAG_EMBED);
8838 switch(*++s){
8839 case TAG_BOLDON :
8840 BOLD_BIT(f) = 1;
8841 break;
8842 case TAG_BOLDOFF :
8843 BOLD_BIT(f) = 0;
8844 break;
8845 case TAG_ULINEON :
8846 ULINE_BIT(f) = 1;
8847 break;
8848 case TAG_ULINEOFF :
8849 ULINE_BIT(f) = 0;
8850 break;
8851 case TAG_HANDLEOFF :
8852 HD(f)->in_anchor = 0;
8853 GF_PUTC(f->next, TAG_INVOFF);
8854 s++;
8855 continue;
8856 case TAG_HANDLE :
8857 if(n-- > 0){
8858 int i = *++s;
8860 GF_PUTC(f->next, TAG_HANDLE);
8861 if(i <= n){
8862 int anum = 0;
8863 HANDLE_S *h;
8865 n -= i;
8866 GF_PUTC(f->next, i);
8867 while(1){
8868 anum = (anum * 10) + (*++s - '0');
8869 if(--i)
8870 GF_PUTC(f->next, *s);
8871 else
8872 break;
8875 if(DO_HANDLES(f)
8876 && (h = get_handle(*HANDLESP(f), anum)) != NULL
8877 && (h->type == URL || h->type == Attach)){
8878 HD(f)->in_anchor = anum;
8883 break;
8884 default:
8885 break;
8889 GF_PUTC(f->next, (*s++) & 0xff);
8892 GF_IP_END(f->next); /* clean up next's input pointers */
8897 * html_putc -- actual work of writing to next filter.
8898 * NOTE: Small opt not using full GF_END since our input
8899 * pointers don't need adjusting.
8901 void
8902 html_putc(FILTER_S *f, int ch)
8904 GF_INIT(f, f->next);
8905 GF_PUTC(f->next, ch & 0xff);
8906 GF_IP_END(f->next); /* clean up next's input pointers */
8912 * Only current option is to turn on embedded data stripping for text
8913 * bound to a printer or composer.
8915 void *
8916 gf_html2plain_opt(char *base,
8917 int columns,
8918 int *margin,
8919 HANDLE_S **handlesp,
8920 htmlrisk_t risk_f,
8921 int flags)
8923 HTML_OPT_S *op;
8924 int margin_l, margin_r;
8926 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8928 op->base = cpystr(base);
8929 margin_l = (margin) ? margin[0] : 0;
8930 margin_r = (margin) ? margin[1] : 0;
8931 op->indent = margin_l;
8932 op->columns = columns - (margin_l + margin_r);
8933 op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED);
8934 op->handlesp = handlesp;
8935 op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES);
8936 op->showserver = ((flags & GFHP_SHOW_SERVER) == GFHP_SHOW_SERVER);
8937 op->warnrisk_f = risk_f;
8938 op->no_relative_links = ((flags & GFHP_NO_RELATIVE) == GFHP_NO_RELATIVE);
8939 op->related_content = ((flags & GFHP_RELATED_CONTENT) == GFHP_RELATED_CONTENT);
8940 op->html = ((flags & GFHP_HTML) == GFHP_HTML);
8941 op->html_imgs = ((flags & GFHP_HTML_IMAGES) == GFHP_HTML_IMAGES);
8942 op->element_table = html_element_table;
8943 return((void *) op);
8947 void *
8948 gf_html2plain_rss_opt(RSS_FEED_S **feedp, int flags)
8950 HTML_OPT_S *op;
8952 op = (HTML_OPT_S *) fs_get(sizeof(HTML_OPT_S));
8953 memset(op, 0, sizeof(HTML_OPT_S));
8955 op->base = cpystr("");
8956 op->element_table = rss_element_table;
8957 *(op->feedp = feedp) = NULL;
8958 return((void *) op);
8961 void
8962 gf_html2plain_rss_free(RSS_FEED_S **feedp)
8964 if(feedp && *feedp){
8965 if((*feedp)->title)
8966 fs_give((void **) &(*feedp)->title);
8968 if((*feedp)->link)
8969 fs_give((void **) &(*feedp)->link);
8971 if((*feedp)->description)
8972 fs_give((void **) &(*feedp)->description);
8974 if((*feedp)->source)
8975 fs_give((void **) &(*feedp)->source);
8977 if((*feedp)->image)
8978 fs_give((void **) &(*feedp)->image);
8980 gf_html2plain_rss_free_items(&((*feedp)->items));
8981 fs_give((void **) feedp);
8985 void
8986 gf_html2plain_rss_free_items(RSS_ITEM_S **itemp)
8988 if(itemp && *itemp){
8989 if((*itemp)->title)
8990 fs_give((void **) &(*itemp)->title);
8992 if((*itemp)->link)
8993 fs_give((void **) &(*itemp)->link);
8995 if((*itemp)->description)
8996 fs_give((void **) &(*itemp)->description);
8998 if((*itemp)->source)
8999 fs_give((void **) &(*itemp)->source);
9001 gf_html2plain_rss_free_items(&(*itemp)->next);
9002 fs_give((void **) itemp);
9007 /* END OF HTML-TO-PLAIN text filter */
9010 * ESCAPE CODE FILTER - remove unknown and possibly dangerous escape codes
9011 * from the text stream.
9014 #define MAX_ESC_LEN 5
9017 * the simple filter, removes unknown escape codes from the stream
9019 void
9020 gf_escape_filter(FILTER_S *f, int flg)
9022 register char *p;
9023 GF_INIT(f, f->next);
9025 if(flg == GF_DATA){
9026 register unsigned char c;
9027 register int state = f->f1;
9029 while(GF_GETC(f, c)){
9031 if(state){
9032 if(c == '\033' || f->n == MAX_ESC_LEN){
9033 f->line[f->n] = '\0';
9034 f->n = 0L;
9035 if(!match_escapes(f->line)){
9036 GF_PUTC(f->next, '^');
9037 GF_PUTC(f->next, '[');
9039 else
9040 GF_PUTC(f->next, '\033');
9042 p = f->line;
9043 while(*p)
9044 GF_PUTC(f->next, *p++);
9046 if(c == '\033')
9047 continue;
9048 else
9049 state = 0; /* fall thru */
9051 else{
9052 f->line[f->n++] = c; /* collect */
9053 continue;
9057 if(c == '\033')
9058 state = 1;
9059 else
9060 GF_PUTC(f->next, c);
9063 f->f1 = state;
9064 GF_END(f, f->next);
9066 else if(flg == GF_EOD){
9067 if(f->f1){
9068 if(!match_escapes(f->line)){
9069 GF_PUTC(f->next, '^');
9070 GF_PUTC(f->next, '[');
9072 else
9073 GF_PUTC(f->next, '\033');
9076 for(p = f->line; f->n; f->n--, p++)
9077 GF_PUTC(f->next, *p);
9079 fs_give((void **)&(f->line)); /* free temp line buffer */
9080 (void) GF_FLUSH(f->next);
9081 (*f->next->f)(f->next, GF_EOD);
9083 else if(flg == GF_RESET){
9084 dprint((9, "-- gf_reset escape\n"));
9085 f->f1 = 0;
9086 f->n = 0L;
9087 f->linep = f->line = (char *)fs_get((MAX_ESC_LEN + 1) * sizeof(char));
9094 * CONTROL CHARACTER FILTER - transmogrify control characters into their
9095 * corresponding string representations (you know, ^blah and such)...
9099 * the simple filter transforms unknown control characters in the stream
9100 * into harmless strings.
9102 void
9103 gf_control_filter(FILTER_S *f, int flg)
9105 GF_INIT(f, f->next);
9107 if(flg == GF_DATA){
9108 register unsigned char c;
9109 register int filt_only_c0;
9111 filt_only_c0 = f->opt ? (*(int *) f->opt) : 0;
9113 while(GF_GETC(f, c)){
9115 if(((c < 0x20 || c == 0x7f)
9116 || (c >= 0x80 && c < 0xA0 && !filt_only_c0))
9117 && !(ASCII_ISSPACE((unsigned char) c)
9118 || c == '\016' || c == '\017' || c == '\033')){
9119 GF_PUTC(f->next, c >= 0x80 ? '~' : '^');
9120 GF_PUTC(f->next, (c == 0x7f) ? '?' : (c & 0x1f) + '@');
9122 else
9123 GF_PUTC(f->next, c);
9126 GF_END(f, f->next);
9128 else if(flg == GF_EOD){
9129 (void) GF_FLUSH(f->next);
9130 (*f->next->f)(f->next, GF_EOD);
/*
 * function called from the outside to set
 * control filter's option, which says to filter C0 control characters
 * but not C1 control chars. We don't call it at all if we don't want
 * to filter C0 chars either.
 *
 * Returns: the pointer it was handed, as a void *.  Nothing is copied,
 *          so the int has to stay around for as long as the filter is
 *          in use.
 */
void *
gf_control_filter_opt(int *filt_only_c0)
{
    void *opt = (void *) filt_only_c0;

    return(opt);
}
9149 * TAG FILTER - quote all TAG_EMBED characters by doubling them.
9150 * This prevents the possibility of embedding other tags.
9151 * We assume that this filter should only be used for something
9152 * that is eventually writing to a display, which has the special
9153 * knowledge of quoted TAG_EMBEDs.
9155 void
9156 gf_tag_filter(FILTER_S *f, int flg)
9158 GF_INIT(f, f->next);
9160 if(flg == GF_DATA){
9161 register unsigned char c;
9163 while(GF_GETC(f, c)){
9165 if((c & 0xff) == (TAG_EMBED & 0xff)){
9166 GF_PUTC(f->next, TAG_EMBED);
9167 GF_PUTC(f->next, c);
9169 else
9170 GF_PUTC(f->next, c);
9173 GF_END(f, f->next);
9175 else if(flg == GF_EOD){
9176 (void) GF_FLUSH(f->next);
9177 (*f->next->f)(f->next, GF_EOD);
9183 * LINEWRAP FILTER - insert CRLF's at end of nearest whitespace before
9184 * specified line width
9188 typedef struct wrap_col_s {
9189 unsigned bold:1;
9190 unsigned uline:1;
9191 unsigned inverse:1;
9192 unsigned tags:1;
9193 unsigned do_indent:1;
9194 unsigned on_comma:1;
9195 unsigned flowed:1;
9196 unsigned delsp:1;
9197 unsigned quoted:1;
9198 unsigned allwsp:1;
9199 unsigned hard_nl:1;
9200 unsigned leave_flowed:1;
9201 unsigned use_color:1;
9202 unsigned hdr_color:1;
9203 unsigned for_compose:1;
9204 unsigned handle_soft_hyphen:1;
9205 unsigned saw_soft_hyphen:1;
9206 unsigned trailing_space:1;
9207 unsigned char utf8buf[7];
9208 unsigned char *utf8bufp;
9209 COLOR_PAIR *color;
9210 STORE_S *spaces;
9211 short embedded,
9212 space_len;
9213 char *lineendp;
9214 int anchor,
9215 prefbrk,
9216 prefbrkn,
9217 quote_depth,
9218 quote_count,
9219 sig,
9220 state,
9221 wrap_col,
9222 wrap_max,
9223 margin_l,
9224 margin_r,
9225 indent;
9226 char special[256];
9227 } WRAP_S;
/*
 * Accessors for the WRAP_S hanging off a filter's opt pointer
 */
#define	WRAP_MARG_L(F)	(((WRAP_S *)(F)->opt)->margin_l)
#define	WRAP_MARG_R(F)	(((WRAP_S *)(F)->opt)->margin_r)
/* usable columns: right margin, and one column when leaving text flowed, come off */
#define	WRAP_COL(F)	(((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_MAX_COL(F)	(((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0))
#define	WRAP_INDENT(F)	(((WRAP_S *)(F)->opt)->indent)
#define	WRAP_DO_IND(F)	(((WRAP_S *)(F)->opt)->do_indent)
#define	WRAP_COMMA(F)	(((WRAP_S *)(F)->opt)->on_comma)
#define	WRAP_FLOW(F)	(((WRAP_S *)(F)->opt)->flowed)
#define	WRAP_DELSP(F)	(((WRAP_S *)(F)->opt)->delsp)
#define	WRAP_FL_QD(F)	(((WRAP_S *)(F)->opt)->quote_depth)
#define	WRAP_FL_QC(F)	(((WRAP_S *)(F)->opt)->quote_count)
#define	WRAP_FL_SIG(F)	(((WRAP_S *)(F)->opt)->sig)
#define	WRAP_HARD(F)	(((WRAP_S *)(F)->opt)->hard_nl)
#define	WRAP_LV_FLD(F)	(((WRAP_S *)(F)->opt)->leave_flowed)
#define	WRAP_USE_CLR(F)	(((WRAP_S *)(F)->opt)->use_color)
#define	WRAP_HDR_CLR(F)	(((WRAP_S *)(F)->opt)->hdr_color)
#define	WRAP_FOR_CMPS(F)	(((WRAP_S *)(F)->opt)->for_compose)
#define	WRAP_HANDLE_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->handle_soft_hyphen)
#define	WRAP_SAW_SOFT_HYPHEN(F)	(((WRAP_S *)(F)->opt)->saw_soft_hyphen)
#define	WRAP_UTF8BUF(F, C)	(((WRAP_S *)(F)->opt)->utf8buf[C])
#define	WRAP_UTF8BUFP(F)	(((WRAP_S *)(F)->opt)->utf8bufp)
#define	WRAP_STATE(F)	(((WRAP_S *)(F)->opt)->state)
#define	WRAP_QUOTED(F)	(((WRAP_S *)(F)->opt)->quoted)
#define	WRAP_TAGS(F)	(((WRAP_S *)(F)->opt)->tags)
#define	WRAP_BOLD(F)	(((WRAP_S *)(F)->opt)->bold)
#define	WRAP_ULINE(F)	(((WRAP_S *)(F)->opt)->uline)
#define	WRAP_INVERSE(F)	(((WRAP_S *)(F)->opt)->inverse)
#define	WRAP_LASTC(F)	(((WRAP_S *)(F)->opt)->lineendp)
#define	WRAP_EMBED(F)	(((WRAP_S *)(F)->opt)->embedded)
#define	WRAP_ANCHOR(F)	(((WRAP_S *)(F)->opt)->anchor)
#define	WRAP_PB_OFF(F)	(((WRAP_S *)(F)->opt)->prefbrk)
#define	WRAP_PB_LEN(F)	(((WRAP_S *)(F)->opt)->prefbrkn)
#define	WRAP_ALLWSP(F)	(((WRAP_S *)(F)->opt)->allwsp)
#define	WRAP_SPC_LEN(F)	(((WRAP_S *)(F)->opt)->space_len)
#define	WRAP_TRL_SPC(F)	(((WRAP_S *)(F)->opt)->trailing_space)
#define	WRAP_SPEC(F, C)	((WRAP_S *) (F)->opt)->special[C]
#define	WRAP_COLOR(F)	(((WRAP_S *)(F)->opt)->color)
/* true when a foreground color is currently recorded */
#define	WRAP_COLOR_SET(F)	((WRAP_COLOR(F)) && (WRAP_COLOR(F)->fg[0]))
#define	WRAP_SPACES(F)	(((WRAP_S *)(F)->opt)->spaces)

/*
 * Append byte C, W screen cells wide, to the word buffer (F)->line.
 * The buffer is doubled when linep has reached its last byte.
 * NOTE: expands to a bare { } block, not do { } while(0).
 */
#define	WRAP_PUTC(F,C,W) {						\
				if((F)->linep == WRAP_LASTC(F)){	\
				    size_t offset = (F)->linep - (F)->line; \
				    fs_resize((void **) &(F)->line,	\
					      (2 * offset) * sizeof(char)); \
				    (F)->linep = &(F)->line[offset];	\
				    WRAP_LASTC(F) = &(F)->line[2*offset-1]; \
				}					\
				*(F)->linep++ = (C);			\
				(F)->f2 += (W);				\
			    }

/*
 * Put a byte of an embedded sequence in the word buffer if a word is
 * under way (f2 non-zero), else in the pending spaces store.
 */
#define	WRAP_EMBED_PUTC(F,C) {						\
				if((F)->f2){				\
				    WRAP_PUTC((F), C, 0);		\
				}					\
				else					\
				  so_writec(C, WRAP_SPACES(F));		\
}

/* forget the recorded foreground color, if there is one */
#define	WRAP_COLOR_UNSET(F)	{					\
				  if(WRAP_COLOR_SET(F)){		\
				      WRAP_COLOR(F)->fg[0] = '\0';	\
				  }					\
				}
9295 * wrap_flush_embed flags
9297 #define WFE_NONE 0 /* Nothing special */
9298 #define WFE_CNT_HANDLE 1 /* account for/don't write handles */
9301 int wrap_flush(FILTER_S *, unsigned char **, unsigned char **, unsigned char **, unsigned char **);
9302 int wrap_flush_embed(FILTER_S *, unsigned char **, unsigned char **,
9303 unsigned char **, unsigned char **);
9304 int wrap_flush_s(FILTER_S *,char *, int, int, unsigned char **, unsigned char **,
9305 unsigned char **, unsigned char **, int);
9306 int wrap_eol(FILTER_S *, int, unsigned char **, unsigned char **,
9307 unsigned char **, unsigned char **);
9308 int wrap_bol(FILTER_S *, int, int, unsigned char **,
9309 unsigned char **, unsigned char **, unsigned char **);
9310 int wrap_quote_insert(FILTER_S *, unsigned char **, unsigned char **,
9311 unsigned char **, unsigned char **);
9314 * the no longer simple filter, breaks lines at end of white space nearest
9315 * to global "gf_wrap_width" in length
9316 * It also supports margins, indents (inverse indenting, really) and
9317 * flowed text (ala RFC 3676)
/*
 * gf_wrap - the line wrapping filter.
 *
 * flg selects what the call does:
 *
 *   GF_RESET  Start in state BOL with f->n and f->f2 zeroed.  If no
 *             option block was supplied one is created with
 *             gf_wrap_filter_opt(75, 80, NULL, 0, 0).  The indent is
 *             halved until it is smaller than the maximum column, the
 *             line buffer is allocated, the table of "special" bytes
 *             is filled in and the pending-whitespace store is created.
 *
 *   GF_DATA   Read bytes with GF_GETC and run them through the state
 *             machine below.  The state is loaded from f->f1 on entry
 *             and saved back to f->f1 when the input is exhausted.
 *
 *   GF_EOD    Flush what is still buffered, release the remembered
 *             color pair, the line buffer, the whitespace store and
 *             the option block, then flush the next filter and send it
 *             GF_EOD.
 */
9320 void
9321 gf_wrap(FILTER_S *f, int flg)
9323 register long i;
9324 GF_INIT(f, f->next);
9327 * f->f1 state
9328 * f->line buffer where next "word" being considered is stored
9329 * f->f2 width in screen cells of f->line stuff
9330 * f->n width in screen cells of the part of this line committed to next
9331 * filter so far
9334 if(flg == GF_DATA){
9335 register unsigned char c;
9336 register int state = f->f1;
9337 int width, full_character;
9339 while(GF_GETC(f, c)){
9341 switch(state){
/* CCR: the previous byte was a CR; c is the byte that follows it */
9342 case CCR : /* CRLF or CR in text ? */
9343 state = BOL; /* either way, handle start */
9345 if(WRAP_FLOW(f)){
9346 /* wrapped line? */
9347 if(f->f2 == 0 && WRAP_SPC_LEN(f) && WRAP_TRL_SPC(f)){
9349 * whack trailing space char, but be aware
9350 * of embeds in space buffer. grok them just
9351 * in case they contain a 0x20 value
9353 if(WRAP_DELSP(f)){
9354 char *sb, *sbp, *scp = NULL;
9355 int x;
9357 for(sb = sbp = (char *)so_text(WRAP_SPACES(f)); *sbp; sbp++){
9358 switch(*sbp){
9359 case ' ' :
9360 scp = sbp;
9361 break;
9363 case TAG_EMBED :
9364 sbp++;
9365 switch (*sbp++){
9366 case TAG_HANDLE :
9367 x = (int) *sbp++;
9368 if(strlen(sbp) >= x)
9369 sbp += (x - 1);
9371 break;
9373 case TAG_FGCOLOR :
9374 case TAG_BGCOLOR :
9375 if(strlen(sbp) >= RGBLEN)
9376 sbp += (RGBLEN - 1);
9378 break;
9380 default :
9381 break;
9384 break;
9386 default :
9387 break;
9391 /* replace space buf without trailing space char */
9392 if(scp){
9393 STORE_S *ns = so_get(CharStar, NULL, EDIT_ACCESS);
9395 *scp++ = '\0';
9396 WRAP_SPC_LEN(f)--;
9397 WRAP_TRL_SPC(f) = 0;
9399 so_puts(ns, sb);
9400 so_puts(ns, scp);
9402 so_give(&WRAP_SPACES(f));
9403 WRAP_SPACES(f) = ns;
9407 else{ /* fixed line */
9408 WRAP_HARD(f) = 1;
9409 wrap_flush(f, &ip, &eib, &op, &eob);
9410 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9413 * When we get to a real end of line, we don't need to
9414 * remember what the special color was anymore because
9415 * we aren't going to be changing back to it. We unset it
9416 * so that we don't keep resetting the color to normal.
9418 WRAP_COLOR_UNSET(f);
9421 if(c == '\012'){ /* get c following LF */
9422 break;
9424 /* else c is first char of new line, fall thru */
9426 else{
9427 wrap_flush(f, &ip, &eib, &op, &eob);
9428 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9429 WRAP_COLOR_UNSET(f); /* see note above */
9430 if(c == '\012'){
9431 break;
9433 /* else fall thru to deal with beginning of line */
/* BOL: c is the first byte of an input line */
9436 case BOL :
9437 if(WRAP_FLOW(f)){
9438 if(c == '>'){
9439 WRAP_FL_QC(f) = 1; /* init it */
9440 state = FL_QLEV; /* go collect it */
9442 else {
9443 /* if EMBEDed, process it and return here */
9444 if(c == (unsigned char) TAG_EMBED){
9445 WRAP_EMBED_PUTC(f, TAG_EMBED);
9446 WRAP_STATE(f) = state;
9447 state = TAG;
9448 continue;
9451 /* quote level change implies new paragraph */
9452 if(WRAP_FL_QD(f)){
9453 WRAP_FL_QD(f) = 0;
9454 if(WRAP_HARD(f) == 0){
9455 WRAP_HARD(f) = 1;
9456 wrap_flush(f, &ip, &eib, &op, &eob);
9457 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9458 WRAP_COLOR_UNSET(f); /* see note above */
9462 if(WRAP_HARD(f)){
9463 wrap_bol(f, 0, 1, &ip, &eib, &op,
9464 &eob); /* write quoting prefix */
9465 WRAP_HARD(f) = 0;
9468 switch (c) {
9469 case '\015' : /* a blank line? */
9470 wrap_flush(f, &ip, &eib, &op, &eob);
9471 state = CCR; /* go collect it */
9472 break;
9474 case ' ' : /* space stuffed */
9475 state = FL_STF; /* just eat it */
9476 break;
9478 case '-' : /* possible sig-dash */
9479 WRAP_FL_SIG(f) = 1; /* init state */
9480 state = FL_SIG; /* go collect it */
9481 break;
9483 default :
9484 state = DFL; /* go back to normal */
9485 goto case_dfl; /* handle c like DFL case */
9489 else{
9490 state = DFL;
9491 if(WRAP_COMMA(f) && c == TAB){
9492 wrap_bol(f, 1, 0, &ip, &eib, &op,
9493 &eob); /* convert to normal indent */
9494 break;
9497 wrap_bol(f,0,0, &ip, &eib, &op, &eob);
9498 goto case_dfl; /* handle c like DFL case */
9501 break;
/* FL_QLEV: counting the leading '>' characters of a flowed line in WRAP_FL_QC */
9503 case FL_QLEV :
9504 if(c == '>'){ /* another level */
9505 WRAP_FL_QC(f)++;
9507 else {
9508 /* if EMBEDed, process it and return here */
9509 if(c == (unsigned char) TAG_EMBED){
9510 WRAP_EMBED_PUTC(f, TAG_EMBED);
9511 WRAP_STATE(f) = state;
9512 state = TAG;
9513 continue;
9516 /* quote level change signals new paragraph */
9517 if(WRAP_FL_QC(f) != WRAP_FL_QD(f)){
9518 WRAP_FL_QD(f) = WRAP_FL_QC(f);
9519 if(WRAP_HARD(f) == 0){ /* add hard newline */
9520 WRAP_HARD(f) = 1; /* hard newline */
9521 wrap_flush(f, &ip, &eib, &op, &eob);
9522 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9523 WRAP_COLOR_UNSET(f); /* see note above */
9527 if(WRAP_HARD(f)){
9528 wrap_bol(f,0,1, &ip, &eib, &op, &eob);
9529 WRAP_HARD(f) = 0;
9532 switch (c) {
9533 case '\015' : /* a blank line? */
9534 wrap_flush(f, &ip, &eib, &op, &eob);
9535 state = CCR; /* go collect it */
9536 break;
9538 case ' ' : /* space-stuffed! */
9539 state = FL_STF; /* just eat it */
9540 break;
9542 case '-' : /* sig dash? */
9543 WRAP_FL_SIG(f) = 1;
9544 state = FL_SIG;
9545 break;
9547 default : /* something else */
9548 state = DFL;
9549 goto case_dfl; /* handle c like DFL */
9553 break;
/* FL_STF: the previous byte was the stuffing space of a flowed line */
9555 case FL_STF : /* space stuffed */
9556 switch (c) {
9557 case '\015' : /* a blank line? */
9558 wrap_flush(f, &ip, &eib, &op, &eob);
9559 state = CCR; /* go collect it */
9560 break;
9562 case (unsigned char) TAG_EMBED : /* process TAG data */
9563 WRAP_EMBED_PUTC(f, TAG_EMBED);
9564 WRAP_STATE(f) = state; /* and return */
9565 state = TAG;
9566 continue;
9568 case '-' : /* sig dash? */
9569 WRAP_FL_SIG(f) = 1;
9570 WRAP_ALLWSP(f) = 0;
9571 state = FL_SIG;
9572 break;
9574 default : /* something else */
9575 state = DFL;
9576 goto case_dfl; /* handle c like DFL */
9579 break;
/*
 * FL_SIG: deciding whether the line is the "-- " signature separator.
 * WRAP_FL_SIG(f) is the sub-state: 1 and 2 have seen one and two
 * dashes, 3 has seen the blank after them, 4 to 6 deal with embedded
 * tags that follow.  The dashes are not buffered until the decision
 * is known.
 */
9581 case FL_SIG : /* sig-dash collector */
9582 switch (WRAP_FL_SIG(f)){ /* possible sig-dash? */
9583 case 1 :
9584 if(c != '-'){ /* not a sigdash */
9585 if((f->n + WRAP_SPC_LEN(f) + 1) > WRAP_COL(f)){
9586 wrap_flush_embed(f, &ip, &eib, &op,
9587 &eob); /* note any embedded*/
9588 wrap_eol(f, 1, &ip, &eib,
9589 &op, &eob); /* plunk down newline */
9590 wrap_bol(f, 1, 1, &ip, &eib,
9591 &op, &eob); /* write any prefix */
9594 WRAP_PUTC(f,'-', 1); /* write what we got */
9596 WRAP_FL_SIG(f) = 0;
9597 state = DFL;
9598 goto case_dfl;
9601 /* don't put anything yet until we know to wrap or not */
9602 WRAP_FL_SIG(f) = 2;
9603 break;
9605 case 2 :
9606 if(c != ' '){ /* not a sigdash */
9607 WRAP_PUTC(f, '-', 1);
9608 if((f->n + WRAP_SPC_LEN(f) + 2) > WRAP_COL(f)){
9609 wrap_flush_embed(f, &ip, &eib, &op,
9610 &eob); /* note any embedded*/
9611 wrap_eol(f, 1, &ip, &eib,
9612 &op, &eob); /* plunk down newline */
9613 wrap_bol(f, 1, 1, &ip, &eib, &op,
9614 &eob); /* write any prefix */
9617 WRAP_PUTC(f,'-', 1); /* write what we got */
9619 WRAP_FL_SIG(f) = 0;
9620 state = DFL;
9621 goto case_dfl;
9624 /* don't put anything yet until we know to wrap or not */
9625 WRAP_FL_SIG(f) = 3;
9626 break;
9628 case 3 :
9629 if(c == '\015'){ /* success! */
9630 /* known sigdash, newline if soft nl */
9631 if(WRAP_SPC_LEN(f)){
9632 wrap_flush(f, &ip, &eib, &op, &eob);
9633 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9634 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9636 WRAP_PUTC(f,'-',1);
9637 WRAP_PUTC(f,'-',1);
9638 WRAP_PUTC(f,' ',1);
9640 state = CCR;
9641 break;
9643 else{
9644 WRAP_FL_SIG(f) = 4; /* possible success */
9647 case 4 :
9648 switch(c){
9649 case (unsigned char) TAG_EMBED :
9651 * At this point we're almost 100% sure that we've got
9652 * a sigdash. Putc it (adding newline if previous
9653 * was a soft nl) so we get it the right color
9654 * before we store this new embedded stuff
9656 if(WRAP_SPC_LEN(f)){
9657 wrap_flush(f, &ip, &eib, &op, &eob);
9658 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9659 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9661 WRAP_PUTC(f,'-',1);
9662 WRAP_PUTC(f,'-',1);
9663 WRAP_PUTC(f,' ',1);
9665 WRAP_FL_SIG(f) = 5;
9666 break;
9668 case '\015' : /* success! */
9670 * We shouldn't get here, but in case we do, we have
9671 * not yet put the sigdash
9673 if(WRAP_SPC_LEN(f)){
9674 wrap_flush(f, &ip, &eib, &op, &eob);
9675 wrap_eol(f, 0, &ip, &eib, &op, &eob);
9676 wrap_bol(f, 0, 1, &ip, &eib, &op, &eob);
9678 WRAP_PUTC(f,'-',1);
9679 WRAP_PUTC(f,'-',1);
9680 WRAP_PUTC(f,' ',1);
9682 state = CCR;
9683 break;
9685 default : /* that's no sigdash! */
9686 /* write what we got but didn't put yet */
9687 WRAP_PUTC(f,'-', 1);
9688 WRAP_PUTC(f,'-', 1);
9689 WRAP_PUTC(f,' ', 1);
9691 WRAP_FL_SIG(f) = 0;
9692 wrap_flush(f, &ip, &eib, &op, &eob);
9693 WRAP_SPC_LEN(f) = 1;
9694 state = DFL; /* set normal state */
9695 goto case_dfl; /* and go do "c" */
9698 break;
9700 case 5 :
9701 WRAP_STATE(f) = FL_SIG; /* come back here */
9702 WRAP_FL_SIG(f) = 6; /* and seek EOL */
9703 WRAP_EMBED_PUTC(f, TAG_EMBED);
9704 state = TAG; /* process embed */
9705 goto case_tag;
9707 case 6 :
9709 * at this point we've already putc the sigdash in case 4
9711 switch(c){
9712 case (unsigned char) TAG_EMBED :
9713 WRAP_FL_SIG(f) = 5;
9714 break;
9716 case '\015' : /* success! */
9717 state = CCR;
9718 break;
9720 default : /* that's no sigdash! */
9722 * probably never reached (fake sigdash with embedded
9723 * stuff) but if this did get reached, then we
9724 * might have accidentally disobeyed a soft nl
9726 WRAP_FL_SIG(f) = 0;
9727 wrap_flush(f, &ip, &eib, &op, &eob);
9728 WRAP_SPC_LEN(f) = 1;
9729 state = DFL; /* set normal state */
9730 goto case_dfl; /* and go do "c" */
9733 break;
9736 default :
9737 dprint((2, "-- gf_wrap: BROKEN FLOW STATE: %d\n",
9738 WRAP_FL_SIG(f)));
9739 WRAP_FL_SIG(f) = 0;
9740 state = DFL; /* set normal state */
9741 goto case_dfl; /* and go process "c" */
9744 break;
/*
 * DFL: ordinary text.  Bytes flagged in the special table are handled
 * first (only when no partial UTF-8 character is pending), then soft
 * hyphens, then the byte is added to the UTF-8 assembly buffer and its
 * display width is worked out before it is committed to the line buffer.
 */
9746 case_dfl :
9747 case DFL :
9749 * This was just if(WRAP_SPEC(f, c)) before the change to add
9750 * the == 0 test. This isn't quite right, either. We should really
9751 * be looking for special characters in the UCS characters, not
9752 * in the incoming stream of UTF-8. It is not right to
9753 * call this on bytes that are in the middle of a UTF-8 character,
9754 * hence the == 0 test which restricts it to the first byte
9755 * of a character. This isn't right, either, but it's closer.
9756 * Also change the definition of WRAP_SPEC so that isspace only
9757 * matches ascii characters, which will never be in the middle
9758 * of a UTF-8 multi-byte character.
9760 if((WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 0 && WRAP_SPEC(f, c)){
9761 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9762 switch(c){
9763 default :
9764 if(WRAP_QUOTED(f))
9765 break;
9767 if(f->f2){ /* any non-lwsp to flush? */
9768 if(WRAP_COMMA(f)){
9769 /* remember our second best break point */
9770 WRAP_PB_OFF(f) = f->linep - f->line;
9771 WRAP_PB_LEN(f) = f->f2;
9772 break;
9774 else
9775 wrap_flush(f, &ip, &eib, &op, &eob);
9778 switch(c){ /* remember separator */
9779 case ' ' :
9780 WRAP_SPC_LEN(f)++;
9781 WRAP_TRL_SPC(f) = 1;
9782 so_writec(' ',WRAP_SPACES(f));
9783 break;
9785 case TAB :
9787 int i = (int) f->n + WRAP_SPC_LEN(f);
9790 WRAP_SPC_LEN(f)++;
9791 while(++i & 0x07);
9793 so_writec(TAB,WRAP_SPACES(f));
9794 WRAP_TRL_SPC(f) = 0;
9797 break;
9799 default : /* some control char? */
9800 WRAP_SPC_LEN(f) += 2;
9801 WRAP_TRL_SPC(f) = 0;
9802 break;
9805 continue;
9807 case '\"' :
9808 WRAP_QUOTED(f) = !WRAP_QUOTED(f);
9809 break;
9811 case '\015' : /* already has newline? */
9812 state = CCR;
9813 continue;
9815 case '\012' : /* bare LF in text? */
9816 wrap_flush(f, &ip, &eib, &op, &eob); /* they must've */
9817 wrap_eol(f, 0, &ip, &eib, &op, &eob); /* meant */
9818 wrap_bol(f,1,1, &ip, &eib, &op, &eob); /* newline... */
9819 continue;
9821 case (unsigned char) TAG_EMBED :
9822 WRAP_EMBED_PUTC(f, TAG_EMBED);
9823 WRAP_STATE(f) = state;
9824 state = TAG;
9825 continue;
9827 case ',' :
9828 if(!WRAP_QUOTED(f)){
9829 /* handle this special case in general code below */
9830 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_MAX_COL(f)
9831 && WRAP_ALLWSP(f) && WRAP_PB_OFF(f))
9832 break;
9834 if(f->n + WRAP_SPC_LEN(f) + f->f2 + 1 > WRAP_COL(f)){
9835 if(WRAP_ALLWSP(f)) /* if anything visible */
9836 wrap_flush(f, &ip, &eib, &op,
9837 &eob); /* ... blat buf'd chars */
9839 wrap_eol(f, 1, &ip, &eib, &op,
9840 &eob); /* plunk down newline */
9841 wrap_bol(f, 1, 1, &ip, &eib, &op,
9842 &eob); /* write any prefix */
9845 WRAP_PUTC(f, ',', 1); /* put out comma */
9846 wrap_flush(f, &ip, &eib, &op,
9847 &eob); /* write buf'd chars */
9848 continue;
9851 break;
9854 else if(WRAP_HANDLE_SOFT_HYPHEN(f)
9855 && (WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0)) == 1
9856 && WRAP_UTF8BUF(f, 0) == 0xC2 && c == 0xAD){
9858 * This is a soft hyphen. If there is enough space for
9859 * a real hyphen to fit on the line here then we can
9860 * flush everything up to before the soft hyphen,
9861 * and simply remember that we saw a soft hyphen.
9862 * If it turns out that we can't fit the next piece in
9863 * then wrap_eol will append a real hyphen to the line.
9864 * If we can fit another piece in it will be because we've
9865 * reached the next break point. At that point we'll flush
9866 * everything but won't include the unneeded hyphen. We erase
9867 * the fact that we saw this soft hyphen because it have
9868 * become irrelevant.
9870 * If the hyphen is the character that puts us over the edge
9871 * we go through the else case.
9874 /* erase this soft hyphen character from buffer */
9875 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
9877 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9878 if(f->f2) /* any non-lwsp to flush? */
9879 wrap_flush(f, &ip, &eib, &op, &eob);
9881 /* remember that we saw the soft hyphen */
9882 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9884 else{
9886 * Everything up to the hyphen fits, otherwise it
9887 * would have already been flushed the last time
9888 * through the loop. But the hyphen won't fit. So
9889 * we need to go back to the last line break and
9890 * break there instead. Then start a new line with
9891 * the buffered up characters and the soft hyphen.
9893 wrap_flush_embed(f, &ip, &eib, &op, &eob);
9894 wrap_eol(f, 1, &ip, &eib, &op,
9895 &eob); /* plunk down newline */
9896 wrap_bol(f,1,1, &ip, &eib, &op,
9897 &eob); /* write any prefix */
9900 * Now we're in the same situation as we would have
9901 * been above except we're on a new line. Try to
9902 * flush out the characters seen up to the hyphen.
9904 if((f->n + WRAP_SPC_LEN(f) + f->f2 + 1) <= WRAP_COL(f)){
9905 if(f->f2) /* any non-lwsp to flush? */
9906 wrap_flush(f, &ip, &eib, &op, &eob);
9908 /* remember that we saw the soft hyphen */
9909 WRAP_SAW_SOFT_HYPHEN(f) = 1;
9911 else
9912 WRAP_SAW_SOFT_HYPHEN(f) = 0;
9915 continue;
9918 full_character = 0;
9921 unsigned char *inputp;
9922 unsigned long remaining_octets;
9923 UCS ucs;
9925 if(WRAP_UTF8BUFP(f) < &WRAP_UTF8BUF(f, 0) + 6){ /* always true */
9927 *WRAP_UTF8BUFP(f)++ = c;
9928 remaining_octets = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9929 if(remaining_octets == 1 && isascii(WRAP_UTF8BUF(f, 0))){
9930 full_character++;
9931 if(c == TAB){
9932 int i = (int) f->n;
9934 while(i & 0x07)
9935 i++;
9937 width = i - f->n;
9939 else if(c < 0x80 && iscntrl((unsigned char) c))
9940 width = 2;
9941 else
9942 width = 1;
9944 else{
9945 inputp = &WRAP_UTF8BUF(f, 0);
9946 ucs = (UCS) utf8_get(&inputp, &remaining_octets);
9947 switch(ucs){
9948 case U8G_ENDSTRG: /* incomplete character, wait */
9949 case U8G_ENDSTRI: /* incomplete character, wait */
9950 width = 0;
9951 break;
9953 default:
9954 if(ucs & U8G_ERROR || ucs == UBOGON){
9956 * None of these cases is supposed to happen. If it
9957 * does happen then the input stream isn't UTF-8
9958 * so something is wrong. Writechar will treat
9959 * each octet in the input buffer as a separate
9960 * error character and print a '?' for each,
9961 * so the width will be the number of octets.
9963 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
9964 full_character++;
9966 else{
9967 /* got a character */
9968 width = wcellwidth(ucs);
9969 full_character++;
9971 if(width < 0){
9973 * This happens when we have a UTF-8 character that
9974 * we aren't able to print in our locale. For example,
9975 * if the locale is setup with the terminal
9976 * expecting ISO-8859-1 characters then there are
9977 * lots of UTF-8 characters that can't be printed.
9978 * Print a '?' instead.
9980 width = 1;
9984 break;
9988 else{
9990 * This cannot happen because an error would have
9991 * happened at least by character #6. So if we get
9992 * here there is a bug in utf8_get().
9994 if(WRAP_UTF8BUFP(f) == &WRAP_UTF8BUF(f, 0) + 6){
9995 *WRAP_UTF8BUFP(f)++ = c;
9999 * We could possibly do some more sophisticated
10000 * resynchronization here, but we aren't doing
10001 * anything in Writechar so it wouldn't match up
10002 * with that anyway. Just figure each character will
10003 * end up being printed as a ? character.
10005 width = WRAP_UTF8BUFP(f) - &WRAP_UTF8BUF(f, 0);
10006 full_character++;
10010 if(WRAP_ALLWSP(f)){
10012 * Nothing is visible yet but the first word may be too long
10013 * all by itself. We need to break early.
10015 if(f->n + WRAP_SPC_LEN(f) + f->f2 + width > WRAP_MAX_COL(f)){
10017 * A little reaching behind the curtain here.
10018 * if there's at least a preferable break point, use
10019 * it and stuff what's left back into the wrap buffer.
10020 * The "nwsp" latch is used to skip leading whitespace
10021 * The second half of the test prevents us from wrapping
10022 * at the preferred break point in the case that it
10023 * is so early in the line that it doesn't help.
10024 * That is, the width of the indent is even more than
10025 * the width of the first part before the preferred
10026 * break point. An example would be breaking after
10027 * "To:" when the indent is 4 which is > 3.
10029 if(WRAP_PB_OFF(f) && WRAP_PB_LEN(f) >= WRAP_INDENT(f)){
10030 char *p1 = f->line + WRAP_PB_OFF(f);
10031 char *p2 = f->linep;
10032 char c2;
10033 int nwsp = 0, left_after_wrap;
10035 left_after_wrap = f->f2 - WRAP_PB_LEN(f);
10037 f->f2 = WRAP_PB_LEN(f);
10038 f->linep = p1;
10040 wrap_flush(f, &ip, &eib, &op, &eob); /* flush shortened buf */
10042 /* put back rest of characters */
10043 while(p1 < p2){
10044 c2 = *p1++;
10045 if(!(c2 == ' ' || c2 == '\t') || nwsp){
10046 WRAP_PUTC(f, c2, 0);
10047 nwsp = 1;
10049 else
10050 left_after_wrap--; /* wrong if a tab! */
10053 f->f2 = MAX(left_after_wrap, 0);
10055 wrap_eol(f, 1, &ip, &eib, &op,
10056 &eob); /* plunk down newline */
10057 wrap_bol(f,1,1, &ip, &eib, &op,
10058 &eob); /* write any prefix */
10061 * What's this for?
10062 * If we do the less preferable break point at
10063 * the space we don't want to lose the fact that
10064 * we might be able to break at this comma for
10065 * the next one.
10067 if(full_character && c == ','){
10068 WRAP_PUTC(f, c, 1);
10069 wrap_flush(f, &ip, &eib, &op, &eob);
10070 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10073 else{
10074 wrap_flush(f, &ip, &eib, &op, &eob);
10076 wrap_eol(f, 1, &ip, &eib, &op,
10077 &eob); /* plunk down newline */
10078 wrap_bol(f,1,1, &ip, &eib, &op,
10079 &eob); /* write any prefix */
10083 else if((f->n + WRAP_SPC_LEN(f) + f->f2 + width) > WRAP_COL(f)){
10084 wrap_flush_embed(f, &ip, &eib, &op, &eob);
10085 wrap_eol(f, 1, &ip, &eib, &op,
10086 &eob); /* plunk down newline */
10087 wrap_bol(f,1,1, &ip, &eib, &op,
10088 &eob); /* write any prefix */
10092 * Commit entire multibyte UTF-8 character at once
10093 * instead of writing partial characters into the
10094 * buffer.
10096 if(full_character){
10097 unsigned char *q;
10099 for(q = &WRAP_UTF8BUF(f, 0); q < WRAP_UTF8BUFP(f); q++){
10100 WRAP_PUTC(f, *q, width);
10101 width = 0;
10104 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10107 break;
/* TAG: c is the byte that follows a TAG_EMBED introducer */
10109 case_tag :
10110 case TAG :
10111 WRAP_EMBED_PUTC(f, c);
10112 switch(c){
10113 case TAG_HANDLE :
10114 WRAP_EMBED(f) = -1;
10115 state = HANDLE;
10116 break;
10118 case TAG_FGCOLOR :
10119 case TAG_BGCOLOR :
10120 WRAP_EMBED(f) = RGBLEN;
10121 state = HDATA;
10122 break;
10124 default :
10125 state = WRAP_STATE(f);
10126 break;
10129 break;
/* HANDLE: c is stored as the number of data bytes that HDATA will copy */
10131 case HANDLE :
10132 WRAP_EMBED_PUTC(f, c);
10133 WRAP_EMBED(f) = c;
10134 state = HDATA;
10135 break;
/* HDATA: copy WRAP_EMBED(f) more bytes of tag data, then resume the saved state */
10137 case HDATA :
10138 if(f->f2){
10139 WRAP_PUTC(f, c, 0);
10141 else
10142 so_writec(c, WRAP_SPACES(f));
10144 if(!(WRAP_EMBED(f) -= 1)){
10145 state = WRAP_STATE(f);
10148 break;
/* input exhausted: remember the state for the next GF_DATA call */
10152 f->f1 = state;
10153 GF_END(f, f->next);
10155 else if(flg == GF_EOD){
10156 wrap_flush(f, &ip, &eib, &op, &eob);
10157 if(WRAP_COLOR(f))
10158 free_color_pair(&WRAP_COLOR(f));
10160 fs_give((void **) &f->line); /* free temp line buffer */
10161 so_give(&WRAP_SPACES(f));
10162 fs_give((void **) &f->opt); /* free wrap widths struct */
10163 (void) GF_FLUSH(f->next);
10164 (*f->next->f)(f->next, GF_EOD);
10166 else if(flg == GF_RESET){
10167 dprint((9, "-- gf_reset wrap\n"));
10168 f->f1 = BOL;
10169 f->n = 0L; /* displayed length of line so far */
10170 f->f2 = 0; /* displayed length of buffered chars */
10171 WRAP_HARD(f) = 1; /* starting at beginning of line */
10172 if(! (WRAP_S *) f->opt)
10173 f->opt = gf_wrap_filter_opt(75, 80, NULL, 0, 0);
/* keep the indent narrower than the widest permitted line */
10175 while(WRAP_INDENT(f) >= WRAP_MAX_COL(f))
10176 WRAP_INDENT(f) /= 2;
10178 f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char));
10179 f->linep = f->line;
10180 WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1];
/* mark the byte values that the DFL state treats specially */
10182 for(i = 0; i < 256; i++)
10183 ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f))
10184 || i == '\015'
10185 || i == '\012'
10186 || (i == (unsigned char) TAG_EMBED
10187 && WRAP_TAGS(f))
10188 || (i == ',' && WRAP_COMMA(f)
10189 && !WRAP_QUOTED(f))
10190 || ASCII_ISSPACE(i));
10191 WRAP_SPACES(f) = so_get(CharStar, NULL, EDIT_ACCESS);
10192 WRAP_UTF8BUFP(f) = &WRAP_UTF8BUF(f, 0);
10197 wrap_flush(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10198 unsigned char **opp, unsigned char **eobp)
10200 register char *s;
10201 register int n;
10203 s = (char *)so_text(WRAP_SPACES(f));
10204 n = so_tell(WRAP_SPACES(f));
10205 so_seek(WRAP_SPACES(f), 0L, 0);
10206 wrap_flush_s(f, s, n, WRAP_SPC_LEN(f), ipp, eibp, opp, eobp, WFE_NONE);
10207 so_truncate(WRAP_SPACES(f), 0L);
10208 WRAP_SPC_LEN(f) = 0;
10209 WRAP_TRL_SPC(f) = 0;
10210 s = f->line;
10211 n = f->linep - f->line;
10212 wrap_flush_s(f, s, n, f->f2, ipp, eibp, opp, eobp, WFE_NONE);
10213 f->f2 = 0;
10214 f->linep = f->line;
10215 WRAP_PB_OFF(f) = 0;
10216 WRAP_PB_LEN(f) = 0;
10218 return 0;
10222 wrap_flush_embed(FILTER_S *f, unsigned char **ipp, unsigned char **eibp, unsigned char **opp, unsigned char **eobp)
10224 register char *s;
10225 register int n;
10226 s = (char *)so_text(WRAP_SPACES(f));
10227 n = so_tell(WRAP_SPACES(f));
10228 so_seek(WRAP_SPACES(f), 0L, 0);
10229 wrap_flush_s(f, s, n, 0, ipp, eibp, opp, eobp, WFE_CNT_HANDLE);
10230 so_truncate(WRAP_SPACES(f), 0L);
10231 WRAP_SPC_LEN(f) = 0;
10232 WRAP_TRL_SPC(f) = 0;
10234 return 0;
10238 wrap_flush_s(FILTER_S *f, char *s, int n, int w, unsigned char **ipp,
10239 unsigned char **eibp, unsigned char **opp, unsigned char **eobp, int flags)
10241 f->n += w;
10243 for(; n > 0; n--,s++){
10244 if(*s == TAG_EMBED){
10245 if(n-- > 0){
10246 switch(*++s){
10247 case TAG_BOLDON :
10248 GF_PUTC_GLO(f->next,TAG_EMBED);
10249 GF_PUTC_GLO(f->next,TAG_BOLDON);
10250 WRAP_BOLD(f) = 1;
10251 break;
10252 case TAG_BOLDOFF :
10253 GF_PUTC_GLO(f->next,TAG_EMBED);
10254 GF_PUTC_GLO(f->next,TAG_BOLDOFF);
10255 WRAP_BOLD(f) = 0;
10256 break;
10257 case TAG_ULINEON :
10258 GF_PUTC_GLO(f->next,TAG_EMBED);
10259 GF_PUTC_GLO(f->next,TAG_ULINEON);
10260 WRAP_ULINE(f) = 1;
10261 break;
10262 case TAG_ULINEOFF :
10263 GF_PUTC_GLO(f->next,TAG_EMBED);
10264 GF_PUTC_GLO(f->next,TAG_ULINEOFF);
10265 WRAP_ULINE(f) = 0;
10266 break;
10267 case TAG_INVOFF :
10268 GF_PUTC_GLO(f->next,TAG_EMBED);
10269 GF_PUTC_GLO(f->next,TAG_INVOFF);
10270 WRAP_ANCHOR(f) = 0;
10271 break;
10272 case TAG_HANDLE :
10273 if((flags & WFE_CNT_HANDLE) == 0)
10274 GF_PUTC_GLO(f->next,TAG_EMBED);
10276 if(n-- > 0){
10277 int i = *++s;
10279 if((flags & WFE_CNT_HANDLE) == 0)
10280 GF_PUTC_GLO(f->next, TAG_HANDLE);
10282 if(i <= n){
10283 n -= i;
10285 if((flags & WFE_CNT_HANDLE) == 0)
10286 GF_PUTC_GLO(f->next, i);
10288 WRAP_ANCHOR(f) = 0;
10289 while(i-- > 0){
10290 WRAP_ANCHOR(f) = (WRAP_ANCHOR(f) * 10) + (*++s-'0');
10292 if((flags & WFE_CNT_HANDLE) == 0)
10293 GF_PUTC_GLO(f->next,*s);
10298 break;
10299 case TAG_FGCOLOR :
10300 if(pico_usingcolor() && n >= RGBLEN){
10301 int i;
10302 GF_PUTC_GLO(f->next,TAG_EMBED);
10303 GF_PUTC_GLO(f->next,TAG_FGCOLOR);
10304 if(!WRAP_COLOR(f))
10305 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10306 strncpy(WRAP_COLOR(f)->fg, s+1, RGBLEN);
10307 WRAP_COLOR(f)->fg[RGBLEN]='\0';
10308 i = RGBLEN;
10309 n -= i;
10310 while(i-- > 0)
10311 GF_PUTC_GLO(f->next,
10312 (*++s) & 0xff);
10314 break;
10315 case TAG_BGCOLOR :
10316 if(pico_usingcolor() && n >= RGBLEN){
10317 int i;
10318 GF_PUTC_GLO(f->next,TAG_EMBED);
10319 GF_PUTC_GLO(f->next,TAG_BGCOLOR);
10320 if(!WRAP_COLOR(f))
10321 WRAP_COLOR(f)=new_color_pair(NULL,NULL);
10322 strncpy(WRAP_COLOR(f)->bg, s+1, RGBLEN);
10323 WRAP_COLOR(f)->bg[RGBLEN]='\0';
10324 i = RGBLEN;
10325 n -= i;
10326 while(i-- > 0)
10327 GF_PUTC_GLO(f->next,
10328 (*++s) & 0xff);
10330 break;
10331 default :
10332 break;
10336 else if(w){
10338 if(f->n <= WRAP_MAX_COL(f)){
10339 GF_PUTC_GLO(f->next, (*s) & 0xff);
10341 else{
10342 dprint((2, "-- gf_wrap: OVERRUN: %c\n", (*s) & 0xff));
10345 WRAP_ALLWSP(f) = 0;
10349 return 0;
10353 wrap_eol(FILTER_S *f, int c, unsigned char **ipp, unsigned char **eibp,
10354 unsigned char **opp, unsigned char **eobp)
10356 if(WRAP_SAW_SOFT_HYPHEN(f)){
10357 WRAP_SAW_SOFT_HYPHEN(f) = 0;
10358 GF_PUTC_GLO(f->next, '-'); /* real hyphen */
10361 if(c && WRAP_LV_FLD(f))
10362 GF_PUTC_GLO(f->next, ' ');
10364 if(WRAP_BOLD(f)){
10365 GF_PUTC_GLO(f->next, TAG_EMBED);
10366 GF_PUTC_GLO(f->next, TAG_BOLDOFF);
10369 if(WRAP_ULINE(f)){
10370 GF_PUTC_GLO(f->next, TAG_EMBED);
10371 GF_PUTC_GLO(f->next, TAG_ULINEOFF);
10374 if(WRAP_INVERSE(f) || WRAP_ANCHOR(f)){
10375 GF_PUTC_GLO(f->next, TAG_EMBED);
10376 GF_PUTC_GLO(f->next, TAG_INVOFF);
10379 if(WRAP_COLOR_SET(f)){
10380 char *p;
10381 char cb[RGBLEN+1];
10382 GF_PUTC_GLO(f->next, TAG_EMBED);
10383 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10384 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_FORE_COLOR), sizeof(cb));
10385 cb[sizeof(cb)-1] = '\0';
10386 p = cb;
10387 for(; *p; p++)
10388 GF_PUTC_GLO(f->next, *p);
10389 GF_PUTC_GLO(f->next, TAG_EMBED);
10390 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10391 strncpy(cb, color_to_asciirgb(ps_global->VAR_NORM_BACK_COLOR), sizeof(cb));
10392 cb[sizeof(cb)-1] = '\0';
10393 p = cb;
10394 for(; *p; p++)
10395 GF_PUTC_GLO(f->next, *p);
10398 GF_PUTC_GLO(f->next, '\015');
10399 GF_PUTC_GLO(f->next, '\012');
10400 f->n = 0L;
10401 so_truncate(WRAP_SPACES(f), 0L);
10402 WRAP_SPC_LEN(f) = 0;
10403 WRAP_TRL_SPC(f) = 0;
10405 return 0;
10409 wrap_bol(FILTER_S *f, int ivar, int q, unsigned char **ipp, unsigned char **eibp,
10410 unsigned char **opp, unsigned char **eobp)
10412 int n = WRAP_MARG_L(f) + (ivar ? WRAP_INDENT(f) : 0);
10414 if(WRAP_HDR_CLR(f)){
10415 char *p;
10416 char cbuf[RGBLEN+1];
10417 int k;
10419 if((k = WRAP_MARG_L(f)) > 0)
10420 while(k-- > 0){
10421 n--;
10422 f->n++;
10423 GF_PUTC_GLO(f->next, ' ');
10426 GF_PUTC_GLO(f->next, TAG_EMBED);
10427 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10428 strncpy(cbuf,
10429 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_FORE_COLOR),
10430 sizeof(cbuf));
10431 cbuf[sizeof(cbuf)-1] = '\0';
10432 p = cbuf;
10433 for(; *p; p++)
10434 GF_PUTC_GLO(f->next, *p);
10435 GF_PUTC_GLO(f->next, TAG_EMBED);
10436 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10437 strncpy(cbuf,
10438 color_to_asciirgb(ps_global->VAR_HEADER_GENERAL_BACK_COLOR),
10439 sizeof(cbuf));
10440 cbuf[sizeof(cbuf)-1] = '\0';
10441 p = cbuf;
10442 for(; *p; p++)
10443 GF_PUTC_GLO(f->next, *p);
10446 while(n-- > 0){
10447 f->n++;
10448 GF_PUTC_GLO(f->next, ' ');
10451 WRAP_ALLWSP(f) = 1;
10453 if(q)
10454 wrap_quote_insert(f, ipp, eibp, opp, eobp);
10456 if(WRAP_BOLD(f)){
10457 GF_PUTC_GLO(f->next, TAG_EMBED);
10458 GF_PUTC_GLO(f->next, TAG_BOLDON);
10460 if(WRAP_ULINE(f)){
10461 GF_PUTC_GLO(f->next, TAG_EMBED);
10462 GF_PUTC_GLO(f->next, TAG_ULINEON);
10464 if(WRAP_INVERSE(f)){
10465 GF_PUTC_GLO(f->next, TAG_EMBED);
10466 GF_PUTC_GLO(f->next, TAG_INVON);
10468 if(WRAP_COLOR_SET(f)){
10469 char *p;
10470 if(WRAP_COLOR(f)->fg[0]){
10471 char cb[RGBLEN+1];
10472 GF_PUTC_GLO(f->next, TAG_EMBED);
10473 GF_PUTC_GLO(f->next, TAG_FGCOLOR);
10474 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->fg), sizeof(cb));
10475 cb[sizeof(cb)-1] = '\0';
10476 p = cb;
10477 for(; *p; p++)
10478 GF_PUTC_GLO(f->next, *p);
10480 if(WRAP_COLOR(f)->bg[0]){
10481 char cb[RGBLEN+1];
10482 GF_PUTC_GLO(f->next, TAG_EMBED);
10483 GF_PUTC_GLO(f->next, TAG_BGCOLOR);
10484 strncpy(cb, color_to_asciirgb(WRAP_COLOR(f)->bg), sizeof(cb));
10485 cb[sizeof(cb)-1] = '\0';
10486 p = cb;
10487 for(; *p; p++)
10488 GF_PUTC_GLO(f->next, *p);
10491 if(WRAP_ANCHOR(f)){
10492 char buf[64]; int i;
10493 GF_PUTC_GLO(f->next, TAG_EMBED);
10494 GF_PUTC_GLO(f->next, TAG_HANDLE);
10495 snprintf(buf, sizeof(buf), "%d", WRAP_ANCHOR(f));
10496 GF_PUTC_GLO(f->next, (int) strlen(buf));
10497 for(i = 0; buf[i]; i++)
10498 GF_PUTC_GLO(f->next, buf[i]);
10501 return 0;
10505 wrap_quote_insert(FILTER_S *f, unsigned char **ipp, unsigned char **eibp,
10506 unsigned char **opp, unsigned char **eobp)
10508 int j, i;
10509 COLOR_PAIR *col = NULL;
10510 char *prefix = NULL, *last_prefix = NULL;
10512 if(ps_global->VAR_QUOTE_REPLACE_STRING){
10513 get_pair(ps_global->VAR_QUOTE_REPLACE_STRING, &prefix, &last_prefix, 0, 0);
10514 if(!prefix && last_prefix){
10515 prefix = last_prefix;
10516 last_prefix = NULL;
10520 for(j = 0; j < WRAP_FL_QD(f); j++){
10521 if(WRAP_USE_CLR(f)){
10522 if((j % 3) == 0
10523 && ps_global->VAR_QUOTE1_FORE_COLOR
10524 && ps_global->VAR_QUOTE1_BACK_COLOR
10525 && (col = new_color_pair(ps_global->VAR_QUOTE1_FORE_COLOR,
10526 ps_global->VAR_QUOTE1_BACK_COLOR))
10527 && pico_is_good_colorpair(col)){
10528 GF_COLOR_PUTC(f, col);
10530 else if((j % 3) == 1
10531 && ps_global->VAR_QUOTE2_FORE_COLOR
10532 && ps_global->VAR_QUOTE2_BACK_COLOR
10533 && (col = new_color_pair(ps_global->VAR_QUOTE2_FORE_COLOR,
10534 ps_global->VAR_QUOTE2_BACK_COLOR))
10535 && pico_is_good_colorpair(col)){
10536 GF_COLOR_PUTC(f, col);
10538 else if((j % 3) == 2
10539 && ps_global->VAR_QUOTE3_FORE_COLOR
10540 && ps_global->VAR_QUOTE3_BACK_COLOR
10541 && (col = new_color_pair(ps_global->VAR_QUOTE3_FORE_COLOR,
10542 ps_global->VAR_QUOTE3_BACK_COLOR))
10543 && pico_is_good_colorpair(col)){
10544 GF_COLOR_PUTC(f, col);
10546 if(col){
10547 free_color_pair(&col);
10548 col = NULL;
10552 if(!WRAP_LV_FLD(f)){
10553 if(!WRAP_FOR_CMPS(f) && ps_global->VAR_QUOTE_REPLACE_STRING && prefix){
10554 for(i = 0; prefix[i]; i++)
10555 GF_PUTC_GLO(f->next, prefix[i]);
10556 f->n += utf8_width(prefix);
10558 else if(ps_global->VAR_REPLY_STRING
10559 && (!strcmp(ps_global->VAR_REPLY_STRING, ">")
10560 || !strcmp(ps_global->VAR_REPLY_STRING, "\">\""))){
10561 GF_PUTC_GLO(f->next, '>');
10562 f->n += 1;
10564 else{
10565 GF_PUTC_GLO(f->next, '>');
10566 GF_PUTC_GLO(f->next, ' ');
10567 f->n += 2;
10570 else{
10571 GF_PUTC_GLO(f->next, '>');
10572 f->n += 1;
10575 if(j && WRAP_LV_FLD(f)){
10576 GF_PUTC_GLO(f->next, ' ');
10577 f->n++;
10579 else if(j && last_prefix){
10580 for(i = 0; last_prefix[i]; i++)
10581 GF_PUTC_GLO(f->next, last_prefix[i]);
10582 f->n += utf8_width(last_prefix);
10585 if(prefix)
10586 fs_give((void **)&prefix);
10587 if(last_prefix)
10588 fs_give((void **)&last_prefix);
10590 return 0;
10595 * function called from the outside to set
10596 * wrap filter's width option
10598 void *
10599 gf_wrap_filter_opt(int width, int width_max, int *margin, int indent, int flags)
10601 WRAP_S *wrap;
10603 /* NOTE: variables MUST be sanity checked before they get here */
10604 wrap = (WRAP_S *) fs_get(sizeof(WRAP_S));
10605 memset(wrap, 0, sizeof(WRAP_S));
10606 wrap->wrap_col = width;
10607 wrap->wrap_max = width_max;
10608 wrap->indent = indent;
10609 wrap->margin_l = (margin) ? margin[0] : 0;
10610 wrap->margin_r = (margin) ? margin[1] : 0;
10611 wrap->tags = (GFW_HANDLES & flags) == GFW_HANDLES;
10612 wrap->on_comma = (GFW_ONCOMMA & flags) == GFW_ONCOMMA;
10613 wrap->flowed = (GFW_FLOWED & flags) == GFW_FLOWED;
10614 wrap->leave_flowed = (GFW_FLOW_RESULT & flags) == GFW_FLOW_RESULT;
10615 wrap->delsp = (GFW_DELSP & flags) == GFW_DELSP;
10616 wrap->use_color = (GFW_USECOLOR & flags) == GFW_USECOLOR;
10617 wrap->hdr_color = (GFW_HDRCOLOR & flags) == GFW_HDRCOLOR;
10618 wrap->for_compose = (GFW_FORCOMPOSE & flags) == GFW_FORCOMPOSE;
10619 wrap->handle_soft_hyphen = (GFW_SOFTHYPHEN & flags) == GFW_SOFTHYPHEN;
10621 return((void *) wrap);
10625 void *
10626 gf_url_hilite_opt(URL_HILITE_S *uh, HANDLE_S **handlesp, int flags)
10628 if(uh){
10629 memset(uh, 0, sizeof(URL_HILITE_S));
10630 uh->handlesp = handlesp;
10631 uh->hdr_color = (URH_HDRCOLOR & flags) == URH_HDRCOLOR;
10634 return((void *) uh);
/*
 * Accessors for the per-filter state gf_preflow keeps in f->opt.
 * Each expands to an lvalue inside the PREFLOW_S that (F)->opt points to.
 */
#define PF_QD(F)    (((PREFLOW_S *)(F)->opt)->quote_depth)
#define PF_QC(F)    (((PREFLOW_S *)(F)->opt)->quote_count)
#define PF_SIG(F)   (((PREFLOW_S *)(F)->opt)->sig)

typedef struct preflow_s {
    int quote_depth,    /* quote level recorded for the previous line */
        quote_count,    /* '>' characters counted on the current line */
        sig;            /* progress matching a "-- " signature line (0-4) */
} PREFLOW_S;
10649 * This would normally be handled in gf_wrap. If there is a possibility
10650 * that a url we want to recognize is cut in half by a soft newline we
10651 * want to fix that up by putting the halves back together. We do that
10652 * by deleting the soft newline and putting it all in one line. It will
10653 * still get wrapped later in gf_wrap. It isn't pretty with all the
10654 * goto's, but whatta ya gonna do?
10656 void
10657 gf_preflow(FILTER_S *f, int flg)
10659 GF_INIT(f, f->next);
10661 if(flg == GF_DATA){
10662 register unsigned char c;
10663 register int state = f->f1;
10664 register int pending = f->f2;
10666 while(GF_GETC(f, c)){
10667 switch(state){
10668 case DFL:
10669 default_case:
10670 switch(c){
10671 case ' ':
10672 state = WSPACE;
10673 break;
10675 case '\015':
10676 state = CCR;
10677 break;
10679 default:
10680 GF_PUTC(f->next, c);
10681 break;
10684 break;
10686 case CCR:
10687 switch(c){
10688 case '\012':
10689 pending = 1;
10690 state = BOL;
10691 break;
10693 default:
10694 GF_PUTC(f->next, '\012');
10695 state = DFL;
10696 goto default_case;
10697 break;
10700 break;
10702 case WSPACE:
10703 switch(c){
10704 case '\015':
10705 state = SPACECR;
10706 break;
10708 default:
10709 GF_PUTC(f->next, ' ');
10710 state = DFL;
10711 goto default_case;
10712 break;
10715 break;
10717 case SPACECR:
10718 switch(c){
10719 case '\012':
10720 pending = 2;
10721 state = BOL;
10722 break;
10724 default:
10725 GF_PUTC(f->next, ' ');
10726 GF_PUTC(f->next, '\012');
10727 state = DFL;
10728 goto default_case;
10729 break;
10732 break;
10734 case BOL:
10735 PF_QC(f) = 0;
10736 if(c == '>'){ /* count quote level */
10737 PF_QC(f)++;
10738 state = FL_QLEV;
10740 else{
10741 done_counting_quotes:
10742 if(c == ' '){ /* eat stuffed space */
10743 state = FL_STF;
10744 break;
10747 done_with_stuffed_space:
10748 if(c == '-'){ /* look for signature */
10749 PF_SIG(f) = 1;
10750 state = FL_SIG;
10751 break;
10754 done_with_sig:
10755 if(pending == 2){
10756 if(PF_QD(f) == PF_QC(f) && PF_SIG(f) < 4){
10757 /* delete pending */
10759 PF_QD(f) = PF_QC(f);
10761 /* suppress quotes, too */
10762 PF_QC(f) = 0;
10764 else{
10766 * This should have been a hard new line
10767 * instead so leave out the trailing space.
10769 GF_PUTC(f->next, '\015');
10770 GF_PUTC(f->next, '\012');
10772 PF_QD(f) = PF_QC(f);
10775 else if(pending == 1){
10776 GF_PUTC(f->next, '\015');
10777 GF_PUTC(f->next, '\012');
10778 PF_QD(f) = PF_QC(f);
10780 else{
10781 PF_QD(f) = PF_QC(f);
10784 pending = 0;
10785 state = DFL;
10786 while(PF_QC(f)-- > 0)
10787 GF_PUTC(f->next, '>');
10789 switch(PF_SIG(f)){
10790 case 0:
10791 default:
10792 break;
10794 case 1:
10795 GF_PUTC(f->next, '-');
10796 break;
10798 case 2:
10799 GF_PUTC(f->next, '-');
10800 GF_PUTC(f->next, '-');
10801 break;
10803 case 3:
10804 case 4:
10805 GF_PUTC(f->next, '-');
10806 GF_PUTC(f->next, '-');
10807 GF_PUTC(f->next, ' ');
10808 break;
10811 PF_SIG(f) = 0;
10812 goto default_case; /* to handle c */
10815 break;
10817 case FL_QLEV: /* count quote level */
10818 if(c == '>')
10819 PF_QC(f)++;
10820 else
10821 goto done_counting_quotes;
10823 break;
10825 case FL_STF: /* eat stuffed space */
10826 goto done_with_stuffed_space;
10827 break;
10829 case FL_SIG: /* deal with sig indicator */
10830 switch(PF_SIG(f)){
10831 case 1: /* saw '-' */
10832 if(c == '-')
10833 PF_SIG(f) = 2;
10834 else
10835 goto done_with_sig;
10837 break;
10839 case 2: /* saw '--' */
10840 if(c == ' ')
10841 PF_SIG(f) = 3;
10842 else
10843 goto done_with_sig;
10845 break;
10847 case 3: /* saw '-- ' */
10848 if(c == '\015')
10849 PF_SIG(f) = 4; /* it really is a sig line */
10851 goto done_with_sig;
10852 break;
10855 break;
10859 f->f1 = state;
10860 f->f2 = pending;
10861 GF_END(f, f->next);
10863 else if(flg == GF_EOD){
10864 fs_give((void **) &f->opt);
10865 (void) GF_FLUSH(f->next);
10866 (*f->next->f)(f->next, GF_EOD);
10868 else if(flg == GF_RESET){
10869 PREFLOW_S *pf;
10871 pf = (PREFLOW_S *) fs_get(sizeof(*pf));
10872 memset(pf, 0, sizeof(*pf));
10873 f->opt = (void *) pf;
10875 f->f1 = BOL; /* state */
10876 f->f2 = 0; /* pending */
10877 PF_QD(f) = 0; /* quote depth */
10878 PF_QC(f) = 0; /* quote count */
10879 PF_SIG(f) = 0; /* sig level */
/*
 * LINE PREFIX FILTER - insert given text at beginning of each
 * line
 */

/*
 * Write the NUL-terminated string s, one character at a time, to the
 * filter after "f".  A NULL s writes nothing.  Relies on a variable named
 * f, and on whatever GF_PUTC needs, being in scope at the point of use.
 */
#define GF_PREFIX_WRITE(s)      { \
                                    register char *p; \
                                    if((p = (s)) != NULL) \
                                      while(*p) \
                                        GF_PUTC(f->next, *p++); \
                                }
10901 * the simple filter, prepends each line with the requested prefix.
10902 * if prefix is null, does nothing, and as with all filters, assumes
10903 * NVT end of lines.
10905 void
10906 gf_prefix(FILTER_S *f, int flg)
10908 GF_INIT(f, f->next);
10910 if(flg == GF_DATA){
10911 register unsigned char c;
10912 register int state = f->f1;
10913 register int first = f->f2;
10915 while(GF_GETC(f, c)){
10917 if(first){ /* write initial prefix!! */
10918 first = 0; /* but just once */
10919 GF_PREFIX_WRITE((char *) f->opt);
10923 * State == 0 is the starting state and the usual state.
10924 * State == 1 means we saw a CR and haven't acted on it yet.
10925 * We are looking for a LF to get the CRLF end of line.
10926 * However, we also treat bare CR and bare LF as if they
10927 * were CRLF sequences. What else could it mean in text?
10928 * This filter is only used for text so that is probably
10929 * a reasonable interpretation of the bad input.
10931 if(c == '\015'){ /* CR */
10932 if(state){ /* Treat pending CR as endofline, */
10933 GF_PUTC(f->next, '\015'); /* and remain in saw-a-CR state. */
10934 GF_PUTC(f->next, '\012');
10935 GF_PREFIX_WRITE((char *) f->opt);
10937 else{
10938 state = 1;
10941 else if(c == '\012'){ /* LF */
10942 GF_PUTC(f->next, '\015'); /* Got either a CRLF or a bare LF, */
10943 GF_PUTC(f->next, '\012'); /* treat both as if a CRLF. */
10944 GF_PREFIX_WRITE((char *) f->opt);
10945 state = 0;
10947 else{ /* any other character */
10948 if(state){
10949 GF_PUTC(f->next, '\015'); /* Treat pending CR as endofline. */
10950 GF_PUTC(f->next, '\012');
10951 GF_PREFIX_WRITE((char *) f->opt);
10952 state = 0;
10955 GF_PUTC(f->next, c);
10959 f->f1 = state; /* save state for next chunk of data */
10960 f->f2 = first;
10961 GF_END(f, f->next);
10963 else if(flg == GF_EOD){
10964 (void) GF_FLUSH(f->next);
10965 (*f->next->f)(f->next, GF_EOD);
10967 else if(flg == GF_RESET){
10968 dprint((9, "-- gf_reset prefix\n"));
10969 f->f1 = 0;
10970 f->f2 = 1; /* nothing written yet */
/*
 * function called from the outside to set
 * prefix filter's prefix string
 *
 * Args: prefix -- string to put at the start of each line, or NULL
 *
 * Returns: the same pointer as a void *.  No copy is made, so the string
 *          has to outlive the filter that uses it.
 */
void *
gf_prefix_opt(char *prefix)
{
    return((void *) prefix);
}
10987 * LINE TEST FILTER - accumulate lines and offer each to the provided
10988 * test function.
10991 typedef struct _linetest_s {
10992 linetest_t f;
10993 void *local;
10994 } LINETEST_S;
/* accumulator growth increment */
#define LINE_TEST_BLOCK 1024

/* address of the last byte of the line accumulator ((f)->f2 is its size) */
#define GF_LINE_TEST_EOB(f) \
        ((f)->line + ((f)->f2 - 1))

/*
 * Append c to the line accumulator, growing it by LINE_TEST_BLOCK first
 * when the write pointer has reached the last byte (which stays free for
 * a terminator).  Uses and updates the local variables p (write pointer)
 * and eobuf (end of buffer) of the function it is expanded in.
 */
#define GF_LINE_TEST_ADD(f, c) \
        { \
            if(p >= eobuf){ \
                (f)->f2 += LINE_TEST_BLOCK; \
                fs_resize((void **)&(f)->line, \
                          (size_t) (f)->f2 * sizeof(char)); \
                eobuf = GF_LINE_TEST_EOB(f); \
                p = eobuf - LINE_TEST_BLOCK; \
            } \
            *p++ = (c); \
        }
/*
 * Terminate the accumulated line at p, hand it to the test function in
 * (F)->opt and store the result in D:
 *
 *   D <  0  release the line buffer and options, then gf_error()
 *   D == 0 or 1
 *           write the line to the next filter, applying the insertion
 *           list the test function built: an entry with len > 0 inserts
 *           its text at "where", an entry with len < 0 skips -len bytes
 *           of the line there; entries left over are appended at the end
 *   D >= 2  write nothing
 *
 * The insertion list is released on every path.  Uses the local variable
 * p of the function it is expanded in; does NOT write the line's CRLF.
 */
#define GF_LINE_TEST_TEST(F, D) \
        { \
            unsigned char  c; \
            register char *cp; \
            register int   l; \
            LT_INS_S      *ins = NULL, *insp; \
            *p = '\0'; \
            (D) = (*((LINETEST_S *) (F)->opt)->f)((F)->n++, \
                                           (F)->line, &ins, \
                                           ((LINETEST_S *) (F)->opt)->local); \
            if((D) < 2){ \
                if((D) < 0){ \
                    gf_line_test_free_ins(&ins); \
                    if((F)->line) \
                      fs_give((void **) &(F)->line); \
                    if((F)->opt) \
                      fs_give((void **) &(F)->opt); \
                    gf_error(_("translation error")); \
                    /* NO RETURN */ \
                } \
                for(insp = ins, cp = (F)->line; cp < p; ){ \
                    if(insp && cp == insp->where){ \
                        if(insp->len > 0){ \
                            for(l = 0; l < insp->len; l++){ \
                                c = (unsigned char) insp->text[l]; \
                                GF_PUTC((F)->next, c); \
                            } \
                            insp = insp->next; \
                            continue; \
                        } else if(insp->len < 0){ \
                            cp -= insp->len; \
                            insp = insp->next; \
                            continue; \
                        } \
                    } \
                    GF_PUTC((F)->next, *cp); \
                    cp++; \
                } \
                while(insp){ \
                    for(l = 0; l < insp->len; l++){ \
                        c = (unsigned char) insp->text[l]; \
                        GF_PUTC((F)->next, c); \
                    } \
                    insp = insp->next; \
                } \
            } \
            gf_line_test_free_ins(&ins); \
        }
11066 * this simple filter accumulates characters until a newline, offers it
11067 * to the provided test function, and then passes it on. It assumes
11068 * NVT EOLs.
11070 void
11071 gf_line_test(FILTER_S *f, int flg)
11073 register char *p = f->linep;
11074 register char *eobuf = GF_LINE_TEST_EOB(f);
11075 GF_INIT(f, f->next);
11077 if(flg == GF_DATA){
11078 register unsigned char c;
11079 register int state = f->f1;
11081 while(GF_GETC(f, c)){
11083 if(state){
11084 state = 0;
11085 if(c == '\012'){
11086 int done;
11088 GF_LINE_TEST_TEST(f, done);
11090 p = (f)->line;
11092 if(done == 2) /* skip this line! */
11093 continue;
11095 GF_PUTC(f->next, '\015');
11096 GF_PUTC(f->next, '\012');
11098 * if the line tester returns TRUE, it's
11099 * telling us its seen enough and doesn't
11100 * want to see any more. Remove ourself
11101 * from the pipeline...
11103 if(done){
11104 if(gf_master == f){
11105 gf_master = f->next;
11107 else{
11108 FILTER_S *fprev;
11110 for(fprev = gf_master;
11111 fprev && fprev->next != f;
11112 fprev = fprev->next)
11115 if(fprev) /* wha??? */
11116 fprev->next = f->next;
11117 else
11118 continue;
11121 while(GF_GETC(f, c)) /* pass input */
11122 GF_PUTC(f->next, c);
11124 (void) GF_FLUSH(f->next); /* and drain queue */
11125 fs_give((void **)&f->line);
11126 fs_give((void **)&f); /* wax our data */
11127 return;
11129 else
11130 continue;
11132 else /* add CR to buffer */
11133 GF_LINE_TEST_ADD(f, '\015');
11134 } /* fall thru to handle 'c' */
11136 if(c == '\015') /* newline? */
11137 state = 1;
11138 else
11139 GF_LINE_TEST_ADD(f, c);
11142 f->f1 = state;
11143 GF_END(f, f->next);
11145 else if(flg == GF_EOD){
11146 int i;
11148 GF_LINE_TEST_TEST(f, i); /* examine remaining data */
11149 fs_give((void **) &f->line); /* free line buffer */
11150 fs_give((void **) &f->opt); /* free test struct */
11151 (void) GF_FLUSH(f->next);
11152 (*f->next->f)(f->next, GF_EOD);
11154 else if(flg == GF_RESET){
11155 dprint((9, "-- gf_reset line_test\n"));
11156 f->f1 = 0; /* state */
11157 f->n = 0L; /* line number */
11158 f->f2 = LINE_TEST_BLOCK; /* size of alloc'd line */
11159 f->line = p = (char *) fs_get(f->f2 * sizeof(char));
11162 f->linep = p;
11167 * function called from the outside to operate on accumulated line.
11169 void *
11170 gf_line_test_opt(linetest_t test_f, void *local)
11172 LINETEST_S *ltp;
11174 ltp = (LINETEST_S *) fs_get(sizeof(LINETEST_S));
11175 memset(ltp, 0, sizeof(LINETEST_S));
11176 ltp->f = test_f;
11177 ltp->local = local;
11178 return((void *) ltp);
11183 LT_INS_S **
11184 gf_line_test_new_ins(LT_INS_S **ins, char *p, char *s, int n)
11186 *ins = (LT_INS_S *) fs_get(sizeof(LT_INS_S));
11187 if(((*ins)->len = n) > 0)
11188 strncpy((*ins)->text = (char *) fs_get(n * sizeof(char)), s, n);
11189 else
11190 (*ins)->text = NULL;
11192 (*ins)->where = p;
11193 (*ins)->next = NULL;
11194 return(&(*ins)->next);
11198 void
11199 gf_line_test_free_ins(LT_INS_S **ins)
11201 if(ins && *ins){
11202 if((*ins)->next)
11203 gf_line_test_free_ins(&(*ins)->next);
11205 if((*ins)->text)
11206 fs_give((void **) &(*ins)->text);
11208 fs_give((void **) ins);
11214 * PREPEND EDITORIAL FILTER - conditionally prepend output text
11215 * with editorial comment
11218 typedef struct _preped_s {
11219 prepedtest_t f;
11220 char *text;
11221 } PREPED_S;
11225 * gf_prepend_editorial - accumulate filtered text and prepend its
11226 * output with given text
11230 void
11231 gf_prepend_editorial(FILTER_S *f, int flg)
11233 GF_INIT(f, f->next);
11235 if(flg == GF_DATA){
11236 register unsigned char c;
11238 while(GF_GETC(f, c)){
11239 so_writec(c, (STORE_S *) f->data);
11242 GF_END(f, f->next);
11244 else if(flg == GF_EOD){
11245 unsigned char c;
11247 if(!((PREPED_S *)(f)->opt)->f || (*((PREPED_S *)(f)->opt)->f)()){
11248 char *p = ((PREPED_S *)(f)->opt)->text;
11250 for( ; p && *p; p++)
11251 GF_PUTC(f->next, *p);
11254 so_seek((STORE_S *) f->data, 0L, 0);
11255 while(so_readc(&c, (STORE_S *) f->data)){
11256 GF_PUTC(f->next, c);
11259 so_give((STORE_S **) &f->data);
11260 fs_give((void **) &f->opt);
11261 (void) GF_FLUSH(f->next);
11262 (*f->next->f)(f->next, GF_EOD);
11264 else if(flg == GF_RESET){
11265 dprint((9, "-- gf_reset line_test\n"));
11266 f->data = (void *) so_get(CharStar, NULL, EDIT_ACCESS);
11272 * function called from the outside to setup prepending editorial
11273 * to output text
11275 void *
11276 gf_prepend_editorial_opt(prepedtest_t test_f, char *text)
11278 PREPED_S *pep;
11280 pep = (PREPED_S *) fs_get(sizeof(PREPED_S));
11281 memset(pep, 0, sizeof(PREPED_S));
11282 pep->f = test_f;
11283 pep->text = text;
11284 return((void *) pep);
11289 * Network virtual terminal to local newline convention filter
11291 void
11292 gf_nvtnl_local(FILTER_S *f, int flg)
11294 GF_INIT(f, f->next);
11296 if(flg == GF_DATA){
11297 register unsigned char c;
11298 register int state = f->f1;
11300 while(GF_GETC(f, c)){
11301 if(state){
11302 state = 0;
11303 if(c == '\012'){
11304 GF_PUTC(f->next, '\012');
11305 continue;
11307 else
11308 GF_PUTC(f->next, '\015');
11309 /* fall thru to deal with 'c' */
11312 if(c == '\015')
11313 state = 1;
11314 else
11315 GF_PUTC(f->next, c);
11318 f->f1 = state;
11319 GF_END(f, f->next);
11321 else if(flg == GF_EOD){
11322 (void) GF_FLUSH(f->next);
11323 (*f->next->f)(f->next, GF_EOD);
11325 else if(flg == GF_RESET){
11326 dprint((9, "-- gf_reset nvtnl_local\n"));
11327 f->f1 = 0;
11333 * local to network newline convention filter
11335 void
11336 gf_local_nvtnl(FILTER_S *f, int flg)
11338 GF_INIT(f, f->next);
11340 if(flg == GF_DATA){
11341 register unsigned char c;
11343 while(GF_GETC(f, c)){
11344 if(c == '\012'){
11345 GF_PUTC(f->next, '\015');
11346 GF_PUTC(f->next, '\012');
11348 else if(c != '\015') /* do not copy isolated \015 into source */
11349 GF_PUTC(f->next, c);
11352 GF_END(f, f->next);
11354 else if(flg == GF_EOD){
11355 (void) GF_FLUSH(f->next);
11356 (*f->next->f)(f->next, GF_EOD);
11358 else if(GF_RESET){
11359 dprint((9, "-- gf_reset local_nvtnl\n"));
11360 /* no op */