8 #define _GNU_SOURCE /* strcasestr() */
18 #include "bfu/listmenu.h"
20 #include "bookmarks/bookmarks.h"
21 #include "config/options.h"
22 #include "config/kbdbind.h"
23 #include "document/html/frames.h"
24 #include "document/html/parser/link.h"
25 #include "document/html/parser/parse.h"
26 #include "document/html/parser/stack.h"
27 #include "document/html/parser.h"
28 #include "document/html/renderer.h"
29 #include "globhist/globhist.h"
30 #include "mime/mime.h"
31 #include "protocol/uri.h"
32 #include "util/conv.h"
33 #include "util/error.h"
34 #include "util/memdebug.h"
35 #include "util/memory.h"
36 #include "util/string.h"
39 #include "document/html/internal.h"
43 html_a(struct html_context
*html_context
, unsigned char *a
,
44 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
48 href
= get_url_val(a
, "href", html_context
->options
->cp
);
50 unsigned char *target
;
52 mem_free_set(&format
.link
,
53 join_urls(html_context
->base_href
,
54 trim_chars(href
, ' ', 0)));
58 target
= get_target(html_context
->options
, a
);
60 mem_free_set(&format
.target
, target
);
62 mem_free_set(&format
.target
, stracpy(html_context
->base_target
));
66 ; /* Shut up compiler */
67 #ifdef CONFIG_GLOBHIST
68 } else if (get_global_history_item(format
.link
)) {
69 format
.style
.fg
= format
.vlink
;
70 html_top
->pseudo_class
&= ~ELEMENT_LINK
;
71 html_top
->pseudo_class
|= ELEMENT_VISITED
;
73 #ifdef CONFIG_BOOKMARKS
74 } else if (get_bookmark(format
.link
)) {
75 format
.style
.fg
= format
.bookmark_link
;
76 html_top
->pseudo_class
&= ~ELEMENT_VISITED
;
77 /* XXX: Really set ELEMENT_LINK? --pasky */
78 html_top
->pseudo_class
|= ELEMENT_LINK
;
81 format
.style
.fg
= format
.clink
;
82 html_top
->pseudo_class
&= ~ELEMENT_VISITED
;
83 html_top
->pseudo_class
|= ELEMENT_LINK
;
86 mem_free_set(&format
.title
,
87 get_attr_val(a
, "title", html_context
->options
->cp
));
89 html_focusable(html_context
, a
);
92 pop_html_element(html_context
);
95 set_fragment_identifier(html_context
, a
, "name");
/* Returns an allocated string made after @label
 * but limited to @max_len length, by truncating
 * the middle of @label string, which is replaced
 * by an asterisk ('*').
 * If @max_len < 0 it returns NULL.
 * If @max_len == 0, or @label is not longer than @max_len,
 * it returns an unmodified copy of @label.
 * In any case, it may return NULL if a memory
 * allocation failure occurs.
 * Example:
 * truncate_label("some_string", 5) => "so*ng" */
109 static unsigned char *
110 truncate_label(unsigned char *label
, int max_len
)
112 unsigned char *new_label
;
113 int len
= strlen(label
);
117 if (max_len
< 0) return NULL
;
118 if (max_len
== 0 || len
<= max_len
)
119 return stracpy(label
);
121 right_part_len
= left_part_len
= max_len
/ 2;
123 if (left_part_len
+ right_part_len
+ 1 > max_len
)
126 new_label
= mem_alloc(max_len
+ 1);
127 if (!new_label
) return NULL
;
130 memcpy(new_label
, label
, left_part_len
);
132 new_label
[left_part_len
] = '*';
135 memcpy(new_label
+ left_part_len
+ 1,
136 label
+ len
- right_part_len
, right_part_len
);
138 new_label
[max_len
] = '\0';
143 /* Get image filename from its src attribute. */
144 static unsigned char *
145 get_image_filename_from_src(int max_len
, unsigned char *src
)
147 unsigned char *text
= NULL
;
148 unsigned char *start
, *filename
;
151 if (!src
) return NULL
;
152 /* We can display image as [foo.gif]. */
154 len
= strcspn(src
, "?");
156 for (start
= src
+ len
; start
> src
; start
--)
157 if (dir_sep(start
[-1])) {
163 filename
= memacpy(start
, len
);
/* XXX: Due to a compatibility alias (added: 2004-12-15 in
 * 0.10pre3.CVS for document.browse.images.file_tags) @max_len
 * can be negative here, in which case truncate_label() returns NULL. */
168 text
= truncate_label(filename
, max_len
);
176 /* Returns an allocated string containing formatted @label. */
177 static unsigned char *
178 get_image_label(int max_len
, unsigned char *label
)
180 unsigned char *formatted_label
;
182 if (!label
) return NULL
;
184 formatted_label
= truncate_label(label
, max_len
);
187 return formatted_label
;
191 put_image_label(unsigned char *a
, unsigned char *label
,
192 struct html_context
*html_context
)
196 /* This is not 100% appropriate for <img>, but well, accepting
197 * accesskey and tabindex near <img> is just our little
198 * extension to the standard. After all, it makes sense. */
199 html_focusable(html_context
, a
);
201 fg
= format
.style
.fg
;
202 format
.style
.fg
= format
.image_link
;
203 put_chrs(html_context
, label
, strlen(label
));
204 format
.style
.fg
= fg
;
208 html_img_do(unsigned char *a
, unsigned char *object_src
,
209 struct html_context
*html_context
)
211 int ismap
, usemap
= 0;
212 int add_brackets
= 0;
213 unsigned char *src
= NULL
;
214 unsigned char *label
= NULL
;
215 unsigned char *usemap_attr
;
216 struct document_options
*options
= html_context
->options
;
217 int display_style
= options
->image_link
.display_style
;
218 int cp
= html_context
->part
->document
->cp
;
220 /* Note about display_style:
221 * 0 means always display IMG
222 * 1 means always display filename
223 * 2 means display alt/title attribute if possible, IMG if not
224 * 3 means display alt/title attribute if possible, filename if not */
226 usemap_attr
= get_attr_val(a
, "usemap", options
->cp
);
228 unsigned char *joined_urls
= join_urls(html_context
->base_href
,
230 unsigned char *map_url
;
232 mem_free(usemap_attr
);
233 if (!joined_urls
) return;
234 map_url
= straconcat("MAP@", joined_urls
, NULL
);
235 mem_free(joined_urls
);
236 if (!map_url
) return;
238 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
239 mem_free_set(&format
.link
, map_url
);
241 format
.style
.attr
|= AT_BOLD
;
246 && has_attr(a
, "ismap", options
->cp
)
249 if (display_style
== 2 || display_style
== 3) {
250 label
= get_attr_val(a
, "alt", cp
);
252 label
= get_attr_val(a
, "title", options
->cp
);
/* Little hack to preserve rendering of [ ] in directory listings
 * (labels of 5 characters or fewer are left untouched), but we still
 * want to drop extra spaces in the alt or title attribute to limit
 * display width on certain websites. --Zas */
257 if (label
&& strlen(label
) > 5) clr_spaces(label
);
260 src
= null_or_stracpy(object_src
);
261 if (!src
) src
= get_url_val(a
, "src", options
->cp
);
262 if (!src
) src
= get_url_val(a
, "dynsrc", options
->cp
);
/* If we have no label yet (no title or alt), just use
 * a default one, or the image filename. */
266 if (!label
|| !*label
) {
267 mem_free_set(&label
, NULL
);
/* Do we want to display images with no alt/title and with no
 * link on them?
 * If not, just exit now. */
271 if (!options
->images
&& !format
.link
) {
273 if (usemap
) pop_html_element(html_context
);
280 label
= stracpy("USEMAP");
282 label
= stracpy("ISMAP");
284 if (display_style
== 3)
285 label
= get_image_filename_from_src(options
->image_link
.filename_maxlen
, src
);
289 label
= get_image_label(options
->image_link
.label_maxlen
, label
);
292 if (!label
|| !*label
) {
293 mem_free_set(&label
, NULL
);
295 if (display_style
== 1)
296 label
= get_image_filename_from_src(options
->image_link
.filename_maxlen
, src
);
297 if (!label
|| !*label
)
298 mem_free_set(&label
, stracpy("IMG"));
301 mem_free_set(&format
.image
, NULL
);
302 mem_free_set(&format
.title
, NULL
);
305 int img_link_tag
= options
->image_link
.tagging
;
307 if (img_link_tag
&& (img_link_tag
== 2 || add_brackets
)) {
308 unsigned char *img_link_prefix
= options
->image_link
.prefix
;
309 unsigned char *img_link_suffix
= options
->image_link
.suffix
;
310 unsigned char *new_label
= straconcat(img_link_prefix
, label
, img_link_suffix
, NULL
);
312 if (new_label
) mem_free_set(&label
, new_label
);
315 if (!options
->image_link
.show_any_as_links
) {
316 put_image_label(a
, label
, html_context
);
320 format
.image
= join_urls(html_context
->base_href
, src
);
323 format
.title
= get_attr_val(a
, "title", options
->cp
);
326 unsigned char *new_link
;
328 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
329 new_link
= straconcat(format
.link
, "?0,0", NULL
);
331 mem_free_set(&format
.link
, new_link
);
334 put_image_label(a
, label
, html_context
);
336 if (ismap
) pop_html_element(html_context
);
337 mem_free_set(&format
.image
, NULL
);
338 mem_free_set(&format
.title
, NULL
);
345 if (usemap
) pop_html_element(html_context
);
349 html_img(struct html_context
*html_context
, unsigned char *a
,
350 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
352 html_img_do(a
, NULL
, html_context
);
356 put_link_line(unsigned char *prefix
, unsigned char *linkname
,
357 unsigned char *link
, unsigned char *target
,
358 struct html_context
*html_context
)
360 html_context
->has_link_lines
= 1;
361 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
362 ln_break(html_context
, 1);
363 mem_free_set(&format
.link
, NULL
);
364 mem_free_set(&format
.target
, NULL
);
365 mem_free_set(&format
.title
, NULL
);
367 put_chrs(html_context
, prefix
, strlen(prefix
));
368 format
.link
= join_urls(html_context
->base_href
, link
);
369 format
.target
= stracpy(target
);
370 format
.style
.fg
= format
.clink
;
371 put_chrs(html_context
, linkname
, strlen(linkname
));
372 ln_break(html_context
, 1);
373 pop_html_element(html_context
);
378 html_applet(struct html_context
*html_context
, unsigned char *a
,
379 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
381 unsigned char *code
, *alt
;
383 code
= get_url_val(a
, "code", html_context
->options
->cp
);
386 alt
= get_attr_val(a
, "alt", html_context
->options
->cp
);
388 html_focusable(html_context
, a
);
391 put_link_line("Applet: ", alt
, code
,
392 html_context
->options
->framename
, html_context
);
394 put_link_line("", "Applet", code
,
395 html_context
->options
->framename
, html_context
);
403 html_iframe_do(unsigned char *a
, unsigned char *object_src
,
404 struct html_context
*html_context
)
406 unsigned char *name
, *url
= NULL
;
408 url
= null_or_stracpy(object_src
);
409 if (!url
) url
= get_url_val(a
, "src", html_context
->options
->cp
);
412 name
= get_attr_val(a
, "name", html_context
->options
->cp
);
413 if (!name
) name
= get_attr_val(a
, "id", html_context
->options
->cp
);
414 if (!name
) name
= stracpy("");
420 html_focusable(html_context
, a
);
423 put_link_line("IFrame: ", name
, url
,
424 html_context
->options
->framename
, html_context
);
426 put_link_line("", "IFrame", url
,
427 html_context
->options
->framename
, html_context
);
435 html_iframe(struct html_context
*html_context
, unsigned char *a
,
436 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
438 html_iframe_do(a
, NULL
, html_context
);
442 html_object(struct html_context
*html_context
, unsigned char *a
,
443 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
445 unsigned char *type
, *url
;
447 /* This is just some dirty wrapper. We emulate various things through
448 * this, which is anyway in the spirit of <object> element, unifying
449 * <img> and <iframe> etc. */
451 url
= get_url_val(a
, "data", html_context
->options
->cp
);
452 if (!url
) url
= get_url_val(a
, "codebase", html_context
->options
->cp
);
455 type
= get_attr_val(a
, "type", html_context
->options
->cp
);
456 if (!type
) { mem_free(url
); return; }
458 if (!strncasecmp(type
, "text/", 5)) {
459 /* We will just emulate <iframe>. */
460 html_iframe_do(a
, url
, html_context
);
461 html_skip(html_context
, a
);
463 } else if (!strncasecmp(type
, "image/", 6)) {
464 /* <img> emulation. */
465 /* TODO: Use the enclosed text as 'alt' attribute. */
466 html_img_do(a
, url
, html_context
);
470 name
= get_attr_val(a
, "standby", html_context
->options
->cp
);
472 html_focusable(html_context
, a
);
475 put_link_line("Object: ", name
, url
,
476 html_context
->options
->framename
,
479 put_link_line("Object: ", type
, url
,
480 html_context
->options
->framename
,
492 html_embed(struct html_context
*html_context
, unsigned char *a
,
493 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
495 unsigned char *type
, *extension
;
496 unsigned char *object_src
;
498 /* This is just some dirty wrapper. We emulate various things through
499 * this, which is anyway in the spirit of <object> element, unifying
500 * <img> and <iframe> etc. */
502 object_src
= get_url_val(a
, "src", html_context
->options
->cp
);
503 if (!object_src
|| !*object_src
) {
504 mem_free_set(&object_src
, NULL
);
/* If there is no extension we want to get the default mime/type
 * anyway, so fall back to looking up the whole @object_src. */
510 extension
= strrchr(object_src
, '.');
511 if (!extension
) extension
= object_src
;
513 type
= get_extension_content_type(extension
);
514 if (type
&& !strncasecmp(type
, "image/", 6)) {
515 html_img_do(a
, object_src
, html_context
);
517 /* We will just emulate <iframe>. */
518 html_iframe_do(a
, object_src
, html_context
);
522 mem_free_set(&object_src
, NULL
);
/*
 * Link types:

Alternate
	Designates substitute versions for the document in which the link
	occurs. When used together with the lang attribute, it implies a
	translated version of the document. When used together with the
	media attribute, it implies a version designed for a different
	medium (or media).

Stylesheet
	Refers to an external style sheet. See the section on external style
	sheets for details. This is used together with the link type
	"Alternate" for user-selectable alternate style sheets.

Start
	Refers to the first document in a collection of documents. This link
	type tells search engines which document is considered by the author
	to be the starting point of the collection.

Next
	Refers to the next document in a linear sequence of documents. User
	agents may choose to preload the "next" document, to reduce the
	perceived load time.

Prev
	Refers to the previous document in an ordered series of documents.
	Some user agents also support the synonym "Previous".

Contents
	Refers to a document serving as a table of contents.
	Some user agents also support the synonym ToC (from "Table of Contents").

Index
	Refers to a document providing an index for the current document.

Glossary
	Refers to a document providing a glossary of terms that pertain to the
	current document.

Copyright
	Refers to a copyright statement for the current document.

Chapter
	Refers to a document serving as a chapter in a collection of documents.

Section
	Refers to a document serving as a section in a collection of documents.

Subsection
	Refers to a document serving as a subsection in a collection of
	documents.

Appendix
	Refers to a document serving as an appendix in a collection of
	documents.

Help
	Refers to a document offering help (more information, links to other
	sources of information, etc.)

Bookmark
	Refers to a bookmark. A bookmark is a link to a key entry point
	within an extended document. The title attribute may be used, for
	example, to label the bookmark. Note that several bookmarks may be
	defined in each document.

Some more were added, like top. --Zas */
617 LT_ALTERNATE_STYLESHEET
,
621 enum hlink_direction
{
628 enum hlink_type type
;
629 enum hlink_direction direction
;
630 unsigned char *content_type
;
631 unsigned char *media
;
633 unsigned char *hreflang
;
634 unsigned char *title
;
637 /* Not implemented yet.
638 unsigned char *charset;
639 unsigned char *target;
641 unsigned char *class;
646 struct lt_default_name
{
647 enum hlink_type type
;
652 /* XXX: Keep the (really really ;) default name first */
653 static struct lt_default_name lt_names
[] = {
654 { LT_START
, "start" },
656 { LT_START
, "home" },
657 { LT_PARENT
, "parent" },
660 { LT_PREV
, "previous" },
662 { LT_CONTENTS
, "contents" },
663 { LT_CONTENTS
, "toc" },
664 { LT_INDEX
, "index" },
665 { LT_GLOSSARY
, "glossary" },
666 { LT_CHAPTER
, "chapter" },
667 { LT_SECTION
, "section" },
668 { LT_SUBSECTION
, "subsection" },
669 { LT_SUBSECTION
, "child" },
670 { LT_SUBSECTION
, "sibling" },
671 { LT_APPENDIX
, "appendix" },
673 { LT_SEARCH
, "search" },
674 { LT_BOOKMARK
, "bookmark" },
675 { LT_ALTERNATE_LANG
, "alt. language" },
676 { LT_ALTERNATE_MEDIA
, "alt. media" },
677 { LT_ALTERNATE_STYLESHEET
, "alt. stylesheet" },
678 { LT_STYLESHEET
, "stylesheet" },
679 { LT_ALTERNATE
, "alternate" },
680 { LT_COPYRIGHT
, "copyright" },
681 { LT_AUTHOR
, "author" },
682 { LT_AUTHOR
, "made" },
683 { LT_AUTHOR
, "owner" },
688 /* Search for default name for this link according to its type. */
689 static unsigned char *
690 get_lt_default_name(struct hlink
*link
)
692 struct lt_default_name
*entry
= lt_names
;
696 while (entry
&& entry
->str
) {
697 if (entry
->type
== link
->type
) return entry
->str
;
705 html_link_clear(struct hlink
*link
)
709 mem_free_if(link
->content_type
);
710 mem_free_if(link
->media
);
711 mem_free_if(link
->href
);
712 mem_free_if(link
->hreflang
);
713 mem_free_if(link
->title
);
714 mem_free_if(link
->lang
);
715 mem_free_if(link
->name
);
717 memset(link
, 0, sizeof(*link
));
720 /* Parse a link and return results in @link.
721 * It tries to identify known types. */
723 html_link_parse(struct html_context
*html_context
, unsigned char *a
,
729 memset(link
, 0, sizeof(*link
));
731 link
->href
= get_url_val(a
, "href", html_context
->options
->cp
);
732 if (!link
->href
) return 0;
734 link
->lang
= get_attr_val(a
, "lang", html_context
->options
->cp
);
735 link
->hreflang
= get_attr_val(a
, "hreflang", html_context
->options
->cp
);
736 link
->title
= get_attr_val(a
, "title", html_context
->options
->cp
);
737 link
->content_type
= get_attr_val(a
, "type", html_context
->options
->cp
);
738 link
->media
= get_attr_val(a
, "media", html_context
->options
->cp
);
740 link
->name
= get_attr_val(a
, "rel", html_context
->options
->cp
);
742 link
->direction
= LD_REL
;
744 link
->name
= get_attr_val(a
, "rev", html_context
->options
->cp
);
745 if (link
->name
) link
->direction
= LD_REV
;
748 if (!link
->name
) return 1;
751 for (i
= 0; lt_names
[i
].str
; i
++)
752 if (!strcasecmp(link
->name
, lt_names
[i
].str
)) {
753 link
->type
= lt_names
[i
].type
;
757 if (strcasestr(link
->name
, "icon") ||
758 (link
->content_type
&& strcasestr(link
->content_type
, "icon"))) {
759 link
->type
= LT_ICON
;
761 } else if (strcasestr(link
->name
, "alternate")) {
762 link
->type
= LT_ALTERNATE
;
764 link
->type
= LT_ALTERNATE_LANG
;
765 else if (strcasestr(link
->name
, "stylesheet") ||
766 (link
->content_type
&& strcasestr(link
->content_type
, "css")))
767 link
->type
= LT_ALTERNATE_STYLESHEET
;
768 else if (link
->media
)
769 link
->type
= LT_ALTERNATE_MEDIA
;
771 } else if (link
->content_type
&& strcasestr(link
->content_type
, "css")) {
772 link
->type
= LT_STYLESHEET
;
779 html_link(struct html_context
*html_context
, unsigned char *a
,
780 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
782 int link_display
= html_context
->options
->meta_link_display
;
786 int name_neq_title
= 0;
790 if (!link_display
) return;
792 if (!html_link_parse(html_context
, a
, &link
)) return;
793 if (!link
.href
) goto free_and_return
;
796 if (link
.type
== LT_STYLESHEET
) {
797 int len
= strlen(link
.href
);
799 import_css_stylesheet(&html_context
->css_styles
,
800 html_context
->base_href
, link
.href
, len
);
803 if (!link_display
) goto free_and_return
;
/* Ignore a few annoying link types unless link_display is 5 or more. */
807 if (link_display
< 5 &&
808 (link
.type
== LT_ICON
||
809 link
.type
== LT_AUTHOR
||
810 link
.type
== LT_STYLESHEET
||
811 link
.type
== LT_ALTERNATE_STYLESHEET
)) goto free_and_return
;
813 if (!link
.name
|| link
.type
!= LT_UNKNOWN
)
814 /* Give preference to our default names for known types. */
815 name
= get_lt_default_name(&link
);
819 if (!name
) goto free_and_return
;
820 if (!init_string(&text
)) goto free_and_return
;
822 html_focusable(html_context
, a
);
825 add_to_string(&text
, link
.title
);
826 name_neq_title
= strcmp(link
.title
, name
);
828 add_to_string(&text
, name
);
830 if (link_display
== 1) goto put_link_line
; /* Only title */
832 #define APPEND(what) do { \
833 add_to_string(&text, first ? " (" : ", "); \
834 add_to_string(&text, (what)); \
838 if (name_neq_title
) {
842 if (link_display
>= 3 && link
.hreflang
) {
843 APPEND(link
.hreflang
);
846 if (link_display
>= 4 && link
.content_type
) {
847 APPEND(link
.content_type
);
850 if (link
.lang
&& link
.type
== LT_ALTERNATE_LANG
&&
851 (link_display
< 3 || (link
.hreflang
&&
852 strcasecmp(link
.hreflang
, link
.lang
)))) {
862 if (!first
) add_char_to_string(&text
, ')');
866 unsigned char *prefix
= (link
.direction
== LD_REL
)
867 ? "Link: " : "Reverse link: ";
868 unsigned char *link_name
= (text
.length
)
869 ? text
.source
: name
;
871 put_link_line(prefix
, link_name
, link
.href
,
872 html_context
->base_target
, html_context
);
874 if (text
.source
) done_string(&text
);
878 html_link_clear(&link
);