8 #define _GNU_SOURCE /* strcasestr() */
11 #include <libgen.h> /* basename() */
20 #include "bfu/listmenu.h"
22 #include "bookmarks/bookmarks.h"
23 #include "config/options.h"
24 #include "config/kbdbind.h"
25 #include "document/html/frames.h"
26 #include "document/html/parser/link.h"
27 #include "document/html/parser/parse.h"
28 #include "document/html/parser/stack.h"
29 #include "document/html/parser.h"
30 #include "document/html/renderer.h"
31 #include "globhist/globhist.h"
32 #include "mime/mime.h"
33 #include "protocol/uri.h"
34 #include "util/conv.h"
35 #include "util/error.h"
36 #include "util/memdebug.h"
37 #include "util/memory.h"
38 #include "util/string.h"
41 #include "document/html/internal.h"
45 html_a(struct html_context
*html_context
, unsigned char *a
,
46 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
50 href
= get_url_val(a
, "href", html_context
->doc_cp
);
52 unsigned char *target
;
54 mem_free_set(&format
.link
,
55 join_urls(html_context
->base_href
,
56 trim_chars(href
, ' ', 0)));
60 target
= get_target(html_context
->options
, a
);
62 mem_free_set(&format
.target
, target
);
64 mem_free_set(&format
.target
, stracpy(html_context
->base_target
));
68 ; /* Shut up compiler */
69 #ifdef CONFIG_GLOBHIST
70 } else if (get_global_history_item(format
.link
)) {
71 format
.style
.color
.foreground
= format
.color
.vlink
;
72 html_top
->pseudo_class
&= ~ELEMENT_LINK
;
73 html_top
->pseudo_class
|= ELEMENT_VISITED
;
75 #ifdef CONFIG_BOOKMARKS
76 } else if (get_bookmark(format
.link
)) {
77 format
.style
.color
.foreground
= format
.color
.bookmark_link
;
78 html_top
->pseudo_class
&= ~ELEMENT_VISITED
;
79 /* XXX: Really set ELEMENT_LINK? --pasky */
80 html_top
->pseudo_class
|= ELEMENT_LINK
;
83 format
.style
.color
.foreground
= format
.color
.clink
;
84 html_top
->pseudo_class
&= ~ELEMENT_VISITED
;
85 html_top
->pseudo_class
|= ELEMENT_LINK
;
88 mem_free_set(&format
.title
,
89 get_attr_val(a
, "title", html_context
->doc_cp
));
91 html_focusable(html_context
, a
);
94 pop_html_element(html_context
);
97 set_fragment_identifier(html_context
, a
, "name");
100 /* Returns an allocated string made after @label
101 * but limited to @max_len length, by truncating
102 * the middle of @label string, which is replaced
103 * by an asterisk ('*').
104 * If @max_len < 0 it returns NULL.
105 * If @max_len == 0 it returns an unmodified copy
107 * In either case, it may return NULL if a memory
108 * allocation failure occurs.
110 * truncate_label("some_string", 5) => "so*ng" */
111 static unsigned char *
112 truncate_label(unsigned char *label
, int max_len
)
114 unsigned char *new_label
;
115 int len
= strlen(label
);
119 if (max_len
< 0) return NULL
;
120 if (max_len
== 0 || len
<= max_len
)
121 return stracpy(label
);
123 right_part_len
= left_part_len
= max_len
/ 2;
125 if (left_part_len
+ right_part_len
+ 1 > max_len
)
128 new_label
= mem_alloc(max_len
+ 1);
129 if (!new_label
) return NULL
;
132 memcpy(new_label
, label
, left_part_len
);
134 new_label
[left_part_len
] = '*';
137 memcpy(new_label
+ left_part_len
+ 1,
138 label
+ len
- right_part_len
, right_part_len
);
140 new_label
[max_len
] = '\0';
145 /* Get image filename from its src attribute. */
146 static unsigned char *
147 get_image_filename_from_src(int max_len
, unsigned char *src
)
149 unsigned char *text
= NULL
;
150 unsigned char *start
, *filename
;
153 if (!src
) return NULL
;
154 /* We can display image as [foo.gif]. */
156 len
= strcspn(src
, "?");
158 for (start
= src
+ len
; start
> src
; start
--)
159 if (dir_sep(start
[-1])) {
165 filename
= memacpy(start
, len
);
167 /* XXX: Due to a compatibility alias (added: 2004-12-15 in
168 * 0.10pre3.CVS for document.browse.images.file_tags) this can
169 * return a negative @max_len. */
170 text
= truncate_label(filename
, max_len
);
178 /* Returns an allocated string containing formatted @label. */
179 static unsigned char *
180 get_image_label(int max_len
, unsigned char *label
)
182 unsigned char *formatted_label
;
184 if (!label
) return NULL
;
186 formatted_label
= truncate_label(label
, max_len
);
189 return formatted_label
;
193 put_image_label(unsigned char *a
, unsigned char *label
,
194 struct html_context
*html_context
)
196 color_T saved_foreground
;
197 enum text_style_format saved_attr
;
199 /* This is not 100% appropriate for <img>, but well, accepting
200 * accesskey and tabindex near <img> is just our little
201 * extension to the standard. After all, it makes sense. */
202 html_focusable(html_context
, a
);
204 saved_foreground
= format
.style
.color
.foreground
;
205 saved_attr
= format
.style
.attr
;
206 format
.style
.color
.foreground
= format
.color
.image_link
;
207 format
.style
.attr
|= AT_NO_ENTITIES
;
208 put_chrs(html_context
, label
, strlen(label
));
209 format
.style
.color
.foreground
= saved_foreground
;
210 format
.style
.attr
= saved_attr
;
214 html_img_do(unsigned char *a
, unsigned char *object_src
,
215 struct html_context
*html_context
)
217 int ismap
, usemap
= 0;
218 int add_brackets
= 0;
219 unsigned char *src
= NULL
;
220 unsigned char *label
= NULL
;
221 unsigned char *usemap_attr
;
222 struct document_options
*options
= html_context
->options
;
223 int display_style
= options
->image_link
.display_style
;
225 /* Note about display_style:
226 * 0 means always display IMG
227 * 1 means always display filename
228 * 2 means display alt/title attribute if possible, IMG if not
229 * 3 means display alt/title attribute if possible, filename if not */
231 usemap_attr
= get_attr_val(a
, "usemap", html_context
->doc_cp
);
233 unsigned char *joined_urls
= join_urls(html_context
->base_href
,
235 unsigned char *map_url
;
237 mem_free(usemap_attr
);
238 if (!joined_urls
) return;
239 map_url
= straconcat("MAP@", joined_urls
,
240 (unsigned char *) NULL
);
241 mem_free(joined_urls
);
242 if (!map_url
) return;
244 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
245 mem_free_set(&format
.link
, map_url
);
247 format
.style
.attr
|= AT_BOLD
;
252 && has_attr(a
, "ismap", html_context
->doc_cp
)
255 if (display_style
== 2 || display_style
== 3) {
256 label
= get_attr_val(a
, "alt", html_context
->doc_cp
);
258 label
= get_attr_val(a
, "title", html_context
->doc_cp
);
260 /* Little hack to preserve rendering of [ ], in directory listings,
261 * but we still want to drop extra spaces in alt or title attribute
262 * to limit display width on certain websites. --Zas */
263 if (label
&& strlen(label
) > 5) clr_spaces(label
);
266 src
= null_or_stracpy(object_src
);
267 if (!src
) src
= get_url_val(a
, "src", html_context
->doc_cp
);
268 if (!src
) src
= get_url_val(a
, "dynsrc", html_context
->doc_cp
);
270 /* If we have no label yet (no title or alt), so
271 * just use default ones, or image filename. */
272 if (!label
|| !*label
) {
273 mem_free_set(&label
, NULL
);
274 /* Do we want to display images with no alt/title and with no
276 * If not, just exit now. */
277 if (!options
->images
&& !format
.link
) {
279 if (usemap
) pop_html_element(html_context
);
286 label
= stracpy("USEMAP");
288 label
= stracpy("ISMAP");
290 if (display_style
== 3)
291 label
= get_image_filename_from_src(options
->image_link
.filename_maxlen
, src
);
295 label
= get_image_label(options
->image_link
.label_maxlen
, label
);
298 if (!label
|| !*label
) {
299 mem_free_set(&label
, NULL
);
301 if (display_style
== 1)
302 label
= get_image_filename_from_src(options
->image_link
.filename_maxlen
, src
);
303 if (!label
|| !*label
)
304 mem_free_set(&label
, stracpy("IMG"));
307 mem_free_set(&format
.image
, NULL
);
308 mem_free_set(&format
.title
, NULL
);
311 int img_link_tag
= options
->image_link
.tagging
;
313 if (img_link_tag
&& (img_link_tag
== 2 || add_brackets
)) {
314 unsigned char *img_link_prefix
= options
->image_link
.prefix
;
315 unsigned char *img_link_suffix
= options
->image_link
.suffix
;
316 unsigned char *new_label
= straconcat(img_link_prefix
, label
, img_link_suffix
, (unsigned char *) NULL
);
318 if (new_label
) mem_free_set(&label
, new_label
);
321 if (!options
->image_link
.show_any_as_links
) {
322 put_image_label(a
, label
, html_context
);
326 format
.image
= join_urls(html_context
->base_href
, src
);
329 format
.title
= get_attr_val(a
, "title", html_context
->doc_cp
);
332 unsigned char *new_link
;
334 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
335 new_link
= straconcat(format
.link
, "?0,0", (unsigned char *) NULL
);
337 mem_free_set(&format
.link
, new_link
);
340 put_image_label(a
, label
, html_context
);
342 if (ismap
) pop_html_element(html_context
);
343 mem_free_set(&format
.image
, NULL
);
344 mem_free_set(&format
.title
, NULL
);
351 if (usemap
) pop_html_element(html_context
);
355 html_img(struct html_context
*html_context
, unsigned char *a
,
356 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
358 html_img_do(a
, NULL
, html_context
);
361 /* prefix can have entities in it, but linkname cannot. */
363 put_link_line(unsigned char *prefix
, unsigned char *linkname
,
364 unsigned char *link
, unsigned char *target
,
365 struct html_context
*html_context
)
367 html_context
->has_link_lines
= 1;
368 html_stack_dup(html_context
, ELEMENT_KILLABLE
);
369 ln_break(html_context
, 1);
370 mem_free_set(&format
.link
, NULL
);
371 mem_free_set(&format
.target
, NULL
);
372 mem_free_set(&format
.title
, NULL
);
374 put_chrs(html_context
, prefix
, strlen(prefix
));
375 format
.link
= join_urls(html_context
->base_href
, link
);
376 format
.target
= stracpy(target
);
377 format
.style
.color
.foreground
= format
.color
.clink
;
378 /* linkname typically comes from get_attr_val, which
379 * has already expanded character entity references.
380 * Tell put_chrs not to expand them again. */
381 format
.style
.attr
|= AT_NO_ENTITIES
;
382 put_chrs(html_context
, linkname
, strlen(linkname
));
383 ln_break(html_context
, 1);
384 pop_html_element(html_context
);
389 html_applet(struct html_context
*html_context
, unsigned char *a
,
390 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
392 unsigned char *code
, *alt
;
394 code
= get_url_val(a
, "code", html_context
->doc_cp
);
397 alt
= get_attr_val(a
, "alt", html_context
->doc_cp
);
399 html_focusable(html_context
, a
);
402 put_link_line("Applet: ", alt
, code
,
403 html_context
->options
->framename
, html_context
);
405 put_link_line("", "Applet", code
,
406 html_context
->options
->framename
, html_context
);
414 html_audio(struct html_context
*html_context
, unsigned char *a
,
415 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
419 /* This just places a link where a audio element would be. */
421 url
= get_url_val(a
, "src", html_context
->doc_cp
);
424 html_focusable(html_context
, a
);
426 put_link_line("Audio: ", basename(url
), url
,
427 html_context
->options
->framename
, html_context
);
429 html_skip(html_context
, a
);
435 html_iframe_do(unsigned char *a
, unsigned char *object_src
,
436 struct html_context
*html_context
)
438 unsigned char *name
, *url
= NULL
;
440 url
= null_or_stracpy(object_src
);
441 if (!url
) url
= get_url_val(a
, "src", html_context
->doc_cp
);
444 name
= get_attr_val(a
, "name", html_context
->doc_cp
);
445 if (!name
) name
= get_attr_val(a
, "id", html_context
->doc_cp
);
446 if (!name
) name
= stracpy("");
452 html_focusable(html_context
, a
);
455 put_link_line("IFrame: ", name
, url
,
456 html_context
->options
->framename
, html_context
);
458 put_link_line("", "IFrame", url
,
459 html_context
->options
->framename
, html_context
);
467 html_iframe(struct html_context
*html_context
, unsigned char *a
,
468 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
470 html_iframe_do(a
, NULL
, html_context
);
474 html_object(struct html_context
*html_context
, unsigned char *a
,
475 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
477 unsigned char *type
, *url
;
479 /* This is just some dirty wrapper. We emulate various things through
480 * this, which is anyway in the spirit of <object> element, unifying
481 * <img> and <iframe> etc. */
483 url
= get_url_val(a
, "data", html_context
->doc_cp
);
484 if (!url
) url
= get_url_val(a
, "codebase", html_context
->doc_cp
);
487 type
= get_attr_val(a
, "type", html_context
->doc_cp
);
488 if (!type
) { mem_free(url
); return; }
490 if (!c_strncasecmp(type
, "text/", 5)) {
491 /* We will just emulate <iframe>. */
492 html_iframe_do(a
, url
, html_context
);
493 html_skip(html_context
, a
);
495 } else if (!c_strncasecmp(type
, "image/", 6)) {
496 /* <img> emulation. */
497 /* TODO: Use the enclosed text as 'alt' attribute. */
498 html_img_do(a
, url
, html_context
);
502 name
= get_attr_val(a
, "standby", html_context
->doc_cp
);
504 html_focusable(html_context
, a
);
507 put_link_line("Object: ", name
, url
,
508 html_context
->options
->framename
,
511 put_link_line("Object: ", type
, url
,
512 html_context
->options
->framename
,
524 html_embed(struct html_context
*html_context
, unsigned char *a
,
525 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
527 unsigned char *type
, *extension
;
528 unsigned char *object_src
;
530 /* This is just some dirty wrapper. We emulate various things through
531 * this, which is anyway in the spirit of <object> element, unifying
532 * <img> and <iframe> etc. */
534 object_src
= get_url_val(a
, "src", html_context
->doc_cp
);
535 if (!object_src
|| !*object_src
) {
536 mem_free_set(&object_src
, NULL
);
540 /* If there is no extension we want to get the default mime/type
542 extension
= strrchr(object_src
, '.');
543 if (!extension
) extension
= object_src
;
545 type
= get_extension_content_type(extension
);
546 if (type
&& !c_strncasecmp(type
, "image/", 6)) {
547 html_img_do(a
, object_src
, html_context
);
549 /* We will just emulate <iframe>. */
550 html_iframe_do(a
, object_src
, html_context
);
554 mem_free_set(&object_src
, NULL
);
558 html_video(struct html_context
*html_context
, unsigned char *a
,
559 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
563 /* This just places a link where a video element would be. */
565 url
= get_url_val(a
, "src", html_context
->doc_cp
);
568 html_focusable(html_context
, a
);
570 put_link_line("Video: ", basename(url
), url
,
571 html_context
->options
->framename
, html_context
);
573 html_skip(html_context
, a
);
581 Designates substitute versions for the document in which the link
582 occurs. When used together with the lang attribute, it implies a
583 translated version of the document. When used together with the
584 media attribute, it implies a version designed for a different
588 Refers to an external style sheet. See the section on external style
589 sheets for details. This is used together with the link type
590 "Alternate" for user-selectable alternate style sheets.
593 Refers to the first document in a collection of documents. This link
594 type tells search engines which document is considered by the author
595 to be the starting point of the collection.
598 Refers to the next document in a linear sequence of documents. User
599 agents may choose to preload the "next" document, to reduce the
603 Refers to the previous document in an ordered series of documents.
604 Some user agents also support the synonym "Previous".
607 Refers to a document serving as a table of contents.
608 Some user agents also support the synonym ToC (from "Table of Contents").
611 Refers to a document providing an index for the current document.
614 Refers to a document providing a glossary of terms that pertain to the
618 Refers to a copyright statement for the current document.
621 Refers to a document serving as a chapter in a collection of documents.
624 Refers to a document serving as a section in a collection of documents.
627 Refers to a document serving as a subsection in a collection of
631 Refers to a document serving as an appendix in a collection of
635 Refers to a document offering help (more information, links to other
636 sources information, etc.)
639 Refers to a bookmark. A bookmark is a link to a key entry point
640 within an extended document. The title attribute may be used, for
641 example, to label the bookmark. Note that several bookmarks may be
642 defined in each document.
644 Some more were added, like top. --Zas */
668 LT_ALTERNATE_STYLESHEET
,
672 enum hlink_direction
{
679 enum hlink_type type
;
680 enum hlink_direction direction
;
681 unsigned char *content_type
;
682 unsigned char *media
;
684 unsigned char *hreflang
;
685 unsigned char *title
;
688 /* Not implemented yet.
689 unsigned char *charset;
690 unsigned char *target;
692 unsigned char *class_;
697 struct lt_default_name
{
698 enum hlink_type type
;
703 /* XXX: Keep the (really really ;) default name first */
704 static struct lt_default_name lt_names
[] = {
705 { LT_START
, "start" },
707 { LT_START
, "home" },
708 { LT_PARENT
, "parent" },
711 { LT_PREV
, "previous" },
713 { LT_CONTENTS
, "contents" },
714 { LT_CONTENTS
, "toc" },
715 { LT_INDEX
, "index" },
716 { LT_GLOSSARY
, "glossary" },
717 { LT_CHAPTER
, "chapter" },
718 { LT_SECTION
, "section" },
719 { LT_SUBSECTION
, "subsection" },
720 { LT_SUBSECTION
, "child" },
721 { LT_SUBSECTION
, "sibling" },
722 { LT_APPENDIX
, "appendix" },
724 { LT_SEARCH
, "search" },
725 { LT_BOOKMARK
, "bookmark" },
726 { LT_ALTERNATE_LANG
, "alt. language" },
727 { LT_ALTERNATE_MEDIA
, "alt. media" },
728 { LT_ALTERNATE_STYLESHEET
, "alt. stylesheet" },
729 { LT_STYLESHEET
, "stylesheet" },
730 { LT_ALTERNATE
, "alternate" },
731 { LT_COPYRIGHT
, "copyright" },
732 { LT_AUTHOR
, "author" },
733 { LT_AUTHOR
, "made" },
734 { LT_AUTHOR
, "owner" },
739 /* Search for default name for this link according to its type. */
740 static unsigned char *
741 get_lt_default_name(struct hlink
*link
)
743 struct lt_default_name
*entry
= lt_names
;
747 while (entry
&& entry
->str
) {
748 if (entry
->type
== link
->type
) return entry
->str
;
756 html_link_clear(struct hlink
*link
)
760 mem_free_if(link
->content_type
);
761 mem_free_if(link
->media
);
762 mem_free_if(link
->href
);
763 mem_free_if(link
->hreflang
);
764 mem_free_if(link
->title
);
765 mem_free_if(link
->lang
);
766 mem_free_if(link
->name
);
768 memset(link
, 0, sizeof(*link
));
771 /* Parse a link and return results in @link.
772 * It tries to identify known types. */
774 html_link_parse(struct html_context
*html_context
, unsigned char *a
,
780 memset(link
, 0, sizeof(*link
));
782 link
->href
= get_url_val(a
, "href", html_context
->doc_cp
);
783 if (!link
->href
) return 0;
785 link
->lang
= get_attr_val(a
, "lang", html_context
->doc_cp
);
786 link
->hreflang
= get_attr_val(a
, "hreflang", html_context
->doc_cp
);
787 link
->title
= get_attr_val(a
, "title", html_context
->doc_cp
);
788 link
->content_type
= get_attr_val(a
, "type", html_context
->doc_cp
);
789 link
->media
= get_attr_val(a
, "media", html_context
->doc_cp
);
791 link
->name
= get_attr_val(a
, "rel", html_context
->doc_cp
);
793 link
->direction
= LD_REL
;
795 link
->name
= get_attr_val(a
, "rev", html_context
->doc_cp
);
796 if (link
->name
) link
->direction
= LD_REV
;
799 if (!link
->name
) return 1;
802 for (i
= 0; lt_names
[i
].str
; i
++)
803 if (!c_strcasecmp(link
->name
, lt_names
[i
].str
)) {
804 link
->type
= lt_names
[i
].type
;
808 if (c_strcasestr((const char *)link
->name
, "icon") ||
809 (link
->content_type
&& c_strcasestr((const char *)link
->content_type
, "icon"))) {
810 link
->type
= LT_ICON
;
812 } else if (c_strcasestr((const char *)link
->name
, "alternate")) {
813 link
->type
= LT_ALTERNATE
;
815 link
->type
= LT_ALTERNATE_LANG
;
816 else if (c_strcasestr((const char *)link
->name
, "stylesheet") ||
817 (link
->content_type
&& c_strcasestr((const char *)link
->content_type
, "css")))
818 link
->type
= LT_ALTERNATE_STYLESHEET
;
819 else if (link
->media
)
820 link
->type
= LT_ALTERNATE_MEDIA
;
822 } else if (link
->content_type
&& c_strcasestr((const char *)link
->content_type
, "css")) {
823 link
->type
= LT_STYLESHEET
;
830 html_link(struct html_context
*html_context
, unsigned char *a
,
831 unsigned char *xxx3
, unsigned char *xxx4
, unsigned char **xxx5
)
833 int link_display
= html_context
->options
->meta_link_display
;
837 int name_neq_title
= 0;
841 if (!link_display
) return;
843 if (!html_link_parse(html_context
, a
, &link
)) return;
844 if (!link
.href
) goto free_and_return
;
847 if (link
.type
== LT_STYLESHEET
848 && supports_html_media_attr(link
.media
)) {
849 int len
= strlen(link
.href
);
851 import_css_stylesheet(&html_context
->css_styles
,
852 html_context
->base_href
, link
.href
, len
);
855 if (!link_display
) goto free_and_return
;
858 /* Ignore few annoying links.. */
859 if (link_display
< 5 &&
860 (link
.type
== LT_ICON
||
861 link
.type
== LT_AUTHOR
||
862 link
.type
== LT_STYLESHEET
||
863 link
.type
== LT_ALTERNATE_STYLESHEET
)) goto free_and_return
;
865 if (!link
.name
|| link
.type
!= LT_UNKNOWN
)
866 /* Give preference to our default names for known types. */
867 name
= get_lt_default_name(&link
);
871 if (!name
) goto free_and_return
;
872 if (!init_string(&text
)) goto free_and_return
;
874 html_focusable(html_context
, a
);
877 add_to_string(&text
, link
.title
);
878 name_neq_title
= strcmp(link
.title
, name
);
880 add_to_string(&text
, name
);
882 if (link_display
== 1) goto put_link_line
; /* Only title */
884 #define APPEND(what) do { \
885 add_to_string(&text, first ? " (" : ", "); \
886 add_to_string(&text, (what)); \
890 if (name_neq_title
) {
894 if (link_display
>= 3 && link
.hreflang
) {
895 APPEND(link
.hreflang
);
898 if (link_display
>= 4 && link
.content_type
) {
899 APPEND(link
.content_type
);
902 if (link
.lang
&& link
.type
== LT_ALTERNATE_LANG
&&
903 (link_display
< 3 || (link
.hreflang
&&
904 c_strcasecmp(link
.hreflang
, link
.lang
)))) {
914 if (!first
) add_char_to_string(&text
, ')');
918 unsigned char *prefix
= (link
.direction
== LD_REL
)
919 ? "Link: " : "Reverse link: ";
920 unsigned char *link_name
= (text
.length
)
921 ? text
.source
: name
;
923 put_link_line(prefix
, link_name
, link
.href
,
924 html_context
->base_target
, html_context
);
926 if (text
.source
) done_string(&text
);
930 html_link_clear(&link
);