Empty text node is '\n'
[elinks.git] / src / document / libdom / renderer.c
blob598fe752a04c4ce466f256d566246b8cde560e34
1 /* Source document renderer: dumps the libdom DOM tree as coloured plain text */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <ctype.h>
8 #include <stdlib.h>
9 #include <string.h>
11 #include "elinks.h"
13 #include "bookmarks/bookmarks.h"
14 #include "cache/cache.h"
15 #include "config/options.h"
16 #include "document/docdata.h"
17 #include "document/document.h"
18 #include "document/format.h"
19 #include "document/options.h"
20 #include "document/libdom/renderer.h"
21 #include "document/renderer.h"
22 #include "globhist/globhist.h"
23 #include "intl/charsets.h"
24 #include "protocol/protocol.h"
25 #include "protocol/uri.h"
26 #include "terminal/color.h"
27 #include "terminal/draw.h"
28 #include "util/color.h"
29 #include "util/error.h"
30 #include "util/memory.h"
31 #include "util/string.h"
33 #include <dom/dom.h>
34 #include <dom/bindings/hubbub/parser.h>
/* State shared by the functions in this file while one document is rendered. */
36 struct source_renderer {
37 /* The document being rendered */
38 struct document *document;
/* Scratch string: the dump_*() functions append the DOM dump (tag names,
 * attributes, text and colour escape sequences) here, and
 * add_document_lines() later splits it into screen lines. */
40 struct string tmp_buffer;
/* The source text handed to render_source_document(); it is what gets fed
 * to the hubbub parser. */
41 struct string *source;
43 /* The convert table that should be used for converting line strings to
44 * the rendered strings. */
45 struct conv_table *convert_table;
47 /* The default template char data for text */
48 struct screen_char template_;
50 /* The maximum width any line can have (used for wrapping text) */
51 int max_width;
53 /* The current line number */
54 int lineno;
56 /* Are we doing line compression */
57 unsigned int compress:1;
/* Resize (doc)->links through ALIGN_LINK, given the current link count and
 * the requested size. add_document_link() treats a false result as failure. */
60 #define realloc_document_links(doc, size) \
61 ALIGN_LINK(&(doc)->links, (doc)->nlinks, size)
63 static struct screen_char *
64 realloc_line(struct document *document, int x, int y)
66 struct line *line = realloc_lines(document, y);
68 if (!line) return NULL;
70 if (x != line->length) {
71 if (!ALIGN_LINE(&line->chars, line->length, x))
72 return NULL;
74 line->length = x;
77 return line->chars;
81 static inline struct link *
82 add_document_link(struct document *document, unsigned char *uri, int length,
83 int x, int y)
85 struct link *link;
86 struct point *point;
88 if (!realloc_document_links(document, document->nlinks + 1))
89 return NULL;
91 link = &document->links[document->nlinks];
93 if (!realloc_points(link, length))
94 return NULL;
96 link->npoints = length;
97 link->type = LINK_HYPERTEXT;
98 link->where = uri;
99 link->color.background = document->options.default_style.color.background;
100 link->color.foreground = document->options.default_color.link;
101 link->number = document->nlinks;
103 for (point = link->points; length > 0; length--, point++, x++) {
104 point->x = x;
105 point->y = y;
108 document->nlinks++;
109 document->links_sorted = 0;
110 return link;
113 /* Searches a word to find an email adress or an URI to add as a link. */
114 static inline struct link *
115 check_link_word(struct document *document, unsigned char *uri, int length,
116 int x, int y)
118 struct uri test;
119 unsigned char *where = NULL;
120 unsigned char *mailto = memchr(uri, '@', length);
121 int keep = uri[length];
122 struct link *new_link;
124 assert(document);
125 if_assert_failed return NULL;
127 uri[length] = 0;
129 if (mailto && mailto > uri && mailto - uri < length - 1) {
130 where = straconcat("mailto:", uri, (unsigned char *) NULL);
132 } else if (parse_uri(&test, uri) == URI_ERRNO_OK
133 && test.protocol != PROTOCOL_UNKNOWN
134 && (test.datalen || test.hostlen)) {
135 where = memacpy(uri, length);
138 uri[length] = keep;
140 if (!where) return NULL;
142 /* We need to reparse the URI and normalize it so that the protocol and
143 * host part are converted to lowercase. */
144 normalize_uri(NULL, where);
146 new_link = add_document_link(document, where, length, x, y);
148 if (!new_link) mem_free(where);
150 return new_link;
/* True for a byte that may belong to a URI word: anything greater than the
 * space character, except '<', '>', '(', ')' and whatever isquote() accepts.
 * Note that the argument is evaluated several times. */
153 #define url_char(c) ( \
154 (c) > ' ' \
155 && (c) != '<' \
156 && (c) != '>' \
157 && (c) != '(' \
158 && (c) != ')' \
159 && !isquote(c))
/* Return the length of the URI candidate at the start of @line: the leading
 * run of url_char() bytes (at most @length of them) with any trailing '.' and
 * ',' bytes trimmed off. */
static inline int
get_uri_length(unsigned char *line, int length)
{
	int end;

	for (end = 0; end < length; end++) {
		if (!url_char(line[end]))
			break;
	}

	/* Trailing punctuation is not treated as part of the URI. */
	while (end > 0 && (line[end - 1] == '.' || line[end - 1] == ','))
		end--;

	return end;
}
179 static int
180 print_document_link(struct source_renderer *renderer, int lineno,
181 unsigned char *line, int line_pos, int width,
182 int expanded, struct screen_char *pos, int cells)
184 struct document *document = renderer->document;
185 unsigned char *start = &line[line_pos];
186 int len = get_uri_length(start, width - line_pos);
187 int screen_column = cells + expanded;
188 struct link *new_link;
189 int link_end = line_pos + len;
190 unsigned char saved_char;
191 struct document_options *doc_opts = &document->options;
192 struct screen_char template_ = renderer->template_;
193 int i;
195 if (!len) return 0;
197 new_link = check_link_word(document, start, len, screen_column,
198 lineno);
200 if (!new_link) return 0;
202 saved_char = line[link_end];
203 line[link_end] = '\0';
205 if (0)
206 ; /* Shut up compiler */
207 #ifdef CONFIG_GLOBHIST
208 else if (get_global_history_item(start))
209 new_link->color.foreground = doc_opts->default_color.vlink;
210 #endif
211 #ifdef CONFIG_BOOKMARKS
212 else if (get_bookmark(start))
213 new_link->color.foreground = doc_opts->default_color.bookmark_link;
214 #endif
215 else
216 new_link->color.foreground = doc_opts->default_color.link;
218 line[link_end] = saved_char;
220 new_link->color.background = doc_opts->default_style.color.background;
222 set_term_color(&template_, &new_link->color,
223 doc_opts->color_flags, doc_opts->color_mode);
225 for (i = len; i; i--) {
226 template_.data = line[line_pos++];
227 copy_screen_chars(pos++, &template_, 1);
230 return len;
233 static void
234 decode_esc_color(unsigned char *text, int *line_pos, int width,
235 struct screen_char *template_, enum color_mode mode,
236 int *was_reversed)
238 struct screen_char ch;
239 struct color_pair color;
240 char *buf, *tail, *begin, *end;
241 int k, foreground, background, f1, b1; /* , intensity; */
243 ++(*line_pos);
244 buf = (char *)&text[*line_pos];
246 if (*buf != '[') return;
247 ++buf;
248 ++(*line_pos);
250 k = strspn(buf, "0123456789;");
251 *line_pos += k;
252 if (!k || buf[k] != 'm') return;
254 end = buf + k;
255 begin = tail = buf;
257 get_screen_char_color(template_, &color, 0, mode);
258 set_term_color(&ch, &color, 0, COLOR_MODE_16);
259 b1 = background = (ch.c.color[0] >> 4) & 7;
260 f1 = foreground = ch.c.color[0] & 15;
262 while (tail < end) {
263 unsigned char kod = (unsigned char)strtol(begin, &tail, 10);
265 begin = tail + 1;
266 switch (kod) {
267 case 0:
268 background = 0;
269 foreground = 7;
270 break;
271 case 7:
272 if (*was_reversed == 0) {
273 background = f1 & 7;
274 foreground = b1;
275 *was_reversed = 1;
277 break;
278 case 27:
279 if (*was_reversed == 1) {
280 background = f1 & 7;
281 foreground = b1;
282 *was_reversed = 0;
284 break;
285 case 30:
286 case 31:
287 case 32:
288 case 33:
289 case 34:
290 case 35:
291 case 36:
292 case 37:
293 foreground = kod - 30;
294 break;
295 case 40:
296 case 41:
297 case 42:
298 case 43:
299 case 44:
300 case 45:
301 case 46:
302 case 47:
303 background = kod - 40;
304 break;
305 default:
306 break;
309 color.background = get_term_color16(background);
310 color.foreground = get_term_color16(foreground);
311 set_term_color(template_, &color, 0, mode);
315 static void
316 init_template(struct screen_char *template_, struct document_options *options)
318 get_screen_char_template(template_, options, options->default_style);
321 static struct node *
322 add_node(struct source_renderer *renderer, int x, int width, int height)
324 struct node *node = mem_alloc(sizeof(*node));
326 if (node) {
327 struct document *document = renderer->document;
329 set_box(&node->box, x, renderer->lineno, width, height);
331 int_lower_bound(&document->width, width);
332 int_lower_bound(&document->height, height);
334 add_to_list(document->nodes, node);
337 return node;
/* Render one source line (@line_width bytes at @line) into the document line
 * numbered renderer->lineno.
 *
 * The bytes are first run through convert_string(). A first pass over the
 * result only measures how many extra cells tab expansion will need, so the
 * line can be sized with realloc_line(). A second pass writes the screen
 * cells and handles tabs, backspace overstrike sequences (bold/underline),
 * ESC colour sequences via decode_esc_color() and, when the
 * plain_display_links option is set, URI detection via print_document_link().
 *
 * Returns the converted width plus the tab expansion, or 0 when the
 * conversion or the line allocation failed. */
340 static inline int
341 add_document_line(struct source_renderer *renderer, char *line, int line_width)
343 struct document *document = renderer->document;
344 struct screen_char *template_ = &renderer->template_;
345 struct screen_char saved_renderer_template = *template_;
346 struct screen_char *pos, *startpos;
347 struct document_options *doc_opts = &document->options;
348 int was_reversed = 0;
350 #ifdef CONFIG_UTF8
351 int utf8 = doc_opts->utf8;
352 #endif /* CONFIG_UTF8 */
353 int cells = 0;
354 int lineno = renderer->lineno;
355 int expanded = 0;
356 int width = line_width;
357 int line_pos;
/* From here on `line` is the convert_string() result; it is released with
 * mem_free() on every later exit from this function. */
359 line = convert_string(renderer->convert_table, line, width,
360 document->options.cp, CSM_NONE, &width,
361 NULL, NULL);
362 if (!line) return 0;
/* First pass: nothing is written yet, only `expanded` is computed. */
364 /* Now expand tabs */
365 for (line_pos = 0; line_pos < width;) {
366 unsigned char line_char = line[line_pos];
367 int charlen = 1;
368 int cell = 1;
369 #ifdef CONFIG_UTF8
370 unicode_val_T data;
372 if (utf8) {
373 unsigned char *line_char2 = &line[line_pos];
374 charlen = utf8charlen(&line_char);
375 data = utf8_to_unicode(&line_char2, &line[width]);
377 if (data == UCS_NO_CHAR) {
378 line_pos += charlen;
379 continue;
382 cell = unicode_to_cell(data);
384 #endif /* CONFIG_UTF8 */
386 if (line_char == ASCII_TAB
387 && (line_pos + charlen == width
388 || line[line_pos + charlen] != ASCII_BS)) {
389 int tab_width = 7 - ((cells + expanded) & 7);
391 expanded += tab_width;
392 } else if (line_char == ASCII_BS) {
393 #if 0
394 This does not work: Suppose we have seventeen spaces
395 followed by a back-space; that will call for sixteen
396 bytes of memory, but we will print seventeen spaces
397 before we hit the back-space -- overflow!
399 /* Don't count the character
400 * that the back-space character will delete */
401 if (expanded + line_pos)
402 expanded--;
403 #endif
404 #if 0
405 /* Don't count the back-space character */
406 if (expanded > 0)
407 expanded--;
408 #endif
410 line_pos += charlen;
411 cells += cell;
414 assert(expanded >= 0);
/* Size the line for the converted bytes plus the tab expansion measured
 * above. */
416 startpos = pos = realloc_line(document, width + expanded, lineno);
417 if (!pos) {
418 mem_free(line);
419 return 0;
/* Second pass: restart the counters and write the screen cells. */
422 cells = 0;
423 expanded = 0;
424 for (line_pos = 0; line_pos < width;) {
425 unsigned char line_char = line[line_pos];
426 unsigned char next_char, prev_char;
427 int charlen = 1;
428 int cell = 1;
429 #ifdef CONFIG_UTF8
430 unicode_val_T data = UCS_NO_CHAR;
432 if (utf8) {
433 unsigned char *line_char2 = &line[line_pos];
434 charlen = utf8charlen(&line_char);
435 data = utf8_to_unicode(&line_char2, &line[width]);
437 if (data == UCS_NO_CHAR) {
438 line_pos += charlen;
439 continue;
442 cell = unicode_to_cell(data);
444 #endif /* CONFIG_UTF8 */
446 prev_char = line_pos > 0 ? line[line_pos - 1] : '\0';
447 next_char = (line_pos + charlen < width) ?
448 line[line_pos + charlen] : '\0';
450 /* Do not expand tabs that precede back-spaces; this saves the
451 * back-space code some trouble. */
452 if (line_char == ASCII_TAB && next_char != ASCII_BS) {
453 int tab_width = 7 - ((cells + expanded) & 7);
455 expanded += tab_width;
457 template_->data = ' ';
459 copy_screen_chars(pos++, template_, 1);
460 while (tab_width--);
462 *template_ = saved_renderer_template;
464 } else if (line_char == ASCII_BS) {
465 if (!(expanded + cells)) {
466 /* We've backspaced to the start of the line */
467 goto next;
469 if (pos > startpos)
470 pos--; /* Backspace */
472 /* Handle x^H_ as _^Hx, but prevent an infinite loop
473 * swapping two underscores. */
474 if (next_char == '_' && prev_char != '_') {
475 /* x^H_ becomes _^Hx */
476 if (line_pos - 1 >= 0)
477 line[line_pos - 1] = next_char;
478 if (line_pos + charlen < width)
479 line[line_pos + charlen] = prev_char;
481 /* Go back and reparse the swapped characters */
482 if (line_pos - 2 >= 0) {
483 cells--;
484 line_pos--;
486 continue;
489 if ((expanded + line_pos) - 2 >= 0) {
490 /* Don't count the backspace character or the
491 * deleted character when returning the line's
492 * width or when expanding tabs. */
493 expanded -= 2;
496 if (pos->data == '_' && next_char == '_') {
497 /* Is _^H_ an underlined underscore
498 * or an emboldened underscore? */
500 if (expanded + line_pos >= 0
501 && pos - 1 >= startpos
502 && (pos - 1)->attr) {
503 /* There is some preceding text,
504 * and it has an attribute; copy it */
505 template_->attr |= (pos - 1)->attr;
506 } else {
507 /* Default to bold; seems more useful
508 * than underlining the underscore */
509 template_->attr |= SCREEN_ATTR_BOLD;
512 } else if (pos->data == '_') {
513 /* Underline _^Hx */
515 template_->attr |= SCREEN_ATTR_UNDERLINE;
517 } else if (pos->data == next_char) {
518 /* Embolden x^Hx */
520 template_->attr |= SCREEN_ATTR_BOLD;
523 /* Handle _^Hx^Hx as both bold and underlined */
524 if (template_->attr)
525 template_->attr |= pos->attr;
526 } else if (line_char == 27) {
/* ESC byte: decode_esc_color() updates the saved template and advances
 * line_pos itself; the new colours stay in effect for following cells. */
527 decode_esc_color(line, &line_pos, width,
528 &saved_renderer_template,
529 doc_opts->color_mode, &was_reversed);
530 *template_ = saved_renderer_template;
531 } else {
532 int added_chars = 0;
534 if (document->options.plain_display_links
535 && isalpha(line_char) && isalpha(next_char)) {
536 /* We only want to check for a URI if there are
537 * at least two consecutive alphabetic
538 * characters, or if we are at the very start of
539 * the line. It improves performance a bit.
540 * --Zas */
541 added_chars = print_document_link(renderer,
542 lineno, line,
543 line_pos,
544 width,
545 expanded,
546 pos, cells);
549 if (added_chars) {
550 line_pos += added_chars - 1;
551 cells += added_chars - 1;
552 pos += added_chars;
553 } else {
554 #ifdef CONFIG_UTF8
555 if (utf8) {
556 if (data == UCS_NO_CHAR) {
557 line_pos += charlen;
558 continue;
561 template_->data = (unicode_val_T)data;
562 copy_screen_chars(pos++, template_, 1);
564 if (cell == 2) {
565 template_->data = UCS_NO_CHAR;
566 copy_screen_chars(pos++,
567 template_, 1);
569 } else
570 #endif /* CONFIG_UTF8 */
572 if (!isscreensafe(line_char))
573 line_char = '.';
574 template_->data = line_char;
575 copy_screen_chars(pos++, template_, 1);
577 /* Detect copy of nul chars to screen,
578 * this should not occur. --Zas */
579 assert(line_char);
583 *template_ = saved_renderer_template;
585 next:
586 line_pos += charlen;
587 cells += cell;
589 mem_free(line);
/* Resize the line to the number of cells actually written. */
591 realloc_line(document, pos - startpos, lineno);
593 return width + expanded;
598 static void
599 add_document_lines(struct source_renderer *renderer)
601 unsigned char *source = renderer->tmp_buffer.source;
602 int length = renderer->tmp_buffer.length;
603 int was_empty_line = 0;
604 int was_wrapped = 0;
605 #ifdef CONFIG_UTF8
606 int utf8 = is_cp_utf8(renderer->document->cp);
607 #endif
608 for (; length > 0; renderer->lineno++) {
609 unsigned char *xsource;
610 int width, added, only_spaces = 1, spaces = 0, was_spaces = 0;
611 int last_space = 0;
612 int tab_spaces = 0;
613 int step = 0;
614 int cells = 0;
616 /* End of line detection: We handle \r, \r\n and \n types. */
617 for (width = 0; (width < length) &&
618 (cells < renderer->max_width);) {
619 if (source[width] == ASCII_CR)
620 step++;
621 if (source[width + step] == ASCII_LF)
622 step++;
623 if (step) break;
625 if (isspace(source[width])) {
626 last_space = width;
627 if (only_spaces)
628 spaces++;
629 else
630 was_spaces++;
631 if (source[width] == '\t')
632 tab_spaces += 7 - ((width + tab_spaces) % 8);
633 } else {
634 only_spaces = 0;
635 was_spaces = 0;
637 #ifdef CONFIG_UTF8
638 if (utf8) {
639 unsigned char *text = &source[width];
640 unicode_val_T data = utf8_to_unicode(&text,
641 &source[length]);
643 if (data == UCS_NO_CHAR) return;
645 cells += unicode_to_cell(data);
646 width += utf8charlen(&source[width]);
647 } else
648 #endif /* CONFIG_UTF8 */
650 cells++;
651 width++;
655 if (only_spaces && step) {
656 if (was_wrapped || (renderer->compress && was_empty_line)) {
657 /* Successive empty lines will appear as one. */
658 length -= step + spaces;
659 source += step + spaces;
660 renderer->lineno--;
661 assert(renderer->lineno >= 0);
662 continue;
664 was_empty_line = 1;
666 /* No need to keep whitespaces on an empty line. */
667 source += spaces;
668 length -= spaces;
669 width -= spaces;
671 } else {
672 was_empty_line = 0;
673 was_wrapped = !step;
675 if (was_spaces && step) {
676 /* Drop trailing whitespaces. */
677 width -= was_spaces;
678 step += was_spaces;
681 if (!step && (width < length) && last_space) {
682 width = last_space;
683 step = 1;
687 assert(width >= 0);
689 /* We will touch the supplied source, so better replicate it. */
690 xsource = memacpy(source, width);
691 if (!xsource) continue;
693 added = add_document_line(renderer, source, width);
694 mem_free(xsource);
696 if (added) {
697 /* Add (search) nodes on a line by line basis */
698 add_node(renderer, 0, added, 1);
701 /* Skip end of line chars too. */
702 width += step;
703 length -= width;
704 source += width;
707 assert(!length);
711 * Generate a LibDOM document DOM from an HTML file
713 * \param file The file path
714 * \return pointer to DOM document, or NULL on error
716 static dom_document *
717 create_doc_dom_from_buffer(struct source_renderer *renderer)
719 dom_hubbub_parser *parser = NULL;
720 dom_hubbub_error error;
721 dom_hubbub_parser_params params;
722 dom_document *doc;
724 params.enc = NULL;
725 params.fix_enc = true;
726 params.enable_script = false;
727 params.msg = NULL;
728 params.script = NULL;
729 params.ctx = NULL;
730 params.daf = NULL;
732 /* Create Hubbub parser */
733 error = dom_hubbub_parser_create(&params, &parser, &doc);
734 if (error != DOM_HUBBUB_OK) {
735 DBG("Can't create Hubbub Parser\n");
736 return NULL;
739 /* Parse data */
740 error = dom_hubbub_parser_parse_chunk(parser, renderer->source->source, renderer->source->length);
741 if (error != DOM_HUBBUB_OK) {
742 dom_hubbub_parser_destroy(parser);
743 DBG("Parsing errors occur\n");
744 return NULL;
747 /* Done parsing file */
748 error = dom_hubbub_parser_completed(parser);
749 if (error != DOM_HUBBUB_OK) {
750 dom_hubbub_parser_destroy(parser);
751 DBG("Parsing error when construct DOM\n");
752 return NULL;
755 /* Finished with parser */
756 dom_hubbub_parser_destroy(parser);
758 return doc;
762 * Dump attribute/value for an element node
764 * \param node The attribute node to dump details for
765 * \return true on success, or false on error
767 static bool
768 dump_node_element_attribute(struct source_renderer *renderer, dom_node *node)
770 dom_exception exc;
771 dom_string *attr = NULL;
772 dom_string *attr_value = NULL;
774 exc = dom_attr_get_name((struct dom_attr *)node, &attr);
776 if (exc != DOM_NO_ERR) {
777 DBG("Exception raised for dom_string_create\n");
778 return false;
781 /* Get attribute's value */
782 exc = dom_attr_get_value((struct dom_attr *)node, &attr_value);
783 if (exc != DOM_NO_ERR) {
784 DBG("Exception raised for element_get_attribute\n");
785 dom_string_unref(attr);
786 return false;
787 } else if (attr_value == NULL) {
788 /* Element lacks required attribute */
789 dom_string_unref(attr);
790 return true;
793 add_to_string(&renderer->tmp_buffer, " \033[0;33m");
794 add_bytes_to_string(&renderer->tmp_buffer, dom_string_data(attr), dom_string_byte_length(attr));
795 add_to_string(&renderer->tmp_buffer, "\033[0m=\"\033[0;35m");
796 add_bytes_to_string(&renderer->tmp_buffer, dom_string_data(attr_value), dom_string_byte_length(attr_value));
797 add_to_string(&renderer->tmp_buffer, "\033[0m\"");
799 /* Finished with the attr dom_string */
800 dom_string_unref(attr);
801 dom_string_unref(attr_value);
803 return true;
807 * Print a line in a DOM structure dump for an element
809 * \param node The node to dump
810 * \param depth The node's depth
811 * \return true on success, or false on error
813 static bool
814 dump_dom_element(struct source_renderer *renderer, dom_node *node, int depth)
816 dom_exception exc;
817 dom_string *node_name = NULL;
818 dom_node_type type;
819 dom_namednodemap *attrs;
821 /* Only interested in element nodes */
822 exc = dom_node_get_node_type(node, &type);
824 if (exc != DOM_NO_ERR) {
825 DBG("Exception raised for node_get_node_type\n");
826 return false;
827 } else {
828 if (type == DOM_TEXT_NODE) {
829 dom_string *str;
831 exc = dom_node_get_text_content(node, &str);
833 if (exc == DOM_NO_ERR && str != NULL) {
834 int length = dom_string_byte_length(str);
835 const char *string = dom_string_data(str);
837 if (!((length == 1) && (*string == '\n'))) {
838 add_bytes_to_string(&renderer->tmp_buffer, string, length);
840 dom_string_unref(str);
842 return true;
844 if (type != DOM_ELEMENT_NODE) {
845 /* Nothing to print */
846 return true;
850 /* Get element name */
851 exc = dom_node_get_node_name(node, &node_name);
852 if (exc != DOM_NO_ERR) {
853 DBG("Exception raised for get_node_name\n");
854 return false;
855 } else if (node_name == NULL) {
856 DBG("Broken: root_name == NULL\n");
857 return false;
860 /* Get string data and print element name */
861 add_to_string(&renderer->tmp_buffer, "<\033[0;32m");
862 add_bytes_to_string(&renderer->tmp_buffer, dom_string_data(node_name), dom_string_byte_length(node_name));
863 add_to_string(&renderer->tmp_buffer, "\033[0m");
865 exc = dom_node_get_attributes(node, &attrs);
867 if (exc == DOM_NO_ERR) {
868 int length;
870 exc = dom_namednodemap_get_length(attrs, &length);
872 if (exc == DOM_NO_ERR) {
873 int i;
875 for (i = 0; i < length; ++i) {
876 dom_node *attr;
878 exc = dom_namednodemap_item(attrs, i, &attr);
880 if (exc == DOM_NO_ERR) {
881 dump_node_element_attribute(renderer, attr);
882 dom_node_unref(attr);
886 dom_node_unref(attrs);
888 add_char_to_string(&renderer->tmp_buffer, '>');
890 /* Finished with the node_name dom_string */
891 dom_string_unref(node_name);
893 return true;
897 * Print a closing element
899 * \param node The node to dump
900 * \return true on success, or false on error
902 static bool
903 dump_dom_element_closing(struct source_renderer *renderer, dom_node *node)
905 dom_exception exc;
906 dom_string *node_name = NULL;
907 dom_node_type type;
909 /* Only interested in element nodes */
910 exc = dom_node_get_node_type(node, &type);
912 if (exc != DOM_NO_ERR) {
913 DBG("Exception raised for node_get_node_type\n");
914 return false;
915 } else {
916 if (type != DOM_ELEMENT_NODE) {
917 /* Nothing to print */
918 return true;
922 /* Get element name */
923 exc = dom_node_get_node_name(node, &node_name);
924 if (exc != DOM_NO_ERR) {
925 DBG("Exception raised for get_node_name\n");
926 return false;
927 } else if (node_name == NULL) {
928 DBG("Broken: root_name == NULL\n");
929 return false;
932 /* Get string data and print element name */
933 add_to_string(&renderer->tmp_buffer, "</\033[0;32m");
934 add_bytes_to_string(&renderer->tmp_buffer, dom_string_data(node_name), dom_string_byte_length(node_name));
935 add_to_string(&renderer->tmp_buffer, "\033[0m>");
937 /* Finished with the node_name dom_string */
938 dom_string_unref(node_name);
940 return true;
945 * Walk though a DOM (sub)tree, in depth first order, printing DOM structure.
947 * \param node The root node to start from
948 * \param depth The depth of 'node' in the (sub)tree
950 static bool
951 dump_dom_structure(struct source_renderer *renderer, dom_node *node, int depth)
953 dom_exception exc;
954 dom_node *child;
956 /* Print this node's entry */
957 if (dump_dom_element(renderer, node, depth) == false) {
958 /* There was an error; return */
959 return false;
962 /* Get the node's first child */
963 exc = dom_node_get_first_child(node, &child);
964 if (exc != DOM_NO_ERR) {
965 DBG("Exception raised for node_get_first_child\n");
966 return false;
967 } else if (child != NULL) {
968 /* node has children; decend to children's depth */
969 depth++;
971 /* Loop though all node's children */
972 do {
973 dom_node *next_child;
975 /* Visit node's descendents */
976 if (dump_dom_structure(renderer, child, depth) == false) {
977 /* There was an error; return */
978 dom_node_unref(child);
979 return false;
982 /* Go to next sibling */
983 exc = dom_node_get_next_sibling(child, &next_child);
984 if (exc != DOM_NO_ERR) {
985 DBG("Exception raised for "
986 "node_get_next_sibling\n");
987 dom_node_unref(child);
988 return false;
991 dom_node_unref(child);
992 child = next_child;
993 } while (child != NULL); /* No more children */
996 dump_dom_element_closing(renderer, node);
998 return true;
1002 * Main entry point from OS.
1004 static int
1005 libdom_main(struct source_renderer *renderer)
1007 dom_exception exc; /* returned by libdom functions */
1008 dom_document *doc = NULL; /* document, loaded into libdom */
1009 dom_node *root = NULL; /* root element of document */
1011 /* Load up the input HTML file */
1012 doc = create_doc_dom_from_buffer(renderer);
1013 if (doc == NULL) {
1014 DBG("Failed to load document.\n");
1015 return EXIT_FAILURE;
1018 /* Get root element */
1019 exc = dom_document_get_document_element(doc, &root);
1020 if (exc != DOM_NO_ERR) {
1021 DBG("Exception raised for get_document_element\n");
1022 dom_node_unref(doc);
1023 return EXIT_FAILURE;
1024 } else if (root == NULL) {
1025 DBG("Broken: root == NULL\n");
1026 dom_node_unref(doc);
1027 return EXIT_FAILURE;
1030 /* Dump DOM structure */
1031 if (dump_dom_structure(renderer, root, 0) == false) {
1032 DBG("Failed to complete DOM structure dump.\n");
1033 dom_node_unref(root);
1034 dom_node_unref(doc);
1035 return EXIT_FAILURE;
1038 dom_node_unref(root);
1040 /* Finished with the dom_document */
1041 dom_node_unref(doc);
1043 return EXIT_SUCCESS;
1046 void
1047 render_source_document(struct cache_entry *cached, struct document *document,
1048 struct string *buffer)
1050 struct conv_table *convert_table;
1051 unsigned char *head = empty_string_or_(cached->head);
1052 struct source_renderer renderer;
1054 convert_table = get_convert_table(head, document->options.cp,
1055 document->options.assume_cp,
1056 &document->cp,
1057 &document->cp_status,
1058 document->options.hard_assume);
1060 init_string(&renderer.tmp_buffer);
1062 renderer.source = buffer;
1064 renderer.document = document;
1065 renderer.lineno = 0;
1066 renderer.convert_table = convert_table;
1067 renderer.compress = document->options.plain_compress_empty_lines;
1068 renderer.max_width = document->options.wrap ? document->options.box.width
1069 : INT_MAX;
1071 document->color.background = document->options.default_style.color.background;
1072 document->width = 0;
1073 #ifdef CONFIG_UTF8
1074 document->options.utf8 = is_cp_utf8(document->options.cp);
1075 #endif /* CONFIG_UTF8 */
1077 /* Setup the style */
1078 init_template(&renderer.template_, &document->options);
1080 libdom_main(&renderer);
1081 add_document_lines(&renderer);
1082 done_string(&renderer.tmp_buffer);