Update Chinese (China) translation
[yelp.git] / libyelp / yelp-info-parser.c
blobd0e767bce3234cb934a1eee0c0d23ce5a54e3e45
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil -*- */
2 /*
3 * Copyright (C) 2005 Davyd Madeley <davyd@madeley.id.au>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public
16 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
18 * Author: Davyd Madeley <davyd@madeley.id.au>
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
25 #include <glib.h>
26 #include <gtk/gtk.h>
27 #include <string.h>
29 #include "yelp-info-parser.h"
30 #include "yelp-magic-decompressor.h"
31 #include "yelp-debug.h"
34 static GtkTreeIter * find_real_top (GtkTreeModel *model,
35 GtkTreeIter *it);
36 static GtkTreeIter * find_real_sibling (GtkTreeModel *model,
37 GtkTreeIter *it,
38 GtkTreeIter *comp);
39 static xmlNodePtr yelp_info_parse_menu (GtkTreeStore *tree,
40 xmlNodePtr *node,
41 gchar *page_content,
42 gboolean notes);
43 static gboolean get_menuoptions (gchar *line,
44 gchar **title,
45 gchar **ref,
46 gchar **desc,
47 gchar **xref);
48 static gboolean resolve_frag_id (GtkTreeModel *model,
49 GtkTreePath *path,
50 GtkTreeIter *iter,
51 gpointer data);
52 static void info_process_text_notes (xmlNodePtr *node,
53 gchar *content,
54 GtkTreeStore
55 *tree);
58 Used to output the correct <heading level="?" /> tag.
60 static const gchar* level_headings[] = { NULL, "1", "2", "3" };
62 static GHashTable *
63 info_image_get_attributes (gchar const* string)
65 GMatchInfo *match_info;
66 GRegex *regex;
67 GHashTable *h;
69 h = 0;
70 regex = g_regex_new ("([^\\s][^\\s=]+)=(?:([^\\s \"]+)|(?:\"((?:[^\\\"]|\\\\[\\\\\"])*)\"))", 0, 0, NULL);
71 g_regex_match (regex, string, 0, &match_info);
72 while (g_match_info_matches (match_info))
74 gchar *key;
75 gchar *value;
77 if (!h)
78 h = g_hash_table_new (g_str_hash, g_str_equal);
79 key = g_match_info_fetch (match_info, 1);
80 value = g_match_info_fetch (match_info, 2);
81 if (!*value)
82 value = g_match_info_fetch (match_info, 3);
83 g_hash_table_insert (h, key, value);
84 g_match_info_next (match_info, NULL);
86 g_match_info_free (match_info);
87 g_regex_unref (regex);
89 return h;
/*
  info elements look like \0\b[<TAGNAME>\0\b] and take attribute=value
  pairs, i.e. for image: \0\b[image src="foo.png" \0\b]
*/

/* Raw building blocks of a tag marker. */
#define INFO_TAG_0                "\0"
#define INFO_TAG_1                "\b"
#define INFO_TAG_OPEN_2           INFO_TAG_1 "["
#define INFO_TAG_CLOSE_2          INFO_TAG_1 "]"

/* The same bracket parts, written so they are literal inside a regex. */
#define INFO_TAG_OPEN_2_RE        INFO_TAG_1 "[[]"
#define INFO_TAG_CLOSE_2_RE       INFO_TAG_1 "[]]"

/* Complete markers as they appear in the file on disk. */
#define INFO_TAG_OPEN             INFO_TAG_0 INFO_TAG_1 INFO_TAG_OPEN_2
#define INFO_TAG_CLOSE            INFO_TAG_0 INFO_TAG_1 INFO_TAG_CLOSE_2
#define INFO_TAG_OPEN_RE          INFO_TAG_0 INFO_TAG_1 INFO_TAG_OPEN_2_RE
#define INFO_TAG_CLOSE_RE         INFO_TAG_0 INFO_TAG_1 INFO_TAG_CLOSE_2_RE

/* C/glib cannot really handle \0 in strings, convert to '@' */
#define INFO_C_TAG_0              "@"
#define INFO_C_TAG_OPEN           INFO_C_TAG_0 INFO_TAG_OPEN_2
#define INFO_C_TAG_CLOSE          INFO_C_TAG_0 INFO_TAG_CLOSE_2
#define INFO_C_TAG_OPEN_RE        INFO_C_TAG_0 INFO_TAG_OPEN_2_RE
#define INFO_C_TAG_CLOSE_RE       INFO_C_TAG_0 INFO_TAG_CLOSE_2_RE

/* Opening marker of an image element, in the converted ('@') form. */
#define INFO_C_IMAGE_TAG_OPEN     INFO_C_TAG_OPEN "image"
#define INFO_C_IMAGE_TAG_OPEN_RE  INFO_C_TAG_OPEN_RE "image"
115 static xmlNodePtr
116 info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
118 gchar *title;
119 gchar *text;
120 gchar *alt;
121 xmlNodePtr img;
122 GHashTable *h = info_image_get_attributes (g_match_info_fetch (match_info, 1));
123 gchar *source;
124 if (h)
125 source = (gchar*)g_hash_table_lookup (h, "src");
127 if (!h || !source || !*source)
128 return xmlNewTextChild (parent, NULL, BAD_CAST "para",
129 BAD_CAST "[broken image]");
131 title = (gchar*)g_hash_table_lookup (h, "title");
132 text = (gchar*)g_hash_table_lookup (h, "text");
133 alt = (gchar*)g_hash_table_lookup (h, "alt");
134 g_hash_table_destroy (h);
135 img = xmlNewChild (parent, NULL, BAD_CAST "img", NULL);
136 xmlNewProp (img, BAD_CAST "src", BAD_CAST source);
137 xmlNewProp (img, BAD_CAST "title", BAD_CAST (title ? title : ""));
138 xmlNewProp (img, BAD_CAST "text", BAD_CAST (text ? text : ""));
139 xmlNewProp (img, BAD_CAST "alt", BAD_CAST (alt ? alt : ""));
140 g_free (source);
141 g_free (title);
142 g_free (alt);
143 return parent;
147 If every element of `str' is `ch' then return TRUE, else FALSE.
149 static gboolean
150 string_all_char_p (const gchar* str, gchar ch)
152 for (; *str; str++) {
153 if (*str != ch) return FALSE;
155 return TRUE;
159 If `line' is a line of '*', '=' or '-', return 1,2,3 respectively
160 for the heading level. If it's anything else, return 0.
162 static int
163 header_underline_level (const gchar* line)
165 if (*line != '*' && *line != '=' && *line != '-')
166 return 0;
168 if (string_all_char_p (line, '*')) return 1;
169 if (string_all_char_p (line, '=')) return 2;
170 if (string_all_char_p (line, '-')) return 3;
172 return 0;
176 Use g_strjoinv to join up the strings from `strings', but they might
177 not actually be a null-terminated array. `end' should be strings+n,
178 where I want the first n strings (strings+0, ..., strings+(n-1)). It
179 shouldn't point outside of the array allocated, but it can point at
180 the null string at the end.
182 static gchar*
183 join_strings_subset (const gchar *separator,
184 gchar** strings, gchar** end)
186 gchar *ptr;
187 gchar *glob;
189 g_assert(end > strings);
191 ptr = *end;
192 *end = NULL;
194 glob = g_strjoinv (separator, strings);
195 *end = ptr;
196 return glob;
200 Create a text node, child of `parent', with the lines strictly
201 between `first' and `last'.
203 static void
204 lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
205 gchar** first, gchar** last)
207 /* TODO? Currently we're copying the split strings again, which is
208 less efficient than somehow storing lengths and using a sort of
209 window on `content'. But that's much more difficult, so unless
210 there's a problem, let's go with the stupid approach. */
211 gchar *glob;
213 if (last > first) {
214 glob = join_strings_subset ("\n", first, last);
215 xmlAddChild (parent, xmlNewText (BAD_CAST glob));
216 g_free (glob);
221 Convert body text CONTENT to xml nodes. This function is responsible
222 for spotting headings etc and splitting them out correctly.
224 paragraph is as described in info_body_text, but cannot be null.
226 If `inline_p' is true, end with a <para1> tag. Otherwise, end with a
227 <para> tag.
229 TODO: IWBN add a regex match for *Note: here and call the *Note ==>
230 <a href> logic of info_process_text_notes from here.
232 static void
233 info_body_parse_text (xmlNodePtr parent, xmlNodePtr *paragraph,
234 xmlNsPtr ns,
235 gboolean inline_p, const gchar *content)
237 /* The easiest things to spot are headings: they look like a line of
238 * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot
239 * them, we split content into single lines and work with them. */
240 gchar **lines = g_strsplit (content, "\n", 0);
241 gchar **first = lines, **last = lines;
242 int header_level;
243 xmlNodePtr header_node;
245 /* Deal with the possibility that `content' is empty */
246 if (*lines == NULL) {
247 if (!inline_p) {
248 xmlNewTextChild (parent, NULL, BAD_CAST "para", BAD_CAST "");
250 return;
253 /* Use a pair of pointers, first and last, which point to two lines,
254 * the chunk of the body we're displaying (inclusive) */
255 for (; *last; last++) {
257 /* Check for a blank line */
258 if (**last == '\0') {
259 if (last != first) {
260 if (!*paragraph) {
261 *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
263 lines_subset_text_child (*paragraph, ns, first, last);
265 /* On the next iteration, last==first both pointing at the next
266 line. */
267 first = last+1;
268 *paragraph = NULL;
270 continue;
273 /* Check for a header */
274 header_level = header_underline_level (*last);
275 if (header_level) {
276 /* Write out any lines beforehand */
277 lines_subset_text_child (parent, ns, first, last-1);
278 /* Now write out the actual header line */
279 header_node = xmlNewTextChild (parent, ns, BAD_CAST "header",
280 BAD_CAST *(last-1));
281 xmlNewProp (header_node, BAD_CAST "level",
282 BAD_CAST level_headings[header_level]);
284 first = last+1;
285 last = first-1;
289 /* Write out any lines left */
290 if (!*paragraph) {
291 *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
293 lines_subset_text_child (*paragraph, ns, first, last);
295 g_strfreev (lines);
299 info_body_text is responsible for taking a hunk of the info page's
300 body and turning it into paragraph tags. It searches out images and
301 marks them up properly if necessary.
303 parent should be the node in which we're currently storing text and
304 paragraph a pointer to a <para> tag or NULL. At blank lines, we
305 finish with the current para tag and switch to a new one.
307 It uses info_body_parse_text to mark up the actual bits of text.
309 static void
310 info_body_text (xmlNodePtr parent, xmlNodePtr *paragraph, xmlNsPtr ns,
311 gboolean inline_p, gchar const *content)
313 xmlNodePtr thepara = NULL;
314 gint content_len;
315 gint pos;
316 GRegex *regex;
317 GMatchInfo *match_info;
318 gchar *after;
319 if (paragraph == NULL) paragraph = &thepara;
321 if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) {
322 info_body_parse_text (parent, paragraph, ns, inline_p, content);
323 return;
326 content_len = strlen (content);
327 pos = 0;
328 regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL);
330 g_regex_match (regex, content, 0, &match_info);
331 while (g_match_info_matches (match_info))
333 gint image_start;
334 gint image_end;
335 gboolean image_found = g_match_info_fetch_pos (match_info, 0,
336 &image_start, &image_end);
337 gchar *before = g_strndup (&content[pos], image_start - pos);
338 pos = image_end + 1;
339 info_body_parse_text (parent, paragraph, NULL, TRUE, before);
340 g_free (before);
342 /* End the paragraph that was before */
343 *paragraph = NULL;
345 if (image_found)
346 info_insert_image (parent, match_info);
347 g_match_info_next (match_info, NULL);
349 after = g_strndup (&content[pos], content_len - pos);
350 info_body_parse_text (parent, paragraph, NULL, TRUE, after);
351 g_free (after);
354 /* Part 1: Parse File Into Tree Store */
/* Kinds of page that page_type can report. */
enum
{
    PAGE_TAG_TABLE = 0,   /* starts with "Tag Table:"        */
    PAGE_NODE      = 1,   /* starts with "File:" or "Node:"  */
    PAGE_INDIRECT  = 2,   /* starts with "Indirect:"         */
    PAGE_OTHER     = 3    /* none of the above               */
};
364 static int
365 page_type (char *page)
367 if (g_ascii_strncasecmp (page, "Tag Table:\n", 11) == 0)
368 return PAGE_TAG_TABLE;
369 else if (g_ascii_strncasecmp (page, "Indirect:\n", 10) == 0)
370 return PAGE_INDIRECT;
371 else if (g_ascii_strncasecmp (page, "File:", 5) == 0 ||
372 g_ascii_strncasecmp (page, "Node:", 5) == 0)
373 return PAGE_NODE;
375 else
376 return PAGE_OTHER;
379 static char
380 *open_info_file (const gchar *file)
382 GFile *gfile;
383 GConverter *converter;
384 GFileInputStream *file_stream;
385 GInputStream *stream;
386 gchar buf[1024];
387 gssize bytes;
388 GString *string;
389 gchar *str;
390 gsize i;
392 gfile = g_file_new_for_path (file);
393 file_stream = g_file_read (gfile, NULL, NULL);
394 converter = (GConverter *) yelp_magic_decompressor_new ();
395 stream = g_converter_input_stream_new ((GInputStream *) file_stream, converter);
396 string = g_string_new (NULL);
398 while ((bytes = g_input_stream_read (stream, buf, 1024, NULL, NULL)) > 0)
399 g_string_append_len (string, buf, bytes);
401 g_object_unref (stream);
403 str = string->str;
405 /* C/glib * cannot really handle \0 in strings, convert. */
406 for (i = 0; i < (string->len - 1); i++)
407 if (str[i] == INFO_TAG_OPEN[0] && str[i+1] == INFO_TAG_OPEN[1])
408 str[i] = INFO_C_TAG_OPEN[0];
410 g_string_free (string, FALSE);
412 return str;
415 static gchar *
416 find_info_part (gchar *part_name, const gchar *base)
418 /* New and improved. We now assume that all parts are
419 * in the same subdirectory as the base file. Makes
420 * life much simpler and is (afaict) always true
422 gchar *path;
423 gchar *tmp;
424 gchar *bzfname, *gzfname, *lzfd, *fname;
425 gchar *uri = NULL;
426 tmp = g_strrstr (base, "/");
427 path = g_strndup (base, tmp-base);
429 bzfname = g_strconcat (path, "/", part_name, ".bz2", NULL);
430 gzfname = g_strconcat (path, "/", part_name, ".gz", NULL);
431 lzfd = g_strconcat (path, "/", part_name, ".lzma", NULL);
432 fname = g_strconcat (path, "/", part_name, NULL);
434 if (g_file_test (bzfname, G_FILE_TEST_EXISTS))
435 uri = g_strdup (bzfname);
436 else if (g_file_test (gzfname, G_FILE_TEST_EXISTS))
437 uri = g_strdup (gzfname);
438 else if (g_file_test (lzfd, G_FILE_TEST_EXISTS))
439 uri = g_strdup (lzfd);
440 else if (g_file_test (fname, G_FILE_TEST_EXISTS))
441 uri = g_strdup (fname);
443 g_free (bzfname);
444 g_free (gzfname);
445 g_free (lzfd);
446 g_free (fname);
447 g_free (path);
448 return uri;
452 static char
453 *process_indirect_map (char *page, const gchar *file)
455 char **lines;
456 char **ptr;
457 char *composite = NULL;
458 size_t composite_len = 0;
460 lines = g_strsplit (page, "\n", 0);
463 Go backwards down the list so that we allocate composite
464 big enough the first time around.
466 for (ptr = lines + 1; *ptr != NULL; ptr++);
467 for (ptr--; ptr != lines; ptr--)
469 char **items;
470 char *filename;
471 char *str;
472 char **pages;
473 gsize offset;
474 gsize plength;
476 debug_print (DB_DEBUG, "Line: %s\n", *ptr);
477 items = g_strsplit (*ptr, ": ", 2);
479 if (items[0])
481 filename = find_info_part (items[0], file);
482 str = open_info_file (filename);
483 if (!str) {
484 g_strfreev (items);
485 continue;
487 pages = g_strsplit (str, "\x1f", 2);
488 g_free (str);
489 if (!pages[1]) {
490 g_strfreev (items);
491 g_strfreev (pages);
492 continue;
495 offset = (gsize) atoi (items[1]);
496 plength = strlen(pages[1]);
498 debug_print (DB_DEBUG, "Need to make string %s+%i bytes = %i\n",
499 items[1], plength,
500 offset + plength);
502 if (!composite) /* not yet created, malloc it */
504 composite_len = offset + plength;
505 composite = g_malloc (sizeof (char) *
506 (composite_len + 1));
507 memset (composite, '-', composite_len);
508 composite[composite_len] = '\0';
511 /* Because we're going down the list
512 * backwards, plength should always be short
513 * enough to fit in the memory allocated. But
514 * in case something's broken/malicious, we
515 * should check anyway.
517 if (offset > composite_len)
518 continue;
519 if (plength + offset + 1 > composite_len)
520 plength = composite_len - offset - 1;
522 composite[offset] = '\x1f';
523 memcpy (composite + offset + 1, pages[1], plength);
525 g_free (filename);
526 g_strfreev (pages);
529 g_strfreev (items);
532 g_strfreev (lines);
534 return composite;
538 Open up the relevant info file and read it all into memory. If there
539 is an indirect table thingy, we resolve that as we go.
541 Returns a NULL-terminated list of pointers to pages on success and
542 NULL otherwise.
544 static gchar**
545 expanded_info_file (const gchar *file)
547 gchar *slurp = open_info_file (file);
548 gchar **page_list;
549 gchar **page;
551 if (!slurp) return NULL;
553 /* TODO: There's a lot of copying of bits of memory here. With a bit
554 * more effort we could avoid it. Either we should fix this or
555 * measure the time taken and decide it's irrelevant...
557 * Note: \x1f\n is ^_\n
559 page_list = g_strsplit (slurp, "\x1f\n", 0);
561 g_free (slurp);
563 for (page = page_list; *page != NULL; page++) {
564 if (page_type (*page) == PAGE_INDIRECT) {
566 slurp = process_indirect_map (*page, file);
567 g_strfreev (page_list);
569 if (!slurp)
570 return NULL;
572 page_list = g_strsplit (slurp, "\x1f\n", 0);
573 g_free (slurp);
574 break;
578 return page_list;
/*
  Extract the text that follows the first occurrence of `key' in
  `source'. For example "blah" from "Node: blah," with key "Node: ".

  The value runs up to (not including) the first character that is in
  `end', or to the end of the string if there is none. If a character
  from `cancel' turns up before that point, NULL is returned instead.
  `cancel' may be NULL to disable that test.

  Returns a newly allocated string, or NULL when `key' is not present
  or the search was cancelled.
*/
static char*
get_value_after_ext (const char *source, const char *key,
                     const char *end, const char *cancel)
{
    const char *value;
    size_t value_len;

    value = strstr (source, key);
    if (value == NULL)
        return NULL;
    value += strlen (key);

    value_len = strcspn (value, end);

    if (cancel != NULL && strcspn (value, cancel) < value_len)
        return NULL;

    return g_strndup (value, value_len);
}
/*
  Shorthand for get_value_after_ext with the usual delimiters: the
  value ends at a comma, and a newline or \x7f before that cancels it.
*/
static char*
get_value_after (const char* source, const char *key)
{
    static const char value_end[] = ",";
    static const char value_cancel[] = "\n\x7f";

    return get_value_after_ext (source, key, value_end, value_cancel);
}
620 static int
621 node2page (GHashTable *nodes2pages, char *node)
623 gpointer p;
625 if (g_hash_table_lookup_extended (nodes2pages, node,
626 NULL, &p))
627 return GPOINTER_TO_INT(p);
629 /* This shouldn't happen: we should only ever have to look up pages
630 * that exist. */
631 g_return_val_if_reached (0);
634 static GtkTreeIter
635 *node2iter (GHashTable *nodes2iters, char *node)
637 GtkTreeIter *iter;
639 iter = g_hash_table_lookup (nodes2iters, node);
640 d (if (!iter) debug_print (DB_WARN, "Could not retrieve iter for node !%s!\n", node));
641 return iter;
644 GtkTreeIter
645 *find_real_top (GtkTreeModel *model, GtkTreeIter *it)
647 GtkTreeIter *r = NULL;
648 GtkTreeIter *tmp = NULL;
650 if (!it)
651 return NULL;
653 r = gtk_tree_iter_copy (it);
654 tmp = g_malloc0 (sizeof (GtkTreeIter));
655 while (gtk_tree_model_iter_parent (model, tmp, r)) {
656 gtk_tree_iter_free (r);
657 r = gtk_tree_iter_copy (tmp);
659 g_free (tmp);
661 return r;
664 GtkTreeIter * find_real_sibling (GtkTreeModel *model,
665 GtkTreeIter *it, GtkTreeIter *comp)
667 GtkTreeIter *r;
668 GtkTreeIter *tmp = NULL;
669 gboolean result = FALSE;
670 gchar *title;
671 gchar *reftitle;
673 if (!it) {
674 return NULL;
677 r = gtk_tree_iter_copy (it);
678 tmp = gtk_tree_iter_copy (it);
680 reftitle = gtk_tree_model_get_string_from_iter (model, comp);
682 result = gtk_tree_model_iter_parent (model, r, it);
683 if (!result)
684 return it;
686 title = gtk_tree_model_get_string_from_iter (model, r);
688 while (!g_str_equal (title, reftitle) && result) {
689 gtk_tree_iter_free (tmp);
690 tmp = gtk_tree_iter_copy (r);
691 result = gtk_tree_model_iter_parent (model, r, tmp);
692 if (result)
693 title = gtk_tree_model_get_string_from_iter (model, r);
696 if (!g_str_equal (title, reftitle))
698 gtk_tree_iter_free (tmp);
699 tmp = NULL;
702 gtk_tree_iter_free (r);
703 g_free (title);
704 g_free (reftitle);
705 return tmp;
709 static void
710 process_page (GtkTreeStore *tree,
711 GHashTable *nodes2pages, GHashTable *nodes2iters,
712 int *processed_table, char **page_list, char *page_text)
714 GtkTreeIter *iter;
716 char **parts;
717 char *node;
718 char *up;
719 char *prev;
720 char *next;
721 gchar *tmp;
723 int page;
725 /* split out the header line and the text */
726 parts = g_strsplit (page_text, "\n", 3);
728 node = get_value_after (parts[0], "Node: ");
729 up = get_value_after (parts[0], "Up: ");
730 prev = get_value_after (parts[0], "Prev: ");
731 next = get_value_after (parts[0], "Next: ");
733 if (next && g_str_equal (next, "Top")) {
734 g_free (next);
735 next = NULL;
737 if (g_str_equal (node, "Top") && prev != NULL) {
738 g_free (prev);
739 prev = NULL;
742 /* check to see if this page has been processed already */
743 page = node2page (nodes2pages, node);
744 if (processed_table[page]) {
745 return;
747 processed_table[page] = 1;
749 debug_print (DB_DEBUG, "-- Processing Page %s\n\tParent: %s\n", node, up);
751 iter = g_slice_alloc0 (sizeof (GtkTreeIter));
752 /* check to see if we need to process our parent and siblings */
753 if (up && g_ascii_strncasecmp (up, "(dir)", 5) && strcmp (up, "Top"))
755 page = node2page (nodes2pages, up);
756 if (!processed_table[page])
758 debug_print (DB_DEBUG, "%% Processing Node %s\n", up);
759 process_page (tree, nodes2pages,
760 nodes2iters, processed_table, page_list,
761 page_list[page]);
764 if (prev && g_ascii_strncasecmp (prev, "(dir)", 5))
766 if (strncmp (node, "Top", 3)) {
767 /* Special case the Top node to always appear first */
768 } else {
769 page = node2page (nodes2pages, prev);
770 if (!processed_table[page])
772 debug_print (DB_DEBUG, "%% Processing Node %s\n", prev);
773 process_page (tree, nodes2pages,
774 nodes2iters, processed_table, page_list,
775 page_list[page]);
780 /* by this point our parent and older sibling should be processed */
781 if (!up || !g_ascii_strcasecmp (up, "(dir)"))
783 debug_print (DB_DEBUG, "\t> no parent\n");
784 if (!prev || !g_ascii_strcasecmp (prev, "(dir)"))
786 debug_print (DB_DEBUG, "\t> no previous\n");
787 gtk_tree_store_append (tree, iter, NULL);
789 else if (prev) {
790 GtkTreeIter *real;
791 real = find_real_top (GTK_TREE_MODEL (tree),
792 node2iter (nodes2iters, prev));
793 if (real) {
794 gtk_tree_store_insert_after (tree, iter, NULL,
795 real);
796 gtk_tree_iter_free (real);
798 else
799 gtk_tree_store_append (tree, iter, NULL);
802 else if (!prev || !g_ascii_strcasecmp (prev, "(dir)") || !strcmp (prev, up))
804 debug_print (DB_DEBUG, "\t> no previous\n");
805 gtk_tree_store_append (tree, iter,
806 node2iter (nodes2iters, up));
808 else if (up && prev)
810 GtkTreeIter *upit = node2iter (nodes2iters, up);
811 GtkTreeIter *previt = node2iter (nodes2iters, prev);
812 GtkTreeIter *nit = NULL;
813 debug_print (DB_DEBUG, "+++ Parent: %s Previous: %s\n", up, prev);
815 d (if (upit) debug_print (DB_DEBUG, "++++ Have parent node!\n"));
816 d (if (previt) debug_print (DB_DEBUG, "++++ Have previous node!\n"));
817 nit = find_real_sibling (GTK_TREE_MODEL (tree), previt, upit);
818 if (nit) {
819 gtk_tree_store_insert_after (tree, iter,
820 upit,
821 nit);
822 gtk_tree_iter_free (nit);
824 else
825 gtk_tree_store_append (tree, iter, upit);
827 else
829 debug_print (DB_DEBUG, "# node %s was not put in tree\n", node);
830 return;
833 d (if (iter) debug_print (DB_DEBUG, "Have a valid iter, storing for %s\n", node));
835 g_hash_table_insert (nodes2iters, g_strdup (node), iter);
836 debug_print (DB_DEBUG, "size: %i\n", g_hash_table_size (nodes2iters));
838 /*tmp = g_strdup_printf ("%i",
839 node2page (nodes2pages, node));*/
840 tmp = g_strdup (node);
841 tmp = g_strdelimit (tmp, " ", '_');
842 gtk_tree_store_set (tree, iter,
843 INFO_PARSER_COLUMN_PAGE_NO, tmp,
844 INFO_PARSER_COLUMN_PAGE_NAME, node,
845 INFO_PARSER_COLUMN_PAGE_CONTENT, parts[2],
846 -1);
848 g_free (tmp);
849 g_free (node);
850 g_free (up);
851 g_free (prev);
852 g_free (next);
853 g_strfreev (parts);
856 struct TagTableFix {
857 GHashTable *nodes2pages; /* Build this... */
858 GHashTable *pages2nodes; /* ... using this. */
861 static void
862 use_offset2page (gpointer o, gpointer p, gpointer ud)
864 struct TagTableFix* ttf = (struct TagTableFix*)ud;
866 const gchar* node = g_hash_table_lookup (ttf->pages2nodes, p);
867 if (node) {
868 g_hash_table_insert (ttf->nodes2pages, g_strdup (node), p);
873 We had a nodes2offsets hash table, but sometimes these things
874 lie. How terribly rude. Anyway, use offsets2pages and pages2nodes
875 (and injectivity!) to construct the nodes2pages hash table.
877 static GHashTable *
878 make_nodes2pages (GHashTable* offsets2pages,
879 GHashTable* pages2nodes)
881 struct TagTableFix ttf;
883 ttf.nodes2pages =
884 g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
885 ttf.pages2nodes = pages2nodes;
887 g_hash_table_foreach (offsets2pages, use_offset2page, &ttf);
889 return ttf.nodes2pages;
893 * Parse file into a GtkTreeStore containing useful information that we can
894 * later convert into a nice XML document or something else.
896 GtkTreeStore
897 *yelp_info_parser_parse_file (char *file)
899 gchar **page_list;
900 char **ptr;
901 int pages;
902 int offset;
903 GHashTable *offsets2pages = NULL;
904 GHashTable *pages2nodes = NULL;
905 GHashTable *nodes2pages = NULL;
906 GHashTable *nodes2iters = NULL;
907 int *processed_table;
908 GtkTreeStore *tree;
909 int pt;
911 page_list = expanded_info_file (file);
912 if (!page_list)
913 return NULL;
915 pages = 0;
916 offset = 0;
918 offsets2pages = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
919 NULL);
920 pages2nodes = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL,
921 g_free);
923 for (ptr = page_list; *ptr != NULL; ptr++)
925 gchar *name = NULL;
927 g_hash_table_insert (offsets2pages,
928 g_strdup_printf ("%i", offset),
929 GINT_TO_POINTER (pages));
931 name = get_value_after (*ptr, "Node: ");
932 if (name)
933 g_hash_table_insert (pages2nodes,
934 GINT_TO_POINTER (pages), name);
936 offset += strlen (*ptr);
937 if (pages) offset += 2;
938 pages++;
940 pt = page_type (*ptr);
941 if (pt == PAGE_INDIRECT) {
942 g_warning ("Found an indirect page in a file "
943 "we thought we'd expanded.");
947 /* Now consolidate (and correct) the two hash tables */
948 nodes2pages = make_nodes2pages (offsets2pages, pages2nodes);
950 g_hash_table_destroy (offsets2pages);
951 g_hash_table_destroy (pages2nodes);
953 processed_table = g_malloc0 (pages * sizeof (int));
954 tree = gtk_tree_store_new (INFO_PARSER_N_COLUMNS, G_TYPE_STRING, G_TYPE_STRING,
955 G_TYPE_STRING);
956 nodes2iters = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
957 (GDestroyNotify) gtk_tree_iter_free);
959 pages = 0;
960 for (ptr = page_list; *ptr != NULL; ptr++)
962 if (page_type (*ptr) != PAGE_NODE) continue;
963 process_page (tree, nodes2pages, nodes2iters,
964 processed_table, page_list, *ptr);
967 g_strfreev (page_list);
969 g_hash_table_destroy (nodes2iters);
970 g_hash_table_destroy (nodes2pages);
972 g_free (processed_table);
974 return tree;
977 /* End Part 1 */
978 /* Part 2: Parse Tree into XML */
979 static void
980 parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
982 GtkTreeIter children, parent;
983 xmlNodePtr newnode;
985 char *page_no = NULL;
986 char *page_name = NULL;
987 char *page_content = NULL;
988 gboolean notes = FALSE;
990 debug_print (DB_DEBUG, "Decended\n");
993 gtk_tree_model_get (GTK_TREE_MODEL (tree), &iter,
994 INFO_PARSER_COLUMN_PAGE_NO, &page_no,
995 INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
996 INFO_PARSER_COLUMN_PAGE_CONTENT, &page_content,
997 -1);
998 debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
999 if (strstr (page_content, "*Note") ||
1000 strstr (page_content, "*note")) {
1001 notes = TRUE;
1003 if (strstr (page_content, "* Menu:")) {
1004 newnode = yelp_info_parse_menu (tree, node, page_content, notes);
1005 } else {
1006 newnode = xmlNewTextChild (*node, NULL,
1007 BAD_CAST "Section",
1008 NULL);
1009 if (!notes)
1010 info_body_text (newnode, NULL, NULL, FALSE, page_content);
1012 else {
1013 /* Handle notes here */
1014 info_process_text_notes (&newnode, page_content, tree);
1017 /* if we free the page content, now it's in the XML, we can
1018 * save some memory */
1019 g_free (page_content);
1020 page_content = NULL;
1022 if (gtk_tree_model_iter_parent (GTK_TREE_MODEL (tree), &parent, &iter)) {
1023 gchar *parent_id;
1024 gtk_tree_model_get (GTK_TREE_MODEL (tree), &parent,
1025 INFO_PARSER_COLUMN_PAGE_NO, &parent_id,
1026 -1);
1027 xmlNewProp (newnode, BAD_CAST "up", BAD_CAST parent_id);
1028 g_free (parent_id);
1031 xmlNewProp (newnode, BAD_CAST "id",
1032 BAD_CAST page_no);
1033 xmlNewProp (newnode, BAD_CAST "name",
1034 BAD_CAST page_name);
1035 if (gtk_tree_model_iter_children (GTK_TREE_MODEL (tree),
1036 &children,
1037 &iter))
1038 parse_tree_level (tree, &newnode, children);
1039 g_free (page_no);
1040 g_free (page_name);
1042 while (gtk_tree_model_iter_next (GTK_TREE_MODEL (tree), &iter));
1043 debug_print (DB_DEBUG, "Ascending\n");
1046 xmlDocPtr
1047 yelp_info_parser_parse_tree (GtkTreeStore *tree)
1049 xmlDocPtr doc;
1050 xmlNodePtr node;
1051 GtkTreeIter iter;
1054 xmlChar *xmlbuf;
1055 int bufsiz;
1058 doc = xmlNewDoc (BAD_CAST "1.0");
1059 node = xmlNewNode (NULL, BAD_CAST "Info");
1060 xmlDocSetRootElement (doc, node);
1062 /* functions I will want:
1063 gtk_tree_model_get_iter_first;
1064 gtk_tree_model_iter_next;
1065 gtk_tree_model_iter_children;
1068 if (gtk_tree_model_get_iter_first (GTK_TREE_MODEL (tree), &iter))
1069 parse_tree_level (tree, &node, iter);
1070 d (else debug_print (DB_DEBUG, "Empty tree?\n"));
1073 xmlDocDumpFormatMemory (doc, &xmlbuf, &bufsiz, 1);
1074 g_print ("XML follows:\n%s\n", xmlbuf);
1077 return doc;
1080 gboolean
1081 resolve_frag_id (GtkTreeModel *model, GtkTreePath *path, GtkTreeIter *iter,
1082 gpointer data)
1084 gchar *page_no = NULL;
1085 gchar *page_name = NULL;
1086 gchar **xref = data;
1088 gtk_tree_model_get (GTK_TREE_MODEL (model), iter,
1089 INFO_PARSER_COLUMN_PAGE_NO, &page_no,
1090 INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
1091 -1);
1092 if (g_str_equal (page_name, *xref)) {
1093 g_free (*xref);
1094 *xref = g_strdup (page_name);
1095 *xref = g_strdelimit (*xref, " ", '_');
1097 g_free (page_name);
1098 g_free (page_no);
1099 return TRUE;
1101 g_free (page_name);
1102 g_free (page_no);
1104 return FALSE;
1107 gboolean
1108 get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc,
1109 gchar **xref)
1111 /* Since info is actually braindead and allows .s in
1112 * its references, we gotta carefully extract things
1113 * as .s can be in either the title or desc
1115 gchar *tmp = line;
1116 gchar *tfind = NULL;
1118 if (!g_str_has_prefix (line, "* "))
1119 return FALSE;
1121 tfind = strchr (tmp, ':');
1123 if (!tfind) /* No : on the line, bail out */
1124 return FALSE;
1126 (*title) = g_strndup (tmp, tfind-tmp);
1128 if (tfind[1] == ':') { /* This happens if the title and ref are the same
1129 * Most menus are of this type
1132 (*ref) = NULL; /* There is no second part. The rest is description */
1134 tmp++;
1135 (*xref) = g_strndup (tmp, tfind-tmp);
1136 g_strstrip (*xref);
1138 tfind+=2;
1139 (*desc) = g_strdup (tfind);
1140 } else { /* The other type of menu option */
1141 gchar *td = NULL;
1143 tfind++;
1144 td = strchr (tfind, '.');
1145 if (!td)
1146 return FALSE;
1147 (*ref) = g_strndup (tfind, td-tfind);
1148 (*xref) = g_strdup (*ref);
1149 g_strstrip (*xref);
1151 td++;
1152 (*desc) = g_strdup (td);
1154 return TRUE;
1157 /* Find the first non whitespace character in str or return pointer to the
1158 * '\0' if there isn't one. */
1159 static gchar*
1160 first_non_space (gchar* str)
1162 /* As long as str is null terminated, this is ok! */
1163 while (g_ascii_isspace (*str)) str++;
1164 return str;
1167 static xmlNodePtr
1168 yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
1169 gchar *page_content, gboolean notes)
1171 gchar **split;
1172 gchar **menuitems;
1173 gchar *tmp = NULL;
1174 xmlNodePtr newnode, menu_node, mholder = NULL;
1175 int i=0;
1177 split = g_strsplit (page_content, "* Menu:", 2);
1179 newnode = xmlNewChild (*node, NULL,
1180 BAD_CAST "Section", NULL);
1183 if (!notes)
1184 info_body_text (newnode, NULL, NULL, FALSE, split[0]);
1185 else {
1186 info_process_text_notes (&newnode, split[0], tree);
1189 menuitems = g_strsplit (split[1], "\n", -1);
1190 g_strfreev (split);
1192 /* The output xml should look something like the following:
1194 <menu>
1195 <menuholder>
1196 <a href="xref:Help-Inv">Help-Inv</a>
1197 <para1>Invisible text in Emacs Info.</para1>
1198 </menuholder>
1199 <menuholder>
1200 <a href="xref:Help-M">Help-M</a>
1201 <para1>Menus.</para1>
1202 </menuholder>
1204 </menu>
1206 (from the top page of info:info). Note the absence of *'s and
1207 ::'s on the links.
1209 If there's a line with no "* Blah::", it looks like a child of
1210 the previous menu item so (for i > 0) deal with that correctly by
1211 not "closing" the <menuholder> tag until we find the next
1212 start.
1215 if (menuitems[0] != NULL) {
1216 /* If there are any menu items, make the <menu> node */
1217 menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
1220 while (menuitems[i] != NULL) {
1221 gboolean menu = FALSE;
1222 gchar *title = NULL;
1223 gchar *ref = NULL;
1224 gchar *desc = NULL;
1225 gchar *xref = NULL;
1226 gchar *link_text = NULL;
1227 xmlNodePtr ref1;
1229 menu = get_menuoptions (menuitems[i], &title, &ref, &desc, &xref);
1231 if (menu && (*title == '\0' || *(title + 1) == '\0')) {
1232 g_warning ("Info title unexpectedly short for menu item (%s)",
1233 menuitems[i]);
1234 menu = FALSE;
1237 if (menu) {
1238 mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
1239 gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
1241 if (ref == NULL) { /* A standard type menu */
1242 /* title+2 skips the "* ". We know we haven't jumped over the
1243 end of the string because strlen (title) >= 3 */
1244 link_text = g_strdup (title+2);
1246 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1247 BAD_CAST link_text);
1249 tmp = g_strconcat ("xref:", xref, NULL);
1250 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1251 g_free (tmp);
1252 } else { /* Indexy type menu - we gotta do a little work to fix the
1253 * spacing
1255 gchar *spacing = ref;
1256 gint c=0;
1257 gchar *sp = NULL;
1259 while (*spacing ==' ') {
1260 c++;
1261 spacing++;
1263 sp = g_strndup (ref, c);
1265 link_text = g_strdup (title);
1267 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1268 BAD_CAST link_text);
1269 tmp = g_strconcat ("xref:", xref, NULL);
1270 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1271 g_free (tmp);
1272 xmlNewTextChild (mholder, NULL, BAD_CAST "spacing",
1273 BAD_CAST sp);
1274 tmp = g_strconcat (g_strstrip(ref), ".", NULL);
1275 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1276 BAD_CAST tmp);
1277 g_free (tmp);
1278 tmp = g_strconcat ("xref:", xref, NULL);
1279 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1281 g_free (tmp);
1282 g_free (sp);
1285 tmp = g_strconcat ("\n", first_non_space (desc), NULL);
1288 Don't print the link text a second time, because that looks
1289 really stupid.
1291 We don't do a straight check for equality because lots of
1292 .info files have something like
1294 * Foo:: Foo.
1296 Obviously if the longer explanation has more afterwards, we
1297 don't want to omit it, which is why there's the strlen test.
1299 if (strncmp (link_text, tmp + 1, strlen (link_text)) ||
1300 strlen (link_text) + 1 < strlen (tmp + 1)) {
1301 xmlNewTextChild (mholder, NULL,
1302 BAD_CAST "para1", BAD_CAST tmp);
1305 g_free (tmp);
1306 g_free (link_text);
1308 else if (*(menuitems[i]) != '\0') {
1309 tmp = g_strconcat ("\n", first_non_space (menuitems[i]), NULL);
1310 xmlNewTextChild (mholder ? mholder : menu_node,
1311 NULL, BAD_CAST "para1",
1312 BAD_CAST tmp);
1313 g_free (tmp);
1315 i++;
1316 g_free (title);
1317 g_free (ref);
1318 g_free (desc);
1319 g_free (xref);
1322 g_strfreev (menuitems);
1324 return newnode;
1327 void
1328 info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
1330 gchar **notes;
1331 gchar **current;
1332 xmlNodePtr ref1;
1333 xmlNodePtr paragraph = NULL;
1334 gboolean first = TRUE;
1337 Split using the regular expression
1339 \*[Nn]ote(?!_)
1341 which deals with either case and the last bit is a lookahead so
1342 that we don't split on things of the form *Note:_, which aren't
1343 real notes.
1345 notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
1347 for (current = notes; *current != NULL; current++) {
1348 gchar *url, **urls;
1349 gchar *append;
1350 gchar *alt_append, *alt_append1;
1351 gchar *link_text;
1352 gchar *href = NULL;
1353 gchar *break_point = NULL;
1354 gboolean broken = FALSE;
1355 if (first) {
1356 /* The first node is special. It doesn't have a note ref at the
1357 * start, so we can just add it and forget about it.
1359 first = FALSE;
1360 info_body_text (*node, &paragraph, NULL, TRUE, (*current));
1361 continue;
1364 /* If we got to here, we now gotta parse the note reference */
1365 append = strchr (*current, ':');
1366 if (!append) {
1367 info_body_text (*node, &paragraph, NULL, TRUE, *current);
1368 continue;
1370 append++;
1371 alt_append = append;
1372 alt_append1 = alt_append;
1373 append = strchr (append, ':');
1374 alt_append = strchr (alt_append, '.');
1375 if (alt_append && g_str_has_prefix (alt_append, ".info")) {
1376 broken = TRUE;
1377 alt_append++;
1378 alt_append = strchr (alt_append, '.');
1380 alt_append1 = strchr (alt_append1, ',');
1381 if (!append && !alt_append && !alt_append1) {
1382 info_body_text (*node, &paragraph, NULL, TRUE, *current);
1383 continue;
1385 if (!append || alt_append || alt_append1) {
1386 if (!append) {
1387 if (alt_append) append = alt_append;
1388 else append = alt_append1;
1390 if ((alt_append && alt_append < append))
1391 append = alt_append;
1392 if (alt_append1 && alt_append1 < append)
1393 append = alt_append1;
1395 append++;
1396 url = g_strndup (*current, append - (*current));
1398 /* Save a copy of the unadulterated link text for later. */
1399 link_text = g_strconcat ("*Note", url, NULL);
1401 /* By now, we got 2 things. First, is append which is the (hopefully)
1402 * non-link text. Second, we got a url.
1403 * The url can be in several forms:
1404 * 1. linkend::
1405 * 2. linkend:(infofile)Linkend.
1406 * 3. Title: Linkend.
1407 * 4. Title: Linkend, (pretty sure this is just broken)
1408 * 5. Title: (infofile.info)Linkend.
1409 * All possibilities should have been picked up.
1410 * Here:
1411 * Clean up the split. Should be left with a real url and
1412 * a list of fragments that should be linked
1413 * Also goes through and removes extra spaces, leaving only one
1414 * space in place of many
1416 urls = g_strsplit (url, "\n", -1);
1417 break_point = strchr (url, '\n');
1418 while (break_point) {
1419 *break_point = ' ';
1420 break_point = strchr (++break_point, '\n');
1422 break_point = strchr (url, ' ');
1423 while (break_point) {
1424 if (*(break_point+1) == ' ') {
1425 /* Massive space. Fix. */
1426 gchar *next = break_point;
1427 gchar *url_copy;
1428 gchar *old = url;
1429 while (*next == ' ')
1430 next++;
1431 next--;
1432 url_copy = g_strndup (url, break_point-url);
1433 url = g_strconcat (url_copy, next, NULL);
1434 g_free (old);
1435 break_point = strchr (url, ' ');
1436 g_free (url_copy);
1437 } else {
1438 break_point++;
1439 break_point = strchr (break_point, ' ');
1442 if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */
1443 gchar *stop = NULL;
1444 gchar *lurl = NULL;
1445 gchar *zloc = NULL;
1446 stop = strchr (url, ':');
1447 lurl = strchr (stop, '(');
1448 if (!lurl) { /* 3rd type of link */
1449 gchar *link;
1450 gint length;
1451 stop++;
1452 link = g_strdup (stop);
1453 link = g_strstrip (link);
1454 length = strlen (link) - 1;
1455 link[length] = '\0';
1456 href = g_strconcat ("xref:", link, NULL);
1457 link[length] = 'a';
1458 g_free (link);
1461 } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */
1462 if (broken) {
1463 gchar *new_url;
1464 gchar *info;
1465 gchar *stripped;
1467 new_url = g_strdup (lurl);
1468 info = strstr (new_url, ".info)");
1469 stripped = g_strndup (new_url, info-new_url);
1470 info +=5;
1471 lurl = g_strconcat (stripped, info, NULL);
1472 g_free (stripped);
1473 g_free (new_url);
1475 zloc = &(lurl[strlen(lurl)-1]);
1476 *zloc = '\0';
1477 href = g_strconcat ("info:", lurl, NULL);
1478 *zloc = 'a';
1480 } else { /* First kind of link */
1481 gchar *tmp1;
1482 gchar *frag;
1484 tmp1 = strchr (url, ':');
1485 if (!tmp1)
1486 frag = g_strdup (url);
1487 else
1488 frag = g_strndup (url, tmp1 - url);
1489 g_strstrip (frag);
1490 gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
1491 href = g_strconcat ("xref:", frag, NULL);
1492 g_free (frag);
1495 /* Check we've got a valid paragraph node */
1496 if (!paragraph) {
1497 paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
1501 Now we're supposed to actually render the link. I have a list of
1502 bits of URL and actually this is really easy - I want to have
1503 the link *text* exactly the same as it appeared in the .info
1504 file, so don't use the list of strings urls, instead use the
1505 whole lot: url (complete with embedded newlines etc.)
1507 ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
1508 BAD_CAST link_text);
1509 g_free (link_text);
1510 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
1512 g_strfreev (urls);
1514 /* Finally, we can add the following text as required */
1515 info_body_text (*node, &paragraph, NULL, TRUE, append);
1517 g_free (url);
1518 g_free (href);
1520 g_strfreev (notes);