/* Interface to libxml2.
   Copyright (C) 2010-2018 Free Software Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
/* Everything up to the matching "#endif /* HAVE_LIBXML2 */" is only
   compiled when libxml2 support is configured.  */
#ifdef HAVE_LIBXML2

#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
36 DEF_DLL_FN (htmlDocPtr
, htmlReadMemory
,
37 (const char *, int, const char *, const char *, int));
38 DEF_DLL_FN (xmlDocPtr
, xmlReadMemory
,
39 (const char *, int, const char *, const char *, int));
40 DEF_DLL_FN (xmlNodePtr
, xmlDocGetRootElement
, (xmlDocPtr
));
41 DEF_DLL_FN (void, xmlFreeDoc
, (xmlDocPtr
));
42 DEF_DLL_FN (void, xmlCleanupParser
, (void));
43 DEF_DLL_FN (void, xmlCheckVersion
, (int));
46 libxml2_loaded_p (void)
48 Lisp_Object found
= Fassq (Qlibxml2
, Vlibrary_cache
);
50 return CONSP (found
) && EQ (XCDR (found
), Qt
);
/* Redirect the libxml2 names used in the rest of this file to the
   fn_* symbols.  Every name that is redefined is undefined first so
   that a macro of the same name cannot cause a redefinition
   diagnostic.  */
# undef htmlReadMemory
# undef xmlCheckVersion
# undef xmlCleanupParser
# undef xmlDocGetRootElement
# undef xmlFreeDoc
# undef xmlReadMemory

# define htmlReadMemory fn_htmlReadMemory
# define xmlCheckVersion fn_xmlCheckVersion
# define xmlCleanupParser fn_xmlCleanupParser
# define xmlDocGetRootElement fn_xmlDocGetRootElement
# define xmlFreeDoc fn_xmlFreeDoc
# define xmlReadMemory fn_xmlReadMemory

68 load_dll_functions (HMODULE library
)
70 LOAD_DLL_FN (library
, htmlReadMemory
);
71 LOAD_DLL_FN (library
, xmlReadMemory
);
72 LOAD_DLL_FN (library
, xmlDocGetRootElement
);
73 LOAD_DLL_FN (library
, xmlFreeDoc
);
74 LOAD_DLL_FN (library
, xmlCleanupParser
);
75 LOAD_DLL_FN (library
, xmlCheckVersion
);
79 #else /* !WINDOWSNT */
82 libxml2_loaded_p (void)
87 #endif /* !WINDOWSNT */
/* Make the libxml2 functions usable and return true on success.

   Under WINDOWSNT: if the library is already recorded as loaded,
   return true.  Otherwise load it with w32_delayed_load; if that
   fails, show a message and return false without touching
   Vlibrary_cache.  If the library loads but load_dll_functions
   fails, record (Qlibxml2 . Qnil) in Vlibrary_cache and return false;
   on success record (Qlibxml2 . Qt) and return true.

   On other systems always return true.  */
static bool
init_libxml2_functions (void)
{
#ifdef WINDOWSNT
  if (libxml2_loaded_p ())
    return true;
  else
    {
      HMODULE library;

      if (!(library = w32_delayed_load (Qlibxml2)))
        {
          message1 ("libxml2 library not found");
          return false;
        }

      if (! load_dll_functions (library))
        goto bad_library;

      Vlibrary_cache = Fcons (Fcons (Qlibxml2, Qt), Vlibrary_cache);
      return true;
    }

 bad_library:
  Vlibrary_cache = Fcons (Fcons (Qlibxml2, Qnil), Vlibrary_cache);

  return false;
#else  /* !WINDOWSNT */
  return true;
#endif  /* !WINDOWSNT */
}

122 make_dom (xmlNode
*node
)
124 if (node
->type
== XML_ELEMENT_NODE
)
126 Lisp_Object result
= list1 (intern ((char *) node
->name
));
129 Lisp_Object plist
= Qnil
;
131 /* First add the attributes. */
132 property
= node
->properties
;
133 while (property
!= NULL
)
135 if (property
->children
&&
136 property
->children
->content
)
138 char *content
= (char *) property
->children
->content
;
139 plist
= Fcons (Fcons (intern ((char *) property
->name
),
140 build_string (content
)),
143 property
= property
->next
;
145 result
= Fcons (Fnreverse (plist
), result
);
147 /* Then add the children of the node. */
148 child
= node
->children
;
149 while (child
!= NULL
)
151 result
= Fcons (make_dom (child
), result
);
155 return Fnreverse (result
);
157 else if (node
->type
== XML_TEXT_NODE
|| node
->type
== XML_CDATA_SECTION_NODE
)
160 return build_string ((char *) node
->content
);
164 else if (node
->type
== XML_COMMENT_NODE
)
167 return list3 (intern ("comment"), Qnil
,
168 build_string ((char *) node
->content
));
177 parse_region (Lisp_Object start
, Lisp_Object end
, Lisp_Object base_url
,
178 Lisp_Object discard_comments
, bool htmlp
)
181 Lisp_Object result
= Qnil
;
182 const char *burl
= "";
183 ptrdiff_t istart
, iend
, istart_byte
, iend_byte
;
184 unsigned char *buftext
;
186 xmlCheckVersion (LIBXML_VERSION
);
188 validate_region (&start
, &end
);
190 istart
= XINT (start
);
192 istart_byte
= CHAR_TO_BYTE (istart
);
193 iend_byte
= CHAR_TO_BYTE (iend
);
195 if (istart
< GPT
&& GPT
< iend
)
196 move_gap_both (iend
, iend_byte
);
198 if (! NILP (base_url
))
200 CHECK_STRING (base_url
);
201 burl
= SSDATA (base_url
);
204 buftext
= BYTE_POS_ADDR (istart_byte
);
206 /* Prevent ralloc.c from relocating the current buffer while libxml2
207 functions below read its text. */
208 r_alloc_inhibit_buffer_relocation (1);
211 doc
= htmlReadMemory ((char *)buftext
,
212 iend_byte
- istart_byte
, burl
, "utf-8",
213 HTML_PARSE_RECOVER
|HTML_PARSE_NONET
|
214 HTML_PARSE_NOWARNING
|HTML_PARSE_NOERROR
|
215 HTML_PARSE_NOBLANKS
);
217 doc
= xmlReadMemory ((char *)buftext
,
218 iend_byte
- istart_byte
, burl
, "utf-8",
219 XML_PARSE_NONET
|XML_PARSE_NOWARNING
|
220 XML_PARSE_NOBLANKS
|XML_PARSE_NOERROR
);
223 r_alloc_inhibit_buffer_relocation (0);
225 /* If the assertion below fails, malloc was called inside the above
226 libxml2 functions, and ralloc.c caused relocation of buffer text,
227 so we could have read from unrelated memory. */
228 eassert (buftext
== BYTE_POS_ADDR (istart_byte
));
232 Lisp_Object r
= Qnil
;
233 if (NILP(discard_comments
))
235 /* If the document has toplevel comments, then this should
236 get us the nodes and the comments. */
237 xmlNode
*n
= doc
->children
;
241 result
= Fcons (r
, result
);
248 /* The document doesn't have toplevel comments or we discarded
249 them. Get the tree the proper way. */
250 xmlNode
*node
= xmlDocGetRootElement (doc
);
252 result
= make_dom (node
);
254 result
= Fcons (Qtop
, Fcons (Qnil
, Fnreverse (Fcons (r
, result
))));
/* Call libxml2's xmlCleanupParser, but only if the library is
   recorded as loaded, so the call is never made through an unresolved
   function on WINDOWSNT.  */
void
xml_cleanup_parser (void)
{
  if (libxml2_loaded_p ())
    xmlCleanupParser ();
}

269 DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region
,
270 Slibxml_parse_html_region
,
272 doc
: /* Parse the region as an HTML document and return the parse tree.
273 If BASE-URL is non-nil, it is used to expand relative URLs.
274 If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
275 (Lisp_Object start
, Lisp_Object end
, Lisp_Object base_url
, Lisp_Object discard_comments
)
277 if (init_libxml2_functions ())
278 return parse_region (start
, end
, base_url
, discard_comments
, true);
282 DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region
,
283 Slibxml_parse_xml_region
,
285 doc
: /* Parse the region as an XML document and return the parse tree.
286 If BASE-URL is non-nil, it is used to expand relative URLs.
287 If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
288 (Lisp_Object start
, Lisp_Object end
, Lisp_Object base_url
, Lisp_Object discard_comments
)
290 if (init_libxml2_functions ())
291 return parse_region (start
, end
, base_url
, discard_comments
, false);
294 #endif /* HAVE_LIBXML2 */
298 DEFUN ("libxml-available-p", Flibxml_available_p
, Slibxml_available_p
, 0, 0, 0,
299 doc
: /* Return t if libxml2 support is available in this instance of Emacs.*/)
304 Lisp_Object found
= Fassq (Qlibxml2
, Vlibrary_cache
);
310 status
= init_libxml2_functions () ? Qt
: Qnil
;
311 Vlibrary_cache
= Fcons (Fcons (Qlibxml2
, status
), Vlibrary_cache
);
316 # endif /* WINDOWSNT */
319 #endif /* HAVE_LIBXML2 */
322 /***********************************************************************
324 ***********************************************************************/
329 defsubr (&Slibxml_parse_html_region
);
330 defsubr (&Slibxml_parse_xml_region
);
332 defsubr (&Slibxml_available_p
);