2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #ifndef incl_HPHP_EXT_LIBXML_H_
19 #define incl_HPHP_EXT_LIBXML_H_
21 #include "hphp/runtime/ext/extension.h"
23 #include <libxml/parser.h>
26 ///////////////////////////////////////////////////////////////////////////////
28 bool libxml_use_internal_error();
29 void libxml_add_error(const std::string
& msg
);
30 String
libxml_get_valid_file_path(const String
& source
);
31 String
libxml_get_valid_file_path(const char* source
);
33 int libxml_streams_IO_read(void* context
, char* buffer
, int len
);
34 int libxml_streams_IO_write(void* context
, const char* buffer
, int len
);
35 int libxml_streams_IO_close(void* context
);
36 int libxml_streams_IO_nop_close(void* context
);
38 void php_libxml_node_free(xmlNodePtr node
);
39 void php_libxml_node_free_list(xmlNodePtr node
);
40 void php_libxml_node_free_resource(xmlNodePtr node
);
42 bool HHVM_FUNCTION(libxml_disable_entity_loader
, bool disable
= true);
45 * LibXML resource wrappers.
47 * Several extensions use libxml (DOMDocument, SimpleXML, ext_xsl) as their
48 * underlying representation of an XML DOM. Because these extensions share
49 * libxml data we use Resource wrappers to control destruction of libxml data.
51 * In addition to their req::ptr behavior these wrappers also serve as a
52 * cache for DOMNode objects that have been associated with particular xmlNodes
53 * and retain information about the owning document for each node.
55 * Unfortunately (read: Because, PHP), these wrappers will also sometimes serve
56 * as weak-references. In particular it is possible for (1) the cached
57 * DOMNode object to be free'd before the xmlNode*, in which case the DOMNode
58 * is responsible for clearing the cache, and (2) for the underlying xmlNode*
59 * to be free'd while there are still req::ptr holding XMLNodeData resources.
61 * In the event of (1) the only reference to the free'd object will be the
62 * object in m_cache, and as such a raw ObjectData* is used as a quasi-weak
65 * In PHP, if the root of an orphaned sub-tree of a Document, or of an orphaned
66 * subtree with no associated document goes out of scope all of its descendants
67 * are freed. When this is done any live XMLNodeDatas remain valid, however,
68 * their xmlNode* pointers are cleared to indicate the bound node no longer
71 * Any node properly connected to the root element of an xmlDoc* will remain
72 * valid until such time that the owning xmlDoc* becomes invalid or the node
73 * becomes orphaned from the root and its subtree is freed.
75 * Documents will remain valid until such time that there are no further
76 * references to nodes contained therein (including references to nodes whose
77 * underlying representation has been freed).
79 * The libxml_register_node() function will "Do the right thing" (tm) when
80 * given an xmlNodePtr. Specifically, if the node already has an associated
81 * XMLNodeData or XMLDocumentData, that resource is attached; otherwise a new
82 * resource is created. Additionally, if a new resource is created for a node
83 * which itself is attached to a document with no associated resource, a
84 * resource will be created for that document.
86 * It is not necessary to cast xmlNode* to xmlDoc* before passing these pointers
87 * to libxml_register_node(), the type field will always be inspected before
88 * creating a new node resource.
90 * These resource classes are based on the PHP php_libxml_node_object
91 * (XMLNodeData), and php_libxml_ref_obj (XMLDocumentData). Rather than track
92 * these classes separately we track them as a single unified set of XMLNodeData
95 * https://github.com/php/php-src/blob/master/ext/libxml/php_libxml.h
98 struct XMLDocumentData
;
100 struct XMLNodeData
: SweepableResourceData
{
101 DECLARE_RESOURCE_ALLOCATION(XMLNodeData
)
103 explicit XMLNodeData(xmlNodePtr p
);
104 virtual ~XMLNodeData();
106 ObjectData
* getCache() const { return m_cache
; }
107 void clearCache() { m_cache
= nullptr; }
108 void setCache(ObjectData
* o
) { m_cache
= o
; }
110 void reset() { m_node
= nullptr; }
111 void setDoc(req::ptr
<XMLDocumentData
>&& doc
);
113 xmlDocPtr
docp() const;
114 xmlNodePtr
nodep() const { return m_node
; }
115 req::ptr
<XMLDocumentData
> doc();
116 void unlink() { xmlUnlinkNode(m_node
); }
119 ObjectData
* m_cache
{nullptr}; // XXX: to avoid a cycle this is a weak ref
120 xmlNodePtr m_node
{nullptr};
121 req::ptr
<XMLDocumentData
> m_doc
{nullptr};
123 friend struct XMLDocumentData
;
126 struct XMLDocumentData
: XMLNodeData
{
127 DECLARE_RESOURCE_ALLOCATION(XMLDocumentData
)
129 explicit XMLDocumentData(xmlDocPtr p
)
130 : XMLNodeData((xmlNodePtr
)p
)
131 , m_formatoutput(false)
132 , m_validateonparse(false)
133 , m_resolveexternals(false)
134 , m_preservewhitespace(true)
135 , m_substituteentities(false)
136 , m_stricterror(true)
140 assertx(p
->type
== XML_HTML_DOCUMENT_NODE
|| p
->type
== XML_DOCUMENT_NODE
);
143 void copyProperties(req::ptr
<XMLDocumentData
> data
) {
144 m_classmap
= data
->m_classmap
;
145 m_formatoutput
= data
->m_formatoutput
;
146 m_validateonparse
= data
->m_validateonparse
;
147 m_resolveexternals
= data
->m_resolveexternals
;
148 m_preservewhitespace
= data
->m_preservewhitespace
;
149 m_substituteentities
= data
->m_substituteentities
;
150 m_stricterror
= data
->m_stricterror
;
151 m_recover
= data
->m_recover
;
154 xmlDocPtr
docp() const { return (xmlDocPtr
)m_node
; }
155 void attachNode() { m_liveNodes
++; }
157 assertx(m_liveNodes
);
158 if (!--m_liveNodes
&& m_destruct
) cleanup();
162 ~XMLDocumentData() override
{ cleanup(); }
165 uint32_t m_liveNodes
{0};
167 unsigned m_formatoutput
:1;
168 unsigned m_validateonparse
:1;
169 unsigned m_resolveexternals
:1;
170 unsigned m_preservewhitespace
:1;
171 unsigned m_substituteentities
:1;
172 unsigned m_stricterror
:1;
173 unsigned m_recover
:1;
174 unsigned m_destruct
:1; // cleanup when last node de-registers
177 using XMLNode
= req::ptr
<XMLNodeData
>;
179 inline XMLNode
libxml_register_node(xmlNodePtr p
) {
180 if (!p
) return nullptr;
182 return XMLNode(reinterpret_cast<XMLNodeData
*>(p
->_private
));
185 if (p
->type
== XML_HTML_DOCUMENT_NODE
||
186 p
->type
== XML_DOCUMENT_NODE
) {
187 assertx(p
->doc
== (xmlDocPtr
)p
);
189 return req::make
<XMLDocumentData
>((xmlDocPtr
)p
);
191 return req::make
<XMLNodeData
>(p
);
194 inline XMLNode
libxml_register_node(xmlDocPtr p
) {
195 return libxml_register_node((xmlNodePtr
)p
);
199 inline XMLNodeData::XMLNodeData(xmlNodePtr p
) : m_node(p
) {
200 assertx(p
&& !p
->_private
);
201 m_node
->_private
= this;
203 if (p
->doc
&& p
!= (xmlNodePtr
)p
->doc
) {
204 m_doc
= libxml_register_node((xmlNodePtr
)p
->doc
)->doc();
209 inline XMLNodeData::~XMLNodeData() {
211 assertx(!m_cache
&& m_node
->_private
== this);
213 m_node
->_private
= nullptr;
214 php_libxml_node_free_resource(m_node
);
216 if (m_doc
) m_doc
->detachNode();
219 inline void XMLNodeData::setDoc(req::ptr
<XMLDocumentData
>&& doc
) {
220 if (m_doc
) m_doc
->detachNode();
221 if (doc
) doc
->attachNode();
222 m_doc
= std::move(doc
);
225 inline req::ptr
<XMLDocumentData
> XMLNodeData::doc() {
226 if (!m_node
) return nullptr;
228 if (m_node
->type
== XML_HTML_DOCUMENT_NODE
||
229 m_node
->type
== XML_DOCUMENT_NODE
) {
230 return req::ptr
<XMLDocumentData
>(static_cast<XMLDocumentData
*>(this));
234 assertx(!m_node
->doc
);
238 assertx(m_doc
.get() == libxml_register_node((xmlNodePtr
)m_node
->doc
).get());
242 inline xmlDocPtr
XMLNodeData::docp() const {
243 auto docData
= const_cast<XMLNodeData
*>(this)->doc();
244 return docData
? docData
->docp() : nullptr;
// Bit 2 (value 4).  Parenthesised so the expansion behaves as a single operand
// next to operators that bind tighter than <<, e.g. `x + LIBXML_SAVE_NOEMPTYTAG`.
#define LIBXML_SAVE_NOEMPTYTAG (1 << 2)
249 ///////////////////////////////////////////////////////////////////////////////