2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/xml/ext_xml.h"
19 #include <folly/ScopeGuard.h>
21 #include "hphp/runtime/base/array-init.h"
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/comparisons.h"
24 #include "hphp/runtime/base/externals.h"
25 #include "hphp/runtime/base/request-local.h"
26 #include "hphp/runtime/base/root-map.h"
27 #include "hphp/runtime/base/zend-functions.h"
28 #include "hphp/runtime/base/zend-string.h"
29 #include "hphp/runtime/vm/jit/translator.h"
30 #include "hphp/runtime/vm/jit/translator-inline.h"
31 #include "hphp/runtime/base/utf8-decode.h"
34 #define XML_MAXLEVEL 255
35 // XXX this should be dynamic
40 PHP_XML_OPTION_CASE_FOLDING
= 1,
41 PHP_XML_OPTION_TARGET_ENCODING
,
42 PHP_XML_OPTION_SKIP_TAGSTART
,
43 PHP_XML_OPTION_SKIP_WHITE
46 static struct XMLExtension final
: Extension
{
47 XMLExtension() : Extension("xml", NO_EXTENSION_VERSION_YET
) {}
48 void moduleInit() override
{
49 HHVM_FE(xml_parser_create
);
50 HHVM_FE(xml_parser_free
);
52 HHVM_FE(xml_parse_into_struct
);
53 HHVM_FE(xml_parser_create_ns
);
54 HHVM_FE(xml_parser_get_option
);
55 HHVM_FE(xml_parser_set_option
);
56 HHVM_FE(xml_set_character_data_handler
);
57 HHVM_FE(xml_set_default_handler
);
58 HHVM_FE(xml_set_element_handler
);
59 HHVM_FE(xml_set_processing_instruction_handler
);
60 HHVM_FE(xml_set_start_namespace_decl_handler
);
61 HHVM_FE(xml_set_end_namespace_decl_handler
);
62 HHVM_FE(xml_set_unparsed_entity_decl_handler
);
63 HHVM_FE(xml_set_external_entity_ref_handler
);
64 HHVM_FE(xml_set_notation_decl_handler
);
65 HHVM_FE(xml_set_object
);
66 HHVM_FE(xml_get_current_byte_index
);
67 HHVM_FE(xml_get_current_column_number
);
68 HHVM_FE(xml_get_current_line_number
);
69 HHVM_FE(xml_get_error_code
);
70 HHVM_FE(xml_error_string
);
74 HHVM_RC_INT_SAME(XML_ERROR_ASYNC_ENTITY
);
75 HHVM_RC_INT_SAME(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
76 HHVM_RC_INT_SAME(XML_ERROR_BAD_CHAR_REF
);
77 HHVM_RC_INT_SAME(XML_ERROR_BINARY_ENTITY_REF
);
78 HHVM_RC_INT_SAME(XML_ERROR_DUPLICATE_ATTRIBUTE
);
79 HHVM_RC_INT_SAME(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
80 HHVM_RC_INT_SAME(XML_ERROR_INCORRECT_ENCODING
);
81 HHVM_RC_INT_SAME(XML_ERROR_INVALID_TOKEN
);
82 HHVM_RC_INT_SAME(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
83 HHVM_RC_INT_SAME(XML_ERROR_MISPLACED_XML_PI
);
84 HHVM_RC_INT_SAME(XML_ERROR_NONE
);
85 HHVM_RC_INT_SAME(XML_ERROR_NO_ELEMENTS
);
86 HHVM_RC_INT_SAME(XML_ERROR_NO_MEMORY
);
87 HHVM_RC_INT_SAME(XML_ERROR_PARAM_ENTITY_REF
);
88 HHVM_RC_INT_SAME(XML_ERROR_PARTIAL_CHAR
);
89 HHVM_RC_INT_SAME(XML_ERROR_RECURSIVE_ENTITY_REF
);
90 HHVM_RC_INT_SAME(XML_ERROR_SYNTAX
);
91 HHVM_RC_INT_SAME(XML_ERROR_TAG_MISMATCH
);
92 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_CDATA_SECTION
);
93 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_TOKEN
);
94 HHVM_RC_INT_SAME(XML_ERROR_UNDEFINED_ENTITY
);
95 HHVM_RC_INT_SAME(XML_ERROR_UNKNOWN_ENCODING
);
97 HHVM_RC_INT(XML_OPTION_CASE_FOLDING
, PHP_XML_OPTION_CASE_FOLDING
);
98 HHVM_RC_INT(XML_OPTION_TARGET_ENCODING
, PHP_XML_OPTION_TARGET_ENCODING
);
99 HHVM_RC_INT(XML_OPTION_SKIP_TAGSTART
, PHP_XML_OPTION_SKIP_TAGSTART
);
100 HHVM_RC_INT(XML_OPTION_SKIP_WHITE
, PHP_XML_OPTION_SKIP_WHITE
);
102 HHVM_RC_STR(XML_SAX_IMPL
, "expat");
108 ///////////////////////////////////////////////////////////////////////////////
110 struct XmlParser
: SweepableResourceData
{
111 DECLARE_RESOURCE_ALLOCATION(XmlParser
)
113 virtual ~XmlParser();
116 const String
& o_getClassNameHook() const override
;
119 XML_Parser parser
{nullptr};
120 XML_Char
*target_encoding
{nullptr};
122 Variant startElementHandler
;
123 Variant endElementHandler
;
124 Variant characterDataHandler
;
125 Variant processingInstructionHandler
;
126 Variant defaultHandler
;
127 Variant unparsedEntityDeclHandler
;
128 Variant notationDeclHandler
;
129 Variant externalEntityRefHandler
;
130 Variant unknownEncodingHandler
;
131 Variant startNamespaceDeclHandler
;
132 Variant endNamespaceDeclHandler
;
142 char **ltags
{nullptr};
148 XmlParser::~XmlParser() {
152 void XmlParser::cleanupImpl() {
154 XML_ParserFree(parser
);
159 for (inx
= 0; (inx
< level
) && (inx
< XML_MAXLEVEL
); inx
++)
166 void XmlParser::sweep() {
170 const String
& XmlParser::o_getClassNameHook() const {
171 return classnameof();
174 struct XmlParserData final
: RequestEventHandler
{
175 void requestInit() override
{ parsers
.reset(); }
176 void requestShutdown() override
{ parsers
.reset(); }
177 RootMap
<XmlParser
> parsers
;
179 IMPLEMENT_STATIC_REQUEST_LOCAL(XmlParserData
, s_xml_data
);
183 inline req::ptr
<XmlParser
> getParserFromToken(void* userData
) {
184 return s_xml_data
->parsers
.lookupRoot(userData
);
187 inline void* getParserToken(const req::ptr
<XmlParser
>& parser
) {
188 return reinterpret_cast<void*>(s_xml_data
->parsers
.addRoot(parser
));
191 inline void clearParser(const req::ptr
<XmlParser
>& p
) {
192 s_xml_data
->parsers
.removeRoot(p
);
199 char (*decoding_function
)(unsigned short);
200 unsigned short (*encoding_function
)(unsigned char);
203 static XML_Char
* xml_globals_default_encoding
= (XML_Char
*)"UTF-8";
204 // for xml_parse_into_struct
207 #define XML(v) (xml_globals_ ## v)
// Widens one ISO-8859-1 byte to a 16-bit code unit. The mapping is the
// identity: the byte value is returned unchanged.
inline static unsigned short xml_encode_iso_8859_1(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Narrows a 16-bit code unit to a single ISO-8859-1 byte. Values above 0xff
// cannot be represented and are replaced by '?'.
inline static char xml_decode_iso_8859_1(unsigned short c) {
  if (c > 0xff) {
    return '?';
  }
  return static_cast<char>(c);
}
// Widens one US-ASCII byte to a 16-bit code unit. No range check is done:
// bytes above 0x7f are passed through unchanged as well.
inline static unsigned short xml_encode_us_ascii(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Narrows a 16-bit code unit to a single US-ASCII byte. Values above 0x7f
// are outside the 7-bit range and are replaced by '?'.
inline static char xml_decode_us_ascii(unsigned short c) {
  if (c > 0x7f) {
    return '?';
  }
  return static_cast<char>(c);
}
225 xml_encoding xml_encodings
[] = {
226 { (XML_Char
*)"ISO-8859-1", xml_decode_iso_8859_1
, xml_encode_iso_8859_1
},
227 { (XML_Char
*)"US-ASCII", xml_decode_us_ascii
, xml_encode_us_ascii
},
228 { (XML_Char
*)"UTF-8", NULL
, NULL
},
229 { (XML_Char
*)NULL
, NULL
, NULL
}
232 static void *php_xml_malloc_wrapper(size_t sz
) {
233 return req::malloc_untyped(sz
);
236 static void *php_xml_realloc_wrapper(void *ptr
, size_t sz
) {
237 return req::realloc_untyped(ptr
, sz
);
240 static void php_xml_free_wrapper(void *ptr
) {
246 static XML_Memory_Handling_Suite php_xml_mem_hdlrs
= {
247 php_xml_malloc_wrapper
,
248 php_xml_realloc_wrapper
,
252 static xml_encoding
*xml_get_encoding(const XML_Char
*name
) {
253 xml_encoding
*enc
= &xml_encodings
[0];
255 while (enc
&& enc
->name
) {
256 if (strcasecmp((const char*)name
, (const char*)enc
->name
) == 0) {
264 static int _xml_xmlcharlen(const XML_Char
*s
) {
274 String
xml_utf8_decode(const XML_Char
*s
, int len
,
275 const XML_Char
*encoding
) {
276 String str
= String(len
, ReserveString
);
277 char *newbuf
= str
.mutableData();
278 char (*decoder
)(unsigned short) = nullptr;
279 xml_encoding
*enc
= xml_get_encoding(encoding
);
283 decoder
= enc
->decoding_function
;
285 if (decoder
== nullptr) {
286 /* If the target encoding was unknown, or no decoder function
287 * was specified, return the UTF-8-encoded data as-is.
289 memcpy(newbuf
, s
, len
);
294 UTF8To16Decoder
dec(s
, len
, true);
295 for (int b
= dec
.decode(); b
!= UTF8_END
; b
= dec
.decode()) {
296 newbuf
[newlen
] = decoder(b
);
300 assert(newlen
<= len
);
305 static Variant
_xml_xmlchar_zval(const XML_Char
*s
, int len
,
306 const XML_Char
*encoding
) {
311 len
= _xml_xmlcharlen(s
);
313 String ret
= xml_utf8_decode(s
, len
, encoding
);
318 String
_xml_decode_tag(const req::ptr
<XmlParser
>& parser
, const char *tag
) {
319 auto newstr
= xml_utf8_decode((const XML_Char
*)tag
, strlen(tag
),
320 parser
->target_encoding
);
321 if (parser
->case_folding
) {
322 string_to_upper(newstr
);
327 static Variant
php_xml_parser_create_impl(const String
& encoding_param
,
328 const String
& ns_param
,
333 if (!encoding_param
.isNull()) {
334 /* The supported encoding types are hardcoded here because
335 * we are limited to the encodings supported by expat/xmltok.
337 if (encoding_param
.size() == 0) {
338 encoding
= XML(default_encoding
);
340 } else if (strcasecmp(encoding_param
.data(), "ISO-8859-1") == 0) {
341 encoding
= (XML_Char
*)"ISO-8859-1";
342 } else if (strcasecmp(encoding_param
.data(), "UTF-8") == 0) {
343 encoding
= (XML_Char
*)"UTF-8";
344 } else if (strcasecmp(encoding_param
.data(), "US-ASCII") == 0) {
345 encoding
= (XML_Char
*)"US-ASCII";
347 raise_warning("unsupported source encoding \"%s\"",
348 encoding_param
.c_str());
352 encoding
= XML(default_encoding
);
356 if (ns_support
&& ns_param
.empty()) {
359 separator
= ns_param
;
362 auto parser
= req::make
<XmlParser
>();
363 parser
->parser
= XML_ParserCreate_MM
364 ((auto_detect
? NULL
: encoding
), &php_xml_mem_hdlrs
,
365 !separator
.empty() ? (const XML_Char
*)separator
.data() : NULL
);
367 parser
->target_encoding
= encoding
;
368 parser
->case_folding
= 1;
369 parser
->object
.asTypedValue()->m_type
= KindOfNull
;
370 parser
->isparsing
= 0;
372 XML_SetUserData(parser
->parser
, getParserToken(parser
));
374 return Variant(std::move(parser
));
377 static bool name_contains_class(const String
& name
) {
379 int pos
= name
.find("::");
380 return pos
!= 0 && pos
!= String::npos
&& pos
+ 2 < name
.size();
385 static Variant
xml_call_handler(const req::ptr
<XmlParser
>& parser
,
386 const Variant
& handler
,
388 if (parser
&& handler
.toBoolean()) {
390 if (handler
.isString() && !name_contains_class(handler
.toString())) {
391 if (!parser
->object
.isObject()) {
392 retval
= invoke(handler
.toString().c_str(), args
, -1);
394 retval
= parser
->object
.toObject()->
395 o_invoke(handler
.toString(), args
);
397 } else if (is_callable(handler
)) {
398 vm_call_user_func(handler
, args
);
400 raise_warning("Handler is invalid");
407 static void _xml_add_to_info(const req::ptr
<XmlParser
>& parser
,
408 const String
& nameStr
) {
409 if (parser
->info
.isNull()) {
412 forceToArray(parser
->info
);
413 if (!parser
->info
.toCArrRef().exists(nameStr
)) {
414 parser
->info
.toArrRef().set(nameStr
, Array::Create());
416 auto& inner
= parser
->info
.toArrRef().lvalAt(nameStr
);
417 forceToArray(inner
).append(parser
->curtag
);
423 s_complete("complete"),
430 s_attributes("attributes");
432 void _xml_endElementHandler(void *userData
, const XML_Char
*name
) {
433 auto parser
= getParserFromToken(userData
);
437 Array args
= Array::Create();
439 auto tag_name
= _xml_decode_tag(parser
, (const char*)name
);
441 if (parser
->endElementHandler
.toBoolean()) {
442 args
.append(Variant(parser
));
443 args
.append(tag_name
);
444 xml_call_handler(parser
, parser
->endElementHandler
, args
);
447 if (!parser
->data
.isNull()) {
448 if (parser
->lastwasopen
) {
449 parser
->ctag
.toArrRef().set(s_type
, s_complete
);
451 ArrayInit
tag(3, ArrayInit::Map
{});
452 _xml_add_to_info(parser
, tag_name
.substr(parser
->toffset
));
453 tag
.set(s_tag
, tag_name
.substr(parser
->toffset
));
454 tag
.set(s_type
, s_close
);
455 tag
.set(s_level
, parser
->level
);
456 parser
->data
.toArrRef().append(tag
.toArray());
458 parser
->lastwasopen
= 0;
462 if ((parser
->ltags
) && (parser
->level
<= XML_MAXLEVEL
)) {
463 free(parser
->ltags
[parser
->level
-1]);
470 void _xml_characterDataHandler(void *userData
, const XML_Char
*s
, int len
) {
471 auto parser
= getParserFromToken(userData
);
475 Array args
= Array::Create();
477 if (parser
->characterDataHandler
.toBoolean()) {
478 args
.append(Variant(parser
));
479 args
.append(_xml_xmlchar_zval(s
, len
, parser
->target_encoding
));
480 xml_call_handler(parser
, parser
->characterDataHandler
, args
);
483 if (!parser
->data
.isNull()) {
487 String decoded_value
;
489 decoded_value
= xml_utf8_decode(s
,len
,
490 parser
->target_encoding
);
491 decoded_len
= decoded_value
.size();
492 for (i
= 0; i
< decoded_len
; i
++) {
493 switch (decoded_value
[i
]) {
505 if (doprint
|| (! parser
->skipwhite
)) {
506 if (parser
->lastwasopen
) {
508 // check if value exists, if yes append to that
509 if (parser
->ctag
.toArrRef().exists(s_value
)) {
510 myval
= tvCastToString(parser
->ctag
.toArray().rvalAt(s_value
).tv());
511 myval
+= decoded_value
;
512 parser
->ctag
.toArrRef().set(s_value
, myval
);
514 parser
->ctag
.toArrRef().set(
524 auto curtag
= parser
->data
.toArrRef().pop();
527 parser
->data
.toArrRef().append(curtag
);
531 if (curtag
.toArrRef().exists(s_type
)) {
532 mytype
= tvCastToString(curtag
.toArrRef().rvalAt(s_type
).tv());
533 if (!strcmp(mytype
.data(), "cdata") &&
534 curtag
.toArrRef().exists(s_value
)) {
535 myval
= tvCastToString(curtag
.toArrRef().rvalAt(s_value
).tv());
536 myval
+= decoded_value
;
537 curtag
.toArrRef().set(s_value
, myval
);
541 if (parser
->level
<= XML_MAXLEVEL
&& parser
->level
> 0) {
542 tag
= Array::Create();
543 _xml_add_to_info(parser
, parser
->ltags
[parser
->level
-1] +
545 tag
.set(s_tag
, String(parser
->ltags
[parser
->level
-1] +
546 parser
->toffset
, CopyString
));
547 tag
.set(s_value
, decoded_value
);
548 tag
.set(s_type
, s_cdata
);
549 tag
.set(s_level
, parser
->level
);
550 parser
->data
.toArrRef().append(tag
);
551 } else if (parser
->level
== (XML_MAXLEVEL
+ 1)) {
552 raise_warning("Maximum depth exceeded - Results truncated");
560 void _xml_defaultHandler(void *userData
, const XML_Char
*s
, int len
) {
561 auto parser
= getParserFromToken(userData
);
563 if (parser
&& parser
->defaultHandler
.toBoolean()) {
564 xml_call_handler(parser
,
565 parser
->defaultHandler
,
568 _xml_xmlchar_zval(s
, len
, parser
->target_encoding
)));
572 void _xml_startElementHandler(void *userData
, const XML_Char
*name
, const XML_Char
**attributes
) {
573 auto parser
= getParserFromToken(userData
);
574 const char **attrs
= (const char **) attributes
;
576 Array args
= Array::Create();
581 String tag_name
= _xml_decode_tag(parser
, (const char*)name
);
583 if (parser
->startElementHandler
.toBoolean()) {
584 args
.append(Variant(parser
));
585 args
.append(tag_name
);
586 args
.append(Array::Create());
588 while (attributes
&& *attributes
) {
589 String att
= _xml_decode_tag(parser
, (const char*)attributes
[0]);
590 String val
= xml_utf8_decode(attributes
[1],
591 strlen((const char*)attributes
[1]),
592 parser
->target_encoding
);
593 args
.lvalAt(2).toArrRef().set(att
, val
);
597 xml_call_handler(parser
, parser
->startElementHandler
, args
);
600 if (!parser
->data
.isNull()) {
601 if (parser
->level
<= XML_MAXLEVEL
) {
604 tag
= Array::Create();
605 atr
= Array::Create();
607 _xml_add_to_info(parser
, tag_name
.substr(parser
->toffset
));
609 tag
.set(s_tag
, tag_name
.substr(parser
->toffset
));
610 tag
.set(s_type
, s_open
);
611 tag
.set(s_level
, parser
->level
);
613 parser
->ltags
[parser
->level
-1] = strdup(tag_name
.data());
614 parser
->lastwasopen
= 1;
616 attributes
= (const XML_Char
**) attrs
;
618 while (attributes
&& *attributes
) {
619 String att
= _xml_decode_tag(parser
, (const char*)attributes
[0]);
620 String val
= xml_utf8_decode(attributes
[1],
621 strlen((const char*)attributes
[1]),
622 parser
->target_encoding
);
629 tag
.set(s_attributes
,atr
);
631 auto lval
= parser
->data
.toArrRef().lvalAt();
632 lval
.type() = KindOfArray
;
633 lval
.val().parr
= tag
.detach();
634 parser
->ctag
.assignRef(tvAsVariant(lval
.tv_ptr()));
635 } else if (parser
->level
== (XML_MAXLEVEL
+ 1)) {
636 raise_warning("Maximum depth exceeded - Results truncated");
642 void _xml_processingInstructionHandler(void *userData
, const XML_Char
*target
,
643 const XML_Char
*data
) {
644 auto parser
= getParserFromToken(userData
);
645 if (parser
&& parser
->processingInstructionHandler
.toBoolean()) {
646 Array args
= Array::Create();
647 args
.append(Variant(parser
));
648 args
.append(_xml_xmlchar_zval(target
, 0, parser
->target_encoding
));
649 args
.append(_xml_xmlchar_zval(data
, 0, parser
->target_encoding
));
650 xml_call_handler(parser
, parser
->processingInstructionHandler
, args
);
654 int _xml_externalEntityRefHandler(XML_Parser
/* void* */ parserPtr
,
655 const XML_Char
*openEntityNames
,
656 const XML_Char
*base
,
657 const XML_Char
*systemId
,
658 const XML_Char
*publicId
) {
659 auto parser
= getParserFromToken(XML_GetUserData(parserPtr
));
660 int ret
= 0; /* abort if no handler is set (should be configurable?) */
661 if (parser
&& parser
->externalEntityRefHandler
.toBoolean()) {
662 Array args
= Array::Create();
663 args
.append(Variant(parser
));
664 args
.append(_xml_xmlchar_zval(openEntityNames
, 0,
665 parser
->target_encoding
));
666 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
667 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
668 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
669 ret
= xml_call_handler(parser
,
670 parser
->externalEntityRefHandler
, args
).toInt64();
675 void _xml_notationDeclHandler(void *userData
,
676 const XML_Char
*notationName
,
677 const XML_Char
*base
,
678 const XML_Char
*systemId
,
679 const XML_Char
*publicId
) {
680 auto parser
= getParserFromToken(userData
);
682 if (parser
&& parser
->notationDeclHandler
.toBoolean()) {
683 Array args
= Array::Create();
684 args
.append(Variant(parser
));
685 args
.append(_xml_xmlchar_zval(notationName
, 0, parser
->target_encoding
));
686 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
687 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
688 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
689 xml_call_handler(parser
, parser
->notationDeclHandler
, args
);
693 void _xml_startNamespaceDeclHandler(void *userData
,const XML_Char
*prefix
,
694 const XML_Char
*uri
) {
695 auto parser
= getParserFromToken(userData
);
697 if (parser
&& parser
->startNamespaceDeclHandler
.toBoolean()) {
698 Array args
= Array::Create();
700 args
.append(Variant(parser
));
701 args
.append(_xml_xmlchar_zval(prefix
, 0, parser
->target_encoding
));
702 args
.append(_xml_xmlchar_zval(uri
, 0, parser
->target_encoding
));
703 xml_call_handler(parser
, parser
->startNamespaceDeclHandler
, args
);
707 void _xml_endNamespaceDeclHandler(void *userData
, const XML_Char
*prefix
) {
708 auto parser
= getParserFromToken(userData
);
710 if (parser
&& parser
->endNamespaceDeclHandler
.toBoolean()) {
711 Array args
= Array::Create();
712 args
.append(Variant(parser
));
713 args
.append(_xml_xmlchar_zval(prefix
, 0, parser
->target_encoding
));
714 xml_call_handler(parser
, parser
->endNamespaceDeclHandler
, args
);
718 void _xml_unparsedEntityDeclHandler(void *userData
,
719 const XML_Char
*entityName
,
720 const XML_Char
*base
,
721 const XML_Char
*systemId
,
722 const XML_Char
*publicId
,
723 const XML_Char
*notationName
) {
724 auto parser
= getParserFromToken(userData
);
726 if (parser
&& parser
->unparsedEntityDeclHandler
.toBoolean()) {
727 Array args
= Array::Create();
728 args
.append(Variant(parser
));
729 args
.append(_xml_xmlchar_zval(entityName
, 0, parser
->target_encoding
));
730 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
731 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
732 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
733 args
.append(_xml_xmlchar_zval(notationName
, 0, parser
->target_encoding
));
734 xml_call_handler(parser
, parser
->unparsedEntityDeclHandler
, args
);
738 static void xml_set_handler(Variant
* handler
, const Variant
& data
) {
739 if (data
.isNull() || same(data
, false) || data
.isString() ||
743 raise_warning("Handler is invalid");
747 ///////////////////////////////////////////////////////////////////////////////
749 Resource
HHVM_FUNCTION(xml_parser_create
,
750 const Variant
& encoding
/* = uninit_variant */) {
751 const String
& strEncoding
= encoding
.isNull()
753 : encoding
.toString();
754 return php_xml_parser_create_impl(strEncoding
, null_string
, 0).toResource();
757 Resource
HHVM_FUNCTION(xml_parser_create_ns
,
758 const Variant
& encoding
/* = uninit_variant */,
759 const Variant
& separator
/* = uninit_variant */) {
760 const String
& strEncoding
= encoding
.isNull()
762 : encoding
.toString();
763 const String
& strSeparator
= separator
.isNull()
765 : separator
.toString();
766 return php_xml_parser_create_impl(strEncoding
, strSeparator
, 1).toResource();
769 bool HHVM_FUNCTION(xml_parser_free
,
770 const Resource
& parser
) {
771 auto p
= cast
<XmlParser
>(parser
);
772 if (p
->isparsing
== 1) {
773 raise_warning("Parser cannot be freed while it is parsing.");
780 int64_t HHVM_FUNCTION(xml_parse
,
781 const Resource
& parser
,
783 bool is_final
/* = true */) {
784 // XML_Parse can reenter the VM, and it will do so after we've lost
785 // the frame pointer by calling through the system's copy of XML_Parse
787 SYNC_VM_REGS_SCOPED();
788 auto p
= cast
<XmlParser
>(parser
);
790 long isFinal
= is_final
? 1 : 0;
792 ret
= XML_Parse(p
->parser
, (const XML_Char
*)data
.data(), data
.size(),
798 int64_t HHVM_FUNCTION(xml_parse_into_struct
,
799 const Resource
& parser
,
802 VRefParam index
/* = null */) {
803 SYNC_VM_REGS_SCOPED();
805 auto p
= cast
<XmlParser
>(parser
);
806 p
->data
.setWithRef(values
);
807 p
->data
= Array::Create();
808 p
->info
.setWithRef(index
);
809 p
->info
= Array::Create();
811 p
->ltags
= (char**)malloc(XML_MAXLEVEL
* sizeof(char*));
813 XML_SetDefaultHandler(p
->parser
, _xml_defaultHandler
);
814 XML_SetElementHandler(p
->parser
, _xml_startElementHandler
,
815 _xml_endElementHandler
);
816 XML_SetCharacterDataHandler(p
->parser
, _xml_characterDataHandler
);
819 ret
= XML_Parse(p
->parser
, (const XML_Char
*)data
.data(), data
.size(), 1);
825 Variant
HHVM_FUNCTION(xml_parser_get_option
,
826 const Resource
& parser
,
828 auto p
= cast
<XmlParser
>(parser
);
830 case PHP_XML_OPTION_CASE_FOLDING
:
831 return p
->case_folding
;
832 case PHP_XML_OPTION_TARGET_ENCODING
:
833 return String((const char*)p
->target_encoding
, CopyString
);
835 raise_warning("Unknown option");
841 bool HHVM_FUNCTION(xml_parser_set_option
,
842 const Resource
& parser
,
844 const Variant
& value
) {
845 auto p
= cast
<XmlParser
>(parser
);
847 case PHP_XML_OPTION_CASE_FOLDING
:
848 p
->case_folding
= value
.toInt64();
850 case PHP_XML_OPTION_SKIP_TAGSTART
:
851 p
->toffset
= value
.toInt64();
853 case PHP_XML_OPTION_SKIP_WHITE
:
854 p
->skipwhite
= value
.toInt64();
856 case PHP_XML_OPTION_TARGET_ENCODING
: {
858 enc
= xml_get_encoding((const XML_Char
*)value
.toString().data());
860 raise_warning("Unsupported target encoding \"%s\"",
861 value
.toString().data());
864 p
->target_encoding
= enc
->name
;
868 raise_warning("Unknown option");
874 bool HHVM_FUNCTION(xml_set_character_data_handler
,
875 const Resource
& parser
,
876 const Variant
& handler
) {
877 auto p
= cast
<XmlParser
>(parser
);
878 xml_set_handler(&p
->characterDataHandler
, handler
);
879 XML_SetCharacterDataHandler(p
->parser
, _xml_characterDataHandler
);
883 bool HHVM_FUNCTION(xml_set_default_handler
,
884 const Resource
& parser
,
885 const Variant
& handler
) {
886 auto p
= cast
<XmlParser
>(parser
);
887 xml_set_handler(&p
->defaultHandler
, handler
);
888 XML_SetDefaultHandler(p
->parser
, _xml_defaultHandler
);
892 bool HHVM_FUNCTION(xml_set_element_handler
,
893 const Resource
& parser
,
894 const Variant
& start_element_handler
,
895 const Variant
& end_element_handler
) {
896 auto p
= cast
<XmlParser
>(parser
);
897 xml_set_handler(&p
->startElementHandler
, start_element_handler
);
898 xml_set_handler(&p
->endElementHandler
, end_element_handler
);
899 XML_SetElementHandler(p
->parser
, _xml_startElementHandler
,
900 _xml_endElementHandler
);
904 bool HHVM_FUNCTION(xml_set_processing_instruction_handler
,
905 const Resource
& parser
,
906 const Variant
& handler
) {
907 auto p
= cast
<XmlParser
>(parser
);
908 xml_set_handler(&p
->processingInstructionHandler
, handler
);
909 XML_SetProcessingInstructionHandler(p
->parser
,
910 _xml_processingInstructionHandler
);
914 bool HHVM_FUNCTION(xml_set_start_namespace_decl_handler
,
915 const Resource
& parser
,
916 const Variant
& handler
) {
917 auto p
= cast
<XmlParser
>(parser
);
918 xml_set_handler(&p
->startNamespaceDeclHandler
, handler
);
919 XML_SetStartNamespaceDeclHandler(p
->parser
, _xml_startNamespaceDeclHandler
);
923 bool HHVM_FUNCTION(xml_set_end_namespace_decl_handler
,
924 const Resource
& parser
,
925 const Variant
& handler
) {
926 auto p
= cast
<XmlParser
>(parser
);
927 xml_set_handler(&p
->endNamespaceDeclHandler
, handler
);
928 XML_SetEndNamespaceDeclHandler(p
->parser
, _xml_endNamespaceDeclHandler
);
932 bool HHVM_FUNCTION(xml_set_unparsed_entity_decl_handler
,
933 const Resource
& parser
,
934 const Variant
& handler
) {
935 auto p
= cast
<XmlParser
>(parser
);
936 xml_set_handler(&p
->unparsedEntityDeclHandler
, handler
);
937 XML_SetUnparsedEntityDeclHandler(p
->parser
, _xml_unparsedEntityDeclHandler
);
941 bool HHVM_FUNCTION(xml_set_external_entity_ref_handler
,
942 const Resource
& parser
,
943 const Variant
& handler
) {
944 auto p
= cast
<XmlParser
>(parser
);
945 xml_set_handler(&p
->externalEntityRefHandler
, handler
);
946 XML_SetExternalEntityRefHandler(p
->parser
, _xml_externalEntityRefHandler
);
950 bool HHVM_FUNCTION(xml_set_notation_decl_handler
,
951 const Resource
& parser
,
952 const Variant
& handler
) {
953 auto p
= cast
<XmlParser
>(parser
);
954 xml_set_handler(&p
->notationDeclHandler
, handler
);
955 XML_SetNotationDeclHandler(p
->parser
, _xml_notationDeclHandler
);
959 bool HHVM_FUNCTION(xml_set_object
,
960 const Resource
& parser
,
962 auto p
= cast
<XmlParser
>(parser
);
963 p
->object
.setWithRef(object
);
967 int64_t HHVM_FUNCTION(xml_get_current_byte_index
,
968 const Resource
& parser
) {
969 auto p
= cast
<XmlParser
>(parser
);
970 return XML_GetCurrentByteIndex(p
->parser
);
973 int64_t HHVM_FUNCTION(xml_get_current_column_number
,
974 const Resource
& parser
) {
975 auto p
= cast
<XmlParser
>(parser
);
976 return XML_GetCurrentColumnNumber(p
->parser
);
979 int64_t HHVM_FUNCTION(xml_get_current_line_number
,
980 const Resource
& parser
) {
981 auto p
= cast
<XmlParser
>(parser
);
982 return XML_GetCurrentLineNumber(p
->parser
);
985 int64_t HHVM_FUNCTION(xml_get_error_code
,
986 const Resource
& parser
) {
987 auto p
= cast
<XmlParser
>(parser
);
988 return XML_GetErrorCode(p
->parser
);
991 String
HHVM_FUNCTION(xml_error_string
,
993 char * str
= (char *)XML_ErrorString((XML_Error
)/*(int)*/code
);
994 return String(str
, CopyString
);
997 ///////////////////////////////////////////////////////////////////////////////
999 String
HHVM_FUNCTION(utf8_decode
,
1000 const String
& data
) {
1001 return xml_utf8_decode(data
.c_str(), data
.size(), "ISO-8859-1");
1004 String
HHVM_FUNCTION(utf8_encode
,
1005 const String
& data
) {
1006 auto const maxSize
= safe_cast
<size_t>(data
.size()) * 4;
1007 String str
= String(maxSize
, ReserveString
);
1008 char *newbuf
= str
.mutableData();
1010 const char *s
= data
.data();
1011 for (int pos
= data
.size(); pos
> 0; pos
--, s
++) {
1012 unsigned int c
= (unsigned char)(*s
);
1014 newbuf
[newlen
++] = (char) c
;
1015 } else if (c
< 0x800) {
1016 newbuf
[newlen
++] = (0xc0 | (c
>> 6));
1017 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1018 } else if (c
< 0x10000) {
1019 newbuf
[newlen
++] = (0xe0 | (c
>> 12));
1020 newbuf
[newlen
++] = (0xc0 | ((c
>> 6) & 0x3f));
1021 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1022 } else if (c
< 0x200000) {
1023 newbuf
[newlen
++] = (0xf0 | (c
>> 18));
1024 newbuf
[newlen
++] = (0xe0 | ((c
>> 12) & 0x3f));
1025 newbuf
[newlen
++] = (0xc0 | ((c
>> 6) & 0x3f));
1026 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1030 assert(newlen
<= maxSize
);
1035 ///////////////////////////////////////////////////////////////////////////////