2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/xml/ext_xml.h"
19 #include <folly/ScopeGuard.h>
21 #include "hphp/runtime/base/array-init.h"
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/comparisons.h"
24 #include "hphp/runtime/base/externals.h"
25 #include "hphp/runtime/base/request-local.h"
26 #include "hphp/runtime/base/root-map.h"
27 #include "hphp/runtime/base/zend-functions.h"
28 #include "hphp/runtime/base/zend-string.h"
29 #include "hphp/runtime/vm/jit/translator.h"
30 #include "hphp/runtime/vm/jit/translator-inline.h"
31 #include "hphp/runtime/base/utf8-decode.h"
34 #define XML_MAXLEVEL 255
35 // XXX this should be dynamic
40 PHP_XML_OPTION_CASE_FOLDING
= 1,
41 PHP_XML_OPTION_TARGET_ENCODING
,
42 PHP_XML_OPTION_SKIP_TAGSTART
,
43 PHP_XML_OPTION_SKIP_WHITE
46 static struct XMLExtension final
: Extension
{
47 XMLExtension() : Extension("xml", NO_EXTENSION_VERSION_YET
) {}
48 void moduleInit() override
{
49 HHVM_FE(xml_parser_create
);
50 HHVM_FE(xml_parser_free
);
52 HHVM_FE(xml_parse_into_struct
);
53 HHVM_FE(xml_parser_create_ns
);
54 HHVM_FE(xml_parser_get_option
);
55 HHVM_FE(xml_parser_set_option
);
56 HHVM_FE(xml_set_character_data_handler
);
57 HHVM_FE(xml_set_default_handler
);
58 HHVM_FE(xml_set_element_handler
);
59 HHVM_FE(xml_set_processing_instruction_handler
);
60 HHVM_FE(xml_set_start_namespace_decl_handler
);
61 HHVM_FE(xml_set_end_namespace_decl_handler
);
62 HHVM_FE(xml_set_unparsed_entity_decl_handler
);
63 HHVM_FE(xml_set_external_entity_ref_handler
);
64 HHVM_FE(xml_set_notation_decl_handler
);
65 HHVM_FE(xml_set_object
);
66 HHVM_FE(xml_get_current_byte_index
);
67 HHVM_FE(xml_get_current_column_number
);
68 HHVM_FE(xml_get_current_line_number
);
69 HHVM_FE(xml_get_error_code
);
70 HHVM_FE(xml_error_string
);
74 HHVM_RC_INT_SAME(XML_ERROR_ASYNC_ENTITY
);
75 HHVM_RC_INT_SAME(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
);
76 HHVM_RC_INT_SAME(XML_ERROR_BAD_CHAR_REF
);
77 HHVM_RC_INT_SAME(XML_ERROR_BINARY_ENTITY_REF
);
78 HHVM_RC_INT_SAME(XML_ERROR_DUPLICATE_ATTRIBUTE
);
79 HHVM_RC_INT_SAME(XML_ERROR_EXTERNAL_ENTITY_HANDLING
);
80 HHVM_RC_INT_SAME(XML_ERROR_INCORRECT_ENCODING
);
81 HHVM_RC_INT_SAME(XML_ERROR_INVALID_TOKEN
);
82 HHVM_RC_INT_SAME(XML_ERROR_JUNK_AFTER_DOC_ELEMENT
);
83 HHVM_RC_INT_SAME(XML_ERROR_MISPLACED_XML_PI
);
84 HHVM_RC_INT_SAME(XML_ERROR_NONE
);
85 HHVM_RC_INT_SAME(XML_ERROR_NO_ELEMENTS
);
86 HHVM_RC_INT_SAME(XML_ERROR_NO_MEMORY
);
87 HHVM_RC_INT_SAME(XML_ERROR_PARAM_ENTITY_REF
);
88 HHVM_RC_INT_SAME(XML_ERROR_PARTIAL_CHAR
);
89 HHVM_RC_INT_SAME(XML_ERROR_RECURSIVE_ENTITY_REF
);
90 HHVM_RC_INT_SAME(XML_ERROR_SYNTAX
);
91 HHVM_RC_INT_SAME(XML_ERROR_TAG_MISMATCH
);
92 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_CDATA_SECTION
);
93 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_TOKEN
);
94 HHVM_RC_INT_SAME(XML_ERROR_UNDEFINED_ENTITY
);
95 HHVM_RC_INT_SAME(XML_ERROR_UNKNOWN_ENCODING
);
97 HHVM_RC_INT(XML_OPTION_CASE_FOLDING
, PHP_XML_OPTION_CASE_FOLDING
);
98 HHVM_RC_INT(XML_OPTION_TARGET_ENCODING
, PHP_XML_OPTION_TARGET_ENCODING
);
99 HHVM_RC_INT(XML_OPTION_SKIP_TAGSTART
, PHP_XML_OPTION_SKIP_TAGSTART
);
100 HHVM_RC_INT(XML_OPTION_SKIP_WHITE
, PHP_XML_OPTION_SKIP_WHITE
);
102 HHVM_RC_STR(XML_SAX_IMPL
, "expat");
108 ///////////////////////////////////////////////////////////////////////////////
110 struct XmlParser
: SweepableResourceData
{
111 DECLARE_RESOURCE_ALLOCATION(XmlParser
)
113 virtual ~XmlParser();
116 const String
& o_getClassNameHook() const override
;
119 XML_Parser parser
{nullptr};
120 XML_Char
*target_encoding
{nullptr};
122 Variant startElementHandler
;
123 Variant endElementHandler
;
124 Variant characterDataHandler
;
125 Variant processingInstructionHandler
;
126 Variant defaultHandler
;
127 Variant unparsedEntityDeclHandler
;
128 Variant notationDeclHandler
;
129 Variant externalEntityRefHandler
;
130 Variant unknownEncodingHandler
;
131 Variant startNamespaceDeclHandler
;
132 Variant endNamespaceDeclHandler
;
142 char **ltags
{nullptr};
148 XmlParser::~XmlParser() {
152 void XmlParser::cleanupImpl() {
154 XML_ParserFree(parser
);
159 for (inx
= 0; (inx
< level
) && (inx
< XML_MAXLEVEL
); inx
++)
166 void XmlParser::sweep() {
170 const String
& XmlParser::o_getClassNameHook() const {
171 return classnameof();
174 struct XmlParserData final
: RequestEventHandler
{
175 void requestInit() override
{ parsers
.reset(); }
176 void requestShutdown() override
{ parsers
.reset(); }
177 RootMap
<XmlParser
> parsers
;
179 IMPLEMENT_STATIC_REQUEST_LOCAL(XmlParserData
, s_xml_data
);
183 inline req::ptr
<XmlParser
> getParserFromToken(void* userData
) {
184 return s_xml_data
->parsers
.lookupRoot(userData
);
187 inline void* getParserToken(const req::ptr
<XmlParser
>& parser
) {
188 return reinterpret_cast<void*>(s_xml_data
->parsers
.addRoot(parser
));
191 inline void clearParser(const req::ptr
<XmlParser
>& p
) {
192 s_xml_data
->parsers
.removeRoot(p
);
199 char (*decoding_function
)(unsigned short);
200 unsigned short (*encoding_function
)(unsigned char);
203 static XML_Char
* xml_globals_default_encoding
= (XML_Char
*)"UTF-8";
204 // for xml_parse_into_struct
207 #define XML(v) (xml_globals_ ## v)
// Encoder entry for "ISO-8859-1": widens the input byte to a 16-bit value
// with the same numeric value (no table lookup, no range check).
inline static unsigned short xml_encode_iso_8859_1(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Decoder entry for "ISO-8859-1": narrows a 16-bit value to one byte.
// Values above 0xFF cannot be represented and are replaced with '?'.
inline static char xml_decode_iso_8859_1(unsigned short c) {
  return c > 0xff ? '?' : static_cast<char>(c);
}
// Encoder entry for "US-ASCII": widens the input byte to a 16-bit value
// unchanged. Note that bytes above 0x7F are passed through as-is; only the
// matching decoder restricts the range.
inline static unsigned short xml_encode_us_ascii(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Decoder entry for "US-ASCII": narrows a 16-bit value to one byte.
// Anything above 0x7F is outside the 7-bit range and is replaced with '?'.
inline static char xml_decode_us_ascii(unsigned short c) {
  return c > 0x7f ? '?' : static_cast<char>(c);
}
225 xml_encoding xml_encodings
[] = {
226 { (XML_Char
*)"ISO-8859-1", xml_decode_iso_8859_1
, xml_encode_iso_8859_1
},
227 { (XML_Char
*)"US-ASCII", xml_decode_us_ascii
, xml_encode_us_ascii
},
228 { (XML_Char
*)"UTF-8", NULL
, NULL
},
229 { (XML_Char
*)NULL
, NULL
, NULL
}
232 static void *php_xml_malloc_wrapper(size_t sz
) {
233 return req::malloc_untyped(sz
);
236 static void *php_xml_realloc_wrapper(void *ptr
, size_t sz
) {
237 return req::realloc_untyped(ptr
, sz
);
240 static void php_xml_free_wrapper(void *ptr
) {
246 static XML_Memory_Handling_Suite php_xml_mem_hdlrs
= {
247 php_xml_malloc_wrapper
,
248 php_xml_realloc_wrapper
,
252 static xml_encoding
*xml_get_encoding(const XML_Char
*name
) {
253 xml_encoding
*enc
= &xml_encodings
[0];
255 while (enc
&& enc
->name
) {
256 if (strcasecmp((const char*)name
, (const char*)enc
->name
) == 0) {
264 static int _xml_xmlcharlen(const XML_Char
*s
) {
274 String
xml_utf8_decode(const XML_Char
*s
, int len
,
275 const XML_Char
*encoding
) {
276 String str
= String(len
, ReserveString
);
277 char *newbuf
= str
.mutableData();
278 char (*decoder
)(unsigned short) = nullptr;
279 xml_encoding
*enc
= xml_get_encoding(encoding
);
283 decoder
= enc
->decoding_function
;
285 if (decoder
== nullptr) {
286 /* If the target encoding was unknown, or no decoder function
287 * was specified, return the UTF-8-encoded data as-is.
289 memcpy(newbuf
, s
, len
);
294 UTF8To16Decoder
dec(s
, len
, true);
295 for (int b
= dec
.decode(); b
!= UTF8_END
; b
= dec
.decode()) {
296 newbuf
[newlen
] = decoder(b
);
300 assert(newlen
<= len
);
305 static Variant
_xml_xmlchar_zval(const XML_Char
*s
, int len
,
306 const XML_Char
*encoding
) {
311 len
= _xml_xmlcharlen(s
);
313 String ret
= xml_utf8_decode(s
, len
, encoding
);
318 String
_xml_decode_tag(const req::ptr
<XmlParser
>& parser
, const char *tag
) {
319 auto newstr
= xml_utf8_decode((const XML_Char
*)tag
, strlen(tag
),
320 parser
->target_encoding
);
321 if (parser
->case_folding
) {
322 string_to_upper(newstr
);
327 static Variant
php_xml_parser_create_impl(const String
& encoding_param
,
328 const String
& ns_param
,
333 if (!encoding_param
.isNull()) {
334 /* The supported encoding types are hardcoded here because
335 * we are limited to the encodings supported by expat/xmltok.
337 if (encoding_param
.size() == 0) {
338 encoding
= XML(default_encoding
);
340 } else if (strcasecmp(encoding_param
.data(), "ISO-8859-1") == 0) {
341 encoding
= (XML_Char
*)"ISO-8859-1";
342 } else if (strcasecmp(encoding_param
.data(), "UTF-8") == 0) {
343 encoding
= (XML_Char
*)"UTF-8";
344 } else if (strcasecmp(encoding_param
.data(), "US-ASCII") == 0) {
345 encoding
= (XML_Char
*)"US-ASCII";
347 raise_warning("unsupported source encoding \"%s\"",
348 encoding_param
.c_str());
352 encoding
= XML(default_encoding
);
356 if (ns_support
&& ns_param
.empty()) {
359 separator
= ns_param
;
362 auto parser
= req::make
<XmlParser
>();
363 parser
->parser
= XML_ParserCreate_MM
364 ((auto_detect
? NULL
: encoding
), &php_xml_mem_hdlrs
,
365 !separator
.empty() ? (const XML_Char
*)separator
.data() : NULL
);
367 parser
->target_encoding
= encoding
;
368 parser
->case_folding
= 1;
369 parser
->object
.asTypedValue()->m_type
= KindOfNull
;
370 parser
->isparsing
= 0;
372 XML_SetUserData(parser
->parser
, getParserToken(parser
));
374 return Variant(std::move(parser
));
377 static bool name_contains_class(const String
& name
) {
379 int pos
= name
.find("::");
380 return pos
!= 0 && pos
!= String::npos
&& pos
+ 2 < name
.size();
385 static Variant
xml_call_handler(const req::ptr
<XmlParser
>& parser
,
386 const Variant
& handler
,
388 if (parser
&& handler
.toBoolean()) {
390 if (handler
.isString() && !name_contains_class(handler
.toString())) {
391 if (!parser
->object
.isObject()) {
392 retval
= invoke(handler
.toString().c_str(), args
, -1);
394 retval
= parser
->object
.toObject()->
395 o_invoke(handler
.toString(), args
);
397 } else if (is_callable(handler
)) {
398 vm_call_user_func(handler
, args
);
400 raise_warning("Handler is invalid");
407 static void _xml_add_to_info(const req::ptr
<XmlParser
>& parser
,
408 const String
& nameStr
) {
409 if (parser
->info
.isNull()) {
412 forceToArray(parser
->info
);
413 if (!parser
->info
.toCArrRef().exists(nameStr
)) {
414 parser
->info
.toArrRef().set(nameStr
, Array::Create());
416 auto const inner
= parser
->info
.toArrRef().lvalAt(nameStr
);
417 forceToArray(inner
).append(parser
->curtag
);
423 s_complete("complete"),
430 s_attributes("attributes");
432 void _xml_endElementHandler(void *userData
, const XML_Char
*name
) {
433 auto parser
= getParserFromToken(userData
);
437 Array args
= Array::Create();
439 auto tag_name
= _xml_decode_tag(parser
, (const char*)name
);
441 if (parser
->endElementHandler
.toBoolean()) {
442 args
.append(Variant(parser
));
443 args
.append(tag_name
);
444 xml_call_handler(parser
, parser
->endElementHandler
, args
);
447 if (!parser
->data
.isNull()) {
448 if (parser
->lastwasopen
) {
449 parser
->ctag
.toArrRef().set(s_type
, s_complete
);
451 ArrayInit
tag(3, ArrayInit::Map
{});
452 _xml_add_to_info(parser
, tag_name
.substr(parser
->toffset
));
453 tag
.set(s_tag
, tag_name
.substr(parser
->toffset
));
454 tag
.set(s_type
, s_close
);
455 tag
.set(s_level
, parser
->level
);
456 parser
->data
.toArrRef().append(tag
.toArray());
458 parser
->lastwasopen
= 0;
462 if ((parser
->ltags
) && (parser
->level
<= XML_MAXLEVEL
)) {
463 free(parser
->ltags
[parser
->level
-1]);
470 void _xml_characterDataHandler(void *userData
, const XML_Char
*s
, int len
) {
471 auto parser
= getParserFromToken(userData
);
475 Array args
= Array::Create();
477 if (parser
->characterDataHandler
.toBoolean()) {
478 args
.append(Variant(parser
));
479 args
.append(_xml_xmlchar_zval(s
, len
, parser
->target_encoding
));
480 xml_call_handler(parser
, parser
->characterDataHandler
, args
);
483 if (!parser
->data
.isNull()) {
487 String decoded_value
;
489 decoded_value
= xml_utf8_decode(s
,len
,
490 parser
->target_encoding
);
491 decoded_len
= decoded_value
.size();
492 for (i
= 0; i
< decoded_len
; i
++) {
493 switch (decoded_value
[i
]) {
505 if (doprint
|| (! parser
->skipwhite
)) {
506 if (parser
->lastwasopen
) {
508 // check if value exists, if yes append to that
509 if (parser
->ctag
.toArrRef().exists(s_value
)) {
510 myval
= tvCastToString(parser
->ctag
.toArray().rvalAt(s_value
).tv());
511 myval
+= decoded_value
;
512 parser
->ctag
.toArrRef().set(s_value
, myval
);
514 parser
->ctag
.toArrRef().set(
524 auto curtag
= parser
->data
.toArrRef().pop();
527 parser
->data
.toArrRef().append(curtag
);
531 if (curtag
.toArrRef().exists(s_type
)) {
532 mytype
= tvCastToString(curtag
.toArrRef().rvalAt(s_type
).tv());
533 if (!strcmp(mytype
.data(), "cdata") &&
534 curtag
.toArrRef().exists(s_value
)) {
535 myval
= tvCastToString(curtag
.toArrRef().rvalAt(s_value
).tv());
536 myval
+= decoded_value
;
537 curtag
.toArrRef().set(s_value
, myval
);
541 if (parser
->level
<= XML_MAXLEVEL
&& parser
->level
> 0) {
542 tag
= Array::Create();
543 _xml_add_to_info(parser
, parser
->ltags
[parser
->level
-1] +
545 tag
.set(s_tag
, String(parser
->ltags
[parser
->level
-1] +
546 parser
->toffset
, CopyString
));
547 tag
.set(s_value
, decoded_value
);
548 tag
.set(s_type
, s_cdata
);
549 tag
.set(s_level
, parser
->level
);
550 parser
->data
.toArrRef().append(tag
);
551 } else if (parser
->level
== (XML_MAXLEVEL
+ 1)) {
552 raise_warning("Maximum depth exceeded - Results truncated");
560 void _xml_defaultHandler(void *userData
, const XML_Char
*s
, int len
) {
561 auto parser
= getParserFromToken(userData
);
563 if (parser
&& parser
->defaultHandler
.toBoolean()) {
564 xml_call_handler(parser
,
565 parser
->defaultHandler
,
568 _xml_xmlchar_zval(s
, len
, parser
->target_encoding
)));
572 void _xml_startElementHandler(void *userData
, const XML_Char
*name
, const XML_Char
**attributes
) {
573 auto parser
= getParserFromToken(userData
);
574 const char **attrs
= (const char **) attributes
;
576 Array args
= Array::Create();
581 String tag_name
= _xml_decode_tag(parser
, (const char*)name
);
583 if (parser
->startElementHandler
.toBoolean()) {
584 args
.append(Variant(parser
));
585 args
.append(tag_name
);
586 args
.append(Array::Create());
588 while (attributes
&& *attributes
) {
589 String att
= _xml_decode_tag(parser
, (const char*)attributes
[0]);
590 String val
= xml_utf8_decode(attributes
[1],
591 strlen((const char*)attributes
[1]),
592 parser
->target_encoding
);
593 auto const arr
= args
.lvalAt(2);
594 asArrRef(arr
).set(att
, val
);
598 xml_call_handler(parser
, parser
->startElementHandler
, args
);
601 if (!parser
->data
.isNull()) {
602 if (parser
->level
<= XML_MAXLEVEL
) {
605 tag
= Array::Create();
606 atr
= Array::Create();
608 _xml_add_to_info(parser
, tag_name
.substr(parser
->toffset
));
610 tag
.set(s_tag
, tag_name
.substr(parser
->toffset
));
611 tag
.set(s_type
, s_open
);
612 tag
.set(s_level
, parser
->level
);
614 parser
->ltags
[parser
->level
-1] = strdup(tag_name
.data());
615 parser
->lastwasopen
= 1;
617 attributes
= (const XML_Char
**) attrs
;
619 while (attributes
&& *attributes
) {
620 String att
= _xml_decode_tag(parser
, (const char*)attributes
[0]);
621 String val
= xml_utf8_decode(attributes
[1],
622 strlen((const char*)attributes
[1]),
623 parser
->target_encoding
);
630 tag
.set(s_attributes
,atr
);
632 auto lval
= parser
->data
.toArrRef().lvalAt();
633 lval
.type() = KindOfArray
;
634 lval
.val().parr
= tag
.detach();
635 parser
->ctag
.assignRef(tvAsVariant(lval
.tv_ptr()));
636 } else if (parser
->level
== (XML_MAXLEVEL
+ 1)) {
637 raise_warning("Maximum depth exceeded - Results truncated");
643 void _xml_processingInstructionHandler(void *userData
, const XML_Char
*target
,
644 const XML_Char
*data
) {
645 auto parser
= getParserFromToken(userData
);
646 if (parser
&& parser
->processingInstructionHandler
.toBoolean()) {
647 Array args
= Array::Create();
648 args
.append(Variant(parser
));
649 args
.append(_xml_xmlchar_zval(target
, 0, parser
->target_encoding
));
650 args
.append(_xml_xmlchar_zval(data
, 0, parser
->target_encoding
));
651 xml_call_handler(parser
, parser
->processingInstructionHandler
, args
);
655 int _xml_externalEntityRefHandler(XML_Parser
/* void* */ parserPtr
,
656 const XML_Char
*openEntityNames
,
657 const XML_Char
*base
,
658 const XML_Char
*systemId
,
659 const XML_Char
*publicId
) {
660 auto parser
= getParserFromToken(XML_GetUserData(parserPtr
));
661 int ret
= 0; /* abort if no handler is set (should be configurable?) */
662 if (parser
&& parser
->externalEntityRefHandler
.toBoolean()) {
663 Array args
= Array::Create();
664 args
.append(Variant(parser
));
665 args
.append(_xml_xmlchar_zval(openEntityNames
, 0,
666 parser
->target_encoding
));
667 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
668 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
669 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
670 ret
= xml_call_handler(parser
,
671 parser
->externalEntityRefHandler
, args
).toInt64();
676 void _xml_notationDeclHandler(void *userData
,
677 const XML_Char
*notationName
,
678 const XML_Char
*base
,
679 const XML_Char
*systemId
,
680 const XML_Char
*publicId
) {
681 auto parser
= getParserFromToken(userData
);
683 if (parser
&& parser
->notationDeclHandler
.toBoolean()) {
684 Array args
= Array::Create();
685 args
.append(Variant(parser
));
686 args
.append(_xml_xmlchar_zval(notationName
, 0, parser
->target_encoding
));
687 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
688 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
689 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
690 xml_call_handler(parser
, parser
->notationDeclHandler
, args
);
694 void _xml_startNamespaceDeclHandler(void *userData
,const XML_Char
*prefix
,
695 const XML_Char
*uri
) {
696 auto parser
= getParserFromToken(userData
);
698 if (parser
&& parser
->startNamespaceDeclHandler
.toBoolean()) {
699 Array args
= Array::Create();
701 args
.append(Variant(parser
));
702 args
.append(_xml_xmlchar_zval(prefix
, 0, parser
->target_encoding
));
703 args
.append(_xml_xmlchar_zval(uri
, 0, parser
->target_encoding
));
704 xml_call_handler(parser
, parser
->startNamespaceDeclHandler
, args
);
708 void _xml_endNamespaceDeclHandler(void *userData
, const XML_Char
*prefix
) {
709 auto parser
= getParserFromToken(userData
);
711 if (parser
&& parser
->endNamespaceDeclHandler
.toBoolean()) {
712 Array args
= Array::Create();
713 args
.append(Variant(parser
));
714 args
.append(_xml_xmlchar_zval(prefix
, 0, parser
->target_encoding
));
715 xml_call_handler(parser
, parser
->endNamespaceDeclHandler
, args
);
719 void _xml_unparsedEntityDeclHandler(void *userData
,
720 const XML_Char
*entityName
,
721 const XML_Char
*base
,
722 const XML_Char
*systemId
,
723 const XML_Char
*publicId
,
724 const XML_Char
*notationName
) {
725 auto parser
= getParserFromToken(userData
);
727 if (parser
&& parser
->unparsedEntityDeclHandler
.toBoolean()) {
728 Array args
= Array::Create();
729 args
.append(Variant(parser
));
730 args
.append(_xml_xmlchar_zval(entityName
, 0, parser
->target_encoding
));
731 args
.append(_xml_xmlchar_zval(base
, 0, parser
->target_encoding
));
732 args
.append(_xml_xmlchar_zval(systemId
, 0, parser
->target_encoding
));
733 args
.append(_xml_xmlchar_zval(publicId
, 0, parser
->target_encoding
));
734 args
.append(_xml_xmlchar_zval(notationName
, 0, parser
->target_encoding
));
735 xml_call_handler(parser
, parser
->unparsedEntityDeclHandler
, args
);
739 static void xml_set_handler(Variant
* handler
, const Variant
& data
) {
740 if (data
.isNull() || same(data
, false) || data
.isString() ||
744 raise_warning("Handler is invalid");
748 ///////////////////////////////////////////////////////////////////////////////
750 Resource
HHVM_FUNCTION(xml_parser_create
,
751 const Variant
& encoding
/* = uninit_variant */) {
752 const String
& strEncoding
= encoding
.isNull()
754 : encoding
.toString();
755 return php_xml_parser_create_impl(strEncoding
, null_string
, 0).toResource();
758 Resource
HHVM_FUNCTION(xml_parser_create_ns
,
759 const Variant
& encoding
/* = uninit_variant */,
760 const Variant
& separator
/* = uninit_variant */) {
761 const String
& strEncoding
= encoding
.isNull()
763 : encoding
.toString();
764 const String
& strSeparator
= separator
.isNull()
766 : separator
.toString();
767 return php_xml_parser_create_impl(strEncoding
, strSeparator
, 1).toResource();
770 bool HHVM_FUNCTION(xml_parser_free
,
771 const Resource
& parser
) {
772 auto p
= cast
<XmlParser
>(parser
);
773 if (p
->isparsing
== 1) {
774 raise_warning("Parser cannot be freed while it is parsing.");
781 int64_t HHVM_FUNCTION(xml_parse
,
782 const Resource
& parser
,
784 bool is_final
/* = true */) {
785 // XML_Parse can reenter the VM, and it will do so after we've lost
786 // the frame pointer by calling through the system's copy of XML_Parse
788 SYNC_VM_REGS_SCOPED();
789 auto p
= cast
<XmlParser
>(parser
);
791 long isFinal
= is_final
? 1 : 0;
793 ret
= XML_Parse(p
->parser
, (const XML_Char
*)data
.data(), data
.size(),
799 int64_t HHVM_FUNCTION(xml_parse_into_struct
,
800 const Resource
& parser
,
803 VRefParam index
/* = null */) {
804 SYNC_VM_REGS_SCOPED();
806 auto p
= cast
<XmlParser
>(parser
);
807 p
->data
.setWithRef(values
);
808 p
->data
= Array::Create();
809 p
->info
.setWithRef(index
);
810 p
->info
= Array::Create();
812 p
->ltags
= (char**)malloc(XML_MAXLEVEL
* sizeof(char*));
814 XML_SetDefaultHandler(p
->parser
, _xml_defaultHandler
);
815 XML_SetElementHandler(p
->parser
, _xml_startElementHandler
,
816 _xml_endElementHandler
);
817 XML_SetCharacterDataHandler(p
->parser
, _xml_characterDataHandler
);
820 ret
= XML_Parse(p
->parser
, (const XML_Char
*)data
.data(), data
.size(), 1);
826 Variant
HHVM_FUNCTION(xml_parser_get_option
,
827 const Resource
& parser
,
829 auto p
= cast
<XmlParser
>(parser
);
831 case PHP_XML_OPTION_CASE_FOLDING
:
832 return p
->case_folding
;
833 case PHP_XML_OPTION_TARGET_ENCODING
:
834 return String((const char*)p
->target_encoding
, CopyString
);
836 raise_warning("Unknown option");
842 bool HHVM_FUNCTION(xml_parser_set_option
,
843 const Resource
& parser
,
845 const Variant
& value
) {
846 auto p
= cast
<XmlParser
>(parser
);
848 case PHP_XML_OPTION_CASE_FOLDING
:
849 p
->case_folding
= value
.toInt64();
851 case PHP_XML_OPTION_SKIP_TAGSTART
:
852 p
->toffset
= value
.toInt64();
854 case PHP_XML_OPTION_SKIP_WHITE
:
855 p
->skipwhite
= value
.toInt64();
857 case PHP_XML_OPTION_TARGET_ENCODING
: {
859 enc
= xml_get_encoding((const XML_Char
*)value
.toString().data());
861 raise_warning("Unsupported target encoding \"%s\"",
862 value
.toString().data());
865 p
->target_encoding
= enc
->name
;
869 raise_warning("Unknown option");
875 bool HHVM_FUNCTION(xml_set_character_data_handler
,
876 const Resource
& parser
,
877 const Variant
& handler
) {
878 auto p
= cast
<XmlParser
>(parser
);
879 xml_set_handler(&p
->characterDataHandler
, handler
);
880 XML_SetCharacterDataHandler(p
->parser
, _xml_characterDataHandler
);
884 bool HHVM_FUNCTION(xml_set_default_handler
,
885 const Resource
& parser
,
886 const Variant
& handler
) {
887 auto p
= cast
<XmlParser
>(parser
);
888 xml_set_handler(&p
->defaultHandler
, handler
);
889 XML_SetDefaultHandler(p
->parser
, _xml_defaultHandler
);
893 bool HHVM_FUNCTION(xml_set_element_handler
,
894 const Resource
& parser
,
895 const Variant
& start_element_handler
,
896 const Variant
& end_element_handler
) {
897 auto p
= cast
<XmlParser
>(parser
);
898 xml_set_handler(&p
->startElementHandler
, start_element_handler
);
899 xml_set_handler(&p
->endElementHandler
, end_element_handler
);
900 XML_SetElementHandler(p
->parser
, _xml_startElementHandler
,
901 _xml_endElementHandler
);
905 bool HHVM_FUNCTION(xml_set_processing_instruction_handler
,
906 const Resource
& parser
,
907 const Variant
& handler
) {
908 auto p
= cast
<XmlParser
>(parser
);
909 xml_set_handler(&p
->processingInstructionHandler
, handler
);
910 XML_SetProcessingInstructionHandler(p
->parser
,
911 _xml_processingInstructionHandler
);
915 bool HHVM_FUNCTION(xml_set_start_namespace_decl_handler
,
916 const Resource
& parser
,
917 const Variant
& handler
) {
918 auto p
= cast
<XmlParser
>(parser
);
919 xml_set_handler(&p
->startNamespaceDeclHandler
, handler
);
920 XML_SetStartNamespaceDeclHandler(p
->parser
, _xml_startNamespaceDeclHandler
);
924 bool HHVM_FUNCTION(xml_set_end_namespace_decl_handler
,
925 const Resource
& parser
,
926 const Variant
& handler
) {
927 auto p
= cast
<XmlParser
>(parser
);
928 xml_set_handler(&p
->endNamespaceDeclHandler
, handler
);
929 XML_SetEndNamespaceDeclHandler(p
->parser
, _xml_endNamespaceDeclHandler
);
933 bool HHVM_FUNCTION(xml_set_unparsed_entity_decl_handler
,
934 const Resource
& parser
,
935 const Variant
& handler
) {
936 auto p
= cast
<XmlParser
>(parser
);
937 xml_set_handler(&p
->unparsedEntityDeclHandler
, handler
);
938 XML_SetUnparsedEntityDeclHandler(p
->parser
, _xml_unparsedEntityDeclHandler
);
942 bool HHVM_FUNCTION(xml_set_external_entity_ref_handler
,
943 const Resource
& parser
,
944 const Variant
& handler
) {
945 auto p
= cast
<XmlParser
>(parser
);
946 xml_set_handler(&p
->externalEntityRefHandler
, handler
);
947 XML_SetExternalEntityRefHandler(p
->parser
, _xml_externalEntityRefHandler
);
951 bool HHVM_FUNCTION(xml_set_notation_decl_handler
,
952 const Resource
& parser
,
953 const Variant
& handler
) {
954 auto p
= cast
<XmlParser
>(parser
);
955 xml_set_handler(&p
->notationDeclHandler
, handler
);
956 XML_SetNotationDeclHandler(p
->parser
, _xml_notationDeclHandler
);
960 bool HHVM_FUNCTION(xml_set_object
,
961 const Resource
& parser
,
963 auto p
= cast
<XmlParser
>(parser
);
964 p
->object
.setWithRef(object
);
968 int64_t HHVM_FUNCTION(xml_get_current_byte_index
,
969 const Resource
& parser
) {
970 auto p
= cast
<XmlParser
>(parser
);
971 return XML_GetCurrentByteIndex(p
->parser
);
974 int64_t HHVM_FUNCTION(xml_get_current_column_number
,
975 const Resource
& parser
) {
976 auto p
= cast
<XmlParser
>(parser
);
977 return XML_GetCurrentColumnNumber(p
->parser
);
980 int64_t HHVM_FUNCTION(xml_get_current_line_number
,
981 const Resource
& parser
) {
982 auto p
= cast
<XmlParser
>(parser
);
983 return XML_GetCurrentLineNumber(p
->parser
);
986 int64_t HHVM_FUNCTION(xml_get_error_code
,
987 const Resource
& parser
) {
988 auto p
= cast
<XmlParser
>(parser
);
989 return XML_GetErrorCode(p
->parser
);
992 String
HHVM_FUNCTION(xml_error_string
,
994 char * str
= (char *)XML_ErrorString((XML_Error
)/*(int)*/code
);
995 return String(str
, CopyString
);
998 ///////////////////////////////////////////////////////////////////////////////
1000 String
HHVM_FUNCTION(utf8_decode
,
1001 const String
& data
) {
1002 return xml_utf8_decode(data
.c_str(), data
.size(), "ISO-8859-1");
1005 String
HHVM_FUNCTION(utf8_encode
,
1006 const String
& data
) {
1007 auto const maxSize
= safe_cast
<size_t>(data
.size()) * 4;
1008 String str
= String(maxSize
, ReserveString
);
1009 char *newbuf
= str
.mutableData();
1011 const char *s
= data
.data();
1012 for (int pos
= data
.size(); pos
> 0; pos
--, s
++) {
1013 unsigned int c
= (unsigned char)(*s
);
1015 newbuf
[newlen
++] = (char) c
;
1016 } else if (c
< 0x800) {
1017 newbuf
[newlen
++] = (0xc0 | (c
>> 6));
1018 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1019 } else if (c
< 0x10000) {
1020 newbuf
[newlen
++] = (0xe0 | (c
>> 12));
1021 newbuf
[newlen
++] = (0xc0 | ((c
>> 6) & 0x3f));
1022 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1023 } else if (c
< 0x200000) {
1024 newbuf
[newlen
++] = (0xf0 | (c
>> 18));
1025 newbuf
[newlen
++] = (0xe0 | ((c
>> 12) & 0x3f));
1026 newbuf
[newlen
++] = (0xc0 | ((c
>> 6) & 0x3f));
1027 newbuf
[newlen
++] = (0x80 | (c
& 0x3f));
1031 assert(newlen
<= maxSize
);
1036 ///////////////////////////////////////////////////////////////////////////////