Don't return Variant& from Array functions
[hiphop-php.git] / hphp / runtime / ext / xml / ext_xml.cpp
blob8940f382e68bc7fbc05943ca942af65ffdf34602
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/xml/ext_xml.h"
19 #include <folly/ScopeGuard.h>
21 #include "hphp/runtime/base/array-init.h"
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/comparisons.h"
24 #include "hphp/runtime/base/externals.h"
25 #include "hphp/runtime/base/request-local.h"
26 #include "hphp/runtime/base/root-map.h"
27 #include "hphp/runtime/base/zend-functions.h"
28 #include "hphp/runtime/base/zend-string.h"
29 #include "hphp/runtime/vm/jit/translator.h"
30 #include "hphp/runtime/vm/jit/translator-inline.h"
31 #include "hphp/runtime/base/utf8-decode.h"
32 #include <expat.h>
// Upper bound on the element nesting depth for which per-level tag names are
// tracked (size of the XmlParser::ltags table).
// XXX this should be dynamic
#define XML_MAXLEVEL 255
37 namespace HPHP {
// Option identifiers accepted by xml_parser_get_option() and
// xml_parser_set_option(); exported to userland as XML_OPTION_*.
enum php_xml_option {
  PHP_XML_OPTION_CASE_FOLDING    = 1,
  PHP_XML_OPTION_TARGET_ENCODING = 2,
  PHP_XML_OPTION_SKIP_TAGSTART   = 3,
  PHP_XML_OPTION_SKIP_WHITE      = 4
};
46 static struct XMLExtension final : Extension {
47 XMLExtension() : Extension("xml", NO_EXTENSION_VERSION_YET) {}
48 void moduleInit() override {
49 HHVM_FE(xml_parser_create);
50 HHVM_FE(xml_parser_free);
51 HHVM_FE(xml_parse);
52 HHVM_FE(xml_parse_into_struct);
53 HHVM_FE(xml_parser_create_ns);
54 HHVM_FE(xml_parser_get_option);
55 HHVM_FE(xml_parser_set_option);
56 HHVM_FE(xml_set_character_data_handler);
57 HHVM_FE(xml_set_default_handler);
58 HHVM_FE(xml_set_element_handler);
59 HHVM_FE(xml_set_processing_instruction_handler);
60 HHVM_FE(xml_set_start_namespace_decl_handler);
61 HHVM_FE(xml_set_end_namespace_decl_handler);
62 HHVM_FE(xml_set_unparsed_entity_decl_handler);
63 HHVM_FE(xml_set_external_entity_ref_handler);
64 HHVM_FE(xml_set_notation_decl_handler);
65 HHVM_FE(xml_set_object);
66 HHVM_FE(xml_get_current_byte_index);
67 HHVM_FE(xml_get_current_column_number);
68 HHVM_FE(xml_get_current_line_number);
69 HHVM_FE(xml_get_error_code);
70 HHVM_FE(xml_error_string);
71 HHVM_FE(utf8_decode);
72 HHVM_FE(utf8_encode);
74 HHVM_RC_INT_SAME(XML_ERROR_ASYNC_ENTITY);
75 HHVM_RC_INT_SAME(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
76 HHVM_RC_INT_SAME(XML_ERROR_BAD_CHAR_REF);
77 HHVM_RC_INT_SAME(XML_ERROR_BINARY_ENTITY_REF);
78 HHVM_RC_INT_SAME(XML_ERROR_DUPLICATE_ATTRIBUTE);
79 HHVM_RC_INT_SAME(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
80 HHVM_RC_INT_SAME(XML_ERROR_INCORRECT_ENCODING);
81 HHVM_RC_INT_SAME(XML_ERROR_INVALID_TOKEN);
82 HHVM_RC_INT_SAME(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
83 HHVM_RC_INT_SAME(XML_ERROR_MISPLACED_XML_PI);
84 HHVM_RC_INT_SAME(XML_ERROR_NONE);
85 HHVM_RC_INT_SAME(XML_ERROR_NO_ELEMENTS);
86 HHVM_RC_INT_SAME(XML_ERROR_NO_MEMORY);
87 HHVM_RC_INT_SAME(XML_ERROR_PARAM_ENTITY_REF);
88 HHVM_RC_INT_SAME(XML_ERROR_PARTIAL_CHAR);
89 HHVM_RC_INT_SAME(XML_ERROR_RECURSIVE_ENTITY_REF);
90 HHVM_RC_INT_SAME(XML_ERROR_SYNTAX);
91 HHVM_RC_INT_SAME(XML_ERROR_TAG_MISMATCH);
92 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_CDATA_SECTION);
93 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_TOKEN);
94 HHVM_RC_INT_SAME(XML_ERROR_UNDEFINED_ENTITY);
95 HHVM_RC_INT_SAME(XML_ERROR_UNKNOWN_ENCODING);
97 HHVM_RC_INT(XML_OPTION_CASE_FOLDING, PHP_XML_OPTION_CASE_FOLDING);
98 HHVM_RC_INT(XML_OPTION_TARGET_ENCODING, PHP_XML_OPTION_TARGET_ENCODING);
99 HHVM_RC_INT(XML_OPTION_SKIP_TAGSTART, PHP_XML_OPTION_SKIP_TAGSTART);
100 HHVM_RC_INT(XML_OPTION_SKIP_WHITE, PHP_XML_OPTION_SKIP_WHITE);
102 HHVM_RC_STR(XML_SAX_IMPL, "expat");
104 loadSystemlib();
106 } s_xml_extension;
108 ///////////////////////////////////////////////////////////////////////////////
110 struct XmlParser : SweepableResourceData {
111 DECLARE_RESOURCE_ALLOCATION(XmlParser)
112 XmlParser() {}
113 virtual ~XmlParser();
114 void cleanupImpl();
115 CLASSNAME_IS("xml");
116 const String& o_getClassNameHook() const override;
118 int case_folding{0};
119 XML_Parser parser{nullptr};
120 XML_Char *target_encoding{nullptr};
122 Variant startElementHandler;
123 Variant endElementHandler;
124 Variant characterDataHandler;
125 Variant processingInstructionHandler;
126 Variant defaultHandler;
127 Variant unparsedEntityDeclHandler;
128 Variant notationDeclHandler;
129 Variant externalEntityRefHandler;
130 Variant unknownEncodingHandler;
131 Variant startNamespaceDeclHandler;
132 Variant endNamespaceDeclHandler;
134 Variant object;
136 Variant data;
137 Variant info;
138 int level{0};
139 int toffset{0};
140 int curtag{0};
141 Variant ctag;
142 char **ltags{nullptr};
143 int lastwasopen{0};
144 int skipwhite{0};
145 int isparsing{0};
148 XmlParser::~XmlParser() {
149 cleanupImpl();
152 void XmlParser::cleanupImpl() {
153 if (parser) {
154 XML_ParserFree(parser);
155 parser = NULL;
157 if (ltags) {
158 int inx;
159 for (inx = 0; (inx < level) && (inx < XML_MAXLEVEL); inx++)
160 free(ltags[inx]);
161 free(ltags);
162 ltags = NULL;
166 void XmlParser::sweep() {
167 cleanupImpl();
170 const String& XmlParser::o_getClassNameHook() const {
171 return classnameof();
174 struct XmlParserData final : RequestEventHandler {
175 void requestInit() override { parsers.reset(); }
176 void requestShutdown() override { parsers.reset(); }
177 RootMap<XmlParser> parsers;
179 IMPLEMENT_STATIC_REQUEST_LOCAL(XmlParserData, s_xml_data);
181 namespace {
183 inline req::ptr<XmlParser> getParserFromToken(void* userData) {
184 return s_xml_data->parsers.lookupRoot(userData);
187 inline void* getParserToken(const req::ptr<XmlParser>& parser) {
188 return reinterpret_cast<void*>(s_xml_data->parsers.addRoot(parser));
191 inline void clearParser(const req::ptr<XmlParser>& p) {
192 s_xml_data->parsers.removeRoot(p);
197 typedef struct {
198 XML_Char *name;
199 char (*decoding_function)(unsigned short);
200 unsigned short (*encoding_function)(unsigned char);
201 } xml_encoding;
203 static XML_Char * xml_globals_default_encoding = (XML_Char*)"UTF-8";
204 // for xml_parse_into_struct
207 #define XML(v) (xml_globals_ ## v)
// ISO-8859-1 byte -> code point: the byte value widened unchanged.
inline static unsigned short xml_encode_iso_8859_1(unsigned char c) {
  const unsigned short widened = c;
  return widened;
}
// Code point -> ISO-8859-1 byte; values above 0xff become '?'.
inline static char xml_decode_iso_8859_1(unsigned short c) {
  if (c > 0xff) {
    return '?';
  }
  return static_cast<char>(c);
}
// US-ASCII byte -> code point: the byte value widened unchanged.
inline static unsigned short xml_encode_us_ascii(unsigned char c) {
  const unsigned short widened = c;
  return widened;
}
// Code point -> US-ASCII byte; values above 0x7f become '?'.
inline static char xml_decode_us_ascii(unsigned short c) {
  if (c > 0x7f) {
    return '?';
  }
  return static_cast<char>(c);
}
225 xml_encoding xml_encodings[] = {
226 { (XML_Char*)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
227 { (XML_Char*)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
228 { (XML_Char*)"UTF-8", NULL, NULL },
229 { (XML_Char*)NULL, NULL, NULL }
232 static void *php_xml_malloc_wrapper(size_t sz) {
233 return req::malloc_untyped(sz);
236 static void *php_xml_realloc_wrapper(void *ptr, size_t sz) {
237 return req::realloc_untyped(ptr, sz);
240 static void php_xml_free_wrapper(void *ptr) {
241 if (ptr) {
242 req::free(ptr);
246 static XML_Memory_Handling_Suite php_xml_mem_hdlrs = {
247 php_xml_malloc_wrapper,
248 php_xml_realloc_wrapper,
249 php_xml_free_wrapper
252 static xml_encoding *xml_get_encoding(const XML_Char *name) {
253 xml_encoding *enc = &xml_encodings[0];
255 while (enc && enc->name) {
256 if (strcasecmp((const char*)name, (const char*)enc->name) == 0) {
257 return enc;
259 enc++;
261 return NULL;
264 static int _xml_xmlcharlen(const XML_Char *s) {
265 int len = 0;
267 while (*s) {
268 len++;
269 s++;
271 return len;
274 String xml_utf8_decode(const XML_Char *s, int len,
275 const XML_Char *encoding) {
276 String str = String(len, ReserveString);
277 char *newbuf = str.mutableData();
278 char (*decoder)(unsigned short) = nullptr;
279 xml_encoding *enc = xml_get_encoding(encoding);
281 int newlen = 0;
282 if (enc) {
283 decoder = enc->decoding_function;
285 if (decoder == nullptr) {
286 /* If the target encoding was unknown, or no decoder function
287 * was specified, return the UTF-8-encoded data as-is.
289 memcpy(newbuf, s, len);
290 str.setSize(len);
291 return str;
294 UTF8To16Decoder dec(s, len, true);
295 for (int b = dec.decode(); b != UTF8_END; b = dec.decode()) {
296 newbuf[newlen] = decoder(b);
297 ++newlen;
300 assert(newlen <= len);
301 str.shrink(newlen);
302 return str;
305 static Variant _xml_xmlchar_zval(const XML_Char *s, int len,
306 const XML_Char *encoding) {
307 if (s == NULL) {
308 return false;
310 if (len == 0) {
311 len = _xml_xmlcharlen(s);
313 String ret = xml_utf8_decode(s, len, encoding);
314 return ret;
317 static
318 String _xml_decode_tag(const req::ptr<XmlParser>& parser, const char *tag) {
319 auto newstr = xml_utf8_decode((const XML_Char*)tag, strlen(tag),
320 parser->target_encoding);
321 if (parser->case_folding) {
322 string_to_upper(newstr);
324 return newstr;
327 static Variant php_xml_parser_create_impl(const String& encoding_param,
328 const String& ns_param,
329 int ns_support) {
330 int auto_detect = 0;
331 XML_Char *encoding;
333 if (!encoding_param.isNull()) {
334 /* The supported encoding types are hardcoded here because
335 * we are limited to the encodings supported by expat/xmltok.
337 if (encoding_param.size() == 0) {
338 encoding = XML(default_encoding);
339 auto_detect = 1;
340 } else if (strcasecmp(encoding_param.data(), "ISO-8859-1") == 0) {
341 encoding = (XML_Char*)"ISO-8859-1";
342 } else if (strcasecmp(encoding_param.data(), "UTF-8") == 0) {
343 encoding = (XML_Char*)"UTF-8";
344 } else if (strcasecmp(encoding_param.data(), "US-ASCII") == 0) {
345 encoding = (XML_Char*)"US-ASCII";
346 } else {
347 raise_warning("unsupported source encoding \"%s\"",
348 encoding_param.c_str());
349 return false;
351 } else {
352 encoding = XML(default_encoding);
355 String separator;
356 if (ns_support && ns_param.empty()) {
357 separator = ":";
358 } else {
359 separator = ns_param;
362 auto parser = req::make<XmlParser>();
363 parser->parser = XML_ParserCreate_MM
364 ((auto_detect ? NULL : encoding), &php_xml_mem_hdlrs,
365 !separator.empty() ? (const XML_Char*)separator.data() : NULL);
367 parser->target_encoding = encoding;
368 parser->case_folding = 1;
369 parser->object.asTypedValue()->m_type = KindOfNull;
370 parser->isparsing = 0;
372 XML_SetUserData(parser->parser, getParserToken(parser));
374 return Variant(std::move(parser));
377 static bool name_contains_class(const String& name) {
378 if (name) {
379 int pos = name.find("::");
380 return pos != 0 && pos != String::npos && pos + 2 < name.size();
382 return false;
385 static Variant xml_call_handler(const req::ptr<XmlParser>& parser,
386 const Variant& handler,
387 const Array& args) {
388 if (parser && handler.toBoolean()) {
389 Variant retval;
390 if (handler.isString() && !name_contains_class(handler.toString())) {
391 if (!parser->object.isObject()) {
392 retval = invoke(handler.toString().c_str(), args, -1);
393 } else {
394 retval = parser->object.toObject()->
395 o_invoke(handler.toString(), args);
397 } else if (is_callable(handler)) {
398 vm_call_user_func(handler, args);
399 } else {
400 raise_warning("Handler is invalid");
402 return retval;
404 return init_null();
407 static void _xml_add_to_info(const req::ptr<XmlParser>& parser,
408 const String& nameStr) {
409 if (parser->info.isNull()) {
410 return;
412 forceToArray(parser->info);
413 if (!parser->info.toCArrRef().exists(nameStr)) {
414 parser->info.toArrRef().set(nameStr, Array::Create());
416 auto const inner = parser->info.toArrRef().lvalAt(nameStr);
417 forceToArray(inner).append(parser->curtag);
418 parser->curtag++;
421 const StaticString
422 s_type("type"),
423 s_complete("complete"),
424 s_tag("tag"),
425 s_close("close"),
426 s_level("level"),
427 s_value("value"),
428 s_cdata("cdata"),
429 s_open("open"),
430 s_attributes("attributes");
432 void _xml_endElementHandler(void *userData, const XML_Char *name) {
433 auto parser = getParserFromToken(userData);
435 if (parser) {
436 Variant retval;
437 Array args = Array::Create();
439 auto tag_name = _xml_decode_tag(parser, (const char*)name);
441 if (parser->endElementHandler.toBoolean()) {
442 args.append(Variant(parser));
443 args.append(tag_name);
444 xml_call_handler(parser, parser->endElementHandler, args);
447 if (!parser->data.isNull()) {
448 if (parser->lastwasopen) {
449 parser->ctag.toArrRef().set(s_type, s_complete);
450 } else {
451 ArrayInit tag(3, ArrayInit::Map{});
452 _xml_add_to_info(parser, tag_name.substr(parser->toffset));
453 tag.set(s_tag, tag_name.substr(parser->toffset));
454 tag.set(s_type, s_close);
455 tag.set(s_level, parser->level);
456 parser->data.toArrRef().append(tag.toArray());
458 parser->lastwasopen = 0;
462 if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
463 free(parser->ltags[parser->level-1]);
466 parser->level--;
470 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) {
471 auto parser = getParserFromToken(userData);
473 if (parser) {
474 Variant retval;
475 Array args = Array::Create();
477 if (parser->characterDataHandler.toBoolean()) {
478 args.append(Variant(parser));
479 args.append(_xml_xmlchar_zval(s, len, parser->target_encoding));
480 xml_call_handler(parser, parser->characterDataHandler, args);
483 if (!parser->data.isNull()) {
484 int i;
485 int doprint = 0;
487 String decoded_value;
488 int decoded_len;
489 decoded_value = xml_utf8_decode(s,len,
490 parser->target_encoding);
491 decoded_len = decoded_value.size();
492 for (i = 0; i < decoded_len; i++) {
493 switch (decoded_value[i]) {
494 case ' ':
495 case '\t':
496 case '\n':
497 default:
498 doprint = 1;
499 break;
501 if (doprint) {
502 break;
505 if (doprint || (! parser->skipwhite)) {
506 if (parser->lastwasopen) {
507 String myval;
508 // check if value exists, if yes append to that
509 if (parser->ctag.toArrRef().exists(s_value)) {
510 myval = tvCastToString(parser->ctag.toArray().rvalAt(s_value).tv());
511 myval += decoded_value;
512 parser->ctag.toArrRef().set(s_value, myval);
513 } else {
514 parser->ctag.toArrRef().set(
515 s_value,
516 decoded_value
519 } else {
520 Array tag;
521 String myval;
522 String mytype;
524 auto curtag = parser->data.toArrRef().pop();
525 SCOPE_EXIT {
526 try {
527 parser->data.toArrRef().append(curtag);
528 } catch (...) {}
531 if (curtag.toArrRef().exists(s_type)) {
532 mytype = tvCastToString(curtag.toArrRef().rvalAt(s_type).tv());
533 if (!strcmp(mytype.data(), "cdata") &&
534 curtag.toArrRef().exists(s_value)) {
535 myval = tvCastToString(curtag.toArrRef().rvalAt(s_value).tv());
536 myval += decoded_value;
537 curtag.toArrRef().set(s_value, myval);
538 return;
541 if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
542 tag = Array::Create();
543 _xml_add_to_info(parser, parser->ltags[parser->level-1] +
544 parser->toffset);
545 tag.set(s_tag, String(parser->ltags[parser->level-1] +
546 parser->toffset, CopyString));
547 tag.set(s_value, decoded_value);
548 tag.set(s_type, s_cdata);
549 tag.set(s_level, parser->level);
550 parser->data.toArrRef().append(tag);
551 } else if (parser->level == (XML_MAXLEVEL + 1)) {
552 raise_warning("Maximum depth exceeded - Results truncated");
560 void _xml_defaultHandler(void *userData, const XML_Char *s, int len) {
561 auto parser = getParserFromToken(userData);
563 if (parser && parser->defaultHandler.toBoolean()) {
564 xml_call_handler(parser,
565 parser->defaultHandler,
566 make_packed_array(
567 Variant(parser),
568 _xml_xmlchar_zval(s, len, parser->target_encoding)));
572 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes) {
573 auto parser = getParserFromToken(userData);
574 const char **attrs = (const char **) attributes;
575 Variant retval;
576 Array args = Array::Create();
578 if (parser) {
579 parser->level++;
581 String tag_name = _xml_decode_tag(parser, (const char*)name);
583 if (parser->startElementHandler.toBoolean()) {
584 args.append(Variant(parser));
585 args.append(tag_name);
586 args.append(Array::Create());
588 while (attributes && *attributes) {
589 String att = _xml_decode_tag(parser, (const char*)attributes[0]);
590 String val = xml_utf8_decode(attributes[1],
591 strlen((const char*)attributes[1]),
592 parser->target_encoding);
593 auto const arr = args.lvalAt(2);
594 asArrRef(arr).set(att, val);
595 attributes += 2;
598 xml_call_handler(parser, parser->startElementHandler, args);
601 if (!parser->data.isNull()) {
602 if (parser->level <= XML_MAXLEVEL) {
603 Array tag, atr;
604 int atcnt = 0;
605 tag = Array::Create();
606 atr = Array::Create();
608 _xml_add_to_info(parser, tag_name.substr(parser->toffset));
610 tag.set(s_tag, tag_name.substr(parser->toffset));
611 tag.set(s_type, s_open);
612 tag.set(s_level, parser->level);
614 parser->ltags[parser->level-1] = strdup(tag_name.data());
615 parser->lastwasopen = 1;
617 attributes = (const XML_Char **) attrs;
619 while (attributes && *attributes) {
620 String att = _xml_decode_tag(parser, (const char*)attributes[0]);
621 String val = xml_utf8_decode(attributes[1],
622 strlen((const char*)attributes[1]),
623 parser->target_encoding);
624 atr.set(att, val);
625 atcnt++;
626 attributes += 2;
629 if (atcnt) {
630 tag.set(s_attributes,atr);
632 auto lval = parser->data.toArrRef().lvalAt();
633 lval.type() = KindOfArray;
634 lval.val().parr = tag.detach();
635 parser->ctag.assignRef(tvAsVariant(lval.tv_ptr()));
636 } else if (parser->level == (XML_MAXLEVEL + 1)) {
637 raise_warning("Maximum depth exceeded - Results truncated");
643 void _xml_processingInstructionHandler(void *userData, const XML_Char *target,
644 const XML_Char *data) {
645 auto parser = getParserFromToken(userData);
646 if (parser && parser->processingInstructionHandler.toBoolean()) {
647 Array args = Array::Create();
648 args.append(Variant(parser));
649 args.append(_xml_xmlchar_zval(target, 0, parser->target_encoding));
650 args.append(_xml_xmlchar_zval(data, 0, parser->target_encoding));
651 xml_call_handler(parser, parser->processingInstructionHandler, args);
655 int _xml_externalEntityRefHandler(XML_Parser /* void* */ parserPtr,
656 const XML_Char *openEntityNames,
657 const XML_Char *base,
658 const XML_Char *systemId,
659 const XML_Char *publicId) {
660 auto parser = getParserFromToken(XML_GetUserData(parserPtr));
661 int ret = 0; /* abort if no handler is set (should be configurable?) */
662 if (parser && parser->externalEntityRefHandler.toBoolean()) {
663 Array args = Array::Create();
664 args.append(Variant(parser));
665 args.append(_xml_xmlchar_zval(openEntityNames, 0,
666 parser->target_encoding));
667 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
668 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
669 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
670 ret = xml_call_handler(parser,
671 parser->externalEntityRefHandler, args).toInt64();
673 return ret;
676 void _xml_notationDeclHandler(void *userData,
677 const XML_Char *notationName,
678 const XML_Char *base,
679 const XML_Char *systemId,
680 const XML_Char *publicId) {
681 auto parser = getParserFromToken(userData);
683 if (parser && parser->notationDeclHandler.toBoolean()) {
684 Array args = Array::Create();
685 args.append(Variant(parser));
686 args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
687 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
688 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
689 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
690 xml_call_handler(parser, parser->notationDeclHandler, args);
694 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix,
695 const XML_Char *uri) {
696 auto parser = getParserFromToken(userData);
698 if (parser && parser->startNamespaceDeclHandler.toBoolean()) {
699 Array args = Array::Create();
701 args.append(Variant(parser));
702 args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
703 args.append(_xml_xmlchar_zval(uri, 0, parser->target_encoding));
704 xml_call_handler(parser, parser->startNamespaceDeclHandler, args);
708 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
709 auto parser = getParserFromToken(userData);
711 if (parser && parser->endNamespaceDeclHandler.toBoolean()) {
712 Array args = Array::Create();
713 args.append(Variant(parser));
714 args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
715 xml_call_handler(parser, parser->endNamespaceDeclHandler, args);
719 void _xml_unparsedEntityDeclHandler(void *userData,
720 const XML_Char *entityName,
721 const XML_Char *base,
722 const XML_Char *systemId,
723 const XML_Char *publicId,
724 const XML_Char *notationName) {
725 auto parser = getParserFromToken(userData);
727 if (parser && parser->unparsedEntityDeclHandler.toBoolean()) {
728 Array args = Array::Create();
729 args.append(Variant(parser));
730 args.append(_xml_xmlchar_zval(entityName, 0, parser->target_encoding));
731 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
732 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
733 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
734 args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
735 xml_call_handler(parser, parser->unparsedEntityDeclHandler, args);
739 static void xml_set_handler(Variant * handler, const Variant& data) {
740 if (data.isNull() || same(data, false) || data.isString() ||
741 is_callable(data)) {
742 *handler = data;
743 } else {
744 raise_warning("Handler is invalid");
748 ///////////////////////////////////////////////////////////////////////////////
750 Resource HHVM_FUNCTION(xml_parser_create,
751 const Variant& encoding /* = uninit_variant */) {
752 const String& strEncoding = encoding.isNull()
753 ? null_string
754 : encoding.toString();
755 return php_xml_parser_create_impl(strEncoding, null_string, 0).toResource();
758 Resource HHVM_FUNCTION(xml_parser_create_ns,
759 const Variant& encoding /* = uninit_variant */,
760 const Variant& separator /* = uninit_variant */) {
761 const String& strEncoding = encoding.isNull()
762 ? null_string
763 : encoding.toString();
764 const String& strSeparator = separator.isNull()
765 ? null_string
766 : separator.toString();
767 return php_xml_parser_create_impl(strEncoding, strSeparator, 1).toResource();
770 bool HHVM_FUNCTION(xml_parser_free,
771 const Resource& parser) {
772 auto p = cast<XmlParser>(parser);
773 if (p->isparsing == 1) {
774 raise_warning("Parser cannot be freed while it is parsing.");
775 return false;
777 clearParser(p);
778 return true;
781 int64_t HHVM_FUNCTION(xml_parse,
782 const Resource& parser,
783 const String& data,
784 bool is_final /* = true */) {
785 // XML_Parse can reenter the VM, and it will do so after we've lost
786 // the frame pointer by calling through the system's copy of XML_Parse
787 // in libexpat.so.
788 SYNC_VM_REGS_SCOPED();
789 auto p = cast<XmlParser>(parser);
790 int ret;
791 long isFinal = is_final ? 1 : 0;
792 p->isparsing = 1;
793 ret = XML_Parse(p->parser, (const XML_Char*)data.data(), data.size(),
794 isFinal);
795 p->isparsing = 0;
796 return ret;
799 int64_t HHVM_FUNCTION(xml_parse_into_struct,
800 const Resource& parser,
801 const String& data,
802 VRefParam values,
803 VRefParam index /* = null */) {
804 SYNC_VM_REGS_SCOPED();
805 int ret;
806 auto p = cast<XmlParser>(parser);
807 p->data.setWithRef(values);
808 p->data = Array::Create();
809 p->info.setWithRef(index);
810 p->info = Array::Create();
811 p->level = 0;
812 p->ltags = (char**)malloc(XML_MAXLEVEL * sizeof(char*));
814 XML_SetDefaultHandler(p->parser, _xml_defaultHandler);
815 XML_SetElementHandler(p->parser, _xml_startElementHandler,
816 _xml_endElementHandler);
817 XML_SetCharacterDataHandler(p->parser, _xml_characterDataHandler);
819 p->isparsing = 1;
820 ret = XML_Parse(p->parser, (const XML_Char*)data.data(), data.size(), 1);
821 p->isparsing = 0;
823 return ret;
826 Variant HHVM_FUNCTION(xml_parser_get_option,
827 const Resource& parser,
828 int option) {
829 auto p = cast<XmlParser>(parser);
830 switch (option) {
831 case PHP_XML_OPTION_CASE_FOLDING:
832 return p->case_folding;
833 case PHP_XML_OPTION_TARGET_ENCODING:
834 return String((const char*)p->target_encoding, CopyString);
835 default:
836 raise_warning("Unknown option");
837 return false;
839 return false;
842 bool HHVM_FUNCTION(xml_parser_set_option,
843 const Resource& parser,
844 int option,
845 const Variant& value) {
846 auto p = cast<XmlParser>(parser);
847 switch (option) {
848 case PHP_XML_OPTION_CASE_FOLDING:
849 p->case_folding = value.toInt64();
850 break;
851 case PHP_XML_OPTION_SKIP_TAGSTART:
852 p->toffset = value.toInt64();
853 break;
854 case PHP_XML_OPTION_SKIP_WHITE:
855 p->skipwhite = value.toInt64();
856 break;
857 case PHP_XML_OPTION_TARGET_ENCODING: {
858 xml_encoding *enc;
859 enc = xml_get_encoding((const XML_Char*)value.toString().data());
860 if (enc == NULL) {
861 raise_warning("Unsupported target encoding \"%s\"",
862 value.toString().data());
863 return false;
865 p->target_encoding = enc->name;
866 break;
868 default:
869 raise_warning("Unknown option");
870 return false;
872 return true;
875 bool HHVM_FUNCTION(xml_set_character_data_handler,
876 const Resource& parser,
877 const Variant& handler) {
878 auto p = cast<XmlParser>(parser);
879 xml_set_handler(&p->characterDataHandler, handler);
880 XML_SetCharacterDataHandler(p->parser, _xml_characterDataHandler);
881 return true;
884 bool HHVM_FUNCTION(xml_set_default_handler,
885 const Resource& parser,
886 const Variant& handler) {
887 auto p = cast<XmlParser>(parser);
888 xml_set_handler(&p->defaultHandler, handler);
889 XML_SetDefaultHandler(p->parser, _xml_defaultHandler);
890 return true;
893 bool HHVM_FUNCTION(xml_set_element_handler,
894 const Resource& parser,
895 const Variant& start_element_handler,
896 const Variant& end_element_handler) {
897 auto p = cast<XmlParser>(parser);
898 xml_set_handler(&p->startElementHandler, start_element_handler);
899 xml_set_handler(&p->endElementHandler, end_element_handler);
900 XML_SetElementHandler(p->parser, _xml_startElementHandler,
901 _xml_endElementHandler);
902 return true;
905 bool HHVM_FUNCTION(xml_set_processing_instruction_handler,
906 const Resource& parser,
907 const Variant& handler) {
908 auto p = cast<XmlParser>(parser);
909 xml_set_handler(&p->processingInstructionHandler, handler);
910 XML_SetProcessingInstructionHandler(p->parser,
911 _xml_processingInstructionHandler);
912 return true;
915 bool HHVM_FUNCTION(xml_set_start_namespace_decl_handler,
916 const Resource& parser,
917 const Variant& handler) {
918 auto p = cast<XmlParser>(parser);
919 xml_set_handler(&p->startNamespaceDeclHandler, handler);
920 XML_SetStartNamespaceDeclHandler(p->parser, _xml_startNamespaceDeclHandler);
921 return true;
924 bool HHVM_FUNCTION(xml_set_end_namespace_decl_handler,
925 const Resource& parser,
926 const Variant& handler) {
927 auto p = cast<XmlParser>(parser);
928 xml_set_handler(&p->endNamespaceDeclHandler, handler);
929 XML_SetEndNamespaceDeclHandler(p->parser, _xml_endNamespaceDeclHandler);
930 return true;
933 bool HHVM_FUNCTION(xml_set_unparsed_entity_decl_handler,
934 const Resource& parser,
935 const Variant& handler) {
936 auto p = cast<XmlParser>(parser);
937 xml_set_handler(&p->unparsedEntityDeclHandler, handler);
938 XML_SetUnparsedEntityDeclHandler(p->parser, _xml_unparsedEntityDeclHandler);
939 return true;
942 bool HHVM_FUNCTION(xml_set_external_entity_ref_handler,
943 const Resource& parser,
944 const Variant& handler) {
945 auto p = cast<XmlParser>(parser);
946 xml_set_handler(&p->externalEntityRefHandler, handler);
947 XML_SetExternalEntityRefHandler(p->parser, _xml_externalEntityRefHandler);
948 return true;
951 bool HHVM_FUNCTION(xml_set_notation_decl_handler,
952 const Resource& parser,
953 const Variant& handler) {
954 auto p = cast<XmlParser>(parser);
955 xml_set_handler(&p->notationDeclHandler, handler);
956 XML_SetNotationDeclHandler(p->parser, _xml_notationDeclHandler);
957 return true;
960 bool HHVM_FUNCTION(xml_set_object,
961 const Resource& parser,
962 VRefParam object) {
963 auto p = cast<XmlParser>(parser);
964 p->object.setWithRef(object);
965 return true;
968 int64_t HHVM_FUNCTION(xml_get_current_byte_index,
969 const Resource& parser) {
970 auto p = cast<XmlParser>(parser);
971 return XML_GetCurrentByteIndex(p->parser);
974 int64_t HHVM_FUNCTION(xml_get_current_column_number,
975 const Resource& parser) {
976 auto p = cast<XmlParser>(parser);
977 return XML_GetCurrentColumnNumber(p->parser);
980 int64_t HHVM_FUNCTION(xml_get_current_line_number,
981 const Resource& parser) {
982 auto p = cast<XmlParser>(parser);
983 return XML_GetCurrentLineNumber(p->parser);
986 int64_t HHVM_FUNCTION(xml_get_error_code,
987 const Resource& parser) {
988 auto p = cast<XmlParser>(parser);
989 return XML_GetErrorCode(p->parser);
992 String HHVM_FUNCTION(xml_error_string,
993 int code) {
994 char * str = (char *)XML_ErrorString((XML_Error)/*(int)*/code);
995 return String(str, CopyString);
998 ///////////////////////////////////////////////////////////////////////////////
1000 String HHVM_FUNCTION(utf8_decode,
1001 const String& data) {
1002 return xml_utf8_decode(data.c_str(), data.size(), "ISO-8859-1");
1005 String HHVM_FUNCTION(utf8_encode,
1006 const String& data) {
1007 auto const maxSize = safe_cast<size_t>(data.size()) * 4;
1008 String str = String(maxSize, ReserveString);
1009 char *newbuf = str.mutableData();
1010 int newlen = 0;
1011 const char *s = data.data();
1012 for (int pos = data.size(); pos > 0; pos--, s++) {
1013 unsigned int c = (unsigned char)(*s);
1014 if (c < 0x80) {
1015 newbuf[newlen++] = (char) c;
1016 } else if (c < 0x800) {
1017 newbuf[newlen++] = (0xc0 | (c >> 6));
1018 newbuf[newlen++] = (0x80 | (c & 0x3f));
1019 } else if (c < 0x10000) {
1020 newbuf[newlen++] = (0xe0 | (c >> 12));
1021 newbuf[newlen++] = (0xc0 | ((c >> 6) & 0x3f));
1022 newbuf[newlen++] = (0x80 | (c & 0x3f));
1023 } else if (c < 0x200000) {
1024 newbuf[newlen++] = (0xf0 | (c >> 18));
1025 newbuf[newlen++] = (0xe0 | ((c >> 12) & 0x3f));
1026 newbuf[newlen++] = (0xc0 | ((c >> 6) & 0x3f));
1027 newbuf[newlen++] = (0x80 | (c & 0x3f));
1031 assert(newlen <= maxSize);
1032 str.shrink(newlen);
1033 return str;
1036 ///////////////////////////////////////////////////////////////////////////////