Return member_lval from Array::lval{,Ref}()
[hiphop-php.git] / hphp / runtime / ext / xml / ext_xml.cpp
blob8a098d1284928b8f98a8527a794687bebb78d26f
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #include "hphp/runtime/ext/xml/ext_xml.h"
19 #include <folly/ScopeGuard.h>
21 #include "hphp/runtime/base/array-init.h"
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/comparisons.h"
24 #include "hphp/runtime/base/externals.h"
25 #include "hphp/runtime/base/request-local.h"
26 #include "hphp/runtime/base/root-map.h"
27 #include "hphp/runtime/base/zend-functions.h"
28 #include "hphp/runtime/base/zend-string.h"
29 #include "hphp/runtime/vm/jit/translator.h"
30 #include "hphp/runtime/vm/jit/translator-inline.h"
31 #include "hphp/runtime/base/utf8-decode.h"
32 #include <expat.h>
34 #define XML_MAXLEVEL 255
35 // XXX this should be dynamic
37 namespace HPHP {
// Option identifiers understood by xml_parser_get_option() and
// xml_parser_set_option(). The numeric values are exported to user code as
// the XML_OPTION_* constants.
enum php_xml_option {
  PHP_XML_OPTION_CASE_FOLDING    = 1,
  PHP_XML_OPTION_TARGET_ENCODING = 2,
  PHP_XML_OPTION_SKIP_TAGSTART   = 3,
  PHP_XML_OPTION_SKIP_WHITE      = 4
};
46 static struct XMLExtension final : Extension {
47 XMLExtension() : Extension("xml", NO_EXTENSION_VERSION_YET) {}
48 void moduleInit() override {
49 HHVM_FE(xml_parser_create);
50 HHVM_FE(xml_parser_free);
51 HHVM_FE(xml_parse);
52 HHVM_FE(xml_parse_into_struct);
53 HHVM_FE(xml_parser_create_ns);
54 HHVM_FE(xml_parser_get_option);
55 HHVM_FE(xml_parser_set_option);
56 HHVM_FE(xml_set_character_data_handler);
57 HHVM_FE(xml_set_default_handler);
58 HHVM_FE(xml_set_element_handler);
59 HHVM_FE(xml_set_processing_instruction_handler);
60 HHVM_FE(xml_set_start_namespace_decl_handler);
61 HHVM_FE(xml_set_end_namespace_decl_handler);
62 HHVM_FE(xml_set_unparsed_entity_decl_handler);
63 HHVM_FE(xml_set_external_entity_ref_handler);
64 HHVM_FE(xml_set_notation_decl_handler);
65 HHVM_FE(xml_set_object);
66 HHVM_FE(xml_get_current_byte_index);
67 HHVM_FE(xml_get_current_column_number);
68 HHVM_FE(xml_get_current_line_number);
69 HHVM_FE(xml_get_error_code);
70 HHVM_FE(xml_error_string);
71 HHVM_FE(utf8_decode);
72 HHVM_FE(utf8_encode);
74 HHVM_RC_INT_SAME(XML_ERROR_ASYNC_ENTITY);
75 HHVM_RC_INT_SAME(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
76 HHVM_RC_INT_SAME(XML_ERROR_BAD_CHAR_REF);
77 HHVM_RC_INT_SAME(XML_ERROR_BINARY_ENTITY_REF);
78 HHVM_RC_INT_SAME(XML_ERROR_DUPLICATE_ATTRIBUTE);
79 HHVM_RC_INT_SAME(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
80 HHVM_RC_INT_SAME(XML_ERROR_INCORRECT_ENCODING);
81 HHVM_RC_INT_SAME(XML_ERROR_INVALID_TOKEN);
82 HHVM_RC_INT_SAME(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
83 HHVM_RC_INT_SAME(XML_ERROR_MISPLACED_XML_PI);
84 HHVM_RC_INT_SAME(XML_ERROR_NONE);
85 HHVM_RC_INT_SAME(XML_ERROR_NO_ELEMENTS);
86 HHVM_RC_INT_SAME(XML_ERROR_NO_MEMORY);
87 HHVM_RC_INT_SAME(XML_ERROR_PARAM_ENTITY_REF);
88 HHVM_RC_INT_SAME(XML_ERROR_PARTIAL_CHAR);
89 HHVM_RC_INT_SAME(XML_ERROR_RECURSIVE_ENTITY_REF);
90 HHVM_RC_INT_SAME(XML_ERROR_SYNTAX);
91 HHVM_RC_INT_SAME(XML_ERROR_TAG_MISMATCH);
92 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_CDATA_SECTION);
93 HHVM_RC_INT_SAME(XML_ERROR_UNCLOSED_TOKEN);
94 HHVM_RC_INT_SAME(XML_ERROR_UNDEFINED_ENTITY);
95 HHVM_RC_INT_SAME(XML_ERROR_UNKNOWN_ENCODING);
97 HHVM_RC_INT(XML_OPTION_CASE_FOLDING, PHP_XML_OPTION_CASE_FOLDING);
98 HHVM_RC_INT(XML_OPTION_TARGET_ENCODING, PHP_XML_OPTION_TARGET_ENCODING);
99 HHVM_RC_INT(XML_OPTION_SKIP_TAGSTART, PHP_XML_OPTION_SKIP_TAGSTART);
100 HHVM_RC_INT(XML_OPTION_SKIP_WHITE, PHP_XML_OPTION_SKIP_WHITE);
102 HHVM_RC_STR(XML_SAX_IMPL, "expat");
104 loadSystemlib();
106 } s_xml_extension;
108 ///////////////////////////////////////////////////////////////////////////////
110 struct XmlParser : SweepableResourceData {
111 DECLARE_RESOURCE_ALLOCATION(XmlParser)
112 XmlParser() {}
113 virtual ~XmlParser();
114 void cleanupImpl();
115 CLASSNAME_IS("xml");
116 const String& o_getClassNameHook() const override;
118 int case_folding{0};
119 XML_Parser parser{nullptr};
120 XML_Char *target_encoding{nullptr};
122 Variant startElementHandler;
123 Variant endElementHandler;
124 Variant characterDataHandler;
125 Variant processingInstructionHandler;
126 Variant defaultHandler;
127 Variant unparsedEntityDeclHandler;
128 Variant notationDeclHandler;
129 Variant externalEntityRefHandler;
130 Variant unknownEncodingHandler;
131 Variant startNamespaceDeclHandler;
132 Variant endNamespaceDeclHandler;
134 Variant object;
136 Variant data;
137 Variant info;
138 int level{0};
139 int toffset{0};
140 int curtag{0};
141 Variant ctag;
142 char **ltags{nullptr};
143 int lastwasopen{0};
144 int skipwhite{0};
145 int isparsing{0};
148 XmlParser::~XmlParser() {
149 cleanupImpl();
152 void XmlParser::cleanupImpl() {
153 if (parser) {
154 XML_ParserFree(parser);
155 parser = NULL;
157 if (ltags) {
158 int inx;
159 for (inx = 0; (inx < level) && (inx < XML_MAXLEVEL); inx++)
160 free(ltags[inx]);
161 free(ltags);
162 ltags = NULL;
166 void XmlParser::sweep() {
167 cleanupImpl();
170 const String& XmlParser::o_getClassNameHook() const {
171 return classnameof();
174 struct XmlParserData final : RequestEventHandler {
175 void requestInit() override { parsers.reset(); }
176 void requestShutdown() override { parsers.reset(); }
177 RootMap<XmlParser> parsers;
179 IMPLEMENT_STATIC_REQUEST_LOCAL(XmlParserData, s_xml_data);
181 namespace {
183 inline req::ptr<XmlParser> getParserFromToken(void* userData) {
184 return s_xml_data->parsers.lookupRoot(userData);
187 inline void* getParserToken(const req::ptr<XmlParser>& parser) {
188 return reinterpret_cast<void*>(s_xml_data->parsers.addRoot(parser));
191 inline void clearParser(const req::ptr<XmlParser>& p) {
192 s_xml_data->parsers.removeRoot(p);
197 typedef struct {
198 XML_Char *name;
199 char (*decoding_function)(unsigned short);
200 unsigned short (*encoding_function)(unsigned char);
201 } xml_encoding;
203 static XML_Char * xml_globals_default_encoding = (XML_Char*)"UTF-8";
204 // for xml_parse_into_struct
207 #define XML(v) (xml_globals_ ## v)
// ISO-8859-1 bytes map one-to-one onto the first 256 code points.
inline static unsigned short xml_encode_iso_8859_1(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Converts a code point to an ISO-8859-1 byte; anything above 0xff becomes
// a question mark.
inline static char xml_decode_iso_8859_1(unsigned short c) {
  if (c > 0xff) {
    return '?';
  }
  return static_cast<char>(c);
}
// US-ASCII bytes are widened unchanged.
inline static unsigned short xml_encode_us_ascii(unsigned char c) {
  return static_cast<unsigned short>(c);
}
// Converts a code point to a US-ASCII byte; anything above 0x7f becomes a
// question mark.
inline static char xml_decode_us_ascii(unsigned short c) {
  if (c > 0x7f) {
    return '?';
  }
  return static_cast<char>(c);
}
225 xml_encoding xml_encodings[] = {
226 { (XML_Char*)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
227 { (XML_Char*)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
228 { (XML_Char*)"UTF-8", NULL, NULL },
229 { (XML_Char*)NULL, NULL, NULL }
232 static void *php_xml_malloc_wrapper(size_t sz) {
233 return req::malloc_untyped(sz);
236 static void *php_xml_realloc_wrapper(void *ptr, size_t sz) {
237 return req::realloc_untyped(ptr, sz);
240 static void php_xml_free_wrapper(void *ptr) {
241 if (ptr) {
242 req::free(ptr);
246 static XML_Memory_Handling_Suite php_xml_mem_hdlrs = {
247 php_xml_malloc_wrapper,
248 php_xml_realloc_wrapper,
249 php_xml_free_wrapper
252 static xml_encoding *xml_get_encoding(const XML_Char *name) {
253 xml_encoding *enc = &xml_encodings[0];
255 while (enc && enc->name) {
256 if (strcasecmp((const char*)name, (const char*)enc->name) == 0) {
257 return enc;
259 enc++;
261 return NULL;
264 static int _xml_xmlcharlen(const XML_Char *s) {
265 int len = 0;
267 while (*s) {
268 len++;
269 s++;
271 return len;
274 String xml_utf8_decode(const XML_Char *s, int len,
275 const XML_Char *encoding) {
276 String str = String(len, ReserveString);
277 char *newbuf = str.mutableData();
278 char (*decoder)(unsigned short) = nullptr;
279 xml_encoding *enc = xml_get_encoding(encoding);
281 int newlen = 0;
282 if (enc) {
283 decoder = enc->decoding_function;
285 if (decoder == nullptr) {
286 /* If the target encoding was unknown, or no decoder function
287 * was specified, return the UTF-8-encoded data as-is.
289 memcpy(newbuf, s, len);
290 str.setSize(len);
291 return str;
294 UTF8To16Decoder dec(s, len, true);
295 for (int b = dec.decode(); b != UTF8_END; b = dec.decode()) {
296 newbuf[newlen] = decoder(b);
297 ++newlen;
300 assert(newlen <= len);
301 str.shrink(newlen);
302 return str;
305 static Variant _xml_xmlchar_zval(const XML_Char *s, int len,
306 const XML_Char *encoding) {
307 if (s == NULL) {
308 return false;
310 if (len == 0) {
311 len = _xml_xmlcharlen(s);
313 String ret = xml_utf8_decode(s, len, encoding);
314 return ret;
317 static
318 String _xml_decode_tag(const req::ptr<XmlParser>& parser, const char *tag) {
319 auto newstr = xml_utf8_decode((const XML_Char*)tag, strlen(tag),
320 parser->target_encoding);
321 if (parser->case_folding) {
322 string_to_upper(newstr);
324 return newstr;
327 static Variant php_xml_parser_create_impl(const String& encoding_param,
328 const String& ns_param,
329 int ns_support) {
330 int auto_detect = 0;
331 XML_Char *encoding;
333 if (!encoding_param.isNull()) {
334 /* The supported encoding types are hardcoded here because
335 * we are limited to the encodings supported by expat/xmltok.
337 if (encoding_param.size() == 0) {
338 encoding = XML(default_encoding);
339 auto_detect = 1;
340 } else if (strcasecmp(encoding_param.data(), "ISO-8859-1") == 0) {
341 encoding = (XML_Char*)"ISO-8859-1";
342 } else if (strcasecmp(encoding_param.data(), "UTF-8") == 0) {
343 encoding = (XML_Char*)"UTF-8";
344 } else if (strcasecmp(encoding_param.data(), "US-ASCII") == 0) {
345 encoding = (XML_Char*)"US-ASCII";
346 } else {
347 raise_warning("unsupported source encoding \"%s\"",
348 encoding_param.c_str());
349 return false;
351 } else {
352 encoding = XML(default_encoding);
355 String separator;
356 if (ns_support && ns_param.empty()) {
357 separator = ":";
358 } else {
359 separator = ns_param;
362 auto parser = req::make<XmlParser>();
363 parser->parser = XML_ParserCreate_MM
364 ((auto_detect ? NULL : encoding), &php_xml_mem_hdlrs,
365 !separator.empty() ? (const XML_Char*)separator.data() : NULL);
367 parser->target_encoding = encoding;
368 parser->case_folding = 1;
369 parser->object.asTypedValue()->m_type = KindOfNull;
370 parser->isparsing = 0;
372 XML_SetUserData(parser->parser, getParserToken(parser));
374 return Variant(std::move(parser));
377 static bool name_contains_class(const String& name) {
378 if (name) {
379 int pos = name.find("::");
380 return pos != 0 && pos != String::npos && pos + 2 < name.size();
382 return false;
385 static Variant xml_call_handler(const req::ptr<XmlParser>& parser,
386 const Variant& handler,
387 const Array& args) {
388 if (parser && handler.toBoolean()) {
389 Variant retval;
390 if (handler.isString() && !name_contains_class(handler.toString())) {
391 if (!parser->object.isObject()) {
392 retval = invoke(handler.toString().c_str(), args, -1);
393 } else {
394 retval = parser->object.toObject()->
395 o_invoke(handler.toString(), args);
397 } else if (is_callable(handler)) {
398 vm_call_user_func(handler, args);
399 } else {
400 raise_warning("Handler is invalid");
402 return retval;
404 return init_null();
407 static void _xml_add_to_info(const req::ptr<XmlParser>& parser,
408 const String& nameStr) {
409 if (parser->info.isNull()) {
410 return;
412 forceToArray(parser->info);
413 if (!parser->info.toCArrRef().exists(nameStr)) {
414 parser->info.toArrRef().set(nameStr, Array::Create());
416 auto& inner = parser->info.toArrRef().lvalAt(nameStr);
417 forceToArray(inner).append(parser->curtag);
418 parser->curtag++;
421 const StaticString
422 s_type("type"),
423 s_complete("complete"),
424 s_tag("tag"),
425 s_close("close"),
426 s_level("level"),
427 s_value("value"),
428 s_cdata("cdata"),
429 s_open("open"),
430 s_attributes("attributes");
432 void _xml_endElementHandler(void *userData, const XML_Char *name) {
433 auto parser = getParserFromToken(userData);
435 if (parser) {
436 Variant retval;
437 Array args = Array::Create();
439 auto tag_name = _xml_decode_tag(parser, (const char*)name);
441 if (parser->endElementHandler.toBoolean()) {
442 args.append(Variant(parser));
443 args.append(tag_name);
444 xml_call_handler(parser, parser->endElementHandler, args);
447 if (!parser->data.isNull()) {
448 if (parser->lastwasopen) {
449 parser->ctag.toArrRef().set(s_type, s_complete);
450 } else {
451 ArrayInit tag(3, ArrayInit::Map{});
452 _xml_add_to_info(parser, tag_name.substr(parser->toffset));
453 tag.set(s_tag, tag_name.substr(parser->toffset));
454 tag.set(s_type, s_close);
455 tag.set(s_level, parser->level);
456 parser->data.toArrRef().append(tag.toArray());
458 parser->lastwasopen = 0;
462 if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
463 free(parser->ltags[parser->level-1]);
466 parser->level--;
470 void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) {
471 auto parser = getParserFromToken(userData);
473 if (parser) {
474 Variant retval;
475 Array args = Array::Create();
477 if (parser->characterDataHandler.toBoolean()) {
478 args.append(Variant(parser));
479 args.append(_xml_xmlchar_zval(s, len, parser->target_encoding));
480 xml_call_handler(parser, parser->characterDataHandler, args);
483 if (!parser->data.isNull()) {
484 int i;
485 int doprint = 0;
487 String decoded_value;
488 int decoded_len;
489 decoded_value = xml_utf8_decode(s,len,
490 parser->target_encoding);
491 decoded_len = decoded_value.size();
492 for (i = 0; i < decoded_len; i++) {
493 switch (decoded_value[i]) {
494 case ' ':
495 case '\t':
496 case '\n':
497 default:
498 doprint = 1;
499 break;
501 if (doprint) {
502 break;
505 if (doprint || (! parser->skipwhite)) {
506 if (parser->lastwasopen) {
507 String myval;
508 // check if value exists, if yes append to that
509 if (parser->ctag.toArrRef().exists(s_value)) {
510 myval = tvCastToString(parser->ctag.toArray().rvalAt(s_value).tv());
511 myval += decoded_value;
512 parser->ctag.toArrRef().set(s_value, myval);
513 } else {
514 parser->ctag.toArrRef().set(
515 s_value,
516 decoded_value
519 } else {
520 Array tag;
521 String myval;
522 String mytype;
524 auto curtag = parser->data.toArrRef().pop();
525 SCOPE_EXIT {
526 try {
527 parser->data.toArrRef().append(curtag);
528 } catch (...) {}
531 if (curtag.toArrRef().exists(s_type)) {
532 mytype = tvCastToString(curtag.toArrRef().rvalAt(s_type).tv());
533 if (!strcmp(mytype.data(), "cdata") &&
534 curtag.toArrRef().exists(s_value)) {
535 myval = tvCastToString(curtag.toArrRef().rvalAt(s_value).tv());
536 myval += decoded_value;
537 curtag.toArrRef().set(s_value, myval);
538 return;
541 if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
542 tag = Array::Create();
543 _xml_add_to_info(parser, parser->ltags[parser->level-1] +
544 parser->toffset);
545 tag.set(s_tag, String(parser->ltags[parser->level-1] +
546 parser->toffset, CopyString));
547 tag.set(s_value, decoded_value);
548 tag.set(s_type, s_cdata);
549 tag.set(s_level, parser->level);
550 parser->data.toArrRef().append(tag);
551 } else if (parser->level == (XML_MAXLEVEL + 1)) {
552 raise_warning("Maximum depth exceeded - Results truncated");
560 void _xml_defaultHandler(void *userData, const XML_Char *s, int len) {
561 auto parser = getParserFromToken(userData);
563 if (parser && parser->defaultHandler.toBoolean()) {
564 xml_call_handler(parser,
565 parser->defaultHandler,
566 make_packed_array(
567 Variant(parser),
568 _xml_xmlchar_zval(s, len, parser->target_encoding)));
572 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes) {
573 auto parser = getParserFromToken(userData);
574 const char **attrs = (const char **) attributes;
575 Variant retval;
576 Array args = Array::Create();
578 if (parser) {
579 parser->level++;
581 String tag_name = _xml_decode_tag(parser, (const char*)name);
583 if (parser->startElementHandler.toBoolean()) {
584 args.append(Variant(parser));
585 args.append(tag_name);
586 args.append(Array::Create());
588 while (attributes && *attributes) {
589 String att = _xml_decode_tag(parser, (const char*)attributes[0]);
590 String val = xml_utf8_decode(attributes[1],
591 strlen((const char*)attributes[1]),
592 parser->target_encoding);
593 args.lvalAt(2).toArrRef().set(att, val);
594 attributes += 2;
597 xml_call_handler(parser, parser->startElementHandler, args);
600 if (!parser->data.isNull()) {
601 if (parser->level <= XML_MAXLEVEL) {
602 Array tag, atr;
603 int atcnt = 0;
604 tag = Array::Create();
605 atr = Array::Create();
607 _xml_add_to_info(parser, tag_name.substr(parser->toffset));
609 tag.set(s_tag, tag_name.substr(parser->toffset));
610 tag.set(s_type, s_open);
611 tag.set(s_level, parser->level);
613 parser->ltags[parser->level-1] = strdup(tag_name.data());
614 parser->lastwasopen = 1;
616 attributes = (const XML_Char **) attrs;
618 while (attributes && *attributes) {
619 String att = _xml_decode_tag(parser, (const char*)attributes[0]);
620 String val = xml_utf8_decode(attributes[1],
621 strlen((const char*)attributes[1]),
622 parser->target_encoding);
623 atr.set(att, val);
624 atcnt++;
625 attributes += 2;
628 if (atcnt) {
629 tag.set(s_attributes,atr);
631 auto lval = parser->data.toArrRef().lvalAt();
632 lval.type() = KindOfArray;
633 lval.val().parr = tag.detach();
634 parser->ctag.assignRef(tvAsVariant(lval.tv_ptr()));
635 } else if (parser->level == (XML_MAXLEVEL + 1)) {
636 raise_warning("Maximum depth exceeded - Results truncated");
642 void _xml_processingInstructionHandler(void *userData, const XML_Char *target,
643 const XML_Char *data) {
644 auto parser = getParserFromToken(userData);
645 if (parser && parser->processingInstructionHandler.toBoolean()) {
646 Array args = Array::Create();
647 args.append(Variant(parser));
648 args.append(_xml_xmlchar_zval(target, 0, parser->target_encoding));
649 args.append(_xml_xmlchar_zval(data, 0, parser->target_encoding));
650 xml_call_handler(parser, parser->processingInstructionHandler, args);
654 int _xml_externalEntityRefHandler(XML_Parser /* void* */ parserPtr,
655 const XML_Char *openEntityNames,
656 const XML_Char *base,
657 const XML_Char *systemId,
658 const XML_Char *publicId) {
659 auto parser = getParserFromToken(XML_GetUserData(parserPtr));
660 int ret = 0; /* abort if no handler is set (should be configurable?) */
661 if (parser && parser->externalEntityRefHandler.toBoolean()) {
662 Array args = Array::Create();
663 args.append(Variant(parser));
664 args.append(_xml_xmlchar_zval(openEntityNames, 0,
665 parser->target_encoding));
666 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
667 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
668 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
669 ret = xml_call_handler(parser,
670 parser->externalEntityRefHandler, args).toInt64();
672 return ret;
675 void _xml_notationDeclHandler(void *userData,
676 const XML_Char *notationName,
677 const XML_Char *base,
678 const XML_Char *systemId,
679 const XML_Char *publicId) {
680 auto parser = getParserFromToken(userData);
682 if (parser && parser->notationDeclHandler.toBoolean()) {
683 Array args = Array::Create();
684 args.append(Variant(parser));
685 args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
686 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
687 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
688 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
689 xml_call_handler(parser, parser->notationDeclHandler, args);
693 void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix,
694 const XML_Char *uri) {
695 auto parser = getParserFromToken(userData);
697 if (parser && parser->startNamespaceDeclHandler.toBoolean()) {
698 Array args = Array::Create();
700 args.append(Variant(parser));
701 args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
702 args.append(_xml_xmlchar_zval(uri, 0, parser->target_encoding));
703 xml_call_handler(parser, parser->startNamespaceDeclHandler, args);
707 void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
708 auto parser = getParserFromToken(userData);
710 if (parser && parser->endNamespaceDeclHandler.toBoolean()) {
711 Array args = Array::Create();
712 args.append(Variant(parser));
713 args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
714 xml_call_handler(parser, parser->endNamespaceDeclHandler, args);
718 void _xml_unparsedEntityDeclHandler(void *userData,
719 const XML_Char *entityName,
720 const XML_Char *base,
721 const XML_Char *systemId,
722 const XML_Char *publicId,
723 const XML_Char *notationName) {
724 auto parser = getParserFromToken(userData);
726 if (parser && parser->unparsedEntityDeclHandler.toBoolean()) {
727 Array args = Array::Create();
728 args.append(Variant(parser));
729 args.append(_xml_xmlchar_zval(entityName, 0, parser->target_encoding));
730 args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
731 args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
732 args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
733 args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
734 xml_call_handler(parser, parser->unparsedEntityDeclHandler, args);
738 static void xml_set_handler(Variant * handler, const Variant& data) {
739 if (data.isNull() || same(data, false) || data.isString() ||
740 is_callable(data)) {
741 *handler = data;
742 } else {
743 raise_warning("Handler is invalid");
747 ///////////////////////////////////////////////////////////////////////////////
749 Resource HHVM_FUNCTION(xml_parser_create,
750 const Variant& encoding /* = uninit_variant */) {
751 const String& strEncoding = encoding.isNull()
752 ? null_string
753 : encoding.toString();
754 return php_xml_parser_create_impl(strEncoding, null_string, 0).toResource();
757 Resource HHVM_FUNCTION(xml_parser_create_ns,
758 const Variant& encoding /* = uninit_variant */,
759 const Variant& separator /* = uninit_variant */) {
760 const String& strEncoding = encoding.isNull()
761 ? null_string
762 : encoding.toString();
763 const String& strSeparator = separator.isNull()
764 ? null_string
765 : separator.toString();
766 return php_xml_parser_create_impl(strEncoding, strSeparator, 1).toResource();
769 bool HHVM_FUNCTION(xml_parser_free,
770 const Resource& parser) {
771 auto p = cast<XmlParser>(parser);
772 if (p->isparsing == 1) {
773 raise_warning("Parser cannot be freed while it is parsing.");
774 return false;
776 clearParser(p);
777 return true;
780 int64_t HHVM_FUNCTION(xml_parse,
781 const Resource& parser,
782 const String& data,
783 bool is_final /* = true */) {
784 // XML_Parse can reenter the VM, and it will do so after we've lost
785 // the frame pointer by calling through the system's copy of XML_Parse
786 // in libexpat.so.
787 SYNC_VM_REGS_SCOPED();
788 auto p = cast<XmlParser>(parser);
789 int ret;
790 long isFinal = is_final ? 1 : 0;
791 p->isparsing = 1;
792 ret = XML_Parse(p->parser, (const XML_Char*)data.data(), data.size(),
793 isFinal);
794 p->isparsing = 0;
795 return ret;
798 int64_t HHVM_FUNCTION(xml_parse_into_struct,
799 const Resource& parser,
800 const String& data,
801 VRefParam values,
802 VRefParam index /* = null */) {
803 SYNC_VM_REGS_SCOPED();
804 int ret;
805 auto p = cast<XmlParser>(parser);
806 p->data.setWithRef(values);
807 p->data = Array::Create();
808 p->info.setWithRef(index);
809 p->info = Array::Create();
810 p->level = 0;
811 p->ltags = (char**)malloc(XML_MAXLEVEL * sizeof(char*));
813 XML_SetDefaultHandler(p->parser, _xml_defaultHandler);
814 XML_SetElementHandler(p->parser, _xml_startElementHandler,
815 _xml_endElementHandler);
816 XML_SetCharacterDataHandler(p->parser, _xml_characterDataHandler);
818 p->isparsing = 1;
819 ret = XML_Parse(p->parser, (const XML_Char*)data.data(), data.size(), 1);
820 p->isparsing = 0;
822 return ret;
825 Variant HHVM_FUNCTION(xml_parser_get_option,
826 const Resource& parser,
827 int option) {
828 auto p = cast<XmlParser>(parser);
829 switch (option) {
830 case PHP_XML_OPTION_CASE_FOLDING:
831 return p->case_folding;
832 case PHP_XML_OPTION_TARGET_ENCODING:
833 return String((const char*)p->target_encoding, CopyString);
834 default:
835 raise_warning("Unknown option");
836 return false;
838 return false;
841 bool HHVM_FUNCTION(xml_parser_set_option,
842 const Resource& parser,
843 int option,
844 const Variant& value) {
845 auto p = cast<XmlParser>(parser);
846 switch (option) {
847 case PHP_XML_OPTION_CASE_FOLDING:
848 p->case_folding = value.toInt64();
849 break;
850 case PHP_XML_OPTION_SKIP_TAGSTART:
851 p->toffset = value.toInt64();
852 break;
853 case PHP_XML_OPTION_SKIP_WHITE:
854 p->skipwhite = value.toInt64();
855 break;
856 case PHP_XML_OPTION_TARGET_ENCODING: {
857 xml_encoding *enc;
858 enc = xml_get_encoding((const XML_Char*)value.toString().data());
859 if (enc == NULL) {
860 raise_warning("Unsupported target encoding \"%s\"",
861 value.toString().data());
862 return false;
864 p->target_encoding = enc->name;
865 break;
867 default:
868 raise_warning("Unknown option");
869 return false;
871 return true;
874 bool HHVM_FUNCTION(xml_set_character_data_handler,
875 const Resource& parser,
876 const Variant& handler) {
877 auto p = cast<XmlParser>(parser);
878 xml_set_handler(&p->characterDataHandler, handler);
879 XML_SetCharacterDataHandler(p->parser, _xml_characterDataHandler);
880 return true;
883 bool HHVM_FUNCTION(xml_set_default_handler,
884 const Resource& parser,
885 const Variant& handler) {
886 auto p = cast<XmlParser>(parser);
887 xml_set_handler(&p->defaultHandler, handler);
888 XML_SetDefaultHandler(p->parser, _xml_defaultHandler);
889 return true;
892 bool HHVM_FUNCTION(xml_set_element_handler,
893 const Resource& parser,
894 const Variant& start_element_handler,
895 const Variant& end_element_handler) {
896 auto p = cast<XmlParser>(parser);
897 xml_set_handler(&p->startElementHandler, start_element_handler);
898 xml_set_handler(&p->endElementHandler, end_element_handler);
899 XML_SetElementHandler(p->parser, _xml_startElementHandler,
900 _xml_endElementHandler);
901 return true;
904 bool HHVM_FUNCTION(xml_set_processing_instruction_handler,
905 const Resource& parser,
906 const Variant& handler) {
907 auto p = cast<XmlParser>(parser);
908 xml_set_handler(&p->processingInstructionHandler, handler);
909 XML_SetProcessingInstructionHandler(p->parser,
910 _xml_processingInstructionHandler);
911 return true;
914 bool HHVM_FUNCTION(xml_set_start_namespace_decl_handler,
915 const Resource& parser,
916 const Variant& handler) {
917 auto p = cast<XmlParser>(parser);
918 xml_set_handler(&p->startNamespaceDeclHandler, handler);
919 XML_SetStartNamespaceDeclHandler(p->parser, _xml_startNamespaceDeclHandler);
920 return true;
923 bool HHVM_FUNCTION(xml_set_end_namespace_decl_handler,
924 const Resource& parser,
925 const Variant& handler) {
926 auto p = cast<XmlParser>(parser);
927 xml_set_handler(&p->endNamespaceDeclHandler, handler);
928 XML_SetEndNamespaceDeclHandler(p->parser, _xml_endNamespaceDeclHandler);
929 return true;
932 bool HHVM_FUNCTION(xml_set_unparsed_entity_decl_handler,
933 const Resource& parser,
934 const Variant& handler) {
935 auto p = cast<XmlParser>(parser);
936 xml_set_handler(&p->unparsedEntityDeclHandler, handler);
937 XML_SetUnparsedEntityDeclHandler(p->parser, _xml_unparsedEntityDeclHandler);
938 return true;
941 bool HHVM_FUNCTION(xml_set_external_entity_ref_handler,
942 const Resource& parser,
943 const Variant& handler) {
944 auto p = cast<XmlParser>(parser);
945 xml_set_handler(&p->externalEntityRefHandler, handler);
946 XML_SetExternalEntityRefHandler(p->parser, _xml_externalEntityRefHandler);
947 return true;
950 bool HHVM_FUNCTION(xml_set_notation_decl_handler,
951 const Resource& parser,
952 const Variant& handler) {
953 auto p = cast<XmlParser>(parser);
954 xml_set_handler(&p->notationDeclHandler, handler);
955 XML_SetNotationDeclHandler(p->parser, _xml_notationDeclHandler);
956 return true;
959 bool HHVM_FUNCTION(xml_set_object,
960 const Resource& parser,
961 VRefParam object) {
962 auto p = cast<XmlParser>(parser);
963 p->object.setWithRef(object);
964 return true;
967 int64_t HHVM_FUNCTION(xml_get_current_byte_index,
968 const Resource& parser) {
969 auto p = cast<XmlParser>(parser);
970 return XML_GetCurrentByteIndex(p->parser);
973 int64_t HHVM_FUNCTION(xml_get_current_column_number,
974 const Resource& parser) {
975 auto p = cast<XmlParser>(parser);
976 return XML_GetCurrentColumnNumber(p->parser);
979 int64_t HHVM_FUNCTION(xml_get_current_line_number,
980 const Resource& parser) {
981 auto p = cast<XmlParser>(parser);
982 return XML_GetCurrentLineNumber(p->parser);
985 int64_t HHVM_FUNCTION(xml_get_error_code,
986 const Resource& parser) {
987 auto p = cast<XmlParser>(parser);
988 return XML_GetErrorCode(p->parser);
991 String HHVM_FUNCTION(xml_error_string,
992 int code) {
993 char * str = (char *)XML_ErrorString((XML_Error)/*(int)*/code);
994 return String(str, CopyString);
997 ///////////////////////////////////////////////////////////////////////////////
999 String HHVM_FUNCTION(utf8_decode,
1000 const String& data) {
1001 return xml_utf8_decode(data.c_str(), data.size(), "ISO-8859-1");
1004 String HHVM_FUNCTION(utf8_encode,
1005 const String& data) {
1006 auto const maxSize = safe_cast<size_t>(data.size()) * 4;
1007 String str = String(maxSize, ReserveString);
1008 char *newbuf = str.mutableData();
1009 int newlen = 0;
1010 const char *s = data.data();
1011 for (int pos = data.size(); pos > 0; pos--, s++) {
1012 unsigned int c = (unsigned char)(*s);
1013 if (c < 0x80) {
1014 newbuf[newlen++] = (char) c;
1015 } else if (c < 0x800) {
1016 newbuf[newlen++] = (0xc0 | (c >> 6));
1017 newbuf[newlen++] = (0x80 | (c & 0x3f));
1018 } else if (c < 0x10000) {
1019 newbuf[newlen++] = (0xe0 | (c >> 12));
1020 newbuf[newlen++] = (0xc0 | ((c >> 6) & 0x3f));
1021 newbuf[newlen++] = (0x80 | (c & 0x3f));
1022 } else if (c < 0x200000) {
1023 newbuf[newlen++] = (0xf0 | (c >> 18));
1024 newbuf[newlen++] = (0xe0 | ((c >> 12) & 0x3f));
1025 newbuf[newlen++] = (0xc0 | ((c >> 6) & 0x3f));
1026 newbuf[newlen++] = (0x80 | (c & 0x3f));
1030 assert(newlen <= maxSize);
1031 str.shrink(newlen);
1032 return str;
1035 ///////////////////////////////////////////////////////////////////////////////