Merge branch 'MDL-78811-Master' of https://github.com/aydevworks/moodle
[moodle.git] / lib / phpxmlrpc / Helper / XMLParser.php
blobc23b6fc52d0f0b47ce5d0beab11631541ffe808c
1 <?php
3 namespace PhpXmlRpc\Helper;
5 use PhpXmlRpc\PhpXmlRpc;
6 use PhpXmlRpc\Traits\DeprecationLogger;
7 use PhpXmlRpc\Value;
9 /**
10 * Deals with parsing the XML.
11 * @see http://xmlrpc.com/spec.md
13 * @todo implement an interface to allow for alternative implementations
14 * - make access to $_xh protected, return more high-level data structures
15 * - move the private parts of $_xh to the internal-use parsing-options config
16 * - add parseRequest, parseResponse, parseValue methods
17 * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding
18 * while parsing, which is faster than doing it later by going over the rebuilt data structure
19 * @todo rename? This is an xml-rpc parser, not a generic xml parser...
21 * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
22 * @property int $accept deprecated - (protected) access left in purely for BC
24 class XMLParser
use DeprecationLogger;

// Values accepted for the $returnType argument of parse()
const RETURN_XMLRPCVALS = 'xmlrpcvals';
const RETURN_EPIVALS = 'epivals';
const RETURN_PHP = 'phpvals';

// Bit flags accepted for the $accept argument of parse(): they select which top-level elements are allowed
const ACCEPT_REQUEST = 1;
const ACCEPT_RESPONSE = 2;
const ACCEPT_VALUE = 4;
const ACCEPT_FAULT = 8;

/**
 * @var int
 * The max length beyond which data will get truncated in error messages
 */
protected $maxLogValueLength = 100;

/**
 * @var array
 * Parsing state, also used to hand the parsing results back to callers.
 * Keys meant for internal use:
 *   ac - accumulator for character data
 *   stack - genealogy of the xml element names, used to validate nesting of xml-rpc elements
 *   valuestack - used while parsing arrays and structs
 *   lv - "looking for a value" flag: implements the logic allowing untyped values to be strings
 *        (values: 0=not looking, 1=looking, 3=found)
 * Keys meant for callers:
 *   isf - xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
 *   isf_reason - the fault string
 *   value - the value found in responses
 *   method - the method name found in requests
 *   params - the parameters found in requests
 *   pt - the type of each received parameter. Useful if parameters are automatically decoded to php values
 *   rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
 * NB: the element handlers add further keys ('vt', 'php_class') while parsing.
 */
protected $_xh = [
    'ac' => '',
    'stack' => [],
    'valuestack' => [],
    'lv' => 0,
    'isf' => 0,
    'isf_reason' => '',
    'value' => null,
    'method' => false,
    'params' => [],
    'pt' => [],
    'rt' => '',
];

/**
 * @var array[]
 * For every known element name (upper-cased), the list of element names allowed as its direct parent
 */
protected $xmlrpc_valid_parents = [
    'VALUE' => ['MEMBER', 'DATA', 'PARAM', 'FAULT'],
    'BOOLEAN' => ['VALUE'],
    'I4' => ['VALUE'],
    'I8' => ['VALUE'],
    'EX:I8' => ['VALUE'],
    'INT' => ['VALUE'],
    'STRING' => ['VALUE'],
    'DOUBLE' => ['VALUE'],
    'DATETIME.ISO8601' => ['VALUE'],
    'BASE64' => ['VALUE'],
    'MEMBER' => ['STRUCT'],
    'NAME' => ['MEMBER'],
    'DATA' => ['ARRAY'],
    'ARRAY' => ['VALUE'],
    'STRUCT' => ['VALUE'],
    'PARAM' => ['PARAMS'],
    'METHODNAME' => ['METHODCALL'],
    'PARAMS' => ['METHODCALL', 'METHODRESPONSE'],
    'FAULT' => ['METHODRESPONSE'],
    'NIL' => ['VALUE'], // only used when extension activated
    'EX:NIL' => ['VALUE'], // only used when extension activated
];

/** @var array $parsing_options the options received in the constructor */
protected $parsing_options = [];

/** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
//protected $accept = 3;

/** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
protected $maxChunkLength = 4194304;

/**
 * @var array
 * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
 * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
 * the element handler methods
 */
protected $current_parsing_options = [
    'xmlrpc_null_extension' => false,
    'xmlrpc_return_datetimes' => false,
    'xmlrpc_reject_invalid_values' => false,
];
/**
 * Stores the default parsing options; they get merged with the per-call options inside parse().
 *
 * @param array $options integer keys: options passed to the inner xml parser
 *                       string keys:
 *                       - target_charset (string)
 *                       - methodname_callback (callable)
 *                       - xmlrpc_null_extension (bool)
 *                       - xmlrpc_return_datetimes (bool)
 *                       - xmlrpc_reject_invalid_values (bool)
 */
public function __construct(array $options = array())
{
    // no validation happens here: options are checked every time parse() runs
    $this->parsing_options = $options;
}
/**
 * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
 * Logs to the error log any issues which do not cause the parsing to fail.
 *
 * @param string $data
 * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
 * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
 * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
 *                       These options are added to options received in the constructor.
 *                       Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
 *                       are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
 * @return array see the definition of $this->_xh for the meaning of the results
 * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
 *
 * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
 *       that parsing will be completely independent of global state. Note that it might incur a small perf hit...
 */
public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
{
    // reset the state left over by any previous parsing run
    $this->_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'lv' => 0,
        'isf' => 0,
        'isf_reason' => '',
        'value' => null,
        'method' => false, // so we can check later if we got a methodname or not
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    $len = strlen($data);

    // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
    if ($len == 0) {
        $this->_xh['isf'] = 3;
        $this->_xh['isf_reason'] = 'XML error 5: empty document';
        return $this->_xh;
    }

    $this->current_parsing_options = array('accept' => $accept);

    // per-call options take precedence over the ones received in the constructor
    $mergedOptions = $this->parsing_options;
    foreach ($options as $key => $val) {
        $mergedOptions[$key] = $val;
    }

    // string-key options are consumed here; what is left in $mergedOptions afterwards goes to the xml parser
    foreach ($mergedOptions as $key => $val) {
        // q: can php be built without ctype? should we use a regexp?
        if (is_string($key) && !ctype_digit($key)) {
            /// @todo on invalid options, throw/error-out instead of logging an error message?
            switch($key) {
                case 'target_charset':
                    if (function_exists('mb_convert_encoding')) {
                        $this->current_parsing_options['target_charset'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                    }
                    break;

                case 'methodname_callback':
                    if (is_callable($val)) {
                        $this->current_parsing_options['methodname_callback'] = $val;
                    } else {
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                    }
                    break;

                case 'xmlrpc_null_extension':
                case 'xmlrpc_return_datetimes':
                case 'xmlrpc_reject_invalid_values':
                    $this->current_parsing_options[$key] = $val;
                    break;

                default:
                    $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
            }
            unset($mergedOptions[$key]);
        }
    }

    // fall back to the global defaults for the options which the caller did not set
    if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
        $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
    }
    if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
        $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
    }
    if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
        $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
    }

    // NB: we use '' instead of null to force charset detection from the xml declaration
    $parser = xml_parser_create('');

    foreach ($mergedOptions as $key => $val) {
        xml_parser_set_option($parser, $key, $val);
    }

    // always set this, in case someone tries to disable it via options...
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

    // The end-of-element handler decides in which format the values get rebuilt
    switch ($returnType) {
        case self::RETURN_PHP:
            $endElementHandler = 'xmlrpc_ee_fast';
            break;
        case self::RETURN_EPIVALS:
            $endElementHandler = 'xmlrpc_ee_epi';
            break;
        /// @todo log an error / throw / error-out on unsupported return type
        case self::RETURN_XMLRPCVALS:
        default:
            $endElementHandler = 'xmlrpc_ee';
    }

    // Handlers are registered as callables bound to $this, instead of relying on xml_set_object() plus bare
    // method-name strings: that combination is deprecated as of php 8.4
    xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, $endElementHandler));
    xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd'));
    xml_set_default_handler($parser, array($this, 'xmlrpc_dh'));

    try {
        // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
        for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
            $chunk = substr($data, $offset, $this->maxChunkLength);
            // error handling: xml not well formed
            // (the 3rd argument tells the parser whether this is the last chunk)
            if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                $errCode = xml_get_error_code($parser);
                $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                    xml_get_current_line_number($parser), xml_get_current_column_number($parser));

                $this->_xh['isf'] = 3;
                $this->_xh['isf_reason'] = $errStr;
            }

            // no need to parse further if we already have a fatal error
            if ($this->_xh['isf'] >= 2) {
                break;
            }
        }
    /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
    } catch (\Exception $e) {
        xml_parser_free($parser);
        $this->current_parsing_options = array();
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        throw $e;
    } catch (\Error $e) {
        xml_parser_free($parser);
        $this->current_parsing_options = array();
        /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
        throw $e;
    }

    xml_parser_free($parser);
    $this->current_parsing_options = array();

    return $this->_xh;
}
/**
 * xml parser handler function for opening element tags.
 * Validates the nesting of the element and prepares the parsing state for the data which follows.
 * @internal
 *
 * @param resource $parser
 * @param string $name the element name, upper-cased (case folding is always enabled by parse())
 * @param $attrs
 * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
 * @return void
 *
 * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
 *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
 */
public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
{
    // nothing to do once the document has been found to be invalid
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // step 1: validate the position of the element in the document
    if (count($this->_xh['stack']) != 0) {
        // nested element: it has to be an allowed child of its parent
        $parent = end($this->_xh['stack']);
        $parentIsValid = array_key_exists($name, $this->xmlrpc_valid_parents)
            && in_array($parent, $this->xmlrpc_valid_parents[$name]);
        if (!$parentIsValid) {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

            return;
        }
    } else {
        // root element: only a few are allowed, depending on what the caller accepts
        /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
        ///       there is only a single top level element in xml anyway

        // BC
        if ($acceptSingleVals !== false) {
            $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
            $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
        } else {
            $accept = $this->current_parsing_options['accept'];
        }

        $rootElementFlags = array(
            'METHODCALL' => self::ACCEPT_REQUEST,
            'METHODRESPONSE' => self::ACCEPT_RESPONSE,
            'VALUE' => self::ACCEPT_VALUE,
            'FAULT' => self::ACCEPT_FAULT,
        );
        if (!isset($rootElementFlags[$name]) || !($accept & $rootElementFlags[$name])) {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

            return;
        }
        $this->_xh['rt'] = strtolower($name);
    }

    // step 2: element-specific processing
    switch ($name) {
        // optimize for speed switch cases: most common cases first
        case 'VALUE':
            /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
            $this->_xh['vt'] = 'value'; // indicator: no value found yet
            $this->_xh['ac'] = '';
            $this->_xh['lv'] = 1;
            $this->_xh['php_class'] = null;
            break;

        case 'I8':
        case 'EX:I8':
            if (PHP_INT_SIZE === 4) {
                // 64-bit integers can not be represented: flag the document as invalid
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                return;
            }
            // fall through voluntarily

        case 'I4':
        case 'INT':
        case 'STRING':
        case 'BOOLEAN':
        case 'DOUBLE':
        case 'DATETIME.ISO8601':
        case 'BASE64':
            if ($this->_xh['vt'] != 'value') {
                // a second data element inside the same value
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            // start accumulating the scalar's character data from scratch
            $this->_xh['ac'] = '';
            break;

        case 'STRUCT':
        case 'ARRAY':
            if ($this->_xh['vt'] != 'value') {
                // a second data element inside the same value
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            // push onto the value stack an empty container which will receive the child values
            $container = array(
                'values' => array(),
                'type' => $name,
            );
            // out-of-band information used to rebuild php objects is saved when found
            if (isset($attrs['PHP_CLASS'])) {
                $container['php_class'] = $attrs['PHP_CLASS'];
            }
            $this->_xh['valuestack'][] = $container;
            $this->_xh['vt'] = 'data'; // be prepared for a data element next
            break;

        case 'DATA':
            if ($this->_xh['vt'] != 'data') {
                // a second data element inside the same array
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found two data elements inside an array element";

                return;
            }
            // fall through voluntarily: nothing else to do

        case 'METHODCALL':
        case 'METHODRESPONSE':
        case 'PARAMS':
            // valid elements that add little to processing
            break;

        case 'METHODNAME':
        case 'NAME':
            /// @todo we could check for 2 NAME elements inside a MEMBER element
            $this->_xh['ac'] = '';
            break;

        case 'FAULT':
            $this->_xh['isf'] = 1;
            break;

        case 'MEMBER':
            // the member name starts out as null, so that a missing NAME element can be detected later on
            $topOfValueStack = count($this->_xh['valuestack']) - 1;
            $this->_xh['valuestack'][$topOfValueStack]['name'] = null;
            // fall through voluntarily

        case 'PARAM':
            // clear value type, so we can check later if no value has been passed for this param/member
            $this->_xh['vt'] = null;
            break;

        case 'NIL':
        case 'EX:NIL':
            if (!$this->current_parsing_options['xmlrpc_null_extension']) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                return;
            }
            if ($this->_xh['vt'] != 'value') {
                // a second data element inside the same value
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                return;
            }
            // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
            $this->_xh['ac'] = '';
            break;

        default:
            // unknown element: flag the document as invalid
            /// @todo feature creep = allow a callback instead
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

            return;
    }

    // step 3: remember the element, so that the nesting of its children can be validated
    $this->_xh['stack'][] = $name;

    /// @todo optimization creep: move this inside the big switch() above
    if ($name != 'VALUE') {
        $this->_xh['lv'] = 0;
    }
}
/**
 * xml parser handler function for close element tags.
 * Converts the accumulated character data into the value of the element being closed, and moves completed
 * values into their container (array, struct or list of params).
 * @internal
 *
 * @param resource $parser
 * @param string $name the element name, upper-cased
 * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility
 * @return void
 * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
 *
 * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
 *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
 */
public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
{
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // pop this element name from stack
    // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check that $name matches
    // the popped element. We also checked for proper nesting at start of elements...
    array_pop($this->_xh['stack']);

    switch ($name) {
        case 'VALUE':
            // If no scalar was inside <VALUE></VALUE>, it was a string value
            if ($this->_xh['vt'] == 'value') {
                $this->_xh['value'] = $this->_xh['ac'];
                $this->_xh['vt'] = Value::$xmlrpcString;
            }

            // in case there is charset conversion required, do it here, to catch both cases of string values
            if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
                $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
            }

            if ($rebuildXmlrpcvals > 0) {
                // build the xml-rpc val out of the data received, and substitute it
                $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                // in case we got info about underlying php class, save it in the object we're rebuilding
                if (isset($this->_xh['php_class'])) {
                    $temp->_php_class = $this->_xh['php_class'];
                }
                $this->_xh['value'] = $temp;
            } elseif ($rebuildXmlrpcvals < 0) {
                // xmlrpc-extension emulation: only datetime and base64 values get wrapped into objects
                if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
                    $this->_xh['value'] = (object)array(
                        'xmlrpc_type' => 'datetime',
                        'scalar' => $this->_xh['value'],
                        'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
                    );
                } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
                    $this->_xh['value'] = (object)array(
                        'xmlrpc_type' => 'base64',
                        'scalar' => $this->_xh['value']
                    );
                }
            } else {
                /// @todo this should handle php-serialized objects, since std deserializing is done
                ///       by php_xmlrpc_decode, which we will not be calling...
                //if (isset($this->_xh['php_class'])) {
                //}
            }

            // check if we are inside an array or struct:
            // if value just built is inside an array, let's move it into array on the stack
            $vscount = count($this->_xh['valuestack']);
            if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
            }
            break;

        case 'STRING':
            $this->_xh['vt'] = Value::$xmlrpcString;
            $this->_xh['lv'] = 3; // indicate we've found a value
            $this->_xh['value'] = $this->_xh['ac'];
            break;

        case 'BOOLEAN':
            $this->_xh['vt'] = Value::$xmlrpcBoolean;
            $this->_xh['lv'] = 3; // indicate we've found a value
            // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
            // even though the spec never mentions them (see e.g. Blogger api docs)
            // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
            // Note the non-strict type check: it will allow ' 1 '
            /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
            ///       Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
            ///       to changes in the way php compares strings (since 8.0, leading and trailing newlines are
            ///       accepted when deciding if a string numeric...)
            if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
                $this->_xh['value'] = true;
            } else {
                // log if receiving something strange, even though we set the value to false anyway
                /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
                if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
                    if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                $this->_xh['value'] = false;
            }
            break;

        case 'EX:I8':
            $name = 'i8';
            // fall through voluntarily
        case 'I4':
        case 'I8':
        case 'INT':
            // NB: we build the Value object with the original xml element name found, except for ex:i8. The
            // `Value::scalarTyp()` function will do some normalization of the data
            $this->_xh['vt'] = strtolower($name);
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('non numeric data received in INT value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
                /// @todo: find a better way of reporting an error value than this! Use NaN?
                $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
            } else {
                // it's ok, add it on
                $this->_xh['value'] = (int)$this->_xh['ac'];
            }
            break;

        case 'DOUBLE':
            $this->_xh['vt'] = Value::$xmlrpcDouble;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }

                $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
            } else {
                // it's ok, add it on
                $this->_xh['value'] = (double)$this->_xh['ac'];
            }
            break;

        case 'DATETIME.ISO8601':
            $this->_xh['vt'] = Value::$xmlrpcDateTime;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
            }
            if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
                try {
                    $this->_xh['value'] = new \DateTime($this->_xh['ac']);

                // the default regex used to validate the date string a few lines above should make this case impossible,
                // but one never knows...
                } catch(\Exception $e) {
                    // what to do? We can not guarantee that a valid date can be created. We return null...
                    if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
                        $e->getMessage(), __METHOD__)) {
                        return;
                    }
                    // ...and we have to set it explicitly: otherwise the value of the previously parsed element
                    // would be silently reused for this one
                    $this->_xh['value'] = null;
                }
            } else {
                $this->_xh['value'] = $this->_xh['ac'];
            }
            break;

        case 'BASE64':
            $this->_xh['vt'] = Value::$xmlrpcBase64;
            $this->_xh['lv'] = 3; // indicate we've found a value
            if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
                // strict decoding: any character outside the base64 alphabet makes the document invalid
                $v = base64_decode($this->_xh['ac'], true);
                if ($v === false) {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
                    return;
                }
            } else {
                $v = base64_decode($this->_xh['ac']);
                if ($v === '' && $this->_xh['ac'] !== '') {
                    // only the empty string should decode to the empty string
                    $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
                        $this->truncateValueForLog($this->_xh['ac']));
                }
            }
            $this->_xh['value'] = $v;
            break;

        case 'NAME':
            // store the member name in the struct currently being built
            $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
            break;

        case 'MEMBER':
            // add to array in the stack the last element built, unless no VALUE or no NAME were found
            if ($this->_xh['vt']) {
                $vscount = count($this->_xh['valuestack']);
                if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
                    if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
                        return;
                    }
                    $this->_xh['valuestack'][$vscount - 1]['name'] = '';
                }
                $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
            } else {
                if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
                    return;
                }
            }
            break;

        case 'DATA':
            $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
            break;

        case 'STRUCT':
        case 'ARRAY':
            // fetch out of stack array of values, and promote it to current value
            $currVal = array_pop($this->_xh['valuestack']);
            $this->_xh['value'] = $currVal['values'];
            $this->_xh['vt'] = strtolower($name);
            if (isset($currVal['php_class'])) {
                $this->_xh['php_class'] = $currVal['php_class'];
            }
            break;

        case 'PARAM':
            // add to array of params the current value, unless no VALUE was found
            /// @todo should we also check if there were two VALUE inside the PARAM?
            if ($this->_xh['vt']) {
                $this->_xh['params'][] = $this->_xh['value'];
                $this->_xh['pt'][] = $this->_xh['vt'];
            } else {
                if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
                    return;
                }
            }
            break;

        case 'METHODNAME':
            if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
                if (!$this->handleParsingError('invalid data received in METHODNAME: '.
                    $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                    return;
                }
            }
            $methodName = trim($this->_xh['ac']);
            $this->_xh['method'] = $methodName;
            // we allow the callback to f.e. give us back a mangled method name by manipulating $this
            if (isset($this->current_parsing_options['methodname_callback'])) {
                call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
            }
            break;

        case 'NIL':
        case 'EX:NIL':
            // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
            //if ($this->current_parsing_options['xmlrpc_null_extension']) {
                $this->_xh['vt'] = 'null';
                $this->_xh['value'] = null;
                $this->_xh['lv'] = 3;
            //}
            break;

        /// @todo add extra checking:
        ///       - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
        ///       - FAULT should contain a single struct with the 2 expected members (check their name and type)
        ///       - METHODCALL should contain a methodname
        case 'PARAMS':
        case 'FAULT':
        case 'METHODCALL':
        case 'METHODRESPONSE':
            break;

        default:
            // End of INVALID ELEMENT
            // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
            // $this->_xh['isf'] is set to 2...
            break;
    }
}
/**
 * End-of-element handler used when decoding xml-rpc requests/responses straight into php values, i.e. without
 * rebuilding xml-rpc Value objects.
 * @internal
 *
 * @param resource $parser
 * @param string $name
 * @return void
 */
public function xmlrpc_ee_fast($parser, $name)
{
    // 0 = rebuild plain php values
    $rebuildMode = 0;
    $this->xmlrpc_ee($parser, $name, $rebuildMode);
}
/**
 * End-of-element handler used when decoding xml-rpc requests/responses into xmlrpc-extension compatible values
 * (plain php for all but base64 and datetime).
 * @internal
 *
 * @param resource $parser
 * @param string $name
 * @return void
 */
public function xmlrpc_ee_epi($parser, $name)
{
    // -1 = xmlrpc-extension compatibility mode
    $rebuildMode = -1;
    $this->xmlrpc_ee($parser, $name, $rebuildMode);
}
/**
 * xml parser handler function for character data: appends it to the accumulator.
 * @internal
 *
 * @param resource $parser
 * @param string $data
 * @return void
 */
public function xmlrpc_cd($parser, $data)
{
    // Character data is dropped in two cases:
    // - an xml fault has already been detected
    // - "lookforvalue == 3", meaning that an entire value was found already
    if ($this->_xh['isf'] >= 2 || $this->_xh['lv'] == 3) {
        return;
    }

    $this->_xh['ac'] .= $data;
}
/**
 * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
 * In fact, it only gets called on unknown entities...
 * @internal
 *
 * @param $parser
 * @param string $data
 * @return void
 */
public function xmlrpc_dh($parser, $data)
{
    // skip processing if xml fault already detected
    if ($this->_xh['isf'] >= 2) {
        return;
    }

    // anything which looks like an entity reference (&...;) is kept verbatim in the accumulator
    $firstChar = substr($data, 0, 1);
    $lastChar = substr($data, -1, 1);
    if ($firstChar == '&' && $lastChar == ';') {
        $this->_xh['ac'] .= $data;
    }
}
840 * xml charset encoding guessing helper function.
841 * Tries to determine the charset encoding of an XML chunk received over HTTP.
842 * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type,
843 * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?) clients/servers,
844 * which will be most probably using UTF-8 anyway...
845 * In order of importance checks:
846 * 1. http headers
847 * 2. BOM
848 * 3. XML declaration
849 * 4. guesses using mb_detect_encoding()
851 * @param string $httpHeader the http Content-type header
852 * @param string $xmlChunk xml content buffer
853 * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
854 * This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
855 * @return string the encoding determined. Null if it can't be determined and mbstring is enabled,
856 * PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled
858 * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
859 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
860 * the method independent of global state
public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
{
    // The probes below run in order of decreasing authority; the first one that yields an answer wins.

    // Probe 1: a charset parameter in the HTTP header.
    //   header: Content-type = ...; charset=value(; ...)*
    //   `value` is a token or a quoted string; no check is made for invalid chars in it, and whitespace/quotes
    //   around it are stripped (which could alter an exotic quoted charset name - nobody uses those).
    /// @todo this matches a charset specification in ANY header, not only in Content-Type. Fix it?
    $found = array();
    if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $found)) {
        $charset = trim($found[1], " \t\"");
        return strtoupper($charset);
    }

    // Probe 2: a byte-order mark at the very start of the payload
    // (source: http://www.w3.org/TR/2000/REC-xml-20001006).
    // The 4-byte marks have to be tried before the 2-byte ones, since FF FE is a prefix of FF FE 00 00.
    /// @todo per the spec, an encoding found via BOM should be cross-checked against the xml declaration
    /// @todo sniff the first bytes even when there is no BOM?
    $bomPatterns = array(
        'UCS-4' => '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        'UTF-16' => '/^(?:\xFE\xFF|\xFF\xFE)/',
        'UTF-8' => '/^(?:\xEF\xBB\xBF)/',
    );
    foreach ($bomPatterns as $bomEncoding => $bomPattern) {
        if (preg_match($bomPattern, $xmlChunk)) {
            return $bomEncoding;
        }
    }

    // Probe 3: the `encoding` pseudo-attribute of the xml declaration. The captured value still carries its
    // quotes, hence the substr().
    /// @todo this regexp can not match when $xmlChunk itself is encoded as UTF-32/UCS-4 (and most likely
    ///       UTF-16/UCS-2): in that case the guesswork is left to mbstring
    $declPattern = '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
        '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/";
    if (preg_match($declPattern, $xmlChunk, $found)) {
        $quoted = $found[2];
        return strtoupper(substr($quoted, 1, -1));
    }

    // Probe 4: nothing was declared. Without mbstring, fall back to the library-wide default. (RFC 2616 and
    // RFC 1945 would mandate iso-8859-1 for text/* content, but the default is kept configurable for BC.)
    if (!function_exists('mb_detect_encoding')) {
        return PhpXmlRpc::$xmlrpc_defencoding;
    }

    // With mbstring, let it guess - optionally restricted to the caller's (or the global) candidate list.
    if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
        $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
    }
    $detected = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

    // mb_detect_encoding() says 'ASCII' where the IANA name is 'US-ASCII': normalize to the latter.
    // Any other result - including the failure value - is handed back untouched.
    return ($detected == 'ASCII') ? ('US-' . $detected) : $detected;
}
/**
 * Helper function: tells whether an xml chunk states its own charset, either through a leading byte-order mark
 * or through the `encoding` pseudo-attribute of its xml declaration.
 *
 * @param string $xmlChunk
 * @return bool true if a BOM or an encoding declaration is found at the very beginning of $xmlChunk
 *
 * @todo rename to hasEncodingDeclaration
 */
public static function hasEncoding($xmlChunk)
{
    // Every pattern is anchored at the start of the chunk; a single hit is enough.
    $signatures = array(
        // UTF-16 (or other) BOM patterns (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
        '/^(?:\xFE\xFF|\xFF\xFE)/',
        '/^(?:\xEF\xBB\xBF)/',
        // encoding specified in the xml declaration. Details:
        //   SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        //   EQ:    SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        '/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
    );

    foreach ($signatures as $signature) {
        if (preg_match($signature, $xmlChunk)) {
            return true;
        }
    }

    return false;
}
/**
 * Deals with a problem found in the data being parsed, as dictated by the 'xmlrpc_reject_invalid_values'
 * parsing option: when the option is on, the problem is recorded in $_xh ('isf' set to 2, 'isf_reason' set to
 * the capitalized message); when it is off, the problem is only sent to the error log.
 *
 * @param string $message
 * @param string $method method/file/line info, used as prefix of the logged message when not empty
 * @return bool false if the caller has to stop parsing
 */
protected function handleParsingError($message, $method = '')
{
    if (!$this->current_parsing_options['xmlrpc_reject_invalid_values']) {
        // lenient mode: leave a trace in the log and let the caller carry on
        $origin = ($method != '') ? $method . ': ' : '';
        $this->getLogger()->error('XML-RPC: ' . $origin . $message);

        return true;
    }

    // strict mode: store the failure where the callers will look for it
    $this->_xh['isf'] = 2;
    $this->_xh['isf_reason'] = ucfirst($message);

    return false;
}
/**
 * Shortens a piece of unsafe data so that it can be embedded in a log / error message.
 * Data longer than $this->maxLogValueLength bytes is cut and gets a trailing '...'; the ellipsis is counted
 * within the allowed length. Shorter data is returned as is.
 *
 * @param string $data
 * @return string
 */
protected function truncateValueForLog($data)
{
    $limit = $this->maxLogValueLength;

    if (strlen($data) <= $limit) {
        return $data;
    }

    // keep room for the 3 characters of the ellipsis
    $kept = substr($data, 0, $limit - 3);

    return $kept . '...';
}
1002 // *** BC layer ***
/**
 * xml parser handler function for opening element tags.
 * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses.
 * It logs a deprecation notice, then hands the call over to xmlrpc_se().
 * @deprecated
 *
 * @param resource $parser
 * @param $name
 * @param $attrs
 * @return void
 */
public function xmlrpc_se_any($parser, $name, $attrs)
{
    // NB: as long as this method is in use, the line below will be spamming the log - once per opening tag
    $notice = 'Method ' . __METHOD__ . ' is deprecated';
    $this->logDeprecation($notice);

    // NOTE(review): the trailing `true` presumably selects the "accept any top-level element" mode of
    // xmlrpc_se() - its signature is not visible from here, to be confirmed
    $this->xmlrpc_se($parser, $name, $attrs, true);
}
/**
 * BC layer: read access to properties which used to be directly accessible.
 *
 * '_xh' and 'xmlrpc_valid_parents' are returned by reference, after logging a deprecation notice.
 * 'accept' is supported as well, for symmetry with __set(), __isset() and __unset(): its value is read from
 * $this->current_parsing_options['accept'] (null when that option is not set) and handed back as a copy.
 * Any other name triggers an E_USER_WARNING and yields null.
 *
 * @param string $name
 * @return mixed
 */
public function &__get($name)
{
    switch ($name) {
        case '_xh':
        case 'xmlrpc_valid_parents':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            return $this->$name;
        case 'accept':
            $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
            // a local variable is returned, so that the caller gets no reference into the options array and
            // no 'accept' key is created as a side effect of reading it
            $result = isset($this->current_parsing_options['accept']) ? $this->current_parsing_options['accept'] : null;
            return $result;
        default:
            /// @todo throw instead? There are very few other places where the lib triggers errors which can potentially reach stdout...
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
            trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
            // a method returning by reference has to return a variable, not a literal
            $result = null;
            return $result;
    }
}
/**
 * BC layer: write access to properties which used to be directly accessible.
 *
 * 'accept' is stored into $this->current_parsing_options['accept']; '_xh' and 'xmlrpc_valid_parents' are
 * assigned to the homonymous member. Both cases log a deprecation notice. Any other name triggers an
 * E_USER_WARNING and is otherwise ignored.
 *
 * @param string $name
 * @param mixed $value
 * @return void
 */
public function __set($name, $value)
{
    // this should only ever be called by subclasses which overtook `parse()`
    if ($name == 'accept') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->current_parsing_options['accept'] = $value;
        return;
    }

    if ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
        $this->$name = $value;
        return;
    }

    /// @todo throw instead? There are very few other places where the lib triggers errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $frame = $trace[0];
    trigger_error('Undefined property via __set(): ' . $name . ' in ' . $frame['file'] . ' on line ' . $frame['line'], E_USER_WARNING);
}
/**
 * BC layer: isset() support for properties which used to be directly accessible.
 *
 * 'accept' is looked up in $this->current_parsing_options; '_xh' and 'xmlrpc_valid_parents' in the homonymous
 * member. Both cases log a deprecation notice. Any other name is silently reported as not set.
 *
 * @param string $name
 * @return bool
 */
public function __isset($name)
{
    if ($name == 'accept') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->current_parsing_options['accept']);
    }

    if ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');

        return isset($this->$name);
    }

    return false;
}
/**
 * BC layer: unset() support for properties which used to be directly accessible.
 *
 * 'accept' is removed from $this->current_parsing_options; for '_xh' and 'xmlrpc_valid_parents' the homonymous
 * member is unset. Both cases log a deprecation notice. Any other name triggers an E_USER_WARNING.
 *
 * @param string $name
 * @return void
 */
public function __unset($name)
{
    // q: does this make sense at all?
    if ($name == 'accept') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->current_parsing_options['accept']);
        return;
    }

    if ($name == '_xh' || $name == 'xmlrpc_valid_parents') {
        $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
        unset($this->$name);
        return;
    }

    /// @todo throw instead? There are very few other places where the lib triggers errors which can potentially reach stdout...
    $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
    $frame = $trace[0];
    trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $frame['file'] . ' on line ' . $frame['line'], E_USER_WARNING);
}