3 namespace PhpXmlRpc\Helper
;
5 use PhpXmlRpc\PhpXmlRpc
;
6 use PhpXmlRpc\Traits\DeprecationLogger
;
10 * Deals with parsing the XML.
11 * @see http://xmlrpc.com/spec.md
13 * @todo implement an interface to allow for alternative implementations
14 * - make access to $_xh protected, return more high-level data structures
15 * - move the private parts of $_xh to the internal-use parsing-options config
16 * - add parseRequest, parseResponse, parseValue methods
17 * @todo if iconv() or the mbstring extension is available, we could allow converting the received xml to a custom charset encoding
18 * while parsing, which is faster than doing it later by going over the rebuilt data structure
19 * @todo rename? This is an xml-rpc parser, not a generic xml parser...
21 * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
22 * @property int $accept deprecated - (protected) access left in purely for BC
26 use DeprecationLogger
;
28 const RETURN_XMLRPCVALS
= 'xmlrpcvals';
29 const RETURN_EPIVALS
= 'epivals';
30 const RETURN_PHP
= 'phpvals';
32 const ACCEPT_REQUEST
= 1;
33 const ACCEPT_RESPONSE
= 2;
34 const ACCEPT_VALUE
= 4;
35 const ACCEPT_FAULT
= 8;
39 * The max length beyond which data will get truncated in error messages
41 protected $maxLogValueLength = 100;
45 * Used to store state during parsing and to pass parsing results to callers.
46 * Quick explanation of components:
48 * ac - used to accumulate values
49 * stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements
50 * valuestack - array used for parsing arrays and structs
51 * lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
52 * (values: 0=not looking, 1=looking, 3=found)
54 * isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
55 * isf_reason - used for storing xml-rpc response fault string
56 * value - used to store the value in responses
57 * method - used to store method name in requests
58 * params - used to store parameters in requests
59 * pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
60 * rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
62 protected $_xh = array(
65 'valuestack' => array(),
79 protected $xmlrpc_valid_parents = array(
80 'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
81 'BOOLEAN' => array('VALUE'),
82 'I4' => array('VALUE'),
83 'I8' => array('VALUE'),
84 'EX:I8' => array('VALUE'),
85 'INT' => array('VALUE'),
86 'STRING' => array('VALUE'),
87 'DOUBLE' => array('VALUE'),
88 'DATETIME.ISO8601' => array('VALUE'),
89 'BASE64' => array('VALUE'),
90 'MEMBER' => array('STRUCT'),
91 'NAME' => array('MEMBER'),
92 'DATA' => array('ARRAY'),
93 'ARRAY' => array('VALUE'),
94 'STRUCT' => array('VALUE'),
95 'PARAM' => array('PARAMS'),
96 'METHODNAME' => array('METHODCALL'),
97 'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
98 'FAULT' => array('METHODRESPONSE'),
99 'NIL' => array('VALUE'), // only used when extension activated
100 'EX:NIL' => array('VALUE'), // only used when extension activated
103 /** @var array $parsing_options */
104 protected $parsing_options = array();
106 /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
107 //protected $accept = 3;
109 /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
110 protected $maxChunkLength = 4194304;
112 * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
113 * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
114 * the element handler methods
116 protected $current_parsing_options = array(
117 'xmlrpc_null_extension' => false,
118 'xmlrpc_return_datetimes' => false,
119 'xmlrpc_reject_invalid_values' => false
123 * @param array $options integer keys: options passed to the inner xml parser
125 * - target_charset (string)
126 * - methodname_callback (callable)
127 * - xmlrpc_null_extension (bool)
128 * - xmlrpc_return_datetimes (bool)
129 * - xmlrpc_reject_invalid_values (bool)
131 public function __construct(array $options = array())
133 $this->parsing_options
= $options;
137 * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
138 * Logs to the error log any issues which do not cause the parsing to fail.
140 * @param string $data
141 * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
142 * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE, self::ACCEPT_FAULT
143 * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
144 * These options are added to options received in the constructor.
145 * Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
146 * are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
147 * @return array see the definition of $this->_xh for the meaning of the results
148 * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
150 * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
151 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
152 * that parsing will be completely independent of global state. Note that it might incur a small perf hit...
154 public function parse($data, $returnType = self
::RETURN_XMLRPCVALS
, $accept = 3, $options = array())
159 'valuestack' => array(),
164 'method' => false, // so we can check later if we got a methodname or not
170 $len = strlen($data);
172 // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
174 $this->_xh
['isf'] = 3;
175 $this->_xh
['isf_reason'] = 'XML error 5: empty document';
179 $this->current_parsing_options
= array('accept' => $accept);
181 $mergedOptions = $this->parsing_options
;
182 foreach ($options as $key => $val) {
183 $mergedOptions[$key] = $val;
186 foreach ($mergedOptions as $key => $val) {
187 // q: can php be built without ctype? should we use a regexp?
188 if (is_string($key) && !ctype_digit($key)) {
189 /// @todo on invalid options, throw/error-out instead of logging an error message?
191 case 'target_charset':
192 if (function_exists('mb_convert_encoding')) {
193 $this->current_parsing_options
['target_charset'] = $val;
195 $this->getLogger()->error('XML-RPC: ' . __METHOD__
. ": 'target_charset' option is unsupported without mbstring");
199 case 'methodname_callback':
200 if (is_callable($val)) {
201 $this->current_parsing_options
['methodname_callback'] = $val;
203 $this->getLogger()->error('XML-RPC: ' . __METHOD__
. ": Callback passed as 'methodname_callback' is not callable");
207 case 'xmlrpc_null_extension':
208 case 'xmlrpc_return_datetimes':
209 case 'xmlrpc_reject_invalid_values':
210 $this->current_parsing_options
[$key] = $val;
214 $this->getLogger()->error('XML-RPC: ' . __METHOD__
. ": unsupported option: $key");
216 unset($mergedOptions[$key]);
220 if (!isset($this->current_parsing_options
['xmlrpc_null_extension'])) {
221 $this->current_parsing_options
['xmlrpc_null_extension'] = PhpXmlRpc
::$xmlrpc_null_extension;
223 if (!isset($this->current_parsing_options
['xmlrpc_return_datetimes'])) {
224 $this->current_parsing_options
['xmlrpc_return_datetimes'] = PhpXmlRpc
::$xmlrpc_return_datetimes;
226 if (!isset($this->current_parsing_options
['xmlrpc_reject_invalid_values'])) {
227 $this->current_parsing_options
['xmlrpc_reject_invalid_values'] = PhpXmlRpc
::$xmlrpc_reject_invalid_values;
230 // NB: we use '' instead of null to force charset detection from the xml declaration
231 $parser = xml_parser_create('');
233 foreach ($mergedOptions as $key => $val) {
234 xml_parser_set_option($parser, $key, $val);
237 // always set this, in case someone tries to disable it via options...
238 xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING
, 1);
240 xml_set_object($parser, $this);
242 switch ($returnType) {
243 case self
::RETURN_PHP
:
244 xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast');
246 case self
::RETURN_EPIVALS
:
247 xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi');
249 /// @todo log an error / throw / error-out on unsupported return type
250 case XMLParser
::RETURN_XMLRPCVALS
:
252 xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee');
255 xml_set_character_data_handler($parser, 'xmlrpc_cd');
256 xml_set_default_handler($parser, 'xmlrpc_dh');
259 // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
260 for ($offset = 0; $offset < $len; $offset +
= $this->maxChunkLength
) {
261 $chunk = substr($data, $offset, $this->maxChunkLength
);
262 // error handling: xml not well formed
263 if (!xml_parse($parser, $chunk, $offset +
$this->maxChunkLength
>= $len)) {
264 $errCode = xml_get_error_code($parser);
265 $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
266 xml_get_current_line_number($parser), xml_get_current_column_number($parser));
268 $this->_xh
['isf'] = 3;
269 $this->_xh
['isf_reason'] = $errStr;
271 // no need to parse further if we already have a fatal error
272 if ($this->_xh
['isf'] >= 2) {
276 /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
277 } catch (\Exception
$e) {
278 xml_parser_free($parser);
279 $this->current_parsing_options
= array();
280 /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
282 } catch (\Error
$e) {
283 xml_parser_free($parser);
284 $this->current_parsing_options
= array();
285 //$this->accept = $prevAccept;
286 /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
290 xml_parser_free($parser);
291 $this->current_parsing_options
= array();
297 * xml parser handler function for opening element tags.
300 * @param resource $parser
301 * @param string $name
303 * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
306 * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
307 * and remove the checking for $this->_xh['isf'] >= 2 everywhere
309 public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
311 // if invalid xml-rpc already detected, skip all processing
312 if ($this->_xh
['isf'] >= 2) {
316 // check for correct element nesting
317 if (count($this->_xh
['stack']) == 0) {
318 // top level element can only be of 2 types
319 /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
320 /// there is only a single top level element in xml anyway
323 if ($acceptSingleVals === false) {
324 $accept = $this->current_parsing_options
['accept'];
326 $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__
. ' is deprecated');
327 $accept = self
::ACCEPT_REQUEST | self
::ACCEPT_RESPONSE | self
::ACCEPT_VALUE
;
329 if (($name == 'METHODCALL' && ($accept & self
::ACCEPT_REQUEST
)) ||
330 ($name == 'METHODRESPONSE' && ($accept & self
::ACCEPT_RESPONSE
)) ||
331 ($name == 'VALUE' && ($accept & self
::ACCEPT_VALUE
)) ||
332 ($name == 'FAULT' && ($accept & self
::ACCEPT_FAULT
))) {
333 $this->_xh
['rt'] = strtolower($name);
335 $this->_xh
['isf'] = 2;
336 $this->_xh
['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;
341 // not top level element: see if parent is OK
342 $parent = end($this->_xh
['stack']);
343 if (!array_key_exists($name, $this->xmlrpc_valid_parents
) ||
!in_array($parent, $this->xmlrpc_valid_parents
[$name])) {
344 $this->_xh
['isf'] = 2;
345 $this->_xh
['isf_reason'] = "xmlrpc element $name cannot be child of $parent";
352 // optimize for speed switch cases: most common cases first
354 /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
355 $this->_xh
['vt'] = 'value'; // indicator: no value found yet
356 $this->_xh
['ac'] = '';
357 $this->_xh
['lv'] = 1;
358 $this->_xh
['php_class'] = null;
363 if (PHP_INT_SIZE
=== 4) {
364 // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
365 $this->_xh
['isf'] = 2;
366 $this->_xh
['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";
370 // fall through voluntarily
377 case 'DATETIME.ISO8601':
379 if ($this->_xh
['vt'] != 'value') {
380 // two data elements inside a value: an error occurred!
381 $this->_xh
['isf'] = 2;
382 $this->_xh
['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
386 $this->_xh
['ac'] = ''; // reset the accumulator
391 if ($this->_xh
['vt'] != 'value') {
392 // two data elements inside a value: an error occurred!
393 $this->_xh
['isf'] = 2;
394 $this->_xh
['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
398 // create an empty array to hold child values, and push it onto appropriate stack
403 // check for out-of-band information to rebuild php objs and, in case it is found, save it
404 if (@isset
($attrs['PHP_CLASS'])) {
405 $curVal['php_class'] = $attrs['PHP_CLASS'];
407 $this->_xh
['valuestack'][] = $curVal;
408 $this->_xh
['vt'] = 'data'; // be prepared for a data element next
412 if ($this->_xh
['vt'] != 'data') {
413 // two data elements inside an array: an error occurred!
414 $this->_xh
['isf'] = 2;
415 $this->_xh
['isf_reason'] = "found two data elements inside an array element";
421 case 'METHODRESPONSE':
423 // valid elements that add little to processing
428 /// @todo we could check for 2 NAME elements inside a MEMBER element
429 $this->_xh
['ac'] = '';
433 $this->_xh
['isf'] = 1;
437 // set member name to null, in case we do not find it in the xml later on
438 $this->_xh
['valuestack'][count($this->_xh
['valuestack']) - 1]['name'] = null;
439 //$this->_xh['ac']='';
440 // Drop through intentionally
443 // clear value type, so we can check later if no value has been passed for this param/member
444 $this->_xh
['vt'] = null;
449 if ($this->current_parsing_options
['xmlrpc_null_extension']) {
450 if ($this->_xh
['vt'] != 'value') {
451 // two data elements inside a value: an error occurred!
452 $this->_xh
['isf'] = 2;
453 $this->_xh
['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";
457 // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
458 $this->_xh
['ac'] = '';
461 $this->_xh
['isf'] = 2;
462 $this->_xh
['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';
469 // INVALID ELEMENT: RAISE ISF so that it is later recognized
470 /// @todo feature creep = allow a callback instead
471 $this->_xh
['isf'] = 2;
472 $this->_xh
['isf_reason'] = "found not-xmlrpc xml element $name";
477 // Save current element name to stack, to validate nesting
478 $this->_xh
['stack'][] = $name;
480 /// @todo optimization creep: move this inside the big switch() above
481 if ($name != 'VALUE') {
482 $this->_xh
['lv'] = 0;
487 * xml parser handler function for close element tags.
490 * @param resource $parser
491 * @param string $name
492 * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, <0 for xmlrpc-extension compatibility
494 * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
496 * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
497 * and remove the checking for $this->_xh['isf'] >= 2 everywhere
499 public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
501 if ($this->_xh
['isf'] >= 2) {
505 // pop this element name from the stack
506 // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.
507 // we also checked for proper nesting at start of elements...
508 $currElem = array_pop($this->_xh
['stack']);
512 // If no scalar was inside <VALUE></VALUE>, it was a string value
513 if ($this->_xh
['vt'] == 'value') {
514 $this->_xh
['value'] = $this->_xh
['ac'];
515 $this->_xh
['vt'] = Value
::$xmlrpcString;
518 // in case there is charset conversion required, do it here, to catch both cases of string values
519 if (isset($this->current_parsing_options
['target_charset']) && $this->_xh
['vt'] === Value
::$xmlrpcString) {
520 $this->_xh
['value'] = mb_convert_encoding($this->_xh
['value'], $this->current_parsing_options
['target_charset'], 'UTF-8');
523 if ($rebuildXmlrpcvals > 0) {
524 // build the xml-rpc val out of the data received, and substitute it
525 $temp = new Value($this->_xh
['value'], $this->_xh
['vt']);
526 // in case we got info about underlying php class, save it in the object we're rebuilding
527 if (isset($this->_xh
['php_class'])) {
528 $temp->_php_class
= $this->_xh
['php_class'];
530 $this->_xh
['value'] = $temp;
531 } elseif ($rebuildXmlrpcvals < 0) {
532 if ($this->_xh
['vt'] == Value
::$xmlrpcDateTime) {
533 $this->_xh
['value'] = (object)array(
534 'xmlrpc_type' => 'datetime',
535 'scalar' => $this->_xh
['value'],
536 'timestamp' => \PhpXmlRpc\Helper\Date
::iso8601Decode($this->_xh
['value'])
538 } elseif ($this->_xh
['vt'] == Value
::$xmlrpcBase64) {
539 $this->_xh
['value'] = (object)array(
540 'xmlrpc_type' => 'base64',
541 'scalar' => $this->_xh
['value']
545 /// @todo this should handle php-serialized objects, since std deserializing is done
546 /// by php_xmlrpc_decode, which we will not be calling...
547 //if (isset($this->_xh['php_class'])) {
551 // check if we are inside an array or struct:
552 // if value just built is inside an array, let's move it into array on the stack
553 $vscount = count($this->_xh
['valuestack']);
554 if ($vscount && $this->_xh
['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
555 $this->_xh
['valuestack'][$vscount - 1]['values'][] = $this->_xh
['value'];
560 $this->_xh
['vt'] = Value
::$xmlrpcString;
561 $this->_xh
['lv'] = 3; // indicate we've found a value
562 $this->_xh
['value'] = $this->_xh
['ac'];
566 $this->_xh
['vt'] = Value
::$xmlrpcBoolean;
567 $this->_xh
['lv'] = 3; // indicate we've found a value
568 // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
569 // even though the spec never mentions them (see e.g. Blogger api docs)
570 // NB: this simple check helps a lot in sanitizing input, i.e. no security problems around here
571 // Note the non-strict type check: it will allow ' 1 '
572 /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
573 /// Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
574 /// to changes in the way php compares strings (since 8.0, leading and trailing newlines are
575 /// accepted when deciding if a string is numeric...)
576 if ($this->_xh
['ac'] == '1' ||
strcasecmp($this->_xh
['ac'], 'true') === 0) {
577 $this->_xh
['value'] = true;
579 // log if receiving something strange, even though we set the value to false anyway
580 /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
581 if ($this->_xh
['ac'] != '0' && strcasecmp($this->_xh
['ac'], 'false') !== 0) {
582 if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
583 $this->truncateValueForLog($this->_xh
['ac']), __METHOD__
)) {
587 $this->_xh
['value'] = false;
593 // fall through voluntarily
597 // NB: we build the Value object with the original xml element name found, except for ex:i8. The
598 // `Value::scalarTyp()` function will do some normalization of the data
599 $this->_xh
['vt'] = strtolower($name);
600 $this->_xh
['lv'] = 3; // indicate we've found a value
601 if (!preg_match(PhpXmlRpc
::$xmlrpc_int_format, $this->_xh
['ac'])) {
602 if (!$this->handleParsingError('non numeric data received in INT value: ' .
603 $this->truncateValueForLog($this->_xh
['ac']), __METHOD__
)) {
606 /// @todo: find a better way of reporting an error value than this! Use NaN?
607 $this->_xh
['value'] = 'ERROR_NON_NUMERIC_FOUND';
609 // it's ok, add it on
610 $this->_xh
['value'] = (int)$this->_xh
['ac'];
615 $this->_xh
['vt'] = Value
::$xmlrpcDouble;
616 $this->_xh
['lv'] = 3; // indicate we've found a value
617 if (!preg_match(PhpXmlRpc
::$xmlrpc_double_format, $this->_xh
['ac'])) {
618 if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
619 $this->truncateValueForLog($this->_xh
['ac']), __METHOD__
)) {
623 $this->_xh
['value'] = 'ERROR_NON_NUMERIC_FOUND';
625 // it's ok, add it on
626 $this->_xh
['value'] = (double)$this->_xh
['ac'];
630 case 'DATETIME.ISO8601':
631 $this->_xh
['vt'] = Value
::$xmlrpcDateTime;
632 $this->_xh
['lv'] = 3; // indicate we've found a value
633 if (!preg_match(PhpXmlRpc
::$xmlrpc_datetime_format, $this->_xh
['ac'])) {
634 if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
635 $this->truncateValueForLog($this->_xh
['ac']), __METHOD__
)) {
639 if ($this->current_parsing_options
['xmlrpc_return_datetimes']) {
641 $this->_xh
['value'] = new \
DateTime($this->_xh
['ac']);
643 // the default regex used to validate the date string a few lines above should make this case impossible,
644 // but one never knows...
645 } catch(\Exception
$e) {
646 // what to do? We can not guarantee that a valid date can be created. We return null...
647 if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
648 $e->getMessage(), __METHOD__
)) {
653 $this->_xh
['value'] = $this->_xh
['ac'];
658 $this->_xh
['vt'] = Value
::$xmlrpcBase64;
659 $this->_xh
['lv'] = 3; // indicate we've found a value
660 if ($this->current_parsing_options
['xmlrpc_reject_invalid_values']) {
661 $v = base64_decode($this->_xh
['ac'], true);
663 $this->_xh
['isf'] = 2;
664 $this->_xh
['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh
['ac']);
668 $v = base64_decode($this->_xh
['ac']);
669 if ($v === '' && $this->_xh
['ac'] !== '') {
670 // only the empty string should decode to the empty string
671 $this->getLogger()->error('XML-RPC: ' . __METHOD__
. ': invalid data received in BASE64 value: ' .
672 $this->truncateValueForLog($this->_xh
['ac']));
675 $this->_xh
['value'] = $v;
679 $this->_xh
['valuestack'][count($this->_xh
['valuestack']) - 1]['name'] = $this->_xh
['ac'];
683 // add to array in the stack the last element built, unless no VALUE or no NAME were found
684 if ($this->_xh
['vt']) {
685 $vscount = count($this->_xh
['valuestack']);
686 if ($this->_xh
['valuestack'][$vscount - 1]['name'] === null) {
687 if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__
)) {
690 $this->_xh
['valuestack'][$vscount - 1]['name'] = '';
692 $this->_xh
['valuestack'][$vscount - 1]['values'][$this->_xh
['valuestack'][$vscount - 1]['name']] = $this->_xh
['value'];
694 if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__
)) {
701 $this->_xh
['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
706 // fetch out of stack array of values, and promote it to current value
707 $currVal = array_pop($this->_xh
['valuestack']);
708 $this->_xh
['value'] = $currVal['values'];
709 $this->_xh
['vt'] = strtolower($name);
710 if (isset($currVal['php_class'])) {
711 $this->_xh
['php_class'] = $currVal['php_class'];
716 // add to array of params the current value, unless no VALUE was found
717 /// @todo should we also check if there were two VALUE inside the PARAM?
718 if ($this->_xh
['vt']) {
719 $this->_xh
['params'][] = $this->_xh
['value'];
720 $this->_xh
['pt'][] = $this->_xh
['vt'];
722 if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__
)) {
729 if (!preg_match(PhpXmlRpc
::$xmlrpc_methodname_format, $this->_xh
['ac'])) {
730 if (!$this->handleParsingError('invalid data received in METHODNAME: '.
731 $this->truncateValueForLog($this->_xh
['ac']), __METHOD__
)) {
735 $methodName = trim($this->_xh
['ac']);
736 $this->_xh
['method'] = $methodName;
737 // we allow the callback to f.e. give us back a mangled method name by manipulating $this
738 if (isset($this->current_parsing_options
['methodname_callback'])) {
739 call_user_func($this->current_parsing_options
['methodname_callback'], $methodName, $this, $parser);
745 // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
746 //if ($this->current_parsing_options['xmlrpc_null_extension']) {
747 $this->_xh
['vt'] = 'null';
748 $this->_xh
['value'] = null;
749 $this->_xh
['lv'] = 3;
753 /// @todo add extra checking:
754 /// - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
755 /// - FAULT should contain a single struct with the 2 expected members (check their name and type)
756 /// - METHODCALL should contain a methodname
760 case 'METHODRESPONSE':
764 // End of INVALID ELEMENT
765 // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
766 // $this->_xh['isf'] is set to 2...
772 * Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
775 * @param resource $parser
776 * @param string $name
779 public function xmlrpc_ee_fast($parser, $name)
781 $this->xmlrpc_ee($parser, $name, 0);
785 * Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
788 * @param resource $parser
789 * @param string $name
792 public function xmlrpc_ee_epi($parser, $name)
794 $this->xmlrpc_ee($parser, $name, -1);
798 * xml parser handler function for character data.
801 * @param resource $parser
802 * @param string $data
805 public function xmlrpc_cd($parser, $data)
807 // skip processing if xml fault already detected
808 if ($this->_xh
['isf'] >= 2) {
812 // "lookforvalue == 3" means that we've found an entire value and should discard any further character data
813 if ($this->_xh
['lv'] != 3) {
814 $this->_xh
['ac'] .= $data;
819 * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
820 * In fact, it only gets called on unknown entities...
827 public function xmlrpc_dh($parser, $data)
829 // skip processing if xml fault already detected
830 if ($this->_xh
['isf'] >= 2) {
834 if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
835 $this->_xh
['ac'] .= $data;
840 * xml charset encoding guessing helper function.
841 * Tries to determine the charset encoding of an XML chunk received over HTTP.
842 * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset parameter,
843 * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?) clients/servers,
844 * which will be most probably using UTF-8 anyway...
845 * In order of importance checks:
849 * 4. guesses using mb_detect_encoding()
851 * @param string $httpHeader the http Content-type header
852 * @param string $xmlChunk xml content buffer
853 * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
854 * This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
855 * @return string the encoding determined. Null if it can't be determined and mbstring is enabled,
856 * PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled
858 * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
859 * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
860 * the method independent of global state
862 public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
864 // discussion: see http://www.yale.edu/pclt/encoding/
865 // 1 - test if encoding is specified in HTTP HEADERS
868 // LWS: (\13\10)?( |\t)+
869 // token: (any char but excluded stuff)+
870 // quoted string: " (any char but double quotes and control chars)* "
871 // header: Content-type = ...; charset=value(; ...)*
872 // where value is of type token, no LWS allowed between 'charset' and value
873 // Note: we do not check for invalid chars in VALUE:
874 // this had better be done using pure ereg as below
875 // Note 2: we might be removing whitespace/tabs that ought to be left in if
876 // the received charset is a quoted string. But nobody uses such charset names...
878 /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
880 if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) {
881 return strtoupper(trim($matches[1], " \t\""));
884 // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
885 // (source: http://www.w3.org/TR/2000/REC-xml-20001006)
886 // NOTE: actually, according to the spec, even if we find the BOM and determine
887 // an encoding, we should check if there is an encoding specified
888 // in the xml declaration, and verify if they match.
889 /// @todo implement check as described above?
890 /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
891 if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
893 } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
895 } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {
899 // 3 - test if encoding is specified in the xml declaration
900 /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
901 /// case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
902 /// For lower versions, we could attempt usage of mb_ereg...
904 // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
905 // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
906 if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
907 '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
908 $xmlChunk, $matches)) {
909 return strtoupper(substr($matches[2], 1, -1));
912 // 4 - if mbstring is available, let it do the guesswork
913 if (function_exists('mb_detect_encoding')) {
914 if ($encodingPrefs == null && PhpXmlRpc
::$xmlrpc_detectencodings != null) {
915 $encodingPrefs = PhpXmlRpc
::$xmlrpc_detectencodings;
917 if ($encodingPrefs) {
918 $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
920 $enc = mb_detect_encoding($xmlChunk);
922 // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
923 // IANA also likes better US-ASCII, so go with it
924 if ($enc == 'ASCII') {
930 // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
931 // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
932 // this should be the standard. And we should be getting text/xml as request and response.
933 // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
934 return PhpXmlRpc
::$xmlrpc_defencoding;
939 * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration).
941 * @param string $xmlChunk
944 * @todo rename to hasEncodingDeclaration
946 public static function hasEncoding($xmlChunk)
948 // scan the first bytes of the data for a UTF-16 (or other) BOM pattern
949 // (source: http://www.w3.org/TR/2000/REC-xml-20001006)
950 if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
952 } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
954 } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {
958 // test if encoding is specified in the xml declaration
960 // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
961 // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
962 if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
963 '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
972 * @param string $message
973 * @param string $method method/file/line info
974 * @return bool false if the caller has to stop parsing
976 protected function handleParsingError($message, $method = '')
978 if ($this->current_parsing_options
['xmlrpc_reject_invalid_values']) {
979 $this->_xh
['isf'] = 2;
980 $this->_xh
['isf_reason'] = ucfirst($message);
983 $this->getLogger()->error('XML-RPC: ' . ($method != '' ?
$method . ': ' : '') . $message);
989 * Truncates unsafe data
990 * @param string $data
993 protected function truncateValueForLog($data)
995 if (strlen($data) > $this->maxLogValueLength
) {
996 return substr($data, 0, $this->maxLogValueLength
- 3) . '...';
1005 * xml parser handler function for opening element tags.
1006 * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses.
1009 * @param resource $parser
1014 public function xmlrpc_se_any($parser, $name, $attrs)
1016 // this will be spamming the log if this method is in use...
1017 $this->logDeprecation('Method ' . __METHOD__
. ' is deprecated');
1019 $this->xmlrpc_se($parser, $name, $attrs, true);
1022 public function &__get($name)
1026 case 'xmlrpc_valid_parents':
1027 $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
1028 return $this->$name;
1030 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
1031 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS
, 1);
1032 trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING
);
1038 public function __set($name, $value)
1041 // this should only ever be called by subclasses which overtook `parse()`
1043 $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
1044 $this->current_parsing_options
['accept'] = $value;
1047 case 'xmlrpc_valid_parents':
1048 $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
1049 $this->$name = $value;
1052 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
1053 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS
, 1);
1054 trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING
);
1058 public function __isset($name)
1062 $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');
1063 return isset($this->current_parsing_options
['accept']);
1065 case 'xmlrpc_valid_parents':
1066 $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');
1067 return isset($this->$name);
1073 public function __unset($name)
1076 // q: does this make sense at all?
1078 $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
1079 unset($this->current_parsing_options
['accept']);
1082 case 'xmlrpc_valid_parents':
1083 $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
1084 unset($this->$name);
1087 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
1088 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS
, 1);
1089 trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING
);