/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */
/**
 * Message carried by the exception thrown when an ENTITY declaration is found.
 */
const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';
/**
 * Heuristic scan to detect entity in XML
 *
 * Looks for every encoded form of "<!ENTITY" returned by
 * getEntityComparison() as a raw substring of the document.
 *
 * @param  string $xml XML document as a string
 * @throws Exception\RuntimeException If entity expansion or external entity declaration was discovered.
 * @return void
 */
protected static function heuristicScan($xml)
{
    $needles = self::getEntityComparison($xml);

    foreach ($needles as $needle) {
        if (false === strpos($xml, $needle)) {
            continue;
        }

        throw new Exception\RuntimeException(self::ENTITY_DETECT);
    }
}
/**
 * Scan XML string for potential XXE and XEE attacks
 *
 * When isPhpFpm() reports true, the raw string is checked heuristically for
 * an ENTITY declaration before it is parsed. Otherwise the document is parsed
 * with the libxml entity loader disabled and rejected if its DOCTYPE node
 * declares any entity.
 *
 * @param  string      $xml XML document as a string
 * @param  DOMDocument $dom Optional document to load the XML into; when
 *                          supplied it is returned instead of a SimpleXMLElement
 * @throws Exception\RuntimeException If an ENTITY declaration is detected
 * @return SimpleXMLElement|DOMDocument|boolean False if the XML could not be
 *                                              loaded or imported
 */
public static function scan($xml, DOMDocument $dom = null)
{
    // Evaluated once so every branch below agrees on which strategy is in use.
    $isPhpFpm = self::isPhpFpm();

    // If running with PHP-FPM we perform an heuristic scan
    // We cannot use libxml_disable_entity_loader because of this bug
    // @see https://bugs.php.net/bug.php?id=64938
    if ($isPhpFpm) {
        self::heuristicScan($xml);
    }

    // Without a caller-supplied document the result is handed back as SimpleXML.
    $returnSimpleXml = (null === $dom);
    if ($returnSimpleXml) {
        $dom = new DOMDocument();
    }

    if (!$isPhpFpm) {
        $loadEntities         = libxml_disable_entity_loader(true);
        $useInternalXmlErrors = libxml_use_internal_errors(true);
    }

    // Load XML with network access disabled (LIBXML_NONET).
    // Messages raised by DOMDocument::loadXML() are swallowed by the handler;
    // anything else is passed on to the previously active handling.
    set_error_handler(function ($errno, $errstr) {
        if (substr_count($errstr, 'DOMDocument::loadXML()') > 0) {
            return true;
        }
        return false;
    }, E_WARNING);
    $result = $dom->loadXML($xml, LIBXML_NONET);
    restore_error_handler();

    // Scan for potential XEE attacks using ENTITY, if not PHP-FPM
    $entityDetected = false;
    if ($result && !$isPhpFpm) {
        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE
                && $child->entities->length > 0
            ) {
                $entityDetected = true;
                break;
            }
        }
    }

    // Entity load to previous setting. This happens before any return or
    // throw so the process-wide libxml configuration never leaks out.
    if (!$isPhpFpm) {
        libxml_disable_entity_loader($loadEntities);
        libxml_use_internal_errors($useInternalXmlErrors);
    }

    if (!$result) {
        return false;
    }

    if ($entityDetected) {
        throw new Exception\RuntimeException(self::ENTITY_DETECT);
    }

    if ($returnSimpleXml) {
        $result = simplexml_import_dom($dom);
        if (!$result instanceof SimpleXMLElement) {
            return false;
        }
        return $result;
    }

    return $dom;
}
/**
 * Scan XML file for potential XXE/XEE attacks
 *
 * Reads the whole file into memory and delegates to scan().
 *
 * @param  string      $file Path of the XML file to scan
 * @param  DOMDocument $dom  Optional document passed through to scan()
 * @throws Exception\InvalidArgumentException If the path does not exist
 * @return SimpleXMLElement|DOMDocument|boolean Whatever scan() returns
 */
public static function scanFile($file, DOMDocument $dom = null)
{
    if (file_exists($file)) {
        $contents = file_get_contents($file);

        return self::scan($contents, $dom);
    }

    throw new Exception\InvalidArgumentException(
        "The file $file specified doesn't exist"
    );
}
/**
 * Return true if PHP is running with PHP-FPM
 *
 * This method is mainly used to determine whether or not heuristic checks
 * (vs libxml checks) should be made, due to threading issues in libxml;
 * under php-fpm, threading becomes a concern.
 *
 * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
 * libxml support in PHP that makes the libxml checks viable; in such
 * versions, this method will return false to enforce those checks, which
 * are more strict and accurate than the heuristic checks.
 *
 * @return boolean True only for an FPM SAPI on an unpatched PHP version
 */
public static function isPhpFpm()
{
    // Unpatched: anything below 5.5.22, or the 5.6 line below 5.6.6.
    $isVulnerableVersion = (
        version_compare(PHP_VERSION, '5.5.22', 'lt')
        || (
            version_compare(PHP_VERSION, '5.6', 'gte')
            && version_compare(PHP_VERSION, '5.6.6', 'lt')
        )
    );

    return $isVulnerableVersion && substr(php_sapi_name(), 0, 3) === 'fpm';
}
/**
 * Determine and return the string(s) to use for the <!ENTITY comparison.
 *
 * Encodes the literal "<!ENTITY" once per candidate encoding reported by
 * detectXmlEncoding(); an encoding without a registered generator is
 * treated as UTF-8.
 *
 * @param  string $xml XML document as a string
 * @return string[] One encoded "<!ENTITY" needle per candidate encoding
 */
protected static function getEntityComparison($xml)
{
    $generators = self::getAsciiEncodingMap();
    $candidates = self::detectXmlEncoding($xml, self::detectStringEncoding($xml));

    $needles = array();
    foreach ($candidates as $candidate) {
        if (isset($generators[$candidate])) {
            $encode = $generators[$candidate];
        } else {
            $encode = $generators['UTF-8'];
        }

        $needles[] = $encode('<!ENTITY');
    }

    return $needles;
}
/**
 * Determine the string encoding.
 *
 * A byte order mark wins when detectBom() finds one; otherwise the
 * encoding is derived from how the XML declaration itself is encoded.
 *
 * @param  string $xml XML document as a string
 * @return string File encoding
 */
protected static function detectStringEncoding($xml)
{
    $fromBom = self::detectBom($xml);
    if ($fromBom) {
        return $fromBom;
    }

    return self::detectXmlStringEncoding($xml);
}
/**
 * Attempt to match a known BOM.
 *
 * Iterates through the return of getBomMap(), comparing the initial bytes
 * of the provided string to the BOM of each; if a match is determined,
 * it returns the encoding.
 *
 * @param  string $string
 * @return false|string Returns encoding on success, false when no BOM matches.
 */
protected static function detectBom($string)
{
    foreach (self::getBomMap() as $criteria) {
        if (0 === strncmp($string, $criteria['bom'], $criteria['length'])) {
            return $criteria['encoding'];
        }
    }

    return false;
}
/**
 * Attempt to detect the string encoding of an XML string.
 *
 * Encodes the opening of an XML declaration with each generator from
 * getAsciiEncodingMap() and returns the first encoding whose encoded form
 * is a prefix of the document.
 *
 * @param  string $xml XML document as a string
 * @return string Encoding
 */
protected static function detectXmlStringEncoding($xml)
{
    foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
        // Concatenated so the PHP open tag sequence never appears in this file.
        $prefix = $generator('<' . '?xml');
        if (0 === strncmp($xml, $prefix, strlen($prefix))) {
            return $encoding;
        }
    }

    // Fallback when no XML declaration is recognised.
    return 'UTF-8';
}
/**
 * Attempt to detect the specified XML encoding.
 *
 * Using the file's encoding, determines if an "encoding" attribute is
 * present and well-formed in the XML declaration; if so, it returns a
 * list with both the ASCII representation of that declaration and the
 * original file encoding.
 *
 * If not, a list containing only the provided file encoding is returned.
 *
 * @param  string $xml          XML document as a string
 * @param  string $fileEncoding Encoding detected for the document bytes
 * @return string[] Potential XML encodings
 */
protected static function detectXmlEncoding($xml, $fileEncoding)
{
    $encodingMap = self::getAsciiEncodingMap();
    // Same fallback as getEntityComparison(): an unknown encoding is treated as UTF-8.
    $generator   = isset($encodingMap[$fileEncoding])
        ? $encodingMap[$fileEncoding]
        : $encodingMap['UTF-8'];
    $encAttr     = $generator('encoding="');
    $quote       = $generator('"');
    $close       = $generator('>');

    $closePos = strpos($xml, $close);
    if (false === $closePos) {
        return array($fileEncoding);
    }

    // The attribute only counts when it appears before the first ">".
    $encPos = strpos($xml, $encAttr);
    if (false === $encPos
        || $encPos > $closePos
    ) {
        return array($fileEncoding);
    }

    $encPos   += strlen($encAttr);
    $quotePos = strpos($xml, $quote, $encPos);
    if (false === $quotePos) {
        return array($fileEncoding);
    }

    $encoding = self::substr($xml, $encPos, $quotePos);

    return array(
        // Following line works because we're only supporting 8-bit safe encodings at this time.
        // Double quotes are required: '\0' in single quotes is a literal backslash
        // followed by "0", which never occurs, so the NUL padding was never removed.
        str_replace("\0", '', $encoding), // detected encoding
        $fileEncoding,                    // file encoding
    );
}
/**
 * Return a list of BOM maps.
 *
 * Returns a list of common encoding -> BOM maps, along with the character
 * length to compare against.
 *
 * Order matters: the four-byte marks come first because the UTF-32LE mark
 * begins with the same two bytes as the UTF-16LE mark.
 *
 * @link   https://en.wikipedia.org/wiki/Byte_order_mark
 * @return array[] Entries with the keys "encoding", "bom" and "length"
 */
protected static function getBomMap()
{
    return array(
        array(
            'encoding' => 'UTF-32BE',
            'bom'      => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
            'length'   => 4,
        ),
        array(
            'encoding' => 'UTF-32LE',
            'bom'      => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
            'length'   => 4,
        ),
        array(
            'encoding' => 'GB-18030',
            'bom'      => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
            'length'   => 4,
        ),
        array(
            'encoding' => 'UTF-16BE',
            'bom'      => pack('CC', 0xfe, 0xff),
            'length'   => 2,
        ),
        array(
            'encoding' => 'UTF-16LE',
            'bom'      => pack('CC', 0xff, 0xfe),
            'length'   => 2,
        ),
        array(
            'encoding' => 'UTF-8',
            'bom'      => pack('CCC', 0xef, 0xbb, 0xbf),
            'length'   => 3,
        ),
    );
}
/**
 * Return a map of encoding => generator pairs.
 *
 * Returns a map of encoding => generator pairs, where the generator is a
 * callable that accepts a string and returns the appropriate byte order
 * sequence of that string for the encoding.
 *
 * The multi-byte generators pad each character with NUL bytes; the UTF-8
 * and GB-18030 generators return ASCII input unchanged.
 *
 * @return callable[] Generators keyed by encoding name
 */
protected static function getAsciiEncodingMap()
{
    return array(
        'UTF-32BE'   => function ($ascii) {
            return preg_replace('/(.)/', "\0\0\0\\1", $ascii);
        },
        'UTF-32LE'   => function ($ascii) {
            return preg_replace('/(.)/', "\\1\0\0\0", $ascii);
        },
        'UTF-32odd1' => function ($ascii) {
            return preg_replace('/(.)/', "\0\\1\0\0", $ascii);
        },
        'UTF-32odd2' => function ($ascii) {
            return preg_replace('/(.)/', "\0\0\\1\0", $ascii);
        },
        'UTF-16BE'   => function ($ascii) {
            return preg_replace('/(.)/', "\0\\1", $ascii);
        },
        'UTF-16LE'   => function ($ascii) {
            return preg_replace('/(.)/', "\\1\0", $ascii);
        },
        'UTF-8'      => function ($ascii) {
            return $ascii;
        },
        'GB-18030'   => function ($ascii) {
            return $ascii;
        },
    );
}
/**
 * Binary-safe substr.
 *
 * Copies the string one offset at a time instead of calling substr();
 * presumably this keeps the operation byte-oriented even where mbstring
 * function overloading remaps substr() (verify against the supported
 * PHP configurations).
 *
 * Note that the third argument is an exclusive end offset, not a length.
 *
 * @param  string $string
 * @param  int    $start First offset to copy
 * @param  int    $end   Offset at which copying stops (exclusive)
 * @return string
 */
protected static function substr($string, $start, $end)
{
    $substr = '';
    for ($i = $start; $i < $end; $i += 1) {
        $substr .= $string[$i];
    }

    return $substr;
}