composer package updates
[openemr.git] / vendor / zendframework / zendxml / src / Security.php
blob0b38d053b101178b3ff635a5096a6f85a75ff2cc
1 <?php
2 /**
3 * @see https://github.com/zendframework/ZendXml for the canonical source repository
4 * @copyright Copyright (c) 2018 Zend Technologies USA Inc. (https://www.zend.com)
5 * @license https://github.com/zendframework/ZendXml/blob/master/LICENSE.md New BSD License
6 */
8 namespace ZendXml;
10 use DOMDocument;
11 use SimpleXMLElement;
/**
 * Static helpers for loading XML from untrusted sources.
 *
 * Documents that declare entities are rejected, since entity declarations
 * are the vehicle for XXE (external entity) and XEE (entity expansion)
 * attacks.
 */
class Security
{
    const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';

    /**
     * Heuristic scan to detect entity in XML
     *
     * Searches the raw bytes of $xml for the "<!ENTITY" marker, encoded in
     * every encoding returned by getEntityComparison().
     *
     * @param string $xml
     * @return void
     * @throws Exception\RuntimeException If entity expansion or external entity declaration was discovered.
     */
    protected static function heuristicScan($xml)
    {
        foreach (self::getEntityComparison($xml) as $needle) {
            if (false !== strpos($xml, $needle)) {
                throw new Exception\RuntimeException(self::ENTITY_DETECT);
            }
        }
    }

    /**
     * Scan XML string for potential XXE and XEE attacks
     *
     * When no DOMDocument is supplied, a new one is created and the result
     * is returned as a SimpleXMLElement; otherwise the XML is loaded into the
     * supplied document, which is returned.
     *
     * @param string $xml
     * @param DOMDocument $dom Optional document to load the XML into.
     * @throws Exception\RuntimeException If an entity declaration is detected.
     * @return SimpleXMLElement|DOMDocument|boolean False when $xml is empty or cannot be parsed.
     */
    public static function scan($xml, DOMDocument $dom = null)
    {
        $isPhpFpm = self::isPhpFpm();

        // If running with PHP-FPM we perform an heuristic scan
        // We cannot use libxml_disable_entity_loader because of this bug
        // @see https://bugs.php.net/bug.php?id=64938
        if ($isPhpFpm) {
            self::heuristicScan($xml);
        }

        // Empty input can never parse. PHP < 8 reports it with a warning and
        // a false return from loadXML(); PHP 8+ throws ValueError instead.
        // Returning false up front keeps the result identical everywhere.
        // (false is included because scanFile() forwards the raw result of
        // file_get_contents().)
        if (null === $xml || false === $xml || '' === $xml) {
            return false;
        }

        $returnSimpleXml = (null === $dom);
        if ($returnSimpleXml) {
            $dom = new DOMDocument();
        }

        $useLibxmlChecks = ! $isPhpFpm;

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, which
        // requires libxml >= 2.9.0 where external entity loading is already
        // disabled by default. Only toggle it on older PHP versions.
        $toggleEntityLoader = $useLibxmlChecks && PHP_VERSION_ID < 80000;

        $loadEntities         = null;
        $useInternalXmlErrors = null;
        if ($useLibxmlChecks) {
            if ($toggleEntityLoader) {
                $loadEntities = libxml_disable_entity_loader(true);
            }
            $useInternalXmlErrors = libxml_use_internal_errors(true);
        }

        try {
            // Load XML with network access disabled (LIBXML_NONET).
            // Warnings raised by DOMDocument::loadXML() are swallowed by a
            // temporary error handler; any other warning is passed on.
            set_error_handler(function ($errno, $errstr) {
                return substr_count($errstr, 'DOMDocument::loadXML()') > 0;
            }, E_WARNING);

            try {
                $loaded = $dom->loadXML($xml, LIBXML_NONET);
            } finally {
                restore_error_handler();
            }

            if (! $loaded) {
                return false;
            }

            // Scan for potential XEE attacks using ENTITY, if not PHP-FPM
            if ($useLibxmlChecks) {
                foreach ($dom->childNodes as $child) {
                    if ($child->nodeType === XML_DOCUMENT_TYPE_NODE
                        && $child->entities->length > 0
                    ) {
                        throw new Exception\RuntimeException(self::ENTITY_DETECT);
                    }
                }
            }
        } finally {
            // Restore the previous libxml settings on every exit path,
            // including the exception thrown when an entity is detected.
            if ($useLibxmlChecks) {
                if ($toggleEntityLoader) {
                    libxml_disable_entity_loader($loadEntities);
                }
                libxml_use_internal_errors($useInternalXmlErrors);
            }
        }

        if (! $returnSimpleXml) {
            return $dom;
        }

        $simpleXml = simplexml_import_dom($dom);

        return $simpleXml instanceof SimpleXMLElement ? $simpleXml : false;
    }

    /**
     * Scan XML file for potential XXE/XEE attacks
     *
     * Reads the whole file and delegates to scan().
     *
     * @param string $file
     * @param DOMDocument $dom Optional document to load the XML into.
     * @throws Exception\InvalidArgumentException If the file does not exist.
     * @throws Exception\RuntimeException If an entity declaration is detected.
     * @return SimpleXMLElement|DOMDocument|boolean False when the contents are empty, unreadable or unparsable.
     */
    public static function scanFile($file, DOMDocument $dom = null)
    {
        if (! file_exists($file)) {
            throw new Exception\InvalidArgumentException(
                "The file $file specified doesn't exist"
            );
        }

        return self::scan(file_get_contents($file), $dom);
    }

    /**
     * Return true if PHP is running with PHP-FPM
     *
     * This method is mainly used to determine whether or not heuristic checks
     * (vs libxml checks) should be made, due to threading issues in libxml;
     * under php-fpm, threading becomes a concern.
     *
     * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
     * libxml support in PHP that makes the libxml checks viable; in such
     * versions, this method will return false to enforce those checks, which
     * are more strict and accurate than the heuristic checks.
     *
     * @return boolean
     */
    public static function isPhpFpm()
    {
        $isVulnerableVersion = version_compare(PHP_VERSION, '5.5.22', 'lt')
            || (
                version_compare(PHP_VERSION, '5.6', 'gte')
                && version_compare(PHP_VERSION, '5.6.6', 'lt')
            );

        return $isVulnerableVersion && 'fpm' === substr(php_sapi_name(), 0, 3);
    }

    /**
     * Determine and return the string(s) to use for the <!ENTITY comparison.
     *
     * One needle is produced per encoding reported by detectXmlEncoding();
     * encodings missing from the ASCII encoding map are treated as UTF-8.
     *
     * @param string $xml
     * @return string[]
     */
    protected static function getEntityComparison($xml)
    {
        $encodingMap = self::getAsciiEncodingMap();
        $encodings   = self::detectXmlEncoding($xml, self::detectStringEncoding($xml));

        return array_map(function ($encoding) use ($encodingMap) {
            $generator = isset($encodingMap[$encoding]) ? $encodingMap[$encoding] : $encodingMap['UTF-8'];
            return $generator('<!ENTITY');
        }, $encodings);
    }

    /**
     * Determine the string encoding.
     *
     * Determines string encoding from either a detected BOM or a
     * heuristic.
     *
     * @param string $xml
     * @return string File encoding
     */
    protected static function detectStringEncoding($xml)
    {
        return self::detectBom($xml) ?: self::detectXmlStringEncoding($xml);
    }

    /**
     * Attempt to match a known BOM.
     *
     * Iterates through the return of getBomMap(), comparing the initial bytes
     * of the provided string to the BOM of each; if a match is determined,
     * it returns the encoding.
     *
     * @param string $string
     * @return false|string Returns encoding on success.
     */
    protected static function detectBom($string)
    {
        foreach (self::getBomMap() as $criteria) {
            if (0 === strncmp($string, $criteria['bom'], $criteria['length'])) {
                return $criteria['encoding'];
            }
        }

        return false;
    }

    /**
     * Attempt to detect the string encoding of an XML string.
     *
     * Compares the start of the string with the "<?xml" marker as encoded by
     * each entry of getAsciiEncodingMap(), in map order.
     *
     * @param string $xml
     * @return string Encoding; 'UTF-8' when nothing matches.
     */
    protected static function detectXmlStringEncoding($xml)
    {
        foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
            $prefix = $generator('<' . '?xml');
            if (0 === strncmp($xml, $prefix, strlen($prefix))) {
                return $encoding;
            }
        }

        // Fallback
        return 'UTF-8';
    }

    /**
     * Attempt to detect the specified XML encoding.
     *
     * Using the file's encoding, determines if an "encoding" attribute is
     * present and well-formed in the XML declaration; if so, it returns a
     * list with both the ASCII representation of that declaration and the
     * original file encoding.
     *
     * If not, a list containing only the provided file encoding is returned.
     *
     * Only the double-quoted form (encoding="...") is recognised.
     *
     * @param string $xml
     * @param string $fileEncoding
     * @return string[] Potential XML encodings
     */
    protected static function detectXmlEncoding($xml, $fileEncoding)
    {
        $encodingMap = self::getAsciiEncodingMap();
        // Unknown file encodings are treated as UTF-8, mirroring
        // getEntityComparison().
        $generator = isset($encodingMap[$fileEncoding])
            ? $encodingMap[$fileEncoding]
            : $encodingMap['UTF-8'];

        $encAttr = $generator('encoding="');
        $quote   = $generator('"');
        $close   = $generator('>');

        $closePos = strpos($xml, $close);
        if (false === $closePos) {
            return [$fileEncoding];
        }

        // The attribute only counts when it appears before the first ">".
        $encPos = strpos($xml, $encAttr);
        if (false === $encPos || $encPos > $closePos) {
            return [$fileEncoding];
        }

        $encPos   += strlen($encAttr);
        $quotePos = strpos($xml, $quote, $encPos);
        if (false === $quotePos) {
            return [$fileEncoding];
        }

        $encoding = self::substr($xml, $encPos, $quotePos);

        return [
            // Strip the NUL padding bytes that 16/32-bit encodings place
            // around each ASCII character. The needle must be double-quoted:
            // '\0' in single quotes is a literal backslash followed by a zero.
            str_replace("\0", '', $encoding), // detected encoding
            $fileEncoding,                    // file encoding
        ];
    }

    /**
     * Return a list of BOM maps.
     *
     * Returns a list of common encoding -> BOM maps, along with the character
     * length to compare against.
     *
     * Four-byte BOMs are listed first so that the UTF-32LE BOM is not
     * mistaken for the UTF-16LE BOM it starts with.
     *
     * @link https://en.wikipedia.org/wiki/Byte_order_mark
     * @return array
     */
    protected static function getBomMap()
    {
        return [
            [
                'encoding' => 'UTF-32BE',
                'bom'      => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
                'length'   => 4,
            ],
            [
                'encoding' => 'UTF-32LE',
                'bom'      => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
                'length'   => 4,
            ],
            [
                'encoding' => 'GB-18030',
                'bom'      => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
                'length'   => 4,
            ],
            [
                'encoding' => 'UTF-16BE',
                'bom'      => pack('CC', 0xfe, 0xff),
                'length'   => 2,
            ],
            [
                'encoding' => 'UTF-16LE',
                'bom'      => pack('CC', 0xff, 0xfe),
                'length'   => 2,
            ],
            [
                'encoding' => 'UTF-8',
                'bom'      => pack('CCC', 0xef, 0xbb, 0xbf),
                'length'   => 3,
            ],
        ];
    }

    /**
     * Return a map of encoding => generator pairs.
     *
     * Returns a map of encoding => generator pairs, where the generator is a
     * callable that accepts a string and returns the appropriate byte order
     * sequence of that string for the encoding.
     *
     * @return array
     */
    protected static function getAsciiEncodingMap()
    {
        return [
            'UTF-32BE' => function ($ascii) {
                return preg_replace('/(.)/', "\0\0\0\\1", $ascii);
            },
            'UTF-32LE' => function ($ascii) {
                return preg_replace('/(.)/', "\\1\0\0\0", $ascii);
            },
            'UTF-32odd1' => function ($ascii) {
                return preg_replace('/(.)/', "\0\\1\0\0", $ascii);
            },
            'UTF-32odd2' => function ($ascii) {
                return preg_replace('/(.)/', "\0\0\\1\0", $ascii);
            },
            'UTF-16BE' => function ($ascii) {
                return preg_replace('/(.)/', "\0\\1", $ascii);
            },
            'UTF-16LE' => function ($ascii) {
                return preg_replace('/(.)/', "\\1\0", $ascii);
            },
            'UTF-8' => function ($ascii) {
                return $ascii;
            },
            'GB-18030' => function ($ascii) {
                return $ascii;
            },
        ];
    }

    /**
     * Byte-wise substring by start and end offset.
     *
     * Concatenates the bytes of $string from offset $start up to, but not
     * including, offset $end. Returns an empty string when $end <= $start.
     *
     * NOTE(review): presumably hand-rolled so it keeps working on bytes even
     * if the global substr() is overloaded by mbstring - confirm.
     *
     * @param string $string
     * @param int $start Offset of the first byte to copy.
     * @param int $end Offset one past the last byte to copy.
     * @return string
     */
    protected static function substr($string, $start, $end)
    {
        $substr = '';
        for ($i = $start; $i < $end; $i += 1) {
            $substr .= $string[$i];
        }

        return $substr;
    }
}