Merge pull request #4036 from dokuwiki/issue4033
[dokuwiki.git] / _test / vendor / scotteh / php-dom-wrapper / src / Document.php
blob28e2b50e397dfd427bc35163241ee4b123a348cb
1 <?php declare(strict_types=1);
3 namespace DOMWrap;
5 use DOMWrap\Traits\{
6 CommonTrait,
7 TraversalTrait,
8 ManipulationTrait
9 };
11 /**
12 * Document Node
14 * @package DOMWrap
15 * @license http://opensource.org/licenses/BSD-3-Clause BSD 3 Clause
17 class Document extends \DOMDocument
19 use CommonTrait;
20 use TraversalTrait;
21 use ManipulationTrait;
23 /** @var int */
24 protected $libxmlOptions = 0;
26 /** @var string|null */
27 protected $documentEncoding = null;
/**
 * Create the document and register the DOMWrap node classes.
 *
 * Every built-in DOM class listed below is mapped to its DOMWrap
 * counterpart through registerNodeClass().
 *
 * @param string $version  Version string forwarded to \DOMDocument.
 * @param string $encoding Encoding forwarded to \DOMDocument.
 */
public function __construct(string $version = '1.0', string $encoding = 'UTF-8') {
    parent::__construct($version, $encoding);

    // Built-in DOM class => DOMWrap replacement, registered in this order.
    $nodeClasses = [
        'DOMText' => 'DOMWrap\\Text',
        'DOMElement' => 'DOMWrap\\Element',
        'DOMComment' => 'DOMWrap\\Comment',
        'DOMDocument' => 'DOMWrap\\Document',
        'DOMDocumentType' => 'DOMWrap\\DocumentType',
        'DOMProcessingInstruction' => 'DOMWrap\\ProcessingInstruction',
    ];

    foreach ($nodeClasses as $baseClass => $extendedClass) {
        $this->registerNodeClass($baseClass, $extendedClass);
    }
}
/**
 * Store the libxml option bitmask used when markup is loaded via setHtml().
 *
 * Combine several options with bitwise OR,
 * e.g. LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD.
 *
 * @link https://www.php.net/manual/en/libxml.constants.php
 *
 * @param int $libxmlOptions Bitmask of LIBXML_* constants.
 */
public function setLibxmlOptions(int $libxmlOptions): void {
    $this->libxmlOptions = $libxmlOptions;
}
/**
 * Return the document this node belongs to, which is the document itself.
 *
 * @return \DOMDocument|null Always this instance.
 */
public function document(): ?\DOMDocument {
    return $this;
}
/**
 * Build a node list whose only entry is this document.
 *
 * @return NodeList
 */
public function collection(): NodeList {
    $self = [$this];

    return $this->newNodeList($self);
}
/**
 * Reduce a node list to a single result.
 *
 * @param NodeList $nodeList List to reduce.
 *
 * @return mixed The list's first entry, or null when the list is empty.
 */
public function result(NodeList $nodeList) {
    return $nodeList->count() ? $nodeList->first() : null;
}
/**
 * A document has no parent node.
 *
 * @return null Always null.
 */
public function parent() {
    return null;
}
/**
 * A document has no ancestors, so a node list created without any nodes is returned.
 *
 * @return mixed Result of newNodeList() called with no arguments.
 */
public function parents() {
    $ancestors = $this->newNodeList();

    return $ancestors;
}
/**
 * Ask the document to replace itself with the given node.
 *
 * @param mixed $newNode Replacement node handed to replaceChild().
 *
 * @return self This document, for chaining.
 */
public function substituteWith($newNode): self {
    // NOTE(review): the node being replaced is the document itself, which is
    // presumably never one of its own children - confirm how replaceChild()
    // is expected to behave here before relying on this method.
    $this->replaceChild($newNode, $this);

    return $this;
}
/**
 * Cloning is not provided for documents through this method.
 *
 * @return null Always null.
 */
public function _clone() {
    return null;
}
/**
 * Return the document's markup as produced by getOuterHtml().
 *
 * @return string
 */
public function getHtml(): string {
    $markup = $this->getOuterHtml();

    return $markup;
}
/**
 * Parse the given markup into this document.
 *
 * Non-string or blank input is ignored. While parsing, libxml errors are
 * collected internally and, on PHP versions before 8.0, external entity
 * loading is disabled. The previous libxml settings are restored afterwards,
 * including when parsing throws.
 *
 * @param mixed $html Markup to load; anything but a non-blank string is a no-op.
 *
 * @return self This document, for chaining.
 */
public function setHtml($html): self {
    if (!is_string($html) || trim($html) === '') {
        return $this;
    }

    $internalErrors = libxml_use_internal_errors(true);

    // libxml_disable_entity_loader() is only called on PHP < 8.0.
    $legacyEntityLoader = \PHP_VERSION_ID < 80000;
    $disableEntities = $legacyEntityLoader ? libxml_disable_entity_loader(true) : null;

    try {
        $this->composeXmlNode($html);
    } finally {
        // Always hand the global libxml state back to the caller.
        libxml_use_internal_errors($internalErrors);

        if ($legacyEntityLoader) {
            libxml_disable_entity_loader($disableEntities);
        }
    }

    return $this;
}
/**
 * Load HTML markup into the document.
 *
 * When LIBXML_HTML_NOIMPLIED is set, libxml insists on a single root node and
 * appends every subsequent node to the first one. To counter this a fake
 * <domwrap> element is placed in front of the markup; after parsing, its
 * contents are moved back to the document root and the fake element is
 * removed. Both steps are driven by the same $options value so they can
 * never get out of sync.
 *
 * An <?xml encoding="..."> prefix built from getEncoding() (default UTF-8)
 * is always prepended before parsing.
 *
 * @param string $html    Markup to parse.
 * @param int    $options Bitmask of LIBXML_* constants.
 *
 * @return bool Result of \DOMDocument::loadHTML().
 */
public function loadHTML($html, $options = 0): bool {
    $unwrapRoot = (bool) ($options & LIBXML_HTML_NOIMPLIED);

    if ($unwrapRoot) {
        $html = '<domwrap></domwrap>' . $html;
    }

    $html = '<?xml encoding="' . ($this->getEncoding() ?? 'UTF-8') . '">' . $html;

    $result = parent::loadHTML($html, $options);

    // Undo the fake root: move its contents to the document, then drop it.
    if ($unwrapRoot) {
        $roots = $this->children();

        // Nothing to unwrap if parsing produced no children.
        if ($roots->count()) {
            $wrapper = $roots->first();

            $wrapper->contents()->each(function($node) {
                $this->appendWith($node);
            });

            $this->removeChild($wrapper);
        }
    }

    return $result;
}
/**
 * Set the encoding assumed for markup loaded into this document.
 *
 * @param string|null $encoding Encoding name, or null to clear the stored value.
 */
public function setEncoding(?string $encoding = null) {
    $this->documentEncoding = $encoding;
}
/**
 * Return the encoding stored by setEncoding().
 *
 * @return string|null Encoding name, or null when none is stored.
 */
public function getEncoding(): ?string {
    return $this->documentEncoding;
}
/**
 * Extract the charset declared by a <meta> tag in the markup.
 *
 * The search stays inside a single <meta ...> tag: it may span line breaks
 * within the tag but never runs past the tag's closing ">".
 *
 * @param string $html Markup to inspect.
 *
 * @return string|null Upper-cased charset name, or null when none is declared.
 */
private function getCharset(string $html): ?string {
    if (preg_match('@<meta[^>]*?charset=["\']?([^"\'\s>]+)@i', $html, $matches) === 1) {
        return mb_strtoupper($matches[1]);
    }

    return null;
}
/**
 * Determine the encoding of the markup and store it via setEncoding().
 *
 * Order of precedence: the encoding already stored on the document, then a
 * charset declared in a <meta> tag, then UTF-8 if strict multibyte detection
 * reports it. If none applies, null is stored.
 *
 * @param string $html Markup to inspect.
 */
private function detectEncoding(string $html) {
    $charset = $this->getEncoding() ?? $this->getCharset($html);

    // Only scan the markup when no explicit or declared charset was found.
    if ($charset === null && mb_detect_encoding($html, mb_detect_order(), true) === 'UTF-8') {
        $charset = 'UTF-8';
    }

    $this->setEncoding($charset);
}
/**
 * Convert the markup to UTF-8 based on the document's stored encoding.
 *
 * With a known encoding, charset declarations in the markup are rewritten to
 * UTF-8 and the text is converted with mbstring, falling back to iconv when
 * mbstring does not list the encoding. Without an encoding (or when iconv
 * fails) the markup is treated as UTF-8 and converted to HTML entities.
 *
 * @param string $html Markup to convert.
 *
 * @return string Converted markup.
 */
private function convertToUtf8(string $html): string {
    $charset = $this->getEncoding();

    if ($charset !== null) {
        // Keep the untouched markup if PCRE reports an error (null result).
        $html = preg_replace('@(charset=["]?)([^"\s]+)([^"]*["]?)@im', '$1UTF-8$3', $html) ?? $html;

        // Compare case-insensitively: setEncoding() accepts names in any case.
        $knownEncodings = array_map('mb_strtoupper', mb_list_encodings());
        $mbHasCharset = in_array(mb_strtoupper($charset), $knownEncodings, true);

        if ($mbHasCharset) {
            $html = mb_convert_encoding($html, 'UTF-8', $charset);

        // Fallback to iconv if available.
        } elseif (extension_loaded('iconv')) {
            $htmlIconv = iconv($charset, 'UTF-8', $html);

            if ($htmlIconv !== false) {
                $html = $htmlIconv;
            } else {
                // Conversion failed: fall through to the entity conversion below.
                $charset = null;
            }
        }
    }

    if ($charset === null) {
        // NOTE(review): the 'HTML-ENTITIES' pseudo-encoding is deprecated as of
        // PHP 8.2; plan a replacement before it is removed.
        $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
    }

    return $html;
}
254 * @param $html
256 private function composeXmlNode($html)
258 $this->detectEncoding($html);
260 $html = $this->convertToUtf8($html);
262 $this->loadHTML($html, $this->libxmlOptions);
264 // Remove <?xml ...> processing instruction.
265 $this->contents()->each(function($node) {
266 if ($node instanceof ProcessingInstruction && $node->nodeName == 'xml') {
267 $node->destroy();