a7c75fd7b54367d6f1c8f91b37a6246bcf600dc6
[xhtml-compiler.git] / XHTMLCompiler / FilterManager.php
bloba7c75fd7b54367d6f1c8f91b37a6246bcf600dc6
1 <?php
3 /**
4 * Manages various filters in a document, and performs text processing
5 */
6 class XHTMLCompiler_FilterManager
/** Text filters run before the document is parsed, keyed by filter name */
protected $preTextFilters = array();

/** Text filters run on the serialized document, keyed by filter name */
protected $postTextFilters = array();

/** DOM filters, keyed by filter name */
protected $DOMFilters = array();

/**
 * Markup handlers keyed by source file extension. A boolean value
 * means the text is parsed as-is, without markup pre-processing.
 */
protected $Markup = array('xhtml' => true);

/** Set of xc: attribute names already claimed by a DOM filter */
protected $xcAttr = array();

/** Messages collected by validationErrorHandler() */
protected $errors = array();

/** Dependencies as filename => modification time */
protected $deps = array();
/**
 * Registers a text filter that runs before the document is parsed.
 * @note Pre text filters see the raw source before any DOM exists,
 *       which makes them the place to translate specialized,
 *       non-XML markup.
 * @param $filter XHTMLCompiler_TextFilter, or a name that
 *        loadFilter() can resolve
 * @return The registered filter object
 * @throws Exception if a pre text filter of that name is registered
 */
public function addPreTextFilter($filter) {
    $instance = $this->loadFilter($filter, 'TextFilter');
    $name = $instance->getName();
    if (!isset($this->preTextFilters[$name])) {
        $this->preTextFilters[$name] = $instance;
        return $instance;
    }
    throw new Exception('Cannot overload pre text filter ' .
        $instance->getName());
}
/**
 * Registers a text filter that runs on the serialized document.
 * @note Post text filters operate on the text produced after the
 *       DOM has been written back out; they are meant for cosmetic
 *       touch-ups of the source.
 * @warning Nothing re-checks well-formedness of what these filters
 *          produce, so they must take care not to break it.
 * @param $filter XHTMLCompiler_TextFilter, or a name that
 *        loadFilter() can resolve
 * @return The registered filter object
 * @throws Exception if a post text filter of that name is registered
 */
public function addPostTextFilter($filter) {
    $instance = $this->loadFilter($filter, 'TextFilter');
    $name = $instance->getName();
    if (!isset($this->postTextFilters[$name])) {
        $this->postTextFilters[$name] = $instance;
        return $instance;
    }
    throw new Exception('Cannot overload post text filter ' .
        $instance->getName());
}
/**
 * Adds a DOM-processing filter to the queue and claims the xc:
 * attributes it declares.
 * @param $filter XHTMLCompiler_DOMFilter, or a name that
 *        loadFilter() can resolve
 * @return The registered filter object
 * @throws Exception if a DOM filter of that name is registered, or
 *         if one of its xc: attributes is already claimed
 */
public function addDOMFilter($filter) {
    $filter = $this->loadFilter($filter, 'DOMFilter');
    $n = $filter->getName();
    if (isset($this->DOMFilters[$n])) {
        throw new Exception('Cannot overload DOM filter ' .
            $filter->getName());
    }
    // Check every attribute before claiming any of them, so that a
    // rejected filter leaves no attributes behind in $xcAttr.
    $claimed = array();
    foreach ($filter->getXCAttributesDefined() as $attribute) {
        if (isset($this->xcAttr[$attribute]) || isset($claimed[$attribute])) {
            throw new Exception('Duplicate attribute definition for '.
                'xc:' . $attribute);
        }
        $claimed[$attribute] = true;
    }
    foreach ($claimed as $attribute => $flag) {
        $this->xcAttr[$attribute] = true;
    }
    return $this->DOMFilters[$n] = $filter;
}
/**
 * Registers a markup handler for a source file extension.
 * @param $ext String extension the handler is responsible for
 * @param $filter Markup handler object, or a name that loadFilter()
 *        can resolve
 * @return The registered handler
 * @throws Exception if the extension already has an entry
 */
public function addMarkup($ext, $filter) {
    $handler = $this->loadFilter($filter, 'Markup');
    if (!isset($this->Markup[$ext])) {
        $this->Markup[$ext] = $handler;
        return $handler;
    }
    throw new Exception('Cannot overload extension ' . $ext);
}
/**
 * Returns the registered markup table.
 * @return Array mapping extension to its handler (or a boolean)
 */
public function getMarkup() {
    $table = $this->Markup;
    return $table;
}
/**
 * Resolves a filter given by name into an object; anything that is
 * not a string is returned untouched.
 *
 * A name is tried, in order, as:
 *  1. the class XHTMLCompiler_{$subclass}_{$filter},
 *  2. a class called exactly $filter,
 *  3. the file "$subclass/$filter.php", which is expected to define
 *     the class from step 1.
 *
 * @param $filter String filter name, or a filter object
 * @param $subclass String filter family, e.g. 'TextFilter'
 * @return Filter object
 * @throws Exception if the required file does not define the class
 */
protected function loadFilter($filter, $subclass) {
    if (!is_string($filter)) {
        return $filter;
    }
    $class = "XHTMLCompiler_{$subclass}_$filter";
    if (class_exists($class)) {
        return new $class;
    }
    if (class_exists($filter)) {
        return new $filter;
    }
    require "$subclass/$filter.php";
    // Without this check a file that does not declare the expected
    // class ends in a "class not found" failure that does not say
    // which filter was being loaded.
    if (!class_exists($class)) {
        throw new Exception("Filter file $subclass/$filter.php does not define class $class");
    }
    return new $class;
}
/**
 * Returns the dependencies recorded so far.
 * @return Array of filename => value stored by addDependency()
 */
public function getDeps() {
    return $this->deps;
}
/**
 * Records a file as a dependency, storing the result of filemtime()
 * for it under its filename.
 * @param $filename String path of the file
 */
public function addDependency($filename) {
    $modified = filemtime($filename);
    $this->deps[$filename] = $modified;
}
/**
 * Accepts a page's text and turns it into its DOM representation.
 * Markup pre-processing, pre text filters, initial validation and
 * XIncludes are handled before returning. DOM filters are *not* run.
 * @param $text String text to be processed
 * @param $page Page object being processed; its getSourceExt() picks
 *        the markup handler (presumably an XHTMLCompiler_Page)
 * @return DOMDocument
 * @throws Exception if no markup is registered for the page's
 *         source extension
 */
public function parse($text, $page) {
    $ext = $page->getSourceExt();
    if (!isset($this->Markup[$ext])) {
        // Fail with a clear message instead of an undefined-index
        // notice followed by a method call on null.
        throw new Exception('No markup registered for extension ' . $ext);
    }
    $markup = $this->Markup[$ext];
    if (!is_bool($markup)) {
        // do markup pre-processing
        $text = $markup->process($text, $page, $this);
        // Conceivably, if something properly put things into
        // DOM form before kicking it out, this wouldn't work;
        // such a case is highly unlikely though.
    }

    // do pre-text processing
    foreach ($this->preTextFilters as $filter) {
        $text = $filter->process($text, $page, $this);
    }

    // generate the DOM
    $this->setupXMLCatalog();
    $dom = $this->createDOM($text);

    $this->analyzeInternalSubset($dom);

    // validate the document to force the entities to be resolved,
    // we don't actually care about the errors
    set_error_handler(array($this, 'muteErrorHandler'));
    $dom->validate();
    restore_error_handler();

    // dependencies have to be collected before the include elements
    // are replaced by their content
    $this->analyzeXIncludes($dom);
    $dom->xinclude();

    return $dom;
}
/**
 * Accepts a page's text (usually XHTML) and processes it: parses it,
 * runs the DOM filters, serializes, cleans up the text, runs the post
 * text filters and finally validates the result. If validation fails
 * or errors were collected, a warning block listing them is inserted
 * into the returned document.
 * @param $text String text to be processed
 * @param $page XHTMLCompiler_Page representing currently processed page
 * @return String processed text
 */
public function process($text, $page) {
    // Start from a clean slate so that errors collected for a
    // previously processed page are not reported against this one.
    $this->errors = array();

    $dom = $this->parse($text, $page);

    // run DOM filters
    foreach ($this->DOMFilters as $filter) {
        $filter->setup($dom);
        $filter->process($dom, $page, $this);
    }

    // translate back to text
    $text = $dom->saveXML();

    // remove all non-default namespace declarations, may change,
    // but for now embedded XML namespaces are not cross-browser friendly
    $text = preg_replace('/ xmlns:.+?=".+?"/', '', $text);
    // scrub out custom DTD additions
    $text = preg_replace('/(<!DOCTYPE[^>]*?) ?\[[^\]]+\]/', '\1', $text);
    foreach ($this->postTextFilters as $filter) {
        $text = $filter->process($text, $page, $this);
    }

    // replace all CDATA sections; the content may be empty, and must
    // be allowed to be, otherwise an empty section would match all
    // the way to the end of the next CDATA section
    $text = preg_replace_callback(
        '/<!\[CDATA\[(.*?)\]\]>/s',
        array('XHTMLCompiler_FilterManager', 'cdataCallback'),
        $text
    );

    // replace any redundant xmlns sections, although they are
    // valid they interfere with DTD validation
    $text = preg_replace(
        '#(<(?!html)[^>]+) xmlns="http://www.w3.org/1999/xhtml"#',
        '$1',
        $text
    );

    // okay, now finally do validation, and collect the errors
    // that get reported while loading and validating
    set_error_handler(array($this, 'validationErrorHandler'));
    $dom->loadXML($text);
    $status = $dom->validate();
    restore_error_handler();
    if (!$status || !empty($this->errors)) {
        $this->buildErrors($dom);
        $text = $dom->saveXML();
    }

    return $text;
}
/**
 * preg_replace_callback() callback that turns the content of a CDATA
 * section into escaped text. Quotes are left as they are.
 * @param $matches Array of regex matches, index 1 being the content
 * @return String escaped content
 */
public static function cdataCallback($matches) {
    $contents = $matches[1];
    return htmlspecialchars($contents, ENT_NOQUOTES, 'UTF-8');
}
/**
 * Error handler installed while a document is validated; appends
 * each message to the error list instead of letting it be output.
 * @param $n Error level (unused)
 * @param $text String error message
 */
public function validationErrorHandler($n, $text) {
    array_push($this->errors, $text);
}
/**
 * Error handler that discards every error it is given.
 * @param $n Error level (unused)
 * @param $t Error message (unused)
 */
public function muteErrorHandler($n, $t) {
    // deliberately does nothing
    return;
}
/**
 * Points the XML_CATALOG_FILES environment variable at the catalog
 * in ../catalog/catalog.xml relative to this file, to speed up
 * entity resolution.
 */
public function setupXMLCatalog() {
    // spaces are percent-encoded and backslashes become slashes
    $base = str_replace(
        array(' ', '\\'),
        array('%20', '/'),
        dirname(__FILE__)
    );
    $catalog = $base . '/../catalog/catalog.xml';
    if (':' === $catalog[1]) {
        // remove drive
        $catalog = substr($catalog, 2);
    }
    putenv('XML_CATALOG_FILES=' . $catalog);
}
/**
 * Creates a DOMDocument with this compiler's default settings:
 * whitespace not preserved, formatted output, external resources
 * resolved and entities substituted.
 * @param $text String XML to load into the document, or false to
 *        get an empty document
 * @return DOMDocument
 */
public function createDOM($text = false) {
    $document = new DOMDocument();
    $document->formatOutput = true;
    $document->preserveWhiteSpace = false;
    $document->resolveExternals = true;

    // todo: somehow, collect information on which entity files
    // are being added to the document, and add to xc-deps.
    $document->substituteEntities = true; // allows for custom entities too!

    if ($text === false) {
        return $document;
    }
    $document->loadXML($text);
    return $document;
}
/**
 * Analyzes the internal subset of a DOM, registering the files named
 * by parameter entity declarations with a SYSTEM identifier as
 * dependencies. Documents without a doctype or without an internal
 * subset are left alone.
 * @param $dom DOMDocument to inspect
 */
public function analyzeInternalSubset($dom) {
    if (empty($dom->doctype) || !is_object($dom->doctype)) return;
    $internal_subset = $dom->doctype->internalSubset;
    if (!$internal_subset) return;
    // there are some entities that need to be registered to
    // the dependency list. Match ones that declare SYSTEM
    // '<!ENTITY' S '%' S Name S PEDef S? '>'
    preg_match_all(
        '/<!ENTITY\s+%\s+[^\s]+\s+(?:PUBLIC.+?)?SYSTEM\s+(?:"([^"]*)"|\'([^\']*)\')\s*>/s',
        $internal_subset,
        $matches
    );
    foreach ($matches[1] as $i => $double_quoted) {
        // Group 1 holds a double-quoted literal, group 2 a
        // single-quoted one; exactly one of them is filled per match.
        $single_quoted = isset($matches[2][$i]) ? $matches[2][$i] : '';
        $filename = ($double_quoted !== '') ? $double_quoted : $single_quoted;
        if ($filename === '') continue; // empty system identifier
        // $filename will always be relative to web root, so
        // no munging necessary
        $this->addDependency($filename);
    }
}
/**
 * Analyzes a document's XIncludes and registers necessary dependencies:
 * every included file, plus whatever the internal subset of an
 * included XML file declares.
 * Make sure you call this before calling $dom->xinclude
 * @param DOMDocument $dom to process
 * @todo Factor into a DOMFilter
 * @todo Handle arbitrary nestings of includes
 */
public function analyzeXIncludes($dom) {
    $xpath = new DOMXPath($dom);
    $xpath->registerNamespace('xi', 'http://www.w3.org/2001/XInclude');
    $nodes = $xpath->query('//xi:include');
    foreach ($nodes as $node) {
        if (!$node instanceof DOMElement) continue;
        $filename = $node->getAttribute('href');
        if (!$filename) continue;
        $this->addDependency($filename);
        // A text include is not an XML document: it has no internal
        // subset, and loading it as XML would only produce parse
        // warnings.
        if ($node->getAttribute('parse') === 'text') continue;
        $sub_dom = new DOMDocument();
        $sub_dom->load($filename);
        $this->analyzeInternalSubset($sub_dom);
    }
}
/**
 * Adds the collected validation errors to the output document as a
 * warning block placed at the top of the body. A body (and, if the
 * document is empty, an html root) is created when missing.
 * @param $dom DOMDocument to add the warning to
 */
public function buildErrors($dom) {
    // This should output some error to command line utility
    $body = $dom->getElementsByTagName('body')->item(0);
    if (!$body) {
        // Reuse an existing root element: a document cannot take a
        // second one, appending it would throw.
        $root = $dom->documentElement;
        if (!$root) {
            $root = $dom->createElement('html');
            $dom->appendChild($root);
        }
        $body = $dom->createElement('body');
        $root->appendChild($body);
    }
    $warning = $dom->createElement('div');
    $warning->setAttribute('class', 'warning');
    $warning->appendChild($dom->createElement('h2', 'Warning: Errors'));
    $warning->appendChild($dom->createElement('p', 'This document has validation errors:'));
    $list = $dom->createElement('ul');
    foreach ($this->errors as $error) {
        // strip-tags removes HTML tags to make the plaintext output
        // more friendly, IS NOT for security reasons
        $item = $dom->createElement('li');
        // The value argument of createElement() is not escaped, so a
        // message containing "&" would be cut off; a text node takes
        // the message literally.
        $item->appendChild($dom->createTextNode(strip_tags($error)));
        $list->appendChild($item);
    }
    $warning->appendChild($list);
    $body->insertBefore($warning, $body->firstChild);
}