Remove trailing ?>
[xhtml-compiler.git] / XHTMLCompiler / FilterManager.php
blob6b2cf1a6bea700acc073f5bd8ae38c88ad34aaa4
1 <?php
/**
 * Manages various filters in a document, and performs text processing.
 *
 * Three kinds of filters are kept, each indexed by the name the filter
 * reports through getName(): pre-text filters (run on the raw text before
 * DOM parsing), DOM filters (run on the parsed document) and post-text
 * filters (run on the serialized output).
 */
class XHTMLCompiler_FilterManager
{

    /** Pre-processing text filters, indexed by filter name */
    protected $preTextFilters = array();
    /** Post-processing text filters, indexed by filter name */
    protected $postTextFilters = array();
    /** DOM filters, indexed by filter name */
    protected $DOMFilters = array();
    /** Lookup of xc: attribute names already claimed by a DOM filter */
    protected $xcAttr = array();

    /** Messages collected by validationErrorHandler() */
    protected $errors = array();
    /** Dependencies: filename => modification time as given by filemtime() */
    protected $deps = array();

    /**
     * Adds a pre-processing text filter to the queue.
     * @note Filters added here are run before the document is
     *       parsed into a DOM. Suggested use is for transforming
     *       non-XML style specialized markup.
     * @param $filter XHTMLCompiler_TextFilter, or a string accepted by
     *        loadFilter()
     * @return The registered filter object
     * @throws Exception if a pre text filter with the same name exists
     */
    public function addPreTextFilter($filter) {
        $filter = $this->loadFilter($filter, 'TextFilter');
        $n = $filter->getName();
        if (isset($this->preTextFilters[$n])) {
            throw new Exception('Cannot overload pre text filter ' . $n);
        }
        return $this->preTextFilters[$n] = $filter;
    }

    /**
     * Adds a post-processing text filter to the queue.
     * @note Filters added here are run after the document has been
     *       parsed into a DOM and then serialized back. Suggested use
     *       is for fixing cosmetic issues with the source.
     * @warning Anything done on this step will not have its
     *          well-formedness corrected, so be careful.
     * @param $filter XHTMLCompiler_TextFilter, or a string accepted by
     *        loadFilter()
     * @return The registered filter object
     * @throws Exception if a post text filter with the same name exists
     */
    public function addPostTextFilter($filter) {
        $filter = $this->loadFilter($filter, 'TextFilter');
        $n = $filter->getName();
        if (isset($this->postTextFilters[$n])) {
            throw new Exception('Cannot overload post text filter ' . $n);
        }
        return $this->postTextFilters[$n] = $filter;
    }

    /**
     * Adds a DOM-processing filter to the queue.
     * @note Registration is all-or-nothing: the xc: attributes the filter
     *       defines are only claimed once every one of them has been
     *       checked, so a rejected filter leaves no attributes behind.
     * @param $filter XHTMLCompiler_DOMFilter, or a string accepted by
     *        loadFilter()
     * @return The registered filter object
     * @throws Exception if a DOM filter with the same name exists, or if
     *         one of its xc: attributes is already defined
     */
    public function addDOMFilter($filter) {
        $filter = $this->loadFilter($filter, 'DOMFilter');
        $n = $filter->getName();
        if (isset($this->DOMFilters[$n])) {
            throw new Exception('Cannot overload DOM filter ' . $n);
        }
        // validate first, commit afterwards; $claimed also catches an
        // attribute listed twice by the same filter
        $claimed = array();
        foreach ($filter->getXCAttributesDefined() as $attribute) {
            if (isset($this->xcAttr[$attribute]) || isset($claimed[$attribute])) {
                throw new Exception('Duplicate attribute definition for '.
                    'xc:' . $attribute);
            }
            $claimed[$attribute] = true;
        }
        foreach ($claimed as $attribute => $flag) {
            $this->xcAttr[$attribute] = $flag;
        }
        return $this->DOMFilters[$n] = $filter;
    }

    /**
     * If filter is string, load the filter based on a few guesses:
     * first the class XHTMLCompiler_{$subclass}_{$filter}, then a class
     * named exactly $filter, and finally the file "$subclass/$filter.php"
     * is required and the prefixed class is instantiated.
     * @param $filter String or object filter; non-strings are returned
     *        unchanged
     * @param $subclass String filter kind, e.g. 'TextFilter' or 'DOMFilter'
     * @return Filter object
     */
    protected function loadFilter($filter, $subclass) {
        if (!is_string($filter)) {
            return $filter;
        }
        $class = "XHTMLCompiler_{$subclass}_$filter";
        if (class_exists($class)) {
            return new $class;
        }
        if (class_exists($filter)) {
            return new $filter;
        }
        require "$subclass/$filter.php";
        return new $class;
    }

    /** Returns the dependency array accumulated from the filter run */
    public function getDeps() {return $this->deps;}

    /**
     * Adds a file to the dependency list, recording the value returned
     * by filemtime() for it.
     * @param $filename String path of the file depended upon
     */
    public function addDependency($filename) {
        $this->deps[$filename] = filemtime($filename);
    }

    /**
     * Accepts a page's text and turns it into its DOM representation.
     * Text, initial validation and XIncludes will be processed before
     * returning. DOM filters will *not* be processed.
     * @param $text String text to be processed
     * @param $page Page being processed; handed unchanged to every
     *        pre-text filter
     * @return DOMDocument
     */
    public function parse($text, $page) {
        // do pre-text processing
        foreach ($this->preTextFilters as $filter) {
            $text = $filter->process($text, $page, $this);
        }

        // generate the DOM
        $this->setupXMLCatalog();
        $dom = $this->createDOM($text);

        $this->analyzeInternalSubset($dom);

        // validate the document to force the entities to be resolved,
        // we don't actually care about the errors
        set_error_handler(array($this, 'muteErrorHandler'));
        $dom->validate();
        restore_error_handler();

        // dependencies must be collected before xinclude() is run
        $this->analyzeXIncludes($dom);
        $dom->xinclude();

        return $dom;
    }

    /**
     * Accepts a page's text (usually XHTML) and processes it.
     * @param $text String text to be processed
     * @param $page XHTMLCompiler_Page representing currently processed page
     * @return String processed text; when validation fails, the text
     *         includes the warning block produced by buildErrors()
     */
    public function process($text, $page) {

        // errors collected while validating a previous page must not be
        // reported against this one
        $this->errors = array();

        $dom = $this->parse($text, $page);

        // run DOM filters
        foreach ($this->DOMFilters as $filter) {
            $filter->setup($dom);
            $filter->process($dom, $page, $this);
        }

        // translate back to text
        $text = $dom->saveXML();

        // remove all non-default namespace declarations, may change,
        // but for now embedded XML namespaces are not cross-browser friendly
        $text = preg_replace('/ xmlns:.+?=".+?"/', '', $text);
        // scrub out custom DTD additions
        $text = preg_replace('/(<!DOCTYPE[^>]*?) ?\[[^\]]+\]/', '\1', $text);
        foreach ($this->postTextFilters as $filter) {
            $text = $filter->process($text, $page, $this);
        }
        // replace all CDATA sections; .*? (not .+?) so that an empty
        // section is matched on its own instead of running on to the
        // terminator of a later section
        $text = preg_replace_callback(
            '/<!\[CDATA\[(.*?)\]\]>/s',
            array('XHTMLCompiler_FilterManager', 'cdataCallback'),
            $text
        );

        // replace any redundant xmlns sections, although they are
        // valid they interfere with DTD validation
        $text = preg_replace(
            '#(<(?!html)[^>]+) xmlns="http://www.w3.org/1999/xhtml"#',
            '$1',
            $text
        );

        // okay, now finally do validation, and collect the parse and
        // validation errors so they can be reported in the document
        set_error_handler(array($this, 'validationErrorHandler'));
        $dom->loadXML($text);
        $status = $dom->validate();
        restore_error_handler();
        if (!$status || !empty($this->errors)) {
            $this->buildErrors($dom);
            $text = $dom->saveXML();
        }

        return $text;
    }

    /**
     * preg_replace_callback() handler that turns the contents of a CDATA
     * section into escaped text (quotes are left alone).
     * @param $matches Array of regex matches, index 1 is the CDATA contents
     * @return String escaped contents
     */
    public static function cdataCallback($matches) {
        return htmlspecialchars($matches[1], ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Temporary error handler to use when validating a document;
     * records the message instead of emitting it.
     * @param $n Error level (unused)
     * @param $text String error message
     */
    public function validationErrorHandler($n, $text) {
        $this->errors[] = $text;
    }

    /**
     * Handler that mutes all errors
     */
    public function muteErrorHandler($n, $t) {}

    /**
     * Sets up an XML catalog to speed up entity resolution, by pointing
     * the XML_CATALOG_FILES environment variable at
     * ../catalog/catalog.xml relative to this file's directory.
     */
    public function setupXMLCatalog() {
        $base = str_replace(array(' ', '\\'), array('%20', '/'), dirname(__FILE__));
        $catalog = $base . '/../catalog/catalog.xml';
        if ($catalog[1] === ':') {
            $catalog = substr($catalog, 2); // remove drive
        }
        putenv('XML_CATALOG_FILES=' . $catalog);
    }

    /**
     * Creates a reasonable well default configured DOM
     * @param string $text XML to load DOM with, or false for an empty
     *        document
     * @return DOMDocument
     */
    public function createDOM($text = false) {
        $dom = new DOMDocument();
        $dom->preserveWhiteSpace = false;
        $dom->formatOutput = true;
        $dom->resolveExternals = true;

        // todo: somehow, collect information on which entity files
        // are being added to the document, and add to xc-deps.
        $dom->substituteEntities = true; // allows for custom entities too!

        if ($text !== false) $dom->loadXML($text);

        return $dom;
    }

    /**
     * Analyzes the internal subset of a DOM, registering any file
     * entity definitions as dependencies
     * @param $dom Document whose doctype->internalSubset is inspected;
     *        nothing happens when it has no doctype
     */
    public function analyzeInternalSubset($dom) {
        if (empty($dom->doctype) || !is_object($dom->doctype)) return;
        $internal_subset = $dom->doctype->internalSubset;
        if (!$internal_subset) return;
        // there are some entities that need to be registered to
        // the dependency list. Match ones that declare SYSTEM
        //      '<!ENTITY' S '%' S Name S PEDef S? '>'
        preg_match_all(
            '/<!ENTITY\s+%\s+[^\s]+\s+(?:PUBLIC.+?)?SYSTEM\s+(?:"([^"]*)"|\'([^\']*)\')\s*>/s',
            $internal_subset,
            $matches
        );
        foreach ($matches[1] as $i => $filename) {
            // group 1 holds a double-quoted literal, group 2 a
            // single-quoted one; only one of them is non-empty per match
            if ($filename === '' && isset($matches[2][$i])) {
                $filename = $matches[2][$i];
            }
            if ($filename === '') continue; // empty system literal
            // $filename will always be relative to web root, so
            // no munging necessary
            $this->addDependency($filename);
        }
    }

    /**
     * Analyzes a documents XIncludes and registers necessary dependencies.
     * Make sure you call this before calling $dom->xinclude
     * @param DOMDocument $dom to process
     * @todo Factor into a DOMFilter
     * @todo Handle arbitrary nestings of includes
     */
    public function analyzeXIncludes($dom) {
        $xpath = new DOMXPath($dom);
        $xpath->registerNamespace('xi', 'http://www.w3.org/2001/XInclude');
        foreach ($xpath->query('//xi:include') as $node) {
            if (! $node instanceof DOMElement) continue;
            $filename = $node->getAttribute('href');
            if (! $filename) continue;
            $this->addDependency($filename);
            // the included file may declare file entities of its own
            $sub_dom = new DOMDocument();
            $sub_dom->load($filename);
            $this->analyzeInternalSubset($sub_dom);
        }
    }

    /**
     * Adds validation errors to the output document as a message:
     * a div.warning holding a heading, a paragraph and one list item per
     * collected error, inserted as the first child of the body. An html
     * and body element are created when the document has no body.
     * @param DOMDocument $dom to add the message to
     */
    public function buildErrors($dom) {
        $body = $dom->getElementsByTagName('body')->item(0);
        if (!$body) {
            $dom->appendChild($html = $dom->createElement('html'));
            $html->appendChild($body = $dom->createElement('body'));
        }
        $warning = $dom->createElement('div');
        $warning->setAttribute('class', 'warning');
        $warning->appendChild($dom->createElement('h2', 'Warning: Errors'));
        $warning->appendChild($dom->createElement('p', 'This document has validation errors:'));
        $list = $dom->createElement('ul');
        foreach ($this->errors as $error) {
            // strip-tags removes HTML tags to make the plaintext output
            // more friendly, IS NOT for security reasons
            $item = $dom->createElement('li');
            // a text node is used because the value argument of
            // createElement() is not escaped, so a bare & in the
            // message would be mangled
            $item->appendChild($dom->createTextNode(strip_tags($error)));
            $list->appendChild($item);
        }
        $warning->appendChild($list);
        $body->insertBefore($warning, $body->childNodes->item(0));
    }

}