4 * Manages various filters in a document, and performs text processing
6 class XHTMLCompiler_FilterManager
/** Text filters applied to the raw source before DOM parsing, keyed by filter name. */
protected $preTextFilters = array();

/** Text filters applied to the serialized output, keyed by filter name. */
protected $postTextFilters = array();

/** DOM filters applied to the parsed document, keyed by filter name. */
protected $DOMFilters = array();

/**
 * Markup handlers keyed by source file extension. A boolean value
 * (as for 'xhtml') means the source needs no markup pre-processing.
 */
protected $Markup = array('xhtml' => true);

/** Attribute names already claimed by a DOM filter, stored as name => true. */
protected $xcAttr = array();

/** Messages collected by validationErrorHandler(). */
protected $errors = array();

/** Registered dependencies, stored as filename => modification time. */
protected $deps = array();
/**
 * Adds a pre-processing text filter to the queue.
 * @note Filters added here are run before the document is
 *       parsed into a DOM. Suggested use is for transforming
 *       non-XML style specialized markup.
 * @param $filter XHTMLCompiler_TextFilter object, or a string that
 *        loadFilter() resolves to one
 * @return The filter object that was registered
 * @throws Exception if a pre text filter of the same name is registered
 */
public function addPreTextFilter($filter) {
    $instance = $this->loadFilter($filter, 'TextFilter');
    $name = $instance->getName();
    if (isset($this->preTextFilters[$name])) {
        // NOTE(review): message operand reconstructed as the filter name -- confirm
        throw new Exception('Cannot overload pre text filter ' . $name);
    }
    $this->preTextFilters[$name] = $instance;
    return $instance;
}
/**
 * Adds a post-processing text filter to the queue.
 * @note Filters added here are run after the document has been
 *       parsed into a DOM and then serialized back. Suggested use
 *       is for fixing cosmetic issues with the source.
 * @warning Anything done on this step will not have its
 *          well-formedness corrected, so be careful.
 * @param $filter XHTMLCompiler_TextFilter object, or a string that
 *        loadFilter() resolves to one
 * @return The filter object that was registered
 * @throws Exception if a post text filter of the same name is registered
 */
public function addPostTextFilter($filter) {
    $instance = $this->loadFilter($filter, 'TextFilter');
    $name = $instance->getName();
    if (isset($this->postTextFilters[$name])) {
        // NOTE(review): message operand reconstructed as the filter name -- confirm
        throw new Exception('Cannot overload post text filter ' . $name);
    }
    $this->postTextFilters[$name] = $instance;
    return $instance;
}
/**
 * Adds a DOM-processing filter to the queue.
 *
 * Every attribute the filter reports through getXCAttributesDefined()
 * is recorded so that no two DOM filters can define the same one.
 *
 * @param $filter XHTMLCompiler_DOMFilter object, or a string that
 *        loadFilter() resolves to one
 * @return The filter object that was registered
 * @throws Exception if a DOM filter of the same name is registered, or
 *         if one of its attributes was already defined by another filter
 */
public function addDOMFilter($filter) {
    $instance = $this->loadFilter($filter, 'DOMFilter');
    $name = $instance->getName();
    if (isset($this->DOMFilters[$name])) {
        // NOTE(review): message operand reconstructed as the filter name -- confirm
        throw new Exception('Cannot overload DOM filter ' . $name);
    }
    // Attributes are claimed one at a time; ones claimed before a
    // duplicate is detected stay claimed, as before.
    foreach ($instance->getXCAttributesDefined() as $attribute) {
        if (isset($this->xcAttr[$attribute])) {
            // NOTE(review): message operand reconstructed as the attribute name -- confirm
            throw new Exception('Duplicate attribute definition for '. $attribute);
        }
        $this->xcAttr[$attribute] = true;
    }
    $this->DOMFilters[$name] = $instance;
    return $instance;
}
/**
 * Registers a markup handler for a source file extension.
 * @param $ext String source extension the handler is responsible for
 * @param $filter Markup handler object, or a string that loadFilter()
 *        resolves to one
 * @return The handler object that was registered
 * @throws Exception if the extension already has an entry
 */
public function addMarkup($ext, $filter) {
    $handler = $this->loadFilter($filter, 'Markup');
    if (isset($this->Markup[$ext])) {
        throw new Exception('Cannot overload extension ' . $ext);
    }
    $this->Markup[$ext] = $handler;
    return $handler;
}
// NOTE(review): only the signature of this method is visible here; it
// presumably returns the extension => markup handler map ($this->Markup)
// -- confirm against the full file.
84 public function getMarkup() {
/**
 * If filter is string, load the filter based on a few guesses.
 *
 * Resolution order for a string: the class
 * XHTMLCompiler_{$subclass}_{$filter}, then a class named exactly
 * $filter, and finally the file "$subclass/$filter.php" is required
 * and the prefixed class is instantiated. Anything that is not a
 * string is returned unchanged.
 *
 * @param $filter String or object filter
 * @param $subclass String filter family, e.g. 'TextFilter'
 * @return Filter object
 */
protected function loadFilter($filter, $subclass) {
    if (!is_string($filter)) {
        return $filter;
    }
    $class = "XHTMLCompiler_{$subclass}_$filter";
    if (class_exists($class)) {
        return new $class;
    }
    if (class_exists($filter)) {
        return new $filter;
    }
    // neither class is known yet: pull in the file expected to define it
    require "$subclass/$filter.php";
    return new $class;
}
/**
 * Returns the dependency array accumulated from the filter run.
 * @return Array of filename => modification time, as stored by
 *         addDependency()
 */
public function getDeps() {
    return $this->deps;
}
/**
 * Adds a file to the dependency list, recording the value filemtime()
 * reports for it at the time of the call.
 * @param $filename String path of the file the output depends on
 */
public function addDependency($filename) {
    $modified = filemtime($filename);
    $this->deps[$filename] = $modified;
}
/**
 * Accepts a page's text and turns it into its DOM representation.
 * Text, initial validation and XIncludes will be processed before
 * returning. DOM filters will *not* be processed.
 * @param $text String text to be processed
 * @param $page Page being processed; must provide getSourceExt()
 * @return DOM representation of the page
 */
public function parse($text, $page) {
    $handler = $this->Markup[$page->getSourceExt()];
    if (!is_bool($handler)) {
        // do markup pre-processing
        // Conceivably, if something properly put things into
        // DOM form before kicking it out, this wouldn't work;
        // such a case is highly unlikely though.
        $text = $handler->process($text, $page, $this);
    }

    // do pre-text processing
    foreach ($this->preTextFilters as $textFilter) {
        $text = $textFilter->process($text, $page, $this);
    }

    $this->setupXMLCatalog();
    $dom = $this->createDOM($text);

    $this->analyzeInternalSubset($dom);

    // validate the document to force the entities to be resolved,
    // we don't actually care about the errors
    set_error_handler(array($this, 'muteErrorHandler'));
    // NOTE(review): this call is reconstructed from the comment above -- confirm
    $dom->validate();
    restore_error_handler();

    // dependencies have to be registered before the includes are expanded
    $this->analyzeXIncludes($dom);
    // NOTE(review): the next two statements are reconstructed from the
    // docblock ("XIncludes will be processed before returning") -- confirm
    $dom->xinclude();

    return $dom;
}
/**
 * Accepts a page's text (usually XHTML) and processes it.
 *
 * Steps, in order: parse() the text into a DOM, run every DOM filter,
 * serialize, strip prefixed namespace declarations and internal DTD
 * subsets, run the post text filters, unwrap CDATA sections, drop
 * redundant XHTML xmlns declarations, then validate the result. If
 * validation fails or reports errors, a warning block listing them is
 * inserted into the document via buildErrors().
 *
 * The collected error list is cleared once it has been reported, so
 * errors from one page are not carried into the next call.
 *
 * @param $text String text to be processed
 * @param $page XHTMLCompiler_Page representing currently processed page
 * @return String processed text
 */
public function process($text, $page) {

    $dom = $this->parse($text, $page);

    // run DOM filters
    foreach ($this->DOMFilters as $filter) {
        $filter->setup($dom);
        $filter->process($dom, $page, $this);
    }

    // translate back to text
    $text = $dom->saveXML();

    // remove all non-default namespace declarations, may change,
    // but for now embedded XML namespaces are not cross-browser friendly
    $text = preg_replace('/ xmlns:.+?=".+?"/', '', $text);
    // scrub out custom DTD additions
    $text = preg_replace('/(<!DOCTYPE[^>]*?) ?\[[^\]]+\]/', '\1', $text);
    foreach ($this->postTextFilters as $filter) {
        $text = $filter->process($text, $page, $this);
    }
    // replace all CDATA sections
    $text = preg_replace_callback(
        '/<!\[CDATA\[(.+?)\]\]>/s',
        array('XHTMLCompiler_FilterManager', 'cdataCallback'),
        $text
    );

    // replace any redundant xmlns sections, although they are
    // valid they interfere with DTD validation
    // NOTE(review): the replacement '\1' (keep the captured tag prefix)
    // is reconstructed from the pattern -- confirm
    $text = preg_replace(
        '#(<(?!html)[^>]+) xmlns="http://www.w3.org/1999/xhtml"#',
        '\1',
        $text
    );

    // okay, now finally do validation; anything reported while the
    // handler is installed is collected in $this->errors
    set_error_handler(array($this, 'validationErrorHandler'));
    $dom->loadXML($text);
    $status = $dom->validate();
    restore_error_handler();
    if (!$status || !empty($this->errors)) {
        $this->buildErrors($dom);
        $text = $dom->saveXML();
        // the errors are now part of this page's output; start the
        // next page with an empty list
        $this->errors = array();
    }
    return $text;
}
/**
 * preg_replace_callback() handler that replaces a CDATA section with
 * its contents as escaped character data.
 * @param $matches Array of regex matches; index 1 holds the text that
 *        was inside the CDATA section
 * @return String the text passed through htmlspecialchars() with
 *         ENT_NOQUOTES, i.e. quotes are left as they are
 */
public static function cdataCallback($matches) {
    $contents = $matches[1];
    return htmlspecialchars($contents, ENT_NOQUOTES, 'UTF-8');
}
/**
 * Temporary error handler to use when validating a document; it
 * records each message instead of letting it be output.
 * @param $n Error level passed by PHP (unused)
 * @param $text String error message, appended to the error list
 */
public function validationErrorHandler($n, $text) {
    array_push($this->errors, $text);
}
/**
 * Handler that mutes all errors: both arguments are ignored and
 * nothing is recorded or output.
 * @param $n Error level passed by PHP (unused)
 * @param $t Error message passed by PHP (unused)
 */
public function muteErrorHandler($n, $t) {
    // intentionally empty
}
/**
 * Sets up an XML catalog to speed up entity resolution.
 *
 * Points the XML_CATALOG_FILES environment variable at
 * catalog/catalog.xml under the XHTMLCOMPILER directory.
 */
public function setupXMLCatalog() {
    $path = XHTMLCOMPILER . '/catalog/catalog.xml';
    if ($path[1] == ':') {
        // remove drive
        $path = substr($path, 2);
    }
    $assignment = 'XML_CATALOG_FILES=' . $path;
    putenv($assignment);
}
/**
 * Creates a DOMDocument with reasonable default configuration:
 * whitespace is not preserved, output is formatted, external
 * resources are resolved and entities are substituted.
 * @param string $text XML to load DOM with; pass false (the default)
 *        to get an empty document
 * @return DOMDocument
 */
public function createDOM($text = false) {
    $document = new DOMDocument();
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;
    $document->resolveExternals = true;

    // todo: somehow, collect information on which entity files
    // are being added to the document, and add to xc-deps.
    $document->substituteEntities = true; // allows for custom entities too!

    if ($text !== false) {
        $document->loadXML($text);
    }

    // NOTE(review): the return statement is reconstructed from the
    // call site in parse(), which uses the result as the DOM -- confirm
    return $document;
}
/**
 * Analyzes the internal subset of a DOM, registering any file
 * entity definitions as dependencies.
 *
 * Only parameter entity declarations ('<!ENTITY % name ... SYSTEM ...>')
 * are matched. The system identifier may be quoted with either double
 * or single quotes; declarations with an empty identifier are skipped.
 *
 * @param DOMDocument $dom document whose doctype is inspected; nothing
 *        happens if it has no doctype or no internal subset
 */
public function analyzeInternalSubset($dom) {
    if (empty($dom->doctype) || !is_object($dom->doctype)) return;
    $internal_subset = $dom->doctype->internalSubset;
    if (!$internal_subset) return;

    // there are some entities that need to be registered to
    // the dependency list. Match ones that declare SYSTEM
    //      '<!ENTITY' S '%' S Name S PEDef S? '>'
    // NOTE(review): the preg_match_all() call and its last two arguments
    // are reconstructed from the surrounding lines -- confirm
    preg_match_all(
        '/<!ENTITY\s+%\s+[^\s]+\s+(?:PUBLIC.+?)?SYSTEM\s+(?:"([^"]*)"|\'([^\']*)\')\s*>/s',
        $internal_subset,
        $matches
    );

    // Group 1 holds double-quoted identifiers, group 2 single-quoted
    // ones; for each declaration exactly one of them can be non-empty.
    foreach ($matches[1] as $i => $double_quoted) {
        $filename = ($double_quoted !== '') ? $double_quoted : $matches[2][$i];
        if ($filename === '') continue;
        // $filename will always be relative to web root, so
        // no munging necessary
        $this->addDependency($filename);
    }
}
/**
 * Analyzes a document's XIncludes and registers necessary dependencies.
 * Make sure you call this before calling $dom->xinclude.
 *
 * Each included file is registered as a dependency and is itself
 * loaded so that file entities declared in its internal subset are
 * registered too.
 *
 * @param DOMDocument $dom to process
 * @todo Factor into a DOMFilter
 * @todo Handle arbitrary nestings of includes
 */
public function analyzeXIncludes($dom) {
    $xpath = new DOMXPath($dom);
    $xpath->registerNamespace('xi', 'http://www.w3.org/2001/XInclude');
    foreach ($xpath->query('//xi:include') as $include) {
        if (!($include instanceof DOMElement)) {
            continue;
        }
        $filename = $include->getAttribute('href');
        if (!$filename) {
            continue;
        }
        $this->addDependency($filename);
        $included = new DOMDocument();
        $included->load($filename);
        $this->analyzeInternalSubset($included);
    }
}
/**
 * Adds validation errors to the output document as a message.
 *
 * A <div class="warning"> listing every collected error is inserted as
 * the first child of the document's first <body>. If the document has
 * no <body>, an <html><body> skeleton is appended to hold it.
 *
 * @param DOMDocument $dom document to add the warning block to
 */
public function buildErrors($dom) {
    // This should output some error to command line utility
    $body = $dom->getElementsByTagName('body')->item(0);
    // NOTE(review): the guard around the skeleton creation is
    // reconstructed from the two statements it encloses -- confirm
    if (!$body) {
        $dom->appendChild($html = $dom->createElement('html'));
        $html->appendChild($body = $dom->createElement('body'));
    }
    $warning = $dom->createElement('div');
    $warning->setAttribute('class', 'warning');
    $warning->appendChild($dom->createElement('h2', 'Warning: Errors'));
    $warning->appendChild($dom->createElement('p', 'This document has validation errors:'));
    $list = $dom->createElement('ul');
    foreach ($this->errors as $error) {
        // strip-tags removes HTML tags to make the plaintext output
        // more friendly, IS NOT for security reasons
        $message = strip_tags($error);
        // The message goes in as a text node: the value argument of
        // createElement() treats a bare '&' as the start of an entity
        // reference, which would mangle messages containing one.
        $item = $dom->createElement('li');
        $item->appendChild($dom->createTextNode($message));
        $list->appendChild($item);
    }
    $warning->appendChild($list);
    $body->insertBefore($warning, $body->childNodes->item(0));
}