Have filters add self as dependencies, rm whitespace.
[xhtml-compiler.git] / XHTMLCompiler / DOMFilter / Acronymizer.php
blob79c2f44fcef33284512a9dd28d60fa7bf6084792
1 <?php
/**
 * Based on a list of known acronyms and abbreviations, populates the
 * title attribute of acronym and abbr elements in documents.
 * @note Title is a bit of legacy baggage.
 */
class XHTMLCompiler_DOMFilter_Acronymizer extends XHTMLCompiler_DOMFilter
{

    /**
     * Name of this filter.
     */
    protected $name = 'Acronymizer';

    /**
     * Associative array of recognized abbreviations/initialisms,
     * mapping the short form to its expansion.
     * @note To be used when each letter should be spelled out, such
     *       as HTML.
     */
    protected $abbreviations = array(
        // markup languages and related technologies
        'SGML'  => 'Standard Generalized Markup Language',
        'HTML'  => 'HyperText Markup Language',
        'XHTML' => 'Extensible HyperText Markup Language',
        'XML'   => 'Extensible Markup Language',
        'RSS'   => 'Really Simple Syndication',
        'DTD'   => 'Document Type Definition',
        'CSS'   => 'Cascading Style Sheets',
        'HTTP'  => 'HyperText Transfer Protocol',
        // programming/apis
        'PHP'   => 'PHP: HyperText Preprocessor',
        'CMS'   => 'Content Management System',
        'API'   => 'Application Programming Interface',
        'SVN'   => 'Subversion',
        'XSLT'  => 'Extensible Stylesheet Language Transformations',
        'SQL'   => 'Structured Query Language',
        // web-app security
        'XSS'   => 'Cross-Site Scripting',
        // organizations/groups
        'W3C'   => 'World Wide Web Consortium',
        'RFC'   => 'Request for Comments',
        'PECL'  => 'PHP Extension Community Library',
        // character encodings
        'UTF-8' => '8-bit Unicode Transformation Format',
        // other
        'INI'   => 'Initialization',
        'CPU'   => 'Central Processing Unit',
        'LGPL'  => 'GNU Lesser General Public License',
        'FTP'   => 'File Transfer Protocol',
        'URI'   => 'Uniform Resource Identifier',
    );

    /**
     * Associative array of recognized acronyms, mapping the short form
     * to its expansion.
     * @note Acronyms can be spoken literally, if in doubt, make it
     *       an abbreviation.
     * @todo Make a public API for this, allow multiple acronym sets
     *       and different precedences for them.
     */
    protected $acronyms = array(
        // programming
        'SAX'     => 'Simple API for XML',
        'DOM'     => 'Document Object Model',
        'PEAR'    => 'PHP Extension and Application Repository',
        'ASCII'   => 'American Standard Code for Information Interchange',
        'SHA-1'   => 'Secure Hash Algorithm',
        // paradigms
        'WYSIWYG' => 'What You See Is What You Get',
        'WYSIWYM' => 'What You See Is What You Mean',
    );

    /**
     * Adds advisory titles to every acronym and abbr element that does
     * not already carry a title attribute, then registers this file as
     * a dependency of the page.
     * @note query() is not defined in this class; it is presumably
     *       inherited from XHTMLCompiler_DOMFilter and bound to the
     *       document being processed with the html: prefix registered
     *       (TODO confirm against the base class).
     * @param DOMDocument $dom Document being filtered (not referenced
     *        directly here)
     * @param mixed $page Page being processed (unused by this filter)
     * @param mixed $manager Object exposing addDependency()
     */
    public function process(DOMDocument $dom, $page, $manager) {
        $nodes = $this->query("//html:acronym[not(@title)]");
        foreach ($nodes as $node) {
            $this->addAdvisoryTitle($node, $this->acronyms);
        }

        $nodes = $this->query("//html:abbr[not(@title)]");
        foreach ($nodes as $node) {
            $this->addAdvisoryTitle($node, $this->abbreviations);
        }

        // add self as dependency; when acronym lists change, so does the page
        $manager->addDependency(__FILE__);
    }

    /**
     * Sets the title attribute of a node to the expansion of its text
     * content, or raises a notice when the text is not in the lookup.
     * @param DOMElement $node Element whose text content is the short form
     * @param array $lookup Map of short form => expansion
     */
    protected function addAdvisoryTitle($node, $lookup) {
        // surrounding whitespace (e.g. line breaks inside the element)
        // must not prevent an otherwise valid key from matching
        $key = trim($node->textContent);
        if (!isset($lookup[$key])) {
            // not fatal, but good to let the document author know
            trigger_error(htmlspecialchars($key) .
                ' is not a recognized acronym/abbreviation (missing title attribute)');
            return;
        }
        $node->setAttribute('title', $lookup[$key]);
    }

}