Deal with old libxml incompatibilities.
[htmlpurifier.git] / maintenance / config-scanner.php
blobc614d1fbc2c3610759fd70ccf7e23d347225a85f
#!/usr/bin/php
<?php

// Work relative to this script's own directory so the relative requires
// below resolve no matter where the script was launched from.
chdir(dirname(__FILE__));
require_once 'common.php';
require_once '../library/HTMLPurifier.auto.php';
// assertCli() is not defined in this file (presumably provided by common.php).
assertCli();

// The scanner reads the line number stored in each token (index 2), which
// the tokenizer only provides from PHP 5.2.2 onwards.
if (version_compare(PHP_VERSION, '5.2.2', '<')) {
    // Terminate the message with a newline, like the script's other output,
    // so the shell prompt does not end up glued to it.
    echo "This script requires PHP 5.2.2 or later, for tokenizer line numbers.\n";
    exit(1);
}
/**
 * @file
 * Scans HTML Purifier source code for $config tokens and records the
 * directive being used; configdoc can use this info later.
 *
 * A summary count is printed to the console, and the collected usage data
 * is written to ../configdoc/usage.xml (relative to the library directory),
 * an XML file that our XSLT transform can use.
 */
// Build the list of library source files to scan, as paths relative to
// the library directory.
$FS = new FSTools();
chdir(dirname(__FILE__) . '/../library/');
$raw_files = $FS->globr('.', '*.php');
$files = array();
foreach ($raw_files as $file) {
    // Strip the first two characters (the leading './')
    $file = substr($file, 2);
    // Generated files live under standalone/
    $is_generated = strncmp('standalone/', $file, 11) === 0;
    // Meta files are recognised by having more than one dot in the path
    if (!$is_generated && substr_count($file, '.') <= 1) {
        $files[] = $file;
    }
}
/**
 * Moves the $i cursor to the next non-whitespace token.
 *
 * The cursor always advances by at least one position, then keeps advancing
 * while it points at a T_WHITESPACE token. When the end of the token list
 * is reached the cursor is left one past the last index, and no
 * out-of-range element is read.
 *
 * @param array $tokens Token list, as produced by token_get_all()
 * @param int   $i      Cursor into $tokens; updated in place
 */
function consumeWhitespace($tokens, &$i)
{
    do {
        $i++;
    } while (
        // isset() guards against walking off the end of the list
        isset($tokens[$i])
        && is_array($tokens[$i])
        && $tokens[$i][0] === T_WHITESPACE
    );
}
/**
 * Checks a single tokenizer token against an expectation. Call shapes:
 *  - testToken($token, $type): true if $token is an array token whose
 *    type (index 0) is the integer $type;
 *  - testToken($token, $string): true if $token is exactly the string
 *    $string;
 *  - testToken($token, $type, $text): true if $token is an array token of
 *    type $type whose text (index 1) is exactly $text.
 *
 * @param array|string $token          Token to inspect
 * @param int|string   $value_or_token Expected token type, or expected
 *                                     literal string in the two-argument form
 * @param string|null  $value          Expected token text, if any
 * @return bool
 */
function testToken($token, $value_or_token, $value = null)
{
    // Two-argument literal form: compare the token itself
    if ($value === null && !is_int($value_or_token)) {
        return $token === $value_or_token;
    }

    // Every remaining form needs an array token of the expected type
    if (!is_array($token) || $token[0] !== $value_or_token) {
        return false;
    }

    // The type matched; also compare the text when one was requested
    return $value === null || $token[1] === $value;
}
// $counter:      ->get( calls whose first argument is a string literal
// $full_counter: every ->get( call reached through $config / $this->config
// $tracker:      directive ID => file name => list of line numbers
$counter = 0;
$full_counter = 0;
$tracker = array();

foreach ($files as $file) {
    $tokens = token_get_all(file_get_contents($file));
    // Report paths with forward slashes regardless of platform
    $file = str_replace('\\', '/', $file);

    for ($i = 0, $total = count($tokens); $i < $total; $i++) {
        // Step 1: the cursor must be on $config, or on $this->config
        $on_config = testToken($tokens[$i], T_VARIABLE, '$config');
        if (!$on_config && testToken($tokens[$i], T_VARIABLE, '$this')) {
            consumeWhitespace($tokens, $i);
            if (testToken($tokens[$i], T_OBJECT_OPERATOR)) {
                consumeWhitespace($tokens, $i);
                $on_config = testToken($tokens[$i], T_STRING, 'config');
            }
        }
        if (!$on_config) {
            continue;
        }

        // Step 2: look ahead for the next -> operator, giving up as soon
        // as a ',' ')' or ';' is seen first
        $found_arrow = false;
        for ($i++; $i < $total; $i++) {
            $token = $tokens[$i];
            if (is_string($token)) {
                if ($token === ',' || $token === ')' || $token === ';') {
                    break;
                }
                continue;
            }
            if ($token[0] === T_OBJECT_OPERATOR) {
                $found_arrow = true;
                break;
            }
        }
        if (!$found_arrow) {
            continue;
        }

        // The reported line number is that of the -> operator
        $line = $tokens[$i][2];

        // Step 3: the operator must be followed by get and then (
        consumeWhitespace($tokens, $i);
        if (!testToken($tokens[$i], T_STRING, 'get')) {
            continue;
        }
        consumeWhitespace($tokens, $i);
        if (!testToken($tokens[$i], '(')) {
            continue;
        }

        $full_counter++;

        // Step 4: only a string literal as first argument is recorded.
        // Batch retrievals and wildcard retrievals are not matched; they
        // are counted in $full_counter but not in $counter.
        consumeWhitespace($tokens, $i);
        // $matched is not read anywhere else in the visible script
        $matched = testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING);
        if ($matched) {
            // Drop the surrounding quote characters from the literal
            $id = substr($tokens[$i][1], 1, -1);
            $counter++;
            // Nested arrays are created on demand by the append
            $tracker[$id][$file][] = $line;
        }
    }
}

echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n";
// Serialise $tracker (directive ID => file name => line numbers) as
// <usage><directive id><file name><line>...</line></file></directive></usage>.
echo "Generating XML... ";

$xw = new XMLWriter();
// The path is relative to the current working directory.
// Stop here if the output cannot be opened, instead of reporting "done!"
// after writing nothing.
if (!$xw->openUri('../configdoc/usage.xml')) {
    echo "failed!\n";
    fwrite(STDERR, "Could not open ../configdoc/usage.xml for writing.\n");
    exit(1);
}
$xw->setIndent(true);
$xw->startDocument('1.0', 'UTF-8');
$xw->startElement('usage');
// A dedicated loop variable is used so the global $files list built
// earlier in the script is not overwritten.
foreach ($tracker as $id => $usages) {
    $xw->startElement('directive');
    $xw->writeAttribute('id', $id);
    foreach ($usages as $file => $lines) {
        $xw->startElement('file');
        $xw->writeAttribute('name', $file);
        foreach ($lines as $line) {
            $xw->writeElement('line', $line);
        }
        $xw->endElement(); // </file>
    }
    $xw->endElement(); // </directive>
}
$xw->endElement(); // </usage>
$xw->flush();

echo "done!\n";
155 // vim: et sw=4 sts=4