// Switch to this script's own directory so that the relative paths used later
// resolve the same way regardless of where the script was launched from.
chdir(dirname(__FILE__));
require_once 'common.php';
 * Parses *.ent files into an entity lookup table, then serializes the table
 * and writes it to a file. The resulting file should be kept under version control.
// Directory containing the *.ent entity definitions, relative to the working
// directory (which is this script's own directory). The trailing slash is
// required because file names are appended to this value directly.
$entity_dir = '../docs/entities/';

// Destination path for the serialized entity lookup table.
$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * Adapted from a PHP manual comment and extended with a four-byte branch so
 * that code points of 65536 and above are encoded correctly instead of being
 * squeezed into a malformed three-byte sequence.
 *
 * @param int|string $dec Code point in decimal. Numeric strings (for example
 *                        regular-expression captures) are converted to int.
 * @return string The UTF-8 encoding: one byte below 128, two bytes below 2048,
 *                three bytes below 65536 and four bytes otherwise.
 */
function unichr($dec) {
    // Callers pass regex captures, so normalise to an integer before doing
    // any bit arithmetic.
    $dec = (int) $dec;

    if ($dec < 128) {
        // ASCII range: the code point is its own single byte.
        return chr($dec);
    }
    if ($dec < 2048) {
        // 110xxxxx 10xxxxxx
        return chr(192 + ($dec >> 6))
             . chr(128 + ($dec & 63));
    }
    if ($dec < 65536) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        return chr(224 + ($dec >> 12))
             . chr(128 + (($dec >> 6) & 63))
             . chr(128 + ($dec & 63));
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(240 + ($dec >> 18))
         . chr(128 + (($dec >> 12) & 63))
         . chr(128 + (($dec >> 6) & 63))
         . chr(128 + ($dec & 63));
}
// Abort early when the input directory is missing or when the output file is
// already present; an existing output file is never overwritten.
if (!is_dir($entity_dir)) {
    exit("Fatal Error: Can't find entity directory.\n");
}
if (file_exists($output_file)) {
    // Name the path that was actually checked so the message matches reality.
    exit("Fatal Error: $output_file already exists.\n");
}

// The warning from opendir() is suppressed because a failure is reported
// explicitly on the next line.
$dh = @opendir($entity_dir);
if (!$dh) {
    exit("Fatal Error: Cannot read entity directory.\n");
}
// Collect the names of all *.ent files in the entity directory, skipping
// hidden entries as well as the "." and ".." pseudo-entries.
$entity_files = array();
while (($file = readdir($dh)) !== false) {
    if ($file[0] === '.') {
        continue;
    }
    if (substr(strrchr($file, "."), 1) !== 'ent') {
        continue;
    }
    $entity_files[] = $file;
}
// Release the directory handle now that the listing has been read.
closedir($dh);

// readdir() yields entries in filesystem order; sort them so the generated
// (versioned) table comes out identically on every machine.
sort($entity_files);

if (!$entity_files) {
    exit("Fatal Error: No entity files to parse.\n");
}
// Build the "entity name => UTF-8 character" table from every declaration
// found in the collected files.
$entity_table = array();

// Capture 1 is the entity name, capture 2 its decimal code point. Names may
// contain digits (e.g. sup2, frac12, there4), so digits are part of the name
// class. The optional "38;#" group accepts double-escaped references such as
// "&#38;#60;".
$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';

foreach ($entity_files as $file) {
    $contents = file_get_contents($entity_dir . $file);
    if ($contents === false) {
        // Without this check an unreadable file would silently contribute
        // zero entities.
        exit("Fatal Error: Cannot read entity file $file.\n");
    }

    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        $entity_table[$match[1]] = unichr($match[2]);
    }
}
// Serialize the finished table and write it out in one step. Success is only
// reported when every byte has reached the output file.
$output = serialize($entity_table);

if (file_put_contents($output_file, $output) !== strlen($output)) {
    exit("Fatal Error: Could not write $output_file.\n");
}

echo "Completed successfully.";