[1.2.0]
[htmlpurifier.git] / maintenance / generate-entity-file.php
blob283650cddacd21323204126d73ae96c070dacfa9
1 #!/usr/bin/php
2 <?php
4 /**
5 * Parses *.ent files into an entity lookup table, and then serializes and
6 * writes the whole kaboodle to a file. The resulting file should be versioned.
7 */
// Work from this script's own directory so that the relative paths below
// resolve the same way regardless of where the script was launched from.
chdir(dirname(__FILE__));

// Destination file for the serialized entity lookup table.
$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';

// Directory containing the *.ent source files, relative to this script.
// The trailing slash is required because file names are appended directly.
$entity_dir = '../docs/entities/';
/**
 * Encodes a Unicode code point as a UTF-8 byte sequence.
 *
 * Adapted from a PHP manual comment. The original stopped at the three-byte
 * form, so code points above U+FFFF were encoded incorrectly; the four-byte
 * form is handled here as well. Results for 0..0xFFFF are unchanged.
 *
 * The value is not validated: negative numbers, surrogates and values above
 * 0x10FFFF are not rejected and do not produce well-formed UTF-8.
 *
 * @param int|string $dec Code point, as an integer or a decimal numeric
 *                        string (e.g. a regex capture).
 * @return string The UTF-8 encoding of the code point (one to four bytes).
 */
function unichr($dec) {
    // Captured regex groups arrive as strings; normalise once so the bit
    // operations below always act on an integer.
    $dec = (int) $dec;

    if ($dec < 0x80) {
        // 0xxxxxxx
        return chr($dec);
    }
    if ($dec < 0x800) {
        // 110xxxxx 10xxxxxx
        return chr(0xC0 | ($dec >> 6))
             . chr(0x80 | ($dec & 0x3F));
    }
    if ($dec < 0x10000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($dec >> 12))
             . chr(0x80 | (($dec >> 6) & 0x3F))
             . chr(0x80 | ($dec & 0x3F));
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($dec >> 18))
         . chr(0x80 | (($dec >> 12) & 0x3F))
         . chr(0x80 | (($dec >> 6) & 0x3F))
         . chr(0x80 | ($dec & 0x3F));
}
// Pre-flight checks. Failures are reported on STDERR and end the script with
// a non-zero status so that calling tools can detect them.
if (!is_dir($entity_dir)) {
    fwrite(STDERR, "Fatal Error: Can't find entity directory.\n");
    exit(1);
}
// Refuse to overwrite an existing output file; name the file that is
// actually being guarded.
if (file_exists($output_file)) {
    fwrite(STDERR, 'Fatal Error: ' . basename($output_file) . " already exists.\n");
    exit(1);
}

$dh = opendir($entity_dir);
if (!$dh) {
    fwrite(STDERR, "Fatal Error: Cannot read entity directory.\n");
    exit(1);
}

// Collect every non-hidden file whose extension is exactly "ent".
$entity_files = array();
while (($file = readdir($dh)) !== false) {
    if ($file[0] === '.') {
        continue; // skips ".", ".." and hidden files
    }
    if (pathinfo($file, PATHINFO_EXTENSION) !== 'ent') {
        continue;
    }
    $entity_files[] = $file;
}
closedir($dh);

// readdir() returns entries in filesystem order; sort so that the files are
// always processed in the same order.
sort($entity_files);

if (!$entity_files) {
    fwrite(STDERR, "Fatal Error: No entity files to parse.\n");
    exit(1);
}
// Build the lookup table: entity name => UTF-8 encoded character.
$entity_table = array();

// Matches declarations of the form <!ENTITY name "&#NNN;">.
//  - Names start with a letter and may continue with letters or digits, so
//    entities such as sup2, frac12 and there4 are picked up as well.
//  - The optional "38;#" accepts values written as "&#38;#NNN;".
$regexp = '/<!ENTITY\s+([A-Za-z][A-Za-z0-9]*)\s+"&#(?:38;#)?([0-9]+);">/';

foreach ($entity_files as $file) {
    $contents = file_get_contents($entity_dir . $file);
    if ($contents === false) {
        fwrite(STDERR, "Fatal Error: Cannot read entity file $file.\n");
        exit(1);
    }

    $matches = array();
    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);

    // $match[1] is the entity name, $match[2] its decimal code point. A name
    // seen again later overwrites the earlier entry.
    foreach ($matches as $match) {
        $entity_table[$match[1]] = unichr((int) $match[2]);
    }
}
// Serialize the table and write it out in one step. A short or failed write
// is reported instead of being followed by a success message.
$output = serialize($entity_table);

$written = file_put_contents($output_file, $output);
if ($written !== strlen($output)) {
    fwrite(STDERR, "Fatal Error: Cannot write output file.\n");
    exit(1);
}

// Ends with a newline, like every other message this script prints.
echo "Completed successfully.\n";