avoid exif_imagetype exception with small files/corrupt data URI
[htmlpurifier.git] / library / HTMLPurifier / URIScheme / data.php
blob41c49d5533f173142bb170ce9d09f372838bb105
1 <?php
3 /**
4 * Implements data: URI for base64 encoded images supported by GD.
5 */
6 class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
/**
 * @type bool
 */
public $browsable = true;

/**
 * Content types accepted by doValidate(), keyed by MIME type string.
 * A type counts as allowed when its entry is non-empty.
 * @type array
 */
public $allowed_types = array(
    // you better write validation code for other types if you
    // decide to allow them
    'image/jpeg' => true,
    'image/gif' => true,
    'image/png' => true,
);

// this is actually irrelevant since we only write out the path
// component
/**
 * @type bool
 */
public $may_omit_host = true;
/**
 * Validates a data: URI and rewrites it into a canonical base64 form.
 *
 * The URI path is split at the first comma into metadata and payload.
 * The payload is URL-decoded (and base64-decoded when the metadata says
 * so), written to a temporary file and sniffed with exif_imagetype() or
 * getimagesize(). The URI is accepted only when the detected MIME type is
 * listed in $allowed_types; on success every component except the path is
 * cleared and the path becomes "<detected type>;base64,<payload>".
 *
 * $config and $context are accepted for interface compatibility but are
 * not read by this method.
 *
 * @param HTMLPurifier_URI $uri
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool true when the URI was accepted and rewritten, false otherwise
 */
public function doValidate(&$uri, $config, $context)
{
    $result = explode(',', $uri->path, 2);
    $is_base64 = false;
    $content_type = null;
    if (count($result) === 2) {
        list($metadata, $data) = $result;
        // do some legwork on the metadata
        foreach (explode(';', $metadata) as $cur) {
            if ($cur === 'base64') {
                // everything after the base64 marker is ignored
                $is_base64 = true;
                break;
            }
            if (substr($cur, 0, 8) === 'charset=') {
                // doesn't match if there are arbitrary spaces; the charset
                // is discarded anyway because plaintext is not allowed
                continue;
            }
            if ($content_type === null) {
                // first non-charset token wins, later ones are garbage
                $content_type = $cur;
            }
        }
    } else {
        // no comma: treat the whole path as the payload
        $data = $result[0];
    }
    if ($content_type !== null && empty($this->allowed_types[$content_type])) {
        return false;
    }
    $data = rawurldecode($data);
    $raw_data = $is_base64 ? base64_decode($data) : $data;
    if ($raw_data === false || strlen($raw_data) < 12) {
        // error; exif_imagetype complains about small files,
        // and this likely indicates a corrupt URI/failed parse anyway
        return false;
    }
    // Check for a sniffing function before touching the filesystem so
    // that no temporary file is left behind when neither is available.
    $has_exif = function_exists('exif_imagetype');
    if (!$has_exif && !function_exists('getimagesize')) {
        trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
        // only reached when a custom error handler returns
        return false;
    }
    // XXX probably want to refactor this into a general mechanism
    // for filtering arbitrary content types
    $tmp_dir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
    $file = tempnam($tmp_dir, "");
    if ($file === false) {
        return false;
    }
    if (file_put_contents($file, $raw_data) === false) {
        unlink($file);
        return false;
    }
    // Mute diagnostics from both sniffers so that a global handler which
    // converts errors into exceptions cannot skip the unlink() below.
    set_error_handler(array($this, 'muteErrorHandler'));
    if ($has_exif) {
        $image_code = exif_imagetype($file);
    } else {
        $info = getimagesize($file);
        $image_code = $info ? $info[2] : false;
    }
    restore_error_handler();
    unlink($file);
    if ($image_code === false) {
        // payload is not a recognisable image
        return false;
    }
    $real_content_type = image_type_to_mime_type($image_code);
    if ($real_content_type !== $content_type) {
        // we're nice guys; if the content type is something else we
        // support, change it over
        if (empty($this->allowed_types[$real_content_type])) {
            return false;
        }
        $content_type = $real_content_type;
    }
    // ok, it's kosher, rewrite what we need
    $uri->userinfo = null;
    $uri->host = null;
    $uri->port = null;
    $uri->fragment = null;
    $uri->query = null;
    $uri->path = "$content_type;base64," . base64_encode($raw_data);
    return true;
}
130 * @param int $errno
131 * @param string $errstr
133 public function muteErrorHandler($errno, $errstr)