added referral source, made country a list, added contrastart
[openemr.git] / library / classes / Filtreatment_class.php
bloba7af6c5cba813af6ed46f56fa2e67b326ee861fd
1 <?php
/**
 * Filtreatment Class
 *
 * This class can be used to sanitize user input and prevent
 * most known vulnerabilities.
 *
 * @author Cristian Năvălici <lemonsoftware@gmail.com> http://www.lemonsoftware.eu
 * @version 1.2
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 * @package Filtreatment
 */
class Filtreatment {

    /**
     * Class constructor.
     *
     * Records, in the global MAGICQUOTES constant, whether magic_quotes_gpc
     * is active so that _fEmail() and _fSql() can undo the automatic slashing.
     *
     * @return void
     */
    public function __construct() {
        $this->filtreatment();
    }

    /**
     * PHP4-style constructor, kept so that code calling it explicitly keeps working.
     *
     * Defines MAGICQUOTES at most once per request; later calls are no-ops,
     * which avoids the "constant already defined" notice when several
     * instances are created.
     *
     * @return void
     */
    public function filtreatment() {
        if (defined('MAGICQUOTES')) {
            return;
        }

        // magic_quotes_gpc was removed in PHP 5.4 and get_magic_quotes_gpc()
        // itself is gone in PHP 8, so it is only consulted on old runtimes.
        $active = version_compare(PHP_VERSION, '5.4.0', '<')
            && function_exists('get_magic_quotes_gpc')
            && get_magic_quotes_gpc();

        define('MAGICQUOTES', (bool) $active);
    }

    /**
     * GENERAL FUNCTION FOR FILTERING
     *
     * Dispatches to the specialised filter selected by $flag:
     * <br />INT    - $param1 - min value, $param2 - max value
     * <br />FLOAT  - $param1 - min value, $param2 - max value
     * <br />HTML   - $param1 is the list of allowed tags in strip_tags() format, e.g. '<a><b>'
     * <br />STRING - $param1 is the set of allowed characters (regex class body); $param2 - 1 verify / 2 clean up
     * <br />EMAIL  - no parameters
     * <br />SQL    - $param1 is the database type (MYSQL or PGSQL, MYSQL when omitted)
     * <br />XSS    - $param1 is the character set
     *
     * A valid value can be confused with 'false' (e.g. 0), so failure is
     * reported with the predefined string 'INVALID' instead of a boolean.
     *
     * @access public
     * @param mixed  $input  - variable to sanitize
     * @param string $flag   - selects the filter to apply
     * @param mixed  $param1 - optional parameter (see above)
     * @param mixed  $param2 - optional parameter (see above)
     * @return mixed cleaned up value, or the string 'INVALID' when the input is rejected
     */
    public function doTreatment($input, $flag, $param1 = '', $param2 = '') {
        // trim() cannot process arrays; reject them instead of erroring out.
        if (is_array($input)) {
            return 'INVALID';
        }
        $input = trim((string) $input);

        switch ($flag) {
            case 'INT':
                $input_f = $this->_fInteger($input, $param1, $param2);
                break;

            case 'FLOAT':
                $input_f = $this->_fFloat($input, $param1, $param2);
                break;

            case 'HTML':
                $input_f = $this->_fHtml($input, $param1);
                break;

            case 'STRING':
                $input_f = $this->_fString($input, $param1, $param2);
                break;

            case 'EMAIL':
                $input_f = $this->_fEmail($input);
                break;

            case 'SQL':
                // Honour the caller's database type; MYSQL only when none was given.
                $db_type = ($param1 === '' || $param1 === null) ? 'MYSQL' : $param1;
                $input_f = $this->_fSql($input, $db_type);
                break;

            case 'XSS':
                $input_f = $this->_fXss($input, $param1);
                break;

            default:
                $input_f = false;
        }

        // INVALID is the conventional failure marker.
        return ($input_f === null || is_bool($input_f)) ? 'INVALID' : $input_f;
    }

    /**
     * TREATMENT ONLY FOR INTEGERS
     *
     * Makes sure that $value is an integer (optionally signed decimal digits)
     * and optionally checks its boundaries.
     * <br />min and max values must be both provided or neither.
     *
     * @access private
     * @param string|int $value   - variable to sanitize
     * @param int        $val_min - minimum value (included in comparison)
     * @param int        $val_max - maximum value (included in comparison)
     * @return int|bool the integer value if it passed the conditions, FALSE otherwise
     */
    public function _fInteger($value, $val_min = null, $val_max = null) {
        if (!is_scalar($value)) {
            return false;
        }
        if (!is_int($value) && !preg_match('/^[+-]?[0-9]+$/D', (string) $value)) {
            return false;
        }

        $val_int = intval($value);
        if ($this->_hasBound($val_min) && $this->_hasBound($val_max)) {
            $val_min = intval($val_min);
            $val_max = intval($val_max);
            return (($val_int <= $val_max) && ($val_int >= $val_min)) ? $val_int : false;
        }

        return $val_int;
    }

    /**
     * TREATMENT ONLY FOR FLOAT
     *
     * Makes sure that $value is numeric and optionally checks its boundaries.
     * <br />min and max values must be both provided or neither.
     *
     * @access private
     * @param string|float $value   - variable to sanitize
     * @param float        $val_min - minimum value (included in comparison)
     * @param float        $val_max - maximum value (included in comparison)
     * @return float|bool the float value if it passed the conditions, FALSE otherwise
     */
    public function _fFloat($value, $val_min = null, $val_max = null) {
        if (!is_numeric($value)) {
            return false;
        }

        $val_float = floatval($value);
        if ($this->_hasBound($val_min) && $this->_hasBound($val_max)) {
            $val_min = floatval($val_min);
            $val_max = floatval($val_max);
            return (($val_float <= $val_max) && ($val_float >= $val_min)) ? $val_float : false;
        }

        return $val_float;
    }

    /**
     * TREATMENT FOR HTML STRINGS
     *
     * Strips all PHP/HTML tags except the allowed ones.
     *
     * @access private
     * @param string $value        - variable to sanitize
     * @param string $allowed_tags - allowed tags in strip_tags() format, e.g. '<a><b>'
     * @return string $value without the disallowed tags
     */
    public function _fHtml($value, $allowed_tags = '') {
        return strip_tags($value, $allowed_tags);
    }

    /**
     * TREATMENT FOR STRING WITH SPECIAL REGEXP EXPRESSIONS
     *
     * Checks a string against a set of allowed characters.
     *
     * @access private
     * @param string $value - variable to sanitize
     * @param string $regex - the allowed characters, in a special form:
     *        <br />it contains ONLY allowed characters, ANY other character makes the string invalid
     *        <br />it must NOT contain the begin/end delimiters /[ ... ]/
     *        <br />eg: 0-9, 0-9A-Za-z, AERS
     * @param int $cv - 1 (or omitted) verifies the string, 2 removes the disallowed characters
     * @return string|bool the string if the check succeeded ($cv = 1), the cleaned
     *        string ($cv = 2), or FALSE if the check failed or the pattern is unusable
     */
    public function _fString($value, $regex, $cv = '') {
        $delimiter = $this->_pickDelimiter((string) $regex);
        $regexfull = $delimiter . '[^' . $regex . ']' . $delimiter;

        // Clean-up mode. Booleans are excluded because true loosely equals
        // any non-zero number and historically meant "verify".
        if (!is_bool($cv) && $cv == 2) {
            $cleaned = preg_replace($regexfull, '', $value);
            return ($cleaned === null) ? false : $cleaned;
        }

        // Verify mode (also the default). preg_match() yields false when the
        // pattern cannot be compiled; that is treated as a failed check so a
        // broken pattern never lets input through unvalidated.
        return (preg_match($regexfull, $value) === 0) ? $value : false;
    }

    /**
     * SPECIALIZED FUNCTION FOR EMAIL VERIFICATION
     *
     * Validates the shape of an email address (loosely following RFC 2822).
     *
     * @access private
     * @param string $value - email to validate
     * @return string|bool FALSE if verification fails, the email address itself otherwise
     */
    public function _fEmail($value) {
        $value = $this->_unquote($value);

        // exactly one @ symbol and maximum allowed lengths
        if (!preg_match('/^[^@]{1,64}@[^@]{1,255}$/D', $value)) {
            return false;
        }

        $email_array = explode('@', $value);

        // every dot-separated piece of the local part is an atom or a quoted string
        $local_pattern = '/^(?:[A-Za-z0-9!#$%&\'*+\/=?^_`{|}~-][A-Za-z0-9!#$%&\'*+\/=?^_`{|}~.-]{0,63}'
            . '|"[^(\\\\|")]{0,62}")$/D';
        foreach (explode('.', $email_array[0]) as $local_part) {
            if (!preg_match($local_pattern, $local_part)) {
                return false;
            }
        }

        // If the domain is not an IP address it must be a valid domain name.
        if (!preg_match('/^\[?[0-9.]+\]?$/D', $email_array[1])) {
            $domain_array = explode('.', $email_array[1]);
            if (count($domain_array) < 2) {
                return false;
            }

            $label_pattern = '/^(?:[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9]|[A-Za-z0-9]+)$/D';
            foreach ($domain_array as $label) {
                if (!preg_match($label_pattern, $label)) {
                    return false;
                }
            }
        }

        return $value;
    }

    /**
     * SPECIALIZED FUNCTION STRINGS TREATMENT FOR MYSQL/POSTGRESQL DATABASES INPUT
     *
     * Defends against SQL injection techniques by escaping and quoting the
     * value. Numeric values are returned as they are, without quotes.
     *
     * NOTE(review): string escaping is a legacy defence; prepared statements
     * with bound parameters should be preferred wherever the caller allows.
     *
     * @access private
     * @param string $value   - value to sanitize
     * @param string $db_type - MYSQL | PGSQL (case-insensitive)
     * @return string|bool the sanitized value, or FALSE when the database type
     *        is unknown or no escaping routine is available for it
     */
    public function _fSql($value, $db_type = 'MYSQL') {
        $value = $this->_unquote($value);

        // Quote only if not a number or a numeric string.
        if (is_numeric($value)) {
            return $value;
        }

        switch (strtoupper((string) $db_type)) {
            case 'MYSQL':
                $escaped = false;
                if (function_exists('mysql_real_escape_string')) {
                    // Returns false when no connection is open.
                    $escaped = mysql_real_escape_string($value);
                }
                if ($escaped === false || $escaped === null) {
                    // The mysql extension is gone since PHP 7 and no connection
                    // handle is available here, so escape the same characters
                    // mysql_escape_string() did. This is not charset aware.
                    $escaped = strtr($value, array(
                        "\\"   => "\\\\",
                        "\0"   => "\\0",
                        "\n"   => "\\n",
                        "\r"   => "\\r",
                        "'"    => "\\'",
                        '"'    => '\\"',
                        "\x1a" => "\\Z",
                    ));
                }
                return "'" . $escaped . "'";

            case 'PGSQL':
            case 'POSTGRESQL':
                if (!function_exists('pg_escape_string')) {
                    return false;
                }
                return "'" . pg_escape_string($value) . "'";

            default:
                // Never hand back an unescaped value for an unknown database type.
                return false;
        }
    }

    /**
     * SPECIALIZED FUNCTION FOR CROSS-SITE SCRIPTING (XSS) CLEAN UP
     *
     * Neutralises markup and script fragments commonly used for XSS attacks.
     * The filter chain is derived from CodeIgniter's xss_clean().
     *
     * @access private
     * @since ver 1.1. 09 februarie 2007
     * @uses _html_entity_decode()
     * @param string $str     - string to check
     * @param string $charset - character set (default ISO-8859-1)
     * @return string|bool the sanitized value, or FALSE if the regex engine failed
     */
    public function _fXss($str, $charset = 'ISO-8859-1') {
        try {
            return $this->_xssClean((string) $str, $charset);
        } catch (RuntimeException $e) {
            // A PCRE failure means the input could not be fully inspected.
            return false;
        }
    }

    /**
     * HTML ENTITIES DECODE
     * merit goes to CodeIgniter - www.codeigniter.com
     *
     * html_entity_decode() does not convert numeric entities that lack the
     * trailing semicolon, although most browsers still interpret them, so
     * those are decoded here as well.
     *
     * @access private
     * @since ver 1.1. 09 februarie 2007
     * @param string $str     - string to decode
     * @param string $charset - character set (default ISO-8859-1)
     * @return string the decoded string
     */
    public function _html_entity_decode($str, $charset = 'ISO-8859-1') {
        if (strpos($str, '&') === false) {
            return $str;
        }

        $str = html_entity_decode($str, ENT_COMPAT, $charset);

        // Callbacks replace the /e modifier, which PHP 7 removed. The code
        // point is reduced to a single byte, as chr() historically did.
        $str = preg_replace_callback(
            '~&#x([0-9a-f]{2,5})~i',
            function ($m) {
                return chr(hexdec($m[1]) & 0xFF);
            },
            $str
        );

        return preg_replace_callback(
            '~&#([0-9]{2,4})~',
            function ($m) {
                return chr(((int) $m[1]) & 0xFF);
            },
            $str
        );
    }

    /**
     * Tells whether a range boundary was actually supplied (0 is a valid boundary).
     */
    private function _hasBound($bound) {
        return $bound !== null && $bound !== '';
    }

    /**
     * Undoes magic_quotes_gpc slashing when the runtime applied it.
     */
    private function _unquote($value) {
        return (defined('MAGICQUOTES') && MAGICQUOTES) ? stripslashes($value) : $value;
    }

    /**
     * Picks a regex delimiter that does not occur in the caller-supplied character set.
     */
    private function _pickDelimiter($regex) {
        foreach (array('/', '#', '~', '%', '!', '@') as $candidate) {
            if (strpos($regex, $candidate) === false) {
                return $candidate;
            }
        }
        return '/';
    }

    /**
     * preg_replace() wrapper that raises RuntimeException instead of returning null on failure.
     */
    private function _pregReplace($pattern, $replacement, $subject) {
        $result = preg_replace($pattern, $replacement, $subject);
        if ($result === null) {
            throw new RuntimeException('PCRE failure while sanitizing input');
        }
        return $result;
    }

    /**
     * Applies $pattern in UTF-8 mode, falling back to byte mode when the subject is not valid UTF-8.
     */
    private function _pregReplaceUtf8($pattern, $replacement, $subject) {
        $result = preg_replace($pattern . 'u', $replacement, $subject);
        if ($result === null) {
            $result = $this->_pregReplace($pattern, $replacement, $subject);
        }
        return $result;
    }

    /**
     * Runs the XSS filter chain; throws RuntimeException when a regex step fails.
     */
    private function _xssClean($str, $charset) {
        /*
         * Remove null characters. This prevents sandwiching null characters
         * between ascii characters, like Java\0script.
         */
        $str = $this->_pregReplace('/\0+/', '', $str);
        $str = $this->_pregReplace('/(\\\\0)+/', '', $str);

        /*
         * Validate standard character entities and UTF16 two byte encoding
         * (x00): add a semicolon if missing, so that the entities can be
         * converted to ASCII later.
         */
        $str = $this->_pregReplaceUtf8('#(&\#*\w+)[\x00-\x20]+;#', "\\1;", $str);
        $str = $this->_pregReplaceUtf8('#(&\#x*)([0-9A-F]+);*#i', "\\1\\2;", $str);

        /*
         * URL decode, in case percent-encoded links are submitted.
         * Note: urldecode() would be easier but it removes plus signs.
         */
        $str = $this->_pregReplace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str);
        $str = $this->_pregReplace("/%([a-z0-9]{2})/i", "&#x\\1;", $str);

        /*
         * Convert character entities to ASCII so the tests below work
         * reliably. Only entities inside tags are converted, since these
         * are the ones that pose security problems.
         */
        if (preg_match_all("/<(.+?)>/si", $str, $matches)) {
            foreach ($matches[1] as $tag_content) {
                $str = str_replace(
                    $tag_content,
                    $this->_html_entity_decode($tag_content, $charset),
                    $str
                );
            }
        }

        /*
         * Convert all tabs to spaces (prevents "ja	vascript").
         * Spaces between characters are dealt with later.
         */
        $str = $this->_pregReplace("#\t+#", " ", $str);

        /*
         * Make PHP tags safe. XML declarations are inadvertently replaced
         * too, but it does not seem to pose a problem.
         */
        $str = str_replace(
            array('<?php', '<?PHP', '<?', '?>'),
            array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'),
            $str
        );

        /*
         * Compact any exploded words, e.g. "j a v a s c r i p t" is
         * compacted back to its correct state.
         */
        $words = array('javascript', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
        foreach ($words as $word) {
            $spaced = implode('\s*', str_split($word));
            $str = $this->_pregReplace('#' . $spaced . '#s', $word, $str);
            $str = $this->_pregReplace('#' . ucfirst($spaced) . '#s', ucfirst($word), $str);
        }

        /*
         * Remove disallowed Javascript in links or img tags.
         */
        $str = $this->_pregReplace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si", "", $str);
        $str = $this->_pregReplace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si", "", $str);
        $str = $this->_pregReplace("#<(script|xss).*?\>#si", "", $str);

        /*
         * Remove JavaScript event handlers. This is a little blunt: it
         * removes the event handler and anything up to the closing >.
         */
        $str = $this->_pregReplace('#(<[^>]+.*?)(onblur|onchange|onclick|onfocus|onload|onmouseover|onmouseup|onmousedown|onselect|onsubmit|onunload|onkeypress|onkeydown|onkeyup|onresize)[^>]*>#iU', "\\1>", $str);

        /*
         * Sanitize naughty HTML elements: a tag containing any of the words
         * in the list below gets converted to entities.
         */
        $str = $this->_pregReplace('#<(/*\s*)(alert|applet|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|layer|link|meta|object|plaintext|style|script|textarea|title|xml|xss)([^>]*)>#is', "&lt;\\1\\2\\3&gt;", $str);

        /*
         * Sanitize naughty scripting elements: the parentheses of disallowed
         * PHP and JavaScript calls are converted to entities, rendering the
         * code un-executable.
         */
        $str = $this->_pregReplace('#(alert|cmd|passthru|eval|exec|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

        /*
         * Final clean up: a bit of extra precaution in case something got
         * through the above filters. The keys are regex fragments.
         */
        $bad = array(
            'document.cookie' => '',
            'document.write'  => '',
            'window.location' => '',
            "javascript\s*:"  => '',
            "Redirect\s+302"  => '',
            '<!--'            => '&lt;!--',
            '-->'             => '--&gt;'
        );

        foreach ($bad as $key => $val) {
            $str = $this->_pregReplace("#" . $key . "#i", $val, $str);
        }

        return $str;
    }

} // class