2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2013 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Derick Rethans <derick@php.net> |
16 | Pierre-A. Joye <pierre@php.net> |
17 +----------------------------------------------------------------------+
22 #include "php_filter.h"
23 #include "filter_private.h"
24 #include "ext/standard/url.h"
25 #include "ext/pcre/php_pcre.h"
27 #include "zend_multiply.h"
30 # include <arpa/inet.h>
34 # define INADDR_NONE ((unsigned long int) -1)
38 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
39 #define FETCH_LONG_OPTION(var_name, option_name) \
43 if (zend_hash_find(HASH_OF(option_array), option_name, sizeof(option_name), (void **) &option_val) == SUCCESS) { \
44 PHP_FILTER_GET_LONG_OPT(option_val, var_name); \
50 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
51 #define FETCH_STRING_OPTION(var_name, option_name) \
56 if (zend_hash_find(HASH_OF(option_array), option_name, sizeof(option_name), (void **) &option_val) == SUCCESS) { \
57 if (Z_TYPE_PP(option_val) == IS_STRING) { \
58 var_name = Z_STRVAL_PP(option_val); \
59 var_name##_len = Z_STRLEN_PP(option_val); \
69 static int php_filter_parse_int(const char *str
, unsigned int str_len
, long *ret TSRMLS_DC
) { /* {{{ */
71 int sign
= 0, digit
= 0;
72 const char *end
= str
+ str_len
;
83 if (*str
== '0' && str
+ 1 == end
) {
84 /* Special cases: +0 and -0 */
88 /* must start with 1..9*/
89 if (str
< end
&& *str
>= '1' && *str
<= '9') {
90 ctx_value
= ((sign
)?-1:1) * ((*(str
++)) - '0');
95 if ((end
- str
> MAX_LENGTH_OF_LONG
- 1) /* number too long */
96 || (SIZEOF_LONG
== 4 && (end
- str
== MAX_LENGTH_OF_LONG
- 1) && *str
> '2')) {
102 if (*str
>= '0' && *str
<= '9') {
103 digit
= (*(str
++) - '0');
104 if ( (!sign
) && ctx_value
<= (LONG_MAX
-digit
)/10 ) {
105 ctx_value
= (ctx_value
* 10) + digit
;
106 } else if ( sign
&& ctx_value
>= (LONG_MIN
+digit
)/10) {
107 ctx_value
= (ctx_value
* 10) - digit
;
121 static int php_filter_parse_octal(const char *str
, unsigned int str_len
, long *ret TSRMLS_DC
) { /* {{{ */
122 unsigned long ctx_value
= 0;
123 const char *end
= str
+ str_len
;
126 if (*str
>= '0' && *str
<= '7') {
127 unsigned long n
= ((*(str
++)) - '0');
129 if ((ctx_value
> ((unsigned long)(~(long)0)) / 8) ||
130 ((ctx_value
= ctx_value
* 8) > ((unsigned long)(~(long)0)) - n
)) {
139 *ret
= (long)ctx_value
;
144 static int php_filter_parse_hex(const char *str
, unsigned int str_len
, long *ret TSRMLS_DC
) { /* {{{ */
145 unsigned long ctx_value
= 0;
146 const char *end
= str
+ str_len
;
150 if (*str
>= '0' && *str
<= '9') {
151 n
= ((*(str
++)) - '0');
152 } else if (*str
>= 'a' && *str
<= 'f') {
153 n
= ((*(str
++)) - ('a' - 10));
154 } else if (*str
>= 'A' && *str
<= 'F') {
155 n
= ((*(str
++)) - ('A' - 10));
159 if ((ctx_value
> ((unsigned long)(~(long)0)) / 16) ||
160 ((ctx_value
= ctx_value
* 16) > ((unsigned long)(~(long)0)) - n
)) {
166 *ret
= (long)ctx_value
;
171 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
174 long min_range
, max_range
, option_flags
;
175 int min_range_set
, max_range_set
;
176 int allow_octal
= 0, allow_hex
= 0;
182 FETCH_LONG_OPTION(min_range
, "min_range");
183 FETCH_LONG_OPTION(max_range
, "max_range");
184 option_flags
= flags
;
186 len
= Z_STRLEN_P(value
);
189 RETURN_VALIDATION_FAILED
192 if (option_flags
& FILTER_FLAG_ALLOW_OCTAL
) {
196 if (option_flags
& FILTER_FLAG_ALLOW_HEX
) {
200 /* Start the validating loop */
201 p
= Z_STRVAL_P(value
);
204 PHP_FILTER_TRIM_DEFAULT(p
, len
);
208 if (allow_hex
&& (*p
== 'x' || *p
== 'X')) {
210 if (php_filter_parse_hex(p
, len
, &ctx_value TSRMLS_CC
) < 0) {
213 } else if (allow_octal
) {
214 if (php_filter_parse_octal(p
, len
, &ctx_value TSRMLS_CC
) < 0) {
217 } else if (len
!= 0) {
221 if (php_filter_parse_int(p
, len
, &ctx_value TSRMLS_CC
) < 0) {
226 if (error
> 0 || (min_range_set
&& (ctx_value
< min_range
)) || (max_range_set
&& (ctx_value
> max_range
))) {
227 RETURN_VALIDATION_FAILED
230 Z_TYPE_P(value
) = IS_LONG
;
231 Z_LVAL_P(value
) = ctx_value
;
237 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
239 char *str
= Z_STRVAL_P(value
);
240 int len
= Z_STRLEN_P(value
);
243 PHP_FILTER_TRIM_DEFAULT_EX(str
, len
, 0);
245 /* returns true for "1", "true", "on" and "yes"
246 * returns false for "0", "false", "off", "no", and "" */
255 } else if (*str
== '0') {
262 if (strncasecmp(str
, "on", 2) == 0) {
264 } else if (strncasecmp(str
, "no", 2) == 0) {
271 if (strncasecmp(str
, "yes", 3) == 0) {
273 } else if (strncasecmp(str
, "off", 3) == 0) {
280 if (strncasecmp(str
, "true", 4) == 0) {
287 if (strncasecmp(str
, "false", 5) == 0) {
298 RETURN_VALIDATION_FAILED
301 ZVAL_BOOL(value
, ret
);
306 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
314 int decimal_set
, decimal_len
;
316 char tsd_sep
[3] = "',.";
323 len
= Z_STRLEN_P(value
);
324 str
= Z_STRVAL_P(value
);
326 PHP_FILTER_TRIM_DEFAULT(str
, len
);
329 FETCH_STRING_OPTION(decimal
, "decimal");
332 if (decimal_len
!= 1) {
333 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "decimal separator must be one char");
334 RETURN_VALIDATION_FAILED
340 num
= p
= emalloc(len
+1);
341 if (str
< end
&& (*str
== '+' || *str
== '-')) {
347 while (str
< end
&& *str
>= '0' && *str
<= '9') {
351 if (str
== end
|| *str
== dec_sep
|| *str
== 'e' || *str
== 'E') {
352 if (!first
&& n
!= 3) {
355 if (*str
== dec_sep
) {
358 while (str
< end
&& *str
>= '0' && *str
<= '9') {
362 if (*str
== 'e' || *str
== 'E') {
364 if (str
< end
&& (*str
== '+' || *str
== '-')) {
367 while (str
< end
&& *str
>= '0' && *str
<= '9') {
373 if ((flags
& FILTER_FLAG_ALLOW_THOUSAND
) && (*str
== tsd_sep
[0] || *str
== tsd_sep
[1] || *str
== tsd_sep
[2])) {
374 if (first
?(n
< 1 || n
> 3):(n
!= 3)) {
388 switch (is_numeric_string(num
, p
- num
, &lval
, &dval
, 0)) {
391 Z_TYPE_P(value
) = IS_DOUBLE
;
392 Z_DVAL_P(value
) = lval
;
395 if ((!dval
&& p
- num
> 1 && strpbrk(num
, "123456789")) || !zend_finite(dval
)) {
399 Z_TYPE_P(value
) = IS_DOUBLE
;
400 Z_DVAL_P(value
) = dval
;
405 RETURN_VALIDATION_FAILED
411 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
417 int regexp_set
, option_flags_set
;
420 pcre_extra
*pcre_extra
= NULL
;
421 int preg_options
= 0;
427 FETCH_STRING_OPTION(regexp
, "regexp");
428 FETCH_LONG_OPTION(option_flags
, "flags");
431 php_error_docref(NULL TSRMLS_CC
, E_WARNING
, "'regexp' option missing");
432 RETURN_VALIDATION_FAILED
435 re
= pcre_get_compiled_regex(regexp
, &pcre_extra
, &preg_options TSRMLS_CC
);
437 RETURN_VALIDATION_FAILED
439 matches
= pcre_exec(re
, NULL
, Z_STRVAL_P(value
), Z_STRLEN_P(value
), 0, 0, ovector
, 3);
441 /* 0 means that the vector is too small to hold all the captured substring offsets */
443 RETURN_VALIDATION_FAILED
448 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
451 int old_len
= Z_STRLEN_P(value
);
453 php_filter_url(value
, flags
, option_array
, charset TSRMLS_CC
);
455 if (Z_TYPE_P(value
) != IS_STRING
|| old_len
!= Z_STRLEN_P(value
)) {
456 RETURN_VALIDATION_FAILED
459 /* Use parse_url - if it returns false, we return NULL */
460 url
= php_url_parse_ex(Z_STRVAL_P(value
), Z_STRLEN_P(value
));
463 RETURN_VALIDATION_FAILED
466 if (url
->scheme
!= NULL
&& (!strcasecmp(url
->scheme
, "http") || !strcasecmp(url
->scheme
, "https"))) {
469 if (url
->host
== NULL
) {
473 e
= url
->host
+ strlen(url
->host
);
476 /* First char of hostname must be alphanumeric */
477 if(!isalnum((int)*(unsigned char *)s
)) {
482 if (!isalnum((int)*(unsigned char *)s
) && *s
!= '-' && *s
!= '.') {
488 if (*(e
- 1) == '.') {
494 url
->scheme
== NULL
||
495 /* some schemes allow the host to be empty */
496 (url
->host
== NULL
&& (strcmp(url
->scheme
, "mailto") && strcmp(url
->scheme
, "news") && strcmp(url
->scheme
, "file"))) ||
497 ((flags
& FILTER_FLAG_PATH_REQUIRED
) && url
->path
== NULL
) || ((flags
& FILTER_FLAG_QUERY_REQUIRED
) && url
->query
== NULL
)
501 RETURN_VALIDATION_FAILED
507 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
510 /* The regex below is based on a regex by Michael Rushton.
511 * However, it is not identical. I changed it to only consider routable
512 * addresses as valid. Michael's regex considers a@b a valid address
513 * which conflicts with section 2.3.5 of RFC 5321 which states that:
515 * Only resolvable, fully-qualified domain names (FQDNs) are permitted
516 * when domain names are used in SMTP. In other words, names that can
517 * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
518 * in Section 5) are permitted, as are CNAME RRs whose targets can be
519 * resolved, in turn, to MX or address RRs. Local nicknames or
520 * unqualified names MUST NOT be used.
522 * This regex does not handle comments and folding whitespace. While
523 * these are technically valid in an email address, these parts aren't
524 * actually part of the address itself.
526 * Michael's regex carries this copyright:
528 * Copyright © Michael Rushton 2009-10
529 * http://squiloople.com/
530 * Feel free to use and redistribute this code. But please keep this copyright notice. */
533 const char regexp
[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
536 pcre_extra
*pcre_extra
= NULL
;
537 int preg_options
= 0;
538 int ovector
[150]; /* Needs to be a multiple of 3 */
542 /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
543 if (Z_STRLEN_P(value
) > 320) {
544 RETURN_VALIDATION_FAILED
547 re
= pcre_get_compiled_regex((char *)regexp
, &pcre_extra
, &preg_options TSRMLS_CC
);
549 RETURN_VALIDATION_FAILED
551 matches
= pcre_exec(re
, NULL
, Z_STRVAL_P(value
), Z_STRLEN_P(value
), 0, 0, ovector
, 3);
553 /* 0 means that the vector is too small to hold all the captured substring offsets */
555 RETURN_VALIDATION_FAILED
561 static int _php_filter_validate_ipv4(char *str
, int str_len
, int *ip
) /* {{{ */
563 const char *end
= str
+ str_len
;
569 if (*str
< '0' || *str
> '9') {
572 leading_zero
= (*str
== '0');
574 num
= ((*(str
++)) - '0');
575 while (str
< end
&& (*str
>= '0' && *str
<= '9')) {
576 num
= num
* 10 + ((*(str
++)) - '0');
577 if (num
> 255 || ++m
> 3) {
581 /* don't allow a leading 0; that introduces octal numbers,
582 * which we don't support */
583 if (leading_zero
&& (num
!= 0 || m
> 1))
588 } else if (str
>= end
|| *(str
++) != '.') {
596 static int _php_filter_validate_ipv6(char *str
, int str_len TSRMLS_DC
) /* {{{ */
606 if (!memchr(str
, ':', str_len
)) {
610 /* check for bundled IPv4 */
611 ipv4
= memchr(str
, '.', str_len
);
613 while (ipv4
> str
&& *(ipv4
-1) != ':') {
617 if (!_php_filter_validate_ipv4(ipv4
, (str_len
- (ipv4
- str
)), ip4elm
)) {
621 str_len
= ipv4
- str
; /* length excluding ipv4 */
626 if (ipv4
[-2] != ':') {
627 /* don't include : before ipv4 unless it's a :: */
639 /* cannot end in : without previous : */
646 blocks
++; /* :: means 1 or more 16-bit 0 blocks */
650 return (blocks
<= 8);
652 } else if ((str
- 1) == s
) {
653 /* don't allow a leading : without another : following */
658 while ((str
< end
) &&
659 ((*str
>= '0' && *str
<= '9') ||
660 (*str
>= 'a' && *str
<= 'f') ||
661 (*str
>= 'A' && *str
<= 'F'))) {
665 if (n
< 1 || n
> 4) {
671 return ((compressed
&& blocks
<= 8) || blocks
== 8);
675 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL
) /* {{{ */
677 /* Validates an IPv4 or IPv6 address, based on the flags (IPv4, IPv6, or
678 * both); further flags throw out private and reserved ranges (multicast
679 * ranges, etc.). If both FILTER_FLAG_IPV4 and FILTER_FLAG_IPV6 are used,
680 * then the first dot or colon determines the format. */
685 if (memchr(Z_STRVAL_P(value
), ':', Z_STRLEN_P(value
))) {
687 } else if (memchr(Z_STRVAL_P(value
), '.', Z_STRLEN_P(value
))) {
690 RETURN_VALIDATION_FAILED
693 if ((flags
& FILTER_FLAG_IPV4
) && (flags
& FILTER_FLAG_IPV6
)) {
694 /* Both formats are cool */
695 } else if ((flags
& FILTER_FLAG_IPV4
) && mode
== FORMAT_IPV6
) {
696 RETURN_VALIDATION_FAILED
697 } else if ((flags
& FILTER_FLAG_IPV6
) && mode
== FORMAT_IPV4
) {
698 RETURN_VALIDATION_FAILED
703 if (!_php_filter_validate_ipv4(Z_STRVAL_P(value
), Z_STRLEN_P(value
), ip
)) {
704 RETURN_VALIDATION_FAILED
708 if (flags
& FILTER_FLAG_NO_PRIV_RANGE
) {
711 (ip
[0] == 172 && (ip
[1] >= 16 && ip
[1] <= 31)) ||
712 (ip
[0] == 192 && ip
[1] == 168)
714 RETURN_VALIDATION_FAILED
718 if (flags
& FILTER_FLAG_NO_RES_RANGE
) {
721 (ip
[0] == 128 && ip
[1] == 0) ||
722 (ip
[0] == 191 && ip
[1] == 255) ||
723 (ip
[0] == 169 && ip
[1] == 254) ||
724 (ip
[0] == 192 && ip
[1] == 0 && ip
[2] == 2) ||
725 (ip
[0] == 127 && ip
[1] == 0 && ip
[2] == 0 && ip
[3] == 1) ||
726 (ip
[0] >= 224 && ip
[0] <= 255)
728 RETURN_VALIDATION_FAILED
736 res
= _php_filter_validate_ipv6(Z_STRVAL_P(value
), Z_STRLEN_P(value
) TSRMLS_CC
);
738 RETURN_VALIDATION_FAILED
741 if (flags
& FILTER_FLAG_NO_PRIV_RANGE
) {
742 if (Z_STRLEN_P(value
) >=2 && (!strncasecmp("FC", Z_STRVAL_P(value
), 2) || !strncasecmp("FD", Z_STRVAL_P(value
), 2))) {
743 RETURN_VALIDATION_FAILED
746 if (flags
& FILTER_FLAG_NO_RES_RANGE
) {
747 switch (Z_STRLEN_P(value
)) {
751 if (!strcmp("::", Z_STRVAL_P(value
))) {
752 RETURN_VALIDATION_FAILED
756 if (!strcmp("::1", Z_STRVAL_P(value
)) || !strcmp("5f:", Z_STRVAL_P(value
))) {
757 RETURN_VALIDATION_FAILED
761 if (Z_STRLEN_P(value
) >= 5) {
763 !strncasecmp("fe8", Z_STRVAL_P(value
), 3) ||
764 !strncasecmp("fe9", Z_STRVAL_P(value
), 3) ||
765 !strncasecmp("fea", Z_STRVAL_P(value
), 3) ||
766 !strncasecmp("feb", Z_STRVAL_P(value
), 3)
768 RETURN_VALIDATION_FAILED
772 (Z_STRLEN_P(value
) >= 9 && !strncasecmp("2001:0db8", Z_STRVAL_P(value
), 9)) ||
773 (Z_STRLEN_P(value
) >= 2 && !strncasecmp("5f", Z_STRVAL_P(value
), 2)) ||
774 (Z_STRLEN_P(value
) >= 4 && !strncasecmp("3ff3", Z_STRVAL_P(value
), 4)) ||
775 (Z_STRLEN_P(value
) >= 8 && !strncasecmp("2001:001", Z_STRVAL_P(value
), 8))
777 RETURN_VALIDATION_FAILED
792 * vim600: noet sw=4 ts=4 fdm=marker
793 * vim<600: noet sw=4 ts=4