3 // This file is part of Moodle - http://moodle.org/
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
19 * About validateUrlSyntax():
20 * This function will verify if a http URL is formatted properly, returning
21 * either with true or false.
23 * I used rfc #2396 URI: Generic Syntax as my guide when creating the
24 * regular expression. For all the details see the comments below.
27 * validateUrlSyntax( url_to_check[, options])
29 * url_to_check - string - The url to check
31 * options - string - An optional string of options to set which parts of
32 * the url are required, optional, or not allowed. Each option
33 * must be followed by a "+" for required, "?" for optional, or
34 * "-" for not allowed.
36 * s - Scheme. Allows "+?-", defaults to "s?"
37 * H - http:// Allows "+?-", defaults to "H?"
38 * S - https:// (SSL). Allows "+?-", defaults to "S?"
39 * E - mailto: (email). Allows "+?-", defaults to "E-"
40 * F - ftp:// Allows "+?-", defaults to "F-"
41 * Dependent on scheme being enabled
42 * u - User section. Allows "+?-", defaults to "u?"
43 * P - Password in user section. Allows "+?-", defaults to "P?"
44 * Dependent on user section being enabled
45 * a - Address (ip or domain). Allows "+?-", defaults to "a+"
46 * I - Ip address. Allows "+?-", defaults to "I?"
47 * If I+, then domains are disabled
48 * If I-, then domains are required
49 * Dependent on address being enabled
50 * p - Port number. Allows "+?-", defaults to "p?"
51 * f - File path. Allows "+?-", defaults to "f?"
52 * q - Query section. Allows "+?-", defaults to "q?"
53 * r - Fragment (anchor). Allows "+?-", defaults to "r?"
55 * Paste the function code, or include_once() this template at the top of the page
56 * you wish to use this function.
61 * validateUrlSyntax('http://george@www.cnn.com/#top')
63 * validateUrlSyntax('https://games.yahoo.com:8080/board/chess.htm?move=true')
65 * validateUrlSyntax('http://www.hotmail.com/', 's+u-I-p-q-r-')
67 * validateUrlSyntax('/directory/file.php#top', 's-u-a-p-f+')
70 * if (validateUrlSyntax('http://www.canowhoopass.com/', 'u-'))
72 * echo 'URL SYNTAX IS VERIFIED';
74 * echo 'URL SYNTAX IS ILLEGAL';
83 * June 15th 2017 by Moodle.
84 * -Added rtmp:// option.
87 * -Added new TLD's - .jobs, .mobi, .post and .travel. They are official, but not yet active.
90 * -Fixed bug allowing empty username even when it was required
91 * -Changed and added a few options to add extra schemes
92 * -Added mailto: ftp:// and http:// options
93 * -https option was 'l' now it is 'S' (capital)
94 * -Added password option. Now passwords can be disabled while usernames are ok (for email)
95 * -IP Address option was 'i' now it is 'I' (capital)
96 * -Options are now case sensitive
97 * -Added validateEmailSyntax() and validateFtpSyntax() functions below
100 * -IP group range is more specific. Used to allow 0-299. Now it is 0-255
101 * -Port range more specific. Used to allow 0-69999. Now it is 0-65535
102 * -Fixed bug disallowing 'i-' option.
103 * -Changed license to GPL
106 * -Fixed bug disallowing 'l-' option. Thanks Dr. Cheap
109 * -Added options parameter to make it easier for people to plug the function in
110 * without needed to rework the code.
111 * -Split the example application away from the function
116 * -Easier to disable sections
117 * -Easier to port to other languages
118 * -Easier to port to verify email addresses
119 * -Uses only simple regular expressions so it is more portable
120 * -Follows RFC closer for domain names. Some "play" domains may break
121 * -Renamed from 'verifyUrl()' to 'validateUrlSyntax()'
122 * -Removed extra code which added 'http://' and trailing '/' if it was missing
123 * -That code was better suited for a massaging function, not verifying
125 * -Now splits up and forces '/path?query#fragment' order
126 * -No longer requires a path when using a query or fragment
129 * -Allowed port numbers above 9999. Now allows up to 69999
132 * -Added new top level domains
133 * -aero, coop, museum, name, info, biz, pro
139 * Intentional Limitations:
140 * -Does not verify url actually exists. Only validates the syntax
141 * -Strictly follows the RFC standards. Some urls exist in the wild which will
142 * not validate. Including ones with square brackets in the query section '[]'
150 * Rod Apeldoorn - rod(at)canowhoopass(dot)com
154 * http://www.canowhoopass.com/
158 * -WEAV -Several members of Weav helped to test - http://weav.bc.ca/
159 * -There were also a number of emails from other developers expressing
160 * thanks and suggestions. It is nice to be appreciated. Thanks!
162 * Alternate Commercial Licenses:
163 * For information in regards to alternate licensing, contact me.
165 * @package moodlecore
166 * @copyright Copyright 2004, Rod Apeldoorn {@link http://www.canowhoopass.com/}
167 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
171 * BEGINNING OF validateUrlSyntax() function
/**
 * Check whether a string is a syntactically valid URL.
 *
 * Only the syntax is checked; no lookup is made to see whether the URL exists.
 * A regular expression is assembled from the requested options and matched
 * against the whole of $urladdr.
 *
 * Each option is a letter followed by "+" (required), "?" (optional) or
 * "-" (not allowed). Letters and their defaults:
 *   s? scheme, H? http://, S? https://, E- mailto:, F- ftp://, R- rtmp://,
 *   u? user section, P? password in user section, a+ address,
 *   I? IP address, p? port, f? file path, q? query, r? fragment.
 * Options are case sensitive.
 *
 * @param string $urladdr The URL to check.
 * @param string $options Optional string of option pairs, e.g. 's+u-I-p-q-r-'.
 * @return bool True when $urladdr matches the generated pattern, false otherwise.
 */
function validateUrlSyntax( $urladdr, $options="" ){

    // Check Options Parameter
    if (!preg_match( '/^([sHSEFRuPaIpfqr][+?-])*$/', $options ))
    {
        trigger_error("Options attribute malformed", E_USER_ERROR);
    }

    // Default for every option letter, used when the caller does not mention it.
    $defaults = array(
        's' => '?', // Scheme
        'H' => '?', // http://
        'S' => '?', // https:// (SSL)
        'E' => '-', // mailto: (email)
        'F' => '-', // ftp://
        'R' => '-', // rtmp://
        'u' => '?', // User section
        'P' => '?', // Password in user section
        'a' => '+', // Address (ip or domain)
        'I' => '?', // IP Address in address section
        'p' => '?', // Port number
        'f' => '?', // File path
        'q' => '?', // Query section
        'r' => '?', // Fragment (anchor)
    );

    // Build the options array. The stored value is the regex quantifier that is
    // appended to the matching group: "-" becomes "{0}" (group may not appear),
    // "+" becomes "" (group must appear) and "?" is kept (group is optional).
    $aOptions = array();
    foreach ($defaults as $key => $default)
    {
        $pos   = strpos( $options, $key );
        $value = ($pos === false) ? $default : substr( $options, $pos + 1, 1 );
        if ($value === '-')
        {
            $value = '{0}';
        }
        elseif ($value === '+')
        {
            $value = '';
        }
        $aOptions[$key] = $value;
    }

    // DEBUGGING - Uncomment following line to display to screen current option values
    // echo '<pre>'; print_r($aOptions); echo '</pre>';

    // Preset Allowed Characters
    $alphanum    = '[a-zA-Z0-9]';              // Alpha Numeric
    $unreserved  = '[a-zA-Z0-9_.!~*\'()-]';    // Unreserved characters
    $escaped     = '(%[0-9a-fA-F]{2})';        // Escape sequence - In Hex - %6d would be a 'm'
    $reserved    = '[;/?:@&=+$,]';             // Special characters in the URI

    // Scheme - Allows for 'http://', 'https://', 'mailto:', 'ftp://' or 'rtmp://'.
    // The first scheme marked as required (checked in the order below) becomes the
    // only scheme accepted. If none is required, every scheme marked optional is
    // offered as an alternative.
    $schemes = array(
        'H' => 'http://',
        'S' => 'https://',
        'E' => 'mailto:',
        'F' => 'ftp://',
        'R' => 'rtmp://',
    );
    $requiredscheme  = null;
    $optionalschemes = array();
    foreach ($schemes as $key => $prefix)
    {
        if ($aOptions[$key] === '')
        {
            $requiredscheme = $prefix;
            break;
        }
        if ($aOptions[$key] === '?')
        {
            $optionalschemes[] = '(' . $prefix . ')';
        }
    }
    $scheme = '(' . ($requiredscheme !== null ? $requiredscheme : implode('|', $optionalschemes)) . ')' . $aOptions['s'];
    // End setting scheme

    // User Info - Allows for 'username@' or 'username:password@'. Note: contrary to rfc, ':' is removed from the username section, allowing it only in password.
    //             /---------------- Username -----------------------\  /-------------------------------- Password ------------------------------\
    $userinfo    = '((' . $unreserved . '|' . $escaped . '|[;&=+$,]' . ')+(:(' . $unreserved . '|' . $escaped . '|[;:&=+$,]' . ')+)' . $aOptions['P'] . '@)' . $aOptions['u'];

    // IP ADDRESS - Allows 0.0.0.0 to 255.255.255.255
    $ipaddress   = '((((2(([0-4][0-9])|(5[0-5])))|([01]?[0-9]?[0-9]))\.){3}((2(([0-4][0-9])|(5[0-5])))|([01]?[0-9]?[0-9])))';

    // Tertiary Domain(s) - Optional - Multi - Although some sites may use other characters, the RFC says tertiary domains have the same naming restrictions as second level domains
    $domain_tertiary = '(' . $alphanum . '(([a-zA-Z0-9-]{0,62})' . $alphanum . ')?\.)*';

    // MDL-9295 - no mandatory second level domain component is used, so that URLs like
    // http://localhost/ and lan addresses like http://host/ are accepted.

    // MDL-11462 - we want more relaxed URLs in Moodle, so the top level domain is matched
    // by pattern rather than against a fixed list of known TLDs.
    // Top Level Domain - First character must be Alpha. Last character must be AlphaNumeric. Hyphens are allowed inside.
    $domain_toplevel = '([a-zA-Z](([a-zA-Z0-9-]*)[a-zA-Z0-9])?)';

    // Address can be IP address or Domain
    if ($aOptions['I'] === '{0}')
    {
        // IP Address Not Allowed
        $address = '(' . $domain_tertiary . $domain_toplevel . ')';
    }
    elseif ($aOptions['I'] === '')
    {
        // IP Address Required
        $address = '(' . $ipaddress . ')';
    }
    else
    {
        // IP Address Optional
        $address = '((' . $ipaddress . ')|(' . $domain_tertiary . $domain_toplevel . '))';
    }
    $address = $address . $aOptions['a'];

    // Port Number - :80 or :8080 or :65534 Allows range of :0 to :65535
    //    (0-59999)         |(60000-64999)   |(65000-65499)    |(65500-65529)  |(65530-65535)
    $port_number = '(:(([0-5]?[0-9]{1,4})|(6[0-4][0-9]{3})|(65[0-4][0-9]{2})|(655[0-2][0-9])|(6553[0-5])))' . $aOptions['p'];

    // Path - Can be as simple as '/' or have multiple folders and filenames
    $path        = '(/((;)?(' . $unreserved . '|' . $escaped . '|' . '[:@&=+$,]' . ')+(/)?)*)' . $aOptions['f'];

    // Query Section - Accepts ?var1=value1&var2=value2 or ?2393,1221 and much more
    $querystring = '(\?(' . $reserved . '|' . $unreserved . '|' . $escaped . ')*)' . $aOptions['q'];

    // Fragment Section - Accepts anchors such as #top
    $fragment    = '(\#(' . $reserved . '|' . $unreserved . '|' . $escaped . ')*)' . $aOptions['r'];

    // Building Regular Expression.
    // The "D" modifier makes "$" match only at the very end of the subject. Without it
    // "$" also matches just before a final newline, so a URL followed by "\n" would be
    // reported as valid.
    $regexp = '#^' . $scheme . $userinfo . $address . $port_number . $path . $querystring . $fragment . '$#iD';

    // DEBUGGING - Uncomment Line Below To Display The Regular Expression Built
    // echo '<pre>' . htmlentities(wordwrap($regexp,70,"\n",1)) . '</pre>';

    // Running the regular expression. preg_match() returns 1 on a match, 0 on no match
    // and false on failure; only a match counts as valid.
    return preg_match( $regexp, $urladdr ) === 1;

} // END Function validateUrlSyntax()
340 * About ValidateEmailSyntax():
341 * This function uses the ValidateUrlSyntax() function to easily check the
342 * syntax of an email address. It accepts the same options as ValidateURLSyntax
343 * but defaults them for email addresses.
348 * validateEmailSyntax( url_to_check[, options])
350 * url_to_check - string - The url to check
352 * options - string - An optional string of options to set which parts of
353 * the url are required, optional, or not allowed. Each option
354 * must be followed by a "+" for required, "?" for optional, or
355 * "-" for not allowed. See ValidateUrlSyntax() docs for option list.
357 * The default options are changed to:
358 * s-H-S-E?F-u+P-a+I-p-f-q-r-
360 * This only allows an address of "name@domain".
364 * validateEmailSyntax('george@fakemail.com')
365 * validateEmailSyntax('mailto:george@fakemail.com', 's+')
366 * validateEmailSyntax('george@fakemail.com?subject=Hi%20George', 'q?')
367 * validateEmailSyntax('george@212.198.33.12', 'I?')
372 * Rod Apeldoorn - rod(at)canowhoopass(dot)com
376 * http://www.canowhoopass.com/
380 * Copyright 2004 - Rod Apeldoorn
382 * Released under same license as validateUrlSyntax(). For details, contact me.
/**
 * Check the syntax of an email address by delegating to validateUrlSyntax().
 *
 * The same option letters as validateUrlSyntax() are accepted (including "R"
 * for rtmp://), but the defaults are changed to suit an email address:
 *   s-H-S-E?F-R-u+P-a+I-p-f-q-r-
 * With the defaults only an address of the form "name@domain" is allowed.
 *
 * @param string $emailaddr The email address to check.
 * @param string $options Optional string of option pairs; each letter must be
 *                        followed by "+" (required), "?" (optional) or "-" (not allowed).
 * @return bool The result of validateUrlSyntax() for the combined options.
 */
function validateEmailSyntax( $emailaddr, $options="" ){

    // Check Options Parameter
    if (!preg_match( '/^([sHSEFRuPaIpfqr][+?-])*$/', $options ))
    {
        trigger_error("Options attribute malformed", E_USER_ERROR);
    }

    // Email-specific default for every option letter, used when the caller
    // does not mention it.
    $defaults = array(
        's' => '-', // Scheme
        'H' => '-', // http://
        'S' => '-', // https:// (SSL)
        'E' => '?', // mailto: (email)
        'F' => '-', // ftp://
        'R' => '-', // rtmp://
        'u' => '+', // User section
        'P' => '-', // Password in user section
        'a' => '+', // Address (ip or domain)
        'I' => '-', // IP Address in address section
        'p' => '-', // Port number
        'f' => '-', // File path
        'q' => '-', // Query section
        'r' => '-', // Fragment (anchor)
    );

    // Generate a complete options string: the caller's choice where given,
    // otherwise the email default.
    $newoptions = '';
    foreach ($defaults as $key => $default)
    {
        $pos   = strpos( $options, $key );
        $value = ($pos === false) ? $default : substr( $options, $pos + 1, 1 );
        $newoptions .= $key . $value;
    }

    // DEBUGGING - Uncomment line below to display generated options
    // echo '<pre>' . $newoptions . '</pre>';

    // Send to validateUrlSyntax() and return result
    return validateUrlSyntax( $emailaddr, $newoptions);

} // END Function validateEmailSyntax()
452 * About ValidateFtpSyntax():
453 * This function uses the ValidateUrlSyntax() function to easily check the
454 * syntax of an FTP address. It accepts the same options as ValidateURLSyntax
455 * but defaults them for FTP addresses.
460 * validateFtpSyntax( url_to_check[, options])
462 * url_to_check - string - The url to check
464 * options - string - An optional string of options to set which parts of
465 * the url are required, optional, or not allowed. Each option
466 * must be followed by a "+" for required, "?" for optional, or
467 * "-" for not allowed. See ValidateUrlSyntax() docs for option list.
469 * The default options are changed to:
470 * s?H-S-E-F+u?P?a+I?p?f?q-r-
474 * validateFtpSyntax('ftp://netscape.com')
475 * validateFtpSyntax('moz:iesucks@netscape.com')
476 * validateFtpSyntax('ftp://netscape.com:2121/browsers/ns7/', 'u-')
480 * Rod Apeldoorn - rod(at)canowhoopass(dot)com
484 * http://www.canowhoopass.com/
488 * Copyright 2004 - Rod Apeldoorn
490 * Released under same license as validateUrlSyntax(). For details, contact me.
/**
 * Check the syntax of an FTP address by delegating to validateUrlSyntax().
 *
 * The same option letters as validateUrlSyntax() are accepted (including "R"
 * for rtmp://), but the defaults are changed to suit an FTP address:
 *   s?H-S-E-F+R-u?P?a+I?p?f?q-r-
 *
 * @param string $ftpaddr The FTP address to check.
 * @param string $options Optional string of option pairs; each letter must be
 *                        followed by "+" (required), "?" (optional) or "-" (not allowed).
 * @return bool The result of validateUrlSyntax() for the combined options.
 */
function validateFtpSyntax( $ftpaddr, $options="" ){

    // Check Options Parameter
    if (!preg_match( '/^([sHSEFRuPaIpfqr][+?-])*$/', $options ))
    {
        trigger_error("Options attribute malformed", E_USER_ERROR);
    }

    // FTP-specific default for every option letter, used when the caller
    // does not mention it.
    $defaults = array(
        's' => '?', // Scheme
        'H' => '-', // http://
        'S' => '-', // https:// (SSL)
        'E' => '-', // mailto: (email)
        'F' => '+', // ftp://
        'R' => '-', // rtmp://
        'u' => '?', // User section
        'P' => '?', // Password in user section
        'a' => '+', // Address (ip or domain)
        'I' => '?', // IP Address in address section
        'p' => '?', // Port number
        'f' => '?', // File path
        'q' => '-', // Query section
        'r' => '-', // Fragment (anchor)
    );

    // Generate a complete options string: the caller's choice where given,
    // otherwise the FTP default.
    $newoptions = '';
    foreach ($defaults as $key => $default)
    {
        $pos   = strpos( $options, $key );
        $value = ($pos === false) ? $default : substr( $options, $pos + 1, 1 );
        $newoptions .= $key . $value;
    }

    // DEBUGGING - Uncomment line below to display generated options
    // echo '<pre>' . $newoptions . '</pre>';

    // Send to validateUrlSyntax() and return result
    return validateUrlSyntax( $ftpaddr, $newoptions);

} // END Function validateFtpSyntax()