Commit IPv6 fix, with major factoring out. Thank you Feyd!
[htmlpurifier.git] / library / HTMLPurifier / AttrDef / URI.php
blobca437f962d141907a7ae839f2906d3f27b093d3a
<?php

require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/Host.php';

// Declares the URI/DefaultScheme configuration entry; 'http' is presumably
// its default value (confirm against HTMLPurifier_ConfigDef::define()).
// The URI attribute validator looks this entry up through
// $config->get('URI', 'DefaultScheme') when a URI has no scheme of its own.
HTMLPurifier_ConfigDef::define(
    'URI', 'DefaultScheme', 'http',
    'Defines through what scheme the output will be served, in order to '.
    'select the proper object validator when no scheme information is present.'
);
/**
 * Attribute definition that validates and normalizes a URI.
 *
 * The value is split into scheme, authority, path, query and fragment with a
 * non-validating regular expression, the authority is further split into
 * userinfo, host and port, the scheme object gets a chance to post-process
 * the components, and the URI is then reassembled from what survived.
 */
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{

    /**
     * Instance of HTMLPurifier_AttrDef_Host used to validate the host part
     * of the authority.
     */
    var $host;

    /**
     * Constructor (PHP 4 style): sets up the host validator.
     */
    function HTMLPurifier_AttrDef_URI() {
        $this->host = new HTMLPurifier_AttrDef_Host();
    }

    /**
     * Validates a URI and returns its normalized form.
     *
     * @param $uri     Raw attribute value.
     * @param $config  Configuration object; read for URI/DefaultScheme and
     *                 passed on to the registry, host validator and scheme.
     * @param $context Context object, passed by reference to the host
     *                 validator.
     * @return The reassembled URI string, or false when the value does not
     *         match the URI pattern or no usable scheme object is available.
     */
    function validate($uri, $config, &$context) {

        // We'll write stack-based parsers later, for now, use regexps to
        // get things working as fast as possible (irony)

        // parse as CDATA
        $uri = $this->parseCDATA($uri);

        // while it would be nice to use parse_url(), that's specifically
        // for HTTP and thus won't work for our generic URI parsing

        // according to the RFC... (but this cuts corners, i.e. non-validating)
        $r_URI = '!^'.
            '(([^:/?#<>]+):)?'. // 2. Scheme
            '(//([^/?#<>]*))?'. // 4. Authority
            '([^?#<>]*)'.       // 5. Path
            '(\?([^#<>]*))?'.   // 7. Query
            '(#([^<>]*))?'.     // 8. Fragment
            '$!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // invalid URI

        // separate out parts; the odd-numbered groups include the delimiter
        // (":", "//", "?", "#"), so they are non-empty whenever the part is
        // present, even if the part itself is an empty string
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        $registry =& HTMLPurifier_URISchemeRegistry::instance();
        if ($scheme !== null) {
            // no need to validate the scheme's fmt since we do that when we
            // retrieve the specific scheme object from the registry
            $scheme = strtolower($scheme);
            $scheme_obj =& $registry->getScheme($scheme, $config);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
        } else {
            // no scheme in the URI: fall back to the configured default
            $default_scheme = $config->get('URI', 'DefaultScheme');
            $scheme_obj =& $registry->getScheme($default_scheme, $config);
            if (!$scheme_obj) {
                // without this guard the validateComponents() call below
                // would be a fatal call on a non-object
                trigger_error(
                    'Default scheme object "' . $default_scheme .
                    '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }

        if ($authority !== null) {

            $HEXDIG = '[A-Fa-f0-9]';
            $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
            $sub_delims = '!$&\'()'; // needs []
            $pct_encoded = "%$HEXDIG$HEXDIG";
            $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
            $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            // overloads regexp!
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // compare against '' rather than using empty(), which would
            // also discard the literal host "0"
            $host       = (isset($matches[3]) && $matches[3] !== '')
                        ? $matches[3] : null;
            $port       = !empty($matches[4]) ? $matches[5] : null;

            // validate port
            if ($port !== null) {
                $port = (int) $port;
                if ($port < 1 || $port > 65535) $port = null;
            }

            $host = $this->host->validate($host, $config, $context);
            if ($host === false) $host = null;

            // userinfo and host are validated within the regexp

        } else {
            $port = $host = $userinfo = null;
        }

        // query and fragment are quite simple in terms of definition:
        // *( pchar / "/" / "?" ), so define their validation routines
        // when we start fixing percent encoding

        // path gets to be validated against a hodge-podge of rules depending
        // on the status of authority and scheme, but it's not that important,
        // esp. since it won't be applicable to everyone

        // okay, now we defer execution to the subobject for more processing
        // note that $fragment is omitted
        list($userinfo, $host, $port, $path, $query) =
            $scheme_obj->validateComponents(
                $userinfo, $host, $port, $path, $query, $config
            );

        // reconstruct authority; userinfo and port are only meaningful as
        // decorations of a host, so without a host the whole authority is
        // dropped instead of emitting "//user@" or "//:80"
        $authority = null;
        if ($host !== null) {
            $authority = '';
            if ($userinfo !== null) $authority .= $userinfo . '@';
            $authority .= $host;
            if ($port !== null) $authority .= ':' . $port;
        }

        // reconstruct the result
        $result = '';
        if ($scheme !== null)    $result .= "$scheme:";
        if ($authority !== null) $result .= "//$authority";
        $result .= $path;
        if ($query !== null)     $result .= "?$query";
        if ($fragment !== null)  $result .= "#$fragment";

        return $result;

    }

}