[2.1.5] [MFH] Percent encode query and hash, and lazy update with attr validator
[htmlpurifier/rdancer.git] / library / HTMLPurifier / URI.php
blobbe42ccb8690d955cf81629cb550fddbfd71cfc61
1 <?php
3 require_once 'HTMLPurifier/URIParser.php';
4 require_once 'HTMLPurifier/URIFilter.php';
/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    
    /**
     * URI components. Each one is null when absent from the URI; the
     * constructor stores $scheme lower-cased and $port as an integer.
     */
    var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
    
    /**
     * Builds a URI from its individual components.
     * @note Automatically normalizes scheme and port
     * @param $scheme   Scheme name or null; stored lower-cased
     * @param $userinfo Userinfo component or null
     * @param $host     Host component or null
     * @param $port     Port or null; any non-null value is cast to int
     * @param $path     Path component
     * @param $query    Query component or null
     * @param $fragment Fragment component or null
     */
    function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        // strtolower() leaves an already lower-case scheme untouched, so no
        // pre-check (and no dependency on the ctype extension) is needed
        $this->scheme   = is_null($scheme) ? null : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host     = $host;
        $this->port     = is_null($port) ? null : (int) $port;
        $this->path     = $path;
        $this->query    = $query;
        $this->fragment = $fragment;
    }
    
    /**
     * PHP 4-style constructor. PHP 8 no longer treats a method named after
     * the class as a constructor, so the real work lives in __construct();
     * this wrapper remains for PHP 4 and for code that calls it explicitly.
     * It must stay declared AFTER __construct() to avoid the PHP 5 E_STRICT
     * "Redefining already defined constructor" notice.
     * @see __construct() for the parameters
     */
    function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        $this->__construct($scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }
    
    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Scheme object appropriate for validating this URI, or false
     *         if the registry yields none for the scheme in question
     */
    function getSchemeObj($config, &$context) {
        $registry =& HTMLPurifier_URISchemeRegistry::instance();
        
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
            return $scheme_obj;
        }
        
        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
        if (!$scheme_obj) {
            // something funky happened to the default scheme object
            trigger_error(
                'Default scheme object "' . $def->defaultScheme . '" was not readable',
                E_USER_WARNING
            );
            return false;
        }
        return $scheme_obj;
    }
    
    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @note Each HTMLPurifier_PercentEncoder below is constructed with a
     *       list of characters; presumably these are the characters it
     *       leaves unescaped -- confirm against that class.
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return True if validation/filtering succeeds, false if failure
     *         (the generic checks in this method always return true)
     */
    function validate($config, &$context) {
        
        // ABNF definitions from RFC 3986
        //   gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"  (not needed below)
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';
        
        // validate scheme (MUST BE FIRST!)
        // a host-less URI that uses the default scheme has the scheme dropped
        if (!is_null($this->scheme) && is_null($this->host)) {
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }
        
        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            // a rejected host is treated as though none had been given
            if ($this->host === false) $this->host = null;
        }
        
        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }
        
        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }
        
        // validate path
        // a null path is handled as the empty path, so that the string
        // offset and strpos() calls below always operate on a string
        if (is_null($this->path)) $this->path = '';
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) {
            // path-abempty (hier and relative)
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path === '') {
            // path-empty (hier and relative): nothing to encode
        } elseif ($this->path[0] === '/') {
            // path-absolute (hier and relative)
            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                // A leading "//" without a host shouldn't ever happen;
                // the path is discarded
                $this->path = '';
            } else {
                $this->path = $segments_encoder->encode($this->path);
            }
        } elseif (!is_null($this->scheme)) {
            // path-rootless (hier)
            $this->path = $segments_encoder->encode($this->path);
        } else {
            // path-noscheme (relative)
            // the first segment gets its own encoder, built without ':'
            $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
            $c = strpos($this->path, '/');
            if ($c !== false) {
                $this->path =
                    $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                    $segments_encoder->encode(substr($this->path, $c));
            } else {
                $this->path = $segment_nc_encoder->encode($this->path);
            }
        }
        
        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
        
        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }
        
        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        
        return true;
        
    }
    
    /**
     * Convert URI back to string
     * @return String URI appropriate for output
     */
    function toString() {
        // reconstruct authority; userinfo and port are only emitted
        // when a host is present
        $authority = null;
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
            $authority .= $this->host;
            if (!is_null($this->port))     $authority .= ':' . $this->port;
        }
        
        // reconstruct the result
        $result = '';
        if (!is_null($this->scheme))    $result .= $this->scheme . ':';
        if (!is_null($authority))       $result .= '//' . $authority;
        $result .= $this->path;
        if (!is_null($this->query))     $result .= '?' . $this->query;
        if (!is_null($this->fragment))  $result .= '#' . $this->fragment;
        
        return $result;
    }
    
    /**
     * Returns a copy of the URI object
     * @note The copy is produced by a serialize()/unserialize() round
     *       trip of this object.
     */
    function copy() {
        return unserialize(serialize($this));
    }
    
}