Merge branch 'MDL-39444_23' of git://github.com/timhunt/moodle into MOODLE_23_STABLE
[moodle.git] / lib / simpletestlib / url.php
blob11d70e745ee4bf941d606e2edf6dbaa05d769c9b
1 <?php
2 /**
3 * base include file for SimpleTest
4 * @package SimpleTest
5 * @subpackage WebTester
6 * @version $Id: url.php 2011 2011-04-29 08:22:48Z pp11 $
7 */
9 /**#@+
10 * include other SimpleTest class files
12 require_once(dirname(__FILE__) . '/encoding.php');
13 /**#@-*/
/**
 *    URL parser to replace the parse_url() PHP function, which
 *    got broken in PHP 4.3.0. Adds some browser specific
 *    functionality such as expandomatics.
 *    Guesses a bit when trying to separate the host from
 *    the path, and tries to keep a raw, possibly unparsable,
 *    request string for as long as possible.
 *
 *    Parsing works by repeatedly "chomping" a leading or trailing
 *    component off the URL string: coordinates, scheme, login,
 *    host, path, request and finally the fragment.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimpleUrl {
    /** @var string|false Scheme such as "http", or false if absent. */
    private $scheme;
    /** @var string|false URL-decoded user name, or false if absent. */
    private $username;
    /** @var string|false URL-decoded password, or false if absent. */
    private $password;
    /** @var string|false Host name without the port, or false if absent. */
    private $host;
    /** @var integer|false Port number, or false if none was given. */
    private $port;
    /** @var string Path exactly as it appeared in the URL (may be relative). */
    public $path;
    /** @var SimpleGetEncoding Parsed request (query string) parameters. */
    private $request;
    /** @var string|false Text after the "#", or false if absent. */
    private $fragment;
    /** @var integer|false Image map X coordinate, or false if unset. */
    private $x;
    /** @var integer|false Image map Y coordinate, or false if unset. */
    private $y;
    /** @var string|false Frame target name, or false if none. */
    private $target;
    /** @var string|false Untouched request string, or false once the request was modified. */
    private $raw = false;

    /**
     *    Constructor. Parses URL into sections.
     *    @param string $url        Incoming URL.
     *    @access public
     */
    public function __construct($url = '') {
        // Guard against null/false being handed in; every chomp step
        // below expects to work on a string.
        $url = (string)$url;
        list($x, $y) = $this->chompCoordinates($url);
        $this->setCoordinates($x, $y);
        $this->scheme = $this->chompScheme($url);
        if ($this->scheme === 'file') {
            // Unescaped backslashes not used in directory separator context
            // will get caught by this, but they should have been urlencoded
            // anyway so we don't care. If this ends up being a problem, the
            // host regexp must be modified to match for backslashes when
            // the scheme is file.
            $url = str_replace('\\', '/', $url);
        }
        $login = $this->chompLogin($url);
        $this->username = $login[0];
        $this->password = $login[1];
        $this->host = $this->chompHost($url);
        $this->port = false;
        if (preg_match('/(.*?):(.*)/', $this->host, $host_parts)) {
            if ($this->scheme === 'file' && strlen($this->host) === 2) {
                // DOS drive was placed in authority; promote it to path.
                $url = '/' . $this->host . $url;
                $this->host = false;
            } else {
                $this->host = $host_parts[1];
                $this->port = (int)$host_parts[2];
            }
        }
        $this->path = $this->chompPath($url);
        $this->request = $this->parseRequest($this->chompRequest($url));
        // Whatever is left by now is either empty or the fragment.
        $this->fragment = (strncmp($url, '#', 1) == 0 ? substr($url, 1) : false);
        $this->target = false;
    }

    /**
     *    Extracts the X, Y coordinate pair from an image map
     *    link, i.e. a trailing "?x,y".
     *    @param string $url   URL so far. The coordinates will be
     *                         removed.
     *    @return array        X, Y as a pair of integers, or a pair
     *                         of false values if there are none.
     *    @access private
     */
    protected function chompCoordinates(&$url) {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((int)$matches[2], (int)$matches[3]);
        }
        return array(false, false);
    }

    /**
     *    Extracts the scheme part of an incoming URL. Only
     *    recognised when followed by "://"; the "//" is left
     *    on the URL for the login and host steps.
     *    @param string $url   URL so far. The scheme will be
     *                         removed.
     *    @return string       Scheme part or false.
     *    @access private
     */
    protected function chompScheme(&$url) {
        if (preg_match('#^([^/:]*):(//)(.*)#', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }

    /**
     *    Extracts the username and password from the
     *    incoming URL. The // prefix will be reattached
     *    to the URL after the doublet is extracted.
     *    @param string $url    URL so far. The username and
     *                          password are removed.
     *    @return array         Two item list of username and
     *                          password. Will urldecode() them.
     *                          Missing items are false.
     *    @access private
     */
    protected function chompLogin(&$url) {
        $prefix = '';
        if (preg_match('#^(//)(.*)#', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('#^([^/]*)@(.*)#', $url, $matches)) {
            $url = $prefix . $matches[2];
            // Split on the first colon only: the user name cannot
            // contain a raw colon, but the password can.
            $parts = explode(':', $matches[1], 2);
            return array(
                    urldecode($parts[0]),
                    isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }

    /**
     *    Extracts the host part of an incoming URL.
     *    Includes the port number part. Will extract
     *    the host if it starts with // or it has
     *    a top level domain or it has at least two
     *    dots.
     *
     *    The domain patterns are not anchored, so for URLs
     *    without a leading // this is only a heuristic.
     *    @param string $url    URL so far. The host will be
     *                          removed.
     *    @return string        Host part guess or false.
     *    @access private
     */
    protected function chompHost(&$url) {
        if (preg_match('!^(//)(.*?)(/.*|\?.*|#.*|$)!', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        if (preg_match('!(.*?)(\.\./|\./|/|\?|#|$)(.*)!', $url, $matches)) {
            $candidate = $matches[1];
            $tlds = self::getAllTopLevelDomains();
            $has_known_tld = preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $candidate);
            if ($has_known_tld
                    || preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $candidate)) {
                $url = $matches[2] . $matches[3];
                return $candidate;
            }
        }
        return false;
    }

    /**
     *    Extracts the path information from the incoming
     *    URL. Strips this path from the URL.
     *    @param string $url     URL so far. The path will be
     *                           removed.
     *    @return string         Path part or an empty string.
     *    @access private
     */
    protected function chompPath(&$url) {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }

    /**
     *    Strips off the request data.
     *    @param string $url  URL so far. The request will be
     *                        removed.
     *    @return string      Raw request part without the
     *                        leading "?".
     *    @access private
     */
    protected function chompRequest(&$url) {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }

    /**
     *    Breaks the request down into an object. Also
     *    remembers the raw string so that it can be
     *    replayed untouched later.
     *    @param string $raw           Raw request.
     *    @return SimpleFormEncoding   Parsed data.
     *    @access private
     */
    protected function parseRequest($raw) {
        $this->raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (explode('&', $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add(urldecode($matches[1]), urldecode($matches[2]));
            } elseif ($pair !== '') {
                // Bare key without "=". Compare against '' explicitly
                // so that a key of "0" is not mistaken for empty.
                $request->add(urldecode($pair), '');
            }
        }
        return $request;
    }

    /**
     *    Accessor for protocol part.
     *    @param string $default    Value to use if not present.
     *    @return string            Scheme name, e.g "http".
     *    @access public
     */
    public function getScheme($default = false) {
        return $this->scheme ? $this->scheme : $default;
    }

    /**
     *    Accessor for user name.
     *    @return string    Username preceding host, or false.
     *    @access public
     */
    public function getUsername() {
        return $this->username;
    }

    /**
     *    Accessor for password.
     *    @return string    Password preceding host, or false.
     *    @access public
     */
    public function getPassword() {
        return $this->password;
    }

    /**
     *    Accessor for hostname. Any port has already been
     *    split off by the constructor.
     *    @param string $default    Value to use if not present.
     *    @return string            Hostname only.
     *    @access public
     */
    public function getHost($default = false) {
        return $this->host ? $this->host : $default;
    }

    /**
     *    Accessor for top level domain.
     *    @return string       Last part of host, or false if
     *                         the host has no dot.
     *    @access public
     */
    public function getTld() {
        // pathinfo() treats the text after the last dot as the
        // "extension", which for a host name is the TLD.
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }

    /**
     *    Accessor for port number.
     *    @return integer    TCP/IP port number, or false.
     *    @access public
     */
    public function getPort() {
        return $this->port;
    }

    /**
     *    Accessor for path.
     *    @return string    Full path including leading slash if implied.
     *    @access public
     */
    public function getPath() {
        if (! $this->path && $this->host) {
            return '/';
        }
        return $this->path;
    }

    /**
     *    Accessor for page if any. This may be a
     *    directory name if ambiguous.
     *    @return string    Page name.
     *    @access public
     */
    public function getPage() {
        if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     *    Gets the path to the page.
     *    @return string       Path less the page, or false if
     *                         the path contains no slash.
     *    @access public
     */
    public function getBasePath() {
        if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     *    Accessor for fragment at end of URL after the "#".
     *    @return string    Part after "#", or false.
     *    @access public
     */
    public function getFragment() {
        return $this->fragment;
    }

    /**
     *    Sets image coordinates. Set to false to clear
     *    them. Both are cleared if either one is false.
     *    @param integer $x    Horizontal position.
     *    @param integer $y    Vertical position.
     *    @access public
     */
    public function setCoordinates($x = false, $y = false) {
        if (($x === false) || ($y === false)) {
            $this->x = $this->y = false;
            return;
        }
        $this->x = (int)$x;
        $this->y = (int)$y;
    }

    /**
     *    Accessor for horizontal image coordinate.
     *    @return integer        X value, or false if unset.
     *    @access public
     */
    public function getX() {
        return $this->x;
    }

    /**
     *    Accessor for vertical image coordinate.
     *    @return integer        Y value, or false if unset.
     *    @access public
     */
    public function getY() {
        return $this->y;
    }

    /**
     *    Accessor for current request parameters
     *    in URL string form. Will return the original request
     *    if at all possible even if it doesn't make much
     *    sense.
     *    @return string   Form is string "?a=1&b=2", etc.
     *    @access public
     */
    public function getEncodedRequest() {
        // Explicit comparison rather than truthiness, so that a raw
        // request of "0" is still replayed as written.
        if (is_string($this->raw) && $this->raw !== '') {
            $encoded = $this->raw;
        } else {
            $encoded = (string)$this->request->asUrlRequest();
        }
        if ($encoded !== '') {
            return '?' . preg_replace('/^\?/', '', $encoded);
        }
        return '';
    }

    /**
     *    Adds an additional parameter to the request.
     *    The raw request string is discarded.
     *    @param string $key            Name of parameter.
     *    @param string $value          Value as string.
     *    @access public
     */
    public function addRequestParameter($key, $value) {
        $this->raw = false;
        $this->request->add($key, $value);
    }

    /**
     *    Adds additional parameters to the request.
     *    The raw request string is discarded.
     *    @param hash/SimpleFormEncoding $parameters   Additional
     *                                                parameters.
     *    @access public
     */
    public function addRequestParameters($parameters) {
        $this->raw = false;
        $this->request->merge($parameters);
    }

    /**
     *    Clears down all parameters.
     *    @access public
     */
    public function clearRequest() {
        $this->raw = false;
        $this->request = new SimpleGetEncoding();
    }

    /**
     *    Gets the frame target if present. Although
     *    not strictly part of the URL specification it
     *    acts similarly to the browser.
     *    @return boolean/string    Frame name or false if none.
     *    @access public
     */
    public function getTarget() {
        return $this->target;
    }

    /**
     *    Attaches a frame target.
     *    @param string $frame        Name of frame.
     *    @access public
     */
    public function setTarget($frame) {
        // NOTE(review): this also drops the raw request string, so the
        // query gets re-encoded afterwards even though the target does
        // not alter it. Kept as is; confirm whether this is intended.
        $this->raw = false;
        $this->target = $frame;
    }

    /**
     *    Renders the URL back into a string. The scheme
     *    defaults to "http" when there is a host, port 80
     *    is left out and absolute paths are normalised.
     *    @return string        URL in canonical form.
     *    @access public
     */
    public function asString() {
        $path = $this->path;
        $scheme = $identity = $host = $port = $encoded = $fragment = '';
        $credentials = $this->getIdentity();
        if ($credentials) {
            $identity = $credentials . '@';
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme() ? $this->getScheme() : 'http';
            $scheme .= '://';
            $host = $this->getHost();
        } elseif ($this->getScheme() === 'file') {
            // Safest way; otherwise, file URLs on Windows have an extra
            // leading slash. It might be possible to convert file://
            // URIs to local file paths, but that requires more research.
            $scheme = 'file://';
        }
        if ($this->getPort() && $this->getPort() != 80) {
            $port = ':' . $this->getPort();
        }
        if (substr($this->path, 0, 1) == '/') {
            $path = $this->normalisePath($this->path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#' . $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return "$scheme$identity$host$port$path$encoded$fragment$coords";
    }

    /**
     *    Replaces unknown sections to turn a relative
     *    URL into an absolute one. The base URL can
     *    be either a string or a SimpleUrl object.
     *    @param string/SimpleUrl $base       Base URL.
     *    @return SimpleUrl                   New absolute URL.
     *    @access public
     */
    public function makeAbsolute($base) {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme();
            $host = $this->getHost();
            $port = $this->getPort() ? ':' . $this->getPort() : '';
            $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
            if (! $identity) {
                // Fall back to the credentials of the base URL.
                $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
            }
        } else {
            $scheme = $base->getScheme();
            $host = $base->getHost();
            $port = $base->getPort() ? ':' . $base->getPort() : '';
            $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
        }
        $path = $this->normalisePath($this->extractAbsolutePath($base));
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#' . $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
    }

    /**
     *    Replaces unknown sections of the path with base parts
     *    to return a complete absolute one.
     *    @param string/SimpleUrl $base       Base URL.
     *    @return string                      Absolute path.
     *    @access private
     */
    protected function extractAbsolutePath($base) {
        if ($this->getHost()) {
            return $this->path;
        }
        if (! $this->isRelativePath($this->path)) {
            return $this->path;
        }
        if ($this->path) {
            return $base->getBasePath() . $this->path;
        }
        return $base->getPath();
    }

    /**
     *    Simple test to see if a path part is relative.
     *    @param string $path        Path to test.
     *    @return boolean            True unless the path starts
     *                               with a "/".
     *    @access private
     */
    protected function isRelativePath($path) {
        return (substr($path, 0, 1) != '/');
    }

    /**
     *    Extracts the username and password for use in rendering
     *    a URL. Both parts are URL encoded again, mirroring the
     *    urldecode() applied when they were parsed, so that
     *    reserved characters such as "/", ":" or "@" survive
     *    a render and re-parse round trip.
     *    @return string/boolean    Form of username:password or false.
     *    @access public
     */
    public function getIdentity() {
        if ($this->username && $this->password) {
            return urlencode($this->username) . ':' . urlencode($this->password);
        }
        return false;
    }

    /**
     *    Replaces . and .. sections of the path. Intended for
     *    the path component of a URL.
     *    @param string $path    Unoptimised path.
     *    @return string         Path with dots removed if possible.
     *    @access public
     */
    public function normalisePath($path) {
        // preg_replace() never rescans text it has just produced, so
        // one pass leaves "/a/b/../../c" as "/a/../c" and "/a/././b"
        // as "/a/./b". Repeat until nothing changes; every replacement
        // shortens the string, so the loop terminates.
        do {
            $previous = $path;
            $path = preg_replace('|/\./|', '/', $path);
            $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
        } while ($path !== $previous);
        return $path;
    }

    /**
     *    A pipe separated list of all TLDs that result in two part
     *    domain names.
     *    @return string        Pipe separated list.
     *    @access public
     */
    public static function getAllTopLevelDomains() {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}