premier commit
[bazdig.git] / test / simpletest / url.php
blob532e8afb07a513cf388b2798e819242883457670
1 <?php
2 /**
3 * base include file for SimpleTest
4 * @package SimpleTest
5 * @subpackage WebTester
6 * @version $Id: url.php,v 1.30 2006/09/26 20:07:28 maugrim_t_r Exp $
7 */
9 /**#@+
10 * include other SimpleTest class files
12 require_once(dirname(__FILE__) . '/encoding.php');
13 /**#@-*/
15 /**
16 * URL parser to replace parse_url() PHP function which
17 * got broken in PHP 4.3.0. Adds some browser specific
18 * functionality such as expandomatics.
19 * Guesses a bit trying to separate the host from
20 * the path and tries to keep a raw, possibly unparsable,
21 * request string as long as possible.
22 * @package SimpleTest
23 * @subpackage WebTester
25 class SimpleUrl {
26 var $_scheme;
27 var $_username;
28 var $_password;
29 var $_host;
30 var $_port;
31 var $_path;
32 var $_request;
33 var $_fragment;
34 var $_x;
35 var $_y;
36 var $_target;
37 var $_raw = false;
/**
 * Constructor. Parses URL into sections.
 *
 * Declared as __construct() so that the parsing still runs on
 * PHP versions that do not treat a method named after the class
 * as the constructor. It is placed before the legacy alias on
 * purpose.
 * @param string $url Incoming URL.
 * @access public
 */
function __construct($url) {
    // Every _chomp*() helper takes $url by reference and strips the
    // part it recognised, so the order of these calls is significant.
    list($x, $y) = $this->_chompCoordinates($url);
    $this->setCoordinates($x, $y);
    $this->_scheme = $this->_chompScheme($url);
    list($this->_username, $this->_password) = $this->_chompLogin($url);
    $this->_host = $this->_chompHost($url);
    $this->_port = false;
    // The host guess still carries any ":port" suffix; split it off.
    if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
        $this->_host = $host_parts[1];
        $this->_port = (int)$host_parts[2];
    }
    $this->_path = $this->_chompPath($url);
    $this->_request = $this->_parseRequest($this->_chompRequest($url));
    // Anything left that starts with "#" is kept as the fragment.
    $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
    $this->_target = false;
}

/**
 * PHP 4 style constructor. Kept so that old interpreters and any
 * code calling $this->SimpleUrl($url) explicitly keep working; it
 * simply delegates to __construct().
 * @param string $url Incoming URL.
 * @access public
 */
function SimpleUrl($url) {
    $this->__construct($url);
}
/**
 * Pulls a trailing "?x,y" image map click position off the
 * end of the URL.
 * @param string $url URL so far, passed by reference. The
 *                    coordinate suffix is removed when found.
 * @return array X, Y as a pair of integers, or a pair of
 *               false values when no coordinates are present.
 * @access private
 */
function _chompCoordinates(&$url) {
    $found = preg_match('/(.*)\?(\d+),(\d+)$/', $url, $parts);
    if (! $found) {
        return array(false, false);
    }
    $url = $parts[1];
    $x = (int)$parts[2];
    $y = (int)$parts[3];
    return array($x, $y);
}
/**
 * Extracts the scheme part of an incoming URL.
 *
 * The scheme is only recognised at the very start of the URL
 * and may not contain "/", ":", "?" or "#". This stops a "://"
 * that appears later on (for example inside a query string
 * value) from being mistaken for the scheme separator.
 * @param string $url URL so far, passed by reference. The
 *                    scheme and its colon are removed; the
 *                    "//" is left in place.
 * @return string Scheme part or false.
 * @access private
 */
function _chompScheme(&$url) {
    if (preg_match('/^([^\/:?#]*):(\/\/)(.*)/', $url, $matches)) {
        $url = $matches[2] . $matches[3];
        return $matches[1];
    }
    return false;
}
/**
 * Extracts the username and password from the incoming URL.
 * The // prefix will be reattached to the URL after the
 * doublet is extracted.
 *
 * The login is only looked for before the first "/", "?" or
 * "#", so an "@" inside a path or query string is left alone.
 * Only the first ":" separates username from password, so a
 * password may itself contain colons.
 * @param string $url URL so far, passed by reference. The
 *                    username and password are removed.
 * @return array Two item list of username and password,
 *               both urldecode()d. The password is false
 *               when absent; both are false when there
 *               is no login at all.
 * @access private
 */
function _chompLogin(&$url) {
    $prefix = '';
    if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
        $prefix = $matches[1];
        $url = $matches[2];
    }
    if (preg_match('/^([^\/?#]*?)@(.*)/', $url, $matches)) {
        $url = $prefix . $matches[2];
        // explode() replaces split(), which no longer exists in PHP 7+.
        $parts = explode(':', $matches[1], 2);
        return array(
                urldecode($parts[0]),
                isset($parts[1]) ? urldecode($parts[1]) : false);
    }
    $url = $prefix . $url;
    return array(false, false);
}
/**
 * Extracts the host part of an incoming URL. Includes the
 * port number part. Will extract the host if it starts
 * with // or it has a top level domain or it has at least
 * two dots.
 * @param string $url URL so far, passed by reference. The
 *                    host will be removed.
 * @return string Host part guess or false.
 * @access private
 */
function _chompHost(&$url) {
    // An explicit "//" prefix marks the host unambiguously.
    if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
        $url = $matches[3];
        return $matches[2];
    }
    // Otherwise guess from the text in front of the first path,
    // query or fragment delimiter.
    if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
        // Called through $this: a static call to this non-static
        // method is rejected by PHP 8.
        $tlds = $this->getAllTopLevelDomains();
        $candidate = $matches[1];
        $has_tld = preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $candidate);
        if ($has_tld || preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $candidate)) {
            $url = $matches[2] . $matches[3];
            return $candidate;
        }
    }
    return false;
}
/**
 * Takes the path off the front of the URL, that is
 * everything before the first "?" or "#".
 * @param string $url URL so far, passed by reference. The
 *                    path will be removed.
 * @return string Path part, or '' when that part is empty
 *                or otherwise evaluates to false.
 * @access private
 */
function _chompPath(&$url) {
    if (! preg_match('/(.*?)(\?|#|$)(.*)/', $url, $pieces)) {
        return '';
    }
    $url = $pieces[2] . $pieces[3];
    if ($pieces[1]) {
        return $pieces[1];
    }
    return '';
}
/**
 * Takes the query string off the URL: the text between the
 * first "?" and the following "#" or the end.
 * @param string $url URL so far, passed by reference. On a
 *                    match only the "#" and what follows
 *                    it are left behind.
 * @return string Raw request part without the leading "?",
 *                or '' if there is no "?".
 * @access private
 */
function _chompRequest(&$url) {
    $found = preg_match('/\?(.*?)(#|$)(.*)/', $url, $pieces);
    if (! $found) {
        return '';
    }
    $url = $pieces[2] . $pieces[3];
    return $pieces[1];
}
/**
 * Breaks the request down into an object. The raw string is
 * also remembered in $this->_raw.
 * @param string $raw Raw request.
 * @return SimpleFormEncoding Parsed data.
 * @access private
 */
function _parseRequest($raw) {
    $this->_raw = $raw;
    $request = new SimpleGetEncoding();
    // explode() replaces split(), which no longer exists in PHP 7+.
    foreach (explode('&', $raw) as $pair) {
        if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
            // Only the value is decoded; the key is passed on as is.
            $request->add($matches[1], urldecode($matches[2]));
        } elseif ($pair) {
            // A bare key with no "=" gets an empty value.
            $request->add($pair, '');
        }
    }
    return $request;
}
/**
 * Accessor for the protocol part.
 * @param string $default Value to hand back if no scheme
 *                        was parsed.
 * @return string Scheme name, e.g "http".
 * @access public
 */
function getScheme($default = false) {
    if ($this->_scheme) {
        return $this->_scheme;
    }
    return $default;
}
/**
 * Accessor for the user name.
 * @return string Username as stored by the constructor
 *                (false when the URL carried none).
 * @access public
 */
function getUsername() {
    $username = $this->_username;
    return $username;
}
/**
 * Accessor for the password.
 * @return string Password as stored by the constructor
 *                (false when the URL carried none).
 * @access public
 */
function getPassword() {
    $password = $this->_password;
    return $password;
}
/**
 * Accessor for the host name. The constructor has already
 * split the port off, so this is the name only.
 * @param string $default Value to hand back if no host
 *                        was parsed.
 * @return string Hostname only.
 * @access public
 */
function getHost($default = false) {
    if ($this->_host) {
        return $this->_host;
    }
    return $default;
}
/**
 * Accessor for the top level domain. Treats the host like a
 * file name and takes its "extension" via pathinfo().
 * @return string Last part of host, or false if pathinfo()
 *                reports no extension.
 * @access public
 */
function getTld() {
    $host_info = pathinfo($this->getHost());
    if (isset($host_info['extension'])) {
        return $host_info['extension'];
    }
    return false;
}
/**
 * Accessor for the port number.
 * @return integer TCP/IP port number, or false when the
 *                 host carried no ":port" suffix.
 * @access public
 */
function getPort() {
    $port = $this->_port;
    return $port;
}
/**
 * Accessor for the path. A URL that has a host but no path
 * is reported as having the root path.
 * @return string Full path including leading slash if implied.
 * @access public
 */
function getPath() {
    $root_implied = ! $this->_path && $this->_host;
    return $root_implied ? '/' : $this->_path;
}
/**
 * Accessor for the page if any: the part of the path after
 * the last "/". This may be a directory name if ambiguous.
 * @return string Page name, or false if the pattern fails
 *                to match.
 * @access public
 */
function getPage() {
    if (preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
        return $matches[1];
    }
    return false;
}
/**
 * Gets the path leading to the page: everything up to and
 * including the last "/".
 * @return string Path less the page, or false if the path
 *                contains no "/".
 * @access public
 */
function getBasePath() {
    if (preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
        return $matches[1];
    }
    return false;
}
/**
 * Accessor for the fragment at the end of the URL, after
 * the "#".
 * @return string Part after "#", or false if the
 *                constructor found none.
 * @access public
 */
function getFragment() {
    $fragment = $this->_fragment;
    return $fragment;
}
/**
 * Sets the image coordinates. Passing false for either
 * value clears both of them.
 * @param integer $x Horizontal position.
 * @param integer $y Vertical position.
 * @access public
 */
function setCoordinates($x = false, $y = false) {
    if (($x !== false) && ($y !== false)) {
        $this->_x = (int)$x;
        $this->_y = (int)$y;
        return;
    }
    $this->_x = false;
    $this->_y = false;
}
/**
 * Accessor for the horizontal image coordinate.
 * @return integer X value, or false when no coordinates
 *                 are set.
 * @access public
 */
function getX() {
    $x = $this->_x;
    return $x;
}
/**
 * Accessor for the vertical image coordinate.
 * @return integer Y value, or false when no coordinates
 *                 are set.
 * @access public
 */
function getY() {
    $y = $this->_y;
    return $y;
}
/**
 * Accessor for the current request parameters in URL string
 * form. Hands back the original raw request while it is
 * still held, even if it doesn't make much sense; otherwise
 * asks the request object to render itself.
 * @return string Form is string "?a=1&b=2", etc, or ''
 *                when there is nothing to send.
 * @access public
 */
function getEncodedRequest() {
    $encoded = $this->_raw ? $this->_raw : $this->_request->asUrlRequest();
    if (! $encoded) {
        return '';
    }
    // Guarantee exactly one leading "?".
    return '?' . preg_replace('/^\?/', '', $encoded);
}
/**
 * Adds one more parameter to the request. The remembered
 * raw request string is discarded, so the request is
 * rendered from the request object from now on.
 * @param string $key Name of parameter.
 * @param string $value Value as string.
 * @access public
 */
function addRequestParameter($key, $value) {
    // The raw string would no longer reflect the parameters.
    $this->_raw = false;
    $this->_request->add($key, $value);
}
/**
 * Merges a set of additional parameters into the request.
 * The remembered raw request string is discarded.
 * @param hash/SimpleFormEncoding $parameters Additional
 *                                            parameters.
 * @access public
 */
function addRequestParameters($parameters) {
    // The raw string would no longer reflect the parameters.
    $this->_raw = false;
    $this->_request->merge($parameters);
}
/**
 * Clears down all parameters: drops the remembered raw
 * request and starts again with an empty request object.
 * @access public
 */
function clearRequest() {
    $this->_raw = false;
    // Plain assignment: "=& new" is a parse error from PHP 7 on.
    $this->_request = new SimpleGetEncoding();
}
/**
 * Gets the frame target if present. Although not strictly
 * part of the URL specification, it acts similarly to
 * the browser.
 * @return boolean/string Frame name or false if none.
 * @access public
 */
function getTarget() {
    $target = $this->_target;
    return $target;
}
/**
 * Attaches a frame target. Also discards the remembered
 * raw request string.
 * @param string $frame Name of frame.
 * @access public
 */
function setTarget($frame) {
    $this->_target = $frame;
    $this->_raw = false;
}
/**
 * Renders the URL back into a string.
 *
 * The port is included when one was parsed, matching what
 * makeAbsolute() builds. The scheme falls back to "http"
 * when a host is present without one. Only paths starting
 * with "/" are rendered, and the "://" separator is always
 * emitted, even when there is no host.
 * @return string URL in canonical form.
 * @access public
 */
function asString() {
    $scheme = $host = $port = $path = '';
    $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
    if ($this->getHost()) {
        $scheme = $this->getScheme() ? $this->getScheme() : 'http';
        $host = $this->getHost();
        $port = $this->getPort() ? ':' . $this->getPort() : '';
    }
    if (substr($this->_path, 0, 1) == '/') {
        $path = $this->normalisePath($this->_path);
    }
    $encoded = $this->getEncodedRequest();
    $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
    // Image map coordinates go last, as a "?x,y" suffix.
    $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
    return "$scheme://$identity$host$port$path$encoded$fragment$coords";
}
/**
 * Fills in the unknown sections from a base URL to turn a
 * relative URL into an absolute one. The base URL can be
 * either a string or a SimpleUrl object.
 * @param string/SimpleUrl $base Base URL.
 * @return SimpleUrl New URL object parsed from the
 *                   combined string.
 * @access public
 */
function makeAbsolute($base) {
    if (! is_object($base)) {
        $base = new SimpleUrl($base);
    }
    $scheme = $this->getScheme();
    if (! $scheme) {
        $scheme = $base->getScheme();
    }
    $own_host = $this->getHost();
    if ($own_host) {
        // Own host wins; only a missing login is borrowed from the base.
        $host = $own_host;
        $port = $this->getPort();
        $identity = $this->getIdentity();
        if (! $identity) {
            $identity = $base->getIdentity();
        }
    } else {
        // No host of our own: host, port and login all come from the base.
        $host = $base->getHost();
        $port = $base->getPort();
        $identity = $base->getIdentity();
    }
    $port = $port ? ':' . $port : '';
    $identity = $identity ? $identity . '@' : '';
    $path = $this->normalisePath($this->_extractAbsolutePath($base));
    $encoded = $this->getEncodedRequest();
    $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
    $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
    return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
}
/**
 * Works out the absolute path, borrowing from the base URL
 * where this URL's own path is relative or missing.
 * @param string/SimpleUrl $base Base URL. Only used as an
 *                               object here.
 * @return string Absolute path.
 * @access private
 */
function _extractAbsolutePath($base) {
    $own_path = $this->_path;
    // With a host of our own, or a path that is already rooted,
    // the path stands as it is.
    if ($this->getHost() || ! $this->_isRelativePath($own_path)) {
        return $own_path;
    }
    if ($own_path) {
        return $base->getBasePath() . $own_path;
    }
    return $base->getPath();
}
/**
 * Simple test to see if a path part is relative.
 * @param string $path Path to test.
 * @return boolean True unless the path starts with a "/".
 * @access private
 */
function _isRelativePath($path) {
    $first = substr($path, 0, 1);
    return ! ($first == '/');
}
/**
 * Builds the login part for use in rendering a URL. Both the
 * username and the password must be set for it to be used.
 * @return string/boolean Form of username:password or false.
 * @access public
 */
function getIdentity() {
    if (! $this->_username || ! $this->_password) {
        return false;
    }
    return $this->_username . ':' . $this->_password;
}
/**
 * Replaces . and .. sections of the path.
 *
 * The two substitutions are repeated until the path stops
 * changing. A single pass cannot resolve consecutive
 * sections such as "/a/b/../../c" or "/a/././b", because
 * each replacement consumes the slash the next match needs.
 * @param string $path Unoptimised path.
 * @return string Path with dots removed if possible.
 * @access public
 */
function normalisePath($path) {
    do {
        $previous = $path;
        $path = preg_replace('|/\./|', '/', $path);
        $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
        // Every replacement shortens the string, so this terminates.
    } while ($path !== $previous);
    return $path;
}
/**
 * A pipe separated list of all TLDs that result in two part
 * domain names. The result is spliced into a regular
 * expression alternation by the host parser.
 * @return string Pipe separated list.
 * @access public
 * @static
 */
function getAllTopLevelDomains() {
    $tlds = array(
            'com', 'edu', 'net', 'org', 'gov', 'mil', 'int',
            'biz', 'info', 'name', 'pro', 'aero', 'coop', 'museum');
    return implode('|', $tlds);
}