Merged revisions 96681,96683-96686,96689-96692,96698-96701,96705,96708,96710,96712...
[official-gcc.git] / libjava / java / net / URI.java
blobc466b7198169b8826bf6f6c56e796a39f8e3e4d7
1 /* URI.java -- An URI class
2 Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.net;
41 import java.io.IOException;
42 import java.io.ObjectInputStream;
43 import java.io.ObjectOutputStream;
44 import java.io.Serializable;
45 import java.util.regex.Matcher;
46 import java.util.regex.Pattern;
/**
 * <p>
 * A URI instance represents that defined by
 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>,
 * with some deviations.
 * </p>
 * <p>
 * At its highest level, a URI consists of:
 * </p>
 * <p>
 * <code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
 * [<strong>#</strong><em>fragment</em>]</code>
 * </p>
 * <p>
 * where <strong>#</strong> and <strong>:</strong> are literal characters,
 * and those parts enclosed in square brackets are optional.
 * </p>
 * <p>
 * There are two main types of URI.  An <em>opaque</em> URI is one
 * which just consists of the above three parts, and is not further
 * defined.  An example of such a URI would be a <em>mailto:</em> URI.
 * In contrast, <em>hierarchical</em> URIs give further definition
 * to the scheme-specific part, so as to represent some part of a
 * hierarchical structure:
 * </p>
 * <p>
 * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
 * [<strong>?</strong><em>query</em>]</code>
 * </p>
 * <p>
 * with <strong>/</strong> and <strong>?</strong> being literal characters.
 * When server-based, the authority section is further subdivided into:
 * </p>
 * <p>
 * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
 * [<strong>:</strong><em>port</em>]</code>
 * </p>
 * <p>
 * with <strong>@</strong> and <strong>:</strong> as literal characters.
 * Authority sections that are not server-based are said to be registry-based.
 * </p>
 * <p>
 * Hierarchical URIs can be either relative or absolute.  Absolute URIs
 * always specify a scheme, while relative URIs don't.  Opaque URIs are
 * always absolute.
 * </p>
 * <p>
 * Each part of the URI may have one of three states: undefined, empty
 * or containing some content.  The former two of these are represented
 * by <code>null</code> and the empty string in Java, respectively.
 * The scheme-specific part may never be undefined.  It also follows from
 * this that the path sub-part of a hierarchical URI may also not be
 * undefined, so as to ensure the former.
 * </p>
 * <p>
 * Deviations of this implementation: an empty authority, user-info, host,
 * port, query or fragment is reported as undefined (<code>null</code>),
 * non-ASCII characters are rejected when parsing, and IPv6 literals are
 * not understood.
 * </p>
 *
 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
 * @author Dalibor Topic (robilad@kaffe.org)
 * @author Michael Koch (konqueror@gmx.de)
 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 * @since 1.4
 */
public final class URI
  implements Comparable, Serializable
{
  static final long serialVersionUID = -6052424284110960213L;

  /**
   * Regular expression for parsing URIs.
   *
   * Taken from RFC 2396, Appendix B.
   * This expression doesn't parse IPv6 addresses.
   */
  private static final String URI_REGEXP =
    "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";

  /**
   * Regular expression splitting an authority into user-info, host
   * and port.
   */
  private static final String AUTHORITY_REGEXP =
    "(([^?#]*)@)?([^?#:]*)(:([^?#]*))?";

  /** Compiled form of {@link #URI_REGEXP}; compiled once and shared. */
  private static final Pattern URI_PATTERN = Pattern.compile(URI_REGEXP);

  /** Compiled form of {@link #AUTHORITY_REGEXP}; compiled once and shared. */
  private static final Pattern AUTHORITY_PATTERN =
    Pattern.compile(AUTHORITY_REGEXP);

  /**
   * Valid characters (taken from rfc2396)
   */
  private static final String RFC2396_DIGIT = "0123456789";
  private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
  private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  private static final String RFC2396_ALPHA =
    RFC2396_LOWALPHA + RFC2396_UPALPHA;
  private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
  private static final String RFC2396_MARK = "-_.!~*'()";
  private static final String RFC2396_UNRESERVED =
    RFC2396_ALPHANUM + RFC2396_MARK;
  private static final String RFC2396_RESERVED = ";/?:@&=+$,";
  private static final String RFC2396_URIC =
    RFC2396_RESERVED + RFC2396_UNRESERVED;
  private static final String RFC2396_USERINFO =
    RFC2396_UNRESERVED + ";:&=+$,";
  private static final String RFC2396_REG_NAME =
    RFC2396_UNRESERVED + "$,;:@&=+";
  private static final String RFC2396_PCHAR = RFC2396_UNRESERVED + ":@&=+$,";
  private static final String RFC2396_SEGMENT = RFC2396_PCHAR + ";";
  private static final String RFC2396_PATH_SEGMENTS = RFC2396_SEGMENT + "/";

  /** Digits used when writing "%" + hex escapes. */
  private static final String HEX_DIGITS = "0123456789ABCDEF";

  /**
   * Index of scheme component in parsed URI.
   */
  private static final int SCHEME_GROUP = 2;

  /**
   * Index of scheme-specific-part in parsed URI.
   */
  private static final int SCHEME_SPEC_PART_GROUP = 3;

  /**
   * Index of authority component in parsed URI.
   */
  private static final int AUTHORITY_GROUP = 5;

  /**
   * Index of path component in parsed URI.
   */
  private static final int PATH_GROUP = 6;

  /**
   * Index of query component in parsed URI.
   */
  private static final int QUERY_GROUP = 8;

  /**
   * Index of fragment component in parsed URI.
   */
  private static final int FRAGMENT_GROUP = 10;

  /** Indices of the user-info, host and port groups in a parsed authority. */
  private static final int AUTHORITY_USERINFO_GROUP = 2;
  private static final int AUTHORITY_HOST_GROUP = 3;
  private static final int AUTHORITY_PORT_GROUP = 5;

  // All components are derived from the string form, so only "string"
  // takes part in serialization.
  private transient String scheme;
  private transient String rawSchemeSpecificPart;
  private transient String schemeSpecificPart;
  private transient String rawAuthority;
  private transient String authority;
  private transient String rawUserInfo;
  private transient String userInfo;
  private transient String rawHost;
  private transient String host;
  private transient int port = -1;
  private transient String rawPath;
  private transient String path;
  private transient String rawQuery;
  private transient String query;
  private transient String rawFragment;
  private transient String fragment;
  private String string;

  /**
   * Restores this URI from a stream by reading its string form and
   * parsing it again.
   *
   * @param is The stream to read from
   *
   * @exception ClassNotFoundException If the stream content cannot be
   * resolved to a class
   * @exception IOException If reading from the stream fails
   */
  private void readObject(ObjectInputStream is)
    throws ClassNotFoundException, IOException
  {
    this.string = (String) is.readObject();
    // Field initializers are not run during deserialization, so the
    // "undefined" port value has to be restored by hand before parsing.
    this.port = -1;
    try
      {
        parseURI(this.string);
      }
    catch (URISyntaxException x)
      {
        // Should not happen.
        throw new RuntimeException(x);
      }
  }

  /**
   * Writes the string form of this URI to the stream.
   *
   * @param os The stream to write to
   *
   * @exception IOException If writing to the stream fails
   */
  private void writeObject(ObjectOutputStream os) throws IOException
  {
    if (string == null)
      string = toString();
    os.writeObject(string);
  }

  /**
   * Returns the text matched by a group, mapping both "did not take part
   * in the match" and "matched the empty string" to <code>null</code>.
   *
   * @param match The matcher, after a successful match
   * @param group The index of the group to fetch
   *
   * @return The matched text, or null if it is absent or empty.
   */
  private static String getURIGroup(Matcher match, int group)
  {
    String matched = match.group(group);
    // group() yields null for an optional group that was skipped.
    if (matched == null || matched.length() == 0)
      return null;
    return matched;
  }

  /**
   * Sets fields of this URI by parsing the given string.
   *
   * @param str The string to parse
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  private void parseURI(String str) throws URISyntaxException
  {
    Matcher matcher = URI_PATTERN.matcher(str);
    if (! matcher.matches())
      throw new URISyntaxException(str, "doesn't match URI regular expression");

    scheme = getURIGroup(matcher, SCHEME_GROUP);
    rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
    if (rawSchemeSpecificPart == null)
      rawSchemeSpecificPart = "";
    schemeSpecificPart = unquote(rawSchemeSpecificPart);
    if (! isOpaque())
      {
        rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
        rawPath = matcher.group(PATH_GROUP);
        if (rawPath == null)
          rawPath = "";
        rawQuery = getURIGroup(matcher, QUERY_GROUP);
      }
    rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);

    if (rawAuthority != null)
      {
        matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
        if (! matcher.matches())
          throw new URISyntaxException(str,
                                       "doesn't match URI regular expression");

        rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
        rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);

        String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
        if (portStr != null)
          try
            {
              port = Integer.parseInt(portStr);
            }
          catch (NumberFormatException e)
            {
              URISyntaxException use =
                new URISyntaxException(str, "invalid port number");
              use.initCause(e);
              throw use;
            }
      }

    // We must eagerly unquote the parts, because this is the only time
    // we may throw an exception.
    authority = unquote(rawAuthority);
    userInfo = unquote(rawUserInfo);
    host = unquote(rawHost);
    path = unquote(rawPath);
    query = unquote(rawQuery);
    fragment = unquote(rawFragment);
  }

  /**
   * Unquote "%" + hex quotes characters
   *
   * @param str The string to unquote or null.
   *
   * @return The unquoted string or null if str was null.
   *
   * @exception URISyntaxException If the given string contains invalid
   * escape sequences or a character above 127.
   */
  private static String unquote(String str) throws URISyntaxException
  {
    if (str == null)
      return null;
    // Every input character yields at most one byte.
    byte[] buf = new byte[str.length()];
    int pos = 0;
    for (int i = 0; i < str.length(); i++)
      {
        char c = str.charAt(i);
        if (c > 127)
          throw new URISyntaxException(str, "Invalid character");
        if (c == '%')
          {
            // Two hex digits must follow the percent sign.
            if (i + 2 >= str.length())
              throw new URISyntaxException(str, "Invalid quoted character");
            int hi = Character.digit(str.charAt(++i), 16);
            int lo = Character.digit(str.charAt(++i), 16);
            if (lo < 0 || hi < 0)
              throw new URISyntaxException(str, "Invalid quoted character");
            buf[pos++] = (byte) (hi * 16 + lo);
          }
        else
          buf[pos++] = (byte) c;
      }
    try
      {
        return new String(buf, 0, pos, "utf-8");
      }
    catch (java.io.UnsupportedEncodingException x2)
      {
        throw (Error) new InternalError().initCause(x2);
      }
  }

  /**
   * Quote characters illegal in URIs in given string.
   *
   * Replace illegal characters by encoding their UTF-8
   * representation as "%" + hex code for each resulting
   * UTF-8 character.  The legal characters are the reserved and
   * unreserved characters of RFC 2396; "%" itself is always quoted.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quote(String str)
  {
    return quote(str, RFC2396_URIC);
  }

  /**
   * Quote characters illegal in URI authorities in given string.
   *
   * Replace illegal characters by encoding their UTF-8
   * representation as "%" + hex code for each resulting
   * UTF-8 character.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quoteAuthority(String str)
  {
    // Technically, we should be using RFC2396_AUTHORITY, but
    // it contains no additional characters.
    return quote(str, RFC2396_REG_NAME);
  }

  /**
   * Quote characters in str that are not part of legalCharacters.
   *
   * Replace illegal characters by encoding their UTF-8
   * representation as "%" + hex code for each resulting
   * UTF-8 character.
   *
   * @param str The string to quote
   * @param legalCharacters The set of legal characters
   *
   * @return The quoted string.
   */
  private static String quote(String str, String legalCharacters)
  {
    StringBuffer sb = new StringBuffer(str.length());
    for (int i = 0; i < str.length(); i++)
      {
        char c = str.charAt(i);
        if (legalCharacters.indexOf(c) == -1)
          i = appendEscaped(sb, str, i);
        else
          sb.append(c);
      }
    return sb.toString();
  }

  /**
   * Appends the "%" + hex escapes for the character starting at the given
   * index.  A surrogate pair is encoded as one character, so that a
   * supplementary character yields its four UTF-8 octets.
   *
   * @param sb The buffer to append to
   * @param str The string holding the character
   * @param index The index of the first char of the character
   *
   * @return The index of the last char that was consumed.
   */
  private static int appendEscaped(StringBuffer sb, String str, int index)
  {
    int end = index + 1;
    char c = str.charAt(index);
    if (c >= 0xD800 && c <= 0xDBFF && end < str.length())
      {
        char next = str.charAt(end);
        if (next >= 0xDC00 && next <= 0xDFFF)
          end++;
      }

    byte[] utf8;
    try
      {
        utf8 = str.substring(index, end).getBytes("utf-8");
      }
    catch (java.io.UnsupportedEncodingException x)
      {
        throw (Error) new InternalError().initCause(x);
      }

    for (int j = 0; j < utf8.length; j++)
      {
        int octet = utf8[j] & 0xff;
        sb.append('%').append(HEX_DIGITS.charAt(octet / 16))
          .append(HEX_DIGITS.charAt(octet % 16));
      }
    return end - 1;
  }

  /**
   * Quote characters illegal in URI hosts in given string.
   *
   * RFC 2396 does not allow escapes inside a host name, so the host is
   * passed through unchanged.
   *
   * @param str The string to quote
   *
   * @return The given string.
   */
  private static String quoteHost(String str)
  {
    return str;
  }

  /**
   * Quote characters illegal in URI paths in given string.
   *
   * Replace illegal characters by encoding their UTF-8
   * representation as "%" + hex code for each resulting
   * UTF-8 character.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quotePath(String str)
  {
    // Technically, we should be using RFC2396_PATH, but
    // it contains no additional characters.
    return quote(str, RFC2396_PATH_SEGMENTS);
  }

  /**
   * Quote characters illegal in URI user infos in given string.
   *
   * Replace illegal characters by encoding their UTF-8
   * representation as "%" + hex code for each resulting
   * UTF-8 character.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quoteUserInfo(String str)
  {
    return quote(str, RFC2396_USERINFO);
  }

  /**
   * Creates an URI from the given string
   *
   * @param str The string to create the URI from
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   * @exception NullPointerException If str is null
   */
  public URI(String str) throws URISyntaxException
  {
    this.string = str;
    parseURI(str);
  }

  /**
   * Create an URI from the given components
   *
   * @param scheme The scheme name
   * @param userInfo The username and authorization info
   * @param host The hostname
   * @param port The port number
   * @param path The path
   * @param query The query
   * @param fragment The fragment
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  public URI(String scheme, String userInfo, String host, int port,
             String path, String query, String fragment)
    throws URISyntaxException
  {
    this((scheme == null ? "" : scheme + ":")
         + (userInfo == null && host == null && port == -1 ? "" : "//")
         + (userInfo == null ? "" : quoteUserInfo(userInfo) + "@")
         + (host == null ? "" : quoteHost(host))
         + (port == -1 ? "" : ":" + String.valueOf(port))
         + (path == null ? "" : quotePath(path))
         + (query == null ? "" : "?" + quote(query))
         + (fragment == null ? "" : "#" + quote(fragment)));

    parseServerAuthority();
  }

  /**
   * Create an URI from the given components
   *
   * @param scheme The scheme name
   * @param authority The authority
   * @param path The path
   * @param query The query
   * @param fragment The fragment
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  public URI(String scheme, String authority, String path, String query,
             String fragment) throws URISyntaxException
  {
    this((scheme == null ? "" : scheme + ":")
         + (authority == null ? "" : "//" + quoteAuthority(authority))
         + (path == null ? "" : quotePath(path))
         + (query == null ? "" : "?" + quote(query))
         + (fragment == null ? "" : "#" + quote(fragment)));
  }

  /**
   * Create an URI from the given components
   *
   * @param scheme The scheme name
   * @param host The hostname
   * @param path The path
   * @param fragment The fragment
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  public URI(String scheme, String host, String path, String fragment)
    throws URISyntaxException
  {
    this(scheme, null, host, -1, path, null, fragment);
  }

  /**
   * Create an URI from the given components
   *
   * @param scheme The scheme name
   * @param ssp The scheme specific part
   * @param fragment The fragment
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  public URI(String scheme, String ssp, String fragment)
    throws URISyntaxException
  {
    this((scheme == null ? "" : scheme + ":")
         + (ssp == null ? "" : quote(ssp))
         + (fragment == null ? "" : "#" + quote(fragment)));
  }

  /**
   * Create an URI from the given string
   *
   * @param str The string to create the URI from
   *
   * @return The new URI
   *
   * @exception IllegalArgumentException If the given string violates RFC 2396
   * @exception NullPointerException If str is null
   */
  public static URI create(String str)
  {
    try
      {
        return new URI(str);
      }
    catch (URISyntaxException e)
      {
        throw (IllegalArgumentException)
          new IllegalArgumentException(e.getMessage()).initCause(e);
      }
  }

  /**
   * Attempts to parse this URI's authority component, if defined,
   * into user-information, host, and port components.
   *
   * This implementation already splits the authority while parsing the
   * URI, so there is nothing left to do here.
   *
   * @return This URI.
   *
   * @exception URISyntaxException If the given string violates RFC 2396
   */
  public URI parseServerAuthority() throws URISyntaxException
  {
    return this;
  }

  /**
   * Returns a normalized version of the URI.  "." segments are removed
   * from the path, and each ".." segment is removed together with the
   * non-".." segment preceding it.  Opaque URIs and URIs whose path is
   * already normal are returned unchanged.
   *
   * @return The normalized URI.
   */
  public URI normalize()
  {
    if (isOpaque() || rawPath == null)
      return this;

    String normalized = normalizePath(rawPath);
    // Without an authority a leading "//" would be read back as one.
    if (rawAuthority == null && normalized.startsWith("//"))
      normalized = "/." + normalized;
    if (normalized.equals(rawPath))
      return this;

    try
      {
        return new URI(buildString(scheme, rawAuthority, normalized,
                                   rawQuery, rawFragment));
      }
    catch (URISyntaxException e)
      {
        // The components were taken from a URI that parsed successfully.
        throw (Error) new InternalError().initCause(e);
      }
  }

  /**
   * Removes "." segments and "segment/.." pairs from a path.  Leading
   * ".." segments that have nothing to cancel are kept.  If the result is
   * relative and its first segment contains a colon, "./" is put in front
   * so that the segment cannot be mistaken for a scheme.
   *
   * @param path The path to normalize, not null
   *
   * @return The normalized path.
   */
  private static String normalizePath(String path)
  {
    if (path.length() == 0)
      return path;

    boolean absolute = path.charAt(0) == '/';
    // A path cannot hold more segments than it has characters, plus one.
    String[] stack = new String[path.length() + 1];
    int depth = 0;
    // Set when the final segment was a dot segment that got removed; the
    // result then has to keep denoting a directory.
    boolean trailingSlash = false;

    int start = absolute ? 1 : 0;
    while (start <= path.length())
      {
        int end = path.indexOf('/', start);
        if (end == -1)
          end = path.length();
        String segment = path.substring(start, end);
        boolean last = end == path.length();

        if (segment.equals("."))
          trailingSlash = last;
        else if (segment.equals("..")
                 && depth > 0 && ! stack[depth - 1].equals(".."))
          {
            depth--;
            trailingSlash = last;
          }
        else
          stack[depth++] = segment;

        start = end + 1;
      }

    StringBuffer sb = new StringBuffer(path.length() + 2);
    if (absolute)
      sb.append('/');
    else if (depth > 0 && stack[0].indexOf(':') != -1)
      sb.append("./");
    for (int i = 0; i < depth; i++)
      {
        if (i > 0)
          sb.append('/');
        sb.append(stack[i]);
      }
    if (trailingSlash && depth > 0)
      sb.append('/');
    return sb.toString();
  }

  /**
   * Assembles the string form of a hierarchical URI from raw components.
   * Components that are null are left out.
   *
   * @param scheme The scheme, or null
   * @param rawAuthority The raw authority, or null
   * @param rawPath The raw path, not null
   * @param rawQuery The raw query, or null
   * @param rawFragment The raw fragment, or null
   *
   * @return The assembled string.
   */
  private static String buildString(String scheme, String rawAuthority,
                                    String rawPath, String rawQuery,
                                    String rawFragment)
  {
    StringBuffer sb = new StringBuffer();
    if (scheme != null)
      sb.append(scheme).append(':');
    if (rawAuthority != null)
      sb.append("//").append(rawAuthority);
    sb.append(rawPath);
    if (rawQuery != null)
      sb.append('?').append(rawQuery);
    if (rawFragment != null)
      sb.append('#').append(rawFragment);
    return sb.toString();
  }

  /**
   * Resolves the given URI against this URI
   *
   * @param uri The URI to resolve against this URI
   *
   * @return The resulting URI, or null when it couldn't be resolved
   * for some reason.  The given URI itself is returned if it is absolute
   * or if this URI is opaque.
   *
   * @exception NullPointerException If uri is null
   */
  public URI resolve(URI uri)
  {
    if (uri.isAbsolute() || uri.isOpaque())
      return uri;
    // An opaque base has no path to resolve against.
    if (isOpaque())
      return uri;

    String authority = uri.getAuthority();
    String path = uri.getPath();
    String query = uri.getQuery();
    String fragment = uri.getFragment();

    try
      {
        // A reference consisting of a fragment only refers to this URI.
        if (fragment != null && path != null && path.equals("")
            && authority == null && query == null)
          return new URI(this.scheme, this.schemeSpecificPart, fragment);

        if (authority == null)
          {
            authority = this.authority;
            if (path == null)
              path = "";
            if (! (path.startsWith("/")))
              {
                String base = this.path;
                String merged;
                if (this.authority != null && base.length() == 0)
                  // Keep the path separate from the authority.
                  merged = "/" + path;
                else
                  // Replace everything after the last slash of the base;
                  // a base without any slash is replaced entirely.
                  merged = base.substring(0, base.lastIndexOf('/') + 1) + path;
                path = normalizePath(merged);
              }
          }
        return new URI(this.scheme, authority, path, query, fragment);
      }
    catch (URISyntaxException e)
      {
        return null;
      }
  }

  /**
   * Resolves the given URI string against this URI
   *
   * @param str The URI as string to resolve against this URI
   *
   * @return The resulting URI
   *
   * @exception IllegalArgumentException If the given URI string
   * violates RFC 2396
   * @exception NullPointerException If uri is null
   */
  public URI resolve(String str) throws IllegalArgumentException
  {
    return resolve(create(str));
  }

  /**
   * Relativizes the given URI against this URI
   *
   * @param uri The URI to relativize against this URI
   *
   * @return A relative URI built from the remainder of the given URI's
   * path together with its query and fragment, or the given URI itself
   * if either URI is opaque, if the schemes or authorities differ, or if
   * the path of this URI is not a prefix of the given URI's path.
   *
   * @exception NullPointerException If uri is null
   */
  public URI relativize(URI uri)
  {
    if (isOpaque() || uri.isOpaque())
      return uri;
    if (compareStrings(scheme, uri.scheme, true) != 0
        || compareStrings(rawAuthority, uri.rawAuthority, false) != 0)
      return uri;

    String basePath = rawPath;
    String target = uri.rawPath;
    if (! basePath.equals(target))
      {
        if (! basePath.endsWith("/"))
          basePath = basePath + "/";
        if (! target.startsWith(basePath))
          return uri;
      }

    String relative = target.substring(basePath.length());
    // A remainder starting with a slash could not be told apart from an
    // absolute path or an authority.
    if (relative.startsWith("/"))
      return uri;
    // A colon in the first segment would be read back as a scheme.
    int colon = relative.indexOf(':');
    int slash = relative.indexOf('/');
    if (colon != -1 && (slash == -1 || colon < slash))
      relative = "./" + relative;

    try
      {
        return new URI(buildString(null, null, relative,
                                   uri.rawQuery, uri.rawFragment));
      }
    catch (URISyntaxException e)
      {
        return uri;
      }
  }

  /**
   * Creates an URL from an URI
   *
   * @return The URL built from the string form of this URI
   *
   * @exception MalformedURLException If a protocol handler for the URL could
   * not be found, or if some other error occurred while constructing the URL
   * @exception IllegalArgumentException If the URI is not absolute
   */
  public URL toURL() throws IllegalArgumentException, MalformedURLException
  {
    if (isAbsolute())
      return new URL(this.toString());

    throw new IllegalArgumentException("not absolute");
  }

  /**
   * Returns the scheme of the URI
   */
  public String getScheme()
  {
    return scheme;
  }

  /**
   * Tells whether this URI is absolute or not
   */
  public boolean isAbsolute()
  {
    return scheme != null;
  }

  /**
   * Tell whether this URI is opaque or not.  A URI is opaque if it has a
   * scheme and its raw scheme-specific part does not start with a slash.
   */
  public boolean isOpaque()
  {
    // The raw form is tested so that an escaped slash ("%2F") does not
    // turn an opaque URI into a hierarchical one.
    return ((scheme != null) && ! (rawSchemeSpecificPart.startsWith("/")));
  }

  /**
   * Returns the raw scheme specific part of this URI.
   * The scheme-specific part is never undefined, though it may be empty
   */
  public String getRawSchemeSpecificPart()
  {
    return rawSchemeSpecificPart;
  }

  /**
   * Returns the decoded scheme specific part of this URI.
   */
  public String getSchemeSpecificPart()
  {
    return schemeSpecificPart;
  }

  /**
   * Returns the raw authority part of this URI
   */
  public String getRawAuthority()
  {
    return rawAuthority;
  }

  /**
   * Returns the decoded authority part of this URI
   */
  public String getAuthority()
  {
    return authority;
  }

  /**
   * Returns the raw user info part of this URI
   */
  public String getRawUserInfo()
  {
    return rawUserInfo;
  }

  /**
   * Returns the decoded user info part of this URI
   */
  public String getUserInfo()
  {
    return userInfo;
  }

  /**
   * Returns the hostname of the URI
   */
  public String getHost()
  {
    return host;
  }

  /**
   * Returns the port number of the URI, or -1 if it is undefined
   */
  public int getPort()
  {
    return port;
  }

  /**
   * Returns the raw path part of this URI
   */
  public String getRawPath()
  {
    return rawPath;
  }

  /**
   * Returns the path of the URI
   */
  public String getPath()
  {
    return path;
  }

  /**
   * Returns the raw query part of this URI
   */
  public String getRawQuery()
  {
    return rawQuery;
  }

  /**
   * Returns the query of the URI
   */
  public String getQuery()
  {
    return query;
  }

  /**
   * Return the raw fragment part of this URI
   */
  public String getRawFragment()
  {
    return rawFragment;
  }

  /**
   * Returns the fragment of the URI
   */
  public String getFragment()
  {
    return fragment;
  }

  /**
   * Compares the URI with a given object.  Two URIs are equal if
   * {@link #compareTo(Object)} considers them equal: schemes and hosts
   * are compared without regard to case, all other raw components are
   * compared exactly.
   *
   * @param obj The obj to compare the URI with
   *
   * @return true if obj is an URI equal to this one.
   */
  public boolean equals(Object obj)
  {
    if (this == obj)
      return true;
    if (! (obj instanceof URI))
      return false;
    return compareTo(obj) == 0;
  }

  /**
   * Computes the hashcode of the URI from the same components that
   * {@link #equals(Object)} compares.
   */
  public int hashCode()
  {
    int hash = hashString(scheme, true);
    if (isOpaque())
      hash = 31 * hash + hashString(rawSchemeSpecificPart, false);
    else
      {
        hash = 31 * hash + hashString(rawUserInfo, false);
        hash = 31 * hash + hashString(rawHost, true);
        hash = 31 * hash + port;
        hash = 31 * hash + hashString(rawPath, false);
        hash = 31 * hash + hashString(rawQuery, false);
      }
    return 31 * hash + hashString(rawFragment, false);
  }

  /**
   * Compare the URI with another object that must be an URI too.
   * URIs are ordered by scheme (ignoring case), hierarchical URIs come
   * before opaque ones, opaque URIs are ordered by scheme-specific part,
   * hierarchical URIs by user info, host (ignoring case), port, path and
   * query, and finally all URIs by fragment.  An undefined component
   * sorts before a defined one.
   *
   * @param obj This object to compare this URI with
   *
   * @return A negative number, zero or a positive number if this URI is
   * less than, equal to or greater than the given one.
   *
   * @exception ClassCastException If given object is not an URI
   */
  public int compareTo(Object obj) throws ClassCastException
  {
    URI other = (URI) obj;

    int result = compareStrings(scheme, other.scheme, true);
    if (result != 0)
      return result;

    boolean opaque = isOpaque();
    if (opaque != other.isOpaque())
      return opaque ? 1 : -1;

    if (opaque)
      result = compareStrings(rawSchemeSpecificPart,
                              other.rawSchemeSpecificPart, false);
    else
      {
        result = compareStrings(rawUserInfo, other.rawUserInfo, false);
        if (result == 0)
          result = compareStrings(rawHost, other.rawHost, true);
        if (result == 0 && port != other.port)
          result = port < other.port ? -1 : 1;
        if (result == 0)
          result = compareStrings(rawPath, other.rawPath, false);
        if (result == 0)
          result = compareStrings(rawQuery, other.rawQuery, false);
      }
    if (result == 0)
      result = compareStrings(rawFragment, other.rawFragment, false);
    return result;
  }

  /**
   * Compares two possibly undefined strings; null sorts first.
   *
   * @param first The first string, or null
   * @param second The second string, or null
   * @param ignoreCase Whether case differences are to be ignored
   *
   * @return The result of the comparison.
   */
  private static int compareStrings(String first, String second,
                                    boolean ignoreCase)
  {
    if (first == second)
      return 0;
    if (first == null)
      return -1;
    if (second == null)
      return 1;
    return ignoreCase ? first.compareToIgnoreCase(second)
                      : first.compareTo(second);
  }

  /**
   * Hashes a possibly undefined string.  When case is ignored, each char
   * is folded the same way String.compareToIgnoreCase folds it, so that
   * strings comparing equal there also hash equally.
   *
   * @param str The string, or null
   * @param ignoreCase Whether case differences are to be ignored
   *
   * @return The hash value, 0 for null.
   */
  private static int hashString(String str, boolean ignoreCase)
  {
    if (str == null)
      return 0;
    if (! ignoreCase)
      return str.hashCode();
    int hash = 0;
    for (int i = 0; i < str.length(); i++)
      hash = 31 * hash
             + Character.toLowerCase(Character.toUpperCase(str.charAt(i)));
    return hash;
  }

  /**
   * Returns the URI as a String, assembled from the scheme, the raw
   * scheme-specific part and the raw fragment.
   *
   * @return a string representation of the URI.
   */
  public String toString()
  {
    return (getScheme() == null ? "" : getScheme() + ":")
      + getRawSchemeSpecificPart()
      + (getRawFragment() == null ? "" : "#" + getRawFragment());
  }

  /**
   * Returns the URI as US-ASCII string.  Characters above 127 in the
   * string form are replaced by the "%" + hex escapes of their UTF-8
   * representation.
   *
   * @return The string form of this URI using ASCII characters only.
   */
  public String toASCIIString()
  {
    String str = toString();
    StringBuffer sb = new StringBuffer(str.length());
    for (int i = 0; i < str.length(); i++)
      {
        char c = str.charAt(i);
        if (c > 127)
          i = appendEscaped(sb, str, i);
        else
          sb.append(c);
      }
    return sb.toString();
  }
}