1 /* URI.java -- An URI class
2 Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
41 import java
.io
.IOException
;
42 import java
.io
.ObjectInputStream
;
43 import java
.io
.ObjectOutputStream
;
44 import java
.io
.Serializable
;
45 import java
.util
.regex
.Matcher
;
46 import java
.util
.regex
.Pattern
;
50 * A URI instance represents that defined by
51 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>,
52 * with some deviations.
55 * At its highest level, a URI consists of:
57 * <code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
58 * [<strong>#</strong><em>fragment</em>]</code>
61 * where <strong>#</strong> and <strong>:</strong> are literal characters,
62 * and those parts enclosed in square brackets are optional.
65 * There are two main types of URI. An <em>opaque</em> URI is one
66 * which just consists of the above three parts, and is not further
67 * defined. An example of such a URI would be <em>mailto:</em> URI.
68 * In contrast, <em>hierarchical</em> URIs give further definition
69 * to the scheme-specific part, so as to represent some part of a hierarchical
73 * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
74 * [<strong>?</strong><em>query</em>]</code>
77 * with <strong>/</strong> and <strong>?</strong> being literal characters.
78 * When server-based, the authority section is further subdivided into:
81 * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
82 * [<strong>:</strong><em>port</em>]</code>
85 * with <strong>@</strong> and <strong>:</strong> as literal characters.
86 * Authority sections that are not server-based are said to be registry-based.
89 * Hierarchical URIs can be either relative or absolute. Absolute URIs
90 * always specify a scheme, while relative URIs do not
91 * specify one. Opaque URIs are always absolute.
94 * Each part of the URI may have one of three states: undefined, empty
95 * or containing some content. The first two of these are represented
96 * by <code>null</code> and the empty string in Java, respectively.
97 * The scheme-specific part may never be undefined. It also follows from
98 * this that the path sub-part may also not be undefined, so as to ensure
102 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
103 * @author Dalibor Topic (robilad@kaffe.org)
104 * @author Michael Koch (konqueror@gmx.de)
105 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
108 public final class URI
109 implements Comparable
, Serializable
111 static final long serialVersionUID
= -6052424284110960213L;
114 * Regular expression for parsing URIs.
116 * Taken from RFC 2396, Appendix B.
117 * This expression doesn't parse IPv6 addresses.
119 private static final String URI_REGEXP
=
120 "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";
122 private static final String AUTHORITY_REGEXP
=
123 "(([^?#]*)@)?([^?#:]*)(:([^?#]*))?";
126 * Valid characters (taken from rfc2396)
128 private static final String RFC2396_DIGIT
= "0123456789";
129 private static final String RFC2396_LOWALPHA
= "abcdefghijklmnopqrstuvwxyz";
130 private static final String RFC2396_UPALPHA
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
131 private static final String RFC2396_ALPHA
=
132 RFC2396_LOWALPHA
+ RFC2396_UPALPHA
;
133 private static final String RFC2396_ALPHANUM
= RFC2396_DIGIT
+ RFC2396_ALPHA
;
134 private static final String RFC2396_MARK
= "-_.!~*'()";
135 private static final String RFC2396_UNRESERVED
=
136 RFC2396_ALPHANUM
+ RFC2396_MARK
;
137 private static final String RFC2396_REG_NAME
=
138 RFC2396_UNRESERVED
+ "$,;:@&=+";
139 private static final String RFC2396_PCHAR
= RFC2396_UNRESERVED
+ ":@&=+$,";
140 private static final String RFC2396_SEGMENT
= RFC2396_PCHAR
+ ";";
141 private static final String RFC2396_PATH_SEGMENTS
= RFC2396_SEGMENT
+ "/";
144 * Index of scheme component in parsed URI.
146 private static final int SCHEME_GROUP
= 2;
149 * Index of scheme-specific-part in parsed URI.
151 private static final int SCHEME_SPEC_PART_GROUP
= 3;
154 * Index of authority component in parsed URI.
156 private static final int AUTHORITY_GROUP
= 5;
159 * Index of path component in parsed URI.
161 private static final int PATH_GROUP
= 6;
164 * Index of query component in parsed URI.
166 private static final int QUERY_GROUP
= 8;
169 * Index of fragment component in parsed URI.
171 private static final int FRAGMENT_GROUP
= 10;
173 private static final int AUTHORITY_USERINFO_GROUP
= 2;
174 private static final int AUTHORITY_HOST_GROUP
= 3;
175 private static final int AUTHORITY_PORT_GROUP
= 5;
177 private transient String scheme
;
178 private transient String rawSchemeSpecificPart
;
179 private transient String schemeSpecificPart
;
180 private transient String rawAuthority
;
181 private transient String authority
;
182 private transient String rawUserInfo
;
183 private transient String userInfo
;
184 private transient String rawHost
;
185 private transient String host
;
186 private transient int port
= -1;
187 private transient String rawPath
;
188 private transient String path
;
189 private transient String rawQuery
;
190 private transient String query
;
191 private transient String rawFragment
;
192 private transient String fragment
;
193 private String string
;
195 private void readObject(ObjectInputStream is
)
196 throws ClassNotFoundException
, IOException
198 this.string
= (String
) is
.readObject();
201 parseURI(this.string
);
203 catch (URISyntaxException x
)
205 // Should not happen.
206 throw new RuntimeException(x
);
210 private void writeObject(ObjectOutputStream os
) throws IOException
214 os
.writeObject(string
);
217 private static String
getURIGroup(Matcher match
, int group
)
219 String matched
= match
.group(group
);
220 return matched
.length() == 0 ?
null : matched
;
224 * Sets fields of this URI by parsing the given string.
226 * @param str The string to parse
228 * @exception URISyntaxException If the given string violates RFC 2396
230 private void parseURI(String str
) throws URISyntaxException
232 Pattern pattern
= Pattern
.compile(URI_REGEXP
);
233 Matcher matcher
= pattern
.matcher(str
);
235 if (matcher
.matches())
237 scheme
= getURIGroup(matcher
, SCHEME_GROUP
);
238 rawSchemeSpecificPart
= matcher
.group(SCHEME_SPEC_PART_GROUP
);
239 schemeSpecificPart
= unquote(rawSchemeSpecificPart
);
242 rawAuthority
= getURIGroup(matcher
, AUTHORITY_GROUP
);
243 rawPath
= matcher
.group(PATH_GROUP
);
244 rawQuery
= getURIGroup(matcher
, QUERY_GROUP
);
246 rawFragment
= getURIGroup(matcher
, FRAGMENT_GROUP
);
249 throw new URISyntaxException(str
, "doesn't match URI regular expression");
251 if (rawAuthority
!= null)
253 pattern
= Pattern
.compile(AUTHORITY_REGEXP
);
254 matcher
= pattern
.matcher(rawAuthority
);
256 if (matcher
.matches())
258 rawUserInfo
= getURIGroup(matcher
, AUTHORITY_USERINFO_GROUP
);
259 rawHost
= getURIGroup(matcher
, AUTHORITY_HOST_GROUP
);
261 String portStr
= getURIGroup(matcher
, AUTHORITY_PORT_GROUP
);
266 port
= Integer
.parseInt(portStr
);
268 catch (NumberFormatException e
)
270 URISyntaxException use
=
271 new URISyntaxException
272 (str
, "doesn't match URI regular expression");
278 throw new URISyntaxException(str
, "doesn't match URI regular expression");
281 // We must eagerly unquote the parts, because this is the only time
282 // we may throw an exception.
283 authority
= unquote(rawAuthority
);
284 userInfo
= unquote(rawUserInfo
);
285 host
= unquote(rawHost
);
286 path
= unquote(rawPath
);
287 query
= unquote(rawQuery
);
288 fragment
= unquote(rawFragment
);
292 * Unquotes characters that were quoted as "%" + hex digits
294 * @param str The string to unquote or null.
296 * @return The unquoted string or null if str was null.
298 * @exception URISyntaxException If the given string contains invalid
301 private static String
unquote(String str
) throws URISyntaxException
305 byte[] buf
= new byte[str
.length()];
307 for (int i
= 0; i
< str
.length(); i
++)
309 char c
= str
.charAt(i
);
311 throw new URISyntaxException(str
, "Invalid character");
314 if (i
+ 2 >= str
.length())
315 throw new URISyntaxException(str
, "Invalid quoted character");
316 int hi
= Character
.digit(str
.charAt(++i
), 16);
317 int lo
= Character
.digit(str
.charAt(++i
), 16);
318 if (lo
< 0 || hi
< 0)
319 throw new URISyntaxException(str
, "Invalid quoted character");
320 buf
[pos
++] = (byte) (hi
* 16 + lo
);
323 buf
[pos
++] = (byte) c
;
327 return new String(buf
, 0, pos
, "utf-8");
329 catch (java
.io
.UnsupportedEncodingException x2
)
331 throw (Error
) new InternalError().initCause(x2
);
336 * Quote characters illegal in URIs in given string.
338 * Replace illegal characters by encoding their UTF-8
339 * representation as "%" + hex code for each resulting
342 * @param str The string to quote
344 * @return The quoted string.
346 private static String
quote(String str
)
348 // FIXME: unimplemented.
353 * Quote characters illegal in URI authorities in given string.
355 * Replace illegal characters by encoding their UTF-8
356 * representation as "%" + hex code for each resulting
359 * @param str The string to quote
361 * @return The quoted string.
363 private static String
quoteAuthority(String str
)
365 // Technically, we should be using RFC2396_AUTHORITY, but
366 // it contains no additional characters.
367 return quote(str
, RFC2396_REG_NAME
);
371 * Quote characters in str that are not part of legalCharacters.
373 * Replace illegal characters by encoding their UTF-8
374 * representation as "%" + hex code for each resulting
377 * @param str The string to quote
378 * @param legalCharacters The set of legal characters
380 * @return The quoted string.
382 private static String
quote(String str
, String legalCharacters
)
384 StringBuffer sb
= new StringBuffer(str
.length());
385 for (int i
= 0; i
< str
.length(); i
++)
387 char c
= str
.charAt(i
);
388 if (legalCharacters
.indexOf(c
) == -1)
390 String hex
= "0123456789ABCDEF";
392 sb
.append('%').append(hex
.charAt(c
/ 16)).append(hex
.charAt(c
% 16));
397 // this is far from optimal, but it works
398 byte[] utf8
= str
.substring(i
, i
+ 1).getBytes("utf-8");
399 for (int j
= 0; j
< utf8
.length
; j
++)
400 sb
.append('%').append(hex
.charAt((utf8
[j
] & 0xff) / 16))
401 .append(hex
.charAt((utf8
[j
] & 0xff) % 16));
403 catch (java
.io
.UnsupportedEncodingException x
)
405 throw (Error
) new InternalError().initCause(x
);
412 return sb
.toString();
416 * Quote characters illegal in URI hosts in given string.
418 * Replace illegal characters by encoding their UTF-8
419 * representation as "%" + hex code for each resulting
422 * @param str The string to quote
424 * @return The quoted string.
426 private static String
quoteHost(String str
)
428 // FIXME: unimplemented.
433 * Quote characters illegal in URI paths in given string.
435 * Replace illegal characters by encoding their UTF-8
436 * representation as "%" + hex code for each resulting
439 * @param str The string to quote
441 * @return The quoted string.
443 private static String
quotePath(String str
)
445 // Technically, we should be using RFC2396_PATH, but
446 // it contains no additional characters.
447 return quote(str
, RFC2396_PATH_SEGMENTS
);
451 * Quote characters illegal in URI user infos in given string.
453 * Replace illegal characters by encoding their UTF-8
454 * representation as "%" + hex code for each resulting
457 * @param str The string to quote
459 * @return The quoted string.
461 private static String
quoteUserInfo(String str
)
463 // FIXME: unimplemented.
468 * Creates an URI from the given string
470 * @param str The string to create the URI from
472 * @exception URISyntaxException If the given string violates RFC 2396
473 * @exception NullPointerException If str is null
475 public URI(String str
) throws URISyntaxException
482 * Create an URI from the given components
484 * @param scheme The scheme name
485 * @param userInfo The username and authorization info
486 * @param host The hostname
487 * @param port The port number
488 * @param path The path
489 * @param query The query
490 * @param fragment The fragment
492 * @exception URISyntaxException If the given string violates RFC 2396
/**
 * Creates a URI from server-based components.
 *
 * The components are assembled into one string, which is parsed by the
 * single-string constructor; afterwards
 * <code>parseServerAuthority()</code> is invoked on the result.
 *
 * @param scheme The scheme name, or null
 * @param userInfo The username and authorization info, or null
 * @param host The hostname, or null
 * @param port The port number, or -1 for none
 * @param path The path, or null
 * @param query The query, or null
 * @param fragment The fragment, or null
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
  throws URISyntaxException
{
  this(assembleServerUri(scheme, userInfo, host, port, path, query,
                         fragment));

  parseServerAuthority();
}

/**
 * Builds the string form used by the seven-argument constructor.
 * Components that are null (or a port of -1) are left out; the "//"
 * authority prefix is written when any of user info, host or port
 * is present.
 */
private static String assembleServerUri(String scheme, String userInfo,
                                        String host, int port, String path,
                                        String query, String fragment)
{
  StringBuffer text = new StringBuffer();

  if (scheme != null)
    text.append(scheme).append(":");
  if (userInfo != null || host != null || port != -1)
    text.append("//");
  if (userInfo != null)
    text.append(quoteUserInfo(userInfo)).append("@");
  if (host != null)
    text.append(quoteHost(host));
  if (port != -1)
    text.append(":").append(String.valueOf(port));
  if (path != null)
    text.append(quotePath(path));
  if (query != null)
    text.append("?").append(quote(query));
  if (fragment != null)
    text.append("#").append(quote(fragment));

  return text.toString();
}
511 * Create an URI from the given components
513 * @param scheme The scheme name
514 * @param authority The authority
515 * @param path The path
516 * @param query The query
517 * @param fragment The fragment
519 * @exception URISyntaxException If the given string violates RFC 2396
/**
 * Creates a URI from hierarchical components.
 *
 * The components are assembled into one string, which is parsed by the
 * single-string constructor.
 *
 * @param scheme The scheme name, or null
 * @param authority The authority, or null
 * @param path The path, or null
 * @param query The query, or null
 * @param fragment The fragment, or null
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException
{
  this(assembleHierarchicalUri(scheme, authority, path, query, fragment));
}

/**
 * Builds the string form used by the five-argument constructor.
 * Components that are null are left out.
 */
private static String assembleHierarchicalUri(String scheme,
                                              String authority, String path,
                                              String query, String fragment)
{
  StringBuffer text = new StringBuffer();

  if (scheme != null)
    text.append(scheme).append(":");
  if (authority != null)
    text.append("//").append(quoteAuthority(authority));
  if (path != null)
    text.append(quotePath(path));
  if (query != null)
    text.append("?").append(quote(query));
  if (fragment != null)
    text.append("#").append(quote(fragment));

  return text.toString();
}
532 * Create an URI from the given components
534 * @param scheme The scheme name
535 * @param host The hostname
536 * @param path The path
537 * @param fragment The fragment
539 * @exception URISyntaxException If the given string violates RFC 2396
/**
 * Creates a URI from a scheme, host, path and fragment.
 *
 * This delegates to the seven-argument constructor, passing null for
 * the user info and the query and -1 for the port.
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the given components violate RFC 2396
 */
public URI(String scheme, String host, String path, String fragment)
  throws URISyntaxException
{
  this(scheme,
       null,      // no user info
       host,
       -1,        // no port
       path,
       null,      // no query
       fragment);
}
548 * Create an URI from the given components
550 * @param scheme The scheme name
551 * @param ssp The scheme specific part
552 * @param fragment The fragment
554 * @exception URISyntaxException If the given string violates RFC 2396
/**
 * Creates a URI from a scheme, a scheme-specific part and a fragment.
 *
 * The components are assembled into one string, which is parsed by the
 * single-string constructor.
 *
 * @param scheme The scheme name, or null
 * @param ssp The scheme specific part, or null
 * @param fragment The fragment, or null
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String ssp, String fragment)
  throws URISyntaxException
{
  this(assembleOpaqueUri(scheme, ssp, fragment));
}

/**
 * Builds the string form used by the three-argument constructor.
 * Components that are null are left out.
 */
private static String assembleOpaqueUri(String scheme, String ssp,
                                        String fragment)
{
  StringBuffer text = new StringBuffer();

  if (scheme != null)
    text.append(scheme).append(":");
  if (ssp != null)
    text.append(quote(ssp));
  if (fragment != null)
    text.append("#").append(quote(fragment));

  return text.toString();
}
565 * Create an URI from the given string
567 * @param str The string to create the URI from
569 * @exception IllegalArgumentException If the given string violates RFC 2396
570 * @exception NullPointerException If str is null
572 public static URI
create(String str
)
578 catch (URISyntaxException e
)
580 throw (IllegalArgumentException
) new IllegalArgumentException()
586 * Attempts to parse this URI's authority component, if defined,
587 * into user-information, host, and port components
589 * @exception URISyntaxException If the given string violates RFC 2396
591 public URI
parseServerAuthority() throws URISyntaxException
597 * Returns a normalized version of the URI
599 public URI
normalize()
605 * Resolves the given URI against this URI
607 * @param uri The URI to resolve against this URI
609 * @return The resulting URI, or null when it couldn't be resolved
612 * @exception NullPointerException If uri is null
614 public URI
resolve(URI uri
)
616 if (uri
.isAbsolute())
621 String scheme
= uri
.getScheme();
622 String schemeSpecificPart
= uri
.getSchemeSpecificPart();
623 String authority
= uri
.getAuthority();
624 String path
= uri
.getPath();
625 String query
= uri
.getQuery();
626 String fragment
= uri
.getFragment();
630 if (fragment
!= null && path
!= null && path
.equals("")
631 && scheme
== null && authority
== null && query
== null)
632 return new URI(this.scheme
, this.schemeSpecificPart
, fragment
);
634 if (authority
== null)
636 authority
= this.authority
;
639 if (! (path
.startsWith("/")))
641 StringBuffer basepath
= new StringBuffer(this.path
);
642 int i
= this.path
.lastIndexOf('/');
645 basepath
.delete(i
+ 1, basepath
.length());
647 basepath
.append(path
);
648 path
= basepath
.toString();
649 // FIXME We must normalize the path here.
650 // Normalization process omitted.
653 return new URI(this.scheme
, authority
, path
, query
, fragment
);
655 catch (URISyntaxException e
)
662 * Resolves the given URI string against this URI
664 * @param str The URI as string to resolve against this URI
666 * @return The resulting URI
668 * @exception IllegalArgumentException If the given URI string
670 * @exception NullPointerException If str is null
672 public URI
resolve(String str
) throws IllegalArgumentException
674 return resolve(create(str
));
678 * Relativizes the given URI against this URI
680 * @param uri The URI to relativize against this URI
682 * @return The resulting URI
684 * @exception NullPointerException If uri is null
686 public URI
relativize(URI uri
)
692 * Creates an URL from an URI
694 * @exception MalformedURLException If a protocol handler for the URL could
695 * not be found, or if some other error occurred while constructing the URL
696 * @exception IllegalArgumentException If the URI is not absolute
698 public URL
toURL() throws IllegalArgumentException
, MalformedURLException
701 return new URL(this.toString());
703 throw new IllegalArgumentException("not absolute");
707 * Returns the scheme of the URI
709 public String
getScheme()
715 * Tells whether this URI is absolute or not
717 public boolean isAbsolute()
719 return scheme
!= null;
723 * Tell whether this URI is opaque or not
725 public boolean isOpaque()
727 return ((scheme
!= null) && ! (schemeSpecificPart
.startsWith("/")));
731 * Returns the raw scheme specific part of this URI.
732 * The scheme-specific part is never undefined, though it may be empty
734 public String
getRawSchemeSpecificPart()
736 return rawSchemeSpecificPart
;
740 * Returns the decoded scheme specific part of this URI.
742 public String
getSchemeSpecificPart()
744 return schemeSpecificPart
;
748 * Returns the raw authority part of this URI
750 public String
getRawAuthority()
756 * Returns the decoded authority part of this URI
758 public String
getAuthority()
764 * Returns the raw user info part of this URI
766 public String
getRawUserInfo()
772 * Returns the decoded user info part of this URI
774 public String
getUserInfo()
780 * Returns the hostname of the URI
782 public String
getHost()
788 * Returns the port number of the URI
796 * Returns the raw path part of this URI
798 public String
getRawPath()
804 * Returns the path of the URI
806 public String
getPath()
812 * Returns the raw query part of this URI
814 public String
getRawQuery()
820 * Returns the query of the URI
822 public String
getQuery()
828 * Return the raw fragment part of this URI
830 public String
getRawFragment()
836 * Returns the fragment of the URI
838 public String
getFragment()
844 * Compares the URI with a given object
846 * @param obj The obj to compare the URI with
848 public boolean equals(Object obj
)
854 * Computes the hash code of the URI
856 public int hashCode()
862 * Compare the URI with another object that must be an URI too
864 * @param obj This object to compare this URI with
866 * @exception ClassCastException If the given object is not a URI
868 public int compareTo(Object obj
) throws ClassCastException
874 * Returns the URI as a String. If the URI was created using a constructor,
875 * then this will be the same as the original input string.
877 * @return a string representation of the URI.
879 public String
toString()
881 return (getScheme() == null ?
"" : getScheme() + ":")
882 + getRawSchemeSpecificPart()
883 + (getRawFragment() == null ?
"" : "#" + getRawFragment());
887 * Returns the URI as US-ASCII string
889 public String
toASCIIString()