1 /* URI.java -- An URI class
2 Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
41 import java
.io
.IOException
;
42 import java
.io
.ObjectInputStream
;
43 import java
.io
.ObjectOutputStream
;
44 import java
.io
.Serializable
;
45 import java
.util
.regex
.Matcher
;
46 import java
.util
.regex
.Pattern
;
49 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
50 * @author Dalibor Topic (robilad@kaffe.org)
51 * @author Michael Koch (konqueror@gmx.de)
54 public final class URI
implements Comparable
, Serializable
56 static final long serialVersionUID
= -6052424284110960213L;
59 * Regular expression for parsing URIs.
61 * Taken from RFC 2396, Appendix B.
62 * This expression doesn't parse IPv6 addresses.
64 private static final String URI_REGEXP
=
65 "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";
67 private static final String AUTHORITY_REGEXP
=
68 "^((([^?#]*)@)?([^?#:]*)(:([^?#]*)))?";
71 * Valid characters (taken from rfc2396)
73 private static final String RFC2396_DIGIT
= "0123456789";
74 private static final String RFC2396_LOWALPHA
= "abcdefghijklmnopqrstuvwxyz";
75 private static final String RFC2396_UPALPHA
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
76 private static final String RFC2396_ALPHA
=
77 RFC2396_LOWALPHA
+ RFC2396_UPALPHA
;
78 private static final String RFC2396_ALPHANUM
= RFC2396_DIGIT
+ RFC2396_ALPHA
;
79 private static final String RFC2396_MARK
= "-_.!~*'()";
80 private static final String RFC2396_UNRESERVED
=
81 RFC2396_ALPHANUM
+ RFC2396_MARK
;
82 private static final String RFC2396_REG_NAME
=
83 RFC2396_UNRESERVED
+ "$,;:@&=+";
84 private static final String RFC2396_PCHAR
= RFC2396_UNRESERVED
+ ":@&=+$,";
85 private static final String RFC2396_SEGMENT
= RFC2396_PCHAR
+ ";";
86 private static final String RFC2396_PATH_SEGMENTS
= RFC2396_SEGMENT
+ "/";
89 * Index of scheme component in parsed URI.
91 private static final int SCHEME_GROUP
= 2;
94 * Index of scheme-specific-part in parsed URI.
96 private static final int SCHEME_SPEC_PART_GROUP
= 3;
99 * Index of authority component in parsed URI.
101 private static final int AUTHORITY_GROUP
= 5;
104 * Index of path component in parsed URI.
106 private static final int PATH_GROUP
= 6;
109 * Index of query component in parsed URI.
111 private static final int QUERY_GROUP
= 8;
114 * Index of fragment component in parsed URI.
116 private static final int FRAGMENT_GROUP
= 10;
118 private static final int AUTHORITY_USERINFO_GROUP
= 3;
119 private static final int AUTHORITY_HOST_GROUP
= 4;
120 private static final int AUTHORITY_PORT_GROUP
= 6;
122 private transient String scheme
;
123 private transient String rawSchemeSpecificPart
;
124 private transient String schemeSpecificPart
;
125 private transient String rawAuthority
;
126 private transient String authority
;
127 private transient String rawUserInfo
;
128 private transient String userInfo
;
129 private transient String rawHost
;
130 private transient String host
;
131 private transient int port
= -1;
132 private transient String rawPath
;
133 private transient String path
;
134 private transient String rawQuery
;
135 private transient String query
;
136 private transient String rawFragment
;
137 private transient String fragment
;
138 private String string
;
140 private void readObject(ObjectInputStream is
)
141 throws ClassNotFoundException
, IOException
143 this.string
= (String
) is
.readObject();
146 parseURI(this.string
);
148 catch (URISyntaxException x
)
150 // Should not happen.
151 throw new RuntimeException(x
);
155 private void writeObject(ObjectOutputStream os
) throws IOException
159 os
.writeObject(string
);
162 private static String
getURIGroup(Matcher match
, int group
)
164 String matched
= match
.group(group
);
165 return matched
.length() == 0 ?
null : matched
;
169 * Sets fields of this URI by parsing the given string.
171 * @param str The string to parse
173 * @exception URISyntaxException If the given string violates RFC 2396
175 private void parseURI(String str
) throws URISyntaxException
177 Pattern pattern
= Pattern
.compile(URI_REGEXP
);
178 Matcher matcher
= pattern
.matcher(str
);
180 if (matcher
.matches())
182 scheme
= getURIGroup(matcher
, SCHEME_GROUP
);
183 rawSchemeSpecificPart
= getURIGroup(matcher
, SCHEME_SPEC_PART_GROUP
);
184 rawAuthority
= getURIGroup(matcher
, AUTHORITY_GROUP
);
185 rawPath
= getURIGroup(matcher
, PATH_GROUP
);
186 rawQuery
= getURIGroup(matcher
, QUERY_GROUP
);
187 rawFragment
= getURIGroup(matcher
, FRAGMENT_GROUP
);
190 throw new URISyntaxException(str
, "doesn't match URI regular expression");
192 if (rawAuthority
!= null)
194 pattern
= Pattern
.compile(AUTHORITY_REGEXP
);
195 matcher
= pattern
.matcher(rawAuthority
);
197 if (matcher
.matches())
199 rawUserInfo
= getURIGroup(matcher
, AUTHORITY_USERINFO_GROUP
);
200 rawHost
= getURIGroup(matcher
, AUTHORITY_HOST_GROUP
);
202 String portStr
= getURIGroup(matcher
, AUTHORITY_PORT_GROUP
);
207 port
= Integer
.parseInt(portStr
);
209 catch (NumberFormatException e
)
211 URISyntaxException use
=
212 new URISyntaxException
213 (str
, "doesn't match URI regular expression");
219 throw new URISyntaxException(str
, "doesn't match URI regular expression");
222 // We must eagerly unquote the parts, because this is the only time
223 // we may throw an exception.
224 schemeSpecificPart
= unquote(rawSchemeSpecificPart
);
225 authority
= unquote(rawAuthority
);
226 userInfo
= unquote(rawUserInfo
);
227 host
= unquote(rawHost
);
228 path
= unquote(rawPath
);
229 query
= unquote(rawQuery
);
230 fragment
= unquote(rawFragment
);
234 * Unquote "%" + hex quotes characters
236 * @param str The string to unquote or null.
238 * @return The unquoted string or null if str was null.
240 * @exception URISyntaxException If the given string contains invalid
243 private static String
unquote(String str
) throws URISyntaxException
247 byte[] buf
= new byte[str
.length()];
249 for (int i
= 0; i
< str
.length(); i
++)
251 char c
= str
.charAt(i
);
253 throw new URISyntaxException(str
, "Invalid character");
256 if (i
+ 2 >= str
.length())
257 throw new URISyntaxException(str
, "Invalid quoted character");
258 String hex
= "0123456789ABCDEF";
259 int hi
= hex
.indexOf(str
.charAt(++i
));
260 int lo
= hex
.indexOf(str
.charAt(++i
));
261 if (lo
< 0 || hi
< 0)
262 throw new URISyntaxException(str
, "Invalid quoted character");
263 buf
[pos
++] = (byte) (hi
* 16 + lo
);
266 buf
[pos
++] = (byte) c
;
270 return new String(buf
, 0, pos
, "utf-8");
272 catch (java
.io
.UnsupportedEncodingException x2
)
274 throw (Error
) new InternalError().initCause(x2
);
279 * Quote characters illegal in URIs in given string.
281 * Replace illegal characters by encoding their UTF-8
282 * representation as "%" + hex code for each resulting
285 * @param str The string to quote
287 * @return The quoted string.
289 private static String
quote(String str
)
291 // FIXME: unimplemented.
296 * Quote characters illegal in URI authorities in given string.
298 * Replace illegal characters by encoding their UTF-8
299 * representation as "%" + hex code for each resulting
302 * @param str The string to quote
304 * @return The quoted string.
306 private static String
quoteAuthority(String str
)
308 // Technically, we should be using RFC2396_AUTHORITY, but
309 // it contains no additional characters.
310 return quote(str
, RFC2396_REG_NAME
);
314 * Quote characters in str that are not part of legalCharacters.
316 * Replace illegal characters by encoding their UTF-8
317 * representation as "%" + hex code for each resulting
320 * @param str The string to quote
321 * @param legalCharacters The set of legal characters
323 * @return The quoted string.
325 private static String
quote(String str
, String legalCharacters
)
327 StringBuffer sb
= new StringBuffer(str
.length());
328 for (int i
= 0; i
< str
.length(); i
++)
330 char c
= str
.charAt(i
);
331 if (legalCharacters
.indexOf(c
) == -1)
333 String hex
= "0123456789ABCDEF";
335 sb
.append('%').append(hex
.charAt(c
/ 16)).append(hex
.charAt(c
% 16));
340 // this is far from optimal, but it works
341 byte[] utf8
= str
.substring(i
, i
+ 1).getBytes("utf-8");
342 for (int j
= 0; j
< utf8
.length
; j
++)
343 sb
.append('%').append(hex
.charAt((utf8
[j
] & 0xff) / 16))
344 .append(hex
.charAt((utf8
[j
] & 0xff) % 16));
346 catch (java
.io
.UnsupportedEncodingException x
)
348 throw (Error
) new InternalError().initCause(x
);
355 return sb
.toString();
359 * Quote characters illegal in URI hosts in given string.
361 * Replace illegal characters by encoding their UTF-8
362 * representation as "%" + hex code for each resulting
365 * @param str The string to quote
367 * @return The quoted string.
369 private static String
quoteHost(String str
)
371 // FIXME: unimplemented.
376 * Quote characters illegal in URI paths in given string.
378 * Replace illegal characters by encoding their UTF-8
379 * representation as "%" + hex code for each resulting
382 * @param str The string to quote
384 * @return The quoted string.
386 private static String
quotePath(String str
)
388 // Technically, we should be using RFC2396_PATH, but
389 // it contains no additional characters.
390 return quote(str
, RFC2396_PATH_SEGMENTS
);
394 * Quote characters illegal in URI user infos in given string.
396 * Replace illegal characters by encoding their UTF-8
397 * representation as "%" + hex code for each resulting
400 * @param str The string to quote
402 * @return The quoted string.
404 private static String
quoteUserInfo(String str
)
406 // FIXME: unimplemented.
411 * Creates an URI from the given string
413 * @param str The string to create the URI from
415 * @exception URISyntaxException If the given string violates RFC 2396
416 * @exception NullPointerException If str is null
418 public URI(String str
) throws URISyntaxException
/**
 * Create an URI from the given components
 *
 * The components are quoted, joined into one string and parsed;
 * afterwards parseServerAuthority() is invoked on the result.
 *
 * @param scheme The scheme name
 * @param userInfo The username and authorization info
 * @param host The hostname
 * @param port The port number
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the given string violates RFC 2396
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
    throws URISyntaxException {
    this(serverUriString(scheme, userInfo, host, port, path, query, fragment));

    parseServerAuthority();
}

/**
 * Joins server-based URI components into a single string; null
 * components (and a port of -1) are left out.
 */
private static String serverUriString(String scheme, String userInfo,
                                      String host, int port, String path,
                                      String query, String fragment) {
    StringBuffer text = new StringBuffer();
    if (scheme != null) {
        text.append(scheme).append(":");
    }
    // "//" is emitted as soon as any authority part is present.
    if (userInfo != null || host != null || port != -1) {
        text.append("//");
    }
    if (userInfo != null) {
        text.append(quoteUserInfo(userInfo)).append("@");
    }
    if (host != null) {
        text.append(quoteHost(host));
    }
    if (port != -1) {
        text.append(":").append(String.valueOf(port));
    }
    if (path != null) {
        text.append(quotePath(path));
    }
    if (query != null) {
        text.append("?").append(quote(query));
    }
    if (fragment != null) {
        text.append("#").append(quote(fragment));
    }
    return text.toString();
}
/**
 * Create an URI from the given components
 *
 * The components are quoted, joined into one string and parsed.
 *
 * @param scheme The scheme name
 * @param authority The authority
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the given string violates RFC 2396
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException {
    this(authorityUriString(scheme, authority, path, query, fragment));
}

/**
 * Joins hierarchical URI components into a single string; null
 * components are left out.
 */
private static String authorityUriString(String scheme, String authority,
                                         String path, String query,
                                         String fragment) {
    StringBuffer text = new StringBuffer();
    if (scheme != null) {
        text.append(scheme).append(":");
    }
    if (authority != null) {
        text.append("//").append(quoteAuthority(authority));
    }
    if (path != null) {
        text.append(quotePath(path));
    }
    if (query != null) {
        text.append("?").append(quote(query));
    }
    if (fragment != null) {
        text.append("#").append(quote(fragment));
    }
    return text.toString();
}
/**
 * Create an URI from the given components
 *
 * Delegates to the seven-argument constructor with no user info,
 * no port (-1) and no query.
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the given string violates RFC 2396
 */
public URI(String scheme, String host, String path, String fragment)
    throws URISyntaxException {
    this(scheme, null, host, -1, path, null, fragment);
}
/**
 * Create an URI from the given components
 *
 * The components are quoted, joined into one string and parsed.
 *
 * @param scheme The scheme name
 * @param ssp The scheme specific part
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the given string violates RFC 2396
 */
public URI(String scheme, String ssp, String fragment)
    throws URISyntaxException {
    this(opaqueUriString(scheme, ssp, fragment));
}

/**
 * Joins scheme, scheme-specific part and fragment into a single string;
 * null components are left out.
 */
private static String opaqueUriString(String scheme, String ssp,
                                      String fragment) {
    StringBuffer text = new StringBuffer();
    if (scheme != null) {
        text.append(scheme).append(":");
    }
    if (ssp != null) {
        text.append(quote(ssp));
    }
    if (fragment != null) {
        text.append("#").append(quote(fragment));
    }
    return text.toString();
}
508 * Create an URI from the given string
510 * @param str The string to create the URI from
512 * @exception IllegalArgumentException If the given string violates RFC 2396
513 * @exception NullPointerException If str is null
515 public static URI
create(String str
)
521 catch (URISyntaxException e
)
523 throw (IllegalArgumentException
) new IllegalArgumentException()
529 * Attempts to parse this URI's authority component, if defined,
530 * into user-information, host, and port components
532 * @exception URISyntaxException If the given string violates RFC 2396
534 public URI
parseServerAuthority() throws URISyntaxException
540 * Returns a normalized version of the URI
542 public URI
normalize()
548 * Resolves the given URI against this URI
550 * @param uri The URI to resolve against this URI
552 * @return The resulting URI, or null when it couldn't be resolved
555 * @exception NullPointerException If uri is null
557 public URI
resolve(URI uri
)
559 if (uri
.isAbsolute())
564 String scheme
= uri
.getScheme();
565 String schemeSpecificPart
= uri
.getSchemeSpecificPart();
566 String authority
= uri
.getAuthority();
567 String path
= uri
.getPath();
568 String query
= uri
.getQuery();
569 String fragment
= uri
.getFragment();
573 if (fragment
!= null && path
!= null && path
.equals("")
574 && scheme
== null && authority
== null && query
== null)
575 return new URI(this.scheme
, this.schemeSpecificPart
, fragment
);
577 if (authority
== null)
579 authority
= this.authority
;
582 if (! (path
.startsWith("/")))
584 StringBuffer basepath
= new StringBuffer(this.path
);
585 int i
= this.path
.lastIndexOf('/');
588 basepath
.delete(i
+ 1, basepath
.length());
590 basepath
.append(path
);
591 path
= basepath
.toString();
592 // FIXME We must normalize the path here.
593 // Normalization process omitted.
596 return new URI(this.scheme
, authority
, path
, query
, fragment
);
598 catch (URISyntaxException e
)
605 * Resolves the given URI string against this URI
607 * @param str The URI as string to resolve against this URI
609 * @return The resulting URI
611 * @exception IllegalArgumentException If the given URI string
613 * @exception NullPointerException If uri is null
615 public URI
resolve(String str
) throws IllegalArgumentException
617 return resolve(create(str
));
621 * Relativizes the given URI against this URI
623 * @param uri The URI to relativize against this URI
625 * @return The resulting URI
627 * @exception NullPointerException If uri is null
629 public URI
relativize(URI uri
)
635 * Creates an URL from an URI
637 * @exception MalformedURLException If a protocol handler for the URL could
638 * not be found, or if some other error occurred while constructing the URL
639 * @exception IllegalArgumentException If the URI is not absolute
641 public URL
toURL() throws IllegalArgumentException
, MalformedURLException
644 return new URL(this.toString());
646 throw new IllegalArgumentException("not absolute");
650 * Returns the scheme of the URI
652 public String
getScheme()
658 * Tells whether this URI is absolute or not
660 public boolean isAbsolute()
662 return scheme
!= null;
666 * Tell whether this URI is opaque or not
668 public boolean isOpaque()
670 return ((scheme
!= null) && ! (schemeSpecificPart
.startsWith("/")));
674 * Returns the raw scheme specific part of this URI.
675 * The scheme-specific part is never undefined, though it may be empty
677 public String
getRawSchemeSpecificPart()
679 return rawSchemeSpecificPart
;
683 * Returns the decoded scheme specific part of this URI.
685 public String
getSchemeSpecificPart()
687 return schemeSpecificPart
;
691 * Returns the raw authority part of this URI
693 public String
getRawAuthority()
699 * Returns the decoded authority part of this URI
701 public String
getAuthority()
707 * Returns the raw user info part of this URI
709 public String
getRawUserInfo()
715 * Returns the decoded user info part of this URI
717 public String
getUserInfo()
723 * Returns the hostname of the URI
725 public String
getHost()
731 * Returns the port number of the URI
739 * Returns the raw path part of this URI
741 public String
getRawPath()
747 * Returns the path of the URI
749 public String
getPath()
755 * Returns the raw query part of this URI
757 public String
getRawQuery()
763 * Returns the query of the URI
765 public String
getQuery()
771 * Return the raw fragment part of this URI
773 public String
getRawFragment()
779 * Returns the fragment of the URI
781 public String
getFragment()
787 * Compares the URI with a given object
789 * @param obj The obj to compare the URI with
791 public boolean equals(Object obj
)
797 * Computes the hash code of the URI
799 public int hashCode()
805 * Compare the URI with another object that must be an URI too
807 * @param obj The object to compare this URI with
809 * @exception ClassCastException If the given object is not an URI
811 public int compareTo(Object obj
) throws ClassCastException
817 * Returns the URI as string
819 public String
toString()
821 return (getScheme() == null ?
"" : getScheme() + ":")
822 + (getRawAuthority() == null ?
"" : "//" + getRawAuthority())
823 + (getRawPath() == null ?
"" : getRawPath())
824 + (getRawQuery() == null ?
"" : "?" + getRawQuery())
825 + (getRawFragment() == null ?
"" : "#" + getRawFragment());
829 * Returns the URI as US-ASCII string
831 public String
toASCIIString()