Merge from the pain train
[official-gcc.git] / libjava / java / net / URI.java
blob23e3e7c9e4a2248ba5b0cd4a80bb7ee2ce1f16c4
1 /* URI.java -- An URI class
2 Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.net;
41 import java.io.IOException;
42 import java.io.ObjectInputStream;
43 import java.io.ObjectOutputStream;
44 import java.io.Serializable;
45 import java.util.regex.Matcher;
46 import java.util.regex.Pattern;
48 /**
49 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
50 * @author Dalibor Topic (robilad@kaffe.org)
51 * @author Michael Koch (konqueror@gmx.de)
52 * @since 1.4
54 public final class URI implements Comparable, Serializable
56 static final long serialVersionUID = -6052424284110960213L;
58 /**
59 * Regular expression for parsing URIs.
61 * Taken from RFC 2396, Appendix B.
62 * This expression doesn't parse IPv6 addresses.
64 private static final String URI_REGEXP =
65 "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";
67 private static final String AUTHORITY_REGEXP =
68 "^((([^?#]*)@)?([^?#:]*)(:([^?#]*)))?";
70 /**
71 * Valid characters (taken from rfc2396)
73 private static final String RFC2396_DIGIT = "0123456789";
74 private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
75 private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
76 private static final String RFC2396_ALPHA =
77 RFC2396_LOWALPHA + RFC2396_UPALPHA;
78 private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
79 private static final String RFC2396_MARK = "-_.!~*'()";
80 private static final String RFC2396_UNRESERVED =
81 RFC2396_ALPHANUM + RFC2396_MARK;
82 private static final String RFC2396_REG_NAME =
83 RFC2396_UNRESERVED + "$,;:@&=+";
84 private static final String RFC2396_PCHAR = RFC2396_UNRESERVED + ":@&=+$,";
85 private static final String RFC2396_SEGMENT = RFC2396_PCHAR + ";";
86 private static final String RFC2396_PATH_SEGMENTS = RFC2396_SEGMENT + "/";
88 /**
89 * Index of scheme component in parsed URI.
91 private static final int SCHEME_GROUP = 2;
93 /**
94 * Index of scheme-specific-part in parsed URI.
96 private static final int SCHEME_SPEC_PART_GROUP = 3;
98 /**
99 * Index of authority component in parsed URI.
101 private static final int AUTHORITY_GROUP = 5;
104 * Index of path component in parsed URI.
106 private static final int PATH_GROUP = 6;
109 * Index of query component in parsed URI.
111 private static final int QUERY_GROUP = 8;
114 * Index of fragment component in parsed URI.
116 private static final int FRAGMENT_GROUP = 10;
118 private static final int AUTHORITY_USERINFO_GROUP = 3;
119 private static final int AUTHORITY_HOST_GROUP = 4;
120 private static final int AUTHORITY_PORT_GROUP = 6;
122 private transient String scheme;
123 private transient String rawSchemeSpecificPart;
124 private transient String schemeSpecificPart;
125 private transient String rawAuthority;
126 private transient String authority;
127 private transient String rawUserInfo;
128 private transient String userInfo;
129 private transient String rawHost;
130 private transient String host;
131 private transient int port = -1;
132 private transient String rawPath;
133 private transient String path;
134 private transient String rawQuery;
135 private transient String query;
136 private transient String rawFragment;
137 private transient String fragment;
138 private String string;
140 private void readObject(ObjectInputStream is)
141 throws ClassNotFoundException, IOException
143 this.string = (String) is.readObject();
146 parseURI(this.string);
148 catch (URISyntaxException x)
150 // Should not happen.
151 throw new RuntimeException(x);
155 private void writeObject(ObjectOutputStream os) throws IOException
157 if (string == null)
158 string = toString();
159 os.writeObject(string);
162 private static String getURIGroup(Matcher match, int group)
164 String matched = match.group(group);
165 return matched.length() == 0 ? null : matched;
169 * Sets fields of this URI by parsing the given string.
171 * @param str The string to parse
173 * @exception URISyntaxException If the given string violates RFC 2396
175 private void parseURI(String str) throws URISyntaxException
177 Pattern pattern = Pattern.compile(URI_REGEXP);
178 Matcher matcher = pattern.matcher(str);
180 if (matcher.matches())
182 scheme = getURIGroup(matcher, SCHEME_GROUP);
183 rawSchemeSpecificPart = getURIGroup(matcher, SCHEME_SPEC_PART_GROUP);
184 rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
185 rawPath = getURIGroup(matcher, PATH_GROUP);
186 rawQuery = getURIGroup(matcher, QUERY_GROUP);
187 rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
189 else
190 throw new URISyntaxException(str, "doesn't match URI regular expression");
192 if (rawAuthority != null)
194 pattern = Pattern.compile(AUTHORITY_REGEXP);
195 matcher = pattern.matcher(rawAuthority);
197 if (matcher.matches())
199 rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
200 rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
202 String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
204 if (portStr != null)
207 port = Integer.parseInt(portStr);
209 catch (NumberFormatException e)
211 URISyntaxException use =
212 new URISyntaxException
213 (str, "doesn't match URI regular expression");
214 use.initCause(e);
215 throw use;
218 else
219 throw new URISyntaxException(str, "doesn't match URI regular expression");
222 // We must eagerly unquote the parts, because this is the only time
223 // we may throw an exception.
224 schemeSpecificPart = unquote(rawSchemeSpecificPart);
225 authority = unquote(rawAuthority);
226 userInfo = unquote(rawUserInfo);
227 host = unquote(rawHost);
228 path = unquote(rawPath);
229 query = unquote(rawQuery);
230 fragment = unquote(rawFragment);
234 * Unquote "%" + hex quotes characters
236 * @param str The string to unquote or null.
238 * @return The unquoted string or null if str was null.
240 * @exception URISyntaxException If the given string contains invalid
241 * escape sequences.
243 private static String unquote(String str) throws URISyntaxException
245 if (str == null)
246 return null;
247 byte[] buf = new byte[str.length()];
248 int pos = 0;
249 for (int i = 0; i < str.length(); i++)
251 char c = str.charAt(i);
252 if (c > 127)
253 throw new URISyntaxException(str, "Invalid character");
254 if (c == '%')
256 if (i + 2 >= str.length())
257 throw new URISyntaxException(str, "Invalid quoted character");
258 String hex = "0123456789ABCDEF";
259 int hi = hex.indexOf(str.charAt(++i));
260 int lo = hex.indexOf(str.charAt(++i));
261 if (lo < 0 || hi < 0)
262 throw new URISyntaxException(str, "Invalid quoted character");
263 buf[pos++] = (byte) (hi * 16 + lo);
265 else
266 buf[pos++] = (byte) c;
270 return new String(buf, 0, pos, "utf-8");
272 catch (java.io.UnsupportedEncodingException x2)
274 throw (Error) new InternalError().initCause(x2);
279 * Quote characters illegal in URIs in given string.
281 * Replace illegal characters by encoding their UTF-8
282 * representation as "%" + hex code for each resulting
283 * UTF-8 character.
285 * @param str The string to quote
287 * @return The quoted string.
289 private static String quote(String str)
291 // FIXME: unimplemented.
292 return str;
296 * Quote characters illegal in URI authorities in given string.
298 * Replace illegal characters by encoding their UTF-8
299 * representation as "%" + hex code for each resulting
300 * UTF-8 character.
302 * @param str The string to quote
304 * @return The quoted string.
306 private static String quoteAuthority(String str)
308 // Technically, we should be using RFC2396_AUTHORITY, but
309 // it contains no additional characters.
310 return quote(str, RFC2396_REG_NAME);
314 * Quote characters in str that are not part of legalCharacters.
316 * Replace illegal characters by encoding their UTF-8
317 * representation as "%" + hex code for each resulting
318 * UTF-8 character.
320 * @param str The string to quote
321 * @param legalCharacters The set of legal characters
323 * @return The quoted string.
325 private static String quote(String str, String legalCharacters)
327 StringBuffer sb = new StringBuffer(str.length());
328 for (int i = 0; i < str.length(); i++)
330 char c = str.charAt(i);
331 if (legalCharacters.indexOf(c) == -1)
333 String hex = "0123456789ABCDEF";
334 if (c <= 127)
335 sb.append('%').append(hex.charAt(c / 16)).append(hex.charAt(c % 16));
336 else
340 // this is far from optimal, but it works
341 byte[] utf8 = str.substring(i, i + 1).getBytes("utf-8");
342 for (int j = 0; j < utf8.length; j++)
343 sb.append('%').append(hex.charAt((utf8[j] & 0xff) / 16))
344 .append(hex.charAt((utf8[j] & 0xff) % 16));
346 catch (java.io.UnsupportedEncodingException x)
348 throw (Error) new InternalError().initCause(x);
352 else
353 sb.append(c);
355 return sb.toString();
359 * Quote characters illegal in URI hosts in given string.
361 * Replace illegal characters by encoding their UTF-8
362 * representation as "%" + hex code for each resulting
363 * UTF-8 character.
365 * @param str The string to quote
367 * @return The quoted string.
369 private static String quoteHost(String str)
371 // FIXME: unimplemented.
372 return str;
376 * Quote characters illegal in URI paths in given string.
378 * Replace illegal characters by encoding their UTF-8
379 * representation as "%" + hex code for each resulting
380 * UTF-8 character.
382 * @param str The string to quote
384 * @return The quoted string.
386 private static String quotePath(String str)
388 // Technically, we should be using RFC2396_PATH, but
389 // it contains no additional characters.
390 return quote(str, RFC2396_PATH_SEGMENTS);
394 * Quote characters illegal in URI user infos in given string.
396 * Replace illegal characters by encoding their UTF-8
397 * representation as "%" + hex code for each resulting
398 * UTF-8 character.
400 * @param str The string to quote
402 * @return The quoted string.
404 private static String quoteUserInfo(String str)
406 // FIXME: unimplemented.
407 return str;
/**
 * Creates a URI by parsing the given string.
 *
 * @param str The string to create the URI from
 *
 * @exception URISyntaxException If the given string violates RFC 2396
 * @exception NullPointerException If str is null
 */
public URI(String str) throws URISyntaxException
{
  // Keep the original text; it is what gets serialized.
  string = str;
  parseURI(string);
}
/**
 * Creates a URI from its individual components.
 *
 * The components are quoted, glued together into one string and that
 * string is then parsed.  Null components (and a port of -1) are left out.
 *
 * @param scheme The scheme name
 * @param userInfo The username and authorization info
 * @param host The hostname
 * @param port The port number, or -1 for none
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       // "//" introduces the authority whenever any part of it is given.
       + (userInfo != null || host != null || port != -1 ? "//" : "")
       + (userInfo != null ? quoteUserInfo(userInfo) + "@" : "")
       + (host != null ? quoteHost(host) : "")
       + (port != -1 ? ":" + port : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));

  parseServerAuthority();
}
/**
 * Creates a URI from a scheme, an authority, a path, a query and a
 * fragment.  Null components are left out of the assembled string.
 *
 * @param scheme The scheme name
 * @param authority The authority
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (authority != null ? "//" + quoteAuthority(authority) : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
/**
 * Creates a URI from a scheme, a host, a path and a fragment.
 *
 * Delegates to the seven-argument constructor with no user info, no
 * port (-1) and no query.
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String host, String path, String fragment)
  throws URISyntaxException
{
  this(scheme, null, host, -1, path, null, fragment);
}
/**
 * Creates a URI from a scheme, a scheme-specific part and a fragment.
 * Null components are left out of the assembled string.
 *
 * @param scheme The scheme name
 * @param ssp The scheme specific part
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the assembled string violates RFC 2396
 */
public URI(String scheme, String ssp, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (ssp != null ? quote(ssp) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
508 * Create an URI from the given string
510 * @param str The string to create the URI from
512 * @exception IllegalArgumentException If the given string violates RFC 2396
513 * @exception NullPointerException If str is null
515 public static URI create(String str)
519 return new URI(str);
521 catch (URISyntaxException e)
523 throw (IllegalArgumentException) new IllegalArgumentException()
524 .initCause(e);
529 * Attempts to parse this URI's authority component, if defined,
530 * into user-information, host, and port components
532 * @exception URISyntaxException If the given string violates RFC 2396
534 public URI parseServerAuthority() throws URISyntaxException
536 return null;
540 * Returns a normalizes versions of the URI
542 public URI normalize()
544 return null;
548 * Resolves the given URI against this URI
550 * @param uri The URI to resolve against this URI
552 * @return The resulting URI, or null when it couldn't be resolved
553 * for some reason.
555 * @exception NullPointerException If uri is null
557 public URI resolve(URI uri)
559 if (uri.isAbsolute())
560 return uri;
561 if (uri.isOpaque())
562 return uri;
564 String scheme = uri.getScheme();
565 String schemeSpecificPart = uri.getSchemeSpecificPart();
566 String authority = uri.getAuthority();
567 String path = uri.getPath();
568 String query = uri.getQuery();
569 String fragment = uri.getFragment();
573 if (fragment != null && path != null && path.equals("")
574 && scheme == null && authority == null && query == null)
575 return new URI(this.scheme, this.schemeSpecificPart, fragment);
577 if (authority == null)
579 authority = this.authority;
580 if (path == null)
581 path = "";
582 if (! (path.startsWith("/")))
584 StringBuffer basepath = new StringBuffer(this.path);
585 int i = this.path.lastIndexOf('/');
587 if (i >= 0)
588 basepath.delete(i + 1, basepath.length());
590 basepath.append(path);
591 path = basepath.toString();
592 // FIXME We must normalize the path here.
593 // Normalization process omitted.
596 return new URI(this.scheme, authority, path, query, fragment);
598 catch (URISyntaxException e)
600 return null;
605 * Resolves the given URI string against this URI
607 * @param str The URI as string to resolve against this URI
609 * @return The resulting URI
611 * @exception IllegalArgumentException If the given URI string
612 * violates RFC 2396
613 * @exception NullPointerException If uri is null
615 public URI resolve(String str) throws IllegalArgumentException
617 return resolve(create(str));
621 * Relativizes the given URI against this URI
623 * @param uri The URI to relativize this URI
625 * @return The resulting URI
627 * @exception NullPointerException If uri is null
629 public URI relativize(URI uri)
631 return null;
635 * Creates an URL from an URI
637 * @exception MalformedURLException If a protocol handler for the URL could
638 * not be found, or if some other error occurred while constructing the URL
639 * @exception IllegalArgumentException If the URI is not absolute
641 public URL toURL() throws IllegalArgumentException, MalformedURLException
643 if (isAbsolute())
644 return new URL(this.toString());
646 throw new IllegalArgumentException("not absolute");
650 * Returns the scheme of the URI
652 public String getScheme()
654 return scheme;
658 * Tells whether this URI is absolute or not
660 public boolean isAbsolute()
662 return scheme != null;
666 * Tell whether this URI is opaque or not
668 public boolean isOpaque()
670 return ((scheme != null) && ! (schemeSpecificPart.startsWith("/")));
674 * Returns the raw scheme specific part of this URI.
675 * The scheme-specific part is never undefined, though it may be empty
677 public String getRawSchemeSpecificPart()
679 return rawSchemeSpecificPart;
683 * Returns the decoded scheme specific part of this URI.
685 public String getSchemeSpecificPart()
687 return schemeSpecificPart;
691 * Returns the rae authority part of this URI
693 public String getRawAuthority()
695 return rawAuthority;
699 * Returns the decoded authority part of this URI
701 public String getAuthority()
703 return authority;
707 * Returns the raw user info part of this URI
709 public String getRawUserInfo()
711 return rawUserInfo;
715 * Returns the decoded user info part of this URI
717 public String getUserInfo()
719 return userInfo;
723 * Returns the hostname of the URI
725 public String getHost()
727 return host;
731 * Returns the port number of the URI
733 public int getPort()
735 return port;
739 * Returns the raw path part of this URI
741 public String getRawPath()
743 return rawPath;
747 * Returns the path of the URI
749 public String getPath()
751 return path;
755 * Returns the raw query part of this URI
757 public String getRawQuery()
759 return rawQuery;
763 * Returns the query of the URI
765 public String getQuery()
767 return query;
771 * Return the raw fragment part of this URI
773 public String getRawFragment()
775 return rawFragment;
779 * Returns the fragment of the URI
781 public String getFragment()
783 return fragment;
787 * Compares the URI with a given object
789 * @param obj The obj to compare the URI with
791 public boolean equals(Object obj)
793 return false;
797 * Computes the hascode of the URI
799 public int hashCode()
801 return 0;
805 * Compare the URI with another object that must be an URI too
807 * @param obj This object to compare this URI with
809 * @exception ClassCastException If given object ist not an URI
811 public int compareTo(Object obj) throws ClassCastException
813 return 0;
817 * Returns the URI as string
819 public String toString()
821 return (getScheme() == null ? "" : getScheme() + ":")
822 + (getRawAuthority() == null ? "" : "//" + getRawAuthority())
823 + (getRawPath() == null ? "" : getRawPath())
824 + (getRawQuery() == null ? "" : "?" + getRawQuery())
825 + (getRawFragment() == null ? "" : "#" + getRawFragment());
829 * Returns the URI as US-ASCII string
831 public String toASCIIString()
833 return "";