libjava/classpath/java/lang/String.java

   1 /* String.java -- immutable character sequences; the object of string literals
   2    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Classpath.
   6
   7 GNU Classpath is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Classpath is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Classpath; see the file COPYING.  If not, write to the
  19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301 USA.
  21
  22 Linking this library statically or dynamically with other modules is
  23 making a combined work based on this library.  Thus, the terms and
  24 conditions of the GNU General Public License cover the whole
  25 combination.
  26
  27 As a special exception, the copyright holders of this library give you
  28 permission to link this library with independent modules to produce an
  29 executable, regardless of the license terms of these independent
  30 modules, and to copy and distribute the resulting executable under
  31 terms of your choice, provided that you also meet, for each linked
  32 independent module, the terms and conditions of the license of that
  33 module.  An independent module is a module which is not derived from
  34 or based on this library.  If you modify this library, you may extend
  35 this exception to your version of the library, but you are not
  36 obligated to do so.  If you do not wish to do so, delete this
  37 exception statement from your version. */
  38
  39
  40 package java.lang;
  41
  42 import gnu.java.lang.CharData;
  43
  44 import java.io.Serializable;
  45 import java.io.UnsupportedEncodingException;
  46 import java.nio.ByteBuffer;
  47 import java.nio.CharBuffer;
  48 import java.nio.charset.CharacterCodingException;
  49 import java.nio.charset.Charset;
  50 import java.nio.charset.CharsetDecoder;
  51 import java.nio.charset.CharsetEncoder;
  52 import java.nio.charset.CodingErrorAction;
  53 import java.nio.charset.IllegalCharsetNameException;
  54 import java.nio.charset.UnsupportedCharsetException;
  55 import java.text.Collator;
  56 import java.util.Comparator;
  57 import java.util.Formatter;
  58 import java.util.Locale;
  59 import java.util.regex.Matcher;
  60 import java.util.regex.Pattern;
  61 import java.util.regex.PatternSyntaxException;
  62
  63 /**
  64  * Strings represent an immutable set of characters.  All String literals
  65  * are instances of this class, and two string literals with the same contents
  66  * refer to the same String object.
  67  *
  68  * <p>This class also includes a number of methods for manipulating the
  69  * contents of strings (of course, creating a new object if there are any
  70  * changes, as String is immutable). Case mapping relies on Unicode 3.0.0
  71  * standards, where some character sequences have a different number of
  72  * characters in the uppercase version than the lower case.
  73  *
  74  * <p>Strings are special, in that they are the only object with an overloaded
  75  * operator. When you use '+' with at least one String argument, both
  76  * arguments have String conversion performed on them, and another String (not
  77  * guaranteed to be unique) results.
  78  *
  79  * <p>String is special-cased when doing data serialization - rather than
  80  * listing the fields of this class, a String object is converted to a string
  81  * literal in the object stream.
  82  *
  83  * @author Paul N. Fisher
  84  * @author Eric Blake (ebb9@email.byu.edu)
  85  * @author Per Bothner (bothner@cygnus.com)
  86  * @author Tom Tromey (tromey@redhat.com)
  87  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
  88  * @since 1.0
  89  * @status updated to 1.4; but could use better data sharing via offset field
  90  */
  91 public final class String
  92   implements Serializable, Comparable<String>, CharSequence
  93 {
  94   // WARNING: String is a CORE class in the bootstrap cycle. See the comments
  95   // in vm/reference/java/lang/Runtime for implications of this fact.
  96
  /**
   * This is probably not necessary because this class is special cased already
   * but it will avoid showing up as a discrepancy when comparing SUIDs.
   */
  private static final long serialVersionUID = -6849794470754667710L;

  /**
   * Stores the Unicode multi-character uppercase expansion table, built once
   * from {@link CharData#UPPER_EXPAND} via <code>zeroBasedStringValue</code>.
   * @see #toUpperCase(Locale)
   * @see CharData#UPPER_EXPAND
   */
  private static final char[] upperExpand
        = zeroBasedStringValue(CharData.UPPER_EXPAND);

  /**
   * Stores the Unicode multi-character uppercase special casing table, built
   * once from {@link CharData#UPPER_SPECIAL} via
   * <code>zeroBasedStringValue</code>.
   * @see #upperCaseExpansion(char)
   * @see CharData#UPPER_SPECIAL
   */
  private static final char[] upperSpecial
          = zeroBasedStringValue(CharData.UPPER_SPECIAL);

  /**
   * Characters which make up the String. Only the <code>count</code>
   * characters starting at index <code>offset</code> belong to this String.
   * Package access is granted for use by StringBuffer.
   */
  final char[] value;

  /**
   * Holds the number of characters in value.  This number is generally
   * the same as value.length, but can be smaller because substrings and
   * StringBuffers can share arrays. Package visible for use by trusted code.
   */
  final int count;

  /**
   * Caches the result of hashCode().  If this value is zero, the hashcode
   * is considered uncached (even if 0 is the correct hash value). This is
   * the only non-final instance field declared here.
   */
  private int cachedHashCode;

  /**
   * Holds the starting position for characters in value[].  Since
   * substring()'s are common, the use of offset allows the operation
   * to perform in O(1). Package access is granted for use by StringBuffer.
   */
  final int offset;
 144
 145   /**
 146    * An implementation for {@link #CASE_INSENSITIVE_ORDER}.
 147    * This must be {@link Serializable}. The class name is dictated by
 148    * compatibility with Sun's JDK.
 149    */
 150   private static final class CaseInsensitiveComparator
 151     implements Comparator<String>, Serializable
 152   {
 153     /**
 154      * Compatible with JDK 1.2.
 155      */
 156     private static final long serialVersionUID = 8575799808933029326L;
 157
 158     /**
 159      * The default private constructor generates unnecessary overhead.
 160      */
 161     CaseInsensitiveComparator() {}
 162
 163     /**
 164      * Compares to Strings, using
 165      * <code>String.compareToIgnoreCase(String)</code>.
 166      *
 167      * @param o1 the first string
 168      * @param o2 the second string
 169      * @return &lt; 0, 0, or &gt; 0 depending on the case-insensitive
 170      *         comparison of the two strings.
 171      * @throws NullPointerException if either argument is null
 172      * @throws ClassCastException if either argument is not a String
 173      * @see #compareToIgnoreCase(String)
 174      */
 175     public int compare(String o1, String o2)
 176     {
 177       return o1.compareToIgnoreCase(o2);
 178     }
 179   } // class CaseInsensitiveComparator
 180
  /**
   * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>.
   * The instance is a <code>CaseInsensitiveComparator</code>, which is
   * {@link Serializable}. Note that it ignores Locale; for locale-sensitive
   * ordering you want a Collator.
   *
   * @see Collator#compare(String, String)
   * @since 1.2
   */
  public static final Comparator<String> CASE_INSENSITIVE_ORDER
    = new CaseInsensitiveComparator();
 191
 192   /**
 193    * Creates an empty String (length 0). Unless you really need a new object,
 194    * consider using <code>""</code> instead.
 195    */
 196   public String()
 197   {
 198     value = "".value;
 199     offset = 0;
 200     count = 0;
 201   }
 202
 203   /**
 204    * Copies the contents of a String to a new String. Since Strings are
 205    * immutable, only a shallow copy is performed.
 206    *
 207    * @param str String to copy
 208    * @throws NullPointerException if value is null
 209    */
 210   public String(String str)
 211   {
 212     value = str.value;
 213     offset = str.offset;
 214     count = str.count;
 215     cachedHashCode = str.cachedHashCode;
 216   }
 217
  /**
   * Creates a new String using the character sequence of the char array.
   * The whole array is handed to the four-argument constructor with copying
   * requested, so subsequent changes to data do not affect the String.
   *
   * @param data char array to copy
   * @throws NullPointerException if data is null
   */
  public String(char[] data)
  {
    this(data, 0, data.length, false);
  }
 229
  /**
   * Creates a new String using the character sequence of a subarray of
   * characters. The string starts at offset, and copies count chars.
   * The work is delegated to the four-argument constructor with copying
   * requested, so subsequent changes to data do not affect the String.
   *
   * @param data char array to copy
   * @param offset position (base 0) to start copying out of data
   * @param count the number of characters from data to copy
   * @throws NullPointerException if data is null
   * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
   *         || offset + count &lt; 0 (overflow)
   *         || offset + count &gt; data.length)
   *         (while unspecified, this is a StringIndexOutOfBoundsException)
   */
  public String(char[] data, int offset, int count)
  {
    this(data, offset, count, false);
  }
 248
 249   /**
 250    * Creates a new String using an 8-bit array of integer values, starting at
 251    * an offset, and copying up to the count. Each character c, using
 252    * corresponding byte b, is created in the new String as if by performing:
 253    *
 254    * <pre>
 255    * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 256    * </pre>
 257    *
 258    * @param ascii array of integer values
 259    * @param hibyte top byte of each Unicode character
 260    * @param offset position (base 0) to start copying out of ascii
 261    * @param count the number of characters from ascii to copy
 262    * @throws NullPointerException if ascii is null
 263    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
 264    *         || offset + count &lt; 0 (overflow)
 265    *         || offset + count &gt; ascii.length)
 266    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 267    * @see #String(byte[])
 268    * @see #String(byte[], String)
 269    * @see #String(byte[], int, int)
 270    * @see #String(byte[], int, int, String)
 271    * @deprecated use {@link #String(byte[], int, int, String)} to perform
 272    *             correct encoding
 273    */
 274   public String(byte[] ascii, int hibyte, int offset, int count)
 275   {
 276     if (offset < 0)
 277       throw new StringIndexOutOfBoundsException("offset: " + offset);
 278     if (count < 0)
 279       throw new StringIndexOutOfBoundsException("count: " + count);
 280     // equivalent to: offset + count < 0 || offset + count > ascii.length
 281     if (ascii.length - offset < count)
 282       throw new StringIndexOutOfBoundsException("offset + count: "
 283                                                 + (offset + count));
 284     value = new char[count];
 285     this.offset = 0;
 286     this.count = count;
 287     hibyte <<= 8;
 288     offset += count;
 289     while (--count >= 0)
 290       value[count] = (char) (hibyte | (ascii[--offset] & 0xff));
 291   }
 292
  /**
   * Creates a new String using an 8-bit array of integer values. Each
   * character c, using corresponding byte b, is created in the new String
   * as if by performing:
   *
   * <pre>
   * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
   * </pre>
   *
   * The whole array is converted by delegating to
   * {@link #String(byte[], int, int, int)}.
   *
   * @param ascii array of integer values
   * @param hibyte top byte of each Unicode character
   * @throws NullPointerException if ascii is null
   * @see #String(byte[])
   * @see #String(byte[], String)
   * @see #String(byte[], int, int)
   * @see #String(byte[], int, int, String)
   * @see #String(byte[], int, int, int)
   * @deprecated use {@link #String(byte[], String)} to perform
   *             correct encoding
   */
  public String(byte[] ascii, int hibyte)
  {
    this(ascii, hibyte, 0, ascii.length);
  }
 317
 318   /**
 319    * Creates a new String using the portion of the byte array starting at the
 320    * offset and ending at offset + count. Uses the specified encoding type
 321    * to decode the byte array, so the resulting string may be longer or
 322    * shorter than the byte array. For more decoding control, use
 323    * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
 324    * see {@link java.nio.charset.Charset}. The behavior is not specified if
 325    * the decoder encounters invalid characters; this implementation throws
 326    * an Error.
 327    *
 328    * @param data byte array to copy
 329    * @param offset the offset to start at
 330    * @param count the number of bytes in the array to use
 331    * @param encoding the name of the encoding to use
 332    * @throws NullPointerException if data or encoding is null
 333    * @throws IndexOutOfBoundsException if offset or count is incorrect
 334    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 335    * @throws UnsupportedEncodingException if encoding is not found
 336    * @throws Error if the decoding fails
 337    * @since 1.1
 338    */
 339   public String(byte[] data, int offset, int count, String encoding)
 340     throws UnsupportedEncodingException
 341   {
 342     if (offset < 0)
 343       throw new StringIndexOutOfBoundsException("offset: " + offset);
 344     if (count < 0)
 345       throw new StringIndexOutOfBoundsException("count: " + count);
 346     // equivalent to: offset + count < 0 || offset + count > data.length
 347     if (data.length - offset < count)
 348       throw new StringIndexOutOfBoundsException("offset + count: "
 349                                                 + (offset + count));
 350     try
 351       {
 352         CharsetDecoder csd = Charset.forName(encoding).newDecoder();
 353         csd.onMalformedInput(CodingErrorAction.REPLACE);
 354         csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
 355         CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
 356         if(cbuf.hasArray())
 357           {
 358             value = cbuf.array();
 359             this.offset = cbuf.position();
 360             this.count = cbuf.remaining();
 361           } else {
 362             // Doubt this will happen. But just in case.
 363             value = new char[cbuf.remaining()];
 364             cbuf.get(value);
 365             this.offset = 0;
 366             this.count = value.length;
 367           }
 368       } catch(CharacterCodingException e){
 369           throw new UnsupportedEncodingException("Encoding: "+encoding+
 370                                                  " not found.");
 371       } catch(IllegalCharsetNameException e){
 372           throw new UnsupportedEncodingException("Encoding: "+encoding+
 373                                                  " not found.");
 374       } catch(UnsupportedCharsetException e){
 375           throw new UnsupportedEncodingException("Encoding: "+encoding+
 376                                                  " not found.");
 377       }
 378   }
 379
  /**
   * Creates a new String using the byte array. Uses the specified encoding
   * type to decode the byte array, so the resulting string may be longer or
   * shorter than the byte array. The whole array is decoded by delegating to
   * {@link #String(byte[], int, int, String)}, which also defines how
   * invalid input is treated. For more decoding control, use
   * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
   * see {@link java.nio.charset.Charset}.
   *
   * @param data byte array to copy
   * @param encoding the name of the encoding to use
   * @throws NullPointerException if data or encoding is null
   * @throws UnsupportedEncodingException if encoding is not found
   * @see #String(byte[], int, int, String)
   * @since 1.1
   */
  public String(byte[] data, String encoding)
    throws UnsupportedEncodingException
  {
    this(data, 0, data.length, encoding);
  }
 402
 403   /**
 404    * Creates a new String using the portion of the byte array starting at the
 405    * offset and ending at offset + count. Uses the encoding of the platform's
 406    * default charset, so the resulting string may be longer or shorter than
 407    * the byte array. For more decoding control, use
 408    * {@link java.nio.charset.CharsetDecoder}.  The behavior is not specified
 409    * if the decoder encounters invalid characters; this implementation throws
 410    * an Error.
 411    *
 412    * @param data byte array to copy
 413    * @param offset the offset to start at
 414    * @param count the number of bytes in the array to use
 415    * @throws NullPointerException if data is null
 416    * @throws IndexOutOfBoundsException if offset or count is incorrect
 417    * @throws Error if the decoding fails
 418    * @see #String(byte[], int, int, String)
 419    * @since 1.1
 420    */
 421   public String(byte[] data, int offset, int count)
 422   {
 423     if (offset < 0)
 424       throw new StringIndexOutOfBoundsException("offset: " + offset);
 425     if (count < 0)
 426       throw new StringIndexOutOfBoundsException("count: " + count);
 427     // equivalent to: offset + count < 0 || offset + count > data.length
 428     if (data.length - offset < count)
 429       throw new StringIndexOutOfBoundsException("offset + count: "
 430                                                 + (offset + count));
 431     int o, c;
 432     char[] v;
 433     String encoding;
 434     try
 435         {
 436           encoding = System.getProperty("file.encoding");
 437           CharsetDecoder csd = Charset.forName(encoding).newDecoder();
 438           csd.onMalformedInput(CodingErrorAction.REPLACE);
 439           csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
 440           CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
 441           if(cbuf.hasArray())
 442             {
 443               v = cbuf.array();
 444               o = cbuf.position();
 445               c = cbuf.remaining();
 446             } else {
 447               // Doubt this will happen. But just in case.
 448               v = new char[cbuf.remaining()];
 449               cbuf.get(v);
 450               o = 0;
 451               c = v.length;
 452             }
 453         } catch(Exception ex){
 454             // If anything goes wrong (System property not set,
 455             // NIO provider not available, etc)
 456             // Default to the 'safe' encoding ISO8859_1
 457             v = new char[count];
 458             o = 0;
 459             c = count;
 460             for (int i=0;i<count;i++)
 461               v[i] = (char)data[offset+i];
 462         }
 463     this.value = v;
 464     this.offset = o;
 465     this.count = c;
 466   }
 467
  /**
   * Creates a new String using the byte array. Uses the encoding of the
   * platform's default charset, so the resulting string may be longer or
   * shorter than the byte array. The whole array is decoded by delegating to
   * {@link #String(byte[], int, int)}, which also defines how invalid input
   * and an unusable default charset are treated. For more decoding control,
   * use {@link java.nio.charset.CharsetDecoder}.
   *
   * @param data byte array to copy
   * @throws NullPointerException if data is null
   * @see #String(byte[], int, int)
   * @see #String(byte[], int, int, String)
   * @since 1.1
   */
  public String(byte[] data)
  {
    this(data, 0, data.length);
  }
 487
 488   /**
 489    * Creates a new String using the character sequence represented by
 490    * the StringBuffer. Subsequent changes to buf do not affect the String.
 491    *
 492    * @param buffer StringBuffer to copy
 493    * @throws NullPointerException if buffer is null
 494    */
 495   public String(StringBuffer buffer)
 496   {
 497     synchronized (buffer)
 498       {
 499         offset = 0;
 500         count = buffer.count;
 501         // Share unless buffer is 3/4 empty.
 502         if ((count << 2) < buffer.value.length)
 503           {
 504             value = new char[count];
 505             VMSystem.arraycopy(buffer.value, 0, value, 0, count);
 506           }
 507         else
 508           {
 509             buffer.shared = true;
 510             value = buffer.value;
 511           }
 512       }
 513   }
 514
  /**
   * Creates a new String using the character sequence represented by
   * the StringBuilder. The builder's first <code>count</code> characters are
   * passed to {@link #String(char[], int, int)}, which copies them, so
   * subsequent changes to buf do not affect the String.
   *
   * @param buffer StringBuilder to copy
   * @throws NullPointerException if buffer is null
   */
  public String(StringBuilder buffer)
  {
    this(buffer.value, 0, buffer.count);
  }
 526
 527   /**
 528    * Special constructor which can share an array when safe to do so.
 529    *
 530    * @param data the characters to copy
 531    * @param offset the location to start from
 532    * @param count the number of characters to use
 533    * @param dont_copy true if the array is trusted, and need not be copied
 534    * @throws NullPointerException if chars is null
 535    * @throws StringIndexOutOfBoundsException if bounds check fails
 536    */
 537   String(char[] data, int offset, int count, boolean dont_copy)
 538   {
 539     if (offset < 0)
 540       throw new StringIndexOutOfBoundsException("offset: " + offset);
 541     if (count < 0)
 542       throw new StringIndexOutOfBoundsException("count: " + count);
 543     // equivalent to: offset + count < 0 || offset + count > data.length
 544     if (data.length - offset < count)
 545       throw new StringIndexOutOfBoundsException("offset + count: "
 546                                                 + (offset + count));
 547     if (dont_copy)
 548       {
 549         value = data;
 550         this.offset = offset;
 551       }
 552     else
 553       {
 554         value = new char[count];
 555         VMSystem.arraycopy(data, offset, value, 0, count);
 556         this.offset = 0;
 557       }
 558     this.count = count;
 559   }
 560
 561   /**
 562    * Creates a new String containing the characters represented in the
 563    * given subarray of Unicode code points.
 564    * @param codePoints the entire array of code points
 565    * @param offset the start of the subarray
 566    * @param count the length of the subarray
 567    *
 568    * @throws IllegalArgumentException if an invalid code point is found
 569    * in the codePoints array
 570    * @throws IndexOutOfBoundsException if offset is negative or offset + count
 571    * is greater than the length of the array.
 572    */
 573   public String(int[] codePoints, int offset, int count)
 574   {
 575     // FIXME: This implementation appears to give correct internal
 576     // representation of the String because:
 577     //   - length() is correct
 578     //   - getting a char[] from toCharArray() and testing
 579     //     Character.codePointAt() on all the characters in that array gives
 580     //     the appropriate results
 581     // however printing the String gives incorrect results.  This may be
 582     // due to printing method errors (such as incorrectly looping through
 583     // the String one char at a time rather than one "character" at a time.
 584
 585     if (offset < 0)
 586       throw new IndexOutOfBoundsException();
 587     int end = offset + count;
 588     int pos = 0;
 589     // This creates a char array that is long enough for all of the code
 590     // points to represent supplementary characters.  This is more than likely
 591     // a waste of storage, so we use it only temporarily and then copy the
 592     // used portion into the value array.
 593     char[] temp = new char[2 * codePoints.length];
 594     for (int i = offset; i < end; i++)
 595       {
 596         pos += Character.toChars(codePoints[i], temp, pos);
 597       }
 598     this.count = pos;
 599     this.value = new char[pos];
 600     System.arraycopy(temp, 0, value, 0, pos);
 601     this.offset = 0;
 602   }
 603
  /**
   * Returns the number of characters contained in this String, i.e. the
   * value of the <code>count</code> field (not the length of the backing
   * array).
   *
   * @return the length of this String
   */
  public int length()
  {
    return count;
  }
 613
 614   /**
 615    * Returns the character located at the specified index within this String.
 616    *
 617    * @param index position of character to return (base 0)
 618    * @return character located at position index
 619    * @throws IndexOutOfBoundsException if index &lt; 0 || index &gt;= length()
 620    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 621    */
 622   public char charAt(int index)
 623   {
 624     if (index < 0 || index >= count)
 625       throw new StringIndexOutOfBoundsException(index);
 626     return value[offset + index];
 627   }
 628
 629   /**
 630    * Get the code point at the specified index.  This is like #charAt(int),
 631    * but if the character is the start of a surrogate pair, and the
 632    * following character completes the pair, then the corresponding
 633    * supplementary code point is returned.
 634    * @param index the index of the codepoint to get, starting at 0
 635    * @return the codepoint at the specified index
 636    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
 637    * @since 1.5
 638    */
 639   public synchronized int codePointAt(int index)
 640   {
 641     // Use the CharSequence overload as we get better range checking
 642     // this way.
 643     return Character.codePointAt(this, index);
 644   }
 645
 646   /**
 647    * Get the code point before the specified index.  This is like
 648    * #codePointAt(int), but checks the characters at <code>index-1</code> and
 649    * <code>index-2</code> to see if they form a supplementary code point.
 650    * @param index the index just past the codepoint to get, starting at 0
 651    * @return the codepoint at the specified index
 652    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
 653    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 654    * @since 1.5
 655    */
 656   public synchronized int codePointBefore(int index)
 657   {
 658     // Use the CharSequence overload as we get better range checking
 659     // this way.
 660     return Character.codePointBefore(this, index);
 661   }
 662
 663   /**
 664    * Copies characters from this String starting at a specified start index,
 665    * ending at a specified stop index, to a character array starting at
 666    * a specified destination begin index.
 667    *
 668    * @param srcBegin index to begin copying characters from this String
 669    * @param srcEnd index after the last character to be copied from this String
 670    * @param dst character array which this String is copied into
 671    * @param dstBegin index to start writing characters into dst
 672    * @throws NullPointerException if dst is null
 673    * @throws IndexOutOfBoundsException if any indices are out of bounds
 674    *         (while unspecified, source problems cause a
 675    *         StringIndexOutOfBoundsException, and dst problems cause an
 676    *         ArrayIndexOutOfBoundsException)
 677    */
 678   public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
 679   {
 680     if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
 681       throw new StringIndexOutOfBoundsException();
 682     VMSystem.arraycopy(value, srcBegin + offset,
 683                      dst, dstBegin, srcEnd - srcBegin);
 684   }
 685
 686   /**
 687    * Copies the low byte of each character from this String starting at a
 688    * specified start index, ending at a specified stop index, to a byte array
 689    * starting at a specified destination begin index.
 690    *
 691    * @param srcBegin index to being copying characters from this String
 692    * @param srcEnd index after the last character to be copied from this String
 693    * @param dst byte array which each low byte of this String is copied into
 694    * @param dstBegin index to start writing characters into dst
 695    * @throws NullPointerException if dst is null and copy length is non-zero
 696    * @throws IndexOutOfBoundsException if any indices are out of bounds
 697    *         (while unspecified, source problems cause a
 698    *         StringIndexOutOfBoundsException, and dst problems cause an
 699    *         ArrayIndexOutOfBoundsException)
 700    * @see #getBytes()
 701    * @see #getBytes(String)
 702    * @deprecated use {@link #getBytes()}, which uses a char to byte encoder
 703    */
 704   public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin)
 705   {
 706     if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
 707       throw new StringIndexOutOfBoundsException();
 708     int i = srcEnd - srcBegin;
 709     srcBegin += offset;
 710     while (--i >= 0)
 711       dst[dstBegin++] = (byte) value[srcBegin++];
 712   }
 713
 714   /**
 715    * Converts the Unicode characters in this String to a byte array. Uses the
 716    * specified encoding method, so the result may be longer or shorter than
 717    * the String. For more encoding control, use
 718    * {@link java.nio.charset.CharsetEncoder}, and for valid character sets,
 719    * see {@link java.nio.charset.Charset}. Unsupported characters get
 720    * replaced by an encoding specific byte.
 721    *
 722    * @param enc encoding name
 723    * @return the resulting byte array
 724    * @throws NullPointerException if enc is null
 725    * @throws UnsupportedEncodingException if encoding is not supported
 726    * @since 1.1
 727    */
 728   public byte[] getBytes(String enc) throws UnsupportedEncodingException
 729   {
 730     try
 731       {
 732         CharsetEncoder cse = Charset.forName(enc).newEncoder();
 733         cse.onMalformedInput(CodingErrorAction.REPLACE);
 734         cse.onUnmappableCharacter(CodingErrorAction.REPLACE);
 735         ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count));
 736         if(bbuf.hasArray())
 737           return bbuf.array();
 738
 739         // Doubt this will happen. But just in case.
 740         byte[] bytes = new byte[bbuf.remaining()];
 741         bbuf.get(bytes);
 742         return bytes;
 743       }
 744     catch(IllegalCharsetNameException e)
 745       {
 746         throw new UnsupportedEncodingException("Encoding: " + enc
 747                                                + " not found.");
 748       }
 749     catch(UnsupportedCharsetException e)
 750       {
 751         throw new UnsupportedEncodingException("Encoding: " + enc
 752                                                + " not found.");
 753       }
 754     catch(CharacterCodingException e)
 755       {
 756         // This shouldn't ever happen.
 757         throw (InternalError) new InternalError().initCause(e);
 758       }
 759   }
 760
 761   /**
 762    * Converts the Unicode characters in this String to a byte array. Uses the
 763    * encoding of the platform's default charset, so the result may be longer
 764    * or shorter than the String. For more encoding control, use
 765    * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get
 766    * replaced by an encoding specific byte.
 767    *
   * @return the resulting byte array
 769    * @since 1.1
 770    */
 771   public byte[] getBytes()
 772   {
 773       try
 774           {
 775               return getBytes(System.getProperty("file.encoding"));
 776           } catch(Exception e) {
 777               // XXX - Throw an error here?
 778               // For now, default to the 'safe' encoding.
 779               byte[] bytes = new byte[count];
 780               for(int i=0;i<count;i++)
 781                   bytes[i] = (byte)((value[offset+i] <= 0xFF)?
 782                                     value[offset+i]:'?');
 783               return bytes;
 784       }
 785   }
 786
 787   /**
 788    * Predicate which compares anObject to this. This is true only for Strings
 789    * with the same character sequence.
 790    *
 791    * @param anObject the object to compare
 792    * @return true if anObject is semantically equal to this
 793    * @see #compareTo(String)
 794    * @see #equalsIgnoreCase(String)
 795    */
 796   public boolean equals(Object anObject)
 797   {
 798     if (! (anObject instanceof String))
 799       return false;
 800     String str2 = (String) anObject;
 801     if (count != str2.count)
 802       return false;
 803     if (value == str2.value && offset == str2.offset)
 804       return true;
 805     int i = count;
 806     int x = offset;
 807     int y = str2.offset;
 808     while (--i >= 0)
 809       if (value[x++] != str2.value[y++])
 810         return false;
 811     return true;
 812   }
 813
 814   /**
 815    * Compares the given StringBuffer to this String. This is true if the
 816    * StringBuffer has the same content as this String at this moment.
 817    *
 818    * @param buffer the StringBuffer to compare to
 819    * @return true if StringBuffer has the same character sequence
 820    * @throws NullPointerException if the given StringBuffer is null
 821    * @since 1.4
 822    */
 823   public boolean contentEquals(StringBuffer buffer)
 824   {
 825     synchronized (buffer)
 826       {
 827         if (count != buffer.count)
 828           return false;
 829         if (value == buffer.value)
 830           return true; // Possible if shared.
 831         int i = count;
 832         int x = offset + count;
 833         while (--i >= 0)
 834           if (value[--x] != buffer.value[i])
 835             return false;
 836         return true;
 837       }
 838   }
 839
 840   /**
 841    * Compares the given CharSequence to this String. This is true if
 842    * the CharSequence has the same content as this String at this
 843    * moment.
 844    *
 845    * @param seq the CharSequence to compare to
 846    * @return true if CharSequence has the same character sequence
 847    * @throws NullPointerException if the given CharSequence is null
 848    * @since 1.5
 849    */
 850   public boolean contentEquals(CharSequence seq)
 851   {
 852     if (seq.length() != count)
 853       return false;
 854     for (int i = 0; i < count; ++i)
 855       if (value[offset + i] != seq.charAt(i))
 856         return false;
 857     return true;
 858   }
 859
 860   /**
 861    * Compares a String to this String, ignoring case. This does not handle
 862    * multi-character capitalization exceptions; instead the comparison is
 863    * made on a character-by-character basis, and is true if:<br><ul>
 864    * <li><code>c1 == c2</code></li>
 865    * <li><code>Character.toUpperCase(c1)
 866    *     == Character.toUpperCase(c2)</code></li>
 867    * <li><code>Character.toLowerCase(c1)
 868    *     == Character.toLowerCase(c2)</code></li>
 869    * </ul>
 870    *
 871    * @param anotherString String to compare to this String
 872    * @return true if anotherString is equal, ignoring case
 873    * @see #equals(Object)
 874    * @see Character#toUpperCase(char)
 875    * @see Character#toLowerCase(char)
 876    */
 877   public boolean equalsIgnoreCase(String anotherString)
 878   {
 879     if (anotherString == null || count != anotherString.count)
 880       return false;
 881     int i = count;
 882     int x = offset;
 883     int y = anotherString.offset;
 884     while (--i >= 0)
 885       {
 886         char c1 = value[x++];
 887         char c2 = anotherString.value[y++];
 888         // Note that checking c1 != c2 is redundant, but avoids method calls.
 889         if (c1 != c2
 890             && Character.toUpperCase(c1) != Character.toUpperCase(c2)
 891             && Character.toLowerCase(c1) != Character.toLowerCase(c2))
 892           return false;
 893       }
 894     return true;
 895   }
 896
 897   /**
 898    * Compares this String and another String (case sensitive,
 899    * lexicographically). The result is less than 0 if this string sorts
 900    * before the other, 0 if they are equal, and greater than 0 otherwise.
 901    * After any common starting sequence is skipped, the result is
 902    * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings
 903    * have characters remaining, or
 904    * <code>this.length() - anotherString.length()</code> if one string is
 905    * a subsequence of the other.
 906    *
 907    * @param anotherString the String to compare against
 908    * @return the comparison
 909    * @throws NullPointerException if anotherString is null
 910    */
 911   public int compareTo(String anotherString)
 912   {
 913     int i = Math.min(count, anotherString.count);
 914     int x = offset;
 915     int y = anotherString.offset;
 916     while (--i >= 0)
 917       {
 918         int result = value[x++] - anotherString.value[y++];
 919         if (result != 0)
 920           return result;
 921       }
 922     return count - anotherString.count;
 923   }
 924
 925   /**
 926    * Compares this String and another String (case insensitive). This
 927    * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores
   * locale and multi-character capitalization, and compares characters
 929    * after performing
 930    * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each
 931    * character of the string. This is unsatisfactory for locale-based
 932    * comparison, in which case you should use {@link java.text.Collator}.
 933    *
 934    * @param str the string to compare against
 935    * @return the comparison
 936    * @see Collator#compare(String, String)
 937    * @since 1.2
 938    */
 939   public int compareToIgnoreCase(String str)
 940   {
 941     int i = Math.min(count, str.count);
 942     int x = offset;
 943     int y = str.offset;
 944     while (--i >= 0)
 945       {
 946         int result = Character.toLowerCase(Character.toUpperCase(value[x++]))
 947           - Character.toLowerCase(Character.toUpperCase(str.value[y++]));
 948         if (result != 0)
 949           return result;
 950       }
 951     return count - str.count;
 952   }
 953
 954   /**
 955    * Predicate which determines if this String matches another String
 956    * starting at a specified offset for each String and continuing
 957    * for a specified length. Indices out of bounds are harmless, and give
 958    * a false result.
 959    *
 960    * @param toffset index to start comparison at for this String
 961    * @param other String to compare region to this String
 962    * @param ooffset index to start comparison at for other
 963    * @param len number of characters to compare
 964    * @return true if regions match (case sensitive)
 965    * @throws NullPointerException if other is null
 966    */
 967   public boolean regionMatches(int toffset, String other, int ooffset, int len)
 968   {
 969     return regionMatches(false, toffset, other, ooffset, len);
 970   }
 971
 972   /**
 973    * Predicate which determines if this String matches another String
 974    * starting at a specified offset for each String and continuing
 975    * for a specified length, optionally ignoring case. Indices out of bounds
 976    * are harmless, and give a false result. Case comparisons are based on
 977    * <code>Character.toLowerCase()</code> and
 978    * <code>Character.toUpperCase()</code>, not on multi-character
 979    * capitalization expansions.
 980    *
   * @param ignoreCase true if case should be ignored in comparison
 982    * @param toffset index to start comparison at for this String
 983    * @param other String to compare region to this String
 984    * @param ooffset index to start comparison at for other
 985    * @param len number of characters to compare
 986    * @return true if regions match, false otherwise
 987    * @throws NullPointerException if other is null
 988    */
 989   public boolean regionMatches(boolean ignoreCase, int toffset,
 990                                String other, int ooffset, int len)
 991   {
 992     if (toffset < 0 || ooffset < 0 || toffset + len > count
 993         || ooffset + len > other.count)
 994       return false;
 995     toffset += offset;
 996     ooffset += other.offset;
 997     while (--len >= 0)
 998       {
 999         char c1 = value[toffset++];
1000         char c2 = other.value[ooffset++];
1001         // Note that checking c1 != c2 is redundant when ignoreCase is true,
1002         // but it avoids method calls.
1003         if (c1 != c2
1004             && (! ignoreCase
1005                 || (Character.toLowerCase(c1) != Character.toLowerCase(c2)
1006                     && (Character.toUpperCase(c1)
1007                         != Character.toUpperCase(c2)))))
1008           return false;
1009       }
1010     return true;
1011   }
1012
1013   /**
1014    * Predicate which determines if this String contains the given prefix,
1015    * beginning comparison at toffset. The result is false if toffset is
1016    * negative or greater than this.length(), otherwise it is the same as
1017    * <code>this.substring(toffset).startsWith(prefix)</code>.
1018    *
1019    * @param prefix String to compare
1020    * @param toffset offset for this String where comparison starts
1021    * @return true if this String starts with prefix
1022    * @throws NullPointerException if prefix is null
1023    * @see #regionMatches(boolean, int, String, int, int)
1024    */
1025   public boolean startsWith(String prefix, int toffset)
1026   {
1027     return regionMatches(false, toffset, prefix, 0, prefix.count);
1028   }
1029
1030   /**
1031    * Predicate which determines if this String starts with a given prefix.
1032    * If the prefix is an empty String, true is returned.
1033    *
1034    * @param prefix String to compare
1035    * @return true if this String starts with the prefix
1036    * @throws NullPointerException if prefix is null
1037    * @see #startsWith(String, int)
1038    */
1039   public boolean startsWith(String prefix)
1040   {
1041     return regionMatches(false, 0, prefix, 0, prefix.count);
1042   }
1043
1044   /**
1045    * Predicate which determines if this String ends with a given suffix.
1046    * If the suffix is an empty String, true is returned.
1047    *
1048    * @param suffix String to compare
1049    * @return true if this String ends with the suffix
1050    * @throws NullPointerException if suffix is null
1051    * @see #regionMatches(boolean, int, String, int, int)
1052    */
1053   public boolean endsWith(String suffix)
1054   {
1055     return regionMatches(false, count - suffix.count, suffix, 0, suffix.count);
1056   }
1057
1058   /**
1059    * Computes the hashcode for this String. This is done with int arithmetic,
1060    * where ** represents exponentiation, by this formula:<br>
1061    * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>.
1062    *
1063    * @return hashcode value of this String
1064    */
1065   public int hashCode()
1066   {
1067     if (cachedHashCode != 0)
1068       return cachedHashCode;
1069
1070     // Compute the hash code using a local variable to be reentrant.
1071     int hashCode = 0;
1072     int limit = count + offset;
1073     for (int i = offset; i < limit; i++)
1074       hashCode = hashCode * 31 + value[i];
1075     return cachedHashCode = hashCode;
1076   }
1077
1078   /**
1079    * Finds the first instance of a character in this String.
1080    *
1081    * @param ch character to find
1082    * @return location (base 0) of the character, or -1 if not found
1083    */
1084   public int indexOf(int ch)
1085   {
1086     return indexOf(ch, 0);
1087   }
1088
1089   /**
1090    * Finds the first instance of a character in this String, starting at
1091    * a given index.  If starting index is less than 0, the search
1092    * starts at the beginning of this String.  If the starting index
1093    * is greater than the length of this String, -1 is returned.
1094    *
1095    * @param ch character to find
1096    * @param fromIndex index to start the search
1097    * @return location (base 0) of the character, or -1 if not found
1098    */
1099   public int indexOf(int ch, int fromIndex)
1100   {
1101     if ((char) ch != ch)
1102       return -1;
1103     if (fromIndex < 0)
1104       fromIndex = 0;
1105     int i = fromIndex + offset;
1106     for ( ; fromIndex < count; fromIndex++)
1107       if (value[i++] == ch)
1108         return fromIndex;
1109     return -1;
1110   }
1111
1112   /**
1113    * Finds the last instance of a character in this String.
1114    *
1115    * @param ch character to find
1116    * @return location (base 0) of the character, or -1 if not found
1117    */
1118   public int lastIndexOf(int ch)
1119   {
1120     return lastIndexOf(ch, count - 1);
1121   }
1122
1123   /**
1124    * Finds the last instance of a character in this String, starting at
1125    * a given index.  If starting index is greater than the maximum valid
1126    * index, then the search begins at the end of this String.  If the
1127    * starting index is less than zero, -1 is returned.
1128    *
1129    * @param ch character to find
1130    * @param fromIndex index to start the search
1131    * @return location (base 0) of the character, or -1 if not found
1132    */
1133   public int lastIndexOf(int ch, int fromIndex)
1134   {
1135     if ((char) ch != ch)
1136       return -1;
1137     if (fromIndex >= count)
1138       fromIndex = count - 1;
1139     int i = fromIndex + offset;
1140     for ( ; fromIndex >= 0; fromIndex--)
1141       if (value[i--] == ch)
1142         return fromIndex;
1143     return -1;
1144   }
1145
1146   /**
1147    * Finds the first instance of a String in this String.
1148    *
1149    * @param str String to find
1150    * @return location (base 0) of the String, or -1 if not found
1151    * @throws NullPointerException if str is null
1152    */
1153   public int indexOf(String str)
1154   {
1155     return indexOf(str, 0);
1156   }
1157
1158   /**
1159    * Finds the first instance of a String in this String, starting at
1160    * a given index.  If starting index is less than 0, the search
1161    * starts at the beginning of this String.  If the starting index
1162    * is greater than the length of this String, -1 is returned.
1163    *
1164    * @param str String to find
1165    * @param fromIndex index to start the search
1166    * @return location (base 0) of the String, or -1 if not found
1167    * @throws NullPointerException if str is null
1168    */
1169   public int indexOf(String str, int fromIndex)
1170   {
1171     if (fromIndex < 0)
1172       fromIndex = 0;
1173     int limit = count - str.count;
1174     for ( ; fromIndex <= limit; fromIndex++)
1175       if (regionMatches(fromIndex, str, 0, str.count))
1176         return fromIndex;
1177     return -1;
1178   }
1179
1180   /**
1181    * Finds the last instance of a String in this String.
1182    *
1183    * @param str String to find
1184    * @return location (base 0) of the String, or -1 if not found
1185    * @throws NullPointerException if str is null
1186    */
1187   public int lastIndexOf(String str)
1188   {
1189     return lastIndexOf(str, count - str.count);
1190   }
1191
1192   /**
1193    * Finds the last instance of a String in this String, starting at
1194    * a given index.  If starting index is greater than the maximum valid
1195    * index, then the search begins at the end of this String.  If the
1196    * starting index is less than zero, -1 is returned.
1197    *
1198    * @param str String to find
1199    * @param fromIndex index to start the search
1200    * @return location (base 0) of the String, or -1 if not found
1201    * @throws NullPointerException if str is null
1202    */
1203   public int lastIndexOf(String str, int fromIndex)
1204   {
1205     fromIndex = Math.min(fromIndex, count - str.count);
1206     for ( ; fromIndex >= 0; fromIndex--)
1207       if (regionMatches(fromIndex, str, 0, str.count))
1208         return fromIndex;
1209     return -1;
1210   }
1211
1212   /**
1213    * Creates a substring of this String, starting at a specified index
1214    * and ending at the end of this String.
1215    *
1216    * @param begin index to start substring (base 0)
1217    * @return new String which is a substring of this String
1218    * @throws IndexOutOfBoundsException if begin &lt; 0 || begin &gt; length()
1219    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1220    */
1221   public String substring(int begin)
1222   {
1223     return substring(begin, count);
1224   }
1225
1226   /**
1227    * Creates a substring of this String, starting at a specified index
1228    * and ending at one character before a specified index.
1229    *
1230    * @param beginIndex index to start substring (inclusive, base 0)
1231    * @param endIndex index to end at (exclusive)
1232    * @return new String which is a substring of this String
1233    * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1234    *         || begin &gt; end (while unspecified, this is a
1235    *         StringIndexOutOfBoundsException)
1236    */
1237   public String substring(int beginIndex, int endIndex)
1238   {
1239     if (beginIndex < 0 || endIndex > count || beginIndex > endIndex)
1240       throw new StringIndexOutOfBoundsException();
1241     if (beginIndex == 0 && endIndex == count)
1242       return this;
1243     int len = endIndex - beginIndex;
1244     // Package constructor avoids an array copy.
1245     return new String(value, beginIndex + offset, len,
1246                       (len << 2) >= value.length);
1247   }
1248
1249   /**
1250    * Creates a substring of this String, starting at a specified index
1251    * and ending at one character before a specified index. This behaves like
1252    * <code>substring(begin, end)</code>.
1253    *
1254    * @param begin index to start substring (inclusive, base 0)
1255    * @param end index to end at (exclusive)
1256    * @return new String which is a substring of this String
1257    * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1258    *         || begin &gt; end
1259    * @since 1.4
1260    */
1261   public CharSequence subSequence(int begin, int end)
1262   {
1263     return substring(begin, end);
1264   }
1265
1266   /**
1267    * Concatenates a String to this String. This results in a new string unless
1268    * one of the two originals is "".
1269    *
1270    * @param str String to append to this String
1271    * @return newly concatenated String
1272    * @throws NullPointerException if str is null
1273    */
1274   public String concat(String str)
1275   {
1276     if (str.count == 0)
1277       return this;
1278     if (count == 0)
1279       return str;
1280     char[] newStr = new char[count + str.count];
1281     VMSystem.arraycopy(value, offset, newStr, 0, count);
1282     VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count);
1283     // Package constructor avoids an array copy.
1284     return new String(newStr, 0, newStr.length, true);
1285   }
1286
1287   /**
1288    * Replaces every instance of a character in this String with a new
1289    * character. If no replacements occur, this is returned.
1290    *
1291    * @param oldChar the old character to replace
1292    * @param newChar the new character
1293    * @return new String with all instances of oldChar replaced with newChar
1294    */
1295   public String replace(char oldChar, char newChar)
1296   {
1297     if (oldChar == newChar)
1298       return this;
1299     int i = count;
1300     int x = offset - 1;
1301     while (--i >= 0)
1302       if (value[++x] == oldChar)
1303         break;
1304     if (i < 0)
1305       return this;
1306     char[] newStr = (char[]) value.clone();
1307     newStr[x] = newChar;
1308     while (--i >= 0)
1309       if (value[++x] == oldChar)
1310         newStr[x] = newChar;
1311     // Package constructor avoids an array copy.
1312     return new String(newStr, offset, count, true);
1313   }
1314
1315   /**
1316    * Test if this String matches a regular expression. This is shorthand for
1317    * <code>{@link Pattern}.matches(regex, this)</code>.
1318    *
1319    * @param regex the pattern to match
1320    * @return true if the pattern matches
1321    * @throws NullPointerException if regex is null
1322    * @throws PatternSyntaxException if regex is invalid
1323    * @see Pattern#matches(String, CharSequence)
1324    * @since 1.4
1325    */
1326   public boolean matches(String regex)
1327   {
1328     return Pattern.matches(regex, this);
1329   }
1330
1331   /**
1332    * Replaces the first substring match of the regular expression with a
1333    * given replacement. This is shorthand for <code>{@link Pattern}
1334    *   .compile(regex).matcher(this).replaceFirst(replacement)</code>.
1335    *
1336    * @param regex the pattern to match
1337    * @param replacement the replacement string
1338    * @return the modified string
1339    * @throws NullPointerException if regex or replacement is null
1340    * @throws PatternSyntaxException if regex is invalid
1341    * @see #replaceAll(String, String)
1342    * @see Pattern#compile(String)
1343    * @see Pattern#matcher(CharSequence)
1344    * @see Matcher#replaceFirst(String)
1345    * @since 1.4
1346    */
1347   public String replaceFirst(String regex, String replacement)
1348   {
1349     return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
1350   }
1351
1352   /**
1353    * Replaces all matching substrings of the regular expression with a
1354    * given replacement. This is shorthand for <code>{@link Pattern}
1355    *   .compile(regex).matcher(this).replaceAll(replacement)</code>.
1356    *
1357    * @param regex the pattern to match
1358    * @param replacement the replacement string
1359    * @return the modified string
1360    * @throws NullPointerException if regex or replacement is null
1361    * @throws PatternSyntaxException if regex is invalid
1362    * @see #replaceFirst(String, String)
1363    * @see Pattern#compile(String)
1364    * @see Pattern#matcher(CharSequence)
1365    * @see Matcher#replaceAll(String)
1366    * @since 1.4
1367    */
1368   public String replaceAll(String regex, String replacement)
1369   {
1370     return Pattern.compile(regex).matcher(this).replaceAll(replacement);
1371   }
1372
1373   /**
1374    * Split this string around the matches of a regular expression. Each
1375    * element of the returned array is the largest block of characters not
1376    * terminated by the regular expression, in the order the matches are found.
1377    *
1378    * <p>The limit affects the length of the array. If it is positive, the
1379    * array will contain at most n elements (n - 1 pattern matches). If
1380    * negative, the array length is unlimited, but there can be trailing empty
   * entries. If 0, the array length is unlimited, and trailing empty entries
1382    * are discarded.
1383    *
1384    * <p>For example, splitting "boo:and:foo" yields:<br>
1385    * <table border=0>
   * <tr><th>Regex</th> <th>Limit</th> <th>Result</th></tr>
1387    * <tr><td>":"</td>   <td>2</td>  <td>{ "boo", "and:foo" }</td></tr>
   * <tr><td>":"</td>   <td>5</td>  <td>{ "boo", "and", "foo" }</td></tr>
1389    * <tr><td>":"</td>   <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr>
1390    * <tr><td>"o"</td>   <td>5</td>  <td>{ "b", "", ":and:f", "", "" }</td></tr>
1391    * <tr><td>"o"</td>   <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr>
1392    * <tr><td>"o"</td>   <td>0</td>  <td>{ "b", "", ":and:f" }</td></tr>
1393    * </table>
1394    *
1395    * <p>This is shorthand for
1396    * <code>{@link Pattern}.compile(regex).split(this, limit)</code>.
1397    *
1398    * @param regex the pattern to match
1399    * @param limit the limit threshold
1400    * @return the array of split strings
   * @throws NullPointerException if regex is null
1402    * @throws PatternSyntaxException if regex is invalid
1403    * @see Pattern#compile(String)
1404    * @see Pattern#split(CharSequence, int)
1405    * @since 1.4
1406    */
1407   public String[] split(String regex, int limit)
1408   {
1409     return Pattern.compile(regex).split(this, limit);
1410   }
1411
1412   /**
1413    * Split this string around the matches of a regular expression. Each
1414    * element of the returned array is the largest block of characters not
1415    * terminated by the regular expression, in the order the matches are found.
1416    * The array length is unlimited, and trailing empty entries are discarded,
1417    * as though calling <code>split(regex, 0)</code>.
1418    *
1419    * @param regex the pattern to match
1420    * @return the array of split strings
   * @throws NullPointerException if regex is null
1422    * @throws PatternSyntaxException if regex is invalid
1423    * @see #split(String, int)
1424    * @see Pattern#compile(String)
1425    * @see Pattern#split(CharSequence, int)
1426    * @since 1.4
1427    */
1428   public String[] split(String regex)
1429   {
1430     return Pattern.compile(regex).split(this, 0);
1431   }
1432
1433   /**
1434    * Lowercases this String according to a particular locale. This uses
1435    * Unicode's special case mappings, as applied to the given Locale, so the
1436    * resulting string may be a different length.
1437    *
1438    * @param loc locale to use
1439    * @return new lowercased String, or this if no characters were lowercased
1440    * @throws NullPointerException if loc is null
1441    * @see #toUpperCase(Locale)
1442    * @since 1.1
1443    */
1444   public String toLowerCase(Locale loc)
1445   {
1446     // First, see if the current string is already lower case.
1447     boolean turkish = "tr".equals(loc.getLanguage());
1448     int i = count;
1449     int x = offset - 1;
1450     while (--i >= 0)
1451       {
1452         char ch = value[++x];
1453         if ((turkish && ch == '\u0049')
1454             || ch != Character.toLowerCase(ch))
1455           break;
1456       }
1457     if (i < 0)
1458       return this;
1459
1460     // Now we perform the conversion. Fortunately, there are no multi-character
1461     // lowercase expansions in Unicode 3.0.0.
1462     char[] newStr = (char[]) value.clone();
1463     do
1464       {
1465         char ch = value[x];
1466         // Hardcoded special case.
1467         newStr[x++] = (turkish && ch == '\u0049') ? '\u0131'
1468           : Character.toLowerCase(ch);
1469       }
1470     while (--i >= 0);
1471     // Package constructor avoids an array copy.
1472     return new String(newStr, offset, count, true);
1473   }
1474
1475   /**
1476    * Lowercases this String. This uses Unicode's special case mappings, as
1477    * applied to the platform's default Locale, so the resulting string may
1478    * be a different length.
1479    *
1480    * @return new lowercased String, or this if no characters were lowercased
1481    * @see #toLowerCase(Locale)
1482    * @see #toUpperCase()
1483    */
1484   public String toLowerCase()
1485   {
1486     return toLowerCase(Locale.getDefault());
1487   }
1488
1489   /**
1490    * Uppercases this String according to a particular locale. This uses
1491    * Unicode's special case mappings, as applied to the given Locale, so the
1492    * resulting string may be a different length.
1493    *
1494    * @param loc locale to use
1495    * @return new uppercased String, or this if no characters were uppercased
1496    * @throws NullPointerException if loc is null
1497    * @see #toLowerCase(Locale)
1498    * @since 1.1
1499    */
1500   public String toUpperCase(Locale loc)
1501   {
1502     // First, see how many characters we have to grow by, as well as if the
1503     // current string is already upper case.
1504     boolean turkish = "tr".equals(loc.getLanguage());
1505     int expand = 0;
1506     boolean unchanged = true;
1507     int i = count;
1508     int x = i + offset;
1509     while (--i >= 0)
1510       {
1511         char ch = value[--x];
1512         expand += upperCaseExpansion(ch);
1513         unchanged = (unchanged && expand == 0
1514                      && ! (turkish && ch == '\u0069')
1515                      && ch == Character.toUpperCase(ch));
1516       }
1517     if (unchanged)
1518       return this;
1519
1520     // Now we perform the conversion.
1521     i = count;
1522     if (expand == 0)
1523       {
1524         char[] newStr = (char[]) value.clone();
1525         while (--i >= 0)
1526           {
1527             char ch = value[x];
1528             // Hardcoded special case.
1529             newStr[x++] = (turkish && ch == '\u0069') ? '\u0130'
1530               : Character.toUpperCase(ch);
1531           }
1532         // Package constructor avoids an array copy.
1533         return new String(newStr, offset, count, true);
1534       }
1535
1536     // Expansion is necessary.
1537     char[] newStr = new char[count + expand];
1538     int j = 0;
1539     while (--i >= 0)
1540       {
1541         char ch = value[x++];
1542         // Hardcoded special case.
1543         if (turkish && ch == '\u0069')
1544           {
1545             newStr[j++] = '\u0130';
1546             continue;
1547           }
1548         expand = upperCaseExpansion(ch);
1549         if (expand > 0)
1550           {
1551             int index = upperCaseIndex(ch);
1552             while (expand-- >= 0)
1553               newStr[j++] = upperExpand[index++];
1554           }
1555         else
1556           newStr[j++] = Character.toUpperCase(ch);
1557       }
1558     // Package constructor avoids an array copy.
1559     return new String(newStr, 0, newStr.length, true);
1560   }
1561
1562   /**
1563    * Uppercases this String. This uses Unicode's special case mappings, as
1564    * applied to the platform's default Locale, so the resulting string may
1565    * be a different length.
1566    *
1567    * @return new uppercased String, or this if no characters were uppercased
1568    * @see #toUpperCase(Locale)
1569    * @see #toLowerCase()
1570    */
1571   public String toUpperCase()
1572   {
1573     return toUpperCase(Locale.getDefault());
1574   }
1575
1576   /**
1577    * Trims all characters less than or equal to <code>'\u0020'</code>
1578    * (<code>' '</code>) from the beginning and end of this String. This
1579    * includes many, but not all, ASCII control characters, and all
1580    * {@link Character#isWhitespace(char)}.
1581    *
1582    * @return new trimmed String, or this if nothing trimmed
1583    */
1584   public String trim()
1585   {
1586     int limit = count + offset;
1587     if (count == 0 || (value[offset] > '\u0020'
1588                        && value[limit - 1] > '\u0020'))
1589       return this;
1590     int begin = offset;
1591     do
1592       if (begin == limit)
1593         return "";
1594     while (value[begin++] <= '\u0020');
1595     int end = limit;
1596     while (value[--end] <= '\u0020');
1597     return substring(begin - offset - 1, end - offset + 1);
1598   }
1599
1600   /**
1601    * Returns this, as it is already a String!
1602    *
1603    * @return this
1604    */
1605   public String toString()
1606   {
1607     return this;
1608   }
1609
1610   /**
1611    * Copies the contents of this String into a character array. Subsequent
1612    * changes to the array do not affect the String.
1613    *
1614    * @return character array copying the String
1615    */
1616   public char[] toCharArray()
1617   {
1618     if (count == value.length)
1619       return (char[]) value.clone();
1620
1621     char[] copy = new char[count];
1622     VMSystem.arraycopy(value, offset, copy, 0, count);
1623     return copy;
1624   }
1625
1626   /**
1627    * Returns a String representation of an Object. This is "null" if the
1628    * object is null, otherwise it is <code>obj.toString()</code> (which
1629    * can be null).
1630    *
1631    * @param obj the Object
1632    * @return the string conversion of obj
1633    */
1634   public static String valueOf(Object obj)
1635   {
1636     return obj == null ? "null" : obj.toString();
1637   }
1638
1639   /**
1640    * Returns a String representation of a character array. Subsequent
1641    * changes to the array do not affect the String.
1642    *
1643    * @param data the character array
1644    * @return a String containing the same character sequence as data
1645    * @throws NullPointerException if data is null
1646    * @see #valueOf(char[], int, int)
1647    * @see #String(char[])
1648    */
1649   public static String valueOf(char[] data)
1650   {
1651     return valueOf (data, 0, data.length);
1652   }
1653
1654   /**
1655    * Returns a String representing the character sequence of the char array,
1656    * starting at the specified offset, and copying chars up to the specified
1657    * count. Subsequent changes to the array do not affect the String.
1658    *
1659    * @param data character array
1660    * @param offset position (base 0) to start copying out of data
1661    * @param count the number of characters from data to copy
1662    * @return String containing the chars from data[offset..offset+count]
1663    * @throws NullPointerException if data is null
1664    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1665    *         || offset + count &gt; data.length)
1666    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1667    * @see #String(char[], int, int)
1668    */
1669   public static String valueOf(char[] data, int offset, int count)
1670   {
1671     return new String(data, offset, count, false);
1672   }
1673
1674   /**
1675    * Returns a String representing the character sequence of the char array,
1676    * starting at the specified offset, and copying chars up to the specified
1677    * count. Subsequent changes to the array do not affect the String.
1678    *
1679    * @param data character array
1680    * @param offset position (base 0) to start copying out of data
1681    * @param count the number of characters from data to copy
1682    * @return String containing the chars from data[offset..offset+count]
1683    * @throws NullPointerException if data is null
1684    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1685    *         || offset + count &lt; 0 (overflow)
1686    *         || offset + count &lt; 0 (overflow)
1687    *         || offset + count &gt; data.length)
1688    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1689    * @see #String(char[], int, int)
1690    */
1691   public static String copyValueOf(char[] data, int offset, int count)
1692   {
1693     return new String(data, offset, count, false);
1694   }
1695
1696   /**
1697    * Returns a String representation of a character array. Subsequent
1698    * changes to the array do not affect the String.
1699    *
1700    * @param data the character array
1701    * @return a String containing the same character sequence as data
1702    * @throws NullPointerException if data is null
1703    * @see #copyValueOf(char[], int, int)
1704    * @see #String(char[])
1705    */
1706   public static String copyValueOf(char[] data)
1707   {
1708     return copyValueOf (data, 0, data.length);
1709   }
1710
1711   /**
1712    * Returns a String representing a boolean.
1713    *
1714    * @param b the boolean
1715    * @return "true" if b is true, else "false"
1716    */
1717   public static String valueOf(boolean b)
1718   {
1719     return b ? "true" : "false";
1720   }
1721
1722   /**
1723    * Returns a String representing a character.
1724    *
1725    * @param c the character
1726    * @return String containing the single character c
1727    */
1728   public static String valueOf(char c)
1729   {
1730     // Package constructor avoids an array copy.
1731     return new String(new char[] { c }, 0, 1, true);
1732   }
1733
1734   /**
1735    * Returns a String representing an integer.
1736    *
1737    * @param i the integer
1738    * @return String containing the integer in base 10
1739    * @see Integer#toString(int)
1740    */
1741   public static String valueOf(int i)
1742   {
1743     // See Integer to understand why we call the two-arg variant.
1744     return Integer.toString(i, 10);
1745   }
1746
1747   /**
1748    * Returns a String representing a long.
1749    *
1750    * @param l the long
1751    * @return String containing the long in base 10
1752    * @see Long#toString(long)
1753    */
1754   public static String valueOf(long l)
1755   {
1756     return Long.toString(l);
1757   }
1758
1759   /**
1760    * Returns a String representing a float.
1761    *
1762    * @param f the float
1763    * @return String containing the float
1764    * @see Float#toString(float)
1765    */
1766   public static String valueOf(float f)
1767   {
1768     return Float.toString(f);
1769   }
1770
1771   /**
1772    * Returns a String representing a double.
1773    *
1774    * @param d the double
1775    * @return String containing the double
1776    * @see Double#toString(double)
1777    */
1778   public static String valueOf(double d)
1779   {
1780     return Double.toString(d);
1781   }
1782
1783
1784   /** @since 1.5 */
1785   public static String format(Locale locale, String format, Object... args)
1786   {
1787     Formatter f = new Formatter(locale);
1788     return f.format(format, args).toString();
1789   }
1790
1791   /** @since 1.5 */
1792   public static String format(String format, Object... args)
1793   {
1794     return format(Locale.getDefault(), format, args);
1795   }
1796
1797   /**
1798    * If two Strings are considered equal, by the equals() method,
1799    * then intern() will return the same String instance. ie.
1800    * if (s1.equals(s2)) then (s1.intern() == s2.intern()).
1801    * All string literals and string-valued constant expressions
1802    * are already interned.
1803    *
1804    * @return the interned String
1805    */
1806   public String intern()
1807   {
1808     return VMString.intern(this);
1809   }
1810
1811   /**
1812    * Return the number of code points between two indices in the
1813    * <code>String</code>.  An unpaired surrogate counts as a
1814    * code point for this purpose.  Characters outside the indicated
1815    * range are not examined, even if the range ends in the middle of a
1816    * surrogate pair.
1817    *
1818    * @param start the starting index
1819    * @param end one past the ending index
1820    * @return the number of code points
1821    * @since 1.5
1822    */
1823   public synchronized int codePointCount(int start, int end)
1824   {
1825     if (start < 0 || end > count || start > end)
1826       throw new StringIndexOutOfBoundsException();
1827
1828     start += offset;
1829     end += offset;
1830     int count = 0;
1831     while (start < end)
1832       {
1833         char base = value[start];
1834         if (base < Character.MIN_HIGH_SURROGATE
1835             || base > Character.MAX_HIGH_SURROGATE
1836             || start == end
1837             || start == count
1838             || value[start + 1] < Character.MIN_LOW_SURROGATE
1839             || value[start + 1] > Character.MAX_LOW_SURROGATE)
1840           {
1841             // Nothing.
1842           }
1843         else
1844           {
1845             // Surrogate pair.
1846             ++start;
1847           }
1848         ++start;
1849         ++count;
1850       }
1851     return count;
1852   }
1853
1854   /**
1855    * Helper function used to detect which characters have a multi-character
1856    * uppercase expansion. Note that this is only used in locations which
1857    * track one-to-many capitalization (java.lang.Character does not do this).
1858    * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the
1859    * longest uppercase expansion is three characters (a growth of 2 from the
1860    * lowercase character).
1861    *
1862    * @param ch the char to check
1863    * @return the number of characters to add when converting to uppercase
1864    * @see CharData#DIRECTION
1865    * @see CharData#UPPER_SPECIAL
1866    * @see #toUpperCase(Locale)
1867    */
1868   private static int upperCaseExpansion(char ch)
1869   {
1870     return Character.direction[0][Character.readCodePoint((int)ch) >> 7] & 3;
1871   }
1872
1873   /**
1874    * Helper function used to locate the offset in upperExpand given a
1875    * character with a multi-character expansion. The binary search is
1876    * optimized under the assumption that this method will only be called on
1877    * characters which exist in upperSpecial.
1878    *
1879    * @param ch the char to check
1880    * @return the index where its expansion begins
1881    * @see CharData#UPPER_SPECIAL
1882    * @see CharData#UPPER_EXPAND
1883    * @see #toUpperCase(Locale)
1884    */
1885   private static int upperCaseIndex(char ch)
1886   {
1887     // Simple binary search for the correct character.
1888     int low = 0;
1889     int hi = upperSpecial.length - 2;
1890     int mid = ((low + hi) >> 2) << 1;
1891     char c = upperSpecial[mid];
1892     while (ch != c)
1893       {
1894         if (ch < c)
1895           hi = mid - 2;
1896         else
1897           low = mid + 2;
1898         mid = ((low + hi) >> 2) << 1;
1899         c = upperSpecial[mid];
1900       }
1901     return upperSpecial[mid + 1];
1902   }
1903
1904   /**
1905    * Returns the value array of the given string if it is zero based or a
1906    * copy of it that is zero based (stripping offset and making length equal
1907    * to count). Used for accessing the char[]s of gnu.java.lang.CharData.
1908    * Package private for use in Character.
1909    */
1910   static char[] zeroBasedStringValue(String s)
1911   {
1912     char[] value;
1913
1914     if (s.offset == 0 && s.count == s.value.length)
1915       value = s.value;
1916     else
1917       {
1918         int count = s.count;
1919         value = new char[count];
1920         VMSystem.arraycopy(s.value, s.offset, value, 0, count);
1921       }
1922
1923     return value;
1924   }
1925
1926   /**
1927    * Returns true iff this String contains the sequence of Characters
1928    * described in s.
1929    * @param s the CharSequence
1930    * @return true iff this String contains s
1931    *
1932    * @since 1.5
1933    */
1934   public boolean contains (CharSequence s)
1935   {
1936     return this.indexOf(s.toString()) != -1;
1937   }
1938
1939   /**
1940    * Returns a string that is this string with all instances of the sequence
1941    * represented by <code>target</code> replaced by the sequence in
1942    * <code>replacement</code>.
1943    * @param target the sequence to be replaced
1944    * @param replacement the sequence used as the replacement
1945    * @return the string constructed as above
1946    */
1947   public String replace (CharSequence target, CharSequence replacement)
1948   {
1949     String targetString = target.toString();
1950     String replaceString = replacement.toString();
1951     int targetLength = target.length();
1952     int replaceLength = replacement.length();
1953
1954     int startPos = this.indexOf(targetString);
1955     StringBuilder result = new StringBuilder(this);
1956     while (startPos != -1)
1957       {
1958         // Replace the target with the replacement
1959         result.replace(startPos, startPos + targetLength, replaceString);
1960
1961         // Search for a new occurrence of the target
1962         startPos = result.indexOf(targetString, startPos + replaceLength);
1963       }
1964     return result.toString();
1965   }
1966
1967   /**
1968    * Return the index into this String that is offset from the given index by
1969    * <code>codePointOffset</code> code points.
1970    * @param index the index at which to start
1971    * @param codePointOffset the number of code points to offset
1972    * @return the index into this String that is <code>codePointOffset</code>
1973    * code points offset from <code>index</code>.
1974    *
1975    * @throws IndexOutOfBoundsException if index is negative or larger than the
1976    * length of this string.
1977    * @throws IndexOutOfBoundsException if codePointOffset is positive and the
1978    * substring starting with index has fewer than codePointOffset code points.
1979    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
1980    * substring ending with index has fewer than (-codePointOffset) code points.
1981    * @since 1.5
1982    */
1983   public int offsetByCodePoints(int index, int codePointOffset)
1984   {
1985     if (index < 0 || index > count)
1986       throw new IndexOutOfBoundsException();
1987
1988     return Character.offsetByCodePoints(value, offset, count, offset + index,
1989                                         codePointOffset);
1990   }
1991 }