libjava/classpath/java/lang/String.java

   1 /* String.java -- immutable character sequences; the object of string literals
   2    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2005
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Classpath.
   6
   7 GNU Classpath is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Classpath is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Classpath; see the file COPYING.  If not, write to the
  19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301 USA.
  21
  22 Linking this library statically or dynamically with other modules is
  23 making a combined work based on this library.  Thus, the terms and
  24 conditions of the GNU General Public License cover the whole
  25 combination.
  26
  27 As a special exception, the copyright holders of this library give you
  28 permission to link this library with independent modules to produce an
  29 executable, regardless of the license terms of these independent
  30 modules, and to copy and distribute the resulting executable under
  31 terms of your choice, provided that you also meet, for each linked
  32 independent module, the terms and conditions of the license of that
  33 module.  An independent module is a module which is not derived from
  34 or based on this library.  If you modify this library, you may extend
  35 this exception to your version of the library, but you are not
  36 obligated to do so.  If you do not wish to do so, delete this
  37 exception statement from your version. */
  38
  39
  40 package java.lang;
  41
  42 import gnu.java.lang.CharData;
  43
  44 import java.io.Serializable;
  45 import java.io.UnsupportedEncodingException;
  46 import java.nio.ByteBuffer;
  47 import java.nio.CharBuffer;
  48 import java.nio.charset.CharacterCodingException;
  49 import java.nio.charset.Charset;
  50 import java.nio.charset.CharsetDecoder;
  51 import java.nio.charset.CharsetEncoder;
  52 import java.nio.charset.CodingErrorAction;
  53 import java.nio.charset.IllegalCharsetNameException;
  54 import java.nio.charset.UnsupportedCharsetException;
  55 import java.text.Collator;
  56 import java.util.Comparator;
  57 import java.util.Locale;
  58 import java.util.regex.Matcher;
  59 import java.util.regex.Pattern;
  60 import java.util.regex.PatternSyntaxException;
  61
  62 /**
  63  * Strings represent an immutable sequence of characters.  All String literals
  64  * are instances of this class, and two string literals with the same contents
  65  * refer to the same String object.
  66  *
  67  * <p>This class also includes a number of methods for manipulating the
  68  * contents of strings (of course, creating a new object if there are any
  69  * changes, as String is immutable). Case mapping relies on Unicode 3.0.0
  70  * standards, where some character sequences have a different number of
  71  * characters in the uppercase version than the lower case.
  72  *
  73  * <p>Strings are special, in that they are the only object with an overloaded
  74  * operator. When you use '+' with at least one String argument, both
  75  * arguments have String conversion performed on them, and another String (not
  76  * guaranteed to be unique) results.
  77  *
  78  * <p>String is special-cased when doing data serialization - rather than
  79  * listing the fields of this class, a String object is converted to a string
  80  * literal in the object stream.
  81  *
  82  * @author Paul N. Fisher
  83  * @author Eric Blake (ebb9@email.byu.edu)
  84  * @author Per Bothner (bothner@cygnus.com)
  85  * @since 1.0
  86  * @status updated to 1.4; but could use better data sharing via offset field
  87  */
  88 public final class String implements Serializable, Comparable, CharSequence
  89 {
  90   // WARNING: String is a CORE class in the bootstrap cycle. See the comments
  91   // in vm/reference/java/lang/Runtime for implications of this fact.
  92
  93   /**
  94    * This is probably not necessary because this class is special cased already
  95    * but it will avoid showing up as a discrepancy when comparing SUIDs.
  96    */
  97   private static final long serialVersionUID = -6849794470754667710L;
  98
  99   /**
 100    * Stores unicode multi-character uppercase expansion table.
 101    * @see #toUpperCase(Locale)
 102    * @see CharData#UPPER_EXPAND
 103    */
 104   private static final char[] upperExpand
 105         = zeroBasedStringValue(CharData.UPPER_EXPAND);
 106
 107   /**
 108    * Stores unicode multi-character uppercase special casing table.
 109    * @see #upperCaseExpansion(char)
 110    * @see CharData#UPPER_SPECIAL
 111    */
 112   private static final char[] upperSpecial
 113           = zeroBasedStringValue(CharData.UPPER_SPECIAL);
 114
 115   /**
 116    * Characters which make up the String.
 117    * Package access is granted for use by StringBuffer.
 118    */
 119   final char[] value;
 120
 121   /**
 122    * Holds the number of characters in value.  This number is generally
 123    * the same as value.length, but can be smaller because substrings and
 124    * StringBuffers can share arrays. Package visible for use by trusted code.
 125    */
 126   final int count;
 127
 128   /**
 129    * Caches the result of hashCode().  If this value is zero, the hashcode
 130    * is considered uncached (even if 0 is the correct hash value).
 131    */
 132   private int cachedHashCode;
 133
 134   /**
 135    * Holds the starting position for characters in value[].  Since
 136    * substring()'s are common, the use of offset allows the operation
 137    * to perform in O(1). Package access is granted for use by StringBuffer.
 138    */
 139   final int offset;
 140
 141   /**
 142    * An implementation for {@link #CASE_INSENSITIVE_ORDER}.
 143    * This must be {@link Serializable}. The class name is dictated by
 144    * compatibility with Sun's JDK.
 145    */
 146   private static final class CaseInsensitiveComparator
 147     implements Comparator, Serializable
 148   {
 149     /**
 150      * Compatible with JDK 1.2.
 151      */
 152     private static final long serialVersionUID = 8575799808933029326L;
 153
 154     /**
 155      * The default private constructor generates unnecessary overhead.
 156      */
 157     CaseInsensitiveComparator() {}
 158
 159     /**
 160      * Compares to Strings, using
 161      * <code>String.compareToIgnoreCase(String)</code>.
 162      *
 163      * @param o1 the first string
 164      * @param o2 the second string
 165      * @return &lt; 0, 0, or &gt; 0 depending on the case-insensitive
 166      *         comparison of the two strings.
 167      * @throws NullPointerException if either argument is null
 168      * @throws ClassCastException if either argument is not a String
 169      * @see #compareToIgnoreCase(String)
 170      */
 171     public int compare(Object o1, Object o2)
 172     {
 173       return ((String) o1).compareToIgnoreCase((String) o2);
 174     }
 175   } // class CaseInsensitiveComparator
 176
 177   /**
 178    * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>.
 179    * This comparator is {@link Serializable}. Note that it ignores Locale,
 180    * for that, you want a Collator.
 181    *
 182    * @see Collator#compare(String, String)
 183    * @since 1.2
 184    */
 185   public static final Comparator CASE_INSENSITIVE_ORDER
 186     = new CaseInsensitiveComparator();
 187
 188   /**
 189    * Creates an empty String (length 0). Unless you really need a new object,
 190    * consider using <code>""</code> instead.
 191    */
 192   public String()
 193   {
 194     value = "".value;
 195     offset = 0;
 196     count = 0;
 197   }
 198
 199   /**
 200    * Copies the contents of a String to a new String. Since Strings are
 201    * immutable, only a shallow copy is performed.
 202    *
 203    * @param str String to copy
 204    * @throws NullPointerException if str is null
 205    */
 206   public String(String str)
 207   {
 208     value = str.value;
 209     offset = str.offset;
 210     count = str.count;
 211     cachedHashCode = str.cachedHashCode;
 212   }
 213
 214   /**
 215    * Creates a new String using the character sequence of the char array.
 216    * Subsequent changes to data do not affect the String.
 217    *
 218    * @param data char array to copy
 219    * @throws NullPointerException if data is null
 220    */
 221   public String(char[] data)
 222   {
 223     this(data, 0, data.length, false);
 224   }
 225
 226   /**
 227    * Creates a new String using the character sequence of a subarray of
 228    * characters. The string starts at offset, and copies count chars.
 229    * Subsequent changes to data do not affect the String.
 230    *
 231    * @param data char array to copy
 232    * @param offset position (base 0) to start copying out of data
 233    * @param count the number of characters from data to copy
 234    * @throws NullPointerException if data is null
 235    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
 236    *         || offset + count &lt; 0 (overflow)
 237    *         || offset + count &gt; data.length)
 238    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 239    */
 240   public String(char[] data, int offset, int count)
 241   {
 242     this(data, offset, count, false);
 243   }
 244
 245   /**
 246    * Creates a new String using an 8-bit array of integer values, starting at
 247    * an offset, and copying up to the count. Each character c, using
 248    * corresponding byte b, is created in the new String as if by performing:
 249    *
 250    * <pre>
 251    * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 252    * </pre>
 253    *
 254    * @param ascii array of integer values
 255    * @param hibyte top byte of each Unicode character
 256    * @param offset position (base 0) to start copying out of ascii
 257    * @param count the number of characters from ascii to copy
 258    * @throws NullPointerException if ascii is null
 259    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
 260    *         || offset + count &lt; 0 (overflow)
 261    *         || offset + count &gt; ascii.length)
 262    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 263    * @see #String(byte[])
 264    * @see #String(byte[], String)
 265    * @see #String(byte[], int, int)
 266    * @see #String(byte[], int, int, String)
 267    * @deprecated use {@link #String(byte[], int, int, String)} to perform
 268    *             correct encoding
 269    */
 270   public String(byte[] ascii, int hibyte, int offset, int count)
 271   {
 272     if (offset < 0)
 273       throw new StringIndexOutOfBoundsException("offset: " + offset);
 274     if (count < 0)
 275       throw new StringIndexOutOfBoundsException("count: " + count);
 276     // equivalent to: offset + count < 0 || offset + count > ascii.length
 277     if (ascii.length - offset < count)
 278       throw new StringIndexOutOfBoundsException("offset + count: "
 279                                                 + (offset + count));
 280     value = new char[count];
 281     this.offset = 0;
 282     this.count = count;
 283     hibyte <<= 8;
 284     offset += count;
 285     while (--count >= 0)
 286       value[count] = (char) (hibyte | (ascii[--offset] & 0xff));
 287   }
 288
 289   /**
 290    * Creates a new String using an 8-bit array of integer values. Each
 291    * character c, using corresponding byte b, is created in the new String
 292    * as if by performing:
 293    *
 294    * <pre>
 295    * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 296    * </pre>
 297    *
 298    * @param ascii array of integer values
 299    * @param hibyte top byte of each Unicode character
 300    * @throws NullPointerException if ascii is null
 301    * @see #String(byte[])
 302    * @see #String(byte[], String)
 303    * @see #String(byte[], int, int)
 304    * @see #String(byte[], int, int, String)
 305    * @see #String(byte[], int, int, int)
 306    * @deprecated use {@link #String(byte[], String)} to perform
 307    *             correct encoding
 308    */
 309   public String(byte[] ascii, int hibyte)
 310   {
 311     this(ascii, hibyte, 0, ascii.length);
 312   }
 313
 314   /**
 315    * Creates a new String using the portion of the byte array starting at the
 316    * offset and ending at offset + count. Uses the specified encoding type
 317    * to decode the byte array, so the resulting string may be longer or
 318    * shorter than the byte array. For more decoding control, use
 319    * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
 320    * see {@link java.nio.charset.Charset}. The behavior is not specified if
 321    * the decoder encounters invalid characters; this implementation replaces
 322    * them with the decoder's replacement value.
 323    *
 324    * @param data byte array to copy
 325    * @param offset the offset to start at
 326    * @param count the number of bytes in the array to use
 327    * @param encoding the name of the encoding to use
 328    * @throws NullPointerException if data or encoding is null
 329    * @throws IndexOutOfBoundsException if offset or count is incorrect
 330    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 331    * @throws UnsupportedEncodingException if encoding is not found
 332    * @throws Error if the decoding fails
 333    * @since 1.1
 334    */
 335   public String(byte[] data, int offset, int count, String encoding)
 336     throws UnsupportedEncodingException
 337   {
 338     if (offset < 0)
 339       throw new StringIndexOutOfBoundsException("offset: " + offset);
 340     if (count < 0)
 341       throw new StringIndexOutOfBoundsException("count: " + count);
 342     // equivalent to: offset + count < 0 || offset + count > data.length
 343     if (data.length - offset < count)
 344       throw new StringIndexOutOfBoundsException("offset + count: "
 345                                                 + (offset + count));
 346     try
 347       {
 348         CharsetDecoder csd = Charset.forName(encoding).newDecoder();
 349         csd.onMalformedInput(CodingErrorAction.REPLACE);
 350         csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
 351         CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
 352         if(cbuf.hasArray())
 353           {
 354             value = cbuf.array();
 355             this.offset = cbuf.position();
 356             this.count = cbuf.remaining();
 357           } else {
 358             // Doubt this will happen. But just in case.
 359             value = new char[cbuf.remaining()];
 360             cbuf.get(value);
 361             this.offset = 0;
 362             this.count = value.length;
 363           }
 364       } catch(CharacterCodingException e){
 365           throw new UnsupportedEncodingException("Encoding: "+encoding+
 366                                                  " not found.");
 367       } catch(IllegalCharsetNameException e){
 368           throw new UnsupportedEncodingException("Encoding: "+encoding+
 369                                                  " not found.");
 370       } catch(UnsupportedCharsetException e){
 371           throw new UnsupportedEncodingException("Encoding: "+encoding+
 372                                                  " not found.");
 373       }
 374   }
 375
 376   /**
 377    * Creates a new String using the byte array. Uses the specified encoding
 378    * type to decode the byte array, so the resulting string may be longer or
 379    * shorter than the byte array. For more decoding control, use
 380    * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
 381    * see {@link java.nio.charset.Charset}. The behavior is not specified if
 382    * the decoder encounters invalid characters; this implementation replaces
 383    * them with the decoder's replacement value.
 384    *
 385    * @param data byte array to copy
 386    * @param encoding the name of the encoding to use
 387    * @throws NullPointerException if data or encoding is null
 388    * @throws UnsupportedEncodingException if encoding is not found
 389    * @throws Error if the decoding fails
 390    * @see #String(byte[], int, int, String)
 391    * @since 1.1
 392    */
 393   public String(byte[] data, String encoding)
 394     throws UnsupportedEncodingException
 395   {
 396     this(data, 0, data.length, encoding);
 397   }
 398
 399   /**
 400    * Creates a new String using the portion of the byte array starting at the
 401    * offset and ending at offset + count. Uses the encoding of the platform's
 402    * default charset, so the resulting string may be longer or shorter than
 403    * the byte array. For more decoding control, use
 404    * {@link java.nio.charset.CharsetDecoder}.  The behavior is not specified
 405    * if the decoder encounters invalid characters; this implementation
 406    * replaces them with the decoder's replacement value.
 407    *
 408    * @param data byte array to copy
 409    * @param offset the offset to start at
 410    * @param count the number of bytes in the array to use
 411    * @throws NullPointerException if data is null
 412    * @throws IndexOutOfBoundsException if offset or count is incorrect
 413    * @throws Error if the decoding fails
 414    * @see #String(byte[], int, int, String)
 415    * @since 1.1
 416    */
 417   public String(byte[] data, int offset, int count)
 418   {
 419     if (offset < 0)
 420       throw new StringIndexOutOfBoundsException("offset: " + offset);
 421     if (count < 0)
 422       throw new StringIndexOutOfBoundsException("count: " + count);
 423     // equivalent to: offset + count < 0 || offset + count > data.length
 424     if (data.length - offset < count)
 425       throw new StringIndexOutOfBoundsException("offset + count: "
 426                                                 + (offset + count));
 427     int o, c;
 428     char[] v;
 429     String encoding;
 430     try
 431         {
 432           encoding = System.getProperty("file.encoding");
 433           CharsetDecoder csd = Charset.forName(encoding).newDecoder();
 434           csd.onMalformedInput(CodingErrorAction.REPLACE);
 435           csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
 436           CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
 437           if(cbuf.hasArray())
 438             {
 439               v = cbuf.array();
 440               o = cbuf.position();
 441               c = cbuf.remaining();
 442             } else {
 443               // Doubt this will happen. But just in case.
 444               v = new char[cbuf.remaining()];
 445               cbuf.get(v);
 446               o = 0;
 447               c = v.length;
 448             }
 449         } catch(Exception ex){
 450             // If anything goes wrong (System property not set,
 451             // NIO provider not available, etc)
 452             // Default to the 'safe' encoding ISO8859_1
 453             v = new char[count];
 454             o = 0;
 455             c = count;
 456             for (int i=0;i<count;i++)
 457               v[i] = (char)data[offset+i];
 458         }
 459     this.value = v;
 460     this.offset = o;
 461     this.count = c;
 462   }
 463
 464   /**
 465    * Creates a new String using the byte array. Uses the encoding of the
 466    * platform's default charset, so the resulting string may be longer or
 467    * shorter than the byte array. For more decoding control, use
 468    * {@link java.nio.charset.CharsetDecoder}.  The behavior is not specified
 469    * if the decoder encounters invalid characters; this implementation
 470    * replaces them with the decoder's replacement value.
 471    *
 472    * @param data byte array to copy
 473    * @throws NullPointerException if data is null
 474    * @throws Error if the decoding fails
 475    * @see #String(byte[], int, int)
 476    * @see #String(byte[], int, int, String)
 477    * @since 1.1
 478    */
 479   public String(byte[] data)
 480   {
 481     this(data, 0, data.length);
 482   }
 483
 484   /**
 485    * Creates a new String using the character sequence represented by
 486    * the StringBuffer. Subsequent changes to buffer do not affect the String.
 487    *
 488    * @param buffer StringBuffer to copy
 489    * @throws NullPointerException if buffer is null
 490    */
 491   public String(StringBuffer buffer)
 492   {
 493     synchronized (buffer)
 494       {
 495         offset = 0;
 496         count = buffer.count;
 497         // Share unless buffer is 3/4 empty.
 498         if ((count << 2) < buffer.value.length)
 499           {
 500             value = new char[count];
 501             VMSystem.arraycopy(buffer.value, 0, value, 0, count);
 502           }
 503         else
 504           {
 505             buffer.shared = true;
 506             value = buffer.value;
 507           }
 508       }
 509   }
 510
 511   /**
 512    * Creates a new String using the character sequence represented by
 513    * the StringBuilder. Subsequent changes to buffer do not affect the String.
 514    *
 515    * @param buffer StringBuilder to copy
 516    * @throws NullPointerException if buffer is null
 517    */
 518   public String(StringBuilder buffer)
 519   {
 520     this(buffer.value, 0, buffer.count);
 521   }
 522
 523   /**
 524    * Special constructor which can share an array when safe to do so.
 525    *
 526    * @param data the characters to copy
 527    * @param offset the location to start from
 528    * @param count the number of characters to use
 529    * @param dont_copy true if the array is trusted, and need not be copied
 530    * @throws NullPointerException if data is null
 531    * @throws StringIndexOutOfBoundsException if bounds check fails
 532    */
 533   String(char[] data, int offset, int count, boolean dont_copy)
 534   {
 535     if (offset < 0)
 536       throw new StringIndexOutOfBoundsException("offset: " + offset);
 537     if (count < 0)
 538       throw new StringIndexOutOfBoundsException("count: " + count);
 539     // equivalent to: offset + count < 0 || offset + count > data.length
 540     if (data.length - offset < count)
 541       throw new StringIndexOutOfBoundsException("offset + count: "
 542                                                 + (offset + count));
 543     if (dont_copy)
 544       {
 545         value = data;
 546         this.offset = offset;
 547       }
 548     else
 549       {
 550         value = new char[count];
 551         VMSystem.arraycopy(data, offset, value, 0, count);
 552         this.offset = 0;
 553       }
 554     this.count = count;
 555   }
 556
 557   /**
 558    * Returns the number of characters contained in this String.
 559    *
 560    * @return the length of this String
 561    */
 562   public int length()
 563   {
 564     return count;
 565   }
 566
 567   /**
 568    * Returns the character located at the specified index within this String.
 569    *
 570    * @param index position of character to return (base 0)
 571    * @return character located at position index
 572    * @throws IndexOutOfBoundsException if index &lt; 0 || index &gt;= length()
 573    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 574    */
 575   public char charAt(int index)
 576   {
 577     if (index < 0 || index >= count)
 578       throw new StringIndexOutOfBoundsException(index);
 579     return value[offset + index];
 580   }
 581
 582   /**
 583    * Get the code point at the specified index.  This is like #charAt(int),
 584    * but if the character is the start of a surrogate pair, and the
 585    * following character completes the pair, then the corresponding
 586    * supplementary code point is returned.
 587    * @param index the index of the codepoint to get, starting at 0
 588    * @return the codepoint at the specified index
 589    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
 590    * @since 1.5
 591    */
 592   public synchronized int codePointAt(int index)
 593   {
 594     // Use the CharSequence overload as we get better range checking
 595     // this way.
 596     return Character.codePointAt(this, index);
 597   }
 598
 599   /**
 600    * Get the code point before the specified index.  This is like
 601    * #codePointAt(int), but checks the characters at <code>index-1</code> and
 602    * <code>index-2</code> to see if they form a supplementary code point.
 603    * @param index the index just past the codepoint to get, starting at 0
 604    * @return the codepoint before the specified index
 605    * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
 606    *         (while unspecified, this is a StringIndexOutOfBoundsException)
 607    * @since 1.5
 608    */
 609   public synchronized int codePointBefore(int index)
 610   {
 611     // Use the CharSequence overload as we get better range checking
 612     // this way.
 613     return Character.codePointBefore(this, index);
 614   }
 615
 616   /**
 617    * Copies characters from this String starting at a specified start index,
 618    * ending at a specified stop index, to a character array starting at
 619    * a specified destination begin index.
 620    *
 621    * @param srcBegin index to begin copying characters from this String
 622    * @param srcEnd index after the last character to be copied from this String
 623    * @param dst character array which this String is copied into
 624    * @param dstBegin index to start writing characters into dst
 625    * @throws NullPointerException if dst is null
 626    * @throws IndexOutOfBoundsException if any indices are out of bounds
 627    *         (while unspecified, source problems cause a
 628    *         StringIndexOutOfBoundsException, and dst problems cause an
 629    *         ArrayIndexOutOfBoundsException)
 630    */
 631   public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
 632   {
 633     if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
 634       throw new StringIndexOutOfBoundsException();
 635     VMSystem.arraycopy(value, srcBegin + offset,
 636                      dst, dstBegin, srcEnd - srcBegin);
 637   }
 638
 639   /**
 640    * Copies the low byte of each character from this String starting at a
 641    * specified start index, ending at a specified stop index, to a byte array
 642    * starting at a specified destination begin index.
 643    *
 644    * @param srcBegin index to begin copying characters from this String
 645    * @param srcEnd index after the last character to be copied from this String
 646    * @param dst byte array which each low byte of this String is copied into
 647    * @param dstBegin index to start writing characters into dst
 648    * @throws NullPointerException if dst is null and copy length is non-zero
 649    * @throws IndexOutOfBoundsException if any indices are out of bounds
 650    *         (while unspecified, source problems cause a
 651    *         StringIndexOutOfBoundsException, and dst problems cause an
 652    *         ArrayIndexOutOfBoundsException)
 653    * @see #getBytes()
 654    * @see #getBytes(String)
 655    * @deprecated use {@link #getBytes()}, which uses a char to byte encoder
 656    */
 657   public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin)
 658   {
 659     if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
 660       throw new StringIndexOutOfBoundsException();
 661     int i = srcEnd - srcBegin;
 662     srcBegin += offset;
 663     while (--i >= 0)
 664       dst[dstBegin++] = (byte) value[srcBegin++];
 665   }
 666
 667   /**
 668    * Converts the Unicode characters in this String to a byte array. Uses the
 669    * specified encoding method, so the result may be longer or shorter than
 670    * the String. For more encoding control, use
 671    * {@link java.nio.charset.CharsetEncoder}, and for valid character sets,
 672    * see {@link java.nio.charset.Charset}. Unsupported characters get
 673    * replaced by an encoding specific byte.
 674    *
 675    * @param enc encoding name
 676    * @return the resulting byte array
 677    * @throws NullPointerException if enc is null
 678    * @throws UnsupportedEncodingException if encoding is not supported
 679    * @since 1.1
 680    */
 681   public byte[] getBytes(String enc) throws UnsupportedEncodingException
 682   {
 683     try
 684       {
 685         CharsetEncoder cse = Charset.forName(enc).newEncoder();
 686         cse.onMalformedInput(CodingErrorAction.REPLACE);
 687         cse.onUnmappableCharacter(CodingErrorAction.REPLACE);
 688         ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count));
 689         if(bbuf.hasArray())
 690           return bbuf.array();
 691
 692         // Doubt this will happen. But just in case.
 693         byte[] bytes = new byte[bbuf.remaining()];
 694         bbuf.get(bytes);
 695         return bytes;
 696       }
 697     catch(IllegalCharsetNameException e)
 698       {
 699         throw new UnsupportedEncodingException("Encoding: " + enc
 700                                                + " not found.");
 701       }
 702     catch(UnsupportedCharsetException e)
 703       {
 704         throw new UnsupportedEncodingException("Encoding: " + enc
 705                                                + " not found.");
 706       }
 707     catch(CharacterCodingException e)
 708       {
 709         // This shouldn't ever happen.
 710         throw (InternalError) new InternalError().initCause(e);
 711       }
 712   }
 713
 714   /**
 715    * Converts the Unicode characters in this String to a byte array. Uses the
 716    * encoding of the platform's default charset, so the result may be longer
 717    * or shorter than the String. For more encoding control, use
 718    * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get
 719    * replaced by an encoding specific byte.
 720    *
 721    * @return the resulting byte array (never null; Latin-1 fallback on a problem)
 722    * @since 1.1
 723    */
 724   public byte[] getBytes()
 725   {
 726       try
 727           {
 728               return getBytes(System.getProperty("file.encoding"));
 729           } catch(Exception e) {
 730               // XXX - Throw an error here?
 731               // For now, default to the 'safe' encoding.
 732               byte[] bytes = new byte[count];
 733               for(int i=0;i<count;i++)
 734                   bytes[i] = (byte)((value[offset+i] <= 0xFF)?
 735                                     value[offset+i]:'?');
 736               return bytes;
 737       }
 738   }
 739
 740   /**
 741    * Predicate which compares anObject to this. This is true only for Strings
 742    * with the same character sequence.
 743    *
 744    * @param anObject the object to compare
 745    * @return true if anObject is semantically equal to this
 746    * @see #compareTo(String)
 747    * @see #equalsIgnoreCase(String)
 748    */
 749   public boolean equals(Object anObject)
 750   {
 751     if (! (anObject instanceof String))
 752       return false;
 753     String str2 = (String) anObject;
 754     if (count != str2.count)
 755       return false;
 756     if (value == str2.value && offset == str2.offset)
 757       return true;
 758     int i = count;
 759     int x = offset;
 760     int y = str2.offset;
 761     while (--i >= 0)
 762       if (value[x++] != str2.value[y++])
 763         return false;
 764     return true;
 765   }
 766
 767   /**
 768    * Compares the given StringBuffer to this String. This is true if the
 769    * StringBuffer has the same content as this String at this moment.
 770    *
 771    * @param buffer the StringBuffer to compare to
 772    * @return true if StringBuffer has the same character sequence
 773    * @throws NullPointerException if the given StringBuffer is null
 774    * @since 1.4
 775    */
 776   public boolean contentEquals(StringBuffer buffer)
 777   {
 778     synchronized (buffer)
 779       {
 780         if (count != buffer.count)
 781           return false;
 782         if (value == buffer.value)
 783           return true; // Possible if shared.
 784         int i = count;
 785         int x = offset + count;
 786         while (--i >= 0)
 787           if (value[--x] != buffer.value[i])
 788             return false;
 789         return true;
 790       }
 791   }
 792
 793   /**
 794    * Compares the given CharSequence to this String. This is true if
 795    * the CharSequence has the same content as this String at this
 796    * moment.
 797    *
 798    * @param seq the CharSequence to compare to
 799    * @return true if CharSequence has the same character sequence
 800    * @throws NullPointerException if the given CharSequence is null
 801    * @since 1.5
 802    */
 803   public boolean contentEquals(CharSequence seq)
 804   {
 805     if (seq.length() != count)
 806       return false;
 807     for (int i = 0; i < count; ++i)
 808       if (value[offset + i] != seq.charAt(i))
 809         return false;
 810     return true;
 811   }
 812
 813   /**
 814    * Compares a String to this String, ignoring case. This does not handle
 815    * multi-character capitalization exceptions; instead the comparison is
 816    * made on a character-by-character basis, and is true if:<br><ul>
 817    * <li><code>c1 == c2</code></li>
 818    * <li><code>Character.toUpperCase(c1)
 819    *     == Character.toUpperCase(c2)</code></li>
 820    * <li><code>Character.toLowerCase(c1)
 821    *     == Character.toLowerCase(c2)</code></li>
 822    * </ul>
 823    *
 824    * @param anotherString String to compare to this String
 825    * @return true if anotherString is equal, ignoring case
 826    * @see #equals(Object)
 827    * @see Character#toUpperCase(char)
 828    * @see Character#toLowerCase(char)
 829    */
 830   public boolean equalsIgnoreCase(String anotherString)
 831   {
 832     if (anotherString == null || count != anotherString.count)
 833       return false;
 834     int i = count;
 835     int x = offset;
 836     int y = anotherString.offset;
 837     while (--i >= 0)
 838       {
 839         char c1 = value[x++];
 840         char c2 = anotherString.value[y++];
 841         // Note that checking c1 != c2 is redundant, but avoids method calls.
 842         if (c1 != c2
 843             && Character.toUpperCase(c1) != Character.toUpperCase(c2)
 844             && Character.toLowerCase(c1) != Character.toLowerCase(c2))
 845           return false;
 846       }
 847     return true;
 848   }
 849
 850   /**
 851    * Compares this String and another String (case sensitive,
 852    * lexicographically). The result is less than 0 if this string sorts
 853    * before the other, 0 if they are equal, and greater than 0 otherwise.
 854    * After any common starting sequence is skipped, the result is
 855    * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings
 856    * have characters remaining, or
 857    * <code>this.length() - anotherString.length()</code> if one string is
 858    * a subsequence of the other.
 859    *
 860    * @param anotherString the String to compare against
 861    * @return the comparison
 862    * @throws NullPointerException if anotherString is null
 863    */
 864   public int compareTo(String anotherString)
 865   {
 866     int i = Math.min(count, anotherString.count);
 867     int x = offset;
 868     int y = anotherString.offset;
 869     while (--i >= 0)
 870       {
 871         int result = value[x++] - anotherString.value[y++];
 872         if (result != 0)
 873           return result;
 874       }
 875     return count - anotherString.count;
 876   }
 877
 878   /**
 879    * Behaves like <code>compareTo(java.lang.String)</code> unless the Object
 880    * is not a <code>String</code>.  Then it throws a
 881    * <code>ClassCastException</code>.
 882    *
 883    * @param o the object to compare against
 884    * @return the comparison
 885    * @throws NullPointerException if o is null
 886    * @throws ClassCastException if o is not a <code>String</code>
 887    * @since 1.2
 888    */
 889   public int compareTo(Object o)
 890   {
 891     return compareTo((String) o);
 892   }
 893
 894   /**
 895    * Compares this String and another String (case insensitive). This
 896    * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores
 897    * locale and multi-characater capitalization, and compares characters
 898    * after performing
 899    * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each
 900    * character of the string. This is unsatisfactory for locale-based
 901    * comparison, in which case you should use {@link java.text.Collator}.
 902    *
 903    * @param str the string to compare against
 904    * @return the comparison
 905    * @see Collator#compare(String, String)
 906    * @since 1.2
 907    */
 908   public int compareToIgnoreCase(String str)
 909   {
 910     int i = Math.min(count, str.count);
 911     int x = offset;
 912     int y = str.offset;
 913     while (--i >= 0)
 914       {
 915         int result = Character.toLowerCase(Character.toUpperCase(value[x++]))
 916           - Character.toLowerCase(Character.toUpperCase(str.value[y++]));
 917         if (result != 0)
 918           return result;
 919       }
 920     return count - str.count;
 921   }
 922
 923   /**
 924    * Predicate which determines if this String matches another String
 925    * starting at a specified offset for each String and continuing
 926    * for a specified length. Indices out of bounds are harmless, and give
 927    * a false result.
 928    *
 929    * @param toffset index to start comparison at for this String
 930    * @param other String to compare region to this String
 931    * @param ooffset index to start comparison at for other
 932    * @param len number of characters to compare
 933    * @return true if regions match (case sensitive)
 934    * @throws NullPointerException if other is null
 935    */
 936   public boolean regionMatches(int toffset, String other, int ooffset, int len)
 937   {
 938     return regionMatches(false, toffset, other, ooffset, len);
 939   }
 940
 941   /**
 942    * Predicate which determines if this String matches another String
 943    * starting at a specified offset for each String and continuing
 944    * for a specified length, optionally ignoring case. Indices out of bounds
 945    * are harmless, and give a false result. Case comparisons are based on
 946    * <code>Character.toLowerCase()</code> and
 947    * <code>Character.toUpperCase()</code>, not on multi-character
 948    * capitalization expansions.
 949    *
 950    * @param ignoreCase true if case should be ignored in comparision
 951    * @param toffset index to start comparison at for this String
 952    * @param other String to compare region to this String
 953    * @param ooffset index to start comparison at for other
 954    * @param len number of characters to compare
 955    * @return true if regions match, false otherwise
 956    * @throws NullPointerException if other is null
 957    */
 958   public boolean regionMatches(boolean ignoreCase, int toffset,
 959                                String other, int ooffset, int len)
 960   {
 961     if (toffset < 0 || ooffset < 0 || toffset + len > count
 962         || ooffset + len > other.count)
 963       return false;
 964     toffset += offset;
 965     ooffset += other.offset;
 966     while (--len >= 0)
 967       {
 968         char c1 = value[toffset++];
 969         char c2 = other.value[ooffset++];
 970         // Note that checking c1 != c2 is redundant when ignoreCase is true,
 971         // but it avoids method calls.
 972         if (c1 != c2
 973             && (! ignoreCase
 974                 || (Character.toLowerCase(c1) != Character.toLowerCase(c2)
 975                     && (Character.toUpperCase(c1)
 976                         != Character.toUpperCase(c2)))))
 977           return false;
 978       }
 979     return true;
 980   }
 981
 982   /**
 983    * Predicate which determines if this String contains the given prefix,
 984    * beginning comparison at toffset. The result is false if toffset is
 985    * negative or greater than this.length(), otherwise it is the same as
 986    * <code>this.substring(toffset).startsWith(prefix)</code>.
 987    *
 988    * @param prefix String to compare
 989    * @param toffset offset for this String where comparison starts
 990    * @return true if this String starts with prefix
 991    * @throws NullPointerException if prefix is null
 992    * @see #regionMatches(boolean, int, String, int, int)
 993    */
 994   public boolean startsWith(String prefix, int toffset)
 995   {
 996     return regionMatches(false, toffset, prefix, 0, prefix.count);
 997   }
 998
 999   /**
1000    * Predicate which determines if this String starts with a given prefix.
1001    * If the prefix is an empty String, true is returned.
1002    *
1003    * @param prefix String to compare
1004    * @return true if this String starts with the prefix
1005    * @throws NullPointerException if prefix is null
1006    * @see #startsWith(String, int)
1007    */
1008   public boolean startsWith(String prefix)
1009   {
1010     return regionMatches(false, 0, prefix, 0, prefix.count);
1011   }
1012
1013   /**
1014    * Predicate which determines if this String ends with a given suffix.
1015    * If the suffix is an empty String, true is returned.
1016    *
1017    * @param suffix String to compare
1018    * @return true if this String ends with the suffix
1019    * @throws NullPointerException if suffix is null
1020    * @see #regionMatches(boolean, int, String, int, int)
1021    */
1022   public boolean endsWith(String suffix)
1023   {
1024     return regionMatches(false, count - suffix.count, suffix, 0, suffix.count);
1025   }
1026
1027   /**
1028    * Computes the hashcode for this String. This is done with int arithmetic,
1029    * where ** represents exponentiation, by this formula:<br>
1030    * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>.
1031    *
1032    * @return hashcode value of this String
1033    */
1034   public int hashCode()
1035   {
1036     if (cachedHashCode != 0)
1037       return cachedHashCode;
1038
1039     // Compute the hash code using a local variable to be reentrant.
1040     int hashCode = 0;
1041     int limit = count + offset;
1042     for (int i = offset; i < limit; i++)
1043       hashCode = hashCode * 31 + value[i];
1044     return cachedHashCode = hashCode;
1045   }
1046
1047   /**
1048    * Finds the first instance of a character in this String.
1049    *
1050    * @param ch character to find
1051    * @return location (base 0) of the character, or -1 if not found
1052    */
1053   public int indexOf(int ch)
1054   {
1055     return indexOf(ch, 0);
1056   }
1057
1058   /**
1059    * Finds the first instance of a character in this String, starting at
1060    * a given index.  If starting index is less than 0, the search
1061    * starts at the beginning of this String.  If the starting index
1062    * is greater than the length of this String, -1 is returned.
1063    *
1064    * @param ch character to find
1065    * @param fromIndex index to start the search
1066    * @return location (base 0) of the character, or -1 if not found
1067    */
1068   public int indexOf(int ch, int fromIndex)
1069   {
1070     if ((char) ch != ch)
1071       return -1;
1072     if (fromIndex < 0)
1073       fromIndex = 0;
1074     int i = fromIndex + offset;
1075     for ( ; fromIndex < count; fromIndex++)
1076       if (value[i++] == ch)
1077         return fromIndex;
1078     return -1;
1079   }
1080
1081   /**
1082    * Finds the last instance of a character in this String.
1083    *
1084    * @param ch character to find
1085    * @return location (base 0) of the character, or -1 if not found
1086    */
1087   public int lastIndexOf(int ch)
1088   {
1089     return lastIndexOf(ch, count - 1);
1090   }
1091
1092   /**
1093    * Finds the last instance of a character in this String, starting at
1094    * a given index.  If starting index is greater than the maximum valid
1095    * index, then the search begins at the end of this String.  If the
1096    * starting index is less than zero, -1 is returned.
1097    *
1098    * @param ch character to find
1099    * @param fromIndex index to start the search
1100    * @return location (base 0) of the character, or -1 if not found
1101    */
1102   public int lastIndexOf(int ch, int fromIndex)
1103   {
1104     if ((char) ch != ch)
1105       return -1;
1106     if (fromIndex >= count)
1107       fromIndex = count - 1;
1108     int i = fromIndex + offset;
1109     for ( ; fromIndex >= 0; fromIndex--)
1110       if (value[i--] == ch)
1111         return fromIndex;
1112     return -1;
1113   }
1114
1115   /**
1116    * Finds the first instance of a String in this String.
1117    *
1118    * @param str String to find
1119    * @return location (base 0) of the String, or -1 if not found
1120    * @throws NullPointerException if str is null
1121    */
1122   public int indexOf(String str)
1123   {
1124     return indexOf(str, 0);
1125   }
1126
1127   /**
1128    * Finds the first instance of a String in this String, starting at
1129    * a given index.  If starting index is less than 0, the search
1130    * starts at the beginning of this String.  If the starting index
1131    * is greater than the length of this String, -1 is returned.
1132    *
1133    * @param str String to find
1134    * @param fromIndex index to start the search
1135    * @return location (base 0) of the String, or -1 if not found
1136    * @throws NullPointerException if str is null
1137    */
1138   public int indexOf(String str, int fromIndex)
1139   {
1140     if (fromIndex < 0)
1141       fromIndex = 0;
1142     int limit = count - str.count;
1143     for ( ; fromIndex <= limit; fromIndex++)
1144       if (regionMatches(fromIndex, str, 0, str.count))
1145         return fromIndex;
1146     return -1;
1147   }
1148
1149   /**
1150    * Finds the last instance of a String in this String.
1151    *
1152    * @param str String to find
1153    * @return location (base 0) of the String, or -1 if not found
1154    * @throws NullPointerException if str is null
1155    */
1156   public int lastIndexOf(String str)
1157   {
1158     return lastIndexOf(str, count - str.count);
1159   }
1160
1161   /**
1162    * Finds the last instance of a String in this String, starting at
1163    * a given index.  If starting index is greater than the maximum valid
1164    * index, then the search begins at the end of this String.  If the
1165    * starting index is less than zero, -1 is returned.
1166    *
1167    * @param str String to find
1168    * @param fromIndex index to start the search
1169    * @return location (base 0) of the String, or -1 if not found
1170    * @throws NullPointerException if str is null
1171    */
1172   public int lastIndexOf(String str, int fromIndex)
1173   {
1174     fromIndex = Math.min(fromIndex, count - str.count);
1175     for ( ; fromIndex >= 0; fromIndex--)
1176       if (regionMatches(fromIndex, str, 0, str.count))
1177         return fromIndex;
1178     return -1;
1179   }
1180
1181   /**
1182    * Creates a substring of this String, starting at a specified index
1183    * and ending at the end of this String.
1184    *
1185    * @param begin index to start substring (base 0)
1186    * @return new String which is a substring of this String
1187    * @throws IndexOutOfBoundsException if begin &lt; 0 || begin &gt; length()
1188    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1189    */
1190   public String substring(int begin)
1191   {
1192     return substring(begin, count);
1193   }
1194
1195   /**
1196    * Creates a substring of this String, starting at a specified index
1197    * and ending at one character before a specified index.
1198    *
1199    * @param beginIndex index to start substring (inclusive, base 0)
1200    * @param endIndex index to end at (exclusive)
1201    * @return new String which is a substring of this String
1202    * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1203    *         || begin &gt; end (while unspecified, this is a
1204    *         StringIndexOutOfBoundsException)
1205    */
1206   public String substring(int beginIndex, int endIndex)
1207   {
1208     if (beginIndex < 0 || endIndex > count || beginIndex > endIndex)
1209       throw new StringIndexOutOfBoundsException();
1210     if (beginIndex == 0 && endIndex == count)
1211       return this;
1212     int len = endIndex - beginIndex;
1213     // Package constructor avoids an array copy.
1214     return new String(value, beginIndex + offset, len,
1215                       (len << 2) >= value.length);
1216   }
1217
1218   /**
1219    * Creates a substring of this String, starting at a specified index
1220    * and ending at one character before a specified index. This behaves like
1221    * <code>substring(begin, end)</code>.
1222    *
1223    * @param begin index to start substring (inclusive, base 0)
1224    * @param end index to end at (exclusive)
1225    * @return new String which is a substring of this String
1226    * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1227    *         || begin &gt; end
1228    * @since 1.4
1229    */
1230   public CharSequence subSequence(int begin, int end)
1231   {
1232     return substring(begin, end);
1233   }
1234
1235   /**
1236    * Concatenates a String to this String. This results in a new string unless
1237    * one of the two originals is "".
1238    *
1239    * @param str String to append to this String
1240    * @return newly concatenated String
1241    * @throws NullPointerException if str is null
1242    */
1243   public String concat(String str)
1244   {
1245     if (str.count == 0)
1246       return this;
1247     if (count == 0)
1248       return str;
1249     char[] newStr = new char[count + str.count];
1250     VMSystem.arraycopy(value, offset, newStr, 0, count);
1251     VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count);
1252     // Package constructor avoids an array copy.
1253     return new String(newStr, 0, newStr.length, true);
1254   }
1255
1256   /**
1257    * Replaces every instance of a character in this String with a new
1258    * character. If no replacements occur, this is returned.
1259    *
1260    * @param oldChar the old character to replace
1261    * @param newChar the new character
1262    * @return new String with all instances of oldChar replaced with newChar
1263    */
1264   public String replace(char oldChar, char newChar)
1265   {
1266     if (oldChar == newChar)
1267       return this;
1268     int i = count;
1269     int x = offset - 1;
1270     while (--i >= 0)
1271       if (value[++x] == oldChar)
1272         break;
1273     if (i < 0)
1274       return this;
1275     char[] newStr = (char[]) value.clone();
1276     newStr[x] = newChar;
1277     while (--i >= 0)
1278       if (value[++x] == oldChar)
1279         newStr[x] = newChar;
1280     // Package constructor avoids an array copy.
1281     return new String(newStr, offset, count, true);
1282   }
1283
1284   /**
1285    * Test if this String matches a regular expression. This is shorthand for
1286    * <code>{@link Pattern}.matches(regex, this)</code>.
1287    *
1288    * @param regex the pattern to match
1289    * @return true if the pattern matches
1290    * @throws NullPointerException if regex is null
1291    * @throws PatternSyntaxException if regex is invalid
1292    * @see Pattern#matches(String, CharSequence)
1293    * @since 1.4
1294    */
1295   public boolean matches(String regex)
1296   {
1297     return Pattern.matches(regex, this);
1298   }
1299
1300   /**
1301    * Replaces the first substring match of the regular expression with a
1302    * given replacement. This is shorthand for <code>{@link Pattern}
1303    *   .compile(regex).matcher(this).replaceFirst(replacement)</code>.
1304    *
1305    * @param regex the pattern to match
1306    * @param replacement the replacement string
1307    * @return the modified string
1308    * @throws NullPointerException if regex or replacement is null
1309    * @throws PatternSyntaxException if regex is invalid
1310    * @see #replaceAll(String, String)
1311    * @see Pattern#compile(String)
1312    * @see Pattern#matcher(CharSequence)
1313    * @see Matcher#replaceFirst(String)
1314    * @since 1.4
1315    */
1316   public String replaceFirst(String regex, String replacement)
1317   {
1318     return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
1319   }
1320
1321   /**
1322    * Replaces all matching substrings of the regular expression with a
1323    * given replacement. This is shorthand for <code>{@link Pattern}
1324    *   .compile(regex).matcher(this).replaceAll(replacement)</code>.
1325    *
1326    * @param regex the pattern to match
1327    * @param replacement the replacement string
1328    * @return the modified string
1329    * @throws NullPointerException if regex or replacement is null
1330    * @throws PatternSyntaxException if regex is invalid
1331    * @see #replaceFirst(String, String)
1332    * @see Pattern#compile(String)
1333    * @see Pattern#matcher(CharSequence)
1334    * @see Matcher#replaceAll(String)
1335    * @since 1.4
1336    */
1337   public String replaceAll(String regex, String replacement)
1338   {
1339     return Pattern.compile(regex).matcher(this).replaceAll(replacement);
1340   }
1341
1342   /**
1343    * Split this string around the matches of a regular expression. Each
1344    * element of the returned array is the largest block of characters not
1345    * terminated by the regular expression, in the order the matches are found.
1346    *
1347    * <p>The limit affects the length of the array. If it is positive, the
1348    * array will contain at most n elements (n - 1 pattern matches). If
1349    * negative, the array length is unlimited, but there can be trailing empty
1350    * entries. if 0, the array length is unlimited, and trailing empty entries
1351    * are discarded.
1352    *
1353    * <p>For example, splitting "boo:and:foo" yields:<br>
1354    * <table border=0>
1355    * <th><td>Regex</td> <td>Limit</td> <td>Result</td></th>
1356    * <tr><td>":"</td>   <td>2</td>  <td>{ "boo", "and:foo" }</td></tr>
1357    * <tr><td>":"</td>   <td>t</td>  <td>{ "boo", "and", "foo" }</td></tr>
1358    * <tr><td>":"</td>   <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr>
1359    * <tr><td>"o"</td>   <td>5</td>  <td>{ "b", "", ":and:f", "", "" }</td></tr>
1360    * <tr><td>"o"</td>   <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr>
1361    * <tr><td>"o"</td>   <td>0</td>  <td>{ "b", "", ":and:f" }</td></tr>
1362    * </table>
1363    *
1364    * <p>This is shorthand for
1365    * <code>{@link Pattern}.compile(regex).split(this, limit)</code>.
1366    *
1367    * @param regex the pattern to match
1368    * @param limit the limit threshold
1369    * @return the array of split strings
1370    * @throws NullPointerException if regex or replacement is null
1371    * @throws PatternSyntaxException if regex is invalid
1372    * @see Pattern#compile(String)
1373    * @see Pattern#split(CharSequence, int)
1374    * @since 1.4
1375    */
1376   public String[] split(String regex, int limit)
1377   {
1378     return Pattern.compile(regex).split(this, limit);
1379   }
1380
1381   /**
1382    * Split this string around the matches of a regular expression. Each
1383    * element of the returned array is the largest block of characters not
1384    * terminated by the regular expression, in the order the matches are found.
1385    * The array length is unlimited, and trailing empty entries are discarded,
1386    * as though calling <code>split(regex, 0)</code>.
1387    *
1388    * @param regex the pattern to match
1389    * @return the array of split strings
1390    * @throws NullPointerException if regex or replacement is null
1391    * @throws PatternSyntaxException if regex is invalid
1392    * @see #split(String, int)
1393    * @see Pattern#compile(String)
1394    * @see Pattern#split(CharSequence, int)
1395    * @since 1.4
1396    */
1397   public String[] split(String regex)
1398   {
1399     return Pattern.compile(regex).split(this, 0);
1400   }
1401
1402   /**
1403    * Lowercases this String according to a particular locale. This uses
1404    * Unicode's special case mappings, as applied to the given Locale, so the
1405    * resulting string may be a different length.
1406    *
1407    * @param loc locale to use
1408    * @return new lowercased String, or this if no characters were lowercased
1409    * @throws NullPointerException if loc is null
1410    * @see #toUpperCase(Locale)
1411    * @since 1.1
1412    */
1413   public String toLowerCase(Locale loc)
1414   {
1415     // First, see if the current string is already lower case.
1416     boolean turkish = "tr".equals(loc.getLanguage());
1417     int i = count;
1418     int x = offset - 1;
1419     while (--i >= 0)
1420       {
1421         char ch = value[++x];
1422         if ((turkish && ch == '\u0049')
1423             || ch != Character.toLowerCase(ch))
1424           break;
1425       }
1426     if (i < 0)
1427       return this;
1428
1429     // Now we perform the conversion. Fortunately, there are no multi-character
1430     // lowercase expansions in Unicode 3.0.0.
1431     char[] newStr = (char[]) value.clone();
1432     do
1433       {
1434         char ch = value[x];
1435         // Hardcoded special case.
1436         newStr[x++] = (turkish && ch == '\u0049') ? '\u0131'
1437           : Character.toLowerCase(ch);
1438       }
1439     while (--i >= 0);
1440     // Package constructor avoids an array copy.
1441     return new String(newStr, offset, count, true);
1442   }
1443
1444   /**
1445    * Lowercases this String. This uses Unicode's special case mappings, as
1446    * applied to the platform's default Locale, so the resulting string may
1447    * be a different length.
1448    *
1449    * @return new lowercased String, or this if no characters were lowercased
1450    * @see #toLowerCase(Locale)
1451    * @see #toUpperCase()
1452    */
1453   public String toLowerCase()
1454   {
1455     return toLowerCase(Locale.getDefault());
1456   }
1457
1458   /**
1459    * Uppercases this String according to a particular locale. This uses
1460    * Unicode's special case mappings, as applied to the given Locale, so the
1461    * resulting string may be a different length.
1462    *
1463    * @param loc locale to use
1464    * @return new uppercased String, or this if no characters were uppercased
1465    * @throws NullPointerException if loc is null
1466    * @see #toLowerCase(Locale)
1467    * @since 1.1
1468    */
1469   public String toUpperCase(Locale loc)
1470   {
1471     // First, see how many characters we have to grow by, as well as if the
1472     // current string is already upper case.
1473     boolean turkish = "tr".equals(loc.getLanguage());
1474     int expand = 0;
1475     boolean unchanged = true;
1476     int i = count;
1477     int x = i + offset;
1478     while (--i >= 0)
1479       {
1480         char ch = value[--x];
1481         expand += upperCaseExpansion(ch);
1482         unchanged = (unchanged && expand == 0
1483                      && ! (turkish && ch == '\u0069')
1484                      && ch == Character.toUpperCase(ch));
1485       }
1486     if (unchanged)
1487       return this;
1488
1489     // Now we perform the conversion.
1490     i = count;
1491     if (expand == 0)
1492       {
1493         char[] newStr = (char[]) value.clone();
1494         while (--i >= 0)
1495           {
1496             char ch = value[x];
1497             // Hardcoded special case.
1498             newStr[x++] = (turkish && ch == '\u0069') ? '\u0130'
1499               : Character.toUpperCase(ch);
1500           }
1501         // Package constructor avoids an array copy.
1502         return new String(newStr, offset, count, true);
1503       }
1504
1505     // Expansion is necessary.
1506     char[] newStr = new char[count + expand];
1507     int j = 0;
1508     while (--i >= 0)
1509       {
1510         char ch = value[x++];
1511         // Hardcoded special case.
1512         if (turkish && ch == '\u0069')
1513           {
1514             newStr[j++] = '\u0130';
1515             continue;
1516           }
1517         expand = upperCaseExpansion(ch);
1518         if (expand > 0)
1519           {
1520             int index = upperCaseIndex(ch);
1521             while (expand-- >= 0)
1522               newStr[j++] = upperExpand[index++];
1523           }
1524         else
1525           newStr[j++] = Character.toUpperCase(ch);
1526       }
1527     // Package constructor avoids an array copy.
1528     return new String(newStr, 0, newStr.length, true);
1529   }
1530
1531   /**
1532    * Uppercases this String. This uses Unicode's special case mappings, as
1533    * applied to the platform's default Locale, so the resulting string may
1534    * be a different length.
1535    *
1536    * @return new uppercased String, or this if no characters were uppercased
1537    * @see #toUpperCase(Locale)
1538    * @see #toLowerCase()
1539    */
1540   public String toUpperCase()
1541   {
1542     return toUpperCase(Locale.getDefault());
1543   }
1544
1545   /**
1546    * Trims all characters less than or equal to <code>'\u0020'</code>
1547    * (<code>' '</code>) from the beginning and end of this String. This
1548    * includes many, but not all, ASCII control characters, and all
1549    * {@link Character#isWhitespace(char)}.
1550    *
1551    * @return new trimmed String, or this if nothing trimmed
1552    */
1553   public String trim()
1554   {
1555     int limit = count + offset;
1556     if (count == 0 || (value[offset] > '\u0020'
1557                        && value[limit - 1] > '\u0020'))
1558       return this;
1559     int begin = offset;
1560     do
1561       if (begin == limit)
1562         return "";
1563     while (value[begin++] <= '\u0020');
1564     int end = limit;
1565     while (value[--end] <= '\u0020');
1566     return substring(begin - offset - 1, end - offset + 1);
1567   }
1568
1569   /**
1570    * Returns this, as it is already a String!
1571    *
1572    * @return this
1573    */
1574   public String toString()
1575   {
1576     return this;
1577   }
1578
1579   /**
1580    * Copies the contents of this String into a character array. Subsequent
1581    * changes to the array do not affect the String.
1582    *
1583    * @return character array copying the String
1584    */
1585   public char[] toCharArray()
1586   {
1587     if (count == value.length)
1588       return (char[]) value.clone();
1589
1590     char[] copy = new char[count];
1591     VMSystem.arraycopy(value, offset, copy, 0, count);
1592     return copy;
1593   }
1594
1595   /**
1596    * Returns a String representation of an Object. This is "null" if the
1597    * object is null, otherwise it is <code>obj.toString()</code> (which
1598    * can be null).
1599    *
1600    * @param obj the Object
1601    * @return the string conversion of obj
1602    */
1603   public static String valueOf(Object obj)
1604   {
1605     return obj == null ? "null" : obj.toString();
1606   }
1607
1608   /**
1609    * Returns a String representation of a character array. Subsequent
1610    * changes to the array do not affect the String.
1611    *
1612    * @param data the character array
1613    * @return a String containing the same character sequence as data
1614    * @throws NullPointerException if data is null
1615    * @see #valueOf(char[], int, int)
1616    * @see #String(char[])
1617    */
1618   public static String valueOf(char[] data)
1619   {
1620     return valueOf (data, 0, data.length);
1621   }
1622
1623   /**
1624    * Returns a String representing the character sequence of the char array,
1625    * starting at the specified offset, and copying chars up to the specified
1626    * count. Subsequent changes to the array do not affect the String.
1627    *
1628    * @param data character array
1629    * @param offset position (base 0) to start copying out of data
1630    * @param count the number of characters from data to copy
1631    * @return String containing the chars from data[offset..offset+count]
1632    * @throws NullPointerException if data is null
1633    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1634    *         || offset + count &lt; 0 (overflow)
1635    *         || offset + count &gt; data.length)
1636    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1637    * @see #String(char[], int, int)
1638    */
1639   public static String valueOf(char[] data, int offset, int count)
1640   {
1641     return new String(data, offset, count, false);
1642   }
1643
1644   /**
1645    * Returns a String representing the character sequence of the char array,
1646    * starting at the specified offset, and copying chars up to the specified
1647    * count. Subsequent changes to the array do not affect the String.
1648    *
1649    * @param data character array
1650    * @param offset position (base 0) to start copying out of data
1651    * @param count the number of characters from data to copy
1652    * @return String containing the chars from data[offset..offset+count]
1653    * @throws NullPointerException if data is null
1654    * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1655    *         || offset + count &lt; 0 (overflow)
1656    *         || offset + count &gt; data.length)
1657    *         (while unspecified, this is a StringIndexOutOfBoundsException)
1658    * @see #String(char[], int, int)
1659    */
1660   public static String copyValueOf(char[] data, int offset, int count)
1661   {
1662     return new String(data, offset, count, false);
1663   }
1664
1665   /**
1666    * Returns a String representation of a character array. Subsequent
1667    * changes to the array do not affect the String.
1668    *
1669    * @param data the character array
1670    * @return a String containing the same character sequence as data
1671    * @throws NullPointerException if data is null
1672    * @see #copyValueOf(char[], int, int)
1673    * @see #String(char[])
1674    */
1675   public static String copyValueOf(char[] data)
1676   {
1677     return copyValueOf (data, 0, data.length);
1678   }
1679
1680   /**
1681    * Returns a String representing a boolean.
1682    *
1683    * @param b the boolean
1684    * @return "true" if b is true, else "false"
1685    */
1686   public static String valueOf(boolean b)
1687   {
1688     return b ? "true" : "false";
1689   }
1690
1691   /**
1692    * Returns a String representing a character.
1693    *
1694    * @param c the character
1695    * @return String containing the single character c
1696    */
1697   public static String valueOf(char c)
1698   {
1699     // Package constructor avoids an array copy.
1700     return new String(new char[] { c }, 0, 1, true);
1701   }
1702
1703   /**
1704    * Returns a String representing an integer.
1705    *
1706    * @param i the integer
1707    * @return String containing the integer in base 10
1708    * @see Integer#toString(int)
1709    */
1710   public static String valueOf(int i)
1711   {
1712     // See Integer to understand why we call the two-arg variant.
1713     return Integer.toString(i, 10);
1714   }
1715
1716   /**
1717    * Returns a String representing a long.
1718    *
1719    * @param l the long
1720    * @return String containing the long in base 10
1721    * @see Long#toString(long)
1722    */
1723   public static String valueOf(long l)
1724   {
1725     return Long.toString(l);
1726   }
1727
1728   /**
1729    * Returns a String representing a float.
1730    *
1731    * @param f the float
1732    * @return String containing the float
1733    * @see Float#toString(float)
1734    */
1735   public static String valueOf(float f)
1736   {
1737     return Float.toString(f);
1738   }
1739
1740   /**
1741    * Returns a String representing a double.
1742    *
1743    * @param d the double
1744    * @return String containing the double
1745    * @see Double#toString(double)
1746    */
1747   public static String valueOf(double d)
1748   {
1749     return Double.toString(d);
1750   }
1751
1752   /**
1753    * If two Strings are considered equal, by the equals() method,
1754    * then intern() will return the same String instance. ie.
1755    * if (s1.equals(s2)) then (s1.intern() == s2.intern()).
1756    * All string literals and string-valued constant expressions
1757    * are already interned.
1758    *
1759    * @return the interned String
1760    */
1761   public String intern()
1762   {
1763     return VMString.intern(this);
1764   }
1765
1766   /**
1767    * Return the number of code points between two indices in the
1768    * <code>String</code>.  An unpaired surrogate counts as a
1769    * code point for this purpose.  Characters outside the indicated
1770    * range are not examined, even if the range ends in the middle of a
1771    * surrogate pair.
1772    *
1773    * @param start the starting index
1774    * @param end one past the ending index
1775    * @return the number of code points
1776    * @since 1.5
1777    */
1778   public synchronized int codePointCount(int start, int end)
1779   {
1780     if (start < 0 || end >= count || start > end)
1781       throw new StringIndexOutOfBoundsException();
1782
1783     start += offset;
1784     end += offset;
1785     int count = 0;
1786     while (start < end)
1787       {
1788         char base = value[start];
1789         if (base < Character.MIN_HIGH_SURROGATE
1790             || base > Character.MAX_HIGH_SURROGATE
1791             || start == end
1792             || start == count
1793             || value[start + 1] < Character.MIN_LOW_SURROGATE
1794             || value[start + 1] > Character.MAX_LOW_SURROGATE)
1795           {
1796             // Nothing.
1797           }
1798         else
1799           {
1800             // Surrogate pair.
1801             ++start;
1802           }
1803         ++start;
1804         ++count;
1805       }
1806     return count;
1807   }
1808
1809   /**
1810    * Helper function used to detect which characters have a multi-character
1811    * uppercase expansion. Note that this is only used in locations which
1812    * track one-to-many capitalization (java.lang.Character does not do this).
1813    * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the
1814    * longest uppercase expansion is three characters (a growth of 2 from the
1815    * lowercase character).
1816    *
1817    * @param ch the char to check
1818    * @return the number of characters to add when converting to uppercase
1819    * @see CharData#DIRECTION
1820    * @see CharData#UPPER_SPECIAL
1821    * @see #toUpperCase(Locale)
1822    */
1823   private static int upperCaseExpansion(char ch)
1824   {
1825     return Character.direction[Character.readChar(ch) >> 7] & 3;
1826   }
1827
1828   /**
1829    * Helper function used to locate the offset in upperExpand given a
1830    * character with a multi-character expansion. The binary search is
1831    * optimized under the assumption that this method will only be called on
1832    * characters which exist in upperSpecial.
1833    *
1834    * @param ch the char to check
1835    * @return the index where its expansion begins
1836    * @see CharData#UPPER_SPECIAL
1837    * @see CharData#UPPER_EXPAND
1838    * @see #toUpperCase(Locale)
1839    */
1840   private static int upperCaseIndex(char ch)
1841   {
1842     // Simple binary search for the correct character.
1843     int low = 0;
1844     int hi = upperSpecial.length - 2;
1845     int mid = ((low + hi) >> 2) << 1;
1846     char c = upperSpecial[mid];
1847     while (ch != c)
1848       {
1849         if (ch < c)
1850           hi = mid - 2;
1851         else
1852           low = mid + 2;
1853         mid = ((low + hi) >> 2) << 1;
1854         c = upperSpecial[mid];
1855       }
1856     return upperSpecial[mid + 1];
1857   }
1858
1859   /**
1860    * Returns the value array of the given string if it is zero based or a
1861    * copy of it that is zero based (stripping offset and making length equal
1862    * to count). Used for accessing the char[]s of gnu.java.lang.CharData.
1863    * Package private for use in Character.
1864    */
1865   static char[] zeroBasedStringValue(String s)
1866   {
1867     char[] value;
1868
1869     if (s.offset == 0 && s.count == s.value.length)
1870       value = s.value;
1871     else
1872       {
1873         int count = s.count;
1874         value = new char[count];
1875         VMSystem.arraycopy(s.value, s.offset, value, 0, count);
1876       }
1877
1878     return value;
1879   }
1880
1881   /**
1882    * Returns true iff this String contains the sequence of Characters
1883    * described in s.
1884    * @param s the CharSequence
1885    * @return true iff this String contains s
1886    *
1887    * @since 1.5
1888    */
1889   public boolean contains (CharSequence s)
1890   {
1891     return this.indexOf(s.toString()) != -1;
1892   }
1893
1894   /**
1895    * Returns a string that is this string with all instances of the sequence
1896    * represented by <code>target</code> replaced by the sequence in
1897    * <code>replacement</code>.
1898    * @param target the sequence to be replaced
1899    * @param replacement the sequence used as the replacement
1900    * @return the string constructed as above
1901    */
1902   public String replace (CharSequence target, CharSequence replacement)
1903   {
1904     String targetString = target.toString();
1905     String replaceString = replacement.toString();
1906     int targetLength = target.length();
1907     int replaceLength = replacement.length();
1908
1909     int startPos = this.indexOf(targetString);
1910     StringBuilder result = new StringBuilder(this);
1911     while (startPos != -1)
1912       {
1913         // Replace the target with the replacement
1914         result.replace(startPos, startPos + targetLength, replaceString);
1915
1916         // Search for a new occurrence of the target
1917         startPos = result.indexOf(targetString, startPos + replaceLength);
1918       }
1919     return result.toString();
1920   }
1921 }