libjava/ChangeLog:
[official-gcc.git] / libjava / classpath / java / lang / String.java
blob95c88399d3ca99c162b4207b65676b2707f8d95d
1 /* String.java -- immutable character sequences; the object of string literals
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
5 This file is part of GNU Classpath.
7 GNU Classpath is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU Classpath is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Classpath; see the file COPYING. If not, write to the
19 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA.
22 Linking this library statically or dynamically with other modules is
23 making a combined work based on this library. Thus, the terms and
24 conditions of the GNU General Public License cover the whole
25 combination.
27 As a special exception, the copyright holders of this library give you
28 permission to link this library with independent modules to produce an
29 executable, regardless of the license terms of these independent
30 modules, and to copy and distribute the resulting executable under
31 terms of your choice, provided that you also meet, for each linked
32 independent module, the terms and conditions of the license of that
33 module. An independent module is a module which is not derived from
34 or based on this library. If you modify this library, you may extend
35 this exception to your version of the library, but you are not
36 obligated to do so. If you do not wish to do so, delete this
37 exception statement from your version. */
40 package java.lang;
42 import gnu.java.lang.CharData;
43 import gnu.java.lang.CPStringBuilder;
45 import java.io.Serializable;
46 import java.io.UnsupportedEncodingException;
47 import java.nio.ByteBuffer;
48 import java.nio.CharBuffer;
49 import java.nio.charset.CharacterCodingException;
50 import java.nio.charset.Charset;
51 import java.nio.charset.CharsetDecoder;
52 import java.nio.charset.CharsetEncoder;
53 import java.nio.charset.CodingErrorAction;
54 import java.nio.charset.IllegalCharsetNameException;
55 import java.nio.charset.UnsupportedCharsetException;
56 import java.text.Collator;
57 import java.util.Comparator;
58 import java.util.Formatter;
59 import java.util.Locale;
60 import java.util.regex.Matcher;
61 import java.util.regex.Pattern;
62 import java.util.regex.PatternSyntaxException;
64 /**
65 * Strings represent an immutable sequence of characters. All String literals
66 * are instances of this class, and two string literals with the same contents
67 * refer to the same String object.
69 * <p>This class also includes a number of methods for manipulating the
70 * contents of strings (of course, creating a new object if there are any
71 * changes, as String is immutable). Case mapping relies on Unicode 3.0.0
72 * standards, where some character sequences have a different number of
73 * characters in the uppercase version than the lower case.
75 * <p>Strings are special, in that they are the only object with an overloaded
76 * operator. When you use '+' with at least one String argument, both
77 * arguments have String conversion performed on them, and another String (not
78 * guaranteed to be unique) results.
80 * <p>String is special-cased when doing data serialization - rather than
81 * listing the fields of this class, a String object is converted to a string
82 * literal in the object stream.
84 * @author Paul N. Fisher
85 * @author Eric Blake (ebb9@email.byu.edu)
86 * @author Per Bothner (bothner@cygnus.com)
87 * @author Tom Tromey (tromey@redhat.com)
88 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
89 * @since 1.0
90 * @status updated to 1.4; but could use better data sharing via offset field
92 public final class String
93 implements Serializable, Comparable<String>, CharSequence
95 // WARNING: String is a CORE class in the bootstrap cycle. See the comments
96 // in vm/reference/java/lang/Runtime for implications of this fact.
98 /**
99 * This is probably not necessary because this class is special cased already
100 * but it will avoid showing up as a discrepancy when comparing SUIDs.
102 private static final long serialVersionUID = -6849794470754667710L;
105 * Stores unicode multi-character uppercase expansion table.
106 * @see #toUpperCase(Locale)
107 * @see CharData#UPPER_EXPAND
109 private static final char[] upperExpand
110 = zeroBasedStringValue(CharData.UPPER_EXPAND);
113 * Stores unicode multi-character uppercase special casing table.
114 * @see #upperCaseExpansion(char)
115 * @see CharData#UPPER_SPECIAL
117 private static final char[] upperSpecial
118 = zeroBasedStringValue(CharData.UPPER_SPECIAL);
121 * Characters which make up the String.
122 * Package access is granted for use by StringBuffer.
124 final char[] value;
127 * Holds the number of characters in value. This number is generally
128 * the same as value.length, but can be smaller because substrings and
129 * StringBuffers can share arrays. Package visible for use by trusted code.
131 final int count;
134 * Caches the result of hashCode(). If this value is zero, the hashcode
135 * is considered uncached (even if 0 is the correct hash value).
137 private int cachedHashCode;
140 * Holds the starting position for characters in value[]. Since
141 * substring()'s are common, the use of offset allows the operation
142 * to perform in O(1). Package access is granted for use by StringBuffer.
144 final int offset;
147 * An implementation for {@link #CASE_INSENSITIVE_ORDER}.
148 * This must be {@link Serializable}. The class name is dictated by
149 * compatibility with Sun's JDK.
151 private static final class CaseInsensitiveComparator
152 implements Comparator<String>, Serializable
155 * Compatible with JDK 1.2.
157 private static final long serialVersionUID = 8575799808933029326L;
160 * The default private constructor generates unnecessary overhead.
162 CaseInsensitiveComparator() {}
165 * Compares to Strings, using
166 * <code>String.compareToIgnoreCase(String)</code>.
168 * @param o1 the first string
169 * @param o2 the second string
170 * @return &lt; 0, 0, or &gt; 0 depending on the case-insensitive
171 * comparison of the two strings.
172 * @throws NullPointerException if either argument is null
173 * @throws ClassCastException if either argument is not a String
174 * @see #compareToIgnoreCase(String)
176 public int compare(String o1, String o2)
178 return o1.compareToIgnoreCase(o2);
180 } // class CaseInsensitiveComparator
183 * A Comparator that uses <code>String.compareToIgnoreCase(String)</code>.
184 * This comparator is {@link Serializable}. Note that it ignores Locale,
185 * for that, you want a Collator.
187 * @see Collator#compare(String, String)
188 * @since 1.2
190 public static final Comparator<String> CASE_INSENSITIVE_ORDER
191 = new CaseInsensitiveComparator();
/**
 * Creates an empty String (length 0). Unless a distinct object is really
 * required, prefer the literal <code>""</code>.
 */
public String()
{
  // Reuse the backing array of the empty literal instead of allocating.
  count = 0;
  offset = 0;
  value = "".value;
}
/**
 * Creates a String with the same contents as another String. Because
 * Strings are immutable the backing array is shared rather than copied,
 * and the cached hash code is carried over as well.
 *
 * @param str String to copy
 * @throws NullPointerException if str is null
 */
public String(String str)
{
  count = str.count;
  offset = str.offset;
  value = str.value;
  cachedHashCode = str.cachedHashCode;
}
/**
 * Creates a new String holding a copy of the whole char array. Later
 * changes to <code>data</code> do not affect the String.
 *
 * @param data char array to copy
 * @throws NullPointerException if data is null
 */
public String(char[] data)
{
  // dont_copy == false: the array is untrusted, so it is always copied.
  this(data, 0, data.length, false);
}
/**
 * Creates a new String holding a copy of a subarray of characters: it
 * starts at <code>offset</code> and copies <code>count</code> chars.
 * Later changes to <code>data</code> do not affect the String.
 *
 * @param data char array to copy
 * @param offset position (base 0) to start copying out of data
 * @param count the number of characters from data to copy
 * @throws NullPointerException if data is null
 * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
 *         || offset + count &lt; 0 (overflow)
 *         || offset + count &gt; data.length)
 *         (while unspecified, this is a StringIndexOutOfBoundsException)
 */
public String(char[] data, int offset, int count)
{
  // dont_copy == false: the array is untrusted, so it is always copied.
  this(data, offset, count, false);
}
/**
 * Creates a new String from a range of an 8-bit array, combining every
 * byte b with a fixed high byte as if by:
 *
 * <pre>
 * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 * </pre>
 *
 * @param ascii array of integer values
 * @param hibyte top byte of each Unicode character
 * @param offset position (base 0) to start copying out of ascii
 * @param count the number of characters from ascii to copy
 * @throws NullPointerException if ascii is null
 * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
 *         || offset + count &lt; 0 (overflow)
 *         || offset + count &gt; ascii.length)
 *         (while unspecified, this is a StringIndexOutOfBoundsException)
 * @see #String(byte[])
 * @see #String(byte[], String)
 * @see #String(byte[], int, int)
 * @see #String(byte[], int, int, String)
 * @deprecated use {@link #String(byte[], int, int, String)} to perform
 *             correct encoding
 */
public String(byte[] ascii, int hibyte, int offset, int count)
{
  if (offset < 0)
    throw new StringIndexOutOfBoundsException("offset: " + offset);
  if (count < 0)
    throw new StringIndexOutOfBoundsException("count: " + count);
  // Overflow-safe form of: offset + count < 0 || offset + count > ascii.length
  if (ascii.length - offset < count)
    throw new StringIndexOutOfBoundsException("offset + count: "
                                              + (offset + count));

  final char[] chars = new char[count];
  // The cast to char below keeps only the low 16 bits, so bits of hibyte
  // above the low byte are discarded after the shift.
  final int high = hibyte << 8;
  for (int i = 0; i < count; i++)
    chars[i] = (char) (high | (ascii[offset + i] & 0xff));

  value = chars;
  this.offset = 0;
  this.count = count;
}
/**
 * Creates a new String from a whole 8-bit array, combining every byte b
 * with a fixed high byte as if by:
 *
 * <pre>
 * c = (char) (((hibyte &amp; 0xff) &lt;&lt; 8) | (b &amp; 0xff))
 * </pre>
 *
 * @param ascii array of integer values
 * @param hibyte top byte of each Unicode character
 * @throws NullPointerException if ascii is null
 * @see #String(byte[])
 * @see #String(byte[], String)
 * @see #String(byte[], int, int)
 * @see #String(byte[], int, int, String)
 * @see #String(byte[], int, int, int)
 * @deprecated use {@link #String(byte[], String)} to perform
 *             correct encoding
 */
public String(byte[] ascii, int hibyte)
{
  // Whole-array convenience form of the ranged constructor.
  this(ascii, hibyte, 0, ascii.length);
}
320 * Creates a new String using the portion of the byte array starting at the
321 * offset and ending at offset + count. Uses the specified encoding type
322 * to decode the byte array, so the resulting string may be longer or
323 * shorter than the byte array. For more decoding control, use
324 * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
325 * see {@link java.nio.charset.Charset}. The behavior is not specified if
326 * the decoder encounters invalid characters; this implementation throws
327 * an Error.
329 * @param data byte array to copy
330 * @param offset the offset to start at
331 * @param count the number of bytes in the array to use
332 * @param encoding the name of the encoding to use
333 * @throws NullPointerException if data or encoding is null
334 * @throws IndexOutOfBoundsException if offset or count is incorrect
335 * (while unspecified, this is a StringIndexOutOfBoundsException)
336 * @throws UnsupportedEncodingException if encoding is not found
337 * @throws Error if the decoding fails
338 * @since 1.1
340 public String(byte[] data, int offset, int count, String encoding)
341 throws UnsupportedEncodingException
343 if (offset < 0)
344 throw new StringIndexOutOfBoundsException("offset: " + offset);
345 if (count < 0)
346 throw new StringIndexOutOfBoundsException("count: " + count);
347 // equivalent to: offset + count < 0 || offset + count > data.length
348 if (data.length - offset < count)
349 throw new StringIndexOutOfBoundsException("offset + count: "
350 + (offset + count));
351 try
353 CharsetDecoder csd = Charset.forName(encoding).newDecoder();
354 csd.onMalformedInput(CodingErrorAction.REPLACE);
355 csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
356 CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
357 if(cbuf.hasArray())
359 value = cbuf.array();
360 this.offset = cbuf.position();
361 this.count = cbuf.remaining();
362 } else {
363 // Doubt this will happen. But just in case.
364 value = new char[cbuf.remaining()];
365 cbuf.get(value);
366 this.offset = 0;
367 this.count = value.length;
369 } catch(CharacterCodingException e){
370 throw new UnsupportedEncodingException("Encoding: "+encoding+
371 " not found.");
372 } catch(IllegalCharsetNameException e){
373 throw new UnsupportedEncodingException("Encoding: "+encoding+
374 " not found.");
375 } catch(UnsupportedCharsetException e){
376 throw new UnsupportedEncodingException("Encoding: "+encoding+
377 " not found.");
/**
 * Creates a new String by decoding the whole byte array with the named
 * encoding, so the resulting string may be longer or shorter than the
 * byte array. This delegates to
 * {@link #String(byte[], int, int, String)} with the full array range.
 * For more decoding control, use
 * {@link java.nio.charset.CharsetDecoder}, and for valid character sets,
 * see {@link java.nio.charset.Charset}.
 *
 * @param data byte array to copy
 * @param encoding the name of the encoding to use
 * @throws NullPointerException if data is null
 * @throws UnsupportedEncodingException if encoding is not found
 * @see #String(byte[], int, int, String)
 * @since 1.1
 */
public String(byte[] data, String encoding)
  throws UnsupportedEncodingException
{
  // Whole-array convenience form of the ranged constructor.
  this(data, 0, data.length, encoding);
}
405 * Creates a new String using the portion of the byte array starting at the
406 * offset and ending at offset + count. Uses the encoding of the platform's
407 * default charset, so the resulting string may be longer or shorter than
408 * the byte array. For more decoding control, use
409 * {@link java.nio.charset.CharsetDecoder}. The behavior is not specified
410 * if the decoder encounters invalid characters; this implementation throws
411 * an Error.
413 * @param data byte array to copy
414 * @param offset the offset to start at
415 * @param count the number of bytes in the array to use
416 * @throws NullPointerException if data is null
417 * @throws IndexOutOfBoundsException if offset or count is incorrect
418 * @throws Error if the decoding fails
419 * @see #String(byte[], int, int, String)
420 * @since 1.1
422 public String(byte[] data, int offset, int count)
424 if (offset < 0)
425 throw new StringIndexOutOfBoundsException("offset: " + offset);
426 if (count < 0)
427 throw new StringIndexOutOfBoundsException("count: " + count);
428 // equivalent to: offset + count < 0 || offset + count > data.length
429 if (data.length - offset < count)
430 throw new StringIndexOutOfBoundsException("offset + count: "
431 + (offset + count));
432 int o, c;
433 char[] v;
434 String encoding;
435 try
437 encoding = System.getProperty("file.encoding");
438 CharsetDecoder csd = Charset.forName(encoding).newDecoder();
439 csd.onMalformedInput(CodingErrorAction.REPLACE);
440 csd.onUnmappableCharacter(CodingErrorAction.REPLACE);
441 CharBuffer cbuf = csd.decode(ByteBuffer.wrap(data, offset, count));
442 if(cbuf.hasArray())
444 v = cbuf.array();
445 o = cbuf.position();
446 c = cbuf.remaining();
447 } else {
448 // Doubt this will happen. But just in case.
449 v = new char[cbuf.remaining()];
450 cbuf.get(v);
451 o = 0;
452 c = v.length;
454 } catch(Exception ex){
455 // If anything goes wrong (System property not set,
456 // NIO provider not available, etc)
457 // Default to the 'safe' encoding ISO8859_1
458 v = new char[count];
459 o = 0;
460 c = count;
461 for (int i=0;i<count;i++)
462 v[i] = (char)data[offset+i];
464 this.value = v;
465 this.offset = o;
466 this.count = c;
/**
 * Creates a new String by decoding the whole byte array with the
 * platform's default charset, so the resulting string may be longer or
 * shorter than the byte array. This delegates to
 * {@link #String(byte[], int, int)} with the full array range. For more
 * decoding control, use {@link java.nio.charset.CharsetDecoder}.
 *
 * @param data byte array to copy
 * @throws NullPointerException if data is null
 * @see #String(byte[], int, int)
 * @see #String(byte[], int, int, String)
 * @since 1.1
 */
public String(byte[] data)
{
  // Whole-array convenience form of the ranged constructor.
  this(data, 0, data.length);
}
490 * Creates a new String using the character sequence represented by
491 * the StringBuffer. Subsequent changes to buf do not affect the String.
493 * @param buffer StringBuffer to copy
494 * @throws NullPointerException if buffer is null
496 public String(StringBuffer buffer)
498 synchronized (buffer)
500 offset = 0;
501 count = buffer.count;
502 // Share unless buffer is 3/4 empty.
503 if ((count << 2) < buffer.value.length)
505 value = new char[count];
506 VMSystem.arraycopy(buffer.value, 0, value, 0, count);
508 else
510 buffer.shared = true;
511 value = buffer.value;
/**
 * Creates a new String with the character sequence currently held by the
 * StringBuilder. The characters are copied, so subsequent changes to the
 * builder do not affect the String.
 *
 * @param buffer StringBuilder to copy
 * @throws NullPointerException if buffer is null
 */
public String(StringBuilder buffer)
{
  // Goes through the copying char[] constructor; nothing is shared.
  this(buffer.value, 0, buffer.count);
}
529 * Special constructor which can share an array when safe to do so.
531 * @param data the characters to copy
532 * @param offset the location to start from
533 * @param count the number of characters to use
534 * @param dont_copy true if the array is trusted, and need not be copied
535 * @throws NullPointerException if chars is null
536 * @throws StringIndexOutOfBoundsException if bounds check fails
538 String(char[] data, int offset, int count, boolean dont_copy)
540 if (offset < 0)
541 throw new StringIndexOutOfBoundsException("offset: " + offset);
542 if (count < 0)
543 throw new StringIndexOutOfBoundsException("count: " + count);
544 // equivalent to: offset + count < 0 || offset + count > data.length
545 if (data.length - offset < count)
546 throw new StringIndexOutOfBoundsException("offset + count: "
547 + (offset + count));
548 if (dont_copy)
550 value = data;
551 this.offset = offset;
553 else
555 value = new char[count];
556 VMSystem.arraycopy(data, offset, value, 0, count);
557 this.offset = 0;
559 this.count = count;
563 * Creates a new String containing the characters represented in the
564 * given subarray of Unicode code points.
565 * @param codePoints the entire array of code points
566 * @param offset the start of the subarray
567 * @param count the length of the subarray
569 * @throws IllegalArgumentException if an invalid code point is found
570 * in the codePoints array
571 * @throws IndexOutOfBoundsException if offset is negative or offset + count
572 * is greater than the length of the array.
574 public String(int[] codePoints, int offset, int count)
576 // FIXME: This implementation appears to give correct internal
577 // representation of the String because:
578 // - length() is correct
579 // - getting a char[] from toCharArray() and testing
580 // Character.codePointAt() on all the characters in that array gives
581 // the appropriate results
582 // however printing the String gives incorrect results. This may be
583 // due to printing method errors (such as incorrectly looping through
584 // the String one char at a time rather than one "character" at a time.
586 if (offset < 0)
587 throw new IndexOutOfBoundsException();
588 int end = offset + count;
589 int pos = 0;
590 // This creates a char array that is long enough for all of the code
591 // points to represent supplementary characters. This is more than likely
592 // a waste of storage, so we use it only temporarily and then copy the
593 // used portion into the value array.
594 char[] temp = new char[2 * codePoints.length];
595 for (int i = offset; i < end; i++)
597 pos += Character.toChars(codePoints[i], temp, pos);
599 this.count = pos;
600 this.value = new char[pos];
601 System.arraycopy(temp, 0, value, 0, pos);
602 this.offset = 0;
606 * Returns the number of characters contained in this String.
608 * @return the length of this String
610 public int length()
612 return count;
616 * Returns the character located at the specified index within this String.
618 * @param index position of character to return (base 0)
619 * @return character located at position index
620 * @throws IndexOutOfBoundsException if index &lt; 0 || index &gt;= length()
621 * (while unspecified, this is a StringIndexOutOfBoundsException)
623 public char charAt(int index)
625 if (index < 0 || index >= count)
626 throw new StringIndexOutOfBoundsException(index);
627 return value[offset + index];
631 * Get the code point at the specified index. This is like #charAt(int),
632 * but if the character is the start of a surrogate pair, and the
633 * following character completes the pair, then the corresponding
634 * supplementary code point is returned.
635 * @param index the index of the codepoint to get, starting at 0
636 * @return the codepoint at the specified index
637 * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
638 * @since 1.5
640 public synchronized int codePointAt(int index)
642 // Use the CharSequence overload as we get better range checking
643 // this way.
644 return Character.codePointAt(this, index);
648 * Get the code point before the specified index. This is like
649 * #codePointAt(int), but checks the characters at <code>index-1</code> and
650 * <code>index-2</code> to see if they form a supplementary code point.
651 * @param index the index just past the codepoint to get, starting at 0
652 * @return the codepoint at the specified index
653 * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
654 * (while unspecified, this is a StringIndexOutOfBoundsException)
655 * @since 1.5
657 public synchronized int codePointBefore(int index)
659 // Use the CharSequence overload as we get better range checking
660 // this way.
661 return Character.codePointBefore(this, index);
665 * Copies characters from this String starting at a specified start index,
666 * ending at a specified stop index, to a character array starting at
667 * a specified destination begin index.
669 * @param srcBegin index to begin copying characters from this String
670 * @param srcEnd index after the last character to be copied from this String
671 * @param dst character array which this String is copied into
672 * @param dstBegin index to start writing characters into dst
673 * @throws NullPointerException if dst is null
674 * @throws IndexOutOfBoundsException if any indices are out of bounds
675 * (while unspecified, source problems cause a
676 * StringIndexOutOfBoundsException, and dst problems cause an
677 * ArrayIndexOutOfBoundsException)
679 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
681 if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
682 throw new StringIndexOutOfBoundsException();
683 VMSystem.arraycopy(value, srcBegin + offset,
684 dst, dstBegin, srcEnd - srcBegin);
688 * Copies the low byte of each character from this String starting at a
689 * specified start index, ending at a specified stop index, to a byte array
690 * starting at a specified destination begin index.
692 * @param srcBegin index to being copying characters from this String
693 * @param srcEnd index after the last character to be copied from this String
694 * @param dst byte array which each low byte of this String is copied into
695 * @param dstBegin index to start writing characters into dst
696 * @throws NullPointerException if dst is null and copy length is non-zero
697 * @throws IndexOutOfBoundsException if any indices are out of bounds
698 * (while unspecified, source problems cause a
699 * StringIndexOutOfBoundsException, and dst problems cause an
700 * ArrayIndexOutOfBoundsException)
701 * @see #getBytes()
702 * @see #getBytes(String)
703 * @deprecated use {@link #getBytes()}, which uses a char to byte encoder
705 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin)
707 if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
708 throw new StringIndexOutOfBoundsException();
709 int i = srcEnd - srcBegin;
710 srcBegin += offset;
711 while (--i >= 0)
712 dst[dstBegin++] = (byte) value[srcBegin++];
716 * Converts the Unicode characters in this String to a byte array. Uses the
717 * specified encoding method, so the result may be longer or shorter than
718 * the String. For more encoding control, use
719 * {@link java.nio.charset.CharsetEncoder}, and for valid character sets,
720 * see {@link java.nio.charset.Charset}. Unsupported characters get
721 * replaced by an encoding specific byte.
723 * @param enc encoding name
724 * @return the resulting byte array
725 * @throws NullPointerException if enc is null
726 * @throws UnsupportedEncodingException if encoding is not supported
727 * @since 1.1
729 public byte[] getBytes(String enc) throws UnsupportedEncodingException
731 try
733 CharsetEncoder cse = Charset.forName(enc).newEncoder();
734 cse.onMalformedInput(CodingErrorAction.REPLACE);
735 cse.onUnmappableCharacter(CodingErrorAction.REPLACE);
736 ByteBuffer bbuf = cse.encode(CharBuffer.wrap(value, offset, count));
737 if(bbuf.hasArray())
738 return bbuf.array();
740 // Doubt this will happen. But just in case.
741 byte[] bytes = new byte[bbuf.remaining()];
742 bbuf.get(bytes);
743 return bytes;
745 catch(IllegalCharsetNameException e)
747 throw new UnsupportedEncodingException("Encoding: " + enc
748 + " not found.");
750 catch(UnsupportedCharsetException e)
752 throw new UnsupportedEncodingException("Encoding: " + enc
753 + " not found.");
755 catch(CharacterCodingException e)
757 // This shouldn't ever happen.
758 throw (InternalError) new InternalError().initCause(e);
763 * Converts the Unicode characters in this String to a byte array. Uses the
764 * encoding of the platform's default charset, so the result may be longer
765 * or shorter than the String. For more encoding control, use
766 * {@link java.nio.charset.CharsetEncoder}. Unsupported characters get
767 * replaced by an encoding specific byte.
769 * @return the resulting byte array, or null on a problem
770 * @since 1.1
772 public byte[] getBytes()
774 try
776 return getBytes(System.getProperty("file.encoding"));
777 } catch(Exception e) {
778 // XXX - Throw an error here?
779 // For now, default to the 'safe' encoding.
780 byte[] bytes = new byte[count];
781 for(int i=0;i<count;i++)
782 bytes[i] = (byte)((value[offset+i] <= 0xFF)?
783 value[offset+i]:'?');
784 return bytes;
789 * Predicate which compares anObject to this. This is true only for Strings
790 * with the same character sequence.
792 * @param anObject the object to compare
793 * @return true if anObject is semantically equal to this
794 * @see #compareTo(String)
795 * @see #equalsIgnoreCase(String)
797 public boolean equals(Object anObject)
799 if (! (anObject instanceof String))
800 return false;
801 String str2 = (String) anObject;
802 if (count != str2.count)
803 return false;
804 if (value == str2.value && offset == str2.offset)
805 return true;
806 int i = count;
807 int x = offset;
808 int y = str2.offset;
809 while (--i >= 0)
810 if (value[x++] != str2.value[y++])
811 return false;
812 return true;
816 * Compares the given StringBuffer to this String. This is true if the
817 * StringBuffer has the same content as this String at this moment.
819 * @param buffer the StringBuffer to compare to
820 * @return true if StringBuffer has the same character sequence
821 * @throws NullPointerException if the given StringBuffer is null
822 * @since 1.4
824 public boolean contentEquals(StringBuffer buffer)
826 synchronized (buffer)
828 if (count != buffer.count)
829 return false;
830 if (value == buffer.value)
831 return true; // Possible if shared.
832 int i = count;
833 int x = offset + count;
834 while (--i >= 0)
835 if (value[--x] != buffer.value[i])
836 return false;
837 return true;
842 * Compares the given CharSequence to this String. This is true if
843 * the CharSequence has the same content as this String at this
844 * moment.
846 * @param seq the CharSequence to compare to
847 * @return true if CharSequence has the same character sequence
848 * @throws NullPointerException if the given CharSequence is null
849 * @since 1.5
851 public boolean contentEquals(CharSequence seq)
853 if (seq.length() != count)
854 return false;
855 for (int i = 0; i < count; ++i)
856 if (value[offset + i] != seq.charAt(i))
857 return false;
858 return true;
862 * Compares a String to this String, ignoring case. This does not handle
863 * multi-character capitalization exceptions; instead the comparison is
864 * made on a character-by-character basis, and is true if:<br><ul>
865 * <li><code>c1 == c2</code></li>
866 * <li><code>Character.toUpperCase(c1)
867 * == Character.toUpperCase(c2)</code></li>
868 * <li><code>Character.toLowerCase(c1)
869 * == Character.toLowerCase(c2)</code></li>
870 * </ul>
872 * @param anotherString String to compare to this String
873 * @return true if anotherString is equal, ignoring case
874 * @see #equals(Object)
875 * @see Character#toUpperCase(char)
876 * @see Character#toLowerCase(char)
878 public boolean equalsIgnoreCase(String anotherString)
880 if (anotherString == null || count != anotherString.count)
881 return false;
882 int i = count;
883 int x = offset;
884 int y = anotherString.offset;
885 while (--i >= 0)
887 char c1 = value[x++];
888 char c2 = anotherString.value[y++];
889 // Note that checking c1 != c2 is redundant, but avoids method calls.
890 if (c1 != c2
891 && Character.toUpperCase(c1) != Character.toUpperCase(c2)
892 && Character.toLowerCase(c1) != Character.toLowerCase(c2))
893 return false;
895 return true;
899 * Compares this String and another String (case sensitive,
900 * lexicographically). The result is less than 0 if this string sorts
901 * before the other, 0 if they are equal, and greater than 0 otherwise.
902 * After any common starting sequence is skipped, the result is
903 * <code>this.charAt(k) - anotherString.charAt(k)</code> if both strings
904 * have characters remaining, or
905 * <code>this.length() - anotherString.length()</code> if one string is
906 * a subsequence of the other.
908 * @param anotherString the String to compare against
909 * @return the comparison
910 * @throws NullPointerException if anotherString is null
912 public int compareTo(String anotherString)
914 int i = Math.min(count, anotherString.count);
915 int x = offset;
916 int y = anotherString.offset;
917 while (--i >= 0)
919 int result = value[x++] - anotherString.value[y++];
920 if (result != 0)
921 return result;
923 return count - anotherString.count;
927 * Compares this String and another String (case insensitive). This
928 * comparison is <em>similar</em> to equalsIgnoreCase, in that it ignores
929 * locale and multi-characater capitalization, and compares characters
930 * after performing
931 * <code>Character.toLowerCase(Character.toUpperCase(c))</code> on each
932 * character of the string. This is unsatisfactory for locale-based
933 * comparison, in which case you should use {@link java.text.Collator}.
935 * @param str the string to compare against
936 * @return the comparison
937 * @see Collator#compare(String, String)
938 * @since 1.2
940 public int compareToIgnoreCase(String str)
942 int i = Math.min(count, str.count);
943 int x = offset;
944 int y = str.offset;
945 while (--i >= 0)
947 int result = Character.toLowerCase(Character.toUpperCase(value[x++]))
948 - Character.toLowerCase(Character.toUpperCase(str.value[y++]));
949 if (result != 0)
950 return result;
952 return count - str.count;
956 * Predicate which determines if this String matches another String
957 * starting at a specified offset for each String and continuing
958 * for a specified length. Indices out of bounds are harmless, and give
959 * a false result.
961 * @param toffset index to start comparison at for this String
962 * @param other String to compare region to this String
963 * @param ooffset index to start comparison at for other
964 * @param len number of characters to compare
965 * @return true if regions match (case sensitive)
966 * @throws NullPointerException if other is null
968 public boolean regionMatches(int toffset, String other, int ooffset, int len)
970 return regionMatches(false, toffset, other, ooffset, len);
974 * Predicate which determines if this String matches another String
975 * starting at a specified offset for each String and continuing
976 * for a specified length, optionally ignoring case. Indices out of bounds
977 * are harmless, and give a false result. Case comparisons are based on
978 * <code>Character.toLowerCase()</code> and
979 * <code>Character.toUpperCase()</code>, not on multi-character
980 * capitalization expansions.
982 * @param ignoreCase true if case should be ignored in comparision
983 * @param toffset index to start comparison at for this String
984 * @param other String to compare region to this String
985 * @param ooffset index to start comparison at for other
986 * @param len number of characters to compare
987 * @return true if regions match, false otherwise
988 * @throws NullPointerException if other is null
990 public boolean regionMatches(boolean ignoreCase, int toffset,
991 String other, int ooffset, int len)
993 if (toffset < 0 || ooffset < 0 || toffset + len > count
994 || ooffset + len > other.count)
995 return false;
996 toffset += offset;
997 ooffset += other.offset;
998 while (--len >= 0)
1000 char c1 = value[toffset++];
1001 char c2 = other.value[ooffset++];
1002 // Note that checking c1 != c2 is redundant when ignoreCase is true,
1003 // but it avoids method calls.
1004 if (c1 != c2
1005 && (! ignoreCase
1006 || (Character.toLowerCase(c1) != Character.toLowerCase(c2)
1007 && (Character.toUpperCase(c1)
1008 != Character.toUpperCase(c2)))))
1009 return false;
1011 return true;
1015 * Predicate which determines if this String contains the given prefix,
1016 * beginning comparison at toffset. The result is false if toffset is
1017 * negative or greater than this.length(), otherwise it is the same as
1018 * <code>this.substring(toffset).startsWith(prefix)</code>.
1020 * @param prefix String to compare
1021 * @param toffset offset for this String where comparison starts
1022 * @return true if this String starts with prefix
1023 * @throws NullPointerException if prefix is null
1024 * @see #regionMatches(boolean, int, String, int, int)
1026 public boolean startsWith(String prefix, int toffset)
1028 return regionMatches(false, toffset, prefix, 0, prefix.count);
1032 * Predicate which determines if this String starts with a given prefix.
1033 * If the prefix is an empty String, true is returned.
1035 * @param prefix String to compare
1036 * @return true if this String starts with the prefix
1037 * @throws NullPointerException if prefix is null
1038 * @see #startsWith(String, int)
1040 public boolean startsWith(String prefix)
1042 return regionMatches(false, 0, prefix, 0, prefix.count);
1046 * Predicate which determines if this String ends with a given suffix.
1047 * If the suffix is an empty String, true is returned.
1049 * @param suffix String to compare
1050 * @return true if this String ends with the suffix
1051 * @throws NullPointerException if suffix is null
1052 * @see #regionMatches(boolean, int, String, int, int)
1054 public boolean endsWith(String suffix)
1056 return regionMatches(false, count - suffix.count, suffix, 0, suffix.count);
1060 * Computes the hashcode for this String. This is done with int arithmetic,
1061 * where ** represents exponentiation, by this formula:<br>
1062 * <code>s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]</code>.
1064 * @return hashcode value of this String
1066 public int hashCode()
1068 if (cachedHashCode != 0)
1069 return cachedHashCode;
1071 // Compute the hash code using a local variable to be reentrant.
1072 int hashCode = 0;
1073 int limit = count + offset;
1074 for (int i = offset; i < limit; i++)
1075 hashCode = hashCode * 31 + value[i];
1076 return cachedHashCode = hashCode;
1080 * Finds the first instance of a character in this String.
1082 * @param ch character to find
1083 * @return location (base 0) of the character, or -1 if not found
1085 public int indexOf(int ch)
1087 return indexOf(ch, 0);
1091 * Finds the first instance of a character in this String, starting at
1092 * a given index. If starting index is less than 0, the search
1093 * starts at the beginning of this String. If the starting index
1094 * is greater than the length of this String, -1 is returned.
1096 * @param ch character to find
1097 * @param fromIndex index to start the search
1098 * @return location (base 0) of the character, or -1 if not found
1100 public int indexOf(int ch, int fromIndex)
1102 if ((char) ch != ch)
1103 return -1;
1104 if (fromIndex < 0)
1105 fromIndex = 0;
1106 int i = fromIndex + offset;
1107 for ( ; fromIndex < count; fromIndex++)
1108 if (value[i++] == ch)
1109 return fromIndex;
1110 return -1;
1114 * Finds the last instance of a character in this String.
1116 * @param ch character to find
1117 * @return location (base 0) of the character, or -1 if not found
1119 public int lastIndexOf(int ch)
1121 return lastIndexOf(ch, count - 1);
1125 * Finds the last instance of a character in this String, starting at
1126 * a given index. If starting index is greater than the maximum valid
1127 * index, then the search begins at the end of this String. If the
1128 * starting index is less than zero, -1 is returned.
1130 * @param ch character to find
1131 * @param fromIndex index to start the search
1132 * @return location (base 0) of the character, or -1 if not found
1134 public int lastIndexOf(int ch, int fromIndex)
1136 if ((char) ch != ch)
1137 return -1;
1138 if (fromIndex >= count)
1139 fromIndex = count - 1;
1140 int i = fromIndex + offset;
1141 for ( ; fromIndex >= 0; fromIndex--)
1142 if (value[i--] == ch)
1143 return fromIndex;
1144 return -1;
1148 * Finds the first instance of a String in this String.
1150 * @param str String to find
1151 * @return location (base 0) of the String, or -1 if not found
1152 * @throws NullPointerException if str is null
1154 public int indexOf(String str)
1156 return indexOf(str, 0);
1160 * Finds the first instance of a String in this String, starting at
1161 * a given index. If starting index is less than 0, the search
1162 * starts at the beginning of this String. If the starting index
1163 * is greater than the length of this String, -1 is returned.
1165 * @param str String to find
1166 * @param fromIndex index to start the search
1167 * @return location (base 0) of the String, or -1 if not found
1168 * @throws NullPointerException if str is null
1170 public int indexOf(String str, int fromIndex)
1172 if (fromIndex < 0)
1173 fromIndex = 0;
1174 int limit = count - str.count;
1175 for ( ; fromIndex <= limit; fromIndex++)
1176 if (regionMatches(fromIndex, str, 0, str.count))
1177 return fromIndex;
1178 return -1;
1182 * Finds the last instance of a String in this String.
1184 * @param str String to find
1185 * @return location (base 0) of the String, or -1 if not found
1186 * @throws NullPointerException if str is null
1188 public int lastIndexOf(String str)
1190 return lastIndexOf(str, count - str.count);
1194 * Finds the last instance of a String in this String, starting at
1195 * a given index. If starting index is greater than the maximum valid
1196 * index, then the search begins at the end of this String. If the
1197 * starting index is less than zero, -1 is returned.
1199 * @param str String to find
1200 * @param fromIndex index to start the search
1201 * @return location (base 0) of the String, or -1 if not found
1202 * @throws NullPointerException if str is null
1204 public int lastIndexOf(String str, int fromIndex)
1206 fromIndex = Math.min(fromIndex, count - str.count);
1207 for ( ; fromIndex >= 0; fromIndex--)
1208 if (regionMatches(fromIndex, str, 0, str.count))
1209 return fromIndex;
1210 return -1;
1214 * Creates a substring of this String, starting at a specified index
1215 * and ending at the end of this String.
1217 * @param begin index to start substring (base 0)
1218 * @return new String which is a substring of this String
1219 * @throws IndexOutOfBoundsException if begin &lt; 0 || begin &gt; length()
1220 * (while unspecified, this is a StringIndexOutOfBoundsException)
1222 public String substring(int begin)
1224 return substring(begin, count);
1228 * Creates a substring of this String, starting at a specified index
1229 * and ending at one character before a specified index.
1231 * @param beginIndex index to start substring (inclusive, base 0)
1232 * @param endIndex index to end at (exclusive)
1233 * @return new String which is a substring of this String
1234 * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1235 * || begin &gt; end (while unspecified, this is a
1236 * StringIndexOutOfBoundsException)
1238 public String substring(int beginIndex, int endIndex)
1240 if (beginIndex < 0 || endIndex > count || beginIndex > endIndex)
1241 throw new StringIndexOutOfBoundsException();
1242 if (beginIndex == 0 && endIndex == count)
1243 return this;
1244 int len = endIndex - beginIndex;
1245 // Package constructor avoids an array copy.
1246 return new String(value, beginIndex + offset, len,
1247 (len << 2) >= value.length);
1251 * Creates a substring of this String, starting at a specified index
1252 * and ending at one character before a specified index. This behaves like
1253 * <code>substring(begin, end)</code>.
1255 * @param begin index to start substring (inclusive, base 0)
1256 * @param end index to end at (exclusive)
1257 * @return new String which is a substring of this String
1258 * @throws IndexOutOfBoundsException if begin &lt; 0 || end &gt; length()
1259 * || begin &gt; end
1260 * @since 1.4
1262 public CharSequence subSequence(int begin, int end)
1264 return substring(begin, end);
1268 * Concatenates a String to this String. This results in a new string unless
1269 * one of the two originals is "".
1271 * @param str String to append to this String
1272 * @return newly concatenated String
1273 * @throws NullPointerException if str is null
1275 public String concat(String str)
1277 if (str.count == 0)
1278 return this;
1279 if (count == 0)
1280 return str;
1281 char[] newStr = new char[count + str.count];
1282 VMSystem.arraycopy(value, offset, newStr, 0, count);
1283 VMSystem.arraycopy(str.value, str.offset, newStr, count, str.count);
1284 // Package constructor avoids an array copy.
1285 return new String(newStr, 0, newStr.length, true);
1289 * Replaces every instance of a character in this String with a new
1290 * character. If no replacements occur, this is returned.
1292 * @param oldChar the old character to replace
1293 * @param newChar the new character
1294 * @return new String with all instances of oldChar replaced with newChar
1296 public String replace(char oldChar, char newChar)
1298 if (oldChar == newChar)
1299 return this;
1300 int i = count;
1301 int x = offset - 1;
1302 while (--i >= 0)
1303 if (value[++x] == oldChar)
1304 break;
1305 if (i < 0)
1306 return this;
1307 char[] newStr = toCharArray();
1308 newStr[x - offset] = newChar;
1309 while (--i >= 0)
1310 if (value[++x] == oldChar)
1311 newStr[x - offset] = newChar;
1312 // Package constructor avoids an array copy.
1313 return new String(newStr, 0, count, true);
1317 * Test if this String matches a regular expression. This is shorthand for
1318 * <code>{@link Pattern}.matches(regex, this)</code>.
1320 * @param regex the pattern to match
1321 * @return true if the pattern matches
1322 * @throws NullPointerException if regex is null
1323 * @throws PatternSyntaxException if regex is invalid
1324 * @see Pattern#matches(String, CharSequence)
1325 * @since 1.4
1327 public boolean matches(String regex)
1329 return Pattern.matches(regex, this);
1333 * Replaces the first substring match of the regular expression with a
1334 * given replacement. This is shorthand for <code>{@link Pattern}
1335 * .compile(regex).matcher(this).replaceFirst(replacement)</code>.
1337 * @param regex the pattern to match
1338 * @param replacement the replacement string
1339 * @return the modified string
1340 * @throws NullPointerException if regex or replacement is null
1341 * @throws PatternSyntaxException if regex is invalid
1342 * @see #replaceAll(String, String)
1343 * @see Pattern#compile(String)
1344 * @see Pattern#matcher(CharSequence)
1345 * @see Matcher#replaceFirst(String)
1346 * @since 1.4
1348 public String replaceFirst(String regex, String replacement)
1350 return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
1354 * Replaces all matching substrings of the regular expression with a
1355 * given replacement. This is shorthand for <code>{@link Pattern}
1356 * .compile(regex).matcher(this).replaceAll(replacement)</code>.
1358 * @param regex the pattern to match
1359 * @param replacement the replacement string
1360 * @return the modified string
1361 * @throws NullPointerException if regex or replacement is null
1362 * @throws PatternSyntaxException if regex is invalid
1363 * @see #replaceFirst(String, String)
1364 * @see Pattern#compile(String)
1365 * @see Pattern#matcher(CharSequence)
1366 * @see Matcher#replaceAll(String)
1367 * @since 1.4
1369 public String replaceAll(String regex, String replacement)
1371 return Pattern.compile(regex).matcher(this).replaceAll(replacement);
1375 * Split this string around the matches of a regular expression. Each
1376 * element of the returned array is the largest block of characters not
1377 * terminated by the regular expression, in the order the matches are found.
1379 * <p>The limit affects the length of the array. If it is positive, the
1380 * array will contain at most n elements (n - 1 pattern matches). If
1381 * negative, the array length is unlimited, but there can be trailing empty
1382 * entries. if 0, the array length is unlimited, and trailing empty entries
1383 * are discarded.
1385 * <p>For example, splitting "boo:and:foo" yields:<br>
1386 * <table border=0>
1387 * <th><td>Regex</td> <td>Limit</td> <td>Result</td></th>
1388 * <tr><td>":"</td> <td>2</td> <td>{ "boo", "and:foo" }</td></tr>
1389 * <tr><td>":"</td> <td>t</td> <td>{ "boo", "and", "foo" }</td></tr>
1390 * <tr><td>":"</td> <td>-2</td> <td>{ "boo", "and", "foo" }</td></tr>
1391 * <tr><td>"o"</td> <td>5</td> <td>{ "b", "", ":and:f", "", "" }</td></tr>
1392 * <tr><td>"o"</td> <td>-2</td> <td>{ "b", "", ":and:f", "", "" }</td></tr>
1393 * <tr><td>"o"</td> <td>0</td> <td>{ "b", "", ":and:f" }</td></tr>
1394 * </table>
1396 * <p>This is shorthand for
1397 * <code>{@link Pattern}.compile(regex).split(this, limit)</code>.
1399 * @param regex the pattern to match
1400 * @param limit the limit threshold
1401 * @return the array of split strings
1402 * @throws NullPointerException if regex or replacement is null
1403 * @throws PatternSyntaxException if regex is invalid
1404 * @see Pattern#compile(String)
1405 * @see Pattern#split(CharSequence, int)
1406 * @since 1.4
1408 public String[] split(String regex, int limit)
1410 return Pattern.compile(regex).split(this, limit);
1414 * Split this string around the matches of a regular expression. Each
1415 * element of the returned array is the largest block of characters not
1416 * terminated by the regular expression, in the order the matches are found.
1417 * The array length is unlimited, and trailing empty entries are discarded,
1418 * as though calling <code>split(regex, 0)</code>.
1420 * @param regex the pattern to match
1421 * @return the array of split strings
1422 * @throws NullPointerException if regex or replacement is null
1423 * @throws PatternSyntaxException if regex is invalid
1424 * @see #split(String, int)
1425 * @see Pattern#compile(String)
1426 * @see Pattern#split(CharSequence, int)
1427 * @since 1.4
1429 public String[] split(String regex)
1431 return Pattern.compile(regex).split(this, 0);
1435 * Convert string to lower case for a Turkish locale that requires special
1436 * handling of '\u0049'
1438 private String toLowerCaseTurkish()
1440 // First, see if the current string is already lower case.
1441 int i = count;
1442 int x = offset - 1;
1443 while (--i >= 0)
1445 char ch = value[++x];
1446 if ((ch == '\u0049') || ch != Character.toLowerCase(ch))
1447 break;
1449 if (i < 0)
1450 return this;
1452 // Now we perform the conversion. Fortunately, there are no multi-character
1453 // lowercase expansions in Unicode 3.0.0.
1454 char[] newStr = new char[count];
1455 VMSystem.arraycopy(value, offset, newStr, 0, x - offset);
1458 char ch = value[x];
1459 // Hardcoded special case.
1460 if (ch != '\u0049')
1462 newStr[x - offset] = Character.toLowerCase(ch);
1464 else
1466 newStr[x - offset] = '\u0131';
1468 x++;
1470 while (--i >= 0);
1471 // Package constructor avoids an array copy.
1472 return new String(newStr, 0, count, true);
1476 * Lowercases this String according to a particular locale. This uses
1477 * Unicode's special case mappings, as applied to the given Locale, so the
1478 * resulting string may be a different length.
1480 * @param loc locale to use
1481 * @return new lowercased String, or this if no characters were lowercased
1482 * @throws NullPointerException if loc is null
1483 * @see #toUpperCase(Locale)
1484 * @since 1.1
1486 public String toLowerCase(Locale loc)
1488 // First, see if the current string is already lower case.
1490 // Is loc turkish? String equality test is ok as Locale.language is interned
1491 if ("tr" == loc.getLanguage())
1493 return toLowerCaseTurkish();
1495 else
1497 int i = count;
1498 int x = offset - 1;
1499 while (--i >= 0)
1501 char ch = value[++x];
1502 if (ch != Character.toLowerCase(ch))
1503 break;
1505 if (i < 0)
1506 return this;
1508 // Now we perform the conversion. Fortunately, there are no
1509 // multi-character lowercase expansions in Unicode 3.0.0.
1510 char[] newStr = new char[count];
1511 VMSystem.arraycopy(value, offset, newStr, 0, x - offset);
1514 char ch = value[x];
1515 // Hardcoded special case.
1516 newStr[x - offset] = Character.toLowerCase(ch);
1517 x++;
1519 while (--i >= 0);
1520 // Package constructor avoids an array copy.
1521 return new String(newStr, 0, count, true);
1526 * Lowercases this String. This uses Unicode's special case mappings, as
1527 * applied to the platform's default Locale, so the resulting string may
1528 * be a different length.
1530 * @return new lowercased String, or this if no characters were lowercased
1531 * @see #toLowerCase(Locale)
1532 * @see #toUpperCase()
1534 public String toLowerCase()
1536 return toLowerCase(Locale.getDefault());
1540 * Uppercase this string for a Turkish locale
1542 private String toUpperCaseTurkish()
1544 // First, see how many characters we have to grow by, as well as if the
1545 // current string is already upper case.
1546 int expand = 0;
1547 boolean unchanged = true;
1548 int i = count;
1549 int x = i + offset;
1550 while (--i >= 0)
1552 char ch = value[--x];
1553 expand += upperCaseExpansion(ch);
1554 unchanged = (unchanged && expand == 0
1555 && ch != '\u0069'
1556 && ch == Character.toUpperCase(ch));
1558 if (unchanged)
1559 return this;
1561 // Now we perform the conversion.
1562 i = count;
1563 if (expand == 0)
1565 char[] newStr = new char[count];
1566 VMSystem.arraycopy(value, offset, newStr, 0, count - (x - offset));
1567 while (--i >= 0)
1569 char ch = value[x];
1570 // Hardcoded special case.
1571 if (ch != '\u0069')
1573 newStr[x - offset] = Character.toUpperCase(ch);
1575 else
1577 newStr[x - offset] = '\u0130';
1579 x++;
1581 // Package constructor avoids an array copy.
1582 return new String(newStr, 0, count, true);
1585 // Expansion is necessary.
1586 char[] newStr = new char[count + expand];
1587 int j = 0;
1588 while (--i >= 0)
1590 char ch = value[x++];
1591 // Hardcoded special case.
1592 if (ch == '\u0069')
1594 newStr[j++] = '\u0130';
1595 continue;
1597 expand = upperCaseExpansion(ch);
1598 if (expand > 0)
1600 int index = upperCaseIndex(ch);
1601 while (expand-- >= 0)
1602 newStr[j++] = upperExpand[index++];
1604 else
1605 newStr[j++] = Character.toUpperCase(ch);
1607 // Package constructor avoids an array copy.
1608 return new String(newStr, 0, newStr.length, true);
1612 * Uppercases this String according to a particular locale. This uses
1613 * Unicode's special case mappings, as applied to the given Locale, so the
1614 * resulting string may be a different length.
1616 * @param loc locale to use
1617 * @return new uppercased String, or this if no characters were uppercased
1618 * @throws NullPointerException if loc is null
1619 * @see #toLowerCase(Locale)
1620 * @since 1.1
1622 public String toUpperCase(Locale loc)
1624 // First, see how many characters we have to grow by, as well as if the
1625 // current string is already upper case.
1627 // Is loc turkish? String equality test is ok as Locale.language is interned
1628 if ("tr" == loc.getLanguage())
1630 return toUpperCaseTurkish();
1632 else
1634 int expand = 0;
1635 boolean unchanged = true;
1636 int i = count;
1637 int x = i + offset;
1638 while (--i >= 0)
1640 char ch = value[--x];
1641 expand += upperCaseExpansion(ch);
1642 unchanged = (unchanged && expand == 0
1643 && ch == Character.toUpperCase(ch));
1645 if (unchanged)
1646 return this;
1648 // Now we perform the conversion.
1649 i = count;
1650 if (expand == 0)
1652 char[] newStr = new char[count];
1653 VMSystem.arraycopy(value, offset, newStr, 0, count - (x - offset));
1654 while (--i >= 0)
1656 char ch = value[x];
1657 newStr[x - offset] = Character.toUpperCase(ch);
1658 x++;
1660 // Package constructor avoids an array copy.
1661 return new String(newStr, 0, count, true);
1664 // Expansion is necessary.
1665 char[] newStr = new char[count + expand];
1666 int j = 0;
1667 while (--i >= 0)
1669 char ch = value[x++];
1670 expand = upperCaseExpansion(ch);
1671 if (expand > 0)
1673 int index = upperCaseIndex(ch);
1674 while (expand-- >= 0)
1675 newStr[j++] = upperExpand[index++];
1677 else
1678 newStr[j++] = Character.toUpperCase(ch);
1680 // Package constructor avoids an array copy.
1681 return new String(newStr, 0, newStr.length, true);
1685 * Uppercases this String. This uses Unicode's special case mappings, as
1686 * applied to the platform's default Locale, so the resulting string may
1687 * be a different length.
1689 * @return new uppercased String, or this if no characters were uppercased
1690 * @see #toUpperCase(Locale)
1691 * @see #toLowerCase()
1693 public String toUpperCase()
1695 return toUpperCase(Locale.getDefault());
1699 * Trims all characters less than or equal to <code>'\u0020'</code>
1700 * (<code>' '</code>) from the beginning and end of this String. This
1701 * includes many, but not all, ASCII control characters, and all
1702 * {@link Character#isWhitespace(char)}.
1704 * @return new trimmed String, or this if nothing trimmed
1706 public String trim()
1708 int limit = count + offset;
1709 if (count == 0 || (value[offset] > '\u0020'
1710 && value[limit - 1] > '\u0020'))
1711 return this;
1712 int begin = offset;
1714 if (begin == limit)
1715 return "";
1716 while (value[begin++] <= '\u0020');
1718 int end = limit;
1719 while (value[--end] <= '\u0020')
1721 return substring(begin - offset - 1, end - offset + 1);
1725 * Returns this, as it is already a String!
1727 * @return this
1729 public String toString()
1731 return this;
1735 * Copies the contents of this String into a character array. Subsequent
1736 * changes to the array do not affect the String.
1738 * @return character array copying the String
1740 public char[] toCharArray()
1742 char[] copy = new char[count];
1743 VMSystem.arraycopy(value, offset, copy, 0, count);
1744 return copy;
1748 * Returns a String representation of an Object. This is "null" if the
1749 * object is null, otherwise it is <code>obj.toString()</code> (which
1750 * can be null).
1752 * @param obj the Object
1753 * @return the string conversion of obj
1755 public static String valueOf(Object obj)
1757 return obj == null ? "null" : obj.toString();
1761 * Returns a String representation of a character array. Subsequent
1762 * changes to the array do not affect the String.
1764 * @param data the character array
1765 * @return a String containing the same character sequence as data
1766 * @throws NullPointerException if data is null
1767 * @see #valueOf(char[], int, int)
1768 * @see #String(char[])
1770 public static String valueOf(char[] data)
1772 return valueOf (data, 0, data.length);
1776 * Returns a String representing the character sequence of the char array,
1777 * starting at the specified offset, and copying chars up to the specified
1778 * count. Subsequent changes to the array do not affect the String.
1780 * @param data character array
1781 * @param offset position (base 0) to start copying out of data
1782 * @param count the number of characters from data to copy
1783 * @return String containing the chars from data[offset..offset+count]
1784 * @throws NullPointerException if data is null
1785 * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1786 * || offset + count &gt; data.length)
1787 * (while unspecified, this is a StringIndexOutOfBoundsException)
1788 * @see #String(char[], int, int)
1790 public static String valueOf(char[] data, int offset, int count)
1792 return new String(data, offset, count, false);
1796 * Returns a String representing the character sequence of the char array,
1797 * starting at the specified offset, and copying chars up to the specified
1798 * count. Subsequent changes to the array do not affect the String.
1800 * @param data character array
1801 * @param offset position (base 0) to start copying out of data
1802 * @param count the number of characters from data to copy
1803 * @return String containing the chars from data[offset..offset+count]
1804 * @throws NullPointerException if data is null
1805 * @throws IndexOutOfBoundsException if (offset &lt; 0 || count &lt; 0
1806 * || offset + count &lt; 0 (overflow)
1807 * || offset + count &lt; 0 (overflow)
1808 * || offset + count &gt; data.length)
1809 * (while unspecified, this is a StringIndexOutOfBoundsException)
1810 * @see #String(char[], int, int)
1812 public static String copyValueOf(char[] data, int offset, int count)
1814 return new String(data, offset, count, false);
1818 * Returns a String representation of a character array. Subsequent
1819 * changes to the array do not affect the String.
1821 * @param data the character array
1822 * @return a String containing the same character sequence as data
1823 * @throws NullPointerException if data is null
1824 * @see #copyValueOf(char[], int, int)
1825 * @see #String(char[])
1827 public static String copyValueOf(char[] data)
1829 return copyValueOf (data, 0, data.length);
1833 * Returns a String representing a boolean.
1835 * @param b the boolean
1836 * @return "true" if b is true, else "false"
1838 public static String valueOf(boolean b)
1840 return b ? "true" : "false";
1844 * Returns a String representing a character.
1846 * @param c the character
1847 * @return String containing the single character c
1849 public static String valueOf(char c)
1851 // Package constructor avoids an array copy.
1852 return new String(new char[] { c }, 0, 1, true);
1856 * Returns a String representing an integer.
1858 * @param i the integer
1859 * @return String containing the integer in base 10
1860 * @see Integer#toString(int)
1862 public static String valueOf(int i)
1864 // See Integer to understand why we call the two-arg variant.
1865 return Integer.toString(i, 10);
1869 * Returns a String representing a long.
1871 * @param l the long
1872 * @return String containing the long in base 10
1873 * @see Long#toString(long)
1875 public static String valueOf(long l)
1877 return Long.toString(l);
1881 * Returns a String representing a float.
1883 * @param f the float
1884 * @return String containing the float
1885 * @see Float#toString(float)
1887 public static String valueOf(float f)
1889 return Float.toString(f);
1893 * Returns a String representing a double.
1895 * @param d the double
1896 * @return String containing the double
1897 * @see Double#toString(double)
1899 public static String valueOf(double d)
1901 return Double.toString(d);
1905 /** @since 1.5 */
1906 public static String format(Locale locale, String format, Object... args)
1908 Formatter f = new Formatter(locale);
1909 return f.format(format, args).toString();
1912 /** @since 1.5 */
1913 public static String format(String format, Object... args)
1915 return format(Locale.getDefault(), format, args);
1919 * If two Strings are considered equal, by the equals() method,
1920 * then intern() will return the same String instance. ie.
1921 * if (s1.equals(s2)) then (s1.intern() == s2.intern()).
1922 * All string literals and string-valued constant expressions
1923 * are already interned.
1925 * @return the interned String
1927 public String intern()
1929 return VMString.intern(this);
1933 * Return the number of code points between two indices in the
1934 * <code>String</code>. An unpaired surrogate counts as a
1935 * code point for this purpose. Characters outside the indicated
1936 * range are not examined, even if the range ends in the middle of a
1937 * surrogate pair.
1939 * @param start the starting index
1940 * @param end one past the ending index
1941 * @return the number of code points
1942 * @since 1.5
1944 public synchronized int codePointCount(int start, int end)
1946 if (start < 0 || end > count || start > end)
1947 throw new StringIndexOutOfBoundsException();
1949 start += offset;
1950 end += offset;
1951 int count = 0;
1952 while (start < end)
1954 char base = value[start];
1955 if (base < Character.MIN_HIGH_SURROGATE
1956 || base > Character.MAX_HIGH_SURROGATE
1957 || start == end
1958 || start == count
1959 || value[start + 1] < Character.MIN_LOW_SURROGATE
1960 || value[start + 1] > Character.MAX_LOW_SURROGATE)
1962 // Nothing.
1964 else
1966 // Surrogate pair.
1967 ++start;
1969 ++start;
1970 ++count;
1972 return count;
1976 * Helper function used to detect which characters have a multi-character
1977 * uppercase expansion. Note that this is only used in locations which
1978 * track one-to-many capitalization (java.lang.Character does not do this).
1979 * As of Unicode 3.0.0, the result is limited in the range 0 to 2, as the
1980 * longest uppercase expansion is three characters (a growth of 2 from the
1981 * lowercase character).
1983 * @param ch the char to check
1984 * @return the number of characters to add when converting to uppercase
1985 * @see CharData#DIRECTION
1986 * @see CharData#UPPER_SPECIAL
1987 * @see #toUpperCase(Locale)
1989 private static int upperCaseExpansion(char ch)
1991 return Character.direction[0][Character.readCodePoint((int)ch) >> 7] & 3;
1995 * Helper function used to locate the offset in upperExpand given a
1996 * character with a multi-character expansion. The binary search is
1997 * optimized under the assumption that this method will only be called on
1998 * characters which exist in upperSpecial.
2000 * @param ch the char to check
2001 * @return the index where its expansion begins
2002 * @see CharData#UPPER_SPECIAL
2003 * @see CharData#UPPER_EXPAND
2004 * @see #toUpperCase(Locale)
2006 private static int upperCaseIndex(char ch)
2008 // Simple binary search for the correct character.
2009 int low = 0;
2010 int hi = upperSpecial.length - 2;
2011 int mid = ((low + hi) >> 2) << 1;
2012 char c = upperSpecial[mid];
2013 while (ch != c)
2015 if (ch < c)
2016 hi = mid - 2;
2017 else
2018 low = mid + 2;
2019 mid = ((low + hi) >> 2) << 1;
2020 c = upperSpecial[mid];
2022 return upperSpecial[mid + 1];
2026 * Returns the value array of the given string if it is zero based or a
2027 * copy of it that is zero based (stripping offset and making length equal
2028 * to count). Used for accessing the char[]s of gnu.java.lang.CharData.
2029 * Package private for use in Character.
2031 static char[] zeroBasedStringValue(String s)
2033 char[] value;
2035 if (s.offset == 0 && s.count == s.value.length)
2036 value = s.value;
2037 else
2039 int count = s.count;
2040 value = new char[count];
2041 VMSystem.arraycopy(s.value, s.offset, value, 0, count);
2044 return value;
2048 * Returns true iff this String contains the sequence of Characters
2049 * described in s.
2050 * @param s the CharSequence
2051 * @return true iff this String contains s
2053 * @since 1.5
2055 public boolean contains (CharSequence s)
2057 return this.indexOf(s.toString()) != -1;
2061 * Returns a string that is this string with all instances of the sequence
2062 * represented by <code>target</code> replaced by the sequence in
2063 * <code>replacement</code>.
2064 * @param target the sequence to be replaced
2065 * @param replacement the sequence used as the replacement
2066 * @return the string constructed as above
2068 public String replace (CharSequence target, CharSequence replacement)
2070 String targetString = target.toString();
2071 String replaceString = replacement.toString();
2072 int targetLength = target.length();
2073 int replaceLength = replacement.length();
2075 int startPos = this.indexOf(targetString);
2076 CPStringBuilder result = new CPStringBuilder(this);
2077 while (startPos != -1)
2079 // Replace the target with the replacement
2080 result.replace(startPos, startPos + targetLength, replaceString);
2082 // Search for a new occurrence of the target
2083 startPos = result.indexOf(targetString, startPos + replaceLength);
2085 return result.toString();
2089 * Return the index into this String that is offset from the given index by
2090 * <code>codePointOffset</code> code points.
2091 * @param index the index at which to start
2092 * @param codePointOffset the number of code points to offset
2093 * @return the index into this String that is <code>codePointOffset</code>
2094 * code points offset from <code>index</code>.
2096 * @throws IndexOutOfBoundsException if index is negative or larger than the
2097 * length of this string.
2098 * @throws IndexOutOfBoundsException if codePointOffset is positive and the
2099 * substring starting with index has fewer than codePointOffset code points.
2100 * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2101 * substring ending with index has fewer than (-codePointOffset) code points.
2102 * @since 1.5
2104 public int offsetByCodePoints(int index, int codePointOffset)
2106 if (index < 0 || index > count)
2107 throw new IndexOutOfBoundsException();
2109 return Character.offsetByCodePoints(value, offset, count, offset + index,
2110 codePointOffset);
2114 * Returns true if, and only if, {@link #length()}
2115 * is <code>0</code>.
2117 * @return true if the length of the string is zero.
2118 * @since 1.6
2120 public boolean isEmpty()
2122 return count == 0;