Merge from the pain train
[official-gcc.git] / libjava / java / text / CollationElementIterator.java
blob481b286c4797b018d58888835240eb3303498d57
1 /* CollationElementIterator.java -- Walks through collation elements
2 Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software Foundation
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.text;
41 import java.util.ArrayList;
43 /* Written using "Java Class Libraries", 2nd edition, plus online
44 * API docs for JDK 1.2 from http://www.javasoft.com.
45 * Status: Believed complete and correct to JDK 1.1.
48 /**
49 * This class walks through the character collation elements of a
50 * <code>String</code> as defined by the collation rules in an instance of
51 * <code>RuleBasedCollator</code>. There is no public constructor for
52 * this class. An instance is created by calling the
53 * <code>getCollationElementIterator</code> method on
54 * <code>RuleBasedCollator</code>.
56 * @author Aaron M. Renn (arenn@urbanophile.com)
57 * @author Tom Tromey (tromey@cygnus.com)
58 * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
60 public final class CollationElementIterator
62 /**
63 * This is a constant value that is returned to indicate that the end of
64 * the string was encountered.
66 public static final int NULLORDER = -1;
68 /**
69 * This is the RuleBasedCollator this object was created from.
71 RuleBasedCollator collator;
73 /**
74 * This is the String that is being iterated over.
76 String text;
78 /**
79 * This is the index into the collation decomposition where we are currently scanning.
81 int index;
83 /**
84 * This is the index into the String where we are currently scanning.
86 int textIndex;
88 /**
89 * Array containing the collation decomposition of the
90 * text given to the constructor.
92 private RuleBasedCollator.CollationElement[] text_decomposition;
94 /**
95 * Array containing the index of the specified block.
97 private int[] text_indexes;
/**
 * Creates a <code>CollationElementIterator</code> that walks over the given
 * <code>String</code> using the rules of the given
 * <code>RuleBasedCollator</code>.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating collation values.
 * @param text The <code>String</code> to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, String text) {
  // The collator has to be stored first: setText() consults it while
  // decomposing the text.
  this.collator = collator;
  setText(text);
}
114 RuleBasedCollator.CollationElement nextBlock()
116 if (index >= text_decomposition.length)
117 return null;
119 RuleBasedCollator.CollationElement e = text_decomposition[index];
121 textIndex = text_indexes[index+1];
123 index++;
125 return e;
128 RuleBasedCollator.CollationElement previousBlock()
130 if (index == 0)
131 return null;
133 index--;
134 RuleBasedCollator.CollationElement e = text_decomposition[index];
136 textIndex = text_indexes[index+1];
138 return e;
142 * This method returns the collation ordering value of the next character sequence
143 * in the string (it may be an extended character following collation rules).
144 * This method will return <code>NULLORDER</code> if the
145 * end of the string was reached.
147 * @return The collation ordering value.
149 public int next()
151 RuleBasedCollator.CollationElement e = nextBlock();
153 if (e == null)
154 return NULLORDER;
156 return e.getValue();
160 * This method returns the collation ordering value of the previous character
161 * in the string. This method will return <code>NULLORDER</code> if the
162 * beginning of the string was reached.
164 * @return The collation ordering value.
166 public int previous()
168 RuleBasedCollator.CollationElement e = previousBlock();
170 if (e == null)
171 return NULLORDER;
173 return e.getValue();
177 * This method returns the primary order value for the given collation
178 * value.
180 * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
182 * @return The primary order value of the specified collation value. This is the high 16 bits.
184 public static int primaryOrder(int order)
186 // From the JDK 1.2 spec.
187 return order >>> 16;
191 * This method resets the internal position pointer to read from the
192 * beginning of the <code>String</code> again.
194 public void reset()
196 index = 0;
197 textIndex = 0;
201 * This method returns the secondary order value for the given collation
202 * value.
204 * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
206 * @return The secondary order value of the specified collation value. This is the bits 8-15.
208 public static short secondaryOrder(int order)
210 // From the JDK 1.2 spec.
211 return (short) ((order >>> 8) & 255);
215 * This method returns the tertiary order value for the given collation
216 * value.
218 * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
220 * @return The tertiary order value of the specified collation value. This is the low eight bits.
222 public static short tertiaryOrder(int order)
224 // From the JDK 1.2 spec.
225 return (short) (order & 255);
229 * This method sets the <code>String</code> that it is iterating over
230 * to the specified <code>String</code>.
232 * @param text The new <code>String</code> to iterate over.
234 * @since 1.2
236 public void setText(String text)
238 int idx = 0;
239 int idx_idx = 0;
240 int alreadyExpanded = 0;
241 int idxToMove = 0;
243 this.text = text;
244 this.index = 0;
246 String work_text = text.intern();
248 ArrayList a_element = new ArrayList();
249 ArrayList a_idx = new ArrayList();
251 // Build element collection ordered as they come in "text".
252 while (idx < work_text.length())
254 String key, key_old;
256 Object object = null;
257 int p = 1;
259 // IMPROVE: use a TreeMap with a prefix-ordering rule.
260 key_old = key = null;
263 if (object != null)
264 key_old = key;
265 key = work_text.substring (idx, idx+p);
266 object = collator.prefix_tree.get (key);
267 if (object != null && idx < alreadyExpanded)
269 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
270 if (prefix.expansion != null &&
271 prefix.expansion.startsWith(work_text.substring(0, idx)))
273 object = null;
274 key = key_old;
277 p++;
279 while (idx+p <= work_text.length());
281 if (object == null)
282 key = key_old;
284 RuleBasedCollator.CollationElement prefix =
285 (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
288 * First case: There is no such sequence in the database.
289 * We will have to build one from the context.
291 if (prefix == null)
294 * We are dealing with sequences in an expansion. They
295 * are treated as accented characters (tertiary order).
297 if (alreadyExpanded > 0)
299 RuleBasedCollator.CollationElement e =
300 collator.getDefaultAccentedElement (work_text.charAt (idx));
302 a_element.add (e);
303 a_idx.add (new Integer(idx_idx));
304 idx++;
305 alreadyExpanded--;
306 if (alreadyExpanded == 0)
308 /* There is not any characters left in the expansion set.
309 * We can increase the pointer in the source string.
311 idx_idx += idxToMove;
312 idxToMove = 0;
314 else
315 idx_idx++;
317 else
319 /* This is a normal character. */
320 RuleBasedCollator.CollationElement e =
321 collator.getDefaultElement (work_text.charAt (idx));
322 Integer i_ref = new Integer(idx_idx);
324 /* Don't forget to mark it as a special sequence so the
325 * string can be ordered.
327 a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
328 a_idx.add (i_ref);
329 a_element.add (e);
330 a_idx.add (i_ref);
331 idx_idx++;
332 idx++;
334 continue;
338 * Second case: Here we have found a matching sequence.
339 * Here we have an expansion string prepend it to the "work text" and
340 * add the corresponding sorting element. We must also mark
342 if (prefix.expansion != null)
344 work_text = prefix.expansion
345 + work_text.substring (idx+prefix.key.length());
346 idx = 0;
347 a_element.add (prefix);
348 a_idx.add (new Integer(idx_idx));
349 if (alreadyExpanded == 0)
350 idxToMove = prefix.key.length();
351 alreadyExpanded += prefix.expansion.length()-prefix.key.length();
353 else
355 /* Third case: the simplest. We have got the prefix and it
356 * has not to be expanded.
358 a_element.add (prefix);
359 a_idx.add (new Integer(idx_idx));
360 idx += prefix.key.length();
361 /* If the sequence is in an expansion, we must decrease the
362 * counter.
364 if (alreadyExpanded > 0)
366 alreadyExpanded -= prefix.key.length();
367 if (alreadyExpanded == 0)
369 idx_idx += idxToMove;
370 idxToMove = 0;
373 else
374 idx_idx += prefix.key.length();
378 text_decomposition = (RuleBasedCollator.CollationElement[])
379 a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
380 text_indexes = new int[a_idx.size()+1];
381 for (int i = 0; i < a_idx.size(); i++)
383 text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
385 text_indexes[a_idx.size()] = text.length();
389 * This method sets the <code>String</code> that it is iterating over
390 * to the <code>String</code> represented by the specified
391 * <code>CharacterIterator</code>.
393 * @param source The <code>CharacterIterator</code> containing the new
394 * <code>String</code> to iterate over.
396 public void setText(CharacterIterator source)
398 StringBuffer expand = new StringBuffer();
400 // For now assume we read from the beginning of the string.
401 for (char c = source.first();
402 c != CharacterIterator.DONE;
403 c = source.next())
404 expand.append(c);
406 setText(expand.toString());
410 * This method returns the current offset into the <code>String</code>
411 * that is being iterated over.
413 * @return The iteration index position.
415 * @since 1.2
417 public int getOffset()
419 return textIndex;
423 * This method sets the iteration index position into the current
424 * <code>String</code> to the specified value. This value must not
425 * be negative and must not be greater than the last index position
426 * in the <code>String</code>.
428 * @param offset The new iteration index position.
430 * @exception IllegalArgumentException If the new offset is not valid.
432 public void setOffset(int offset)
434 if (offset < 0)
435 throw new IllegalArgumentException("Negative offset: " + offset);
437 if (offset > (text.length() - 1))
438 throw new IllegalArgumentException("Offset too large: " + offset);
440 for (index = 0; index < text_decomposition.length; index++)
442 if (offset <= text_indexes[index])
443 break;
446 * As text_indexes[0] == 0, we should not have to take care whether index is
447 * greater than 0. It is always.
449 if (text_indexes[index] == offset)
450 textIndex = offset;
451 else
452 textIndex = text_indexes[index-1];
456 * This method returns the maximum length of any expansion sequence that
457 * ends with the specified collation order value. (Whatever that means).
459 * @param value The collation order value
461 * @param The maximum length of an expansion sequence.
463 public int getMaxExpansion(int value)
465 return 1;