libjava/java/text/CollationElementIterator.java

   1 /* CollationElementIterator.java -- Walks through collation elements
   2    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19 02111-1307 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38
  39 package java.text;
  40
  41 import java.util.ArrayList;
  42
  43 /* Written using "Java Class Libraries", 2nd edition, plus online
  44  * API docs for JDK 1.2 from http://www.javasoft.com.
  45  * Status: Believed complete and correct to JDK 1.1.
  46  */
  47
  48 /**
  49  * This class walks through the character collation elements of a
  50  * <code>String</code> as defined by the collation rules in an instance of
  51  * <code>RuleBasedCollator</code>.  There is no public constructor for
  52  * this class.  An instance is created by calling the
  53  * <code>getCollationElementIterator</code> method on
  54  * <code>RuleBasedCollator</code>.
  55  *
  56  * @author Aaron M. Renn (arenn@urbanophile.com)
  57  * @author Tom Tromey (tromey@cygnus.com)
  58  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  59  */
  60 public final class CollationElementIterator
  61 {
  62   /**
  63    * This is a constant value that is returned to indicate that the end of
  64    * the string was encountered.
  65    */
  66   public static final int NULLORDER = -1;
  67
  68   /**
  69    * This is the RuleBasedCollator this object was created from.
  70    */
  71   RuleBasedCollator collator;
  72
  73   /**
  74    * This is the String that is being iterated over.
  75    */
  76   String text;
  77
  78   /**
  79    * This is the index into the collation decomposition where we are currently scanning.
  80    */
  81   int index;
  82
  83   /**
  84    * This is the index into the String where we are currently scanning.
  85    */
  86   int textIndex;
  87
  88   /**
  89    * Array containing the collation decomposition of the
  90    * text given to the constructor.
  91    */
  92   private RuleBasedCollator.CollationElement[] text_decomposition;
  93
  94   /**
  95    * Array containing the index of the specified block.
  96    */
  97   private int[] text_indexes;
  98
  99   /**
 100    * This method initializes a new instance of <code>CollationElementIterator</code>
 101    * to iterate over the specified <code>String</code> using the rules in the
 102    * specified <code>RuleBasedCollator</code>.
 103    *
 104    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 105    * @param text The <code>String</code> to iterate over.
 106    */
 107   CollationElementIterator(RuleBasedCollator collator, String text)
 108   {
 109     this.collator = collator;
 110
 111     setText (text);
 112   }
 113
 114   RuleBasedCollator.CollationElement nextBlock()
 115   {
 116     if (index >= text_decomposition.length)
 117       return null;
 118
 119     RuleBasedCollator.CollationElement e = text_decomposition[index];
 120
 121     textIndex = text_indexes[index+1];
 122
 123     index++;
 124
 125     return e;
 126   }
 127
 128   RuleBasedCollator.CollationElement previousBlock()
 129   {
 130     if (index == 0)
 131       return null;
 132
 133     index--;
 134     RuleBasedCollator.CollationElement e = text_decomposition[index];
 135
 136     textIndex = text_indexes[index+1];
 137
 138     return e;
 139   }
 140
 141   /**
 142    * This method returns the collation ordering value of the next character sequence
 143    * in the string (it may be an extended character following collation rules).
 144    * This method will return <code>NULLORDER</code> if the
 145    * end of the string was reached.
 146    *
 147    * @return The collation ordering value.
 148    */
 149   public int next()
 150   {
 151     RuleBasedCollator.CollationElement e = nextBlock();
 152
 153     if (e == null)
 154       return NULLORDER;
 155
 156     return e.getValue();
 157   }
 158
 159   /**
 160    * This method returns the collation ordering value of the previous character
 161    * in the string.  This method will return <code>NULLORDER</code> if the
 162    * beginning of the string was reached.
 163    *
 164    * @return The collation ordering value.
 165    */
 166   public int previous()
 167   {
 168     RuleBasedCollator.CollationElement e = previousBlock();
 169
 170     if (e == null)
 171       return NULLORDER;
 172
 173     return e.getValue();
 174   }
 175
 176   /**
 177    * This method returns the primary order value for the given collation
 178    * value.
 179    *
 180    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 181    *
 182    * @return The primary order value of the specified collation value.  This is the high 16 bits.
 183    */
 184   public static int primaryOrder(int order)
 185   {
 186     // From the JDK 1.2 spec.
 187     return order >>> 16;
 188   }
 189
 190   /**
 191    * This method resets the internal position pointer to read from the
 192    * beginning of the <code>String</code> again.
 193    */
 194   public void reset()
 195   {
 196     index = 0;
 197     textIndex = 0;
 198   }
 199
 200   /**
 201    * This method returns the secondary order value for the given collation
 202    * value.
 203    *
 204    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 205    *
 206    * @return The secondary order value of the specified collation value.  This is the bits 8-15.
 207    */
 208   public static short secondaryOrder(int order)
 209   {
 210     // From the JDK 1.2 spec.
 211     return (short) ((order >>> 8) & 255);
 212   }
 213
 214   /**
 215    * This method returns the tertiary order value for the given collation
 216    * value.
 217    *
 218    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 219    *
 220    * @return The tertiary order value of the specified collation value.  This is the low eight bits.
 221    */
 222   public static short tertiaryOrder(int order)
 223   {
 224     // From the JDK 1.2 spec.
 225     return (short) (order & 255);
 226   }
 227
 228   /**
 229    * This method sets the <code>String</code> that it is iterating over
 230    * to the specified <code>String</code>.
 231    *
 232    * @param text The new <code>String</code> to iterate over.
 233    *
 234    * @since 1.2
 235    */
 236   public void setText(String text)
 237   {
 238     int idx = 0;
 239     int idx_idx = 0;
 240     int alreadyExpanded = 0;
 241     int idxToMove = 0;
 242
 243     this.text = text;
 244     this.index = 0;
 245
 246     String work_text = text.intern();
 247
 248     ArrayList a_element = new ArrayList();
 249     ArrayList a_idx = new ArrayList();
 250
 251     // Build element collection ordered as they come in "text".
 252     while (idx < work_text.length())
 253       {
 254         String key, key_old;
 255
 256         Object object = null;
 257         int p = 1;
 258
 259         // IMPROVE: use a TreeMap with a prefix-ordering rule.
 260         key_old = key = null;
 261         do
 262           {
 263             if (object != null)
 264               key_old = key;
 265             key = work_text.substring (idx, idx+p);
 266             object = collator.prefix_tree.get (key);
 267             if (object != null && idx < alreadyExpanded)
 268               {
 269                 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 270                 if (prefix.expansion != null &&
 271                     prefix.expansion.startsWith(work_text.substring(0, idx)))
 272                 {
 273                   object = null;
 274                   key = key_old;
 275                 }
 276               }
 277             p++;
 278           }
 279         while (idx+p <= work_text.length());
 280
 281         if (object == null)
 282           key = key_old;
 283
 284         RuleBasedCollator.CollationElement prefix =
 285           (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 286
 287         /*
 288          * First case: There is no such sequence in the database.
 289          * We will have to build one from the context.
 290          */
 291         if (prefix == null)
 292           {
 293             /*
 294              * We are dealing with sequences in an expansion. They
 295              * are treated as accented characters (tertiary order).
 296              */
 297             if (alreadyExpanded > 0)
 298               {
 299                 RuleBasedCollator.CollationElement e =
 300                   collator.getDefaultAccentedElement (work_text.charAt (idx));
 301
 302                 a_element.add (e);
 303                 a_idx.add (new Integer(idx_idx));
 304                 idx++;
 305                 alreadyExpanded--;
 306                 if (alreadyExpanded == 0)
 307                   {
 308                     /* There is not any characters left in the expansion set.
 309                      * We can increase the pointer in the source string.
 310                      */
 311                     idx_idx += idxToMove;
 312                     idxToMove = 0;
 313                   }
 314                 else
 315                   idx_idx++;
 316               }
 317             else
 318               {
 319                 /* This is a normal character. */
 320                 RuleBasedCollator.CollationElement e =
 321                   collator.getDefaultElement (work_text.charAt (idx));
 322                 Integer i_ref = new Integer(idx_idx);
 323
 324                 /* Don't forget to mark it as a special sequence so the
 325                  * string can be ordered.
 326                  */
 327                 a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 328                 a_idx.add (i_ref);
 329                 a_element.add (e);
 330                 a_idx.add (i_ref);
 331                 idx_idx++;
 332                 idx++;
 333               }
 334             continue;
 335           }
 336
 337         /*
 338          * Second case: Here we have found a matching sequence.
 339          * Here we have an expansion string prepend it to the "work text" and
 340          * add the corresponding sorting element. We must also mark
 341          */
 342         if (prefix.expansion != null)
 343           {
 344             work_text = prefix.expansion
 345               + work_text.substring (idx+prefix.key.length());
 346             idx = 0;
 347             a_element.add (prefix);
 348             a_idx.add (new Integer(idx_idx));
 349             if (alreadyExpanded == 0)
 350               idxToMove = prefix.key.length();
 351             alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 352           }
 353         else
 354           {
 355             /* Third case: the simplest. We have got the prefix and it
 356              * has not to be expanded.
 357              */
 358             a_element.add (prefix);
 359             a_idx.add (new Integer(idx_idx));
 360             idx += prefix.key.length();
 361             /* If the sequence is in an expansion, we must decrease the
 362              * counter.
 363              */
 364             if (alreadyExpanded > 0)
 365               {
 366                 alreadyExpanded -= prefix.key.length();
 367                 if (alreadyExpanded == 0)
 368                   {
 369                     idx_idx += idxToMove;
 370                     idxToMove = 0;
 371                   }
 372               }
 373             else
 374               idx_idx += prefix.key.length();
 375           }
 376       }
 377
 378     text_decomposition = (RuleBasedCollator.CollationElement[])
 379            a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 380     text_indexes = new int[a_idx.size()+1];
 381     for (int i = 0; i < a_idx.size(); i++)
 382       {
 383         text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 384       }
 385     text_indexes[a_idx.size()] = text.length();
 386   }
 387
 388   /**
 389    * This method sets the <code>String</code> that it is iterating over
 390    * to the <code>String</code> represented by the specified
 391    * <code>CharacterIterator</code>.
 392    *
 393    * @param source The <code>CharacterIterator</code> containing the new
 394    * <code>String</code> to iterate over.
 395    */
 396   public void setText(CharacterIterator source)
 397   {
 398     StringBuffer expand = new StringBuffer();
 399
 400     // For now assume we read from the beginning of the string.
 401     for (char c = source.first();
 402          c != CharacterIterator.DONE;
 403          c = source.next())
 404       expand.append(c);
 405
 406     setText(expand.toString());
 407   }
 408
 409   /**
 410    * This method returns the current offset into the <code>String</code>
 411    * that is being iterated over.
 412    *
 413    * @return The iteration index position.
 414    *
 415    * @since 1.2
 416    */
 417   public int getOffset()
 418   {
 419     return textIndex;
 420   }
 421
 422   /**
 423    * This method sets the iteration index position into the current
 424    * <code>String</code> to the specified value.  This value must not
 425    * be negative and must not be greater than the last index position
 426    * in the <code>String</code>.
 427    *
 428    * @param offset The new iteration index position.
 429    *
 430    * @exception IllegalArgumentException If the new offset is not valid.
 431    */
 432   public void setOffset(int offset)
 433   {
 434     if (offset < 0)
 435       throw new IllegalArgumentException("Negative offset: " + offset);
 436
 437     if (offset > (text.length() - 1))
 438       throw new IllegalArgumentException("Offset too large: " + offset);
 439
 440     for (index = 0; index < text_decomposition.length; index++)
 441       {
 442         if (offset <= text_indexes[index])
 443           break;
 444       }
 445     /*
 446      * As text_indexes[0] == 0, we should not have to take care whether index is
 447      * greater than 0. It is always.
 448      */
 449     if (text_indexes[index] == offset)
 450       textIndex = offset;
 451     else
 452       textIndex = text_indexes[index-1];
 453   }
 454
 455   /**
 456    * This method returns the maximum length of any expansion sequence that
 457    * ends with the specified collation order value.  (Whatever that means).
 458    *
 459    * @param value The collation order value
 460    *
 461    * @param The maximum length of an expansion sequence.
 462    */
 463   public int getMaxExpansion(int value)
 464   {
 465     return 1;
 466   }
 467 }