libjava/java/text/RuleBasedCollator.java

   1 /* RuleBasedCollator.java -- Concrete Collator Class
   2    Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19 02111-1307 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38
  39 package java.text;
  40
  41 import java.util.ArrayList;
  42 import java.util.HashMap;
  43
  44 /* Written using "Java Class Libraries", 2nd edition, plus online
  45  * API docs for JDK 1.2 from http://www.javasoft.com.
  46  * Status: Believed complete and correct
  47  */
  48
  49 /**
  50  * This class is a concrete subclass of <code>Collator</code> suitable
  51  * for string collation in a wide variety of languages.  An instance of
  52  * this class is normally returned by the <code>getInstance</code> method
  53  * of <code>Collator</code> with rules predefined for the requested
  54  * locale.  However, an instance of this class can be created manually
  55  * with any desired rules.
  56  * <p>
  57  * Rules take the form of a <code>String</code> with the following syntax
  58  * <ul>
  59  * <li> Modifier: '@'</li>
  60  * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
  61  * <li> Reset: '&amp;' : &lt;text&gt;</li>
  62  * </ul>
  63  * The modifier character indicates that accents sort backward as is the
  64  * case with French.  The modifier applies to all rules <b>after</b>
   65  * the modifier but before the next primary sequence. If placed at the end
   66  * of the sequence, it applies to all unknown accented characters.
  67  * The relational operators specify how the text
  68  * argument relates to the previous term.  The relation characters have
  69  * the following meanings:
  70  * <ul>
  71  * <li>'&lt;' - The text argument is greater than the prior term at the primary
  72  * difference level.</li>
  73  * <li>';' - The text argument is greater than the prior term at the secondary
  74  * difference level.</li>
  75  * <li>',' - The text argument is greater than the prior term at the tertiary
  76  * difference level.</li>
  77  * <li>'=' - The text argument is equal to the prior term</li>
  78  * </ul>
  79  * <p>
  80  * As for the text argument itself, this is any sequence of Unicode
  81  * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
  82  * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
  83  * desired, they must be enclosed in single quotes.  If any whitespace is
  84  * encountered, it is ignored.  (For example, "a b" is equal to "ab").
  85  * <p>
  86  * The reset operation inserts the following rule at the point where the
  87  * text argument to it exists in the previously declared rule string.  This
  88  * makes it easy to add new rules to an existing string by simply including
  89  * them in a reset sequence at the end.  Note that the text argument, or
  90  * at least the first character of it, must be present somewhere in the
  91  * previously declared rules in order to be inserted properly.  If this
  92  * is not satisfied, a <code>ParseException</code> will be thrown.
  93  * <p>
  94  * This system of configuring <code>RuleBasedCollator</code> is needlessly
  95  * complex and the people at Taligent who developed it (along with the folks
  96  * at Sun who accepted it into the Java standard library) deserve a slow
  97  * and agonizing death.
  98  * <p>
   99  * Here are a couple of examples of rule strings:
 100  * <p>
  101  * "&lt; a &lt; b &lt; c" - This string says that a is less than b which is
  102  * less than c, with all differences being primary differences.
 103  * <p>
 104  * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
 105  * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 106  * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 107  * under a tertiary strength comparison.
 108  * <p>
 109  * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
 110  * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
 111  * the rule "&lt; b" is to be inserted after the text argument "a" in the
 112  * previous rule string segment.
 113  * <p>
 114  * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
 115  * anywhere in the previous rule string segment so the rule following the
 116  * reset rule cannot be inserted.
 117  * <p>
 118  * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
 119  * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
 120  * <p>
 121  * For a description of the various comparison strength types, see the
 122  * documentation for the <code>Collator</code> class.
 123  * <p>
 124  * As an additional complication to this already overly complex rule scheme,
 125  * if any characters precede the first rule, these characters are considered
 126  * ignorable.  They will be treated as if they did not exist during
 127  * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
 128  * character such that the strings "high-tech" and "hightech" would
 129  * be considered identical.
 130  * <p>
 131  * A <code>ParseException</code> will be thrown for any of the following
 132  * conditions:
 133  * <ul>
 134  * <li>Unquoted punctuation characters in a text argument.</li>
 135  * <li>A relational or reset operator not followed by a text argument</li>
 136  * <li>A reset operator where the text argument is not present in
 137  * the previous rule string section.</li>
 138  * </ul>
 139  *
 140  * @author Aaron M. Renn (arenn@urbanophile.com)
 141  * @author Tom Tromey (tromey@cygnus.com)
 142  * @author Guilhem Lavaux (guilhem@kaffe.org)
 143  */
 144 public class RuleBasedCollator extends Collator
 145 {
 146   /**
 147    * This class describes what rank has a character (or a sequence of characters)
 148    * in the lexicographic order. Each element in a rule has a collation element.
 149    */
 150   static final class CollationElement
 151   {
 152     String key;
 153     int primary;
 154     short secondary;
 155     short tertiary;
 156     short equality;
 157     boolean ignore;
 158     String expansion;
 159
 160     CollationElement(String key, int primary, short secondary, short tertiary,
 161                      short equality, String expansion, boolean ignore)
 162     {
 163       this.key = key;
 164       this.primary = primary;
 165       this.secondary = secondary;
 166       this.tertiary = tertiary;
 167       this.equality = equality;
 168       this.ignore = ignore;
 169       this.expansion = expansion;
 170     }
 171
 172     int getValue()
 173     {
 174       return (primary << 16) + (secondary << 8) + tertiary;
 175     }
 176   }
 177
 178   /**
 179    * Basic collation instruction (internal format) to build the series of
 180    * collation elements. It contains an instruction which specifies the new
 181    * state of the generator. The sequence of instruction should not contain
 182    * RESET (it is used by
 183    * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
 184    * as a temporary state while merging two sets of instructions.
 185    */
 186   static final class CollationSorter
 187   {
 188     static final int GREATERP = 0;
 189     static final int GREATERS = 1;
 190     static final int GREATERT = 2;
 191     static final int EQUAL = 3;
 192     static final int RESET = 4;
 193     static final int INVERSE_SECONDARY = 5;
 194
 195     int comparisonType;
 196     String textElement;
 197     int hashText;
 198     int offset;
 199     boolean ignore;
 200
 201     String expansionOrdering;
 202   }
 203
 204   /**
  205    * This is the original rule string.
 206    */
 207   private String rules;
 208
 209   /**
 210    * This is the table of collation element values
 211    */
 212   private Object[] ce_table;
 213
 214   /**
 215    * Quick-prefix finder.
 216    */
 217   HashMap prefix_tree;
 218
 219   /**
 220    * This is the value of the last sequence entered into
 221    * <code>ce_table</code>. It is used to compute the
 222    * ordering value of unspecified character.
 223    */
 224   private int last_primary_value;
 225
 226   /**
 227    * This is the value of the last secondary sequence of the
 228    * primary 0, entered into
 229    * <code>ce_table</code>. It is used to compute the
 230    * ordering value of an unspecified accented character.
 231    */
 232   private int last_tertiary_value;
 233
 234   /**
 235    * This variable is true if accents need to be sorted
 236    * in the other direction.
 237    */
 238   private boolean inverseAccentComparison;
 239
 240   /**
  241    * This collation element is reserved for unknown sequences.
  242    * The JDK uses it to mark and sort the characters which have
  243    * no collation rules.
 244    */
 245   static final CollationElement SPECIAL_UNKNOWN_SEQ =
 246     new CollationElement("", (short) 32767, (short) 0, (short) 0,
 247                          (short) 0, null, false);
 248
 249   /**
 250    * This method initializes a new instance of <code>RuleBasedCollator</code>
 251    * with the specified collation rules.  Note that an application normally
 252    * obtains an instance of <code>RuleBasedCollator</code> by calling the
 253    * <code>getInstance</code> method of <code>Collator</code>.  That method
 254    * automatically loads the proper set of rules for the desired locale.
 255    *
 256    * @param rules The collation rule string.
 257    *
 258    * @exception ParseException If the rule string contains syntax errors.
 259    */
 260   public RuleBasedCollator(String rules) throws ParseException
 261   {
 262     if (rules.equals(""))
 263       throw new ParseException("empty rule set", 0);
 264
 265     this.rules = rules;
 266
 267     buildCollationVector(parseString(rules));
 268     buildPrefixAccess();
 269   }
 270
 271   /**
 272    * This method returns the number of common characters at the beginning
 273    * of the string of the two parameters.
 274    *
 275    * @param prefix A string considered as a prefix to test against
 276    * the other string.
 277    * @param s A string to test the prefix against.
 278    * @return The number of common characters.
 279    */
 280   static int findPrefixLength(String prefix, String s)
 281   {
 282     int index;
 283     int len = prefix.length();
 284
 285     for (index = 0; index < len && index < s.length(); ++index)
 286       {
 287         if (prefix.charAt(index) != s.charAt(index))
 288           return index;
 289       }
 290
 291
 292     return index;
 293   }
 294
 295   /**
 296    * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
 297    * checks whether it is possible to find an anchor point for the rules to be merged and
 298    * then insert them at that precise point.
 299    *
 300    * @param offset Offset in the string containing rules of the beginning of the rules
 301    * being merged in.
 302    * @param starter Text of the rules being merged.
 303    * @param main Repository of all already parsed rules.
 304    * @param patch Rules to be merged into the repository.
 305    * @throws ParseException if it is impossible to find an anchor point for the new rules.
 306    */
 307   private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
 308     throws ParseException
 309   {
 310     int insertion_point = -1;
 311     int max_length = 0;
 312
 313     /* We must check that no rules conflict with another already present. If it
 314      * is the case delete the old rule.
 315      */
 316
 317     /* For the moment good old O(N^2) algorithm.
 318      */
 319     for (int i = 0; i < patch.size(); i++)
 320       {
 321         int j = 0;
 322
 323         while (j < main.size())
 324           {
 325             CollationSorter rule1 = (CollationSorter) patch.get(i);
 326             CollationSorter rule2 = (CollationSorter) main.get(j);
 327
 328             if (rule1.textElement.equals(rule2.textElement))
 329               main.remove(j);
 330             else
 331               j++;
 332           }
 333       }
 334
 335     // Find the insertion point... O(N)
 336     for (int i = 0; i < main.size(); i++)
 337       {
 338         CollationSorter sorter = (CollationSorter) main.get(i);
 339         int length = findPrefixLength(starter, sorter.textElement);
 340
 341         if (length > max_length)
 342           {
 343             max_length = length;
 344             insertion_point = i+1;
 345           }
 346       }
 347
 348     if (insertion_point < 0)
 349       throw new ParseException("no insertion point found for " + starter, offset);
 350
 351     if (max_length < starter.length())
 352       {
 353         /*
 354          * We need to expand the first entry. It must be sorted
 355          * like if it was the reference key itself (like the spec
 356          * said. So the first entry is special: the element is
 357          * replaced by the specified text element for the sorting.
 358          * This text replace the old one for comparisons. However
 359          * to preserve the behaviour we replace the first key (corresponding
 360          * to the found prefix) by a new code rightly ordered in the
 361          * sequence. The rest of the subsequence must be appended
 362          * to the end of the sequence.
 363          */
 364         CollationSorter sorter = (CollationSorter) patch.get(0);
 365         CollationSorter expansionPrefix =
 366           (CollationSorter) main.get(insertion_point-1);
 367
 368         sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
 369
 370         main.add(insertion_point, sorter);
 371
 372         /*
 373          * This is a new set of rules. Append to the list.
 374          */
 375         patch.remove(0);
 376         insertion_point++;
 377       }
 378
 379     // Now insert all elements of patch at the insertion point.
 380     for (int i = 0; i < patch.size(); i++)
 381       main.add(i+insertion_point, patch.get(i));
 382   }
 383
 384   /**
 385    * This method parses a string and build a set of sorting instructions. The parsing
 386    * may only be partial on the case the rules are to be merged sometime later.
 387    *
 388    * @param stop_on_reset If this parameter is true then the parser stops when it
 389    * encounters a reset instruction. In the other case, it tries to parse the subrules
 390    * and merged it in the same repository.
 391    * @param v Output vector for the set of instructions.
 392    * @param base_offset Offset in the string to begin parsing.
 393    * @param rules Rules to be parsed.
 394    * @return -1 if the parser reached the end of the string, an integer representing the
 395    * offset in the string at which it stopped parsing.
 396    * @throws ParseException if something turned wrong during the parsing. To get details
 397    * decode the message.
 398    */
 399   private int subParseString(boolean stop_on_reset, ArrayList v,
 400                              int base_offset, String rules)
 401     throws ParseException
 402   {
 403     boolean ignoreChars = (base_offset == 0);
 404     int operator = -1;
 405     StringBuffer sb = new StringBuffer();
 406     boolean doubleQuote = false;
 407     boolean eatingChars = false;
 408     boolean nextIsModifier = false;
 409     boolean isModifier = false;
 410     int i;
 411
 412 main_parse_loop:
 413     for (i = 0; i < rules.length(); i++)
 414       {
 415         char c = rules.charAt(i);
 416         int type = -1;
 417
 418         if (!eatingChars &&
 419             ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
 420               continue;
 421
 422         isModifier = nextIsModifier;
 423         nextIsModifier = false;
 424
 425         if (eatingChars && c != '\'')
 426           {
 427             doubleQuote = false;
 428             sb.append(c);
 429             continue;
 430           }
 431         if (doubleQuote && eatingChars)
 432           {
 433             sb.append(c);
 434             doubleQuote = false;
 435             continue;
 436           }
 437
 438         switch (c)
 439           {
 440           case '!':
 441             throw new ParseException
 442               ("Modifier '!' is not yet supported by Classpath", i + base_offset);
 443           case '<':
 444             type = CollationSorter.GREATERP;
 445             break;
 446           case ';':
 447             type = CollationSorter.GREATERS;
 448             break;
 449           case ',':
 450             type = CollationSorter.GREATERT;
 451             break;
 452           case '=':
 453             type = CollationSorter.EQUAL;
 454             break;
 455           case '\'':
 456             eatingChars = !eatingChars;
 457             doubleQuote = true;
 458             break;
 459           case '@':
 460             if (ignoreChars)
 461               throw new ParseException
 462                 ("comparison list has not yet been started. You may only use"
 463                  + "(<,;=&)", i + base_offset);
 464             // Inverse the order of secondaries from now on.
 465             nextIsModifier = true;
 466             type = CollationSorter.INVERSE_SECONDARY;
 467             break;
 468           case '&':
 469             type = CollationSorter.RESET;
 470             if (stop_on_reset)
 471               break main_parse_loop;
 472             break;
 473           default:
 474             if (operator < 0)
 475               throw new ParseException
 476                 ("operator missing at " + (i + base_offset), i + base_offset);
 477             if (! eatingChars
 478                 && ((c >= 0x21 && c <= 0x2F)
 479                     || (c >= 0x3A && c <= 0x40)
 480                     || (c >= 0x5B && c <= 0x60)
 481                     || (c >= 0x7B && c <= 0x7E)))
 482               throw new ParseException
 483                 ("unquoted punctuation character '" + c + "'", i + base_offset);
 484
 485             //type = ignoreChars ? CollationSorter.IGNORE : -1;
 486             sb.append(c);
 487             break;
 488           }
 489
 490         if (type  < 0)
 491           continue;
 492
 493         if (operator < 0)
 494           {
 495             operator = type;
 496             continue;
 497           }
 498
 499         if (sb.length() == 0 && !isModifier)
 500           throw new ParseException
 501             ("text element empty at " + (i+base_offset), i+base_offset);
 502
 503         if (operator == CollationSorter.RESET)
 504           {
 505             /* Reposition in the sorting list at the position
 506              * indicated by the text element.
 507              */
 508             String subrules = rules.substring(i);
 509             ArrayList sorted_rules = new ArrayList();
 510             int idx;
 511
 512             // Parse the subrules but do not iterate through all
 513             // sublist. This is the priviledge of the first call.
 514             idx = subParseString(true, sorted_rules, base_offset+i, subrules);
 515
 516             // Merge new parsed rules into the list.
 517             mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
 518             sb.setLength(0);
 519
 520             // Reset state to none.
 521             operator = -1;
 522             type = -1;
 523             // We have found a new subrule at 'idx' but it has not been parsed.
 524             if (idx >= 0)
 525               {
 526                 i += idx-1;
 527                 continue main_parse_loop;
 528               }
 529             else
 530                 // No more rules.
 531                 break main_parse_loop;
 532           }
 533
 534         CollationSorter sorter = new CollationSorter();
 535
 536         if (operator == CollationSorter.GREATERP)
 537           ignoreChars = false;
 538
 539         sorter.comparisonType = operator;
 540         sorter.textElement = sb.toString();
 541         sorter.hashText = sorter.textElement.hashCode();
 542         sorter.offset = base_offset+rules.length();
 543         sorter.ignore = ignoreChars;
 544         sb.setLength(0);
 545
 546         v.add(sorter);
 547         operator = type;
 548       }
 549
 550     if (operator >= 0)
 551       {
 552         CollationSorter sorter = new CollationSorter();
 553         int pos = rules.length() + base_offset;
 554
 555         if ((sb.length() != 0 && nextIsModifier)
 556             || (sb.length() == 0 && !nextIsModifier && !eatingChars))
 557           throw new ParseException("text element empty at " + pos, pos);
 558
 559         if (operator == CollationSorter.GREATERP)
 560           ignoreChars = false;
 561
 562         sorter.comparisonType = operator;
 563         sorter.textElement = sb.toString();
 564         sorter.hashText = sorter.textElement.hashCode();
 565         sorter.offset = base_offset+pos;
 566         sorter.ignore = ignoreChars;
 567         v.add(sorter);
 568       }
 569
 570     if (i == rules.length())
 571       return -1;
 572     else
 573       return i;
 574   }
 575
 576   /**
 577    * This method creates a copy of this object.
 578    *
 579    * @return A copy of this object.
 580    */
 581   public Object clone()
 582   {
 583     return super.clone();
 584   }
 585
 586   /**
 587    * This method completely parses a string 'rules' containing sorting rules.
 588    *
 589    * @param rules String containing the rules to be parsed.
 590    * @return A set of sorting instructions stored in a Vector.
 591    * @throws ParseException if something turned wrong during the parsing. To get details
 592    * decode the message.
 593    */
 594   private ArrayList parseString(String rules)
 595     throws ParseException
 596   {
 597     ArrayList v = new ArrayList();
 598
 599     // result of the first subParseString is not absolute (may be -1 or a
 600     // positive integer). But we do not care.
 601     subParseString(false, v, 0, rules);
 602
 603     return v;
 604   }
 605
 606   /**
 607    * This method uses the sorting instructions built by {@link #parseString}
 608    * to build collation elements which can be directly used to sort strings.
 609    *
 610    * @param parsedElements Parsed instructions stored in a ArrayList.
 611    * @throws ParseException if the order of the instructions are not valid.
 612    */
 613   private void buildCollationVector(ArrayList parsedElements)
 614     throws ParseException
 615   {
 616     int primary_seq = 0;
 617     int last_tertiary_seq = 0;
 618     short secondary_seq = 0;
 619     short tertiary_seq = 0;
 620     short equality_seq = 0;
 621     boolean inverseComparisons = false;
 622     final boolean DECREASING = false;
 623     final boolean INCREASING = true;
 624     boolean secondaryType = INCREASING;
 625     ArrayList v = new ArrayList();
 626
 627     // elts is completely sorted.
 628 element_loop:
 629     for (int i = 0; i < parsedElements.size(); i++)
 630       {
 631         CollationSorter elt = (CollationSorter) parsedElements.get(i);
 632         boolean ignoreChar = false;
 633
 634         switch (elt.comparisonType)
 635           {
 636           case CollationSorter.GREATERP:
 637             primary_seq++;
 638             if (inverseComparisons)
 639               {
 640                 secondary_seq = Short.MAX_VALUE;
 641                 secondaryType = DECREASING;
 642               }
 643             else
 644               {
 645                 secondary_seq = 0;
 646                 secondaryType = INCREASING;
 647               }
 648             tertiary_seq = 0;
 649             equality_seq = 0;
 650             inverseComparisons = false;
 651             break;
 652           case CollationSorter.GREATERS:
 653             if (secondaryType == DECREASING)
 654               secondary_seq--;
 655             else
 656               secondary_seq++;
 657             tertiary_seq = 0;
 658             equality_seq = 0;
 659             break;
 660           case CollationSorter.INVERSE_SECONDARY:
 661             inverseComparisons = true;
 662             continue element_loop;
 663           case CollationSorter.GREATERT:
 664             tertiary_seq++;
 665             if (primary_seq == 0)
 666               last_tertiary_seq = tertiary_seq;
 667             equality_seq = 0;
 668             break;
 669           case CollationSorter.EQUAL:
 670             equality_seq++;
 671             break;
 672           case CollationSorter.RESET:
 673             throw new ParseException
 674               ("Invalid reached state 'RESET'. Internal error", elt.offset);
 675           default:
 676             throw new ParseException
 677               ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
 678           }
 679
 680         v.add(new CollationElement(elt.textElement, primary_seq,
 681                                    secondary_seq, tertiary_seq,
 682                                    equality_seq, elt.expansionOrdering, elt.ignore));
 683       }
 684
 685     this.inverseAccentComparison = inverseComparisons;
 686
 687     ce_table = v.toArray();
 688
 689     last_primary_value = primary_seq+1;
 690     last_tertiary_value = last_tertiary_seq+1;
 691   }
 692
 693   /**
 694    * Build a tree where all keys are the texts of collation elements and data is
 695    * the collation element itself. The tree is used when extracting all prefix
 696    * for a given text.
 697    */
 698   private void buildPrefixAccess()
 699   {
 700     prefix_tree = new HashMap();
 701
 702     for (int i = 0; i < ce_table.length; i++)
 703       {
 704         CollationElement e = (CollationElement) ce_table[i];
 705
 706         prefix_tree.put(e.key, e);
 707       }
 708   }
 709
 710   /**
 711    * This method returns an integer which indicates whether the first
 712    * specified <code>String</code> is less than, greater than, or equal to
 713    * the second.  The value depends not only on the collation rules in
 714    * effect, but also the strength and decomposition settings of this object.
 715    *
 716    * @param source The first <code>String</code> to compare.
 717    * @param target A second <code>String</code> to compare to the first.
 718    *
 719    * @return A negative integer if source &lt; target, a positive integer
 720    * if source &gt; target, or 0 if source == target.
 721    */
 722   public int compare(String source, String target)
 723   {
 724     CollationElementIterator cs, ct;
 725     CollationElement ord1block = null;
 726     CollationElement ord2block = null;
 727     boolean advance_block_1 = true;
 728     boolean advance_block_2 = true;
 729
 730     cs = getCollationElementIterator(source);
 731     ct = getCollationElementIterator(target);
 732
 733     for(;;)
 734       {
 735         int ord1;
 736         int ord2;
 737
 738         /*
 739          * We have to check whether the characters are ignorable.
 740          * If it is the case then forget them.
 741          */
 742         if (advance_block_1)
 743           {
 744             ord1block = cs.nextBlock();
 745             if (ord1block != null && ord1block.ignore)
 746               continue;
 747           }
 748
 749         if (advance_block_2)
 750           {
 751             ord2block = ct.nextBlock();
 752             if (ord2block != null && ord2block.ignore)
 753               {
 754                 advance_block_1 = false;
 755                 continue;
 756               }
 757          }
 758         else
 759           advance_block_2 = true;
 760
 761         if (!advance_block_1)
 762           advance_block_1 = true;
 763
 764         if (ord1block != null)
 765           ord1 = ord1block.getValue();
 766         else
 767           {
 768             if (ord2block == null)
 769               return 0;
 770             return -1;
 771           }
 772
 773         if (ord2block == null)
 774           return 1;
 775
 776         ord2 = ord2block.getValue();
 777
 778         // We know chars are totally equal, so skip
 779         if (ord1 == ord2)
 780           {
 781             if (getStrength() == IDENTICAL)
 782               if (!ord1block.key.equals(ord2block.key))
 783                 return ord1block.key.compareTo(ord2block.key);
 784             continue;
 785           }
 786
 787         // Check for primary strength differences
 788         int prim1 = CollationElementIterator.primaryOrder(ord1);
 789         int prim2 = CollationElementIterator.primaryOrder(ord2);
 790
 791         if (prim1 == 0 && getStrength() < TERTIARY)
 792           {
 793             advance_block_2 = false;
 794             continue;
 795           }
 796         else if (prim2 == 0 && getStrength() < TERTIARY)
 797           {
 798             advance_block_1 = false;
 799             continue;
 800           }
 801
 802         if (prim1 < prim2)
 803           return -1;
 804         else if (prim1 > prim2)
 805           return 1;
 806         else if (getStrength() == PRIMARY)
 807           continue;
 808
 809         // Check for secondary strength differences
 810         int sec1 = CollationElementIterator.secondaryOrder(ord1);
 811         int sec2 = CollationElementIterator.secondaryOrder(ord2);
 812
 813         if (sec1 < sec2)
 814           return -1;
 815         else if (sec1 > sec2)
 816           return 1;
 817         else if (getStrength() == SECONDARY)
 818           continue;
 819
 820         // Check for tertiary differences
 821         int tert1 = CollationElementIterator.tertiaryOrder(ord1);
 822         int tert2 = CollationElementIterator.tertiaryOrder(ord2);
 823
 824         if (tert1 < tert2)
 825           return -1;
 826         else if (tert1 > tert2)
 827           return 1;
 828         else if (getStrength() == TERTIARY)
 829           continue;
 830
 831         // Apparently JDK does this (at least for my test case).
 832         return ord1block.key.compareTo(ord2block.key);
 833       }
 834   }
 835
 836   /**
 837    * This method tests this object for equality against the specified
 838    * object.  This will be true if and only if the specified object is
 839    * another reference to this object.
 840    *
 841    * @param obj The <code>Object</code> to compare against this object.
 842    *
 843    * @return <code>true</code> if the specified object is equal to this object,
 844    * <code>false</code> otherwise.
 845    */
 846   public boolean equals(Object obj)
 847   {
 848     if (obj == this)
 849       return true;
 850     else
 851       return false;
 852   }
 853
 854   /**
 855    * This method builds a default collation element without invoking
 856    * the database created from the rules passed to the constructor.
 857    *
 858    * @param c Character which needs a collation element.
 859    * @return A valid brand new CollationElement instance.
 860    */
 861   CollationElement getDefaultElement(char c)
 862   {
 863     int v;
 864
 865     // Preliminary support for generic accent sorting inversion (I don't know if all
 866     // characters in the range should be sorted backward). This is the place
 867     // to fix this if needed.
 868     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 869       v = 0x0361 - ((int) c - 0x02B9);
 870     else
 871       v = (short) c;
 872     return new CollationElement("" + c, last_primary_value + v,
 873                                 (short) 0, (short) 0, (short) 0, null, false);
 874   }
 875
 876   /**
 877    * This method builds a default collation element for an accented character
 878    * without invoking the database created from the rules passed to the constructor.
 879    *
 880    * @param c Character which needs a collation element.
 881    * @return A valid brand new CollationElement instance.
 882    */
 883   CollationElement getDefaultAccentedElement(char c)
 884   {
 885     int v;
 886
 887     // Preliminary support for generic accent sorting inversion (I don't know if all
 888     // characters in the range should be sorted backward). This is the place
 889     // to fix this if needed.
 890     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
 891       v = 0x0361 - ((int) c - 0x02B9);
 892     else
 893       v = (short) c;
 894     return new CollationElement("" + c, (short) 0,
 895                                 (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
 896   }
 897
 898   /**
 899    * This method returns an instance for <code>CollationElementIterator</code>
 900    * for the specified <code>String</code> under the collation rules for this
 901    * object.
 902    *
 903    * @param source The <code>String</code> to return the
 904    * <code>CollationElementIterator</code> instance for.
 905    *
 906    * @return A <code>CollationElementIterator</code> for the specified
 907    * <code>String</code>.
 908    */
 909   public CollationElementIterator getCollationElementIterator(String source)
 910   {
 911     return new CollationElementIterator(this, source);
 912   }
 913
 914   /**
 915    * This method returns an instance of <code>CollationElementIterator</code>
 916    * for the <code>String</code> represented by the specified
 917    * <code>CharacterIterator</code>.
 918    *
 919    * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
 920    *
 921    * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
 922    */
 923   public CollationElementIterator getCollationElementIterator(CharacterIterator source)
 924   {
 925     StringBuffer expand = new StringBuffer("");
 926
 927     // Right now we assume that we will read from the beginning of the string.
 928     for (char c = source.first();
 929          c != CharacterIterator.DONE;
 930          c = source.next())
 931       decomposeCharacter(c, expand);
 932
 933     return getCollationElementIterator(expand.toString());
 934   }
 935
 936   /**
 937    * This method returns an instance of <code>CollationKey</code> for the
 938    * specified <code>String</code>.  The object returned will have a
 939    * more efficient mechanism for its comparison function that could
 940    * provide speed benefits if multiple comparisons are performed, such
 941    * as during a sort.
 942    *
 943    * @param source The <code>String</code> to create a <code>CollationKey</code> for.
 944    *
 945    * @return A <code>CollationKey</code> for the specified <code>String</code>.
 946    */
 947   public CollationKey getCollationKey(String source)
 948   {
 949     CollationElementIterator cei = getCollationElementIterator(source);
 950     ArrayList vect = new ArrayList();
 951
 952     int ord = cei.next();
 953     cei.reset(); //set to start of string
 954
 955     while (ord != CollationElementIterator.NULLORDER)
 956       {
 957         // If the primary order is null, it means this is an ignorable
 958         // character.
 959         if (CollationElementIterator.primaryOrder(ord) == 0)
 960           {
 961             ord = cei.next();
 962             continue;
 963           }
 964         switch (getStrength())
 965           {
 966             case PRIMARY:
 967               ord = CollationElementIterator.primaryOrder(ord);
 968               break;
 969
 970             case SECONDARY:
 971               ord = CollationElementIterator.primaryOrder(ord) << 8;
 972               ord |= CollationElementIterator.secondaryOrder(ord);
 973
 974             default:
 975                break;
 976           }
 977
 978         vect.add(new Integer(ord));
 979         ord = cei.next(); //increment to next key
 980       }
 981
 982     Object[] objarr = vect.toArray();
 983     byte[] key = new byte[objarr.length * 4];
 984
 985     for (int i = 0; i < objarr.length; i++)
 986       {
 987         int j = ((Integer) objarr[i]).intValue();
 988         key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
 989         key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
 990         key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
 991         key [i * 4 + 3] = (byte) (j & 0x000000FF);
 992       }
 993
 994     return new CollationKey(this, source, key);
 995   }
 996
 997   /**
 998    * This method returns a <code>String</code> containing the collation rules
 999    * for this object.
1000    *
1001    * @return The collation rules for this object.
1002    */
1003   public String getRules()
1004   {
1005     return rules;
1006   }
1007
1008   /**
1009    * This method returns a hash value for this object.
1010    *
1011    * @return A hash value for this object.
1012    */
1013   public int hashCode()
1014   {
1015     return System.identityHashCode(this);
1016   }
1017 }