Merge from the pain train
[official-gcc.git] / libjava / java / text / RuleBasedCollator.java
blobafb766ff8ae7e3833d27c7cba86c2164128ed7c6
1 /* RuleBasedCollator.java -- Concrete Collator Class
2 Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.text;
41 import java.util.ArrayList;
42 import java.util.HashMap;
44 /* Written using "Java Class Libraries", 2nd edition, plus online
45 * API docs for JDK 1.2 from http://www.javasoft.com.
46 * Status: Believed complete and correct
49 /**
50 * This class is a concrete subclass of <code>Collator</code> suitable
51 * for string collation in a wide variety of languages. An instance of
52 * this class is normally returned by the <code>getInstance</code> method
53 * of <code>Collator</code> with rules predefined for the requested
54 * locale. However, an instance of this class can be created manually
55 * with any desired rules.
56 * <p>
57 * Rules take the form of a <code>String</code> with the following syntax
58 * <ul>
59 * <li> Modifier: '@'</li>
60 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
61 * <li> Reset: '&amp;' : &lt;text&gt;</li>
62 * </ul>
63 * The modifier character indicates that accents sort backward as is the
64 * case with French. The modifier applies to all rules <b>after</b>
65 * the modifier but before the next primary sequence. If placed at the end
66 * of the sequence it applies to all unknown accented characters.
67 * The relational operators specify how the text
68 * argument relates to the previous term. The relation characters have
69 * the following meanings:
70 * <ul>
71 * <li>'&lt;' - The text argument is greater than the prior term at the primary
72 * difference level.</li>
73 * <li>';' - The text argument is greater than the prior term at the secondary
74 * difference level.</li>
75 * <li>',' - The text argument is greater than the prior term at the tertiary
76 * difference level.</li>
77 * <li>'=' - The text argument is equal to the prior term</li>
78 * </ul>
79 * <p>
80 * As for the text argument itself, this is any sequence of Unicode
81 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
82 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
83 * desired, they must be enclosed in single quotes. If any whitespace is
84 * encountered, it is ignored. (For example, "a b" is equal to "ab").
85 * <p>
86 * The reset operation inserts the following rule at the point where the
87 * text argument to it exists in the previously declared rule string. This
88 * makes it easy to add new rules to an existing string by simply including
89 * them in a reset sequence at the end. Note that the text argument, or
90 * at least the first character of it, must be present somewhere in the
91 * previously declared rules in order to be inserted properly. If this
92 * is not satisfied, a <code>ParseException</code> will be thrown.
93 * <p>
94 * This system of configuring <code>RuleBasedCollator</code> is needlessly
95 * complex and the people at Taligent who developed it (along with the folks
96 * at Sun who accepted it into the Java standard library) deserve a slow
97 * and agonizing death.
98 * <p>
99 * Here are a couple of examples of rule strings:
100 * <p>
101 * "&lt; a &lt; b &lt; c" - This string says that a is less than b which is
102 * less than c, with all differences being primary differences.
103 * <p>
104 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
105 * a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and
106 * 'A' during a primary strength comparison. But 'B' is greater than 'b'
107 * under a tertiary strength comparison.
108 * <p>
109 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
110 * "&lt; a &lt; b &lt; c" rule string above. The '&amp;' reset symbol indicates that
111 * the rule "&lt; b" is to be inserted after the text argument "a" in the
112 * previous rule string segment.
113 * <p>
114 * "&lt; a &lt; b &amp; y &lt; z" - This is an error. The character 'y' does not appear
115 * anywhere in the previous rule string segment so the rule following the
116 * reset rule cannot be inserted.
117 * <p>
118 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
119 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
120 * <p>
121 * For a description of the various comparison strength types, see the
122 * documentation for the <code>Collator</code> class.
123 * <p>
124 * As an additional complication to this already overly complex rule scheme,
125 * if any characters precede the first rule, these characters are considered
126 * ignorable. They will be treated as if they did not exist during
127 * comparisons. For example, "- &lt; a &lt; b ..." would make '-' an ignorable
128 * character such that the strings "high-tech" and "hightech" would
129 * be considered identical.
130 * <p>
131 * A <code>ParseException</code> will be thrown for any of the following
132 * conditions:
133 * <ul>
134 * <li>Unquoted punctuation characters in a text argument.</li>
135 * <li>A relational or reset operator not followed by a text argument</li>
136 * <li>A reset operator where the text argument is not present in
137 * the previous rule string section.</li>
138 * </ul>
140 * @author Aaron M. Renn (arenn@urbanophile.com)
141 * @author Tom Tromey (tromey@cygnus.com)
142 * @author Guilhem Lavaux (guilhem@kaffe.org)
144 public class RuleBasedCollator extends Collator
147 * This class describes what rank has a character (or a sequence of characters)
148 * in the lexicographic order. Each element in a rule has a collation element.
150 static final class CollationElement
152 String key;
153 int primary;
154 short secondary;
155 short tertiary;
156 short equality;
157 boolean ignore;
158 String expansion;
160 CollationElement(String key, int primary, short secondary, short tertiary,
161 short equality, String expansion, boolean ignore)
163 this.key = key;
164 this.primary = primary;
165 this.secondary = secondary;
166 this.tertiary = tertiary;
167 this.equality = equality;
168 this.ignore = ignore;
169 this.expansion = expansion;
172 int getValue()
174 return (primary << 16) + (secondary << 8) + tertiary;
179 * Basic collation instruction (internal format) to build the series of
180 * collation elements. It contains an instruction which specifies the new
181 * state of the generator. The sequence of instruction should not contain
182 * RESET (it is used by
183 * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
184 * as a temporary state while merging two sets of instructions.
186 static final class CollationSorter
188 static final int GREATERP = 0;
189 static final int GREATERS = 1;
190 static final int GREATERT = 2;
191 static final int EQUAL = 3;
192 static final int RESET = 4;
193 static final int INVERSE_SECONDARY = 5;
195 int comparisonType;
196 String textElement;
197 int hashText;
198 int offset;
199 boolean ignore;
201 String expansionOrdering;
205 * This the the original rule string.
207 private String rules;
210 * This is the table of collation element values
212 private Object[] ce_table;
215 * Quick-prefix finder.
217 HashMap prefix_tree;
220 * This is the value of the last sequence entered into
221 * <code>ce_table</code>. It is used to compute the
222 * ordering value of unspecified character.
224 private int last_primary_value;
227 * This is the value of the last secondary sequence of the
228 * primary 0, entered into
229 * <code>ce_table</code>. It is used to compute the
230 * ordering value of an unspecified accented character.
232 private int last_tertiary_value;
235 * This variable is true if accents need to be sorted
236 * in the other direction.
238 private boolean inverseAccentComparison;
241 * This collation element is special to unknown sequence.
242 * The JDK uses it to mark and sort the characters which has
243 * no collation rules.
245 static final CollationElement SPECIAL_UNKNOWN_SEQ =
246 new CollationElement("", (short) 32767, (short) 0, (short) 0,
247 (short) 0, null, false);
250 * This method initializes a new instance of <code>RuleBasedCollator</code>
251 * with the specified collation rules. Note that an application normally
252 * obtains an instance of <code>RuleBasedCollator</code> by calling the
253 * <code>getInstance</code> method of <code>Collator</code>. That method
254 * automatically loads the proper set of rules for the desired locale.
256 * @param rules The collation rule string.
258 * @exception ParseException If the rule string contains syntax errors.
260 public RuleBasedCollator(String rules) throws ParseException
262 if (rules.equals(""))
263 throw new ParseException("empty rule set", 0);
265 this.rules = rules;
267 buildCollationVector(parseString(rules));
268 buildPrefixAccess();
272 * This method returns the number of common characters at the beginning
273 * of the string of the two parameters.
275 * @param prefix A string considered as a prefix to test against
276 * the other string.
277 * @param s A string to test the prefix against.
278 * @return The number of common characters.
280 static int findPrefixLength(String prefix, String s)
282 int index;
283 int len = prefix.length();
285 for (index = 0; index < len && index < s.length(); ++index)
287 if (prefix.charAt(index) != s.charAt(index))
288 return index;
292 return index;
296 * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
297 * checks whether it is possible to find an anchor point for the rules to be merged and
298 * then insert them at that precise point.
300 * @param offset Offset in the string containing rules of the beginning of the rules
301 * being merged in.
302 * @param starter Text of the rules being merged.
303 * @param main Repository of all already parsed rules.
304 * @param patch Rules to be merged into the repository.
305 * @throws ParseException if it is impossible to find an anchor point for the new rules.
307 private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
308 throws ParseException
310 int insertion_point = -1;
311 int max_length = 0;
313 /* We must check that no rules conflict with another already present. If it
314 * is the case delete the old rule.
317 /* For the moment good old O(N^2) algorithm.
319 for (int i = 0; i < patch.size(); i++)
321 int j = 0;
323 while (j < main.size())
325 CollationSorter rule1 = (CollationSorter) patch.get(i);
326 CollationSorter rule2 = (CollationSorter) main.get(j);
328 if (rule1.textElement.equals(rule2.textElement))
329 main.remove(j);
330 else
331 j++;
335 // Find the insertion point... O(N)
336 for (int i = 0; i < main.size(); i++)
338 CollationSorter sorter = (CollationSorter) main.get(i);
339 int length = findPrefixLength(starter, sorter.textElement);
341 if (length > max_length)
343 max_length = length;
344 insertion_point = i+1;
348 if (insertion_point < 0)
349 throw new ParseException("no insertion point found for " + starter, offset);
351 if (max_length < starter.length())
354 * We need to expand the first entry. It must be sorted
355 * like if it was the reference key itself (like the spec
356 * said. So the first entry is special: the element is
357 * replaced by the specified text element for the sorting.
358 * This text replace the old one for comparisons. However
359 * to preserve the behaviour we replace the first key (corresponding
360 * to the found prefix) by a new code rightly ordered in the
361 * sequence. The rest of the subsequence must be appended
362 * to the end of the sequence.
364 CollationSorter sorter = (CollationSorter) patch.get(0);
365 CollationSorter expansionPrefix =
366 (CollationSorter) main.get(insertion_point-1);
368 sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
370 main.add(insertion_point, sorter);
373 * This is a new set of rules. Append to the list.
375 patch.remove(0);
376 insertion_point++;
379 // Now insert all elements of patch at the insertion point.
380 for (int i = 0; i < patch.size(); i++)
381 main.add(i+insertion_point, patch.get(i));
385 * This method parses a string and build a set of sorting instructions. The parsing
386 * may only be partial on the case the rules are to be merged sometime later.
388 * @param stop_on_reset If this parameter is true then the parser stops when it
389 * encounters a reset instruction. In the other case, it tries to parse the subrules
390 * and merged it in the same repository.
391 * @param v Output vector for the set of instructions.
392 * @param base_offset Offset in the string to begin parsing.
393 * @param rules Rules to be parsed.
394 * @return -1 if the parser reached the end of the string, an integer representing the
395 * offset in the string at which it stopped parsing.
396 * @throws ParseException if something turned wrong during the parsing. To get details
397 * decode the message.
399 private int subParseString(boolean stop_on_reset, ArrayList v,
400 int base_offset, String rules)
401 throws ParseException
403 boolean ignoreChars = (base_offset == 0);
404 int operator = -1;
405 StringBuffer sb = new StringBuffer();
406 boolean doubleQuote = false;
407 boolean eatingChars = false;
408 boolean nextIsModifier = false;
409 boolean isModifier = false;
410 int i;
412 main_parse_loop:
413 for (i = 0; i < rules.length(); i++)
415 char c = rules.charAt(i);
416 int type = -1;
418 if (!eatingChars &&
419 ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
420 continue;
422 isModifier = nextIsModifier;
423 nextIsModifier = false;
425 if (eatingChars && c != '\'')
427 doubleQuote = false;
428 sb.append(c);
429 continue;
431 if (doubleQuote && eatingChars)
433 sb.append(c);
434 doubleQuote = false;
435 continue;
438 switch (c)
440 case '!':
441 throw new ParseException
442 ("Modifier '!' is not yet supported by Classpath", i + base_offset);
443 case '<':
444 type = CollationSorter.GREATERP;
445 break;
446 case ';':
447 type = CollationSorter.GREATERS;
448 break;
449 case ',':
450 type = CollationSorter.GREATERT;
451 break;
452 case '=':
453 type = CollationSorter.EQUAL;
454 break;
455 case '\'':
456 eatingChars = !eatingChars;
457 doubleQuote = true;
458 break;
459 case '@':
460 if (ignoreChars)
461 throw new ParseException
462 ("comparison list has not yet been started. You may only use"
463 + "(<,;=&)", i + base_offset);
464 // Inverse the order of secondaries from now on.
465 nextIsModifier = true;
466 type = CollationSorter.INVERSE_SECONDARY;
467 break;
468 case '&':
469 type = CollationSorter.RESET;
470 if (stop_on_reset)
471 break main_parse_loop;
472 break;
473 default:
474 if (operator < 0)
475 throw new ParseException
476 ("operator missing at " + (i + base_offset), i + base_offset);
477 if (! eatingChars
478 && ((c >= 0x21 && c <= 0x2F)
479 || (c >= 0x3A && c <= 0x40)
480 || (c >= 0x5B && c <= 0x60)
481 || (c >= 0x7B && c <= 0x7E)))
482 throw new ParseException
483 ("unquoted punctuation character '" + c + "'", i + base_offset);
485 //type = ignoreChars ? CollationSorter.IGNORE : -1;
486 sb.append(c);
487 break;
490 if (type < 0)
491 continue;
493 if (operator < 0)
495 operator = type;
496 continue;
499 if (sb.length() == 0 && !isModifier)
500 throw new ParseException
501 ("text element empty at " + (i+base_offset), i+base_offset);
503 if (operator == CollationSorter.RESET)
505 /* Reposition in the sorting list at the position
506 * indicated by the text element.
508 String subrules = rules.substring(i);
509 ArrayList sorted_rules = new ArrayList();
510 int idx;
512 // Parse the subrules but do not iterate through all
513 // sublist. This is the priviledge of the first call.
514 idx = subParseString(true, sorted_rules, base_offset+i, subrules);
516 // Merge new parsed rules into the list.
517 mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
518 sb.setLength(0);
520 // Reset state to none.
521 operator = -1;
522 type = -1;
523 // We have found a new subrule at 'idx' but it has not been parsed.
524 if (idx >= 0)
526 i += idx-1;
527 continue main_parse_loop;
529 else
530 // No more rules.
531 break main_parse_loop;
534 CollationSorter sorter = new CollationSorter();
536 if (operator == CollationSorter.GREATERP)
537 ignoreChars = false;
539 sorter.comparisonType = operator;
540 sorter.textElement = sb.toString();
541 sorter.hashText = sorter.textElement.hashCode();
542 sorter.offset = base_offset+rules.length();
543 sorter.ignore = ignoreChars;
544 sb.setLength(0);
546 v.add(sorter);
547 operator = type;
550 if (operator >= 0)
552 CollationSorter sorter = new CollationSorter();
553 int pos = rules.length() + base_offset;
555 if ((sb.length() != 0 && nextIsModifier)
556 || (sb.length() == 0 && !nextIsModifier && !eatingChars))
557 throw new ParseException("text element empty at " + pos, pos);
559 if (operator == CollationSorter.GREATERP)
560 ignoreChars = false;
562 sorter.comparisonType = operator;
563 sorter.textElement = sb.toString();
564 sorter.hashText = sorter.textElement.hashCode();
565 sorter.offset = base_offset+pos;
566 sorter.ignore = ignoreChars;
567 v.add(sorter);
570 if (i == rules.length())
571 return -1;
572 else
573 return i;
577 * This method creates a copy of this object.
579 * @return A copy of this object.
581 public Object clone()
583 return super.clone();
587 * This method completely parses a string 'rules' containing sorting rules.
589 * @param rules String containing the rules to be parsed.
590 * @return A set of sorting instructions stored in a Vector.
591 * @throws ParseException if something turned wrong during the parsing. To get details
592 * decode the message.
594 private ArrayList parseString(String rules)
595 throws ParseException
597 ArrayList v = new ArrayList();
599 // result of the first subParseString is not absolute (may be -1 or a
600 // positive integer). But we do not care.
601 subParseString(false, v, 0, rules);
603 return v;
607 * This method uses the sorting instructions built by {@link #parseString}
608 * to build collation elements which can be directly used to sort strings.
610 * @param parsedElements Parsed instructions stored in a ArrayList.
611 * @throws ParseException if the order of the instructions are not valid.
613 private void buildCollationVector(ArrayList parsedElements)
614 throws ParseException
616 int primary_seq = 0;
617 int last_tertiary_seq = 0;
618 short secondary_seq = 0;
619 short tertiary_seq = 0;
620 short equality_seq = 0;
621 boolean inverseComparisons = false;
622 final boolean DECREASING = false;
623 final boolean INCREASING = true;
624 boolean secondaryType = INCREASING;
625 ArrayList v = new ArrayList();
627 // elts is completely sorted.
628 element_loop:
629 for (int i = 0; i < parsedElements.size(); i++)
631 CollationSorter elt = (CollationSorter) parsedElements.get(i);
632 boolean ignoreChar = false;
634 switch (elt.comparisonType)
636 case CollationSorter.GREATERP:
637 primary_seq++;
638 if (inverseComparisons)
640 secondary_seq = Short.MAX_VALUE;
641 secondaryType = DECREASING;
643 else
645 secondary_seq = 0;
646 secondaryType = INCREASING;
648 tertiary_seq = 0;
649 equality_seq = 0;
650 inverseComparisons = false;
651 break;
652 case CollationSorter.GREATERS:
653 if (secondaryType == DECREASING)
654 secondary_seq--;
655 else
656 secondary_seq++;
657 tertiary_seq = 0;
658 equality_seq = 0;
659 break;
660 case CollationSorter.INVERSE_SECONDARY:
661 inverseComparisons = true;
662 continue element_loop;
663 case CollationSorter.GREATERT:
664 tertiary_seq++;
665 if (primary_seq == 0)
666 last_tertiary_seq = tertiary_seq;
667 equality_seq = 0;
668 break;
669 case CollationSorter.EQUAL:
670 equality_seq++;
671 break;
672 case CollationSorter.RESET:
673 throw new ParseException
674 ("Invalid reached state 'RESET'. Internal error", elt.offset);
675 default:
676 throw new ParseException
677 ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
680 v.add(new CollationElement(elt.textElement, primary_seq,
681 secondary_seq, tertiary_seq,
682 equality_seq, elt.expansionOrdering, elt.ignore));
685 this.inverseAccentComparison = inverseComparisons;
687 ce_table = v.toArray();
689 last_primary_value = primary_seq+1;
690 last_tertiary_value = last_tertiary_seq+1;
694 * Build a tree where all keys are the texts of collation elements and data is
695 * the collation element itself. The tree is used when extracting all prefix
696 * for a given text.
698 private void buildPrefixAccess()
700 prefix_tree = new HashMap();
702 for (int i = 0; i < ce_table.length; i++)
704 CollationElement e = (CollationElement) ce_table[i];
706 prefix_tree.put(e.key, e);
711 * This method returns an integer which indicates whether the first
712 * specified <code>String</code> is less than, greater than, or equal to
713 * the second. The value depends not only on the collation rules in
714 * effect, but also the strength and decomposition settings of this object.
716 * @param source The first <code>String</code> to compare.
717 * @param target A second <code>String</code> to compare to the first.
719 * @return A negative integer if source &lt; target, a positive integer
720 * if source &gt; target, or 0 if source == target.
722 public int compare(String source, String target)
724 CollationElementIterator cs, ct;
725 CollationElement ord1block = null;
726 CollationElement ord2block = null;
727 boolean advance_block_1 = true;
728 boolean advance_block_2 = true;
730 cs = getCollationElementIterator(source);
731 ct = getCollationElementIterator(target);
733 for(;;)
735 int ord1;
736 int ord2;
739 * We have to check whether the characters are ignorable.
740 * If it is the case then forget them.
742 if (advance_block_1)
744 ord1block = cs.nextBlock();
745 if (ord1block != null && ord1block.ignore)
746 continue;
749 if (advance_block_2)
751 ord2block = ct.nextBlock();
752 if (ord2block != null && ord2block.ignore)
754 advance_block_1 = false;
755 continue;
758 else
759 advance_block_2 = true;
761 if (!advance_block_1)
762 advance_block_1 = true;
764 if (ord1block != null)
765 ord1 = ord1block.getValue();
766 else
768 if (ord2block == null)
769 return 0;
770 return -1;
773 if (ord2block == null)
774 return 1;
776 ord2 = ord2block.getValue();
778 // We know chars are totally equal, so skip
779 if (ord1 == ord2)
781 if (getStrength() == IDENTICAL)
782 if (!ord1block.key.equals(ord2block.key))
783 return ord1block.key.compareTo(ord2block.key);
784 continue;
787 // Check for primary strength differences
788 int prim1 = CollationElementIterator.primaryOrder(ord1);
789 int prim2 = CollationElementIterator.primaryOrder(ord2);
791 if (prim1 == 0 && getStrength() < TERTIARY)
793 advance_block_2 = false;
794 continue;
796 else if (prim2 == 0 && getStrength() < TERTIARY)
798 advance_block_1 = false;
799 continue;
802 if (prim1 < prim2)
803 return -1;
804 else if (prim1 > prim2)
805 return 1;
806 else if (getStrength() == PRIMARY)
807 continue;
809 // Check for secondary strength differences
810 int sec1 = CollationElementIterator.secondaryOrder(ord1);
811 int sec2 = CollationElementIterator.secondaryOrder(ord2);
813 if (sec1 < sec2)
814 return -1;
815 else if (sec1 > sec2)
816 return 1;
817 else if (getStrength() == SECONDARY)
818 continue;
820 // Check for tertiary differences
821 int tert1 = CollationElementIterator.tertiaryOrder(ord1);
822 int tert2 = CollationElementIterator.tertiaryOrder(ord2);
824 if (tert1 < tert2)
825 return -1;
826 else if (tert1 > tert2)
827 return 1;
828 else if (getStrength() == TERTIARY)
829 continue;
831 // Apparently JDK does this (at least for my test case).
832 return ord1block.key.compareTo(ord2block.key);
837 * This method tests this object for equality against the specified
838 * object. This will be true if and only if the specified object is
839 * another reference to this object.
841 * @param obj The <code>Object</code> to compare against this object.
843 * @return <code>true</code> if the specified object is equal to this object,
844 * <code>false</code> otherwise.
846 public boolean equals(Object obj)
848 if (obj == this)
849 return true;
850 else
851 return false;
855 * This method builds a default collation element without invoking
856 * the database created from the rules passed to the constructor.
858 * @param c Character which needs a collation element.
859 * @return A valid brand new CollationElement instance.
861 CollationElement getDefaultElement(char c)
863 int v;
865 // Preliminary support for generic accent sorting inversion (I don't know if all
866 // characters in the range should be sorted backward). This is the place
867 // to fix this if needed.
868 if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
869 v = 0x0361 - ((int) c - 0x02B9);
870 else
871 v = (short) c;
872 return new CollationElement("" + c, last_primary_value + v,
873 (short) 0, (short) 0, (short) 0, null, false);
877 * This method builds a default collation element for an accented character
878 * without invoking the database created from the rules passed to the constructor.
880 * @param c Character which needs a collation element.
881 * @return A valid brand new CollationElement instance.
883 CollationElement getDefaultAccentedElement(char c)
885 int v;
887 // Preliminary support for generic accent sorting inversion (I don't know if all
888 // characters in the range should be sorted backward). This is the place
889 // to fix this if needed.
890 if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
891 v = 0x0361 - ((int) c - 0x02B9);
892 else
893 v = (short) c;
894 return new CollationElement("" + c, (short) 0,
895 (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
899 * This method returns an instance for <code>CollationElementIterator</code>
900 * for the specified <code>String</code> under the collation rules for this
901 * object.
903 * @param source The <code>String</code> to return the
904 * <code>CollationElementIterator</code> instance for.
906 * @return A <code>CollationElementIterator</code> for the specified
907 * <code>String</code>.
909 public CollationElementIterator getCollationElementIterator(String source)
911 return new CollationElementIterator(this, source);
915 * This method returns an instance of <code>CollationElementIterator</code>
916 * for the <code>String</code> represented by the specified
917 * <code>CharacterIterator</code>.
919 * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
921 * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
923 public CollationElementIterator getCollationElementIterator(CharacterIterator source)
925 StringBuffer expand = new StringBuffer("");
927 // Right now we assume that we will read from the beginning of the string.
928 for (char c = source.first();
929 c != CharacterIterator.DONE;
930 c = source.next())
931 decomposeCharacter(c, expand);
933 return getCollationElementIterator(expand.toString());
937 * This method returns an instance of <code>CollationKey</code> for the
938 * specified <code>String</code>. The object returned will have a
939 * more efficient mechanism for its comparison function that could
940 * provide speed benefits if multiple comparisons are performed, such
941 * as during a sort.
943 * @param source The <code>String</code> to create a <code>CollationKey</code> for.
945 * @return A <code>CollationKey</code> for the specified <code>String</code>.
947 public CollationKey getCollationKey(String source)
949 CollationElementIterator cei = getCollationElementIterator(source);
950 ArrayList vect = new ArrayList();
952 int ord = cei.next();
953 cei.reset(); //set to start of string
955 while (ord != CollationElementIterator.NULLORDER)
957 // If the primary order is null, it means this is an ignorable
958 // character.
959 if (CollationElementIterator.primaryOrder(ord) == 0)
961 ord = cei.next();
962 continue;
964 switch (getStrength())
966 case PRIMARY:
967 ord = CollationElementIterator.primaryOrder(ord);
968 break;
970 case SECONDARY:
971 ord = CollationElementIterator.primaryOrder(ord) << 8;
972 ord |= CollationElementIterator.secondaryOrder(ord);
974 default:
975 break;
978 vect.add(new Integer(ord));
979 ord = cei.next(); //increment to next key
982 Object[] objarr = vect.toArray();
983 byte[] key = new byte[objarr.length * 4];
985 for (int i = 0; i < objarr.length; i++)
987 int j = ((Integer) objarr[i]).intValue();
988 key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
989 key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
990 key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
991 key [i * 4 + 3] = (byte) (j & 0x000000FF);
994 return new CollationKey(this, source, key);
998 * This method returns a <code>String</code> containing the collation rules
999 * for this object.
1001 * @return The collation rules for this object.
1003 public String getRules()
1005 return rules;
1009 * This method returns a hash value for this object.
1011 * @return A hash value for this object.
1013 public int hashCode()
1015 return System.identityHashCode(this);