Merge from mainline.
[official-gcc.git] / libjava / classpath / java / text / Bidi.java
blob43743604570d11f1682e331e699429c384cdef48
1 /* Bidi.java -- Bidirectional Algorithm implementation
2 Copyright (C) 2005, 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.text;
41 import java.awt.font.NumericShaper;
42 import java.awt.font.TextAttribute;
43 import java.util.ArrayList;
46 /**
47 * Bidirectional Algorithm implementation.
49 * The full algorithm is
50 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
51 * Annex #9: The Bidirectional Algorithm</a>.
53 * @since 1.4
55 public final class Bidi
57 /**
58 * This indicates that a strongly directional character in the text should
59 * set the initial direction, but if no such character is found, then the
60 * initial direction will be left-to-right.
62 public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
64 /**
65 * This indicates that a strongly directional character in the text should
66 * set the initial direction, but if no such character is found, then the
67 * initial direction will be right-to-left.
69 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
71 /**
72 * This indicates that the initial direction should be left-to-right.
74 public static final int DIRECTION_LEFT_TO_RIGHT = 0;
76 /**
77 * This indicates that the initial direction should be right-to-left.
79 public static final int DIRECTION_RIGHT_TO_LEFT = 1;
81 // Flags used when computing the result.
82 private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
83 private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
85 // The text we are examining, and the starting offset.
86 // If we had a better way to handle createLineBidi, we wouldn't
87 // need this at all -- which for the String case would be an
88 // efficiency win.
89 private char[] text;
90 private int textOffset;
91 // The embeddings corresponding to the text, and the starting offset.
92 private byte[] embeddings;
93 private int embeddingOffset;
94 // The length of the text (and embeddings) to use.
95 private int length;
96 // The flags.
97 private int flags;
99 // All instance fields following this point are initialized
100 // during analysis. Fields before this must be set by the constructor.
102 // The initial embedding level.
103 private int baseEmbedding;
104 // The type of each character in the text.
105 private byte[] types;
106 // The levels we compute.
107 private byte[] levels;
109 // A list of indices where a formatting code was found. These
110 // are indicies into the original text -- not into the text after
111 // the codes have been removed.
112 private ArrayList formatterIndices;
114 // Indices of the starts of runs in the text.
115 private int[] runs;
117 // A convenience field where we keep track of what kinds of runs
118 // we've seen.
119 private int resultFlags;
122 * Create a new Bidi object given an attributed character iterator.
123 * This constructor will examine various attributes of the text:
124 * <ul>
125 * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126 * paragraph's base embedding level. This constructor will recognize
127 * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128 * {@link TextAttribute#RUN_DIRECTION_RTL}. If neither is given,
129 * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130 * </li>
132 * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133 * shaping will be done before the Bidi algorithm is run.
134 * </li>
136 * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137 * character, then the value of this attribute will be used as an
138 * embedding level override.
139 * </li>
140 * </ul>
141 * @param iter the attributed character iterator to use
143 public Bidi(AttributedCharacterIterator iter)
145 // If set, this attribute should be set on all characters.
146 // We don't check this (should we?) but we do assume that we
147 // can simply examine the first character.
148 Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149 if (val == TextAttribute.RUN_DIRECTION_LTR)
150 this.flags = DIRECTION_LEFT_TO_RIGHT;
151 else if (val == TextAttribute.RUN_DIRECTION_RTL)
152 this.flags = DIRECTION_RIGHT_TO_LEFT;
153 else
154 this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
156 // Likewise this attribute should be specified on the whole text.
157 // We read it here and then, if it is set, we apply the numeric shaper
158 // to the text before processing it.
159 NumericShaper shaper = null;
160 val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161 if (val instanceof NumericShaper)
162 shaper = (NumericShaper) val;
164 char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165 this.embeddings = new byte[this.text.length];
166 this.embeddingOffset = 0;
167 this.length = text.length;
168 for (int i = 0; i < this.text.length; ++i)
170 this.text[i] = iter.current();
172 val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173 if (val instanceof Integer)
175 int ival = ((Integer) val).intValue();
176 byte bval;
177 if (ival < -62 || ival > 62)
178 bval = 0;
179 else
180 bval = (byte) ival;
181 this.embeddings[i] = bval;
185 // Invoke the numeric shaper, if specified.
186 if (shaper != null)
187 shaper.shape(this.text, 0, this.length);
189 runBidi();
193 * Create a new Bidi object with the indicated text and, possibly, explicit
194 * embedding settings.
196 * If the embeddings array is null, it is ignored. Otherwise it is taken to
197 * be explicit embedding settings corresponding to the text. Positive values
198 * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199 * embedding overrides. (FIXME: not at all clear what this really means.)
201 * @param text the text to use
202 * @param offset the offset of the first character of the text
203 * @param embeddings the explicit embeddings, or null if there are none
204 * @param embedOffset the offset of the first embedding value to use
205 * @param length the length of both the text and the embeddings
206 * @param flags a flag indicating the base embedding direction
208 public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209 int length, int flags)
211 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213 && flags != DIRECTION_LEFT_TO_RIGHT
214 && flags != DIRECTION_RIGHT_TO_LEFT)
215 throw new IllegalArgumentException("unrecognized 'flags' argument: "
216 + flags);
217 this.text = text;
218 this.textOffset = offset;
219 this.embeddings = embeddings;
220 this.embeddingOffset = embedOffset;
221 this.length = length;
222 this.flags = flags;
224 runBidi();
228 * Create a new Bidi object using the contents of the given String
229 * as the text.
230 * @param text the text to use
231 * @param flags a flag indicating the base embedding direction
233 public Bidi(String text, int flags)
235 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237 && flags != DIRECTION_LEFT_TO_RIGHT
238 && flags != DIRECTION_RIGHT_TO_LEFT)
239 throw new IllegalArgumentException("unrecognized 'flags' argument: "
240 + flags);
242 // This is inefficient, but it isn't clear whether it matters.
243 // If it does we can change our implementation a bit to allow either
244 // a String or a char[].
245 this.text = text.toCharArray();
246 this.textOffset = 0;
247 this.embeddings = null;
248 this.embeddingOffset = 0;
249 this.length = text.length();
250 this.flags = flags;
252 runBidi();
256 * Implementation function which computes the initial type of
257 * each character in the input.
259 private void computeTypes()
261 types = new byte[length];
262 for (int i = 0; i < length; ++i)
263 types[i] = Character.getDirectionality(text[textOffset + i]);
267 * An internal function which implements rules P2 and P3.
268 * This computes the base embedding level.
269 * @return the paragraph's base embedding level
271 private int computeParagraphEmbeddingLevel()
273 // First check to see if the user supplied a directionality override.
274 if (flags == DIRECTION_LEFT_TO_RIGHT
275 || flags == DIRECTION_RIGHT_TO_LEFT)
276 return flags;
278 // This implements rules P2 and P3.
279 // (Note that we don't need P1, as the user supplies
280 // a paragraph.)
281 for (int i = 0; i < length; ++i)
283 int dir = types[i];
284 if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285 return DIRECTION_LEFT_TO_RIGHT;
286 if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287 || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288 return DIRECTION_RIGHT_TO_LEFT;
290 return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291 ? DIRECTION_LEFT_TO_RIGHT
292 : DIRECTION_RIGHT_TO_LEFT);
296 * An internal function which implements rules X1 through X9.
297 * This computes the initial levels for the text, handling
298 * explicit overrides and embeddings.
300 private void computeExplicitLevels()
302 levels = new byte[length];
303 byte currentEmbedding = (byte) baseEmbedding;
304 // The directional override is a Character directionality
305 // constant. -1 means there is no override.
306 byte directionalOverride = -1;
307 // The stack of pushed embeddings, and the stack pointer.
308 // Note that because the direction is inherent in the depth,
309 // and because we have a bit left over in a byte, we can encode
310 // the override, if any, directly in this value on the stack.
311 final int MAX_DEPTH = 62;
312 byte[] embeddingStack = new byte[MAX_DEPTH];
313 int sp = 0;
315 for (int i = 0; i < length; ++i)
317 // If we see an explicit embedding, we use that, even if
318 // the current character is itself a directional override.
319 if (embeddings != null && embeddings[embeddingOffset + i] != 0)
321 // It isn't at all clear what we're supposed to do here.
322 // What does a negative value really mean?
323 // Should we push on the embedding stack here?
324 currentEmbedding = embeddings[embeddingOffset + i];
325 if (currentEmbedding < 0)
327 currentEmbedding = (byte) -currentEmbedding;
328 directionalOverride
329 = (((currentEmbedding % 2) == 0)
330 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
333 else
334 directionalOverride = -1;
335 continue;
337 // No explicit embedding.
338 boolean isLtoR = false;
339 boolean isSpecial = true;
340 switch (types[i])
342 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344 isLtoR = true;
345 // Fall through.
346 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
349 byte newEmbedding;
350 if (isLtoR)
352 // Least greater even.
353 newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
355 else
357 // Least greater odd.
358 newEmbedding = (byte) ((currentEmbedding + 1) | 1);
360 // FIXME: we don't properly handle invalid pushes.
361 if (newEmbedding < MAX_DEPTH)
363 // The new level is valid. Push the old value.
364 // See above for a comment on the encoding here.
365 if (directionalOverride != -1)
366 currentEmbedding |= Byte.MIN_VALUE;
367 embeddingStack[sp++] = currentEmbedding;
368 currentEmbedding = newEmbedding;
369 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370 directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371 else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372 directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373 else
374 directionalOverride = -1;
377 break;
378 case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
380 // FIXME: we don't properly handle a pop with a corresponding
381 // invalid push.
382 if (sp == 0)
384 // We saw a pop without a push. Just ignore it.
385 break;
387 byte newEmbedding = embeddingStack[--sp];
388 currentEmbedding = (byte) (newEmbedding & 0x7f);
389 if (newEmbedding < 0)
390 directionalOverride
391 = (((newEmbedding & 1) == 0)
392 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394 else
395 directionalOverride = -1;
397 break;
398 default:
399 isSpecial = false;
400 break;
402 levels[i] = currentEmbedding;
403 if (isSpecial)
405 // Mark this character for removal.
406 if (formatterIndices == null)
407 formatterIndices = new ArrayList();
408 formatterIndices.add(Integer.valueOf(i));
410 else if (directionalOverride != -1)
411 types[i] = directionalOverride;
414 // Remove the formatting codes and update both the arrays
415 // and 'length'. It would be more efficient not to remove
416 // these codes, but it is also more complicated. Also, the
417 // Unicode algorithm reference does not properly describe
418 // how this is to be done -- from what I can tell, their suggestions
419 // in this area will not yield the correct results.
420 if (formatterIndices == null)
421 return;
422 int output = 0, input = 0;
423 final int size = formatterIndices.size();
424 for (int i = 0; i <= size; ++i)
426 int nextFmt;
427 if (i == size)
428 nextFmt = length;
429 else
430 nextFmt = ((Integer) formatterIndices.get(i)).intValue();
431 // Non-formatter codes are from 'input' to 'nextFmt'.
432 int len = nextFmt - input;
433 System.arraycopy(levels, input, levels, output, len);
434 System.arraycopy(types, input, types, output, len);
435 output += len;
436 input = nextFmt + 1;
438 length -= formatterIndices.size();
442 * An internal function to compute the boundaries of runs
443 * in the text. It isn't strictly necessary to do this, but
444 * it lets us write some following passes in a less complicated
445 * way. Also it lets us efficiently implement some of the public
446 * methods. A run is simply a sequence of characters at the
447 * same level.
449 private void computeRuns()
451 int runCount = 0;
452 int currentEmbedding = baseEmbedding;
453 for (int i = 0; i < length; ++i)
455 if (levels[i] != currentEmbedding)
457 currentEmbedding = levels[i];
458 ++runCount;
462 // This may be called multiple times. If so, and if
463 // the number of runs has not changed, then don't bother
464 // allocating a new array.
465 if (runs == null || runs.length != runCount + 1)
466 runs = new int[runCount + 1];
467 int where = 0;
468 int lastRunStart = 0;
469 currentEmbedding = baseEmbedding;
470 for (int i = 0; i < length; ++i)
472 if (levels[i] != currentEmbedding)
474 runs[where++] = lastRunStart;
475 lastRunStart = i;
476 currentEmbedding = levels[i];
479 runs[where++] = lastRunStart;
483 * An internal method to resolve weak types. This implements
484 * rules W1 through W7.
486 private void resolveWeakTypes()
488 final int runCount = getRunCount();
490 int previousLevel = baseEmbedding;
491 for (int run = 0; run < runCount; ++run)
493 int start = getRunStart(run);
494 int end = getRunLimit(run);
495 int level = getRunLevel(run);
497 // These are the names used in the Bidi algorithm.
498 byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501 int nextLevel;
502 if (run == runCount - 1)
503 nextLevel = baseEmbedding;
504 else
505 nextLevel = getRunLevel(run + 1);
506 byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
510 byte prevType = sor;
511 byte prevStrongType = sor;
512 for (int i = start; i < end; ++i)
514 final byte nextType = (i == end - 1) ? eor : types[i + 1];
516 // Rule W1: change NSM to the prevailing direction.
517 if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518 types[i] = prevType;
519 else
520 prevType = types[i];
522 // Rule W2: change EN to AN in some cases.
523 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
525 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526 types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
528 else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531 prevStrongType = types[i];
533 // Rule W3: change AL to R.
534 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535 types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
537 // Rule W4: handle separators between two numbers.
538 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
541 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543 types[i] = nextType;
545 else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546 && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547 && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548 types[i] = nextType;
550 // Rule W5: change a sequence of european terminators to
551 // european numbers, if they are adjacent to european numbers.
552 // We also include BN characters in this.
553 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
556 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557 types[i] = prevType;
558 else
560 // Look ahead to see if there is an EN terminating this
561 // sequence of ETs.
562 int j = i + 1;
563 while (j < end
564 && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565 || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566 ++j;
567 if (j < end
568 && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
570 // Change them all to EN now.
571 for (int k = i; k < j; ++k)
572 types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
577 // Rule W6: separators and terminators change to ON.
578 // Again we include BN.
579 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583 types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
585 // Rule W7: change european number types.
586 if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588 types[i] = prevStrongType;
591 previousLevel = level;
596 * An internal method to resolve neutral types. This implements
597 * rules N1 and N2.
599 private void resolveNeutralTypes()
601 // This implements rules N1 and N2.
602 final int runCount = getRunCount();
604 int previousLevel = baseEmbedding;
605 for (int run = 0; run < runCount; ++run)
607 int start = getRunStart(run);
608 int end = getRunLimit(run);
609 int level = getRunLevel(run);
611 byte embeddingDirection
612 = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614 // These are the names used in the Bidi algorithm.
615 byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618 int nextLevel;
619 if (run == runCount - 1)
620 nextLevel = baseEmbedding;
621 else
622 nextLevel = getRunLevel(run + 1);
623 byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
627 byte prevStrong = sor;
628 int neutralStart = -1;
629 for (int i = start; i <= end; ++i)
631 byte newStrong = -1;
632 byte thisType = i == end ? eor : types[i];
633 switch (thisType)
635 case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637 break;
638 case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639 case Character.DIRECTIONALITY_ARABIC_NUMBER:
640 case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642 break;
643 case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644 case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645 case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646 case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647 if (neutralStart == -1)
648 neutralStart = i;
649 break;
651 // If we see a strong character, update all the neutrals.
652 if (newStrong != -1)
654 if (neutralStart != -1)
656 byte override = (prevStrong == newStrong
657 ? prevStrong
658 : embeddingDirection);
659 for (int j = neutralStart; j < i; ++j)
660 types[i] = override;
662 prevStrong = newStrong;
663 neutralStart = -1;
667 previousLevel = level;
672 * An internal method to resolve implicit levels.
673 * This implements rules I1 and I2.
675 private void resolveImplicitLevels()
677 // This implements rules I1 and I2.
678 for (int i = 0; i < length; ++i)
680 if ((levels[i] & 1) == 0)
682 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
683 ++levels[i];
684 else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
685 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
686 levels[i] += 2;
688 else
690 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
691 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
692 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
693 ++levels[i];
696 // Update the result flags.
697 resultFlags |= 1 << (levels[i] & 1);
699 // One final update of the result flags, using the base level.
700 resultFlags |= 1 << baseEmbedding;
704 * This reinserts the formatting codes that we removed early on.
705 * Actually it does not insert formatting codes per se, but rather
706 * simply inserts new levels at the appropriate locations in the
707 * 'levels' array.
709 private void reinsertFormattingCodes()
711 if (formatterIndices == null)
712 return;
713 int input = length;
714 int output = levels.length;
715 // Process from the end as we are copying the array over itself here.
716 for (int index = formatterIndices.size() - 1; index >= 0; --index)
718 int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
720 // nextFmt points to a location in the original array. So,
721 // nextFmt+1 is the target of our copying. output is the location
722 // to which we last copied, thus we can derive the length of the
723 // copy from it.
724 int len = output - nextFmt - 1;
725 output = nextFmt;
726 input -= len;
727 // Note that we no longer need 'types' at this point, so we
728 // only edit 'levels'.
729 if (nextFmt + 1 < levels.length)
730 System.arraycopy(levels, input, levels, nextFmt + 1, len);
732 // Now set the level at the reinsertion point.
733 int rightLevel;
734 if (output == levels.length - 1)
735 rightLevel = baseEmbedding;
736 else
737 rightLevel = levels[output + 1];
738 int leftLevel;
739 if (input == 0)
740 leftLevel = baseEmbedding;
741 else
742 leftLevel = levels[input];
743 levels[output] = (byte) Math.max(leftLevel, rightLevel);
745 length = levels.length;
749 * This is the main internal entry point. After a constructor
750 * has initialized the appropriate local state, it will call
751 * this method to do all the work.
753 private void runBidi()
755 computeTypes();
756 baseEmbedding = computeParagraphEmbeddingLevel();
757 computeExplicitLevels();
758 computeRuns();
759 resolveWeakTypes();
760 resolveNeutralTypes();
761 resolveImplicitLevels();
762 // We're done with the types. Let the GC clean up.
763 types = null;
764 reinsertFormattingCodes();
765 // After resolving the implicit levels, the number
766 // of runs may have changed.
767 computeRuns();
771 * Return true if the paragraph base embedding is left-to-right,
772 * false otherwise.
774 public boolean baseIsLeftToRight()
776 return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
780 * Create a new Bidi object for a single line of text, taken
781 * from the text used when creating the current Bidi object.
782 * @param start the index of the first character of the line
783 * @param end the index of the final character of the line
784 * @return a new Bidi object for the indicated line of text
786 public Bidi createLineBidi(int start, int end)
788 // This isn't the most efficient implementation possible.
789 // This probably does not matter, so we choose simplicity instead.
790 int level = getLevelAt(start);
791 int flag = (((level % 2) == 0)
792 ? DIRECTION_LEFT_TO_RIGHT
793 : DIRECTION_RIGHT_TO_LEFT);
794 return new Bidi(text, textOffset + start,
795 embeddings, embeddingOffset + start,
796 end - start, flag);
800 * Return the base embedding level of the paragraph.
802 public int getBaseLevel()
804 return baseEmbedding;
808 * Return the length of the paragraph, in characters.
810 public int getLength()
812 return length;
816 * Return the level at the indicated character. If the
817 * supplied index is less than zero or greater than the length
818 * of the text, then the paragraph's base embedding level will
819 * be returned.
820 * @param offset the character to examine
821 * @return the level of that character
823 public int getLevelAt(int offset)
825 if (offset < 0 || offset >= length)
826 return getBaseLevel();
827 return levels[offset];
831 * Return the number of runs in the result. A run is
832 * a sequence of characters at the same embedding level.
834 public int getRunCount()
836 return runs.length;
840 * Return the level of the indicated run.
841 * @param which the run to examine
842 * @return the level of that run
844 public int getRunLevel(int which)
846 return levels[runs[which]];
850 * Return the index of the character just following the end
851 * of the indicated run.
852 * @param which the run to examine
853 * @return the index of the character after the final character
854 * of the run
856 public int getRunLimit(int which)
858 if (which == runs.length - 1)
859 return length;
860 return runs[which + 1];
864 * Return the index of the first character in the indicated run.
865 * @param which the run to examine
866 * @return the index of the first character of the run
868 public int getRunStart(int which)
870 return runs[which];
874 * Return true if the text is entirely left-to-right, and the
875 * base embedding is also left-to-right.
877 public boolean isLeftToRight()
879 return resultFlags == LTOR;
883 * Return true if the text consists of mixed left-to-right and
884 * right-to-left runs, or if the text consists of one kind of run
885 * which differs from the base embedding direction.
887 public boolean isMixed()
889 return resultFlags == (LTOR | RTOL);
893 * Return true if the text is entirely right-to-left, and the
894 * base embedding is also right-to-left.
896 public boolean isRightToLeft()
898 return resultFlags == RTOL;
902 * Return a String describing the internal state of this object.
903 * This is only useful for debugging.
905 public String toString()
907 return "Bidi Bidi Bidi I like you, Buck!";
911 * Reorder objects according to the levels passed in. This implements
912 * reordering as defined by the Unicode bidirectional layout specification.
913 * The levels are integers from 0 to 62; even numbers represent left-to-right
914 * runs, and odd numbers represent right-to-left runs.
916 * @param levels the levels associated with each object
917 * @param levelOffset the index of the first level to use
918 * @param objs the objects to reorder according to the levels
919 * @param objOffset the index of the first object to use
920 * @param count the number of objects (and levels) to manipulate
922 public static void reorderVisually(byte[] levels, int levelOffset,
923 Object[] objs, int objOffset, int count)
925 // We need a copy of the 'levels' array, as we are going to modify it.
926 // This is unfortunate but difficult to avoid.
927 byte[] levelCopy = new byte[count];
928 // Do this explicitly so we can also find the maximum depth at the
929 // same time.
930 int max = 0;
931 int lowestOdd = 63;
932 for (int i = 0; i < count; ++i)
934 levelCopy[i] = levels[levelOffset + i];
935 max = Math.max(levelCopy[i], max);
936 if (levelCopy[i] % 2 != 0)
937 lowestOdd = Math.min(lowestOdd, levelCopy[i]);
940 // Reverse the runs starting with the deepest.
941 for (int depth = max; depth >= lowestOdd; --depth)
943 int start = 0;
944 while (start < count)
946 // Find the start of a run >= DEPTH.
947 while (start < count && levelCopy[start] < depth)
948 ++start;
949 if (start == count)
950 break;
951 // Find the end of the run.
952 int end = start + 1;
953 while (end < count && levelCopy[end] >= depth)
954 ++end;
956 // Reverse this run.
957 for (int i = 0; i < (end - start) / 2; ++i)
959 byte tmpb = levelCopy[end - i - 1];
960 levelCopy[end - i - 1] = levelCopy[start + i];
961 levelCopy[start + i] = tmpb;
962 Object tmpo = objs[objOffset + end - i - 1];
963 objs[objOffset + end - i - 1] = objs[objOffset + start + i];
964 objs[objOffset + start + i] = tmpo;
967 // Handle the next run.
968 start = end + 1;
974 * Returns false if all characters in the text between start and end
975 * are all left-to-right text. This implementation is just calls
976 * <code>Character.getDirectionality(char)</code> on all characters
977 * and makes sure all characters are either explicitly left-to-right
978 * or neutral in directionality (character types L, EN, ES, ET, AN,
979 * CS, S and WS).
981 public static boolean requiresBidi(char[] text, int start, int end)
983 for (int i = start; i < end; i++)
985 byte dir = Character.getDirectionality(text[i]);
986 if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
987 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
988 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
989 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
990 && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
991 && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
992 && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
993 && dir != Character.DIRECTIONALITY_WHITESPACE)
994 return true;
997 return false;