Import GNU Classpath (20121202).
[official-gcc.git] / libjava / classpath / java / text / Bidi.java
blob236247d5ea0773dcd0413bb41bbcd5d2aeac7f47
1 /* Bidi.java -- Bidirectional Algorithm implementation
2 Copyright (C) 2005, 2006, 2012 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.text;
41 import java.awt.font.NumericShaper;
42 import java.awt.font.TextAttribute;
43 import java.util.ArrayList;
46 /**
47 * Bidirectional Algorithm implementation.
49 * The full algorithm is
50 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
51 * Annex #9: The Bidirectional Algorithm</a>.
53 * @since 1.4
55 public final class Bidi
57 /**
58 * This indicates that a strongly directional character in the text should
59 * set the initial direction, but if no such character is found, then the
60 * initial direction will be left-to-right.
62 public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
64 /**
65 * This indicates that a strongly directional character in the text should
66 * set the initial direction, but if no such character is found, then the
67 * initial direction will be right-to-left.
69 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
71 /**
72 * This indicates that the initial direction should be left-to-right.
74 public static final int DIRECTION_LEFT_TO_RIGHT = 0;
76 /**
77 * This indicates that the initial direction should be right-to-left.
79 public static final int DIRECTION_RIGHT_TO_LEFT = 1;
81 // Flags used when computing the result.
82 private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
83 private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
85 // The text we are examining, and the starting offset.
86 // If we had a better way to handle createLineBidi, we wouldn't
87 // need this at all -- which for the String case would be an
88 // efficiency win.
89 private char[] text;
90 private int textOffset;
91 // The embeddings corresponding to the text, and the starting offset.
92 private byte[] embeddings;
93 private int embeddingOffset;
94 // The length of the text (and embeddings) to use.
95 private int length;
96 // The flags.
97 private int flags;
99 // All instance fields following this point are initialized
100 // during analysis. Fields before this must be set by the constructor.
102 // The initial embedding level.
103 private int baseEmbedding;
104 // The type of each character in the text.
105 private byte[] types;
106 // The levels we compute.
107 private byte[] levels;
109 // A list of indices where a formatting code was found. These
110 // are indicies into the original text -- not into the text after
111 // the codes have been removed.
112 private ArrayList<Integer> formatterIndices;
114 // Indices of the starts of runs in the text.
115 private int[] runs;
117 // A convenience field where we keep track of what kinds of runs
118 // we've seen.
119 private int resultFlags;
122 * Create a new Bidi object given an attributed character iterator.
123 * This constructor will examine various attributes of the text:
124 * <ul>
125 * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126 * paragraph's base embedding level. This constructor will recognize
127 * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128 * {@link TextAttribute#RUN_DIRECTION_RTL}. If neither is given,
129 * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130 * </li>
132 * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133 * shaping will be done before the Bidi algorithm is run.
134 * </li>
136 * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137 * character, then the value of this attribute will be used as an
138 * embedding level override.
139 * </li>
140 * </ul>
141 * @param iter the attributed character iterator to use
143 public Bidi(AttributedCharacterIterator iter)
145 // If set, this attribute should be set on all characters.
146 // We don't check this (should we?) but we do assume that we
147 // can simply examine the first character.
148 Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149 if (val == TextAttribute.RUN_DIRECTION_LTR)
150 this.flags = DIRECTION_LEFT_TO_RIGHT;
151 else if (val == TextAttribute.RUN_DIRECTION_RTL)
152 this.flags = DIRECTION_RIGHT_TO_LEFT;
153 else
154 this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
156 // Likewise this attribute should be specified on the whole text.
157 // We read it here and then, if it is set, we apply the numeric shaper
158 // to the text before processing it.
159 NumericShaper shaper = null;
160 val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161 if (val instanceof NumericShaper)
162 shaper = (NumericShaper) val;
164 text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165 embeddings = new byte[text.length];
166 embeddingOffset = 0;
167 length = text.length;
168 for (int i = 0; i < text.length; ++i)
170 text[i] = iter.current();
172 val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173 if (val instanceof Integer)
175 int ival = ((Integer) val).intValue();
176 byte bval;
177 if (ival < -62 || ival > 62)
178 bval = 0;
179 else
180 bval = (byte) ival;
181 embeddings[i] = bval;
185 // Invoke the numeric shaper, if specified.
186 if (shaper != null)
187 shaper.shape(text, 0, length);
189 runBidi();
193 * Create a new Bidi object with the indicated text and, possibly, explicit
194 * embedding settings.
196 * If the embeddings array is null, it is ignored. Otherwise it is taken to
197 * be explicit embedding settings corresponding to the text. Positive values
198 * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199 * embedding overrides. (FIXME: not at all clear what this really means.)
201 * @param text the text to use
202 * @param offset the offset of the first character of the text
203 * @param embeddings the explicit embeddings, or null if there are none
204 * @param embedOffset the offset of the first embedding value to use
205 * @param length the length of both the text and the embeddings
206 * @param flags a flag indicating the base embedding direction
208 public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209 int length, int flags)
211 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213 && flags != DIRECTION_LEFT_TO_RIGHT
214 && flags != DIRECTION_RIGHT_TO_LEFT)
215 throw new IllegalArgumentException("unrecognized 'flags' argument: "
216 + flags);
217 this.text = text;
218 this.textOffset = offset;
219 this.embeddings = embeddings;
220 this.embeddingOffset = embedOffset;
221 this.length = length;
222 this.flags = flags;
224 runBidi();
228 * Create a new Bidi object using the contents of the given String
229 * as the text.
230 * @param text the text to use
231 * @param flags a flag indicating the base embedding direction
233 public Bidi(String text, int flags)
235 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237 && flags != DIRECTION_LEFT_TO_RIGHT
238 && flags != DIRECTION_RIGHT_TO_LEFT)
239 throw new IllegalArgumentException("unrecognized 'flags' argument: "
240 + flags);
242 // This is inefficient, but it isn't clear whether it matters.
243 // If it does we can change our implementation a bit to allow either
244 // a String or a char[].
245 this.text = text.toCharArray();
246 this.textOffset = 0;
247 this.embeddings = null;
248 this.embeddingOffset = 0;
249 this.length = text.length();
250 this.flags = flags;
252 runBidi();
256 * Implementation function which computes the initial type of
257 * each character in the input.
259 private void computeTypes()
261 types = new byte[length];
262 for (int i = 0; i < length; ++i)
263 types[i] = Character.getDirectionality(text[textOffset + i]);
267 * An internal function which implements rules P2 and P3.
268 * This computes the base embedding level.
269 * @return the paragraph's base embedding level
271 private int computeParagraphEmbeddingLevel()
273 // First check to see if the user supplied a directionality override.
274 if (flags == DIRECTION_LEFT_TO_RIGHT
275 || flags == DIRECTION_RIGHT_TO_LEFT)
276 return flags;
278 // This implements rules P2 and P3.
279 // (Note that we don't need P1, as the user supplies
280 // a paragraph.)
281 for (int i = 0; i < length; ++i)
283 int dir = types[i];
284 if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285 return DIRECTION_LEFT_TO_RIGHT;
286 if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287 || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288 return DIRECTION_RIGHT_TO_LEFT;
290 return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291 ? DIRECTION_LEFT_TO_RIGHT
292 : DIRECTION_RIGHT_TO_LEFT);
296 * An internal function which implements rules X1 through X9.
297 * This computes the initial levels for the text, handling
298 * explicit overrides and embeddings.
300 private void computeExplicitLevels()
302 levels = new byte[length];
303 byte currentEmbedding = (byte) baseEmbedding;
304 // The directional override is a Character directionality
305 // constant. -1 means there is no override.
306 byte directionalOverride = -1;
307 // The stack of pushed embeddings, and the stack pointer.
308 // Note that because the direction is inherent in the depth,
309 // and because we have a bit left over in a byte, we can encode
310 // the override, if any, directly in this value on the stack.
311 final int MAX_DEPTH = 62;
312 byte[] embeddingStack = new byte[MAX_DEPTH];
313 int sp = 0;
315 for (int i = 0; i < length; ++i)
317 // If we see an explicit embedding, we use that, even if
318 // the current character is itself a directional override.
319 if (embeddings != null && embeddings[embeddingOffset + i] != 0)
321 // It isn't at all clear what we're supposed to do here.
322 // What does a negative value really mean?
323 // Should we push on the embedding stack here?
324 currentEmbedding = embeddings[embeddingOffset + i];
325 if (currentEmbedding < 0)
327 currentEmbedding = (byte) -currentEmbedding;
328 directionalOverride
329 = (((currentEmbedding % 2) == 0)
330 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
333 else
334 directionalOverride = -1;
335 continue;
337 // No explicit embedding.
338 boolean isLtoR = false;
339 boolean isSpecial = true;
340 switch (types[i])
342 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344 isLtoR = true;
345 // Fall through.
346 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
349 byte newEmbedding;
350 if (isLtoR)
352 // Least greater even.
353 newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
355 else
357 // Least greater odd.
358 newEmbedding = (byte) ((currentEmbedding + 1) | 1);
360 // FIXME: we don't properly handle invalid pushes.
361 if (newEmbedding < MAX_DEPTH)
363 // The new level is valid. Push the old value.
364 // See above for a comment on the encoding here.
365 if (directionalOverride != -1)
366 currentEmbedding |= Byte.MIN_VALUE;
367 embeddingStack[sp++] = currentEmbedding;
368 currentEmbedding = newEmbedding;
369 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370 directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371 else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372 directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373 else
374 directionalOverride = -1;
377 break;
378 case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
380 // FIXME: we don't properly handle a pop with a corresponding
381 // invalid push.
382 if (sp == 0)
384 // We saw a pop without a push. Just ignore it.
385 break;
387 byte newEmbedding = embeddingStack[--sp];
388 currentEmbedding = (byte) (newEmbedding & 0x7f);
389 if (newEmbedding < 0)
390 directionalOverride
391 = (((newEmbedding & 1) == 0)
392 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394 else
395 directionalOverride = -1;
397 break;
398 default:
399 isSpecial = false;
400 break;
402 levels[i] = currentEmbedding;
403 if (isSpecial)
405 // Mark this character for removal.
406 if (formatterIndices == null)
407 formatterIndices = new ArrayList<Integer>();
408 formatterIndices.add(Integer.valueOf(i));
410 else if (directionalOverride != -1)
411 types[i] = directionalOverride;
414 // Remove the formatting codes and update both the arrays
415 // and 'length'. It would be more efficient not to remove
416 // these codes, but it is also more complicated. Also, the
417 // Unicode algorithm reference does not properly describe
418 // how this is to be done -- from what I can tell, their suggestions
419 // in this area will not yield the correct results.
420 if (formatterIndices == null)
421 return;
422 int output = 0, input = 0;
423 final int size = formatterIndices.size();
424 for (int i = 0; i <= size; ++i)
426 int nextFmt;
427 if (i == size)
428 nextFmt = length;
429 else
430 nextFmt = formatterIndices.get(i).intValue();
431 // Non-formatter codes are from 'input' to 'nextFmt'.
432 int len = nextFmt - input;
433 System.arraycopy(levels, input, levels, output, len);
434 System.arraycopy(types, input, types, output, len);
435 output += len;
436 input = nextFmt + 1;
438 length -= formatterIndices.size();
442 * An internal function to compute the boundaries of runs
443 * in the text. It isn't strictly necessary to do this, but
444 * it lets us write some following passes in a less complicated
445 * way. Also it lets us efficiently implement some of the public
446 * methods. A run is simply a sequence of characters at the
447 * same level.
449 private void computeRuns()
451 int runCount = 0;
452 int currentEmbedding = baseEmbedding;
453 for (int i = 0; i < length; ++i)
455 if (levels[i] != currentEmbedding)
457 currentEmbedding = levels[i];
458 ++runCount;
462 // This may be called multiple times. If so, and if
463 // the number of runs has not changed, then don't bother
464 // allocating a new array.
465 if (runs == null || runs.length != runCount + 1)
466 runs = new int[runCount + 1];
467 int where = 0;
468 int lastRunStart = 0;
469 currentEmbedding = baseEmbedding;
470 for (int i = 0; i < length; ++i)
472 if (levels[i] != currentEmbedding)
474 runs[where++] = lastRunStart;
475 lastRunStart = i;
476 currentEmbedding = levels[i];
479 runs[where++] = lastRunStart;
483 * An internal method to resolve weak types. This implements
484 * rules W1 through W7.
486 private void resolveWeakTypes()
488 final int runCount = getRunCount();
490 int previousLevel = baseEmbedding;
491 for (int run = 0; run < runCount; ++run)
493 int start = getRunStart(run);
494 int end = getRunLimit(run);
495 int level = getRunLevel(run);
497 // These are the names used in the Bidi algorithm.
498 byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501 int nextLevel;
502 if (run == runCount - 1)
503 nextLevel = baseEmbedding;
504 else
505 nextLevel = getRunLevel(run + 1);
506 byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
510 byte prevType = sor;
511 byte prevStrongType = sor;
512 for (int i = start; i < end; ++i)
514 final byte nextType = (i == end - 1) ? eor : types[i + 1];
516 // Rule W1: change NSM to the prevailing direction.
517 if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518 types[i] = prevType;
519 else
520 prevType = types[i];
522 // Rule W2: change EN to AN in some cases.
523 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
525 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526 types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
528 else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531 prevStrongType = types[i];
533 // Rule W3: change AL to R.
534 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535 types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
537 // Rule W4: handle separators between two numbers.
538 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
541 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543 types[i] = nextType;
545 else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546 && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547 && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548 types[i] = nextType;
550 // Rule W5: change a sequence of european terminators to
551 // european numbers, if they are adjacent to european numbers.
552 // We also include BN characters in this.
553 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
556 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557 types[i] = prevType;
558 else
560 // Look ahead to see if there is an EN terminating this
561 // sequence of ETs.
562 int j = i + 1;
563 while (j < end
564 && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565 || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566 ++j;
567 if (j < end
568 && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
570 // Change them all to EN now.
571 for (int k = i; k < j; ++k)
572 types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
577 // Rule W6: separators and terminators change to ON.
578 // Again we include BN.
579 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583 types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
585 // Rule W7: change european number types.
586 if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588 types[i] = prevStrongType;
591 previousLevel = level;
596 * An internal method to resolve neutral types. This implements
597 * rules N1 and N2.
599 private void resolveNeutralTypes()
601 // This implements rules N1 and N2.
602 final int runCount = getRunCount();
604 int previousLevel = baseEmbedding;
605 for (int run = 0; run < runCount; ++run)
607 int start = getRunStart(run);
608 int end = getRunLimit(run);
609 int level = getRunLevel(run);
611 byte embeddingDirection
612 = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614 // These are the names used in the Bidi algorithm.
615 byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618 int nextLevel;
619 if (run == runCount - 1)
620 nextLevel = baseEmbedding;
621 else
622 nextLevel = getRunLevel(run + 1);
623 byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
627 byte prevStrong = sor;
628 int neutralStart = -1;
629 for (int i = start; i <= end; ++i)
631 byte newStrong = -1;
632 byte thisType = i == end ? eor : types[i];
633 switch (thisType)
635 case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637 break;
638 case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639 case Character.DIRECTIONALITY_ARABIC_NUMBER:
640 case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642 break;
643 case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644 case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645 case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646 case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647 case Character.DIRECTIONALITY_WHITESPACE:
648 if (neutralStart == -1)
649 neutralStart = i;
650 break;
652 // If we see a strong character, update all the neutrals.
653 if (newStrong != -1)
655 if (neutralStart != -1)
657 byte override = (prevStrong == newStrong
658 ? prevStrong
659 : embeddingDirection);
660 for (int j = neutralStart; j < i; ++j)
661 types[j] = override;
663 prevStrong = newStrong;
664 neutralStart = -1;
668 previousLevel = level;
673 * An internal method to resolve implicit levels.
674 * This implements rules I1 and I2.
676 private void resolveImplicitLevels()
678 // This implements rules I1 and I2.
679 for (int i = 0; i < length; ++i)
681 if ((levels[i] & 1) == 0)
683 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
684 ++levels[i];
685 else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
686 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
687 levels[i] += 2;
689 else
691 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
692 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
693 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
694 ++levels[i];
697 // Update the result flags.
698 resultFlags |= 1 << (levels[i] & 1);
700 // One final update of the result flags, using the base level.
701 resultFlags |= 1 << baseEmbedding;
705 * This reinserts the formatting codes that we removed early on.
706 * Actually it does not insert formatting codes per se, but rather
707 * simply inserts new levels at the appropriate locations in the
708 * 'levels' array.
710 private void reinsertFormattingCodes()
712 if (formatterIndices == null)
713 return;
714 int input = length;
715 int output = levels.length;
716 // Process from the end as we are copying the array over itself here.
717 for (int index = formatterIndices.size() - 1; index >= 0; --index)
719 int nextFmt = formatterIndices.get(index).intValue();
721 // nextFmt points to a location in the original array. So,
722 // nextFmt+1 is the target of our copying. output is the location
723 // to which we last copied, thus we can derive the length of the
724 // copy from it.
725 int len = output - nextFmt - 1;
726 output = nextFmt;
727 input -= len;
728 // Note that we no longer need 'types' at this point, so we
729 // only edit 'levels'.
730 if (nextFmt + 1 < levels.length)
731 System.arraycopy(levels, input, levels, nextFmt + 1, len);
733 // Now set the level at the reinsertion point.
734 int rightLevel;
735 if (output == levels.length - 1)
736 rightLevel = baseEmbedding;
737 else
738 rightLevel = levels[output + 1];
739 int leftLevel;
740 if (input == 0)
741 leftLevel = baseEmbedding;
742 else
743 leftLevel = levels[input];
744 levels[output] = (byte) Math.max(leftLevel, rightLevel);
746 length = levels.length;
750 * This is the main internal entry point. After a constructor
751 * has initialized the appropriate local state, it will call
752 * this method to do all the work.
754 private void runBidi()
756 computeTypes();
757 baseEmbedding = computeParagraphEmbeddingLevel();
758 computeExplicitLevels();
759 computeRuns();
760 resolveWeakTypes();
761 resolveNeutralTypes();
762 resolveImplicitLevels();
763 // We're done with the types. Let the GC clean up.
764 types = null;
765 reinsertFormattingCodes();
766 // After resolving the implicit levels, the number
767 // of runs may have changed.
768 computeRuns();
772 * Return true if the paragraph base embedding is left-to-right,
773 * false otherwise.
775 public boolean baseIsLeftToRight()
777 return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
781 * Create a new Bidi object for a single line of text, taken
782 * from the text used when creating the current Bidi object.
783 * @param start the index of the first character of the line
784 * @param end the index of the final character of the line
785 * @return a new Bidi object for the indicated line of text
787 public Bidi createLineBidi(int start, int end)
789 // This isn't the most efficient implementation possible.
790 // This probably does not matter, so we choose simplicity instead.
791 int level = getLevelAt(start);
792 int flag = (((level % 2) == 0)
793 ? DIRECTION_LEFT_TO_RIGHT
794 : DIRECTION_RIGHT_TO_LEFT);
795 return new Bidi(text, textOffset + start,
796 embeddings, embeddingOffset + start,
797 end - start, flag);
801 * Return the base embedding level of the paragraph.
803 public int getBaseLevel()
805 return baseEmbedding;
809 * Return the length of the paragraph, in characters.
811 public int getLength()
813 return length;
817 * Return the level at the indicated character. If the
818 * supplied index is less than zero or greater than the length
819 * of the text, then the paragraph's base embedding level will
820 * be returned.
821 * @param offset the character to examine
822 * @return the level of that character
824 public int getLevelAt(int offset)
826 if (offset < 0 || offset >= length)
827 return getBaseLevel();
828 return levels[offset];
832 * Return the number of runs in the result. A run is
833 * a sequence of characters at the same embedding level.
835 public int getRunCount()
837 return runs.length;
841 * Return the level of the indicated run.
842 * @param which the run to examine
843 * @return the level of that run
845 public int getRunLevel(int which)
847 return levels[runs[which]];
851 * Return the index of the character just following the end
852 * of the indicated run.
853 * @param which the run to examine
854 * @return the index of the character after the final character
855 * of the run
857 public int getRunLimit(int which)
859 if (which == runs.length - 1)
860 return length;
861 return runs[which + 1];
865 * Return the index of the first character in the indicated run.
866 * @param which the run to examine
867 * @return the index of the first character of the run
869 public int getRunStart(int which)
871 return runs[which];
875 * Return true if the text is entirely left-to-right, and the
876 * base embedding is also left-to-right.
878 public boolean isLeftToRight()
880 return resultFlags == LTOR;
884 * Return true if the text consists of mixed left-to-right and
885 * right-to-left runs, or if the text consists of one kind of run
886 * which differs from the base embedding direction.
888 public boolean isMixed()
890 return resultFlags == (LTOR | RTOL);
894 * Return true if the text is entirely right-to-left, and the
895 * base embedding is also right-to-left.
897 public boolean isRightToLeft()
899 return resultFlags == RTOL;
903 * Return a String describing the internal state of this object.
904 * This is only useful for debugging.
906 public String toString()
908 return "Bidi Bidi Bidi I like you, Buck!";
912 * Reorder objects according to the levels passed in. This implements
913 * reordering as defined by the Unicode bidirectional layout specification.
914 * The levels are integers from 0 to 62; even numbers represent left-to-right
915 * runs, and odd numbers represent right-to-left runs.
917 * @param levels the levels associated with each object
918 * @param levelOffset the index of the first level to use
919 * @param objs the objects to reorder according to the levels
920 * @param objOffset the index of the first object to use
921 * @param count the number of objects (and levels) to manipulate
923 public static void reorderVisually(byte[] levels, int levelOffset,
924 Object[] objs, int objOffset, int count)
926 // We need a copy of the 'levels' array, as we are going to modify it.
927 // This is unfortunate but difficult to avoid.
928 byte[] levelCopy = new byte[count];
929 // Do this explicitly so we can also find the maximum depth at the
930 // same time.
931 int max = 0;
932 int lowestOdd = 63;
933 for (int i = 0; i < count; ++i)
935 levelCopy[i] = levels[levelOffset + i];
936 max = Math.max(levelCopy[i], max);
937 if (levelCopy[i] % 2 != 0)
938 lowestOdd = Math.min(lowestOdd, levelCopy[i]);
941 // Reverse the runs starting with the deepest.
942 for (int depth = max; depth >= lowestOdd; --depth)
944 int start = 0;
945 while (start < count)
947 // Find the start of a run >= DEPTH.
948 while (start < count && levelCopy[start] < depth)
949 ++start;
950 if (start == count)
951 break;
952 // Find the end of the run.
953 int end = start + 1;
954 while (end < count && levelCopy[end] >= depth)
955 ++end;
957 // Reverse this run.
958 for (int i = 0; i < (end - start) / 2; ++i)
960 byte tmpb = levelCopy[end - i - 1];
961 levelCopy[end - i - 1] = levelCopy[start + i];
962 levelCopy[start + i] = tmpb;
963 Object tmpo = objs[objOffset + end - i - 1];
964 objs[objOffset + end - i - 1] = objs[objOffset + start + i];
965 objs[objOffset + start + i] = tmpo;
968 // Handle the next run.
969 start = end + 1;
975 * Returns false if all characters in the text between start and end
976 * are all left-to-right text. This implementation is just calls
977 * <code>Character.getDirectionality(char)</code> on all characters
978 * and makes sure all characters are either explicitly left-to-right
979 * or neutral in directionality (character types L, EN, ES, ET, AN,
980 * CS, S and WS).
982 public static boolean requiresBidi(char[] text, int start, int end)
984 for (int i = start; i < end; i++)
986 byte dir = Character.getDirectionality(text[i]);
987 if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
988 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
989 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
990 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
991 && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
992 && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
993 && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
994 && dir != Character.DIRECTIONALITY_WHITESPACE
995 && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
996 return true;
999 return false;