1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 as per UAX#9, a part of the Unicode Standard.
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
58 #include "character.h"
59 #include "dispextern.h"
61 static int bidi_initialized
= 0;
63 static Lisp_Object bidi_type_table
, bidi_mirror_table
;
65 /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
66 #define LRM_CHAR 0x200E
67 #define RLM_CHAR 0x200F
68 #define LRE_CHAR 0x202A
69 #define RLE_CHAR 0x202B
70 #define PDF_CHAR 0x202C
71 #define LRO_CHAR 0x202D
72 #define RLO_CHAR 0x202E
76 /* Local data structures. (Look in dispextern.h for the rest.) */
78 /* What we need to know about the current paragraph. */
79 struct bidi_paragraph_info
{
80 EMACS_INT start_bytepos
; /* byte position where it begins */
81 EMACS_INT end_bytepos
; /* byte position where it ends */
82 int embedding_level
; /* its basic embedding level */
83 bidi_dir_t base_dir
; /* its base direction */
86 /* Data type for describing the bidirectional character categories. */
94 int bidi_ignore_explicit_marks_for_paragraph_level
= 1;
96 static Lisp_Object paragraph_start_re
, paragraph_separate_re
;
97 static Lisp_Object Qparagraph_start
, Qparagraph_separate
;
100 bidi_initialize (void)
103 #include "biditype.h"
104 #include "bidimirror.h"
108 bidi_type_table
= Fmake_char_table (Qnil
, make_number (STRONG_L
));
109 staticpro (&bidi_type_table
);
111 for (i
= 0; i
< sizeof bidi_type
/ sizeof bidi_type
[0]; i
++)
112 char_table_set_range (bidi_type_table
, bidi_type
[i
].from
, bidi_type
[i
].to
,
113 make_number (bidi_type
[i
].type
));
115 bidi_mirror_table
= Fmake_char_table (Qnil
, Qnil
);
116 staticpro (&bidi_mirror_table
);
118 for (i
= 0; i
< sizeof bidi_mirror
/ sizeof bidi_mirror
[0]; i
++)
119 char_table_set (bidi_mirror_table
, bidi_mirror
[i
].from
,
120 make_number (bidi_mirror
[i
].to
));
122 Qparagraph_start
= intern ("paragraph-start");
123 staticpro (&Qparagraph_start
);
124 paragraph_start_re
= Fsymbol_value (Qparagraph_start
);
125 if (!STRINGP (paragraph_start_re
))
126 paragraph_start_re
= build_string ("\f\\|[ \t]*$");
127 staticpro (¶graph_start_re
);
128 Qparagraph_separate
= intern ("paragraph-separate");
129 staticpro (&Qparagraph_separate
);
130 paragraph_separate_re
= Fsymbol_value (Qparagraph_separate
);
131 if (!STRINGP (paragraph_separate_re
))
132 paragraph_separate_re
= build_string ("[ \t\f]*$");
133 staticpro (¶graph_separate_re
);
134 bidi_initialized
= 1;
137 /* Return the bidi type of a character CH, subject to the current
138 directional OVERRIDE. */
139 static INLINE bidi_type_t
140 bidi_get_type (int ch
, bidi_dir_t override
)
142 bidi_type_t default_type
;
146 if (ch
< 0 || ch
> MAX_CHAR
)
149 default_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
151 if (override
== NEUTRAL_DIR
)
154 switch (default_type
)
156 /* Although UAX#9 does not tell, it doesn't make sense to
157 override NEUTRAL_B and LRM/RLM characters. */
172 if (override
== L2R
) /* X6 */
174 else if (override
== R2L
)
177 abort (); /* can't happen: handled above */
183 bidi_check_type (bidi_type_t type
)
185 if (type
< UNKNOWN_BT
|| type
> NEUTRAL_ON
)
189 /* Given a bidi TYPE of a character, return its category. */
190 static INLINE bidi_category_t
191 bidi_get_category (bidi_type_t type
)
205 case PDF
: /* ??? really?? */
224 /* Return the mirrored character of C, if it has one. If C has no
225 mirrored counterpart, return C.
226 Note: The conditions in UAX#9 clause L4 regarding the surrounding
227 context must be tested by the caller. */
229 bidi_mirror_char (int c
)
235 if (c
< 0 || c
> MAX_CHAR
)
238 val
= CHAR_TABLE_REF (bidi_mirror_table
, c
);
243 if (v
< 0 || v
> MAX_CHAR
)
252 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
253 copies the part of the level stack that is actually in use. */
255 bidi_copy_it (struct bidi_it
*to
, struct bidi_it
*from
)
259 /* Copy everything except the level stack and beyond. */
260 memcpy (to
, from
, ((size_t)&((struct bidi_it
*)0)->level_stack
[0]));
262 /* Copy the active part of the level stack. */
263 to
->level_stack
[0] = from
->level_stack
[0]; /* level zero is always in use */
264 for (i
= 1; i
<= from
->stack_idx
; i
++)
265 to
->level_stack
[i
] = from
->level_stack
[i
];
268 /* Caching the bidi iterator states. */
270 #define BIDI_CACHE_CHUNK 200
271 static struct bidi_it
*bidi_cache
;
272 static size_t bidi_cache_size
= 0;
273 static size_t elsz
= sizeof (struct bidi_it
);
274 static int bidi_cache_idx
; /* next unused cache slot */
275 static int bidi_cache_last_idx
; /* slot of last cache hit */
278 bidi_cache_reset (void)
281 bidi_cache_last_idx
= -1;
285 bidi_cache_shrink (void)
287 if (bidi_cache_size
> BIDI_CACHE_CHUNK
)
289 bidi_cache_size
= BIDI_CACHE_CHUNK
;
291 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
297 bidi_cache_fetch_state (int idx
, struct bidi_it
*bidi_it
)
299 int current_scan_dir
= bidi_it
->scan_dir
;
301 if (idx
< 0 || idx
>= bidi_cache_idx
)
304 bidi_copy_it (bidi_it
, &bidi_cache
[idx
]);
305 bidi_it
->scan_dir
= current_scan_dir
;
306 bidi_cache_last_idx
= idx
;
309 /* Find a cached state with a given CHARPOS and resolved embedding
310 level less or equal to LEVEL. if LEVEL is -1, disregard the
311 resolved levels in cached states. DIR, if non-zero, means search
312 in that direction from the last cache hit. */
314 bidi_cache_search (EMACS_INT charpos
, int level
, int dir
)
320 if (charpos
< bidi_cache
[bidi_cache_last_idx
].charpos
)
322 else if (charpos
> bidi_cache
[bidi_cache_last_idx
].charpos
)
325 i_start
= bidi_cache_last_idx
;
329 i_start
= bidi_cache_idx
- 1;
334 /* Linear search for now; FIXME! */
335 for (i
= i_start
; i
>= 0; i
--)
336 if (bidi_cache
[i
].charpos
== charpos
337 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
342 for (i
= i_start
; i
< bidi_cache_idx
; i
++)
343 if (bidi_cache
[i
].charpos
== charpos
344 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
352 /* Find a cached state where the resolved level changes to a value
353 that is lower than LEVEL, and return its cache slot index. DIR is
354 the direction to search, starting with the last used cache slot.
355 BEFORE, if non-zero, means return the index of the slot that is
356 ``before'' the level change in the search direction. That is,
357 given the cached levels like this:
362 and assuming we are at the position cached at the slot marked with
363 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
364 index of slot B or A, depending whether BEFORE is, respectively,
367 bidi_cache_find_level_change (int level
, int dir
, int before
)
371 int i
= dir
? bidi_cache_last_idx
: bidi_cache_idx
- 1;
372 int incr
= before
? 1 : 0;
383 if (bidi_cache
[i
- incr
].resolved_level
>= 0
384 && bidi_cache
[i
- incr
].resolved_level
< level
)
391 while (i
< bidi_cache_idx
- incr
)
393 if (bidi_cache
[i
+ incr
].resolved_level
>= 0
394 && bidi_cache
[i
+ incr
].resolved_level
< level
)
405 bidi_cache_iterator_state (struct bidi_it
*bidi_it
, int resolved
)
409 /* We should never cache on backward scans. */
410 if (bidi_it
->scan_dir
== -1)
412 idx
= bidi_cache_search (bidi_it
->charpos
, -1, 1);
416 idx
= bidi_cache_idx
;
417 /* Enlarge the cache as needed. */
418 if (idx
>= bidi_cache_size
)
420 bidi_cache_size
+= BIDI_CACHE_CHUNK
;
422 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
424 /* Character positions should correspond to cache positions 1:1.
425 If we are outside the range of cached positions, the cache is
426 useless and must be reset. */
428 (bidi_it
->charpos
> bidi_cache
[idx
- 1].charpos
+ 1
429 || bidi_it
->charpos
< bidi_cache
[0].charpos
))
434 bidi_copy_it (&bidi_cache
[idx
], bidi_it
);
436 bidi_cache
[idx
].resolved_level
= -1;
440 /* Copy only the members which could have changed, to avoid
441 costly copying of the entire struct. */
442 bidi_cache
[idx
].type
= bidi_it
->type
;
443 bidi_check_type (bidi_it
->type
);
444 bidi_cache
[idx
].type_after_w1
= bidi_it
->type_after_w1
;
445 bidi_check_type (bidi_it
->type_after_w1
);
447 bidi_cache
[idx
].resolved_level
= bidi_it
->resolved_level
;
449 bidi_cache
[idx
].resolved_level
= -1;
450 bidi_cache
[idx
].invalid_levels
= bidi_it
->invalid_levels
;
451 bidi_cache
[idx
].invalid_rl_levels
= bidi_it
->invalid_rl_levels
;
452 bidi_cache
[idx
].next_for_neutral
= bidi_it
->next_for_neutral
;
453 bidi_cache
[idx
].next_for_ws
= bidi_it
->next_for_ws
;
454 bidi_cache
[idx
].ignore_bn_limit
= bidi_it
->ignore_bn_limit
;
457 bidi_cache_last_idx
= idx
;
458 if (idx
>= bidi_cache_idx
)
459 bidi_cache_idx
= idx
+ 1;
462 static INLINE bidi_type_t
463 bidi_cache_find (EMACS_INT charpos
, int level
, struct bidi_it
*bidi_it
)
465 int i
= bidi_cache_search (charpos
, level
, bidi_it
->scan_dir
);
469 bidi_dir_t current_scan_dir
= bidi_it
->scan_dir
;
471 bidi_copy_it (bidi_it
, &bidi_cache
[i
]);
472 bidi_cache_last_idx
= i
;
473 /* Don't let scan direction from from the cached state override
474 the current scan direction. */
475 bidi_it
->scan_dir
= current_scan_dir
;
476 return bidi_it
->type
;
483 bidi_peek_at_next_level (struct bidi_it
*bidi_it
)
485 if (bidi_cache_idx
== 0 || bidi_cache_last_idx
== -1)
487 return bidi_cache
[bidi_cache_last_idx
+ bidi_it
->scan_dir
].resolved_level
;
490 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
491 Value is the non-negative length of the paragraph separator
492 following the buffer position, -1 if position is at the beginning
493 of a new paragraph, or -2 if position is neither at beginning nor
494 at end of a paragraph. */
496 bidi_at_paragraph_end (EMACS_INT charpos
, EMACS_INT bytepos
)
499 Lisp_Object start_re
;
502 sep_re
= paragraph_separate_re
;
503 start_re
= paragraph_start_re
;
505 val
= fast_looking_at (sep_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
);
508 if (fast_looking_at (start_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
) >= 0)
517 /* Determine the start-of-run (sor) directional type given the two
518 embedding levels on either side of the run boundary. Also, update
519 the saved info about previously seen characters, since that info is
520 generally valid for a single level run. */
522 bidi_set_sor_type (struct bidi_it
*bidi_it
, int level_before
, int level_after
)
524 int higher_level
= level_before
> level_after
? level_before
: level_after
;
526 /* The prev_was_pdf gork is required for when we have several PDFs
527 in a row. In that case, we want to compute the sor type for the
528 next level run only once: when we see the first PDF. That's
529 because the sor type depends only on the higher of the two levels
530 that we find on the two sides of the level boundary (see UAX#9,
531 clause X10), and so we don't need to know the final embedding
532 level to which we descend after processing all the PDFs. */
533 if (!bidi_it
->prev_was_pdf
|| level_before
< level_after
)
534 /* FIXME: should the default sor direction be user selectable? */
535 bidi_it
->sor
= (higher_level
& 1) != 0 ? R2L
: L2R
;
536 if (level_before
> level_after
)
537 bidi_it
->prev_was_pdf
= 1;
539 bidi_it
->prev
.type
= UNKNOWN_BT
;
540 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
541 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
542 bidi_it
->prev_for_neutral
.type
= bidi_it
->sor
== R2L
? STRONG_R
: STRONG_L
;
543 bidi_it
->prev_for_neutral
.charpos
= bidi_it
->charpos
;
544 bidi_it
->prev_for_neutral
.bytepos
= bidi_it
->bytepos
;
545 bidi_it
->next_for_neutral
.type
= bidi_it
->next_for_neutral
.type_after_w1
=
546 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
547 bidi_it
->ignore_bn_limit
= 0; /* meaning it's unknown */
551 bidi_line_init (struct bidi_it
*bidi_it
)
553 bidi_it
->scan_dir
= 1; /* FIXME: do we need to have control on this? */
554 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
555 bidi_it
->level_stack
[0].override
= NEUTRAL_DIR
; /* X1 */
556 bidi_it
->invalid_levels
= 0;
557 bidi_it
->invalid_rl_levels
= -1;
558 bidi_it
->next_en_pos
= -1;
559 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
560 bidi_set_sor_type (bidi_it
,
561 bidi_it
->paragraph_dir
== R2L
? 1 : 0,
562 bidi_it
->level_stack
[0].level
); /* X10 */
567 /* Find the beginning of this paragraph by looking back in the buffer.
568 Value is the byte position of the paragraph's beginning. */
570 bidi_find_paragraph_start (EMACS_INT pos
, EMACS_INT pos_byte
)
572 Lisp_Object re
= paragraph_start_re
;
573 EMACS_INT limit
= ZV
, limit_byte
= ZV_BYTE
;
575 while (pos_byte
> BEGV_BYTE
576 && fast_looking_at (re
, pos
, pos_byte
, limit
, limit_byte
, Qnil
) < 0)
578 pos
= find_next_newline_no_quit (pos
- 1, -1);
579 pos_byte
= CHAR_TO_BYTE (pos
);
584 /* Determine the base direction, a.k.a. base embedding level, of the
585 paragraph we are about to iterate through. If DIR is either L2R or
586 R2L, just use that. Otherwise, determine the paragraph direction
587 from the first strong directional character of the paragraph.
589 NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
590 has no strong directional characters and both DIR and
591 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
592 in the buffer until a paragraph is found with a strong character,
593 or until hitting BEGV. In the latter case, fall back to L2R. This
594 flag is used in current-bidi-paragraph-direction.
596 Note that this function gives the paragraph separator the same
597 direction as the preceding paragraph, even though Emacs generally
598 views the separartor as not belonging to any paragraph. */
600 bidi_paragraph_init (bidi_dir_t dir
, struct bidi_it
*bidi_it
, int no_default_p
)
602 EMACS_INT bytepos
= bidi_it
->bytepos
;
603 EMACS_INT pstartbyte
;
605 /* Special case for an empty buffer. */
606 if (bytepos
== BEGV_BYTE
&& bytepos
== ZV_BYTE
)
608 /* We should never be called at EOB or before BEGV. */
609 else if (bytepos
>= ZV_BYTE
|| bytepos
< BEGV_BYTE
)
614 bidi_it
->paragraph_dir
= L2R
;
615 bidi_it
->new_paragraph
= 0;
619 bidi_it
->paragraph_dir
= R2L
;
620 bidi_it
->new_paragraph
= 0;
622 else if (dir
== NEUTRAL_DIR
) /* P2 */
628 if (!bidi_initialized
)
631 /* If we are inside a paragraph separator, we are just waiting
632 for the separator to be exhausted; use the previous paragraph
633 direction. But don't do that if we have been just reseated,
634 because we need to reinitialize below in that case. */
635 if (!bidi_it
->first_elt
636 && bidi_it
->charpos
< bidi_it
->separator_limit
)
639 /* If we are on a newline, get past it to where the next
640 paragraph might start. But don't do that at BEGV since then
641 we are potentially in a new paragraph that doesn't yet
643 pos
= bidi_it
->charpos
;
644 if (bytepos
> BEGV_BYTE
&& FETCH_CHAR (bytepos
) == '\n')
650 /* We are either at the beginning of a paragraph or in the
651 middle of it. Find where this paragraph starts. */
652 pstartbyte
= bidi_find_paragraph_start (pos
, bytepos
);
653 bidi_it
->separator_limit
= -1;
654 bidi_it
->new_paragraph
= 0;
656 /* The following loop is run more than once only if NO_DEFAULT_P
659 bytepos
= pstartbyte
;
660 ch
= FETCH_CHAR (bytepos
);
661 ch_len
= CHAR_BYTES (ch
);
662 pos
= BYTE_TO_CHAR (bytepos
);
663 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
665 for (pos
++, bytepos
+= ch_len
;
666 /* NOTE: UAX#9 says to search only for L, AL, or R types
667 of characters, and ignore RLE, RLO, LRE, and LRO.
668 However, I'm not sure it makes sense to omit those 4;
669 should try with and without that to see the effect. */
670 (bidi_get_category (type
) != STRONG
)
671 || (bidi_ignore_explicit_marks_for_paragraph_level
672 && (type
== RLE
|| type
== RLO
673 || type
== LRE
|| type
== LRO
));
674 type
= bidi_get_type (ch
, NEUTRAL_DIR
))
676 if (type
== NEUTRAL_B
&& bidi_at_paragraph_end (pos
, bytepos
) >= -1)
678 if (bytepos
>= ZV_BYTE
)
680 /* Pretend there's a paragraph separator at end of
685 FETCH_CHAR_ADVANCE (ch
, pos
, bytepos
);
687 if (type
== STRONG_R
|| type
== STRONG_AL
) /* P3 */
688 bidi_it
->paragraph_dir
= R2L
;
689 else if (type
== STRONG_L
)
690 bidi_it
->paragraph_dir
= L2R
;
691 if (no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
)
693 /* If this paragraph is at BEGV, default to L2R. */
694 if (pstartbyte
== BEGV_BYTE
)
695 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 */
698 EMACS_INT prevpbyte
= pstartbyte
;
699 EMACS_INT p
= BYTE_TO_CHAR (pstartbyte
), pbyte
= pstartbyte
;
701 /* Find the beginning of the previous paragraph, if any. */
702 while (pbyte
> BEGV_BYTE
&& prevpbyte
>= pstartbyte
)
705 pbyte
= CHAR_TO_BYTE (p
);
706 prevpbyte
= bidi_find_paragraph_start (p
, pbyte
);
708 pstartbyte
= prevpbyte
;
711 } while (no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
);
716 /* Contrary to UAX#9 clause P3, we only default the paragraph
717 direction to L2R if we have no previous usable paragraph
718 direction. This is allowed by the HL1 clause. */
719 if (bidi_it
->paragraph_dir
!= L2R
&& bidi_it
->paragraph_dir
!= R2L
)
720 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 ``higher-level protocols'' */
721 if (bidi_it
->paragraph_dir
== R2L
)
722 bidi_it
->level_stack
[0].level
= 1;
724 bidi_it
->level_stack
[0].level
= 0;
726 bidi_line_init (bidi_it
);
729 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
732 bidi_set_paragraph_end (struct bidi_it
*bidi_it
)
734 bidi_it
->invalid_levels
= 0;
735 bidi_it
->invalid_rl_levels
= -1;
736 bidi_it
->stack_idx
= 0;
737 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
740 /* Initialize the bidi iterator from buffer position CHARPOS. */
742 bidi_init_it (EMACS_INT charpos
, EMACS_INT bytepos
, struct bidi_it
*bidi_it
)
744 if (! bidi_initialized
)
746 bidi_it
->charpos
= charpos
;
747 bidi_it
->bytepos
= bytepos
;
748 bidi_it
->first_elt
= 1;
749 bidi_set_paragraph_end (bidi_it
);
750 bidi_it
->new_paragraph
= 1;
751 bidi_it
->separator_limit
= -1;
752 bidi_it
->type
= NEUTRAL_B
;
753 bidi_it
->type_after_w1
= NEUTRAL_B
;
754 bidi_it
->orig_type
= NEUTRAL_B
;
755 bidi_it
->prev_was_pdf
= 0;
756 bidi_it
->prev
.type
= bidi_it
->prev
.type_after_w1
=
757 bidi_it
->prev
.orig_type
= UNKNOWN_BT
;
758 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
759 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
760 bidi_it
->next_for_neutral
.charpos
= -1;
761 bidi_it
->next_for_neutral
.type
=
762 bidi_it
->next_for_neutral
.type_after_w1
=
763 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
764 bidi_it
->prev_for_neutral
.charpos
= -1;
765 bidi_it
->prev_for_neutral
.type
=
766 bidi_it
->prev_for_neutral
.type_after_w1
=
767 bidi_it
->prev_for_neutral
.orig_type
= UNKNOWN_BT
;
768 bidi_it
->sor
= L2R
; /* FIXME: should it be user-selectable? */
769 bidi_cache_shrink ();
772 /* Push the current embedding level and override status; reset the
773 current level to LEVEL and the current override status to OVERRIDE. */
775 bidi_push_embedding_level (struct bidi_it
*bidi_it
,
776 int level
, bidi_dir_t override
)
778 bidi_it
->stack_idx
++;
779 if (bidi_it
->stack_idx
>= BIDI_MAXLEVEL
)
781 bidi_it
->level_stack
[bidi_it
->stack_idx
].level
= level
;
782 bidi_it
->level_stack
[bidi_it
->stack_idx
].override
= override
;
785 /* Pop the embedding level and directional override status from the
786 stack, and return the new level. */
788 bidi_pop_embedding_level (struct bidi_it
*bidi_it
)
790 /* UAX#9 says to ignore invalid PDFs. */
791 if (bidi_it
->stack_idx
> 0)
792 bidi_it
->stack_idx
--;
793 return bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
796 /* Record in SAVED_INFO the information about the current character. */
798 bidi_remember_char (struct bidi_saved_info
*saved_info
,
799 struct bidi_it
*bidi_it
)
801 saved_info
->charpos
= bidi_it
->charpos
;
802 saved_info
->bytepos
= bidi_it
->bytepos
;
803 saved_info
->type
= bidi_it
->type
;
804 bidi_check_type (bidi_it
->type
);
805 saved_info
->type_after_w1
= bidi_it
->type_after_w1
;
806 bidi_check_type (bidi_it
->type_after_w1
);
807 saved_info
->orig_type
= bidi_it
->orig_type
;
808 bidi_check_type (bidi_it
->orig_type
);
811 /* Resolve the type of a neutral character according to the type of
812 surrounding strong text and the current embedding level. */
813 static INLINE bidi_type_t
814 bidi_resolve_neutral_1 (bidi_type_t prev_type
, bidi_type_t next_type
, int lev
)
816 /* N1: European and Arabic numbers are treated as though they were R. */
817 if (next_type
== WEAK_EN
|| next_type
== WEAK_AN
)
818 next_type
= STRONG_R
;
819 if (prev_type
== WEAK_EN
|| prev_type
== WEAK_AN
)
820 prev_type
= STRONG_R
;
822 if (next_type
== prev_type
) /* N1 */
824 else if ((lev
& 1) == 0) /* N2 */
831 bidi_explicit_dir_char (int c
)
833 /* FIXME: this should be replaced with a lookup table with suitable
834 bits set, like standard C ctype macros do. */
835 return (c
== LRE_CHAR
|| c
== LRO_CHAR
836 || c
== RLE_CHAR
|| c
== RLO_CHAR
|| c
== PDF_CHAR
);
839 /* A helper function for bidi_resolve_explicit. It advances to the
840 next character in logical order and determines the new embedding
841 level and directional override, but does not take into account
844 bidi_resolve_explicit_1 (struct bidi_it
*bidi_it
)
852 if (bidi_it
->bytepos
< BEGV_BYTE
/* after reseat to BEGV? */
853 || bidi_it
->first_elt
)
855 bidi_it
->first_elt
= 0;
856 if (bidi_it
->charpos
< BEGV
)
857 bidi_it
->charpos
= BEGV
;
858 bidi_it
->bytepos
= CHAR_TO_BYTE (bidi_it
->charpos
);
860 else if (bidi_it
->bytepos
< ZV_BYTE
) /* don't move at ZV */
863 if (bidi_it
->ch_len
== 0)
865 bidi_it
->bytepos
+= bidi_it
->ch_len
;
868 current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
; /* X1 */
869 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
870 new_level
= current_level
;
872 /* in case it is a unibyte character (not yet implemented) */
873 /* _fetch_multibyte_char_len = 1; */
874 if (bidi_it
->bytepos
>= ZV_BYTE
)
881 curchar
= FETCH_CHAR (bidi_it
->bytepos
);
882 bidi_it
->ch_len
= CHAR_BYTES (curchar
);
884 bidi_it
->ch
= curchar
;
886 /* Don't apply directional override here, as all the types we handle
887 below will not be affected by the override anyway, and we need
888 the original type unaltered. The override will be applied in
889 bidi_resolve_weak. */
890 type
= bidi_get_type (curchar
, NEUTRAL_DIR
);
891 bidi_it
->orig_type
= type
;
892 bidi_check_type (bidi_it
->orig_type
);
895 bidi_it
->prev_was_pdf
= 0;
897 bidi_it
->type_after_w1
= UNKNOWN_BT
;
903 bidi_it
->type_after_w1
= type
;
904 bidi_check_type (bidi_it
->type_after_w1
);
905 type
= WEAK_BN
; /* X9/Retaining */
906 if (bidi_it
->ignore_bn_limit
<= 0)
908 if (current_level
<= BIDI_MAXLEVEL
- 4)
910 /* Compute the least odd embedding level greater than
911 the current level. */
912 new_level
= ((current_level
+ 1) & ~1) + 1;
913 if (bidi_it
->type_after_w1
== RLE
)
914 override
= NEUTRAL_DIR
;
917 if (current_level
== BIDI_MAXLEVEL
- 4)
918 bidi_it
->invalid_rl_levels
= 0;
919 bidi_push_embedding_level (bidi_it
, new_level
, override
);
923 bidi_it
->invalid_levels
++;
924 /* See the commentary about invalid_rl_levels below. */
925 if (bidi_it
->invalid_rl_levels
< 0)
926 bidi_it
->invalid_rl_levels
= 0;
927 bidi_it
->invalid_rl_levels
++;
930 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
931 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
936 bidi_it
->type_after_w1
= type
;
937 bidi_check_type (bidi_it
->type_after_w1
);
938 type
= WEAK_BN
; /* X9/Retaining */
939 if (bidi_it
->ignore_bn_limit
<= 0)
941 if (current_level
<= BIDI_MAXLEVEL
- 5)
943 /* Compute the least even embedding level greater than
944 the current level. */
945 new_level
= ((current_level
+ 2) & ~1);
946 if (bidi_it
->type_after_w1
== LRE
)
947 override
= NEUTRAL_DIR
;
950 bidi_push_embedding_level (bidi_it
, new_level
, override
);
954 bidi_it
->invalid_levels
++;
955 /* invalid_rl_levels counts invalid levels encountered
956 while the embedding level was already too high for
957 LRE/LRO, but not for RLE/RLO. That is because
958 there may be exactly one PDF which we should not
959 ignore even though invalid_levels is non-zero.
960 invalid_rl_levels helps to know what PDF is
962 if (bidi_it
->invalid_rl_levels
>= 0)
963 bidi_it
->invalid_rl_levels
++;
966 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
967 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
971 bidi_it
->type_after_w1
= type
;
972 bidi_check_type (bidi_it
->type_after_w1
);
973 type
= WEAK_BN
; /* X9/Retaining */
974 if (bidi_it
->ignore_bn_limit
<= 0)
976 if (!bidi_it
->invalid_rl_levels
)
978 new_level
= bidi_pop_embedding_level (bidi_it
);
979 bidi_it
->invalid_rl_levels
= -1;
980 if (bidi_it
->invalid_levels
)
981 bidi_it
->invalid_levels
--;
982 /* else nothing: UAX#9 says to ignore invalid PDFs */
984 if (!bidi_it
->invalid_levels
)
985 new_level
= bidi_pop_embedding_level (bidi_it
);
988 bidi_it
->invalid_levels
--;
989 bidi_it
->invalid_rl_levels
--;
992 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
993 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1001 bidi_it
->type
= type
;
1002 bidi_check_type (bidi_it
->type
);
1007 /* Given an iterator state in BIDI_IT, advance one character position
1008 in the buffer to the next character (in the logical order), resolve
1009 any explicit embeddings and directional overrides, and return the
1010 embedding level of the character after resolving explicit
1011 directives and ignoring empty embeddings. */
1013 bidi_resolve_explicit (struct bidi_it
*bidi_it
)
1015 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1016 int new_level
= bidi_resolve_explicit_1 (bidi_it
);
1018 if (prev_level
< new_level
1019 && bidi_it
->type
== WEAK_BN
1020 && bidi_it
->ignore_bn_limit
== 0 /* only if not already known */
1021 && bidi_it
->bytepos
< ZV_BYTE
/* not already at EOB */
1022 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it
->bytepos
1023 + bidi_it
->ch_len
)))
1025 /* Avoid pushing and popping embedding levels if the level run
1026 is empty, as this breaks level runs where it shouldn't.
1027 UAX#9 removes all the explicit embedding and override codes,
1028 so empty embeddings disappear without a trace. We need to
1029 behave as if we did the same. */
1030 struct bidi_it saved_it
;
1031 int level
= prev_level
;
1033 bidi_copy_it (&saved_it
, bidi_it
);
1035 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it
->bytepos
1036 + bidi_it
->ch_len
)))
1038 level
= bidi_resolve_explicit_1 (bidi_it
);
1041 if (level
== prev_level
) /* empty embedding */
1042 saved_it
.ignore_bn_limit
= bidi_it
->charpos
+ 1;
1043 else /* this embedding is non-empty */
1044 saved_it
.ignore_bn_limit
= -1;
1046 bidi_copy_it (bidi_it
, &saved_it
);
1047 if (bidi_it
->ignore_bn_limit
> 0)
1049 /* We pushed a level, but we shouldn't have. Undo that. */
1050 if (!bidi_it
->invalid_rl_levels
)
1052 new_level
= bidi_pop_embedding_level (bidi_it
);
1053 bidi_it
->invalid_rl_levels
= -1;
1054 if (bidi_it
->invalid_levels
)
1055 bidi_it
->invalid_levels
--;
1057 if (!bidi_it
->invalid_levels
)
1058 new_level
= bidi_pop_embedding_level (bidi_it
);
1061 bidi_it
->invalid_levels
--;
1062 bidi_it
->invalid_rl_levels
--;
1067 if (bidi_it
->type
== NEUTRAL_B
) /* X8 */
1069 bidi_set_paragraph_end (bidi_it
);
1070 /* This is needed by bidi_resolve_weak below, and in L1. */
1071 bidi_it
->type_after_w1
= bidi_it
->type
;
1072 bidi_check_type (bidi_it
->type_after_w1
);
1078 /* Advance in the buffer, resolve weak types and return the type of
1079 the next character after weak type resolution. */
1081 bidi_resolve_weak (struct bidi_it
*bidi_it
)
1084 bidi_dir_t override
;
1085 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1086 int new_level
= bidi_resolve_explicit (bidi_it
);
1088 bidi_type_t type_of_next
;
1089 struct bidi_it saved_it
;
1091 type
= bidi_it
->type
;
1092 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1094 if (type
== UNKNOWN_BT
1102 if (new_level
!= prev_level
1103 || bidi_it
->type
== NEUTRAL_B
)
1105 /* We've got a new embedding level run, compute the directional
1106 type of sor and initialize per-run variables (UAX#9, clause
1108 bidi_set_sor_type (bidi_it
, prev_level
, new_level
);
1110 else if (type
== NEUTRAL_S
|| type
== NEUTRAL_WS
1111 || type
== WEAK_BN
|| type
== STRONG_AL
)
1112 bidi_it
->type_after_w1
= type
; /* needed in L1 */
1113 bidi_check_type (bidi_it
->type_after_w1
);
1115 /* Level and directional override status are already recorded in
1116 bidi_it, and do not need any change; see X6. */
1117 if (override
== R2L
) /* X6 */
1119 else if (override
== L2R
)
1123 if (type
== WEAK_NSM
) /* W1 */
1125 /* Note that we don't need to consider the case where the
1126 prev character has its type overridden by an RLO or LRO,
1127 because then either the type of this NSM would have been
1128 also overridden, or the previous character is outside the
1129 current level run, and thus not relevant to this NSM.
1130 This is why NSM gets the type_after_w1 of the previous
1132 if (bidi_it
->prev
.type_after_w1
!= UNKNOWN_BT
1133 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1134 && bidi_it
->prev
.type_after_w1
!= NEUTRAL_B
)
1135 type
= bidi_it
->prev
.type_after_w1
;
1136 else if (bidi_it
->sor
== R2L
)
1138 else if (bidi_it
->sor
== L2R
)
1140 else /* shouldn't happen! */
1143 if (type
== WEAK_EN
/* W2 */
1144 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
1146 else if (type
== STRONG_AL
) /* W3 */
1148 else if ((type
== WEAK_ES
/* W4 */
1149 && bidi_it
->prev
.type_after_w1
== WEAK_EN
1150 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1152 && ((bidi_it
->prev
.type_after_w1
== WEAK_EN
1153 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1154 || bidi_it
->prev
.type_after_w1
== WEAK_AN
)))
1157 bidi_it
->bytepos
+ bidi_it
->ch_len
>= ZV_BYTE
1158 ? BIDI_EOB
: FETCH_CHAR (bidi_it
->bytepos
+ bidi_it
->ch_len
);
1159 type_of_next
= bidi_get_type (next_char
, override
);
1161 if (type_of_next
== WEAK_BN
1162 || bidi_explicit_dir_char (next_char
))
1164 bidi_copy_it (&saved_it
, bidi_it
);
1165 while (bidi_resolve_explicit (bidi_it
) == new_level
1166 && bidi_it
->type
== WEAK_BN
)
1168 type_of_next
= bidi_it
->type
;
1169 bidi_copy_it (bidi_it
, &saved_it
);
1172 /* If the next character is EN, but the last strong-type
1173 character is AL, that next EN will be changed to AN when
1174 we process it in W2 above. So in that case, this ES
1175 should not be changed into EN. */
1177 && type_of_next
== WEAK_EN
1178 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1180 else if (type
== WEAK_CS
)
1182 if (bidi_it
->prev
.type_after_w1
== WEAK_AN
1183 && (type_of_next
== WEAK_AN
1184 /* If the next character is EN, but the last
1185 strong-type character is AL, EN will be later
1186 changed to AN when we process it in W2 above.
1187 So in that case, this ES should not be
1189 || (type_of_next
== WEAK_EN
1190 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)))
1192 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
1193 && type_of_next
== WEAK_EN
1194 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1198 else if (type
== WEAK_ET
/* W5: ET with EN before or after it */
1199 || type
== WEAK_BN
) /* W5/Retaining */
1201 if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* ET/BN w/EN before it */
1202 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1204 else /* W5: ET/BN with EN after it. */
1206 EMACS_INT en_pos
= bidi_it
->charpos
+ 1;
1209 bidi_it
->bytepos
+ bidi_it
->ch_len
>= ZV_BYTE
1210 ? BIDI_EOB
: FETCH_CHAR (bidi_it
->bytepos
+ bidi_it
->ch_len
);
1211 type_of_next
= bidi_get_type (next_char
, override
);
1213 if (type_of_next
== WEAK_ET
1214 || type_of_next
== WEAK_BN
1215 || bidi_explicit_dir_char (next_char
))
1217 bidi_copy_it (&saved_it
, bidi_it
);
1218 while (bidi_resolve_explicit (bidi_it
) == new_level
1219 && (bidi_it
->type
== WEAK_BN
1220 || bidi_it
->type
== WEAK_ET
))
1222 type_of_next
= bidi_it
->type
;
1223 en_pos
= bidi_it
->charpos
;
1224 bidi_copy_it (bidi_it
, &saved_it
);
1226 if (type_of_next
== WEAK_EN
)
1228 /* If the last strong character is AL, the EN we've
1229 found will become AN when we get to it (W2). */
1230 if (bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1233 /* Remember this EN position, to speed up processing
1235 bidi_it
->next_en_pos
= en_pos
;
1237 else if (type
== WEAK_BN
)
1238 type
= NEUTRAL_ON
; /* W6/Retaining */
1244 if (type
== WEAK_ES
|| type
== WEAK_ET
|| type
== WEAK_CS
/* W6 */
1246 && (bidi_it
->prev
.type_after_w1
== WEAK_CS
/* W6/Retaining */
1247 || bidi_it
->prev
.type_after_w1
== WEAK_ES
1248 || bidi_it
->prev
.type_after_w1
== WEAK_ET
)))
1251 /* Store the type we've got so far, before we clobber it with strong
1252 types in W7 and while resolving neutral types. But leave alone
1253 the original types that were recorded above, because we will need
1254 them for the L1 clause. */
1255 if (bidi_it
->type_after_w1
== UNKNOWN_BT
)
1256 bidi_it
->type_after_w1
= type
;
1257 bidi_check_type (bidi_it
->type_after_w1
);
1259 if (type
== WEAK_EN
) /* W7 */
1261 if ((bidi_it
->last_strong
.type_after_w1
== STRONG_L
)
1262 || (bidi_it
->last_strong
.type
== UNKNOWN_BT
&& bidi_it
->sor
== L2R
))
1266 bidi_it
->type
= type
;
1267 bidi_check_type (bidi_it
->type
);
1272 bidi_resolve_neutral (struct bidi_it
*bidi_it
)
1274 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1275 bidi_type_t type
= bidi_resolve_weak (bidi_it
);
1276 int current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1278 if (!(type
== STRONG_R
1283 || type
== NEUTRAL_B
1284 || type
== NEUTRAL_S
1285 || type
== NEUTRAL_WS
1286 || type
== NEUTRAL_ON
))
1289 if (bidi_get_category (type
) == NEUTRAL
1290 || (type
== WEAK_BN
&& prev_level
== current_level
))
1292 if (bidi_it
->next_for_neutral
.type
!= UNKNOWN_BT
)
1293 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1294 bidi_it
->next_for_neutral
.type
,
1298 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1299 the assumption of batch-style processing; see clauses W4,
1300 W5, and especially N1, which require to look far forward
1301 (as well as back) in the buffer. May the fleas of a
1302 thousand camels infest the armpits of those who design
1303 supposedly general-purpose algorithms by looking at their
1304 own implementations, and fail to consider other possible
1306 struct bidi_it saved_it
;
1307 bidi_type_t next_type
;
1309 if (bidi_it
->scan_dir
== -1)
1312 bidi_copy_it (&saved_it
, bidi_it
);
1313 /* Scan the text forward until we find the first non-neutral
1314 character, and then use that to resolve the neutral we
1315 are dealing with now. We also cache the scanned iterator
1316 states, to salvage some of the effort later. */
1317 bidi_cache_iterator_state (bidi_it
, 0);
1319 /* Record the info about the previous character, so that
1320 it will be cached below with this state. */
1321 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1322 && bidi_it
->type
!= WEAK_BN
)
1323 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1324 type
= bidi_resolve_weak (bidi_it
);
1325 /* Paragraph separators have their levels fully resolved
1326 at this point, so cache them as resolved. */
1327 bidi_cache_iterator_state (bidi_it
, type
== NEUTRAL_B
);
1328 /* FIXME: implement L1 here, by testing for a newline and
1329 resetting the level for any sequence of whitespace
1330 characters adjacent to it. */
1331 } while (!(type
== NEUTRAL_B
1333 && bidi_get_category (type
) != NEUTRAL
)
1334 /* This is all per level run, so stop when we
1335 reach the end of this level run. */
1336 || bidi_it
->level_stack
[bidi_it
->stack_idx
].level
!=
1339 bidi_remember_char (&saved_it
.next_for_neutral
, bidi_it
);
1350 /* N1: ``European and Arabic numbers are treated as
1351 though they were R.'' */
1352 next_type
= STRONG_R
;
1353 saved_it
.next_for_neutral
.type
= STRONG_R
;
1356 if (!bidi_explicit_dir_char (bidi_it
->ch
))
1357 abort (); /* can't happen: BNs are skipped */
1360 /* Marched all the way to the end of this level run.
1361 We need to use the eor type, whose information is
1362 stored by bidi_set_sor_type in the prev_for_neutral
1364 if (saved_it
.type
!= WEAK_BN
1365 || bidi_get_category (bidi_it
->prev
.type_after_w1
) == NEUTRAL
)
1367 next_type
= bidi_it
->prev_for_neutral
.type
;
1368 saved_it
.next_for_neutral
.type
= next_type
;
1369 bidi_check_type (next_type
);
1373 /* This is a BN which does not adjoin neutrals.
1374 Leave its type alone. */
1375 bidi_copy_it (bidi_it
, &saved_it
);
1376 return bidi_it
->type
;
1382 type
= bidi_resolve_neutral_1 (saved_it
.prev_for_neutral
.type
,
1383 next_type
, current_level
);
1384 saved_it
.type
= type
;
1385 bidi_check_type (type
);
1386 bidi_copy_it (bidi_it
, &saved_it
);
1392 /* Given an iterator state in BIDI_IT, advance one character position
1393 in the buffer to the next character (in the logical order), resolve
1394 the bidi type of that next character, and return that type. */
1396 bidi_type_of_next_char (struct bidi_it
*bidi_it
)
1400 /* This should always be called during a forward scan. */
1401 if (bidi_it
->scan_dir
!= 1)
1404 /* Reset the limit until which to ignore BNs if we step out of the
1405 area where we found only empty levels. */
1406 if ((bidi_it
->ignore_bn_limit
> 0
1407 && bidi_it
->ignore_bn_limit
<= bidi_it
->charpos
)
1408 || (bidi_it
->ignore_bn_limit
== -1
1409 && !bidi_explicit_dir_char (bidi_it
->ch
)))
1410 bidi_it
->ignore_bn_limit
= 0;
1412 type
= bidi_resolve_neutral (bidi_it
);
1417 /* Given an iterator state BIDI_IT, advance one character position in
1418 the buffer to the next character (in the logical order), resolve
1419 the embedding and implicit levels of that next character, and
1420 return the resulting level. */
1422 bidi_level_of_next_char (struct bidi_it
*bidi_it
)
1425 int level
, prev_level
= -1;
1426 struct bidi_saved_info next_for_neutral
;
1428 if (bidi_it
->scan_dir
== 1)
1430 /* There's no sense in trying to advance if we hit end of text. */
1431 if (bidi_it
->bytepos
>= ZV_BYTE
)
1432 return bidi_it
->resolved_level
;
1434 /* Record the info about the previous character. */
1435 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1436 && bidi_it
->type
!= WEAK_BN
)
1437 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1438 if (bidi_it
->type_after_w1
== STRONG_R
1439 || bidi_it
->type_after_w1
== STRONG_L
1440 || bidi_it
->type_after_w1
== STRONG_AL
)
1441 bidi_remember_char (&bidi_it
->last_strong
, bidi_it
);
1442 /* FIXME: it sounds like we don't need both prev and
1443 prev_for_neutral members, but I'm leaving them both for now. */
1444 if (bidi_it
->type
== STRONG_R
|| bidi_it
->type
== STRONG_L
1445 || bidi_it
->type
== WEAK_EN
|| bidi_it
->type
== WEAK_AN
)
1446 bidi_remember_char (&bidi_it
->prev_for_neutral
, bidi_it
);
1448 /* If we overstepped the characters used for resolving neutrals
1449 and whitespace, invalidate their info in the iterator. */
1450 if (bidi_it
->charpos
>= bidi_it
->next_for_neutral
.charpos
)
1451 bidi_it
->next_for_neutral
.type
= UNKNOWN_BT
;
1452 if (bidi_it
->next_en_pos
>= 0
1453 && bidi_it
->charpos
>= bidi_it
->next_en_pos
)
1454 bidi_it
->next_en_pos
= -1;
1455 if (bidi_it
->next_for_ws
.type
!= UNKNOWN_BT
1456 && bidi_it
->charpos
>= bidi_it
->next_for_ws
.charpos
)
1457 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
1459 /* This must be taken before we fill the iterator with the info
1460 about the next char. If we scan backwards, the iterator
1461 state must be already cached, so there's no need to know the
1462 embedding level of the previous character, since we will be
1463 returning to our caller shortly. */
1464 prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1466 next_for_neutral
= bidi_it
->next_for_neutral
;
1468 /* Perhaps it is already cached. */
1469 type
= bidi_cache_find (bidi_it
->charpos
+ bidi_it
->scan_dir
, -1, bidi_it
);
1470 if (type
!= UNKNOWN_BT
)
1472 /* Don't lose the information for resolving neutrals! The
1473 cached states could have been cached before their
1474 next_for_neutral member was computed. If we are on our way
1475 forward, we can simply take the info from the previous
1477 if (bidi_it
->scan_dir
== 1
1478 && bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
1479 bidi_it
->next_for_neutral
= next_for_neutral
;
1481 /* If resolved_level is -1, it means this state was cached
1482 before it was completely resolved, so we cannot return
1484 if (bidi_it
->resolved_level
!= -1)
1485 return bidi_it
->resolved_level
;
1487 if (bidi_it
->scan_dir
== -1)
1488 /* If we are going backwards, the iterator state is already cached
1489 from previous scans, and should be fully resolved. */
1492 if (type
== UNKNOWN_BT
)
1493 type
= bidi_type_of_next_char (bidi_it
);
1495 if (type
== NEUTRAL_B
)
1496 return bidi_it
->resolved_level
;
1498 level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1499 if ((bidi_get_category (type
) == NEUTRAL
/* && type != NEUTRAL_B */)
1500 || (type
== WEAK_BN
&& prev_level
== level
))
1502 if (bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
1505 /* If the cached state shows a neutral character, it was not
1506 resolved by bidi_resolve_neutral, so do it now. */
1507 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1508 bidi_it
->next_for_neutral
.type
,
1512 if (!(type
== STRONG_R
1516 || type
== WEAK_AN
))
1518 bidi_it
->type
= type
;
1519 bidi_check_type (bidi_it
->type
);
1521 /* For L1 below, we need to know, for each WS character, whether
1522 it belongs to a sequence of WS characters preceding a newline
1523 or a TAB or a paragraph separator. */
1524 if (bidi_it
->orig_type
== NEUTRAL_WS
1525 && bidi_it
->next_for_ws
.type
== UNKNOWN_BT
)
1528 int clen
= bidi_it
->ch_len
;
1529 EMACS_INT bpos
= bidi_it
->bytepos
;
1530 EMACS_INT cpos
= bidi_it
->charpos
;
1534 /*_fetch_multibyte_char_len = 1;*/
1535 ch
= bpos
+ clen
>= ZV_BYTE
? BIDI_EOB
: FETCH_CHAR (bpos
+ clen
);
1538 clen
= (ch
== BIDI_EOB
? 1 : CHAR_BYTES (ch
));
1539 if (ch
== '\n' || ch
== BIDI_EOB
/* || ch == LINESEP_CHAR */)
1542 chtype
= bidi_get_type (ch
, NEUTRAL_DIR
);
1543 } while (chtype
== NEUTRAL_WS
|| chtype
== WEAK_BN
1544 || bidi_explicit_dir_char (ch
)); /* L1/Retaining */
1545 bidi_it
->next_for_ws
.type
= chtype
;
1546 bidi_check_type (bidi_it
->next_for_ws
.type
);
1547 bidi_it
->next_for_ws
.charpos
= cpos
;
1548 bidi_it
->next_for_ws
.bytepos
= bpos
;
1551 /* Resolve implicit levels, with a twist: PDFs get the embedding
1552 level of the enbedding they terminate. See below for the
1554 if (bidi_it
->orig_type
== PDF
1555 /* Don't do this if this formatting code didn't change the
1556 embedding level due to invalid or empty embeddings. */
1557 && prev_level
!= level
)
1559 /* Don't look in UAX#9 for the reason for this: it's our own
1560 private quirk. The reason is that we want the formatting
1561 codes to be delivered so that they bracket the text of their
1562 embedding. For example, given the text
1566 we want it to be displayed as
1574 which will result because we bump up the embedding level as
1575 soon as we see the RLO and pop it as soon as we see the PDF,
1576 so RLO itself has the same embedding level as "teST", and
1577 thus would be normally delivered last, just before the PDF.
1578 The switch below fiddles with the level of PDF so that this
1579 ugly side effect does not happen.
1581 (This is, of course, only important if the formatting codes
1582 are actually displayed, but Emacs does need to display them
1583 if the user wants to.) */
1586 else if (bidi_it
->orig_type
== NEUTRAL_B
/* L1 */
1587 || bidi_it
->orig_type
== NEUTRAL_S
1588 || bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
1589 /* || bidi_it->ch == LINESEP_CHAR */
1590 || (bidi_it
->orig_type
== NEUTRAL_WS
1591 && (bidi_it
->next_for_ws
.type
== NEUTRAL_B
1592 || bidi_it
->next_for_ws
.type
== NEUTRAL_S
)))
1593 level
= bidi_it
->level_stack
[0].level
;
1594 else if ((level
& 1) == 0) /* I1 */
1596 if (type
== STRONG_R
)
1598 else if (type
== WEAK_EN
|| type
== WEAK_AN
)
1603 if (type
== STRONG_L
|| type
== WEAK_EN
|| type
== WEAK_AN
)
1607 bidi_it
->resolved_level
= level
;
1611 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1612 non-zero, we are at the end of a level, and we need to prepare to
1613 resume the scan of the lower level.
1615 If this level's other edge is cached, we simply jump to it, filling
1616 the iterator structure with the iterator state on the other edge.
1617 Otherwise, we walk the buffer until we come back to the same level
1620 Note: we are not talking here about a ``level run'' in the UAX#9
1621 sense of the term, but rather about a ``level'' which includes
1622 all the levels higher than it. In other words, given the levels
1625 11111112222222333333334443343222222111111112223322111
1628 and assuming we are at point A scanning left to right, this
1629 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1632 bidi_find_other_level_edge (struct bidi_it
*bidi_it
, int level
, int end_flag
)
1634 int dir
= end_flag
? -bidi_it
->scan_dir
: bidi_it
->scan_dir
;
1637 /* Try the cache first. */
1638 if ((idx
= bidi_cache_find_level_change (level
, dir
, end_flag
)) >= 0)
1639 bidi_cache_fetch_state (idx
, bidi_it
);
1645 abort (); /* if we are at end of level, its edges must be cached */
1647 bidi_cache_iterator_state (bidi_it
, 1);
1649 new_level
= bidi_level_of_next_char (bidi_it
);
1650 bidi_cache_iterator_state (bidi_it
, 1);
1651 } while (new_level
>= level
);
1656 bidi_move_to_visually_next (struct bidi_it
*bidi_it
)
1658 int old_level
, new_level
, next_level
;
1659 struct bidi_it sentinel
;
1661 if (bidi_it
->scan_dir
== 0)
1663 bidi_it
->scan_dir
= 1; /* default to logical order */
1666 /* If we just passed a newline, initialize for the next line. */
1667 if (!bidi_it
->first_elt
&& bidi_it
->orig_type
== NEUTRAL_B
)
1668 bidi_line_init (bidi_it
);
1670 /* Prepare the sentinel iterator state, and cache it. When we bump
1671 into it, scanning backwards, we'll know that the last non-base
1672 level is exhausted. */
1673 if (bidi_cache_idx
== 0)
1675 bidi_copy_it (&sentinel
, bidi_it
);
1676 if (bidi_it
->first_elt
)
1678 sentinel
.charpos
--; /* cached charpos needs to be monotonic */
1680 sentinel
.ch
= '\n'; /* doesn't matter, but why not? */
1681 sentinel
.ch_len
= 1;
1683 bidi_cache_iterator_state (&sentinel
, 1);
1686 old_level
= bidi_it
->resolved_level
;
1687 new_level
= bidi_level_of_next_char (bidi_it
);
1689 /* Reordering of resolved levels (clause L2) is implemented by
1690 jumping to the other edge of the level and flipping direction of
1691 scanning the text whenever we find a level change. */
1692 if (new_level
!= old_level
)
1694 int ascending
= new_level
> old_level
;
1695 int level_to_search
= ascending
? old_level
+ 1 : old_level
;
1696 int incr
= ascending
? 1 : -1;
1697 int expected_next_level
= old_level
+ incr
;
1699 /* Jump (or walk) to the other edge of this level. */
1700 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
1701 /* Switch scan direction and peek at the next character in the
1703 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
1705 /* The following loop handles the case where the resolved level
1706 jumps by more than one. This is typical for numbers inside a
1707 run of text with left-to-right embedding direction, but can
1708 also happen in other situations. In those cases the decision
1709 where to continue after a level change, and in what direction,
1710 is tricky. For example, given a text like below:
1715 (where the numbers below the text show the resolved levels),
1716 the result of reordering according to UAX#9 should be this:
1720 This is implemented by the loop below which flips direction
1721 and jumps to the other edge of the level each time it finds
1722 the new level not to be the expected one. The expected level
1723 is always one more or one less than the previous one. */
1724 next_level
= bidi_peek_at_next_level (bidi_it
);
1725 while (next_level
!= expected_next_level
)
1727 expected_next_level
+= incr
;
1728 level_to_search
+= incr
;
1729 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
1730 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
1731 next_level
= bidi_peek_at_next_level (bidi_it
);
1734 /* Finally, deliver the next character in the new direction. */
1735 next_level
= bidi_level_of_next_char (bidi_it
);
1738 /* Take note when we have just processed the newline that precedes
1739 the end of the paragraph. The next time we are about to be
1740 called, set_iterator_to_next will automatically reinit the
1741 paragraph direction, if needed. We do this at the newline before
1742 the paragraph separator, because the next character might not be
1743 the first character of the next paragraph, due to the bidi
1744 reordering, whereas we _must_ know the paragraph base direction
1745 _before_ we process the paragraph's text, since the base
1746 direction affects the reordering. */
1747 if (bidi_it
->scan_dir
== 1
1748 && bidi_it
->orig_type
== NEUTRAL_B
1749 && bidi_it
->bytepos
< ZV_BYTE
)
1752 bidi_at_paragraph_end (bidi_it
->charpos
+ 1,
1753 bidi_it
->bytepos
+ bidi_it
->ch_len
);
1756 bidi_it
->new_paragraph
= 1;
1757 /* Record the buffer position of the last character of the
1758 paragraph separator. */
1759 bidi_it
->separator_limit
= bidi_it
->charpos
+ 1 + sep_len
;
1763 if (bidi_it
->scan_dir
== 1 && bidi_cache_idx
)
1765 /* If we are at paragraph's base embedding level and beyond the
1766 last cached position, the cache's job is done and we can
1768 if (bidi_it
->resolved_level
== bidi_it
->level_stack
[0].level
1769 && bidi_it
->charpos
> bidi_cache
[bidi_cache_idx
- 1].charpos
)
1770 bidi_cache_reset ();
1771 /* But as long as we are caching during forward scan, we must
1772 cache each state, or else the cache integrity will be
1773 compromised: it assumes cached states correspond to buffer
1776 bidi_cache_iterator_state (bidi_it
, 1);
1780 /* This is meant to be called from within the debugger, whenever you
1781 wish to examine the cache contents. */
1783 bidi_dump_cached_states (void)
1788 if (bidi_cache_idx
== 0)
1790 fprintf (stderr
, "The cache is empty.\n");
1793 fprintf (stderr
, "Total of %d state%s in cache:\n",
1794 bidi_cache_idx
, bidi_cache_idx
== 1 ? "" : "s");
1796 for (i
= bidi_cache
[bidi_cache_idx
- 1].charpos
; i
> 0; i
/= 10)
1798 fputs ("ch ", stderr
);
1799 for (i
= 0; i
< bidi_cache_idx
; i
++)
1800 fprintf (stderr
, "%*c", ndigits
, bidi_cache
[i
].ch
);
1801 fputs ("\n", stderr
);
1802 fputs ("lvl ", stderr
);
1803 for (i
= 0; i
< bidi_cache_idx
; i
++)
1804 fprintf (stderr
, "%*d", ndigits
, bidi_cache
[i
].resolved_level
);
1805 fputs ("\n", stderr
);
1806 fputs ("pos ", stderr
);
1807 for (i
= 0; i
< bidi_cache_idx
; i
++)
1808 fprintf (stderr
, "%*ld", ndigits
, (long)bidi_cache
[i
].charpos
);
1809 fputs ("\n", stderr
);