1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications described in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
58 #include "character.h"
59 #include "dispextern.h"
61 static int bidi_initialized
= 0;
63 static Lisp_Object bidi_type_table
, bidi_mirror_table
;
65 #define LRM_CHAR 0x200E
66 #define RLM_CHAR 0x200F
69 /* Data type for describing the bidirectional character categories. */
77 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE
;
78 int bidi_ignore_explicit_marks_for_paragraph_level
= 1;
80 static Lisp_Object paragraph_start_re
, paragraph_separate_re
;
81 static Lisp_Object Qparagraph_start
, Qparagraph_separate
;
84 /***********************************************************************
86 ***********************************************************************/
88 /* Return the bidi type of a character CH, subject to the current
89 directional OVERRIDE. */
90 static inline bidi_type_t
91 bidi_get_type (int ch
, bidi_dir_t override
)
93 bidi_type_t default_type
;
97 if (ch
< 0 || ch
> MAX_CHAR
)
100 default_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
102 if (override
== NEUTRAL_DIR
)
105 switch (default_type
)
107 /* Although UAX#9 does not tell, it doesn't make sense to
108 override NEUTRAL_B and LRM/RLM characters. */
123 if (override
== L2R
) /* X6 */
125 else if (override
== R2L
)
128 abort (); /* can't happen: handled above */
134 bidi_check_type (bidi_type_t type
)
136 if (type
< UNKNOWN_BT
|| type
> NEUTRAL_ON
)
140 /* Given a bidi TYPE of a character, return its category. */
141 static inline bidi_category_t
142 bidi_get_category (bidi_type_t type
)
156 case PDF
: /* ??? really?? */
175 /* Return the mirrored character of C, if it has one. If C has no
176 mirrored counterpart, return C.
177 Note: The conditions in UAX#9 clause L4 regarding the surrounding
178 context must be tested by the caller. */
180 bidi_mirror_char (int c
)
186 if (c
< 0 || c
> MAX_CHAR
)
189 val
= CHAR_TABLE_REF (bidi_mirror_table
, c
);
194 if (v
< 0 || v
> MAX_CHAR
)
203 /* Determine the start-of-run (sor) directional type given the two
204 embedding levels on either side of the run boundary. Also, update
205 the saved info about previously seen characters, since that info is
206 generally valid for a single level run. */
208 bidi_set_sor_type (struct bidi_it
*bidi_it
, int level_before
, int level_after
)
210 int higher_level
= level_before
> level_after
? level_before
: level_after
;
212 /* The prev_was_pdf gork is required for when we have several PDFs
213 in a row. In that case, we want to compute the sor type for the
214 next level run only once: when we see the first PDF. That's
215 because the sor type depends only on the higher of the two levels
216 that we find on the two sides of the level boundary (see UAX#9,
217 clause X10), and so we don't need to know the final embedding
218 level to which we descend after processing all the PDFs. */
219 if (!bidi_it
->prev_was_pdf
|| level_before
< level_after
)
220 /* FIXME: should the default sor direction be user selectable? */
221 bidi_it
->sor
= (higher_level
& 1) != 0 ? R2L
: L2R
;
222 if (level_before
> level_after
)
223 bidi_it
->prev_was_pdf
= 1;
225 bidi_it
->prev
.type
= UNKNOWN_BT
;
226 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
227 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
228 bidi_it
->prev_for_neutral
.type
= bidi_it
->sor
== R2L
? STRONG_R
: STRONG_L
;
229 bidi_it
->prev_for_neutral
.charpos
= bidi_it
->charpos
;
230 bidi_it
->prev_for_neutral
.bytepos
= bidi_it
->bytepos
;
231 bidi_it
->next_for_neutral
.type
= bidi_it
->next_for_neutral
.type_after_w1
=
232 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
233 bidi_it
->ignore_bn_limit
= -1; /* meaning it's unknown */
236 /* Push the current embedding level and override status; reset the
237 current level to LEVEL and the current override status to OVERRIDE. */
239 bidi_push_embedding_level (struct bidi_it
*bidi_it
,
240 int level
, bidi_dir_t override
)
242 bidi_it
->stack_idx
++;
243 xassert (bidi_it
->stack_idx
< BIDI_MAXLEVEL
);
244 bidi_it
->level_stack
[bidi_it
->stack_idx
].level
= level
;
245 bidi_it
->level_stack
[bidi_it
->stack_idx
].override
= override
;
248 /* Pop the embedding level and directional override status from the
249 stack, and return the new level. */
251 bidi_pop_embedding_level (struct bidi_it
*bidi_it
)
253 /* UAX#9 says to ignore invalid PDFs. */
254 if (bidi_it
->stack_idx
> 0)
255 bidi_it
->stack_idx
--;
256 return bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
259 /* Record in SAVED_INFO the information about the current character. */
261 bidi_remember_char (struct bidi_saved_info
*saved_info
,
262 struct bidi_it
*bidi_it
)
264 saved_info
->charpos
= bidi_it
->charpos
;
265 saved_info
->bytepos
= bidi_it
->bytepos
;
266 saved_info
->type
= bidi_it
->type
;
267 bidi_check_type (bidi_it
->type
);
268 saved_info
->type_after_w1
= bidi_it
->type_after_w1
;
269 bidi_check_type (bidi_it
->type_after_w1
);
270 saved_info
->orig_type
= bidi_it
->orig_type
;
271 bidi_check_type (bidi_it
->orig_type
);
274 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
275 copies the part of the level stack that is actually in use. */
277 bidi_copy_it (struct bidi_it
*to
, struct bidi_it
*from
)
281 /* Copy everything except the level stack and beyond. */
282 memcpy (to
, from
, offsetof (struct bidi_it
, level_stack
[0]));
284 /* Copy the active part of the level stack. */
285 to
->level_stack
[0] = from
->level_stack
[0]; /* level zero is always in use */
286 for (i
= 1; i
<= from
->stack_idx
; i
++)
287 to
->level_stack
[i
] = from
->level_stack
[i
];
291 /***********************************************************************
292 Caching the bidi iterator states
293 ***********************************************************************/
295 #define BIDI_CACHE_CHUNK 200
296 static struct bidi_it
*bidi_cache
;
297 static size_t bidi_cache_size
= 0;
298 static size_t elsz
= sizeof (struct bidi_it
);
299 static EMACS_INT bidi_cache_idx
; /* next unused cache slot */
300 static EMACS_INT bidi_cache_last_idx
; /* slot of last cache hit */
301 static EMACS_INT bidi_cache_start
= 0; /* start of cache for this
304 /* Reset the cache state to the empty state. We only reset the part
305 of the cache relevant to iteration of the current object. Previous
306 objects, which are pushed on the display iterator's stack, are left
307 intact. This is called when the cached information is no more
308 useful for the current iteration, e.g. when we were reseated to a
309 new position on the same object. */
311 bidi_cache_reset (void)
313 bidi_cache_idx
= bidi_cache_start
;
314 bidi_cache_last_idx
= -1;
317 /* Shrink the cache to its minimal size. Called when we init the bidi
318 iterator for reordering a buffer or a string that does not come
319 from display properties, because that means all the previously
320 cached info is of no further use. */
322 bidi_cache_shrink (void)
324 if (bidi_cache_size
> BIDI_CACHE_CHUNK
)
326 bidi_cache_size
= BIDI_CACHE_CHUNK
;
328 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
334 bidi_cache_fetch_state (int idx
, struct bidi_it
*bidi_it
)
336 int current_scan_dir
= bidi_it
->scan_dir
;
338 if (idx
< bidi_cache_start
|| idx
>= bidi_cache_idx
)
341 bidi_copy_it (bidi_it
, &bidi_cache
[idx
]);
342 bidi_it
->scan_dir
= current_scan_dir
;
343 bidi_cache_last_idx
= idx
;
346 /* Find a cached state with a given CHARPOS and resolved embedding
347 level less or equal to LEVEL. If LEVEL is -1, disregard the
348 resolved levels in cached states. DIR, if non-zero, means search
349 in that direction from the last cache hit. */
351 bidi_cache_search (EMACS_INT charpos
, int level
, int dir
)
357 if (charpos
< bidi_cache
[bidi_cache_last_idx
].charpos
)
360 i_start
= bidi_cache_last_idx
- 1;
362 else if (charpos
> (bidi_cache
[bidi_cache_last_idx
].charpos
363 + bidi_cache
[bidi_cache_last_idx
].nchars
- 1))
366 i_start
= bidi_cache_last_idx
+ 1;
369 i_start
= bidi_cache_last_idx
;
373 i_start
= bidi_cache_idx
- 1;
378 /* Linear search for now; FIXME! */
379 for (i
= i_start
; i
>= bidi_cache_start
; i
--)
380 if (bidi_cache
[i
].charpos
<= charpos
381 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
382 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
387 for (i
= i_start
; i
< bidi_cache_idx
; i
++)
388 if (bidi_cache
[i
].charpos
<= charpos
389 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
390 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
398 /* Find a cached state where the resolved level changes to a value
399 that is lower than LEVEL, and return its cache slot index. DIR is
400 the direction to search, starting with the last used cache slot.
401 If DIR is zero, we search backwards from the last occupied cache
402 slot. BEFORE, if non-zero, means return the index of the slot that
403 is ``before'' the level change in the search direction. That is,
404 given the cached levels like this:
409 and assuming we are at the position cached at the slot marked with
410 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
411 index of slot B or A, depending whether BEFORE is, respectively,
414 bidi_cache_find_level_change (int level
, int dir
, int before
)
418 int i
= dir
? bidi_cache_last_idx
: bidi_cache_idx
- 1;
419 int incr
= before
? 1 : 0;
428 while (i
>= bidi_cache_start
+ incr
)
430 if (bidi_cache
[i
- incr
].resolved_level
>= 0
431 && bidi_cache
[i
- incr
].resolved_level
< level
)
438 while (i
< bidi_cache_idx
- incr
)
440 if (bidi_cache
[i
+ incr
].resolved_level
>= 0
441 && bidi_cache
[i
+ incr
].resolved_level
< level
)
452 bidi_cache_ensure_space (int idx
)
454 /* Enlarge the cache as needed. */
455 if (idx
>= bidi_cache_size
)
457 bidi_cache_size
+= BIDI_CACHE_CHUNK
;
459 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
464 bidi_cache_iterator_state (struct bidi_it
*bidi_it
, int resolved
)
468 /* We should never cache on backward scans. */
469 if (bidi_it
->scan_dir
== -1)
471 idx
= bidi_cache_search (bidi_it
->charpos
, -1, 1);
475 idx
= bidi_cache_idx
;
476 bidi_cache_ensure_space (idx
);
477 /* Character positions should correspond to cache positions 1:1.
478 If we are outside the range of cached positions, the cache is
479 useless and must be reset. */
480 if (idx
> bidi_cache_start
&&
481 (bidi_it
->charpos
> (bidi_cache
[idx
- 1].charpos
482 + bidi_cache
[idx
- 1].nchars
)
483 || bidi_it
->charpos
< bidi_cache
[bidi_cache_start
].charpos
))
486 idx
= bidi_cache_start
;
488 if (bidi_it
->nchars
<= 0)
490 bidi_copy_it (&bidi_cache
[idx
], bidi_it
);
492 bidi_cache
[idx
].resolved_level
= -1;
496 /* Copy only the members which could have changed, to avoid
497 costly copying of the entire struct. */
498 bidi_cache
[idx
].type
= bidi_it
->type
;
499 bidi_check_type (bidi_it
->type
);
500 bidi_cache
[idx
].type_after_w1
= bidi_it
->type_after_w1
;
501 bidi_check_type (bidi_it
->type_after_w1
);
503 bidi_cache
[idx
].resolved_level
= bidi_it
->resolved_level
;
505 bidi_cache
[idx
].resolved_level
= -1;
506 bidi_cache
[idx
].invalid_levels
= bidi_it
->invalid_levels
;
507 bidi_cache
[idx
].invalid_rl_levels
= bidi_it
->invalid_rl_levels
;
508 bidi_cache
[idx
].next_for_neutral
= bidi_it
->next_for_neutral
;
509 bidi_cache
[idx
].next_for_ws
= bidi_it
->next_for_ws
;
510 bidi_cache
[idx
].ignore_bn_limit
= bidi_it
->ignore_bn_limit
;
513 bidi_cache_last_idx
= idx
;
514 if (idx
>= bidi_cache_idx
)
515 bidi_cache_idx
= idx
+ 1;
518 static inline bidi_type_t
519 bidi_cache_find (EMACS_INT charpos
, int level
, struct bidi_it
*bidi_it
)
521 int i
= bidi_cache_search (charpos
, level
, bidi_it
->scan_dir
);
523 if (i
>= bidi_cache_start
)
525 bidi_dir_t current_scan_dir
= bidi_it
->scan_dir
;
527 bidi_copy_it (bidi_it
, &bidi_cache
[i
]);
528 bidi_cache_last_idx
= i
;
529 /* Don't let scan direction from the cached state override
530 the current scan direction. */
531 bidi_it
->scan_dir
= current_scan_dir
;
532 return bidi_it
->type
;
539 bidi_peek_at_next_level (struct bidi_it
*bidi_it
)
541 if (bidi_cache_idx
== bidi_cache_start
|| bidi_cache_last_idx
== -1)
543 return bidi_cache
[bidi_cache_last_idx
+ bidi_it
->scan_dir
].resolved_level
;
547 /***********************************************************************
548 Pushing and popping the bidi iterator state
549 ***********************************************************************/
550 /* 5-slot stack for saving the start of the previous level of the
551 cache. xdisp.c maintains a 5-slot stack for its iterator state,
552 and we need the same size of our stack. */
553 static int bidi_cache_start_stack
[IT_STACK_SIZE
];
554 static int bidi_cache_sp
;
556 /* Push the bidi iterator state in preparation for reordering a
557 different object, e.g. display string found at certain buffer
558 position. Pushing the bidi iterator boils down to saving its
559 entire state on the cache and starting a new cache "stacked" on top
560 of the current cache. */
562 bidi_push_it (struct bidi_it
*bidi_it
)
564 /* Save the current iterator state in its entirety after the last
566 bidi_cache_ensure_space (bidi_cache_idx
);
567 memcpy (&bidi_cache
[bidi_cache_idx
++], bidi_it
, sizeof (struct bidi_it
));
569 /* Push the current cache start onto the stack. */
570 xassert (bidi_cache_sp
< IT_STACK_SIZE
);
571 bidi_cache_start_stack
[bidi_cache_sp
++] = bidi_cache_start
;
573 /* Start a new level of cache, and make it empty. */
574 bidi_cache_start
= bidi_cache_idx
;
575 bidi_cache_last_idx
= -1;
578 /* Restore the iterator state saved by bidi_push_it and return the
579 cache to the corresponding state. */
581 bidi_pop_it (struct bidi_it
*bidi_it
)
583 if (bidi_cache_start
<= 0)
586 /* Reset the next free cache slot index to what it was before the
587 call to bidi_push_it. */
588 bidi_cache_idx
= bidi_cache_start
- 1;
590 /* Restore the bidi iterator state saved in the cache. */
591 memcpy (bidi_it
, &bidi_cache
[bidi_cache_idx
], sizeof (struct bidi_it
));
593 /* Pop the previous cache start from the stack. */
594 if (bidi_cache_sp
<= 0)
596 bidi_cache_start
= bidi_cache_start_stack
[--bidi_cache_sp
];
598 /* Invalidate the last-used cache slot data. */
599 bidi_cache_last_idx
= -1;
603 /***********************************************************************
605 ***********************************************************************/
607 bidi_initialize (void)
610 #include "biditype.h"
611 #include "bidimirror.h"
615 bidi_type_table
= Fmake_char_table (Qnil
, make_number (STRONG_L
));
616 staticpro (&bidi_type_table
);
618 for (i
= 0; i
< sizeof bidi_type
/ sizeof bidi_type
[0]; i
++)
619 char_table_set_range (bidi_type_table
, bidi_type
[i
].from
, bidi_type
[i
].to
,
620 make_number (bidi_type
[i
].type
));
622 bidi_mirror_table
= Fmake_char_table (Qnil
, Qnil
);
623 staticpro (&bidi_mirror_table
);
625 for (i
= 0; i
< sizeof bidi_mirror
/ sizeof bidi_mirror
[0]; i
++)
626 char_table_set (bidi_mirror_table
, bidi_mirror
[i
].from
,
627 make_number (bidi_mirror
[i
].to
));
629 Qparagraph_start
= intern ("paragraph-start");
630 staticpro (&Qparagraph_start
);
631 paragraph_start_re
= Fsymbol_value (Qparagraph_start
);
632 if (!STRINGP (paragraph_start_re
))
633 paragraph_start_re
= build_string ("\f\\|[ \t]*$");
634 staticpro (&paragraph_start_re
);
635 Qparagraph_separate
= intern ("paragraph-separate");
636 staticpro (&Qparagraph_separate
);
637 paragraph_separate_re
= Fsymbol_value (Qparagraph_separate
);
638 if (!STRINGP (paragraph_separate_re
))
639 paragraph_separate_re
= build_string ("[ \t\f]*$");
640 staticpro (&paragraph_separate_re
);
644 bidi_initialized
= 1;
647 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
650 bidi_set_paragraph_end (struct bidi_it
*bidi_it
)
652 bidi_it
->invalid_levels
= 0;
653 bidi_it
->invalid_rl_levels
= -1;
654 bidi_it
->stack_idx
= 0;
655 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
658 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
660 bidi_init_it (EMACS_INT charpos
, EMACS_INT bytepos
, int frame_window_p
,
661 struct bidi_it
*bidi_it
)
663 if (! bidi_initialized
)
666 bidi_it
->charpos
= charpos
;
668 bidi_it
->bytepos
= bytepos
;
669 bidi_it
->frame_window_p
= frame_window_p
;
670 bidi_it
->nchars
= -1; /* to be computed in bidi_resolve_explicit_1 */
671 bidi_it
->first_elt
= 1;
672 bidi_set_paragraph_end (bidi_it
);
673 bidi_it
->new_paragraph
= 1;
674 bidi_it
->separator_limit
= -1;
675 bidi_it
->type
= NEUTRAL_B
;
676 bidi_it
->type_after_w1
= NEUTRAL_B
;
677 bidi_it
->orig_type
= NEUTRAL_B
;
678 bidi_it
->prev_was_pdf
= 0;
679 bidi_it
->prev
.type
= bidi_it
->prev
.type_after_w1
=
680 bidi_it
->prev
.orig_type
= UNKNOWN_BT
;
681 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
682 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
683 bidi_it
->next_for_neutral
.charpos
= -1;
684 bidi_it
->next_for_neutral
.type
=
685 bidi_it
->next_for_neutral
.type_after_w1
=
686 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
687 bidi_it
->prev_for_neutral
.charpos
= -1;
688 bidi_it
->prev_for_neutral
.type
=
689 bidi_it
->prev_for_neutral
.type_after_w1
=
690 bidi_it
->prev_for_neutral
.orig_type
= UNKNOWN_BT
;
691 bidi_it
->sor
= L2R
; /* FIXME: should it be user-selectable? */
692 bidi_it
->disp_pos
= -1; /* invalid/unknown */
693 /* We can only shrink the cache if we are at the bottom level of its
695 if (bidi_cache_start
== 0)
696 bidi_cache_shrink ();
701 /* Perform initializations for reordering a new line of bidi text. */
703 bidi_line_init (struct bidi_it
*bidi_it
)
705 bidi_it
->scan_dir
= 1; /* FIXME: do we need to have control on this? */
706 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
707 bidi_it
->level_stack
[0].override
= NEUTRAL_DIR
; /* X1 */
708 bidi_it
->invalid_levels
= 0;
709 bidi_it
->invalid_rl_levels
= -1;
710 bidi_it
->next_en_pos
= -1;
711 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
712 bidi_set_sor_type (bidi_it
,
713 bidi_it
->paragraph_dir
== R2L
? 1 : 0,
714 bidi_it
->level_stack
[0].level
); /* X10 */
720 /***********************************************************************
722 ***********************************************************************/
724 /* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG
725 and END are zero-based character positions in S, BEGBYTE is byte
726 position corresponding to BEG. */
727 static inline EMACS_INT
728 bidi_count_bytes (const unsigned char *s
, const EMACS_INT beg
,
729 const EMACS_INT begbyte
, const EMACS_INT end
)
732 const unsigned char *p
= s
+ begbyte
, *start
= p
;
734 if (!CHAR_HEAD_P (*p
))
739 p
+= BYTES_BY_CHAR_HEAD (*p
);
746 /* Fetch and return the character at byte position BYTEPOS. If S is
747 non-NULL, fetch the character from string S; otherwise fetch the
748 character from the current buffer. */
750 bidi_char_at_pos (EMACS_INT bytepos
, const unsigned char *s
)
753 return STRING_CHAR (s
+ bytepos
);
755 return FETCH_MULTIBYTE_CHAR (bytepos
);
758 /* Fetch and return the character at BYTEPOS/CHARPOS. If that
759 character is covered by a display string, treat the entire run of
760 covered characters as a single character u+FFFC, and return their
761 combined length in CH_LEN and NCHARS. DISP_POS specifies the
762 character position of the next display string, or -1 if not yet
763 computed. When the next character is at or beyond that position,
764 the function updates DISP_POS with the position of the next display
765 string. STRING->s is the C string to iterate, or NULL if iterating
766 over a buffer or a Lisp string; in the latter case, STRING->lstring
767 is the Lisp string. */
769 bidi_fetch_char (EMACS_INT bytepos
, EMACS_INT charpos
, EMACS_INT
*disp_pos
,
770 struct bidi_string_data
*string
,
771 int frame_window_p
, EMACS_INT
*ch_len
, EMACS_INT
*nchars
)
775 (string
->s
|| STRINGP (string
->lstring
)) ? string
->schars
: ZV
;
778 /* If we got past the last known position of display string, compute
779 the position of the next one. That position could be at CHARPOS. */
780 if (charpos
< endpos
&& charpos
> *disp_pos
)
782 SET_TEXT_POS (pos
, charpos
, bytepos
);
783 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
);
786 /* Fetch the character at BYTEPOS. */
787 if (charpos
>= endpos
)
794 else if (charpos
>= *disp_pos
)
796 EMACS_INT disp_end_pos
;
798 /* We don't expect to find ourselves in the middle of a display
799 property. Hopefully, it will never be needed. */
800 if (charpos
> *disp_pos
)
802 /* Return the Unicode Object Replacement Character to represent
803 the entire run of characters covered by the display string. */
805 disp_end_pos
= compute_display_string_end (*disp_pos
, string
);
806 *nchars
= disp_end_pos
- *disp_pos
;
808 *ch_len
= bidi_count_bytes (string
->s
, *disp_pos
, bytepos
,
810 else if (STRINGP (string
->lstring
))
811 *ch_len
= bidi_count_bytes (SDATA (string
->lstring
), *disp_pos
,
812 bytepos
, disp_end_pos
);
814 *ch_len
= CHAR_TO_BYTE (disp_end_pos
) - bytepos
;
822 ch
= STRING_CHAR_AND_LENGTH (string
->s
+ bytepos
, len
);
825 else if (STRINGP (string
->lstring
))
829 ch
= STRING_CHAR_AND_LENGTH (SDATA (string
->lstring
) + bytepos
, len
);
834 ch
= FETCH_MULTIBYTE_CHAR (bytepos
);
835 *ch_len
= CHAR_BYTES (ch
);
840 /* If we just entered a run of characters covered by a display
841 string, compute the position of the next display string. */
842 if (charpos
+ *nchars
<= endpos
&& charpos
+ *nchars
> *disp_pos
)
844 SET_TEXT_POS (pos
, charpos
+ *nchars
, bytepos
+ *ch_len
);
845 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
);
852 /***********************************************************************
853 Determining paragraph direction
854 ***********************************************************************/
856 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
857 Value is the non-negative length of the paragraph separator
858 following the buffer position, -1 if position is at the beginning
859 of a new paragraph, or -2 if position is neither at beginning nor
860 at end of a paragraph. */
862 bidi_at_paragraph_end (EMACS_INT charpos
, EMACS_INT bytepos
)
865 Lisp_Object start_re
;
868 sep_re
= paragraph_separate_re
;
869 start_re
= paragraph_start_re
;
871 val
= fast_looking_at (sep_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
);
874 if (fast_looking_at (start_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
) >= 0)
883 /* Find the beginning of this paragraph by looking back in the buffer.
884 Value is the byte position of the paragraph's beginning. */
886 bidi_find_paragraph_start (EMACS_INT pos
, EMACS_INT pos_byte
)
888 Lisp_Object re
= paragraph_start_re
;
889 EMACS_INT limit
= ZV
, limit_byte
= ZV_BYTE
;
891 while (pos_byte
> BEGV_BYTE
892 && fast_looking_at (re
, pos
, pos_byte
, limit
, limit_byte
, Qnil
) < 0)
894 /* FIXME: What if the paragraph beginning is covered by a
895 display string? And what if a display string covering some
896 of the text over which we scan back includes
897 paragraph_start_re? */
898 pos
= find_next_newline_no_quit (pos
- 1, -1);
899 pos_byte
= CHAR_TO_BYTE (pos
);
904 /* Determine the base direction, a.k.a. base embedding level, of the
905 paragraph we are about to iterate through. If DIR is either L2R or
906 R2L, just use that. Otherwise, determine the paragraph direction
907 from the first strong directional character of the paragraph.
909 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
910 has no strong directional characters and both DIR and
911 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
912 in the buffer until a paragraph is found with a strong character,
913 or until hitting BEGV. In the latter case, fall back to L2R. This
914 flag is used in current-bidi-paragraph-direction.
916 Note that this function gives the paragraph separator the same
917 direction as the preceding paragraph, even though Emacs generally
918 views the separator as not belonging to any paragraph. */
920 bidi_paragraph_init (bidi_dir_t dir
, struct bidi_it
*bidi_it
, int no_default_p
)
922 EMACS_INT bytepos
= bidi_it
->bytepos
;
923 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
924 EMACS_INT pstartbyte
;
925 /* Note that begbyte is a byte position, while end is a character
926 position. Yes, this is ugly, but we are trying to avoid costly
927 calls to BYTE_TO_CHAR and its ilk. */
928 EMACS_INT begbyte
= string_p
? 0 : BEGV_BYTE
;
929 EMACS_INT end
= string_p
? bidi_it
->string
.schars
: ZV
;
931 /* Special case for an empty buffer. */
932 if (bytepos
== begbyte
&& bidi_it
->charpos
== end
)
934 /* We should never be called at EOB or before BEGV. */
935 else if (bidi_it
->charpos
>= end
|| bytepos
< begbyte
)
940 bidi_it
->paragraph_dir
= L2R
;
941 bidi_it
->new_paragraph
= 0;
945 bidi_it
->paragraph_dir
= R2L
;
946 bidi_it
->new_paragraph
= 0;
948 else if (dir
== NEUTRAL_DIR
) /* P2 */
951 EMACS_INT ch_len
, nchars
;
952 EMACS_INT pos
, disp_pos
= -1;
954 const unsigned char *s
;
956 if (!bidi_initialized
)
959 /* If we are inside a paragraph separator, we are just waiting
960 for the separator to be exhausted; use the previous paragraph
961 direction. But don't do that if we have been just reseated,
962 because we need to reinitialize below in that case. */
963 if (!bidi_it
->first_elt
964 && bidi_it
->charpos
< bidi_it
->separator_limit
)
967 /* If we are on a newline, get past it to where the next
968 paragraph might start. But don't do that at BEGV since then
969 we are potentially in a new paragraph that doesn't yet
971 pos
= bidi_it
->charpos
;
972 s
= STRINGP (bidi_it
->string
.lstring
) ?
973 SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
974 if (bytepos
> begbyte
&& bidi_char_at_pos (bytepos
, s
) == '\n')
980 /* We are either at the beginning of a paragraph or in the
981 middle of it. Find where this paragraph starts. */
984 /* We don't support changes of paragraph direction inside a
985 string. It is treated as a single paragraph. */
989 pstartbyte
= bidi_find_paragraph_start (pos
, bytepos
);
990 bidi_it
->separator_limit
= -1;
991 bidi_it
->new_paragraph
= 0;
993 /* The following loop is run more than once only if NO_DEFAULT_P
994 is non-zero, and only if we are iterating on a buffer. */
996 bytepos
= pstartbyte
;
998 pos
= BYTE_TO_CHAR (bytepos
);
999 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
, &bidi_it
->string
,
1000 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1001 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1003 for (pos
+= nchars
, bytepos
+= ch_len
;
1004 /* NOTE: UAX#9 says to search only for L, AL, or R types
1005 of characters, and ignore RLE, RLO, LRE, and LRO.
1006 However, I'm not sure it makes sense to omit those 4;
1007 should try with and without that to see the effect. */
1008 (bidi_get_category (type
) != STRONG
)
1009 || (bidi_ignore_explicit_marks_for_paragraph_level
1010 && (type
== RLE
|| type
== RLO
1011 || type
== LRE
|| type
== LRO
));
1012 type
= bidi_get_type (ch
, NEUTRAL_DIR
))
1015 && type
== NEUTRAL_B
1016 && bidi_at_paragraph_end (pos
, bytepos
) >= -1)
1020 /* Pretend there's a paragraph separator at end of
1025 /* Fetch next character and advance to get past it. */
1026 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
, &bidi_it
->string
,
1027 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1031 if (type
== STRONG_R
|| type
== STRONG_AL
) /* P3 */
1032 bidi_it
->paragraph_dir
= R2L
;
1033 else if (type
== STRONG_L
)
1034 bidi_it
->paragraph_dir
= L2R
;
1036 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
)
1038 /* If this paragraph is at BEGV, default to L2R. */
1039 if (pstartbyte
== BEGV_BYTE
)
1040 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 */
1043 EMACS_INT prevpbyte
= pstartbyte
;
1044 EMACS_INT p
= BYTE_TO_CHAR (pstartbyte
), pbyte
= pstartbyte
;
1046 /* Find the beginning of the previous paragraph, if any. */
1047 while (pbyte
> BEGV_BYTE
&& prevpbyte
>= pstartbyte
)
1049 /* FIXME: What if p is covered by a display
1050 string? See also a FIXME inside
1051 bidi_find_paragraph_start. */
1053 pbyte
= CHAR_TO_BYTE (p
);
1054 prevpbyte
= bidi_find_paragraph_start (p
, pbyte
);
1056 pstartbyte
= prevpbyte
;
1060 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
);
1065 /* Contrary to UAX#9 clause P3, we only default the paragraph
1066 direction to L2R if we have no previous usable paragraph
1067 direction. This is allowed by the HL1 clause. */
1068 if (bidi_it
->paragraph_dir
!= L2R
&& bidi_it
->paragraph_dir
!= R2L
)
1069 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 ``higher-level protocols'' */
1070 if (bidi_it
->paragraph_dir
== R2L
)
1071 bidi_it
->level_stack
[0].level
= 1;
1073 bidi_it
->level_stack
[0].level
= 0;
1075 bidi_line_init (bidi_it
);
1079 /***********************************************************************
1080 Resolving explicit and implicit levels.
1081 The rest of this file constitutes the core of the UBA implementation.
1082 ***********************************************************************/
1085 bidi_explicit_dir_char (int ch
)
1087 bidi_type_t ch_type
;
1089 if (!bidi_initialized
)
1091 ch_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
1092 return (ch_type
== LRE
|| ch_type
== LRO
1093 || ch_type
== RLE
|| ch_type
== RLO
1097 /* A helper function for bidi_resolve_explicit. It advances to the
1098 next character in logical order and determines the new embedding
1099 level and directional override, but does not take into account
1100 empty embeddings. */
1102 bidi_resolve_explicit_1 (struct bidi_it
*bidi_it
)
1108 bidi_dir_t override
;
1109 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
1111 /* If reseat()'ed, don't advance, so as to start iteration from the
1112 position where we were reseated. bidi_it->bytepos can be less
1113 than BEGV_BYTE after reseat to BEGV. */
1114 if (bidi_it
->bytepos
< (string_p
? 0 : BEGV_BYTE
)
1115 || bidi_it
->first_elt
)
1117 bidi_it
->first_elt
= 0;
1120 const unsigned char *p
=
1121 STRINGP (bidi_it
->string
.lstring
)
1122 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1124 if (bidi_it
->charpos
< 0)
1125 bidi_it
->charpos
= 0;
1126 bidi_it
->bytepos
= bidi_count_bytes (p
, 0, 0, bidi_it
->charpos
);
1130 if (bidi_it
->charpos
< BEGV
)
1131 bidi_it
->charpos
= BEGV
;
1132 bidi_it
->bytepos
= CHAR_TO_BYTE (bidi_it
->charpos
);
1135 /* Don't move at end of buffer/string. */
1136 else if (bidi_it
->charpos
< (string_p
? bidi_it
->string
.schars
: ZV
))
1138 /* Advance to the next character, skipping characters covered by
1139 display strings (nchars > 1). */
1140 if (bidi_it
->nchars
<= 0)
1142 bidi_it
->charpos
+= bidi_it
->nchars
;
1143 if (bidi_it
->ch_len
== 0)
1145 bidi_it
->bytepos
+= bidi_it
->ch_len
;
1148 current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
; /* X1 */
1149 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1150 new_level
= current_level
;
1152 if (bidi_it
->charpos
>= (string_p
? bidi_it
->string
.schars
: ZV
))
1155 bidi_it
->ch_len
= 1;
1156 bidi_it
->nchars
= 1;
1157 bidi_it
->disp_pos
= (string_p
? bidi_it
->string
.schars
: ZV
);
1161 /* Fetch the character at BYTEPOS. If it is covered by a
1162 display string, treat the entire run of covered characters as
1163 a single character U+FFFC. */
1164 curchar
= bidi_fetch_char (bidi_it
->bytepos
, bidi_it
->charpos
,
1165 &bidi_it
->disp_pos
, &bidi_it
->string
,
1166 bidi_it
->frame_window_p
,
1167 &bidi_it
->ch_len
, &bidi_it
->nchars
);
1169 bidi_it
->ch
= curchar
;
1171 /* Don't apply directional override here, as all the types we handle
1172 below will not be affected by the override anyway, and we need
1173 the original type unaltered. The override will be applied in
1174 bidi_resolve_weak. */
1175 type
= bidi_get_type (curchar
, NEUTRAL_DIR
);
1176 bidi_it
->orig_type
= type
;
1177 bidi_check_type (bidi_it
->orig_type
);
1180 bidi_it
->prev_was_pdf
= 0;
1182 bidi_it
->type_after_w1
= UNKNOWN_BT
;
1188 bidi_it
->type_after_w1
= type
;
1189 bidi_check_type (bidi_it
->type_after_w1
);
1190 type
= WEAK_BN
; /* X9/Retaining */
1191 if (bidi_it
->ignore_bn_limit
<= -1)
1193 if (current_level
<= BIDI_MAXLEVEL
- 4)
1195 /* Compute the least odd embedding level greater than
1196 the current level. */
1197 new_level
= ((current_level
+ 1) & ~1) + 1;
1198 if (bidi_it
->type_after_w1
== RLE
)
1199 override
= NEUTRAL_DIR
;
1202 if (current_level
== BIDI_MAXLEVEL
- 4)
1203 bidi_it
->invalid_rl_levels
= 0;
1204 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1208 bidi_it
->invalid_levels
++;
1209 /* See the commentary about invalid_rl_levels below. */
1210 if (bidi_it
->invalid_rl_levels
< 0)
1211 bidi_it
->invalid_rl_levels
= 0;
1212 bidi_it
->invalid_rl_levels
++;
1215 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1216 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1221 bidi_it
->type_after_w1
= type
;
1222 bidi_check_type (bidi_it
->type_after_w1
);
1223 type
= WEAK_BN
; /* X9/Retaining */
1224 if (bidi_it
->ignore_bn_limit
<= -1)
1226 if (current_level
<= BIDI_MAXLEVEL
- 5)
1228 /* Compute the least even embedding level greater than
1229 the current level. */
1230 new_level
= ((current_level
+ 2) & ~1);
1231 if (bidi_it
->type_after_w1
== LRE
)
1232 override
= NEUTRAL_DIR
;
1235 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1239 bidi_it
->invalid_levels
++;
1240 /* invalid_rl_levels counts invalid levels encountered
1241 while the embedding level was already too high for
1242 LRE/LRO, but not for RLE/RLO. That is because
1243 there may be exactly one PDF which we should not
1244 ignore even though invalid_levels is non-zero.
1245 invalid_rl_levels helps to know what PDF is
1247 if (bidi_it
->invalid_rl_levels
>= 0)
1248 bidi_it
->invalid_rl_levels
++;
1251 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1252 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1256 bidi_it
->type_after_w1
= type
;
1257 bidi_check_type (bidi_it
->type_after_w1
);
1258 type
= WEAK_BN
; /* X9/Retaining */
1259 if (bidi_it
->ignore_bn_limit
<= -1)
1261 if (!bidi_it
->invalid_rl_levels
)
1263 new_level
= bidi_pop_embedding_level (bidi_it
);
1264 bidi_it
->invalid_rl_levels
= -1;
1265 if (bidi_it
->invalid_levels
)
1266 bidi_it
->invalid_levels
--;
1267 /* else nothing: UAX#9 says to ignore invalid PDFs */
1269 if (!bidi_it
->invalid_levels
)
1270 new_level
= bidi_pop_embedding_level (bidi_it
);
1273 bidi_it
->invalid_levels
--;
1274 bidi_it
->invalid_rl_levels
--;
1277 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1278 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1286 bidi_it
->type
= type
;
1287 bidi_check_type (bidi_it
->type
);
1292 /* Given an iterator state in BIDI_IT, advance one character position
1293 in the buffer/string to the next character (in the logical order),
1294 resolve any explicit embeddings and directional overrides, and
1295 return the embedding level of the character after resolving
1296 explicit directives and ignoring empty embeddings.

   The level on top of BIDI_IT's level_stack is sampled before
   bidi_resolve_explicit_1 advances the iterator.  If that call raised
   the level on a WEAK_BN character and the following character is
   itself an explicit directional code, the iterator is saved and
   bidi_resolve_explicit_1 is run repeatedly over the run of explicit
   codes.  The saved state's ignore_bn_limit is then set either to the
   position just past that run (the level came back to where it
   started, i.e. the embedding was empty) or to -2 (non-empty), the
   iterator is restored from the saved state, and for an empty
   embedding the level that was pushed is undone.

   A character of type NEUTRAL_B additionally triggers
   bidi_set_paragraph_end and has its type copied to type_after_w1. */
1298 bidi_resolve_explicit (struct bidi_it
*bidi_it
)
1300 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1301 int new_level
= bidi_resolve_explicit_1 (bidi_it
);
/* End of text: the string length when iterating over either kind of
   string (C string in string.s or Lisp string in string.lstring),
   otherwise ZV.  Testing string.s alone made a Lisp string, whose
   string.s is NULL, use the buffer's ZV here, unlike the other
   end-of-text computations in this file. */
1302 EMACS_INT eob
= (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
? bidi_it
->string
.schars
: ZV
;
1303 const unsigned char *s
= STRINGP (bidi_it
->string
.lstring
)
1304 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1306 if (prev_level
< new_level
1307 && bidi_it
->type
== WEAK_BN
1308 && bidi_it
->ignore_bn_limit
== -1 /* only if not already known */
1309 && bidi_it
->charpos
< eob
/* not already at EOB */
1310 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1311 + bidi_it
->ch_len
, s
)))
1313 /* Avoid pushing and popping embedding levels if the level run
1314 is empty, as this breaks level runs where it shouldn't.
1315 UAX#9 removes all the explicit embedding and override codes,
1316 so empty embeddings disappear without a trace. We need to
1317 behave as if we did the same. */
1318 struct bidi_it saved_it
;
1319 int level
= prev_level
;
1321 bidi_copy_it (&saved_it
, bidi_it
);
/* Consume the whole run of consecutive explicit directional codes. */
1323 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1324 + bidi_it
->ch_len
, s
)))
1326 /* This advances to the next character, skipping any
1327 characters covered by display strings. */
1328 level
= bidi_resolve_explicit_1 (bidi_it
);
1329 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1330 a pointer to its data is no longer valid. */
1331 if (STRINGP (bidi_it
->string
.lstring
))
1332 s
= SDATA (bidi_it
->string
.lstring
);
1335 if (bidi_it
->nchars
<= 0)
1337 if (level
== prev_level
) /* empty embedding */
1338 saved_it
.ignore_bn_limit
= bidi_it
->charpos
+ bidi_it
->nchars
;
1339 else /* this embedding is non-empty */
1340 saved_it
.ignore_bn_limit
= -2;
/* Go back to the character we started from; only ignore_bn_limit
   survives from the look-ahead. */
1342 bidi_copy_it (bidi_it
, &saved_it
);
1343 if (bidi_it
->ignore_bn_limit
> -1)
1345 /* We pushed a level, but we shouldn't have. Undo that. */
1346 if (!bidi_it
->invalid_rl_levels
)
1348 new_level
= bidi_pop_embedding_level (bidi_it
);
1349 bidi_it
->invalid_rl_levels
= -1;
1350 if (bidi_it
->invalid_levels
)
1351 bidi_it
->invalid_levels
--;
1353 if (!bidi_it
->invalid_levels
)
1354 new_level
= bidi_pop_embedding_level (bidi_it
);
1357 bidi_it
->invalid_levels
--;
1358 bidi_it
->invalid_rl_levels
--;
1363 if (bidi_it
->type
== NEUTRAL_B
) /* X8 */
1365 bidi_set_paragraph_end (bidi_it
);
1366 /* This is needed by bidi_resolve_weak below, and in L1. */
1367 bidi_it
->type_after_w1
= bidi_it
->type
;
1368 bidi_check_type (bidi_it
->type_after_w1
);
1374 /* Advance in the buffer/string, resolve weak types and return the
1375 type of the next character after weak type resolution. */
1377 bidi_resolve_weak (struct bidi_it
*bidi_it
)
1380 bidi_dir_t override
;
1381 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1382 int new_level
= bidi_resolve_explicit (bidi_it
);
1384 bidi_type_t type_of_next
;
1385 struct bidi_it saved_it
;
1387 (STRINGP (bidi_it
->string
.lstring
) || bidi_it
->string
.s
)
1388 ? bidi_it
->string
.schars
: ZV
;
1390 type
= bidi_it
->type
;
1391 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1393 if (type
== UNKNOWN_BT
1401 if (new_level
!= prev_level
1402 || bidi_it
->type
== NEUTRAL_B
)
1404 /* We've got a new embedding level run, compute the directional
1405 type of sor and initialize per-run variables (UAX#9, clause
1407 bidi_set_sor_type (bidi_it
, prev_level
, new_level
);
1409 else if (type
== NEUTRAL_S
|| type
== NEUTRAL_WS
1410 || type
== WEAK_BN
|| type
== STRONG_AL
)
1411 bidi_it
->type_after_w1
= type
; /* needed in L1 */
1412 bidi_check_type (bidi_it
->type_after_w1
);
1414 /* Level and directional override status are already recorded in
1415 bidi_it, and do not need any change; see X6. */
1416 if (override
== R2L
) /* X6 */
1418 else if (override
== L2R
)
1422 if (type
== WEAK_NSM
) /* W1 */
1424 /* Note that we don't need to consider the case where the
1425 prev character has its type overridden by an RLO or LRO,
1426 because then either the type of this NSM would have been
1427 also overridden, or the previous character is outside the
1428 current level run, and thus not relevant to this NSM.
1429 This is why NSM gets the type_after_w1 of the previous
1431 if (bidi_it
->prev
.type_after_w1
!= UNKNOWN_BT
1432 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1433 && bidi_it
->prev
.type_after_w1
!= NEUTRAL_B
)
1434 type
= bidi_it
->prev
.type_after_w1
;
1435 else if (bidi_it
->sor
== R2L
)
1437 else if (bidi_it
->sor
== L2R
)
1439 else /* shouldn't happen! */
1442 if (type
== WEAK_EN
/* W2 */
1443 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
1445 else if (type
== STRONG_AL
) /* W3 */
1447 else if ((type
== WEAK_ES
/* W4 */
1448 && bidi_it
->prev
.type_after_w1
== WEAK_EN
1449 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1451 && ((bidi_it
->prev
.type_after_w1
== WEAK_EN
1452 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1453 || bidi_it
->prev
.type_after_w1
== WEAK_AN
)))
1455 const unsigned char *s
=
1456 STRINGP (bidi_it
->string
.lstring
)
1457 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1460 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1462 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
);
1463 type_of_next
= bidi_get_type (next_char
, override
);
1465 if (type_of_next
== WEAK_BN
1466 || bidi_explicit_dir_char (next_char
))
1468 bidi_copy_it (&saved_it
, bidi_it
);
1469 while (bidi_resolve_explicit (bidi_it
) == new_level
1470 && bidi_it
->type
== WEAK_BN
)
1472 type_of_next
= bidi_it
->type
;
1473 bidi_copy_it (bidi_it
, &saved_it
);
1476 /* If the next character is EN, but the last strong-type
1477 character is AL, that next EN will be changed to AN when
1478 we process it in W2 above. So in that case, this ES
1479 should not be changed into EN. */
1481 && type_of_next
== WEAK_EN
1482 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1484 else if (type
== WEAK_CS
)
1486 if (bidi_it
->prev
.type_after_w1
== WEAK_AN
1487 && (type_of_next
== WEAK_AN
1488 /* If the next character is EN, but the last
1489 strong-type character is AL, EN will be later
1490 changed to AN when we process it in W2 above.
1491 So in that case, this ES should not be
1493 || (type_of_next
== WEAK_EN
1494 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)))
1496 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
1497 && type_of_next
== WEAK_EN
1498 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1502 else if (type
== WEAK_ET
/* W5: ET with EN before or after it */
1503 || type
== WEAK_BN
) /* W5/Retaining */
1505 if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* ET/BN w/EN before it */
1506 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1508 else /* W5: ET/BN with EN after it. */
1510 EMACS_INT en_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1511 const unsigned char *s
=
1512 STRINGP (bidi_it
->string
.lstring
)
1513 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1515 if (bidi_it
->nchars
<= 0)
1518 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1520 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
);
1521 type_of_next
= bidi_get_type (next_char
, override
);
1523 if (type_of_next
== WEAK_ET
1524 || type_of_next
== WEAK_BN
1525 || bidi_explicit_dir_char (next_char
))
1527 bidi_copy_it (&saved_it
, bidi_it
);
1528 while (bidi_resolve_explicit (bidi_it
) == new_level
1529 && (bidi_it
->type
== WEAK_BN
1530 || bidi_it
->type
== WEAK_ET
))
1532 type_of_next
= bidi_it
->type
;
1533 en_pos
= bidi_it
->charpos
;
1534 bidi_copy_it (bidi_it
, &saved_it
);
1536 if (type_of_next
== WEAK_EN
)
1538 /* If the last strong character is AL, the EN we've
1539 found will become AN when we get to it (W2). */
1540 if (bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1543 /* Remember this EN position, to speed up processing
1545 bidi_it
->next_en_pos
= en_pos
;
1547 else if (type
== WEAK_BN
)
1548 type
= NEUTRAL_ON
; /* W6/Retaining */
1554 if (type
== WEAK_ES
|| type
== WEAK_ET
|| type
== WEAK_CS
/* W6 */
1556 && (bidi_it
->prev
.type_after_w1
== WEAK_CS
/* W6/Retaining */
1557 || bidi_it
->prev
.type_after_w1
== WEAK_ES
1558 || bidi_it
->prev
.type_after_w1
== WEAK_ET
)))
1561 /* Store the type we've got so far, before we clobber it with strong
1562 types in W7 and while resolving neutral types. But leave alone
1563 the original types that were recorded above, because we will need
1564 them for the L1 clause. */
1565 if (bidi_it
->type_after_w1
== UNKNOWN_BT
)
1566 bidi_it
->type_after_w1
= type
;
1567 bidi_check_type (bidi_it
->type_after_w1
);
1569 if (type
== WEAK_EN
) /* W7 */
1571 if ((bidi_it
->last_strong
.type_after_w1
== STRONG_L
)
1572 || (bidi_it
->last_strong
.type
== UNKNOWN_BT
&& bidi_it
->sor
== L2R
))
1576 bidi_it
->type
= type
;
1577 bidi_check_type (bidi_it
->type
);
1581 /* Resolve the type of a neutral character according to the type of
1582 surrounding strong text and the current embedding level.

   PREV_TYPE and NEXT_TYPE are the types found on either side of the
   neutral; LEV is the embedding level, of which only the low bit
   (its parity) is examined.  WEAK_EN and WEAK_AN on either side are
   first replaced by STRONG_R.  The two sides are then compared (N1),
   and when they differ the parity of LEV is tested (N2).
   NOTE(review): the statements that produce the result in each
   branch are not present in this copy of the file -- confirm the
   returned values against a pristine copy. */
1583 static inline bidi_type_t
1584 bidi_resolve_neutral_1 (bidi_type_t prev_type
, bidi_type_t next_type
, int lev
)
1586 /* N1: European and Arabic numbers are treated as though they were R. */
1587 if (next_type
== WEAK_EN
|| next_type
== WEAK_AN
)
1588 next_type
= STRONG_R
;
1589 if (prev_type
== WEAK_EN
|| prev_type
== WEAK_AN
)
1590 prev_type
= STRONG_R
;
/* Both neighbours agree after the substitution above. */
1592 if (next_type
== prev_type
) /* N1 */
/* Neighbours disagree: decide on the parity of the embedding level. */
1594 else if ((lev
& 1) == 0) /* N2 */
1601 bidi_resolve_neutral (struct bidi_it
*bidi_it
)
1603 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1604 bidi_type_t type
= bidi_resolve_weak (bidi_it
);
1605 int current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1607 if (!(type
== STRONG_R
1612 || type
== NEUTRAL_B
1613 || type
== NEUTRAL_S
1614 || type
== NEUTRAL_WS
1615 || type
== NEUTRAL_ON
))
1618 if (bidi_get_category (type
) == NEUTRAL
1619 || (type
== WEAK_BN
&& prev_level
== current_level
))
1621 if (bidi_it
->next_for_neutral
.type
!= UNKNOWN_BT
)
1622 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1623 bidi_it
->next_for_neutral
.type
,
1627 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1628 the assumption of batch-style processing; see clauses W4,
1629 W5, and especially N1, which require to look far forward
1630 (as well as back) in the buffer/string. May the fleas of
1631 a thousand camels infest the armpits of those who design
1632 supposedly general-purpose algorithms by looking at their
1633 own implementations, and fail to consider other possible
1635 struct bidi_it saved_it
;
1636 bidi_type_t next_type
;
1638 if (bidi_it
->scan_dir
== -1)
1641 bidi_copy_it (&saved_it
, bidi_it
);
1642 /* Scan the text forward until we find the first non-neutral
1643 character, and then use that to resolve the neutral we
1644 are dealing with now. We also cache the scanned iterator
1645 states, to salvage some of the effort later. */
1646 bidi_cache_iterator_state (bidi_it
, 0);
1648 /* Record the info about the previous character, so that
1649 it will be cached below with this state. */
1650 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1651 && bidi_it
->type
!= WEAK_BN
)
1652 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1653 type
= bidi_resolve_weak (bidi_it
);
1654 /* Paragraph separators have their levels fully resolved
1655 at this point, so cache them as resolved. */
1656 bidi_cache_iterator_state (bidi_it
, type
== NEUTRAL_B
);
1657 /* FIXME: implement L1 here, by testing for a newline and
1658 resetting the level for any sequence of whitespace
1659 characters adjacent to it. */
1660 } while (!(type
== NEUTRAL_B
1662 && bidi_get_category (type
) != NEUTRAL
)
1663 /* This is all per level run, so stop when we
1664 reach the end of this level run. */
1665 || bidi_it
->level_stack
[bidi_it
->stack_idx
].level
!=
1668 bidi_remember_char (&saved_it
.next_for_neutral
, bidi_it
);
1679 /* N1: ``European and Arabic numbers are treated as
1680 though they were R.'' */
1681 next_type
= STRONG_R
;
1682 saved_it
.next_for_neutral
.type
= STRONG_R
;
1685 if (!bidi_explicit_dir_char (bidi_it
->ch
))
1686 abort (); /* can't happen: BNs are skipped */
1689 /* Marched all the way to the end of this level run.
1690 We need to use the eor type, whose information is
1691 stored by bidi_set_sor_type in the prev_for_neutral
1693 if (saved_it
.type
!= WEAK_BN
1694 || bidi_get_category (bidi_it
->prev
.type_after_w1
) == NEUTRAL
)
1696 next_type
= bidi_it
->prev_for_neutral
.type
;
1697 saved_it
.next_for_neutral
.type
= next_type
;
1698 bidi_check_type (next_type
);
1702 /* This is a BN which does not adjoin neutrals.
1703 Leave its type alone. */
1704 bidi_copy_it (bidi_it
, &saved_it
);
1705 return bidi_it
->type
;
1711 type
= bidi_resolve_neutral_1 (saved_it
.prev_for_neutral
.type
,
1712 next_type
, current_level
);
1713 saved_it
.type
= type
;
1714 bidi_check_type (type
);
1715 bidi_copy_it (bidi_it
, &saved_it
);
1721 /* Given an iterator state in BIDI_IT, advance one character position
1722 in the buffer/string to the next character (in the logical order),
1723 resolve the bidi type of that next character, and return that type. */
/* Forward-scan step that delegates type resolution to
   bidi_resolve_neutral.  Before delegating it tests that
   BIDI_IT->scan_dir is 1, and it resets BIDI_IT->ignore_bn_limit to
   -1 in two situations: the limit is a position (> -1) that the
   current charpos has reached or passed, or the limit is -2 and the
   current character (BIDI_IT->ch) is not an explicit directional
   code.  The value obtained from bidi_resolve_neutral is stored in
   the local `type'.
   NOTE(review): the statement guarded by the scan_dir test, the
   declaration of `type' and the return statement are not present in
   this copy of the file -- confirm against a pristine copy. */
1726 bidi_type_of_next_char (struct bidi_it
*bidi_it
)
1730 /* This should always be called during a forward scan. */
1731 if (bidi_it
->scan_dir
!= 1)
1734 /* Reset the limit until which to ignore BNs if we step out of the
1735 area where we found only empty levels. */
1736 if ((bidi_it
->ignore_bn_limit
> -1
1737 && bidi_it
->ignore_bn_limit
<= bidi_it
->charpos
)
1738 || (bidi_it
->ignore_bn_limit
== -2
1739 && !bidi_explicit_dir_char (bidi_it
->ch
)))
1740 bidi_it
->ignore_bn_limit
= -1;
/* Advance and resolve; BIDI_IT is updated by the callee. */
1742 type
= bidi_resolve_neutral (bidi_it
);
1747 /* Given an iterator state BIDI_IT, advance one character position in
1748 the buffer/string to the next character (in the current scan
1749 direction), resolve the embedding and implicit levels of that next
1750 character, and return the resulting level. */
1752 bidi_level_of_next_char (struct bidi_it
*bidi_it
)
1755 int level
, prev_level
= -1;
1756 struct bidi_saved_info next_for_neutral
;
1757 EMACS_INT next_char_pos
= -2;
1759 if (bidi_it
->scan_dir
== 1)
1762 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
1763 ? bidi_it
->string
.schars
: ZV
;
1765 /* There's no sense in trying to advance if we hit end of text. */
1766 if (bidi_it
->charpos
>= eob
)
1767 return bidi_it
->resolved_level
;
1769 /* Record the info about the previous character. */
1770 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1771 && bidi_it
->type
!= WEAK_BN
)
1772 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1773 if (bidi_it
->type_after_w1
== STRONG_R
1774 || bidi_it
->type_after_w1
== STRONG_L
1775 || bidi_it
->type_after_w1
== STRONG_AL
)
1776 bidi_remember_char (&bidi_it
->last_strong
, bidi_it
);
1777 /* FIXME: it sounds like we don't need both prev and
1778 prev_for_neutral members, but I'm leaving them both for now. */
1779 if (bidi_it
->type
== STRONG_R
|| bidi_it
->type
== STRONG_L
1780 || bidi_it
->type
== WEAK_EN
|| bidi_it
->type
== WEAK_AN
)
1781 bidi_remember_char (&bidi_it
->prev_for_neutral
, bidi_it
);
1783 /* If we overstepped the characters used for resolving neutrals
1784 and whitespace, invalidate their info in the iterator. */
1785 if (bidi_it
->charpos
>= bidi_it
->next_for_neutral
.charpos
)
1786 bidi_it
->next_for_neutral
.type
= UNKNOWN_BT
;
1787 if (bidi_it
->next_en_pos
>= 0
1788 && bidi_it
->charpos
>= bidi_it
->next_en_pos
)
1789 bidi_it
->next_en_pos
= -1;
1790 if (bidi_it
->next_for_ws
.type
!= UNKNOWN_BT
1791 && bidi_it
->charpos
>= bidi_it
->next_for_ws
.charpos
)
1792 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
1794 /* This must be taken before we fill the iterator with the info
1795 about the next char. If we scan backwards, the iterator
1796 state must be already cached, so there's no need to know the
1797 embedding level of the previous character, since we will be
1798 returning to our caller shortly. */
1799 prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1801 next_for_neutral
= bidi_it
->next_for_neutral
;
1803 /* Perhaps the character we want is already cached. If it is, the
1804 call to bidi_cache_find below will return a type other than
1806 if (bidi_cache_idx
> bidi_cache_start
&& !bidi_it
->first_elt
)
1809 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
)) ? 0 : 1;
1811 if (bidi_it
->scan_dir
> 0)
1813 if (bidi_it
->nchars
<= 0)
1815 next_char_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1817 else if (bidi_it
->charpos
>= bob
)
1818 /* Implementation note: we allow next_char_pos to be as low as
1819 0 for buffers or -1 for strings, and that is okay because
1820 that's the "position" of the sentinel iterator state we
1821 cached at the beginning of the iteration. */
1822 next_char_pos
= bidi_it
->charpos
- 1;
1823 if (next_char_pos
>= bob
- 1)
1824 type
= bidi_cache_find (next_char_pos
, -1, bidi_it
);
1830 if (type
!= UNKNOWN_BT
)
1832 /* Don't lose the information for resolving neutrals! The
1833 cached states could have been cached before their
1834 next_for_neutral member was computed. If we are on our way
1835 forward, we can simply take the info from the previous
1837 if (bidi_it
->scan_dir
== 1
1838 && bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
1839 bidi_it
->next_for_neutral
= next_for_neutral
;
1841 /* If resolved_level is -1, it means this state was cached
1842 before it was completely resolved, so we cannot return
1844 if (bidi_it
->resolved_level
!= -1)
1845 return bidi_it
->resolved_level
;
1847 if (bidi_it
->scan_dir
== -1)
1848 /* If we are going backwards, the iterator state is already cached
1849 from previous scans, and should be fully resolved. */
1852 if (type
== UNKNOWN_BT
)
1853 type
= bidi_type_of_next_char (bidi_it
);
1855 if (type
== NEUTRAL_B
)
1856 return bidi_it
->resolved_level
;
1858 level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1859 if ((bidi_get_category (type
) == NEUTRAL
/* && type != NEUTRAL_B */)
1860 || (type
== WEAK_BN
&& prev_level
== level
))
1862 if (bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
1865 /* If the cached state shows a neutral character, it was not
1866 resolved by bidi_resolve_neutral, so do it now. */
1867 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1868 bidi_it
->next_for_neutral
.type
,
1872 if (!(type
== STRONG_R
1876 || type
== WEAK_AN
))
1878 bidi_it
->type
= type
;
1879 bidi_check_type (bidi_it
->type
);
1881 /* For L1 below, we need to know, for each WS character, whether
1882 it belongs to a sequence of WS characters preceding a newline
1883 or a TAB or a paragraph separator. */
1884 if (bidi_it
->orig_type
== NEUTRAL_WS
1885 && bidi_it
->next_for_ws
.type
== UNKNOWN_BT
)
1888 EMACS_INT clen
= bidi_it
->ch_len
;
1889 EMACS_INT bpos
= bidi_it
->bytepos
;
1890 EMACS_INT cpos
= bidi_it
->charpos
;
1891 EMACS_INT disp_pos
= bidi_it
->disp_pos
;
1892 EMACS_INT nc
= bidi_it
->nchars
;
1893 struct bidi_string_data bs
= bidi_it
->string
;
1895 int fwp
= bidi_it
->frame_window_p
;
1897 if (bidi_it
->nchars
<= 0)
1900 ch
= bidi_fetch_char (bpos
+= clen
, cpos
+= nc
, &disp_pos
, &bs
, fwp
,
1902 if (ch
== '\n' || ch
== BIDI_EOB
/* || ch == LINESEP_CHAR */)
1905 chtype
= bidi_get_type (ch
, NEUTRAL_DIR
);
1906 } while (chtype
== NEUTRAL_WS
|| chtype
== WEAK_BN
1907 || bidi_explicit_dir_char (ch
)); /* L1/Retaining */
1908 bidi_it
->next_for_ws
.type
= chtype
;
1909 bidi_check_type (bidi_it
->next_for_ws
.type
);
1910 bidi_it
->next_for_ws
.charpos
= cpos
;
1911 bidi_it
->next_for_ws
.bytepos
= bpos
;
1914 /* Resolve implicit levels, with a twist: PDFs get the embedding
1915 level of the embedding they terminate. See below for the
1917 if (bidi_it
->orig_type
== PDF
1918 /* Don't do this if this formatting code didn't change the
1919 embedding level due to invalid or empty embeddings. */
1920 && prev_level
!= level
)
1922 /* Don't look in UAX#9 for the reason for this: it's our own
1923 private quirk. The reason is that we want the formatting
1924 codes to be delivered so that they bracket the text of their
1925 embedding. For example, given the text
1929 we want it to be displayed as
1937 which will result because we bump up the embedding level as
1938 soon as we see the RLO and pop it as soon as we see the PDF,
1939 so RLO itself has the same embedding level as "teST", and
1940 thus would be normally delivered last, just before the PDF.
1941 The switch below fiddles with the level of PDF so that this
1942 ugly side effect does not happen.
1944 (This is, of course, only important if the formatting codes
1945 are actually displayed, but Emacs does need to display them
1946 if the user wants to.) */
1949 else if (bidi_it
->orig_type
== NEUTRAL_B
/* L1 */
1950 || bidi_it
->orig_type
== NEUTRAL_S
1951 || bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
1952 /* || bidi_it->ch == LINESEP_CHAR */
1953 || (bidi_it
->orig_type
== NEUTRAL_WS
1954 && (bidi_it
->next_for_ws
.type
== NEUTRAL_B
1955 || bidi_it
->next_for_ws
.type
== NEUTRAL_S
)))
1956 level
= bidi_it
->level_stack
[0].level
;
1957 else if ((level
& 1) == 0) /* I1 */
1959 if (type
== STRONG_R
)
1961 else if (type
== WEAK_EN
|| type
== WEAK_AN
)
1966 if (type
== STRONG_L
|| type
== WEAK_EN
|| type
== WEAK_AN
)
1970 bidi_it
->resolved_level
= level
;
1974 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1975 non-zero, we are at the end of a level, and we need to prepare to
1976 resume the scan of the lower level.
1978 If this level's other edge is cached, we simply jump to it, filling
1979 the iterator structure with the iterator state on the other edge.
1980 Otherwise, we walk the buffer or string until we come back to the
1981 same level as LEVEL.
1983 Note: we are not talking here about a ``level run'' in the UAX#9
1984 sense of the term, but rather about a ``level'' which includes
1985 all the levels higher than it. In other words, given the levels
1988 11111112222222333333334443343222222111111112223322111
1991 and assuming we are at point A scanning left to right, this
1992 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
/* Reposition BIDI_IT at the other edge of LEVEL.  The search
   direction DIR is the iterator's scan_dir, negated when END_FLAG is
   non-zero.  The cache is asked first through
   bidi_cache_find_level_change; an index that is at least
   bidi_cache_start is loaded into BIDI_IT with
   bidi_cache_fetch_state.  The remaining statements cache the
   current state and then keep calling bidi_level_of_next_char,
   caching every state they visit, for as long as the level returned
   is >= LEVEL.
   NOTE(review): the lines that join these two parts (the `else', the
   condition guarding abort, and the opening of the do-loop) are not
   present in this copy of the file; judging by the comment on the
   abort call, the walk is presumably taken only on a cache miss and
   the abort only when END_FLAG is set -- confirm against a pristine
   copy. */
1995 bidi_find_other_level_edge (struct bidi_it
*bidi_it
, int level
, int end_flag
)
1997 int dir
= end_flag
? -bidi_it
->scan_dir
: bidi_it
->scan_dir
;
2000 /* Try the cache first. */
2001 if ((idx
= bidi_cache_find_level_change (level
, dir
, end_flag
))
2002 >= bidi_cache_start
)
2003 bidi_cache_fetch_state (idx
, bidi_it
);
2009 abort (); /* if we are at end of level, its edges must be cached */
/* Cache the starting state, then every state visited by the walk. */
2011 bidi_cache_iterator_state (bidi_it
, 1);
2013 new_level
= bidi_level_of_next_char (bidi_it
);
2014 bidi_cache_iterator_state (bidi_it
, 1);
2015 } while (new_level
>= level
);
2020 bidi_move_to_visually_next (struct bidi_it
*bidi_it
)
2022 int old_level
, new_level
, next_level
;
2023 struct bidi_it sentinel
;
2024 struct gcpro gcpro1
;
2026 if (bidi_it
->charpos
< 0 || bidi_it
->bytepos
< 0)
2029 if (bidi_it
->scan_dir
== 0)
2031 bidi_it
->scan_dir
= 1; /* default to logical order */
2034 /* The code below can call eval, and thus cause GC. If we are
2035 iterating a Lisp string, make sure it won't be GCed. */
2036 if (STRINGP (bidi_it
->string
.lstring
))
2037 GCPRO1 (bidi_it
->string
.lstring
);
2039 /* If we just passed a newline, initialize for the next line. */
2040 if (!bidi_it
->first_elt
&& bidi_it
->orig_type
== NEUTRAL_B
)
2041 bidi_line_init (bidi_it
);
2043 /* Prepare the sentinel iterator state, and cache it. When we bump
2044 into it, scanning backwards, we'll know that the last non-base
2045 level is exhausted. */
2046 if (bidi_cache_idx
== bidi_cache_start
)
2048 bidi_copy_it (&sentinel
, bidi_it
);
2049 if (bidi_it
->first_elt
)
2051 sentinel
.charpos
--; /* cached charpos needs to be monotonic */
2053 sentinel
.ch
= '\n'; /* doesn't matter, but why not? */
2054 sentinel
.ch_len
= 1;
2055 sentinel
.nchars
= 1;
2057 bidi_cache_iterator_state (&sentinel
, 1);
2060 old_level
= bidi_it
->resolved_level
;
2061 new_level
= bidi_level_of_next_char (bidi_it
);
2063 /* Reordering of resolved levels (clause L2) is implemented by
2064 jumping to the other edge of the level and flipping direction of
2065 scanning the text whenever we find a level change. */
2066 if (new_level
!= old_level
)
2068 int ascending
= new_level
> old_level
;
2069 int level_to_search
= ascending
? old_level
+ 1 : old_level
;
2070 int incr
= ascending
? 1 : -1;
2071 int expected_next_level
= old_level
+ incr
;
2073 /* Jump (or walk) to the other edge of this level. */
2074 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2075 /* Switch scan direction and peek at the next character in the
2077 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2079 /* The following loop handles the case where the resolved level
2080 jumps by more than one. This is typical for numbers inside a
2081 run of text with left-to-right embedding direction, but can
2082 also happen in other situations. In those cases the decision
2083 where to continue after a level change, and in what direction,
2084 is tricky. For example, given a text like below:
2089 (where the numbers below the text show the resolved levels),
2090 the result of reordering according to UAX#9 should be this:
2094 This is implemented by the loop below which flips direction
2095 and jumps to the other edge of the level each time it finds
2096 the new level not to be the expected one. The expected level
2097 is always one more or one less than the previous one. */
2098 next_level
= bidi_peek_at_next_level (bidi_it
);
2099 while (next_level
!= expected_next_level
)
2101 expected_next_level
+= incr
;
2102 level_to_search
+= incr
;
2103 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2104 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2105 next_level
= bidi_peek_at_next_level (bidi_it
);
2108 /* Finally, deliver the next character in the new direction. */
2109 next_level
= bidi_level_of_next_char (bidi_it
);
2112 /* Take note when we have just processed the newline that precedes
2113 the end of the paragraph. The next time we are about to be
2114 called, set_iterator_to_next will automatically reinit the
2115 paragraph direction, if needed. We do this at the newline before
2116 the paragraph separator, because the next character might not be
2117 the first character of the next paragraph, due to the bidi
2118 reordering, whereas we _must_ know the paragraph base direction
2119 _before_ we process the paragraph's text, since the base
2120 direction affects the reordering. */
2121 if (bidi_it
->scan_dir
== 1 && bidi_it
->orig_type
== NEUTRAL_B
)
2123 /* The paragraph direction of the entire string, once
2124 determined, is in effect for the entire string. Setting the
2125 separator limit to the end of the string prevents
2126 bidi_paragraph_init from being called automatically on this
2128 if (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2129 bidi_it
->separator_limit
= bidi_it
->string
.schars
;
2130 else if (bidi_it
->bytepos
< ZV_BYTE
)
2133 bidi_at_paragraph_end (bidi_it
->charpos
+ bidi_it
->nchars
,
2134 bidi_it
->bytepos
+ bidi_it
->ch_len
);
2135 if (bidi_it
->nchars
<= 0)
2139 bidi_it
->new_paragraph
= 1;
2140 /* Record the buffer position of the last character of the
2141 paragraph separator. */
2142 bidi_it
->separator_limit
=
2143 bidi_it
->charpos
+ bidi_it
->nchars
+ sep_len
;
2148 if (bidi_it
->scan_dir
== 1 && bidi_cache_idx
> bidi_cache_start
)
2150 /* If we are at paragraph's base embedding level and beyond the
2151 last cached position, the cache's job is done and we can
2153 if (bidi_it
->resolved_level
== bidi_it
->level_stack
[0].level
2154 && bidi_it
->charpos
> (bidi_cache
[bidi_cache_idx
- 1].charpos
2155 + bidi_cache
[bidi_cache_idx
- 1].nchars
- 1))
2156 bidi_cache_reset ();
2157 /* But as long as we are caching during forward scan, we must
2158 cache each state, or else the cache integrity will be
2159 compromised: it assumes cached states correspond to buffer
2162 bidi_cache_iterator_state (bidi_it
, 1);
2165 if (STRINGP (bidi_it
->string
.lstring
))
2169 /* This is meant to be called from within the debugger, whenever you
2170 wish to examine the cache contents. */
2171 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE
;
2173 bidi_dump_cached_states (void)
2178 if (bidi_cache_idx
== 0)
2180 fprintf (stderr
, "The cache is empty.\n");
2183 fprintf (stderr
, "Total of %d state%s in cache:\n",
2184 bidi_cache_idx
, bidi_cache_idx
== 1 ? "" : "s");
2186 for (i
= bidi_cache
[bidi_cache_idx
- 1].charpos
; i
> 0; i
/= 10)
2188 fputs ("ch ", stderr
);
2189 for (i
= 0; i
< bidi_cache_idx
; i
++)
2190 fprintf (stderr
, "%*c", ndigits
, bidi_cache
[i
].ch
);
2191 fputs ("\n", stderr
);
2192 fputs ("lvl ", stderr
);
2193 for (i
= 0; i
< bidi_cache_idx
; i
++)
2194 fprintf (stderr
, "%*d", ndigits
, bidi_cache
[i
].resolved_level
);
2195 fputs ("\n", stderr
);
2196 fputs ("pos ", stderr
);
2197 for (i
= 0; i
< bidi_cache_idx
; i
++)
2198 fprintf (stderr
, "%*"pI
"d", ndigits
, bidi_cache
[i
].charpos
);
2199 fputs ("\n", stderr
);