1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the Reference Implementation and most other implementations,
26 this one is designed to be called once for every character in the
27 buffer or string. That way, we can leave intact the design of the
28 Emacs display engine, whereby an iterator object is used to
29 traverse buffer or string text character by character, and generate
30 the necessary data for displaying each character in 'struct glyph'
31 objects. (See xdisp.c for the details of that iteration.) The
32 functions on this file replace the original linear iteration in the
33 logical order of the text with a non-linear iteration in the visual
34 order, i.e. in the order characters should be shown on display.
36 The main entry point is bidi_move_to_visually_next. Each time it
37 is called, it finds the next character in the visual order, and
38 returns its information in a special structure. The caller is then
39 expected to process this character for display or any other
40 purposes, and call bidi_move_to_visually_next for the next
41 character. See the comments in bidi_move_to_visually_next for more
42 details about its algorithm that finds the next visual-order
43 character by resolving their levels on the fly.
45 Two other entry points are bidi_paragraph_init and
46 bidi_mirror_char. The first determines the base direction of a
47 paragraph, while the second returns the mirrored version of its
50 A few auxiliary entry points are used to initialize the bidi
51 iterator for iterating an object (buffer or string), push and pop
52 the bidi iterator state, and save and restore the state of the bidi
55 If you want to understand the code, you will have to read it
56 together with the relevant portions of UAX#9. The comments include
57 references to UAX#9 rules, for that very reason.
59 A note about references to UAX#9 rules: if the reference says
60 something like "X9/Retaining", it means that you need to refer to
61 rule X9 and to its modifications described in the "Implementation
62 Notes" section of UAX#9, under "Retaining Format Codes".
64 Here's the overview of the design of the reordering engine
65 implemented by this file.
67 Basic implementation structure
68 ------------------------------
70 The sequential processing steps described by UAX#9 are implemented
71 as recursive levels of processing, all of which examine the next
72 character in the logical order. This hierarchy of processing looks
73 as follows, from the innermost (deepest) to the outermost level,
74 omitting some subroutines used by each level:
76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_neutral -- resolve neutral types
80 bidi_level_of_next_char -- resolve implicit levels
82 Each level calls the level below it, and works on the result
83 returned by the lower level, including all of its sub-levels.
85 Unlike all the levels below it, bidi_level_of_next_char can return
86 the information about either the next or previous character in the
87 logical order, depending on the current direction of scanning the
88 buffer or string. For the next character, it calls all the levels
89 below it; for the previous character, it uses the cache, described
92 Thus, the result of calling bidi_level_of_next_char is the resolved
93 level of the next or the previous character in the logical order.
94 Based on this information, the function bidi_move_to_visually_next
95 finds the next character in the visual order and updates the
96 direction in which the buffer is scanned, either forward or
97 backward, to find the next character to be displayed. (Text is
98 scanned backwards when it needs to be reversed for display, i.e. if
99 the visual order is the inverse of the logical order.) This
100 implements the last, reordering steps of the UBA, by successively
101 calling bidi_level_of_next_char until the character of the required
102 embedding level is found; the scan direction is dynamically updated
103 as a side effect. See the commentary before the 'while' loop in
104 bidi_move_to_visually_next, for the details.
109 In a nutshell, fetching the next character boils down to calling
110 STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
111 string position. See bidi_fetch_char. However, if the next
112 character is "covered" by a display property of some kind,
113 bidi_fetch_char returns the u+FFFC "object replacement character"
114 that represents the entire run of text covered by the display
115 property. (The ch_len and nchars members of 'struct bidi_it'
116 reflect the length in bytes and characters of that text.) This is
117 so we reorder text on both sides of the display property as
118 appropriate for an image or embedded string. Similarly, text
119 covered by a display spec of the form '(space ...)', is replaced
120 with the u+2029 paragraph separator character, so such display
121 specs produce the same effect as a TAB under UBA. Both these
122 special characters are not actually displayed -- the display
123 property is displayed instead -- but just used to compute the
124 embedding level of the surrounding text so as to produce the
130 The UBA is highly context dependent in some of its parts,
131 i.e. results of processing a character can generally depend on
132 characters very far away. The UAX#9 description of the UBA
133 prescribes a stateful processing of each character, whereby the
134 results of this processing depend on various state variables, such
135 as the current embedding level, level stack, and directional
136 override status. In addition, the UAX#9 description includes many
137 passages like this (from rule W2 in this case):
139 Search backward from each instance of a European number until the
140 first strong type (R, L, AL, or sos) is found. If an AL is found,
141 change the type of the European number to Arabic number.
143 To support this, we use a bidi iterator object, 'struct bidi_it',
144 which is a sub-structure of 'struct it' used by xdisp.c (see
145 dispextern.h for the definition of both of these structures). The
146 bidi iterator holds the entire state of the iteration required by
147 the UBA, and is updated as the text is traversed. In particular,
148 the embedding level of the current character being resolved is
149 recorded in the iterator state. To avoid costly searches backward
150 in support of rules like W2 above, the necessary character types
151 are also recorded in the iterator state as they are found during
152 the forward scan, and then used when such rules need to be applied.
153 (Forward scans cannot be avoided in this way; they need to be
154 performed at least once, and the results recorded in the iterator
155 state, to be reused until the forward scan oversteps the recorded
158 In this manner, the iterator state acts as a mini-cache of
159 contextual information required for resolving the level of the
160 current character by various UBA rules.
162 Caching of bidi iterator states
163 -------------------------------
165 As described above, the reordering engine uses the information
166 recorded in the bidi iterator state in order to resolve the
167 embedding level of the current character. When the reordering
168 engine needs to process the next character in the logical order, it
169 fetches it and applies to it all the UBA levels, updating the
170 iterator state as it goes. But when the buffer or string is
171 scanned backwards, i.e. in the reverse order of buffer/string
172 positions, the scanned characters were already processed during the
173 preceding forward scan (see bidi_find_other_level_edge). To avoid
174 costly re-processing of characters that were already processed
175 during the forward scan, the iterator states computed while
176 scanning forward are cached.
178 The cache is just a linear array of 'struct bidi_it' objects, which
179 is dynamically allocated and reallocated as needed, since the size
180 of the cache depends on the text being processed. We only need the
181 cache while processing embedded levels higher than the base
182 paragraph embedding level, because these higher levels require
183 changes in scan direction. Therefore, as soon as we are back to
184 the base embedding level, we can free the cache; see the calls to
185 bidi_cache_reset and bidi_cache_shrink, for the conditions to do
188 The cache maintains the index of the next unused cache slot -- this
189 is where the next iterator state will be cached. The function
190 bidi_cache_iterator_state saves an instance of the state in the
191 cache and increments the unused slot index. The companion function
192 bidi_cache_find looks up a cached state that corresponds to a given
193 buffer/string position. All of the cached states must correspond
194 1:1 to the buffer or string region whose processing they reflect;
195 bidi.c will abort if it finds cache slots that violate this 1:1
198 When the parent iterator 'struct it' is pushed (see push_it in
199 xdisp.c) to pause the current iteration and start iterating over a
200 different object (e.g., a 'display' string that covers some buffer
201 text), the bidi iterator cache needs to be "pushed" as well, so
202 that a new empty cache could be used while iterating over the new
203 object. Later, when the new object is exhausted, and xdisp.c calls
204 pop_it, we need to "pop" the bidi cache as well and return to the
205 original cache. See bidi_push_it and bidi_pop_it for how this is
208 Some functions of the display engine save copies of 'struct it' in
209 local variables, and restore them later. For examples, see
210 pos_visible_p and move_it_in_display_line_to in xdisp.c, and
211 window_scroll_pixel_based in window.c. When this happens, we need
212 to save and restore the bidi cache as well, because conceptually
213 the cache is part of the 'struct it' state, and needs to be in
214 perfect sync with the portion of the buffer/string that is being
215 processed. This saving and restoring of the cache state is handled
216 by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
217 SAVE_IT and RESTORE_IT defined on xdisp.c.
219 Note that, because reordering is implemented below the level in
220 xdisp.c that breaks glyphs into screen lines, we are violating
221 paragraph 3.4 of UAX#9. which mandates that line breaking shall be
222 done before reordering each screen line separately. However,
223 following UAX#9 to the letter in this matter goes against the basic
224 design of the Emacs display engine, and so we choose here this
225 minor deviation from the UBA letter in preference to redesign of
226 the display engine. The effect of this is only seen in continued
227 lines that are broken into screen lines in the middle of a run
228 whose direction is opposite to the paragraph's base direction.
230 Important design and implementation note: when the code needs to
231 scan far ahead, be sure to avoid such scans as much as possible
232 when the buffer/string doesn't contain any RTL characters. Users
233 of left-to-right scripts will never forgive you if you introduce
234 some slow-down due to bidi in situations that don't involve any
235 bidirectional text. See the large comment near the beginning of
236 bidi_resolve_neutral, for one situation where such shortcut was
243 #include "character.h"
245 #include "dispextern.h"
246 #include "region-cache.h"
248 static bool bidi_initialized
= 0;
250 static Lisp_Object bidi_type_table
, bidi_mirror_table
;
252 #define BIDI_EOB (-1)
254 /* Data type for describing the bidirectional character categories. */
263 static Lisp_Object paragraph_start_re
, paragraph_separate_re
;
264 static Lisp_Object Qparagraph_start
, Qparagraph_separate
;
267 /***********************************************************************
269 ***********************************************************************/
271 /* Return the bidi type of a character CH, subject to the current
272 directional OVERRIDE. */
274 bidi_get_type (int ch
, bidi_dir_t override
)
276 bidi_type_t default_type
;
280 if (ch
< 0 || ch
> MAX_CHAR
)
283 default_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
284 /* Every valid character code, even those that are unassigned by the
285 UCD, have some bidi-class property, according to
286 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
287 (= zero) code from CHAR_TABLE_REF, that's a bug. */
288 if (default_type
== UNKNOWN_BT
)
291 switch (default_type
)
308 else if (override
== R2L
)
316 bidi_check_type (bidi_type_t type
)
318 eassert (UNKNOWN_BT
<= type
&& type
<= NEUTRAL_ON
);
321 /* Given a bidi TYPE of a character, return its category. */
322 static bidi_category_t
323 bidi_get_category (bidi_type_t type
)
355 return EXPLICIT_FORMATTING
;
361 /* Return the mirrored character of C, if it has one. If C has no
362 mirrored counterpart, return C.
363 Note: The conditions in UAX#9 clause L4 regarding the surrounding
364 context must be tested by the caller. */
366 bidi_mirror_char (int c
)
372 if (c
< 0 || c
> MAX_CHAR
)
375 val
= CHAR_TABLE_REF (bidi_mirror_table
, c
);
380 /* When debugging, check before assigning to V, so that the check
381 isn't broken by undefined behavior due to int overflow. */
382 eassert (CHAR_VALID_P (XINT (val
)));
386 /* Minimal test we must do in optimized builds, to prevent weird
387 crashes further down the road. */
388 if (v
< 0 || v
> MAX_CHAR
)
397 /* Determine the start-of-sequence (sos) directional type given the two
398 embedding levels on either side of the run boundary. Also, update
399 the saved info about previously seen characters, since that info is
400 generally valid for a single level run. */
402 bidi_set_sos_type (struct bidi_it
*bidi_it
, int level_before
, int level_after
)
404 int higher_level
= (level_before
> level_after
? level_before
: level_after
);
406 if (level_before
< level_after
)
407 /* FIXME: should the default sos direction be user selectable? */
408 bidi_it
->sos
= ((higher_level
& 1) != 0 ? R2L
: L2R
);
410 bidi_it
->prev
.type
= UNKNOWN_BT
;
411 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
412 = bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
413 bidi_it
->prev_for_neutral
.type
= (bidi_it
->sos
== R2L
? STRONG_R
: STRONG_L
);
414 bidi_it
->prev_for_neutral
.charpos
= bidi_it
->charpos
;
415 bidi_it
->prev_for_neutral
.bytepos
= bidi_it
->bytepos
;
416 bidi_it
->next_for_neutral
.type
= bidi_it
->next_for_neutral
.type_after_w1
417 = bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
418 bidi_it
->ignore_bn_limit
= -1; /* meaning it's unknown */
421 /* Push the current embedding level and override status; reset the
422 current level to LEVEL and the current override status to OVERRIDE. */
424 bidi_push_embedding_level (struct bidi_it
*bidi_it
,
425 int level
, bidi_dir_t override
, bool isolate_status
)
427 struct bidi_stack
*st
;
429 bidi_it
->stack_idx
++;
430 eassert (bidi_it
->stack_idx
< BIDI_MAXDEPTH
+2+1);
431 st
= &bidi_it
->level_stack
[bidi_it
->stack_idx
];
433 st
->override
= override
;
434 st
->isolate_status
= isolate_status
;
437 st
->prev
= bidi_it
->prev
;
438 st
->last_strong
= bidi_it
->last_strong
;
439 st
->prev_for_neutral
= bidi_it
->prev_for_neutral
;
440 st
->next_for_neutral
= bidi_it
->next_for_neutral
;
441 st
->next_for_ws
= bidi_it
->next_for_ws
;
442 st
->sos
= bidi_it
->sos
;
446 /* Pop from the stack the embedding level, the directional override
447 status, and optionally saved information for the isolating run
448 sequence. Return the new level. */
450 bidi_pop_embedding_level (struct bidi_it
*bidi_it
)
452 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
453 and PDIs (X6a, 2nd bullet). */
454 if (bidi_it
->stack_idx
> 0)
457 = bidi_it
->level_stack
[bidi_it
->stack_idx
].isolate_status
;
458 struct bidi_stack st
;
460 st
= bidi_it
->level_stack
[bidi_it
->stack_idx
];
463 bidi_it
->prev
= st
.prev
;
464 bidi_it
->last_strong
= st
.last_strong
;
465 bidi_it
->prev_for_neutral
= st
.prev_for_neutral
;
466 bidi_it
->next_for_neutral
= st
.next_for_neutral
;
467 bidi_it
->next_for_ws
= st
.next_for_ws
;
468 bidi_it
->sos
= st
.sos
;
470 bidi_it
->stack_idx
--;
472 return bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
475 /* Record in SAVED_INFO the information about the current character. */
477 bidi_remember_char (struct bidi_saved_info
*saved_info
,
478 struct bidi_it
*bidi_it
)
480 saved_info
->charpos
= bidi_it
->charpos
;
481 saved_info
->bytepos
= bidi_it
->bytepos
;
482 saved_info
->type
= bidi_it
->type
;
483 bidi_check_type (bidi_it
->type
);
484 saved_info
->type_after_w1
= bidi_it
->type_after_w1
;
485 bidi_check_type (bidi_it
->type_after_w1
);
486 saved_info
->orig_type
= bidi_it
->orig_type
;
487 bidi_check_type (bidi_it
->orig_type
);
490 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
491 copies the part of the level stack that is actually in use. */
493 bidi_copy_it (struct bidi_it
*to
, struct bidi_it
*from
)
495 /* Copy everything from the start through the active part of
498 (offsetof (struct bidi_it
, level_stack
[1])
499 + from
->stack_idx
* sizeof from
->level_stack
[0]));
503 /***********************************************************************
504 Caching the bidi iterator states
505 ***********************************************************************/
507 /* We allocate and de-allocate the cache in chunks of this size (in
508 characters). 200 was chosen as an upper limit for reasonably-long
509 lines in a text file/buffer. */
510 #define BIDI_CACHE_CHUNK 200
511 static struct bidi_it
*bidi_cache
;
512 static ptrdiff_t bidi_cache_size
= 0;
513 enum { elsz
= sizeof (struct bidi_it
) };
514 static ptrdiff_t bidi_cache_idx
; /* next unused cache slot */
515 static ptrdiff_t bidi_cache_last_idx
; /* slot of last cache hit */
516 static ptrdiff_t bidi_cache_start
= 0; /* start of cache for this
519 /* 5-slot stack for saving the start of the previous level of the
520 cache. xdisp.c maintains a 5-slot stack for its iterator state,
521 and we need the same size of our stack. */
522 static ptrdiff_t bidi_cache_start_stack
[IT_STACK_SIZE
];
523 static int bidi_cache_sp
;
525 /* Size of header used by bidi_shelve_cache. */
528 bidi_shelve_header_size
529 = (sizeof (bidi_cache_idx
) + sizeof (bidi_cache_start_stack
)
530 + sizeof (bidi_cache_sp
) + sizeof (bidi_cache_start
)
531 + sizeof (bidi_cache_last_idx
))
534 /* Reset the cache state to the empty state. We only reset the part
535 of the cache relevant to iteration of the current object. Previous
536 objects, which are pushed on the display iterator's stack, are left
537 intact. This is called when the cached information is no more
538 useful for the current iteration, e.g. when we were reseated to a
539 new position on the same object. */
541 bidi_cache_reset (void)
543 bidi_cache_idx
= bidi_cache_start
;
544 bidi_cache_last_idx
= -1;
547 /* Shrink the cache to its minimal size. Called when we init the bidi
548 iterator for reordering a buffer or a string that does not come
549 from display properties, because that means all the previously
550 cached info is of no further use. */
552 bidi_cache_shrink (void)
554 if (bidi_cache_size
> BIDI_CACHE_CHUNK
)
556 bidi_cache
= xrealloc (bidi_cache
, BIDI_CACHE_CHUNK
* elsz
);
557 bidi_cache_size
= BIDI_CACHE_CHUNK
;
563 bidi_cache_fetch_state (ptrdiff_t idx
, struct bidi_it
*bidi_it
)
565 int current_scan_dir
= bidi_it
->scan_dir
;
567 if (idx
< bidi_cache_start
|| idx
>= bidi_cache_idx
)
570 bidi_copy_it (bidi_it
, &bidi_cache
[idx
]);
571 bidi_it
->scan_dir
= current_scan_dir
;
572 bidi_cache_last_idx
= idx
;
575 /* Find a cached state with a given CHARPOS and resolved embedding
576 level less or equal to LEVEL. if LEVEL is -1, disregard the
577 resolved levels in cached states. DIR, if non-zero, means search
578 in that direction from the last cache hit. */
580 bidi_cache_search (ptrdiff_t charpos
, int level
, int dir
)
582 ptrdiff_t i
, i_start
;
584 if (bidi_cache_idx
> bidi_cache_start
)
586 if (bidi_cache_last_idx
== -1)
587 bidi_cache_last_idx
= bidi_cache_idx
- 1;
588 if (charpos
< bidi_cache
[bidi_cache_last_idx
].charpos
)
591 i_start
= bidi_cache_last_idx
- 1;
593 else if (charpos
> (bidi_cache
[bidi_cache_last_idx
].charpos
594 + bidi_cache
[bidi_cache_last_idx
].nchars
- 1))
597 i_start
= bidi_cache_last_idx
+ 1;
600 i_start
= bidi_cache_last_idx
;
604 i_start
= bidi_cache_idx
- 1;
609 /* Linear search for now; FIXME! */
610 for (i
= i_start
; i
>= bidi_cache_start
; i
--)
611 if (bidi_cache
[i
].charpos
<= charpos
612 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
613 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
618 for (i
= i_start
; i
< bidi_cache_idx
; i
++)
619 if (bidi_cache
[i
].charpos
<= charpos
620 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
621 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
629 /* Find a cached state where the resolved level changes to a value
630 that is lower than LEVEL, and return its cache slot index. DIR is
631 the direction to search, starting with the last used cache slot.
632 If DIR is zero, we search backwards from the last occupied cache
633 slot. BEFORE means return the index of the slot that
634 is ``before'' the level change in the search direction. That is,
635 given the cached levels like this:
640 and assuming we are at the position cached at the slot marked with
641 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
642 index of slot B or A, depending whether BEFORE is, respectively,
645 bidi_cache_find_level_change (int level
, int dir
, bool before
)
649 ptrdiff_t i
= dir
? bidi_cache_last_idx
: bidi_cache_idx
- 1;
650 int incr
= before
? 1 : 0;
652 eassert (!dir
|| bidi_cache_last_idx
>= 0);
661 while (i
>= bidi_cache_start
+ incr
)
663 if (bidi_cache
[i
- incr
].resolved_level
>= 0
664 && bidi_cache
[i
- incr
].resolved_level
< level
)
671 while (i
< bidi_cache_idx
- incr
)
673 if (bidi_cache
[i
+ incr
].resolved_level
>= 0
674 && bidi_cache
[i
+ incr
].resolved_level
< level
)
685 bidi_cache_ensure_space (ptrdiff_t idx
)
687 /* Enlarge the cache as needed. */
688 if (idx
>= bidi_cache_size
)
690 /* The bidi cache cannot be larger than the largest Lisp string
692 ptrdiff_t string_or_buffer_bound
693 = max (BUF_BYTES_MAX
, STRING_BYTES_BOUND
);
695 /* Also, it cannot be larger than what C can represent. */
697 = (min (PTRDIFF_MAX
, SIZE_MAX
) - bidi_shelve_header_size
) / elsz
;
700 = xpalloc (bidi_cache
, &bidi_cache_size
,
701 max (BIDI_CACHE_CHUNK
, idx
- bidi_cache_size
+ 1),
702 min (string_or_buffer_bound
, c_bound
), elsz
);
707 bidi_cache_iterator_state (struct bidi_it
*bidi_it
, bool resolved
)
711 /* We should never cache on backward scans. */
712 if (bidi_it
->scan_dir
== -1)
714 idx
= bidi_cache_search (bidi_it
->charpos
, -1, 1);
718 idx
= bidi_cache_idx
;
719 bidi_cache_ensure_space (idx
);
720 /* Character positions should correspond to cache positions 1:1.
721 If we are outside the range of cached positions, the cache is
722 useless and must be reset. */
723 if (idx
> bidi_cache_start
&&
724 (bidi_it
->charpos
> (bidi_cache
[idx
- 1].charpos
725 + bidi_cache
[idx
- 1].nchars
)
726 || bidi_it
->charpos
< bidi_cache
[bidi_cache_start
].charpos
))
729 idx
= bidi_cache_start
;
731 if (bidi_it
->nchars
<= 0)
733 bidi_copy_it (&bidi_cache
[idx
], bidi_it
);
735 bidi_cache
[idx
].resolved_level
= -1;
739 /* Copy only the members which could have changed, to avoid
740 costly copying of the entire struct. */
741 bidi_cache
[idx
].type
= bidi_it
->type
;
742 bidi_check_type (bidi_it
->type
);
743 bidi_cache
[idx
].type_after_w1
= bidi_it
->type_after_w1
;
744 bidi_check_type (bidi_it
->type_after_w1
);
746 bidi_cache
[idx
].resolved_level
= bidi_it
->resolved_level
;
748 bidi_cache
[idx
].resolved_level
= -1;
749 bidi_cache
[idx
].invalid_levels
= bidi_it
->invalid_levels
;
750 bidi_cache
[idx
].next_for_neutral
= bidi_it
->next_for_neutral
;
751 bidi_cache
[idx
].next_for_ws
= bidi_it
->next_for_ws
;
752 bidi_cache
[idx
].ignore_bn_limit
= bidi_it
->ignore_bn_limit
;
753 bidi_cache
[idx
].disp_pos
= bidi_it
->disp_pos
;
754 bidi_cache
[idx
].disp_prop
= bidi_it
->disp_prop
;
757 bidi_cache_last_idx
= idx
;
758 if (idx
>= bidi_cache_idx
)
759 bidi_cache_idx
= idx
+ 1;
763 bidi_cache_find (ptrdiff_t charpos
, int level
, struct bidi_it
*bidi_it
)
765 ptrdiff_t i
= bidi_cache_search (charpos
, level
, bidi_it
->scan_dir
);
767 if (i
>= bidi_cache_start
)
769 bidi_dir_t current_scan_dir
= bidi_it
->scan_dir
;
771 bidi_copy_it (bidi_it
, &bidi_cache
[i
]);
772 bidi_cache_last_idx
= i
;
773 /* Don't let scan direction from the cached state override
774 the current scan direction. */
775 bidi_it
->scan_dir
= current_scan_dir
;
776 return bidi_it
->type
;
783 bidi_peek_at_next_level (struct bidi_it
*bidi_it
)
785 if (bidi_cache_idx
== bidi_cache_start
|| bidi_cache_last_idx
== -1)
787 return bidi_cache
[bidi_cache_last_idx
+ bidi_it
->scan_dir
].resolved_level
;
791 /***********************************************************************
792 Pushing and popping the bidi iterator state
793 ***********************************************************************/
795 /* Push the bidi iterator state in preparation for reordering a
796 different object, e.g. display string found at certain buffer
797 position. Pushing the bidi iterator boils down to saving its
798 entire state on the cache and starting a new cache "stacked" on top
799 of the current cache. */
801 bidi_push_it (struct bidi_it
*bidi_it
)
803 /* Save the current iterator state in its entirety after the last
805 bidi_cache_ensure_space (bidi_cache_idx
);
806 bidi_cache
[bidi_cache_idx
++] = *bidi_it
;
808 /* Push the current cache start onto the stack. */
809 eassert (bidi_cache_sp
< IT_STACK_SIZE
);
810 bidi_cache_start_stack
[bidi_cache_sp
++] = bidi_cache_start
;
812 /* Start a new level of cache, and make it empty. */
813 bidi_cache_start
= bidi_cache_idx
;
814 bidi_cache_last_idx
= -1;
817 /* Restore the iterator state saved by bidi_push_it and return the
818 cache to the corresponding state. */
820 bidi_pop_it (struct bidi_it
*bidi_it
)
822 if (bidi_cache_start
<= 0)
825 /* Reset the next free cache slot index to what it was before the
826 call to bidi_push_it. */
827 bidi_cache_idx
= bidi_cache_start
- 1;
829 /* Restore the bidi iterator state saved in the cache. */
830 *bidi_it
= bidi_cache
[bidi_cache_idx
];
832 /* Pop the previous cache start from the stack. */
833 if (bidi_cache_sp
<= 0)
835 bidi_cache_start
= bidi_cache_start_stack
[--bidi_cache_sp
];
837 /* Invalidate the last-used cache slot data. */
838 bidi_cache_last_idx
= -1;
841 static ptrdiff_t bidi_cache_total_alloc
;
843 /* Stash away a copy of the cache and its control variables. */
845 bidi_shelve_cache (void)
847 unsigned char *databuf
;
851 if (bidi_cache_idx
== 0)
854 alloc
= (bidi_shelve_header_size
855 + bidi_cache_idx
* sizeof (struct bidi_it
));
856 databuf
= xmalloc (alloc
);
857 bidi_cache_total_alloc
+= alloc
;
859 memcpy (databuf
, &bidi_cache_idx
, sizeof (bidi_cache_idx
));
860 memcpy (databuf
+ sizeof (bidi_cache_idx
),
861 bidi_cache
, bidi_cache_idx
* sizeof (struct bidi_it
));
862 memcpy (databuf
+ sizeof (bidi_cache_idx
)
863 + bidi_cache_idx
* sizeof (struct bidi_it
),
864 bidi_cache_start_stack
, sizeof (bidi_cache_start_stack
));
865 memcpy (databuf
+ sizeof (bidi_cache_idx
)
866 + bidi_cache_idx
* sizeof (struct bidi_it
)
867 + sizeof (bidi_cache_start_stack
),
868 &bidi_cache_sp
, sizeof (bidi_cache_sp
));
869 memcpy (databuf
+ sizeof (bidi_cache_idx
)
870 + bidi_cache_idx
* sizeof (struct bidi_it
)
871 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
872 &bidi_cache_start
, sizeof (bidi_cache_start
));
873 memcpy (databuf
+ sizeof (bidi_cache_idx
)
874 + bidi_cache_idx
* sizeof (struct bidi_it
)
875 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
876 + sizeof (bidi_cache_start
),
877 &bidi_cache_last_idx
, sizeof (bidi_cache_last_idx
));
882 /* Restore the cache state from a copy stashed away by
883 bidi_shelve_cache, and free the buffer used to stash that copy.
884 JUST_FREE means free the buffer, but don't restore the
885 cache; used when the corresponding iterator is discarded instead of
888 bidi_unshelve_cache (void *databuf
, bool just_free
)
890 unsigned char *p
= databuf
;
896 /* A NULL pointer means an empty cache. */
897 bidi_cache_start
= 0;
908 memcpy (&idx
, p
, sizeof (bidi_cache_idx
));
909 bidi_cache_total_alloc
910 -= bidi_shelve_header_size
+ idx
* sizeof (struct bidi_it
);
914 memcpy (&bidi_cache_idx
, p
, sizeof (bidi_cache_idx
));
915 bidi_cache_ensure_space (bidi_cache_idx
);
916 memcpy (bidi_cache
, p
+ sizeof (bidi_cache_idx
),
917 bidi_cache_idx
* sizeof (struct bidi_it
));
918 memcpy (bidi_cache_start_stack
,
919 p
+ sizeof (bidi_cache_idx
)
920 + bidi_cache_idx
* sizeof (struct bidi_it
),
921 sizeof (bidi_cache_start_stack
));
922 memcpy (&bidi_cache_sp
,
923 p
+ sizeof (bidi_cache_idx
)
924 + bidi_cache_idx
* sizeof (struct bidi_it
)
925 + sizeof (bidi_cache_start_stack
),
926 sizeof (bidi_cache_sp
));
927 memcpy (&bidi_cache_start
,
928 p
+ sizeof (bidi_cache_idx
)
929 + bidi_cache_idx
* sizeof (struct bidi_it
)
930 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
931 sizeof (bidi_cache_start
));
932 memcpy (&bidi_cache_last_idx
,
933 p
+ sizeof (bidi_cache_idx
)
934 + bidi_cache_idx
* sizeof (struct bidi_it
)
935 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
936 + sizeof (bidi_cache_start
),
937 sizeof (bidi_cache_last_idx
));
938 bidi_cache_total_alloc
939 -= (bidi_shelve_header_size
940 + bidi_cache_idx
* sizeof (struct bidi_it
));
948 /***********************************************************************
950 ***********************************************************************/
952 bidi_initialize (void)
954 bidi_type_table
= uniprop_table (intern ("bidi-class"));
955 if (NILP (bidi_type_table
))
957 staticpro (&bidi_type_table
);
959 bidi_mirror_table
= uniprop_table (intern ("mirroring"));
960 if (NILP (bidi_mirror_table
))
962 staticpro (&bidi_mirror_table
);
964 Qparagraph_start
= intern ("paragraph-start");
965 staticpro (&Qparagraph_start
);
966 paragraph_start_re
= Fsymbol_value (Qparagraph_start
);
967 if (!STRINGP (paragraph_start_re
))
968 paragraph_start_re
= build_string ("\f\\|[ \t]*$");
969 staticpro (¶graph_start_re
);
970 Qparagraph_separate
= intern ("paragraph-separate");
971 staticpro (&Qparagraph_separate
);
972 paragraph_separate_re
= Fsymbol_value (Qparagraph_separate
);
973 if (!STRINGP (paragraph_separate_re
))
974 paragraph_separate_re
= build_string ("[ \t\f]*$");
975 staticpro (¶graph_separate_re
);
978 bidi_cache_total_alloc
= 0;
980 bidi_initialized
= 1;
983 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
986 bidi_set_paragraph_end (struct bidi_it
*bidi_it
)
988 bidi_it
->invalid_levels
= 0;
989 bidi_it
->invalid_isolates
= 0;
990 bidi_it
->stack_idx
= 0;
991 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
994 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
996 bidi_init_it (ptrdiff_t charpos
, ptrdiff_t bytepos
, bool frame_window_p
,
997 struct bidi_it
*bidi_it
)
999 if (! bidi_initialized
)
1002 bidi_it
->charpos
= charpos
;
1004 bidi_it
->bytepos
= bytepos
;
1005 bidi_it
->frame_window_p
= frame_window_p
;
1006 bidi_it
->nchars
= -1; /* to be computed in bidi_resolve_explicit_1 */
1007 bidi_it
->first_elt
= 1;
1008 bidi_set_paragraph_end (bidi_it
);
1009 bidi_it
->new_paragraph
= 1;
1010 bidi_it
->separator_limit
= -1;
1011 bidi_it
->type
= NEUTRAL_B
;
1012 bidi_it
->type_after_w1
= NEUTRAL_B
;
1013 bidi_it
->orig_type
= NEUTRAL_B
;
1014 /* FIXME: Review this!!! */
1015 bidi_it
->prev
.type
= bidi_it
->prev
.type_after_w1
1016 = bidi_it
->prev
.orig_type
= UNKNOWN_BT
;
1017 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
1018 = bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
1019 bidi_it
->next_for_neutral
.charpos
= -1;
1020 bidi_it
->next_for_neutral
.type
1021 = bidi_it
->next_for_neutral
.type_after_w1
1022 = bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
1023 bidi_it
->prev_for_neutral
.charpos
= -1;
1024 bidi_it
->prev_for_neutral
.type
1025 = bidi_it
->prev_for_neutral
.type_after_w1
1026 = bidi_it
->prev_for_neutral
.orig_type
= UNKNOWN_BT
;
1027 bidi_it
->sos
= L2R
; /* FIXME: should it be user-selectable? */
1028 bidi_it
->disp_pos
= -1; /* invalid/unknown */
1029 bidi_it
->disp_prop
= 0;
1030 /* We can only shrink the cache if we are at the bottom level of its
1032 if (bidi_cache_start
== 0)
1033 bidi_cache_shrink ();
1035 bidi_cache_reset ();
1038 /* Perform initializations for reordering a new line of bidi text. */
1040 bidi_line_init (struct bidi_it
*bidi_it
)
1042 bidi_it
->scan_dir
= 1; /* FIXME: do we need to have control on this? */
1043 bidi_it
->stack_idx
= 0;
1044 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
1045 bidi_it
->level_stack
[0].override
= NEUTRAL_DIR
; /* X1 */
1046 bidi_it
->level_stack
[0].isolate_status
= false; /* X1 */
1047 bidi_it
->invalid_levels
= 0;
1048 bidi_it
->isolate_level
= 0; /* X1 */
1049 bidi_it
->invalid_isolates
= 0; /* X1 */
1050 /* Setting this to zero will force its recomputation the first time
1051 we need it for W5. */
1052 bidi_it
->next_en_pos
= 0;
1053 bidi_it
->next_en_type
= UNKNOWN_BT
;
1054 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
1055 bidi_set_sos_type (bidi_it
,
1056 (bidi_it
->paragraph_dir
== R2L
? 1 : 0),
1057 bidi_it
->level_stack
[0].level
); /* X10 */
1059 bidi_cache_reset ();
1063 /***********************************************************************
1065 ***********************************************************************/
1067 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
1068 are zero-based character positions in S, BEGBYTE is byte position
1069 corresponding to BEG. UNIBYTE means S is a unibyte string. */
1071 bidi_count_bytes (const unsigned char *s
, ptrdiff_t beg
,
1072 ptrdiff_t begbyte
, ptrdiff_t end
, bool unibyte
)
1074 ptrdiff_t pos
= beg
;
1075 const unsigned char *p
= s
+ begbyte
, *start
= p
;
1081 if (!CHAR_HEAD_P (*p
))
1086 p
+= BYTES_BY_CHAR_HEAD (*p
);
1094 /* Fetch and return the character at byte position BYTEPOS. If S is
1095 non-NULL, fetch the character from string S; otherwise fetch the
1096 character from the current buffer. UNIBYTE means S is a
1099 bidi_char_at_pos (ptrdiff_t bytepos
, const unsigned char *s
, bool unibyte
)
1108 s
= BYTE_POS_ADDR (bytepos
);
1109 return STRING_CHAR (s
);
1112 /* Fetch and return the character at CHARPOS/BYTEPOS. If that
1113 character is covered by a display string, treat the entire run of
1114 covered characters as a single character, either u+2029 or u+FFFC,
1115 and return their combined length in CH_LEN and NCHARS. DISP_POS
1116 specifies the character position of the next display string, or -1
1117 if not yet computed. When the next character is at or beyond that
1118 position, the function updates DISP_POS with the position of the
1119 next display string. *DISP_PROP non-zero means that there's really
1120 a display string at DISP_POS, as opposed to when we searched till
1121 DISP_POS without finding one. If *DISP_PROP is 2, it means the
1122 display spec is of the form `(space ...)', which is replaced with
1123 u+2029 to handle it as a paragraph separator. STRING->s is the C
1124 string to iterate, or NULL if iterating over a buffer or a Lisp
1125 string; in the latter case, STRING->lstring is the Lisp string. */
1127 bidi_fetch_char (ptrdiff_t charpos
, ptrdiff_t bytepos
, ptrdiff_t *disp_pos
,
1128 int *disp_prop
, struct bidi_string_data
*string
,
1130 bool frame_window_p
, ptrdiff_t *ch_len
, ptrdiff_t *nchars
)
1134 = (string
->s
|| STRINGP (string
->lstring
)) ? string
->schars
: ZV
;
1135 struct text_pos pos
;
1138 /* If we got past the last known position of display string, compute
1139 the position of the next one. That position could be at CHARPOS. */
1140 if (charpos
< endpos
&& charpos
> *disp_pos
)
1142 SET_TEXT_POS (pos
, charpos
, bytepos
);
1143 *disp_pos
= compute_display_string_pos (&pos
, string
, w
, frame_window_p
,
1147 /* Fetch the character at BYTEPOS. */
1148 if (charpos
>= endpos
)
1156 else if (charpos
>= *disp_pos
&& *disp_prop
)
1158 ptrdiff_t disp_end_pos
;
1160 /* We don't expect to find ourselves in the middle of a display
1161 property. Hopefully, it will never be needed. */
1162 if (charpos
> *disp_pos
)
1164 /* Text covered by `display' properties and overlays with
1165 display properties or display strings is handled as a single
1166 character that represents the entire run of characters
1167 covered by the display property. */
1168 if (*disp_prop
== 2)
1170 /* `(space ...)' display specs are handled as paragraph
1171 separators for the purposes of the reordering; see UAX#9
1172 section 3 and clause HL1 in section 4.3 there. */
1177 /* All other display specs are handled as the Unicode Object
1178 Replacement Character. */
1181 disp_end_pos
= compute_display_string_end (*disp_pos
, string
);
1182 if (disp_end_pos
< 0)
1184 /* Somebody removed the display string from the buffer
1185 behind our back. Recover by processing this buffer
1186 position as if no display property were present there to
1191 *nchars
= disp_end_pos
- *disp_pos
;
1195 *ch_len
= bidi_count_bytes (string
->s
, *disp_pos
, bytepos
,
1196 disp_end_pos
, string
->unibyte
);
1197 else if (STRINGP (string
->lstring
))
1198 *ch_len
= bidi_count_bytes (SDATA (string
->lstring
), *disp_pos
,
1199 bytepos
, disp_end_pos
, string
->unibyte
);
1201 *ch_len
= CHAR_TO_BYTE (disp_end_pos
) - bytepos
;
1209 if (!string
->unibyte
)
1211 ch
= STRING_CHAR_AND_LENGTH (string
->s
+ bytepos
, len
);
1216 ch
= UNIBYTE_TO_CHAR (string
->s
[bytepos
]);
1220 else if (STRINGP (string
->lstring
))
1222 if (!string
->unibyte
)
1224 ch
= STRING_CHAR_AND_LENGTH (SDATA (string
->lstring
) + bytepos
,
1230 ch
= UNIBYTE_TO_CHAR (SREF (string
->lstring
, bytepos
));
1236 ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos
), len
);
1242 /* If we just entered a run of characters covered by a display
1243 string, compute the position of the next display string. */
1244 if (charpos
+ *nchars
<= endpos
&& charpos
+ *nchars
> *disp_pos
1247 SET_TEXT_POS (pos
, charpos
+ *nchars
, bytepos
+ *ch_len
);
1248 *disp_pos
= compute_display_string_pos (&pos
, string
, w
, frame_window_p
,
1255 /* Like bidi_fetch_char, but ignore any text between an isolate
1256 initiator and its matching PDI or, if it has no matching PDI, the
1257 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1258 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1259 and the character that was fetched after skipping the isolates. */
1261 bidi_fetch_char_skip_isolates (ptrdiff_t charpos
, ptrdiff_t bytepos
,
1262 ptrdiff_t *disp_pos
, int *disp_prop
,
1263 struct bidi_string_data
*string
,
1264 struct window
*w
, bool frame_window_p
,
1265 ptrdiff_t *ch_len
, ptrdiff_t *nchars
)
1267 ptrdiff_t orig_charpos
= charpos
, orig_bytepos
= bytepos
;
1268 int ch
= bidi_fetch_char (charpos
, bytepos
, disp_pos
, disp_prop
, string
, w
,
1269 frame_window_p
, ch_len
, nchars
);
1270 bidi_type_t ch_type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1271 ptrdiff_t level
= 0;
1273 if (ch_type
== LRI
|| ch_type
== RLI
|| ch_type
== FSI
)
1276 while (level
> 0 && ch_type
!= NEUTRAL_B
)
1280 ch
= bidi_fetch_char (charpos
, bytepos
, disp_pos
, disp_prop
, string
,
1281 w
, frame_window_p
, ch_len
, nchars
);
1282 ch_type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1283 /* A Note to P2 says to ignore max_depth limit. */
1284 if (ch_type
== LRI
|| ch_type
== RLI
|| ch_type
== FSI
)
1286 else if (ch_type
== PDI
)
1291 /* Communicate to the caller how much did we skip, so it could get
1292 past the last character position we examined. */
1293 *nchars
+= charpos
- orig_charpos
;
1294 *ch_len
+= bytepos
- orig_bytepos
;
1300 /***********************************************************************
1301 Determining paragraph direction
1302 ***********************************************************************/
1304 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1305 Value is the non-negative length of the paragraph separator
1306 following the buffer position, -1 if position is at the beginning
1307 of a new paragraph, or -2 if position is neither at beginning nor
1308 at end of a paragraph. */
1310 bidi_at_paragraph_end (ptrdiff_t charpos
, ptrdiff_t bytepos
)
1313 Lisp_Object start_re
;
1316 sep_re
= paragraph_separate_re
;
1317 start_re
= paragraph_start_re
;
1319 val
= fast_looking_at (sep_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
);
1322 if (fast_looking_at (start_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
) >= 0)
1331 /* If the user has requested the long scans caching, make sure that
1332 BIDI cache is enabled. Otherwise, make sure it's disabled. */
1334 static struct region_cache
*
1335 bidi_paragraph_cache_on_off (void)
1337 struct buffer
*cache_buffer
= current_buffer
;
1338 bool indirect_p
= false;
1340 /* For indirect buffers, make sure to use the cache of their base
1342 if (cache_buffer
->base_buffer
)
1344 cache_buffer
= cache_buffer
->base_buffer
;
1348 /* Don't turn on or off the cache in the base buffer, if the value
1349 of cache-long-scans of the base buffer is inconsistent with that.
1350 This is because doing so will just make the cache pure overhead,
1351 since if we turn it on via indirect buffer, it will be
1352 immediately turned off by its base buffer. */
1353 if (NILP (BVAR (current_buffer
, cache_long_scans
)))
1356 || NILP (BVAR (cache_buffer
, cache_long_scans
)))
1358 if (cache_buffer
->bidi_paragraph_cache
)
1360 free_region_cache (cache_buffer
->bidi_paragraph_cache
);
1361 cache_buffer
->bidi_paragraph_cache
= 0;
1369 || !NILP (BVAR (cache_buffer
, cache_long_scans
)))
1371 if (!cache_buffer
->bidi_paragraph_cache
)
1372 cache_buffer
->bidi_paragraph_cache
= new_region_cache ();
1374 return cache_buffer
->bidi_paragraph_cache
;
1378 /* On my 2005-vintage machine, searching back for paragraph start
1379 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1380 when user types C-p. The number below limits each call to
1381 bidi_paragraph_init to about 10 ms. */
1382 #define MAX_PARAGRAPH_SEARCH 7500
1384 /* Find the beginning of this paragraph by looking back in the buffer.
1385 Value is the byte position of the paragraph's beginning, or
1386 BEGV_BYTE if paragraph_start_re is still not found after looking
1387 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
1389 bidi_find_paragraph_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
1391 Lisp_Object re
= paragraph_start_re
;
1392 ptrdiff_t limit
= ZV
, limit_byte
= ZV_BYTE
;
1393 struct region_cache
*bpc
= bidi_paragraph_cache_on_off ();
1394 ptrdiff_t n
= 0, oldpos
= pos
, next
;
1395 struct buffer
*cache_buffer
= current_buffer
;
1397 if (cache_buffer
->base_buffer
)
1398 cache_buffer
= cache_buffer
->base_buffer
;
1400 while (pos_byte
> BEGV_BYTE
1401 && n
++ < MAX_PARAGRAPH_SEARCH
1402 && fast_looking_at (re
, pos
, pos_byte
, limit
, limit_byte
, Qnil
) < 0)
1404 /* FIXME: What if the paragraph beginning is covered by a
1405 display string? And what if a display string covering some
1406 of the text over which we scan back includes
1407 paragraph_start_re? */
1408 DEC_BOTH (pos
, pos_byte
);
1409 if (bpc
&& region_cache_backward (cache_buffer
, bpc
, pos
, &next
))
1411 pos
= next
, pos_byte
= CHAR_TO_BYTE (pos
);
1415 pos
= find_newline_no_quit (pos
, pos_byte
, -1, &pos_byte
);
1417 if (n
>= MAX_PARAGRAPH_SEARCH
)
1418 pos
= BEGV
, pos_byte
= BEGV_BYTE
;
1420 know_region_cache (cache_buffer
, bpc
, pos
, oldpos
);
1421 /* Positions returned by the region cache are not limited to
1422 BEGV..ZV range, so we limit them here. */
1423 pos_byte
= clip_to_bounds (BEGV_BYTE
, pos_byte
, ZV_BYTE
);
1427 /* On a 3.4 GHz machine, searching forward for a strong directional
1428 character in a long paragraph full of weaks or neutrals takes about
1429 1 ms for each 20K characters. The number below limits each call to
1430 bidi_paragraph_init to less than 10 ms even on slow machines. */
1431 #define MAX_STRONG_CHAR_SEARCH 100000
1433 /* Starting from POS, find the first strong (L, R, or AL) character,
1434 while skipping over any characters between an isolate initiator and
1435 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1436 matches the isolate initiator at POS. Return the bidi type of the
1437 character where the search stopped. Give up if after examining
1438 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1439 character was found. */
1441 find_first_strong_char (ptrdiff_t pos
, ptrdiff_t bytepos
, ptrdiff_t end
,
1442 ptrdiff_t *disp_pos
, int *disp_prop
,
1443 struct bidi_string_data
*string
, struct window
*w
,
1444 bool string_p
, bool frame_window_p
,
1445 ptrdiff_t *ch_len
, ptrdiff_t *nchars
, bool stop_at_pdi
)
1453 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1454 at POS. Get past it. */
1455 #ifdef ENABLE_CHECKING
1456 ch
= bidi_fetch_char (pos
, bytepos
, disp_pos
, disp_prop
, string
, w
,
1457 frame_window_p
, ch_len
, nchars
);
1458 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1459 eassert (type
== FSI
/* || type == LRI || type == RLI */);
1464 ch
= bidi_fetch_char_skip_isolates (pos
, bytepos
, disp_pos
, disp_prop
, string
,
1465 w
, frame_window_p
, ch_len
, nchars
);
1466 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1469 for (pos
+= *nchars
, bytepos
+= *ch_len
;
1470 bidi_get_category (type
) != STRONG
1471 /* If requested to stop at first PDI, stop there. */
1472 && !(stop_at_pdi
&& type
== PDI
)
1473 /* Stop when searched too far into an abnormally large
1474 paragraph full of weak or neutral characters. */
1475 && pos
- pos1
< MAX_STRONG_CHAR_SEARCH
;
1476 type
= bidi_get_type (ch
, NEUTRAL_DIR
))
1480 /* Pretend there's a paragraph separator at end of
1486 && type
== NEUTRAL_B
1487 && bidi_at_paragraph_end (pos
, bytepos
) >= -1)
1489 /* Fetch next character and advance to get past it. */
1490 ch
= bidi_fetch_char_skip_isolates (pos
, bytepos
, disp_pos
, disp_prop
,
1491 string
, w
, frame_window_p
,
1499 /* Determine the base direction, a.k.a. base embedding level, of the
1500 paragraph we are about to iterate through. If DIR is either L2R or
1501 R2L, just use that. Otherwise, determine the paragraph direction
1502 from the first strong directional character of the paragraph.
1504 NO_DEFAULT_P means don't default to L2R if the paragraph
1505 has no strong directional characters and both DIR and
1506 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1507 in the buffer until a paragraph is found with a strong character,
1508 or until hitting BEGV. In the latter case, fall back to L2R. This
1509 flag is used in current-bidi-paragraph-direction.
1511 Note that this function gives the paragraph separator the same
1512 direction as the preceding paragraph, even though Emacs generally
1513 views the separator as not belonging to any paragraph. */
1515 bidi_paragraph_init (bidi_dir_t dir
, struct bidi_it
*bidi_it
, bool no_default_p
)
1517 ptrdiff_t bytepos
= bidi_it
->bytepos
;
1518 bool string_p
= bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
);
1519 ptrdiff_t pstartbyte
;
1520 /* Note that begbyte is a byte position, while end is a character
1521 position. Yes, this is ugly, but we are trying to avoid costly
1522 calls to BYTE_TO_CHAR and its ilk. */
1523 ptrdiff_t begbyte
= string_p
? 0 : BEGV_BYTE
;
1524 ptrdiff_t end
= string_p
? bidi_it
->string
.schars
: ZV
;
1526 /* Special case for an empty buffer. */
1527 if (bytepos
== begbyte
&& bidi_it
->charpos
== end
)
1529 /* We should never be called at EOB or before BEGV. */
1530 else if (bidi_it
->charpos
>= end
|| bytepos
< begbyte
)
1535 bidi_it
->paragraph_dir
= L2R
;
1536 bidi_it
->new_paragraph
= 0;
1538 else if (dir
== R2L
)
1540 bidi_it
->paragraph_dir
= R2L
;
1541 bidi_it
->new_paragraph
= 0;
1543 else if (dir
== NEUTRAL_DIR
) /* P2 */
1545 ptrdiff_t ch_len
, nchars
;
1546 ptrdiff_t pos
, disp_pos
= -1;
1549 const unsigned char *s
;
1551 if (!bidi_initialized
)
1554 /* If we are inside a paragraph separator, we are just waiting
1555 for the separator to be exhausted; use the previous paragraph
1556 direction. But don't do that if we have been just reseated,
1557 because we need to reinitialize below in that case. */
1558 if (!bidi_it
->first_elt
1559 && bidi_it
->charpos
< bidi_it
->separator_limit
)
1562 /* If we are on a newline, get past it to where the next
1563 paragraph might start. But don't do that at BEGV since then
1564 we are potentially in a new paragraph that doesn't yet
1566 pos
= bidi_it
->charpos
;
1567 s
= (STRINGP (bidi_it
->string
.lstring
)
1568 ? SDATA (bidi_it
->string
.lstring
)
1569 : bidi_it
->string
.s
);
1570 if (bytepos
> begbyte
1571 && bidi_char_at_pos (bytepos
, s
, bidi_it
->string
.unibyte
) == '\n')
1577 /* We are either at the beginning of a paragraph or in the
1578 middle of it. Find where this paragraph starts. */
1581 /* We don't support changes of paragraph direction inside a
1582 string. It is treated as a single paragraph. */
1586 pstartbyte
= bidi_find_paragraph_start (pos
, bytepos
);
1587 bidi_it
->separator_limit
= -1;
1588 bidi_it
->new_paragraph
= 0;
1590 /* The following loop is run more than once only if NO_DEFAULT_P,
1591 and only if we are iterating on a buffer. */
1593 bytepos
= pstartbyte
;
1595 pos
= BYTE_TO_CHAR (bytepos
);
1596 type
= find_first_strong_char (pos
, bytepos
, end
, &disp_pos
, &disp_prop
,
1597 &bidi_it
->string
, bidi_it
->w
,
1598 string_p
, bidi_it
->frame_window_p
,
1599 &ch_len
, &nchars
, false);
1600 if (type
== STRONG_R
|| type
== STRONG_AL
) /* P3 */
1601 bidi_it
->paragraph_dir
= R2L
;
1602 else if (type
== STRONG_L
)
1603 bidi_it
->paragraph_dir
= L2R
;
1605 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
)
1607 /* If this paragraph is at BEGV, default to L2R. */
1608 if (pstartbyte
== BEGV_BYTE
)
1609 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 */
1612 ptrdiff_t prevpbyte
= pstartbyte
;
1613 ptrdiff_t p
= BYTE_TO_CHAR (pstartbyte
), pbyte
= pstartbyte
;
1615 /* Find the beginning of the previous paragraph, if any. */
1616 while (pbyte
> BEGV_BYTE
&& prevpbyte
>= pstartbyte
)
1618 /* FXIME: What if p is covered by a display
1619 string? See also a FIXME inside
1620 bidi_find_paragraph_start. */
1621 DEC_BOTH (p
, pbyte
);
1622 prevpbyte
= bidi_find_paragraph_start (p
, pbyte
);
1624 pstartbyte
= prevpbyte
;
1628 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
);
1633 /* Contrary to UAX#9 clause P3, we only default the paragraph
1634 direction to L2R if we have no previous usable paragraph
1635 direction. This is allowed by the HL1 clause. */
1636 if (bidi_it
->paragraph_dir
!= L2R
&& bidi_it
->paragraph_dir
!= R2L
)
1637 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 ``higher-level protocols'' */
1638 if (bidi_it
->paragraph_dir
== R2L
)
1639 bidi_it
->level_stack
[0].level
= 1;
1641 bidi_it
->level_stack
[0].level
= 0;
1643 bidi_line_init (bidi_it
);
1647 /***********************************************************************
1648 Resolving explicit and implicit levels.
1649 The rest of this file constitutes the core of the UBA implementation.
1650 ***********************************************************************/
1653 bidi_explicit_dir_char (int ch
)
1655 bidi_type_t ch_type
;
1657 if (!bidi_initialized
)
1659 ch_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
1660 return (ch_type
== LRE
|| ch_type
== LRO
1661 || ch_type
== RLE
|| ch_type
== RLO
1665 /* A helper function for bidi_resolve_explicit. It advances to the
1666 next character in logical order and determines the new embedding
1667 level and directional override, but does not take into account
1668 empty embeddings. */
1670 bidi_resolve_explicit_1 (struct bidi_it
*bidi_it
)
1673 bidi_type_t type
, typ1
;
1676 bidi_dir_t override
;
1677 bool isolate_status
;
1678 bool string_p
= bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
);
1679 ptrdiff_t ch_len
, nchars
, disp_pos
, end
;
1682 /* If reseat()'ed, don't advance, so as to start iteration from the
1683 position where we were reseated. bidi_it->bytepos can be less
1684 than BEGV_BYTE after reseat to BEGV. */
1685 if (bidi_it
->bytepos
< (string_p
? 0 : BEGV_BYTE
)
1686 || bidi_it
->first_elt
)
1688 bidi_it
->first_elt
= 0;
1691 const unsigned char *p
1692 = (STRINGP (bidi_it
->string
.lstring
)
1693 ? SDATA (bidi_it
->string
.lstring
)
1694 : bidi_it
->string
.s
);
1696 if (bidi_it
->charpos
< 0)
1697 bidi_it
->charpos
= bidi_it
->bytepos
= 0;
1698 eassert (bidi_it
->bytepos
== bidi_count_bytes (p
, 0, 0,
1700 bidi_it
->string
.unibyte
));
1704 if (bidi_it
->charpos
< BEGV
)
1706 bidi_it
->charpos
= BEGV
;
1707 bidi_it
->bytepos
= BEGV_BYTE
;
1709 eassert (bidi_it
->bytepos
== CHAR_TO_BYTE (bidi_it
->charpos
));
1712 /* Don't move at end of buffer/string. */
1713 else if (bidi_it
->charpos
< (string_p
? bidi_it
->string
.schars
: ZV
))
1715 /* Advance to the next character, skipping characters covered by
1716 display strings (nchars > 1). */
1717 if (bidi_it
->nchars
<= 0)
1719 bidi_it
->charpos
+= bidi_it
->nchars
;
1720 if (bidi_it
->ch_len
== 0)
1722 bidi_it
->bytepos
+= bidi_it
->ch_len
;
1725 current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
; /* X1 */
1726 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1727 isolate_status
= bidi_it
->level_stack
[bidi_it
->stack_idx
].isolate_status
;
1728 new_level
= current_level
;
1729 bidi_it
->resolved_level
= new_level
;
1731 if (bidi_it
->charpos
>= (string_p
? bidi_it
->string
.schars
: ZV
))
1734 bidi_it
->ch_len
= 1;
1735 bidi_it
->nchars
= 1;
1736 bidi_it
->disp_pos
= (string_p
? bidi_it
->string
.schars
: ZV
);
1737 bidi_it
->disp_prop
= 0;
1741 /* Fetch the character at BYTEPOS. If it is covered by a
1742 display string, treat the entire run of covered characters as
1743 a single character u+FFFC. */
1744 curchar
= bidi_fetch_char (bidi_it
->charpos
, bidi_it
->bytepos
,
1745 &bidi_it
->disp_pos
, &bidi_it
->disp_prop
,
1746 &bidi_it
->string
, bidi_it
->w
,
1747 bidi_it
->frame_window_p
,
1748 &bidi_it
->ch_len
, &bidi_it
->nchars
);
1750 bidi_it
->ch
= curchar
;
1752 /* Don't apply directional override here, as all the types we handle
1753 below will not be affected by the override anyway, and we need
1754 the original type unaltered. The override will be applied in
1755 bidi_resolve_weak. */
1756 type
= bidi_get_type (curchar
, NEUTRAL_DIR
);
1757 bidi_it
->orig_type
= type
;
1758 bidi_check_type (bidi_it
->orig_type
);
1760 bidi_it
->type_after_w1
= UNKNOWN_BT
;
1766 bidi_it
->type_after_w1
= type
;
1767 bidi_check_type (bidi_it
->type_after_w1
);
1768 type
= WEAK_BN
; /* X9/Retaining */
1769 if (bidi_it
->ignore_bn_limit
<= -1)
1771 if (current_level
< BIDI_MAXDEPTH
1772 && bidi_it
->invalid_levels
== 0
1773 && bidi_it
->invalid_isolates
== 0)
1775 /* Compute the least odd embedding level greater than
1776 the current level. */
1777 new_level
= ((current_level
+ 1) & ~1) + 1;
1778 if (bidi_it
->type_after_w1
== RLE
)
1779 override
= NEUTRAL_DIR
;
1782 bidi_push_embedding_level (bidi_it
, new_level
, override
, false);
1783 bidi_it
->resolved_level
= new_level
;
1787 if (bidi_it
->invalid_isolates
== 0)
1788 bidi_it
->invalid_levels
++;
1791 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1792 || (bidi_it
->next_en_pos
> bidi_it
->charpos
1793 && bidi_it
->next_en_type
== WEAK_EN
))
1798 bidi_it
->type_after_w1
= type
;
1799 bidi_check_type (bidi_it
->type_after_w1
);
1800 type
= WEAK_BN
; /* X9/Retaining */
1801 if (bidi_it
->ignore_bn_limit
<= -1)
1803 if (current_level
< BIDI_MAXDEPTH
- 1
1804 && bidi_it
->invalid_levels
== 0
1805 && bidi_it
->invalid_isolates
== 0)
1807 /* Compute the least even embedding level greater than
1808 the current level. */
1809 new_level
= ((current_level
+ 2) & ~1);
1810 if (bidi_it
->type_after_w1
== LRE
)
1811 override
= NEUTRAL_DIR
;
1814 bidi_push_embedding_level (bidi_it
, new_level
, override
, false);
1815 bidi_it
->resolved_level
= new_level
;
1819 if (bidi_it
->invalid_isolates
== 0)
1820 bidi_it
->invalid_levels
++;
1823 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1824 || (bidi_it
->next_en_pos
> bidi_it
->charpos
1825 && bidi_it
->next_en_type
== WEAK_EN
))
1829 end
= string_p
? bidi_it
->string
.schars
: ZV
;
1830 disp_pos
= bidi_it
->disp_pos
;
1831 disp_prop
= bidi_it
->disp_prop
;
1832 nchars
= bidi_it
->nchars
;
1833 ch_len
= bidi_it
->ch_len
;
1834 typ1
= find_first_strong_char (bidi_it
->charpos
, bidi_it
->bytepos
, end
,
1835 &disp_pos
, &disp_prop
, &bidi_it
->string
,
1836 bidi_it
->w
, string_p
, bidi_it
->frame_window_p
,
1837 &ch_len
, &nchars
, true);
1838 if (typ1
!= STRONG_R
&& typ1
!= STRONG_AL
)
1847 if (override
== NEUTRAL_DIR
)
1848 bidi_it
->type_after_w1
= type
;
1849 else /* Unicode 8.0 correction. */
1850 bidi_it
->type_after_w1
= (override
== L2R
? STRONG_L
: STRONG_R
);
1851 bidi_check_type (bidi_it
->type_after_w1
);
1852 if (current_level
< BIDI_MAXDEPTH
1853 && bidi_it
->invalid_levels
== 0
1854 && bidi_it
->invalid_isolates
== 0)
1856 new_level
= ((current_level
+ 1) & ~1) + 1;
1857 bidi_it
->isolate_level
++;
1858 bidi_push_embedding_level (bidi_it
, new_level
, NEUTRAL_DIR
, true);
1861 bidi_it
->invalid_isolates
++;
1865 if (override
== NEUTRAL_DIR
)
1866 bidi_it
->type_after_w1
= type
;
1867 else /* Unicode 8.0 correction. */
1868 bidi_it
->type_after_w1
= (override
== L2R
? STRONG_L
: STRONG_R
);
1869 bidi_check_type (bidi_it
->type_after_w1
);
1870 if (current_level
< BIDI_MAXDEPTH
- 1
1871 && bidi_it
->invalid_levels
== 0
1872 && bidi_it
->invalid_isolates
== 0)
1874 new_level
= ((current_level
+ 2) & ~1);
1875 bidi_it
->isolate_level
++;
1876 bidi_push_embedding_level (bidi_it
, new_level
, NEUTRAL_DIR
, true);
1879 bidi_it
->invalid_isolates
++;
1882 if (bidi_it
->invalid_isolates
)
1884 bidi_it
->invalid_isolates
--;
1885 new_level
= current_level
;
1887 else if (bidi_it
->isolate_level
> 0)
1889 bidi_it
->invalid_levels
= 0;
1890 while (!bidi_it
->level_stack
[bidi_it
->stack_idx
].isolate_status
)
1891 bidi_pop_embedding_level (bidi_it
);
1892 eassert (bidi_it
->stack_idx
> 0);
1893 new_level
= bidi_pop_embedding_level (bidi_it
);
1894 bidi_it
->isolate_level
--;
1896 bidi_it
->resolved_level
= new_level
;
1897 /* Unicode 8.0 correction. */
1898 if (bidi_it
->level_stack
[bidi_it
->stack_idx
].override
== L2R
)
1899 bidi_it
->type_after_w1
= STRONG_L
;
1900 else if (bidi_it
->level_stack
[bidi_it
->stack_idx
].override
== R2L
)
1901 bidi_it
->type_after_w1
= STRONG_R
;
1903 bidi_it
->type_after_w1
= type
;
1906 bidi_it
->type_after_w1
= type
;
1907 bidi_check_type (bidi_it
->type_after_w1
);
1908 type
= WEAK_BN
; /* X9/Retaining */
1909 if (bidi_it
->ignore_bn_limit
<= -1)
1911 if (!bidi_it
->invalid_isolates
)
1913 if (bidi_it
->invalid_levels
)
1914 bidi_it
->invalid_levels
--;
1915 else if (!isolate_status
&& bidi_it
->stack_idx
> 1)
1916 new_level
= bidi_pop_embedding_level (bidi_it
);
1919 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1920 || (bidi_it
->next_en_pos
> bidi_it
->charpos
1921 && bidi_it
->next_en_type
== WEAK_EN
))
1923 bidi_it
->resolved_level
= new_level
;
1930 bidi_it
->type
= type
;
1931 bidi_check_type (bidi_it
->type
);
1933 return bidi_it
->resolved_level
;
1936 /* Given an iterator state in BIDI_IT, advance one character position
1937 in the buffer/string to the next character (in the logical order),
1938 resolve any explicit embeddings and directional overrides, and
1939 return the embedding level of the character after resolving
1940 explicit directives and ignoring empty embeddings. */
1942 bidi_resolve_explicit (struct bidi_it
*bidi_it
)
1944 int prev_level
= bidi_it
->resolved_level
;
1945 int new_level
= bidi_resolve_explicit_1 (bidi_it
);
1946 ptrdiff_t eob
= bidi_it
->string
.s
? bidi_it
->string
.schars
: ZV
;
1947 const unsigned char *s
1948 = (STRINGP (bidi_it
->string
.lstring
)
1949 ? SDATA (bidi_it
->string
.lstring
)
1950 : bidi_it
->string
.s
);
1952 eassert (prev_level
>= 0);
1953 if (prev_level
< new_level
1954 && bidi_it
->type
== WEAK_BN
1955 && bidi_it
->ignore_bn_limit
== -1 /* only if not already known */
1956 && bidi_it
->charpos
< eob
/* not already at EOB */
1957 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1958 + bidi_it
->ch_len
, s
,
1959 bidi_it
->string
.unibyte
)))
1961 /* Avoid pushing and popping embedding levels if the level run
1962 is empty, as this breaks level runs where it shouldn't.
1963 UAX#9 removes all the explicit embedding and override codes,
1964 so empty embeddings disappear without a trace. We need to
1965 behave as if we did the same. */
1966 struct bidi_it saved_it
;
1967 int level
= prev_level
;
1969 bidi_copy_it (&saved_it
, bidi_it
);
1971 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1972 + bidi_it
->ch_len
, s
,
1973 bidi_it
->string
.unibyte
)))
1975 /* This advances to the next character, skipping any
1976 characters covered by display strings. */
1977 level
= bidi_resolve_explicit_1 (bidi_it
);
1978 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1979 a pointer to its data is no longer valid. */
1980 if (STRINGP (bidi_it
->string
.lstring
))
1981 s
= SDATA (bidi_it
->string
.lstring
);
1984 if (bidi_it
->nchars
<= 0)
1986 if (level
== prev_level
) /* empty embedding */
1987 saved_it
.ignore_bn_limit
= bidi_it
->charpos
+ bidi_it
->nchars
;
1988 else /* this embedding is non-empty */
1989 saved_it
.ignore_bn_limit
= -2;
1991 bidi_copy_it (bidi_it
, &saved_it
);
1992 if (bidi_it
->ignore_bn_limit
> -1)
1994 /* We pushed a level, but we shouldn't have. Undo that. */
1995 if (!bidi_it
->invalid_levels
)
1996 new_level
= bidi_pop_embedding_level (bidi_it
);
1998 bidi_it
->invalid_levels
--;
2002 if (bidi_it
->type
== NEUTRAL_B
) /* X8 */
2004 bidi_set_paragraph_end (bidi_it
);
2005 /* This is needed by bidi_resolve_weak below, and in L1. */
2006 bidi_it
->type_after_w1
= bidi_it
->type
;
2007 bidi_check_type (bidi_it
->type_after_w1
);
2014 bidi_isolate_fmt_char (bidi_type_t ch_type
)
2016 return (ch_type
== LRI
|| ch_type
== RLI
|| ch_type
== PDI
);
2019 /* Advance in the buffer/string, resolve weak types and return the
2020 type of the next character after weak type resolution. */
2022 bidi_resolve_weak (struct bidi_it
*bidi_it
)
2025 bidi_dir_t override
;
2026 int prev_level
= bidi_it
->resolved_level
;
2027 int new_level
= bidi_resolve_explicit (bidi_it
);
2029 bidi_type_t type_of_next
;
2030 struct bidi_it saved_it
;
2032 = ((STRINGP (bidi_it
->string
.lstring
) || bidi_it
->string
.s
)
2033 ? bidi_it
->string
.schars
: ZV
);
2035 type
= bidi_it
->type
;
2036 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
2038 if (type
== UNKNOWN_BT
2046 eassert (prev_level
>= 0);
2047 if (new_level
> prev_level
2048 /* When the embedding level goes down, we only need to compute
2049 the type of sos if this level is not an isolate, because the
2050 sos type of the isolating sequence was already computed and
2051 saved on the stack. */
2052 || (new_level
< prev_level
2053 && !bidi_it
->level_stack
[bidi_it
->stack_idx
].isolate_status
)
2054 || bidi_it
->type
== NEUTRAL_B
)
2056 /* We've got a new isolating sequence, compute the directional
2057 type of sos and initialize per-run variables (UAX#9, clause
2059 bidi_set_sos_type (bidi_it
, prev_level
, new_level
);
2061 else if (type
== NEUTRAL_S
|| type
== NEUTRAL_WS
2062 || type
== WEAK_BN
|| type
== STRONG_AL
)
2063 bidi_it
->type_after_w1
= type
; /* needed in L1 */
2064 bidi_check_type (bidi_it
->type_after_w1
);
2066 /* Level and directional override status are already recorded in
2067 bidi_it, and do not need any change; see X6. */
2068 if (override
== R2L
) /* X6 */
2070 else if (override
== L2R
)
2074 if (type
== WEAK_NSM
) /* W1 */
2076 /* Note that we don't need to consider the case where the
2077 prev character has its type overridden by an RLO or LRO,
2078 because then either the type of this NSM would have been
2079 also overridden, or the previous character is outside the
2080 current level run, and thus not relevant to this NSM.
2081 This is why NSM gets the type_after_w1 of the previous
2083 if (bidi_it
->prev
.type_after_w1
!= UNKNOWN_BT
2084 /* if type_after_w1 is NEUTRAL_B, this NSM is at sos */
2085 && bidi_it
->prev
.type_after_w1
!= NEUTRAL_B
)
2087 if (bidi_isolate_fmt_char (bidi_it
->prev
.type_after_w1
))
2090 type
= bidi_it
->prev
.type_after_w1
;
2092 else if (bidi_it
->sos
== R2L
)
2094 else if (bidi_it
->sos
== L2R
)
2096 else /* shouldn't happen! */
2099 if (type
== WEAK_EN
/* W2 */
2100 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
2102 else if (type
== STRONG_AL
) /* W3 */
2104 else if ((type
== WEAK_ES
/* W4 */
2105 && bidi_it
->prev
.type_after_w1
== WEAK_EN
2106 && bidi_it
->prev
.orig_type
== WEAK_EN
)
2108 && ((bidi_it
->prev
.type_after_w1
== WEAK_EN
2109 && bidi_it
->prev
.orig_type
== WEAK_EN
)
2110 || bidi_it
->prev
.type_after_w1
== WEAK_AN
)))
2112 const unsigned char *s
2113 = (STRINGP (bidi_it
->string
.lstring
)
2114 ? SDATA (bidi_it
->string
.lstring
)
2115 : bidi_it
->string
.s
);
2117 next_char
= (bidi_it
->charpos
+ bidi_it
->nchars
>= eob
2119 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
,
2120 s
, bidi_it
->string
.unibyte
));
2121 type_of_next
= bidi_get_type (next_char
, override
);
2123 if (type_of_next
== WEAK_BN
2124 || bidi_explicit_dir_char (next_char
))
2126 bidi_copy_it (&saved_it
, bidi_it
);
2127 while (bidi_resolve_explicit (bidi_it
) == new_level
2128 && bidi_it
->type
== WEAK_BN
)
2130 type_of_next
= bidi_it
->type
;
2131 bidi_copy_it (bidi_it
, &saved_it
);
2134 /* If the next character is EN, but the last strong-type
2135 character is AL, that next EN will be changed to AN when
2136 we process it in W2 above. So in that case, this ES
2137 should not be changed into EN. */
2139 && type_of_next
== WEAK_EN
2140 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
2142 else if (type
== WEAK_CS
)
2144 if (bidi_it
->prev
.type_after_w1
== WEAK_AN
2145 && (type_of_next
== WEAK_AN
2146 /* If the next character is EN, but the last
2147 strong-type character is AL, EN will be later
2148 changed to AN when we process it in W2 above.
2149 So in that case, this ES should not be
2151 || (type_of_next
== WEAK_EN
2152 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)))
2154 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
2155 && type_of_next
== WEAK_EN
2156 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
2160 else if (type
== WEAK_ET
/* W5: ET with EN before or after it */
2161 || type
== WEAK_BN
) /* W5/Retaining */
2163 if (bidi_it
->prev
.type_after_w1
== WEAK_EN
) /* ET/BN w/EN before it */
2165 else if (bidi_it
->next_en_pos
> bidi_it
->charpos
2166 && bidi_it
->next_en_type
!= WEAK_BN
)
2168 if (bidi_it
->next_en_type
== WEAK_EN
) /* ET/BN with EN after it */
2171 else if (bidi_it
->next_en_pos
>=0)
2173 ptrdiff_t en_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
2174 const unsigned char *s
= (STRINGP (bidi_it
->string
.lstring
)
2175 ? SDATA (bidi_it
->string
.lstring
)
2176 : bidi_it
->string
.s
);
2178 if (bidi_it
->nchars
<= 0)
2181 = (bidi_it
->charpos
+ bidi_it
->nchars
>= eob
2183 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
,
2184 bidi_it
->string
.unibyte
));
2185 type_of_next
= bidi_get_type (next_char
, override
);
2187 if (type_of_next
== WEAK_ET
2188 || type_of_next
== WEAK_BN
2189 || bidi_explicit_dir_char (next_char
))
2191 bidi_copy_it (&saved_it
, bidi_it
);
2192 while (bidi_resolve_explicit (bidi_it
) == new_level
2193 && (bidi_it
->type
== WEAK_BN
2194 || bidi_it
->type
== WEAK_ET
))
2196 type_of_next
= bidi_it
->type
;
2197 en_pos
= bidi_it
->charpos
;
2198 bidi_copy_it (bidi_it
, &saved_it
);
2200 /* Remember this position, to speed up processing of the
2202 bidi_it
->next_en_pos
= en_pos
;
2203 if (type_of_next
== WEAK_EN
)
2205 /* If the last strong character is AL, the EN we've
2206 found will become AN when we get to it (W2). */
2207 if (bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
2208 type_of_next
= WEAK_AN
;
2209 else if (type
== WEAK_BN
)
2210 type
= NEUTRAL_ON
; /* W6/Retaining */
2214 else if (type_of_next
== NEUTRAL_B
)
2215 /* Record the fact that there are no more ENs from
2216 here to the end of paragraph, to avoid entering the
2217 loop above ever again in this paragraph. */
2218 bidi_it
->next_en_pos
= -1;
2219 /* Record the type of the character where we ended our search. */
2220 bidi_it
->next_en_type
= type_of_next
;
2225 if (type
== WEAK_ES
|| type
== WEAK_ET
|| type
== WEAK_CS
/* W6 */
2227 && (bidi_it
->prev
.type_after_w1
== WEAK_CS
/* W6/Retaining */
2228 || bidi_it
->prev
.type_after_w1
== WEAK_ES
2229 || bidi_it
->prev
.type_after_w1
== WEAK_ET
)))
2232 /* Store the type we've got so far, before we clobber it with strong
2233 types in W7 and while resolving neutral types. But leave alone
2234 the original types that were recorded above, because we will need
2235 them for the L1 clause. */
2236 if (bidi_it
->type_after_w1
== UNKNOWN_BT
)
2237 bidi_it
->type_after_w1
= type
;
2238 bidi_check_type (bidi_it
->type_after_w1
);
2240 if (type
== WEAK_EN
) /* W7 */
2242 if ((bidi_it
->last_strong
.type_after_w1
== STRONG_L
)
2243 || (bidi_it
->last_strong
.type
== UNKNOWN_BT
&& bidi_it
->sos
== L2R
))
2247 bidi_it
->type
= type
;
2248 bidi_check_type (bidi_it
->type
);
2252 /* Resolve the type of a neutral character according to the type of
2253 surrounding strong text and the current embedding level. */
2255 bidi_resolve_neutral_1 (bidi_type_t prev_type
, bidi_type_t next_type
, int lev
)
2257 /* N1: European and Arabic numbers are treated as though they were R. */
2258 if (next_type
== WEAK_EN
|| next_type
== WEAK_AN
)
2259 next_type
= STRONG_R
;
2260 if (prev_type
== WEAK_EN
|| prev_type
== WEAK_AN
)
2261 prev_type
= STRONG_R
;
2263 if (next_type
== prev_type
) /* N1 */
2265 else if ((lev
& 1) == 0) /* N2 */
2272 bidi_resolve_neutral (struct bidi_it
*bidi_it
)
2274 int prev_level
= bidi_it
->resolved_level
;
2275 bidi_type_t type
= bidi_resolve_weak (bidi_it
);
2276 int current_level
= bidi_it
->resolved_level
;
2278 if (!(type
== STRONG_R
2283 || type
== NEUTRAL_B
2284 || type
== NEUTRAL_S
2285 || type
== NEUTRAL_WS
2286 || type
== NEUTRAL_ON
))
2289 eassert (prev_level
>= 0);
2290 eassert (current_level
>= 0);
2291 if ((type
!= NEUTRAL_B
/* Don't risk entering the long loop below if
2292 we are already at paragraph end. */
2293 && bidi_get_category (type
) == NEUTRAL
)
2294 || (type
== WEAK_BN
&& prev_level
== current_level
))
2296 if (bidi_it
->next_for_neutral
.type
!= UNKNOWN_BT
)
2297 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2298 bidi_it
->next_for_neutral
.type
,
2300 /* The next two "else if" clauses are shortcuts for the
2301 important special case when we have a long sequence of
2302 neutral or WEAK_BN characters, such as whitespace or nulls or
2303 other control characters, on the base embedding level of the
2304 paragraph, and that sequence goes all the way to the end of
2305 the paragraph and follows a character whose resolved
2306 directionality is identical to the base embedding level.
2307 (This is what happens in a buffer with plain L2R text that
2308 happens to include long sequences of control characters.) By
2309 virtue of N1, the result of examining this long sequence will
2310 always be either STRONG_L or STRONG_R, depending on the base
2311 embedding level. So we use this fact directly instead of
2312 entering the expensive loop in the "else" clause. */
2313 else if (current_level
== 0
2314 && bidi_it
->prev_for_neutral
.type
== STRONG_L
2315 && !bidi_explicit_dir_char (bidi_it
->ch
))
2316 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2317 STRONG_L
, current_level
);
2318 else if (/* current level is 1 */
2320 /* base embedding level is also 1 */
2321 && bidi_it
->level_stack
[0].level
== 1
2322 /* previous character is one of those considered R for
2323 the purposes of W5 */
2324 && (bidi_it
->prev_for_neutral
.type
== STRONG_R
2325 || bidi_it
->prev_for_neutral
.type
== WEAK_EN
2326 || bidi_it
->prev_for_neutral
.type
== WEAK_AN
)
2327 && !bidi_explicit_dir_char (bidi_it
->ch
))
2328 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2329 STRONG_R
, current_level
);
2332 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
2333 the assumption of batch-style processing; see clauses W4,
2334 W5, and especially N1, which require to look far forward
2335 (as well as back) in the buffer/string. May the fleas of
2336 a thousand camels infest the armpits of those who design
2337 supposedly general-purpose algorithms by looking at their
2338 own implementations, and fail to consider other possible
2340 struct bidi_it saved_it
;
2341 bidi_type_t next_type
;
2343 if (bidi_it
->scan_dir
== -1)
2346 bidi_copy_it (&saved_it
, bidi_it
);
2347 /* Scan the text forward until we find the first non-neutral
2348 character, and then use that to resolve the neutral we
2349 are dealing with now. We also cache the scanned iterator
2350 states, to salvage some of the effort later. */
2351 bidi_cache_iterator_state (bidi_it
, 0);
2353 /* Record the info about the previous character, so that
2354 it will be cached below with this state. */
2355 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
2356 && bidi_it
->type
!= WEAK_BN
)
2357 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
2358 type
= bidi_resolve_weak (bidi_it
);
2359 /* Paragraph separators have their levels fully resolved
2360 at this point, so cache them as resolved. */
2361 bidi_cache_iterator_state (bidi_it
, type
== NEUTRAL_B
);
2362 /* FIXME: implement L1 here, by testing for a newline and
2363 resetting the level for any sequence of whitespace
2364 characters adjacent to it. */
2365 } while (!(type
== NEUTRAL_B
2367 && bidi_get_category (type
) != NEUTRAL
)
2368 /* This is all per level run, so stop when we
2369 reach the end of this level run. */
2370 || (bidi_it
->level_stack
[bidi_it
->stack_idx
].level
2371 != current_level
)));
2373 bidi_remember_char (&saved_it
.next_for_neutral
, bidi_it
);
2380 /* Actually, STRONG_AL cannot happen here, because
2381 bidi_resolve_weak converts it to STRONG_R, per W3. */
2382 eassert (type
!= STRONG_AL
);
2387 /* N1: ``European and Arabic numbers are treated as
2388 though they were R.'' */
2389 next_type
= STRONG_R
;
2392 case NEUTRAL_ON
: /* W6/Retaining */
2393 if (!bidi_explicit_dir_char (bidi_it
->ch
))
2394 emacs_abort (); /* can't happen: BNs are skipped */
2397 /* Marched all the way to the end of this level run.
2398 We need to use the eos type, whose information is
2399 stored by bidi_set_sos_type in the prev_for_neutral
2401 if (saved_it
.type
!= WEAK_BN
2402 || bidi_get_category (bidi_it
->prev
.type_after_w1
) == NEUTRAL
)
2403 next_type
= bidi_it
->prev_for_neutral
.type
;
2406 /* This is a BN which does not adjoin neutrals.
2407 Leave its type alone. */
2408 bidi_copy_it (bidi_it
, &saved_it
);
2409 return bidi_it
->type
;
2415 type
= bidi_resolve_neutral_1 (saved_it
.prev_for_neutral
.type
,
2416 next_type
, current_level
);
2417 saved_it
.next_for_neutral
.type
= next_type
;
2418 saved_it
.type
= type
;
2419 bidi_check_type (next_type
);
2420 bidi_check_type (type
);
2421 bidi_copy_it (bidi_it
, &saved_it
);
2427 /* Given an iterator state in BIDI_IT, advance one character position
2428 in the buffer/string to the next character (in the logical order),
2429 resolve the bidi type of that next character, and return that
2432 bidi_type_of_next_char (struct bidi_it
*bidi_it
)
2436 /* This should always be called during a forward scan. */
2437 if (bidi_it
->scan_dir
!= 1)
2440 /* Reset the limit until which to ignore BNs if we step out of the
2441 area where we found only empty levels. */
2442 if ((bidi_it
->ignore_bn_limit
> -1
2443 && bidi_it
->ignore_bn_limit
<= bidi_it
->charpos
)
2444 || (bidi_it
->ignore_bn_limit
== -2
2445 && !bidi_explicit_dir_char (bidi_it
->ch
)))
2446 bidi_it
->ignore_bn_limit
= -1;
2448 type
= bidi_resolve_neutral (bidi_it
);
2453 /* Given an iterator state BIDI_IT, advance one character position in
2454 the buffer/string to the next character (in the current scan
2455 direction), resolve the embedding and implicit levels of that next
2456 character, and return the resulting level. */
2458 bidi_level_of_next_char (struct bidi_it
*bidi_it
)
2461 int level
, prev_level
= -1;
2462 struct bidi_saved_info next_for_neutral
;
2463 ptrdiff_t next_char_pos
= -2;
2465 if (bidi_it
->scan_dir
== 1)
2468 = ((bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2469 ? bidi_it
->string
.schars
: ZV
);
2471 /* There's no sense in trying to advance if we hit end of text. */
2472 if (bidi_it
->charpos
>= eob
)
2473 return bidi_it
->resolved_level
;
2475 /* Record the info about the previous character. */
2476 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
2477 && bidi_it
->type
!= WEAK_BN
)
2478 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
2479 if (bidi_it
->type_after_w1
== STRONG_R
2480 || bidi_it
->type_after_w1
== STRONG_L
2481 || bidi_it
->type_after_w1
== STRONG_AL
)
2482 bidi_remember_char (&bidi_it
->last_strong
, bidi_it
);
2483 /* FIXME: it sounds like we don't need both prev and
2484 prev_for_neutral members, but I'm leaving them both for now. */
2485 if (bidi_it
->type
== STRONG_R
|| bidi_it
->type
== STRONG_L
2486 || bidi_it
->type
== WEAK_EN
|| bidi_it
->type
== WEAK_AN
)
2487 bidi_remember_char (&bidi_it
->prev_for_neutral
, bidi_it
);
2489 /* If we overstepped the characters used for resolving neutrals
2490 and whitespace, invalidate their info in the iterator. */
2491 if (bidi_it
->charpos
>= bidi_it
->next_for_neutral
.charpos
)
2492 bidi_it
->next_for_neutral
.type
= UNKNOWN_BT
;
2493 if (bidi_it
->next_en_pos
>= 0
2494 && bidi_it
->charpos
>= bidi_it
->next_en_pos
)
2496 bidi_it
->next_en_pos
= 0;
2497 bidi_it
->next_en_type
= UNKNOWN_BT
;
2499 if (bidi_it
->next_for_ws
.type
!= UNKNOWN_BT
2500 && bidi_it
->charpos
>= bidi_it
->next_for_ws
.charpos
)
2501 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
2503 /* This must be taken before we fill the iterator with the info
2504 about the next char. If we scan backwards, the iterator
2505 state must be already cached, so there's no need to know the
2506 embedding level of the previous character, since we will be
2507 returning to our caller shortly. */
2508 prev_level
= bidi_it
->resolved_level
;
2509 eassert (prev_level
>= 0);
2511 next_for_neutral
= bidi_it
->next_for_neutral
;
2513 /* Perhaps the character we want is already cached. If it is, the
2514 call to bidi_cache_find below will return a type other than
2516 if (bidi_cache_idx
> bidi_cache_start
&& !bidi_it
->first_elt
)
2518 int bob
= ((bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2520 if (bidi_it
->scan_dir
> 0)
2522 if (bidi_it
->nchars
<= 0)
2524 next_char_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
2526 else if (bidi_it
->charpos
>= bob
)
2527 /* Implementation note: we allow next_char_pos to be as low as
2528 0 for buffers or -1 for strings, and that is okay because
2529 that's the "position" of the sentinel iterator state we
2530 cached at the beginning of the iteration. */
2531 next_char_pos
= bidi_it
->charpos
- 1;
2532 if (next_char_pos
>= bob
- 1)
2533 type
= bidi_cache_find (next_char_pos
, -1, bidi_it
);
2539 if (type
!= UNKNOWN_BT
)
2541 /* Don't lose the information for resolving neutrals! The
2542 cached states could have been cached before their
2543 next_for_neutral member was computed. If we are on our way
2544 forward, we can simply take the info from the previous
2546 if (bidi_it
->scan_dir
== 1
2547 && bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
2548 bidi_it
->next_for_neutral
= next_for_neutral
;
2550 /* If resolved_level is -1, it means this state was cached
2551 before it was completely resolved, so we cannot return
2553 if (bidi_it
->resolved_level
!= -1)
2554 return bidi_it
->resolved_level
;
2556 if (bidi_it
->scan_dir
== -1)
2557 /* If we are going backwards, the iterator state is already cached
2558 from previous scans, and should be fully resolved. */
2561 if (type
== UNKNOWN_BT
)
2562 type
= bidi_type_of_next_char (bidi_it
);
2564 if (type
== NEUTRAL_B
)
2565 return bidi_it
->resolved_level
;
2567 level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
2568 if ((bidi_get_category (type
) == NEUTRAL
/* && type != NEUTRAL_B */)
2569 || (type
== WEAK_BN
&& prev_level
== level
))
2571 if (bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
2574 /* If the cached state shows a neutral character, it was not
2575 resolved by bidi_resolve_neutral, so do it now. */
2576 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2577 bidi_it
->next_for_neutral
.type
,
2581 if (!(type
== STRONG_R
2585 || type
== WEAK_AN
))
2587 bidi_it
->type
= type
;
2588 bidi_check_type (bidi_it
->type
);
2590 /* For L1 below, we need to know, for each WS character, whether
2591 it belongs to a sequence of WS characters preceding a newline
2592 or a TAB or a paragraph separator. */
2593 if (bidi_it
->orig_type
== NEUTRAL_WS
2594 && bidi_it
->next_for_ws
.type
== UNKNOWN_BT
)
2597 ptrdiff_t clen
= bidi_it
->ch_len
;
2598 ptrdiff_t bpos
= bidi_it
->bytepos
;
2599 ptrdiff_t cpos
= bidi_it
->charpos
;
2600 ptrdiff_t disp_pos
= bidi_it
->disp_pos
;
2601 ptrdiff_t nc
= bidi_it
->nchars
;
2602 struct bidi_string_data bs
= bidi_it
->string
;
2604 bool fwp
= bidi_it
->frame_window_p
;
2605 int dpp
= bidi_it
->disp_prop
;
2607 if (bidi_it
->nchars
<= 0)
2610 ch
= bidi_fetch_char (cpos
+= nc
, bpos
+= clen
, &disp_pos
, &dpp
, &bs
,
2611 bidi_it
->w
, fwp
, &clen
, &nc
);
2612 chtype
= bidi_get_type (ch
, NEUTRAL_DIR
);
2613 } while (chtype
== NEUTRAL_WS
|| chtype
== WEAK_BN
2614 || bidi_explicit_dir_char (ch
)); /* L1/Retaining */
2615 bidi_it
->next_for_ws
.type
= chtype
;
2616 bidi_check_type (bidi_it
->next_for_ws
.type
);
2617 bidi_it
->next_for_ws
.charpos
= cpos
;
2618 bidi_it
->next_for_ws
.bytepos
= bpos
;
2621 /* Resolve implicit levels, with a twist: PDFs get the embedding
2622 level of the embedding they terminate. See below for the
2624 if (bidi_it
->orig_type
== PDF
2625 /* Don't do this if this formatting code didn't change the
2626 embedding level due to invalid or empty embeddings. */
2627 && prev_level
!= level
)
2629 /* Don't look in UAX#9 for the reason for this: it's our own
2630 private quirk. The reason is that we want the formatting
2631 codes to be delivered so that they bracket the text of their
2632 embedding. For example, given the text
2636 we want it to be displayed as
2644 which will result because we bump up the embedding level as
2645 soon as we see the RLO and pop it as soon as we see the PDF,
2646 so RLO itself has the same embedding level as "teST", and
2647 thus would be normally delivered last, just before the PDF.
2648 The switch below fiddles with the level of PDF so that this
2649 ugly side effect does not happen.
2651 (This is, of course, only important if the formatting codes
2652 are actually displayed, but Emacs does need to display them
2653 if the user wants to.) */
2656 else if (bidi_it
->orig_type
== NEUTRAL_B
/* L1 */
2657 || bidi_it
->orig_type
== NEUTRAL_S
2658 || bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
2659 || (bidi_it
->orig_type
== NEUTRAL_WS
2660 && (bidi_it
->next_for_ws
.type
== NEUTRAL_B
2661 || bidi_it
->next_for_ws
.type
== NEUTRAL_S
)))
2662 level
= bidi_it
->level_stack
[0].level
;
2663 else if ((level
& 1) == 0) /* I1 */
2665 if (type
== STRONG_R
)
2667 else if (type
== WEAK_EN
|| type
== WEAK_AN
)
2672 if (type
== STRONG_L
|| type
== WEAK_EN
|| type
== WEAK_AN
)
2676 bidi_it
->resolved_level
= level
;
2680 /* Move to the other edge of a level given by LEVEL. If END_FLAG,
2681 we are at the end of a level, and we need to prepare to
2682 resume the scan of the lower level.
2684 If this level's other edge is cached, we simply jump to it, filling
2685 the iterator structure with the iterator state on the other edge.
2686 Otherwise, we walk the buffer or string until we come back to the
2687 same level as LEVEL.
2689 Note: we are not talking here about a ``level run'' in the UAX#9
2690 sense of the term, but rather about a ``level'' which includes
2691 all the levels higher than it. In other words, given the levels
2694 11111112222222333333334443343222222111111112223322111
2697 and assuming we are at point A scanning left to right, this
2698 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2701 bidi_find_other_level_edge (struct bidi_it
*bidi_it
, int level
, bool end_flag
)
2703 int dir
= end_flag
? -bidi_it
->scan_dir
: bidi_it
->scan_dir
;
2706 /* Try the cache first. */
2707 if ((idx
= bidi_cache_find_level_change (level
, dir
, end_flag
))
2708 >= bidi_cache_start
)
2709 bidi_cache_fetch_state (idx
, bidi_it
);
2714 /* If we are at end of level, its edges must be cached. */
2718 bidi_cache_iterator_state (bidi_it
, 1);
2720 new_level
= bidi_level_of_next_char (bidi_it
);
2721 bidi_cache_iterator_state (bidi_it
, 1);
2722 } while (new_level
>= level
);
2727 bidi_move_to_visually_next (struct bidi_it
*bidi_it
)
2729 int old_level
, new_level
, next_level
;
2730 struct bidi_it sentinel
;
2731 struct gcpro gcpro1
;
2733 if (bidi_it
->charpos
< 0 || bidi_it
->bytepos
< 0)
2736 if (bidi_it
->scan_dir
== 0)
2738 bidi_it
->scan_dir
= 1; /* default to logical order */
2741 /* The code below can call eval, and thus cause GC. If we are
2742 iterating a Lisp string, make sure it won't be GCed. */
2743 if (STRINGP (bidi_it
->string
.lstring
))
2744 GCPRO1 (bidi_it
->string
.lstring
);
2746 /* If we just passed a newline, initialize for the next line. */
2747 if (!bidi_it
->first_elt
2748 && (bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
))
2749 bidi_line_init (bidi_it
);
2751 /* Prepare the sentinel iterator state, and cache it. When we bump
2752 into it, scanning backwards, we'll know that the last non-base
2753 level is exhausted. */
2754 if (bidi_cache_idx
== bidi_cache_start
)
2756 bidi_copy_it (&sentinel
, bidi_it
);
2757 if (bidi_it
->first_elt
)
2759 sentinel
.charpos
--; /* cached charpos needs to be monotonic */
2761 sentinel
.ch
= '\n'; /* doesn't matter, but why not? */
2762 sentinel
.ch_len
= 1;
2763 sentinel
.nchars
= 1;
2765 bidi_cache_iterator_state (&sentinel
, 1);
2768 old_level
= bidi_it
->resolved_level
;
2769 new_level
= bidi_level_of_next_char (bidi_it
);
2771 /* Reordering of resolved levels (clause L2) is implemented by
2772 jumping to the other edge of the level and flipping direction of
2773 scanning the text whenever we find a level change. */
2774 if (new_level
!= old_level
)
2776 bool ascending
= new_level
> old_level
;
2777 int level_to_search
= ascending
? old_level
+ 1 : old_level
;
2778 int incr
= ascending
? 1 : -1;
2779 int expected_next_level
= old_level
+ incr
;
2781 /* Jump (or walk) to the other edge of this level. */
2782 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2783 /* Switch scan direction and peek at the next character in the
2785 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2787 /* The following loop handles the case where the resolved level
2788 jumps by more than one. This is typical for numbers inside a
2789 run of text with left-to-right embedding direction, but can
2790 also happen in other situations. In those cases the decision
2791 where to continue after a level change, and in what direction,
2792 is tricky. For example, given a text like below:
2797 (where the numbers below the text show the resolved levels),
2798 the result of reordering according to UAX#9 should be this:
2802 This is implemented by the loop below which flips direction
2803 and jumps to the other edge of the level each time it finds
2804 the new level not to be the expected one. The expected level
2805 is always one more or one less than the previous one. */
2806 next_level
= bidi_peek_at_next_level (bidi_it
);
2807 while (next_level
!= expected_next_level
)
2809 /* If next_level is -1, it means we have an unresolved level
2810 in the cache, which at this point should not happen. If
2811 it does, we will infloop. */
2812 eassert (next_level
>= 0);
2813 expected_next_level
+= incr
;
2814 level_to_search
+= incr
;
2815 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2816 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2817 next_level
= bidi_peek_at_next_level (bidi_it
);
2820 /* Finally, deliver the next character in the new direction. */
2821 next_level
= bidi_level_of_next_char (bidi_it
);
2824 /* Take note when we have just processed the newline that precedes
2825 the end of the paragraph. The next time we are about to be
2826 called, set_iterator_to_next will automatically reinit the
2827 paragraph direction, if needed. We do this at the newline before
2828 the paragraph separator, because the next character might not be
2829 the first character of the next paragraph, due to the bidi
2830 reordering, whereas we _must_ know the paragraph base direction
2831 _before_ we process the paragraph's text, since the base
2832 direction affects the reordering. */
2833 if (bidi_it
->scan_dir
== 1
2834 && (bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
))
2836 /* The paragraph direction of the entire string, once
2837 determined, is in effect for the entire string. Setting the
2838 separator limit to the end of the string prevents
2839 bidi_paragraph_init from being called automatically on this
2841 if (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2842 bidi_it
->separator_limit
= bidi_it
->string
.schars
;
2843 else if (bidi_it
->bytepos
< ZV_BYTE
)
2846 = bidi_at_paragraph_end (bidi_it
->charpos
+ bidi_it
->nchars
,
2847 bidi_it
->bytepos
+ bidi_it
->ch_len
);
2848 if (bidi_it
->nchars
<= 0)
2852 bidi_it
->new_paragraph
= 1;
2853 /* Record the buffer position of the last character of the
2854 paragraph separator. */
2855 bidi_it
->separator_limit
2856 = bidi_it
->charpos
+ bidi_it
->nchars
+ sep_len
;
2861 if (bidi_it
->scan_dir
== 1 && bidi_cache_idx
> bidi_cache_start
)
2863 /* If we are at paragraph's base embedding level and beyond the
2864 last cached position, the cache's job is done and we can
2866 if (bidi_it
->resolved_level
== bidi_it
->level_stack
[0].level
2867 && bidi_it
->charpos
> (bidi_cache
[bidi_cache_idx
- 1].charpos
2868 + bidi_cache
[bidi_cache_idx
- 1].nchars
- 1))
2869 bidi_cache_reset ();
2870 /* But as long as we are caching during forward scan, we must
2871 cache each state, or else the cache integrity will be
2872 compromised: it assumes cached states correspond to buffer
2875 bidi_cache_iterator_state (bidi_it
, 1);
2878 if (STRINGP (bidi_it
->string
.lstring
))
2882 /* This is meant to be called from within the debugger, whenever you
2883 wish to examine the cache contents. */
2884 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE
;
2886 bidi_dump_cached_states (void)
2891 if (bidi_cache_idx
== 0)
2893 fprintf (stderr
, "The cache is empty.\n");
2896 fprintf (stderr
, "Total of %"pD
"d state%s in cache:\n",
2897 bidi_cache_idx
, bidi_cache_idx
== 1 ? "" : "s");
2899 for (i
= bidi_cache
[bidi_cache_idx
- 1].charpos
; i
> 0; i
/= 10)
2901 fputs ("ch ", stderr
);
2902 for (i
= 0; i
< bidi_cache_idx
; i
++)
2903 fprintf (stderr
, "%*c", ndigits
, bidi_cache
[i
].ch
);
2904 fputs ("\n", stderr
);
2905 fputs ("lvl ", stderr
);
2906 for (i
= 0; i
< bidi_cache_idx
; i
++)
2907 fprintf (stderr
, "%*d", ndigits
, bidi_cache
[i
].resolved_level
);
2908 fputs ("\n", stderr
);
2909 fputs ("pos ", stderr
);
2910 for (i
= 0; i
< bidi_cache_idx
; i
++)
2911 fprintf (stderr
, "%*"pD
"d", ndigits
, bidi_cache
[i
].charpos
);
2912 fputs ("\n", stderr
);