Reset bracket_resolved bit earlier; remove bytepos from bidi_saved_info.
[emacs.git] / src / bidi.c
blobd14d6f6186a52cb33ee626d495441284aafdd113
1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software
3 Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the Reference Implementation and most other implementations,
26 this one is designed to be called once for every character in the
27 buffer or string. That way, we can leave intact the design of the
28 Emacs display engine, whereby an iterator object is used to
29 traverse buffer or string text character by character, and generate
30 the necessary data for displaying each character in 'struct glyph'
31 objects. (See xdisp.c for the details of that iteration.) The
32 functions on this file replace the original linear iteration in the
33 logical order of the text with a non-linear iteration in the visual
34 order, i.e. in the order characters should be shown on display.
36 The main entry point is bidi_move_to_visually_next. Each time it
37 is called, it finds the next character in the visual order, and
38 returns its information in a special structure. The caller is then
39 expected to process this character for display or any other
40 purposes, and call bidi_move_to_visually_next for the next
41 character. See the comments in bidi_move_to_visually_next for more
42 details about its algorithm that finds the next visual-order
43 character by resolving their levels on the fly.
45 Two other entry points are bidi_paragraph_init and
46 bidi_mirror_char. The first determines the base direction of a
47 paragraph, while the second returns the mirrored version of its
48 argument character.
50 A few auxiliary entry points are used to initialize the bidi
51 iterator for iterating an object (buffer or string), push and pop
52 the bidi iterator state, and save and restore the state of the bidi
53 cache.
55 If you want to understand the code, you will have to read it
56 together with the relevant portions of UAX#9. The comments include
57 references to UAX#9 rules, for that very reason.
59 A note about references to UAX#9 rules: if the reference says
60 something like "X9/Retaining", it means that you need to refer to
61 rule X9 and to its modifications described in the "Implementation
62 Notes" section of UAX#9, under "Retaining Format Codes".
64 Here's the overview of the design of the reordering engine
65 implemented by this file.
67 Basic implementation structure
68 ------------------------------
70 The sequential processing steps described by UAX#9 are implemented
71 as recursive levels of processing, all of which examine the next
72 character in the logical order. This hierarchy of processing looks
73 as follows, from the innermost (deepest) to the outermost level,
74 omitting some subroutines used by each level:
76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_neutral -- resolve neutral types
80 bidi_level_of_next_char -- resolve implicit levels
82 Each level calls the level below it, and works on the result
83 returned by the lower level, including all of its sub-levels.
85 Unlike all the levels below it, bidi_level_of_next_char can return
86 the information about either the next or previous character in the
87 logical order, depending on the current direction of scanning the
88 buffer or string. For the next character, it calls all the levels
89 below it; for the previous character, it uses the cache, described
90 below.
92 Thus, the result of calling bidi_level_of_next_char is the resolved
93 level of the next or the previous character in the logical order.
94 Based on this information, the function bidi_move_to_visually_next
95 finds the next character in the visual order and updates the
96 direction in which the buffer is scanned, either forward or
97 backward, to find the next character to be displayed. (Text is
98 scanned backwards when it needs to be reversed for display, i.e. if
99 the visual order is the inverse of the logical order.) This
100 implements the last, reordering steps of the UBA, by successively
101 calling bidi_level_of_next_char until the character of the required
102 embedding level is found; the scan direction is dynamically updated
103 as a side effect. See the commentary before the 'while' loop in
104 bidi_move_to_visually_next, for the details.
106 Fetching characters
107 -------------------
109 In a nutshell, fetching the next character boils down to calling
110 STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
111 string position. See bidi_fetch_char. However, if the next
112 character is "covered" by a display property of some kind,
113 bidi_fetch_char returns the u+FFFC "object replacement character"
114 that represents the entire run of text covered by the display
115 property. (The ch_len and nchars members of 'struct bidi_it'
116 reflect the length in bytes and characters of that text.) This is
117 so we reorder text on both sides of the display property as
118 appropriate for an image or embedded string. Similarly, text
119 covered by a display spec of the form '(space ...)', is replaced
120 with the u+2029 paragraph separator character, so such display
121 specs produce the same effect as a TAB under UBA. Both these
122 special characters are not actually displayed -- the display
123 property is displayed instead -- but just used to compute the
124 embedding level of the surrounding text so as to produce the
125 required effect.
127 Bidi iterator states
128 --------------------
130 The UBA is highly context dependent in some of its parts,
131 i.e. results of processing a character can generally depend on
132 characters very far away. The UAX#9 description of the UBA
133 prescribes a stateful processing of each character, whereby the
134 results of this processing depend on various state variables, such
135 as the current embedding level, level stack, and directional
136 override status. In addition, the UAX#9 description includes many
137 passages like this (from rule W2 in this case):
139 Search backward from each instance of a European number until the
140 first strong type (R, L, AL, or sos) is found. If an AL is found,
141 change the type of the European number to Arabic number.
143 To support this, we use a bidi iterator object, 'struct bidi_it',
144 which is a sub-structure of 'struct it' used by xdisp.c (see
145 dispextern.h for the definition of both of these structures). The
146 bidi iterator holds the entire state of the iteration required by
147 the UBA, and is updated as the text is traversed. In particular,
148 the embedding level of the current character being resolved is
149 recorded in the iterator state. To avoid costly searches backward
150 in support of rules like W2 above, the necessary character types
151 are also recorded in the iterator state as they are found during
152 the forward scan, and then used when such rules need to be applied.
153 (Forward scans cannot be avoided in this way; they need to be
154 performed at least once, and the results recorded in the iterator
155 state, to be reused until the forward scan oversteps the recorded
156 position.)
158 In this manner, the iterator state acts as a mini-cache of
159 contextual information required for resolving the level of the
160 current character by various UBA rules.
162 Caching of bidi iterator states
163 -------------------------------
165 As described above, the reordering engine uses the information
166 recorded in the bidi iterator state in order to resolve the
167 embedding level of the current character. When the reordering
168 engine needs to process the next character in the logical order, it
169 fetches it and applies to it all the UBA levels, updating the
170 iterator state as it goes. But when the buffer or string is
171 scanned backwards, i.e. in the reverse order of buffer/string
172 positions, the scanned characters were already processed during the
173 preceding forward scan (see bidi_find_other_level_edge). To avoid
174 costly re-processing of characters that were already processed
175 during the forward scan, the iterator states computed while
176 scanning forward are cached.
178 The cache is just a linear array of 'struct bidi_it' objects, which
179 is dynamically allocated and reallocated as needed, since the size
180 of the cache depends on the text being processed. We only need the
181 cache while processing embedded levels higher than the base
182 paragraph embedding level, because these higher levels require
183 changes in scan direction. Therefore, as soon as we are back to
184 the base embedding level, we can free the cache; see the calls to
185 bidi_cache_reset and bidi_cache_shrink, for the conditions to do
186 this.
188 The cache maintains the index of the next unused cache slot -- this
189 is where the next iterator state will be cached. The function
190 bidi_cache_iterator_state saves an instance of the state in the
191 cache and increments the unused slot index. The companion function
192 bidi_cache_find looks up a cached state that corresponds to a given
193 buffer/string position. All of the cached states must correspond
194 1:1 to the buffer or string region whose processing they reflect;
195 bidi.c will abort if it finds cache slots that violate this 1:1
196 correspondence.
198 When the parent iterator 'struct it' is pushed (see push_it in
199 xdisp.c) to pause the current iteration and start iterating over a
200 different object (e.g., a 'display' string that covers some buffer
201 text), the bidi iterator cache needs to be "pushed" as well, so
202 that a new empty cache could be used while iterating over the new
203 object. Later, when the new object is exhausted, and xdisp.c calls
204 pop_it, we need to "pop" the bidi cache as well and return to the
205 original cache. See bidi_push_it and bidi_pop_it for how this is
206 done.
208 Some functions of the display engine save copies of 'struct it' in
209 local variables, and restore them later. For examples, see
210 pos_visible_p and move_it_in_display_line_to in xdisp.c, and
211 window_scroll_pixel_based in window.c. When this happens, we need
212 to save and restore the bidi cache as well, because conceptually
213 the cache is part of the 'struct it' state, and needs to be in
214 perfect sync with the portion of the buffer/string that is being
215 processed. This saving and restoring of the cache state is handled
216 by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
217 SAVE_IT and RESTORE_IT defined on xdisp.c.
219 Note that, because reordering is implemented below the level in
220 xdisp.c that breaks glyphs into screen lines, we are violating
221 paragraph 3.4 of UAX#9. which mandates that line breaking shall be
222 done before reordering each screen line separately. However,
223 following UAX#9 to the letter in this matter goes against the basic
224 design of the Emacs display engine, and so we choose here this
225 minor deviation from the UBA letter in preference to redesign of
226 the display engine. The effect of this is only seen in continued
227 lines that are broken into screen lines in the middle of a run
228 whose direction is opposite to the paragraph's base direction.
230 Important design and implementation note: when the code needs to
231 scan far ahead, be sure to avoid such scans as much as possible
232 when the buffer/string doesn't contain any RTL characters. Users
233 of left-to-right scripts will never forgive you if you introduce
234 some slow-down due to bidi in situations that don't involve any
235 bidirectional text. See the large comment near the beginning of
236 bidi_resolve_neutral, for one situation where such shortcut was
237 necessary. */
239 #include <config.h>
240 #include <stdio.h>
242 #include "lisp.h"
243 #include "character.h"
244 #include "buffer.h"
245 #include "dispextern.h"
246 #include "region-cache.h"
248 static bool bidi_initialized = 0;
250 static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
252 #define BIDI_EOB (-1)
254 /* Data type for describing the bidirectional character categories. */
255 typedef enum {
256 UNKNOWN_BC,
257 NEUTRAL,
258 WEAK,
259 STRONG,
260 EXPLICIT_FORMATTING
261 } bidi_category_t;
263 static Lisp_Object paragraph_start_re, paragraph_separate_re;
264 static Lisp_Object Qparagraph_start, Qparagraph_separate;
267 /***********************************************************************
268 Utilities
269 ***********************************************************************/
271 /* Return the bidi type of a character CH, subject to the current
272 directional OVERRIDE. */
273 static bidi_type_t
274 bidi_get_type (int ch, bidi_dir_t override)
276 bidi_type_t default_type;
278 if (ch == BIDI_EOB)
279 return NEUTRAL_B;
280 if (ch < 0 || ch > MAX_CHAR)
281 emacs_abort ();
283 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
284 /* Every valid character code, even those that are unassigned by the
285 UCD, have some bidi-class property, according to
286 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
287 (= zero) code from CHAR_TABLE_REF, that's a bug. */
288 if (default_type == UNKNOWN_BT)
289 emacs_abort ();
291 switch (default_type)
293 case WEAK_BN:
294 case NEUTRAL_B:
295 case LRE:
296 case LRO:
297 case RLE:
298 case RLO:
299 case PDF:
300 case LRI:
301 case RLI:
302 case FSI:
303 case PDI:
304 return default_type;
305 default:
306 if (override == L2R)
307 return STRONG_L;
308 else if (override == R2L)
309 return STRONG_R;
310 else
311 return default_type;
315 static void
316 bidi_check_type (bidi_type_t type)
318 eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
321 /* Given a bidi TYPE of a character, return its category. */
322 static bidi_category_t
323 bidi_get_category (bidi_type_t type)
325 switch (type)
327 case UNKNOWN_BT:
328 return UNKNOWN_BC;
329 case STRONG_L:
330 case STRONG_R:
331 case STRONG_AL:
332 return STRONG;
333 case WEAK_EN:
334 case WEAK_ES:
335 case WEAK_ET:
336 case WEAK_AN:
337 case WEAK_CS:
338 case WEAK_NSM:
339 case WEAK_BN:
340 return WEAK;
341 case NEUTRAL_B:
342 case NEUTRAL_S:
343 case NEUTRAL_WS:
344 case NEUTRAL_ON:
345 return NEUTRAL;
346 case LRE:
347 case LRO:
348 case RLE:
349 case RLO:
350 case PDF:
351 case LRI:
352 case RLI:
353 case FSI:
354 case PDI:
355 return EXPLICIT_FORMATTING;
356 default:
357 emacs_abort ();
361 /* Return the mirrored character of C, if it has one. If C has no
362 mirrored counterpart, return C.
363 Note: The conditions in UAX#9 clause L4 regarding the surrounding
364 context must be tested by the caller. */
366 bidi_mirror_char (int c)
368 Lisp_Object val;
370 if (c == BIDI_EOB)
371 return c;
372 if (c < 0 || c > MAX_CHAR)
373 emacs_abort ();
375 val = CHAR_TABLE_REF (bidi_mirror_table, c);
376 if (INTEGERP (val))
378 int v;
380 /* When debugging, check before assigning to V, so that the check
381 isn't broken by undefined behavior due to int overflow. */
382 eassert (CHAR_VALID_P (XINT (val)));
384 v = XINT (val);
386 /* Minimal test we must do in optimized builds, to prevent weird
387 crashes further down the road. */
388 if (v < 0 || v > MAX_CHAR)
389 emacs_abort ();
391 return v;
394 return c;
397 /* Determine the start-of-sequence (sos) directional type given the two
398 embedding levels on either side of the run boundary. Also, update
399 the saved info about previously seen characters, since that info is
400 generally valid for a single level run. */
401 static void
402 bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
404 int higher_level = (level_before > level_after ? level_before : level_after);
406 /* FIXME: should the default sos direction be user selectable? */
407 bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
409 bidi_it->prev.type = bidi_it->prev.type_after_wn = UNKNOWN_BT;
410 bidi_it->last_strong.type = bidi_it->last_strong.type_after_wn
411 = bidi_it->last_strong.orig_type = UNKNOWN_BT;
412 bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
413 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
414 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_wn
415 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
418 /* Push the current embedding level and override status; reset the
419 current level to LEVEL and the current override status to OVERRIDE. */
420 static void
421 bidi_push_embedding_level (struct bidi_it *bidi_it,
422 int level, bidi_dir_t override, bool isolate_status)
424 struct bidi_stack *st;
425 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
427 bidi_it->stack_idx++;
428 eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
429 st = &bidi_it->level_stack[bidi_it->stack_idx];
430 eassert (level <= (1 << 7));
431 st->level = level;
432 st->override = override;
433 st->isolate_status = isolate_status;
434 if (isolate_status)
436 st->last_strong = bidi_it->last_strong;
437 st->prev_for_neutral = bidi_it->prev_for_neutral;
438 st->next_for_neutral = bidi_it->next_for_neutral;
439 st->sos = bidi_it->sos;
441 /* We've got a new isolating sequence, compute the directional type
442 of sos and initialize per-sequence variables (UAX#9, clause X10). */
443 bidi_set_sos_type (bidi_it, prev_level, level);
446 /* Pop from the stack the embedding level, the directional override
447 status, and optionally saved information for the isolating run
448 sequence. Return the new level. */
449 static int
450 bidi_pop_embedding_level (struct bidi_it *bidi_it)
452 int level;
454 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
455 and PDIs (X6a, 2nd bullet). */
456 if (bidi_it->stack_idx > 0)
458 bool isolate_status
459 = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
460 int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
462 struct bidi_stack st;
464 st = bidi_it->level_stack[bidi_it->stack_idx];
465 if (isolate_status)
467 /* PREV is used in W1 for resolving WEAK_NSM. By the time
468 we get to an NSM, we must have gotten past at least one
469 character: the PDI that ends the isolate from which we
470 are popping here. So PREV will have been filled up by
471 the time we first use it. We initialize it here to
472 UNKNOWN_BT to be able to catch any blunders in this
473 logic. */
474 bidi_it->prev.orig_type = bidi_it->prev.type_after_wn
475 = bidi_it->prev.type = UNKNOWN_BT;
476 bidi_it->last_strong = st.last_strong;
477 bidi_it->prev_for_neutral = st.prev_for_neutral;
478 bidi_it->next_for_neutral = st.next_for_neutral;
479 bidi_it->sos = st.sos;
481 else
482 bidi_set_sos_type (bidi_it, old_level,
483 bidi_it->level_stack[bidi_it->stack_idx - 1].level);
485 bidi_it->stack_idx--;
487 level = bidi_it->level_stack[bidi_it->stack_idx].level;
488 eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
489 return level;
492 /* Record in SAVED_INFO the information about the current character. */
493 static void
494 bidi_remember_char (struct bidi_saved_info *saved_info,
495 struct bidi_it *bidi_it)
497 saved_info->charpos = bidi_it->charpos;
498 saved_info->type = bidi_it->type;
499 bidi_check_type (bidi_it->type);
500 saved_info->type_after_wn = bidi_it->type_after_wn;
501 bidi_check_type (bidi_it->type_after_wn);
502 saved_info->orig_type = bidi_it->orig_type;
503 bidi_check_type (bidi_it->orig_type);
504 saved_info->bracket_resolved = bidi_it->bracket_resolved;
507 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
508 copies the part of the level stack that is actually in use. */
509 static void
510 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
512 /* Copy everything from the start through the active part of
513 the level stack. */
514 memcpy (to, from,
515 (offsetof (struct bidi_it, level_stack[1])
516 + from->stack_idx * sizeof from->level_stack[0]));
520 /***********************************************************************
521 Caching the bidi iterator states
522 ***********************************************************************/
524 /* We allocate and de-allocate the cache in chunks of this size (in
525 characters). 200 was chosen as an upper limit for reasonably-long
526 lines in a text file/buffer. */
527 #define BIDI_CACHE_CHUNK 200
528 static struct bidi_it *bidi_cache;
529 static ptrdiff_t bidi_cache_size = 0;
530 enum { elsz = sizeof (struct bidi_it) };
531 static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
532 static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
533 static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
534 "stack" level */
536 /* 5-slot stack for saving the start of the previous level of the
537 cache. xdisp.c maintains a 5-slot stack for its iterator state,
538 and we need the same size of our stack. */
539 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
540 static int bidi_cache_sp;
542 /* Size of header used by bidi_shelve_cache. */
543 enum
545 bidi_shelve_header_size
546 = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
547 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
548 + sizeof (bidi_cache_last_idx))
551 /* Reset the cache state to the empty state. We only reset the part
552 of the cache relevant to iteration of the current object. Previous
553 objects, which are pushed on the display iterator's stack, are left
554 intact. This is called when the cached information is no more
555 useful for the current iteration, e.g. when we were reseated to a
556 new position on the same object. */
557 static void
558 bidi_cache_reset (void)
560 bidi_cache_idx = bidi_cache_start;
561 bidi_cache_last_idx = -1;
564 /* Shrink the cache to its minimal size. Called when we init the bidi
565 iterator for reordering a buffer or a string that does not come
566 from display properties, because that means all the previously
567 cached info is of no further use. */
568 static void
569 bidi_cache_shrink (void)
571 if (bidi_cache_size > BIDI_CACHE_CHUNK)
573 bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
574 bidi_cache_size = BIDI_CACHE_CHUNK;
576 bidi_cache_reset ();
579 static void
580 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
582 int current_scan_dir = bidi_it->scan_dir;
584 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
585 emacs_abort ();
587 bidi_copy_it (bidi_it, &bidi_cache[idx]);
588 bidi_it->scan_dir = current_scan_dir;
589 bidi_cache_last_idx = idx;
592 /* Find a cached state with a given CHARPOS and resolved embedding
593 level less or equal to LEVEL. If LEVEL is -1, disregard the
594 resolved levels in cached states. DIR, if non-zero, means search
595 in that direction from the last cache hit. */
596 static ptrdiff_t
597 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
599 ptrdiff_t i, i_start;
601 if (bidi_cache_idx > bidi_cache_start)
603 if (bidi_cache_last_idx == -1)
604 bidi_cache_last_idx = bidi_cache_idx - 1;
605 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
607 dir = -1;
608 i_start = bidi_cache_last_idx - 1;
610 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
611 + bidi_cache[bidi_cache_last_idx].nchars - 1))
613 dir = 1;
614 i_start = bidi_cache_last_idx + 1;
616 else if (dir)
617 i_start = bidi_cache_last_idx;
618 else
620 dir = -1;
621 i_start = bidi_cache_idx - 1;
624 if (dir < 0)
626 /* Linear search for now; FIXME! */
627 for (i = i_start; i >= bidi_cache_start; i--)
628 if (bidi_cache[i].charpos <= charpos
629 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
630 && (level == -1 || bidi_cache[i].resolved_level <= level))
631 return i;
633 else
635 for (i = i_start; i < bidi_cache_idx; i++)
636 if (bidi_cache[i].charpos <= charpos
637 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
638 && (level == -1 || bidi_cache[i].resolved_level <= level))
639 return i;
643 return -1;
646 /* Find a cached state where the resolved level changes to a value
647 that is lower than LEVEL, and return its cache slot index. DIR is
648 the direction to search, starting with the last used cache slot.
649 If DIR is zero, we search backwards from the last occupied cache
650 slot. BEFORE means return the index of the slot that
651 is ``before'' the level change in the search direction. That is,
652 given the cached levels like this:
654 1122333442211
655 AB C
657 and assuming we are at the position cached at the slot marked with
658 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
659 index of slot B or A, depending whether BEFORE is, respectively,
660 true or false. */
661 static ptrdiff_t
662 bidi_cache_find_level_change (int level, int dir, bool before)
664 if (bidi_cache_idx)
666 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
667 int incr = before ? 1 : 0;
669 eassert (!dir || bidi_cache_last_idx >= 0);
671 if (!dir)
672 dir = -1;
673 else if (!incr)
674 i += dir;
676 if (dir < 0)
678 while (i >= bidi_cache_start + incr)
680 if (bidi_cache[i - incr].resolved_level >= 0
681 && bidi_cache[i - incr].resolved_level < level)
682 return i;
683 i--;
686 else
688 while (i < bidi_cache_idx - incr)
690 if (bidi_cache[i + incr].resolved_level >= 0
691 && bidi_cache[i + incr].resolved_level < level)
692 return i;
693 i++;
698 return -1;
701 static void
702 bidi_cache_ensure_space (ptrdiff_t idx)
704 /* Enlarge the cache as needed. */
705 if (idx >= bidi_cache_size)
707 /* The bidi cache cannot be larger than the largest Lisp string
708 or buffer. */
709 ptrdiff_t string_or_buffer_bound
710 = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
712 /* Also, it cannot be larger than what C can represent. */
713 ptrdiff_t c_bound
714 = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
716 bidi_cache
717 = xpalloc (bidi_cache, &bidi_cache_size,
718 max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
719 min (string_or_buffer_bound, c_bound), elsz);
723 static void
724 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
726 ptrdiff_t idx;
728 /* We should never cache on backward scans. */
729 if (bidi_it->scan_dir == -1)
730 emacs_abort ();
731 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
733 if (idx < 0)
735 idx = bidi_cache_idx;
736 bidi_cache_ensure_space (idx);
737 /* Character positions should correspond to cache positions 1:1.
738 If we are outside the range of cached positions, the cache is
739 useless and must be reset. */
740 if (idx > bidi_cache_start &&
741 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
742 + bidi_cache[idx - 1].nchars)
743 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
745 bidi_cache_reset ();
746 idx = bidi_cache_start;
748 if (bidi_it->nchars <= 0)
749 emacs_abort ();
750 bidi_copy_it (&bidi_cache[idx], bidi_it);
751 if (!resolved)
752 bidi_cache[idx].resolved_level = -1;
754 else
756 /* Copy only the members which could have changed, to avoid
757 costly copying of the entire struct. */
758 bidi_cache[idx].type = bidi_it->type;
759 bidi_check_type (bidi_it->type);
760 bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
761 bidi_check_type (bidi_it->type_after_wn);
762 if (resolved)
763 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
764 else
765 bidi_cache[idx].resolved_level = -1;
766 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
767 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
768 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
769 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
770 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
771 bidi_cache[idx].bracket_resolved = bidi_it->bracket_resolved;
774 bidi_cache_last_idx = idx;
775 if (idx >= bidi_cache_idx)
776 bidi_cache_idx = idx + 1;
779 /* Look for a cached iterator state that corresponds to CHARPOS. If
780 found, copy the cached state into BIDI_IT and return the type of
781 the cached entry. If not found, return UNKNOWN_BT. NEUTRALS_OK
782 non-zero means it is OK to return cached state for neutral
783 characters that have no valid next_for_neutral member, and
784 therefore cannot be resolved. This can happen if the state was
785 cached before it was resolved in bidi_resolve_neutral. */
786 static bidi_type_t
787 bidi_cache_find (ptrdiff_t charpos, bool neutrals_ok, struct bidi_it *bidi_it)
789 ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
791 if (i >= bidi_cache_start
792 && (neutrals_ok
793 || !(bidi_cache[i].resolved_level == -1
794 && bidi_get_category (bidi_cache[i].type) == NEUTRAL
795 && bidi_cache[i].next_for_neutral.type == UNKNOWN_BT)))
797 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
799 bidi_copy_it (bidi_it, &bidi_cache[i]);
800 bidi_cache_last_idx = i;
801 /* Don't let scan direction from the cached state override
802 the current scan direction. */
803 bidi_it->scan_dir = current_scan_dir;
804 return bidi_it->type;
807 return UNKNOWN_BT;
810 static int
811 bidi_peek_at_next_level (struct bidi_it *bidi_it)
813 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
814 emacs_abort ();
815 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
819 /***********************************************************************
820 Pushing and popping the bidi iterator state
821 ***********************************************************************/
823 /* Push the bidi iterator state in preparation for reordering a
824 different object, e.g. display string found at certain buffer
825 position. Pushing the bidi iterator boils down to saving its
826 entire state on the cache and starting a new cache "stacked" on top
827 of the current cache. */
828 void
829 bidi_push_it (struct bidi_it *bidi_it)
831 /* Save the current iterator state in its entirety after the last
832 used cache slot. */
833 bidi_cache_ensure_space (bidi_cache_idx);
834 bidi_cache[bidi_cache_idx++] = *bidi_it;
836 /* Push the current cache start onto the stack. */
837 eassert (bidi_cache_sp < IT_STACK_SIZE);
838 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
840 /* Start a new level of cache, and make it empty. */
841 bidi_cache_start = bidi_cache_idx;
842 bidi_cache_last_idx = -1;
845 /* Restore the iterator state saved by bidi_push_it and return the
846 cache to the corresponding state. */
847 void
848 bidi_pop_it (struct bidi_it *bidi_it)
850 if (bidi_cache_start <= 0)
851 emacs_abort ();
853 /* Reset the next free cache slot index to what it was before the
854 call to bidi_push_it. */
855 bidi_cache_idx = bidi_cache_start - 1;
857 /* Restore the bidi iterator state saved in the cache. */
858 *bidi_it = bidi_cache[bidi_cache_idx];
860 /* Pop the previous cache start from the stack. */
861 if (bidi_cache_sp <= 0)
862 emacs_abort ();
863 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
865 /* Invalidate the last-used cache slot data. */
866 bidi_cache_last_idx = -1;
869 static ptrdiff_t bidi_cache_total_alloc;
871 /* Stash away a copy of the cache and its control variables. */
872 void *
873 bidi_shelve_cache (void)
875 unsigned char *databuf;
876 ptrdiff_t alloc;
878 /* Empty cache. */
879 if (bidi_cache_idx == 0)
880 return NULL;
882 alloc = (bidi_shelve_header_size
883 + bidi_cache_idx * sizeof (struct bidi_it));
884 databuf = xmalloc (alloc);
885 bidi_cache_total_alloc += alloc;
887 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
888 memcpy (databuf + sizeof (bidi_cache_idx),
889 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
890 memcpy (databuf + sizeof (bidi_cache_idx)
891 + bidi_cache_idx * sizeof (struct bidi_it),
892 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
893 memcpy (databuf + sizeof (bidi_cache_idx)
894 + bidi_cache_idx * sizeof (struct bidi_it)
895 + sizeof (bidi_cache_start_stack),
896 &bidi_cache_sp, sizeof (bidi_cache_sp));
897 memcpy (databuf + sizeof (bidi_cache_idx)
898 + bidi_cache_idx * sizeof (struct bidi_it)
899 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
900 &bidi_cache_start, sizeof (bidi_cache_start));
901 memcpy (databuf + sizeof (bidi_cache_idx)
902 + bidi_cache_idx * sizeof (struct bidi_it)
903 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
904 + sizeof (bidi_cache_start),
905 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
907 return databuf;
910 /* Restore the cache state from a copy stashed away by
911 bidi_shelve_cache, and free the buffer used to stash that copy.
912 JUST_FREE means free the buffer, but don't restore the
913 cache; used when the corresponding iterator is discarded instead of
914 being restored. */
915 void
916 bidi_unshelve_cache (void *databuf, bool just_free)
918 unsigned char *p = databuf;
920 if (!p)
922 if (!just_free)
924 /* A NULL pointer means an empty cache. */
925 bidi_cache_start = 0;
926 bidi_cache_sp = 0;
927 bidi_cache_reset ();
930 else
932 if (just_free)
934 ptrdiff_t idx;
936 memcpy (&idx, p, sizeof (bidi_cache_idx));
937 bidi_cache_total_alloc
938 -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
940 else
942 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
943 bidi_cache_ensure_space (bidi_cache_idx);
944 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
945 bidi_cache_idx * sizeof (struct bidi_it));
946 memcpy (bidi_cache_start_stack,
947 p + sizeof (bidi_cache_idx)
948 + bidi_cache_idx * sizeof (struct bidi_it),
949 sizeof (bidi_cache_start_stack));
950 memcpy (&bidi_cache_sp,
951 p + sizeof (bidi_cache_idx)
952 + bidi_cache_idx * sizeof (struct bidi_it)
953 + sizeof (bidi_cache_start_stack),
954 sizeof (bidi_cache_sp));
955 memcpy (&bidi_cache_start,
956 p + sizeof (bidi_cache_idx)
957 + bidi_cache_idx * sizeof (struct bidi_it)
958 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
959 sizeof (bidi_cache_start));
960 memcpy (&bidi_cache_last_idx,
961 p + sizeof (bidi_cache_idx)
962 + bidi_cache_idx * sizeof (struct bidi_it)
963 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
964 + sizeof (bidi_cache_start),
965 sizeof (bidi_cache_last_idx));
966 bidi_cache_total_alloc
967 -= (bidi_shelve_header_size
968 + bidi_cache_idx * sizeof (struct bidi_it));
971 xfree (p);
976 /***********************************************************************
977 Initialization
978 ***********************************************************************/
979 static void
980 bidi_initialize (void)
982 bidi_type_table = uniprop_table (intern ("bidi-class"));
983 if (NILP (bidi_type_table))
984 emacs_abort ();
985 staticpro (&bidi_type_table);
987 bidi_mirror_table = uniprop_table (intern ("mirroring"));
988 if (NILP (bidi_mirror_table))
989 emacs_abort ();
990 staticpro (&bidi_mirror_table);
992 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
993 if (NILP (bidi_brackets_table))
994 emacs_abort ();
995 staticpro (&bidi_brackets_table);
997 Qparagraph_start = intern ("paragraph-start");
998 staticpro (&Qparagraph_start);
999 paragraph_start_re = Fsymbol_value (Qparagraph_start);
1000 if (!STRINGP (paragraph_start_re))
1001 paragraph_start_re = build_string ("\f\\|[ \t]*$");
1002 staticpro (&paragraph_start_re);
1003 Qparagraph_separate = intern ("paragraph-separate");
1004 staticpro (&Qparagraph_separate);
1005 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
1006 if (!STRINGP (paragraph_separate_re))
1007 paragraph_separate_re = build_string ("[ \t\f]*$");
1008 staticpro (&paragraph_separate_re);
1010 bidi_cache_sp = 0;
1011 bidi_cache_total_alloc = 0;
1013 bidi_initialized = 1;
1016 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
1017 end. */
1018 static void
1019 bidi_set_paragraph_end (struct bidi_it *bidi_it)
1021 bidi_it->invalid_levels = 0;
1022 bidi_it->invalid_isolates = 0;
1023 bidi_it->stack_idx = 0;
1024 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1027 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
1028 void
1029 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
1030 struct bidi_it *bidi_it)
1032 if (! bidi_initialized)
1033 bidi_initialize ();
1034 if (charpos >= 0)
1035 bidi_it->charpos = charpos;
1036 if (bytepos >= 0)
1037 bidi_it->bytepos = bytepos;
1038 bidi_it->frame_window_p = frame_window_p;
1039 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
1040 bidi_it->first_elt = 1;
1041 bidi_set_paragraph_end (bidi_it);
1042 bidi_it->new_paragraph = 1;
1043 bidi_it->separator_limit = -1;
1044 bidi_it->type = NEUTRAL_B;
1045 bidi_it->type_after_wn = NEUTRAL_B;
1046 bidi_it->orig_type = NEUTRAL_B;
1047 /* FIXME: Review this!!! */
1048 bidi_it->prev.type = bidi_it->prev.type_after_wn
1049 = bidi_it->prev.orig_type = UNKNOWN_BT;
1050 bidi_it->last_strong.type = bidi_it->last_strong.type_after_wn
1051 = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1052 bidi_it->next_for_neutral.charpos = -1;
1053 bidi_it->next_for_neutral.type
1054 = bidi_it->next_for_neutral.type_after_wn
1055 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
1056 bidi_it->prev_for_neutral.charpos = -1;
1057 bidi_it->prev_for_neutral.type
1058 = bidi_it->prev_for_neutral.type_after_wn
1059 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
1060 bidi_it->sos = L2R; /* FIXME: should it be user-selectable? */
1061 bidi_it->disp_pos = -1; /* invalid/unknown */
1062 bidi_it->disp_prop = 0;
1063 /* We can only shrink the cache if we are at the bottom level of its
1064 "stack". */
1065 if (bidi_cache_start == 0)
1066 bidi_cache_shrink ();
1067 else
1068 bidi_cache_reset ();
1071 /* Perform initializations for reordering a new line of bidi text. */
1072 static void
1073 bidi_line_init (struct bidi_it *bidi_it)
1075 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
1076 bidi_it->stack_idx = 0;
1077 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1078 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
1079 bidi_it->level_stack[0].isolate_status = false; /* X1 */
1080 bidi_it->invalid_levels = 0;
1081 bidi_it->isolate_level = 0; /* X1 */
1082 bidi_it->invalid_isolates = 0; /* X1 */
1083 /* Setting this to zero will force its recomputation the first time
1084 we need it for W5. */
1085 bidi_it->next_en_pos = 0;
1086 bidi_it->next_en_type = UNKNOWN_BT;
1087 bidi_it->next_for_ws.type = UNKNOWN_BT;
1088 bidi_set_sos_type (bidi_it,
1089 (bidi_it->paragraph_dir == R2L ? 1 : 0),
1090 bidi_it->level_stack[0].level); /* X10 */
1092 bidi_cache_reset ();
1096 /***********************************************************************
1097 Fetching characters
1098 ***********************************************************************/
1100 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
1101 are zero-based character positions in S, BEGBYTE is byte position
1102 corresponding to BEG. UNIBYTE means S is a unibyte string. */
1103 static ptrdiff_t
1104 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
1105 ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
1107 ptrdiff_t pos = beg;
1108 const unsigned char *p = s + begbyte, *start = p;
1110 if (unibyte)
1111 p = s + end;
1112 else
1114 if (!CHAR_HEAD_P (*p))
1115 emacs_abort ();
1117 while (pos < end)
1119 p += BYTES_BY_CHAR_HEAD (*p);
1120 pos++;
1124 return p - start;
1127 /* Fetch and return the character at byte position BYTEPOS. If S is
1128 non-NULL, fetch the character from string S; otherwise fetch the
1129 character from the current buffer. UNIBYTE means S is a
1130 unibyte string. */
1131 static int
1132 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
1134 if (s)
1136 s += bytepos;
1137 if (unibyte)
1138 return *s;
1140 else
1141 s = BYTE_POS_ADDR (bytepos);
1142 return STRING_CHAR (s);
1145 /* Fetch and return the character at CHARPOS/BYTEPOS. If that
1146 character is covered by a display string, treat the entire run of
1147 covered characters as a single character, either u+2029 or u+FFFC,
1148 and return their combined length in CH_LEN and NCHARS. DISP_POS
1149 specifies the character position of the next display string, or -1
1150 if not yet computed. When the next character is at or beyond that
1151 position, the function updates DISP_POS with the position of the
1152 next display string. *DISP_PROP non-zero means that there's really
1153 a display string at DISP_POS, as opposed to when we searched till
1154 DISP_POS without finding one. If *DISP_PROP is 2, it means the
1155 display spec is of the form `(space ...)', which is replaced with
1156 u+2029 to handle it as a paragraph separator. STRING->s is the C
1157 string to iterate, or NULL if iterating over a buffer or a Lisp
1158 string; in the latter case, STRING->lstring is the Lisp string. */
1159 static int
1160 bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
1161 int *disp_prop, struct bidi_string_data *string,
1162 struct window *w,
1163 bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
1165 int ch;
1166 ptrdiff_t endpos
1167 = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
1168 struct text_pos pos;
1169 int len;
1171 /* If we got past the last known position of display string, compute
1172 the position of the next one. That position could be at CHARPOS. */
1173 if (charpos < endpos && charpos > *disp_pos)
1175 SET_TEXT_POS (pos, charpos, bytepos);
1176 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1177 disp_prop);
1180 /* Fetch the character at BYTEPOS. */
1181 if (charpos >= endpos)
1183 ch = BIDI_EOB;
1184 *ch_len = 1;
1185 *nchars = 1;
1186 *disp_pos = endpos;
1187 *disp_prop = 0;
1189 else if (charpos >= *disp_pos && *disp_prop)
1191 ptrdiff_t disp_end_pos;
1193 /* We don't expect to find ourselves in the middle of a display
1194 property. Hopefully, it will never be needed. */
1195 if (charpos > *disp_pos)
1196 emacs_abort ();
1197 /* Text covered by `display' properties and overlays with
1198 display properties or display strings is handled as a single
1199 character that represents the entire run of characters
1200 covered by the display property. */
1201 if (*disp_prop == 2)
1203 /* `(space ...)' display specs are handled as paragraph
1204 separators for the purposes of the reordering; see UAX#9
1205 section 3 and clause HL1 in section 4.3 there. */
1206 ch = 0x2029;
1208 else
1210 /* All other display specs are handled as the Unicode Object
1211 Replacement Character. */
1212 ch = 0xFFFC;
1214 disp_end_pos = compute_display_string_end (*disp_pos, string);
1215 if (disp_end_pos < 0)
1217 /* Somebody removed the display string from the buffer
1218 behind our back. Recover by processing this buffer
1219 position as if no display property were present there to
1220 begin with. */
1221 *disp_prop = 0;
1222 goto normal_char;
1224 *nchars = disp_end_pos - *disp_pos;
1225 if (*nchars <= 0)
1226 emacs_abort ();
1227 if (string->s)
1228 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
1229 disp_end_pos, string->unibyte);
1230 else if (STRINGP (string->lstring))
1231 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
1232 bytepos, disp_end_pos, string->unibyte);
1233 else
1234 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1236 else
1238 normal_char:
1239 if (string->s)
1242 if (!string->unibyte)
1244 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1245 *ch_len = len;
1247 else
1249 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1250 *ch_len = 1;
1253 else if (STRINGP (string->lstring))
1255 if (!string->unibyte)
1257 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1258 len);
1259 *ch_len = len;
1261 else
1263 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1264 *ch_len = 1;
1267 else
1269 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1270 *ch_len = len;
1272 *nchars = 1;
1275 /* If we just entered a run of characters covered by a display
1276 string, compute the position of the next display string. */
1277 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1278 && *disp_prop)
1280 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1281 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1282 disp_prop);
1285 return ch;
1288 /* Like bidi_fetch_char, but ignore any text between an isolate
1289 initiator and its matching PDI or, if it has no matching PDI, the
1290 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1291 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1292 and the character that was fetched after skipping the isolates. */
1293 static int
1294 bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
1295 ptrdiff_t *disp_pos, int *disp_prop,
1296 struct bidi_string_data *string,
1297 struct window *w, bool frame_window_p,
1298 ptrdiff_t *ch_len, ptrdiff_t *nchars)
1300 ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
1301 int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
1302 frame_window_p, ch_len, nchars);
1303 bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1304 ptrdiff_t level = 0;
1306 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1308 level++;
1309 while (level > 0 && ch_type != NEUTRAL_B)
1311 charpos += *nchars;
1312 bytepos += *ch_len;
1313 ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
1314 w, frame_window_p, ch_len, nchars);
1315 ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1316 /* A Note to P2 says to ignore max_depth limit. */
1317 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1318 level++;
1319 else if (ch_type == PDI)
1320 level--;
1324 /* Communicate to the caller how much did we skip, so it could get
1325 past the last character position we examined. */
1326 *nchars += charpos - orig_charpos;
1327 *ch_len += bytepos - orig_bytepos;
1328 return ch;
1333 /***********************************************************************
1334 Determining paragraph direction
1335 ***********************************************************************/
1337 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1338 Value is the non-negative length of the paragraph separator
1339 following the buffer position, -1 if position is at the beginning
1340 of a new paragraph, or -2 if position is neither at beginning nor
1341 at end of a paragraph. */
1342 static ptrdiff_t
1343 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1345 Lisp_Object sep_re;
1346 Lisp_Object start_re;
1347 ptrdiff_t val;
1349 sep_re = paragraph_separate_re;
1350 start_re = paragraph_start_re;
1352 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1353 if (val < 0)
1355 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1356 val = -1;
1357 else
1358 val = -2;
1361 return val;
1364 /* If the user has requested the long scans caching, make sure that
1365 BIDI cache is enabled. Otherwise, make sure it's disabled. */
1367 static struct region_cache *
1368 bidi_paragraph_cache_on_off (void)
1370 struct buffer *cache_buffer = current_buffer;
1371 bool indirect_p = false;
1373 /* For indirect buffers, make sure to use the cache of their base
1374 buffer. */
1375 if (cache_buffer->base_buffer)
1377 cache_buffer = cache_buffer->base_buffer;
1378 indirect_p = true;
1381 /* Don't turn on or off the cache in the base buffer, if the value
1382 of cache-long-scans of the base buffer is inconsistent with that.
1383 This is because doing so will just make the cache pure overhead,
1384 since if we turn it on via indirect buffer, it will be
1385 immediately turned off by its base buffer. */
1386 if (NILP (BVAR (current_buffer, cache_long_scans)))
1388 if (!indirect_p
1389 || NILP (BVAR (cache_buffer, cache_long_scans)))
1391 if (cache_buffer->bidi_paragraph_cache)
1393 free_region_cache (cache_buffer->bidi_paragraph_cache);
1394 cache_buffer->bidi_paragraph_cache = 0;
1397 return NULL;
1399 else
1401 if (!indirect_p
1402 || !NILP (BVAR (cache_buffer, cache_long_scans)))
1404 if (!cache_buffer->bidi_paragraph_cache)
1405 cache_buffer->bidi_paragraph_cache = new_region_cache ();
1407 return cache_buffer->bidi_paragraph_cache;
1411 /* On my 2005-vintage machine, searching back for paragraph start
1412 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1413 when user types C-p. The number below limits each call to
1414 bidi_paragraph_init to about 10 ms. */
1415 #define MAX_PARAGRAPH_SEARCH 7500
1417 /* Find the beginning of this paragraph by looking back in the buffer.
1418 Value is the byte position of the paragraph's beginning, or
1419 BEGV_BYTE if paragraph_start_re is still not found after looking
1420 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
1421 static ptrdiff_t
1422 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1424 Lisp_Object re = paragraph_start_re;
1425 ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1426 struct region_cache *bpc = bidi_paragraph_cache_on_off ();
1427 ptrdiff_t n = 0, oldpos = pos, next;
1428 struct buffer *cache_buffer = current_buffer;
1430 if (cache_buffer->base_buffer)
1431 cache_buffer = cache_buffer->base_buffer;
1433 while (pos_byte > BEGV_BYTE
1434 && n++ < MAX_PARAGRAPH_SEARCH
1435 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1437 /* FIXME: What if the paragraph beginning is covered by a
1438 display string? And what if a display string covering some
1439 of the text over which we scan back includes
1440 paragraph_start_re? */
1441 DEC_BOTH (pos, pos_byte);
1442 if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
1444 pos = next, pos_byte = CHAR_TO_BYTE (pos);
1445 break;
1447 else
1448 pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
1450 if (n >= MAX_PARAGRAPH_SEARCH)
1451 pos = BEGV, pos_byte = BEGV_BYTE;
1452 if (bpc)
1453 know_region_cache (cache_buffer, bpc, pos, oldpos);
1454 /* Positions returned by the region cache are not limited to
1455 BEGV..ZV range, so we limit them here. */
1456 pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
1457 return pos_byte;
1460 /* On a 3.4 GHz machine, searching forward for a strong directional
1461 character in a long paragraph full of weaks or neutrals takes about
1462 1 ms for each 20K characters. The number below limits each call to
1463 bidi_paragraph_init to less than 10 ms even on slow machines. */
1464 #define MAX_STRONG_CHAR_SEARCH 100000
1466 /* Starting from POS, find the first strong (L, R, or AL) character,
1467 while skipping over any characters between an isolate initiator and
1468 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1469 matches the isolate initiator at POS. Return the bidi type of the
1470 character where the search stopped. Give up if after examining
1471 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1472 character was found. */
1473 static bidi_type_t
1474 find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
1475 ptrdiff_t *disp_pos, int *disp_prop,
1476 struct bidi_string_data *string, struct window *w,
1477 bool string_p, bool frame_window_p,
1478 ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
1480 ptrdiff_t pos1;
1481 bidi_type_t type;
1482 int ch;
1484 if (stop_at_pdi)
1486 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1487 at POS. Get past it. */
1488 #ifdef ENABLE_CHECKING
1489 ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
1490 frame_window_p, ch_len, nchars);
1491 type = bidi_get_type (ch, NEUTRAL_DIR);
1492 eassert (type == FSI /* || type == LRI || type == RLI */);
1493 #endif
1494 pos += *nchars;
1495 bytepos += *ch_len;
1497 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
1498 w, frame_window_p, ch_len, nchars);
1499 type = bidi_get_type (ch, NEUTRAL_DIR);
1501 pos1 = pos;
1502 for (pos += *nchars, bytepos += *ch_len;
1503 bidi_get_category (type) != STRONG
1504 /* If requested to stop at first PDI, stop there. */
1505 && !(stop_at_pdi && type == PDI)
1506 /* Stop when searched too far into an abnormally large
1507 paragraph full of weak or neutral characters. */
1508 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1509 type = bidi_get_type (ch, NEUTRAL_DIR))
1511 if (pos >= end)
1513 /* Pretend there's a paragraph separator at end of
1514 buffer/string. */
1515 type = NEUTRAL_B;
1516 break;
1518 if (!string_p
1519 && type == NEUTRAL_B
1520 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1521 break;
1522 /* Fetch next character and advance to get past it. */
1523 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
1524 string, w, frame_window_p,
1525 ch_len, nchars);
1526 pos += *nchars;
1527 bytepos += *ch_len;
1529 return type;
1532 /* Determine the base direction, a.k.a. base embedding level, of the
1533 paragraph we are about to iterate through. If DIR is either L2R or
1534 R2L, just use that. Otherwise, determine the paragraph direction
1535 from the first strong directional character of the paragraph.
1537 NO_DEFAULT_P means don't default to L2R if the paragraph
1538 has no strong directional characters and both DIR and
1539 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1540 in the buffer until a paragraph is found with a strong character,
1541 or until hitting BEGV. In the latter case, fall back to L2R. This
1542 flag is used in current-bidi-paragraph-direction.
1544 Note that this function gives the paragraph separator the same
1545 direction as the preceding paragraph, even though Emacs generally
1546 views the separator as not belonging to any paragraph. */
1547 void
1548 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1550 ptrdiff_t bytepos = bidi_it->bytepos;
1551 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1552 ptrdiff_t pstartbyte;
1553 /* Note that begbyte is a byte position, while end is a character
1554 position. Yes, this is ugly, but we are trying to avoid costly
1555 calls to BYTE_TO_CHAR and its ilk. */
1556 ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
1557 ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
1559 /* Special case for an empty buffer. */
1560 if (bytepos == begbyte && bidi_it->charpos == end)
1561 dir = L2R;
1562 /* We should never be called at EOB or before BEGV. */
1563 else if (bidi_it->charpos >= end || bytepos < begbyte)
1564 emacs_abort ();
1566 if (dir == L2R)
1568 bidi_it->paragraph_dir = L2R;
1569 bidi_it->new_paragraph = 0;
1571 else if (dir == R2L)
1573 bidi_it->paragraph_dir = R2L;
1574 bidi_it->new_paragraph = 0;
1576 else if (dir == NEUTRAL_DIR) /* P2 */
1578 ptrdiff_t ch_len, nchars;
1579 ptrdiff_t pos, disp_pos = -1;
1580 int disp_prop = 0;
1581 bidi_type_t type;
1582 const unsigned char *s;
1584 if (!bidi_initialized)
1585 bidi_initialize ();
1587 /* If we are inside a paragraph separator, we are just waiting
1588 for the separator to be exhausted; use the previous paragraph
1589 direction. But don't do that if we have been just reseated,
1590 because we need to reinitialize below in that case. */
1591 if (!bidi_it->first_elt
1592 && bidi_it->charpos < bidi_it->separator_limit)
1593 return;
1595 /* If we are on a newline, get past it to where the next
1596 paragraph might start. But don't do that at BEGV since then
1597 we are potentially in a new paragraph that doesn't yet
1598 exist. */
1599 pos = bidi_it->charpos;
1600 s = (STRINGP (bidi_it->string.lstring)
1601 ? SDATA (bidi_it->string.lstring)
1602 : bidi_it->string.s);
1603 if (bytepos > begbyte
1604 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1606 bytepos++;
1607 pos++;
1610 /* We are either at the beginning of a paragraph or in the
1611 middle of it. Find where this paragraph starts. */
1612 if (string_p)
1614 /* We don't support changes of paragraph direction inside a
1615 string. It is treated as a single paragraph. */
1616 pstartbyte = 0;
1618 else
1619 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1620 bidi_it->separator_limit = -1;
1621 bidi_it->new_paragraph = 0;
1623 /* The following loop is run more than once only if NO_DEFAULT_P,
1624 and only if we are iterating on a buffer. */
1625 do {
1626 bytepos = pstartbyte;
1627 if (!string_p)
1628 pos = BYTE_TO_CHAR (bytepos);
1629 type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
1630 &bidi_it->string, bidi_it->w,
1631 string_p, bidi_it->frame_window_p,
1632 &ch_len, &nchars, false);
1633 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1634 bidi_it->paragraph_dir = R2L;
1635 else if (type == STRONG_L)
1636 bidi_it->paragraph_dir = L2R;
1637 if (!string_p
1638 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1640 /* If this paragraph is at BEGV, default to L2R. */
1641 if (pstartbyte == BEGV_BYTE)
1642 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1643 else
1645 ptrdiff_t prevpbyte = pstartbyte;
1646 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1648 /* Find the beginning of the previous paragraph, if any. */
1649 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1651 /* FXIME: What if p is covered by a display
1652 string? See also a FIXME inside
1653 bidi_find_paragraph_start. */
1654 DEC_BOTH (p, pbyte);
1655 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1657 pstartbyte = prevpbyte;
1660 } while (!string_p
1661 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1663 else
1664 emacs_abort ();
1666 /* Contrary to UAX#9 clause P3, we only default the paragraph
1667 direction to L2R if we have no previous usable paragraph
1668 direction. This is allowed by the HL1 clause. */
1669 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1670 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1671 if (bidi_it->paragraph_dir == R2L)
1672 bidi_it->level_stack[0].level = 1;
1673 else
1674 bidi_it->level_stack[0].level = 0;
1676 bidi_line_init (bidi_it);
1680 /***********************************************************************
1681 Resolving explicit and implicit levels.
1682 The rest of this file constitutes the core of the UBA implementation.
1683 ***********************************************************************/
1685 static bool
1686 bidi_explicit_dir_char (int ch)
1688 bidi_type_t ch_type;
1690 if (!bidi_initialized)
1691 emacs_abort ();
1692 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1693 return (ch_type == LRE || ch_type == LRO
1694 || ch_type == RLE || ch_type == RLO
1695 || ch_type == PDF);
1698 /* Given an iterator state in BIDI_IT, advance one character position
1699 in the buffer/string to the next character (in the logical order),
1700 resolve any explicit embeddings, directional overrides, and isolate
1701 initiators and terminators, and return the embedding level of the
1702 character after resolving these explicit directives. */
1703 static int
1704 bidi_resolve_explicit (struct bidi_it *bidi_it)
1706 int curchar;
1707 bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
1708 int current_level;
1709 int new_level;
1710 bidi_dir_t override;
1711 bool isolate_status;
1712 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1713 ptrdiff_t ch_len, nchars, disp_pos, end;
1714 int disp_prop;
1716 /* If reseat()'ed, don't advance, so as to start iteration from the
1717 position where we were reseated. bidi_it->bytepos can be less
1718 than BEGV_BYTE after reseat to BEGV. */
1719 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1720 || bidi_it->first_elt)
1722 bidi_it->first_elt = 0;
1723 if (string_p)
1725 const unsigned char *p
1726 = (STRINGP (bidi_it->string.lstring)
1727 ? SDATA (bidi_it->string.lstring)
1728 : bidi_it->string.s);
1730 if (bidi_it->charpos < 0)
1731 bidi_it->charpos = bidi_it->bytepos = 0;
1732 eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
1733 bidi_it->charpos,
1734 bidi_it->string.unibyte));
1736 else
1738 if (bidi_it->charpos < BEGV)
1740 bidi_it->charpos = BEGV;
1741 bidi_it->bytepos = BEGV_BYTE;
1743 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
1745 /* Determine the orginal bidi type of the previous character,
1746 which is needed for handling isolate initiators and PDF. The
1747 type of the previous character will only be non-trivial if
1748 our caller moved through some previous text in
1749 get_visually_first_element, in which case bidi_it->prev holds
1750 the information we want. */
1751 if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
1753 eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
1754 prev_type = bidi_it->prev.orig_type;
1755 if (prev_type == FSI)
1756 prev_type = bidi_it->type_after_wn;
1759 /* Don't move at end of buffer/string. */
1760 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1762 /* Advance to the next character, skipping characters covered by
1763 display strings (nchars > 1). */
1764 if (bidi_it->nchars <= 0)
1765 emacs_abort ();
1766 bidi_it->charpos += bidi_it->nchars;
1767 if (bidi_it->ch_len == 0)
1768 emacs_abort ();
1769 bidi_it->bytepos += bidi_it->ch_len;
1770 prev_type = bidi_it->orig_type;
1771 if (prev_type == FSI)
1772 prev_type = bidi_it->type_after_wn;
1774 else /* EOB or end of string */
1775 prev_type = NEUTRAL_B;
1777 /* Reset the bracket_resolved flag. */
1778 bidi_it->bracket_resolved = 0;
1780 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1781 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1782 isolate_status = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
1783 new_level = current_level;
1785 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1787 curchar = BIDI_EOB;
1788 bidi_it->ch_len = 1;
1789 bidi_it->nchars = 1;
1790 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1791 bidi_it->disp_prop = 0;
1793 else
1795 /* LRI, RLI, and FSI increment, and PDF decrements, the
1796 embedding level of the _following_ characters, so we must
1797 first look at the type of the previous character to support
1798 that. */
1799 switch (prev_type)
1801 case RLI: /* X5a */
1802 if (current_level < BIDI_MAXDEPTH
1803 && bidi_it->invalid_levels == 0
1804 && bidi_it->invalid_isolates == 0)
1806 new_level = ((current_level + 1) & ~1) + 1;
1807 bidi_it->isolate_level++;
1808 bidi_push_embedding_level (bidi_it, new_level,
1809 NEUTRAL_DIR, true);
1811 else
1812 bidi_it->invalid_isolates++;
1813 break;
1814 case LRI: /* X5b */
1815 if (current_level < BIDI_MAXDEPTH - 1
1816 && bidi_it->invalid_levels == 0
1817 && bidi_it->invalid_isolates == 0)
1819 new_level = ((current_level + 2) & ~1);
1820 bidi_it->isolate_level++;
1821 bidi_push_embedding_level (bidi_it, new_level,
1822 NEUTRAL_DIR, true);
1824 else
1825 bidi_it->invalid_isolates++;
1826 break;
1827 case PDF: /* X7 */
1828 if (!bidi_it->invalid_isolates)
1830 if (bidi_it->invalid_levels)
1831 bidi_it->invalid_levels--;
1832 else if (!isolate_status && bidi_it->stack_idx >= 1)
1833 new_level = bidi_pop_embedding_level (bidi_it);
1835 break;
1836 default:
1837 eassert (prev_type != FSI);
1838 /* Nothing. */
1839 break;
1841 /* Fetch the character at BYTEPOS. If it is covered by a
1842 display string, treat the entire run of covered characters as
1843 a single character u+FFFC. */
1844 curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
1845 &bidi_it->disp_pos, &bidi_it->disp_prop,
1846 &bidi_it->string, bidi_it->w,
1847 bidi_it->frame_window_p,
1848 &bidi_it->ch_len, &bidi_it->nchars);
1850 bidi_it->ch = curchar;
1851 bidi_it->resolved_level = new_level;
1853 /* Don't apply directional override here, as all the types we handle
1854 below will not be affected by the override anyway, and we need
1855 the original type unaltered. The override will be applied in
1856 bidi_resolve_weak. */
1857 type = bidi_get_type (curchar, NEUTRAL_DIR);
1858 bidi_it->orig_type = type;
1859 bidi_check_type (bidi_it->orig_type);
1861 bidi_it->type_after_wn = UNKNOWN_BT;
1863 switch (type)
1865 case RLE: /* X2 */
1866 case RLO: /* X4 */
1867 bidi_it->type_after_wn = type;
1868 bidi_check_type (bidi_it->type_after_wn);
1869 type = WEAK_BN; /* X9/Retaining */
1870 if (new_level < BIDI_MAXDEPTH
1871 && bidi_it->invalid_levels == 0
1872 && bidi_it->invalid_isolates == 0)
1874 /* Compute the least odd embedding level greater than
1875 the current level. */
1876 new_level = ((new_level + 1) & ~1) + 1;
1877 if (bidi_it->type_after_wn == RLE)
1878 override = NEUTRAL_DIR;
1879 else
1880 override = R2L;
1881 bidi_push_embedding_level (bidi_it, new_level, override, false);
1882 bidi_it->resolved_level = new_level;
1884 else
1886 if (bidi_it->invalid_isolates == 0)
1887 bidi_it->invalid_levels++;
1889 break;
1890 case LRE: /* X3 */
1891 case LRO: /* X5 */
1892 bidi_it->type_after_wn = type;
1893 bidi_check_type (bidi_it->type_after_wn);
1894 type = WEAK_BN; /* X9/Retaining */
1895 if (new_level < BIDI_MAXDEPTH - 1
1896 && bidi_it->invalid_levels == 0
1897 && bidi_it->invalid_isolates == 0)
1899 /* Compute the least even embedding level greater than
1900 the current level. */
1901 new_level = ((new_level + 2) & ~1);
1902 if (bidi_it->type_after_wn == LRE)
1903 override = NEUTRAL_DIR;
1904 else
1905 override = L2R;
1906 bidi_push_embedding_level (bidi_it, new_level, override, false);
1907 bidi_it->resolved_level = new_level;
1909 else
1911 if (bidi_it->invalid_isolates == 0)
1912 bidi_it->invalid_levels++;
1914 break;
1915 case FSI: /* X5c */
1916 end = string_p ? bidi_it->string.schars : ZV;
1917 disp_pos = bidi_it->disp_pos;
1918 disp_prop = bidi_it->disp_prop;
1919 nchars = bidi_it->nchars;
1920 ch_len = bidi_it->ch_len;
1921 typ1 = find_first_strong_char (bidi_it->charpos,
1922 bidi_it->bytepos, end,
1923 &disp_pos, &disp_prop,
1924 &bidi_it->string, bidi_it->w,
1925 string_p, bidi_it->frame_window_p,
1926 &ch_len, &nchars, true);
1927 if (typ1 != STRONG_R && typ1 != STRONG_AL)
1929 type = LRI;
1930 goto fsi_as_lri;
1932 else
1933 type = RLI;
1934 /* FALLTHROUGH */
1935 case RLI: /* X5a */
1936 if (override == NEUTRAL_DIR)
1937 bidi_it->type_after_wn = type;
1938 else /* Unicode 8.0 correction. */
1939 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
1940 bidi_check_type (bidi_it->type_after_wn);
1941 break;
1942 case LRI: /* X5b */
1943 fsi_as_lri:
1944 if (override == NEUTRAL_DIR)
1945 bidi_it->type_after_wn = type;
1946 else /* Unicode 8.0 correction. */
1947 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
1948 bidi_check_type (bidi_it->type_after_wn);
1949 break;
1950 case PDI: /* X6a */
1951 if (bidi_it->invalid_isolates)
1952 bidi_it->invalid_isolates--;
1953 else if (bidi_it->isolate_level > 0)
1955 bidi_it->invalid_levels = 0;
1956 while (!bidi_it->level_stack[bidi_it->stack_idx].isolate_status)
1957 bidi_pop_embedding_level (bidi_it);
1958 eassert (bidi_it->stack_idx > 0);
1959 new_level = bidi_pop_embedding_level (bidi_it);
1960 bidi_it->isolate_level--;
1962 bidi_it->resolved_level = new_level;
1963 /* Unicode 8.0 correction. */
1964 if (bidi_it->level_stack[bidi_it->stack_idx].override == L2R)
1965 bidi_it->type_after_wn = STRONG_L;
1966 else if (bidi_it->level_stack[bidi_it->stack_idx].override == R2L)
1967 bidi_it->type_after_wn = STRONG_R;
1968 else
1969 bidi_it->type_after_wn = type;
1970 break;
1971 case PDF: /* X7 */
1972 bidi_it->type_after_wn = type;
1973 bidi_check_type (bidi_it->type_after_wn);
1974 type = WEAK_BN; /* X9/Retaining */
1975 break;
1976 default:
1977 /* Nothing. */
1978 break;
1981 bidi_it->type = type;
1982 bidi_check_type (bidi_it->type);
1984 if (bidi_it->type == NEUTRAL_B) /* X8 */
1986 bidi_set_paragraph_end (bidi_it);
1987 /* This is needed by bidi_resolve_weak below, and in L1. */
1988 bidi_it->type_after_wn = bidi_it->type;
1991 eassert (bidi_it->resolved_level >= 0);
1992 return bidi_it->resolved_level;
1995 static bool
1996 bidi_isolate_fmt_char (bidi_type_t ch_type)
1998 return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
2001 /* Advance in the buffer/string, resolve weak types and return the
2002 type of the next character after weak type resolution. */
2003 static bidi_type_t
2004 bidi_resolve_weak (struct bidi_it *bidi_it)
2006 bidi_type_t type;
2007 bidi_dir_t override;
2008 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2009 int new_level = bidi_resolve_explicit (bidi_it);
2010 int next_char;
2011 bidi_type_t type_of_next;
2012 struct bidi_it saved_it;
2013 ptrdiff_t eob
2014 = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
2015 ? bidi_it->string.schars : ZV);
2017 type = bidi_it->type;
2018 override = bidi_it->level_stack[bidi_it->stack_idx].override;
2020 eassert (!(type == UNKNOWN_BT
2021 || type == LRE
2022 || type == LRO
2023 || type == RLE
2024 || type == RLO
2025 || type == PDF));
2027 eassert (prev_level >= 0);
2028 if (bidi_it->type == NEUTRAL_B)
2030 /* We've got a new isolating sequence, compute the directional
2031 type of sos and initialize per-run variables (UAX#9, clause
2032 X10). */
2033 bidi_set_sos_type (bidi_it, prev_level, new_level);
2035 if (type == NEUTRAL_S || type == NEUTRAL_WS
2036 || type == WEAK_BN || type == STRONG_AL)
2037 bidi_it->type_after_wn = type; /* needed in L1 */
2038 bidi_check_type (bidi_it->type_after_wn);
2040 /* Level and directional override status are already recorded in
2041 bidi_it, and do not need any change; see X6. */
2042 if (override == R2L) /* X6 */
2043 type = STRONG_R;
2044 else if (override == L2R)
2045 type = STRONG_L;
2046 else
2048 if (type == WEAK_NSM) /* W1 */
2050 /* Note that we don't need to consider the case where the
2051 prev character has its type overridden by an RLO or LRO,
2052 because then either the type of this NSM would have been
2053 also overridden, or the previous character is outside the
2054 current level run, and thus not relevant to this NSM.
2055 This is why NSM gets the type_after_wn of the previous
2056 character. */
2057 /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT. */
2058 if (bidi_it->prev.type_after_wn != UNKNOWN_BT
2059 /* If type_after_wn is NEUTRAL_B, this NSM is at sos. */
2060 && bidi_it->prev.type_after_wn != NEUTRAL_B)
2062 if (bidi_isolate_fmt_char (bidi_it->prev.type_after_wn))
2064 /* From W1: "Note that in an isolating run sequence,
2065 an isolate initiator followed by an NSM or any
2066 type other than PDI must be an overflow isolate
2067 initiator." */
2068 eassert (bidi_it->invalid_isolates > 0);
2069 type = NEUTRAL_ON;
2071 else
2073 type = bidi_it->prev.type_after_wn;
2074 /* Unicode 8.0 correction for N0. */
2075 if (type == NEUTRAL_ON
2076 && bidi_it->prev.bracket_resolved
2077 && (bidi_it->prev.type == STRONG_L
2078 || bidi_it->prev.type == STRONG_R))
2079 type = bidi_it->prev.type;
2082 else if (bidi_it->sos == R2L)
2083 type = STRONG_R;
2084 else if (bidi_it->sos == L2R)
2085 type = STRONG_L;
2086 else /* shouldn't happen! */
2087 emacs_abort ();
2089 if (type == WEAK_EN /* W2 */
2090 && bidi_it->last_strong.type_after_wn == STRONG_AL)
2091 type = WEAK_AN;
2092 else if (type == STRONG_AL) /* W3 */
2093 type = STRONG_R;
2094 else if ((type == WEAK_ES /* W4 */
2095 && bidi_it->prev.type_after_wn == WEAK_EN
2096 && bidi_it->prev.orig_type == WEAK_EN)
2097 || (type == WEAK_CS
2098 && ((bidi_it->prev.type_after_wn == WEAK_EN
2099 && bidi_it->prev.orig_type == WEAK_EN)
2100 || bidi_it->prev.type_after_wn == WEAK_AN)))
2102 const unsigned char *s
2103 = (STRINGP (bidi_it->string.lstring)
2104 ? SDATA (bidi_it->string.lstring)
2105 : bidi_it->string.s);
2107 next_char = (bidi_it->charpos + bidi_it->nchars >= eob
2108 ? BIDI_EOB
2109 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
2110 s, bidi_it->string.unibyte));
2111 type_of_next = bidi_get_type (next_char, override);
2113 if (type_of_next == WEAK_BN
2114 || bidi_explicit_dir_char (next_char))
2116 bidi_copy_it (&saved_it, bidi_it);
2117 while (bidi_resolve_explicit (bidi_it) == new_level
2118 && bidi_it->type == WEAK_BN)
2119 type_of_next = bidi_it->type;
2120 bidi_copy_it (bidi_it, &saved_it);
2123 /* If the next character is EN, but the last strong-type
2124 character is AL, that next EN will be changed to AN when
2125 we process it in W2 above. So in that case, this ES
2126 should not be changed into EN. */
2127 if (type == WEAK_ES
2128 && type_of_next == WEAK_EN
2129 && bidi_it->last_strong.type_after_wn != STRONG_AL)
2130 type = WEAK_EN;
2131 else if (type == WEAK_CS)
2133 if (bidi_it->prev.type_after_wn == WEAK_AN
2134 && (type_of_next == WEAK_AN
2135 /* If the next character is EN, but the last
2136 strong-type character is AL, EN will be later
2137 changed to AN when we process it in W2 above.
2138 So in that case, this ES should not be
2139 changed into EN. */
2140 || (type_of_next == WEAK_EN
2141 && bidi_it->last_strong.type_after_wn == STRONG_AL)))
2142 type = WEAK_AN;
2143 else if (bidi_it->prev.type_after_wn == WEAK_EN
2144 && type_of_next == WEAK_EN
2145 && bidi_it->last_strong.type_after_wn != STRONG_AL)
2146 type = WEAK_EN;
2149 else if (type == WEAK_ET /* W5: ET with EN before or after it */
2150 || type == WEAK_BN) /* W5/Retaining */
2152 if (bidi_it->prev.type_after_wn == WEAK_EN) /* ET/BN w/EN before it */
2153 type = WEAK_EN;
2154 else if (bidi_it->next_en_pos > bidi_it->charpos
2155 && bidi_it->next_en_type != WEAK_BN)
2157 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
2158 type = WEAK_EN;
2160 else if (bidi_it->next_en_pos >=0)
2162 /* We overstepped the last known position for ET
2163 resolution but there could be other such characters
2164 in this paragraph (when we are sure there are no more
2165 such positions, we set next_en_pos to a negative
2166 value). Try to find the next position for ET
2167 resolution. */
2168 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
2169 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
2170 ? SDATA (bidi_it->string.lstring)
2171 : bidi_it->string.s);
2173 if (bidi_it->nchars <= 0)
2174 emacs_abort ();
2175 next_char
2176 = (bidi_it->charpos + bidi_it->nchars >= eob
2177 ? BIDI_EOB
2178 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
2179 bidi_it->string.unibyte));
2180 type_of_next = bidi_get_type (next_char, override);
2182 if (type_of_next == WEAK_ET
2183 || type_of_next == WEAK_BN
2184 || bidi_explicit_dir_char (next_char))
2186 bidi_copy_it (&saved_it, bidi_it);
2187 while (bidi_resolve_explicit (bidi_it) == new_level
2188 && (bidi_it->type == WEAK_BN
2189 || bidi_it->type == WEAK_ET))
2190 type_of_next = bidi_it->type;
2191 if (type == WEAK_BN
2192 && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
2194 /* If we entered the above loop with a BN that
2195 changes the level, the type of next
2196 character, which is in a different level, is
2197 not relevant to resolving this series of ET
2198 and BN. */
2199 en_pos = saved_it.charpos;
2200 type_of_next = type;
2202 else
2203 en_pos = bidi_it->charpos;
2204 bidi_copy_it (bidi_it, &saved_it);
2206 /* Remember this position, to speed up processing of the
2207 next ETs. */
2208 bidi_it->next_en_pos = en_pos;
2209 if (type_of_next == WEAK_EN)
2211 /* If the last strong character is AL, the EN we've
2212 found will become AN when we get to it (W2). */
2213 if (bidi_it->last_strong.type_after_wn == STRONG_AL)
2214 type_of_next = WEAK_AN;
2215 else if (type == WEAK_BN)
2216 type = NEUTRAL_ON; /* W6/Retaining */
2217 else
2218 type = WEAK_EN;
2220 else if (type_of_next == NEUTRAL_B)
2221 /* Record the fact that there are no more ENs from
2222 here to the end of paragraph, to avoid entering the
2223 loop above ever again in this paragraph. */
2224 bidi_it->next_en_pos = -1;
2225 /* Record the type of the character where we ended our search. */
2226 bidi_it->next_en_type = type_of_next;
2231 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
2232 || (type == WEAK_BN
2233 && (bidi_it->prev.type_after_wn == WEAK_CS /* W6/Retaining */
2234 || bidi_it->prev.type_after_wn == WEAK_ES
2235 || bidi_it->prev.type_after_wn == WEAK_ET)))
2236 type = NEUTRAL_ON;
2238 /* Store the type we've got so far, before we clobber it with strong
2239 types in W7 and while resolving neutral types. But leave alone
2240 the original types that were recorded above, because we will need
2241 them for the L1 clause. */
2242 if (bidi_it->type_after_wn == UNKNOWN_BT)
2243 bidi_it->type_after_wn = type;
2244 bidi_check_type (bidi_it->type_after_wn);
2246 if (type == WEAK_EN) /* W7 */
2248 if ((bidi_it->last_strong.type_after_wn == STRONG_L)
2249 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
2250 type = STRONG_L;
2253 bidi_it->type = type;
2254 bidi_check_type (bidi_it->type);
2255 return type;
2258 /* Resolve the type of a neutral character according to the type of
2259 surrounding strong text and the current embedding level. */
2260 static bidi_type_t
2261 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2263 /* N1: "European and Arabic numbers act as if they were R in terms
2264 of their influence on NIs." */
2265 if (next_type == WEAK_EN || next_type == WEAK_AN)
2266 next_type = STRONG_R;
2267 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
2268 prev_type = STRONG_R;
2270 if (next_type == prev_type) /* N1 */
2271 return next_type;
2272 else if ((lev & 1) == 0) /* N2 */
2273 return STRONG_L;
2274 else
2275 return STRONG_R;
2278 static bidi_bracket_type_t
2279 bidi_paired_bracket_type (int c)
2281 if (c == BIDI_EOB)
2282 return BIDI_BRACKET_NONE;
2283 if (c < 0 || c > MAX_CHAR)
2284 emacs_abort ();
2286 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
2289 #define FLAG_EMBEDDING_INSIDE 1
2290 #define FLAG_OPPOSITE_INSIDE 2
2291 #define FLAG_EMBEDDING_OUTSIDE 4
2292 #define FLAG_OPPOSITE_OUTSIDE 8
2294 /* A data type used in the stack maintained by
2295 bidi_resolve_bracket_pairs below. */
2296 typedef struct bpa_stack_entry {
2297 int close_bracket_char;
2298 int open_bracket_idx;
2299 #ifdef ENABLE_CHECKING
2300 ptrdiff_t open_bracket_pos;
2301 #endif
2302 unsigned flags : 4;
2303 } bpa_stack_entry;
2305 /* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
2306 BPA stack, which should be more than enough for actual bidi text. */
2307 #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2309 #ifdef ENABLE_CHECKING
2310 # define STORE_BRACKET_CHARPOS \
2311 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
2312 #else
2313 # define STORE_BRACKET_CHARPOS /* nothing */
2314 #endif
2316 #define PUSH_BPA_STACK(EMBEDDING_LEVEL, LAST_STRONG) \
2317 do { \
2318 bpa_sp++; \
2319 if (bpa_sp >= MAX_BPA_STACK) \
2320 goto bpa_give_up; \
2321 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \
2322 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2323 STORE_BRACKET_CHARPOS; \
2324 if (((EMBEDDING_LEVEL) & 1) == 0) \
2325 bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L \
2326 ? FLAG_EMBEDDING_OUTSIDE \
2327 : FLAG_OPPOSITE_OUTSIDE); \
2328 else \
2329 bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L \
2330 ? FLAG_OPPOSITE_OUTSIDE \
2331 : FLAG_EMBEDDING_OUTSIDE); \
2332 } while (0)
2335 /* This function implements BPA, the Bidi Parenthesis Algorithm,
2336 described in BD16 and N0 of UAX#9. */
2337 static bidi_type_t
2338 bidi_resolve_bracket_pairs (struct bidi_it *bidi_it)
2340 bidi_bracket_type_t btype;
2341 bidi_type_t type = bidi_it->type;
2343 /* When scanning backwards, we don't expect any unresolved bidi
2344 bracket characters. */
2345 if (bidi_it->scan_dir != 1)
2346 emacs_abort ();
2348 btype = bidi_paired_bracket_type (bidi_it->ch);
2349 if (btype == BIDI_BRACKET_OPEN)
2351 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2352 int bpa_sp = -1;
2353 struct bidi_it saved_it;
2354 bidi_type_t last_strong;
2355 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2357 eassert (MAX_BPA_STACK >= 100);
2358 bidi_copy_it (&saved_it, bidi_it);
2359 last_strong = bidi_it->prev_for_neutral.type;
2361 while (1)
2363 int old_sidx, new_sidx;
2364 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2366 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2367 if (btype == BIDI_BRACKET_OPEN)
2368 PUSH_BPA_STACK (embedding_level, last_strong);
2369 else if (btype == BIDI_BRACKET_CLOSE)
2371 int sp = bpa_sp;
2372 int curchar = bidi_it->ch;
2374 eassert (sp >= 0);
2375 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2376 sp--;
2377 if (sp >= 0)
2379 /* Resolve the bracket type according to N0. */
2380 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE) /* N0b */
2381 type = ((embedding_level & 1) ? STRONG_R : STRONG_L);
2382 else if ((bpa_stack[sp].flags /* N0c1 */
2383 & (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE))
2384 == (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE))
2385 type = ((embedding_level & 1) ? STRONG_L : STRONG_R);
2386 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE) /*N0c2*/
2387 type = ((embedding_level & 1) ? STRONG_R : STRONG_L);
2389 /* Update and cache the closing bracket. */
2390 bidi_it->type = type;
2391 bidi_it->bracket_resolved = 1;
2392 bidi_cache_iterator_state (bidi_it, 0);
2393 /* Update and cache the corresponding opening bracket. */
2394 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2395 bidi_it);
2396 #ifdef ENABLE_CHECKING
2397 eassert (bpa_stack[sp].open_bracket_pos == bidi_it->charpos);
2398 #endif
2399 bidi_it->type = type;
2400 bidi_it->bracket_resolved = 1;
2401 bidi_cache_iterator_state (bidi_it, 0);
2402 bpa_sp = sp - 1;
2403 if (bpa_sp < 0)
2404 break;
2406 else
2407 bidi_it->bracket_resolved = 1;
2409 else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
2411 unsigned flag;
2412 int sp;
2414 /* Update the "inside" flags of all the slots on the stack. */
2415 switch (bidi_it->type)
2417 case STRONG_L:
2418 flag = ((embedding_level & 1) == 0
2419 ? FLAG_EMBEDDING_INSIDE
2420 : FLAG_OPPOSITE_INSIDE);
2421 last_strong = STRONG_L;
2422 break;
2423 case STRONG_R:
2424 case WEAK_EN:
2425 case WEAK_AN:
2426 flag = ((embedding_level & 1) == 1
2427 ? FLAG_EMBEDDING_INSIDE
2428 : FLAG_OPPOSITE_INSIDE);
2429 last_strong = STRONG_R;
2430 break;
2431 default:
2432 break;
2434 for (sp = bpa_sp; sp >= 0; sp--)
2435 bpa_stack[sp].flags |= flag;
2436 /* FIXME: Pay attention to types that can be
2437 next_for_neutral, and when found, update cached
2438 states for which it is relevant. */
2440 /* Record the info about the previous character, so that it
2441 will be cached with this state. */
2442 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
2443 && bidi_it->type != WEAK_BN)
2444 bidi_remember_char (&bidi_it->prev, bidi_it);
2445 old_sidx = bidi_it->stack_idx;
2446 type = bidi_resolve_weak (bidi_it);
2447 /* Skip level runs excluded from this isolating run sequence. */
2448 new_sidx = bidi_it->stack_idx;
2449 if (bidi_it->level_stack[new_sidx].level > current_level
2450 && (bidi_it->level_stack[new_sidx].isolate_status
2451 || (new_sidx > old_sidx + 1
2452 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2454 while (bidi_it->level_stack[bidi_it->stack_idx].level
2455 > current_level)
2457 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2458 type = bidi_resolve_weak (bidi_it);
2461 if (type == NEUTRAL_B
2462 || (bidi_it->level_stack[bidi_it->stack_idx].level
2463 != current_level))
2465 bpa_give_up:
2466 /* We've marched all the way to the end of this
2467 isolating run sequence, and didn't find matching
2468 closing brackets for some opening brackets. Unwind
2469 whatever is left on the BPA stack, and mark each
2470 bracket there as BPA-resolved. */
2471 while (bpa_sp >= 0)
2473 bidi_cache_fetch_state (bpa_stack[bpa_sp].open_bracket_idx,
2474 bidi_it);
2475 #ifdef ENABLE_CHECKING
2476 eassert (bpa_stack[bpa_sp].open_bracket_pos
2477 == bidi_it->charpos);
2478 #endif
2479 bidi_it->bracket_resolved = 1;
2480 bidi_cache_iterator_state (bidi_it, 0);
2481 bpa_sp--;
2483 type = saved_it.type;
2484 break;
2486 if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
2487 btype = bidi_paired_bracket_type (bidi_it->ch);
2488 else
2489 btype = BIDI_BRACKET_NONE;
2491 bidi_check_type (type);
2493 bidi_copy_it (bidi_it, &saved_it);
2494 bidi_it->type = type;
2495 bidi_it->bracket_resolved = 1;
2498 return type;
2501 static bidi_type_t
2502 bidi_resolve_neutral (struct bidi_it *bidi_it)
2504 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2505 bidi_type_t type = bidi_resolve_weak (bidi_it);
2506 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2507 bool is_neutral;
2508 int ch = bidi_it->ch;
2510 eassert (type == STRONG_R
2511 || type == STRONG_L
2512 || type == WEAK_BN
2513 || type == WEAK_EN
2514 || type == WEAK_AN
2515 || type == NEUTRAL_B
2516 || type == NEUTRAL_S
2517 || type == NEUTRAL_WS
2518 || type == NEUTRAL_ON
2519 || type == LRI
2520 || type == RLI
2521 || type == PDI);
2523 eassert (prev_level >= 0);
2524 eassert (current_level >= 0);
2526 /* FIXME: Insert the code for N0 here. */
2527 if (type == NEUTRAL_ON
2528 && bidi_paired_bracket_type (ch) != BIDI_BRACKET_NONE)
2530 if (bidi_cache_idx > bidi_cache_start
2531 && bidi_cache_find (bidi_it->charpos, 1, bidi_it) != UNKNOWN_BT
2532 && bidi_it->bracket_resolved)
2533 type = bidi_it->type;
2534 else
2535 type = bidi_resolve_bracket_pairs (bidi_it);
2538 is_neutral = bidi_get_category (type) == NEUTRAL;
2540 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2541 we are already at paragraph end. */
2542 && (is_neutral || bidi_isolate_fmt_char (type)))
2543 /* N1-N2/Retaining */
2544 || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
2546 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
2547 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2548 bidi_it->next_for_neutral.type,
2549 current_level);
2550 /* The next two "else if" clauses are shortcuts for the
2551 important special case when we have a long sequence of
2552 neutral or WEAK_BN characters, such as whitespace or nulls or
2553 other control characters, on the base embedding level of the
2554 paragraph, and that sequence goes all the way to the end of
2555 the paragraph and follows a character whose resolved
2556 directionality is identical to the base embedding level.
2557 (This is what happens in a buffer with plain L2R text that
2558 happens to include long sequences of control characters.) By
2559 virtue of N1, the result of examining this long sequence will
2560 always be either STRONG_L or STRONG_R, depending on the base
2561 embedding level. So we use this fact directly instead of
2562 entering the expensive loop in the "else" clause. */
2563 else if (current_level == 0
2564 && bidi_it->prev_for_neutral.type == STRONG_L
2565 && !bidi_explicit_dir_char (bidi_it->ch)
2566 && !bidi_isolate_fmt_char (type))
2567 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2568 STRONG_L, current_level);
2569 else if (/* current level is 1 */
2570 current_level == 1
2571 /* base embedding level is also 1 */
2572 && bidi_it->level_stack[0].level == 1
2573 /* previous character is one of those considered R for
2574 the purposes of W5 */
2575 && (bidi_it->prev_for_neutral.type == STRONG_R
2576 || bidi_it->prev_for_neutral.type == WEAK_EN
2577 || bidi_it->prev_for_neutral.type == WEAK_AN)
2578 && !bidi_explicit_dir_char (bidi_it->ch)
2579 && !bidi_isolate_fmt_char (type))
2580 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2581 STRONG_R, current_level);
2582 else
2584 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
2585 the assumption of batch-style processing; see clauses W4,
2586 W5, and especially N1, which require to look far forward
2587 (as well as back) in the buffer/string. May the fleas of
2588 a thousand camels infest the armpits of those who design
2589 supposedly general-purpose algorithms by looking at their
2590 own implementations, and fail to consider other possible
2591 implementations! */
2592 struct bidi_it saved_it;
2593 bidi_type_t next_type;
2594 bool adjacent_to_neutrals = is_neutral;
2596 if (bidi_it->scan_dir == -1)
2597 emacs_abort ();
2599 bidi_copy_it (&saved_it, bidi_it);
2600 /* Scan the text forward until we find the first non-neutral
2601 character, and then use that to resolve the neutral we
2602 are dealing with now. We also cache the scanned iterator
2603 states, to salvage some of the effort later. */
2604 do {
2605 int old_sidx, new_sidx;
2607 /* Paragraph separators have their levels fully resolved
2608 at this point, so cache them as resolved. */
2609 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2610 /* Record the info about the previous character, so that
2611 it will be cached with this state. */
2612 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
2613 && bidi_it->type != WEAK_BN)
2614 bidi_remember_char (&bidi_it->prev, bidi_it);
2615 old_sidx = bidi_it->stack_idx;
2616 type = bidi_resolve_weak (bidi_it);
2617 /* Skip level runs excluded from this isolating run sequence. */
2618 new_sidx = bidi_it->stack_idx;
2619 if (bidi_it->level_stack[new_sidx].level > current_level
2620 && (bidi_it->level_stack[new_sidx].isolate_status
2621 /* This is for when we have an isolate initiator
2622 immediately followed by an embedding or
2623 override initiator, in which case we get the
2624 level stack pushed twice by the single call to
2625 bidi_resolve_weak above. */
2626 || (new_sidx > old_sidx + 1
2627 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2629 while (bidi_it->level_stack[bidi_it->stack_idx].level
2630 > current_level)
2632 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2633 type = bidi_resolve_weak (bidi_it);
2636 if (!adjacent_to_neutrals
2637 && (bidi_get_category (type) == NEUTRAL
2638 || bidi_isolate_fmt_char (type)))
2639 adjacent_to_neutrals = true;
2640 } while (!(type == NEUTRAL_B
2641 || (type != WEAK_BN
2642 && bidi_get_category (type) != NEUTRAL
2643 && !bidi_isolate_fmt_char (type))
2644 /* This is all per level run, so stop when we
2645 reach the end of this level run. */
2646 || (bidi_it->level_stack[bidi_it->stack_idx].level
2647 != current_level)));
2649 /* Record the character we stopped at. */
2650 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
2652 if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
2653 || type == NEUTRAL_B)
2655 /* Marched all the way to the end of this level run. We
2656 need to use the eos type, whose information is stored
2657 by bidi_set_sos_type in the prev_for_neutral
2658 member. */
2659 if (adjacent_to_neutrals)
2660 next_type = bidi_it->prev_for_neutral.type;
2661 else
2663 /* This is a BN which does not adjoin neutrals.
2664 Leave its type alone. */
2665 bidi_copy_it (bidi_it, &saved_it);
2666 return bidi_it->type;
2669 else
2671 switch (type)
2673 case STRONG_L:
2674 case STRONG_R:
2675 case STRONG_AL:
2676 /* Actually, STRONG_AL cannot happen here, because
2677 bidi_resolve_weak converts it to STRONG_R, per W3. */
2678 eassert (type != STRONG_AL);
2679 next_type = type;
2680 break;
2681 case WEAK_EN:
2682 case WEAK_AN:
2683 /* N1: "European and Arabic numbers act as if they
2684 were R in terms of their influence on NIs." */
2685 next_type = STRONG_R;
2686 break;
2687 default:
2688 emacs_abort ();
2689 break;
2692 /* Resolve the type of all the NIs found during the above loop. */
2693 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
2694 next_type, current_level);
2695 /* Update next_for_neutral with the resolved type, so we
2696 could use it for all the other NIs up to the place where
2697 we exited the loop. */
2698 saved_it.next_for_neutral.type = next_type;
2699 bidi_check_type (type);
2700 /* Update the character which caused us to enter the above loop. */
2701 saved_it.type = type;
2702 bidi_check_type (next_type);
2703 bidi_copy_it (bidi_it, &saved_it);
2706 return type;
2709 /* Given an iterator state in BIDI_IT, advance one character position
2710 in the buffer/string to the next character (in the logical order),
2711 resolve the bidi type of that next character, and return that
2712 type. */
2713 static bidi_type_t
2714 bidi_type_of_next_char (struct bidi_it *bidi_it)
2716 bidi_type_t type;
2718 /* This should always be called during a forward scan. */
2719 if (bidi_it->scan_dir != 1)
2720 emacs_abort ();
2722 type = bidi_resolve_neutral (bidi_it);
2724 return type;
2727 /* Given an iterator state BIDI_IT, advance one character position in
2728 the buffer/string to the next character (in the current scan
2729 direction), resolve the embedding and implicit levels of that next
2730 character, and return the resulting level. */
2731 static int
2732 bidi_level_of_next_char (struct bidi_it *bidi_it)
2734 bidi_type_t type;
2735 int level, prev_level = -1;
2736 struct bidi_saved_info next_for_neutral;
2737 ptrdiff_t next_char_pos = -2;
2738 bool need_to_update_cache = false;
2740 if (bidi_it->scan_dir == 1)
2742 ptrdiff_t eob
2743 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2744 ? bidi_it->string.schars : ZV);
2746 /* There's no sense in trying to advance if we hit end of text. */
2747 if (bidi_it->charpos >= eob)
2749 eassert (bidi_it->resolved_level >= 0);
2750 return bidi_it->resolved_level;
2753 /* Record the info about the previous character. */
2754 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
2755 && bidi_it->type != WEAK_BN)
2756 bidi_remember_char (&bidi_it->prev, bidi_it);
2757 if (bidi_it->type_after_wn == STRONG_R
2758 || bidi_it->type_after_wn == STRONG_L
2759 || bidi_it->type_after_wn == STRONG_AL)
2760 bidi_remember_char (&bidi_it->last_strong, bidi_it);
2761 /* FIXME: it sounds like we don't need both prev and
2762 prev_for_neutral members, but I'm leaving them both for now. */
2763 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2764 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2765 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2767 /* If we overstepped the characters used for resolving neutrals
2768 and whitespace, invalidate their info in the iterator. */
2769 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2770 bidi_it->next_for_neutral.type = UNKNOWN_BT;
2771 if (bidi_it->next_en_pos >= 0
2772 && bidi_it->charpos >= bidi_it->next_en_pos)
2774 bidi_it->next_en_pos = 0;
2775 bidi_it->next_en_type = UNKNOWN_BT;
2777 if (bidi_it->next_for_ws.type != UNKNOWN_BT
2778 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2779 bidi_it->next_for_ws.type = UNKNOWN_BT;
2781 /* This must be taken before we fill the iterator with the info
2782 about the next char. If we scan backwards, the iterator
2783 state must be already cached, so there's no need to know the
2784 embedding level of the previous character, since we will be
2785 returning to our caller shortly. */
2786 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2787 eassert (prev_level >= 0);
2789 next_for_neutral = bidi_it->next_for_neutral;
2791 /* Perhaps the character we want is already cached. If it is, the
2792 call to bidi_cache_find below will return a type other than
2793 UNKNOWN_BT. */
2794 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2796 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2797 ? 0 : 1);
2798 bidi_type_t prev_type = bidi_it->type;
2799 bidi_type_t type_for_neutral = bidi_it->next_for_neutral.type;
2800 ptrdiff_t pos_for_neutral = bidi_it->next_for_neutral.charpos;
2802 if (bidi_it->scan_dir > 0)
2804 if (bidi_it->nchars <= 0)
2805 emacs_abort ();
2806 next_char_pos = bidi_it->charpos + bidi_it->nchars;
2808 else if (bidi_it->charpos >= bob)
2809 /* Implementation note: we allow next_char_pos to be as low as
2810 0 for buffers or -1 for strings, and that is okay because
2811 that's the "position" of the sentinel iterator state we
2812 cached at the beginning of the iteration. */
2813 next_char_pos = bidi_it->charpos - 1;
2814 if (next_char_pos >= bob - 1)
2815 type = bidi_cache_find (next_char_pos, 0, bidi_it);
2816 else
2817 type = UNKNOWN_BT;
2819 /* For a sequence of BN and NI, copy the type from the previous
2820 character. This is because the loop in bidi_resolve_neutral
2821 that handles such sequences caches the characters it
2822 traverses, but does not (and cannot) store the
2823 next_for_neutral member for them, because it is only known
2824 when the loop ends. So when we find them in the cache, their
2825 type needs to be updated, but we don't have next_for_neutral
2826 to do that. However, whatever type is resolved as result of
2827 that loop, it will be the same for all the traversed
2828 characters, by virtue of N1 and N2. */
2829 if (type == WEAK_BN && bidi_it->scan_dir > 0
2830 && bidi_explicit_dir_char (bidi_it->ch)
2831 && type_for_neutral != UNKNOWN_BT
2832 && bidi_it->charpos < pos_for_neutral)
2834 type = prev_type;
2835 eassert (type != UNKNOWN_BT);
2838 else
2839 type = UNKNOWN_BT;
2840 if (type != UNKNOWN_BT)
2842 /* Don't lose the information for resolving neutrals! The
2843 cached states could have been cached before their
2844 next_for_neutral member was computed. If we are on our way
2845 forward, we can simply take the info from the previous
2846 state. */
2847 if (bidi_it->scan_dir == 1
2848 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2849 bidi_it->next_for_neutral = next_for_neutral;
2851 /* If resolved_level is -1, it means this state was cached
2852 before it was completely resolved, so we cannot return
2853 it. */
2854 if (bidi_it->resolved_level != -1)
2856 eassert (bidi_it->resolved_level >= 0);
2857 return bidi_it->resolved_level;
2859 else
2861 /* Take note when we've got a cached state that is not fully
2862 resolved, so that we could make sure we update the cache
2863 below, when we do resolve it. */
2864 need_to_update_cache = true;
2867 if (bidi_it->scan_dir == -1)
2868 /* If we are going backwards, the iterator state is already cached
2869 from previous scans, and should be fully resolved. */
2870 emacs_abort ();
2872 if (type == UNKNOWN_BT)
2873 type = bidi_type_of_next_char (bidi_it);
2875 if (type == NEUTRAL_B)
2877 eassert (bidi_it->resolved_level >= 0);
2878 return bidi_it->resolved_level;
2881 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2882 if (bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */
2883 || bidi_isolate_fmt_char (type))
2885 /* Make sure the data for resolving neutrals we are about to use
2886 is valid. */
2887 if (bidi_it->next_for_neutral.charpos <= bidi_it->charpos
2888 || bidi_it->next_for_neutral.type == UNKNOWN_BT)
2889 emacs_abort ();
2891 /* If the cached state shows a neutral character, it was not
2892 resolved by bidi_resolve_neutral, so do it now. */
2893 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2894 bidi_it->next_for_neutral.type,
2895 level);
2898 eassert ((type == STRONG_R
2899 || type == STRONG_L
2900 || type == WEAK_BN
2901 || type == WEAK_EN
2902 || type == WEAK_AN));
2903 bidi_it->type = type;
2904 bidi_check_type (bidi_it->type);
2906 /* For L1 below, we need to know, for each WS character, whether
2907 it belongs to a sequence of WS characters preceding a newline
2908 or a TAB or a paragraph separator. */
2909 if ((bidi_it->orig_type == NEUTRAL_WS
2910 || bidi_isolate_fmt_char (bidi_it->orig_type))
2911 && bidi_it->next_for_ws.type == UNKNOWN_BT)
2913 int ch;
2914 ptrdiff_t clen = bidi_it->ch_len;
2915 ptrdiff_t bpos = bidi_it->bytepos;
2916 ptrdiff_t cpos = bidi_it->charpos;
2917 ptrdiff_t disp_pos = bidi_it->disp_pos;
2918 ptrdiff_t nc = bidi_it->nchars;
2919 struct bidi_string_data bs = bidi_it->string;
2920 bidi_type_t chtype;
2921 bool fwp = bidi_it->frame_window_p;
2922 int dpp = bidi_it->disp_prop;
2924 if (bidi_it->nchars <= 0)
2925 emacs_abort ();
2926 do {
2927 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
2928 bidi_it->w, fwp, &clen, &nc);
2929 chtype = bidi_get_type (ch, NEUTRAL_DIR);
2930 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2931 || bidi_isolate_fmt_char (chtype)
2932 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2933 bidi_it->next_for_ws.type = chtype;
2934 bidi_check_type (bidi_it->next_for_ws.type);
2935 bidi_it->next_for_ws.charpos = cpos;
2938 /* Resolve implicit levels. */
2939 if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2940 || bidi_it->orig_type == NEUTRAL_S
2941 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2942 || (bidi_it->orig_type == NEUTRAL_WS
2943 && (bidi_it->next_for_ws.type == NEUTRAL_B
2944 || bidi_it->next_for_ws.type == NEUTRAL_S)))
2945 level = bidi_it->level_stack[0].level;
2946 else if ((level & 1) == 0) /* I1 */
2948 if (type == STRONG_R)
2949 level++;
2950 else if (type == WEAK_EN || type == WEAK_AN)
2951 level += 2;
2953 else /* I2 */
2955 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2956 level++;
2959 bidi_it->resolved_level = level;
2960 if (need_to_update_cache)
2961 bidi_cache_iterator_state (bidi_it, 1);
2962 return level;
2965 /* Move to the other edge of a level given by LEVEL. If END_FLAG,
2966 we are at the end of a level, and we need to prepare to
2967 resume the scan of the lower level.
2969 If this level's other edge is cached, we simply jump to it, filling
2970 the iterator structure with the iterator state on the other edge.
2971 Otherwise, we walk the buffer or string until we come back to the
2972 same level as LEVEL.
2974 Note: we are not talking here about a ``level run'' in the UAX#9
2975 sense of the term, but rather about a ``level'' which includes
2976 all the levels higher than it. In other words, given the levels
2977 like this:
2979 11111112222222333333334443343222222111111112223322111
2980 A B C
2982 and assuming we are at point A scanning left to right, this
2983 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2984 at point B. */
2985 static void
2986 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2988 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2989 ptrdiff_t idx;
2991 /* Try the cache first. */
2992 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2993 >= bidi_cache_start)
2994 bidi_cache_fetch_state (idx, bidi_it);
2995 else
2997 int new_level;
2999 /* If we are at end of level, its edges must be cached. */
3000 if (end_flag)
3001 emacs_abort ();
3003 bidi_cache_iterator_state (bidi_it, 1);
3004 do {
3005 new_level = bidi_level_of_next_char (bidi_it);
3006 bidi_cache_iterator_state (bidi_it, 1);
3007 } while (new_level >= level);
3011 void
3012 bidi_move_to_visually_next (struct bidi_it *bidi_it)
3014 int old_level, new_level, next_level;
3015 struct bidi_it sentinel;
3016 struct gcpro gcpro1;
3018 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
3019 emacs_abort ();
3021 if (bidi_it->scan_dir == 0)
3023 bidi_it->scan_dir = 1; /* default to logical order */
3026 /* The code below can call eval, and thus cause GC. If we are
3027 iterating a Lisp string, make sure it won't be GCed. */
3028 if (STRINGP (bidi_it->string.lstring))
3029 GCPRO1 (bidi_it->string.lstring);
3031 /* If we just passed a newline, initialize for the next line. */
3032 if (!bidi_it->first_elt
3033 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3034 bidi_line_init (bidi_it);
3036 /* Prepare the sentinel iterator state, and cache it. When we bump
3037 into it, scanning backwards, we'll know that the last non-base
3038 level is exhausted. */
3039 if (bidi_cache_idx == bidi_cache_start)
3041 bidi_copy_it (&sentinel, bidi_it);
3042 if (bidi_it->first_elt)
3044 sentinel.charpos--; /* cached charpos needs to be monotonic */
3045 sentinel.bytepos--;
3046 sentinel.ch = '\n'; /* doesn't matter, but why not? */
3047 sentinel.ch_len = 1;
3048 sentinel.nchars = 1;
3050 bidi_cache_iterator_state (&sentinel, 1);
3053 old_level = bidi_it->resolved_level;
3054 new_level = bidi_level_of_next_char (bidi_it);
3056 /* Reordering of resolved levels (clause L2) is implemented by
3057 jumping to the other edge of the level and flipping direction of
3058 scanning the text whenever we find a level change. */
3059 if (new_level != old_level)
3061 bool ascending = new_level > old_level;
3062 int level_to_search = ascending ? old_level + 1 : old_level;
3063 int incr = ascending ? 1 : -1;
3064 int expected_next_level = old_level + incr;
3066 /* Jump (or walk) to the other edge of this level. */
3067 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3068 /* Switch scan direction and peek at the next character in the
3069 new direction. */
3070 bidi_it->scan_dir = -bidi_it->scan_dir;
3072 /* The following loop handles the case where the resolved level
3073 jumps by more than one. This is typical for numbers inside a
3074 run of text with left-to-right embedding direction, but can
3075 also happen in other situations. In those cases the decision
3076 where to continue after a level change, and in what direction,
3077 is tricky. For example, given a text like below:
3079 abcdefgh
3080 11336622
3082 (where the numbers below the text show the resolved levels),
3083 the result of reordering according to UAX#9 should be this:
3085 efdcghba
3087 This is implemented by the loop below which flips direction
3088 and jumps to the other edge of the level each time it finds
3089 the new level not to be the expected one. The expected level
3090 is always one more or one less than the previous one. */
3091 next_level = bidi_peek_at_next_level (bidi_it);
3092 while (next_level != expected_next_level)
3094 /* If next_level is -1, it means we have an unresolved level
3095 in the cache, which at this point should not happen. If
3096 it does, we will infloop. */
3097 eassert (next_level >= 0);
3098 /* If next_level is not consistent with incr, we might
3099 infloop. */
3100 eassert (incr > 0
3101 ? next_level > expected_next_level
3102 : next_level < expected_next_level);
3103 expected_next_level += incr;
3104 level_to_search += incr;
3105 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3106 bidi_it->scan_dir = -bidi_it->scan_dir;
3107 next_level = bidi_peek_at_next_level (bidi_it);
3110 /* Finally, deliver the next character in the new direction. */
3111 next_level = bidi_level_of_next_char (bidi_it);
3114 /* Take note when we have just processed the newline that precedes
3115 the end of the paragraph. The next time we are about to be
3116 called, set_iterator_to_next will automatically reinit the
3117 paragraph direction, if needed. We do this at the newline before
3118 the paragraph separator, because the next character might not be
3119 the first character of the next paragraph, due to the bidi
3120 reordering, whereas we _must_ know the paragraph base direction
3121 _before_ we process the paragraph's text, since the base
3122 direction affects the reordering. */
3123 if (bidi_it->scan_dir == 1
3124 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3126 /* The paragraph direction of the entire string, once
3127 determined, is in effect for the entire string. Setting the
3128 separator limit to the end of the string prevents
3129 bidi_paragraph_init from being called automatically on this
3130 string. */
3131 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3132 bidi_it->separator_limit = bidi_it->string.schars;
3133 else if (bidi_it->bytepos < ZV_BYTE)
3135 ptrdiff_t sep_len
3136 = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
3137 bidi_it->bytepos + bidi_it->ch_len);
3138 if (bidi_it->nchars <= 0)
3139 emacs_abort ();
3140 if (sep_len >= 0)
3142 bidi_it->new_paragraph = 1;
3143 /* Record the buffer position of the last character of the
3144 paragraph separator. */
3145 bidi_it->separator_limit
3146 = bidi_it->charpos + bidi_it->nchars + sep_len;
3151 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
3153 /* If we are at paragraph's base embedding level and beyond the
3154 last cached position, the cache's job is done and we can
3155 discard it. */
3156 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3157 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
3158 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
3159 bidi_cache_reset ();
3160 /* But as long as we are caching during forward scan, we must
3161 cache each state, or else the cache integrity will be
3162 compromised: it assumes cached states correspond to buffer
3163 positions 1:1. */
3164 else
3165 bidi_cache_iterator_state (bidi_it, 1);
3168 if (STRINGP (bidi_it->string.lstring))
3169 UNGCPRO;
3172 /* This is meant to be called from within the debugger, whenever you
3173 wish to examine the cache contents. */
3174 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
3175 void
3176 bidi_dump_cached_states (void)
3178 ptrdiff_t i;
3179 int ndigits = 1;
3181 if (bidi_cache_idx == 0)
3183 fprintf (stderr, "The cache is empty.\n");
3184 return;
3186 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
3187 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
3189 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
3190 ndigits++;
3191 fputs ("ch ", stderr);
3192 for (i = 0; i < bidi_cache_idx; i++)
3193 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
3194 fputs ("\n", stderr);
3195 fputs ("lvl ", stderr);
3196 for (i = 0; i < bidi_cache_idx; i++)
3197 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
3198 fputs ("\n", stderr);
3199 fputs ("pos ", stderr);
3200 for (i = 0; i < bidi_cache_idx; i++)
3201 fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
3202 fputs ("\n", stderr);