* bidi.c (bidi_find_bracket_pairs): Initialize local var.
[emacs.git] / src / bidi.c
blob67eb59e7899c4fe3a19ea6e0eec6f42c226adf9a
1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software
3 Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the Reference Implementation and most other implementations,
26 this one is designed to be called once for every character in the
27 buffer or string. That way, we can leave intact the design of the
28 Emacs display engine, whereby an iterator object is used to
29 traverse buffer or string text character by character, and generate
30 the necessary data for displaying each character in 'struct glyph'
31 objects. (See xdisp.c for the details of that iteration.) The
32 functions on this file replace the original linear iteration in the
33 logical order of the text with a non-linear iteration in the visual
34 order, i.e. in the order characters should be shown on display.
36 The main entry point is bidi_move_to_visually_next. Each time it
37 is called, it finds the next character in the visual order, and
38 returns its information in a special structure. The caller is then
39 expected to process this character for display or any other
40 purposes, and call bidi_move_to_visually_next for the next
41 character. See the comments in bidi_move_to_visually_next for more
42 details about its algorithm that finds the next visual-order
43 character by resolving their levels on the fly.
45 Two other entry points are bidi_paragraph_init and
46 bidi_mirror_char. The first determines the base direction of a
47 paragraph, while the second returns the mirrored version of its
48 argument character.
50 A few auxiliary entry points are used to initialize the bidi
51 iterator for iterating an object (buffer or string), push and pop
52 the bidi iterator state, and save and restore the state of the bidi
53 cache.
55 If you want to understand the code, you will have to read it
56 together with the relevant portions of UAX#9. The comments include
57 references to UAX#9 rules, for that very reason.
59 A note about references to UAX#9 rules: if the reference says
60 something like "X9/Retaining", it means that you need to refer to
61 rule X9 and to its modifications described in the "Implementation
62 Notes" section of UAX#9, under "Retaining Format Codes".
64 Here's the overview of the design of the reordering engine
65 implemented by this file.
67 Basic implementation structure
68 ------------------------------
70 The sequential processing steps described by UAX#9 are implemented
71 as recursive levels of processing, all of which examine the next
72 character in the logical order. This hierarchy of processing looks
73 as follows, from the innermost (deepest) to the outermost level,
74 omitting some subroutines used by each level:
76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_brackets -- resolve "paired brackets" neutral types
80 bidi_resolve_neutral -- resolve neutral types
81 bidi_level_of_next_char -- resolve implicit levels
83 Each level calls the level below it, and works on the result
84 returned by the lower level, including all of its sub-levels.
86 Unlike all the levels below it, bidi_level_of_next_char can return
87 the information about either the next or previous character in the
88 logical order, depending on the current direction of scanning the
89 buffer or string. For the next character, it calls all the levels
90 below it; for the previous character, it uses the cache, described
91 below.
93 Thus, the result of calling bidi_level_of_next_char is the resolved
94 level of the next or the previous character in the logical order.
95 Based on this information, the function bidi_move_to_visually_next
96 finds the next character in the visual order and updates the
97 direction in which the buffer is scanned, either forward or
98 backward, to find the next character to be displayed. (Text is
99 scanned backwards when it needs to be reversed for display, i.e. if
100 the visual order is the inverse of the logical order.) This
101 implements the last, reordering steps of the UBA, by successively
102 calling bidi_level_of_next_char until the character of the required
103 embedding level is found; the scan direction is dynamically updated
104 as a side effect. See the commentary before the 'while' loop in
105 bidi_move_to_visually_next, for the details.
107 Fetching characters
108 -------------------
110 In a nutshell, fetching the next character boils down to calling
111 STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
112 string position. See bidi_fetch_char. However, if the next
113 character is "covered" by a display property of some kind,
114 bidi_fetch_char returns the u+FFFC "object replacement character"
115 that represents the entire run of text covered by the display
116 property. (The ch_len and nchars members of 'struct bidi_it'
117 reflect the length in bytes and characters of that text.) This is
118 so we reorder text on both sides of the display property as
119 appropriate for an image or embedded string. Similarly, text
120 covered by a display spec of the form '(space ...)', is replaced
121 with the u+2029 paragraph separator character, so such display
122 specs produce the same effect as a TAB under UBA. Both these
123 special characters are not actually displayed -- the display
124 property is displayed instead -- but just used to compute the
125 embedding level of the surrounding text so as to produce the
126 required effect.
128 Bidi iterator states
129 --------------------
131 The UBA is highly context dependent in some of its parts,
132 i.e. results of processing a character can generally depend on
133 characters very far away. The UAX#9 description of the UBA
134 prescribes a stateful processing of each character, whereby the
135 results of this processing depend on various state variables, such
136 as the current embedding level, level stack, and directional
137 override status. In addition, the UAX#9 description includes many
138 passages like this (from rule W2 in this case):
140 Search backward from each instance of a European number until the
141 first strong type (R, L, AL, or sos) is found. If an AL is found,
142 change the type of the European number to Arabic number.
144 To support this, we use a bidi iterator object, 'struct bidi_it',
145 which is a sub-structure of 'struct it' used by xdisp.c (see
146 dispextern.h for the definition of both of these structures). The
147 bidi iterator holds the entire state of the iteration required by
148 the UBA, and is updated as the text is traversed. In particular,
149 the embedding level of the current character being resolved is
150 recorded in the iterator state. To avoid costly searches backward
151 in support of rules like W2 above, the necessary character types
152 are also recorded in the iterator state as they are found during
153 the forward scan, and then used when such rules need to be applied.
154 (Forward scans cannot be avoided in this way; they need to be
155 performed at least once, and the results recorded in the iterator
156 state, to be reused until the forward scan oversteps the recorded
157 position.)
159 In this manner, the iterator state acts as a mini-cache of
160 contextual information required for resolving the level of the
161 current character by various UBA rules.
163 Caching of bidi iterator states
164 -------------------------------
166 As described above, the reordering engine uses the information
167 recorded in the bidi iterator state in order to resolve the
168 embedding level of the current character. When the reordering
169 engine needs to process the next character in the logical order, it
170 fetches it and applies to it all the UBA levels, updating the
171 iterator state as it goes. But when the buffer or string is
172 scanned backwards, i.e. in the reverse order of buffer/string
173 positions, the scanned characters were already processed during the
174 preceding forward scan (see bidi_find_other_level_edge). To avoid
175 costly re-processing of characters that were already processed
176 during the forward scan, the iterator states computed while
177 scanning forward are cached.
179 The cache is just a linear array of 'struct bidi_it' objects, which
180 is dynamically allocated and reallocated as needed, since the size
181 of the cache depends on the text being processed. We only need the
182 cache while processing embedded levels higher than the base
183 paragraph embedding level, because these higher levels require
184 changes in scan direction. Therefore, as soon as we are back to
185 the base embedding level, we can free the cache; see the calls to
186 bidi_cache_reset and bidi_cache_shrink, for the conditions to do
187 this.
189 The cache maintains the index of the next unused cache slot -- this
190 is where the next iterator state will be cached. The function
191 bidi_cache_iterator_state saves an instance of the state in the
192 cache and increments the unused slot index. The companion function
193 bidi_cache_find looks up a cached state that corresponds to a given
194 buffer/string position. All of the cached states must correspond
195 1:1 to the buffer or string region whose processing they reflect;
196 bidi.c will abort if it finds cache slots that violate this 1:1
197 correspondence.
199 When the parent iterator 'struct it' is pushed (see push_it in
200 xdisp.c) to pause the current iteration and start iterating over a
201 different object (e.g., a 'display' string that covers some buffer
202 text), the bidi iterator cache needs to be "pushed" as well, so
203 that a new empty cache could be used while iterating over the new
204 object. Later, when the new object is exhausted, and xdisp.c calls
205 pop_it, we need to "pop" the bidi cache as well and return to the
206 original cache. See bidi_push_it and bidi_pop_it for how this is
207 done.
209 Some functions of the display engine save copies of 'struct it' in
210 local variables, and restore them later. For examples, see
211 pos_visible_p and move_it_in_display_line_to in xdisp.c, and
212 window_scroll_pixel_based in window.c. When this happens, we need
213 to save and restore the bidi cache as well, because conceptually
214 the cache is part of the 'struct it' state, and needs to be in
215 perfect sync with the portion of the buffer/string that is being
216 processed. This saving and restoring of the cache state is handled
217 by bidi_shelve_cache and bidi_unshelve_cache, and the helper macros
218 SAVE_IT and RESTORE_IT defined on xdisp.c.
220 Note that, because reordering is implemented below the level in
221 xdisp.c that breaks glyphs into screen lines, we are violating
222 paragraph 3.4 of UAX#9. which mandates that line breaking shall be
223 done before reordering each screen line separately. However,
224 following UAX#9 to the letter in this matter goes against the basic
225 design of the Emacs display engine, and so we choose here this
226 minor deviation from the UBA letter in preference to redesign of
227 the display engine. The effect of this is only seen in continued
228 lines that are broken into screen lines in the middle of a run
229 whose direction is opposite to the paragraph's base direction.
231 Important design and implementation note: when the code needs to
232 scan far ahead, be sure to avoid such scans as much as possible
233 when the buffer/string doesn't contain any RTL characters. Users
234 of left-to-right scripts will never forgive you if you introduce
235 some slow-down due to bidi in situations that don't involve any
236 bidirectional text. See the large comment near the beginning of
237 bidi_resolve_neutral, for one situation where such shortcut was
238 necessary. */
240 #include <config.h>
241 #include <stdio.h>
243 #include "lisp.h"
244 #include "character.h"
245 #include "buffer.h"
246 #include "dispextern.h"
247 #include "region-cache.h"
249 static bool bidi_initialized = 0;
251 static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
253 #define BIDI_EOB (-1)
255 /* Data type for describing the bidirectional character categories. */
256 typedef enum {
257 UNKNOWN_BC,
258 NEUTRAL,
259 WEAK,
260 STRONG,
261 EXPLICIT_FORMATTING
262 } bidi_category_t;
264 static Lisp_Object paragraph_start_re, paragraph_separate_re;
265 static Lisp_Object Qparagraph_start, Qparagraph_separate;
268 /***********************************************************************
269 Utilities
270 ***********************************************************************/
272 /* Return the bidi type of a character CH, subject to the current
273 directional OVERRIDE. */
274 static bidi_type_t
275 bidi_get_type (int ch, bidi_dir_t override)
277 bidi_type_t default_type;
279 if (ch == BIDI_EOB)
280 return NEUTRAL_B;
281 if (ch < 0 || ch > MAX_CHAR)
282 emacs_abort ();
284 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
285 /* Every valid character code, even those that are unassigned by the
286 UCD, have some bidi-class property, according to
287 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
288 (= zero) code from CHAR_TABLE_REF, that's a bug. */
289 if (default_type == UNKNOWN_BT)
290 emacs_abort ();
292 switch (default_type)
294 case WEAK_BN:
295 case NEUTRAL_B:
296 case LRE:
297 case LRO:
298 case RLE:
299 case RLO:
300 case PDF:
301 case LRI:
302 case RLI:
303 case FSI:
304 case PDI:
305 return default_type;
306 default:
307 if (override == L2R)
308 return STRONG_L;
309 else if (override == R2L)
310 return STRONG_R;
311 else
312 return default_type;
316 static void
317 bidi_check_type (bidi_type_t type)
319 eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
322 /* Given a bidi TYPE of a character, return its category. */
323 static bidi_category_t
324 bidi_get_category (bidi_type_t type)
326 switch (type)
328 case UNKNOWN_BT:
329 return UNKNOWN_BC;
330 case STRONG_L:
331 case STRONG_R:
332 case STRONG_AL:
333 return STRONG;
334 case WEAK_EN:
335 case WEAK_ES:
336 case WEAK_ET:
337 case WEAK_AN:
338 case WEAK_CS:
339 case WEAK_NSM:
340 case WEAK_BN:
341 return WEAK;
342 case NEUTRAL_B:
343 case NEUTRAL_S:
344 case NEUTRAL_WS:
345 case NEUTRAL_ON:
346 return NEUTRAL;
347 case LRE:
348 case LRO:
349 case RLE:
350 case RLO:
351 case PDF:
352 case LRI:
353 case RLI:
354 case FSI:
355 case PDI:
356 return EXPLICIT_FORMATTING;
357 default:
358 emacs_abort ();
362 static bool
363 bidi_isolate_fmt_char (bidi_type_t ch_type)
365 return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
368 /* Return the mirrored character of C, if it has one. If C has no
369 mirrored counterpart, return C.
370 Note: The conditions in UAX#9 clause L4 regarding the surrounding
371 context must be tested by the caller. */
373 bidi_mirror_char (int c)
375 Lisp_Object val;
377 if (c == BIDI_EOB)
378 return c;
379 if (c < 0 || c > MAX_CHAR)
380 emacs_abort ();
382 val = CHAR_TABLE_REF (bidi_mirror_table, c);
383 if (INTEGERP (val))
385 int v;
387 /* When debugging, check before assigning to V, so that the check
388 isn't broken by undefined behavior due to int overflow. */
389 eassert (CHAR_VALID_P (XINT (val)));
391 v = XINT (val);
393 /* Minimal test we must do in optimized builds, to prevent weird
394 crashes further down the road. */
395 if (v < 0 || v > MAX_CHAR)
396 emacs_abort ();
398 return v;
401 return c;
404 /* Return the Bidi_Paired_Bracket_Type property of the character C. */
405 static bidi_bracket_type_t
406 bidi_paired_bracket_type (int c)
408 if (c == BIDI_EOB)
409 return BIDI_BRACKET_NONE;
410 if (c < 0 || c > MAX_CHAR)
411 emacs_abort ();
413 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
416 /* Determine the start-of-sequence (sos) directional type given the two
417 embedding levels on either side of the run boundary. Also, update
418 the saved info about previously seen characters, since that info is
419 generally valid for a single level run. */
420 static void
421 bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
423 int higher_level = (level_before > level_after ? level_before : level_after);
425 /* FIXME: should the default sos direction be user selectable? */
426 bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
428 bidi_it->prev.type = UNKNOWN_BT;
429 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
430 bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
431 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
432 bidi_it->next_for_neutral.type
433 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
436 /* Push the current embedding level and override status; reset the
437 current level to LEVEL and the current override status to OVERRIDE. */
438 static void
439 bidi_push_embedding_level (struct bidi_it *bidi_it,
440 int level, bidi_dir_t override, bool isolate_status)
442 struct bidi_stack *st;
443 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
445 bidi_it->stack_idx++;
446 eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
447 st = &bidi_it->level_stack[bidi_it->stack_idx];
448 eassert (level <= (1 << 7));
449 st->level = level;
450 st->override = override;
451 st->isolate_status = isolate_status;
452 if (isolate_status)
454 st->last_strong = bidi_it->last_strong;
455 st->prev_for_neutral = bidi_it->prev_for_neutral;
456 st->next_for_neutral = bidi_it->next_for_neutral;
457 st->sos = bidi_it->sos;
459 /* We've got a new isolating sequence, compute the directional type
460 of sos and initialize per-sequence variables (UAX#9, clause X10). */
461 bidi_set_sos_type (bidi_it, prev_level, level);
464 /* Pop from the stack the embedding level, the directional override
465 status, and optionally saved information for the isolating run
466 sequence. Return the new level. */
467 static int
468 bidi_pop_embedding_level (struct bidi_it *bidi_it)
470 int level;
472 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
473 and PDIs (X6a, 2nd bullet). */
474 if (bidi_it->stack_idx > 0)
476 bool isolate_status
477 = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
478 int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
480 struct bidi_stack st;
482 st = bidi_it->level_stack[bidi_it->stack_idx];
483 if (isolate_status)
485 /* PREV is used in W1 for resolving WEAK_NSM. By the time
486 we get to an NSM, we must have gotten past at least one
487 character: the PDI that ends the isolate from which we
488 are popping here. So PREV will have been filled up by
489 the time we first use it. We initialize it here to
490 UNKNOWN_BT to be able to catch any blunders in this
491 logic. */
492 bidi_it->prev.orig_type = bidi_it->prev.type = UNKNOWN_BT;
493 bidi_it->last_strong = st.last_strong;
494 bidi_it->prev_for_neutral = st.prev_for_neutral;
495 bidi_it->next_for_neutral = st.next_for_neutral;
496 bidi_it->sos = st.sos;
498 else
499 bidi_set_sos_type (bidi_it, old_level,
500 bidi_it->level_stack[bidi_it->stack_idx - 1].level);
502 bidi_it->stack_idx--;
504 level = bidi_it->level_stack[bidi_it->stack_idx].level;
505 eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
506 return level;
509 /* Record in SAVED_INFO the information about the current character. */
510 static void
511 bidi_remember_char (struct bidi_saved_info *saved_info,
512 struct bidi_it *bidi_it, bool from_type)
514 saved_info->charpos = bidi_it->charpos;
515 if (from_type)
516 saved_info->type = bidi_it->type;
517 else
518 saved_info->type = bidi_it->type_after_wn;
519 bidi_check_type (saved_info->type);
520 saved_info->orig_type = bidi_it->orig_type;
521 bidi_check_type (saved_info->orig_type);
524 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
525 copies the part of the level stack that is actually in use. */
526 static void
527 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
529 /* Copy everything from the start through the active part of
530 the level stack. */
531 memcpy (to, from,
532 (offsetof (struct bidi_it, level_stack[1])
533 + from->stack_idx * sizeof from->level_stack[0]));
537 /***********************************************************************
538 Caching the bidi iterator states
539 ***********************************************************************/
541 /* We allocate and de-allocate the cache in chunks of this size (in
542 characters). 200 was chosen as an upper limit for reasonably-long
543 lines in a text file/buffer. */
544 #define BIDI_CACHE_CHUNK 200
545 static struct bidi_it *bidi_cache;
546 static ptrdiff_t bidi_cache_size = 0;
547 enum { elsz = sizeof (struct bidi_it) };
548 static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
549 static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
550 static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
551 "stack" level */
553 /* 5-slot stack for saving the start of the previous level of the
554 cache. xdisp.c maintains a 5-slot stack for its iterator state,
555 and we need the same size of our stack. */
556 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
557 static int bidi_cache_sp;
559 /* Size of header used by bidi_shelve_cache. */
560 enum
562 bidi_shelve_header_size
563 = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
564 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
565 + sizeof (bidi_cache_last_idx))
568 /* Reset the cache state to the empty state. We only reset the part
569 of the cache relevant to iteration of the current object. Previous
570 objects, which are pushed on the display iterator's stack, are left
571 intact. This is called when the cached information is no more
572 useful for the current iteration, e.g. when we were reseated to a
573 new position on the same object. */
574 static void
575 bidi_cache_reset (void)
577 bidi_cache_idx = bidi_cache_start;
578 bidi_cache_last_idx = -1;
581 /* Shrink the cache to its minimal size. Called when we init the bidi
582 iterator for reordering a buffer or a string that does not come
583 from display properties, because that means all the previously
584 cached info is of no further use. */
585 static void
586 bidi_cache_shrink (void)
588 if (bidi_cache_size > BIDI_CACHE_CHUNK)
590 bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
591 bidi_cache_size = BIDI_CACHE_CHUNK;
593 bidi_cache_reset ();
596 static void
597 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
599 int current_scan_dir = bidi_it->scan_dir;
601 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
602 emacs_abort ();
604 bidi_copy_it (bidi_it, &bidi_cache[idx]);
605 bidi_it->scan_dir = current_scan_dir;
606 bidi_cache_last_idx = idx;
609 /* Find a cached state with a given CHARPOS and resolved embedding
610 level less or equal to LEVEL. If LEVEL is -1, disregard the
611 resolved levels in cached states. DIR, if non-zero, means search
612 in that direction from the last cache hit. */
613 static ptrdiff_t
614 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
616 ptrdiff_t i, i_start;
618 if (bidi_cache_idx > bidi_cache_start)
620 if (bidi_cache_last_idx == -1)
621 bidi_cache_last_idx = bidi_cache_idx - 1;
622 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
624 dir = -1;
625 i_start = bidi_cache_last_idx - 1;
627 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
628 + bidi_cache[bidi_cache_last_idx].nchars - 1))
630 dir = 1;
631 i_start = bidi_cache_last_idx + 1;
633 else if (dir)
634 i_start = bidi_cache_last_idx;
635 else
637 dir = -1;
638 i_start = bidi_cache_idx - 1;
641 if (dir < 0)
643 /* Linear search for now; FIXME! */
644 for (i = i_start; i >= bidi_cache_start; i--)
645 if (bidi_cache[i].charpos <= charpos
646 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
647 && (level == -1 || bidi_cache[i].resolved_level <= level))
648 return i;
650 else
652 for (i = i_start; i < bidi_cache_idx; i++)
653 if (bidi_cache[i].charpos <= charpos
654 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
655 && (level == -1 || bidi_cache[i].resolved_level <= level))
656 return i;
660 return -1;
663 /* Find a cached state where the resolved level changes to a value
664 that is lower than LEVEL, and return its cache slot index. DIR is
665 the direction to search, starting with the last used cache slot.
666 If DIR is zero, we search backwards from the last occupied cache
667 slot. BEFORE means return the index of the slot that
668 is ``before'' the level change in the search direction. That is,
669 given the cached levels like this:
671 1122333442211
672 AB C
674 and assuming we are at the position cached at the slot marked with
675 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
676 index of slot B or A, depending whether BEFORE is, respectively,
677 true or false. */
678 static ptrdiff_t
679 bidi_cache_find_level_change (int level, int dir, bool before)
681 if (bidi_cache_idx)
683 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
684 int incr = before ? 1 : 0;
686 eassert (!dir || bidi_cache_last_idx >= 0);
688 if (!dir)
689 dir = -1;
690 else if (!incr)
691 i += dir;
693 if (dir < 0)
695 while (i >= bidi_cache_start + incr)
697 if (bidi_cache[i - incr].resolved_level >= 0
698 && bidi_cache[i - incr].resolved_level < level)
699 return i;
700 i--;
703 else
705 while (i < bidi_cache_idx - incr)
707 if (bidi_cache[i + incr].resolved_level >= 0
708 && bidi_cache[i + incr].resolved_level < level)
709 return i;
710 i++;
715 return -1;
718 static void
719 bidi_cache_ensure_space (ptrdiff_t idx)
721 /* Enlarge the cache as needed. */
722 if (idx >= bidi_cache_size)
724 /* The bidi cache cannot be larger than the largest Lisp string
725 or buffer. */
726 ptrdiff_t string_or_buffer_bound
727 = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
729 /* Also, it cannot be larger than what C can represent. */
730 ptrdiff_t c_bound
731 = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
733 bidi_cache
734 = xpalloc (bidi_cache, &bidi_cache_size,
735 max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
736 min (string_or_buffer_bound, c_bound), elsz);
740 static void
741 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
742 bool update_only)
744 ptrdiff_t idx;
746 /* We should never cache on backward scans. */
747 if (bidi_it->scan_dir == -1)
748 emacs_abort ();
749 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
751 if (idx < 0 && update_only)
752 return;
754 if (idx < 0)
756 idx = bidi_cache_idx;
757 bidi_cache_ensure_space (idx);
758 /* Character positions should correspond to cache positions 1:1.
759 If we are outside the range of cached positions, the cache is
760 useless and must be reset. */
761 if (idx > bidi_cache_start &&
762 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
763 + bidi_cache[idx - 1].nchars)
764 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
766 bidi_cache_reset ();
767 idx = bidi_cache_start;
769 if (bidi_it->nchars <= 0)
770 emacs_abort ();
771 bidi_copy_it (&bidi_cache[idx], bidi_it);
772 if (!resolved)
773 bidi_cache[idx].resolved_level = -1;
775 else
777 /* Copy only the members which could have changed, to avoid
778 costly copying of the entire struct. */
779 bidi_cache[idx].type = bidi_it->type;
780 bidi_check_type (bidi_it->type);
781 bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
782 bidi_check_type (bidi_it->type_after_wn);
783 if (resolved)
784 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
785 else
786 bidi_cache[idx].resolved_level = -1;
787 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
788 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
789 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
790 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
791 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
792 bidi_cache[idx].bracket_pairing_pos = bidi_it->bracket_pairing_pos;
793 bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
796 bidi_cache_last_idx = idx;
797 if (idx >= bidi_cache_idx)
798 bidi_cache_idx = idx + 1;
801 /* Look for a cached iterator state that corresponds to CHARPOS. If
802 found, copy the cached state into BIDI_IT and return the type of
803 the cached entry. If not found, return UNKNOWN_BT. NEUTRALS_OK
804 non-zero means it is OK to return cached state for neutral
805 characters that have no valid next_for_neutral member, and
806 therefore cannot be resolved. This can happen if the state was
807 cached before it was resolved in bidi_resolve_neutral. */
808 static bidi_type_t
809 bidi_cache_find (ptrdiff_t charpos, bool neutrals_ok, struct bidi_it *bidi_it)
811 ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
813 if (i >= bidi_cache_start
814 && (neutrals_ok
815 /* Callers that don't want to resolve neutrals (and set
816 neutrals_ok = false) need to be sure that there's enough
817 info in the cached state to resolve the neutrals and
818 isolates, and if not, they don't want the cached state. */
819 || !(bidi_cache[i].resolved_level == -1
820 && (bidi_get_category (bidi_cache[i].type) == NEUTRAL
821 || bidi_isolate_fmt_char (bidi_cache[i].type))
822 && bidi_cache[i].next_for_neutral.type == UNKNOWN_BT)))
824 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
826 bidi_copy_it (bidi_it, &bidi_cache[i]);
827 bidi_cache_last_idx = i;
828 /* Don't let scan direction from the cached state override
829 the current scan direction. */
830 bidi_it->scan_dir = current_scan_dir;
831 return bidi_it->type;
834 return UNKNOWN_BT;
837 static int
838 bidi_peek_at_next_level (struct bidi_it *bidi_it)
840 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
841 emacs_abort ();
842 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
846 /***********************************************************************
847 Pushing and popping the bidi iterator state
848 ***********************************************************************/
850 /* Push the bidi iterator state in preparation for reordering a
851 different object, e.g. display string found at certain buffer
852 position. Pushing the bidi iterator boils down to saving its
853 entire state on the cache and starting a new cache "stacked" on top
854 of the current cache. */
855 void
856 bidi_push_it (struct bidi_it *bidi_it)
858 /* Save the current iterator state in its entirety after the last
859 used cache slot. */
860 bidi_cache_ensure_space (bidi_cache_idx);
861 bidi_cache[bidi_cache_idx++] = *bidi_it;
863 /* Push the current cache start onto the stack. */
864 eassert (bidi_cache_sp < IT_STACK_SIZE);
865 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
867 /* Start a new level of cache, and make it empty. */
868 bidi_cache_start = bidi_cache_idx;
869 bidi_cache_last_idx = -1;
872 /* Restore the iterator state saved by bidi_push_it and return the
873 cache to the corresponding state. */
874 void
875 bidi_pop_it (struct bidi_it *bidi_it)
877 if (bidi_cache_start <= 0)
878 emacs_abort ();
880 /* Reset the next free cache slot index to what it was before the
881 call to bidi_push_it. */
882 bidi_cache_idx = bidi_cache_start - 1;
884 /* Restore the bidi iterator state saved in the cache. */
885 *bidi_it = bidi_cache[bidi_cache_idx];
887 /* Pop the previous cache start from the stack. */
888 if (bidi_cache_sp <= 0)
889 emacs_abort ();
890 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
892 /* Invalidate the last-used cache slot data. */
893 bidi_cache_last_idx = -1;
896 static ptrdiff_t bidi_cache_total_alloc;
898 /* Stash away a copy of the cache and its control variables. */
899 void *
900 bidi_shelve_cache (void)
902 unsigned char *databuf;
903 ptrdiff_t alloc;
905 /* Empty cache. */
906 if (bidi_cache_idx == 0)
907 return NULL;
909 alloc = (bidi_shelve_header_size
910 + bidi_cache_idx * sizeof (struct bidi_it));
911 databuf = xmalloc (alloc);
912 bidi_cache_total_alloc += alloc;
914 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
915 memcpy (databuf + sizeof (bidi_cache_idx),
916 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
917 memcpy (databuf + sizeof (bidi_cache_idx)
918 + bidi_cache_idx * sizeof (struct bidi_it),
919 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
920 memcpy (databuf + sizeof (bidi_cache_idx)
921 + bidi_cache_idx * sizeof (struct bidi_it)
922 + sizeof (bidi_cache_start_stack),
923 &bidi_cache_sp, sizeof (bidi_cache_sp));
924 memcpy (databuf + sizeof (bidi_cache_idx)
925 + bidi_cache_idx * sizeof (struct bidi_it)
926 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
927 &bidi_cache_start, sizeof (bidi_cache_start));
928 memcpy (databuf + sizeof (bidi_cache_idx)
929 + bidi_cache_idx * sizeof (struct bidi_it)
930 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
931 + sizeof (bidi_cache_start),
932 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
934 return databuf;
937 /* Restore the cache state from a copy stashed away by
938 bidi_shelve_cache, and free the buffer used to stash that copy.
939 JUST_FREE means free the buffer, but don't restore the
940 cache; used when the corresponding iterator is discarded instead of
941 being restored. */
942 void
943 bidi_unshelve_cache (void *databuf, bool just_free)
945 unsigned char *p = databuf;
947 if (!p)
949 if (!just_free)
951 /* A NULL pointer means an empty cache. */
952 bidi_cache_start = 0;
953 bidi_cache_sp = 0;
954 bidi_cache_reset ();
957 else
959 if (just_free)
961 ptrdiff_t idx;
963 memcpy (&idx, p, sizeof (bidi_cache_idx));
964 bidi_cache_total_alloc
965 -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
967 else
969 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
970 bidi_cache_ensure_space (bidi_cache_idx);
971 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
972 bidi_cache_idx * sizeof (struct bidi_it));
973 memcpy (bidi_cache_start_stack,
974 p + sizeof (bidi_cache_idx)
975 + bidi_cache_idx * sizeof (struct bidi_it),
976 sizeof (bidi_cache_start_stack));
977 memcpy (&bidi_cache_sp,
978 p + sizeof (bidi_cache_idx)
979 + bidi_cache_idx * sizeof (struct bidi_it)
980 + sizeof (bidi_cache_start_stack),
981 sizeof (bidi_cache_sp));
982 memcpy (&bidi_cache_start,
983 p + sizeof (bidi_cache_idx)
984 + bidi_cache_idx * sizeof (struct bidi_it)
985 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
986 sizeof (bidi_cache_start));
987 memcpy (&bidi_cache_last_idx,
988 p + sizeof (bidi_cache_idx)
989 + bidi_cache_idx * sizeof (struct bidi_it)
990 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
991 + sizeof (bidi_cache_start),
992 sizeof (bidi_cache_last_idx));
993 bidi_cache_total_alloc
994 -= (bidi_shelve_header_size
995 + bidi_cache_idx * sizeof (struct bidi_it));
998 xfree (p);
1003 /***********************************************************************
1004 Initialization
1005 ***********************************************************************/
1006 static void
1007 bidi_initialize (void)
1009 bidi_type_table = uniprop_table (intern ("bidi-class"));
1010 if (NILP (bidi_type_table))
1011 emacs_abort ();
1012 staticpro (&bidi_type_table);
1014 bidi_mirror_table = uniprop_table (intern ("mirroring"));
1015 if (NILP (bidi_mirror_table))
1016 emacs_abort ();
1017 staticpro (&bidi_mirror_table);
1019 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
1020 if (NILP (bidi_brackets_table))
1021 emacs_abort ();
1022 staticpro (&bidi_brackets_table);
1024 Qparagraph_start = intern ("paragraph-start");
1025 staticpro (&Qparagraph_start);
1026 paragraph_start_re = Fsymbol_value (Qparagraph_start);
1027 if (!STRINGP (paragraph_start_re))
1028 paragraph_start_re = build_string ("\f\\|[ \t]*$");
1029 staticpro (&paragraph_start_re);
1030 Qparagraph_separate = intern ("paragraph-separate");
1031 staticpro (&Qparagraph_separate);
1032 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
1033 if (!STRINGP (paragraph_separate_re))
1034 paragraph_separate_re = build_string ("[ \t\f]*$");
1035 staticpro (&paragraph_separate_re);
1037 bidi_cache_sp = 0;
1038 bidi_cache_total_alloc = 0;
1040 bidi_initialized = 1;
1043 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
1044 end. */
1045 static void
1046 bidi_set_paragraph_end (struct bidi_it *bidi_it)
1048 bidi_it->invalid_levels = 0;
1049 bidi_it->invalid_isolates = 0;
1050 bidi_it->stack_idx = 0;
1051 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1054 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
1055 void
1056 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
1057 struct bidi_it *bidi_it)
1059 if (! bidi_initialized)
1060 bidi_initialize ();
1061 if (charpos >= 0)
1062 bidi_it->charpos = charpos;
1063 if (bytepos >= 0)
1064 bidi_it->bytepos = bytepos;
1065 bidi_it->frame_window_p = frame_window_p;
1066 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
1067 bidi_it->first_elt = 1;
1068 bidi_set_paragraph_end (bidi_it);
1069 bidi_it->new_paragraph = 1;
1070 bidi_it->separator_limit = -1;
1071 bidi_it->type = NEUTRAL_B;
1072 bidi_it->type_after_wn = NEUTRAL_B;
1073 bidi_it->orig_type = NEUTRAL_B;
1074 /* FIXME: Review this!!! */
1075 bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
1076 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1077 bidi_it->next_for_neutral.charpos = -1;
1078 bidi_it->next_for_neutral.type
1079 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
1080 bidi_it->prev_for_neutral.charpos = -1;
1081 bidi_it->prev_for_neutral.type
1082 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
1083 bidi_it->sos = L2R; /* FIXME: should it be user-selectable? */
1084 bidi_it->disp_pos = -1; /* invalid/unknown */
1085 bidi_it->disp_prop = 0;
1086 /* We can only shrink the cache if we are at the bottom level of its
1087 "stack". */
1088 if (bidi_cache_start == 0)
1089 bidi_cache_shrink ();
1090 else
1091 bidi_cache_reset ();
1094 /* Perform initializations for reordering a new line of bidi text. */
1095 static void
1096 bidi_line_init (struct bidi_it *bidi_it)
1098 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
1099 bidi_it->stack_idx = 0;
1100 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1101 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
1102 bidi_it->level_stack[0].isolate_status = false; /* X1 */
1103 bidi_it->invalid_levels = 0;
1104 bidi_it->isolate_level = 0; /* X1 */
1105 bidi_it->invalid_isolates = 0; /* X1 */
1106 /* Setting this to zero will force its recomputation the first time
1107 we need it for W5. */
1108 bidi_it->next_en_pos = 0;
1109 bidi_it->next_en_type = UNKNOWN_BT;
1110 bidi_it->next_for_ws.charpos = -1;
1111 bidi_it->next_for_ws.type = UNKNOWN_BT;
1112 bidi_set_sos_type (bidi_it,
1113 (bidi_it->paragraph_dir == R2L ? 1 : 0),
1114 bidi_it->level_stack[0].level); /* X10 */
1116 bidi_cache_reset ();
1120 /***********************************************************************
1121 Fetching characters
1122 ***********************************************************************/
1124 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
1125 are zero-based character positions in S, BEGBYTE is byte position
1126 corresponding to BEG. UNIBYTE means S is a unibyte string. */
1127 static ptrdiff_t
1128 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
1129 ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
1131 ptrdiff_t pos = beg;
1132 const unsigned char *p = s + begbyte, *start = p;
1134 if (unibyte)
1135 p = s + end;
1136 else
1138 if (!CHAR_HEAD_P (*p))
1139 emacs_abort ();
1141 while (pos < end)
1143 p += BYTES_BY_CHAR_HEAD (*p);
1144 pos++;
1148 return p - start;
1151 /* Fetch and return the character at byte position BYTEPOS. If S is
1152 non-NULL, fetch the character from string S; otherwise fetch the
1153 character from the current buffer. UNIBYTE means S is a
1154 unibyte string. */
1155 static int
1156 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
1158 if (s)
1160 s += bytepos;
1161 if (unibyte)
1162 return *s;
1164 else
1165 s = BYTE_POS_ADDR (bytepos);
1166 return STRING_CHAR (s);
1169 /* Fetch and return the character at CHARPOS/BYTEPOS. If that
1170 character is covered by a display string, treat the entire run of
1171 covered characters as a single character, either u+2029 or u+FFFC,
1172 and return their combined length in CH_LEN and NCHARS. DISP_POS
1173 specifies the character position of the next display string, or -1
1174 if not yet computed. When the next character is at or beyond that
1175 position, the function updates DISP_POS with the position of the
1176 next display string. *DISP_PROP non-zero means that there's really
1177 a display string at DISP_POS, as opposed to when we searched till
1178 DISP_POS without finding one. If *DISP_PROP is 2, it means the
1179 display spec is of the form `(space ...)', which is replaced with
1180 u+2029 to handle it as a paragraph separator. STRING->s is the C
1181 string to iterate, or NULL if iterating over a buffer or a Lisp
1182 string; in the latter case, STRING->lstring is the Lisp string. */
1183 static int
1184 bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
1185 int *disp_prop, struct bidi_string_data *string,
1186 struct window *w,
1187 bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
1189 int ch;
1190 ptrdiff_t endpos
1191 = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
1192 struct text_pos pos;
1193 int len;
1195 /* If we got past the last known position of display string, compute
1196 the position of the next one. That position could be at CHARPOS. */
1197 if (charpos < endpos && charpos > *disp_pos)
1199 SET_TEXT_POS (pos, charpos, bytepos);
1200 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1201 disp_prop);
1204 /* Fetch the character at BYTEPOS. */
1205 if (charpos >= endpos)
1207 ch = BIDI_EOB;
1208 *ch_len = 1;
1209 *nchars = 1;
1210 *disp_pos = endpos;
1211 *disp_prop = 0;
1213 else if (charpos >= *disp_pos && *disp_prop)
1215 ptrdiff_t disp_end_pos;
1217 /* We don't expect to find ourselves in the middle of a display
1218 property. Hopefully, it will never be needed. */
1219 if (charpos > *disp_pos)
1220 emacs_abort ();
1221 /* Text covered by `display' properties and overlays with
1222 display properties or display strings is handled as a single
1223 character that represents the entire run of characters
1224 covered by the display property. */
1225 if (*disp_prop == 2)
1227 /* `(space ...)' display specs are handled as paragraph
1228 separators for the purposes of the reordering; see UAX#9
1229 section 3 and clause HL1 in section 4.3 there. */
1230 ch = 0x2029;
1232 else
1234 /* All other display specs are handled as the Unicode Object
1235 Replacement Character. */
1236 ch = 0xFFFC;
1238 disp_end_pos = compute_display_string_end (*disp_pos, string);
1239 if (disp_end_pos < 0)
1241 /* Somebody removed the display string from the buffer
1242 behind our back. Recover by processing this buffer
1243 position as if no display property were present there to
1244 begin with. */
1245 *disp_prop = 0;
1246 goto normal_char;
1248 *nchars = disp_end_pos - *disp_pos;
1249 if (*nchars <= 0)
1250 emacs_abort ();
1251 if (string->s)
1252 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
1253 disp_end_pos, string->unibyte);
1254 else if (STRINGP (string->lstring))
1255 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
1256 bytepos, disp_end_pos, string->unibyte);
1257 else
1258 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1260 else
1262 normal_char:
1263 if (string->s)
1266 if (!string->unibyte)
1268 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1269 *ch_len = len;
1271 else
1273 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1274 *ch_len = 1;
1277 else if (STRINGP (string->lstring))
1279 if (!string->unibyte)
1281 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1282 len);
1283 *ch_len = len;
1285 else
1287 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1288 *ch_len = 1;
1291 else
1293 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1294 *ch_len = len;
1296 *nchars = 1;
1299 /* If we just entered a run of characters covered by a display
1300 string, compute the position of the next display string. */
1301 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1302 && *disp_prop)
1304 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1305 *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
1306 disp_prop);
1309 return ch;
1312 /* Like bidi_fetch_char, but ignore any text between an isolate
1313 initiator and its matching PDI or, if it has no matching PDI, the
1314 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1315 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1316 and the character that was fetched after skipping the isolates. */
1317 static int
1318 bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
1319 ptrdiff_t *disp_pos, int *disp_prop,
1320 struct bidi_string_data *string,
1321 struct window *w, bool frame_window_p,
1322 ptrdiff_t *ch_len, ptrdiff_t *nchars)
1324 ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
1325 int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
1326 frame_window_p, ch_len, nchars);
1327 bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1328 ptrdiff_t level = 0;
1330 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1332 level++;
1333 while (level > 0 && ch_type != NEUTRAL_B)
1335 charpos += *nchars;
1336 bytepos += *ch_len;
1337 ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
1338 w, frame_window_p, ch_len, nchars);
1339 ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1340 /* A Note to P2 says to ignore max_depth limit. */
1341 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1342 level++;
1343 else if (ch_type == PDI)
1344 level--;
1348 /* Communicate to the caller how much did we skip, so it could get
1349 past the last character position we examined. */
1350 *nchars += charpos - orig_charpos;
1351 *ch_len += bytepos - orig_bytepos;
1352 return ch;
1357 /***********************************************************************
1358 Determining paragraph direction
1359 ***********************************************************************/
1361 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1362 Value is the non-negative length of the paragraph separator
1363 following the buffer position, -1 if position is at the beginning
1364 of a new paragraph, or -2 if position is neither at beginning nor
1365 at end of a paragraph. */
1366 static ptrdiff_t
1367 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1369 Lisp_Object sep_re;
1370 Lisp_Object start_re;
1371 ptrdiff_t val;
1373 sep_re = paragraph_separate_re;
1374 start_re = paragraph_start_re;
1376 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1377 if (val < 0)
1379 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1380 val = -1;
1381 else
1382 val = -2;
1385 return val;
1388 /* If the user has requested the long scans caching, make sure that
1389 BIDI cache is enabled. Otherwise, make sure it's disabled. */
1391 static struct region_cache *
1392 bidi_paragraph_cache_on_off (void)
1394 struct buffer *cache_buffer = current_buffer;
1395 bool indirect_p = false;
1397 /* For indirect buffers, make sure to use the cache of their base
1398 buffer. */
1399 if (cache_buffer->base_buffer)
1401 cache_buffer = cache_buffer->base_buffer;
1402 indirect_p = true;
1405 /* Don't turn on or off the cache in the base buffer, if the value
1406 of cache-long-scans of the base buffer is inconsistent with that.
1407 This is because doing so will just make the cache pure overhead,
1408 since if we turn it on via indirect buffer, it will be
1409 immediately turned off by its base buffer. */
1410 if (NILP (BVAR (current_buffer, cache_long_scans)))
1412 if (!indirect_p
1413 || NILP (BVAR (cache_buffer, cache_long_scans)))
1415 if (cache_buffer->bidi_paragraph_cache)
1417 free_region_cache (cache_buffer->bidi_paragraph_cache);
1418 cache_buffer->bidi_paragraph_cache = 0;
1421 return NULL;
1423 else
1425 if (!indirect_p
1426 || !NILP (BVAR (cache_buffer, cache_long_scans)))
1428 if (!cache_buffer->bidi_paragraph_cache)
1429 cache_buffer->bidi_paragraph_cache = new_region_cache ();
1431 return cache_buffer->bidi_paragraph_cache;
1435 /* On my 2005-vintage machine, searching back for paragraph start
1436 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1437 when user types C-p. The number below limits each call to
1438 bidi_paragraph_init to about 10 ms. */
1439 #define MAX_PARAGRAPH_SEARCH 7500
1441 /* Find the beginning of this paragraph by looking back in the buffer.
1442 Value is the byte position of the paragraph's beginning, or
1443 BEGV_BYTE if paragraph_start_re is still not found after looking
1444 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
1445 static ptrdiff_t
1446 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1448 Lisp_Object re = paragraph_start_re;
1449 ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1450 struct region_cache *bpc = bidi_paragraph_cache_on_off ();
1451 ptrdiff_t n = 0, oldpos = pos, next;
1452 struct buffer *cache_buffer = current_buffer;
1454 if (cache_buffer->base_buffer)
1455 cache_buffer = cache_buffer->base_buffer;
1457 while (pos_byte > BEGV_BYTE
1458 && n++ < MAX_PARAGRAPH_SEARCH
1459 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1461 /* FIXME: What if the paragraph beginning is covered by a
1462 display string? And what if a display string covering some
1463 of the text over which we scan back includes
1464 paragraph_start_re? */
1465 DEC_BOTH (pos, pos_byte);
1466 if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
1468 pos = next, pos_byte = CHAR_TO_BYTE (pos);
1469 break;
1471 else
1472 pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
1474 if (n >= MAX_PARAGRAPH_SEARCH)
1475 pos = BEGV, pos_byte = BEGV_BYTE;
1476 if (bpc)
1477 know_region_cache (cache_buffer, bpc, pos, oldpos);
1478 /* Positions returned by the region cache are not limited to
1479 BEGV..ZV range, so we limit them here. */
1480 pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
1481 return pos_byte;
1484 /* On a 3.4 GHz machine, searching forward for a strong directional
1485 character in a long paragraph full of weaks or neutrals takes about
1486 1 ms for each 20K characters. The number below limits each call to
1487 bidi_paragraph_init to less than 10 ms even on slow machines. */
1488 #define MAX_STRONG_CHAR_SEARCH 100000
1490 /* Starting from POS, find the first strong (L, R, or AL) character,
1491 while skipping over any characters between an isolate initiator and
1492 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1493 matches the isolate initiator at POS. Return the bidi type of the
1494 character where the search stopped. Give up if after examining
1495 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1496 character was found. */
1497 static bidi_type_t
1498 find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
1499 ptrdiff_t *disp_pos, int *disp_prop,
1500 struct bidi_string_data *string, struct window *w,
1501 bool string_p, bool frame_window_p,
1502 ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
1504 ptrdiff_t pos1;
1505 bidi_type_t type;
1506 int ch;
1508 if (stop_at_pdi)
1510 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1511 at POS. Get past it. */
1512 #ifdef ENABLE_CHECKING
1513 ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
1514 frame_window_p, ch_len, nchars);
1515 type = bidi_get_type (ch, NEUTRAL_DIR);
1516 eassert (type == FSI /* || type == LRI || type == RLI */);
1517 #endif
1518 pos += *nchars;
1519 bytepos += *ch_len;
1521 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
1522 w, frame_window_p, ch_len, nchars);
1523 type = bidi_get_type (ch, NEUTRAL_DIR);
1525 pos1 = pos;
1526 for (pos += *nchars, bytepos += *ch_len;
1527 bidi_get_category (type) != STRONG
1528 /* If requested to stop at first PDI, stop there. */
1529 && !(stop_at_pdi && type == PDI)
1530 /* Stop when searched too far into an abnormally large
1531 paragraph full of weak or neutral characters. */
1532 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1533 type = bidi_get_type (ch, NEUTRAL_DIR))
1535 if (pos >= end)
1537 /* Pretend there's a paragraph separator at end of
1538 buffer/string. */
1539 type = NEUTRAL_B;
1540 break;
1542 if (!string_p
1543 && type == NEUTRAL_B
1544 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1545 break;
1546 /* Fetch next character and advance to get past it. */
1547 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
1548 string, w, frame_window_p,
1549 ch_len, nchars);
1550 pos += *nchars;
1551 bytepos += *ch_len;
1553 return type;
1556 /* Determine the base direction, a.k.a. base embedding level, of the
1557 paragraph we are about to iterate through. If DIR is either L2R or
1558 R2L, just use that. Otherwise, determine the paragraph direction
1559 from the first strong directional character of the paragraph.
1561 NO_DEFAULT_P means don't default to L2R if the paragraph
1562 has no strong directional characters and both DIR and
1563 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1564 in the buffer until a paragraph is found with a strong character,
1565 or until hitting BEGV. In the latter case, fall back to L2R. This
1566 flag is used in current-bidi-paragraph-direction.
1568 Note that this function gives the paragraph separator the same
1569 direction as the preceding paragraph, even though Emacs generally
1570 views the separator as not belonging to any paragraph. */
1571 void
1572 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1574 ptrdiff_t bytepos = bidi_it->bytepos;
1575 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1576 ptrdiff_t pstartbyte;
1577 /* Note that begbyte is a byte position, while end is a character
1578 position. Yes, this is ugly, but we are trying to avoid costly
1579 calls to BYTE_TO_CHAR and its ilk. */
1580 ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
1581 ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
1583 /* Special case for an empty buffer. */
1584 if (bytepos == begbyte && bidi_it->charpos == end)
1585 dir = L2R;
1586 /* We should never be called at EOB or before BEGV. */
1587 else if (bidi_it->charpos >= end || bytepos < begbyte)
1588 emacs_abort ();
1590 if (dir == L2R)
1592 bidi_it->paragraph_dir = L2R;
1593 bidi_it->new_paragraph = 0;
1595 else if (dir == R2L)
1597 bidi_it->paragraph_dir = R2L;
1598 bidi_it->new_paragraph = 0;
1600 else if (dir == NEUTRAL_DIR) /* P2 */
1602 ptrdiff_t ch_len, nchars;
1603 ptrdiff_t pos, disp_pos = -1;
1604 int disp_prop = 0;
1605 bidi_type_t type;
1606 const unsigned char *s;
1608 if (!bidi_initialized)
1609 bidi_initialize ();
1611 /* If we are inside a paragraph separator, we are just waiting
1612 for the separator to be exhausted; use the previous paragraph
1613 direction. But don't do that if we have been just reseated,
1614 because we need to reinitialize below in that case. */
1615 if (!bidi_it->first_elt
1616 && bidi_it->charpos < bidi_it->separator_limit)
1617 return;
1619 /* If we are on a newline, get past it to where the next
1620 paragraph might start. But don't do that at BEGV since then
1621 we are potentially in a new paragraph that doesn't yet
1622 exist. */
1623 pos = bidi_it->charpos;
1624 s = (STRINGP (bidi_it->string.lstring)
1625 ? SDATA (bidi_it->string.lstring)
1626 : bidi_it->string.s);
1627 if (bytepos > begbyte
1628 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1630 bytepos++;
1631 pos++;
1634 /* We are either at the beginning of a paragraph or in the
1635 middle of it. Find where this paragraph starts. */
1636 if (string_p)
1638 /* We don't support changes of paragraph direction inside a
1639 string. It is treated as a single paragraph. */
1640 pstartbyte = 0;
1642 else
1643 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1644 bidi_it->separator_limit = -1;
1645 bidi_it->new_paragraph = 0;
1647 /* The following loop is run more than once only if NO_DEFAULT_P,
1648 and only if we are iterating on a buffer. */
1649 do {
1650 bytepos = pstartbyte;
1651 if (!string_p)
1652 pos = BYTE_TO_CHAR (bytepos);
1653 type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
1654 &bidi_it->string, bidi_it->w,
1655 string_p, bidi_it->frame_window_p,
1656 &ch_len, &nchars, false);
1657 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1658 bidi_it->paragraph_dir = R2L;
1659 else if (type == STRONG_L)
1660 bidi_it->paragraph_dir = L2R;
1661 if (!string_p
1662 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1664 /* If this paragraph is at BEGV, default to L2R. */
1665 if (pstartbyte == BEGV_BYTE)
1666 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1667 else
1669 ptrdiff_t prevpbyte = pstartbyte;
1670 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1672 /* Find the beginning of the previous paragraph, if any. */
1673 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1675 /* FXIME: What if p is covered by a display
1676 string? See also a FIXME inside
1677 bidi_find_paragraph_start. */
1678 DEC_BOTH (p, pbyte);
1679 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1681 pstartbyte = prevpbyte;
1684 } while (!string_p
1685 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1687 else
1688 emacs_abort ();
1690 /* Contrary to UAX#9 clause P3, we only default the paragraph
1691 direction to L2R if we have no previous usable paragraph
1692 direction. This is allowed by the HL1 clause. */
1693 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1694 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1695 if (bidi_it->paragraph_dir == R2L)
1696 bidi_it->level_stack[0].level = 1;
1697 else
1698 bidi_it->level_stack[0].level = 0;
1700 bidi_line_init (bidi_it);
1704 /***********************************************************************
1705 Resolving explicit and implicit levels.
1706 The rest of this file constitutes the core of the UBA implementation.
1707 ***********************************************************************/
1709 static bool
1710 bidi_explicit_dir_char (int ch)
1712 bidi_type_t ch_type;
1714 if (!bidi_initialized)
1715 emacs_abort ();
1716 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1717 return (ch_type == LRE || ch_type == LRO
1718 || ch_type == RLE || ch_type == RLO
1719 || ch_type == PDF);
1722 /* Given an iterator state in BIDI_IT, advance one character position
1723 in the buffer/string to the next character (in the logical order),
1724 resolve any explicit embeddings, directional overrides, and isolate
1725 initiators and terminators, and return the embedding level of the
1726 character after resolving these explicit directives. */
1727 static int
1728 bidi_resolve_explicit (struct bidi_it *bidi_it)
1730 int curchar;
1731 bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
1732 int current_level;
1733 int new_level;
1734 bidi_dir_t override;
1735 bool isolate_status;
1736 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1737 ptrdiff_t ch_len, nchars, disp_pos, end;
1738 int disp_prop;
1740 /* Record the info about the previous character. */
1741 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
1742 && bidi_it->type != WEAK_BN)
1744 /* This special case is needed in support of Unicode 8.0
1745 correction to N0, as implemented in bidi_resolve_weak/W1
1746 below. */
1747 if (bidi_it->type_after_wn == NEUTRAL_ON
1748 && bidi_get_category (bidi_it->type) == STRONG
1749 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
1750 bidi_remember_char (&bidi_it->prev, bidi_it, 1);
1751 else
1752 bidi_remember_char (&bidi_it->prev, bidi_it, 0);
1754 if (bidi_it->type_after_wn == STRONG_R
1755 || bidi_it->type_after_wn == STRONG_L
1756 || bidi_it->type_after_wn == STRONG_AL)
1757 bidi_remember_char (&bidi_it->last_strong, bidi_it, 0);
1758 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1759 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1760 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it, 1);
1762 /* If we overstepped the characters used for resolving neutrals
1763 and whitespace, invalidate their info in the iterator. */
1764 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1765 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1766 if (bidi_it->next_en_pos >= 0
1767 && bidi_it->charpos >= bidi_it->next_en_pos)
1769 bidi_it->next_en_pos = 0;
1770 bidi_it->next_en_type = UNKNOWN_BT;
1773 /* Reset the bracket resolution info. */
1774 bidi_it->bracket_pairing_pos = -1;
1775 bidi_it->bracket_enclosed_type = UNKNOWN_BT;
1777 /* If reseat()'ed, don't advance, so as to start iteration from the
1778 position where we were reseated. bidi_it->bytepos can be less
1779 than BEGV_BYTE after reseat to BEGV. */
1780 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1781 || bidi_it->first_elt)
1783 bidi_it->first_elt = 0;
1784 if (string_p)
1786 const unsigned char *p
1787 = (STRINGP (bidi_it->string.lstring)
1788 ? SDATA (bidi_it->string.lstring)
1789 : bidi_it->string.s);
1791 if (bidi_it->charpos < 0)
1792 bidi_it->charpos = bidi_it->bytepos = 0;
1793 eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
1794 bidi_it->charpos,
1795 bidi_it->string.unibyte));
1797 else
1799 if (bidi_it->charpos < BEGV)
1801 bidi_it->charpos = BEGV;
1802 bidi_it->bytepos = BEGV_BYTE;
1804 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
1806 /* Determine the orginal bidi type of the previous character,
1807 which is needed for handling isolate initiators and PDF. The
1808 type of the previous character will only be non-trivial if
1809 our caller moved through some previous text in
1810 get_visually_first_element, in which case bidi_it->prev holds
1811 the information we want. */
1812 if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
1814 eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
1815 prev_type = bidi_it->prev.orig_type;
1816 if (prev_type == FSI)
1817 prev_type = bidi_it->type_after_wn;
1820 /* Don't move at end of buffer/string. */
1821 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1823 /* Advance to the next character, skipping characters covered by
1824 display strings (nchars > 1). */
1825 if (bidi_it->nchars <= 0)
1826 emacs_abort ();
1827 bidi_it->charpos += bidi_it->nchars;
1828 if (bidi_it->ch_len == 0)
1829 emacs_abort ();
1830 bidi_it->bytepos += bidi_it->ch_len;
1831 prev_type = bidi_it->orig_type;
1832 if (prev_type == FSI)
1833 prev_type = bidi_it->type_after_wn;
1835 else /* EOB or end of string */
1836 prev_type = NEUTRAL_B;
1838 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1839 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1840 isolate_status = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
1841 new_level = current_level;
1843 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1845 curchar = BIDI_EOB;
1846 bidi_it->ch_len = 1;
1847 bidi_it->nchars = 1;
1848 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1849 bidi_it->disp_prop = 0;
1851 else
1853 /* LRI, RLI, and FSI increment, and PDF decrements, the
1854 embedding level of the _following_ characters, so we must
1855 first look at the type of the previous character to support
1856 that. */
1857 switch (prev_type)
1859 case RLI: /* X5a */
1860 if (current_level < BIDI_MAXDEPTH
1861 && bidi_it->invalid_levels == 0
1862 && bidi_it->invalid_isolates == 0)
1864 new_level = ((current_level + 1) & ~1) + 1;
1865 bidi_it->isolate_level++;
1866 bidi_push_embedding_level (bidi_it, new_level,
1867 NEUTRAL_DIR, true);
1869 else
1870 bidi_it->invalid_isolates++;
1871 break;
1872 case LRI: /* X5b */
1873 if (current_level < BIDI_MAXDEPTH - 1
1874 && bidi_it->invalid_levels == 0
1875 && bidi_it->invalid_isolates == 0)
1877 new_level = ((current_level + 2) & ~1);
1878 bidi_it->isolate_level++;
1879 bidi_push_embedding_level (bidi_it, new_level,
1880 NEUTRAL_DIR, true);
1882 else
1883 bidi_it->invalid_isolates++;
1884 break;
1885 case PDF: /* X7 */
1886 if (!bidi_it->invalid_isolates)
1888 if (bidi_it->invalid_levels)
1889 bidi_it->invalid_levels--;
1890 else if (!isolate_status && bidi_it->stack_idx >= 1)
1891 new_level = bidi_pop_embedding_level (bidi_it);
1893 break;
1894 default:
1895 eassert (prev_type != FSI);
1896 /* Nothing. */
1897 break;
1899 /* Fetch the character at BYTEPOS. If it is covered by a
1900 display string, treat the entire run of covered characters as
1901 a single character u+FFFC. */
1902 curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
1903 &bidi_it->disp_pos, &bidi_it->disp_prop,
1904 &bidi_it->string, bidi_it->w,
1905 bidi_it->frame_window_p,
1906 &bidi_it->ch_len, &bidi_it->nchars);
1908 bidi_it->ch = curchar;
1909 bidi_it->resolved_level = new_level;
1911 /* Don't apply directional override here, as all the types we handle
1912 below will not be affected by the override anyway, and we need
1913 the original type unaltered. The override will be applied in
1914 bidi_resolve_weak. */
1915 type = bidi_get_type (curchar, NEUTRAL_DIR);
1916 bidi_it->orig_type = type;
1917 bidi_check_type (bidi_it->orig_type);
1919 bidi_it->type_after_wn = UNKNOWN_BT;
1921 switch (type)
1923 case RLE: /* X2 */
1924 case RLO: /* X4 */
1925 bidi_it->type_after_wn = type;
1926 bidi_check_type (bidi_it->type_after_wn);
1927 type = WEAK_BN; /* X9/Retaining */
1928 if (new_level < BIDI_MAXDEPTH
1929 && bidi_it->invalid_levels == 0
1930 && bidi_it->invalid_isolates == 0)
1932 /* Compute the least odd embedding level greater than
1933 the current level. */
1934 new_level = ((new_level + 1) & ~1) + 1;
1935 if (bidi_it->type_after_wn == RLE)
1936 override = NEUTRAL_DIR;
1937 else
1938 override = R2L;
1939 bidi_push_embedding_level (bidi_it, new_level, override, false);
1940 bidi_it->resolved_level = new_level;
1942 else
1944 if (bidi_it->invalid_isolates == 0)
1945 bidi_it->invalid_levels++;
1947 break;
1948 case LRE: /* X3 */
1949 case LRO: /* X5 */
1950 bidi_it->type_after_wn = type;
1951 bidi_check_type (bidi_it->type_after_wn);
1952 type = WEAK_BN; /* X9/Retaining */
1953 if (new_level < BIDI_MAXDEPTH - 1
1954 && bidi_it->invalid_levels == 0
1955 && bidi_it->invalid_isolates == 0)
1957 /* Compute the least even embedding level greater than
1958 the current level. */
1959 new_level = ((new_level + 2) & ~1);
1960 if (bidi_it->type_after_wn == LRE)
1961 override = NEUTRAL_DIR;
1962 else
1963 override = L2R;
1964 bidi_push_embedding_level (bidi_it, new_level, override, false);
1965 bidi_it->resolved_level = new_level;
1967 else
1969 if (bidi_it->invalid_isolates == 0)
1970 bidi_it->invalid_levels++;
1972 break;
1973 case FSI: /* X5c */
1974 end = string_p ? bidi_it->string.schars : ZV;
1975 disp_pos = bidi_it->disp_pos;
1976 disp_prop = bidi_it->disp_prop;
1977 nchars = bidi_it->nchars;
1978 ch_len = bidi_it->ch_len;
1979 typ1 = find_first_strong_char (bidi_it->charpos,
1980 bidi_it->bytepos, end,
1981 &disp_pos, &disp_prop,
1982 &bidi_it->string, bidi_it->w,
1983 string_p, bidi_it->frame_window_p,
1984 &ch_len, &nchars, true);
1985 if (typ1 != STRONG_R && typ1 != STRONG_AL)
1987 type = LRI;
1988 goto fsi_as_lri;
1990 else
1991 type = RLI;
1992 /* FALLTHROUGH */
1993 case RLI: /* X5a */
1994 if (override == NEUTRAL_DIR)
1995 bidi_it->type_after_wn = type;
1996 else /* Unicode 8.0 correction. */
1997 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
1998 bidi_check_type (bidi_it->type_after_wn);
1999 break;
2000 case LRI: /* X5b */
2001 fsi_as_lri:
2002 if (override == NEUTRAL_DIR)
2003 bidi_it->type_after_wn = type;
2004 else /* Unicode 8.0 correction. */
2005 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2006 bidi_check_type (bidi_it->type_after_wn);
2007 break;
2008 case PDI: /* X6a */
2009 if (bidi_it->invalid_isolates)
2010 bidi_it->invalid_isolates--;
2011 else if (bidi_it->isolate_level > 0)
2013 bidi_it->invalid_levels = 0;
2014 while (!bidi_it->level_stack[bidi_it->stack_idx].isolate_status)
2015 bidi_pop_embedding_level (bidi_it);
2016 eassert (bidi_it->stack_idx > 0);
2017 new_level = bidi_pop_embedding_level (bidi_it);
2018 bidi_it->isolate_level--;
2020 bidi_it->resolved_level = new_level;
2021 /* Unicode 8.0 correction. */
2022 if (bidi_it->level_stack[bidi_it->stack_idx].override == L2R)
2023 bidi_it->type_after_wn = STRONG_L;
2024 else if (bidi_it->level_stack[bidi_it->stack_idx].override == R2L)
2025 bidi_it->type_after_wn = STRONG_R;
2026 else
2027 bidi_it->type_after_wn = type;
2028 break;
2029 case PDF: /* X7 */
2030 bidi_it->type_after_wn = type;
2031 bidi_check_type (bidi_it->type_after_wn);
2032 type = WEAK_BN; /* X9/Retaining */
2033 break;
2034 default:
2035 /* Nothing. */
2036 break;
2039 bidi_it->type = type;
2040 bidi_check_type (bidi_it->type);
2042 if (bidi_it->type == NEUTRAL_B) /* X8 */
2044 bidi_set_paragraph_end (bidi_it);
2045 /* This is needed by bidi_resolve_weak below, and in L1. */
2046 bidi_it->type_after_wn = bidi_it->type;
2049 eassert (bidi_it->resolved_level >= 0);
2050 return bidi_it->resolved_level;
2053 /* Advance in the buffer/string, resolve weak types and return the
2054 type of the next character after weak type resolution. */
2055 static bidi_type_t
2056 bidi_resolve_weak (struct bidi_it *bidi_it)
2058 bidi_type_t type;
2059 bidi_dir_t override;
2060 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2061 int new_level = bidi_resolve_explicit (bidi_it);
2062 int next_char;
2063 bidi_type_t type_of_next;
2064 struct bidi_it saved_it;
2065 ptrdiff_t eob
2066 = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
2067 ? bidi_it->string.schars : ZV);
2069 type = bidi_it->type;
2070 override = bidi_it->level_stack[bidi_it->stack_idx].override;
2072 eassert (!(type == UNKNOWN_BT
2073 || type == LRE
2074 || type == LRO
2075 || type == RLE
2076 || type == RLO
2077 || type == PDF));
2079 eassert (prev_level >= 0);
2080 if (bidi_it->type == NEUTRAL_B)
2082 /* We've got a new isolating sequence, compute the directional
2083 type of sos and initialize per-run variables (UAX#9, clause
2084 X10). */
2085 bidi_set_sos_type (bidi_it, prev_level, new_level);
2087 if (type == NEUTRAL_S || type == NEUTRAL_WS
2088 || type == WEAK_BN || type == STRONG_AL)
2089 bidi_it->type_after_wn = type; /* needed in L1 */
2090 bidi_check_type (bidi_it->type_after_wn);
2092 /* Level and directional override status are already recorded in
2093 bidi_it, and do not need any change; see X6. */
2094 if (override == R2L) /* X6 */
2095 type = STRONG_R;
2096 else if (override == L2R)
2097 type = STRONG_L;
2098 else
2100 if (type == WEAK_NSM) /* W1 */
2102 /* Note that we don't need to consider the case where the
2103 prev character has its type overridden by an RLO or LRO,
2104 because then either the type of this NSM would have been
2105 also overridden, or the previous character is outside the
2106 current level run, and thus not relevant to this NSM.
2107 This is why NSM gets the type_after_wn of the previous
2108 character. */
2109 /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT. */
2110 if (bidi_it->prev.type != UNKNOWN_BT
2111 /* If type_after_wn is NEUTRAL_B, this NSM is at sos. */
2112 && bidi_it->prev.type != NEUTRAL_B)
2114 if (bidi_isolate_fmt_char (bidi_it->prev.type))
2116 /* From W1: "Note that in an isolating run sequence,
2117 an isolate initiator followed by an NSM or any
2118 type other than PDI must be an overflow isolate
2119 initiator." */
2120 eassert (bidi_it->invalid_isolates > 0);
2121 type = NEUTRAL_ON;
2123 else
2125 /* This includes the Unicode 8.0 correction for N0,
2126 due to how we set prev.type in bidi_resolve_explicit,
2127 which see. */
2128 type = bidi_it->prev.type;
2131 else if (bidi_it->sos == R2L)
2132 type = STRONG_R;
2133 else if (bidi_it->sos == L2R)
2134 type = STRONG_L;
2135 else /* shouldn't happen! */
2136 emacs_abort ();
2138 if (type == WEAK_EN /* W2 */
2139 && bidi_it->last_strong.type == STRONG_AL)
2140 type = WEAK_AN;
2141 else if (type == STRONG_AL) /* W3 */
2142 type = STRONG_R;
2143 else if ((type == WEAK_ES /* W4 */
2144 && bidi_it->prev.type == WEAK_EN
2145 && bidi_it->prev.orig_type == WEAK_EN)
2146 || (type == WEAK_CS
2147 && ((bidi_it->prev.type == WEAK_EN
2148 && bidi_it->prev.orig_type == WEAK_EN)
2149 || bidi_it->prev.type == WEAK_AN)))
2151 const unsigned char *s
2152 = (STRINGP (bidi_it->string.lstring)
2153 ? SDATA (bidi_it->string.lstring)
2154 : bidi_it->string.s);
2156 next_char = (bidi_it->charpos + bidi_it->nchars >= eob
2157 ? BIDI_EOB
2158 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
2159 s, bidi_it->string.unibyte));
2160 type_of_next = bidi_get_type (next_char, override);
2162 if (type_of_next == WEAK_BN
2163 || bidi_explicit_dir_char (next_char))
2165 bidi_copy_it (&saved_it, bidi_it);
2166 while (bidi_resolve_explicit (bidi_it) == new_level
2167 && bidi_it->type == WEAK_BN)
2168 type_of_next = bidi_it->type;
2169 bidi_copy_it (bidi_it, &saved_it);
2172 /* If the next character is EN, but the last strong-type
2173 character is AL, that next EN will be changed to AN when
2174 we process it in W2 above. So in that case, this ES
2175 should not be changed into EN. */
2176 if (type == WEAK_ES
2177 && type_of_next == WEAK_EN
2178 && bidi_it->last_strong.type != STRONG_AL)
2179 type = WEAK_EN;
2180 else if (type == WEAK_CS)
2182 if (bidi_it->prev.type == WEAK_AN
2183 && (type_of_next == WEAK_AN
2184 /* If the next character is EN, but the last
2185 strong-type character is AL, EN will be later
2186 changed to AN when we process it in W2 above.
2187 So in that case, this ES should not be
2188 changed into EN. */
2189 || (type_of_next == WEAK_EN
2190 && bidi_it->last_strong.type == STRONG_AL)))
2191 type = WEAK_AN;
2192 else if (bidi_it->prev.type == WEAK_EN
2193 && type_of_next == WEAK_EN
2194 && bidi_it->last_strong.type != STRONG_AL)
2195 type = WEAK_EN;
2198 else if (type == WEAK_ET /* W5: ET with EN before or after it */
2199 || type == WEAK_BN) /* W5/Retaining */
2201 if (bidi_it->prev.type == WEAK_EN) /* ET/BN w/EN before it */
2202 type = WEAK_EN;
2203 else if (bidi_it->next_en_pos > bidi_it->charpos
2204 && bidi_it->next_en_type != WEAK_BN)
2206 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
2207 type = WEAK_EN;
2209 else if (bidi_it->next_en_pos >=0)
2211 /* We overstepped the last known position for ET
2212 resolution but there could be other such characters
2213 in this paragraph (when we are sure there are no more
2214 such positions, we set next_en_pos to a negative
2215 value). Try to find the next position for ET
2216 resolution. */
2217 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
2218 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
2219 ? SDATA (bidi_it->string.lstring)
2220 : bidi_it->string.s);
2222 if (bidi_it->nchars <= 0)
2223 emacs_abort ();
2224 next_char
2225 = (bidi_it->charpos + bidi_it->nchars >= eob
2226 ? BIDI_EOB
2227 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
2228 bidi_it->string.unibyte));
2229 type_of_next = bidi_get_type (next_char, override);
2231 if (type_of_next == WEAK_ET
2232 || type_of_next == WEAK_BN
2233 || bidi_explicit_dir_char (next_char))
2235 bidi_copy_it (&saved_it, bidi_it);
2236 while (bidi_resolve_explicit (bidi_it) == new_level
2237 && (bidi_it->type == WEAK_BN
2238 || bidi_it->type == WEAK_ET))
2239 type_of_next = bidi_it->type;
2240 if (type == WEAK_BN
2241 && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
2243 /* If we entered the above loop with a BN that
2244 changes the level, the type of next
2245 character, which is in a different level, is
2246 not relevant to resolving this series of ET
2247 and BN. */
2248 en_pos = saved_it.charpos;
2249 type_of_next = type;
2251 else
2252 en_pos = bidi_it->charpos;
2253 bidi_copy_it (bidi_it, &saved_it);
2255 /* Remember this position, to speed up processing of the
2256 next ETs. */
2257 bidi_it->next_en_pos = en_pos;
2258 if (type_of_next == WEAK_EN)
2260 /* If the last strong character is AL, the EN we've
2261 found will become AN when we get to it (W2). */
2262 if (bidi_it->last_strong.type == STRONG_AL)
2263 type_of_next = WEAK_AN;
2264 else if (type == WEAK_BN)
2265 type = NEUTRAL_ON; /* W6/Retaining */
2266 else
2267 type = WEAK_EN;
2269 else if (type_of_next == NEUTRAL_B)
2270 /* Record the fact that there are no more ENs from
2271 here to the end of paragraph, to avoid entering the
2272 loop above ever again in this paragraph. */
2273 bidi_it->next_en_pos = -1;
2274 /* Record the type of the character where we ended our search. */
2275 bidi_it->next_en_type = type_of_next;
2280 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
2281 || (type == WEAK_BN
2282 && (bidi_it->prev.type == WEAK_CS /* W6/Retaining */
2283 || bidi_it->prev.type == WEAK_ES
2284 || bidi_it->prev.type == WEAK_ET)))
2285 type = NEUTRAL_ON;
2287 /* Store the type we've got so far, before we clobber it with strong
2288 types in W7 and while resolving neutral types. But leave alone
2289 the original types that were recorded above, because we will need
2290 them for the L1 clause. */
2291 if (bidi_it->type_after_wn == UNKNOWN_BT)
2292 bidi_it->type_after_wn = type;
2293 bidi_check_type (bidi_it->type_after_wn);
2295 if (type == WEAK_EN) /* W7 */
2297 if ((bidi_it->last_strong.type == STRONG_L)
2298 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
2299 type = STRONG_L;
2302 bidi_it->type = type;
2303 bidi_check_type (bidi_it->type);
2304 return type;
2307 /* Resolve the type of a neutral character according to the type of
2308 surrounding strong text and the current embedding level. */
2309 static bidi_type_t
2310 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2312 /* N1: "European and Arabic numbers act as if they were R in terms
2313 of their influence on NIs." */
2314 if (next_type == WEAK_EN || next_type == WEAK_AN)
2315 next_type = STRONG_R;
2316 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
2317 prev_type = STRONG_R;
2319 if (next_type == prev_type) /* N1 */
2320 return next_type;
2321 else if ((lev & 1) == 0) /* N2 */
2322 return STRONG_L;
2323 else
2324 return STRONG_R;
2327 #define FLAG_EMBEDDING_INSIDE 1
2328 #define FLAG_OPPOSITE_INSIDE 2
2330 /* A data type used in the stack maintained by
2331 bidi_find_bracket_pairs below. */
2332 typedef struct bpa_stack_entry {
2333 int close_bracket_char;
2334 int open_bracket_idx;
2335 #ifdef ENABLE_CHECKING
2336 ptrdiff_t open_bracket_pos;
2337 #endif
2338 unsigned flags : 2;
2339 } bpa_stack_entry;
2341 /* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
2342 BPA stack, which should be more than enough for actual bidi text. */
2343 #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2345 #ifdef ENABLE_CHECKING
2346 # define STORE_BRACKET_CHARPOS \
2347 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
2348 #else
2349 # define STORE_BRACKET_CHARPOS /* nothing */
2350 #endif
2352 #define PUSH_BPA_STACK \
2353 do { \
2354 bpa_sp++; \
2355 if (bpa_sp >= MAX_BPA_STACK) \
2357 bpa_sp = MAX_BPA_STACK - 1; \
2358 goto bpa_give_up; \
2360 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \
2361 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2362 bpa_stack[bpa_sp].flags = 0; \
2363 STORE_BRACKET_CHARPOS; \
2364 } while (0)
2367 /* This function implements BPA, the Bidi Parenthesis Algorithm,
2368 described in BD16 and N0 of UAX#9. It finds all the bracket pairs
2369 in the current isolating sequence, and records the enclosed type
2370 and the position of the matching bracket in the cache. It returns
2371 non-zero if called with the iterator on the opening bracket which
2372 has a matching closing bracket in the current isolating sequence,
2373 zero otherwise. */
2374 static bool
2375 bidi_find_bracket_pairs (struct bidi_it *bidi_it)
2377 bidi_bracket_type_t btype;
2378 bidi_type_t type = bidi_it->type;
2379 bool retval = false;
2381 /* When scanning backwards, we don't expect any unresolved bidi
2382 bracket characters. */
2383 if (bidi_it->scan_dir != 1)
2384 emacs_abort ();
2386 btype = bidi_paired_bracket_type (bidi_it->ch);
2387 if (btype == BIDI_BRACKET_OPEN)
2389 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2390 int bpa_sp = -1;
2391 struct bidi_it saved_it;
2392 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2393 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2394 struct bidi_it tem_it;
2396 eassert (MAX_BPA_STACK >= 100);
2397 bidi_copy_it (&saved_it, bidi_it);
2398 /* bidi_cache_iterator_state refuses to cache on backward scans,
2399 and bidi_cache_fetch_state doesn't bring scan_dir from the
2400 cache, so we must initialize this explicitly. */
2401 tem_it.scan_dir = 1;
2403 while (1)
2405 int old_sidx, new_sidx;
2406 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2408 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2409 if (btype == BIDI_BRACKET_OPEN)
2410 PUSH_BPA_STACK;
2411 else if (btype == BIDI_BRACKET_CLOSE)
2413 int sp = bpa_sp;
2414 int curchar = bidi_it->ch;
2416 eassert (sp >= 0);
2417 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2418 sp--;
2419 if (sp >= 0)
2421 /* Update and cache the corresponding opening bracket. */
2422 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2423 &tem_it);
2424 #ifdef ENABLE_CHECKING
2425 eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
2426 #endif
2427 /* Determine the enclosed type for this bracket
2428 pair's type resolution according to N0. */
2429 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
2430 tem_it.bracket_enclosed_type = embedding_type; /* N0b */
2431 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
2432 tem_it.bracket_enclosed_type /* N0c */
2433 = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
2434 else /* N0d */
2435 tem_it.bracket_enclosed_type = UNKNOWN_BT;
2437 /* Record the position of the matching closing
2438 bracket, and update the cache. */
2439 tem_it.bracket_pairing_pos = bidi_it->charpos;
2440 bidi_cache_iterator_state (&tem_it, 0, 1);
2442 /* Pop the BPA stack. */
2443 bpa_sp = sp - 1;
2445 if (bpa_sp < 0)
2447 retval = true;
2448 break;
2451 else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
2453 unsigned flag = 0;
2454 int sp;
2456 /* Whenever we see a strong type, update the flags of
2457 all the slots on the stack. */
2458 switch (bidi_it->type)
2460 case STRONG_L:
2461 flag = ((embedding_level & 1) == 0
2462 ? FLAG_EMBEDDING_INSIDE
2463 : FLAG_OPPOSITE_INSIDE);
2464 break;
2465 case STRONG_R:
2466 case WEAK_EN:
2467 case WEAK_AN:
2468 flag = ((embedding_level & 1) == 1
2469 ? FLAG_EMBEDDING_INSIDE
2470 : FLAG_OPPOSITE_INSIDE);
2471 break;
2472 default:
2473 break;
2475 for (sp = bpa_sp; sp >= 0; sp--)
2476 bpa_stack[sp].flags |= flag;
2478 old_sidx = bidi_it->stack_idx;
2479 type = bidi_resolve_weak (bidi_it);
2480 /* Skip level runs excluded from this isolating run sequence. */
2481 new_sidx = bidi_it->stack_idx;
2482 if (bidi_it->level_stack[new_sidx].level > current_level
2483 && (bidi_it->level_stack[new_sidx].isolate_status
2484 || (new_sidx > old_sidx + 1
2485 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2487 while (bidi_it->level_stack[bidi_it->stack_idx].level
2488 > current_level)
2490 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2491 type = bidi_resolve_weak (bidi_it);
2494 if (type == NEUTRAL_B
2495 || (bidi_it->level_stack[bidi_it->stack_idx].level
2496 != current_level))
2498 bpa_give_up:
2499 /* We've marched all the way to the end of this
2500 isolating run sequence, and didn't find matching
2501 closing brackets for some opening brackets. Leave
2502 their type unchanged. */
2503 break;
2505 if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
2506 btype = bidi_paired_bracket_type (bidi_it->ch);
2507 else
2508 btype = BIDI_BRACKET_NONE;
2511 /* Restore bidi_it from the cache, which should have the bracket
2512 resolution members set as determined by the above loop. */
2513 type = bidi_cache_find (saved_it.charpos, 1, bidi_it);
2514 eassert (type == NEUTRAL_ON);
2517 return retval;
2520 static bidi_type_t
2521 bidi_resolve_brackets (struct bidi_it *bidi_it)
2523 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2524 bool resolve_bracket = false;
2525 bidi_type_t type = UNKNOWN_BT;
2526 int ch;
2527 struct bidi_saved_info tem_info;
2529 bidi_remember_char (&tem_info, bidi_it, 1);
2530 if (!bidi_it->first_elt)
2532 type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 1, bidi_it);
2533 ch = bidi_it->ch;
2535 if (type == UNKNOWN_BT)
2537 type = bidi_resolve_weak (bidi_it);
2538 if (type == NEUTRAL_ON && bidi_find_bracket_pairs (bidi_it))
2539 resolve_bracket = true;
2541 else
2543 if (type == NEUTRAL_ON
2544 && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN)
2546 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level)
2548 if (bidi_it->bracket_pairing_pos > 0)
2550 /* A cached opening bracket that wasn't completely
2551 resolved yet. */
2552 resolve_bracket = true;
2555 else
2557 /* Higher levels were not BPA-resolved yet, even if
2558 cached by bidi_find_bracket_pairs. Lower levels were
2559 probably processed by bidi_find_bracket_pairs, but we
2560 have no easy way of retaining the prev_for_neutral
2561 from the previous level run of the isolating
2562 sequence. Force application of BPA now. */
2563 if (bidi_find_bracket_pairs (bidi_it))
2564 resolve_bracket = true;
2567 /* Keep track of the prev_for_neutral type, needed for resolving
2568 brackets below and for resolving neutrals in bidi_resolve_neutral. */
2569 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level
2570 && (tem_info.type == STRONG_L || tem_info.type == STRONG_R
2571 || tem_info.type == WEAK_AN || tem_info.type == WEAK_EN))
2572 bidi_it->prev_for_neutral = tem_info;
2575 /* If needed, resolve the bracket type according to N0. */
2576 if (resolve_bracket)
2578 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2579 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2581 eassert (bidi_it->prev_for_neutral.type != UNKNOWN_BT);
2582 eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
2583 if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
2584 type = embedding_type;
2585 else
2587 switch (bidi_it->prev_for_neutral.type)
2589 case STRONG_R:
2590 case WEAK_EN:
2591 case WEAK_AN:
2592 type =
2593 (bidi_it->bracket_enclosed_type == STRONG_R) /* N0c */
2594 ? STRONG_R /* N0c1 */
2595 : embedding_type; /* N0c2 */
2596 break;
2597 case STRONG_L:
2598 type =
2599 (bidi_it->bracket_enclosed_type == STRONG_L) /* N0c */
2600 ? STRONG_L /* N0c1 */
2601 : embedding_type; /* N0c2 */
2602 break;
2603 default:
2604 /* N0d: Do not set the type for that bracket pair. */
2605 break;
2608 eassert (type == STRONG_L || type == STRONG_R || type == NEUTRAL_ON);
2610 /* Update the type of the paired closing bracket to the same
2611 type as for the resolved opening bracket. */
2612 if (type != NEUTRAL_ON)
2614 ptrdiff_t idx = bidi_cache_search (bidi_it->bracket_pairing_pos,
2615 -1, 1);
2617 if (idx < bidi_cache_start)
2618 emacs_abort ();
2619 bidi_cache[idx].type = type;
2623 return type;
2626 static bidi_type_t
2627 bidi_resolve_neutral (struct bidi_it *bidi_it)
2629 bidi_type_t type = bidi_resolve_brackets (bidi_it);
2630 int current_level;
2631 bool is_neutral;
2633 eassert (type == STRONG_R
2634 || type == STRONG_L
2635 || type == WEAK_BN
2636 || type == WEAK_EN
2637 || type == WEAK_AN
2638 || type == NEUTRAL_B
2639 || type == NEUTRAL_S
2640 || type == NEUTRAL_WS
2641 || type == NEUTRAL_ON
2642 || type == LRI
2643 || type == RLI
2644 || type == PDI);
2646 current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2647 eassert (current_level >= 0);
2648 is_neutral = bidi_get_category (type) == NEUTRAL;
2650 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2651 we are already at paragraph end. */
2652 && (is_neutral || bidi_isolate_fmt_char (type)))
2653 /* N1-N2/Retaining */
2654 || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
2656 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
2657 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2658 bidi_it->next_for_neutral.type,
2659 current_level);
2660 /* The next two "else if" clauses are shortcuts for the
2661 important special case when we have a long sequence of
2662 neutral or WEAK_BN characters, such as whitespace or nulls or
2663 other control characters, on the base embedding level of the
2664 paragraph, and that sequence goes all the way to the end of
2665 the paragraph and follows a character whose resolved
2666 directionality is identical to the base embedding level.
2667 (This is what happens in a buffer with plain L2R text that
2668 happens to include long sequences of control characters.) By
2669 virtue of N1, the result of examining this long sequence will
2670 always be either STRONG_L or STRONG_R, depending on the base
2671 embedding level. So we use this fact directly instead of
2672 entering the expensive loop in the "else" clause. */
2673 else if (current_level == 0
2674 && bidi_it->prev_for_neutral.type == STRONG_L
2675 && !bidi_explicit_dir_char (bidi_it->ch)
2676 && !bidi_isolate_fmt_char (type))
2677 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2678 STRONG_L, current_level);
2679 else if (/* current level is 1 */
2680 current_level == 1
2681 /* base embedding level is also 1 */
2682 && bidi_it->level_stack[0].level == 1
2683 /* previous character is one of those considered R for
2684 the purposes of W5 */
2685 && (bidi_it->prev_for_neutral.type == STRONG_R
2686 || bidi_it->prev_for_neutral.type == WEAK_EN
2687 || bidi_it->prev_for_neutral.type == WEAK_AN)
2688 && !bidi_explicit_dir_char (bidi_it->ch)
2689 && !bidi_isolate_fmt_char (type))
2690 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2691 STRONG_R, current_level);
2692 else
2694 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
2695 the assumption of batch-style processing; see clauses W4,
2696 W5, and especially N1, which require to look far forward
2697 (as well as back) in the buffer/string. May the fleas of
2698 a thousand camels infest the armpits of those who design
2699 supposedly general-purpose algorithms by looking at their
2700 own implementations, and fail to consider other possible
2701 implementations! */
2702 struct bidi_it saved_it;
2703 bidi_type_t next_type;
2704 bool adjacent_to_neutrals = is_neutral;
2706 bidi_copy_it (&saved_it, bidi_it);
2707 /* Scan the text forward until we find the first non-neutral
2708 character, and then use that to resolve the neutral we
2709 are dealing with now. We also cache the scanned iterator
2710 states, to salvage some of the effort later. */
2711 do {
2712 int old_sidx, new_sidx;
2714 /* Paragraph separators have their levels fully resolved
2715 at this point, so cache them as resolved. */
2716 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2717 old_sidx = bidi_it->stack_idx;
2718 type = bidi_resolve_brackets (bidi_it);
2719 /* Skip level runs excluded from this isolating run sequence. */
2720 new_sidx = bidi_it->stack_idx;
2721 if (bidi_it->level_stack[new_sidx].level > current_level
2722 && (bidi_it->level_stack[new_sidx].isolate_status
2723 /* This is for when we have an isolate initiator
2724 immediately followed by an embedding or
2725 override initiator, in which case we get the
2726 level stack pushed twice by the single call to
2727 bidi_resolve_weak above. */
2728 || (new_sidx > old_sidx + 1
2729 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2731 while (bidi_it->level_stack[bidi_it->stack_idx].level
2732 > current_level)
2734 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2735 type = bidi_resolve_brackets (bidi_it);
2738 if (!adjacent_to_neutrals
2739 && (bidi_get_category (type) == NEUTRAL
2740 || bidi_isolate_fmt_char (type)))
2741 adjacent_to_neutrals = true;
2742 } while (!(type == NEUTRAL_B
2743 || (type != WEAK_BN
2744 && bidi_get_category (type) != NEUTRAL
2745 && !bidi_isolate_fmt_char (type))
2746 /* This is all per level run, so stop when we
2747 reach the end of this level run. */
2748 || (bidi_it->level_stack[bidi_it->stack_idx].level
2749 != current_level)));
2751 /* Record the character we stopped at. */
2752 bidi_remember_char (&saved_it.next_for_neutral, bidi_it, 1);
2754 if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
2755 || type == NEUTRAL_B)
2757 /* Marched all the way to the end of this level run. We
2758 need to use the eos type, whose information is stored
2759 by bidi_set_sos_type in the prev_for_neutral
2760 member. */
2761 if (adjacent_to_neutrals)
2762 next_type = bidi_it->prev_for_neutral.type;
2763 else
2765 /* This is a BN which does not adjoin neutrals.
2766 Leave its type alone. */
2767 bidi_copy_it (bidi_it, &saved_it);
2768 return bidi_it->type;
2771 else
2773 switch (type)
2775 case STRONG_L:
2776 case STRONG_R:
2777 case STRONG_AL:
2778 /* Actually, STRONG_AL cannot happen here, because
2779 bidi_resolve_weak converts it to STRONG_R, per W3. */
2780 eassert (type != STRONG_AL);
2781 next_type = type;
2782 break;
2783 case WEAK_EN:
2784 case WEAK_AN:
2785 /* N1: "European and Arabic numbers act as if they
2786 were R in terms of their influence on NIs." */
2787 next_type = STRONG_R;
2788 break;
2789 default:
2790 emacs_abort ();
2791 break;
2794 /* Resolve the type of all the NIs found during the above loop. */
2795 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
2796 next_type, current_level);
2797 /* Update next_for_neutral with the resolved type, so we
2798 could use it for all the other NIs up to the place where
2799 we exited the loop. */
2800 saved_it.next_for_neutral.type = next_type;
2801 bidi_check_type (type);
2802 /* Update the character which caused us to enter the above loop. */
2803 saved_it.type = type;
2804 bidi_check_type (next_type);
2805 bidi_copy_it (bidi_it, &saved_it);
2808 return type;
2811 /* Given an iterator state in BIDI_IT, advance one character position
2812 in the buffer/string to the next character (in the logical order),
2813 resolve the bidi type of that next character, and return that
2814 type. */
2815 static bidi_type_t
2816 bidi_type_of_next_char (struct bidi_it *bidi_it)
2818 bidi_type_t type;
2820 /* This should always be called during a forward scan. */
2821 if (bidi_it->scan_dir != 1)
2822 emacs_abort ();
2824 type = bidi_resolve_neutral (bidi_it);
2826 return type;
2829 /* Given an iterator state BIDI_IT, advance one character position in
2830 the buffer/string to the next character (in the current scan
2831 direction), resolve the embedding and implicit levels of that next
2832 character, and return the resulting level. */
2833 static int
2834 bidi_level_of_next_char (struct bidi_it *bidi_it)
2836 bidi_type_t type = UNKNOWN_BT;
2837 int level;
2838 ptrdiff_t next_char_pos = -2;
2840 if (bidi_it->scan_dir == 1)
2842 ptrdiff_t eob
2843 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2844 ? bidi_it->string.schars : ZV);
2846 /* There's no sense in trying to advance if we've already hit
2847 the end of text. */
2848 if (bidi_it->charpos >= eob)
2850 eassert (bidi_it->resolved_level >= 0);
2851 return bidi_it->resolved_level;
2855 /* Perhaps the character we want is already cached. If it is, the
2856 call to bidi_cache_find below will return a type other than
2857 UNKNOWN_BT. */
2858 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2860 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2861 ? 0 : 1);
2862 bidi_type_t prev_type = bidi_it->type;
2863 bidi_type_t type_for_neutral = bidi_it->next_for_neutral.type;
2864 ptrdiff_t pos_for_neutral = bidi_it->next_for_neutral.charpos;
2866 if (bidi_it->scan_dir > 0)
2868 if (bidi_it->nchars <= 0)
2869 emacs_abort ();
2870 next_char_pos = bidi_it->charpos + bidi_it->nchars;
2872 else if (bidi_it->charpos >= bob)
2873 /* Implementation note: we allow next_char_pos to be as low as
2874 0 for buffers or -1 for strings, and that is okay because
2875 that's the "position" of the sentinel iterator state we
2876 cached at the beginning of the iteration. */
2877 next_char_pos = bidi_it->charpos - 1;
2878 if (next_char_pos >= bob - 1)
2879 type = bidi_cache_find (next_char_pos, 0, bidi_it);
2881 /* For a sequence of BN and NI, copy the type from the previous
2882 character. This is because the loop in bidi_resolve_neutral
2883 that handles such sequences caches the characters it
2884 traverses, but does not (and cannot) store the
2885 next_for_neutral member for them, because it is only known
2886 when the loop ends. So when we find them in the cache, their
2887 type needs to be updated, but we don't have next_for_neutral
2888 to do that. However, whatever type is resolved as result of
2889 that loop, it will be the same for all the traversed
2890 characters, by virtue of N1 and N2. */
2891 if (type == WEAK_BN && bidi_it->scan_dir > 0
2892 && bidi_explicit_dir_char (bidi_it->ch)
2893 && type_for_neutral != UNKNOWN_BT
2894 && bidi_it->charpos < pos_for_neutral)
2896 type = prev_type;
2897 eassert (type != UNKNOWN_BT);
2899 if (type != UNKNOWN_BT)
2901 /* If resolved_level is -1, it means this state was cached
2902 before it was completely resolved, so we cannot return
2903 it. */
2904 if (bidi_it->resolved_level != -1)
2906 eassert (bidi_it->resolved_level >= 0);
2907 return bidi_it->resolved_level;
2909 else
2911 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2912 if (bidi_get_category (type) == NEUTRAL
2913 || bidi_isolate_fmt_char (type))
2915 /* Make sure the data for resolving neutrals we are
2916 about to use is valid. */
2917 if (bidi_it->next_for_neutral.charpos < bidi_it->charpos
2918 /* PDI defines an eos, so it's OK for it to
2919 serve as its own next_for_neutral. */
2920 || (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2921 && bidi_it->type != PDI)
2922 || bidi_it->next_for_neutral.type == UNKNOWN_BT)
2923 emacs_abort ();
2925 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2926 bidi_it->next_for_neutral.type,
2927 level);
2933 if (bidi_it->scan_dir == -1)
2934 /* If we are going backwards, the iterator state is already cached
2935 from previous scans, and should be fully resolved. */
2936 emacs_abort ();
2938 if (type == UNKNOWN_BT)
2939 type = bidi_type_of_next_char (bidi_it);
2941 if (type == NEUTRAL_B)
2943 eassert (bidi_it->resolved_level >= 0);
2944 return bidi_it->resolved_level;
2947 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2949 eassert ((type == STRONG_R
2950 || type == STRONG_L
2951 || type == WEAK_BN
2952 || type == WEAK_EN
2953 || type == WEAK_AN));
2954 bidi_it->type = type;
2955 bidi_check_type (bidi_it->type);
2957 /* For L1 below, we need to know, for each WS character, whether
2958 it belongs to a sequence of WS characters preceding a newline
2959 or a TAB or a paragraph separator. */
2960 if ((bidi_it->orig_type == NEUTRAL_WS
2961 || bidi_isolate_fmt_char (bidi_it->orig_type))
2962 && bidi_it->next_for_ws.charpos < bidi_it->charpos)
2964 int ch;
2965 ptrdiff_t clen = bidi_it->ch_len;
2966 ptrdiff_t bpos = bidi_it->bytepos;
2967 ptrdiff_t cpos = bidi_it->charpos;
2968 ptrdiff_t disp_pos = bidi_it->disp_pos;
2969 ptrdiff_t nc = bidi_it->nchars;
2970 struct bidi_string_data bs = bidi_it->string;
2971 bidi_type_t chtype;
2972 bool fwp = bidi_it->frame_window_p;
2973 int dpp = bidi_it->disp_prop;
2975 if (bidi_it->nchars <= 0)
2976 emacs_abort ();
2977 do {
2978 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
2979 bidi_it->w, fwp, &clen, &nc);
2980 chtype = bidi_get_type (ch, NEUTRAL_DIR);
2981 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2982 || bidi_isolate_fmt_char (chtype)
2983 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2984 bidi_it->next_for_ws.type = chtype;
2985 bidi_check_type (bidi_it->next_for_ws.type);
2986 bidi_it->next_for_ws.charpos = cpos;
2989 /* Update the cache, but only if this state was already cached. */
2990 bidi_cache_iterator_state (bidi_it, 1, 1);
2992 /* Resolve implicit levels. */
2993 if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2994 || bidi_it->orig_type == NEUTRAL_S
2995 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2996 || (bidi_it->orig_type == NEUTRAL_WS
2997 && (bidi_it->next_for_ws.type == NEUTRAL_B
2998 || bidi_it->next_for_ws.type == NEUTRAL_S)))
2999 level = bidi_it->level_stack[0].level;
3000 else if ((level & 1) == 0) /* I1 */
3002 if (type == STRONG_R)
3003 level++;
3004 else if (type == WEAK_EN || type == WEAK_AN)
3005 level += 2;
3007 else /* I2 */
3009 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
3010 level++;
3013 bidi_it->resolved_level = level;
3014 return level;
3017 /* Move to the other edge of a level given by LEVEL. If END_FLAG,
3018 we are at the end of a level, and we need to prepare to
3019 resume the scan of the lower level.
3021 If this level's other edge is cached, we simply jump to it, filling
3022 the iterator structure with the iterator state on the other edge.
3023 Otherwise, we walk the buffer or string until we come back to the
3024 same level as LEVEL.
3026 Note: we are not talking here about a ``level run'' in the UAX#9
3027 sense of the term, but rather about a ``level'' which includes
3028 all the levels higher than it. In other words, given the levels
3029 like this:
3031 11111112222222333333334443343222222111111112223322111
3032 A B C
3034 and assuming we are at point A scanning left to right, this
3035 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
3036 at point B. */
3037 static void
3038 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
3040 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
3041 ptrdiff_t idx;
3043 /* Try the cache first. */
3044 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
3045 >= bidi_cache_start)
3046 bidi_cache_fetch_state (idx, bidi_it);
3047 else
3049 int new_level;
3051 /* If we are at end of level, its edges must be cached. */
3052 if (end_flag)
3053 emacs_abort ();
3055 bidi_cache_iterator_state (bidi_it, 1, 0);
3056 do {
3057 new_level = bidi_level_of_next_char (bidi_it);
3058 bidi_cache_iterator_state (bidi_it, 1, 0);
3059 } while (new_level >= level);
3063 void
3064 bidi_move_to_visually_next (struct bidi_it *bidi_it)
3066 int old_level, new_level, next_level;
3067 struct bidi_it sentinel;
3068 struct gcpro gcpro1;
3070 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
3071 emacs_abort ();
3073 if (bidi_it->scan_dir == 0)
3075 bidi_it->scan_dir = 1; /* default to logical order */
3078 /* The code below can call eval, and thus cause GC. If we are
3079 iterating a Lisp string, make sure it won't be GCed. */
3080 if (STRINGP (bidi_it->string.lstring))
3081 GCPRO1 (bidi_it->string.lstring);
3083 /* If we just passed a newline, initialize for the next line. */
3084 if (!bidi_it->first_elt
3085 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3086 bidi_line_init (bidi_it);
3088 /* Prepare the sentinel iterator state, and cache it. When we bump
3089 into it, scanning backwards, we'll know that the last non-base
3090 level is exhausted. */
3091 if (bidi_cache_idx == bidi_cache_start)
3093 bidi_copy_it (&sentinel, bidi_it);
3094 if (bidi_it->first_elt)
3096 sentinel.charpos--; /* cached charpos needs to be monotonic */
3097 sentinel.bytepos--;
3098 sentinel.ch = '\n'; /* doesn't matter, but why not? */
3099 sentinel.ch_len = 1;
3100 sentinel.nchars = 1;
3102 bidi_cache_iterator_state (&sentinel, 1, 0);
3105 old_level = bidi_it->resolved_level;
3106 new_level = bidi_level_of_next_char (bidi_it);
3108 /* Reordering of resolved levels (clause L2) is implemented by
3109 jumping to the other edge of the level and flipping direction of
3110 scanning the text whenever we find a level change. */
3111 if (new_level != old_level)
3113 bool ascending = new_level > old_level;
3114 int level_to_search = ascending ? old_level + 1 : old_level;
3115 int incr = ascending ? 1 : -1;
3116 int expected_next_level = old_level + incr;
3118 /* Jump (or walk) to the other edge of this level. */
3119 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3120 /* Switch scan direction and peek at the next character in the
3121 new direction. */
3122 bidi_it->scan_dir = -bidi_it->scan_dir;
3124 /* The following loop handles the case where the resolved level
3125 jumps by more than one. This is typical for numbers inside a
3126 run of text with left-to-right embedding direction, but can
3127 also happen in other situations. In those cases the decision
3128 where to continue after a level change, and in what direction,
3129 is tricky. For example, given a text like below:
3131 abcdefgh
3132 11336622
3134 (where the numbers below the text show the resolved levels),
3135 the result of reordering according to UAX#9 should be this:
3137 efdcghba
3139 This is implemented by the loop below which flips direction
3140 and jumps to the other edge of the level each time it finds
3141 the new level not to be the expected one. The expected level
3142 is always one more or one less than the previous one. */
3143 next_level = bidi_peek_at_next_level (bidi_it);
3144 while (next_level != expected_next_level)
3146 /* If next_level is -1, it means we have an unresolved level
3147 in the cache, which at this point should not happen. If
3148 it does, we will infloop. */
3149 eassert (next_level >= 0);
3150 /* If next_level is not consistent with incr, we might
3151 infloop. */
3152 eassert (incr > 0
3153 ? next_level > expected_next_level
3154 : next_level < expected_next_level);
3155 expected_next_level += incr;
3156 level_to_search += incr;
3157 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
3158 bidi_it->scan_dir = -bidi_it->scan_dir;
3159 next_level = bidi_peek_at_next_level (bidi_it);
3162 /* Finally, deliver the next character in the new direction. */
3163 next_level = bidi_level_of_next_char (bidi_it);
3166 /* Take note when we have just processed the newline that precedes
3167 the end of the paragraph. The next time we are about to be
3168 called, set_iterator_to_next will automatically reinit the
3169 paragraph direction, if needed. We do this at the newline before
3170 the paragraph separator, because the next character might not be
3171 the first character of the next paragraph, due to the bidi
3172 reordering, whereas we _must_ know the paragraph base direction
3173 _before_ we process the paragraph's text, since the base
3174 direction affects the reordering. */
3175 if (bidi_it->scan_dir == 1
3176 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
3178 /* The paragraph direction of the entire string, once
3179 determined, is in effect for the entire string. Setting the
3180 separator limit to the end of the string prevents
3181 bidi_paragraph_init from being called automatically on this
3182 string. */
3183 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
3184 bidi_it->separator_limit = bidi_it->string.schars;
3185 else if (bidi_it->bytepos < ZV_BYTE)
3187 ptrdiff_t sep_len
3188 = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
3189 bidi_it->bytepos + bidi_it->ch_len);
3190 if (bidi_it->nchars <= 0)
3191 emacs_abort ();
3192 if (sep_len >= 0)
3194 bidi_it->new_paragraph = 1;
3195 /* Record the buffer position of the last character of the
3196 paragraph separator. */
3197 bidi_it->separator_limit
3198 = bidi_it->charpos + bidi_it->nchars + sep_len;
3203 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
3205 /* If we are at paragraph's base embedding level and beyond the
3206 last cached position, the cache's job is done and we can
3207 discard it. */
3208 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
3209 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
3210 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
3211 bidi_cache_reset ();
3212 /* But as long as we are caching during forward scan, we must
3213 cache each state, or else the cache integrity will be
3214 compromised: it assumes cached states correspond to buffer
3215 positions 1:1. */
3216 else
3217 bidi_cache_iterator_state (bidi_it, 1, 0);
3220 eassert (bidi_it->resolved_level >= 0
3221 && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
3223 if (STRINGP (bidi_it->string.lstring))
3224 UNGCPRO;
3227 /* This is meant to be called from within the debugger, whenever you
3228 wish to examine the cache contents. */
3229 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
3230 void
3231 bidi_dump_cached_states (void)
3233 ptrdiff_t i;
3234 int ndigits = 1;
3236 if (bidi_cache_idx == 0)
3238 fprintf (stderr, "The cache is empty.\n");
3239 return;
3241 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
3242 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
3244 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
3245 ndigits++;
3246 fputs ("ch ", stderr);
3247 for (i = 0; i < bidi_cache_idx; i++)
3248 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
3249 fputs ("\n", stderr);
3250 fputs ("lvl ", stderr);
3251 for (i = 0; i < bidi_cache_idx; i++)
3252 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
3253 fputs ("\n", stderr);
3254 fputs ("pos ", stderr);
3255 for (i = 0; i < bidi_cache_idx; i++)
3256 fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
3257 fputs ("\n", stderr);