src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
   22    A sequential implementation of the Unicode Bidirectional Algorithm
   23    (UBA), as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59
  60 #include "lisp.h"
  61 #include "character.h"
  62 #include "buffer.h"
  63 #include "dispextern.h"
  64
  65 static bool bidi_initialized = 0;
  66
  67 static Lisp_Object bidi_type_table, bidi_mirror_table;
  68
  69 #define LRM_CHAR   0x200E
  70 #define RLM_CHAR   0x200F
  71 #define BIDI_EOB   -1
  72
  73 /* Data type for describing the bidirectional character categories.  */
  74 typedef enum {
  75   UNKNOWN_BC,
  76   NEUTRAL,
  77   WEAK,
  78   STRONG
  79 } bidi_category_t;
  80
  81 /* UAX#9 says to search only for L, AL, or R types of characters, and
  82    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  83    level.  Yudit indeed ignores them.  This variable is therefore set
  84    by default to ignore them, but clearing it will take them into
  85    account.  */
  86 extern bool bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  87 bool bidi_ignore_explicit_marks_for_paragraph_level = 1;
  88
  89 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  90 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  91
  92 \f
  93 /***********************************************************************
  94                         Utilities
  95  ***********************************************************************/
  96
  97 /* Return the bidi type of a character CH, subject to the current
  98    directional OVERRIDE.  */
  99 static bidi_type_t
 100 bidi_get_type (int ch, bidi_dir_t override)
 101 {
 102   bidi_type_t default_type;
 103
 104   if (ch == BIDI_EOB)
 105     return NEUTRAL_B;
 106   if (ch < 0 || ch > MAX_CHAR)
 107     emacs_abort ();
 108
 109   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 110   /* Every valid character code, even those that are unassigned by the
 111      UCD, have some bidi-class property, according to
 112      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 113      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 114   if (default_type == UNKNOWN_BT)
 115     emacs_abort ();
 116
 117   if (override == NEUTRAL_DIR)
 118     return default_type;
 119
 120   switch (default_type)
 121     {
 122       /* Although UAX#9 does not tell, it doesn't make sense to
 123          override NEUTRAL_B and LRM/RLM characters.  */
 124       case NEUTRAL_B:
 125       case LRE:
 126       case LRO:
 127       case RLE:
 128       case RLO:
 129       case PDF:
 130         return default_type;
 131       default:
 132         switch (ch)
 133           {
 134             case LRM_CHAR:
 135             case RLM_CHAR:
 136               return default_type;
 137             default:
 138               if (override == L2R) /* X6 */
 139                 return STRONG_L;
 140               else if (override == R2L)
 141                 return STRONG_R;
 142               else
 143                 emacs_abort (); /* can't happen: handled above */
 144           }
 145     }
 146 }
 147
 148 static void
 149 bidi_check_type (bidi_type_t type)
 150 {
 151   eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 152 }
 153
 154 /* Given a bidi TYPE of a character, return its category.  */
 155 static bidi_category_t
 156 bidi_get_category (bidi_type_t type)
 157 {
 158   switch (type)
 159     {
 160       case UNKNOWN_BT:
 161         return UNKNOWN_BC;
 162       case STRONG_L:
 163       case STRONG_R:
 164       case STRONG_AL:
 165       case LRE:
 166       case LRO:
 167       case RLE:
 168       case RLO:
 169         return STRONG;
 170       case PDF:         /* ??? really?? */
 171       case WEAK_EN:
 172       case WEAK_ES:
 173       case WEAK_ET:
 174       case WEAK_AN:
 175       case WEAK_CS:
 176       case WEAK_NSM:
 177       case WEAK_BN:
 178         return WEAK;
 179       case NEUTRAL_B:
 180       case NEUTRAL_S:
 181       case NEUTRAL_WS:
 182       case NEUTRAL_ON:
 183         return NEUTRAL;
 184       default:
 185         emacs_abort ();
 186     }
 187 }
 188
 189 /* Return the mirrored character of C, if it has one.  If C has no
 190    mirrored counterpart, return C.
 191    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 192    context must be tested by the caller.  */
 193 int
 194 bidi_mirror_char (int c)
 195 {
 196   Lisp_Object val;
 197
 198   if (c == BIDI_EOB)
 199     return c;
 200   if (c < 0 || c > MAX_CHAR)
 201     emacs_abort ();
 202
 203   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 204   if (INTEGERP (val))
 205     {
 206       int v;
 207
 208       /* When debugging, check before assigning to V, so that the check
 209          isn't broken by undefined behavior due to int overflow.  */
 210       eassert (CHAR_VALID_P (XINT (val)));
 211
 212       v = XINT (val);
 213
 214       /* Minimal test we must do in optimized builds, to prevent weird
 215          crashes further down the road.  */
 216       if (v < 0 || v > MAX_CHAR)
 217         emacs_abort ();
 218
 219       return v;
 220     }
 221
 222   return c;
 223 }
 224
 225 /* Determine the start-of-run (sor) directional type given the two
 226    embedding levels on either side of the run boundary.  Also, update
 227    the saved info about previously seen characters, since that info is
 228    generally valid for a single level run.  */
 229 static void
 230 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 231 {
 232   int higher_level = (level_before > level_after ? level_before : level_after);
 233
 234   /* The prev_was_pdf gork is required for when we have several PDFs
 235      in a row.  In that case, we want to compute the sor type for the
 236      next level run only once: when we see the first PDF.  That's
 237      because the sor type depends only on the higher of the two levels
 238      that we find on the two sides of the level boundary (see UAX#9,
 239      clause X10), and so we don't need to know the final embedding
 240      level to which we descend after processing all the PDFs.  */
 241   if (!bidi_it->prev_was_pdf || level_before < level_after)
 242     /* FIXME: should the default sor direction be user selectable?  */
 243     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 244   if (level_before > level_after)
 245     bidi_it->prev_was_pdf = 1;
 246
 247   bidi_it->prev.type = UNKNOWN_BT;
 248   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 249     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 250   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 251   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 252   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 253   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 254     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 255   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 256 }
 257
 258 /* Push the current embedding level and override status; reset the
 259    current level to LEVEL and the current override status to OVERRIDE.  */
 260 static void
 261 bidi_push_embedding_level (struct bidi_it *bidi_it,
 262                            int level, bidi_dir_t override)
 263 {
 264   bidi_it->stack_idx++;
 265   eassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 266   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 267   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 268 }
 269
 270 /* Pop the embedding level and directional override status from the
 271    stack, and return the new level.  */
 272 static int
 273 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 274 {
 275   /* UAX#9 says to ignore invalid PDFs.  */
 276   if (bidi_it->stack_idx > 0)
 277     bidi_it->stack_idx--;
 278   return bidi_it->level_stack[bidi_it->stack_idx].level;
 279 }
 280
 281 /* Record in SAVED_INFO the information about the current character.  */
 282 static void
 283 bidi_remember_char (struct bidi_saved_info *saved_info,
 284                     struct bidi_it *bidi_it)
 285 {
 286   saved_info->charpos = bidi_it->charpos;
 287   saved_info->bytepos = bidi_it->bytepos;
 288   saved_info->type = bidi_it->type;
 289   bidi_check_type (bidi_it->type);
 290   saved_info->type_after_w1 = bidi_it->type_after_w1;
 291   bidi_check_type (bidi_it->type_after_w1);
 292   saved_info->orig_type = bidi_it->orig_type;
 293   bidi_check_type (bidi_it->orig_type);
 294 }
 295
 296 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 297    copies the part of the level stack that is actually in use.  */
 298 static void
 299 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 300 {
 301   /* Copy everything from the start through the active part of
 302      the level stack.  */
 303   memcpy (to, from,
 304           (offsetof (struct bidi_it, level_stack[1])
 305            + from->stack_idx * sizeof from->level_stack[0]));
 306 }
 307
 308 \f
 309 /***********************************************************************
 310                         Caching the bidi iterator states
 311  ***********************************************************************/
 312
 313 #define BIDI_CACHE_CHUNK 200
 314 static struct bidi_it *bidi_cache;
 315 static ptrdiff_t bidi_cache_size = 0;
 316 enum { elsz = sizeof (struct bidi_it) };
 317 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 318 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 319 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 320                                            "stack" level */
 321
 322 /* 5-slot stack for saving the start of the previous level of the
 323    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 324    and we need the same size of our stack.  */
 325 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 326 static int bidi_cache_sp;
 327
 328 /* Size of header used by bidi_shelve_cache.  */
 329 enum
 330   {
 331     bidi_shelve_header_size
 332       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 333          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 334          + sizeof (bidi_cache_last_idx))
 335   };
 336
 337 /* Reset the cache state to the empty state.  We only reset the part
 338    of the cache relevant to iteration of the current object.  Previous
 339    objects, which are pushed on the display iterator's stack, are left
 340    intact.  This is called when the cached information is no more
 341    useful for the current iteration, e.g. when we were reseated to a
 342    new position on the same object.  */
 343 static void
 344 bidi_cache_reset (void)
 345 {
 346   bidi_cache_idx = bidi_cache_start;
 347   bidi_cache_last_idx = -1;
 348 }
 349
 350 /* Shrink the cache to its minimal size.  Called when we init the bidi
 351    iterator for reordering a buffer or a string that does not come
 352    from display properties, because that means all the previously
 353    cached info is of no further use.  */
 354 static void
 355 bidi_cache_shrink (void)
 356 {
 357   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 358     {
 359       bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 360       bidi_cache_size = BIDI_CACHE_CHUNK;
 361     }
 362   bidi_cache_reset ();
 363 }
 364
 365 static void
 366 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 367 {
 368   int current_scan_dir = bidi_it->scan_dir;
 369
 370   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 371     emacs_abort ();
 372
 373   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 374   bidi_it->scan_dir = current_scan_dir;
 375   bidi_cache_last_idx = idx;
 376 }
 377
 378 /* Find a cached state with a given CHARPOS and resolved embedding
 379    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 380    resolved levels in cached states.  DIR, if non-zero, means search
 381    in that direction from the last cache hit.  */
 382 static ptrdiff_t
 383 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 384 {
 385   ptrdiff_t i, i_start;
 386
 387   if (bidi_cache_idx > bidi_cache_start)
 388     {
 389       if (bidi_cache_last_idx == -1)
 390         bidi_cache_last_idx = bidi_cache_idx - 1;
 391       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 392         {
 393           dir = -1;
 394           i_start = bidi_cache_last_idx - 1;
 395         }
 396       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 397                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 398         {
 399           dir = 1;
 400           i_start = bidi_cache_last_idx + 1;
 401         }
 402       else if (dir)
 403         i_start = bidi_cache_last_idx;
 404       else
 405         {
 406           dir = -1;
 407           i_start = bidi_cache_idx - 1;
 408         }
 409
 410       if (dir < 0)
 411         {
 412           /* Linear search for now; FIXME!  */
 413           for (i = i_start; i >= bidi_cache_start; i--)
 414             if (bidi_cache[i].charpos <= charpos
 415                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 416                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 417               return i;
 418         }
 419       else
 420         {
 421           for (i = i_start; i < bidi_cache_idx; i++)
 422             if (bidi_cache[i].charpos <= charpos
 423                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 424                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 425               return i;
 426         }
 427     }
 428
 429   return -1;
 430 }
 431
 432 /* Find a cached state where the resolved level changes to a value
 433    that is lower than LEVEL, and return its cache slot index.  DIR is
 434    the direction to search, starting with the last used cache slot.
 435    If DIR is zero, we search backwards from the last occupied cache
 436    slot.  BEFORE means return the index of the slot that
 437    is ``before'' the level change in the search direction.  That is,
 438    given the cached levels like this:
 439
 440          1122333442211
 441           AB        C
 442
 443    and assuming we are at the position cached at the slot marked with
 444    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 445    index of slot B or A, depending whether BEFORE is, respectively,
 446    true or false.  */
 447 static ptrdiff_t
 448 bidi_cache_find_level_change (int level, int dir, bool before)
 449 {
 450   if (bidi_cache_idx)
 451     {
 452       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 453       int incr = before ? 1 : 0;
 454
 455       eassert (!dir || bidi_cache_last_idx >= 0);
 456
 457       if (!dir)
 458         dir = -1;
 459       else if (!incr)
 460         i += dir;
 461
 462       if (dir < 0)
 463         {
 464           while (i >= bidi_cache_start + incr)
 465             {
 466               if (bidi_cache[i - incr].resolved_level >= 0
 467                   && bidi_cache[i - incr].resolved_level < level)
 468                 return i;
 469               i--;
 470             }
 471         }
 472       else
 473         {
 474           while (i < bidi_cache_idx - incr)
 475             {
 476               if (bidi_cache[i + incr].resolved_level >= 0
 477                   && bidi_cache[i + incr].resolved_level < level)
 478                 return i;
 479               i++;
 480             }
 481         }
 482     }
 483
 484   return -1;
 485 }
 486
 487 static void
 488 bidi_cache_ensure_space (ptrdiff_t idx)
 489 {
 490   /* Enlarge the cache as needed.  */
 491   if (idx >= bidi_cache_size)
 492     {
 493       /* The bidi cache cannot be larger than the largest Lisp string
 494          or buffer.  */
 495       ptrdiff_t string_or_buffer_bound
 496         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 497
 498       /* Also, it cannot be larger than what C can represent.  */
 499       ptrdiff_t c_bound
 500         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 501
 502       bidi_cache
 503         = xpalloc (bidi_cache, &bidi_cache_size,
 504                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 505                    min (string_or_buffer_bound, c_bound), elsz);
 506     }
 507 }
 508
 509 static void
 510 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
 511 {
 512   ptrdiff_t idx;
 513
 514   /* We should never cache on backward scans.  */
 515   if (bidi_it->scan_dir == -1)
 516     emacs_abort ();
 517   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 518
 519   if (idx < 0)
 520     {
 521       idx = bidi_cache_idx;
 522       bidi_cache_ensure_space (idx);
 523       /* Character positions should correspond to cache positions 1:1.
 524          If we are outside the range of cached positions, the cache is
 525          useless and must be reset.  */
 526       if (idx > bidi_cache_start &&
 527           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 528                                + bidi_cache[idx - 1].nchars)
 529            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 530         {
 531           bidi_cache_reset ();
 532           idx = bidi_cache_start;
 533         }
 534       if (bidi_it->nchars <= 0)
 535         emacs_abort ();
 536       bidi_copy_it (&bidi_cache[idx], bidi_it);
 537       if (!resolved)
 538         bidi_cache[idx].resolved_level = -1;
 539     }
 540   else
 541     {
 542       /* Copy only the members which could have changed, to avoid
 543          costly copying of the entire struct.  */
 544       bidi_cache[idx].type = bidi_it->type;
 545       bidi_check_type (bidi_it->type);
 546       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 547       bidi_check_type (bidi_it->type_after_w1);
 548       if (resolved)
 549         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 550       else
 551         bidi_cache[idx].resolved_level = -1;
 552       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 553       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 554       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 555       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 556       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 557       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 558       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 559     }
 560
 561   bidi_cache_last_idx = idx;
 562   if (idx >= bidi_cache_idx)
 563     bidi_cache_idx = idx + 1;
 564 }
 565
 566 static bidi_type_t
 567 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 568 {
 569   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 570
 571   if (i >= bidi_cache_start)
 572     {
 573       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 574
 575       bidi_copy_it (bidi_it, &bidi_cache[i]);
 576       bidi_cache_last_idx = i;
 577       /* Don't let scan direction from the cached state override
 578          the current scan direction.  */
 579       bidi_it->scan_dir = current_scan_dir;
 580       return bidi_it->type;
 581     }
 582
 583   return UNKNOWN_BT;
 584 }
 585
 586 static int
 587 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 588 {
 589   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 590     emacs_abort ();
 591   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 592 }
 593
 594 \f
 595 /***********************************************************************
 596              Pushing and popping the bidi iterator state
 597  ***********************************************************************/
 598
 599 /* Push the bidi iterator state in preparation for reordering a
 600    different object, e.g. display string found at certain buffer
 601    position.  Pushing the bidi iterator boils down to saving its
 602    entire state on the cache and starting a new cache "stacked" on top
 603    of the current cache.  */
 604 void
 605 bidi_push_it (struct bidi_it *bidi_it)
 606 {
 607   /* Save the current iterator state in its entirety after the last
 608      used cache slot.  */
 609   bidi_cache_ensure_space (bidi_cache_idx);
 610   bidi_cache[bidi_cache_idx++] = *bidi_it;
 611
 612   /* Push the current cache start onto the stack.  */
 613   eassert (bidi_cache_sp < IT_STACK_SIZE);
 614   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 615
 616   /* Start a new level of cache, and make it empty.  */
 617   bidi_cache_start = bidi_cache_idx;
 618   bidi_cache_last_idx = -1;
 619 }
 620
 621 /* Restore the iterator state saved by bidi_push_it and return the
 622    cache to the corresponding state.  */
 623 void
 624 bidi_pop_it (struct bidi_it *bidi_it)
 625 {
 626   if (bidi_cache_start <= 0)
 627     emacs_abort ();
 628
 629   /* Reset the next free cache slot index to what it was before the
 630      call to bidi_push_it.  */
 631   bidi_cache_idx = bidi_cache_start - 1;
 632
 633   /* Restore the bidi iterator state saved in the cache.  */
 634   *bidi_it = bidi_cache[bidi_cache_idx];
 635
 636   /* Pop the previous cache start from the stack.  */
 637   if (bidi_cache_sp <= 0)
 638     emacs_abort ();
 639   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 640
 641   /* Invalidate the last-used cache slot data.  */
 642   bidi_cache_last_idx = -1;
 643 }
 644
 645 static ptrdiff_t bidi_cache_total_alloc;
 646
 647 /* Stash away a copy of the cache and its control variables.  */
 648 void *
 649 bidi_shelve_cache (void)
 650 {
 651   unsigned char *databuf;
 652   ptrdiff_t alloc;
 653
 654   /* Empty cache.  */
 655   if (bidi_cache_idx == 0)
 656     return NULL;
 657
 658   alloc = (bidi_shelve_header_size
 659            + bidi_cache_idx * sizeof (struct bidi_it));
 660   databuf = xmalloc (alloc);
 661   bidi_cache_total_alloc += alloc;
 662
 663   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 664   memcpy (databuf + sizeof (bidi_cache_idx),
 665           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 666   memcpy (databuf + sizeof (bidi_cache_idx)
 667           + bidi_cache_idx * sizeof (struct bidi_it),
 668           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 669   memcpy (databuf + sizeof (bidi_cache_idx)
 670           + bidi_cache_idx * sizeof (struct bidi_it)
 671           + sizeof (bidi_cache_start_stack),
 672           &bidi_cache_sp, sizeof (bidi_cache_sp));
 673   memcpy (databuf + sizeof (bidi_cache_idx)
 674           + bidi_cache_idx * sizeof (struct bidi_it)
 675           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 676           &bidi_cache_start, sizeof (bidi_cache_start));
 677   memcpy (databuf + sizeof (bidi_cache_idx)
 678           + bidi_cache_idx * sizeof (struct bidi_it)
 679           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 680           + sizeof (bidi_cache_start),
 681           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 682
 683   return databuf;
 684 }
 685
 686 /* Restore the cache state from a copy stashed away by
 687    bidi_shelve_cache, and free the buffer used to stash that copy.
 688    JUST_FREE means free the buffer, but don't restore the
 689    cache; used when the corresponding iterator is discarded instead of
 690    being restored.  */
 691 void
 692 bidi_unshelve_cache (void *databuf, bool just_free)
 693 {
 694   unsigned char *p = databuf;
 695
 696   if (!p)
 697     {
 698       if (!just_free)
 699         {
 700           /* A NULL pointer means an empty cache.  */
 701           bidi_cache_start = 0;
 702           bidi_cache_sp = 0;
 703           bidi_cache_reset ();
 704         }
 705     }
 706   else
 707     {
 708       if (just_free)
 709         {
 710           ptrdiff_t idx;
 711
 712           memcpy (&idx, p, sizeof (bidi_cache_idx));
 713           bidi_cache_total_alloc
 714             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 715         }
 716       else
 717         {
 718           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 719           bidi_cache_ensure_space (bidi_cache_idx);
 720           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 721                   bidi_cache_idx * sizeof (struct bidi_it));
 722           memcpy (bidi_cache_start_stack,
 723                   p + sizeof (bidi_cache_idx)
 724                   + bidi_cache_idx * sizeof (struct bidi_it),
 725                   sizeof (bidi_cache_start_stack));
 726           memcpy (&bidi_cache_sp,
 727                   p + sizeof (bidi_cache_idx)
 728                   + bidi_cache_idx * sizeof (struct bidi_it)
 729                   + sizeof (bidi_cache_start_stack),
 730                   sizeof (bidi_cache_sp));
 731           memcpy (&bidi_cache_start,
 732                   p + sizeof (bidi_cache_idx)
 733                   + bidi_cache_idx * sizeof (struct bidi_it)
 734                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 735                   sizeof (bidi_cache_start));
 736           memcpy (&bidi_cache_last_idx,
 737                   p + sizeof (bidi_cache_idx)
 738                   + bidi_cache_idx * sizeof (struct bidi_it)
 739                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 740                   + sizeof (bidi_cache_start),
 741                   sizeof (bidi_cache_last_idx));
 742           bidi_cache_total_alloc
 743             -= (bidi_shelve_header_size
 744                 + bidi_cache_idx * sizeof (struct bidi_it));
 745         }
 746
 747       xfree (p);
 748     }
 749 }
 750
 751 \f
 752 /***********************************************************************
 753                         Initialization
 754  ***********************************************************************/
 755 static void
 756 bidi_initialize (void)
 757 {
 758   bidi_type_table = uniprop_table (intern ("bidi-class"));
 759   if (NILP (bidi_type_table))
 760     emacs_abort ();
 761   staticpro (&bidi_type_table);
 762
 763   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 764   if (NILP (bidi_mirror_table))
 765     emacs_abort ();
 766   staticpro (&bidi_mirror_table);
 767
 768   Qparagraph_start = intern ("paragraph-start");
 769   staticpro (&Qparagraph_start);
 770   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 771   if (!STRINGP (paragraph_start_re))
 772     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 773   staticpro (&paragraph_start_re);
 774   Qparagraph_separate = intern ("paragraph-separate");
 775   staticpro (&Qparagraph_separate);
 776   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 777   if (!STRINGP (paragraph_separate_re))
 778     paragraph_separate_re = build_string ("[ \t\f]*$");
 779   staticpro (&paragraph_separate_re);
 780
 781   bidi_cache_sp = 0;
 782   bidi_cache_total_alloc = 0;
 783
 784   bidi_initialized = 1;
 785 }
 786
 787 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 788    end.  */
 789 static void
 790 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 791 {
 792   bidi_it->invalid_levels = 0;
 793   bidi_it->invalid_rl_levels = -1;
 794   bidi_it->stack_idx = 0;
 795   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 796 }
 797
 798 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 799 void
 800 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
 801               struct bidi_it *bidi_it)
 802 {
 803   if (! bidi_initialized)
 804     bidi_initialize ();
 805   if (charpos >= 0)
 806     bidi_it->charpos = charpos;
 807   if (bytepos >= 0)
 808     bidi_it->bytepos = bytepos;
 809   bidi_it->frame_window_p = frame_window_p;
 810   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 811   bidi_it->first_elt = 1;
 812   bidi_set_paragraph_end (bidi_it);
 813   bidi_it->new_paragraph = 1;
 814   bidi_it->separator_limit = -1;
 815   bidi_it->type = NEUTRAL_B;
 816   bidi_it->type_after_w1 = NEUTRAL_B;
 817   bidi_it->orig_type = NEUTRAL_B;
 818   bidi_it->prev_was_pdf = 0;
 819   bidi_it->prev.type = bidi_it->prev.type_after_w1
 820     = bidi_it->prev.orig_type = UNKNOWN_BT;
 821   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 822     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 823   bidi_it->next_for_neutral.charpos = -1;
 824   bidi_it->next_for_neutral.type
 825     = bidi_it->next_for_neutral.type_after_w1
 826     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 827   bidi_it->prev_for_neutral.charpos = -1;
 828   bidi_it->prev_for_neutral.type
 829     = bidi_it->prev_for_neutral.type_after_w1
 830     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 831   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 832   bidi_it->disp_pos = -1;       /* invalid/unknown */
 833   bidi_it->disp_prop = 0;
 834   /* We can only shrink the cache if we are at the bottom level of its
 835      "stack".  */
 836   if (bidi_cache_start == 0)
 837     bidi_cache_shrink ();
 838   else
 839     bidi_cache_reset ();
 840 }
 841
 842 /* Perform initializations for reordering a new line of bidi text.  */
 843 static void
 844 bidi_line_init (struct bidi_it *bidi_it)
 845 {
 846   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 847   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 848   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 849   bidi_it->invalid_levels = 0;
 850   bidi_it->invalid_rl_levels = -1;
 851   /* Setting this to zero will force its recomputation the first time
 852      we need it for W5.  */
 853   bidi_it->next_en_pos = 0;
 854   bidi_it->next_en_type = UNKNOWN_BT;
 855   bidi_it->next_for_ws.type = UNKNOWN_BT;
 856   bidi_set_sor_type (bidi_it,
 857                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 858                      bidi_it->level_stack[0].level); /* X10 */
 859
 860   bidi_cache_reset ();
 861 }
 862
 863 \f
 864 /***********************************************************************
 865                         Fetching characters
 866  ***********************************************************************/
 867
 868 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 869    are zero-based character positions in S, BEGBYTE is byte position
 870    corresponding to BEG.  UNIBYTE means S is a unibyte string.  */
 871 static ptrdiff_t
 872 bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
 873                   const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
 874 {
 875   ptrdiff_t pos = beg;
 876   const unsigned char *p = s + begbyte, *start = p;
 877
 878   if (unibyte)
 879     p = s + end;
 880   else
 881     {
 882       if (!CHAR_HEAD_P (*p))
 883         emacs_abort ();
 884
 885       while (pos < end)
 886         {
 887           p += BYTES_BY_CHAR_HEAD (*p);
 888           pos++;
 889         }
 890     }
 891
 892   return p - start;
 893 }
 894
 895 /* Fetch and return the character at byte position BYTEPOS.  If S is
 896    non-NULL, fetch the character from string S; otherwise fetch the
 897    character from the current buffer.  UNIBYTE means S is a
 898    unibyte string.  */
 899 static int
 900 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
 901 {
 902   if (s)
 903     {
 904       s += bytepos;
 905       if (unibyte)
 906         return *s;
 907     }
 908   else
 909     s = BYTE_POS_ADDR (bytepos);
 910   return STRING_CHAR (s);
 911 }
 912
 913 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 914    character is covered by a display string, treat the entire run of
 915    covered characters as a single character, either u+2029 or u+FFFC,
 916    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 917    specifies the character position of the next display string, or -1
 918    if not yet computed.  When the next character is at or beyond that
 919    position, the function updates DISP_POS with the position of the
 920    next display string.  *DISP_PROP non-zero means that there's really
 921    a display string at DISP_POS, as opposed to when we searched till
 922    DISP_POS without finding one.  If *DISP_PROP is 2, it means the
 923    display spec is of the form `(space ...)', which is replaced with
 924    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 925    string to iterate, or NULL if iterating over a buffer or a Lisp
 926    string; in the latter case, STRING->lstring is the Lisp string.  */
 927 static int
 928 bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
 929                  int *disp_prop, struct bidi_string_data *string,
 930                  bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
 931 {
 932   int ch;
 933   ptrdiff_t endpos
 934     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 935   struct text_pos pos;
 936   int len;
 937
 938   /* If we got past the last known position of display string, compute
 939      the position of the next one.  That position could be at CHARPOS.  */
 940   if (charpos < endpos && charpos > *disp_pos)
 941     {
 942       SET_TEXT_POS (pos, charpos, bytepos);
 943       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 944                                               disp_prop);
 945     }
 946
 947   /* Fetch the character at BYTEPOS.  */
 948   if (charpos >= endpos)
 949     {
 950       ch = BIDI_EOB;
 951       *ch_len = 1;
 952       *nchars = 1;
 953       *disp_pos = endpos;
 954       *disp_prop = 0;
 955     }
 956   else if (charpos >= *disp_pos && *disp_prop)
 957     {
 958       ptrdiff_t disp_end_pos;
 959
 960       /* We don't expect to find ourselves in the middle of a display
 961          property.  Hopefully, it will never be needed.  */
 962       if (charpos > *disp_pos)
 963         emacs_abort ();
 964       /* Text covered by `display' properties and overlays with
 965          display properties or display strings is handled as a single
 966          character that represents the entire run of characters
 967          covered by the display property.  */
 968       if (*disp_prop == 2)
 969         {
 970           /* `(space ...)' display specs are handled as paragraph
 971              separators for the purposes of the reordering; see UAX#9
 972              section 3 and clause HL1 in section 4.3 there.  */
 973           ch = 0x2029;
 974         }
 975       else
 976         {
 977           /* All other display specs are handled as the Unicode Object
 978              Replacement Character.  */
 979           ch = 0xFFFC;
 980         }
 981       disp_end_pos = compute_display_string_end (*disp_pos, string);
 982       if (disp_end_pos < 0)
 983         {
 984           /* Somebody removed the display string from the buffer
 985              behind our back.  Recover by processing this buffer
 986              position as if no display property were present there to
 987              begin with.  */
 988           *disp_prop = 0;
 989           goto normal_char;
 990         }
 991       *nchars = disp_end_pos - *disp_pos;
 992       if (*nchars <= 0)
 993         emacs_abort ();
 994       if (string->s)
 995         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 996                                     disp_end_pos, string->unibyte);
 997       else if (STRINGP (string->lstring))
 998         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 999                                     bytepos, disp_end_pos, string->unibyte);
1000       else
1001         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1002     }
1003   else
1004     {
1005     normal_char:
1006       if (string->s)
1007         {
1008
1009           if (!string->unibyte)
1010             {
1011               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1012               *ch_len = len;
1013             }
1014           else
1015             {
1016               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1017               *ch_len = 1;
1018             }
1019         }
1020       else if (STRINGP (string->lstring))
1021         {
1022           if (!string->unibyte)
1023             {
1024               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1025                                            len);
1026               *ch_len = len;
1027             }
1028           else
1029             {
1030               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1031               *ch_len = 1;
1032             }
1033         }
1034       else
1035         {
1036           ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1037           *ch_len = len;
1038         }
1039       *nchars = 1;
1040     }
1041
1042   /* If we just entered a run of characters covered by a display
1043      string, compute the position of the next display string.  */
1044   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1045       && *disp_prop)
1046     {
1047       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1048       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1049                                               disp_prop);
1050     }
1051
1052   return ch;
1053 }
1054
1055 \f
1056 /***********************************************************************
1057                         Determining paragraph direction
1058  ***********************************************************************/
1059
1060 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1061    Value is the non-negative length of the paragraph separator
1062    following the buffer position, -1 if position is at the beginning
1063    of a new paragraph, or -2 if position is neither at beginning nor
1064    at end of a paragraph.  */
1065 static ptrdiff_t
1066 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1067 {
1068   Lisp_Object sep_re;
1069   Lisp_Object start_re;
1070   ptrdiff_t val;
1071
1072   sep_re = paragraph_separate_re;
1073   start_re = paragraph_start_re;
1074
1075   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1076   if (val < 0)
1077     {
1078       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1079         val = -1;
1080       else
1081         val = -2;
1082     }
1083
1084   return val;
1085 }
1086
1087 /* On my 2005-vintage machine, searching back for paragraph start
1088    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1089    when user types C-p.  The number below limits each call to
1090    bidi_paragraph_init to about 10 ms.  */
1091 #define MAX_PARAGRAPH_SEARCH 7500
1092
1093 /* Find the beginning of this paragraph by looking back in the buffer.
1094    Value is the byte position of the paragraph's beginning, or
1095    BEGV_BYTE if paragraph_start_re is still not found after looking
1096    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1097 static ptrdiff_t
1098 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1099 {
1100   Lisp_Object re = paragraph_start_re;
1101   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1102   ptrdiff_t n = 0;
1103
1104   while (pos_byte > BEGV_BYTE
1105          && n++ < MAX_PARAGRAPH_SEARCH
1106          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1107     {
1108       /* FIXME: What if the paragraph beginning is covered by a
1109          display string?  And what if a display string covering some
1110          of the text over which we scan back includes
1111          paragraph_start_re?  */
1112       pos = find_next_newline_no_quit (pos - 1, -1);
1113       pos_byte = CHAR_TO_BYTE (pos);
1114     }
1115   if (n >= MAX_PARAGRAPH_SEARCH)
1116     pos_byte = BEGV_BYTE;
1117   return pos_byte;
1118 }
1119
/* On a 3.4 GHz machine, searching forward for a strong directional
   character in a long paragraph full of weaks or neutrals takes about
   1 ms for each 20K characters.  The number below limits each call to
   bidi_paragraph_init to less than 10 ms even on slow machines.  */
#define MAX_STRONG_CHAR_SEARCH 100000

/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   On return, bidi_it->paragraph_dir is either L2R or R2L,
   bidi_it->level_stack[0].level is 0 or 1 accordingly, and the
   per-line state has been reset by bidi_line_init.  The one exception
   is the early return taken while still inside a paragraph separator,
   which leaves BIDI_IT unchanged.

   Any value of DIR other than L2R, R2L and NEUTRAL_DIR aborts, as
   does being called at or past the end of non-empty text, or before
   its beginning.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
{
  ptrdiff_t bytepos = bidi_it->bytepos;
  /* Non-zero when iterating over a C string or a Lisp string rather
     than over the current buffer.  */
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  ptrdiff_t pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    emacs_abort ();

  /* An explicitly requested direction is used as is.  */
  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      ptrdiff_t ch_len, nchars;
      ptrdiff_t pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      /* S is the text of the string being iterated, or NULL when
         iterating over a buffer.  */
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P,
         and only if we are iterating on a buffer.  */
      do {
        /* Character position where this scan started; used to bound
           the length of the search for a strong character.  */
        ptrdiff_t pos1;

        bytepos = pstartbyte;
        /* NOTE(review): for strings, BYTEPOS is reset to 0 just above
           while POS keeps the value it had on entry (possibly
           advanced past a newline).  That looks consistent only if
           string iteration always starts at position 0 -- confirm
           against the callers.  */
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        pos1 = pos;
        /* Scan forward until a strong character is found.  Explicit
           embedding/override marks only count as strong when
           bidi_ignore_explicit_marks_for_paragraph_level is zero.  */
        for (pos += nchars, bytepos += ch_len;
             ((bidi_get_category (type) != STRONG)
              || (bidi_ignore_explicit_marks_for_paragraph_level
                  && (type == RLE || type == RLO
                      || type == LRE || type == LRO)))
               /* Stop when searched too far into an abnormally large
                  paragraph full of weak or neutral characters.  */
               && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, a paragraph-separator character ends the
               scan only if the position after it is a paragraph
               separator or a paragraph start.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* If the scan stopped without finding a strong character,
           neither branch below fires and paragraph_dir keeps its
           previous value.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                ptrdiff_t prevpbyte = pstartbyte;
                ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* Rescan from the start of the previous paragraph on
                   the next iteration of the enclosing loop.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    emacs_abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* The base embedding level is 1 for R2L paragraphs, 0 otherwise.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1310
1311 \f
1312 /***********************************************************************
1313                  Resolving explicit and implicit levels.
1314   The rest of this file constitutes the core of the UBA implementation.
1315  ***********************************************************************/
1316
1317 static bool
1318 bidi_explicit_dir_char (int ch)
1319 {
1320   bidi_type_t ch_type;
1321
1322   if (!bidi_initialized)
1323     emacs_abort ();
1324   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1325   return (ch_type == LRE || ch_type == LRO
1326           || ch_type == RLE || ch_type == RLO
1327           || ch_type == PDF);
1328 }
1329
1330 /* A helper function for bidi_resolve_explicit.  It advances to the
1331    next character in logical order and determines the new embedding
1332    level and directional override, but does not take into account
1333    empty embeddings.  */
1334 static int
1335 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1336 {
1337   int curchar;
1338   bidi_type_t type;
1339   int current_level;
1340   int new_level;
1341   bidi_dir_t override;
1342   bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1343
1344   /* If reseat()'ed, don't advance, so as to start iteration from the
1345      position where we were reseated.  bidi_it->bytepos can be less
1346      than BEGV_BYTE after reseat to BEGV.  */
1347   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1348       || bidi_it->first_elt)
1349     {
1350       bidi_it->first_elt = 0;
1351       if (string_p)
1352         {
1353           const unsigned char *p
1354             = (STRINGP (bidi_it->string.lstring)
1355                ? SDATA (bidi_it->string.lstring)
1356                : bidi_it->string.s);
1357
1358           if (bidi_it->charpos < 0)
1359             bidi_it->charpos = 0;
1360           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1361                                                bidi_it->string.unibyte);
1362         }
1363       else
1364         {
1365           if (bidi_it->charpos < BEGV)
1366             bidi_it->charpos = BEGV;
1367           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1368         }
1369     }
1370   /* Don't move at end of buffer/string.  */
1371   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1372     {
1373       /* Advance to the next character, skipping characters covered by
1374          display strings (nchars > 1).  */
1375       if (bidi_it->nchars <= 0)
1376         emacs_abort ();
1377       bidi_it->charpos += bidi_it->nchars;
1378       if (bidi_it->ch_len == 0)
1379         emacs_abort ();
1380       bidi_it->bytepos += bidi_it->ch_len;
1381     }
1382
1383   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1384   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1385   new_level = current_level;
1386
1387   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1388     {
1389       curchar = BIDI_EOB;
1390       bidi_it->ch_len = 1;
1391       bidi_it->nchars = 1;
1392       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1393       bidi_it->disp_prop = 0;
1394     }
1395   else
1396     {
1397       /* Fetch the character at BYTEPOS.  If it is covered by a
1398          display string, treat the entire run of covered characters as
1399          a single character u+FFFC.  */
1400       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1401                                  &bidi_it->disp_pos, &bidi_it->disp_prop,
1402                                  &bidi_it->string, bidi_it->frame_window_p,
1403                                  &bidi_it->ch_len, &bidi_it->nchars);
1404     }
1405   bidi_it->ch = curchar;
1406
1407   /* Don't apply directional override here, as all the types we handle
1408      below will not be affected by the override anyway, and we need
1409      the original type unaltered.  The override will be applied in
1410      bidi_resolve_weak.  */
1411   type = bidi_get_type (curchar, NEUTRAL_DIR);
1412   bidi_it->orig_type = type;
1413   bidi_check_type (bidi_it->orig_type);
1414
1415   if (type != PDF)
1416     bidi_it->prev_was_pdf = 0;
1417
1418   bidi_it->type_after_w1 = UNKNOWN_BT;
1419
1420   switch (type)
1421     {
1422       case RLE: /* X2 */
1423       case RLO: /* X4 */
1424         bidi_it->type_after_w1 = type;
1425         bidi_check_type (bidi_it->type_after_w1);
1426         type = WEAK_BN; /* X9/Retaining */
1427         if (bidi_it->ignore_bn_limit <= -1)
1428           {
1429             if (current_level <= BIDI_MAXLEVEL - 4)
1430               {
1431                 /* Compute the least odd embedding level greater than
1432                    the current level.  */
1433                 new_level = ((current_level + 1) & ~1) + 1;
1434                 if (bidi_it->type_after_w1 == RLE)
1435                   override = NEUTRAL_DIR;
1436                 else
1437                   override = R2L;
1438                 if (current_level == BIDI_MAXLEVEL - 4)
1439                   bidi_it->invalid_rl_levels = 0;
1440                 bidi_push_embedding_level (bidi_it, new_level, override);
1441               }
1442             else
1443               {
1444                 bidi_it->invalid_levels++;
1445                 /* See the commentary about invalid_rl_levels below.  */
1446                 if (bidi_it->invalid_rl_levels < 0)
1447                   bidi_it->invalid_rl_levels = 0;
1448                 bidi_it->invalid_rl_levels++;
1449               }
1450           }
1451         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1452                  || (bidi_it->next_en_pos > bidi_it->charpos
1453                      && bidi_it->next_en_type == WEAK_EN))
1454           type = WEAK_EN;
1455         break;
1456       case LRE: /* X3 */
1457       case LRO: /* X5 */
1458         bidi_it->type_after_w1 = type;
1459         bidi_check_type (bidi_it->type_after_w1);
1460         type = WEAK_BN; /* X9/Retaining */
1461         if (bidi_it->ignore_bn_limit <= -1)
1462           {
1463             if (current_level <= BIDI_MAXLEVEL - 5)
1464               {
1465                 /* Compute the least even embedding level greater than
1466                    the current level.  */
1467                 new_level = ((current_level + 2) & ~1);
1468                 if (bidi_it->type_after_w1 == LRE)
1469                   override = NEUTRAL_DIR;
1470                 else
1471                   override = L2R;
1472                 bidi_push_embedding_level (bidi_it, new_level, override);
1473               }
1474             else
1475               {
1476                 bidi_it->invalid_levels++;
1477                 /* invalid_rl_levels counts invalid levels encountered
1478                    while the embedding level was already too high for
1479                    LRE/LRO, but not for RLE/RLO.  That is because
1480                    there may be exactly one PDF which we should not
1481                    ignore even though invalid_levels is non-zero.
1482                    invalid_rl_levels helps to know what PDF is
1483                    that.  */
1484                 if (bidi_it->invalid_rl_levels >= 0)
1485                   bidi_it->invalid_rl_levels++;
1486               }
1487           }
1488         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1489                  || (bidi_it->next_en_pos > bidi_it->charpos
1490                      && bidi_it->next_en_type == WEAK_EN))
1491           type = WEAK_EN;
1492         break;
1493       case PDF: /* X7 */
1494         bidi_it->type_after_w1 = type;
1495         bidi_check_type (bidi_it->type_after_w1);
1496         type = WEAK_BN; /* X9/Retaining */
1497         if (bidi_it->ignore_bn_limit <= -1)
1498           {
1499             if (!bidi_it->invalid_rl_levels)
1500               {
1501                 new_level = bidi_pop_embedding_level (bidi_it);
1502                 bidi_it->invalid_rl_levels = -1;
1503                 if (bidi_it->invalid_levels)
1504                   bidi_it->invalid_levels--;
1505                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1506               }
1507             if (!bidi_it->invalid_levels)
1508               new_level = bidi_pop_embedding_level (bidi_it);
1509             else
1510               {
1511                 bidi_it->invalid_levels--;
1512                 bidi_it->invalid_rl_levels--;
1513               }
1514           }
1515         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1516                  || (bidi_it->next_en_pos > bidi_it->charpos
1517                      && bidi_it->next_en_type == WEAK_EN))
1518           type = WEAK_EN;
1519         break;
1520       default:
1521         /* Nothing.  */
1522         break;
1523     }
1524
1525   bidi_it->type = type;
1526   bidi_check_type (bidi_it->type);
1527
1528   return new_level;
1529 }
1530
1531 /* Given an iterator state in BIDI_IT, advance one character position
1532    in the buffer/string to the next character (in the logical order),
1533    resolve any explicit embeddings and directional overrides, and
1534    return the embedding level of the character after resolving
1535    explicit directives and ignoring empty embeddings.  */
1536 static int
1537 bidi_resolve_explicit (struct bidi_it *bidi_it)
1538 {
1539   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1540   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1541   ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1542   const unsigned char *s
1543     = (STRINGP (bidi_it->string.lstring)
1544        ? SDATA (bidi_it->string.lstring)
1545        : bidi_it->string.s);
1546
1547   if (prev_level < new_level
1548       && bidi_it->type == WEAK_BN
1549       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1550       && bidi_it->charpos < eob         /* not already at EOB */
1551       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1552                                                    + bidi_it->ch_len, s,
1553                                                    bidi_it->string.unibyte)))
1554     {
1555       /* Avoid pushing and popping embedding levels if the level run
1556          is empty, as this breaks level runs where it shouldn't.
1557          UAX#9 removes all the explicit embedding and override codes,
1558          so empty embeddings disappear without a trace.  We need to
1559          behave as if we did the same.  */
1560       struct bidi_it saved_it;
1561       int level = prev_level;
1562
1563       bidi_copy_it (&saved_it, bidi_it);
1564
1565       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1566                                                        + bidi_it->ch_len, s,
1567                                                        bidi_it->string.unibyte)))
1568         {
1569           /* This advances to the next character, skipping any
1570              characters covered by display strings.  */
1571           level = bidi_resolve_explicit_1 (bidi_it);
1572           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1573              a pointer to its data is no longer valid.  */
1574           if (STRINGP (bidi_it->string.lstring))
1575             s = SDATA (bidi_it->string.lstring);
1576         }
1577
1578       if (bidi_it->nchars <= 0)
1579         emacs_abort ();
1580       if (level == prev_level)  /* empty embedding */
1581         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1582       else                      /* this embedding is non-empty */
1583         saved_it.ignore_bn_limit = -2;
1584
1585       bidi_copy_it (bidi_it, &saved_it);
1586       if (bidi_it->ignore_bn_limit > -1)
1587         {
1588           /* We pushed a level, but we shouldn't have.  Undo that. */
1589           if (!bidi_it->invalid_rl_levels)
1590             {
1591               new_level = bidi_pop_embedding_level (bidi_it);
1592               bidi_it->invalid_rl_levels = -1;
1593               if (bidi_it->invalid_levels)
1594                 bidi_it->invalid_levels--;
1595             }
1596           if (!bidi_it->invalid_levels)
1597             new_level = bidi_pop_embedding_level (bidi_it);
1598           else
1599             {
1600               bidi_it->invalid_levels--;
1601               bidi_it->invalid_rl_levels--;
1602             }
1603         }
1604     }
1605
1606   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1607     {
1608       bidi_set_paragraph_end (bidi_it);
1609       /* This is needed by bidi_resolve_weak below, and in L1.  */
1610       bidi_it->type_after_w1 = bidi_it->type;
1611       bidi_check_type (bidi_it->type_after_w1);
1612     }
1613
1614   return new_level;
1615 }
1616
1617 /* Advance in the buffer/string, resolve weak types and return the
1618    type of the next character after weak type resolution.

        BIDI_IT is moved to the next character by bidi_resolve_explicit,
        whose return value is taken as the embedding level of the new
        character.  If a directional override is in effect (X6), the
        type is forced to R or L; otherwise rules W1 through W5 are
        applied.  W6 and W7 follow in either case.  The result is stored
        in BIDI_IT->type; BIDI_IT->type_after_w1 is set as well, unless
        it was already recorded earlier.  Aborts if the type of the new
        character is UNKNOWN_BT or one of the explicit formatting
        codes.  */
1619 static bidi_type_t
1620 bidi_resolve_weak (struct bidi_it *bidi_it)
1621 {
1622   bidi_type_t type;
1623   bidi_dir_t override;
1624   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1625   int new_level  = bidi_resolve_explicit (bidi_it);
1626   int next_char;
1627   bidi_type_t type_of_next;
1628   struct bidi_it saved_it;
       /* Where the text being scanned ends: string.schars when iterating
          over a string, ZV otherwise.  */
1629   ptrdiff_t eob
1630     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1631        ? bidi_it->string.schars : ZV);
1632
1633   type = bidi_it->type;
1634   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1635
       /* None of these types is expected at this point.  */
1636   if (type == UNKNOWN_BT
1637       || type == LRE
1638       || type == LRO
1639       || type == RLE
1640       || type == RLO
1641       || type == PDF)
1642     emacs_abort ();
1643
1644   if (new_level != prev_level
1645       || bidi_it->type == NEUTRAL_B)
1646     {
1647       /* We've got a new embedding level run, compute the directional
1648          type of sor and initialize per-run variables (UAX#9, clause
1649          X10).  */
1650       bidi_set_sor_type (bidi_it, prev_level, new_level);
1651     }
1652   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1653            || type == WEAK_BN || type == STRONG_AL)
1654     bidi_it->type_after_w1 = type;      /* needed in L1 */
1655   bidi_check_type (bidi_it->type_after_w1);
1656
1657   /* Level and directional override status are already recorded in
1658      bidi_it, and do not need any change; see X6.  */
1659   if (override == R2L)          /* X6 */
1660     type = STRONG_R;
1661   else if (override == L2R)
1662     type = STRONG_L;
1663   else
1664     {
1665       if (type == WEAK_NSM)     /* W1 */
1666         {
1667           /* Note that we don't need to consider the case where the
1668              prev character has its type overridden by an RLO or LRO,
1669              because then either the type of this NSM would have been
1670              also overridden, or the previous character is outside the
1671              current level run, and thus not relevant to this NSM.
1672              This is why NSM gets the type_after_w1 of the previous
1673              character.  */
1674           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1675               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1676               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1677             type = bidi_it->prev.type_after_w1;
1678           else if (bidi_it->sor == R2L)
1679             type = STRONG_R;
1680           else if (bidi_it->sor == L2R)
1681             type = STRONG_L;
1682           else /* shouldn't happen! */
1683             emacs_abort ();
1684         }
1685       if (type == WEAK_EN       /* W2 */
1686           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1687         type = WEAK_AN;
1688       else if (type == STRONG_AL) /* W3 */
1689         type = STRONG_R;
1690       else if ((type == WEAK_ES /* W4 */
1691                 && bidi_it->prev.type_after_w1 == WEAK_EN
1692                 && bidi_it->prev.orig_type == WEAK_EN)
1693                || (type == WEAK_CS
1694                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1695                         && bidi_it->prev.orig_type == WEAK_EN)
1696                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1697         {
1698           const unsigned char *s
1699             = (STRINGP (bidi_it->string.lstring)
1700                ? SDATA (bidi_it->string.lstring)
1701                : bidi_it->string.s);
1702
               /* W4 depends on the character after this one; at the end
                  of the text BIDI_EOB stands in for it.  */
1703           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1704                        ? BIDI_EOB
1705                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1706                                            s, bidi_it->string.unibyte));
1707           type_of_next = bidi_get_type (next_char, override);
1708
               /* If a BN or an explicit formatting code follows, look
                  past it: keep advancing while the level stays at
                  new_level and the type is BN, working on BIDI_IT itself
                  and restoring it from a saved copy afterwards.  */
1709           if (type_of_next == WEAK_BN
1710               || bidi_explicit_dir_char (next_char))
1711             {
1712               bidi_copy_it (&saved_it, bidi_it);
1713               while (bidi_resolve_explicit (bidi_it) == new_level
1714                      && bidi_it->type == WEAK_BN)
1715                 ;
1716               type_of_next = bidi_it->type;
1717               bidi_copy_it (bidi_it, &saved_it);
1718             }
1719
1720           /* If the next character is EN, but the last strong-type
1721              character is AL, that next EN will be changed to AN when
1722              we process it in W2 above.  So in that case, this ES
1723              should not be changed into EN.  */
1724           if (type == WEAK_ES
1725               && type_of_next == WEAK_EN
1726               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1727             type = WEAK_EN;
1728           else if (type == WEAK_CS)
1729             {
1730               if (bidi_it->prev.type_after_w1 == WEAK_AN
1731                   && (type_of_next == WEAK_AN
1732                       /* If the next character is EN, but the last
1733                          strong-type character is AL, EN will be later
1734                          changed to AN when we process it in W2 above.
1735                          So in that case, this CS should be changed
1736                          into AN as well.  */
1737                       || (type_of_next == WEAK_EN
1738                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1739                 type = WEAK_AN;
1740               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1741                        && type_of_next == WEAK_EN
1742                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1743                 type = WEAK_EN;
1744             }
1745         }
1746       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1747                || type == WEAK_BN)      /* W5/Retaining */
1748         {
1749           if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1750             type = WEAK_EN;
1751           else if (bidi_it->next_en_pos > bidi_it->charpos
1752                    && bidi_it->next_en_type != WEAK_BN)
1753             {
                   /* An earlier search already recorded where this run
                      ends and what type follows it; reuse that.  */
1754               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1755                 type = WEAK_EN;
1756             }
1757           else if (bidi_it->next_en_pos >=0)
1758             {
1759               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1760               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1761                                         ? SDATA (bidi_it->string.lstring)
1762                                         : bidi_it->string.s);
1763
1764               if (bidi_it->nchars <= 0)
1765                 emacs_abort ();
1766               next_char
1767                 = (bidi_it->charpos + bidi_it->nchars >= eob
1768                    ? BIDI_EOB
1769                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1770                                        bidi_it->string.unibyte));
1771               type_of_next = bidi_get_type (next_char, override);
1772
                   /* Search forward past ETs and BNs on this level for
                      the character that ends the run; BIDI_IT is
                      restored from the saved copy afterwards.  */
1773               if (type_of_next == WEAK_ET
1774                   || type_of_next == WEAK_BN
1775                   || bidi_explicit_dir_char (next_char))
1776                 {
1777                   bidi_copy_it (&saved_it, bidi_it);
1778                   while (bidi_resolve_explicit (bidi_it) == new_level
1779                          && (bidi_it->type == WEAK_BN
1780                              || bidi_it->type == WEAK_ET))
1781                     ;
1782                   type_of_next = bidi_it->type;
1783                   en_pos = bidi_it->charpos;
1784                   bidi_copy_it (bidi_it, &saved_it);
1785                 }
1786               /* Remember this position, to speed up processing of the
1787                  next ETs.  */
1788               bidi_it->next_en_pos = en_pos;
1789               if (type_of_next == WEAK_EN)
1790                 {
1791                   /* If the last strong character is AL, the EN we've
1792                      found will become AN when we get to it (W2). */
1793                   if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1794                     type_of_next = WEAK_AN;
1795                   else if (type == WEAK_BN)
1796                     type = NEUTRAL_ON; /* W6/Retaining */
1797                   else
1798                     type = WEAK_EN;
1799                 }
1800               else if (type_of_next == NEUTRAL_B)
1801                 /* Record the fact that there are no more ENs from
1802                    here to the end of paragraph, to avoid entering the
1803                    loop above ever again in this paragraph.  */
1804                 bidi_it->next_en_pos = -1;
1805               /* Record the type of the character where we ended our search.  */
1806               bidi_it->next_en_type = type_of_next;
1807             }
1808         }
1809     }
1810
1811   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1812       || (type == WEAK_BN
1813           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1814               || bidi_it->prev.type_after_w1 == WEAK_ES
1815               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1816     type = NEUTRAL_ON;
1817
1818   /* Store the type we've got so far, before we clobber it with strong
1819      types in W7 and while resolving neutral types.  But leave alone
1820      the original types that were recorded above, because we will need
1821      them for the L1 clause.  */
1822   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1823     bidi_it->type_after_w1 = type;
1824   bidi_check_type (bidi_it->type_after_w1);
1825
1826   if (type == WEAK_EN)  /* W7 */
1827     {
           /* EN becomes L if the last strong type seen was L, or if
              last_strong is still unknown and sor is L2R.  */
1828       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1829           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1830         type = STRONG_L;
1831     }
1832
1833   bidi_it->type = type;
1834   bidi_check_type (bidi_it->type);
1835   return type;
1836 }
1837
1838 /* Return the strong type a neutral character resolves to, given the
1839    types PREV_TYPE and NEXT_TYPE around it and embedding level LEV.  */
1840 static bidi_type_t
1841 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1842 {
1843   /* N1: numbers (EN and AN) on either side count as R.  */
1844   bidi_type_t before
1845     = (prev_type == WEAK_EN || prev_type == WEAK_AN) ? STRONG_R : prev_type;
1846   bidi_type_t after
1847     = (next_type == WEAK_EN || next_type == WEAK_AN) ? STRONG_R : next_type;
1848
1849   /* N1: both sides agree, so the neutral takes their type.  */
1850   if (before == after)
1851     return after;
1852
1853   /* N2: otherwise the embedding level decides; odd levels give R.  */
1854   return (lev & 1) ? STRONG_R : STRONG_L;
1855 }
1856
     /* Advance BIDI_IT with bidi_resolve_weak and return the type of the
        new character after neutral types are resolved (N1 and N2).

        Only neutral types other than NEUTRAL_B, and WEAK_BN when the
        embedding level did not change, are resolved here; any other
        type is returned as bidi_resolve_weak produced it.  Resolution
        uses BIDI_IT->next_for_neutral if its type is known, then one of
        two shortcuts that need no lookahead, and as a last resort a
        forward scan to the next non-neutral character.  That scan
        caches the iterator states it passes and restores BIDI_IT from a
        saved copy before returning.  Aborts on a type outside the set
        checked below, or if the scan is needed while BIDI_IT->scan_dir
        is -1.  */
1857 static bidi_type_t
1858 bidi_resolve_neutral (struct bidi_it *bidi_it)
1859 {
1860   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1861   bidi_type_t type = bidi_resolve_weak (bidi_it);
1862   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1863
1864   if (!(type == STRONG_R
1865         || type == STRONG_L
1866         || type == WEAK_BN
1867         || type == WEAK_EN
1868         || type == WEAK_AN
1869         || type == NEUTRAL_B
1870         || type == NEUTRAL_S
1871         || type == NEUTRAL_WS
1872         || type == NEUTRAL_ON))
1873     emacs_abort ();
1874
1875   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1876                             we are already at paragraph end.  */
1877        && bidi_get_category (type) == NEUTRAL)
1878       || (type == WEAK_BN && prev_level == current_level))
1879     {
           /* The strong type that follows is already known.  */
1880       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1881         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1882                                        bidi_it->next_for_neutral.type,
1883                                        current_level);
1884       /* The next two "else if" clauses are shortcuts for the
1885          important special case when we have a long sequence of
1886          neutral or WEAK_BN characters, such as whitespace or nulls or
1887          other control characters, on the base embedding level of the
1888          paragraph, and that sequence goes all the way to the end of
1889          the paragraph and follows a character whose resolved
1890          directionality is identical to the base embedding level.
1891          (This is what happens in a buffer with plain L2R text that
1892          happens to include long sequences of control characters.)  By
1893          virtue of N1, the result of examining this long sequence will
1894          always be either STRONG_L or STRONG_R, depending on the base
1895          embedding level.  So we use this fact directly instead of
1896          entering the expensive loop in the "else" clause.  */
1897       else if (current_level == 0
1898                && bidi_it->prev_for_neutral.type == STRONG_L
1899                && !bidi_explicit_dir_char (bidi_it->ch))
1900         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1901                                        STRONG_L, current_level);
1902       else if (/* current level is 1 */
1903                current_level == 1
1904                /* base embedding level is also 1 */
1905                && bidi_it->level_stack[0].level == 1
1906                /* previous character is one of those considered R for
1907                   the purposes of N1 */
1908                && (bidi_it->prev_for_neutral.type == STRONG_R
1909                    || bidi_it->prev_for_neutral.type == WEAK_EN
1910                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1911                && !bidi_explicit_dir_char (bidi_it->ch))
1912         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1913                                        STRONG_R, current_level);
1914       else
1915         {
1916           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1917              the assumption of batch-style processing; see clauses W4,
1918              W5, and especially N1, which require to look far forward
1919              (as well as back) in the buffer/string.  May the fleas of
1920              a thousand camels infest the armpits of those who design
1921              supposedly general-purpose algorithms by looking at their
1922              own implementations, and fail to consider other possible
1923              implementations!  */
1924           struct bidi_it saved_it;
1925           bidi_type_t next_type;
1926
               /* The lookahead below only works on a forward scan.  */
1927           if (bidi_it->scan_dir == -1)
1928             emacs_abort ();
1929
1930           bidi_copy_it (&saved_it, bidi_it);
1931           /* Scan the text forward until we find the first non-neutral
1932              character, and then use that to resolve the neutral we
1933              are dealing with now.  We also cache the scanned iterator
1934              states, to salvage some of the effort later.  */
1935           bidi_cache_iterator_state (bidi_it, 0);
1936           do {
1937             /* Record the info about the previous character, so that
1938                it will be cached below with this state.  */
1939             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1940                 && bidi_it->type != WEAK_BN)
1941               bidi_remember_char (&bidi_it->prev, bidi_it);
1942             type = bidi_resolve_weak (bidi_it);
1943             /* Paragraph separators have their levels fully resolved
1944                at this point, so cache them as resolved.  */
1945             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1946             /* FIXME: implement L1 here, by testing for a newline and
1947                resetting the level for any sequence of whitespace
1948                characters adjacent to it.  */
1949           } while (!(type == NEUTRAL_B
1950                      || (type != WEAK_BN
1951                          && bidi_get_category (type) != NEUTRAL)
1952                      /* This is all per level run, so stop when we
1953                         reach the end of this level run.  */
1954                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1955                          != current_level)));
1956
               /* Note where the scan stopped.  This goes into the saved
                  state, which is the one copied back into BIDI_IT
                  below.  */
1957           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1958
1959           switch (type)
1960             {
1961               case STRONG_L:
1962               case STRONG_R:
1963               case STRONG_AL:
1964                 /* Actually, STRONG_AL cannot happen here, because
1965                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
1966                 eassert (type != STRONG_AL);
1967                 next_type = type;
1968                 break;
1969               case WEAK_EN:
1970               case WEAK_AN:
1971                 /* N1: ``European and Arabic numbers are treated as
1972                    though they were R.''  */
1973                 next_type = STRONG_R;
1974                 break;
1975               case WEAK_BN:
1976                 if (!bidi_explicit_dir_char (bidi_it->ch))
1977                   emacs_abort (); /* can't happen: BNs are skipped */
1978                 /* FALLTHROUGH */
1979               case NEUTRAL_B:
1980                 /* Marched all the way to the end of this level run.
1981                    We need to use the eor type, whose information is
1982                    stored by bidi_set_sor_type in the prev_for_neutral
1983                    member.  */
1984                 if (saved_it.type != WEAK_BN
1985                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1986                   next_type = bidi_it->prev_for_neutral.type;
1987                 else
1988                   {
1989                     /* This is a BN which does not adjoin neutrals.
1990                        Leave its type alone.  */
1991                     bidi_copy_it (bidi_it, &saved_it);
1992                     return bidi_it->type;
1993                   }
1994                 break;
1995               default:
1996                 emacs_abort ();
1997             }
               /* Resolve the character we started from, record the
                  result in the saved state, and restore BIDI_IT from
                  it.  */
1998           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1999                                          next_type, current_level);
2000           saved_it.next_for_neutral.type = next_type;
2001           saved_it.type = type;
2002           bidi_check_type (next_type);
2003           bidi_check_type (type);
2004           bidi_copy_it (bidi_it, &saved_it);
2005         }
2006     }
2007   return type;
2008 }
2009
2010 /* Step BIDI_IT one character forward in the logical order, resolve
2011    the bidi type of the character we land on, and return that type.
2012    BIDI_IT must be scanning forward; any other scan direction
2013    aborts.  */
2014 static bidi_type_t
2015 bidi_type_of_next_char (struct bidi_it *bidi_it)
2016 {
2017   /* Only a forward scan makes sense here.  */
2018   if (bidi_it->scan_dir != 1)
2019     emacs_abort ();
2020
2021   /* Stop ignoring BNs once we leave the area where only empty levels
2022      were found: a limit of -2 ends at the first character that is not
2023      an explicit formatting code, a non-negative one once reached.  */
2024   if (bidi_it->ignore_bn_limit == -2)
2025     {
2026       if (!bidi_explicit_dir_char (bidi_it->ch))
2027         bidi_it->ignore_bn_limit = -1;
2028     }
2029   else if (bidi_it->ignore_bn_limit > -1
2030            && bidi_it->ignore_bn_limit <= bidi_it->charpos)
2031     bidi_it->ignore_bn_limit = -1;
2032
2033   return bidi_resolve_neutral (bidi_it);
2034 }
2035
2036 /* Given an iterator state BIDI_IT, advance one character position in
2037    the buffer/string to the next character (in the current scan
2038    direction), resolve the embedding and implicit levels of that next
2039    character, and return the resulting level.

        The cache is consulted first; a cached state whose
        resolved_level is not -1 is returned as is.  A state that is not
        in the cache is resolved with bidi_type_of_next_char, which is
        only possible on a forward scan: a backward scan that is not
        satisfied from the cache aborts.  On a forward scan that is
        already at the end of the text, the current resolved_level is
        returned and BIDI_IT is left where it is.  Except for those
        early returns and for paragraph separators, the level computed
        here is also stored in BIDI_IT->resolved_level.  */
2040 static int
2041 bidi_level_of_next_char (struct bidi_it *bidi_it)
2042 {
2043   bidi_type_t type;
2044   int level, prev_level = -1;
2045   struct bidi_saved_info next_for_neutral;
2046   ptrdiff_t next_char_pos = -2;
2047
2048   if (bidi_it->scan_dir == 1)
2049     {
2050       ptrdiff_t eob
2051         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2052            ? bidi_it->string.schars : ZV);
2053
2054       /* There's no sense in trying to advance if we hit end of text.  */
2055       if (bidi_it->charpos >= eob)
2056         return bidi_it->resolved_level;
2057
2058       /* Record the info about the previous character.  */
2059       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2060           && bidi_it->type != WEAK_BN)
2061         bidi_remember_char (&bidi_it->prev, bidi_it);
2062       if (bidi_it->type_after_w1 == STRONG_R
2063           || bidi_it->type_after_w1 == STRONG_L
2064           || bidi_it->type_after_w1 == STRONG_AL)
2065         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2066       /* FIXME: it sounds like we don't need both prev and
2067          prev_for_neutral members, but I'm leaving them both for now.  */
2068       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2069           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2070         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2071
2072       /* If we overstepped the characters used for resolving neutrals
2073          and whitespace, invalidate their info in the iterator.  */
2074       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2075         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2076       if (bidi_it->next_en_pos >= 0
2077           && bidi_it->charpos >= bidi_it->next_en_pos)
2078         {
2079           bidi_it->next_en_pos = 0;
2080           bidi_it->next_en_type = UNKNOWN_BT;
2081         }
2082       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2083           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2084         bidi_it->next_for_ws.type = UNKNOWN_BT;
2085
2086       /* This must be taken before we fill the iterator with the info
2087          about the next char.  If we scan backwards, the iterator
2088          state must be already cached, so there's no need to know the
2089          embedding level of the previous character, since we will be
2090          returning to our caller shortly.  */
2091       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2092     }
       /* Keep a copy: fetching a cached state below may replace this
          member of BIDI_IT.  */
2093   next_for_neutral = bidi_it->next_for_neutral;
2094
2095   /* Perhaps the character we want is already cached.  If it is, the
2096      call to bidi_cache_find below will return a type other than
2097      UNKNOWN_BT.  */
2098   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2099     {
           /* First character position: 0 in a string, 1 otherwise.  */
2100       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2101                  ? 0 : 1);
2102       if (bidi_it->scan_dir > 0)
2103         {
2104           if (bidi_it->nchars <= 0)
2105             emacs_abort ();
2106           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2107         }
2108       else if (bidi_it->charpos >= bob)
2109         /* Implementation note: we allow next_char_pos to be as low as
2110            0 for buffers or -1 for strings, and that is okay because
2111            that's the "position" of the sentinel iterator state we
2112            cached at the beginning of the iteration.  */
2113         next_char_pos = bidi_it->charpos - 1;
           /* If neither branch above assigned it, next_char_pos still
              has its initial value of -2, which fails this test.  */
2114       if (next_char_pos >= bob - 1)
2115         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2116       else
2117         type = UNKNOWN_BT;
2118     }
2119   else
2120     type = UNKNOWN_BT;
2121   if (type != UNKNOWN_BT)
2122     {
2123       /* Don't lose the information for resolving neutrals!  The
2124          cached states could have been cached before their
2125          next_for_neutral member was computed.  If we are on our way
2126          forward, we can simply take the info from the previous
2127          state.  */
2128       if (bidi_it->scan_dir == 1
2129           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2130         bidi_it->next_for_neutral = next_for_neutral;
2131
2132       /* If resolved_level is -1, it means this state was cached
2133          before it was completely resolved, so we cannot return
2134          it.  */
2135       if (bidi_it->resolved_level != -1)
2136         return bidi_it->resolved_level;
2137     }
2138   if (bidi_it->scan_dir == -1)
2139     /* If we are going backwards, the iterator state is already cached
2140        from previous scans, and should be fully resolved.  */
2141     emacs_abort ();
2142
2143   if (type == UNKNOWN_BT)
2144     type = bidi_type_of_next_char (bidi_it);
2145
       /* For a paragraph separator, return the level already recorded
          in the iterator.  */
2146   if (type == NEUTRAL_B)
2147     return bidi_it->resolved_level;
2148
2149   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2150   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2151       || (type == WEAK_BN && prev_level == level))
2152     {
2153       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2154         emacs_abort ();
2155
2156       /* If the cached state shows a neutral character, it was not
2157          resolved by bidi_resolve_neutral, so do it now.  */
2158       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2159                                      bidi_it->next_for_neutral.type,
2160                                      level);
2161     }
2162
       /* Only these types may remain at this point.  */
2163   if (!(type == STRONG_R
2164         || type == STRONG_L
2165         || type == WEAK_BN
2166         || type == WEAK_EN
2167         || type == WEAK_AN))
2168     emacs_abort ();
2169   bidi_it->type = type;
2170   bidi_check_type (bidi_it->type);
2171
2172   /* For L1 below, we need to know, for each WS character, whether
2173      it belongs to a sequence of WS characters preceding a newline
2174      or a TAB or a paragraph separator.  */
2175   if (bidi_it->orig_type == NEUTRAL_WS
2176       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2177     {
           /* The search below works on local copies of the position
              data, so BIDI_IT itself does not move.  */
2178       int ch;
2179       ptrdiff_t clen = bidi_it->ch_len;
2180       ptrdiff_t bpos = bidi_it->bytepos;
2181       ptrdiff_t cpos = bidi_it->charpos;
2182       ptrdiff_t disp_pos = bidi_it->disp_pos;
2183       ptrdiff_t nc = bidi_it->nchars;
2184       struct bidi_string_data bs = bidi_it->string;
2185       bidi_type_t chtype;
2186       bool fwp = bidi_it->frame_window_p;
2187       int dpp = bidi_it->disp_prop;
2188
2189       if (bidi_it->nchars <= 0)
2190         emacs_abort ();
           /* Find the first following character that is not WS, BN or
              an explicit formatting code; a newline and the end of the
              text both count as NEUTRAL_B.  */
2191       do {
2192         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2193                               fwp, &clen, &nc);
2194         if (ch == '\n' || ch == BIDI_EOB)
2195           chtype = NEUTRAL_B;
2196         else
2197           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2198       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2199                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2200       bidi_it->next_for_ws.type = chtype;
2201       bidi_check_type (bidi_it->next_for_ws.type);
2202       bidi_it->next_for_ws.charpos = cpos;
2203       bidi_it->next_for_ws.bytepos = bpos;
2204     }
2205
2206   /* Resolve implicit levels, with a twist: PDFs get the embedding
2207      level of the embedding they terminate.  See below for the
2208      reason.  */
2209   if (bidi_it->orig_type == PDF
2210       /* Don't do this if this formatting code didn't change the
2211          embedding level due to invalid or empty embeddings.  */
2212       && prev_level != level)
2213     {
2214       /* Don't look in UAX#9 for the reason for this: it's our own
2215          private quirk.  The reason is that we want the formatting
2216          codes to be delivered so that they bracket the text of their
2217          embedding.  For example, given the text
2218
2219              {RLO}teST{PDF}
2220
2221          we want it to be displayed as
2222
2223              {PDF}STet{RLO}
2224
2225          not as
2226
2227              STet{RLO}{PDF}
2228
2229          which will result because we bump up the embedding level as
2230          soon as we see the RLO and pop it as soon as we see the PDF,
2231          so RLO itself has the same embedding level as "teST", and
2232          thus would be normally delivered last, just before the PDF.
2233          The switch below fiddles with the level of PDF so that this
2234          ugly side effect does not happen.
2235
2236          (This is, of course, only important if the formatting codes
2237          are actually displayed, but Emacs does need to display them
2238          if the user wants to.)  */
2239       level = prev_level;
2240     }
2241   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2242            || bidi_it->orig_type == NEUTRAL_S
2243            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2244            || (bidi_it->orig_type == NEUTRAL_WS
2245                && (bidi_it->next_for_ws.type == NEUTRAL_B
2246                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
         /* These characters get the level at the bottom of the level
            stack.  */
2247     level = bidi_it->level_stack[0].level;
2248   else if ((level & 1) == 0) /* I1 */
2249     {
2250       if (type == STRONG_R)
2251         level++;
2252       else if (type == WEAK_EN || type == WEAK_AN)
2253         level += 2;
2254     }
2255   else                  /* I2 */
2256     {
2257       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2258         level++;
2259     }
2260
2261   bidi_it->resolved_level = level;
2262   return level;
2263 }
2264
2265 /* Take BIDI_IT to the opposite edge of the level LEVEL.  END_FLAG
2266    means we stand at the end of that level and are about to resume
2267    scanning the lower level.  A cached edge is simply loaded into
2268    BIDI_IT; otherwise the text is walked until the resolved level
2269    drops below LEVEL.
2270
2271    A ``level'' here is not a UAX#9 ``level run'': it also covers all
2272    the higher levels nested in it.  With these resolved levels
2273
2274          11111112222222333333334443343222222111111112223322111
2275                 A      B                    C
2276
2277    a left-to-right scan starting at A is taken to C, whereas the
2278    UAX#9 ``level 2 run'' already ends at B.  */
2279 static void
2280 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2281 {
2282   int search_dir = bidi_it->scan_dir;
2283   int reached;
2284   ptrdiff_t cached;
2285
2286   if (end_flag)
2287     search_dir = -search_dir;
2288
2289   /* A cached edge lets us jump there without scanning.  */
2290   cached = bidi_cache_find_level_change (level, search_dir, end_flag);
2291   if (cached >= bidi_cache_start)
2292     {
2293       bidi_cache_fetch_state (cached, bidi_it);
2294       return;
2295     }
2296
2297   /* At the end of a level its edges must already be cached.  */
2298   if (end_flag)
2299     emacs_abort ();
2300
2301   /* Walk on, caching every state, until we drop below LEVEL.  */
2302   bidi_cache_iterator_state (bidi_it, 1);
2303   do
2304     {
2305       reached = bidi_level_of_next_char (bidi_it);
2306       bidi_cache_iterator_state (bidi_it, 1);
2307     }
2308   while (reached >= level);
2309 }
2310
2311 void
2312 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2313 {
2314   int old_level, new_level, next_level;
2315   struct bidi_it sentinel;
2316   struct gcpro gcpro1;
2317
2318   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2319     emacs_abort ();
2320
2321   if (bidi_it->scan_dir == 0)
2322     {
2323       bidi_it->scan_dir = 1;    /* default to logical order */
2324     }
2325
2326   /* The code below can call eval, and thus cause GC.  If we are
2327      iterating a Lisp string, make sure it won't be GCed.  */
2328   if (STRINGP (bidi_it->string.lstring))
2329     GCPRO1 (bidi_it->string.lstring);
2330
2331   /* If we just passed a newline, initialize for the next line.  */
2332   if (!bidi_it->first_elt
2333       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2334     bidi_line_init (bidi_it);
2335
2336   /* Prepare the sentinel iterator state, and cache it.  When we bump
2337      into it, scanning backwards, we'll know that the last non-base
2338      level is exhausted.  */
2339   if (bidi_cache_idx == bidi_cache_start)
2340     {
2341       bidi_copy_it (&sentinel, bidi_it);
2342       if (bidi_it->first_elt)
2343         {
2344           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2345           sentinel.bytepos--;
2346           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2347           sentinel.ch_len = 1;
2348           sentinel.nchars = 1;
2349         }
2350       bidi_cache_iterator_state (&sentinel, 1);
2351     }
2352
2353   old_level = bidi_it->resolved_level;
2354   new_level = bidi_level_of_next_char (bidi_it);
2355
2356   /* Reordering of resolved levels (clause L2) is implemented by
2357      jumping to the other edge of the level and flipping direction of
2358      scanning the text whenever we find a level change.  */
2359   if (new_level != old_level)
2360     {
2361       bool ascending = new_level > old_level;
2362       int level_to_search = ascending ? old_level + 1 : old_level;
2363       int incr = ascending ? 1 : -1;
2364       int expected_next_level = old_level + incr;
2365
2366       /* Jump (or walk) to the other edge of this level.  */
2367       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2368       /* Switch scan direction and peek at the next character in the
2369          new direction.  */
2370       bidi_it->scan_dir = -bidi_it->scan_dir;
2371
2372       /* The following loop handles the case where the resolved level
2373          jumps by more than one.  This is typical for numbers inside a
2374          run of text with left-to-right embedding direction, but can
2375          also happen in other situations.  In those cases the decision
2376          where to continue after a level change, and in what direction,
2377          is tricky.  For example, given a text like below:
2378
2379                   abcdefgh
2380                   11336622
2381
2382          (where the numbers below the text show the resolved levels),
2383          the result of reordering according to UAX#9 should be this:
2384
2385                   efdcghba
2386
2387          This is implemented by the loop below which flips direction
2388          and jumps to the other edge of the level each time it finds
2389          the new level not to be the expected one.  The expected level
2390          is always one more or one less than the previous one.  */
2391       next_level = bidi_peek_at_next_level (bidi_it);
2392       while (next_level != expected_next_level)
2393         {
2394           expected_next_level += incr;
2395           level_to_search += incr;
2396           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2397           bidi_it->scan_dir = -bidi_it->scan_dir;
2398           next_level = bidi_peek_at_next_level (bidi_it);
2399         }
2400
2401       /* Finally, deliver the next character in the new direction.  */
2402       next_level = bidi_level_of_next_char (bidi_it);
2403     }
2404
2405   /* Take note when we have just processed the newline that precedes
2406      the end of the paragraph.  The next time we are about to be
2407      called, set_iterator_to_next will automatically reinit the
2408      paragraph direction, if needed.  We do this at the newline before
2409      the paragraph separator, because the next character might not be
2410      the first character of the next paragraph, due to the bidi
2411      reordering, whereas we _must_ know the paragraph base direction
2412      _before_ we process the paragraph's text, since the base
2413      direction affects the reordering.  */
2414   if (bidi_it->scan_dir == 1
2415       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2416     {
2417       /* The paragraph direction of the entire string, once
2418          determined, is in effect for the entire string.  Setting the
2419          separator limit to the end of the string prevents
2420          bidi_paragraph_init from being called automatically on this
2421          string.  */
2422       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2423         bidi_it->separator_limit = bidi_it->string.schars;
2424       else if (bidi_it->bytepos < ZV_BYTE)
2425         {
2426           ptrdiff_t sep_len
2427             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2428                                      bidi_it->bytepos + bidi_it->ch_len);
2429           if (bidi_it->nchars <= 0)
2430             emacs_abort ();
2431           if (sep_len >= 0)
2432             {
2433               bidi_it->new_paragraph = 1;
2434               /* Record the buffer position of the last character of the
2435                  paragraph separator.  */
2436               bidi_it->separator_limit
2437                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2438             }
2439         }
2440     }
2441
2442   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2443     {
2444       /* If we are at paragraph's base embedding level and beyond the
2445          last cached position, the cache's job is done and we can
2446          discard it.  */
2447       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2448           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2449                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2450         bidi_cache_reset ();
2451         /* But as long as we are caching during forward scan, we must
2452            cache each state, or else the cache integrity will be
2453            compromised: it assumes cached states correspond to buffer
2454            positions 1:1.  */
2455       else
2456         bidi_cache_iterator_state (bidi_it, 1);
2457     }
2458
2459   if (STRINGP (bidi_it->string.lstring))
2460     UNGCPRO;
2461 }
2462
2463 /* This is meant to be called from within the debugger, whenever you
2464    wish to examine the cache contents.  */
2465 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2466 void
2467 bidi_dump_cached_states (void)
2468 {
2469   ptrdiff_t i;
2470   int ndigits = 1;
2471
2472   if (bidi_cache_idx == 0)
2473     {
2474       fprintf (stderr, "The cache is empty.\n");
2475       return;
2476     }
2477   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2478            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2479
2480   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2481     ndigits++;
2482   fputs ("ch  ", stderr);
2483   for (i = 0; i < bidi_cache_idx; i++)
2484     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2485   fputs ("\n", stderr);
2486   fputs ("lvl ", stderr);
2487   for (i = 0; i < bidi_cache_idx; i++)
2488     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2489   fputs ("\n", stderr);
2490   fputs ("pos ", stderr);
2491   for (i = 0; i < bidi_cache_idx; i++)
2492     fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
2493   fputs ("\n", stderr);
2494 }