src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
   A sequential implementation of the Unicode Bidirectional algorithm
   (UBA), as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59
  60 #include "lisp.h"
  61 #include "character.h"
  62 #include "buffer.h"
  63 #include "dispextern.h"
  64 #include "region-cache.h"
  65
  66 static bool bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but clearing it will take them into
  86    account.  */
  87 extern bool bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 bool bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     emacs_abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     emacs_abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 emacs_abort (); /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         emacs_abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     emacs_abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v;
 208
 209       /* When debugging, check before assigning to V, so that the check
 210          isn't broken by undefined behavior due to int overflow.  */
 211       eassert (CHAR_VALID_P (XINT (val)));
 212
 213       v = XINT (val);
 214
 215       /* Minimal test we must do in optimized builds, to prevent weird
 216          crashes further down the road.  */
 217       if (v < 0 || v > MAX_CHAR)
 218         emacs_abort ();
 219
 220       return v;
 221     }
 222
 223   return c;
 224 }
 225
 226 /* Determine the start-of-run (sor) directional type given the two
 227    embedding levels on either side of the run boundary.  Also, update
 228    the saved info about previously seen characters, since that info is
 229    generally valid for a single level run.  */
 230 static void
 231 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 232 {
 233   int higher_level = (level_before > level_after ? level_before : level_after);
 234
 235   /* The prev_was_pdf gork is required for when we have several PDFs
 236      in a row.  In that case, we want to compute the sor type for the
 237      next level run only once: when we see the first PDF.  That's
 238      because the sor type depends only on the higher of the two levels
 239      that we find on the two sides of the level boundary (see UAX#9,
 240      clause X10), and so we don't need to know the final embedding
 241      level to which we descend after processing all the PDFs.  */
 242   if (!bidi_it->prev_was_pdf || level_before < level_after)
 243     /* FIXME: should the default sor direction be user selectable?  */
 244     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 245   if (level_before > level_after)
 246     bidi_it->prev_was_pdf = 1;
 247
 248   bidi_it->prev.type = UNKNOWN_BT;
 249   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 250     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 251   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 252   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 253   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 254   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 255     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 256   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 257 }
 258
 259 /* Push the current embedding level and override status; reset the
 260    current level to LEVEL and the current override status to OVERRIDE.  */
 261 static void
 262 bidi_push_embedding_level (struct bidi_it *bidi_it,
 263                            int level, bidi_dir_t override)
 264 {
 265   bidi_it->stack_idx++;
 266   eassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 267   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 268   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 269 }
 270
 271 /* Pop the embedding level and directional override status from the
 272    stack, and return the new level.  */
 273 static int
 274 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 275 {
 276   /* UAX#9 says to ignore invalid PDFs.  */
 277   if (bidi_it->stack_idx > 0)
 278     bidi_it->stack_idx--;
 279   return bidi_it->level_stack[bidi_it->stack_idx].level;
 280 }
 281
 282 /* Record in SAVED_INFO the information about the current character.  */
 283 static void
 284 bidi_remember_char (struct bidi_saved_info *saved_info,
 285                     struct bidi_it *bidi_it)
 286 {
 287   saved_info->charpos = bidi_it->charpos;
 288   saved_info->bytepos = bidi_it->bytepos;
 289   saved_info->type = bidi_it->type;
 290   bidi_check_type (bidi_it->type);
 291   saved_info->type_after_w1 = bidi_it->type_after_w1;
 292   bidi_check_type (bidi_it->type_after_w1);
 293   saved_info->orig_type = bidi_it->orig_type;
 294   bidi_check_type (bidi_it->orig_type);
 295 }
 296
 297 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 298    copies the part of the level stack that is actually in use.  */
 299 static void
 300 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 301 {
 302   /* Copy everything from the start through the active part of
 303      the level stack.  */
 304   memcpy (to, from,
 305           (offsetof (struct bidi_it, level_stack[1])
 306            + from->stack_idx * sizeof from->level_stack[0]));
 307 }
 308
 309 \f
 310 /***********************************************************************
 311                         Caching the bidi iterator states
 312  ***********************************************************************/
 313
 314 #define BIDI_CACHE_CHUNK 200
 315 static struct bidi_it *bidi_cache;
 316 static ptrdiff_t bidi_cache_size = 0;
 317 enum { elsz = sizeof (struct bidi_it) };
 318 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 319 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 320 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 321                                            "stack" level */
 322
 323 /* 5-slot stack for saving the start of the previous level of the
 324    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 325    and we need the same size of our stack.  */
 326 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 327 static int bidi_cache_sp;
 328
 329 /* Size of header used by bidi_shelve_cache.  */
 330 enum
 331   {
 332     bidi_shelve_header_size
 333       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 334          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 335          + sizeof (bidi_cache_last_idx))
 336   };
 337
 338 /* Reset the cache state to the empty state.  We only reset the part
 339    of the cache relevant to iteration of the current object.  Previous
 340    objects, which are pushed on the display iterator's stack, are left
 341    intact.  This is called when the cached information is no more
 342    useful for the current iteration, e.g. when we were reseated to a
 343    new position on the same object.  */
 344 static void
 345 bidi_cache_reset (void)
 346 {
 347   bidi_cache_idx = bidi_cache_start;
 348   bidi_cache_last_idx = -1;
 349 }
 350
 351 /* Shrink the cache to its minimal size.  Called when we init the bidi
 352    iterator for reordering a buffer or a string that does not come
 353    from display properties, because that means all the previously
 354    cached info is of no further use.  */
 355 static void
 356 bidi_cache_shrink (void)
 357 {
 358   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 359     {
 360       bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 361       bidi_cache_size = BIDI_CACHE_CHUNK;
 362     }
 363   bidi_cache_reset ();
 364 }
 365
 366 static void
 367 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 368 {
 369   int current_scan_dir = bidi_it->scan_dir;
 370
 371   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 372     emacs_abort ();
 373
 374   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 375   bidi_it->scan_dir = current_scan_dir;
 376   bidi_cache_last_idx = idx;
 377 }
 378
 379 /* Find a cached state with a given CHARPOS and resolved embedding
 380    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 381    resolved levels in cached states.  DIR, if non-zero, means search
 382    in that direction from the last cache hit.  */
 383 static ptrdiff_t
 384 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 385 {
 386   ptrdiff_t i, i_start;
 387
 388   if (bidi_cache_idx > bidi_cache_start)
 389     {
 390       if (bidi_cache_last_idx == -1)
 391         bidi_cache_last_idx = bidi_cache_idx - 1;
 392       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 393         {
 394           dir = -1;
 395           i_start = bidi_cache_last_idx - 1;
 396         }
 397       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 398                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 399         {
 400           dir = 1;
 401           i_start = bidi_cache_last_idx + 1;
 402         }
 403       else if (dir)
 404         i_start = bidi_cache_last_idx;
 405       else
 406         {
 407           dir = -1;
 408           i_start = bidi_cache_idx - 1;
 409         }
 410
 411       if (dir < 0)
 412         {
 413           /* Linear search for now; FIXME!  */
 414           for (i = i_start; i >= bidi_cache_start; i--)
 415             if (bidi_cache[i].charpos <= charpos
 416                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 417                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 418               return i;
 419         }
 420       else
 421         {
 422           for (i = i_start; i < bidi_cache_idx; i++)
 423             if (bidi_cache[i].charpos <= charpos
 424                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 425                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 426               return i;
 427         }
 428     }
 429
 430   return -1;
 431 }
 432
 433 /* Find a cached state where the resolved level changes to a value
 434    that is lower than LEVEL, and return its cache slot index.  DIR is
 435    the direction to search, starting with the last used cache slot.
 436    If DIR is zero, we search backwards from the last occupied cache
 437    slot.  BEFORE means return the index of the slot that
 438    is ``before'' the level change in the search direction.  That is,
 439    given the cached levels like this:
 440
 441          1122333442211
 442           AB        C
 443
 444    and assuming we are at the position cached at the slot marked with
 445    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 446    index of slot B or A, depending whether BEFORE is, respectively,
 447    true or false.  */
 448 static ptrdiff_t
 449 bidi_cache_find_level_change (int level, int dir, bool before)
 450 {
 451   if (bidi_cache_idx)
 452     {
 453       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 454       int incr = before ? 1 : 0;
 455
 456       eassert (!dir || bidi_cache_last_idx >= 0);
 457
 458       if (!dir)
 459         dir = -1;
 460       else if (!incr)
 461         i += dir;
 462
 463       if (dir < 0)
 464         {
 465           while (i >= bidi_cache_start + incr)
 466             {
 467               if (bidi_cache[i - incr].resolved_level >= 0
 468                   && bidi_cache[i - incr].resolved_level < level)
 469                 return i;
 470               i--;
 471             }
 472         }
 473       else
 474         {
 475           while (i < bidi_cache_idx - incr)
 476             {
 477               if (bidi_cache[i + incr].resolved_level >= 0
 478                   && bidi_cache[i + incr].resolved_level < level)
 479                 return i;
 480               i++;
 481             }
 482         }
 483     }
 484
 485   return -1;
 486 }
 487
 488 static void
 489 bidi_cache_ensure_space (ptrdiff_t idx)
 490 {
 491   /* Enlarge the cache as needed.  */
 492   if (idx >= bidi_cache_size)
 493     {
 494       /* The bidi cache cannot be larger than the largest Lisp string
 495          or buffer.  */
 496       ptrdiff_t string_or_buffer_bound
 497         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 498
 499       /* Also, it cannot be larger than what C can represent.  */
 500       ptrdiff_t c_bound
 501         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 502
 503       bidi_cache
 504         = xpalloc (bidi_cache, &bidi_cache_size,
 505                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 506                    min (string_or_buffer_bound, c_bound), elsz);
 507     }
 508 }
 509
 510 static void
 511 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
 512 {
 513   ptrdiff_t idx;
 514
 515   /* We should never cache on backward scans.  */
 516   if (bidi_it->scan_dir == -1)
 517     emacs_abort ();
 518   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 519
 520   if (idx < 0)
 521     {
 522       idx = bidi_cache_idx;
 523       bidi_cache_ensure_space (idx);
 524       /* Character positions should correspond to cache positions 1:1.
 525          If we are outside the range of cached positions, the cache is
 526          useless and must be reset.  */
 527       if (idx > bidi_cache_start &&
 528           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 529                                + bidi_cache[idx - 1].nchars)
 530            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 531         {
 532           bidi_cache_reset ();
 533           idx = bidi_cache_start;
 534         }
 535       if (bidi_it->nchars <= 0)
 536         emacs_abort ();
 537       bidi_copy_it (&bidi_cache[idx], bidi_it);
 538       if (!resolved)
 539         bidi_cache[idx].resolved_level = -1;
 540     }
 541   else
 542     {
 543       /* Copy only the members which could have changed, to avoid
 544          costly copying of the entire struct.  */
 545       bidi_cache[idx].type = bidi_it->type;
 546       bidi_check_type (bidi_it->type);
 547       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 548       bidi_check_type (bidi_it->type_after_w1);
 549       if (resolved)
 550         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 551       else
 552         bidi_cache[idx].resolved_level = -1;
 553       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 554       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 555       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 556       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 557       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 558       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 559       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 560     }
 561
 562   bidi_cache_last_idx = idx;
 563   if (idx >= bidi_cache_idx)
 564     bidi_cache_idx = idx + 1;
 565 }
 566
 567 static bidi_type_t
 568 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 569 {
 570   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 571
 572   if (i >= bidi_cache_start)
 573     {
 574       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 575
 576       bidi_copy_it (bidi_it, &bidi_cache[i]);
 577       bidi_cache_last_idx = i;
 578       /* Don't let scan direction from the cached state override
 579          the current scan direction.  */
 580       bidi_it->scan_dir = current_scan_dir;
 581       return bidi_it->type;
 582     }
 583
 584   return UNKNOWN_BT;
 585 }
 586
 587 static int
 588 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 589 {
 590   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 591     emacs_abort ();
 592   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 593 }
 594
 595 \f
 596 /***********************************************************************
 597              Pushing and popping the bidi iterator state
 598  ***********************************************************************/
 599
 600 /* Push the bidi iterator state in preparation for reordering a
 601    different object, e.g. display string found at certain buffer
 602    position.  Pushing the bidi iterator boils down to saving its
 603    entire state on the cache and starting a new cache "stacked" on top
 604    of the current cache.  */
 605 void
 606 bidi_push_it (struct bidi_it *bidi_it)
 607 {
 608   /* Save the current iterator state in its entirety after the last
 609      used cache slot.  */
 610   bidi_cache_ensure_space (bidi_cache_idx);
 611   bidi_cache[bidi_cache_idx++] = *bidi_it;
 612
 613   /* Push the current cache start onto the stack.  */
 614   eassert (bidi_cache_sp < IT_STACK_SIZE);
 615   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 616
 617   /* Start a new level of cache, and make it empty.  */
 618   bidi_cache_start = bidi_cache_idx;
 619   bidi_cache_last_idx = -1;
 620 }
 621
 622 /* Restore the iterator state saved by bidi_push_it and return the
 623    cache to the corresponding state.  */
 624 void
 625 bidi_pop_it (struct bidi_it *bidi_it)
 626 {
 627   if (bidi_cache_start <= 0)
 628     emacs_abort ();
 629
 630   /* Reset the next free cache slot index to what it was before the
 631      call to bidi_push_it.  */
 632   bidi_cache_idx = bidi_cache_start - 1;
 633
 634   /* Restore the bidi iterator state saved in the cache.  */
 635   *bidi_it = bidi_cache[bidi_cache_idx];
 636
 637   /* Pop the previous cache start from the stack.  */
 638   if (bidi_cache_sp <= 0)
 639     emacs_abort ();
 640   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 641
 642   /* Invalidate the last-used cache slot data.  */
 643   bidi_cache_last_idx = -1;
 644 }
 645
 646 static ptrdiff_t bidi_cache_total_alloc;
 647
 648 /* Stash away a copy of the cache and its control variables.  */
 649 void *
 650 bidi_shelve_cache (void)
 651 {
 652   unsigned char *databuf;
 653   ptrdiff_t alloc;
 654
 655   /* Empty cache.  */
 656   if (bidi_cache_idx == 0)
 657     return NULL;
 658
 659   alloc = (bidi_shelve_header_size
 660            + bidi_cache_idx * sizeof (struct bidi_it));
 661   databuf = xmalloc (alloc);
 662   bidi_cache_total_alloc += alloc;
 663
 664   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 665   memcpy (databuf + sizeof (bidi_cache_idx),
 666           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 667   memcpy (databuf + sizeof (bidi_cache_idx)
 668           + bidi_cache_idx * sizeof (struct bidi_it),
 669           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 670   memcpy (databuf + sizeof (bidi_cache_idx)
 671           + bidi_cache_idx * sizeof (struct bidi_it)
 672           + sizeof (bidi_cache_start_stack),
 673           &bidi_cache_sp, sizeof (bidi_cache_sp));
 674   memcpy (databuf + sizeof (bidi_cache_idx)
 675           + bidi_cache_idx * sizeof (struct bidi_it)
 676           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 677           &bidi_cache_start, sizeof (bidi_cache_start));
 678   memcpy (databuf + sizeof (bidi_cache_idx)
 679           + bidi_cache_idx * sizeof (struct bidi_it)
 680           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 681           + sizeof (bidi_cache_start),
 682           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 683
 684   return databuf;
 685 }
 686
 687 /* Restore the cache state from a copy stashed away by
 688    bidi_shelve_cache, and free the buffer used to stash that copy.
 689    JUST_FREE means free the buffer, but don't restore the
 690    cache; used when the corresponding iterator is discarded instead of
 691    being restored.  */
 692 void
 693 bidi_unshelve_cache (void *databuf, bool just_free)
 694 {
 695   unsigned char *p = databuf;
 696
 697   if (!p)
 698     {
 699       if (!just_free)
 700         {
 701           /* A NULL pointer means an empty cache.  */
 702           bidi_cache_start = 0;
 703           bidi_cache_sp = 0;
 704           bidi_cache_reset ();
 705         }
 706     }
 707   else
 708     {
 709       if (just_free)
 710         {
 711           ptrdiff_t idx;
 712
 713           memcpy (&idx, p, sizeof (bidi_cache_idx));
 714           bidi_cache_total_alloc
 715             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 716         }
 717       else
 718         {
 719           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 720           bidi_cache_ensure_space (bidi_cache_idx);
 721           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 722                   bidi_cache_idx * sizeof (struct bidi_it));
 723           memcpy (bidi_cache_start_stack,
 724                   p + sizeof (bidi_cache_idx)
 725                   + bidi_cache_idx * sizeof (struct bidi_it),
 726                   sizeof (bidi_cache_start_stack));
 727           memcpy (&bidi_cache_sp,
 728                   p + sizeof (bidi_cache_idx)
 729                   + bidi_cache_idx * sizeof (struct bidi_it)
 730                   + sizeof (bidi_cache_start_stack),
 731                   sizeof (bidi_cache_sp));
 732           memcpy (&bidi_cache_start,
 733                   p + sizeof (bidi_cache_idx)
 734                   + bidi_cache_idx * sizeof (struct bidi_it)
 735                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 736                   sizeof (bidi_cache_start));
 737           memcpy (&bidi_cache_last_idx,
 738                   p + sizeof (bidi_cache_idx)
 739                   + bidi_cache_idx * sizeof (struct bidi_it)
 740                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 741                   + sizeof (bidi_cache_start),
 742                   sizeof (bidi_cache_last_idx));
 743           bidi_cache_total_alloc
 744             -= (bidi_shelve_header_size
 745                 + bidi_cache_idx * sizeof (struct bidi_it));
 746         }
 747
 748       xfree (p);
 749     }
 750 }
 751
 752 \f
 753 /***********************************************************************
 754                         Initialization
 755  ***********************************************************************/
 756 static void
 757 bidi_initialize (void)
 758 {
 759   bidi_type_table = uniprop_table (intern ("bidi-class"));
 760   if (NILP (bidi_type_table))
 761     emacs_abort ();
 762   staticpro (&bidi_type_table);
 763
 764   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 765   if (NILP (bidi_mirror_table))
 766     emacs_abort ();
 767   staticpro (&bidi_mirror_table);
 768
 769   Qparagraph_start = intern ("paragraph-start");
 770   staticpro (&Qparagraph_start);
 771   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 772   if (!STRINGP (paragraph_start_re))
 773     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 774   staticpro (&paragraph_start_re);
 775   Qparagraph_separate = intern ("paragraph-separate");
 776   staticpro (&Qparagraph_separate);
 777   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 778   if (!STRINGP (paragraph_separate_re))
 779     paragraph_separate_re = build_string ("[ \t\f]*$");
 780   staticpro (&paragraph_separate_re);
 781
 782   bidi_cache_sp = 0;
 783   bidi_cache_total_alloc = 0;
 784
 785   bidi_initialized = 1;
 786 }
 787
 788 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 789    end.  */
 790 static void
 791 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 792 {
 793   bidi_it->invalid_levels = 0;
 794   bidi_it->invalid_rl_levels = -1;
 795   bidi_it->stack_idx = 0;
 796   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 797 }
 798
 799 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 800 void
 801 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
 802               struct bidi_it *bidi_it)
 803 {
 804   if (! bidi_initialized)
 805     bidi_initialize ();
 806   if (charpos >= 0)
 807     bidi_it->charpos = charpos;
 808   if (bytepos >= 0)
 809     bidi_it->bytepos = bytepos;
 810   bidi_it->frame_window_p = frame_window_p;
 811   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 812   bidi_it->first_elt = 1;
 813   bidi_set_paragraph_end (bidi_it);
 814   bidi_it->new_paragraph = 1;
 815   bidi_it->separator_limit = -1;
 816   bidi_it->type = NEUTRAL_B;
 817   bidi_it->type_after_w1 = NEUTRAL_B;
 818   bidi_it->orig_type = NEUTRAL_B;
 819   bidi_it->prev_was_pdf = 0;
 820   bidi_it->prev.type = bidi_it->prev.type_after_w1
 821     = bidi_it->prev.orig_type = UNKNOWN_BT;
 822   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 823     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 824   bidi_it->next_for_neutral.charpos = -1;
 825   bidi_it->next_for_neutral.type
 826     = bidi_it->next_for_neutral.type_after_w1
 827     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 828   bidi_it->prev_for_neutral.charpos = -1;
 829   bidi_it->prev_for_neutral.type
 830     = bidi_it->prev_for_neutral.type_after_w1
 831     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 832   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 833   bidi_it->disp_pos = -1;       /* invalid/unknown */
 834   bidi_it->disp_prop = 0;
 835   /* We can only shrink the cache if we are at the bottom level of its
 836      "stack".  */
 837   if (bidi_cache_start == 0)
 838     bidi_cache_shrink ();
 839   else
 840     bidi_cache_reset ();
 841 }
 842
 843 /* Perform initializations for reordering a new line of bidi text.  */
 844 static void
 845 bidi_line_init (struct bidi_it *bidi_it)
 846 {
 847   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 848   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 849   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 850   bidi_it->invalid_levels = 0;
 851   bidi_it->invalid_rl_levels = -1;
 852   /* Setting this to zero will force its recomputation the first time
 853      we need it for W5.  */
 854   bidi_it->next_en_pos = 0;
 855   bidi_it->next_en_type = UNKNOWN_BT;
 856   bidi_it->next_for_ws.type = UNKNOWN_BT;
 857   bidi_set_sor_type (bidi_it,
 858                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 859                      bidi_it->level_stack[0].level); /* X10 */
 860
 861   bidi_cache_reset ();
 862 }
 863
 864 \f
 865 /***********************************************************************
 866                         Fetching characters
 867  ***********************************************************************/
 868
 869 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 870    are zero-based character positions in S, BEGBYTE is byte position
 871    corresponding to BEG.  UNIBYTE means S is a unibyte string.  */
 872 static ptrdiff_t
 873 bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
 874                   ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
 875 {
 876   ptrdiff_t pos = beg;
 877   const unsigned char *p = s + begbyte, *start = p;
 878
 879   if (unibyte)
 880     p = s + end;
 881   else
 882     {
 883       if (!CHAR_HEAD_P (*p))
 884         emacs_abort ();
 885
 886       while (pos < end)
 887         {
 888           p += BYTES_BY_CHAR_HEAD (*p);
 889           pos++;
 890         }
 891     }
 892
 893   return p - start;
 894 }
 895
 896 /* Fetch and return the character at byte position BYTEPOS.  If S is
 897    non-NULL, fetch the character from string S; otherwise fetch the
 898    character from the current buffer.  UNIBYTE means S is a
 899    unibyte string.  */
 900 static int
 901 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
 902 {
 903   if (s)
 904     {
 905       s += bytepos;
 906       if (unibyte)
 907         return *s;
 908     }
 909   else
 910     s = BYTE_POS_ADDR (bytepos);
 911   return STRING_CHAR (s);
 912 }
 913
/* Fetch and return the character at CHARPOS/BYTEPOS.  If that
   character is covered by a display string, treat the entire run of
   covered characters as a single character, either u+2029 or u+FFFC,
   and return their combined length in CH_LEN and NCHARS.  DISP_POS
   specifies the character position of the next display string, or -1
   if not yet computed.  When the next character is at or beyond that
   position, the function updates DISP_POS with the position of the
   next display string.  *DISP_PROP non-zero means that there's really
   a display string at DISP_POS, as opposed to when we searched till
   DISP_POS without finding one.  If *DISP_PROP is 2, it means the
   display spec is of the form `(space ...)', which is replaced with
   u+2029 to handle it as a paragraph separator.  STRING->s is the C
   string to iterate, or NULL if iterating over a buffer or a Lisp
   string; in the latter case, STRING->lstring is the Lisp string.

   W and FRAME_WINDOW_P are not examined here; they are only handed
   on to compute_display_string_pos.  At or beyond the end of the
   buffer/string the value is BIDI_EOB, with both *CH_LEN and *NCHARS
   set to 1.  */
static int
bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
                 int *disp_prop, struct bidi_string_data *string,
                 struct window *w,
                 bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
  int ch;
  /* One past the last character position: the string's length when
     iterating a C or Lisp string, ZV for a buffer.  */
  ptrdiff_t endpos
    = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
  struct text_pos pos;
  int len;      /* byte length delivered by STRING_CHAR_AND_LENGTH */

  /* If we got past the last known position of display string, compute
     the position of the next one.  That position could be at CHARPOS.  */
  if (charpos < endpos && charpos > *disp_pos)
    {
      SET_TEXT_POS (pos, charpos, bytepos);
      *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
                                              disp_prop);
    }

  /* Fetch the character at BYTEPOS.  */
  if (charpos >= endpos)
    {
      /* Past the end: report BIDI_EOB as a one-character, one-byte
         element, and park DISP_POS at the end with no display
         property.  */
      ch = BIDI_EOB;
      *ch_len = 1;
      *nchars = 1;
      *disp_pos = endpos;
      *disp_prop = 0;
    }
  else if (charpos >= *disp_pos && *disp_prop)
    {
      ptrdiff_t disp_end_pos;

      /* We don't expect to find ourselves in the middle of a display
         property.  Hopefully, it will never be needed.  */
      if (charpos > *disp_pos)
        emacs_abort ();
      /* Text covered by `display' properties and overlays with
         display properties or display strings is handled as a single
         character that represents the entire run of characters
         covered by the display property.  */
      if (*disp_prop == 2)
        {
          /* `(space ...)' display specs are handled as paragraph
             separators for the purposes of the reordering; see UAX#9
             section 3 and clause HL1 in section 4.3 there.  */
          ch = 0x2029;
        }
      else
        {
          /* All other display specs are handled as the Unicode Object
             Replacement Character.  */
          ch = 0xFFFC;
        }
      disp_end_pos = compute_display_string_end (*disp_pos, string);
      if (disp_end_pos < 0)
        {
          /* Somebody removed the display string from the buffer
             behind our back.  Recover by processing this buffer
             position as if no display property were present there to
             begin with.  */
          *disp_prop = 0;
          goto normal_char;
        }
      /* The covered run is reported as a single element spanning
         NCHARS characters; an empty or negative run aborts.  */
      *nchars = disp_end_pos - *disp_pos;
      if (*nchars <= 0)
        emacs_abort ();
      /* Byte length of the covered run: counted in the string data
         for C and Lisp strings, derived from buffer positions
         otherwise.  */
      if (string->s)
        *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
                                    disp_end_pos, string->unibyte);
      else if (STRINGP (string->lstring))
        *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
                                    bytepos, disp_end_pos, string->unibyte);
      else
        *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
    }
  else
    {
      /* An ordinary character, not covered by a display string.  The
         label is also the target of the goto above, taken when the
         display string turned out to be gone.  */
    normal_char:
      if (string->s)
        {

          /* C string.  */
          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
              *ch_len = 1;
            }
        }
      else if (STRINGP (string->lstring))
        {
          /* Lisp string.  */
          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
                                           len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
              *ch_len = 1;
            }
        }
      else
        {
          /* Current buffer.  */
          ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
          *ch_len = len;
        }
      *nchars = 1;
    }

  /* If we just entered a run of characters covered by a display
     string, compute the position of the next display string.  */
  if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
      && *disp_prop)
    {
      /* Search from the first position after the element we are
         returning.  */
      SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
      *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
                                              disp_prop);
    }

  return ch;
}
1056
1057 \f
1058 /***********************************************************************
1059                         Determining paragraph direction
1060  ***********************************************************************/
1061
1062 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1063    Value is the non-negative length of the paragraph separator
1064    following the buffer position, -1 if position is at the beginning
1065    of a new paragraph, or -2 if position is neither at beginning nor
1066    at end of a paragraph.  */
1067 static ptrdiff_t
1068 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1069 {
1070   Lisp_Object sep_re;
1071   Lisp_Object start_re;
1072   ptrdiff_t val;
1073
1074   sep_re = paragraph_separate_re;
1075   start_re = paragraph_start_re;
1076
1077   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1078   if (val < 0)
1079     {
1080       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1081         val = -1;
1082       else
1083         val = -2;
1084     }
1085
1086   return val;
1087 }
1088
1089 /* If the user has requested the long scans caching, make sure that
1090    BIDI cache is enabled.  Otherwise, make sure it's disabled.  */
1091
1092 static struct region_cache *
1093 bidi_paragraph_cache_on_off (void)
1094 {
1095   if (NILP (BVAR (current_buffer, cache_long_scans)))
1096     {
1097       if (current_buffer->bidi_paragraph_cache)
1098         {
1099           free_region_cache (current_buffer->bidi_paragraph_cache);
1100           current_buffer->bidi_paragraph_cache = 0;
1101         }
1102       return NULL;
1103     }
1104   else
1105     {
1106       if (!current_buffer->bidi_paragraph_cache)
1107         current_buffer->bidi_paragraph_cache = new_region_cache ();
1108       return current_buffer->bidi_paragraph_cache;
1109     }
1110 }
1111
1112 /* On my 2005-vintage machine, searching back for paragraph start
1113    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1114    when user types C-p.  The number below limits each call to
1115    bidi_paragraph_init to about 10 ms.  */
1116 #define MAX_PARAGRAPH_SEARCH 7500
1117
1118 /* Find the beginning of this paragraph by looking back in the buffer.
1119    Value is the byte position of the paragraph's beginning, or
1120    BEGV_BYTE if paragraph_start_re is still not found after looking
1121    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1122 static ptrdiff_t
1123 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1124 {
1125   Lisp_Object re = paragraph_start_re;
1126   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1127   struct region_cache *bpc = bidi_paragraph_cache_on_off ();
1128   ptrdiff_t n = 0, oldpos = pos, next;
1129
1130   while (pos_byte > BEGV_BYTE
1131          && n++ < MAX_PARAGRAPH_SEARCH
1132          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1133     {
1134       /* FIXME: What if the paragraph beginning is covered by a
1135          display string?  And what if a display string covering some
1136          of the text over which we scan back includes
1137          paragraph_start_re?  */
1138       DEC_BOTH (pos, pos_byte);
1139       if (bpc && region_cache_backward (current_buffer, bpc, pos, &next))
1140         {
1141           pos = next, pos_byte = CHAR_TO_BYTE (pos);
1142           break;
1143         }
1144       else
1145         pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
1146     }
1147   if (n >= MAX_PARAGRAPH_SEARCH)
1148     pos = BEGV, pos_byte = BEGV_BYTE;
1149   if (bpc)
1150     know_region_cache (current_buffer, bpc, pos, oldpos);
1151   /* Positions returned by the region cache are not limited to
1152      BEGV..ZV range, so we limit them here.  */
1153   pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
1154   return pos_byte;
1155 }
1156
/* On a 3.4 GHz machine, searching forward for a strong directional
   character in a long paragraph full of weaks or neutrals takes about
   1 ms for each 20K characters.  The number below limits each call to
   bidi_paragraph_init to less than 10 ms even on slow machines.  */
#define MAX_STRONG_CHAR_SEARCH 100000

/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.

   Unless it returns early because BIDI_IT is still inside a
   paragraph separator, the function ends by storing the base level
   (1 for R2L, 0 otherwise) in bidi_it->level_stack[0].level and
   calling bidi_line_init.  DIR values other than L2R, R2L and
   NEUTRAL_DIR cause an abort.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
{
  ptrdiff_t bytepos = bidi_it->bytepos;
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  ptrdiff_t pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    emacs_abort ();

  /* An explicit direction is taken at face value; no scanning is
     needed.  */
  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      ptrdiff_t ch_len, nchars;
      ptrdiff_t pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      /* S is the string data, or NULL when iterating over a buffer.  */
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P,
         and only if we are iterating on a buffer.  */
      do {
        ptrdiff_t pos1;

        /* Start scanning at the beginning of the paragraph.
           NOTE(review): for strings POS is not reset here although
           BYTEPOS is set to 0; confirm callers only pass strings
           positioned at their start.  */
        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop,
                              &bidi_it->string, bidi_it->w,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        /* POS1 remembers where the scan began, for the
           MAX_STRONG_CHAR_SEARCH limit below.  The loop runs while
           TYPE is not strong, or is an explicit embedding/override
           mark that we were asked to ignore.  */
        pos1 = pos;
        for (pos += nchars, bytepos += ch_len;
             ((bidi_get_category (type) != STRONG)
              || (bidi_ignore_explicit_marks_for_paragraph_level
                  && (type == RLE || type == RLO
                      || type == LRE || type == LRO)))
               /* Stop when searched too far into an abnormally large
                  paragraph full of weak or neutral characters.  */
               && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, stop at a separator character that ends
               this paragraph or begins a new one.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (pos, bytepos, &disp_pos,
                                  &disp_prop, &bidi_it->string, bidi_it->w,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* Translate the type the scan stopped on into a direction.
           If it is neither right-to-left nor left-to-right,
           paragraph_dir is left as it was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                ptrdiff_t prevpbyte = pstartbyte;
                ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    DEC_BOTH (p, pbyte);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* Retry the scan from the previous paragraph.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    emacs_abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* The base embedding level is 1 for right-to-left paragraphs and 0
     for left-to-right ones.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1346
1347 \f
1348 /***********************************************************************
1349                  Resolving explicit and implicit levels.
1350   The rest of this file constitutes the core of the UBA implementation.
1351  ***********************************************************************/
1352
1353 static bool
1354 bidi_explicit_dir_char (int ch)
1355 {
1356   bidi_type_t ch_type;
1357
1358   if (!bidi_initialized)
1359     emacs_abort ();
1360   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1361   return (ch_type == LRE || ch_type == LRO
1362           || ch_type == RLE || ch_type == RLO
1363           || ch_type == PDF);
1364 }
1365
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   The fetched character and its original type are recorded in
   BIDI_IT (ch, orig_type, type_after_w1, type).  Value is the
   embedding level in effect after processing the character: the
   level at the top of the stack on entry, unless an embedding,
   override or PDF character changed it.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      if (string_p)
        {
          const unsigned char *p
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          /* A negative position means the start of the string.  */
          if (bidi_it->charpos < 0)
            bidi_it->charpos = bidi_it->bytepos = 0;
          eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
                                                         bidi_it->charpos,
                                                         bidi_it->string.unibyte));
        }
      else
        {
          /* A position before BEGV is moved up to BEGV.  */
          if (bidi_it->charpos < BEGV)
            {
              bidi_it->charpos = BEGV;
              bidi_it->bytepos = BEGV_BYTE;
            }
          eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        emacs_abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        emacs_abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  /* Unless changed below, the level and override at the top of the
     stack stay in effect.  */
  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      /* At the end of the buffer/string: same values as
         bidi_fetch_char produces there.  */
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character u+FFFC.  */
      curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->w,
                                 bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  /* In each case below, a non-negative ignore_bn_limit means
     bidi_resolve_explicit found an empty embedding here, so the
     level stack is left alone and only the W5 retention rule is
     applied.  */
  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* Too deep to push: just count the invalid level.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* NOTE(review): when invalid_rl_levels and invalid_levels
               are both zero, both `if's below pop a level in the
               same call; confirm this is intended.  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                /* This PDF closes an embedding that was never
                   pushed; just uncount it.  */
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1571
1572 /* Given an iterator state in BIDI_IT, advance one character position
1573    in the buffer/string to the next character (in the logical order),
1574    resolve any explicit embeddings and directional overrides, and
1575    return the embedding level of the character after resolving
1576    explicit directives and ignoring empty embeddings.

        If this step raised the level on a WEAK_BN character that is
        directly followed by another explicit directional character, the
        function scans ahead on BIDI_IT, and then restores it, to see
        whether the level returns to its starting value.  The verdict is
        left in BIDI_IT->ignore_bn_limit: the position just past the
        scanned characters if the embedding is empty, or -2 if it is
        not.  For an empty embedding, the level pushed by this step is
        popped again, and the value returned reflects that.

        If the character stepped onto is of type NEUTRAL_B, the
        paragraph end is recorded and type_after_w1 is set (X8).  */
1577 static int
1578 bidi_resolve_explicit (struct bidi_it *bidi_it)
1579 {
1580   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1581   int new_level  = bidi_resolve_explicit_1 (bidi_it);
       /* End of the text: the string length for a C string or a Lisp
          string, ZV for a buffer.  Both string.s and lstring are tested,
          as the other eob computations in this file do; testing
          string.s alone could pick ZV for a Lisp string, whose string.s
          is presumably null since its text is reached through lstring
          (see the computation of S just below).  */
1582   ptrdiff_t eob
         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
            ? bidi_it->string.schars : ZV);
1583   const unsigned char *s
1584     = (STRINGP (bidi_it->string.lstring)
1585        ? SDATA (bidi_it->string.lstring)
1586        : bidi_it->string.s);
1587
1588   if (prev_level < new_level
1589       && bidi_it->type == WEAK_BN
1590       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1591       && bidi_it->charpos < eob         /* not already at EOB */
1592       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1593                                                    + bidi_it->ch_len, s,
1594                                                    bidi_it->string.unibyte)))
1595     {
1596       /* Avoid pushing and popping embedding levels if the level run
1597          is empty, as this breaks level runs where it shouldn't.
1598          UAX#9 removes all the explicit embedding and override codes,
1599          so empty embeddings disappear without a trace.  We need to
1600          behave as if we did the same.  */
1601       struct bidi_it saved_it;
1602       int level = prev_level;
1603
1604       bidi_copy_it (&saved_it, bidi_it);
1605
           /* Step over the whole sequence of adjacent explicit
              directional characters.  LEVEL ends up as the level in
              effect after the last of them.  */
1606       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1607                                                        + bidi_it->ch_len, s,
1608                                                        bidi_it->string.unibyte)))
1609         {
1610           /* This advances to the next character, skipping any
1611              characters covered by display strings.  */
1612           level = bidi_resolve_explicit_1 (bidi_it);
1613           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1614              a pointer to its data is no longer valid.  */
1615           if (STRINGP (bidi_it->string.lstring))
1616             s = SDATA (bidi_it->string.lstring);
1617         }
1618
           /* The limit computed below relies on a positive nchars.  */
1619       if (bidi_it->nchars <= 0)
1620         emacs_abort ();
1621       if (level == prev_level)  /* empty embedding */
1622         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1623       else                      /* this embedding is non-empty */
1624         saved_it.ignore_bn_limit = -2;
1625
           /* Go back to the character this call stepped onto; the saved
              copy now carries the verdict in its ignore_bn_limit.  */
1626       bidi_copy_it (bidi_it, &saved_it);
1627       if (bidi_it->ignore_bn_limit > -1)
1628         {
1629           /* We pushed a level, but we shouldn't have.  Undo that. */
1630           if (!bidi_it->invalid_rl_levels)
1631             {
1632               new_level = bidi_pop_embedding_level (bidi_it);
1633               bidi_it->invalid_rl_levels = -1;
1634               if (bidi_it->invalid_levels)
1635                 bidi_it->invalid_levels--;
1636             }
1637           if (!bidi_it->invalid_levels)
1638             new_level = bidi_pop_embedding_level (bidi_it);
1639           else
1640             {
1641               bidi_it->invalid_levels--;
1642               bidi_it->invalid_rl_levels--;
1643             }
1644         }
1645     }
1646
1647   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1648     {
1649       bidi_set_paragraph_end (bidi_it);
1650       /* This is needed by bidi_resolve_weak below, and in L1.  */
1651       bidi_it->type_after_w1 = bidi_it->type;
1652       bidi_check_type (bidi_it->type_after_w1);
1653     }
1654
1655   return new_level;
1656 }
1657
1658 /* Advance in the buffer/string, resolve weak types and return the
1659    type of the next character after weak type resolution.

        The character is fetched by bidi_resolve_explicit.  If a
        directional override is in effect at its level, the type is
        forced to the direction of the override (X6); otherwise rules W1
        through W5 are applied to it.  The W6 and W7 checks follow in
        either case.  W4 and W5 may need the type of text further ahead;
        that is found by stepping BIDI_IT forward and then restoring it
        from a saved copy.  The result is stored in BIDI_IT->type; the
        type reached before W7 is stored in BIDI_IT->type_after_w1 if
        that member is still UNKNOWN_BT at that point.  */
1660 static bidi_type_t
1661 bidi_resolve_weak (struct bidi_it *bidi_it)
1662 {
1663   bidi_type_t type;
1664   bidi_dir_t override;
1665   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1666   int new_level  = bidi_resolve_explicit (bidi_it);
1667   int next_char;
1668   bidi_type_t type_of_next;
1669   struct bidi_it saved_it;
1670   ptrdiff_t eob
1671     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1672        ? bidi_it->string.schars : ZV);
1673
1674   type = bidi_it->type;
1675   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1676
       /* bidi_resolve_explicit must have given the character a type, and
          the types of the explicit formatting codes are not expected
          here.  */
1677   if (type == UNKNOWN_BT
1678       || type == LRE
1679       || type == LRO
1680       || type == RLE
1681       || type == RLO
1682       || type == PDF)
1683     emacs_abort ();
1684
1685   if (new_level != prev_level
1686       || bidi_it->type == NEUTRAL_B)
1687     {
1688       /* We've got a new embedding level run, compute the directional
1689          type of sor and initialize per-run variables (UAX#9, clause
1690          X10).  */
1691       bidi_set_sor_type (bidi_it, prev_level, new_level);
1692     }
1693   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1694            || type == WEAK_BN || type == STRONG_AL)
1695     bidi_it->type_after_w1 = type;      /* needed in L1 */
1696   bidi_check_type (bidi_it->type_after_w1);
1697
1698   /* Level and directional override status are already recorded in
1699      bidi_it, and do not need any change; see X6.  */
1700   if (override == R2L)          /* X6 */
1701     type = STRONG_R;
1702   else if (override == L2R)
1703     type = STRONG_L;
1704   else
1705     {
1706       if (type == WEAK_NSM)     /* W1 */
1707         {
1708           /* Note that we don't need to consider the case where the
1709              prev character has its type overridden by an RLO or LRO,
1710              because then either the type of this NSM would have been
1711              also overridden, or the previous character is outside the
1712              current level run, and thus not relevant to this NSM.
1713              This is why NSM gets the type_after_w1 of the previous
1714              character.  */
1715           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1716               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1717               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1718             type = bidi_it->prev.type_after_w1;
1719           else if (bidi_it->sor == R2L)
1720             type = STRONG_R;
1721           else if (bidi_it->sor == L2R)
1722             type = STRONG_L;
1723           else /* shouldn't happen! */
1724             emacs_abort ();
1725         }
           /* The type produced by W1 above goes through the checks for
              W2 to W5 below, of which at most one branch is taken.  */
1726       if (type == WEAK_EN       /* W2 */
1727           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1728         type = WEAK_AN;
1729       else if (type == STRONG_AL) /* W3 */
1730         type = STRONG_R;
1731       else if ((type == WEAK_ES /* W4 */
1732                 && bidi_it->prev.type_after_w1 == WEAK_EN
1733                 && bidi_it->prev.orig_type == WEAK_EN)
1734                || (type == WEAK_CS
1735                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1736                         && bidi_it->prev.orig_type == WEAK_EN)
1737                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1738         {
1739           const unsigned char *s
1740             = (STRINGP (bidi_it->string.lstring)
1741                ? SDATA (bidi_it->string.lstring)
1742                : bidi_it->string.s);
1743
               /* Peek at the character that follows, without moving
                  the iterator.  */
1744           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1745                        ? BIDI_EOB
1746                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1747                                            s, bidi_it->string.unibyte));
1748           type_of_next = bidi_get_type (next_char, override);
1749
               /* If what follows is a BN or a formatting code, step
                  over all such characters on this level to find the
                  type that really follows, then restore the
                  iterator.  */
1750           if (type_of_next == WEAK_BN
1751               || bidi_explicit_dir_char (next_char))
1752             {
1753               bidi_copy_it (&saved_it, bidi_it);
1754               while (bidi_resolve_explicit (bidi_it) == new_level
1755                      && bidi_it->type == WEAK_BN)
1756                 ;
1757               type_of_next = bidi_it->type;
1758               bidi_copy_it (bidi_it, &saved_it);
1759             }
1760
1761           /* If the next character is EN, but the last strong-type
1762              character is AL, that next EN will be changed to AN when
1763              we process it in W2 above.  So in that case, this ES
1764              should not be changed into EN.  */
1765           if (type == WEAK_ES
1766               && type_of_next == WEAK_EN
1767               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1768             type = WEAK_EN;
1769           else if (type == WEAK_CS)
1770             {
1771               if (bidi_it->prev.type_after_w1 == WEAK_AN
1772                   && (type_of_next == WEAK_AN
1773                       /* If the next character is EN, but the last
1774                          strong-type character is AL, EN will be later
1775                          changed to AN when we process it in W2 above.
1776                          So in that case, this CS sits between two
1777                          ANs and is changed into AN.  */
1778                       || (type_of_next == WEAK_EN
1779                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1780                 type = WEAK_AN;
1781               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1782                        && type_of_next == WEAK_EN
1783                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1784                 type = WEAK_EN;
1785             }
1786         }
1787       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1788                || type == WEAK_BN)      /* W5/Retaining */
1789         {
1790           if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1791             type = WEAK_EN;
               /* A forward search made for an earlier character ended
                  beyond this position; reuse the type it found
                  there.  */
1792           else if (bidi_it->next_en_pos > bidi_it->charpos
1793                    && bidi_it->next_en_type != WEAK_BN)
1794             {
1795               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1796                 type = WEAK_EN;
1797             }
               /* Otherwise search forward, unless next_en_pos is
                  negative, which records that there are no more ENs in
                  this paragraph (see below).  */
1798           else if (bidi_it->next_en_pos >=0)
1799             {
1800               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1801               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1802                                         ? SDATA (bidi_it->string.lstring)
1803                                         : bidi_it->string.s);
1804
1805               if (bidi_it->nchars <= 0)
1806                 emacs_abort ();
1807               next_char
1808                 = (bidi_it->charpos + bidi_it->nchars >= eob
1809                    ? BIDI_EOB
1810                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1811                                        bidi_it->string.unibyte));
1812               type_of_next = bidi_get_type (next_char, override);
1813
                   /* Step over the following ETs, BNs and formatting
                      codes on this level, note the type and position
                      of the character where that stops, and restore
                      the iterator.  */
1814               if (type_of_next == WEAK_ET
1815                   || type_of_next == WEAK_BN
1816                   || bidi_explicit_dir_char (next_char))
1817                 {
1818                   bidi_copy_it (&saved_it, bidi_it);
1819                   while (bidi_resolve_explicit (bidi_it) == new_level
1820                          && (bidi_it->type == WEAK_BN
1821                              || bidi_it->type == WEAK_ET))
1822                     ;
1823                   type_of_next = bidi_it->type;
1824                   en_pos = bidi_it->charpos;
1825                   bidi_copy_it (bidi_it, &saved_it);
1826                 }
1827               /* Remember this position, to speed up processing of the
1828                  next ETs.  */
1829               bidi_it->next_en_pos = en_pos;
1830               if (type_of_next == WEAK_EN)
1831                 {
1832                   /* If the last strong character is AL, the EN we've
1833                      found will become AN when we get to it (W2). */
1834                   if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1835                     type_of_next = WEAK_AN;
1836                   else if (type == WEAK_BN)
1837                     type = NEUTRAL_ON; /* W6/Retaining */
1838                   else
1839                     type = WEAK_EN;
1840                 }
1841               else if (type_of_next == NEUTRAL_B)
1842                 /* Record the fact that there are no more ENs from
1843                    here to the end of paragraph, to avoid entering the
1844                    loop above ever again in this paragraph.  */
1845                 bidi_it->next_en_pos = -1;
1846               /* Record the type of the character where we ended our search.  */
1847               bidi_it->next_en_type = type_of_next;
1848             }
1849         }
1850     }
1851
       /* Separators and terminators that were not turned into numbers
          above become ON, and so does a BN that follows one of them.  */
1852   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1853       || (type == WEAK_BN
1854           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1855               || bidi_it->prev.type_after_w1 == WEAK_ES
1856               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1857     type = NEUTRAL_ON;
1858
1859   /* Store the type we've got so far, before we clobber it with strong
1860      types in W7 and while resolving neutral types.  But leave alone
1861      the original types that were recorded above, because we will need
1862      them for the L1 clause.  */
1863   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1864     bidi_it->type_after_w1 = type;
1865   bidi_check_type (bidi_it->type_after_w1);
1866
1867   if (type == WEAK_EN)  /* W7 */
1868     {
           /* EN becomes L when the last strong type seen is L, or when
              no strong type was recorded and sor is L2R.  */
1869       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1870           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1871         type = STRONG_L;
1872     }
1873
1874   bidi_it->type = type;
1875   bidi_check_type (bidi_it->type);
1876   return type;
1877 }
1878
1879 /* Return the strong type given to neutral characters that sit between
1880    text of PREV_TYPE and text of NEXT_TYPE at embedding level LEV.  */
1881 static bidi_type_t
1882 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1883 {
1884   /* N1: a European or Arabic number on either side counts as R.  */
1885   bidi_type_t before
1886     = (prev_type == WEAK_EN || prev_type == WEAK_AN) ? STRONG_R : prev_type;
1887   bidi_type_t after
1888     = (next_type == WEAK_EN || next_type == WEAK_AN) ? STRONG_R : next_type;
1889
1890   /* N1: when both sides agree, the neutrals take that type.  */
1891   if (before == after)
1892     return after;
1893
1894   /* N2: otherwise use the embedding direction, L for an even level.  */
1895   return (lev & 1) == 0 ? STRONG_L : STRONG_R;
1896 }
1897
     /* Advance in the buffer/string to the next character, resolve its
        type through the rules for neutrals, and return the resulting
        type.

        The character is fetched and weak-resolved by bidi_resolve_weak.
        If its type is then a neutral other than NEUTRAL_B, or a WEAK_BN
        on an unchanged embedding level, the type is replaced by what
        bidi_resolve_neutral_1 computes from the types of the text
        before and after it.  The type after it is taken from
        BIDI_IT->next_for_neutral when that is known, from one of the two
        shortcuts described below, or else found by scanning forward
        past the neutrals; that scan caches the states it passes and
        then restores BIDI_IT.  Any other type is returned as
        bidi_resolve_weak produced it.  */
1898 static bidi_type_t
1899 bidi_resolve_neutral (struct bidi_it *bidi_it)
1900 {
1901   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1902   bidi_type_t type = bidi_resolve_weak (bidi_it);
1903   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1904
       /* These are the only types expected out of bidi_resolve_weak.  */
1905   if (!(type == STRONG_R
1906         || type == STRONG_L
1907         || type == WEAK_BN
1908         || type == WEAK_EN
1909         || type == WEAK_AN
1910         || type == NEUTRAL_B
1911         || type == NEUTRAL_S
1912         || type == NEUTRAL_WS
1913         || type == NEUTRAL_ON))
1914     emacs_abort ();
1915
1916   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1917                             we are already at paragraph end.  */
1918        && bidi_get_category (type) == NEUTRAL)
1919       || (type == WEAK_BN && prev_level == current_level))
1920     {
           /* The type of the text after these neutrals is already
              known; use it.  */
1921       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1922         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1923                                        bidi_it->next_for_neutral.type,
1924                                        current_level);
1925       /* The next two "else if" clauses are shortcuts for the
1926          important special case when we have a long sequence of
1927          neutral or WEAK_BN characters, such as whitespace or nulls or
1928          other control characters, on the base embedding level of the
1929          paragraph, and that sequence goes all the way to the end of
1930          the paragraph and follows a character whose resolved
1931          directionality is identical to the base embedding level.
1932          (This is what happens in a buffer with plain L2R text that
1933          happens to include long sequences of control characters.)  By
1934          virtue of N1, the result of examining this long sequence will
1935          always be either STRONG_L or STRONG_R, depending on the base
1936          embedding level.  So we use this fact directly instead of
1937          entering the expensive loop in the "else" clause.  */
1938       else if (current_level == 0
1939                && bidi_it->prev_for_neutral.type == STRONG_L
1940                && !bidi_explicit_dir_char (bidi_it->ch))
1941         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1942                                        STRONG_L, current_level);
1943       else if (/* current level is 1 */
1944                current_level == 1
1945                /* base embedding level is also 1 */
1946                && bidi_it->level_stack[0].level == 1
1947                /* previous character is one of those that
1948                   bidi_resolve_neutral_1 treats as R (N1) */
1949                && (bidi_it->prev_for_neutral.type == STRONG_R
1950                    || bidi_it->prev_for_neutral.type == WEAK_EN
1951                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1952                && !bidi_explicit_dir_char (bidi_it->ch))
1953         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1954                                        STRONG_R, current_level);
1955       else
1956         {
1957           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1958              the assumption of batch-style processing; see clauses W4,
1959              W5, and especially N1, which require to look far forward
1960              (as well as back) in the buffer/string.  May the fleas of
1961              a thousand camels infest the armpits of those who design
1962              supposedly general-purpose algorithms by looking at their
1963              own implementations, and fail to consider other possible
1964              implementations!  */
1965           struct bidi_it saved_it;
1966           bidi_type_t next_type;
1967
               /* The scan below only goes forward.  */
1968           if (bidi_it->scan_dir == -1)
1969             emacs_abort ();
1970
1971           bidi_copy_it (&saved_it, bidi_it);
1972           /* Scan the text forward until we find the first non-neutral
1973              character, and then use that to resolve the neutral we
1974              are dealing with now.  We also cache the scanned iterator
1975              states, to salvage some of the effort later.  */
1976           bidi_cache_iterator_state (bidi_it, 0);
1977           do {
1978             /* Record the info about the previous character, so that
1979                it will be cached below with this state.  */
1980             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1981                 && bidi_it->type != WEAK_BN)
1982               bidi_remember_char (&bidi_it->prev, bidi_it);
1983             type = bidi_resolve_weak (bidi_it);
1984             /* Paragraph separators have their levels fully resolved
1985                at this point, so cache them as resolved.  */
1986             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1987             /* FIXME: implement L1 here, by testing for a newline and
1988                resetting the level for any sequence of whitespace
1989                characters adjacent to it.  */
1990           } while (!(type == NEUTRAL_B
1991                      || (type != WEAK_BN
1992                          && bidi_get_category (type) != NEUTRAL)
1993                      /* This is all per level run, so stop when we
1994                         reach the end of this level run.  */
1995                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1996                          != current_level)));
1997
               /* Record where the scan stopped in the saved state, which
                  is the one restored into BIDI_IT below.  */
1998           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1999
2000           switch (type)
2001             {
2002               case STRONG_L:
2003               case STRONG_R:
2004               case STRONG_AL:
2005                 /* Actually, STRONG_AL cannot happen here, because
2006                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
2007                 eassert (type != STRONG_AL);
2008                 next_type = type;
2009                 break;
2010               case WEAK_EN:
2011               case WEAK_AN:
2012                 /* N1: ``European and Arabic numbers are treated as
2013                    though they were R.''  */
2014                 next_type = STRONG_R;
2015                 break;
2016               case WEAK_BN:
2017               case NEUTRAL_ON:  /* W6/Retaining */
2018                 if (!bidi_explicit_dir_char (bidi_it->ch))
2019                   emacs_abort (); /* can't happen: BNs are skipped */
2020                 /* FALLTHROUGH */
2021               case NEUTRAL_B:
2022                 /* Marched all the way to the end of this level run.
2023                    We need to use the eor type, whose information is
2024                    stored by bidi_set_sor_type in the prev_for_neutral
2025                    member.  */
2026                 if (saved_it.type != WEAK_BN
2027                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
2028                   next_type = bidi_it->prev_for_neutral.type;
2029                 else
2030                   {
2031                     /* This is a BN which does not adjoin neutrals.
2032                        Leave its type alone.  */
2033                     bidi_copy_it (bidi_it, &saved_it);
2034                     return bidi_it->type;
2035                   }
2036                 break;
2037               default:
2038                 emacs_abort ();
2039             }
               /* Resolve the character we started from, keep the result
                  and the type found ahead in the saved state, and go
                  back to that state.  */
2040           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
2041                                          next_type, current_level);
2042           saved_it.next_for_neutral.type = next_type;
2043           saved_it.type = type;
2044           bidi_check_type (next_type);
2045           bidi_check_type (type);
2046           bidi_copy_it (bidi_it, &saved_it);
2047         }
2048     }
2049   return type;
2050 }
2051
2052 /* Step BIDI_IT forward by one character in the logical order and
2053    return the bidi type of the character stepped onto, as resolved
2054    by bidi_resolve_neutral.  Aborts unless BIDI_IT is scanning
2055    forward.  */
2056 static bidi_type_t
2057 bidi_type_of_next_char (struct bidi_it *bidi_it)
2058 {
2059   bool stale_limit;
2060
2061   /* Only a forward scan may resolve types.  */
2062   if (bidi_it->scan_dir != 1)
2063     emacs_abort ();
2064
2065   /* Forget a limit that has been reached or passed, and the -2
2066      marker once the current character is not an explicit one.  */
2067   if (bidi_it->ignore_bn_limit > -1)
2068     stale_limit = bidi_it->ignore_bn_limit <= bidi_it->charpos;
2069   else
2070     stale_limit = (bidi_it->ignore_bn_limit == -2
2071                    && !bidi_explicit_dir_char (bidi_it->ch));
2072   if (stale_limit)
2073     bidi_it->ignore_bn_limit = -1;
2074
2075   return bidi_resolve_neutral (bidi_it);
2076 }
2077
2078 /* Given an iterator state BIDI_IT, advance one character position in
2079    the buffer/string to the next character (in the current scan
2080    direction), resolve the embedding and implicit levels of that next
2081    character, and return the resulting level.

        The level is also stored in BIDI_IT->resolved_level.  A state
        found in the cache with a resolved level is returned as is; a
        backward scan relies on that entirely, and aborts otherwise.  At
        the end of the text on a forward scan, BIDI_IT is left alone and
        its current resolved_level is returned.  */
2082 static int
2083 bidi_level_of_next_char (struct bidi_it *bidi_it)
2084 {
2085   bidi_type_t type;
2086   int level, prev_level = -1;
2087   struct bidi_saved_info next_for_neutral;
       /* Stays at -2 if no position to look up is computed below; that
          is lower than any position the cache lookup accepts.  */
2088   ptrdiff_t next_char_pos = -2;
2089
2090   if (bidi_it->scan_dir == 1)
2091     {
2092       ptrdiff_t eob
2093         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2094            ? bidi_it->string.schars : ZV);
2095
2096       /* There's no sense in trying to advance if we hit end of text.  */
2097       if (bidi_it->charpos >= eob)
2098         return bidi_it->resolved_level;
2099
2100       /* Record the info about the previous character.  */
2101       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2102           && bidi_it->type != WEAK_BN)
2103         bidi_remember_char (&bidi_it->prev, bidi_it);
2104       if (bidi_it->type_after_w1 == STRONG_R
2105           || bidi_it->type_after_w1 == STRONG_L
2106           || bidi_it->type_after_w1 == STRONG_AL)
2107         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2108       /* FIXME: it sounds like we don't need both prev and
2109          prev_for_neutral members, but I'm leaving them both for now.  */
2110       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2111           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2112         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2113
2114       /* If we overstepped the characters used for resolving neutrals
2115          and whitespace, invalidate their info in the iterator.  */
2116       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2117         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2118       if (bidi_it->next_en_pos >= 0
2119           && bidi_it->charpos >= bidi_it->next_en_pos)
2120         {
2121           bidi_it->next_en_pos = 0;
2122           bidi_it->next_en_type = UNKNOWN_BT;
2123         }
2124       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2125           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2126         bidi_it->next_for_ws.type = UNKNOWN_BT;
2127
2128       /* This must be taken before we fill the iterator with the info
2129          about the next char.  If we scan backwards, the iterator
2130          state must be already cached, so there's no need to know the
2131          embedding level of the previous character, since we will be
2132          returning to our caller shortly.  */
2133       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2134     }
       /* Keep a copy of this member of the current state; it is put back
          below if the state fetched from the cache lacks it.  */
2135   next_for_neutral = bidi_it->next_for_neutral;
2136
2137   /* Perhaps the character we want is already cached.  If it is, the
2138      call to bidi_cache_find below will return a type other than
2139      UNKNOWN_BT.  */
2140   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2141     {
           /* First valid position: 0 in a string, 1 in a buffer.  */
2142       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2143                  ? 0 : 1);
2144       if (bidi_it->scan_dir > 0)
2145         {
2146           if (bidi_it->nchars <= 0)
2147             emacs_abort ();
2148           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2149         }
2150       else if (bidi_it->charpos >= bob)
2151         /* Implementation note: we allow next_char_pos to be as low as
2152            0 for buffers or -1 for strings, and that is okay because
2153            that's the "position" of the sentinel iterator state we
2154            cached at the beginning of the iteration.  */
2155         next_char_pos = bidi_it->charpos - 1;
2156       if (next_char_pos >= bob - 1)
2157         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2158       else
2159         type = UNKNOWN_BT;
2160     }
2161   else
2162     type = UNKNOWN_BT;
2163   if (type != UNKNOWN_BT)
2164     {
2165       /* Don't lose the information for resolving neutrals!  The
2166          cached states could have been cached before their
2167          next_for_neutral member was computed.  If we are on our way
2168          forward, we can simply take the info from the previous
2169          state.  */
2170       if (bidi_it->scan_dir == 1
2171           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2172         bidi_it->next_for_neutral = next_for_neutral;
2173
2174       /* If resolved_level is -1, it means this state was cached
2175          before it was completely resolved, so we cannot return
2176          it.  */
2177       if (bidi_it->resolved_level != -1)
2178         return bidi_it->resolved_level;
2179     }
2180   if (bidi_it->scan_dir == -1)
2181     /* If we are going backwards, the iterator state is already cached
2182        from previous scans, and should be fully resolved.  */
2183     emacs_abort ();
2184
       /* Nothing usable in the cache: step forward and resolve the type
          of the next character.  */
2185   if (type == UNKNOWN_BT)
2186     type = bidi_type_of_next_char (bidi_it);
2187
       /* For a paragraph separator, resolved_level is returned as it
          stands.  NOTE(review): presumably it was set when the paragraph
          end was recorded; confirm in bidi_set_paragraph_end.  */
2188   if (type == NEUTRAL_B)
2189     return bidi_it->resolved_level;
2190
2191   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2192   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2193       || (type == WEAK_BN && prev_level == level))
2194     {
2195       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2196         emacs_abort ();
2197
2198       /* If the cached state shows a neutral character, it was not
2199          resolved by bidi_resolve_neutral, so do it now.  */
2200       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2201                                      bidi_it->next_for_neutral.type,
2202                                      level);
2203     }
2204
       /* Only these types may be left once neutrals are resolved.  */
2205   if (!(type == STRONG_R
2206         || type == STRONG_L
2207         || type == WEAK_BN
2208         || type == WEAK_EN
2209         || type == WEAK_AN))
2210     emacs_abort ();
2211   bidi_it->type = type;
2212   bidi_check_type (bidi_it->type);
2213
2214   /* For L1 below, we need to know, for each WS character, whether
2215      it belongs to a sequence of WS characters preceding a newline
2216      or a TAB or a paragraph separator.  */
2217   if (bidi_it->orig_type == NEUTRAL_WS
2218       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2219     {
           /* The scan below works on local copies of the position
              fields, so that BIDI_IT itself does not move.  */
2220       int ch;
2221       ptrdiff_t clen = bidi_it->ch_len;
2222       ptrdiff_t bpos = bidi_it->bytepos;
2223       ptrdiff_t cpos = bidi_it->charpos;
2224       ptrdiff_t disp_pos = bidi_it->disp_pos;
2225       ptrdiff_t nc = bidi_it->nchars;
2226       struct bidi_string_data bs = bidi_it->string;
2227       bidi_type_t chtype;
2228       bool fwp = bidi_it->frame_window_p;
2229       int dpp = bidi_it->disp_prop;
2230
2231       if (bidi_it->nchars <= 0)
2232         emacs_abort ();
           /* Fetch characters until one is found that is neither
              whitespace, nor a BN, nor a formatting code; a newline and
              the end of the text count as NEUTRAL_B.  */
2233       do {
2234         ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
2235                               bidi_it->w, fwp, &clen, &nc);
2236         if (ch == '\n' || ch == BIDI_EOB)
2237           chtype = NEUTRAL_B;
2238         else
2239           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2240       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2241                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
           /* Remember what ended the whitespace and where, for the
              whitespace characters that follow this one.  */
2242       bidi_it->next_for_ws.type = chtype;
2243       bidi_check_type (bidi_it->next_for_ws.type);
2244       bidi_it->next_for_ws.charpos = cpos;
2245       bidi_it->next_for_ws.bytepos = bpos;
2246     }
2247
2248   /* Resolve implicit levels, with a twist: PDFs get the embedding
2249      level of the embedding they terminate.  See below for the
2250      reason.  */
2251   if (bidi_it->orig_type == PDF
2252       /* Don't do this if this formatting code didn't change the
2253          embedding level due to invalid or empty embeddings.  */
2254       && prev_level != level)
2255     {
2256       /* Don't look in UAX#9 for the reason for this: it's our own
2257          private quirk.  The reason is that we want the formatting
2258          codes to be delivered so that they bracket the text of their
2259          embedding.  For example, given the text
2260
2261              {RLO}teST{PDF}
2262
2263          we want it to be displayed as
2264
2265              {PDF}STet{RLO}
2266
2267          not as
2268
2269              STet{RLO}{PDF}
2270
2271          which will result because we bump up the embedding level as
2272          soon as we see the RLO and pop it as soon as we see the PDF,
2273          so RLO itself has the same embedding level as "teST", and
2274          thus would be normally delivered last, just before the PDF.
2275          The switch below fiddles with the level of PDF so that this
2276          ugly side effect does not happen.
2277
2278          (This is, of course, only important if the formatting codes
2279          are actually displayed, but Emacs does need to display them
2280          if the user wants to.)  */
2281       level = prev_level;
2282     }
       /* Separators, newlines, the end of the text, and whitespace that
          leads up to a separator get the level at the bottom of the
          level stack.  */
2283   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2284            || bidi_it->orig_type == NEUTRAL_S
2285            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2286            || (bidi_it->orig_type == NEUTRAL_WS
2287                && (bidi_it->next_for_ws.type == NEUTRAL_B
2288                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2289     level = bidi_it->level_stack[0].level;
       /* On an even level, R goes up one level and numbers go up two.  */
2290   else if ((level & 1) == 0) /* I1 */
2291     {
2292       if (type == STRONG_R)
2293         level++;
2294       else if (type == WEAK_EN || type == WEAK_AN)
2295         level += 2;
2296     }
       /* On an odd level, L and numbers go up one level.  */
2297   else                  /* I2 */
2298     {
2299       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2300         level++;
2301     }
2302
2303   bidi_it->resolved_level = level;
2304   return level;
2305 }
2306
2307 /* Jump BIDI_IT to the far edge of the stretch of text whose levels are
2308    all at least LEVEL.  END_FLAG means we stand at the end of that
2309    stretch and are about to resume the scan of the lower level; the
2310    cache is then searched with the scan direction of BIDI_IT negated.
2311
2312    When the edge is already in the cache, the cached state there is
2313    loaded into BIDI_IT.  Otherwise (allowed only without END_FLAG) the
2314    text is walked, caching every state on the way, up to and including
2315    the first character whose level is below LEVEL.
2316
2317    Such a stretch is not a ``level run'' in the UAX#9 sense, since it
2318    also spans the text on all the higher levels.  With levels like:
2319
2320          11111112222222333333334443343222222111111112223322111
2321                 A      B                    C
2322
2323    a left-to-right scan that starts at point A is moved to point C,
2324    while the UAX#9 ``level 2 run'' ends already at point B.  */
2325 static void
2326 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2327 {
2328   int search_dir = bidi_it->scan_dir;
2329   ptrdiff_t cached_idx;
2330
2331   if (end_flag)
2332     search_dir = -search_dir;
2333
2334   /* A cached edge can be jumped to directly.  */
2335   cached_idx = bidi_cache_find_level_change (level, search_dir, end_flag);
2336   if (cached_idx >= bidi_cache_start)
2337     {
2338       bidi_cache_fetch_state (cached_idx, bidi_it);
2339       return;
2340     }
2341
2342   /* The edges of a level we have reached the end of must be cached.  */
2343   if (end_flag)
2344     emacs_abort ();
2345
2346   /* Walk the text, caching each state, until we drop below LEVEL.  */
2347   bidi_cache_iterator_state (bidi_it, 1);
2348   while (bidi_level_of_next_char (bidi_it) >= level)
2349     bidi_cache_iterator_state (bidi_it, 1);
2350   bidi_cache_iterator_state (bidi_it, 1);
2351 }
2352 /* Advance BIDI_IT, in place, to the next character in visual order.  */
2353 void
2354 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2355 {
2356   int old_level, new_level, next_level;
2357   struct bidi_it sentinel;
2358   struct gcpro gcpro1;
2359   /* A negative character or byte position is a fatal error.  */
2360   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2361     emacs_abort ();
2362
2363   if (bidi_it->scan_dir == 0)
2364     {
2365       bidi_it->scan_dir = 1;    /* default to logical order */
2366     }
2367
2368   /* The code below can call eval, and thus cause GC.  If we are
2369      iterating a Lisp string, make sure it won't be GCed.  */
2370   if (STRINGP (bidi_it->string.lstring))
2371     GCPRO1 (bidi_it->string.lstring);
2372
2373   /* If we just passed a newline or BIDI_EOB, init for the next line.  */
2374   if (!bidi_it->first_elt
2375       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2376     bidi_line_init (bidi_it);
2377
2378   /* When bidi_cache_idx is still at bidi_cache_start, prepare the
2379      sentinel iterator state and cache it.  When we bump into it,
2380      scanning backwards, we know the last non-base level is exhausted.  */
2381   if (bidi_cache_idx == bidi_cache_start)
2382     {
2383       bidi_copy_it (&sentinel, bidi_it);
2384       if (bidi_it->first_elt)
2385         {
2386           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2387           sentinel.bytepos--;
2388           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2389           sentinel.ch_len = 1;
2390           sentinel.nchars = 1;
2391         }
2392       bidi_cache_iterator_state (&sentinel, 1);
2393     }
2394   /* Note the level we are leaving, then step to the next character.  */
2395   old_level = bidi_it->resolved_level;
2396   new_level = bidi_level_of_next_char (bidi_it);
2397
2398   /* Reordering of resolved levels (clause L2) is implemented by
2399      jumping to the other edge of the level and flipping direction of
2400      scanning the text whenever we find a level change.  */
2401   if (new_level != old_level)
2402     {
2403       bool ascending = new_level > old_level;
2404       int level_to_search = ascending ? old_level + 1 : old_level;
2405       int incr = ascending ? 1 : -1;
2406       int expected_next_level = old_level + incr;
2407
2408       /* Jump (or walk) to the other edge of this level.  */
2409       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2410       /* Switch scan direction and peek at the next character in the
2411          new direction.  */
2412       bidi_it->scan_dir = -bidi_it->scan_dir;
2413
2414       /* The following loop handles the case where the resolved level
2415          jumps by more than one.  This is typical for numbers inside a
2416          run of text with left-to-right embedding direction, but can
2417          also happen in other situations.  In those cases the decision
2418          where to continue after a level change, and in what direction,
2419          is tricky.  For example, given a text like below:
2420
2421                   abcdefgh
2422                   11336622
2423
2424          (where the numbers below the text show the resolved levels),
2425          the result of reordering according to UAX#9 should be this:
2426
2427                   efdcghba
2428
2429          This is implemented by the loop below which flips direction
2430          and jumps to the other edge of the level each time it finds
2431          the new level not to be the expected one.  The expected level
2432          is always one more or one less than the previous one.  */
2433       next_level = bidi_peek_at_next_level (bidi_it);
2434       while (next_level != expected_next_level)
2435         {
2436           /* If next_level is -1, it means we have an unresolved level
2437              in the cache, which at this point should not happen.  If
2438              it does, we will infloop.  */
2439           eassert (next_level >= 0);
2440           expected_next_level += incr;
2441           level_to_search += incr;
2442           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2443           bidi_it->scan_dir = -bidi_it->scan_dir;
2444           next_level = bidi_peek_at_next_level (bidi_it);
2445         }
2446
2447       /* Finally, deliver the next character in the new direction.  */
2448       next_level = bidi_level_of_next_char (bidi_it); /* value unused */
2449     }
2450
2451   /* Take note when we have just processed the newline that precedes
2452      the end of the paragraph.  The next time we are about to be
2453      called, set_iterator_to_next will automatically reinit the
2454      paragraph direction, if needed.  We do this at the newline before
2455      the paragraph separator, because the next character might not be
2456      the first character of the next paragraph, due to the bidi
2457      reordering, whereas we _must_ know the paragraph base direction
2458      _before_ we process the paragraph's text, since the base
2459      direction affects the reordering.  */
2460   if (bidi_it->scan_dir == 1
2461       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2462     {
2463       /* The paragraph direction of the entire string, once
2464          determined, is in effect for the entire string.  Setting the
2465          separator limit to the end of the string prevents
2466          bidi_paragraph_init from being called automatically on this
2467          string.  */
2468       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2469         bidi_it->separator_limit = bidi_it->string.schars;
2470       else if (bidi_it->bytepos < ZV_BYTE)
2471         {
2472           ptrdiff_t sep_len
2473             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2474                                      bidi_it->bytepos + bidi_it->ch_len);
2475           if (bidi_it->nchars <= 0)
2476             emacs_abort ();
2477           if (sep_len >= 0)
2478             {
2479               bidi_it->new_paragraph = 1;
2480               /* Record the buffer position of the last character of the
2481                  paragraph separator.  */
2482               bidi_it->separator_limit
2483                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2484             }
2485         }
2486     }
2487
2488   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2489     {
2490       /* If we are at paragraph's base embedding level and beyond the
2491          last cached position, the cache's job is done and we can
2492          discard it.  */
2493       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2494           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2495                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2496         bidi_cache_reset ();
2497         /* But as long as we are caching during forward scan, we must
2498            cache each state, or else the cache integrity will be
2499            compromised: it assumes cached states correspond to buffer
2500            positions 1:1.  */
2501       else
2502         bidi_cache_iterator_state (bidi_it, 1);
2503     }
2504   /* Undo the GCPRO1 done above for a Lisp string.  */
2505   if (STRINGP (bidi_it->string.lstring))
2506     UNGCPRO;
2507 }
2508
2509 /* Debugger aid: dump each cached state's character, resolved level
2510    and charpos to stderr, in columns sized from the last charpos.  */
2511 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2512 void
2513 bidi_dump_cached_states (void)
2514 {
2515   ptrdiff_t k;
2516   int width;
2517
2518   if (bidi_cache_idx == 0)
2519     {
2520       fputs ("The cache is empty.\n", stderr);
2521       return;
2522     }
2523   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2524            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2525   k = bidi_cache[bidi_cache_idx - 1].charpos;
2526   for (width = 1; k > 0; k /= 10)   /* digits of that charpos, plus 1 */
2527     width++;
2528   fputs ("ch  ", stderr);
2529   for (k = 0; k < bidi_cache_idx; k++)
2530     fprintf (stderr, "%*c", width, bidi_cache[k].ch);
2531   fputs ("\n", stderr);
2532   fputs ("lvl ", stderr);
2533   for (k = 0; k < bidi_cache_idx; k++)
2534     fprintf (stderr, "%*d", width, bidi_cache[k].resolved_level);
2535   fputs ("\n", stderr);
2536   fputs ("pos ", stderr);
2537   for (k = 0; k < bidi_cache_idx; k++)
2538     fprintf (stderr, "%*"pD"d", width, bidi_cache[k].charpos);
2539   fputs ("\n", stderr);
2540 }