src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;        /* set to 1 by bidi_initialize */
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table; /* from uniprop_table */
  69
  70 #define LRM_CHAR   0x200E       /* these two characters keep their own */
  71 #define RLM_CHAR   0x200F       /* type under an override; see bidi_get_type */
  72 #define BIDI_EOB   -1           /* pseudo-character: NEUTRAL_B, never mirrored */
  73
  74 /* Coarse category of a bidi type, as computed by bidi_get_category.  */
  75 typedef enum {
  76   UNKNOWN_BC,                   /* category of UNKNOWN_BT */
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re; /* strings set */
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate; /* in bidi_initialize */
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type to use for character CH while the directional
  99    override OVERRIDE is in effect.
 100
 101    BIDI_EOB yields NEUTRAL_B; any other CH outside 0..MAX_CHAR aborts.
 102    With OVERRIDE == NEUTRAL_DIR the type recorded in bidi_type_table
 103    is returned as is.  Otherwise the result is STRONG_L for L2R and
 104    STRONG_R for R2L (UAX#9 rule X6), except for the types and
 105    characters exempted below, which keep the type from the table.  */
 106 static inline bidi_type_t
 107 bidi_get_type (int ch, bidi_dir_t override)
 108 {
 109   bidi_type_t table_type;
 110
 111   if (ch == BIDI_EOB)
 112     return NEUTRAL_B;
 113   if (!(0 <= ch && ch <= MAX_CHAR))
 114     abort ();
 115
 116   /* Every valid character code, even those that are unassigned by
 117      the UCD, has some bidi-class property according to the
 118      DerivedBidiClass.txt file.  Therefore, an UNKNOWN_BT (= zero)
 119      code coming out of CHAR_TABLE_REF is a bug.  */
 120   table_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 121   if (table_type == UNKNOWN_BT)
 122     abort ();
 123
 124   /* No override in effect: nothing more to do.  */
 125   if (override == NEUTRAL_DIR)
 126     return table_type;
 127
 128   /* Although UAX#9 does not tell, it doesn't make sense to override
 129      NEUTRAL_B, the explicit embedding and override codes, PDF, or
 130      the LRM/RLM characters.  */
 131   if (table_type == NEUTRAL_B
 132       || table_type == LRE || table_type == LRO
 133       || table_type == RLE || table_type == RLO
 134       || table_type == PDF
 135       || ch == LRM_CHAR || ch == RLM_CHAR)
 136     return table_type;
 137
 138   switch (override)
 139     {
 140     case L2R:                   /* X6 */
 141       return STRONG_L;
 142     case R2L:
 143       return STRONG_R;
 144     default:
 145       abort ();                 /* can't happen: handled above */
 146     }
 147 }
 148
 149 static inline void      /* Assert that TYPE lies in UNKNOWN_BT..NEUTRAL_ON.  */
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (!(type < UNKNOWN_BT || NEUTRAL_ON < type));
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  The
 156    explicit embedding and override codes count as STRONG and PDF as
 157    WEAK; UNKNOWN_BT maps to UNKNOWN_BC; any unlisted value aborts.  */
 158 static inline bidi_category_t
 159 bidi_get_category (bidi_type_t type)
 160 {
 161   bidi_category_t category;
 162
 163   switch (type)
 164     {
 165     case NEUTRAL_B: case NEUTRAL_S: case NEUTRAL_WS: case NEUTRAL_ON:
 166       category = NEUTRAL;
 167       break;
 168
 169     case WEAK_EN: case WEAK_ES: case WEAK_ET: case WEAK_AN:
 170     case WEAK_CS: case WEAK_NSM: case WEAK_BN:
 171     case PDF:                   /* ??? really?? */
 172       category = WEAK;
 173       break;
 174
 175     case STRONG_L: case STRONG_R: case STRONG_AL:
 176     case LRE: case LRO: case RLE: case RLO:
 177       category = STRONG;
 178       break;
 179
 180     case UNKNOWN_BT:
 181       category = UNKNOWN_BC;
 182       break;
 183
 184     default:
 185       abort ();
 186     }
 187   return category;
 188 }
 189
 190 /* Return the character that mirrors C according to bidi_mirror_table,
 191    or C itself when the table has no integer entry for it.  BIDI_EOB
 192    is returned unchanged.  Abort if C, or the table entry found for
 193    it, is not in the range 0..MAX_CHAR.
 194    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 195    context must be tested by the caller.  */
 196 int
 197 bidi_mirror_char (int c)
 198 {
 199   Lisp_Object entry;
 200   int mirrored;
 201
 202   if (c == BIDI_EOB)
 203     return c;
 204   if (!(0 <= c && c <= MAX_CHAR))
 205     abort ();
 206
 207   entry = CHAR_TABLE_REF (bidi_mirror_table, c);
 208   if (!INTEGERP (entry))
 209     return c;                   /* no mirrored counterpart */
 210
 211   mirrored = XINT (entry);
 212   if (!(0 <= mirrored && mirrored <= MAX_CHAR))
 213     abort ();
 214
 215   return mirrored;
 216 }
 217
 218 /* Set the start-of-run (sor) direction for the level run that begins
 219    where the embedding level changes from LEVEL_BEFORE to LEVEL_AFTER.
 220    Also reset the saved info about previously seen characters, since
 221    that info is generally valid for a single level run only.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int descending = level_before > level_after;
 226   int top_level = descending ? level_before : level_after;
 227
 228   /* sor depends only on the higher of the two levels found on the two
 229      sides of the level boundary (see UAX#9, clause X10).  So with
 230      several PDFs in a row it is computed only once, when the first
 231      PDF is seen; prev_was_pdf records that this has happened.  */
 232   if (level_before < level_after || !bidi_it->prev_was_pdf)
 233     /* FIXME: should the default sor direction be user selectable?  */
 234     bidi_it->sor = (top_level & 1) ? R2L : L2R;
 235   if (descending)
 236     bidi_it->prev_was_pdf = 1;
 237
 238   bidi_it->prev.type = UNKNOWN_BT;
 239   bidi_it->last_strong.orig_type = UNKNOWN_BT;
 240   bidi_it->last_strong.type_after_w1 = UNKNOWN_BT;
 241   bidi_it->last_strong.type = UNKNOWN_BT;
 242   bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 243   bidi_it->next_for_neutral.type_after_w1 = UNKNOWN_BT;
 244   bidi_it->next_for_neutral.type = UNKNOWN_BT;
 245   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 246   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 247   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 248   bidi_it->ignore_bn_limit = -1;        /* meaning it's unknown */
 249 }
 250
 251 /* Add an entry with level LEVEL and override status OVERRIDE on top
 252    of BIDI_IT's level stack; the entries below it are left intact.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   int top = ++bidi_it->stack_idx;
 258   xassert (top < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[top].override = override;
 260   bidi_it->level_stack[top].level = level;
 261 }
 262
 263 /* Remove the top entry of BIDI_IT's level stack, unless it is entry
 264    zero, and return the level of the entry that is on top afterwards.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   int top = bidi_it->stack_idx;
 269   if (top > 0)                  /* UAX#9 says to ignore invalid PDFs */
 270     bidi_it->stack_idx = --top;
 271   return bidi_it->level_stack[top].level;
 272 }
 273
 274 /* Save the position and bidi types of BIDI_IT's character in SAVED_INFO.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   bidi_check_type (bidi_it->type);
 280   bidi_check_type (bidi_it->type_after_w1);
 281   bidi_check_type (bidi_it->orig_type);
 282   saved_info->orig_type = bidi_it->orig_type;
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   saved_info->type = bidi_it->type;
 285   saved_info->bytepos = bidi_it->bytepos;
 286   saved_info->charpos = bidi_it->charpos;
 287 }
 288
 289 /* Copy the bidi iterator *FROM to *TO.  To save cycles, only the members
 290    before level_stack and its entries 0..FROM->stack_idx are copied.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int depth = from->stack_idx, lvl = 0;
 295
 296   /* The members that precede the level stack are copied wholesale.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Entry zero is always in use, so copy it unconditionally.  */
 300   do
 301     to->level_stack[lvl] = from->level_stack[lvl];
 302   while (++lvl <= depth);
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200            /* minimal growth and shrunken size, in slots */
 311 static struct bidi_it *bidi_cache;      /* the cached iterator states */
 312 static ptrdiff_t bidi_cache_size = 0;   /* allocated size, in slots */
 313 enum { elsz = sizeof (struct bidi_it) };        /* size of one slot, in bytes */
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* Stack for saving the start of the previous level of the cache.
 320    xdisp.c maintains a 5-slot stack for its iterator state, and we
 321    need the same size for our stack, hence IT_STACK_SIZE below.  */
 322 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 323 static int bidi_cache_sp;               /* next free slot of that stack */
 324
 325 /* Size of the cache control variables saved by bidi_shelve_cache.  */
 326 enum
 327   {
 328     bidi_shelve_header_size =
 329       (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 330        + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 331        + sizeof (bidi_cache_last_idx))
 332   };
 333
 334 /* Empty the part of the cache that belongs to the object currently
 335    being iterated: the next unused slot becomes the first slot of
 336    the current "stack" level, and the last cache hit is forgotten.
 337    Slots of objects pushed on the display iterator's stack are left
 338    intact.  This is called when the cached information is no longer
 339    useful, e.g. when we were reseated to a new position.  */
 340 static inline void
 341 bidi_cache_reset (void)
 342 {
 343   bidi_cache_last_idx = -1;
 344   bidi_cache_idx = bidi_cache_start;
 345 }
 346
 347 /* Shrink the cache to BIDI_CACHE_CHUNK slots if it is larger, and
 348    reset it.  Called when we init the bidi iterator for reordering a
 349    buffer or a string that does not come from display properties,
 350    because then all the previously cached info is of no further use.  */
 351 static inline void
 352 bidi_cache_shrink (void)
 353 {
 354   if (BIDI_CACHE_CHUNK < bidi_cache_size)
 355     {
 356       size_t min_bytes = BIDI_CACHE_CHUNK * elsz;
 357       bidi_cache = (struct bidi_it *) xrealloc (bidi_cache, min_bytes);
 358       bidi_cache_size = BIDI_CACHE_CHUNK;
 359     }
 360   bidi_cache_reset ();
 361 }
 362
 363 /* Load cache slot IDX into BIDI_IT, keeping BIDI_IT's own scan_dir,
 364    and record IDX as the last cache hit.  Abort if IDX is not in use.  */
 365 static inline void
 366 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 367 {
 368   int saved_dir = bidi_it->scan_dir;
 369   if (!(bidi_cache_start <= idx && idx < bidi_cache_idx))
 370     abort ();
 371   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 372   bidi_cache_last_idx = idx;
 373   bidi_it->scan_dir = saved_dir;
 374 }
 375
 376 /* Find a cached state that covers character position CHARPOS and
 377    whose resolved embedding level is less than or equal to LEVEL;
 378    if LEVEL is -1, disregard the resolved levels.  Return the index
 379    of its cache slot, or -1 if there is no such state.
 380    If CHARPOS lies before or after the characters of the last cache
 381    hit, scan backward or forward from the slot next to the hit.
 382    Otherwise, non-zero DIR means scan in that direction starting at
 383    the hit itself, and zero DIR means scan backward from the last
 384    occupied slot.  Without a previous hit, the last occupied slot
 385    is taken as the hit.  */
 386 static inline ptrdiff_t
 387 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 388 {
 389   const struct bidi_it *hit;
 390   ptrdiff_t slot;
 391
 392   if (bidi_cache_idx <= bidi_cache_start)
 393     return -1;                  /* nothing cached at this stack level */
 394
 395   if (bidi_cache_last_idx == -1)
 396     bidi_cache_last_idx = bidi_cache_idx - 1;
 397   hit = &bidi_cache[bidi_cache_last_idx];
 398
 399   if (charpos < hit->charpos)
 400     {
 401       slot = bidi_cache_last_idx - 1;
 402       dir = -1;
 403     }
 404   else if (charpos > hit->charpos + hit->nchars - 1)
 405     {
 406       slot = bidi_cache_last_idx + 1;
 407       dir = 1;
 408     }
 409   else if (dir != 0)
 410     slot = bidi_cache_last_idx;
 411   else
 412     {
 413       slot = bidi_cache_idx - 1;
 414       dir = -1;
 415     }
 416
 417   /* Linear search for now; FIXME!  */
 418   while (dir < 0 ? slot >= bidi_cache_start : slot < bidi_cache_idx)
 419     {
 420       const struct bidi_it *c = &bidi_cache[slot];
 421       if (c->charpos <= charpos
 422           && charpos < c->charpos + c->nchars
 423           && (level == -1 || c->resolved_level <= level))
 424         return slot;
 425       slot += dir < 0 ? -1 : 1;
 426     }
 427   return -1;
 428 }
 429
 430 /* Find a cached state where the resolved level changes to a value
 431    lower than LEVEL and return its slot index, or -1 if there is
 432    none.  DIR is the direction to search, starting with the last
 433    used cache slot; zero DIR means search backwards from the last
 434    occupied slot.  Non-zero BEFORE means return the slot ``before''
 435    the level change in the search direction.  Given the levels
 436
 437          1122333442211
 438           AB        C
 439
 440    and assuming we are at the position cached at the slot marked with
 441    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 442    index of slot B or A, depending whether BEFORE is, respectively,
 443    non-zero or zero.  */
 444 static ptrdiff_t
 445 bidi_cache_find_level_change (int level, int dir, int before)
 446 {
 447   const struct bidi_it *c;
 448   int ahead = before ? 1 : 0;   /* offset of the slot examined */
 449   ptrdiff_t i;
 450
 451   if (!bidi_cache_idx)
 452     return -1;
 453   xassert (!dir || bidi_cache_last_idx >= 0);
 454
 455   if (dir == 0)
 456     {
 457       i = bidi_cache_idx - 1;
 458       dir = -1;
 459     }
 460   else
 461     {
 462       i = bidi_cache_last_idx;
 463       if (!ahead)
 464         i += dir;
 465     }
 466
 467   if (dir < 0)
 468     for (; i >= bidi_cache_start + ahead; i--)
 469       {
 470         c = &bidi_cache[i - ahead];
 471         if (c->resolved_level >= 0 && c->resolved_level < level)
 472           return i;
 473       }
 474   else
 475     for (; i < bidi_cache_idx - ahead; i++)
 476       {
 477         c = &bidi_cache[i + ahead];
 478         if (c->resolved_level >= 0 && c->resolved_level < level)
 479           return i;
 480       }
 481
 482   return -1;
 483 }
 484
 485 /* Enlarge the cache, if needed, so that slot IDX can be used.  */
 486 static inline void
 487 bidi_cache_ensure_space (ptrdiff_t idx)
 488 {
 489   ptrdiff_t lisp_bound, c_bound, growth;
 490
 491   if (idx < bidi_cache_size)
 492     return;
 493
 494   /* The bidi cache cannot be larger than the largest Lisp string or
 495      buffer, nor than what C can represent once bidi_shelve_cache
 496      adds its header.  */
 497   lisp_bound = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 498   c_bound = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 499
 500   /* Grow by at least BIDI_CACHE_CHUNK slots, and by enough to make
 501      IDX a valid slot.  */
 502   growth = max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1);
 503   bidi_cache = xpalloc (bidi_cache, &bidi_cache_size, growth,
 504                         min (lisp_bound, c_bound), elsz);
 505 }
 506
 507 /* Cache the state of BIDI_IT: update the slot that covers its charpos
 508    if there is one, else fill the next unused slot.  Zero RESOLVED
 509    means record -1 as the slot's resolved level.  */
 510 static inline void
 511 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 512 {
 513   struct bidi_it *slot;
 514   ptrdiff_t idx;
 515
 516   /* We should never cache on backward scans.  */
 517   if (bidi_it->scan_dir == -1)
 518     abort ();
 519   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 520   if (idx >= 0)
 521     {
 522       /* Update only what could have changed; copying the struct is costly.  */
 523       slot = &bidi_cache[idx];
 524       slot->type = bidi_it->type;
 525       bidi_check_type (bidi_it->type);
 526       slot->type_after_w1 = bidi_it->type_after_w1;
 527       bidi_check_type (bidi_it->type_after_w1);
 528       slot->resolved_level = resolved ? bidi_it->resolved_level : -1;
 529       slot->invalid_levels = bidi_it->invalid_levels;
 530       slot->invalid_rl_levels = bidi_it->invalid_rl_levels;
 531       slot->next_for_neutral = bidi_it->next_for_neutral;
 532       slot->next_for_ws = bidi_it->next_for_ws;
 533       slot->ignore_bn_limit = bidi_it->ignore_bn_limit;
 534       slot->disp_pos = bidi_it->disp_pos;
 535       slot->disp_prop = bidi_it->disp_prop;
 536     }
 537   else
 538     {
 539       idx = bidi_cache_idx;
 540       bidi_cache_ensure_space (idx);
 541       /* Character positions should correspond to cache positions 1:1.
 542          If we are outside the range of cached positions, the cache is
 543          useless and must be reset.  */
 544       if (idx > bidi_cache_start
 545           && (bidi_it->charpos < bidi_cache[bidi_cache_start].charpos
 546               || bidi_it->charpos > (bidi_cache[idx - 1].charpos
 547                                      + bidi_cache[idx - 1].nchars)))
 548         {
 549           bidi_cache_reset ();
 550           idx = bidi_cache_start;
 551         }
 552       if (bidi_it->nchars <= 0)
 553         abort ();
 554       slot = &bidi_cache[idx];
 555       bidi_copy_it (slot, bidi_it);
 556       if (!resolved)
 557         slot->resolved_level = -1;
 558     }
 559   bidi_cache_last_idx = idx;
 560   if (idx >= bidi_cache_idx)
 561     bidi_cache_idx = idx + 1;
 562 }
 563
 564 /* Look in the cache for a state that covers character position
 565    CHARPOS and whose resolved level is at most LEVEL (-1 means any
 566    level), searching in BIDI_IT's scan direction.  On a hit, load
 567    that state into BIDI_IT and return its bidi type; otherwise
 568    leave BIDI_IT alone and return UNKNOWN_BT.  */
 569 static inline bidi_type_t
 570 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 571 {
 572   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 573
 574   if (i < bidi_cache_start)
 575     return UNKNOWN_BT;
 576
 577   /* bidi_cache_fetch_state doesn't let the scan direction from the
 578      cached state override the current one.  It keeps the current
 579      direction in an int, as needed for a value that can be -1.  */
 580   bidi_cache_fetch_state (i, bidi_it);
 581   return bidi_it->type;
 582 }
 583
 584 static inline int       /* Resolved level of the slot next to the last cache hit.  */
 585 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 586 {
 587   if (bidi_cache_last_idx == -1 || bidi_cache_start == bidi_cache_idx)
 588     abort ();
 589   return (bidi_cache + bidi_cache_last_idx + bidi_it->scan_dir)->resolved_level;
 590 }
 591
 592 \f
 593 /***********************************************************************
 594              Pushing and popping the bidi iterator state
 595  ***********************************************************************/
 596
 597 /* Push the bidi iterator state in preparation for reordering a
 598    different object, e.g. display string found at certain buffer
 599    position: save the entire state of BIDI_IT after the last used
 600    cache slot and start a new, empty cache "stacked" on top of the
 601    current one.  Abort if the stack of cache starts is full.  */
 602 void
 603 bidi_push_it (struct bidi_it *bidi_it)
 604 {
 605   /* Hard check, as in bidi_pop_it, so a full stack is never overrun.  */
 606   if (bidi_cache_sp >= IT_STACK_SIZE)
 607     abort ();
 608
 609   /* Save the whole iterator state after the last used cache slot.  */
 610   bidi_cache_ensure_space (bidi_cache_idx);
 611   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof *bidi_it);
 612
 613   /* Push the current cache start and start a new, empty level.  */
 614   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 615   bidi_cache_start = bidi_cache_idx;
 616   bidi_cache_last_idx = -1;
 617 }
 618
 619 /* Undo bidi_push_it: copy the iterator state that was saved just
 620    below the current cache level back into BIDI_IT, discard that
 621    level of the cache, and make the level below current again.
 622    Abort if there is no pushed state to restore.  */
 623 void
 624 bidi_pop_it (struct bidi_it *bidi_it)
 625 {
 626   /* The slot holding the saved state precedes the current level.  */
 627   const ptrdiff_t saved = bidi_cache_start - 1;
 628
 629   if (saved < 0 || bidi_cache_sp < 1)
 630     abort ();
 631
 632   memcpy (bidi_it, bidi_cache + saved, sizeof (struct bidi_it));
 633
 634   /* That slot is free again, as it was before bidi_push_it.  */
 635   bidi_cache_idx = saved;
 636
 637   /* Return to the previous level of the cache, and forget the last
 638      cache hit.  */
 639   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 640   bidi_cache_last_idx = -1;
 641 }
 642
 643 static ptrdiff_t bidi_cache_total_alloc; /* bytes held by shelved cache copies */
 644
 645 /* Stash away a copy of the cache and its control variables, and
 646    return the buffer allocated for the copy, or NULL if the cache is
 647    empty.  The buffer holds, in this order: bidi_cache_idx, the
 648    slots in use, bidi_cache_start_stack, bidi_cache_sp,
 649    bidi_cache_start, and bidi_cache_last_idx.  Release the buffer
 650    with bidi_unshelve_cache.  */
 651 void *
 652 bidi_shelve_cache (void)
 653 {
 654   unsigned char *databuf, *dst;
 655   size_t slot_bytes;
 656   ptrdiff_t alloc;
 657
 658   /* Empty cache.  */
 659   if (bidi_cache_idx == 0)
 660     return NULL;
 661
 662   slot_bytes = bidi_cache_idx * sizeof (struct bidi_it);
 663   alloc = bidi_shelve_header_size + slot_bytes;
 664   databuf = xmalloc (alloc);
 665   bidi_cache_total_alloc += alloc;
 666
 667   /* DST always points just past what has been written so far.  */
 668   dst = databuf;
 669   memcpy (dst, &bidi_cache_idx, sizeof (bidi_cache_idx));
 670   dst += sizeof (bidi_cache_idx);
 671   memcpy (dst, bidi_cache, slot_bytes);
 672   dst += slot_bytes;
 673   memcpy (dst, bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 674   dst += sizeof (bidi_cache_start_stack);
 675   memcpy (dst, &bidi_cache_sp, sizeof (bidi_cache_sp));
 676   dst += sizeof (bidi_cache_sp);
 677   memcpy (dst, &bidi_cache_start, sizeof (bidi_cache_start));
 678   dst += sizeof (bidi_cache_start);
 679   memcpy (dst, &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 680
 681   return databuf;
 682 }
 683
 684 /* Restore the cache state from a copy stashed away by
 685    bidi_shelve_cache, and free the buffer used to stash that copy.
 686    JUST_FREE non-zero means free the buffer, but don't restore the
 687    cache; used when the corresponding iterator is discarded instead
 688    of being restored.  A NULL DATABUF stands for an empty cache: in
 689    that case, unless JUST_FREE is non-zero, the cache and the stack
 690    of cache starts are emptied.
 691
 692    The buffer is read in the order bidi_shelve_cache wrote it:
 693    bidi_cache_idx, the cache slots, bidi_cache_start_stack,
 694    bidi_cache_sp, bidi_cache_start, and bidi_cache_last_idx.  */
 695 void
 696 bidi_unshelve_cache (void *databuf, int just_free)
 697 {
 698   const unsigned char *src = databuf;
 699   ptrdiff_t n_slots;
 700   size_t slot_bytes;
 701
 702   if (!databuf)
 703     {
 704       if (!just_free)
 705         {
 706           /* A NULL pointer means an empty cache.  */
 707           bidi_cache_start = 0;
 708           bidi_cache_sp = 0;
 709           bidi_cache_reset ();
 710         }
 711       return;
 712     }
 713
 714   /* The stashed copy begins with the number of slots it holds, which
 715      also determines how many bytes were allocated for it.  */
 716   memcpy (&n_slots, src, sizeof (bidi_cache_idx));
 717   src += sizeof (bidi_cache_idx);
 718   slot_bytes = n_slots * sizeof (struct bidi_it);
 719
 720   if (!just_free)
 721     {
 722       /* Make sure the cache is big enough before copying into it.  */
 723       bidi_cache_idx = n_slots;
 724       bidi_cache_ensure_space (bidi_cache_idx);
 725
 726       /* First come the cache slots themselves...  */
 727       memcpy (bidi_cache, src, slot_bytes);
 728       src += slot_bytes;
 729
 730       /* ...then the stack of cache starts and its pointer...  */
 731       memcpy (bidi_cache_start_stack, src, sizeof (bidi_cache_start_stack));
 732       src += sizeof (bidi_cache_start_stack);
 733       memcpy (&bidi_cache_sp, src, sizeof (bidi_cache_sp));
 734       src += sizeof (bidi_cache_sp);
 735
 736       /* ...and finally the current start and the last cache hit.  */
 737       memcpy (&bidi_cache_start, src, sizeof (bidi_cache_start));
 738       src += sizeof (bidi_cache_start);
 739       memcpy (&bidi_cache_last_idx, src, sizeof (bidi_cache_last_idx));
 740     }
 741
 742   /* Whether it was restored or just discarded, the copy no longer
 743      counts as shelved.  */
 744   bidi_cache_total_alloc -= bidi_shelve_header_size + slot_bytes;
 745   xfree (databuf);
 746 }
 747
 748 \f
 749 /***********************************************************************
 750                         Initialization
 751  ***********************************************************************/
 752 static void                     /* one-time setup, run by bidi_init_it */
 753 bidi_initialize (void)
 754 {
 755   bidi_type_table = uniprop_table (intern ("bidi-class"));
 756   if (NILP (bidi_type_table))
 757     abort ();                   /* bidi_get_type cannot work without it */
 758   staticpro (&bidi_type_table);
 759   /* Likewise for the table that bidi_mirror_char consults.  */
 760   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 761   if (NILP (bidi_mirror_table))
 762     abort ();
 763   staticpro (&bidi_mirror_table);
 764   /* Each *_re is the symbol's value, or the default below if not a string.  */
 765   Qparagraph_start = intern ("paragraph-start");
 766   staticpro (&Qparagraph_start);
 767   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 768   if (!STRINGP (paragraph_start_re))
 769     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 770   staticpro (&paragraph_start_re);
 771   Qparagraph_separate = intern ("paragraph-separate");
 772   staticpro (&Qparagraph_separate);
 773   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 774   if (!STRINGP (paragraph_separate_re))
 775     paragraph_separate_re = build_string ("[ \t\f]*$");
 776   staticpro (&paragraph_separate_re);
 777
 778   bidi_cache_sp = 0;            /* empty stack of cache starts */
 779   bidi_cache_total_alloc = 0;
 780
 781   bidi_initialized = 1;         /* bidi_init_it won't call us again */
 782 }
 783
 784 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end:
 785    go back to level stack entry zero and reset the invalid_* counts.  */
 786 static inline void
 787 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 788 {
 789   bidi_it->stack_idx = 0;
 790   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 791   bidi_it->invalid_rl_levels = -1;
 792   bidi_it->invalid_levels = 0;
 793 }
 794
 795 /* Initialize the bidi iterator BIDI_IT for buffer/string position
 796    CHARPOS/BYTEPOS (a negative one is not stored), and trim the cache.  */
 797 void
 798 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 799               struct bidi_it *bidi_it)
 800 {
 801   if (!bidi_initialized)
 802     bidi_initialize ();
 803   if (0 <= charpos)
 804     bidi_it->charpos = charpos;
 805   if (0 <= bytepos)
 806     bidi_it->bytepos = bytepos;
 807   bidi_it->frame_window_p = frame_window_p;
 808   bidi_it->first_elt = 1;
 809   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 810   bidi_set_paragraph_end (bidi_it);
 811   bidi_it->separator_limit = -1;
 812   bidi_it->new_paragraph = 1;
 813   bidi_it->type = bidi_it->type_after_w1 = bidi_it->orig_type = NEUTRAL_B;
 814   bidi_it->prev_was_pdf = 0;
 815   bidi_it->prev.orig_type = UNKNOWN_BT;
 816   bidi_it->prev.type_after_w1 = UNKNOWN_BT;
 817   bidi_it->prev.type = UNKNOWN_BT;
 818   bidi_it->last_strong.orig_type = UNKNOWN_BT;
 819   bidi_it->last_strong.type_after_w1 = UNKNOWN_BT;
 820   bidi_it->last_strong.type = UNKNOWN_BT;
 821   bidi_it->next_for_neutral.charpos = -1;
 822   bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 823   bidi_it->next_for_neutral.type_after_w1 = UNKNOWN_BT;
 824   bidi_it->next_for_neutral.type = UNKNOWN_BT;
 825   bidi_it->prev_for_neutral.charpos = -1;
 826   bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 827   bidi_it->prev_for_neutral.type_after_w1 = UNKNOWN_BT;
 828   bidi_it->prev_for_neutral.type = UNKNOWN_BT;
 829   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 830   bidi_it->disp_prop = 0;
 831   bidi_it->disp_pos = -1;       /* invalid/unknown */
 832   /* The cache can only be shrunk at the bottom level of its "stack".  */
 833   if (bidi_cache_start == 0)
 834     bidi_cache_shrink ();
 835   else
 836     bidi_cache_reset ();
 837 }
 838
 839 /* Perform initializations for reordering a new line of bidi text.  */
 840 static void
 841 bidi_line_init (struct bidi_it *bidi_it)
 842 {
 843   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 844   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 845   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 846   bidi_it->invalid_levels = 0;
 847   bidi_it->invalid_rl_levels = -1;
 848   bidi_it->next_en_pos = -1;
 849   bidi_it->next_for_ws.type = UNKNOWN_BT;
 850   bidi_set_sor_type (bidi_it,
 851                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 852                      bidi_it->level_stack[0].level); /* X10 */
 853
 854   bidi_cache_reset ();
 855 }
 856
 857 \f
 858 /***********************************************************************
 859                         Fetching characters
 860  ***********************************************************************/
 861
 862 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 863    are zero-based character positions in S, BEGBYTE is byte position
 864    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 865    string.  */
 866 static inline EMACS_INT
 867 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 868                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 869 {
 870   EMACS_INT pos = beg;
 871   const unsigned char *p = s + begbyte, *start = p;
 872
 873   if (unibyte)
 874     p = s + end;
 875   else
 876     {
 877       if (!CHAR_HEAD_P (*p))
 878         abort ();
 879
 880       while (pos < end)
 881         {
 882           p += BYTES_BY_CHAR_HEAD (*p);
 883           pos++;
 884         }
 885     }
 886
 887   return p - start;
 888 }
 889
 890 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 891    non-NULL, fetch the character from string S; otherwise fetch the
 892    character from the current buffer.  UNIBYTE non-zero means S is a
 893    unibyte string.  */
 894 static inline int
 895 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 896 {
 897   if (s)
 898     {
 899       if (unibyte)
 900         return s[bytepos];
 901       else
 902         return STRING_CHAR (s + bytepos);
 903     }
 904   else
 905     return FETCH_MULTIBYTE_CHAR (bytepos);
 906 }
 907
 908 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 909    character is covered by a display string, treat the entire run of
 910    covered characters as a single character, either u+2029 or u+FFFC,
 911    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 912    specifies the character position of the next display string, or -1
 913    if not yet computed.  When the next character is at or beyond that
 914    position, the function updates DISP_POS with the position of the
 915    next display string.  DISP_PROP non-zero means that there's really
 916    a display string at DISP_POS, as opposed to when we searched till
 917    DISP_POS without finding one.  If DISP_PROP is 2, it means the
 918    display spec is of the form `(space ...)', which is replaced with
 919    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 920    string to iterate, or NULL if iterating over a buffer or a Lisp
 921    string; in the latter case, STRING->lstring is the Lisp string.  */
 922 static inline int
 923 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 924                  int *disp_prop, struct bidi_string_data *string,
 925                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 926 {
 927   int ch;
 928   EMACS_INT endpos =
 929     (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 930   struct text_pos pos;
 931
 932   /* If we got past the last known position of display string, compute
 933      the position of the next one.  That position could be at CHARPOS.  */
 934   if (charpos < endpos && charpos > *disp_pos)
 935     {
 936       SET_TEXT_POS (pos, charpos, bytepos);
 937       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 938                                               disp_prop);
 939     }
 940
 941   /* Fetch the character at BYTEPOS.  */
 942   if (charpos >= endpos)
 943     {
 944       ch = BIDI_EOB;
 945       *ch_len = 1;
 946       *nchars = 1;
 947       *disp_pos = endpos;
 948       *disp_prop = 0;
 949     }
 950   else if (charpos >= *disp_pos && *disp_prop)
 951     {
 952       EMACS_INT disp_end_pos;
 953
 954       /* We don't expect to find ourselves in the middle of a display
 955          property.  Hopefully, it will never be needed.  */
 956       if (charpos > *disp_pos)
 957         abort ();
 958       /* Text covered by `display' properties and overlays with
 959          display properties or display strings is handled as a single
 960          character that represents the entire run of characters
 961          covered by the display property.  */
 962       if (*disp_prop == 2)
 963         {
 964           /* `(space ...)' display specs are handled as paragraph
 965              separators for the purposes of the reordering; see UAX#9
 966              section 3 and clause HL1 in section 4.3 there.  */
 967           ch = 0x2029;
 968         }
 969       else
 970         {
 971           /* All other display specs are handled as the Unicode Object
 972              Replacement Character.  */
 973           ch = 0xFFFC;
 974         }
 975       disp_end_pos = compute_display_string_end (*disp_pos, string);
 976       *nchars = disp_end_pos - *disp_pos;
 977       if (*nchars <= 0)
 978         abort ();
 979       if (string->s)
 980         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 981                                     disp_end_pos, string->unibyte);
 982       else if (STRINGP (string->lstring))
 983         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 984                                     bytepos, disp_end_pos, string->unibyte);
 985       else
 986         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 987     }
 988   else
 989     {
 990       if (string->s)
 991         {
 992           int len;
 993
 994           if (!string->unibyte)
 995             {
 996               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 997               *ch_len = len;
 998             }
 999           else
1000             {
1001               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1002               *ch_len = 1;
1003             }
1004         }
1005       else if (STRINGP (string->lstring))
1006         {
1007           int len;
1008
1009           if (!string->unibyte)
1010             {
1011               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1012                                            len);
1013               *ch_len = len;
1014             }
1015           else
1016             {
1017               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1018               *ch_len = 1;
1019             }
1020         }
1021       else
1022         {
1023           ch = FETCH_MULTIBYTE_CHAR (bytepos);
1024           *ch_len = CHAR_BYTES (ch);
1025         }
1026       *nchars = 1;
1027     }
1028
1029   /* If we just entered a run of characters covered by a display
1030      string, compute the position of the next display string.  */
1031   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1032       && *disp_prop)
1033     {
1034       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1035       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1036                                               disp_prop);
1037     }
1038
1039   return ch;
1040 }
1041
1042 \f
1043 /***********************************************************************
1044                         Determining paragraph direction
1045  ***********************************************************************/
1046
1047 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1048    Value is the non-negative length of the paragraph separator
1049    following the buffer position, -1 if position is at the beginning
1050    of a new paragraph, or -2 if position is neither at beginning nor
1051    at end of a paragraph.  */
1052 static EMACS_INT
1053 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1054 {
1055   Lisp_Object sep_re;
1056   Lisp_Object start_re;
1057   EMACS_INT val;
1058
1059   sep_re = paragraph_separate_re;
1060   start_re = paragraph_start_re;
1061
1062   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1063   if (val < 0)
1064     {
1065       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1066         val = -1;
1067       else
1068         val = -2;
1069     }
1070
1071   return val;
1072 }
1073
1074 /* On my 2005-vintage machine, searching back for paragraph start
1075    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1076    when user types C-p.  The number below limits each call to
1077    bidi_paragraph_init to about 10 ms.  */
1078 #define MAX_PARAGRAPH_SEARCH 7500
1079
1080 /* Find the beginning of this paragraph by looking back in the buffer.
1081    Value is the byte position of the paragraph's beginning, or
1082    BEGV_BYTE if paragraph_start_re is still not found after looking
1083    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1084 static EMACS_INT
1085 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1086 {
1087   Lisp_Object re = paragraph_start_re;
1088   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1089   EMACS_INT n = 0;
1090
1091   while (pos_byte > BEGV_BYTE
1092          && n++ < MAX_PARAGRAPH_SEARCH
1093          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1094     {
1095       /* FIXME: What if the paragraph beginning is covered by a
1096          display string?  And what if a display string covering some
1097          of the text over which we scan back includes
1098          paragraph_start_re?  */
1099       pos = find_next_newline_no_quit (pos - 1, -1);
1100       pos_byte = CHAR_TO_BYTE (pos);
1101     }
1102   if (n >= MAX_PARAGRAPH_SEARCH)
1103     pos_byte = BEGV_BYTE;
1104   return pos_byte;
1105 }
1106
1107 /* Determine the base direction, a.k.a. base embedding level, of the
1108    paragraph we are about to iterate through.  If DIR is either L2R or
1109    R2L, just use that.  Otherwise, determine the paragraph direction
1110    from the first strong directional character of the paragraph.
1111
1112    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1113    has no strong directional characters and both DIR and
1114    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
1115    in the buffer until a paragraph is found with a strong character,
1116    or until hitting BEGV.  In the latter case, fall back to L2R.  This
1117    flag is used in current-bidi-paragraph-direction.
1118
1119    Note that this function gives the paragraph separator the same
1120    direction as the preceding paragraph, even though Emacs generally
1121    views the separator as not belonging to any paragraph.

        Unless the function returns early because the iterator is still
        inside a paragraph separator, it stores the direction in
        bidi_it->paragraph_dir, sets bidi_it->level_stack[0].level to 1
        for R2L and to 0 for L2R, and calls bidi_line_init.  A DIR other
        than L2R, R2L or NEUTRAL_DIR aborts.  */
1122 void
1123 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
1124 {
1125   EMACS_INT bytepos = bidi_it->bytepos;
1126   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1127   EMACS_INT pstartbyte;
1128   /* Note that begbyte is a byte position, while end is a character
1129      position.  Yes, this is ugly, but we are trying to avoid costly
1130      calls to BYTE_TO_CHAR and its ilk.  */
1131   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1132   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
1133
1134   /* Special case for an empty buffer. */
1135   if (bytepos == begbyte && bidi_it->charpos == end)
1136     dir = L2R;
1137   /* We should never be called at EOB or before BEGV.  */
1138   else if (bidi_it->charpos >= end || bytepos < begbyte)
1139     abort ();
1140
1141   if (dir == L2R)
1142     {
1143       bidi_it->paragraph_dir = L2R;
1144       bidi_it->new_paragraph = 0;
1145     }
1146   else if (dir == R2L)
1147     {
1148       bidi_it->paragraph_dir = R2L;
1149       bidi_it->new_paragraph = 0;
1150     }
1151   else if (dir == NEUTRAL_DIR)  /* P2 */
1152     {
1153       int ch;
1154       EMACS_INT ch_len, nchars;
           /* disp_pos and disp_prop carry the display-string state from
              one bidi_fetch_char call to the next.  */
1155       EMACS_INT pos, disp_pos = -1;
1156       int disp_prop = 0;
1157       bidi_type_t type;
1158       const unsigned char *s;
1159
1160       if (!bidi_initialized)
1161         bidi_initialize ();
1162
1163       /* If we are inside a paragraph separator, we are just waiting
1164          for the separator to be exhausted; use the previous paragraph
1165          direction.  But don't do that if we have been just reseated,
1166          because we need to reinitialize below in that case.  */
1167       if (!bidi_it->first_elt
1168           && bidi_it->charpos < bidi_it->separator_limit)
1169         return;
1170
1171       /* If we are on a newline, get past it to where the next
1172          paragraph might start.  But don't do that at BEGV since then
1173          we are potentially in a new paragraph that doesn't yet
1174          exist.  */
1175       pos = bidi_it->charpos;
1176       s = STRINGP (bidi_it->string.lstring) ?
1177         SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1178       if (bytepos > begbyte
1179           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1180         {
1181           bytepos++;
1182           pos++;
1183         }
1184
1185       /* We are either at the beginning of a paragraph or in the
1186          middle of it.  Find where this paragraph starts.  */
1187       if (string_p)
1188         {
1189           /* We don't support changes of paragraph direction inside a
1190              string.  It is treated as a single paragraph.  */
1191           pstartbyte = 0;
1192         }
1193       else
1194         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1195       bidi_it->separator_limit = -1;
1196       bidi_it->new_paragraph = 0;
1197
1198       /* The following loop is run more than once only if NO_DEFAULT_P
1199          is non-zero, and only if we are iterating on a buffer.  */
1200       do {
1201         bytepos = pstartbyte;
1202         if (!string_p)
1203           pos = BYTE_TO_CHAR (bytepos);
1204         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
1205                               &bidi_it->string,
1206                               bidi_it->frame_window_p, &ch_len, &nchars);
1207         type = bidi_get_type (ch, NEUTRAL_DIR);
1208
             /* Scan forward until TYPE is a strong type that counts for
                the paragraph direction.  POS/BYTEPOS always point just
                past the character whose type is in TYPE.  */
1209         for (pos += nchars, bytepos += ch_len;
1210              (bidi_get_category (type) != STRONG)
1211                || (bidi_ignore_explicit_marks_for_paragraph_level
1212                    && (type == RLE || type == RLO
1213                        || type == LRE || type == LRO));
1214              type = bidi_get_type (ch, NEUTRAL_DIR))
1215           {
1216             if (pos >= end)
1217               {
1218                 /* Pretend there's a paragraph separator at end of
1219                    buffer/string.  */
1220                 type = NEUTRAL_B;
1221                 break;
1222               }
1223             if (!string_p
1224                 && type == NEUTRAL_B
1225                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1226               break;
1227             /* Fetch next character and advance to get past it.  */
1228             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1229                                   &disp_prop, &bidi_it->string,
1230                                   bidi_it->frame_window_p, &ch_len, &nchars);
1231             pos += nchars;
1232             bytepos += ch_len;
1233           }
             /* If the scan stopped without a usable strong character,
                neither test below matches and paragraph_dir keeps the
                value it had on entry.  */
1234         if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1235             || (!bidi_ignore_explicit_marks_for_paragraph_level
1236                 && (type == RLO || type == RLE)))
1237           bidi_it->paragraph_dir = R2L;
1238         else if (type == STRONG_L
1239                  || (!bidi_ignore_explicit_marks_for_paragraph_level
1240                      && (type == LRO || type == LRE)))
1241           bidi_it->paragraph_dir = L2R;
1242         if (!string_p
1243             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1244           {
1245             /* If this paragraph is at BEGV, default to L2R.  */
1246             if (pstartbyte == BEGV_BYTE)
1247               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1248             else
1249               {
1250                 EMACS_INT prevpbyte = pstartbyte;
1251                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1252
1253                 /* Find the beginning of the previous paragraph, if any.  */
                     /* Step back one character at a time until
                        bidi_find_paragraph_start reports a start that
                        lies before PSTARTBYTE, or BEGV is reached.  */
1254                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1255                   {
1256                     /* FIXME: What if p is covered by a display
1257                        string?  See also a FIXME inside
1258                        bidi_find_paragraph_start.  */
1259                     p--;
1260                     pbyte = CHAR_TO_BYTE (p);
1261                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
1262                   }
1263                 pstartbyte = prevpbyte;
1264               }
1265           }
1266       } while (!string_p
1267                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1268     }
1269   else
1270     abort ();
1271
1272   /* Contrary to UAX#9 clause P3, we only default the paragraph
1273      direction to L2R if we have no previous usable paragraph
1274      direction.  This is allowed by the HL1 clause.  */
1275   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1276     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1277   if (bidi_it->paragraph_dir == R2L)
1278     bidi_it->level_stack[0].level = 1;
1279   else
1280     bidi_it->level_stack[0].level = 0;
1281
1282   bidi_line_init (bidi_it);
1283 }
1284
1285 \f
1286 /***********************************************************************
1287                  Resolving explicit and implicit levels.
1288   The rest of this file constitutes the core of the UBA implementation.
1289  ***********************************************************************/
1290
1291 static inline int
1292 bidi_explicit_dir_char (int ch)
1293 {
1294   bidi_type_t ch_type;
1295
1296   if (!bidi_initialized)
1297     abort ();
1298   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1299   return (ch_type == LRE || ch_type == LRO
1300           || ch_type == RLE || ch_type == RLO
1301           || ch_type == PDF);
1302 }
1303
1304 /* A helper function for bidi_resolve_explicit.  It advances to the
1305    next character in logical order and determines the new embedding
1306    level and directional override, but does not take into account
1307    empty embeddings.

        Besides the position, the function updates the ch, ch_len,
        nchars, orig_type, type_after_w1 and type members of BIDI_IT,
        and for LRE/LRO/RLE/RLO/PDF it may push or pop the embedding
        stack or adjust the invalid-level counters.

        Value is the embedding level to use for the character: the
        level that was on top of the stack on entry, the level just
        pushed, or whatever bidi_pop_embedding_level returned.  */
1308 static int
1309 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1310 {
1311   int curchar;
1312   bidi_type_t type;
1313   int current_level;
1314   int new_level;
1315   bidi_dir_t override;
1316   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1317
1318   /* If reseat()'ed, don't advance, so as to start iteration from the
1319      position where we were reseated.  bidi_it->bytepos can be less
1320      than BEGV_BYTE after reseat to BEGV.  */
1321   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1322       || bidi_it->first_elt)
1323     {
1324       bidi_it->first_elt = 0;
           /* Clamp charpos to the start of the text and recompute
              bytepos from it.  */
1325       if (string_p)
1326         {
1327           const unsigned char *p =
1328             STRINGP (bidi_it->string.lstring)
1329             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1330
1331           if (bidi_it->charpos < 0)
1332             bidi_it->charpos = 0;
1333           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1334                                                bidi_it->string.unibyte);
1335         }
1336       else
1337         {
1338           if (bidi_it->charpos < BEGV)
1339             bidi_it->charpos = BEGV;
1340           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1341         }
1342     }
1343   /* Don't move at end of buffer/string.  */
1344   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1345     {
1346       /* Advance to the next character, skipping characters covered by
1347          display strings (nchars > 1).  */
1348       if (bidi_it->nchars <= 0)
1349         abort ();
1350       bidi_it->charpos += bidi_it->nchars;
1351       if (bidi_it->ch_len == 0)
1352         abort ();
1353       bidi_it->bytepos += bidi_it->ch_len;
1354     }
1355
1356   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1357   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1358   new_level = current_level;
1359
1360   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1361     {
1362       curchar = BIDI_EOB;
1363       bidi_it->ch_len = 1;
1364       bidi_it->nchars = 1;
1365       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1366       bidi_it->disp_prop = 0;
1367     }
1368   else
1369     {
1370       /* Fetch the character at BYTEPOS.  If it is covered by a
1371          display string, treat the entire run of covered characters as
1372          a single character u+FFFC.  */
1373       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1374                                  &bidi_it->disp_pos, &bidi_it->disp_prop,
1375                                  &bidi_it->string, bidi_it->frame_window_p,
1376                                  &bidi_it->ch_len, &bidi_it->nchars);
1377     }
1378   bidi_it->ch = curchar;
1379
1380   /* Don't apply directional override here, as all the types we handle
1381      below will not be affected by the override anyway, and we need
1382      the original type unaltered.  The override will be applied in
1383      bidi_resolve_weak.  */
1384   type = bidi_get_type (curchar, NEUTRAL_DIR);
1385   bidi_it->orig_type = type;
1386   bidi_check_type (bidi_it->orig_type);
1387
1388   if (type != PDF)
1389     bidi_it->prev_was_pdf = 0;
1390
1391   bidi_it->type_after_w1 = UNKNOWN_BT;
1392
       /* In each case below, a non-negative ignore_bn_limit (set by
          bidi_resolve_explicit once it has detected an empty embedding)
          means the formatting code must leave the level stack alone.  */
1393   switch (type)
1394     {
1395       case RLE: /* X2 */
1396       case RLO: /* X4 */
1397         bidi_it->type_after_w1 = type;
1398         bidi_check_type (bidi_it->type_after_w1);
1399         type = WEAK_BN; /* X9/Retaining */
1400         if (bidi_it->ignore_bn_limit <= -1)
1401           {
1402             if (current_level <= BIDI_MAXLEVEL - 4)
1403               {
1404                 /* Compute the least odd embedding level greater than
1405                    the current level.  */
1406                 new_level = ((current_level + 1) & ~1) + 1;
1407                 if (bidi_it->type_after_w1 == RLE)
1408                   override = NEUTRAL_DIR;
1409                 else
1410                   override = R2L;
1411                 if (current_level == BIDI_MAXLEVEL - 4)
1412                   bidi_it->invalid_rl_levels = 0;
1413                 bidi_push_embedding_level (bidi_it, new_level, override);
1414               }
1415             else
1416               {
1417                 bidi_it->invalid_levels++;
1418                 /* See the commentary about invalid_rl_levels below.  */
1419                 if (bidi_it->invalid_rl_levels < 0)
1420                   bidi_it->invalid_rl_levels = 0;
1421                 bidi_it->invalid_rl_levels++;
1422               }
1423           }
1424         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1425                  || bidi_it->next_en_pos > bidi_it->charpos)
1426           type = WEAK_EN;
1427         break;
1428       case LRE: /* X3 */
1429       case LRO: /* X5 */
1430         bidi_it->type_after_w1 = type;
1431         bidi_check_type (bidi_it->type_after_w1);
1432         type = WEAK_BN; /* X9/Retaining */
1433         if (bidi_it->ignore_bn_limit <= -1)
1434           {
1435             if (current_level <= BIDI_MAXLEVEL - 5)
1436               {
1437                 /* Compute the least even embedding level greater than
1438                    the current level.  */
1439                 new_level = ((current_level + 2) & ~1);
1440                 if (bidi_it->type_after_w1 == LRE)
1441                   override = NEUTRAL_DIR;
1442                 else
1443                   override = L2R;
1444                 bidi_push_embedding_level (bidi_it, new_level, override);
1445               }
1446             else
1447               {
1448                 bidi_it->invalid_levels++;
1449                 /* invalid_rl_levels counts invalid levels encountered
1450                    while the embedding level was already too high for
1451                    LRE/LRO, but not for RLE/RLO.  That is because
1452                    there may be exactly one PDF which we should not
1453                    ignore even though invalid_levels is non-zero.
1454                    invalid_rl_levels helps to know what PDF is
1455                    that.  */
1456                 if (bidi_it->invalid_rl_levels >= 0)
1457                   bidi_it->invalid_rl_levels++;
1458               }
1459           }
1460         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1461                  || bidi_it->next_en_pos > bidi_it->charpos)
1462           type = WEAK_EN;
1463         break;
1464       case PDF: /* X7 */
1465         bidi_it->type_after_w1 = type;
1466         bidi_check_type (bidi_it->type_after_w1);
1467         type = WEAK_BN; /* X9/Retaining */
1468         if (bidi_it->ignore_bn_limit <= -1)
1469           {
1470             if (!bidi_it->invalid_rl_levels)
1471               {
1472                 new_level = bidi_pop_embedding_level (bidi_it);
1473                 bidi_it->invalid_rl_levels = -1;
1474                 if (bidi_it->invalid_levels)
1475                   bidi_it->invalid_levels--;
1476                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1477               }
                 /* NOTE(review): this test is not an `else' of the one
                    above, so both can run for the same PDF -- confirm
                    that this is intended.  */
1478             if (!bidi_it->invalid_levels)
1479               new_level = bidi_pop_embedding_level (bidi_it);
1480             else
1481               {
1482                 bidi_it->invalid_levels--;
1483                 bidi_it->invalid_rl_levels--;
1484               }
1485           }
1486         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1487                  || bidi_it->next_en_pos > bidi_it->charpos)
1488           type = WEAK_EN;
1489         break;
1490       default:
1491         /* Nothing.  */
1492         break;
1493     }
1494
1495   bidi_it->type = type;
1496   bidi_check_type (bidi_it->type);
1497
1498   return new_level;
1499 }
1500
1501 /* Given an iterator state in BIDI_IT, advance one character position
1502    in the buffer/string to the next character (in the logical order),
1503    resolve any explicit embeddings and directional overrides, and
1504    return the embedding level of the character after resolving
1505    explicit directives and ignoring empty embeddings.  */
1506 static int
1507 bidi_resolve_explicit (struct bidi_it *bidi_it)
1508 {
1509   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1510   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1511   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1512   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1513     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1514
1515   if (prev_level < new_level
1516       && bidi_it->type == WEAK_BN
1517       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1518       && bidi_it->charpos < eob         /* not already at EOB */
1519       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1520                                                    + bidi_it->ch_len, s,
1521                                                    bidi_it->string.unibyte)))
1522     {
1523       /* Avoid pushing and popping embedding levels if the level run
1524          is empty, as this breaks level runs where it shouldn't.
1525          UAX#9 removes all the explicit embedding and override codes,
1526          so empty embeddings disappear without a trace.  We need to
1527          behave as if we did the same.  */
1528       struct bidi_it saved_it;
1529       int level = prev_level;
1530
1531       bidi_copy_it (&saved_it, bidi_it);
1532
1533       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1534                                                        + bidi_it->ch_len, s,
1535                                                        bidi_it->string.unibyte)))
1536         {
1537           /* This advances to the next character, skipping any
1538              characters covered by display strings.  */
1539           level = bidi_resolve_explicit_1 (bidi_it);
1540           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1541              a pointer to its data is no longer valid.  */
1542           if (STRINGP (bidi_it->string.lstring))
1543             s = SDATA (bidi_it->string.lstring);
1544         }
1545
1546       if (bidi_it->nchars <= 0)
1547         abort ();
1548       if (level == prev_level)  /* empty embedding */
1549         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1550       else                      /* this embedding is non-empty */
1551         saved_it.ignore_bn_limit = -2;
1552
1553       bidi_copy_it (bidi_it, &saved_it);
1554       if (bidi_it->ignore_bn_limit > -1)
1555         {
1556           /* We pushed a level, but we shouldn't have.  Undo that. */
1557           if (!bidi_it->invalid_rl_levels)
1558             {
1559               new_level = bidi_pop_embedding_level (bidi_it);
1560               bidi_it->invalid_rl_levels = -1;
1561               if (bidi_it->invalid_levels)
1562                 bidi_it->invalid_levels--;
1563             }
1564           if (!bidi_it->invalid_levels)
1565             new_level = bidi_pop_embedding_level (bidi_it);
1566           else
1567             {
1568               bidi_it->invalid_levels--;
1569               bidi_it->invalid_rl_levels--;
1570             }
1571         }
1572     }
1573
1574   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1575     {
1576       bidi_set_paragraph_end (bidi_it);
1577       /* This is needed by bidi_resolve_weak below, and in L1.  */
1578       bidi_it->type_after_w1 = bidi_it->type;
1579       bidi_check_type (bidi_it->type_after_w1);
1580     }
1581
1582   return new_level;
1583 }
1584
/* Advance in the buffer/string, resolve weak types and return the
   type of the next character after weak type resolution.

   The advance is delegated to bidi_resolve_explicit; this function
   then applies the clauses it labels X6 and W1..W7 (UAX#9) to the
   character the iterator is now on.  On return, BIDI_IT->type holds
   the returned type.  BIDI_IT->type_after_w1 holds either a type
   that was recorded earlier for the L1 clause or, if none was
   recorded, the type as it stood just before W7 was applied.

   Aborts if the type left in BIDI_IT by bidi_resolve_explicit is
   UNKNOWN_BT or one of the explicit formatting codes (LRE, LRO, RLE,
   RLO, PDF).  */
static bidi_type_t
bidi_resolve_weak (struct bidi_it *bidi_it)
{
  bidi_type_t type;
  bidi_dir_t override;
  /* Sample the embedding level before calling bidi_resolve_explicit,
     so that the two levels can be compared below.  */
  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  int new_level  = bidi_resolve_explicit (bidi_it);
  int next_char;
  bidi_type_t type_of_next;
  struct bidi_it saved_it;
  /* End of the text being scanned: the string's character count when
     iterating over a string, ZV otherwise.  */
  EMACS_INT eob =
    (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
    ? bidi_it->string.schars : ZV;

  type = bidi_it->type;
  override = bidi_it->level_stack[bidi_it->stack_idx].override;

  /* None of these types is acceptable as input to the weak-type
     rules below.  */
  if (type == UNKNOWN_BT
      || type == LRE
      || type == LRO
      || type == RLE
      || type == RLO
      || type == PDF)
    abort ();

  if (new_level != prev_level
      || bidi_it->type == NEUTRAL_B)
    {
      /* We've got a new embedding level run, compute the directional
         type of sor and initialize per-run variables (UAX#9, clause
         X10).  */
      bidi_set_sor_type (bidi_it, prev_level, new_level);
    }
  else if (type == NEUTRAL_S || type == NEUTRAL_WS
           || type == WEAK_BN || type == STRONG_AL)
    bidi_it->type_after_w1 = type;      /* needed in L1 */
  bidi_check_type (bidi_it->type_after_w1);

  /* Level and directional override status are already recorded in
     bidi_it, and do not need any change; see X6.  */
  if (override == R2L)          /* X6 */
    type = STRONG_R;
  else if (override == L2R)
    type = STRONG_L;
  else
    {
      /* No directional override is in effect: apply W1..W5.  */
      if (type == WEAK_NSM)     /* W1 */
        {
          /* Note that we don't need to consider the case where the
             prev character has its type overridden by an RLO or LRO,
             because then either the type of this NSM would have been
             also overridden, or the previous character is outside the
             current level run, and thus not relevant to this NSM.
             This is why NSM gets the type_after_w1 of the previous
             character.  */
          if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
              /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
              && bidi_it->prev.type_after_w1 != NEUTRAL_B)
            type = bidi_it->prev.type_after_w1;
          else if (bidi_it->sor == R2L)
            type = STRONG_R;
          else if (bidi_it->sor == L2R)
            type = STRONG_L;
          else /* shouldn't happen! */
            abort ();
        }
      /* This is deliberately a fresh `if', not an `else if': the type
         an NSM has just acquired in W1 is subject to W2..W5 too.  */
      if (type == WEAK_EN       /* W2 */
          && bidi_it->last_strong.type_after_w1 == STRONG_AL)
        type = WEAK_AN;
      else if (type == STRONG_AL) /* W3 */
        type = STRONG_R;
      /* W4 candidates: an ES after an EN, or a CS after an EN or an
         AN.  For the EN case, the previous character must also have
         been an EN originally (prev.orig_type), not merely one whose
         type resolved to EN.  */
      else if ((type == WEAK_ES /* W4 */
                && bidi_it->prev.type_after_w1 == WEAK_EN
                && bidi_it->prev.orig_type == WEAK_EN)
               || (type == WEAK_CS
                   && ((bidi_it->prev.type_after_w1 == WEAK_EN
                        && bidi_it->prev.orig_type == WEAK_EN)
                       || bidi_it->prev.type_after_w1 == WEAK_AN)))
        {
          const unsigned char *s =
            STRINGP (bidi_it->string.lstring)
            ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;

          /* Peek at the character that follows this one, or use
             BIDI_EOB if this is the last character of the text.  */
          next_char =
            bidi_it->charpos + bidi_it->nchars >= eob
            ? BIDI_EOB
            : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
                                bidi_it->string.unibyte);
          type_of_next = bidi_get_type (next_char, override);

          /* If what follows is a BN or an explicit formatting
             character, step the iterator forward past any BNs at this
             same level to find the type that really follows, then
             restore the iterator to where it was.  */
          if (type_of_next == WEAK_BN
              || bidi_explicit_dir_char (next_char))
            {
              bidi_copy_it (&saved_it, bidi_it);
              while (bidi_resolve_explicit (bidi_it) == new_level
                     && bidi_it->type == WEAK_BN)
                ;
              type_of_next = bidi_it->type;
              bidi_copy_it (bidi_it, &saved_it);
            }

          /* If the next character is EN, but the last strong-type
             character is AL, that next EN will be changed to AN when
             we process it in W2 above.  So in that case, this ES
             should not be changed into EN.  */
          if (type == WEAK_ES
              && type_of_next == WEAK_EN
              && bidi_it->last_strong.type_after_w1 != STRONG_AL)
            type = WEAK_EN;
          else if (type == WEAK_CS)
            {
              if (bidi_it->prev.type_after_w1 == WEAK_AN
                  && (type_of_next == WEAK_AN
                      /* If the next character is EN, but the last
                         strong-type character is AL, that EN will
                         later be changed to AN when we process it in
                         W2 above.  So in that case this CS sits
                         between two ANs, and is changed into AN.  */
                      || (type_of_next == WEAK_EN
                          && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
                type = WEAK_AN;
              else if (bidi_it->prev.type_after_w1 == WEAK_EN
                       && type_of_next == WEAK_EN
                       && bidi_it->last_strong.type_after_w1 != STRONG_AL)
                type = WEAK_EN;
            }
        }
      else if (type == WEAK_ET  /* W5: ET with EN before or after it */
               || type == WEAK_BN)      /* W5/Retaining */
        {
          /* next_en_pos, when it is beyond the current position, is
             the position of an EN found by an earlier look-ahead
             (see where it is set below), so no new scan is needed.  */
          if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
              || bidi_it->next_en_pos > bidi_it->charpos)
            type = WEAK_EN;
          else                  /* W5: ET/BN with EN after it.  */
            {
              /* Position of the character right after this one; it is
                 updated below if we have to scan further ahead.  */
              EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
              const unsigned char *s =
                STRINGP (bidi_it->string.lstring)
                ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;

              if (bidi_it->nchars <= 0)
                abort ();
              next_char =
                bidi_it->charpos + bidi_it->nchars >= eob
                ? BIDI_EOB
                : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
                                    bidi_it->string.unibyte);
              type_of_next = bidi_get_type (next_char, override);

              /* Skip forward over a sequence of ETs and BNs at this
                 level to see what terminates it, remember where that
                 terminating character is, then restore the iterator.  */
              if (type_of_next == WEAK_ET
                  || type_of_next == WEAK_BN
                  || bidi_explicit_dir_char (next_char))
                {
                  bidi_copy_it (&saved_it, bidi_it);
                  while (bidi_resolve_explicit (bidi_it) == new_level
                         && (bidi_it->type == WEAK_BN
                             || bidi_it->type == WEAK_ET))
                    ;
                  type_of_next = bidi_it->type;
                  en_pos = bidi_it->charpos;
                  bidi_copy_it (bidi_it, &saved_it);
                }
              if (type_of_next == WEAK_EN)
                {
                  /* If the last strong character is AL, the EN we've
                     found will become AN when we get to it (W2). */
                  if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
                    {
                      type = WEAK_EN;
                      /* Remember this EN position, to speed up processing
                         of the next ETs.  */
                      bidi_it->next_en_pos = en_pos;
                    }
                  else if (type == WEAK_BN)
                    type = NEUTRAL_ON; /* W6/Retaining */
                }
            }
        }
    }

  /* Any separator or terminator that survived W4 and W5 becomes ON,
     and so does a BN that directly follows one.  Note that this is
     outside the `else' above, so it is applied whether or not an
     override is in effect.  */
  if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
      || (type == WEAK_BN
          && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
              || bidi_it->prev.type_after_w1 == WEAK_ES
              || bidi_it->prev.type_after_w1 == WEAK_ET)))
    type = NEUTRAL_ON;

  /* Store the type we've got so far, before we clobber it with strong
     types in W7 and while resolving neutral types.  But leave alone
     the original types that were recorded above, because we will need
     them for the L1 clause.  */
  if (bidi_it->type_after_w1 == UNKNOWN_BT)
    bidi_it->type_after_w1 = type;
  bidi_check_type (bidi_it->type_after_w1);

  /* An EN becomes L if the last strong character was L, or if no
     strong character has been recorded yet and sor is L2R.  */
  if (type == WEAK_EN)  /* W7 */
    {
      if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
          || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
        type = STRONG_L;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);
  return type;
}
1793
1794 /* Resolve the type of a neutral character according to the type of
1795    surrounding strong text and the current embedding level.  */
1796 static inline bidi_type_t
1797 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1798 {
1799   /* N1: European and Arabic numbers are treated as though they were R.  */
1800   if (next_type == WEAK_EN || next_type == WEAK_AN)
1801     next_type = STRONG_R;
1802   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1803     prev_type = STRONG_R;
1804
1805   if (next_type == prev_type)   /* N1 */
1806     return next_type;
1807   else if ((lev & 1) == 0)      /* N2 */
1808     return STRONG_L;
1809   else
1810     return STRONG_R;
1811 }
1812
/* Advance to the next character by calling bidi_resolve_weak, and, if
   the resulting type is a neutral (or a BN that did not change the
   embedding level), resolve it to a strong type according to the
   surrounding text and the current embedding level.  Return the
   resulting type.

   If BIDI_IT->next_for_neutral is already known, it is used
   directly.  Otherwise the text is scanned forward, caching each
   iterator state on the way, until a character that can resolve the
   neutral, a paragraph separator, or the end of the level run is
   found; the iterator is then restored to the character being
   resolved, with the information found recorded in its
   next_for_neutral member.  That forward scan aborts if attempted
   while BIDI_IT->scan_dir is -1.  */
static bidi_type_t
bidi_resolve_neutral (struct bidi_it *bidi_it)
{
  /* The level is sampled before and after bidi_resolve_weak, so that
     we can tell whether this character changed the embedding
     level.  */
  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  bidi_type_t type = bidi_resolve_weak (bidi_it);
  int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;

  /* These are the only types the code below is prepared to handle.  */
  if (!(type == STRONG_R
        || type == STRONG_L
        || type == WEAK_BN
        || type == WEAK_EN
        || type == WEAK_AN
        || type == NEUTRAL_B
        || type == NEUTRAL_S
        || type == NEUTRAL_WS
        || type == NEUTRAL_ON))
    abort ();

  if (bidi_get_category (type) == NEUTRAL
      || (type == WEAK_BN && prev_level == current_level))
    {
      /* The character that terminates this run of neutrals is already
         known from an earlier scan: use it.  */
      if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
        type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
                                       bidi_it->next_for_neutral.type,
                                       current_level);
      else
        {
          /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
             the assumption of batch-style processing; see clauses W4,
             W5, and especially N1, which require to look far forward
             (as well as back) in the buffer/string.  May the fleas of
             a thousand camels infest the armpits of those who design
             supposedly general-purpose algorithms by looking at their
             own implementations, and fail to consider other possible
             implementations!  */
          struct bidi_it saved_it;
          bidi_type_t next_type;

          if (bidi_it->scan_dir == -1)
            abort ();

          /* saved_it is the state for the character being resolved;
             it is what we restore before returning.  */
          bidi_copy_it (&saved_it, bidi_it);
          /* Scan the text forward until we find the first non-neutral
             character, and then use that to resolve the neutral we
             are dealing with now.  We also cache the scanned iterator
             states, to salvage some of the effort later.  */
          bidi_cache_iterator_state (bidi_it, 0);
          do {
            /* Record the info about the previous character, so that
               it will be cached below with this state.  */
            if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
                && bidi_it->type != WEAK_BN)
              bidi_remember_char (&bidi_it->prev, bidi_it);
            type = bidi_resolve_weak (bidi_it);
            /* Paragraph separators have their levels fully resolved
               at this point, so cache them as resolved.  */
            bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
            /* FIXME: implement L1 here, by testing for a newline and
               resetting the level for any sequence of whitespace
               characters adjacent to it.  */
          } while (!(type == NEUTRAL_B
                     || (type != WEAK_BN
                         && bidi_get_category (type) != NEUTRAL)
                     /* This is all per level run, so stop when we
                        reach the end of this level run.  */
                     || bidi_it->level_stack[bidi_it->stack_idx].level !=
                     current_level));

          /* Record the character that stopped the scan in the saved
             state, i.e. for the character we are resolving.  */
          bidi_remember_char (&saved_it.next_for_neutral, bidi_it);

          switch (type)
            {
              case STRONG_L:
              case STRONG_R:
              case STRONG_AL:
                next_type = type;
                break;
              case WEAK_EN:
              case WEAK_AN:
                /* N1: ``European and Arabic numbers are treated as
                   though they were R.''  */
                next_type = STRONG_R;
                saved_it.next_for_neutral.type = STRONG_R;
                break;
              case WEAK_BN:
                if (!bidi_explicit_dir_char (bidi_it->ch))
                  abort ();             /* can't happen: BNs are skipped */
                /* FALLTHROUGH */
              case NEUTRAL_B:
                /* Marched all the way to the end of this level run.
                   We need to use the eor type, whose information is
                   stored by bidi_set_sor_type in the prev_for_neutral
                   member.  */
                if (saved_it.type != WEAK_BN
                    || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
                  {
                    /* Note that this reads prev_for_neutral from the
                       scanned-ahead iterator, not from saved_it.  */
                    next_type = bidi_it->prev_for_neutral.type;
                    saved_it.next_for_neutral.type = next_type;
                    bidi_check_type (next_type);
                  }
                else
                  {
                    /* This is a BN which does not adjoin neutrals.
                       Leave its type alone.  */
                    bidi_copy_it (bidi_it, &saved_it);
                    return bidi_it->type;
                  }
                break;
              default:
                abort ();
            }
          /* Resolve using the saved state's prev_for_neutral, store
             the result in the saved state, and make that state the
             current one again.  */
          type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
                                         next_type, current_level);
          saved_it.type = type;
          bidi_check_type (type);
          bidi_copy_it (bidi_it, &saved_it);
        }
    }
  return type;
}
1933
1934 /* Given an iterator state in BIDI_IT, advance one character position
1935    in the buffer/string to the next character (in the logical order),
1936    resolve the bidi type of that next character, and return that
1937    type.  */
1938 static bidi_type_t
1939 bidi_type_of_next_char (struct bidi_it *bidi_it)
1940 {
1941   bidi_type_t type;
1942
1943   /* This should always be called during a forward scan.  */
1944   if (bidi_it->scan_dir != 1)
1945     abort ();
1946
1947   /* Reset the limit until which to ignore BNs if we step out of the
1948      area where we found only empty levels.  */
1949   if ((bidi_it->ignore_bn_limit > -1
1950        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1951       || (bidi_it->ignore_bn_limit == -2
1952           && !bidi_explicit_dir_char (bidi_it->ch)))
1953     bidi_it->ignore_bn_limit = -1;
1954
1955   type = bidi_resolve_neutral (bidi_it);
1956
1957   return type;
1958 }
1959
/* Given an iterator state BIDI_IT, advance one character position in
   the buffer/string to the next character (in the current scan
   direction), resolve the embedding and implicit levels of that next
   character, and return the resulting level.

   The level is also stored in BIDI_IT->resolved_level.  If a forward
   scan is already at the end of the text, the iterator is left alone
   and its current resolved_level is returned.  If the wanted
   character is found in the cache with its level fully resolved,
   that cached level is returned without further work.  When scanning
   backwards, the wanted state must come fully resolved from the
   cache; otherwise this function aborts.  */
static int
bidi_level_of_next_char (struct bidi_it *bidi_it)
{
  bidi_type_t type;
  int level, prev_level = -1;
  struct bidi_saved_info next_for_neutral;
  /* -2 means "no position computed"; it is below every value accepted
     by the cache lookup further down.  */
  EMACS_INT next_char_pos = -2;

  if (bidi_it->scan_dir == 1)
    {
      EMACS_INT eob =
        (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
        ? bidi_it->string.schars : ZV;

      /* There's no sense in trying to advance if we hit end of text.  */
      if (bidi_it->charpos >= eob)
        return bidi_it->resolved_level;

      /* Record the info about the previous character.  */
      if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
          && bidi_it->type != WEAK_BN)
        bidi_remember_char (&bidi_it->prev, bidi_it);
      if (bidi_it->type_after_w1 == STRONG_R
          || bidi_it->type_after_w1 == STRONG_L
          || bidi_it->type_after_w1 == STRONG_AL)
        bidi_remember_char (&bidi_it->last_strong, bidi_it);
      /* FIXME: it sounds like we don't need both prev and
         prev_for_neutral members, but I'm leaving them both for now.  */
      if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
          || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
        bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);

      /* If we overstepped the characters used for resolving neutrals
         and whitespace, invalidate their info in the iterator.  */
      if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
        bidi_it->next_for_neutral.type = UNKNOWN_BT;
      if (bidi_it->next_en_pos >= 0
          && bidi_it->charpos >= bidi_it->next_en_pos)
        bidi_it->next_en_pos = -1;
      if (bidi_it->next_for_ws.type != UNKNOWN_BT
          && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
        bidi_it->next_for_ws.type = UNKNOWN_BT;

      /* This must be taken before we fill the iterator with the info
         about the next char.  If we scan backwards, the iterator
         state must be already cached, so there's no need to know the
         embedding level of the previous character, since we will be
         returning to our caller shortly.  */
      prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
    }
  /* Keep a copy, because fetching a cached state below may replace
     the iterator's next_for_neutral with one that is still unknown.  */
  next_for_neutral = bidi_it->next_for_neutral;

  /* Perhaps the character we want is already cached.  If it is, the
     call to bidi_cache_find below will return a type other than
     UNKNOWN_BT.  */
  if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
    {
      /* Lowest valid character position: 0 when iterating over a
         string, 1 otherwise.  */
      int bob =
        (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;

      if (bidi_it->scan_dir > 0)
        {
          if (bidi_it->nchars <= 0)
            abort ();
          next_char_pos = bidi_it->charpos + bidi_it->nchars;
        }
      else if (bidi_it->charpos >= bob)
        /* Implementation note: we allow next_char_pos to be as low as
           0 for buffers or -1 for strings, and that is okay because
           that's the "position" of the sentinel iterator state we
           cached at the beginning of the iteration.  */
        next_char_pos = bidi_it->charpos - 1;
      if (next_char_pos >= bob - 1)
        type = bidi_cache_find (next_char_pos, -1, bidi_it);
      else
        type = UNKNOWN_BT;
    }
  else
    type = UNKNOWN_BT;
  if (type != UNKNOWN_BT)
    {
      /* Don't lose the information for resolving neutrals!  The
         cached states could have been cached before their
         next_for_neutral member was computed.  If we are on our way
         forward, we can simply take the info from the previous
         state.  */
      if (bidi_it->scan_dir == 1
          && bidi_it->next_for_neutral.type == UNKNOWN_BT)
        bidi_it->next_for_neutral = next_for_neutral;

      /* If resolved_level is -1, it means this state was cached
         before it was completely resolved, so we cannot return
         it.  */
      if (bidi_it->resolved_level != -1)
        return bidi_it->resolved_level;
    }
  if (bidi_it->scan_dir == -1)
    /* If we are going backwards, the iterator state is already cached
       from previous scans, and should be fully resolved.  */
    abort ();

  /* Nothing usable in the cache: advance and resolve the type the
     hard way.  */
  if (type == UNKNOWN_BT)
    type = bidi_type_of_next_char (bidi_it);

  /* For a paragraph separator, return the level already recorded in
     the iterator.  */
  if (type == NEUTRAL_B)
    return bidi_it->resolved_level;

  level = bidi_it->level_stack[bidi_it->stack_idx].level;
  if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
      || (type == WEAK_BN && prev_level == level))
    {
      if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
        abort ();

      /* If the cached state shows a neutral character, it was not
         resolved by bidi_resolve_neutral, so do it now.  */
      type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
                                     bidi_it->next_for_neutral.type,
                                     level);
    }

  /* By now every neutral must have been resolved; only these types
     may remain.  */
  if (!(type == STRONG_R
        || type == STRONG_L
        || type == WEAK_BN
        || type == WEAK_EN
        || type == WEAK_AN))
    abort ();
  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  /* For L1 below, we need to know, for each WS character, whether
     it belongs to a sequence of WS characters preceding a newline
     or a TAB or a paragraph separator.  */
  if (bidi_it->orig_type == NEUTRAL_WS
      && bidi_it->next_for_ws.type == UNKNOWN_BT)
    {
      /* Local copies of the iterator's position data, so that the
         look-ahead below does not modify the iterator itself.  */
      int ch;
      EMACS_INT clen = bidi_it->ch_len;
      EMACS_INT bpos = bidi_it->bytepos;
      EMACS_INT cpos = bidi_it->charpos;
      EMACS_INT disp_pos = bidi_it->disp_pos;
      EMACS_INT nc = bidi_it->nchars;
      struct bidi_string_data bs = bidi_it->string;
      bidi_type_t chtype;
      int fwp = bidi_it->frame_window_p;
      int dpp = bidi_it->disp_prop;

      if (bidi_it->nchars <= 0)
        abort ();
      /* Fetch characters forward until one is found that is neither
         whitespace, nor BN, nor an explicit formatting character.  A
         newline and the end of text both count as a paragraph
         separator here.  */
      do {
        ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
                              fwp, &clen, &nc);
        if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
          chtype = NEUTRAL_B;
        else
          chtype = bidi_get_type (ch, NEUTRAL_DIR);
      } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
               || bidi_explicit_dir_char (ch)); /* L1/Retaining */
      bidi_it->next_for_ws.type = chtype;
      bidi_check_type (bidi_it->next_for_ws.type);
      bidi_it->next_for_ws.charpos = cpos;
      bidi_it->next_for_ws.bytepos = bpos;
    }

  /* Resolve implicit levels, with a twist: PDFs get the embedding
     level of the embedding they terminate.  See below for the
     reason.  */
  if (bidi_it->orig_type == PDF
      /* Don't do this if this formatting code didn't change the
         embedding level due to invalid or empty embeddings.  */
      && prev_level != level)
    {
      /* Don't look in UAX#9 for the reason for this: it's our own
         private quirk.  The reason is that we want the formatting
         codes to be delivered so that they bracket the text of their
         embedding.  For example, given the text

             {RLO}teST{PDF}

         we want it to be displayed as

             {PDF}STet{RLO}

         not as

             STet{RLO}{PDF}

         which will result because we bump up the embedding level as
         soon as we see the RLO and pop it as soon as we see the PDF,
         so RLO itself has the same embedding level as "teST", and
         thus would be normally delivered last, just before the PDF.
         The assignment below fiddles with the level of PDF so that
         this ugly side effect does not happen.

         (This is, of course, only important if the formatting codes
         are actually displayed, but Emacs does need to display them
         if the user wants to.)  */
      level = prev_level;
    }
  /* L1: paragraph and segment separators, and whitespace that leads
     up to one, get the level found at the bottom of the level
     stack.  */
  else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
           || bidi_it->orig_type == NEUTRAL_S
           || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
           /* || bidi_it->ch == LINESEP_CHAR */
           || (bidi_it->orig_type == NEUTRAL_WS
               && (bidi_it->next_for_ws.type == NEUTRAL_B
                   || bidi_it->next_for_ws.type == NEUTRAL_S)))
    level = bidi_it->level_stack[0].level;
  /* I1: at an even embedding level, R goes up one level and numbers
     go up two.  */
  else if ((level & 1) == 0) /* I1 */
    {
      if (type == STRONG_R)
        level++;
      else if (type == WEAK_EN || type == WEAK_AN)
        level += 2;
    }
  /* I2: at an odd embedding level, L and numbers go up one level.  */
  else                  /* I2 */
    {
      if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
        level++;
    }

  bidi_it->resolved_level = level;
  return level;
}
2187
2188 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2189    non-zero, we are at the end of a level, and we need to prepare to
2190    resume the scan of the lower level.
2191
2192    If this level's other edge is cached, we simply jump to it, filling
2193    the iterator structure with the iterator state on the other edge.
2194    Otherwise, we walk the buffer or string until we come back to the
2195    same level as LEVEL.
2196
2197    Note: we are not talking here about a ``level run'' in the UAX#9
2198    sense of the term, but rather about a ``level'' which includes
2199    all the levels higher than it.  In other words, given the levels
2200    like this:
2201
2202          11111112222222333333334443343222222111111112223322111
2203                 A      B                    C
2204
2205    and assuming we are at point A scanning left to right, this
2206    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2207    at point B.  */
2208 static void
2209 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2210 {
2211   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2212   ptrdiff_t idx;
2213
2214   /* Try the cache first.  */
2215   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2216       >= bidi_cache_start)
2217     bidi_cache_fetch_state (idx, bidi_it);
2218   else
2219     {
2220       int new_level;
2221
2222       if (end_flag)
2223         abort (); /* if we are at end of level, its edges must be cached */
2224
2225       bidi_cache_iterator_state (bidi_it, 1);
2226       do {
2227         new_level = bidi_level_of_next_char (bidi_it);
2228         bidi_cache_iterator_state (bidi_it, 1);
2229       } while (new_level >= level);
2230     }
2231 }
2232
2233 void
2234 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2235 {
2236   int old_level, new_level, next_level;
2237   struct bidi_it sentinel;
2238   struct gcpro gcpro1;
2239
2240   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2241     abort ();
2242
2243   if (bidi_it->scan_dir == 0)
2244     {
2245       bidi_it->scan_dir = 1;    /* default to logical order */
2246     }
2247
2248   /* The code below can call eval, and thus cause GC.  If we are
2249      iterating a Lisp string, make sure it won't be GCed.  */
2250   if (STRINGP (bidi_it->string.lstring))
2251     GCPRO1 (bidi_it->string.lstring);
2252
2253   /* If we just passed a newline, initialize for the next line.  */
2254   if (!bidi_it->first_elt
2255       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2256     bidi_line_init (bidi_it);
2257
2258   /* Prepare the sentinel iterator state, and cache it.  When we bump
2259      into it, scanning backwards, we'll know that the last non-base
2260      level is exhausted.  */
2261   if (bidi_cache_idx == bidi_cache_start)
2262     {
2263       bidi_copy_it (&sentinel, bidi_it);
2264       if (bidi_it->first_elt)
2265         {
2266           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2267           sentinel.bytepos--;
2268           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2269           sentinel.ch_len = 1;
2270           sentinel.nchars = 1;
2271         }
2272       bidi_cache_iterator_state (&sentinel, 1);
2273     }
2274
2275   old_level = bidi_it->resolved_level;
2276   new_level = bidi_level_of_next_char (bidi_it);
2277
2278   /* Reordering of resolved levels (clause L2) is implemented by
2279      jumping to the other edge of the level and flipping direction of
2280      scanning the text whenever we find a level change.  */
2281   if (new_level != old_level)
2282     {
2283       int ascending = new_level > old_level;
2284       int level_to_search = ascending ? old_level + 1 : old_level;
2285       int incr = ascending ? 1 : -1;
2286       int expected_next_level = old_level + incr;
2287
2288       /* Jump (or walk) to the other edge of this level.  */
2289       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2290       /* Switch scan direction and peek at the next character in the
2291          new direction.  */
2292       bidi_it->scan_dir = -bidi_it->scan_dir;
2293
2294       /* The following loop handles the case where the resolved level
2295          jumps by more than one.  This is typical for numbers inside a
2296          run of text with left-to-right embedding direction, but can
2297          also happen in other situations.  In those cases the decision
2298          where to continue after a level change, and in what direction,
2299          is tricky.  For example, given a text like below:
2300
2301                   abcdefgh
2302                   11336622
2303
2304          (where the numbers below the text show the resolved levels),
2305          the result of reordering according to UAX#9 should be this:
2306
2307                   efdcghba
2308
2309          This is implemented by the loop below which flips direction
2310          and jumps to the other edge of the level each time it finds
2311          the new level not to be the expected one.  The expected level
2312          is always one more or one less than the previous one.  */
2313       next_level = bidi_peek_at_next_level (bidi_it);
2314       while (next_level != expected_next_level)
2315         {
2316           expected_next_level += incr;
2317           level_to_search += incr;
2318           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2319           bidi_it->scan_dir = -bidi_it->scan_dir;
2320           next_level = bidi_peek_at_next_level (bidi_it);
2321         }
2322
2323       /* Finally, deliver the next character in the new direction.  */
2324       next_level = bidi_level_of_next_char (bidi_it);
2325     }
2326
2327   /* Take note when we have just processed the newline that precedes
2328      the end of the paragraph.  The next time we are about to be
2329      called, set_iterator_to_next will automatically reinit the
2330      paragraph direction, if needed.  We do this at the newline before
2331      the paragraph separator, because the next character might not be
2332      the first character of the next paragraph, due to the bidi
2333      reordering, whereas we _must_ know the paragraph base direction
2334      _before_ we process the paragraph's text, since the base
2335      direction affects the reordering.  */
2336   if (bidi_it->scan_dir == 1
2337       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2338     {
2339       /* The paragraph direction of the entire string, once
2340          determined, is in effect for the entire string.  Setting the
2341          separator limit to the end of the string prevents
2342          bidi_paragraph_init from being called automatically on this
2343          string.  */
2344       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2345         bidi_it->separator_limit = bidi_it->string.schars;
2346       else if (bidi_it->bytepos < ZV_BYTE)
2347         {
2348           EMACS_INT sep_len =
2349             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2350                                    bidi_it->bytepos + bidi_it->ch_len);
2351           if (bidi_it->nchars <= 0)
2352             abort ();
2353           if (sep_len >= 0)
2354             {
2355               bidi_it->new_paragraph = 1;
2356               /* Record the buffer position of the last character of the
2357                  paragraph separator.  */
2358               bidi_it->separator_limit =
2359                 bidi_it->charpos + bidi_it->nchars + sep_len;
2360             }
2361         }
2362     }
2363
2364   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2365     {
2366       /* If we are at paragraph's base embedding level and beyond the
2367          last cached position, the cache's job is done and we can
2368          discard it.  */
2369       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2370           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2371                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2372         bidi_cache_reset ();
2373         /* But as long as we are caching during forward scan, we must
2374            cache each state, or else the cache integrity will be
2375            compromised: it assumes cached states correspond to buffer
2376            positions 1:1.  */
2377       else
2378         bidi_cache_iterator_state (bidi_it, 1);
2379     }
2380
2381   if (STRINGP (bidi_it->string.lstring))
2382     UNGCPRO;
2383 }
2384
2385 /* This is meant to be called from within the debugger, whenever you
2386    wish to examine the cache contents.  */
2387 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2388 void
2389 bidi_dump_cached_states (void)
2390 {
2391   ptrdiff_t i;
2392   int ndigits = 1;
2393
2394   if (bidi_cache_idx == 0)
2395     {
2396       fprintf (stderr, "The cache is empty.\n");
2397       return;
2398     }
2399   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2400            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2401
2402   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2403     ndigits++;
2404   fputs ("ch  ", stderr);
2405   for (i = 0; i < bidi_cache_idx; i++)
2406     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2407   fputs ("\n", stderr);
2408   fputs ("lvl ", stderr);
2409   for (i = 0; i < bidi_cache_idx; i++)
2410     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2411   fputs ("\n", stderr);
2412   fputs ("pos ", stderr);
2413   for (i = 0; i < bidi_cache_idx; i++)
2414     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2415   fputs ("\n", stderr);
2416 }