src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional Algorithm
  23    (UBA), as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  /* Coarse bidirectional categories of characters; bidi_get_category
     maps every bidi type onto one of these.  */
  typedef enum {
    UNKNOWN_BC = 0,     /* category of UNKNOWN_BT */
    NEUTRAL = 1,        /* the NEUTRAL_* types */
    WEAK = 2,           /* the WEAK_* types and PDF */
    STRONG = 3          /* the STRONG_* types and LRE/LRO/RLE/RLO */
  } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v = XINT (val);
 208
 209       if (v < 0 || v > MAX_CHAR)
 210         abort ();
 211
 212       return v;
 213     }
 214
 215   return c;
 216 }
 217
 218 /* Determine the start-of-run (sor) directional type given the two
 219    embedding levels on either side of the run boundary.  Also, update
 220    the saved info about previously seen characters, since that info is
 221    generally valid for a single level run.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int higher_level = (level_before > level_after ? level_before : level_after);
 226
 227   /* The prev_was_pdf gork is required for when we have several PDFs
 228      in a row.  In that case, we want to compute the sor type for the
 229      next level run only once: when we see the first PDF.  That's
 230      because the sor type depends only on the higher of the two levels
 231      that we find on the two sides of the level boundary (see UAX#9,
 232      clause X10), and so we don't need to know the final embedding
 233      level to which we descend after processing all the PDFs.  */
 234   if (!bidi_it->prev_was_pdf || level_before < level_after)
 235     /* FIXME: should the default sor direction be user selectable?  */
 236     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 237   if (level_before > level_after)
 238     bidi_it->prev_was_pdf = 1;
 239
 240   bidi_it->prev.type = UNKNOWN_BT;
 241   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 242     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 243   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 244   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 245   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 246   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 247     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 248   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 249 }
 250
 251 /* Push the current embedding level and override status; reset the
 252    current level to LEVEL and the current override status to OVERRIDE.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   bidi_it->stack_idx++;
 258   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 260   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 261 }
 262
 263 /* Pop the embedding level and directional override status from the
 264    stack, and return the new level.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   /* UAX#9 says to ignore invalid PDFs.  */
 269   if (bidi_it->stack_idx > 0)
 270     bidi_it->stack_idx--;
 271   return bidi_it->level_stack[bidi_it->stack_idx].level;
 272 }
 273
 274 /* Record in SAVED_INFO the information about the current character.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   saved_info->charpos = bidi_it->charpos;
 280   saved_info->bytepos = bidi_it->bytepos;
 281   saved_info->type = bidi_it->type;
 282   bidi_check_type (bidi_it->type);
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   bidi_check_type (bidi_it->type_after_w1);
 285   saved_info->orig_type = bidi_it->orig_type;
 286   bidi_check_type (bidi_it->orig_type);
 287 }
 288
 289 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 290    copies the part of the level stack that is actually in use.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int i;
 295
 296   /* Copy everything except the level stack and beyond.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Copy the active part of the level stack.  */
 300   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 301   for (i = 1; i <= from->stack_idx; i++)
 302     to->level_stack[i] = from->level_stack[i];
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200
 311 static struct bidi_it *bidi_cache;
 312 static ptrdiff_t bidi_cache_size = 0;
 313 enum { elsz = sizeof (struct bidi_it) };
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* 5-slot stack for saving the start of the previous level of the
 320    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 321    and we need the same size of our stack.  */
 322 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 323 static int bidi_cache_sp;
 324
 325 /* Size of header used by bidi_shelve_cache.  */
 326 enum
 327   {
 328     bidi_shelve_header_size
 329       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 330          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 331          + sizeof (bidi_cache_last_idx))
 332   };
 333
 334 /* Reset the cache state to the empty state.  We only reset the part
 335    of the cache relevant to iteration of the current object.  Previous
 336    objects, which are pushed on the display iterator's stack, are left
 337    intact.  This is called when the cached information is no more
 338    useful for the current iteration, e.g. when we were reseated to a
 339    new position on the same object.  */
 340 static inline void
 341 bidi_cache_reset (void)
 342 {
 343   bidi_cache_idx = bidi_cache_start;
 344   bidi_cache_last_idx = -1;
 345 }
 346
 347 /* Shrink the cache to its minimal size.  Called when we init the bidi
 348    iterator for reordering a buffer or a string that does not come
 349    from display properties, because that means all the previously
 350    cached info is of no further use.  */
 351 static inline void
 352 bidi_cache_shrink (void)
 353 {
 354   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 355     {
 356       bidi_cache
 357         = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 358       bidi_cache_size = BIDI_CACHE_CHUNK;
 359     }
 360   bidi_cache_reset ();
 361 }
 362
 363 static inline void
 364 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 365 {
 366   int current_scan_dir = bidi_it->scan_dir;
 367
 368   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 369     abort ();
 370
 371   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 372   bidi_it->scan_dir = current_scan_dir;
 373   bidi_cache_last_idx = idx;
 374 }
 375
 376 /* Find a cached state with a given CHARPOS and resolved embedding
 377    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 378    resolved levels in cached states.  DIR, if non-zero, means search
 379    in that direction from the last cache hit.  */
 380 static inline ptrdiff_t
 381 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 382 {
 383   ptrdiff_t i, i_start;
 384
 385   if (bidi_cache_idx > bidi_cache_start)
 386     {
 387       if (bidi_cache_last_idx == -1)
 388         bidi_cache_last_idx = bidi_cache_idx - 1;
 389       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 390         {
 391           dir = -1;
 392           i_start = bidi_cache_last_idx - 1;
 393         }
 394       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 395                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 396         {
 397           dir = 1;
 398           i_start = bidi_cache_last_idx + 1;
 399         }
 400       else if (dir)
 401         i_start = bidi_cache_last_idx;
 402       else
 403         {
 404           dir = -1;
 405           i_start = bidi_cache_idx - 1;
 406         }
 407
 408       if (dir < 0)
 409         {
 410           /* Linear search for now; FIXME!  */
 411           for (i = i_start; i >= bidi_cache_start; i--)
 412             if (bidi_cache[i].charpos <= charpos
 413                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 414                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 415               return i;
 416         }
 417       else
 418         {
 419           for (i = i_start; i < bidi_cache_idx; i++)
 420             if (bidi_cache[i].charpos <= charpos
 421                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 422                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 423               return i;
 424         }
 425     }
 426
 427   return -1;
 428 }
 429
 430 /* Find a cached state where the resolved level changes to a value
 431    that is lower than LEVEL, and return its cache slot index.  DIR is
 432    the direction to search, starting with the last used cache slot.
 433    If DIR is zero, we search backwards from the last occupied cache
 434    slot.  BEFORE, if non-zero, means return the index of the slot that
 435    is ``before'' the level change in the search direction.  That is,
 436    given the cached levels like this:
 437
 438          1122333442211
 439           AB        C
 440
 441    and assuming we are at the position cached at the slot marked with
 442    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 443    index of slot B or A, depending whether BEFORE is, respectively,
 444    non-zero or zero.  */
 445 static ptrdiff_t
 446 bidi_cache_find_level_change (int level, int dir, int before)
 447 {
 448   if (bidi_cache_idx)
 449     {
 450       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 451       int incr = before ? 1 : 0;
 452
 453       xassert (!dir || bidi_cache_last_idx >= 0);
 454
 455       if (!dir)
 456         dir = -1;
 457       else if (!incr)
 458         i += dir;
 459
 460       if (dir < 0)
 461         {
 462           while (i >= bidi_cache_start + incr)
 463             {
 464               if (bidi_cache[i - incr].resolved_level >= 0
 465                   && bidi_cache[i - incr].resolved_level < level)
 466                 return i;
 467               i--;
 468             }
 469         }
 470       else
 471         {
 472           while (i < bidi_cache_idx - incr)
 473             {
 474               if (bidi_cache[i + incr].resolved_level >= 0
 475                   && bidi_cache[i + incr].resolved_level < level)
 476                 return i;
 477               i++;
 478             }
 479         }
 480     }
 481
 482   return -1;
 483 }
 484
 485 static inline void
 486 bidi_cache_ensure_space (ptrdiff_t idx)
 487 {
 488   /* Enlarge the cache as needed.  */
 489   if (idx >= bidi_cache_size)
 490     {
 491       /* The bidi cache cannot be larger than the largest Lisp string
 492          or buffer.  */
 493       ptrdiff_t string_or_buffer_bound
 494         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 495
 496       /* Also, it cannot be larger than what C can represent.  */
 497       ptrdiff_t c_bound
 498         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 499
 500       bidi_cache
 501         = xpalloc (bidi_cache, &bidi_cache_size,
 502                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 503                    min (string_or_buffer_bound, c_bound), elsz);
 504     }
 505 }
 506
 507 static inline void
 508 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 509 {
 510   ptrdiff_t idx;
 511
 512   /* We should never cache on backward scans.  */
 513   if (bidi_it->scan_dir == -1)
 514     abort ();
 515   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 516
 517   if (idx < 0)
 518     {
 519       idx = bidi_cache_idx;
 520       bidi_cache_ensure_space (idx);
 521       /* Character positions should correspond to cache positions 1:1.
 522          If we are outside the range of cached positions, the cache is
 523          useless and must be reset.  */
 524       if (idx > bidi_cache_start &&
 525           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 526                                + bidi_cache[idx - 1].nchars)
 527            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 528         {
 529           bidi_cache_reset ();
 530           idx = bidi_cache_start;
 531         }
 532       if (bidi_it->nchars <= 0)
 533         abort ();
 534       bidi_copy_it (&bidi_cache[idx], bidi_it);
 535       if (!resolved)
 536         bidi_cache[idx].resolved_level = -1;
 537     }
 538   else
 539     {
 540       /* Copy only the members which could have changed, to avoid
 541          costly copying of the entire struct.  */
 542       bidi_cache[idx].type = bidi_it->type;
 543       bidi_check_type (bidi_it->type);
 544       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 545       bidi_check_type (bidi_it->type_after_w1);
 546       if (resolved)
 547         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 548       else
 549         bidi_cache[idx].resolved_level = -1;
 550       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 551       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 552       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 553       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 554       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 555       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 556       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 557     }
 558
 559   bidi_cache_last_idx = idx;
 560   if (idx >= bidi_cache_idx)
 561     bidi_cache_idx = idx + 1;
 562 }
 563
 564 static inline bidi_type_t
 565 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 566 {
 567   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 568
 569   if (i >= bidi_cache_start)
 570     {
 571       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 572
 573       bidi_copy_it (bidi_it, &bidi_cache[i]);
 574       bidi_cache_last_idx = i;
 575       /* Don't let scan direction from the cached state override
 576          the current scan direction.  */
 577       bidi_it->scan_dir = current_scan_dir;
 578       return bidi_it->type;
 579     }
 580
 581   return UNKNOWN_BT;
 582 }
 583
 584 static inline int
 585 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 586 {
 587   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 588     abort ();
 589   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 590 }
 591
 592 \f
 593 /***********************************************************************
 594              Pushing and popping the bidi iterator state
 595  ***********************************************************************/
 596
 597 /* Push the bidi iterator state in preparation for reordering a
 598    different object, e.g. display string found at certain buffer
 599    position.  Pushing the bidi iterator boils down to saving its
 600    entire state on the cache and starting a new cache "stacked" on top
 601    of the current cache.  */
 602 void
 603 bidi_push_it (struct bidi_it *bidi_it)
 604 {
 605   /* Save the current iterator state in its entirety after the last
 606      used cache slot.  */
 607   bidi_cache_ensure_space (bidi_cache_idx);
 608   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 609
 610   /* Push the current cache start onto the stack.  */
 611   xassert (bidi_cache_sp < IT_STACK_SIZE);
 612   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 613
 614   /* Start a new level of cache, and make it empty.  */
 615   bidi_cache_start = bidi_cache_idx;
 616   bidi_cache_last_idx = -1;
 617 }
 618
 619 /* Restore the iterator state saved by bidi_push_it and return the
 620    cache to the corresponding state.  */
 621 void
 622 bidi_pop_it (struct bidi_it *bidi_it)
 623 {
 624   if (bidi_cache_start <= 0)
 625     abort ();
 626
 627   /* Reset the next free cache slot index to what it was before the
 628      call to bidi_push_it.  */
 629   bidi_cache_idx = bidi_cache_start - 1;
 630
 631   /* Restore the bidi iterator state saved in the cache.  */
 632   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 633
 634   /* Pop the previous cache start from the stack.  */
 635   if (bidi_cache_sp <= 0)
 636     abort ();
 637   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 638
 639   /* Invalidate the last-used cache slot data.  */
 640   bidi_cache_last_idx = -1;
 641 }
 642
 643 static ptrdiff_t bidi_cache_total_alloc;
 644
 645 /* Stash away a copy of the cache and its control variables.  */
 646 void *
 647 bidi_shelve_cache (void)
 648 {
 649   unsigned char *databuf;
 650   ptrdiff_t alloc;
 651
 652   /* Empty cache.  */
 653   if (bidi_cache_idx == 0)
 654     return NULL;
 655
 656   alloc = (bidi_shelve_header_size
 657            + bidi_cache_idx * sizeof (struct bidi_it));
 658   databuf = xmalloc (alloc);
 659   bidi_cache_total_alloc += alloc;
 660
 661   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 662   memcpy (databuf + sizeof (bidi_cache_idx),
 663           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 664   memcpy (databuf + sizeof (bidi_cache_idx)
 665           + bidi_cache_idx * sizeof (struct bidi_it),
 666           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 667   memcpy (databuf + sizeof (bidi_cache_idx)
 668           + bidi_cache_idx * sizeof (struct bidi_it)
 669           + sizeof (bidi_cache_start_stack),
 670           &bidi_cache_sp, sizeof (bidi_cache_sp));
 671   memcpy (databuf + sizeof (bidi_cache_idx)
 672           + bidi_cache_idx * sizeof (struct bidi_it)
 673           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 674           &bidi_cache_start, sizeof (bidi_cache_start));
 675   memcpy (databuf + sizeof (bidi_cache_idx)
 676           + bidi_cache_idx * sizeof (struct bidi_it)
 677           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 678           + sizeof (bidi_cache_start),
 679           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 680
 681   return databuf;
 682 }
 683
 684 /* Restore the cache state from a copy stashed away by
 685    bidi_shelve_cache, and free the buffer used to stash that copy.
 686    JUST_FREE non-zero means free the buffer, but don't restore the
 687    cache; used when the corresponding iterator is discarded instead of
 688    being restored.  */
 689 void
 690 bidi_unshelve_cache (void *databuf, int just_free)
 691 {
 692   unsigned char *p = databuf;
 693
 694   if (!p)
 695     {
 696       if (!just_free)
 697         {
 698           /* A NULL pointer means an empty cache.  */
 699           bidi_cache_start = 0;
 700           bidi_cache_sp = 0;
 701           bidi_cache_reset ();
 702         }
 703     }
 704   else
 705     {
 706       if (just_free)
 707         {
 708           ptrdiff_t idx;
 709
 710           memcpy (&idx, p, sizeof (bidi_cache_idx));
 711           bidi_cache_total_alloc
 712             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 713         }
 714       else
 715         {
 716           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 717           bidi_cache_ensure_space (bidi_cache_idx);
 718           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 719                   bidi_cache_idx * sizeof (struct bidi_it));
 720           memcpy (bidi_cache_start_stack,
 721                   p + sizeof (bidi_cache_idx)
 722                   + bidi_cache_idx * sizeof (struct bidi_it),
 723                   sizeof (bidi_cache_start_stack));
 724           memcpy (&bidi_cache_sp,
 725                   p + sizeof (bidi_cache_idx)
 726                   + bidi_cache_idx * sizeof (struct bidi_it)
 727                   + sizeof (bidi_cache_start_stack),
 728                   sizeof (bidi_cache_sp));
 729           memcpy (&bidi_cache_start,
 730                   p + sizeof (bidi_cache_idx)
 731                   + bidi_cache_idx * sizeof (struct bidi_it)
 732                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 733                   sizeof (bidi_cache_start));
 734           memcpy (&bidi_cache_last_idx,
 735                   p + sizeof (bidi_cache_idx)
 736                   + bidi_cache_idx * sizeof (struct bidi_it)
 737                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 738                   + sizeof (bidi_cache_start),
 739                   sizeof (bidi_cache_last_idx));
 740           bidi_cache_total_alloc
 741             -= (bidi_shelve_header_size
 742                 + bidi_cache_idx * sizeof (struct bidi_it));
 743         }
 744
 745       xfree (p);
 746     }
 747 }
 748
 749 \f
 750 /***********************************************************************
 751                         Initialization
 752  ***********************************************************************/
 753 static void
 754 bidi_initialize (void)
 755 {
 756   bidi_type_table = uniprop_table (intern ("bidi-class"));
 757   if (NILP (bidi_type_table))
 758     abort ();
 759   staticpro (&bidi_type_table);
 760
 761   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 762   if (NILP (bidi_mirror_table))
 763     abort ();
 764   staticpro (&bidi_mirror_table);
 765
 766   Qparagraph_start = intern ("paragraph-start");
 767   staticpro (&Qparagraph_start);
 768   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 769   if (!STRINGP (paragraph_start_re))
 770     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 771   staticpro (&paragraph_start_re);
 772   Qparagraph_separate = intern ("paragraph-separate");
 773   staticpro (&Qparagraph_separate);
 774   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 775   if (!STRINGP (paragraph_separate_re))
 776     paragraph_separate_re = build_string ("[ \t\f]*$");
 777   staticpro (&paragraph_separate_re);
 778
 779   bidi_cache_sp = 0;
 780   bidi_cache_total_alloc = 0;
 781
 782   bidi_initialized = 1;
 783 }
 784
 785 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 786    end.  */
 787 static inline void
 788 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 789 {
 790   bidi_it->invalid_levels = 0;
 791   bidi_it->invalid_rl_levels = -1;
 792   bidi_it->stack_idx = 0;
 793   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 794 }
 795
 796 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 797 void
 798 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 799               struct bidi_it *bidi_it)
 800 {
 801   if (! bidi_initialized)
 802     bidi_initialize ();
 803   if (charpos >= 0)
 804     bidi_it->charpos = charpos;
 805   if (bytepos >= 0)
 806     bidi_it->bytepos = bytepos;
 807   bidi_it->frame_window_p = frame_window_p;
 808   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 809   bidi_it->first_elt = 1;
 810   bidi_set_paragraph_end (bidi_it);
 811   bidi_it->new_paragraph = 1;
 812   bidi_it->separator_limit = -1;
 813   bidi_it->type = NEUTRAL_B;
 814   bidi_it->type_after_w1 = NEUTRAL_B;
 815   bidi_it->orig_type = NEUTRAL_B;
 816   bidi_it->prev_was_pdf = 0;
 817   bidi_it->prev.type = bidi_it->prev.type_after_w1
 818     = bidi_it->prev.orig_type = UNKNOWN_BT;
 819   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 820     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 821   bidi_it->next_for_neutral.charpos = -1;
 822   bidi_it->next_for_neutral.type
 823     = bidi_it->next_for_neutral.type_after_w1
 824     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 825   bidi_it->prev_for_neutral.charpos = -1;
 826   bidi_it->prev_for_neutral.type
 827     = bidi_it->prev_for_neutral.type_after_w1
 828     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 829   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 830   bidi_it->disp_pos = -1;       /* invalid/unknown */
 831   bidi_it->disp_prop = 0;
 832   /* We can only shrink the cache if we are at the bottom level of its
 833      "stack".  */
 834   if (bidi_cache_start == 0)
 835     bidi_cache_shrink ();
 836   else
 837     bidi_cache_reset ();
 838 }
 839
 840 /* Perform initializations for reordering a new line of bidi text.  */
 841 static void
 842 bidi_line_init (struct bidi_it *bidi_it)
 843 {
 844   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 845   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 846   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 847   bidi_it->invalid_levels = 0;
 848   bidi_it->invalid_rl_levels = -1;
 849   /* Setting this to zero will force its recomputation the first time
 850      we need it for W5.  */
 851   bidi_it->next_en_pos = 0;
 852   bidi_it->next_en_type = UNKNOWN_BT;
 853   bidi_it->next_for_ws.type = UNKNOWN_BT;
 854   bidi_set_sor_type (bidi_it,
 855                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 856                      bidi_it->level_stack[0].level); /* X10 */
 857
 858   bidi_cache_reset ();
 859 }
 860
 861 \f
 862 /***********************************************************************
 863                         Fetching characters
 864  ***********************************************************************/
 865
 866 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 867    are zero-based character positions in S, BEGBYTE is byte position
 868    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 869    string.  */
 870 static inline EMACS_INT
 871 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 872                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 873 {
 874   EMACS_INT pos = beg;
 875   const unsigned char *p = s + begbyte, *start = p;
 876
 877   if (unibyte)
 878     p = s + end;
 879   else
 880     {
 881       if (!CHAR_HEAD_P (*p))
 882         abort ();
 883
 884       while (pos < end)
 885         {
 886           p += BYTES_BY_CHAR_HEAD (*p);
 887           pos++;
 888         }
 889     }
 890
 891   return p - start;
 892 }
 893
 894 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 895    non-NULL, fetch the character from string S; otherwise fetch the
 896    character from the current buffer.  UNIBYTE non-zero means S is a
 897    unibyte string.  */
 898 static inline int
 899 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 900 {
 901   if (s)
 902     {
 903       if (unibyte)
 904         return s[bytepos];
 905       else
 906         return STRING_CHAR (s + bytepos);
 907     }
 908   else
 909     return FETCH_MULTIBYTE_CHAR (bytepos);
 910 }
 911
/* Fetch and return the character at BYTEPOS/CHARPOS.  If that
   character is covered by a display string, treat the entire run of
   covered characters as a single character, either u+2029 or u+FFFC,
   and return their combined length in CH_LEN and NCHARS.  DISP_POS
   specifies the character position of the next display string, or -1
   if not yet computed.  When the next character is at or beyond that
   position, the function updates DISP_POS with the position of the
   next display string.  DISP_PROP non-zero means that there's really
   a display string at DISP_POS, as opposed to when we searched till
   DISP_POS without finding one.  If DISP_PROP is 2, it means the
   display spec is of the form `(space ...)', which is replaced with
   u+2029 to handle it as a paragraph separator.  STRING->s is the C
   string to iterate, or NULL if iterating over a buffer or a Lisp
   string; in the latter case, STRING->lstring is the Lisp string.

   On return, *CH_LEN is the number of bytes and *NCHARS the number of
   characters to advance in order to get past the returned character.
   If CHARPOS is at or beyond the end of the text (STRING->schars for
   strings, ZV for buffers), the value is BIDI_EOB, *CH_LEN and
   *NCHARS are both 1, *DISP_POS is set to that end position and
   *DISP_PROP to zero.  FRAME_WINDOW_P is passed on unchanged to
   compute_display_string_pos.  */
static inline int
bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
                 int *disp_prop, struct bidi_string_data *string,
                 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
{
  int ch;
  /* End of the text, as a character position: length of the string
     when iterating over a C or Lisp string, ZV otherwise.  */
  EMACS_INT endpos
    = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
  struct text_pos pos;

  /* If we got past the last known position of display string, compute
     the position of the next one.  That position could be at CHARPOS.  */
  if (charpos < endpos && charpos > *disp_pos)
    {
      SET_TEXT_POS (pos, charpos, bytepos);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  /* Fetch the character at BYTEPOS.  */
  if (charpos >= endpos)
    {
      /* At or past the end of the text: report the end-of-buffer
         pseudo-character and mark the display-string data as "none".  */
      ch = BIDI_EOB;
      *ch_len = 1;
      *nchars = 1;
      *disp_pos = endpos;
      *disp_prop = 0;
    }
  else if (charpos >= *disp_pos && *disp_prop)
    {
      EMACS_INT disp_end_pos;

      /* We don't expect to find ourselves in the middle of a display
         property.  Hopefully, it will never be needed.  */
      if (charpos > *disp_pos)
        abort ();
      /* Text covered by `display' properties and overlays with
         display properties or display strings is handled as a single
         character that represents the entire run of characters
         covered by the display property.  */
      if (*disp_prop == 2)
        {
          /* `(space ...)' display specs are handled as paragraph
             separators for the purposes of the reordering; see UAX#9
             section 3 and clause HL1 in section 4.3 there.  */
          ch = 0x2029;
        }
      else
        {
          /* All other display specs are handled as the Unicode Object
             Replacement Character.  */
          ch = 0xFFFC;
        }
      /* Find where the run of text covered by the display string
         ends; a negative value means there is no display string at
         *DISP_POS after all.  */
      disp_end_pos = compute_display_string_end (*disp_pos, string);
      if (disp_end_pos < 0)
        {
          /* Somebody removed the display string from the buffer
             behind our back.  Recover by processing this buffer
             position as if no display property were present there to
             begin with.  */
          *disp_prop = 0;
          /* CH assigned above is overwritten by the code at
             normal_char.  */
          goto normal_char;
        }
      /* The whole covered run counts as one "character": NCHARS is its
         length in characters, CH_LEN its length in bytes.  */
      *nchars = disp_end_pos - *disp_pos;
      if (*nchars <= 0)
        abort ();
      if (string->s)
        *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
                                    disp_end_pos, string->unibyte);
      else if (STRINGP (string->lstring))
        *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
                                    bytepos, disp_end_pos, string->unibyte);
      else
        *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
    }
  else
    {
      /* An ordinary character, not covered by any display string.
         The label is also the target of the recovery jump above.  */
    normal_char:
      if (string->s)
        {
          /* Iterating over a C string.  */
          int len;

          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
              *ch_len = 1;
            }
        }
      else if (STRINGP (string->lstring))
        {
          /* Iterating over a Lisp string.  */
          int len;

          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
                                           len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
              *ch_len = 1;
            }
        }
      else
        {
          /* Iterating over the current buffer.  */
          ch = FETCH_MULTIBYTE_CHAR (bytepos);
          *ch_len = CHAR_BYTES (ch);
        }
      *nchars = 1;
    }

  /* If we just entered a run of characters covered by a display
     string, compute the position of the next display string.  The
     search starts right after the character (or covered run) that was
     just fetched.  */
  if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
      && *disp_prop)
    {
      SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  return ch;
}
1055
1056 \f
1057 /***********************************************************************
1058                         Determining paragraph direction
1059  ***********************************************************************/
1060
1061 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1062    Value is the non-negative length of the paragraph separator
1063    following the buffer position, -1 if position is at the beginning
1064    of a new paragraph, or -2 if position is neither at beginning nor
1065    at end of a paragraph.  */
1066 static EMACS_INT
1067 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1068 {
1069   Lisp_Object sep_re;
1070   Lisp_Object start_re;
1071   EMACS_INT val;
1072
1073   sep_re = paragraph_separate_re;
1074   start_re = paragraph_start_re;
1075
1076   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1077   if (val < 0)
1078     {
1079       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1080         val = -1;
1081       else
1082         val = -2;
1083     }
1084
1085   return val;
1086 }
1087
/* Upper bound on the number of times bidi_find_paragraph_start tests
   for a paragraph start while scanning back line by line, before it
   gives up and falls back to BEGV_BYTE.

   On my 2005-vintage machine, searching back for paragraph start
   takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
   when user types C-p.  The number below limits each call to
   bidi_paragraph_init to about 10 ms.

   NOTE(review): the figures above do not agree with each other:
   7500 lines at ~1 ms per line would be about 7.5 s, not 10 ms.
   Presumably the per-line cost is closer to 1 microsecond; confirm
   before relying on these numbers.  */
#define MAX_PARAGRAPH_SEARCH 7500
1093
1094 /* Find the beginning of this paragraph by looking back in the buffer.
1095    Value is the byte position of the paragraph's beginning, or
1096    BEGV_BYTE if paragraph_start_re is still not found after looking
1097    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1098 static EMACS_INT
1099 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1100 {
1101   Lisp_Object re = paragraph_start_re;
1102   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1103   EMACS_INT n = 0;
1104
1105   while (pos_byte > BEGV_BYTE
1106          && n++ < MAX_PARAGRAPH_SEARCH
1107          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1108     {
1109       /* FIXME: What if the paragraph beginning is covered by a
1110          display string?  And what if a display string covering some
1111          of the text over which we scan back includes
1112          paragraph_start_re?  */
1113       pos = find_next_newline_no_quit (pos - 1, -1);
1114       pos_byte = CHAR_TO_BYTE (pos);
1115     }
1116   if (n >= MAX_PARAGRAPH_SEARCH)
1117     pos_byte = BEGV_BYTE;
1118   return pos_byte;
1119 }
1120
/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.

   On normal return, bidi_it->paragraph_dir is either L2R or R2L, the
   base level in bidi_it->level_stack[0].level is 1 for R2L and 0 for
   L2R, and bidi_line_init has been called.  The one exception is the
   early return taken while still inside a paragraph separator, which
   leaves BIDI_IT untouched.  Any DIR other than L2R, R2L or
   NEUTRAL_DIR aborts.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
  EMACS_INT bytepos = bidi_it->bytepos;
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
  EMACS_INT pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
  EMACS_INT end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    abort ();

  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      EMACS_INT ch_len, nchars;
      /* DISP_POS and DISP_PROP are local display-string state for the
         scan below; the corresponding members of BIDI_IT are not
         touched.  */
      EMACS_INT pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  Note
         that this returns without calling bidi_line_init.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P
         is non-zero, and only if we are iterating on a buffer.  */
      do {
        /* Start scanning at the beginning of the paragraph.
           NOTE(review): for strings BYTEPOS is reset to 0 here while
           POS keeps the value computed above; presumably callers only
           get here at the start of a string -- confirm.  */
        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        /* Scan forward until a character of the STRONG category is
           found.  When bidi_ignore_explicit_marks_for_paragraph_level
           is non-zero, the explicit embedding and override marks do
           not stop the scan.  POS/BYTEPOS always point just past the
           character whose type is in TYPE.  */
        for (pos += nchars, bytepos += ch_len;
             (bidi_get_category (type) != STRONG)
               || (bidi_ignore_explicit_marks_for_paragraph_level
                   && (type == RLE || type == RLO
                       || type == LRE || type == LRO));
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, stop at a paragraph separator character
               that is followed by either a paragraph separator or a
               paragraph start, as recognized by
               bidi_at_paragraph_end.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* Set the paragraph direction from the type of the character
           that stopped the scan.  If it is neither of the kinds
           tested below, paragraph_dir is left as it was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                EMACS_INT prevpbyte = pstartbyte;
                EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.
                   Step back one character at a time until
                   bidi_find_paragraph_start reports a position before
                   the start of the current paragraph, or until BEGV
                   is reached.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* The next iteration of the outer loop examines that
                   paragraph.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* Record the base embedding level: 1 for right-to-left paragraphs,
     0 for left-to-right ones.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1299
1300 \f
1301 /***********************************************************************
1302                  Resolving explicit and implicit levels.
1303   The rest of this file constitutes the core of the UBA implementation.
1304  ***********************************************************************/
1305
1306 static inline int
1307 bidi_explicit_dir_char (int ch)
1308 {
1309   bidi_type_t ch_type;
1310
1311   if (!bidi_initialized)
1312     abort ();
1313   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1314   return (ch_type == LRE || ch_type == LRO
1315           || ch_type == RLE || ch_type == RLO
1316           || ch_type == PDF);
1317 }
1318
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   The character fetched is stored in BIDI_IT->ch, its original bidi
   type in BIDI_IT->orig_type, and its type after the explicit
   formatting characters have been turned into WEAK_BN (or WEAK_EN)
   in BIDI_IT->type.  For the explicit formatting characters,
   BIDI_IT->type_after_w1 keeps the original type; for all other
   characters it is reset to UNKNOWN_BT.  The value returned is the
   embedding level in effect after processing the character.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      /* Clamp the character position to the beginning of the
         string/buffer and recompute the byte position from it.  */
      if (string_p)
        {
          const unsigned char *p
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          if (bidi_it->charpos < 0)
            bidi_it->charpos = 0;
          bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
                                               bidi_it->string.unibyte);
        }
      else
        {
          if (bidi_it->charpos < BEGV)
            bidi_it->charpos = BEGV;
          bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  /* The level and override currently in effect are those on top of
     the embedding level stack.  */
  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      /* At end of buffer/string: produce the end-of-buffer
         pseudo-character without calling bidi_fetch_char.  */
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character, u+FFFC or u+2029 (see
         bidi_fetch_char).  */
      curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  /* In each case below, the embedding stack is modified only while
     ignore_bn_limit is negative.  bidi_resolve_explicit sets it to a
     non-negative position after detecting an empty embedding; in that
     case the formatting character only gets its W5/Retaining
     treatment here.  */
  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* The level would be too deep: don't push, just count
                   the embedding as invalid.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* invalid_rl_levels being exactly zero marks the one PDF
               that must not be ignored even though invalid_levels is
               non-zero; see the commentary about invalid_rl_levels
               above.  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            /* With no invalid embeddings pending, this PDF closes a
               valid one; otherwise it cancels one of the invalid
               embeddings counted earlier.  */
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1519
1520 /* Given an iterator state in BIDI_IT, advance one character position
1521    in the buffer/string to the next character (in the logical order),
1522    resolve any explicit embeddings and directional overrides, and
1523    return the embedding level of the character after resolving
1524    explicit directives and ignoring empty embeddings.  */
1525 static int
1526 bidi_resolve_explicit (struct bidi_it *bidi_it)
1527 {
1528   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1529   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1530   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1531   const unsigned char *s
1532     = (STRINGP (bidi_it->string.lstring)
1533        ? SDATA (bidi_it->string.lstring)
1534        : bidi_it->string.s);
1535
1536   if (prev_level < new_level
1537       && bidi_it->type == WEAK_BN
1538       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1539       && bidi_it->charpos < eob         /* not already at EOB */
1540       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1541                                                    + bidi_it->ch_len, s,
1542                                                    bidi_it->string.unibyte)))
1543     {
1544       /* Avoid pushing and popping embedding levels if the level run
1545          is empty, as this breaks level runs where it shouldn't.
1546          UAX#9 removes all the explicit embedding and override codes,
1547          so empty embeddings disappear without a trace.  We need to
1548          behave as if we did the same.  */
1549       struct bidi_it saved_it;
1550       int level = prev_level;
1551
1552       bidi_copy_it (&saved_it, bidi_it);
1553
1554       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1555                                                        + bidi_it->ch_len, s,
1556                                                        bidi_it->string.unibyte)))
1557         {
1558           /* This advances to the next character, skipping any
1559              characters covered by display strings.  */
1560           level = bidi_resolve_explicit_1 (bidi_it);
1561           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1562              a pointer to its data is no longer valid.  */
1563           if (STRINGP (bidi_it->string.lstring))
1564             s = SDATA (bidi_it->string.lstring);
1565         }
1566
1567       if (bidi_it->nchars <= 0)
1568         abort ();
1569       if (level == prev_level)  /* empty embedding */
1570         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1571       else                      /* this embedding is non-empty */
1572         saved_it.ignore_bn_limit = -2;
1573
1574       bidi_copy_it (bidi_it, &saved_it);
1575       if (bidi_it->ignore_bn_limit > -1)
1576         {
1577           /* We pushed a level, but we shouldn't have.  Undo that. */
1578           if (!bidi_it->invalid_rl_levels)
1579             {
1580               new_level = bidi_pop_embedding_level (bidi_it);
1581               bidi_it->invalid_rl_levels = -1;
1582               if (bidi_it->invalid_levels)
1583                 bidi_it->invalid_levels--;
1584             }
1585           if (!bidi_it->invalid_levels)
1586             new_level = bidi_pop_embedding_level (bidi_it);
1587           else
1588             {
1589               bidi_it->invalid_levels--;
1590               bidi_it->invalid_rl_levels--;
1591             }
1592         }
1593     }
1594
1595   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1596     {
1597       bidi_set_paragraph_end (bidi_it);
1598       /* This is needed by bidi_resolve_weak below, and in L1.  */
1599       bidi_it->type_after_w1 = bidi_it->type;
1600       bidi_check_type (bidi_it->type_after_w1);
1601     }
1602
1603   return new_level;
1604 }
1605
1606 /* Advance in the buffer/string, resolve weak types and return the
1607    type of the next character after weak type resolution.

        BIDI_IT is advanced by the call to bidi_resolve_explicit, whose
        return value is taken as the embedding level of the character
        reached.  The override status of the current embedding (X6) and
        the rules W1 through W7 of UAX#9 are then applied to the type
        found in BIDI_IT->type.  The result is stored in BIDI_IT->type and
        returned; BIDI_IT->type_after_w1 is filled in as well if it is
        still UNKNOWN_BT by then.

        W4 and W5 depend on the types of characters further ahead.  Those
        are found by advancing BIDI_IT itself and then restoring it from
        a copy saved beforehand.

        Aborts if the type in BIDI_IT->type after advancing is UNKNOWN_BT,
        LRE, LRO, RLE, RLO or PDF.  */
1608 static bidi_type_t
1609 bidi_resolve_weak (struct bidi_it *bidi_it)
1610 {
1611   bidi_type_t type;
1612   bidi_dir_t override;
       /* The level must be sampled before bidi_resolve_explicit is
          called on the next line, so that the two can be compared.  */
1613   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1614   int new_level  = bidi_resolve_explicit (bidi_it);
1615   int next_char;
1616   bidi_type_t type_of_next;
1617   struct bidi_it saved_it;
       /* End of the text being iterated: the schars member of the string
          data if we iterate over a string, ZV otherwise.  */
1618   EMACS_INT eob
1619     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1620        ? bidi_it->string.schars : ZV);
1621
1622   type = bidi_it->type;
1623   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1624
       /* NOTE(review): presumably bidi_resolve_explicit never leaves any
          of these types in BIDI_IT->type -- confirm there.  */
1625   if (type == UNKNOWN_BT
1626       || type == LRE
1627       || type == LRO
1628       || type == RLE
1629       || type == RLO
1630       || type == PDF)
1631     abort ();
1632
1633   if (new_level != prev_level
1634       || bidi_it->type == NEUTRAL_B)
1635     {
1636       /* We've got a new embedding level run, compute the directional
1637          type of sor and initialize per-run variables (UAX#9, clause
1638          X10).  */
1639       bidi_set_sor_type (bidi_it, prev_level, new_level);
1640     }
1641   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1642            || type == WEAK_BN || type == STRONG_AL)
1643     bidi_it->type_after_w1 = type;      /* needed in L1 */
1644   bidi_check_type (bidi_it->type_after_w1);
1645
1646   /* Level and directional override status are already recorded in
1647      bidi_it, and do not need any change; see X6.  */
1648   if (override == R2L)          /* X6 */
1649     type = STRONG_R;
1650   else if (override == L2R)
1651     type = STRONG_L;
1652   else
1653     {
1654       if (type == WEAK_NSM)     /* W1 */
1655         {
1656           /* Note that we don't need to consider the case where the
1657              prev character has its type overridden by an RLO or LRO,
1658              because then either the type of this NSM would have been
1659              also overridden, or the previous character is outside the
1660              current level run, and thus not relevant to this NSM.
1661              This is why NSM gets the type_after_w1 of the previous
1662              character.  */
1663           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1664               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1665               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1666             type = bidi_it->prev.type_after_w1;
1667           else if (bidi_it->sor == R2L)
1668             type = STRONG_R;
1669           else if (bidi_it->sor == L2R)
1670             type = STRONG_L;
1671           else /* shouldn't happen! */
1672             abort ();
1673         }
           /* The type produced by W1 above is what W2..W5 below test, so
              an NSM that took the type of its predecessor is treated
              here exactly like that predecessor would be.  */
1674       if (type == WEAK_EN       /* W2 */
1675           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1676         type = WEAK_AN;
1677       else if (type == STRONG_AL) /* W3 */
1678         type = STRONG_R;
1679       else if ((type == WEAK_ES /* W4 */
1680                 && bidi_it->prev.type_after_w1 == WEAK_EN
1681                 && bidi_it->prev.orig_type == WEAK_EN)
1682                || (type == WEAK_CS
1683                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1684                         && bidi_it->prev.orig_type == WEAK_EN)
1685                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1686         {
               /* The character before this separator is a number; what
                  remains to be seen is whether the one after it is a
                  number of the same kind.  */
1687           const unsigned char *s
1688             = (STRINGP (bidi_it->string.lstring)
1689                ? SDATA (bidi_it->string.lstring)
1690                : bidi_it->string.s);
1691
1692           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1693                        ? BIDI_EOB
1694                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1695                                            s, bidi_it->string.unibyte));
1696           type_of_next = bidi_get_type (next_char, override);
1697
1698           if (type_of_next == WEAK_BN
1699               || bidi_explicit_dir_char (next_char))
1700             {
                   /* Step over BNs that stay on this embedding level,
                      using BIDI_IT itself, and take the type of the
                      character where that stops.  BIDI_IT is put back
                      from SAVED_IT afterwards.  */
1701               bidi_copy_it (&saved_it, bidi_it);
1702               while (bidi_resolve_explicit (bidi_it) == new_level
1703                      && bidi_it->type == WEAK_BN)
1704                 ;
1705               type_of_next = bidi_it->type;
1706               bidi_copy_it (bidi_it, &saved_it);
1707             }
1708
1709           /* If the next character is EN, but the last strong-type
1710              character is AL, that next EN will be changed to AN when
1711              we process it in W2 above.  So in that case, this ES
1712              should not be changed into EN.  */
1713           if (type == WEAK_ES
1714               && type_of_next == WEAK_EN
1715               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1716             type = WEAK_EN;
1717           else if (type == WEAK_CS)
1718             {
1719               if (bidi_it->prev.type_after_w1 == WEAK_AN
1720                   && (type_of_next == WEAK_AN
1721                       /* If the next character is EN, but the last
1722                          strong-type character is AL, EN will be later
1723                          changed to AN when we process it in W2 above.
1724                          So in that case, this CS is treated like one
1725                          between two ANs, and becomes AN as well.  */
1726                       || (type_of_next == WEAK_EN
1727                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1728                 type = WEAK_AN;
1729               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1730                        && type_of_next == WEAK_EN
1731                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1732                 type = WEAK_EN;
1733             }
1734         }
1735       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1736                || type == WEAK_BN)      /* W5/Retaining */
1737         {
1738           if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1739             type = WEAK_EN;
               /* A lookahead made for an earlier character of this
                  sequence (see the next branch) recorded a position that
                  is still ahead of us: reuse what it found there.  */
1740           else if (bidi_it->next_en_pos > bidi_it->charpos
1741                    && bidi_it->next_en_type != WEAK_BN)
1742             {
1743               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1744                 type = WEAK_EN;
1745             }
               /* No usable record, but next_en_pos is not -1 either (-1
                  is set below when the lookahead ran into the paragraph
                  end), so look ahead now.  */
1746           else if (bidi_it->next_en_pos >=0)
1747             {
1748               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1749               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1750                                         ? SDATA (bidi_it->string.lstring)
1751                                         : bidi_it->string.s);
1752
1753               if (bidi_it->nchars <= 0)
1754                 abort ();
1755               next_char
1756                 = (bidi_it->charpos + bidi_it->nchars >= eob
1757                    ? BIDI_EOB
1758                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1759                                        bidi_it->string.unibyte));
1760               type_of_next = bidi_get_type (next_char, override);
1761
1762               if (type_of_next == WEAK_ET
1763                   || type_of_next == WEAK_BN
1764                   || bidi_explicit_dir_char (next_char))
1765                 {
                       /* Run BIDI_IT forward over the ETs and BNs of this
                          embedding level, note the type and position of
                          the character where that stops, and then restore
                          BIDI_IT from SAVED_IT.  */
1766                   bidi_copy_it (&saved_it, bidi_it);
1767                   while (bidi_resolve_explicit (bidi_it) == new_level
1768                          && (bidi_it->type == WEAK_BN
1769                              || bidi_it->type == WEAK_ET))
1770                     ;
1771                   type_of_next = bidi_it->type;
1772                   en_pos = bidi_it->charpos;
1773                   bidi_copy_it (bidi_it, &saved_it);
1774                 }
1775               /* Remember this position, to speed up processing of the
1776                  next ETs.  */
1777               bidi_it->next_en_pos = en_pos;
1778               if (type_of_next == WEAK_EN)
1779                 {
1780                   /* If the last strong character is AL, the EN we've
1781                      found will become AN when we get to it (W2). */
1782                   if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1783                     type_of_next = WEAK_AN;
1784                   else if (type == WEAK_BN)
1785                     type = NEUTRAL_ON; /* W6/Retaining */
1786                   else
1787                     type = WEAK_EN;
1788                 }
1789               else if (type_of_next == NEUTRAL_B)
1790                 /* Record the fact that there are no more ENs from
1791                    here to the end of paragraph, to avoid entering the
1792                    loop above ever again in this paragraph.  */
1793                 bidi_it->next_en_pos = -1;
1794               /* Record the type of the character where we ended our search.  */
1795               bidi_it->next_en_type = type_of_next;
1796             }
1797         }
1798     }
1799
       /* Separators and terminators that W4 and W5 did not turn into
          numbers become ON.  This test is outside of the "else" above,
          so it is also made when an override is in effect; TYPE is then
          STRONG_R or STRONG_L and the test fails.  */
1800   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1801       || (type == WEAK_BN
1802           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1803               || bidi_it->prev.type_after_w1 == WEAK_ES
1804               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1805     type = NEUTRAL_ON;
1806
1807   /* Store the type we've got so far, before we clobber it with strong
1808      types in W7 and while resolving neutral types.  But leave alone
1809      the original types that were recorded above, because we will need
1810      them for the L1 clause.  */
1811   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1812     bidi_it->type_after_w1 = type;
1813   bidi_check_type (bidi_it->type_after_w1);
1814
1815   if (type == WEAK_EN)  /* W7 */
1816     {
           /* EN becomes L if the last strong character was L, or if no
              strong character has been recorded and sor is L2R.  */
1817       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1818           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1819         type = STRONG_L;
1820     }
1821
1822   bidi_it->type = type;
1823   bidi_check_type (bidi_it->type);
1824   return type;
1825 }
1826
1827 /* Return the strong type that a neutral character resolves to, given
1828    PREV_TYPE and NEXT_TYPE, the types of the text on either side of
1829    it, and LEV, the embedding level used when the two sides differ.  */
1830 static inline bidi_type_t
1831 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1832 {
1833   /* N1: for this purpose EN and AN on either side count as R.  */
1834   bidi_type_t before
1835     = (prev_type == WEAK_EN || prev_type == WEAK_AN) ? STRONG_R : prev_type;
1836   bidi_type_t after
1837     = (next_type == WEAK_EN || next_type == WEAK_AN) ? STRONG_R : next_type;
1838
1839   /* N1: both sides agree, so the neutral takes their type.  */
1840   if (after == before)
1841     return after;
1842   /* N2: otherwise the parity of the embedding level decides.  */
1843   return (lev & 1) ? STRONG_R : STRONG_L;
1844 }
1845
     /* Advance in the buffer/string, resolve the type of the next
        character through the neutral types (UAX#9 rules N1 and N2), and
        return the resulting type.

        bidi_resolve_weak does the advancing and the resolution up to and
        including the weak types.  If the type it returns is of the
        NEUTRAL category but is not NEUTRAL_B, or is a WEAK_BN on an
        unchanged embedding level, that type is resolved further by
        bidi_resolve_neutral_1, from the types of the text before the
        character (BIDI_IT->prev_for_neutral) and after it.  The type
        after it is taken from BIDI_IT->next_for_neutral if that is known;
        failing that, it is either deduced by one of two shortcuts or
        found by scanning forward.  Only the forward scan records its
        outcome in BIDI_IT (in the type and next_for_neutral members); it
        also enters the states it scanned into the cache.  Any other type
        is returned the way bidi_resolve_weak left it.

        Aborts if bidi_resolve_weak returns a type that should not be
        possible at this stage, or if the forward scan is needed while
        BIDI_IT->scan_dir is -1.  */
1846 static bidi_type_t
1847 bidi_resolve_neutral (struct bidi_it *bidi_it)
1848 {
       /* PREV_LEVEL is the embedding level before advancing,
          CURRENT_LEVEL the one after it.  */
1849   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1850   bidi_type_t type = bidi_resolve_weak (bidi_it);
1851   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1852
1853   if (!(type == STRONG_R
1854         || type == STRONG_L
1855         || type == WEAK_BN
1856         || type == WEAK_EN
1857         || type == WEAK_AN
1858         || type == NEUTRAL_B
1859         || type == NEUTRAL_S
1860         || type == NEUTRAL_WS
1861         || type == NEUTRAL_ON))
1862     abort ();
1863
1864   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1865                             we are already at paragraph end.  */
1866        && bidi_get_category (type) == NEUTRAL)
1867       || (type == WEAK_BN && prev_level == current_level))
1868     {
           /* The type of the text that follows is already known, so it
              can be used directly.  */
1869       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1870         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1871                                        bidi_it->next_for_neutral.type,
1872                                        current_level);
1873       /* The next two "else if" clauses are shortcuts for the
1874          important special case when we have a long sequence of
1875          neutral or WEAK_BN characters, such as whitespace or nulls or
1876          other control characters, on the base embedding level of the
1877          paragraph, and that sequence goes all the way to the end of
1878          the paragraph and follows a character whose resolved
1879          directionality is identical to the base embedding level.
1880          (This is what happens in a buffer with plain L2R text that
1881          happens to include long sequences of control characters.)  By
1882          virtue of N1, the result of examining this long sequence will
1883          always be either STRONG_L or STRONG_R, depending on the base
1884          embedding level.  So we use this fact directly instead of
1885          entering the expensive loop in the "else" clause.  */
1886       else if (current_level == 0
1887                && bidi_it->prev_for_neutral.type == STRONG_L
1888                && !bidi_explicit_dir_char (bidi_it->ch))
1889         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1890                                        STRONG_L, current_level);
1891       else if (/* current level is 1 */
1892                current_level == 1
1893                /* base embedding level is also 1 */
1894                && bidi_it->level_stack[0].level == 1
1895                /* previous character is one of those considered R for
1896                   the purposes of N1 */
1897                && (bidi_it->prev_for_neutral.type == STRONG_R
1898                    || bidi_it->prev_for_neutral.type == WEAK_EN
1899                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1900                && !bidi_explicit_dir_char (bidi_it->ch))
1901         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1902                                        STRONG_R, current_level);
1903       else
1904         {
1905           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1906              the assumption of batch-style processing; see clauses W4,
1907              W5, and especially N1, which require to look far forward
1908              (as well as back) in the buffer/string.  May the fleas of
1909              a thousand camels infest the armpits of those who design
1910              supposedly general-purpose algorithms by looking at their
1911              own implementations, and fail to consider other possible
1912              implementations!  */
1913           struct bidi_it saved_it;
1914           bidi_type_t next_type;
1915
               /* The scan below goes through bidi_resolve_weak, so it
                  cannot be made while iterating backwards.  */
1916           if (bidi_it->scan_dir == -1)
1917             abort ();
1918
               /* SAVED_IT keeps the state of the character being
                  resolved; BIDI_IT is restored from it when the scan is
                  over.  */
1919           bidi_copy_it (&saved_it, bidi_it);
1920           /* Scan the text forward until we find the first non-neutral
1921              character, and then use that to resolve the neutral we
1922              are dealing with now.  We also cache the scanned iterator
1923              states, to salvage some of the effort later.  */
1924           bidi_cache_iterator_state (bidi_it, 0);
1925           do {
1926             /* Record the info about the previous character, so that
1927                it will be cached below with this state.  */
1928             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1929                 && bidi_it->type != WEAK_BN)
1930               bidi_remember_char (&bidi_it->prev, bidi_it);
1931             type = bidi_resolve_weak (bidi_it);
1932             /* Paragraph separators have their levels fully resolved
1933                at this point, so cache them as resolved.  */
1934             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1935             /* FIXME: implement L1 here, by testing for a newline and
1936                resetting the level for any sequence of whitespace
1937                characters adjacent to it.  */
1938           } while (!(type == NEUTRAL_B
1939                      || (type != WEAK_BN
1940                          && bidi_get_category (type) != NEUTRAL)
1941                      /* This is all per level run, so stop when we
1942                         reach the end of this level run.  */
1943                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1944                          != current_level)));
1945
               /* Note in the saved state where the scan ended; the type
                  recorded with it is replaced further down by the one
                  actually used for the resolution.  */
1946           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1947
               /* TYPE is now the type of the character that stopped the
                  scan.  Turn it into the type of the "following text"
                  that N1 should see.  */
1948           switch (type)
1949             {
1950               case STRONG_L:
1951               case STRONG_R:
1952               case STRONG_AL:
1953                 /* Actually, STRONG_AL cannot happen here, because
1954                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
1955                 xassert (type != STRONG_AL);
1956                 next_type = type;
1957                 break;
1958               case WEAK_EN:
1959               case WEAK_AN:
1960                 /* N1: ``European and Arabic numbers are treated as
1961                    though they were R.''  */
1962                 next_type = STRONG_R;
1963                 break;
1964               case WEAK_BN:
1965                 if (!bidi_explicit_dir_char (bidi_it->ch))
1966                   abort ();             /* can't happen: BNs are skipped */
1967                 /* FALLTHROUGH */
1968               case NEUTRAL_B:
1969                 /* Marched all the way to the end of this level run.
1970                    We need to use the eor type, whose information is
1971                    stored by bidi_set_sor_type in the prev_for_neutral
1972                    member.  */
1973                 if (saved_it.type != WEAK_BN
1974                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1975                   next_type = bidi_it->prev_for_neutral.type;
1976                 else
1977                   {
1978                     /* This is a BN which does not adjoin neutrals.
1979                        Leave its type alone.  */
1980                     bidi_copy_it (bidi_it, &saved_it);
1981                     return bidi_it->type;
1982                   }
1983                 break;
1984               default:
1985                 abort ();
1986             }
               /* Resolve the character we started from, record the result
                  and the type used for the following text in the saved
                  state, and go back to that state.  */
1987           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1988                                          next_type, current_level);
1989           saved_it.next_for_neutral.type = next_type;
1990           saved_it.type = type;
1991           bidi_check_type (next_type);
1992           bidi_check_type (type);
1993           bidi_copy_it (bidi_it, &saved_it);
1994         }
1995     }
1996   return type;
1997 }
1998
1999 /* Step BIDI_IT forward (in logical order) by one character of the
2000    buffer/string, resolve the bidi type of the character reached, and
2001    return that type.  Aborts unless BIDI_IT is scanning forward.  */
2002 static bidi_type_t
2003 bidi_type_of_next_char (struct bidi_it *bidi_it)
2004 {
2005   /* Callers must only use this while scanning forward.  */
2006   if (bidi_it->scan_dir != 1)
2007     abort ();
2008
2009   /* Once we step out of the area where we found only empty levels,
2010      reset the limit until which BNs are to be ignored.  */
2011   if (bidi_it->ignore_bn_limit > -1)
2012     {
2013       /* A real limit was recorded: drop it when we are at or past it.  */
2014       if (bidi_it->ignore_bn_limit <= bidi_it->charpos)
2015         bidi_it->ignore_bn_limit = -1;
2016     }
2017   else if (bidi_it->ignore_bn_limit == -2
2018            && !bidi_explicit_dir_char (bidi_it->ch))
2019     /* The marker -2 is dropped here, too.  */
2020     bidi_it->ignore_bn_limit = -1;
2021
2022   return bidi_resolve_neutral (bidi_it);
2023 }
2024
2025 /* Given an iterator state BIDI_IT, advance one character position in
2026    the buffer/string to the next character (in the current scan
2027    direction), resolve the embedding and implicit levels of that next
2028    character, and return the resulting level.

        If the cache holds a state for the next position whose
        resolved_level is not -1, that level is returned as is.
        Otherwise the scan must not be a backward one (we abort if
        scan_dir is -1): the type of the next character is resolved, the
        level is computed from it, stored in BIDI_IT->resolved_level and
        returned.

        On a forward scan that is already at or past the end of the text,
        BIDI_IT is left alone and its current resolved_level is
        returned.  */
2029 static int
2030 bidi_level_of_next_char (struct bidi_it *bidi_it)
2031 {
2032   bidi_type_t type;
2033   int level, prev_level = -1;
2034   struct bidi_saved_info next_for_neutral;
       /* Stays at -2 unless set below; that is lower than any position
          the cache is asked about.  */
2035   EMACS_INT next_char_pos = -2;
2036
2037   if (bidi_it->scan_dir == 1)
2038     {
2039       EMACS_INT eob
2040         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2041            ? bidi_it->string.schars : ZV);
2042
2043       /* There's no sense in trying to advance if we hit end of text.  */
2044       if (bidi_it->charpos >= eob)
2045         return bidi_it->resolved_level;
2046
2047       /* Record the info about the previous character.  */
2048       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2049           && bidi_it->type != WEAK_BN)
2050         bidi_remember_char (&bidi_it->prev, bidi_it);
           /* Strong characters are remembered separately as well;
              bidi_resolve_weak consults last_strong for W2, W4 and W7.  */
2051       if (bidi_it->type_after_w1 == STRONG_R
2052           || bidi_it->type_after_w1 == STRONG_L
2053           || bidi_it->type_after_w1 == STRONG_AL)
2054         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2055       /* FIXME: it sounds like we don't need both prev and
2056          prev_for_neutral members, but I'm leaving them both for now.  */
2057       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2058           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2059         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2060
2061       /* If we overstepped the characters used for resolving neutrals
2062          and whitespace, invalidate their info in the iterator.  */
2063       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2064         bidi_it->next_for_neutral.type = UNKNOWN_BT;
           /* Likewise for the position recorded by the W5 lookahead in
              bidi_resolve_weak.  A negative next_en_pos is left as it
              is.  */
2065       if (bidi_it->next_en_pos >= 0
2066           && bidi_it->charpos >= bidi_it->next_en_pos)
2067         {
2068           bidi_it->next_en_pos = 0;
2069           bidi_it->next_en_type = UNKNOWN_BT;
2070         }
2071       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2072           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2073         bidi_it->next_for_ws.type = UNKNOWN_BT;
2074
2075       /* This must be taken before we fill the iterator with the info
2076          about the next char.  If we scan backwards, the iterator
2077          state must be already cached, so there's no need to know the
2078          embedding level of the previous character, since we will be
2079          returning to our caller shortly.  */
2080       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2081     }
       /* Keep a copy: a state found in the cache may lack this
          information, see below.  */
2082   next_for_neutral = bidi_it->next_for_neutral;
2083
2084   /* Perhaps the character we want is already cached.  If it is, the
2085      call to bidi_cache_find below will return a type other than
2086      UNKNOWN_BT.  */
2087   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2088     {
           /* BOB is the lowest valid character position: 0 when a string
              is iterated, 1 otherwise.  */
2089       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2090                  ? 0 : 1);
2091       if (bidi_it->scan_dir > 0)
2092         {
2093           if (bidi_it->nchars <= 0)
2094             abort ();
2095           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2096         }
2097       else if (bidi_it->charpos >= bob)
2098         /* Implementation note: we allow next_char_pos to be as low as
2099            0 for buffers or -1 for strings, and that is okay because
2100            that's the "position" of the sentinel iterator state we
2101            cached at the beginning of the iteration.  */
2102         next_char_pos = bidi_it->charpos - 1;
2103       if (next_char_pos >= bob - 1)
             /* NOTE(review): on success this evidently loads the cached
                state into BIDI_IT, since the code below reads the result
                from there -- confirm in bidi_cache_find.  */
2104         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2105       else
2106         type = UNKNOWN_BT;
2107     }
2108   else
2109     type = UNKNOWN_BT;
2110   if (type != UNKNOWN_BT)
2111     {
2112       /* Don't lose the information for resolving neutrals!  The
2113          cached states could have been cached before their
2114          next_for_neutral member was computed.  If we are on our way
2115          forward, we can simply take the info from the previous
2116          state.  */
2117       if (bidi_it->scan_dir == 1
2118           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2119         bidi_it->next_for_neutral = next_for_neutral;
2120
2121       /* If resolved_level is -1, it means this state was cached
2122          before it was completely resolved, so we cannot return
2123          it.  */
2124       if (bidi_it->resolved_level != -1)
2125         return bidi_it->resolved_level;
2126     }
2127   if (bidi_it->scan_dir == -1)
2128     /* If we are going backwards, the iterator state is already cached
2129        from previous scans, and should be fully resolved.  */
2130     abort ();
2131
       /* Nothing was found in the cache: advance and resolve the type of
          the next character.  A state that was found, but without a
          resolved level, keeps the type it was cached with.  */
2132   if (type == UNKNOWN_BT)
2133     type = bidi_type_of_next_char (bidi_it);
2134
       /* NOTE(review): for a paragraph separator the level already in
          BIDI_IT is returned; presumably it was set when the paragraph
          end was processed -- confirm.  */
2135   if (type == NEUTRAL_B)
2136     return bidi_it->resolved_level;
2137
2138   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2139   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2140       || (type == WEAK_BN && prev_level == level))
2141     {
2142       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2143         abort ();
2144
2145       /* If the cached state shows a neutral character, it was not
2146          resolved by bidi_resolve_neutral, so do it now.  */
2147       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2148                                      bidi_it->next_for_neutral.type,
2149                                      level);
2150     }
2151
       /* Nothing but these types may be left at this point.  */
2152   if (!(type == STRONG_R
2153         || type == STRONG_L
2154         || type == WEAK_BN
2155         || type == WEAK_EN
2156         || type == WEAK_AN))
2157     abort ();
2158   bidi_it->type = type;
2159   bidi_check_type (bidi_it->type);
2160
2161   /* For L1 below, we need to know, for each WS character, whether
2162      it belongs to a sequence of WS characters preceding a newline
2163      or a TAB or a paragraph separator.  */
2164   if (bidi_it->orig_type == NEUTRAL_WS
2165       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2166     {
           /* The search below works on local copies of the position
              data, so BIDI_IT itself does not move.  */
2167       int ch;
2168       EMACS_INT clen = bidi_it->ch_len;
2169       EMACS_INT bpos = bidi_it->bytepos;
2170       EMACS_INT cpos = bidi_it->charpos;
2171       EMACS_INT disp_pos = bidi_it->disp_pos;
2172       EMACS_INT nc = bidi_it->nchars;
2173       struct bidi_string_data bs = bidi_it->string;
2174       bidi_type_t chtype;
2175       int fwp = bidi_it->frame_window_p;
2176       int dpp = bidi_it->disp_prop;
2177
2178       if (bidi_it->nchars <= 0)
2179         abort ();
           /* Fetch characters until one is found that is neither WS nor
              BN nor an explicit formatting character.  A newline and
              end of text count as NEUTRAL_B here.  */
2180       do {
2181         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2182                               fwp, &clen, &nc);
2183         if (ch == '\n' || ch == BIDI_EOB)
2184           chtype = NEUTRAL_B;
2185         else
2186           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2187       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2188                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
           /* Remember what ended the whitespace and where, so that the
              rest of this whitespace sequence need not search again.  */
2189       bidi_it->next_for_ws.type = chtype;
2190       bidi_check_type (bidi_it->next_for_ws.type);
2191       bidi_it->next_for_ws.charpos = cpos;
2192       bidi_it->next_for_ws.bytepos = bpos;
2193     }
2194
2195   /* Resolve implicit levels, with a twist: PDFs get the embedding
2196      level of the embedding they terminate.  See below for the
2197      reason.  */
2198   if (bidi_it->orig_type == PDF
2199       /* Don't do this if this formatting code didn't change the
2200          embedding level due to invalid or empty embeddings.  */
2201       && prev_level != level)
2202     {
2203       /* Don't look in UAX#9 for the reason for this: it's our own
2204          private quirk.  The reason is that we want the formatting
2205          codes to be delivered so that they bracket the text of their
2206          embedding.  For example, given the text
2207
2208              {RLO}teST{PDF}
2209
2210          we want it to be displayed as
2211
2212              {PDF}STet{RLO}
2213
2214          not as
2215
2216              STet{RLO}{PDF}
2217
2218          which will result because we bump up the embedding level as
2219          soon as we see the RLO and pop it as soon as we see the PDF,
2220          so RLO itself has the same embedding level as "teST", and
2221          thus would be normally delivered last, just before the PDF.
2222          The switch below fiddles with the level of PDF so that this
2223          ugly side effect does not happen.
2224
2225          (This is, of course, only important if the formatting codes
2226          are actually displayed, but Emacs does need to display them
2227          if the user wants to.)  */
2228       level = prev_level;
2229     }
       /* L1: these characters get the base embedding level of the
          paragraph, which is kept in the first slot of the level
          stack.  */
2230   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2231            || bidi_it->orig_type == NEUTRAL_S
2232            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2233            || (bidi_it->orig_type == NEUTRAL_WS
2234                && (bidi_it->next_for_ws.type == NEUTRAL_B
2235                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2236     level = bidi_it->level_stack[0].level;
       /* On an even level, R goes up one level, EN and AN go up two.  */
2237   else if ((level & 1) == 0) /* I1 */
2238     {
2239       if (type == STRONG_R)
2240         level++;
2241       else if (type == WEAK_EN || type == WEAK_AN)
2242         level += 2;
2243     }
       /* On an odd level, L, EN and AN all go up one level.  */
2244   else                  /* I2 */
2245     {
2246       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2247         level++;
2248     }
2249
2250   bidi_it->resolved_level = level;
2251   return level;
2252 }
2253
2254 /* Position BIDI_IT on the far edge of the level LEVEL.  A non-zero
2255    END_FLAG says that we stand at the end of that level and are about
2256    to resume scanning the lower level; the search then goes against
2257    BIDI_IT's scan direction instead of along it.
2258
2259    When the cache knows where the level changes, BIDI_IT is loaded
2260    from the state cached there.  Otherwise (allowed only when END_FLAG
2261    is zero) we step through the buffer or string, caching every state,
2262    until a character whose level is below LEVEL has been reached.
2263
2264    A ``level'' here is not a UAX#9 ``level run'': it also covers all
2265    the higher levels nested in it.  With levels like these
2266
2267          11111112222222333333334443343222222111111112223322111
2268                 A      B                    C
2269
2270    and a left-to-right scan standing at point A, we end up at point
2271    C, although the UAX#9 ``level 2 run'' already ends at point B.  */
2272 static void
2273 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2274 {
2275   int search_dir = bidi_it->scan_dir;
2276   ptrdiff_t cached_edge;
2277
2278   if (end_flag)
2279     search_dir = -search_dir;
2280
2281   /* The cache gets the first try.  */
2282   cached_edge = bidi_cache_find_level_change (level, search_dir, end_flag);
2283   if (cached_edge >= bidi_cache_start)
2284     {
2285       bidi_cache_fetch_state (cached_edge, bidi_it);
2286       return;
2287     }
2288
2289   /* At the end of a level, both of its edges must be in the cache.  */
2290   if (end_flag)
2291     abort ();
2292
2293   bidi_cache_iterator_state (bidi_it, 1);
2294   while (bidi_level_of_next_char (bidi_it) >= level)
2295     bidi_cache_iterator_state (bidi_it, 1);
2296   bidi_cache_iterator_state (bidi_it, 1);
2297 }
2298
2299 void
2300 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2301 {
2302   int old_level, new_level, next_level;
2303   struct bidi_it sentinel;
2304   struct gcpro gcpro1;
2305
2306   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2307     abort ();
2308
2309   if (bidi_it->scan_dir == 0)
2310     {
2311       bidi_it->scan_dir = 1;    /* default to logical order */
2312     }
2313
2314   /* The code below can call eval, and thus cause GC.  If we are
2315      iterating a Lisp string, make sure it won't be GCed.  */
2316   if (STRINGP (bidi_it->string.lstring))
2317     GCPRO1 (bidi_it->string.lstring);
2318
2319   /* If we just passed a newline, initialize for the next line.  */
2320   if (!bidi_it->first_elt
2321       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2322     bidi_line_init (bidi_it);
2323
2324   /* Prepare the sentinel iterator state, and cache it.  When we bump
2325      into it, scanning backwards, we'll know that the last non-base
2326      level is exhausted.  */
2327   if (bidi_cache_idx == bidi_cache_start)
2328     {
2329       bidi_copy_it (&sentinel, bidi_it);
2330       if (bidi_it->first_elt)
2331         {
2332           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2333           sentinel.bytepos--;
2334           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2335           sentinel.ch_len = 1;
2336           sentinel.nchars = 1;
2337         }
2338       bidi_cache_iterator_state (&sentinel, 1);
2339     }
2340
2341   old_level = bidi_it->resolved_level;
2342   new_level = bidi_level_of_next_char (bidi_it);
2343
2344   /* Reordering of resolved levels (clause L2) is implemented by
2345      jumping to the other edge of the level and flipping direction of
2346      scanning the text whenever we find a level change.  */
2347   if (new_level != old_level)
2348     {
2349       int ascending = new_level > old_level;
2350       int level_to_search = ascending ? old_level + 1 : old_level;
2351       int incr = ascending ? 1 : -1;
2352       int expected_next_level = old_level + incr;
2353
2354       /* Jump (or walk) to the other edge of this level.  */
2355       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2356       /* Switch scan direction and peek at the next character in the
2357          new direction.  */
2358       bidi_it->scan_dir = -bidi_it->scan_dir;
2359
2360       /* The following loop handles the case where the resolved level
2361          jumps by more than one.  This is typical for numbers inside a
2362          run of text with left-to-right embedding direction, but can
2363          also happen in other situations.  In those cases the decision
2364          where to continue after a level change, and in what direction,
2365          is tricky.  For example, given a text like below:
2366
2367                   abcdefgh
2368                   11336622
2369
2370          (where the numbers below the text show the resolved levels),
2371          the result of reordering according to UAX#9 should be this:
2372
2373                   efdcghba
2374
2375          This is implemented by the loop below which flips direction
2376          and jumps to the other edge of the level each time it finds
2377          the new level not to be the expected one.  The expected level
2378          is always one more or one less than the previous one.  */
2379       next_level = bidi_peek_at_next_level (bidi_it);
2380       while (next_level != expected_next_level)
2381         {
2382           expected_next_level += incr;
2383           level_to_search += incr;
2384           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2385           bidi_it->scan_dir = -bidi_it->scan_dir;
2386           next_level = bidi_peek_at_next_level (bidi_it);
2387         }
2388
2389       /* Finally, deliver the next character in the new direction.  */
2390       next_level = bidi_level_of_next_char (bidi_it);
2391     }
2392
2393   /* Take note when we have just processed the newline that precedes
2394      the end of the paragraph.  The next time we are about to be
2395      called, set_iterator_to_next will automatically reinit the
2396      paragraph direction, if needed.  We do this at the newline before
2397      the paragraph separator, because the next character might not be
2398      the first character of the next paragraph, due to the bidi
2399      reordering, whereas we _must_ know the paragraph base direction
2400      _before_ we process the paragraph's text, since the base
2401      direction affects the reordering.  */
2402   if (bidi_it->scan_dir == 1
2403       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2404     {
2405       /* The paragraph direction of the entire string, once
2406          determined, is in effect for the entire string.  Setting the
2407          separator limit to the end of the string prevents
2408          bidi_paragraph_init from being called automatically on this
2409          string.  */
2410       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2411         bidi_it->separator_limit = bidi_it->string.schars;
2412       else if (bidi_it->bytepos < ZV_BYTE)
2413         {
2414           EMACS_INT sep_len
2415             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2416                                      bidi_it->bytepos + bidi_it->ch_len);
2417           if (bidi_it->nchars <= 0)
2418             abort ();
2419           if (sep_len >= 0)
2420             {
2421               bidi_it->new_paragraph = 1;
2422               /* Record the buffer position of the last character of the
2423                  paragraph separator.  */
2424               bidi_it->separator_limit
2425                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2426             }
2427         }
2428     }
2429
2430   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2431     {
2432       /* If we are at paragraph's base embedding level and beyond the
2433          last cached position, the cache's job is done and we can
2434          discard it.  */
2435       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2436           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2437                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2438         bidi_cache_reset ();
2439         /* But as long as we are caching during forward scan, we must
2440            cache each state, or else the cache integrity will be
2441            compromised: it assumes cached states correspond to buffer
2442            positions 1:1.  */
2443       else
2444         bidi_cache_iterator_state (bidi_it, 1);
2445     }
2446
2447   if (STRINGP (bidi_it->string.lstring))
2448     UNGCPRO;
2449 }
2450
2451 /* This is meant to be called from within the debugger, whenever you
2452    wish to examine the cache contents.  */
2453 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2454 void
2455 bidi_dump_cached_states (void)
2456 {
2457   ptrdiff_t i;
2458   int ndigits = 1;
2459
2460   if (bidi_cache_idx == 0)
2461     {
2462       fprintf (stderr, "The cache is empty.\n");
2463       return;
2464     }
2465   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2466            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2467
2468   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2469     ndigits++;
2470   fputs ("ch  ", stderr);
2471   for (i = 0; i < bidi_cache_idx; i++)
2472     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2473   fputs ("\n", stderr);
2474   fputs ("lvl ", stderr);
2475   for (i = 0; i < bidi_cache_idx; i++)
2476     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2477   fputs ("\n", stderr);
2478   fputs ("pos ", stderr);
2479   for (i = 0; i < bidi_cache_idx; i++)
2480     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2481   fputs ("\n", stderr);
2482 }