src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
  73  * pointer with a BRANCH on both ends of it is connecting two alternatives.
  74  * (Here we have one of the subtle syntax dependencies: an individual BRANCH
  75  * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
  77  * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
 139 /* definition   number             opnd?    meaning */
 140 #define END             0       /*      End of program or NOMATCH operand. */
 141 #define BOL             1       /*      Match "" at beginning of line. */
 142 #define EOL             2       /*      Match "" at end of line. */
 143 #define BRANCH          3       /* node Match this alternative, or the
 144                                  *      next... */
 145 #define BACK            4       /*      Match "", "next" ptr points backward. */
 146 #define EXACTLY         5       /* str  Match this string. */
 147 #define NOTHING         6       /*      Match empty string. */
 148 #define STAR            7       /* node Match this (simple) thing 0 or more
 149                                  *      times. */
 150 #define PLUS            8       /* node Match this (simple) thing 1 or more
 151                                  *      times. */
 152 #define MATCH           9       /* node match the operand zero-width */
 153 #define NOMATCH         10      /* node check for no match with operand */
 154 #define BEHIND          11      /* node look behind for a match with operand */
 155 #define NOBEHIND        12      /* node look behind for no match with operand */
 156 #define SUBPAT          13      /* node match the operand here */
 157 #define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
 158                                  *      n times (\{m,n\}). */
 159 #define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
 160 #define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
 161 #define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
 162                                  *      and BRACE_COMPLEX. */
 163 #define NEWL            18      /*      Match line-break */
 164 #define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
 203 #define MOPEN           80  /* -89       Mark this point in input as start of
 204                                  *       \( subexpr.  MOPEN + 0 marks start of
 205                                  *       match. */
 206 #define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
 207                                  *       end of match. */
 208 #define BACKREF         100 /* -109 node Match same string again \1-\9 */
 209
 210 #ifdef FEAT_SYN_HL
 211 # define ZOPEN          110 /* -119      Mark this point in input as start of
 212                                  *       \z( subexpr. */
 213 # define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
 214 # define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
 215 #endif
 216
 217 #define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */
 218
 219 #define NOPEN           150     /*      Mark this point in input as start of
 220                                         \%( subexpr. */
 221 #define NCLOSE          151     /*      Analogous to NOPEN. */
 222
 223 #define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
 224 #define RE_BOF          201     /*      Match "" at beginning of file. */
 225 #define RE_EOF          202     /*      Match "" at end of file. */
 226 #define CURSOR          203     /*      Match location of cursor. */
 227
 228 #define RE_LNUM         204     /* nr cmp  Match line number */
 229 #define RE_COL          205     /* nr cmp  Match column number */
 230 #define RE_VCOL         206     /* nr cmp  Match virtual column number */
 231
 232 #define RE_MARK         207     /* mark cmp  Match mark position */
 233 #define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
 240 #define Magic(x)        ((int)(x) - 256)
 241 #define un_Magic(x)     ((x) + 256)
 242 #define is_Magic(x)     ((x) < 0)
 243
 244 static int no_Magic __ARGS((int x));
 245 static int toggle_Magic __ARGS((int x));
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
 271 #define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
 310 #define OP(p)           ((int)*(p))
 311 #define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
 312 #define OPERAND(p)      ((p) + 3)
 313 /* Obtain an operand that was stored as four bytes, MSB first. */
 314 #define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 315                         + ((long)(p)[5] << 8) + (long)(p)[6])
 316 /* Obtain a second operand stored as four bytes. */
 317 #define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
 318 /* Obtain a second single-byte operand stored after a four bytes operand. */
 319 #define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
 324 #define UCHARAT(p)      ((int)*(char_u *)(p))
 325
 326 /* Used for an error (down from) vim_regcomp(): give the error message, set
 327  * rc_did_emsg and return NULL */
 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 332
 333 #define MAX_LIMIT       (32767L << 16L)
 334
 335 static int re_multi_type __ARGS((int));
 336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 337 static char_u *cstrchr __ARGS((char_u *, int));
 338
 339 #ifdef DEBUG
 340 static void     regdump __ARGS((char_u *, regprog_T *));
 341 static char_u   *regprop __ARGS((char_u *));
 342 #endif
 343
 344 #define NOT_MULTI       0
 345 #define MULTI_ONE       1
 346 #define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
 366 #define HASWIDTH        0x1     /* Known never to match null string. */
 367 #define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
 368 #define SPSTART         0x4     /* Starts with * or +. */
 369 #define HASNL           0x8     /* Contains some \n. */
 370 #define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
 371 #define WORST           0       /* Worst case. */
 372
 373 /*
 374  * When regcode is set to this value, code is not emitted and size is computed
 375  * instead.
 376  */
 377 #define JUST_CALC_SIZE  ((char_u *) -1)
 378
/* Allocated string owned by this file; released by free_regexp_stuff(). */
static char_u           *reg_prev_sub = NULL;

#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that reg_prev_sub points to.
 * Only compiled when EXITFREE or PROTO is defined.
 * NOTE(review): the pointer is not reset to NULL afterwards; presumably this
 * only runs when exiting -- confirm against the callers.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
}
#endif
 388
 389 /*
 390  * REGEXP_INRANGE contains all characters which are always special in a []
 391  * range after '\'.
 392  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 393  * These are:
 394  *  \n  - New line (NL).
 395  *  \r  - Carriage Return (CR).
 396  *  \t  - Tab (TAB).
 397  *  \e  - Escape (ESC).
 398  *  \b  - Backspace (Ctrl_H).
 399  *  \d  - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
 401  *  \x  - Character code in hex, eg \x4a
 402  *  \u  - Multibyte character code, eg \u20ac
 403  *  \U  - Long multibyte character code, eg \U12345678
 404  */
 405 static char_u REGEXP_INRANGE[] = "]^-n\\";
 406 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
 407
 408 static int      backslash_trans __ARGS((int c));
 409 static int      get_char_class __ARGS((char_u **pp));
 410 static int      get_equi_class __ARGS((char_u **pp));
 411 static void     reg_equi_class __ARGS((int c));
 412 static int      get_coll_element __ARGS((char_u **pp));
 413 static char_u   *skip_anyof __ARGS((char_u *p));
 414 static void     init_class_tab __ARGS((void));
 415
 416 /*
 417  * Translate '\x' to its control character, except "\n", which is Magic.
 418  */
 419     static int
 420 backslash_trans(c)
 421     int         c;
 422 {
 423     switch (c)
 424     {
 425         case 'r':   return CAR;
 426         case 't':   return TAB;
 427         case 'e':   return ESC;
 428         case 'b':   return BS;
 429     }
 430     return c;
 431 }
 432
 433 /*
 434  * Check for a character class name "[:name:]".  "pp" points to the '['.
 435  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 436  * recognized.  Otherwise "pp" is advanced to after the item.
 437  */
 438     static int
 439 get_char_class(pp)
 440     char_u      **pp;
 441 {
 442     static const char *(class_names[]) =
 443     {
 444         "alnum:]",
 445 #define CLASS_ALNUM 0
 446         "alpha:]",
 447 #define CLASS_ALPHA 1
 448         "blank:]",
 449 #define CLASS_BLANK 2
 450         "cntrl:]",
 451 #define CLASS_CNTRL 3
 452         "digit:]",
 453 #define CLASS_DIGIT 4
 454         "graph:]",
 455 #define CLASS_GRAPH 5
 456         "lower:]",
 457 #define CLASS_LOWER 6
 458         "print:]",
 459 #define CLASS_PRINT 7
 460         "punct:]",
 461 #define CLASS_PUNCT 8
 462         "space:]",
 463 #define CLASS_SPACE 9
 464         "upper:]",
 465 #define CLASS_UPPER 10
 466         "xdigit:]",
 467 #define CLASS_XDIGIT 11
 468         "tab:]",
 469 #define CLASS_TAB 12
 470         "return:]",
 471 #define CLASS_RETURN 13
 472         "backspace:]",
 473 #define CLASS_BACKSPACE 14
 474         "escape:]",
 475 #define CLASS_ESCAPE 15
 476     };
 477 #define CLASS_NONE 99
 478     int i;
 479
 480     if ((*pp)[1] == ':')
 481     {
 482         for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
 483             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 484             {
 485                 *pp += STRLEN(class_names[i]) + 2;
 486                 return i;
 487             }
 488     }
 489     return CLASS_NONE;
 490 }
 491
 492 /*
 493  * Specific version of character class functions.
 494  * Using a table to keep this fast.
 495  */
 496 static short    class_tab[256];
 497
 498 #define     RI_DIGIT    0x01
 499 #define     RI_HEX      0x02
 500 #define     RI_OCTAL    0x04
 501 #define     RI_WORD     0x08
 502 #define     RI_HEAD     0x10
 503 #define     RI_ALPHA    0x20
 504 #define     RI_LOWER    0x40
 505 #define     RI_UPPER    0x80
 506 #define     RI_WHITE    0x100
 507
 508     static void
 509 init_class_tab()
 510 {
 511     int         i;
 512     static int  done = FALSE;
 513
 514     if (done)
 515         return;
 516
 517     for (i = 0; i < 256; ++i)
 518     {
 519         if (i >= '0' && i <= '7')
 520             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 521         else if (i >= '8' && i <= '9')
 522             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 523         else if (i >= 'a' && i <= 'f')
 524             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 525 #ifdef EBCDIC
 526         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 527                                                     || (i >= 's' && i <= 'z'))
 528 #else
 529         else if (i >= 'g' && i <= 'z')
 530 #endif
 531             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 532         else if (i >= 'A' && i <= 'F')
 533             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 534 #ifdef EBCDIC
 535         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 536                                                     || (i >= 'S' && i <= 'Z'))
 537 #else
 538         else if (i >= 'G' && i <= 'Z')
 539 #endif
 540             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 541         else if (i == '_')
 542             class_tab[i] = RI_WORD + RI_HEAD;
 543         else
 544             class_tab[i] = 0;
 545     }
 546     class_tab[' '] |= RI_WHITE;
 547     class_tab['\t'] |= RI_WHITE;
 548     done = TRUE;
 549 }
 550
 551 #ifdef FEAT_MBYTE
 552 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 553 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 554 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 555 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 556 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 557 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 558 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 559 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 560 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 561 #else
 562 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 563 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 564 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 565 # define ri_word(c)     (class_tab[c] & RI_WORD)
 566 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 567 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 568 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 569 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 570 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 571 #endif
 572
 573 /* flags for regflags */
 574 #define RF_ICASE    1   /* ignore case */
 575 #define RF_NOICASE  2   /* don't ignore case */
 576 #define RF_HASNL    4   /* can match a NL */
 577 #define RF_ICOMBINE 8   /* ignore combining characters */
 578 #define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 579
 580 /*
 581  * Global work variables for vim_regcomp().
 582  */
 583
 584 static char_u   *regparse;      /* Input-scan pointer. */
 585 static int      prevchr_len;    /* byte length of previous char */
 586 static int      num_complex_braces; /* Complex \{...} count */
 587 static int      regnpar;        /* () count. */
 588 #ifdef FEAT_SYN_HL
 589 static int      regnzpar;       /* \z() count. */
 590 static int      re_has_z;       /* \z item detected */
 591 #endif
 592 static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
 593 static long     regsize;        /* Code size. */
 594 static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
 595 static unsigned regflags;       /* RF_ flags for prog */
 596 static long     brace_min[10];  /* Minimums for complex brace repeats */
 597 static long     brace_max[10];  /* Maximums for complex brace repeats */
 598 static int      brace_count[10]; /* Current counts for complex brace repeats */
 599 #if defined(FEAT_SYN_HL) || defined(PROTO)
 600 static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
 601 #endif
 602 static int      one_exactly = FALSE;    /* only do one char for EXACTLY */
 603
 604 static int      reg_magic;      /* magicness of the pattern: */
 605 #define MAGIC_NONE      1       /* "\V" very unmagic */
 606 #define MAGIC_OFF       2       /* "\M" or 'magic' off */
 607 #define MAGIC_ON        3       /* "\m" or 'magic' */
 608 #define MAGIC_ALL       4       /* "\v" very magic */
 609
 610 static int      reg_string;     /* matching with a string instead of a buffer
 611                                    line */
 612 static int      reg_strict;     /* "[abc" is illegal */
 613
 614 /*
 615  * META contains all characters that may be magic, except '^' and '$'.
 616  */
 617
 618 #ifdef EBCDIC
 619 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
 620 #else
 621 /* META[] is used often enough to justify turning it into a table. */
 622 static char_u META_flags[] = {
 623     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 624     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 625 /*                 %  &     (  )  *  +        .    */
 626     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
 627 /*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
 628     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
 629 /*  @  A     C  D     F     H  I     K  L  M     O */
 630     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
 631 /*  P        S     U  V  W  X     Z  [           _ */
 632     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
 633 /*     a     c  d     f     h  i     k  l  m  n  o */
 634     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
 635 /*  p        s     u  v  w  x     z  {  |     ~    */
 636     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 637 };
 638 #endif
 639
 640 static int      curchr;
 641
 642 /* arguments for reg() */
 643 #define REG_NOPAREN     0       /* toplevel reg() */
 644 #define REG_PAREN       1       /* \(\) */
 645 #define REG_ZPAREN      2       /* \z(\) */
 646 #define REG_NPAREN      3       /* \%(\) */
 647
 648 /*
 649  * Forward declarations for vim_regcomp()'s friends.
 650  */
 651 static void     initchr __ARGS((char_u *));
 652 static int      getchr __ARGS((void));
 653 static void     skipchr_keepstart __ARGS((void));
 654 static int      peekchr __ARGS((void));
 655 static void     skipchr __ARGS((void));
 656 static void     ungetchr __ARGS((void));
 657 static int      gethexchrs __ARGS((int maxinputlen));
 658 static int      getoctchrs __ARGS((void));
 659 static int      getdecchrs __ARGS((void));
 660 static int      coll_get_char __ARGS((void));
 661 static void     regcomp_start __ARGS((char_u *expr, int flags));
 662 static char_u   *reg __ARGS((int, int *));
 663 static char_u   *regbranch __ARGS((int *flagp));
 664 static char_u   *regconcat __ARGS((int *flagp));
 665 static char_u   *regpiece __ARGS((int *));
 666 static char_u   *regatom __ARGS((int *));
 667 static char_u   *regnode __ARGS((int));
 668 #ifdef FEAT_MBYTE
 669 static int      use_multibytecode __ARGS((int c));
 670 #endif
 671 static int      prog_magic_wrong __ARGS((void));
 672 static char_u   *regnext __ARGS((char_u *));
 673 static void     regc __ARGS((int b));
 674 #ifdef FEAT_MBYTE
 675 static void     regmbc __ARGS((int c));
 676 #else
 677 # define regmbc(c) regc(c)
 678 #endif
 679 static void     reginsert __ARGS((int, char_u *));
 680 static void     reginsert_limits __ARGS((int, long, long, char_u *));
 681 static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
 682 static int      read_limits __ARGS((long *, long *));
 683 static void     regtail __ARGS((char_u *, char_u *));
 684 static void     regoptail __ARGS((char_u *, char_u *));
 685
 686 /*
 687  * Return TRUE if compiled regular expression "prog" can match a line break.
 688  */
 689     int
 690 re_multiline(prog)
 691     regprog_T *prog;
 692 {
 693     return (prog->regflags & RF_HASNL);
 694 }
 695
 696 /*
 697  * Return TRUE if compiled regular expression "prog" looks before the start
 698  * position (pattern contains "\@<=" or "\@<!").
 699  */
 700     int
 701 re_lookbehind(prog)
 702     regprog_T *prog;
 703 {
 704     return (prog->regflags & RF_LOOKBH);
 705 }
 706
 707 /*
 708  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 709  * Returns a character representing the class. Zero means that no item was
 710  * recognized.  Otherwise "pp" is advanced to after the item.
 711  */
 712     static int
 713 get_equi_class(pp)
 714     char_u      **pp;
 715 {
 716     int         c;
 717     int         l = 1;
 718     char_u      *p = *pp;
 719
 720     if (p[1] == '=')
 721     {
 722 #ifdef FEAT_MBYTE
 723         if (has_mbyte)
 724             l = (*mb_ptr2len)(p + 2);
 725 #endif
 726         if (p[l + 2] == '=' && p[l + 3] == ']')
 727         {
 728 #ifdef FEAT_MBYTE
 729             if (has_mbyte)
 730                 c = mb_ptr2char(p + 2);
 731             else
 732 #endif
 733                 c = p[2];
 734             *pp += l + 4;
 735             return c;
 736         }
 737     }
 738     return 0;
 739 }
 740
 741 /*
 742  * Produce the bytes for equivalence class "c".
 743  * Currently only handles latin1, latin9 and utf-8.
 744  */
 745     static void
 746 reg_equi_class(c)
 747     int     c;
 748 {
 749 #ifdef FEAT_MBYTE
 750     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 751                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 752 #endif
 753     {
 754         switch (c)
 755         {
 756             case 'A': case '\300': case '\301': case '\302':
 757             case '\303': case '\304': case '\305':
 758                       regmbc('A'); regmbc('\300'); regmbc('\301');
 759                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 760                       regmbc('\305');
 761                       return;
 762             case 'C': case '\307':
 763                       regmbc('C'); regmbc('\307');
 764                       return;
 765             case 'E': case '\310': case '\311': case '\312': case '\313':
 766                       regmbc('E'); regmbc('\310'); regmbc('\311');
 767                       regmbc('\312'); regmbc('\313');
 768                       return;
 769             case 'I': case '\314': case '\315': case '\316': case '\317':
 770                       regmbc('I'); regmbc('\314'); regmbc('\315');
 771                       regmbc('\316'); regmbc('\317');
 772                       return;
 773             case 'N': case '\321':
 774                       regmbc('N'); regmbc('\321');
 775                       return;
 776             case 'O': case '\322': case '\323': case '\324': case '\325':
 777             case '\326':
 778                       regmbc('O'); regmbc('\322'); regmbc('\323');
 779                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 780                       return;
 781             case 'U': case '\331': case '\332': case '\333': case '\334':
 782                       regmbc('U'); regmbc('\331'); regmbc('\332');
 783                       regmbc('\333'); regmbc('\334');
 784                       return;
 785             case 'Y': case '\335':
 786                       regmbc('Y'); regmbc('\335');
 787                       return;
 788             case 'a': case '\340': case '\341': case '\342':
 789             case '\343': case '\344': case '\345':
 790                       regmbc('a'); regmbc('\340'); regmbc('\341');
 791                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 792                       regmbc('\345');
 793                       return;
 794             case 'c': case '\347':
 795                       regmbc('c'); regmbc('\347');
 796                       return;
 797             case 'e': case '\350': case '\351': case '\352': case '\353':
 798                       regmbc('e'); regmbc('\350'); regmbc('\351');
 799                       regmbc('\352'); regmbc('\353');
 800                       return;
 801             case 'i': case '\354': case '\355': case '\356': case '\357':
 802                       regmbc('i'); regmbc('\354'); regmbc('\355');
 803                       regmbc('\356'); regmbc('\357');
 804                       return;
 805             case 'n': case '\361':
 806                       regmbc('n'); regmbc('\361');
 807                       return;
 808             case 'o': case '\362': case '\363': case '\364': case '\365':
 809             case '\366':
 810                       regmbc('o'); regmbc('\362'); regmbc('\363');
 811                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 812                       return;
 813             case 'u': case '\371': case '\372': case '\373': case '\374':
 814                       regmbc('u'); regmbc('\371'); regmbc('\372');
 815                       regmbc('\373'); regmbc('\374');
 816                       return;
 817             case 'y': case '\375': case '\377':
 818                       regmbc('y'); regmbc('\375'); regmbc('\377');
 819                       return;
 820         }
 821     }
 822     regmbc(c);
 823 }
 824
 825 /*
 826  * Check for a collating element "[.a.]".  "pp" points to the '['.
 827  * Returns a character. Zero means that no item was recognized.  Otherwise
 828  * "pp" is advanced to after the item.
 829  * Currently only single characters are recognized!
 830  */
 831     static int
 832 get_coll_element(pp)
 833     char_u      **pp;
 834 {
 835     int         c;
 836     int         l = 1;
 837     char_u      *p = *pp;
 838
 839     if (p[1] == '.')
 840     {
 841 #ifdef FEAT_MBYTE
 842         if (has_mbyte)
 843             l = (*mb_ptr2len)(p + 2);
 844 #endif
 845         if (p[l + 2] == '.' && p[l + 3] == ']')
 846         {
 847 #ifdef FEAT_MBYTE
 848             if (has_mbyte)
 849                 c = mb_ptr2char(p + 2);
 850             else
 851 #endif
 852                 c = p[2];
 853             *pp += l + 4;
 854             return c;
 855         }
 856     }
 857     return 0;
 858 }
 859
 860
 861 /*
 862  * Skip over a "[]" range.
 863  * "p" must point to the character after the '['.
 864  * The returned pointer is on the matching ']', or the terminating NUL.
 865  */
 866     static char_u *
 867 skip_anyof(p)
 868     char_u      *p;
 869 {
 870     int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
 871     int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
 872 #ifdef FEAT_MBYTE
 873     int         l;
 874 #endif
 875
 876     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 877     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 878
 879     if (*p == '^')      /* Complement of range. */
 880         ++p;
 881     if (*p == ']' || *p == '-')
 882         ++p;
 883     while (*p != NUL && *p != ']')
 884     {
 885 #ifdef FEAT_MBYTE
 886         if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 887             p += l;
 888         else
 889 #endif
 890             if (*p == '-')
 891             {
 892                 ++p;
 893                 if (*p != ']' && *p != NUL)
 894                     mb_ptr_adv(p);
 895             }
 896         else if (*p == '\\'
 897                 && !cpo_bsl
 898                 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
 899                     || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
 900             p += 2;
 901         else if (*p == '[')
 902         {
 903             if (get_char_class(&p) == CLASS_NONE
 904                     && get_equi_class(&p) == 0
 905                     && get_coll_element(&p) == 0)
 906                 ++p; /* It was not a class name */
 907         }
 908         else
 909             ++p;
 910     }
 911
 912     return p;
 913 }
 914
 915 /*
 916  * Skip past regular expression.
 917  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 918  * Take care of characters with a backslash in front of it.
 919  * Skip strings inside [ and ].
 920  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 921  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 922  * is changed in-place.
 923  */
 924     char_u *
 925 skip_regexp(startp, dirc, magic, newp)
 926     char_u      *startp;
 927     int         dirc;
 928     int         magic;
 929     char_u      **newp;
 930 {
 931     int         mymagic;
 932     char_u      *p = startp;
 933
 934     if (magic)
 935         mymagic = MAGIC_ON;
 936     else
 937         mymagic = MAGIC_OFF;
 938
 939     for (; p[0] != NUL; mb_ptr_adv(p))
 940     {
 941         if (p[0] == dirc)       /* found end of regexp */
 942             break;
 943         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 944                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 945         {
 946             p = skip_anyof(p + 1);
 947             if (p[0] == NUL)
 948                 break;
 949         }
 950         else if (p[0] == '\\' && p[1] != NUL)
 951         {
 952             if (dirc == '?' && newp != NULL && p[1] == '?')
 953             {
 954                 /* change "\?" to "?", make a copy first. */
 955                 if (*newp == NULL)
 956                 {
 957                     *newp = vim_strsave(startp);
 958                     if (*newp != NULL)
 959                         p = *newp + (p - startp);
 960                 }
 961                 if (*newp != NULL)
 962                     mch_memmove(p, p + 1, STRLEN(p));
 963                 else
 964                     ++p;
 965             }
 966             else
 967                 ++p;    /* skip next character */
 968             if (*p == 'v')
 969                 mymagic = MAGIC_ALL;
 970             else if (*p == 'V')
 971                 mymagic = MAGIC_NONE;
 972         }
 973     }
 974     return p;
 975 }
 976
 977 /*
 978  * vim_regcomp() - compile a regular expression into internal code
 979  * Returns the program in allocated space.  Returns NULL for an error.
 980  *
 981  * We can't allocate space until we know how big the compiled form will be,
 982  * but we can't compile it (and thus know how big it is) until we've got a
 983  * place to put the code.  So we cheat:  we compile it twice, once with code
 984  * generation turned off and size counting turned on, and once "for real".
 985  * This also means that we don't allocate space until we are sure that the
 986  * thing really will compile successfully, and we never have to move the
 987  * code and thus invalidate pointers into it.  (Note that it has to be in
 988  * one piece because vim_free() must be able to free it all.)
 989  *
 990  * Whether upper/lower case is to be ignored is decided when executing the
 991  * program, it does not matter here.
 992  *
 993  * Beware that the optimization-preparation code in here knows about some
 994  * of the structure of the compiled regexp.
 995  * "re_flags": RE_MAGIC and/or RE_STRING.
 996  */
 997     regprog_T *
 998 vim_regcomp(expr, re_flags)
 999     char_u      *expr;
1000     int         re_flags;
1001 {
1002     regprog_T   *r;
1003     char_u      *scan;
1004     char_u      *longest;
1005     int         len;
1006     int         flags;
1007
1008     if (expr == NULL)
1009         EMSG_RET_NULL(_(e_null));
1010
1011     init_class_tab();
1012
1013     /*
1014      * First pass: determine size, legality.
1015      */
1016     regcomp_start(expr, re_flags);
1017     regcode = JUST_CALC_SIZE;
1018     regc(REGMAGIC);
1019     if (reg(REG_NOPAREN, &flags) == NULL)
1020         return NULL;
1021
1022     /* Small enough for pointer-storage convention? */
1023 #ifdef SMALL_MALLOC             /* 16 bit storage allocation */
1024     if (regsize >= 65536L - 256L)
1025         EMSG_RET_NULL(_("E339: Pattern too long"));
1026 #endif
1027
1028     /* Allocate space. */
1029     r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1030     if (r == NULL)
1031         return NULL;
1032
1033     /*
1034      * Second pass: emit code.
1035      */
1036     regcomp_start(expr, re_flags);
1037     regcode = r->program;
1038     regc(REGMAGIC);
1039     if (reg(REG_NOPAREN, &flags) == NULL)
1040     {
1041         vim_free(r);
1042         return NULL;
1043     }
1044
1045     /* Dig out information for optimizations. */
1046     r->regstart = NUL;          /* Worst-case defaults. */
1047     r->reganch = 0;
1048     r->regmust = NULL;
1049     r->regmlen = 0;
1050     r->regflags = regflags;
1051     if (flags & HASNL)
1052         r->regflags |= RF_HASNL;
1053     if (flags & HASLOOKBH)
1054         r->regflags |= RF_LOOKBH;
1055 #ifdef FEAT_SYN_HL
1056     /* Remember whether this pattern has any \z specials in it. */
1057     r->reghasz = re_has_z;
1058 #endif
1059     scan = r->program + 1;      /* First BRANCH. */
1060     if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1061     {
1062         scan = OPERAND(scan);
1063
1064         /* Starting-point info. */
1065         if (OP(scan) == BOL || OP(scan) == RE_BOF)
1066         {
1067             r->reganch++;
1068             scan = regnext(scan);
1069         }
1070
1071         if (OP(scan) == EXACTLY)
1072         {
1073 #ifdef FEAT_MBYTE
1074             if (has_mbyte)
1075                 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1076             else
1077 #endif
1078                 r->regstart = *OPERAND(scan);
1079         }
1080         else if ((OP(scan) == BOW
1081                     || OP(scan) == EOW
1082                     || OP(scan) == NOTHING
1083                     || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1084                     || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1085                  && OP(regnext(scan)) == EXACTLY)
1086         {
1087 #ifdef FEAT_MBYTE
1088             if (has_mbyte)
1089                 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1090             else
1091 #endif
1092                 r->regstart = *OPERAND(regnext(scan));
1093         }
1094
1095         /*
1096          * If there's something expensive in the r.e., find the longest
1097          * literal string that must appear and make it the regmust.  Resolve
1098          * ties in favor of later strings, since the regstart check works
1099          * with the beginning of the r.e. and avoiding duplication
1100          * strengthens checking.  Not a strong reason, but sufficient in the
1101          * absence of others.
1102          */
1103         /*
1104          * When the r.e. starts with BOW, it is faster to look for a regmust
1105          * first. Used a lot for "#" and "*" commands. (Added by mool).
1106          */
1107         if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1108                                                           && !(flags & HASNL))
1109         {
1110             longest = NULL;
1111             len = 0;
1112             for (; scan != NULL; scan = regnext(scan))
1113                 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1114                 {
1115                     longest = OPERAND(scan);
1116                     len = (int)STRLEN(OPERAND(scan));
1117                 }
1118             r->regmust = longest;
1119             r->regmlen = len;
1120         }
1121     }
1122 #ifdef DEBUG
1123     regdump(expr, r);
1124 #endif
1125     return r;
1126 }
1127
1128 /*
1129  * Setup to parse the regexp.  Used once to get the length and once to do it.
1130  */
1131     static void
1132 regcomp_start(expr, re_flags)
1133     char_u      *expr;
1134     int         re_flags;           /* see vim_regcomp() */
1135 {
1136     initchr(expr);
1137     if (re_flags & RE_MAGIC)
1138         reg_magic = MAGIC_ON;
1139     else
1140         reg_magic = MAGIC_OFF;
1141     reg_string = (re_flags & RE_STRING);
1142     reg_strict = (re_flags & RE_STRICT);
1143
1144     num_complex_braces = 0;
1145     regnpar = 1;
1146     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1147 #ifdef FEAT_SYN_HL
1148     regnzpar = 1;
1149     re_has_z = 0;
1150 #endif
1151     regsize = 0L;
1152     regflags = 0;
1153 #if defined(FEAT_SYN_HL) || defined(PROTO)
1154     had_eol = FALSE;
1155 #endif
1156 }
1157
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag: TRUE when the EOL item "$" was encountered
 * while compiling since the last regcomp_start(), which resets the flag.
 * Passing this through a global is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1169
1170 /*
1171  * reg - regular expression, i.e. main body or parenthesized thing
1172  *
1173  * Caller must absorb opening parenthesis.
1174  *
1175  * Combining parenthesis handling with the base level of regular expression
1176  * is a trifle forced, but the need to tie the tails of the branches to what
1177  * follows makes it hard to avoid.
1178  */
1179     static char_u *
1180 reg(paren, flagp)
1181     int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1182     int         *flagp;
1183 {
1184     char_u      *ret;
1185     char_u      *br;
1186     char_u      *ender;
1187     int         parno = 0;
1188     int         flags;
1189
1190     *flagp = HASWIDTH;          /* Tentatively. */
1191
1192 #ifdef FEAT_SYN_HL
1193     if (paren == REG_ZPAREN)
1194     {
1195         /* Make a ZOPEN node. */
1196         if (regnzpar >= NSUBEXP)
1197             EMSG_RET_NULL(_("E50: Too many \\z("));
1198         parno = regnzpar;
1199         regnzpar++;
1200         ret = regnode(ZOPEN + parno);
1201     }
1202     else
1203 #endif
1204         if (paren == REG_PAREN)
1205     {
1206         /* Make a MOPEN node. */
1207         if (regnpar >= NSUBEXP)
1208             EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1209         parno = regnpar;
1210         ++regnpar;
1211         ret = regnode(MOPEN + parno);
1212     }
1213     else if (paren == REG_NPAREN)
1214     {
1215         /* Make a NOPEN node. */
1216         ret = regnode(NOPEN);
1217     }
1218     else
1219         ret = NULL;
1220
1221     /* Pick up the branches, linking them together. */
1222     br = regbranch(&flags);
1223     if (br == NULL)
1224         return NULL;
1225     if (ret != NULL)
1226         regtail(ret, br);       /* [MZ]OPEN -> first. */
1227     else
1228         ret = br;
1229     /* If one of the branches can be zero-width, the whole thing can.
1230      * If one of the branches has * at start or matches a line-break, the
1231      * whole thing can. */
1232     if (!(flags & HASWIDTH))
1233         *flagp &= ~HASWIDTH;
1234     *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1235     while (peekchr() == Magic('|'))
1236     {
1237         skipchr();
1238         br = regbranch(&flags);
1239         if (br == NULL)
1240             return NULL;
1241         regtail(ret, br);       /* BRANCH -> BRANCH. */
1242         if (!(flags & HASWIDTH))
1243             *flagp &= ~HASWIDTH;
1244         *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1245     }
1246
1247     /* Make a closing node, and hook it on the end. */
1248     ender = regnode(
1249 #ifdef FEAT_SYN_HL
1250             paren == REG_ZPAREN ? ZCLOSE + parno :
1251 #endif
1252             paren == REG_PAREN ? MCLOSE + parno :
1253             paren == REG_NPAREN ? NCLOSE : END);
1254     regtail(ret, ender);
1255
1256     /* Hook the tails of the branches to the closing node. */
1257     for (br = ret; br != NULL; br = regnext(br))
1258         regoptail(br, ender);
1259
1260     /* Check for proper termination. */
1261     if (paren != REG_NOPAREN && getchr() != Magic(')'))
1262     {
1263 #ifdef FEAT_SYN_HL
1264         if (paren == REG_ZPAREN)
1265             EMSG_RET_NULL(_("E52: Unmatched \\z("));
1266         else
1267 #endif
1268             if (paren == REG_NPAREN)
1269             EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
1270         else
1271             EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
1272     }
1273     else if (paren == REG_NOPAREN && peekchr() != NUL)
1274     {
1275         if (curchr == Magic(')'))
1276             EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
1277         else
1278             EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
1279         /* NOTREACHED */
1280     }
1281     /*
1282      * Here we set the flag allowing back references to this set of
1283      * parentheses.
1284      */
1285     if (paren == REG_PAREN)
1286         had_endbrace[parno] = TRUE;     /* have seen the close paren */
1287     return ret;
1288 }
1289
1290 /*
1291  * regbranch - one alternative of an | operator
1292  *
1293  * Implements the & operator.
1294  */
1295     static char_u *
1296 regbranch(flagp)
1297     int         *flagp;
1298 {
1299     char_u      *ret;
1300     char_u      *chain = NULL;
1301     char_u      *latest;
1302     int         flags;
1303
1304     *flagp = WORST | HASNL;             /* Tentatively. */
1305
1306     ret = regnode(BRANCH);
1307     for (;;)
1308     {
1309         latest = regconcat(&flags);
1310         if (latest == NULL)
1311             return NULL;
1312         /* If one of the branches has width, the whole thing has.  If one of
1313          * the branches anchors at start-of-line, the whole thing does.
1314          * If one of the branches uses look-behind, the whole thing does. */
1315         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1316         /* If one of the branches doesn't match a line-break, the whole thing
1317          * doesn't. */
1318         *flagp &= ~HASNL | (flags & HASNL);
1319         if (chain != NULL)
1320             regtail(chain, latest);
1321         if (peekchr() != Magic('&'))
1322             break;
1323         skipchr();
1324         regtail(latest, regnode(END)); /* operand ends */
1325         reginsert(MATCH, latest);
1326         chain = latest;
1327     }
1328
1329     return ret;
1330 }
1331
1332 /*
1333  * regbranch - one alternative of an | or & operator
1334  *
1335  * Implements the concatenation operator.
1336  */
1337     static char_u *
1338 regconcat(flagp)
1339     int         *flagp;
1340 {
1341     char_u      *first = NULL;
1342     char_u      *chain = NULL;
1343     char_u      *latest;
1344     int         flags;
1345     int         cont = TRUE;
1346
1347     *flagp = WORST;             /* Tentatively. */
1348
1349     while (cont)
1350     {
1351         switch (peekchr())
1352         {
1353             case NUL:
1354             case Magic('|'):
1355             case Magic('&'):
1356             case Magic(')'):
1357                             cont = FALSE;
1358                             break;
1359             case Magic('Z'):
1360 #ifdef FEAT_MBYTE
1361                             regflags |= RF_ICOMBINE;
1362 #endif
1363                             skipchr_keepstart();
1364                             break;
1365             case Magic('c'):
1366                             regflags |= RF_ICASE;
1367                             skipchr_keepstart();
1368                             break;
1369             case Magic('C'):
1370                             regflags |= RF_NOICASE;
1371                             skipchr_keepstart();
1372                             break;
1373             case Magic('v'):
1374                             reg_magic = MAGIC_ALL;
1375                             skipchr_keepstart();
1376                             curchr = -1;
1377                             break;
1378             case Magic('m'):
1379                             reg_magic = MAGIC_ON;
1380                             skipchr_keepstart();
1381                             curchr = -1;
1382                             break;
1383             case Magic('M'):
1384                             reg_magic = MAGIC_OFF;
1385                             skipchr_keepstart();
1386                             curchr = -1;
1387                             break;
1388             case Magic('V'):
1389                             reg_magic = MAGIC_NONE;
1390                             skipchr_keepstart();
1391                             curchr = -1;
1392                             break;
1393             default:
1394                             latest = regpiece(&flags);
1395                             if (latest == NULL)
1396                                 return NULL;
1397                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1398                             if (chain == NULL)  /* First piece. */
1399                                 *flagp |= flags & SPSTART;
1400                             else
1401                                 regtail(chain, latest);
1402                             chain = latest;
1403                             if (first == NULL)
1404                                 first = latest;
1405                             break;
1406         }
1407     }
1408     if (first == NULL)          /* Loop ran zero times. */
1409         first = regnode(NOTHING);
1410     return first;
1411 }
1412
1413 /*
1414  * regpiece - something followed by possible [*+=]
1415  *
1416  * Note that the branching code sequences used for = and the general cases
1417  * of * and + are somewhat optimized:  they use the same NOTHING node as
1418  * both the endmarker for their branch list and the body of the last branch.
1419  * It might seem that this node could be dispensed with entirely, but the
1420  * endmarker role is not redundant.
1421  */
1422     static char_u *
1423 regpiece(flagp)
1424     int             *flagp;
1425 {
1426     char_u          *ret;
1427     int             op;
1428     char_u          *next;
1429     int             flags;
1430     long            minval;
1431     long            maxval;
1432
1433     ret = regatom(&flags);
1434     if (ret == NULL)
1435         return NULL;
1436
1437     op = peekchr();
1438     if (re_multi_type(op) == NOT_MULTI)
1439     {
1440         *flagp = flags;
1441         return ret;
1442     }
1443     /* default flags */
1444     *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1445
1446     skipchr();
1447     switch (op)
1448     {
1449         case Magic('*'):
1450             if (flags & SIMPLE)
1451                 reginsert(STAR, ret);
1452             else
1453             {
1454                 /* Emit x* as (x&|), where & means "self". */
1455                 reginsert(BRANCH, ret); /* Either x */
1456                 regoptail(ret, regnode(BACK));  /* and loop */
1457                 regoptail(ret, ret);    /* back */
1458                 regtail(ret, regnode(BRANCH));  /* or */
1459                 regtail(ret, regnode(NOTHING)); /* null. */
1460             }
1461             break;
1462
1463         case Magic('+'):
1464             if (flags & SIMPLE)
1465                 reginsert(PLUS, ret);
1466             else
1467             {
1468                 /* Emit x+ as x(&|), where & means "self". */
1469                 next = regnode(BRANCH); /* Either */
1470                 regtail(ret, next);
1471                 regtail(regnode(BACK), ret);    /* loop back */
1472                 regtail(next, regnode(BRANCH)); /* or */
1473                 regtail(ret, regnode(NOTHING)); /* null. */
1474             }
1475             *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1476             break;
1477
1478         case Magic('@'):
1479             {
1480                 int     lop = END;
1481
1482                 switch (no_Magic(getchr()))
1483                 {
1484                     case '=': lop = MATCH; break;                 /* \@= */
1485                     case '!': lop = NOMATCH; break;               /* \@! */
1486                     case '>': lop = SUBPAT; break;                /* \@> */
1487                     case '<': switch (no_Magic(getchr()))
1488                               {
1489                                   case '=': lop = BEHIND; break;   /* \@<= */
1490                                   case '!': lop = NOBEHIND; break; /* \@<! */
1491                               }
1492                 }
1493                 if (lop == END)
1494                     EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1495                                                       reg_magic == MAGIC_ALL);
1496                 /* Look behind must match with behind_pos. */
1497                 if (lop == BEHIND || lop == NOBEHIND)
1498                 {
1499                     regtail(ret, regnode(BHPOS));
1500                     *flagp |= HASLOOKBH;
1501                 }
1502                 regtail(ret, regnode(END)); /* operand ends */
1503                 reginsert(lop, ret);
1504                 break;
1505             }
1506
1507         case Magic('?'):
1508         case Magic('='):
1509             /* Emit x= as (x|) */
1510             reginsert(BRANCH, ret);             /* Either x */
1511             regtail(ret, regnode(BRANCH));      /* or */
1512             next = regnode(NOTHING);            /* null. */
1513             regtail(ret, next);
1514             regoptail(ret, next);
1515             break;
1516
1517         case Magic('{'):
1518             if (!read_limits(&minval, &maxval))
1519                 return NULL;
1520             if (flags & SIMPLE)
1521             {
1522                 reginsert(BRACE_SIMPLE, ret);
1523                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1524             }
1525             else
1526             {
1527                 if (num_complex_braces >= 10)
1528                     EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1529                                                       reg_magic == MAGIC_ALL);
1530                 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1531                 regoptail(ret, regnode(BACK));
1532                 regoptail(ret, ret);
1533                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1534                 ++num_complex_braces;
1535             }
1536             if (minval > 0 && maxval > 0)
1537                 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1538             break;
1539     }
1540     if (re_multi_type(peekchr()) != NOT_MULTI)
1541     {
1542         /* Can't have a multi follow a multi. */
1543         if (peekchr() == Magic('*'))
1544             sprintf((char *)IObuff, _("E61: Nested %s*"),
1545                                             reg_magic >= MAGIC_ON ? "" : "\\");
1546         else
1547             sprintf((char *)IObuff, _("E62: Nested %s%c"),
1548                 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1549         EMSG_RET_NULL(IObuff);
1550     }
1551
1552     return ret;
1553 }
1554
1555 /*
1556  * regatom - the lowest level
1557  *
1558  * Optimization:  gobbles an entire sequence of ordinary characters so that
1559  * it can turn them into a single node, which is smaller to store and
1560  * faster to run.  Don't do this when one_exactly is set.
1561  */
1562     static char_u *
1563 regatom(flagp)
1564     int            *flagp;
1565 {
1566     char_u          *ret;
1567     int             flags;
1568     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1569     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1570     int             c;
1571     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1572     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1573                                     FNAME, SFNAME, PRINT, SPRINT,
1574                                     WHITE, NWHITE, DIGIT, NDIGIT,
1575                                     HEX, NHEX, OCTAL, NOCTAL,
1576                                     WORD, NWORD, HEAD, NHEAD,
1577                                     ALPHA, NALPHA, LOWER, NLOWER,
1578                                     UPPER, NUPPER
1579                                     };
1580     char_u          *p;
1581     int             extra = 0;
1582
1583     *flagp = WORST;             /* Tentatively. */
1584     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1585     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1586
1587     c = getchr();
1588     switch (c)
1589     {
1590       case Magic('^'):
1591         ret = regnode(BOL);
1592         break;
1593
1594       case Magic('$'):
1595         ret = regnode(EOL);
1596 #if defined(FEAT_SYN_HL) || defined(PROTO)
1597         had_eol = TRUE;
1598 #endif
1599         break;
1600
1601       case Magic('<'):
1602         ret = regnode(BOW);
1603         break;
1604
1605       case Magic('>'):
1606         ret = regnode(EOW);
1607         break;
1608
1609       case Magic('_'):
1610         c = no_Magic(getchr());
1611         if (c == '^')           /* "\_^" is start-of-line */
1612         {
1613             ret = regnode(BOL);
1614             break;
1615         }
1616         if (c == '$')           /* "\_$" is end-of-line */
1617         {
1618             ret = regnode(EOL);
1619 #if defined(FEAT_SYN_HL) || defined(PROTO)
1620             had_eol = TRUE;
1621 #endif
1622             break;
1623         }
1624
1625         extra = ADD_NL;
1626         *flagp |= HASNL;
1627
1628         /* "\_[" is character range plus newline */
1629         if (c == '[')
1630             goto collection;
1631
1632         /* "\_x" is character class plus newline */
1633         /*FALLTHROUGH*/
1634
1635         /*
1636          * Character classes.
1637          */
1638       case Magic('.'):
1639       case Magic('i'):
1640       case Magic('I'):
1641       case Magic('k'):
1642       case Magic('K'):
1643       case Magic('f'):
1644       case Magic('F'):
1645       case Magic('p'):
1646       case Magic('P'):
1647       case Magic('s'):
1648       case Magic('S'):
1649       case Magic('d'):
1650       case Magic('D'):
1651       case Magic('x'):
1652       case Magic('X'):
1653       case Magic('o'):
1654       case Magic('O'):
1655       case Magic('w'):
1656       case Magic('W'):
1657       case Magic('h'):
1658       case Magic('H'):
1659       case Magic('a'):
1660       case Magic('A'):
1661       case Magic('l'):
1662       case Magic('L'):
1663       case Magic('u'):
1664       case Magic('U'):
1665         p = vim_strchr(classchars, no_Magic(c));
1666         if (p == NULL)
1667             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1668 #ifdef FEAT_MBYTE
1669         /* When '.' is followed by a composing char ignore the dot, so that
1670          * the composing char is matched here. */
1671         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1672         {
1673             c = getchr();
1674             goto do_multibyte;
1675         }
1676 #endif
1677         ret = regnode(classcodes[p - classchars] + extra);
1678         *flagp |= HASWIDTH | SIMPLE;
1679         break;
1680
1681       case Magic('n'):
1682         if (reg_string)
1683         {
1684             /* In a string "\n" matches a newline character. */
1685             ret = regnode(EXACTLY);
1686             regc(NL);
1687             regc(NUL);
1688             *flagp |= HASWIDTH | SIMPLE;
1689         }
1690         else
1691         {
1692             /* In buffer text "\n" matches the end of a line. */
1693             ret = regnode(NEWL);
1694             *flagp |= HASWIDTH | HASNL;
1695         }
1696         break;
1697
1698       case Magic('('):
1699         if (one_exactly)
1700             EMSG_ONE_RET_NULL;
1701         ret = reg(REG_PAREN, &flags);
1702         if (ret == NULL)
1703             return NULL;
1704         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1705         break;
1706
1707       case NUL:
1708       case Magic('|'):
1709       case Magic('&'):
1710       case Magic(')'):
1711         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1712         /* NOTREACHED */
1713
1714       case Magic('='):
1715       case Magic('?'):
1716       case Magic('+'):
1717       case Magic('@'):
1718       case Magic('{'):
1719       case Magic('*'):
1720         c = no_Magic(c);
1721         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1722                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1723                 ? "" : "\\", c);
1724         EMSG_RET_NULL(IObuff);
1725         /* NOTREACHED */
1726
1727       case Magic('~'):          /* previous substitute pattern */
1728             if (reg_prev_sub != NULL)
1729             {
1730                 char_u      *lp;
1731
1732                 ret = regnode(EXACTLY);
1733                 lp = reg_prev_sub;
1734                 while (*lp != NUL)
1735                     regc(*lp++);
1736                 regc(NUL);
1737                 if (*reg_prev_sub != NUL)
1738                 {
1739                     *flagp |= HASWIDTH;
1740                     if ((lp - reg_prev_sub) == 1)
1741                         *flagp |= SIMPLE;
1742                 }
1743             }
1744             else
1745                 EMSG_RET_NULL(_(e_nopresub));
1746             break;
1747
1748       case Magic('1'):
1749       case Magic('2'):
1750       case Magic('3'):
1751       case Magic('4'):
1752       case Magic('5'):
1753       case Magic('6'):
1754       case Magic('7'):
1755       case Magic('8'):
1756       case Magic('9'):
1757             {
1758                 int                 refnum;
1759
1760                 refnum = c - Magic('0');
1761                 /*
1762                  * Check if the back reference is legal. We must have seen the
1763                  * close brace.
1764                  * TODO: Should also check that we don't refer to something
1765                  * that is repeated (+*=): what instance of the repetition
1766                  * should we match?
1767                  */
1768                 if (!had_endbrace[refnum])
1769                 {
1770                     /* Trick: check if "@<=" or "@<!" follows, in which case
1771                      * the \1 can appear before the referenced match. */
1772                     for (p = regparse; *p != NUL; ++p)
1773                         if (p[0] == '@' && p[1] == '<'
1774                                               && (p[2] == '!' || p[2] == '='))
1775                             break;
1776                     if (*p == NUL)
1777                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1778                 }
1779                 ret = regnode(BACKREF + refnum);
1780             }
1781             break;
1782
1783       case Magic('z'):
1784         {
1785             c = no_Magic(getchr());
1786             switch (c)
1787             {
1788 #ifdef FEAT_SYN_HL
1789                 case '(': if (reg_do_extmatch != REX_SET)
1790                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1791                           if (one_exactly)
1792                               EMSG_ONE_RET_NULL;
1793                           ret = reg(REG_ZPAREN, &flags);
1794                           if (ret == NULL)
1795                               return NULL;
1796                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1797                           re_has_z = REX_SET;
1798                           break;
1799
1800                 case '1':
1801                 case '2':
1802                 case '3':
1803                 case '4':
1804                 case '5':
1805                 case '6':
1806                 case '7':
1807                 case '8':
1808                 case '9': if (reg_do_extmatch != REX_USE)
1809                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1810                           ret = regnode(ZREF + c - '0');
1811                           re_has_z = REX_USE;
1812                           break;
1813 #endif
1814
1815                 case 's': ret = regnode(MOPEN + 0);
1816                           break;
1817
1818                 case 'e': ret = regnode(MCLOSE + 0);
1819                           break;
1820
1821                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1822             }
1823         }
1824         break;
1825
1826       case Magic('%'):
1827         {
1828             c = no_Magic(getchr());
1829             switch (c)
1830             {
1831                 /* () without a back reference */
1832                 case '(':
1833                     if (one_exactly)
1834                         EMSG_ONE_RET_NULL;
1835                     ret = reg(REG_NPAREN, &flags);
1836                     if (ret == NULL)
1837                         return NULL;
1838                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1839                     break;
1840
1841                 /* Catch \%^ and \%$ regardless of where they appear in the
1842                  * pattern -- regardless of whether or not it makes sense. */
1843                 case '^':
1844                     ret = regnode(RE_BOF);
1845                     break;
1846
1847                 case '$':
1848                     ret = regnode(RE_EOF);
1849                     break;
1850
1851                 case '#':
1852                     ret = regnode(CURSOR);
1853                     break;
1854
1855                 case 'V':
1856                     ret = regnode(RE_VISUAL);
1857                     break;
1858
1859                 /* \%[abc]: Emit as a list of branches, all ending at the last
1860                  * branch which matches nothing. */
1861                 case '[':
1862                           if (one_exactly)      /* doesn't nest */
1863                               EMSG_ONE_RET_NULL;
1864                           {
1865                               char_u    *lastbranch;
1866                               char_u    *lastnode = NULL;
1867                               char_u    *br;
1868
1869                               ret = NULL;
1870                               while ((c = getchr()) != ']')
1871                               {
1872                                   if (c == NUL)
1873                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1874                                                       reg_magic == MAGIC_ALL);
1875                                   br = regnode(BRANCH);
1876                                   if (ret == NULL)
1877                                       ret = br;
1878                                   else
1879                                       regtail(lastnode, br);
1880
1881                                   ungetchr();
1882                                   one_exactly = TRUE;
1883                                   lastnode = regatom(flagp);
1884                                   one_exactly = FALSE;
1885                                   if (lastnode == NULL)
1886                                       return NULL;
1887                               }
1888                               if (ret == NULL)
1889                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1890                                                       reg_magic == MAGIC_ALL);
1891                               lastbranch = regnode(BRANCH);
1892                               br = regnode(NOTHING);
1893                               if (ret != JUST_CALC_SIZE)
1894                               {
1895                                   regtail(lastnode, br);
1896                                   regtail(lastbranch, br);
1897                                   /* connect all branches to the NOTHING
1898                                    * branch at the end */
1899                                   for (br = ret; br != lastnode; )
1900                                   {
1901                                       if (OP(br) == BRANCH)
1902                                       {
1903                                           regtail(br, lastbranch);
1904                                           br = OPERAND(br);
1905                                       }
1906                                       else
1907                                           br = regnext(br);
1908                                   }
1909                               }
1910                               *flagp &= ~HASWIDTH;
1911                               break;
1912                           }
1913
1914                 case 'd':   /* %d123 decimal */
1915                 case 'o':   /* %o123 octal */
1916                 case 'x':   /* %xab hex 2 */
1917                 case 'u':   /* %uabcd hex 4 */
1918                 case 'U':   /* %U1234abcd hex 8 */
1919                           {
1920                               int i;
1921
1922                               switch (c)
1923                               {
1924                                   case 'd': i = getdecchrs(); break;
1925                                   case 'o': i = getoctchrs(); break;
1926                                   case 'x': i = gethexchrs(2); break;
1927                                   case 'u': i = gethexchrs(4); break;
1928                                   case 'U': i = gethexchrs(8); break;
1929                                   default:  i = -1; break;
1930                               }
1931
1932                               if (i < 0)
1933                                   EMSG_M_RET_NULL(
1934                                         _("E678: Invalid character after %s%%[dxouU]"),
1935                                         reg_magic == MAGIC_ALL);
1936 #ifdef FEAT_MBYTE
1937                               if (use_multibytecode(i))
1938                                   ret = regnode(MULTIBYTECODE);
1939                               else
1940 #endif
1941                                   ret = regnode(EXACTLY);
1942                               if (i == 0)
1943                                   regc(0x0a);
1944                               else
1945 #ifdef FEAT_MBYTE
1946                                   regmbc(i);
1947 #else
1948                                   regc(i);
1949 #endif
1950                               regc(NUL);
1951                               *flagp |= HASWIDTH;
1952                               break;
1953                           }
1954
1955                 default:
1956                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1957                                                                  || c == '\'')
1958                           {
1959                               long_u    n = 0;
1960                               int       cmp;
1961
1962                               cmp = c;
1963                               if (cmp == '<' || cmp == '>')
1964                                   c = getchr();
1965                               while (VIM_ISDIGIT(c))
1966                               {
1967                                   n = n * 10 + (c - '0');
1968                                   c = getchr();
1969                               }
1970                               if (c == '\'' && n == 0)
1971                               {
1972                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1973                                   c = getchr();
1974                                   ret = regnode(RE_MARK);
1975                                   if (ret == JUST_CALC_SIZE)
1976                                       regsize += 2;
1977                                   else
1978                                   {
1979                                       *regcode++ = c;
1980                                       *regcode++ = cmp;
1981                                   }
1982                                   break;
1983                               }
1984                               else if (c == 'l' || c == 'c' || c == 'v')
1985                               {
1986                                   if (c == 'l')
1987                                       ret = regnode(RE_LNUM);
1988                                   else if (c == 'c')
1989                                       ret = regnode(RE_COL);
1990                                   else
1991                                       ret = regnode(RE_VCOL);
1992                                   if (ret == JUST_CALC_SIZE)
1993                                       regsize += 5;
1994                                   else
1995                                   {
1996                                       /* put the number and the optional
1997                                        * comparator after the opcode */
1998                                       regcode = re_put_long(regcode, n);
1999                                       *regcode++ = cmp;
2000                                   }
2001                                   break;
2002                               }
2003                           }
2004
2005                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
2006                                                       reg_magic == MAGIC_ALL);
2007             }
2008         }
2009         break;
2010
2011       case Magic('['):
2012 collection:
2013         {
2014             char_u      *lp;
2015
2016             /*
2017              * If there is no matching ']', we assume the '[' is a normal
2018              * character.  This makes 'incsearch' and ":help [" work.
2019              */
2020             lp = skip_anyof(regparse);
2021             if (*lp == ']')     /* there is a matching ']' */
2022             {
2023                 int     startc = -1;    /* > 0 when next '-' is a range */
2024                 int     endc;
2025
2026                 /*
2027                  * In a character class, different parsing rules apply.
2028                  * Not even \ is special anymore, nothing is.
2029                  */
2030                 if (*regparse == '^')       /* Complement of range. */
2031                 {
2032                     ret = regnode(ANYBUT + extra);
2033                     regparse++;
2034                 }
2035                 else
2036                     ret = regnode(ANYOF + extra);
2037
2038                 /* At the start ']' and '-' mean the literal character. */
2039                 if (*regparse == ']' || *regparse == '-')
2040                 {
2041                     startc = *regparse;
2042                     regc(*regparse++);
2043                 }
2044
2045                 while (*regparse != NUL && *regparse != ']')
2046                 {
2047                     if (*regparse == '-')
2048                     {
2049                         ++regparse;
2050                         /* The '-' is not used for a range at the end and
2051                          * after or before a '\n'. */
2052                         if (*regparse == ']' || *regparse == NUL
2053                                 || startc == -1
2054                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2055                         {
2056                             regc('-');
2057                             startc = '-';       /* [--x] is a range */
2058                         }
2059                         else
2060                         {
2061                             /* Also accept "a-[.z.]" */
2062                             endc = 0;
2063                             if (*regparse == '[')
2064                                 endc = get_coll_element(&regparse);
2065                             if (endc == 0)
2066                             {
2067 #ifdef FEAT_MBYTE
2068                                 if (has_mbyte)
2069                                     endc = mb_ptr2char_adv(&regparse);
2070                                 else
2071 #endif
2072                                     endc = *regparse++;
2073                             }
2074
2075                             /* Handle \o40, \x20 and \u20AC style sequences */
2076                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2077                                 endc = coll_get_char();
2078
2079                             if (startc > endc)
2080                                 EMSG_RET_NULL(_(e_invrange));
2081 #ifdef FEAT_MBYTE
2082                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2083                                                  || (*mb_char2len)(endc) > 1))
2084                             {
2085                                 /* Limit to a range of 256 chars */
2086                                 if (endc > startc + 256)
2087                                     EMSG_RET_NULL(_(e_invrange));
2088                                 while (++startc <= endc)
2089                                     regmbc(startc);
2090                             }
2091                             else
2092 #endif
2093                             {
2094 #ifdef EBCDIC
2095                                 int     alpha_only = FALSE;
2096
2097                                 /* for alphabetical range skip the gaps
2098                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2099                                 if (isalpha(startc) && isalpha(endc))
2100                                     alpha_only = TRUE;
2101 #endif
2102                                 while (++startc <= endc)
2103 #ifdef EBCDIC
2104                                     if (!alpha_only || isalpha(startc))
2105 #endif
2106                                         regc(startc);
2107                             }
2108                             startc = -1;
2109                         }
2110                     }
2111                     /*
2112                      * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2113                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2114                      * 'cpoptions' is not included.
2115                      * Posix doesn't recognize backslash at all.
2116                      */
2117                     else if (*regparse == '\\'
2118                             && !cpo_bsl
2119                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2120                                 || (!cpo_lit
2121                                     && vim_strchr(REGEXP_ABBR,
2122                                                        regparse[1]) != NULL)))
2123                     {
2124                         regparse++;
2125                         if (*regparse == 'n')
2126                         {
2127                             /* '\n' in range: also match NL */
2128                             if (ret != JUST_CALC_SIZE)
2129                             {
2130                                 if (*ret == ANYBUT)
2131                                     *ret = ANYBUT + ADD_NL;
2132                                 else if (*ret == ANYOF)
2133                                     *ret = ANYOF + ADD_NL;
2134                                 /* else: must have had a \n already */
2135                             }
2136                             *flagp |= HASNL;
2137                             regparse++;
2138                             startc = -1;
2139                         }
2140                         else if (*regparse == 'd'
2141                                 || *regparse == 'o'
2142                                 || *regparse == 'x'
2143                                 || *regparse == 'u'
2144                                 || *regparse == 'U')
2145                         {
2146                             startc = coll_get_char();
2147                             if (startc == 0)
2148                                 regc(0x0a);
2149                             else
2150 #ifdef FEAT_MBYTE
2151                                 regmbc(startc);
2152 #else
2153                                 regc(startc);
2154 #endif
2155                         }
2156                         else
2157                         {
2158                             startc = backslash_trans(*regparse++);
2159                             regc(startc);
2160                         }
2161                     }
2162                     else if (*regparse == '[')
2163                     {
2164                         int c_class;
2165                         int cu;
2166
2167                         c_class = get_char_class(&regparse);
2168                         startc = -1;
2169                         /* Characters assumed to be 8 bits! */
2170                         switch (c_class)
2171                         {
2172                             case CLASS_NONE:
2173                                 c_class = get_equi_class(&regparse);
2174                                 if (c_class != 0)
2175                                 {
2176                                     /* produce equivalence class */
2177                                     reg_equi_class(c_class);
2178                                 }
2179                                 else if ((c_class =
2180                                             get_coll_element(&regparse)) != 0)
2181                                 {
2182                                     /* produce a collating element */
2183                                     regmbc(c_class);
2184                                 }
2185                                 else
2186                                 {
2187                                     /* literal '[', allow [[-x] as a range */
2188                                     startc = *regparse++;
2189                                     regc(startc);
2190                                 }
2191                                 break;
2192                             case CLASS_ALNUM:
2193                                 for (cu = 1; cu <= 255; cu++)
2194                                     if (isalnum(cu))
2195                                         regc(cu);
2196                                 break;
2197                             case CLASS_ALPHA:
2198                                 for (cu = 1; cu <= 255; cu++)
2199                                     if (isalpha(cu))
2200                                         regc(cu);
2201                                 break;
2202                             case CLASS_BLANK:
2203                                 regc(' ');
2204                                 regc('\t');
2205                                 break;
2206                             case CLASS_CNTRL:
2207                                 for (cu = 1; cu <= 255; cu++)
2208                                     if (iscntrl(cu))
2209                                         regc(cu);
2210                                 break;
2211                             case CLASS_DIGIT:
2212                                 for (cu = 1; cu <= 255; cu++)
2213                                     if (VIM_ISDIGIT(cu))
2214                                         regc(cu);
2215                                 break;
2216                             case CLASS_GRAPH:
2217                                 for (cu = 1; cu <= 255; cu++)
2218                                     if (isgraph(cu))
2219                                         regc(cu);
2220                                 break;
2221                             case CLASS_LOWER:
2222                                 for (cu = 1; cu <= 255; cu++)
2223                                     if (MB_ISLOWER(cu))
2224                                         regc(cu);
2225                                 break;
2226                             case CLASS_PRINT:
2227                                 for (cu = 1; cu <= 255; cu++)
2228                                     if (vim_isprintc(cu))
2229                                         regc(cu);
2230                                 break;
2231                             case CLASS_PUNCT:
2232                                 for (cu = 1; cu <= 255; cu++)
2233                                     if (ispunct(cu))
2234                                         regc(cu);
2235                                 break;
2236                             case CLASS_SPACE:
2237                                 for (cu = 9; cu <= 13; cu++)
2238                                     regc(cu);
2239                                 regc(' ');
2240                                 break;
2241                             case CLASS_UPPER:
2242                                 for (cu = 1; cu <= 255; cu++)
2243                                     if (MB_ISUPPER(cu))
2244                                         regc(cu);
2245                                 break;
2246                             case CLASS_XDIGIT:
2247                                 for (cu = 1; cu <= 255; cu++)
2248                                     if (vim_isxdigit(cu))
2249                                         regc(cu);
2250                                 break;
2251                             case CLASS_TAB:
2252                                 regc('\t');
2253                                 break;
2254                             case CLASS_RETURN:
2255                                 regc('\r');
2256                                 break;
2257                             case CLASS_BACKSPACE:
2258                                 regc('\b');
2259                                 break;
2260                             case CLASS_ESCAPE:
2261                                 regc('\033');
2262                                 break;
2263                         }
2264                     }
2265                     else
2266                     {
2267 #ifdef FEAT_MBYTE
2268                         if (has_mbyte)
2269                         {
2270                             int len;
2271
2272                             /* produce a multibyte character, including any
2273                              * following composing characters */
2274                             startc = mb_ptr2char(regparse);
2275                             len = (*mb_ptr2len)(regparse);
2276                             if (enc_utf8 && utf_char2len(startc) != len)
2277                                 startc = -1;    /* composing chars */
2278                             while (--len >= 0)
2279                                 regc(*regparse++);
2280                         }
2281                         else
2282 #endif
2283                         {
2284                             startc = *regparse++;
2285                             regc(startc);
2286                         }
2287                     }
2288                 }
2289                 regc(NUL);
2290                 prevchr_len = 1;        /* last char was the ']' */
2291                 if (*regparse != ']')
2292                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2293                 skipchr();          /* let's be friends with the lexer again */
2294                 *flagp |= HASWIDTH | SIMPLE;
2295                 break;
2296             }
2297             else if (reg_strict)
2298                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2299                                                        reg_magic > MAGIC_OFF);
2300         }
2301         /* FALLTHROUGH */
2302
2303       default:
2304         {
2305             int         len;
2306
2307 #ifdef FEAT_MBYTE
2308             /* A multi-byte character is handled as a separate atom if it's
2309              * before a multi and when it's a composing char. */
2310             if (use_multibytecode(c))
2311             {
2312 do_multibyte:
2313                 ret = regnode(MULTIBYTECODE);
2314                 regmbc(c);
2315                 *flagp |= HASWIDTH | SIMPLE;
2316                 break;
2317             }
2318 #endif
2319
2320             ret = regnode(EXACTLY);
2321
2322             /*
2323              * Append characters as long as:
2324              * - there is no following multi, we then need the character in
2325              *   front of it as a single character operand
2326              * - not running into a Magic character
2327              * - "one_exactly" is not set
2328              * But always emit at least one character.  Might be a Multi,
2329              * e.g., a "[" without matching "]".
2330              */
2331             for (len = 0; c != NUL && (len == 0
2332                         || (re_multi_type(peekchr()) == NOT_MULTI
2333                             && !one_exactly
2334                             && !is_Magic(c))); ++len)
2335             {
2336                 c = no_Magic(c);
2337 #ifdef FEAT_MBYTE
2338                 if (has_mbyte)
2339                 {
2340                     regmbc(c);
2341                     if (enc_utf8)
2342                     {
2343                         int     l;
2344
2345                         /* Need to get composing character too. */
2346                         for (;;)
2347                         {
2348                             l = utf_ptr2len(regparse);
2349                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2350                                 break;
2351                             regmbc(utf_ptr2char(regparse));
2352                             skipchr();
2353                         }
2354                     }
2355                 }
2356                 else
2357 #endif
2358                     regc(c);
2359                 c = getchr();
2360             }
2361             ungetchr();
2362
2363             regc(NUL);
2364             *flagp |= HASWIDTH;
2365             if (len == 1)
2366                 *flagp |= SIMPLE;
2367         }
2368         break;
2369     }
2370
2371     return ret;
2372 }
2373
2374 #ifdef FEAT_MBYTE
2375 /*
2376  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2377  * character "c".
2378  */
2379     static int
2380 use_multibytecode(c)
2381     int c;
2382 {
2383     return has_mbyte && (*mb_char2len)(c) > 1
2384                      && (re_multi_type(peekchr()) != NOT_MULTI
2385                              || (enc_utf8 && utf_iscomposing(c)));
2386 }
2387 #endif
2388
2389 /*
2390  * emit a node
2391  * Return pointer to generated code.
2392  */
2393     static char_u *
2394 regnode(op)
2395     int         op;
2396 {
2397     char_u  *ret;
2398
2399     ret = regcode;
2400     if (ret == JUST_CALC_SIZE)
2401         regsize += 3;
2402     else
2403     {
2404         *regcode++ = op;
2405         *regcode++ = NUL;               /* Null "next" pointer. */
2406         *regcode++ = NUL;
2407     }
2408     return ret;
2409 }
2410
2411 /*
2412  * Emit (if appropriate) a byte of code
2413  */
2414     static void
2415 regc(b)
2416     int         b;
2417 {
2418     if (regcode == JUST_CALC_SIZE)
2419         regsize++;
2420     else
2421         *regcode++ = b;
2422 }
2423
2424 #ifdef FEAT_MBYTE
2425 /*
2426  * Emit (if appropriate) a multi-byte character of code
2427  */
2428     static void
2429 regmbc(c)
2430     int         c;
2431 {
2432     if (regcode == JUST_CALC_SIZE)
2433         regsize += (*mb_char2len)(c);
2434     else
2435         regcode += (*mb_char2bytes)(c, regcode);
2436 }
2437 #endif
2438
2439 /*
2440  * reginsert - insert an operator in front of already-emitted operand
2441  *
2442  * Means relocating the operand.
2443  */
2444     static void
2445 reginsert(op, opnd)
2446     int         op;
2447     char_u     *opnd;
2448 {
2449     char_u      *src;
2450     char_u      *dst;
2451     char_u      *place;
2452
2453     if (regcode == JUST_CALC_SIZE)
2454     {
2455         regsize += 3;
2456         return;
2457     }
2458     src = regcode;
2459     regcode += 3;
2460     dst = regcode;
2461     while (src > opnd)
2462         *--dst = *--src;
2463
2464     place = opnd;               /* Op node, where operand used to be. */
2465     *place++ = op;
2466     *place++ = NUL;
2467     *place = NUL;
2468 }
2469
2470 /*
2471  * reginsert_limits - insert an operator in front of already-emitted operand.
2472  * The operator has the given limit values as operands.  Also set next pointer.
2473  *
2474  * Means relocating the operand.
2475  */
2476     static void
2477 reginsert_limits(op, minval, maxval, opnd)
2478     int         op;
2479     long        minval;
2480     long        maxval;
2481     char_u      *opnd;
2482 {
2483     char_u      *src;
2484     char_u      *dst;
2485     char_u      *place;
2486
2487     if (regcode == JUST_CALC_SIZE)
2488     {
2489         regsize += 11;
2490         return;
2491     }
2492     src = regcode;
2493     regcode += 11;
2494     dst = regcode;
2495     while (src > opnd)
2496         *--dst = *--src;
2497
2498     place = opnd;               /* Op node, where operand used to be. */
2499     *place++ = op;
2500     *place++ = NUL;
2501     *place++ = NUL;
2502     place = re_put_long(place, (long_u)minval);
2503     place = re_put_long(place, (long_u)maxval);
2504     regtail(opnd, place);
2505 }
2506
2507 /*
2508  * Write a long as four bytes at "p" and return pointer to the next char.
2509  */
2510     static char_u *
2511 re_put_long(p, val)
2512     char_u      *p;
2513     long_u      val;
2514 {
2515     *p++ = (char_u) ((val >> 24) & 0377);
2516     *p++ = (char_u) ((val >> 16) & 0377);
2517     *p++ = (char_u) ((val >> 8) & 0377);
2518     *p++ = (char_u) (val & 0377);
2519     return p;
2520 }
2521
2522 /*
2523  * regtail - set the next-pointer at the end of a node chain
2524  */
2525     static void
2526 regtail(p, val)
2527     char_u      *p;
2528     char_u      *val;
2529 {
2530     char_u      *scan;
2531     char_u      *temp;
2532     int         offset;
2533
2534     if (p == JUST_CALC_SIZE)
2535         return;
2536
2537     /* Find last node. */
2538     scan = p;
2539     for (;;)
2540     {
2541         temp = regnext(scan);
2542         if (temp == NULL)
2543             break;
2544         scan = temp;
2545     }
2546
2547     if (OP(scan) == BACK)
2548         offset = (int)(scan - val);
2549     else
2550         offset = (int)(val - scan);
2551     *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2552     *(scan + 2) = (char_u) (offset & 0377);
2553 }
2554
2555 /*
2556  * regoptail - regtail on item after a BRANCH; nop if none
2557  */
2558     static void
2559 regoptail(p, val)
2560     char_u      *p;
2561     char_u      *val;
2562 {
2563     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2564     if (p == NULL || p == JUST_CALC_SIZE
2565             || (OP(p) != BRANCH
2566                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2567         return;
2568     regtail(OPERAND(p), val);
2569 }
2570
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The lexer state below is reset by initchr() and updated by peekchr(),
 * skipchr() and ungetchr().  A value of -1 means "no character".
 */

/* "curchr" is not declared here, only this commented-out line remains: */
/* static int       curchr; */
static int      prevprevchr;    /* value "prevchr" had before the last
                                 * skipchr() */
static int      prevchr;        /* value "curchr" had before the last
                                 * skipchr() */
static int      nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * e.g. in /[ ^I]^ the pattern was never found even if it existed, because ^
 * was taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2587
2588     static void
2589 initchr(str)
2590     char_u *str;
2591 {
2592     regparse = str;
2593     prevchr_len = 0;
2594     curchr = prevprevchr = prevchr = nextchr = -1;
2595     at_start = TRUE;
2596     prev_at_start = FALSE;
2597 }
2598
    static int
peekchr()
{
    /*
     * Return the next character of the pattern without consuming it.  A
     * character that is magic at this position is returned as Magic(c).
     * The result is cached in "curchr": as long as "curchr" is not -1 it is
     * returned as-is, without looking at "regparse" again.
     */

    /* Non-zero while the recursive call below handles the character after a
     * backslash; checked by the '*' case. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* * is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
             * "\(\*" is not magic, thus must be magic if "after_slash" */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&", "\n" or "\%(".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* Flag the position after the '^' as a start position;
                 * skipchr() moves this into "prev_at_start", which the '*'
                 * case above checks. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;

                /* ignore \c \C \m \M and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
                    p += 2;
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    /* Lex the character after the backslash with a recursive
                     * call, then move "regparse" back to the backslash. */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        /* Any other byte: with a multi-byte encoding decode the whole
         * character, otherwise "curchr" keeps the byte assigned above. */
        default:
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
2757
2758 /*
2759  * Eat one lexed character.  Do this in a way that we can undo it.
2760  */
2761     static void
2762 skipchr()
2763 {
2764     /* peekchr() eats a backslash, do the same here */
2765     if (*regparse == '\\')
2766         prevchr_len = 1;
2767     else
2768         prevchr_len = 0;
2769     if (regparse[prevchr_len] != NUL)
2770     {
2771 #ifdef FEAT_MBYTE
2772         if (enc_utf8)
2773             prevchr_len += utf_char2len(mb_ptr2char(regparse + prevchr_len));
2774         else if (has_mbyte)
2775             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2776         else
2777 #endif
2778             ++prevchr_len;
2779     }
2780     regparse += prevchr_len;
2781     prev_at_start = at_start;
2782     at_start = FALSE;
2783     prevprevchr = prevchr;
2784     prevchr = curchr;
2785     curchr = nextchr;       /* use previously unget char, or -1 */
2786     nextchr = -1;
2787 }
2788
2789 /*
2790  * Skip a character while keeping the value of prev_at_start for at_start.
2791  * prevchr and prevprevchr are also kept.
2792  */
2793     static void
2794 skipchr_keepstart()
2795 {
2796     int as = prev_at_start;
2797     int pr = prevchr;
2798     int prpr = prevprevchr;
2799
2800     skipchr();
2801     at_start = as;
2802     prevchr = pr;
2803     prevprevchr = prpr;
2804 }
2805
2806     static int
2807 getchr()
2808 {
2809     int chr = peekchr();
2810
2811     skipchr();
2812     return chr;
2813 }
2814
2815 /*
2816  * put character back.  Works only once!
2817  */
2818     static void
2819 ungetchr()
2820 {
2821     nextchr = curchr;
2822     curchr = prevchr;
2823     prevchr = prevprevchr;
2824     at_start = prev_at_start;
2825     prev_at_start = FALSE;
2826
2827     /* Backup regparse, so that it's at the same position as before the
2828      * getchr(). */
2829     regparse -= prevchr_len;
2830 }
2831
2832 /*
2833  * Get and return the value of the hex string at the current position.
2834  * Return -1 if there is no valid hex number.
2835  * The position is updated:
2836  *     blahblah\%x20asdf
2837  *         before-^ ^-after
2838  * The parameter controls the maximum number of input characters. This will be
2839  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2840  */
2841     static int
2842 gethexchrs(maxinputlen)
2843     int         maxinputlen;
2844 {
2845     int         nr = 0;
2846     int         c;
2847     int         i;
2848
2849     for (i = 0; i < maxinputlen; ++i)
2850     {
2851         c = regparse[0];
2852         if (!vim_isxdigit(c))
2853             break;
2854         nr <<= 4;
2855         nr |= hex2nr(c);
2856         ++regparse;
2857     }
2858
2859     if (i == 0)
2860         return -1;
2861     return nr;
2862 }
2863
2864 /*
2865  * get and return the value of the decimal string immediately after the
2866  * current position. Return -1 for invalid.  Consumes all digits.
2867  */
2868     static int
2869 getdecchrs()
2870 {
2871     int         nr = 0;
2872     int         c;
2873     int         i;
2874
2875     for (i = 0; ; ++i)
2876     {
2877         c = regparse[0];
2878         if (c < '0' || c > '9')
2879             break;
2880         nr *= 10;
2881         nr += c - '0';
2882         ++regparse;
2883     }
2884
2885     if (i == 0)
2886         return -1;
2887     return nr;
2888 }
2889
2890 /*
2891  * get and return the value of the octal string immediately after the current
2892  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2893  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2894  * treat 8 or 9 as recognised characters. Position is updated:
2895  *     blahblah\%o210asdf
2896  *         before-^  ^-after
2897  */
2898     static int
2899 getoctchrs()
2900 {
2901     int         nr = 0;
2902     int         c;
2903     int         i;
2904
2905     for (i = 0; i < 3 && nr < 040; ++i)
2906     {
2907         c = regparse[0];
2908         if (c < '0' || c > '7')
2909             break;
2910         nr <<= 3;
2911         nr |= hex2nr(c);
2912         ++regparse;
2913     }
2914
2915     if (i == 0)
2916         return -1;
2917     return nr;
2918 }
2919
2920 /*
2921  * Get a number after a backslash that is inside [].
2922  * When nothing is recognized return a backslash.
2923  */
2924     static int
2925 coll_get_char()
2926 {
2927     int     nr = -1;
2928
2929     switch (*regparse++)
2930     {
2931         case 'd': nr = getdecchrs(); break;
2932         case 'o': nr = getoctchrs(); break;
2933         case 'x': nr = gethexchrs(2); break;
2934         case 'u': nr = gethexchrs(4); break;
2935         case 'U': nr = gethexchrs(8); break;
2936     }
2937     if (nr < 0)
2938     {
2939         /* If getting the number fails be backwards compatible: the character
2940          * is a backslash. */
2941         --regparse;
2942         nr = '\\';
2943     }
2944     return nr;
2945 }
2946
2947 /*
2948  * read_limits - Read two integers to be taken as a minimum and maximum.
2949  * If the first character is '-', then the range is reversed.
2950  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2951  * missing, a very big number is the default.
2952  */
2953     static int
2954 read_limits(minval, maxval)
2955     long        *minval;
2956     long        *maxval;
2957 {
2958     int         reverse = FALSE;
2959     char_u      *first_char;
2960     long        tmp;
2961
2962     if (*regparse == '-')
2963     {
2964         /* Starts with '-', so reverse the range later */
2965         regparse++;
2966         reverse = TRUE;
2967     }
2968     first_char = regparse;
2969     *minval = getdigits(&regparse);
2970     if (*regparse == ',')           /* There is a comma */
2971     {
2972         if (vim_isdigit(*++regparse))
2973             *maxval = getdigits(&regparse);
2974         else
2975             *maxval = MAX_LIMIT;
2976     }
2977     else if (VIM_ISDIGIT(*first_char))
2978         *maxval = *minval;          /* It was \{n} or \{-n} */
2979     else
2980         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2981     if (*regparse == '\\')
2982         regparse++;     /* Allow either \{...} or \{...\} */
2983     if (*regparse != '}')
2984     {
2985         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2986                                           reg_magic == MAGIC_ALL ? "" : "\\");
2987         EMSG_RET_FAIL(IObuff);
2988     }
2989
2990     /*
2991      * Reverse the range if there was a '-', or make sure it is in the right
2992      * order otherwise.
2993      */
2994     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2995     {
2996         tmp = *minval;
2997         *minval = *maxval;
2998         *maxval = tmp;
2999     }
3000     skipchr();          /* let's be friends with the lexer again */
3001     return OK;
3002 }
3003
3004 /*
3005  * vim_regexec and friends
3006  */
3007
/*
 * Global work variables for vim_regexec().
 */

/* The current match-position is remembered with these variables: */
static linenr_T reglnum;        /* line number, relative to first line */
static char_u   *regline;       /* start of current line */
static char_u   *reginput;      /* current input, points into "regline" */

/* Flags for postponed clearing of the sub-expression positions. */
static int      need_clear_subexpr;     /* subexpressions still need to be
                                         * cleared */
#ifdef FEAT_SYN_HL
static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
                                                 * still need to be cleared */
#endif
3023
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of the union is in use, depending on whether a single-line
 * or a multi-line regexp is being matched.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;
    int         rs_len; /* the saved length of "backpos" */
} regsave_T;

/* struct to save start/end pointer/position in for \(\) */
typedef struct
{
    union
    {
        char_u  *ptr;   /* used when REG_MULTI is false, see restore_se() */
        lpos_T  pos;    /* used when REG_MULTI is true, see restore_se() */
    } se_u;
} save_se_T;
3048
3049 static char_u   *reg_getline __ARGS((linenr_T lnum));
3050 static long     vim_regexec_both __ARGS((char_u *line, colnr_T col));
3051 static long     regtry __ARGS((regprog_T *prog, colnr_T col));
3052 static void     cleanup_subexpr __ARGS((void));
3053 #ifdef FEAT_SYN_HL
3054 static void     cleanup_zsubexpr __ARGS((void));
3055 #endif
3056 static void     reg_nextline __ARGS((void));
3057 static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
3058 static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
3059 static int      reg_save_equal __ARGS((regsave_T *save));
3060 static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3061 static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));
3062
/* Save the sub-expressions before attempting a match.
 * "posp" is used for a multi-line match, "pp" for a single-line match.
 * Note that this expands to a conditional expression without surrounding
 * parentheses: use it as a statement of its own. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* After a failed match restore the sub-expressions.
 * "posp" is used for a multi-line match, "pp" for a single-line match.
 * Note that this expands to a block in braces, not to an expression. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }

static int      re_num_cmp __ARGS((long_u val, char_u *scan));
static int      regmatch __ARGS((char_u *prog));
static int      regrepeat __ARGS((char_u *p, long maxcount));

#ifdef DEBUG
/* Only available when DEBUG is defined (it is #undef'ed at the top of this
 * file).  NOTE(review): presumably switches on tracing of the matching --
 * confirm where it is used. */
int             regnarrate = 0;
#endif
3081
/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec(),
 * vim_regexec_nl() and vim_regexec_multi().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int      ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always: it is reset to FALSE at each
 * call to vim_regexec(), vim_regexec_nl() and vim_regexec_multi().
 */
static int      ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum, which is always the case for a single-line match.
 */
static colnr_T  ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in vim_regexec_both() when finished.
 */
static char_u   *reg_tofree;
static unsigned reg_tofreelen;
3110
/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 *                                              or NULL
 * reg_buf              <invalid>               buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr, relative to
 *                                              reg_firstlnum
 * reg_line_lbr         FALSE or TRUE           FALSE
 *
 * "<invalid>" means the variable is not set for that kind of match and must
 * not be used.
 */
static regmatch_T       *reg_match;
static regmmatch_T      *reg_mmatch;
static char_u           **reg_startp = NULL;
static char_u           **reg_endp = NULL;
static lpos_T           *reg_startpos = NULL;
static lpos_T           *reg_endpos = NULL;
static win_T            *reg_win;
static buf_T            *reg_buf;
static linenr_T         reg_firstlnum;
static linenr_T         reg_maxline;
static int              reg_line_lbr;       /* "\n" in string is line break */
3139
/* Values for rs_state in regitem_T.
 * Each comment names the regexp program node(s) the state belongs to.
 * The ZOPEN and ZCLOSE states only exist when FEAT_SYN_HL is defined. */
typedef enum regstate_E
{
    RS_NOPEN = 0        /* NOPEN and NCLOSE */
    , RS_MOPEN          /* MOPEN + [0-9] */
    , RS_MCLOSE         /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN          /* ZOPEN + [0-9] */
    , RS_ZCLOSE         /* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH         /* BRANCH */
    , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH        /* NOMATCH */
    , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3160
/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;      /* saved sub-expression start or end */
        regsave_T  regsave;     /* saved input state */
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr */
} regitem_T;
3178
3179 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3180 static void regstack_pop __ARGS((char_u **scan));
3181
/* used for BEHIND and NOBEHIND matching */
typedef struct regbehind_S
{
    regsave_T   save_after;
    regsave_T   save_behind;
} regbehind_T;

/* used for STAR, PLUS and BRACE_SIMPLE matching */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;

/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3206
/*
 * regstack and backpos are used by regmatch().  They are kept over calls to
 * avoid invoking malloc() and free() often.
 * vim_regexec_both() initializes regstack with an item size of one byte,
 * since different things are pushed onto it, and backpos with items of
 * sizeof(backpos_T).
 */
static garray_T regstack;       /* stack with regitem_T items, sometimes
                                   preceded by regstar_T or regbehind_T. */
static garray_T backpos;        /* table with backpos_T for BACK */
3214
3215 /*
3216  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3217  */
3218     static char_u *
3219 reg_getline(lnum)
3220     linenr_T    lnum;
3221 {
3222     /* when looking behind for a match/no-match lnum is negative.  But we
3223      * can't go before line 1 */
3224     if (reg_firstlnum + lnum < 1)
3225         return NULL;
3226     if (lnum > reg_maxline)
3227         /* Must have matched the "\n" in the last line. */
3228         return (char_u *)"";
3229     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3230 }
3231
/* NOTE(review): not documented here; judging by the name presumably an input
 * position used for look-behind matching -- confirm in regmatch(). */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
#endif

/* TRUE if using multi-line regexp: vim_regexec_multi() sets "reg_match" to
 * NULL, vim_regexec() and vim_regexec_nl() set it to their "rmp" argument. */
#define REG_MULTI       (reg_match == NULL)
3243
3244 /*
3245  * Match a regexp against a string.
3246  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3247  * Uses curbuf for line count and 'iskeyword'.
3248  *
3249  * Return TRUE if there is a match, FALSE if not.
3250  */
3251     int
3252 vim_regexec(rmp, line, col)
3253     regmatch_T  *rmp;
3254     char_u      *line;  /* string to match against */
3255     colnr_T     col;    /* column to start looking for match */
3256 {
3257     reg_match = rmp;
3258     reg_mmatch = NULL;
3259     reg_maxline = 0;
3260     reg_line_lbr = FALSE;
3261     reg_win = NULL;
3262     ireg_ic = rmp->rm_ic;
3263 #ifdef FEAT_MBYTE
3264     ireg_icombine = FALSE;
3265 #endif
3266     ireg_maxcol = 0;
3267     return (vim_regexec_both(line, col) != 0);
3268 }
3269
3270 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
3271         || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
3272 /*
3273  * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
3274  */
3275     int
3276 vim_regexec_nl(rmp, line, col)
3277     regmatch_T  *rmp;
3278     char_u      *line;  /* string to match against */
3279     colnr_T     col;    /* column to start looking for match */
3280 {
3281     reg_match = rmp;
3282     reg_mmatch = NULL;
3283     reg_maxline = 0;
3284     reg_line_lbr = TRUE;
3285     reg_win = NULL;
3286     ireg_ic = rmp->rm_ic;
3287 #ifdef FEAT_MBYTE
3288     ireg_icombine = FALSE;
3289 #endif
3290     ireg_maxcol = 0;
3291     return (vim_regexec_both(line, col) != 0);
3292 }
3293 #endif
3294
3295 /*
3296  * Match a regexp against multiple lines.
3297  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3298  * Uses curbuf for line count and 'iskeyword'.
3299  *
3300  * Return zero if there is no match.  Return number of lines contained in the
3301  * match otherwise.
3302  */
3303     long
3304 vim_regexec_multi(rmp, win, buf, lnum, col)
3305     regmmatch_T *rmp;
3306     win_T       *win;           /* window in which to search or NULL */
3307     buf_T       *buf;           /* buffer in which to search */
3308     linenr_T    lnum;           /* nr of line to start looking for match */
3309     colnr_T     col;            /* column to start looking for match */
3310 {
3311     long        r;
3312     buf_T       *save_curbuf = curbuf;
3313
3314     reg_match = NULL;
3315     reg_mmatch = rmp;
3316     reg_buf = buf;
3317     reg_win = win;
3318     reg_firstlnum = lnum;
3319     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3320     reg_line_lbr = FALSE;
3321     ireg_ic = rmp->rmm_ic;
3322 #ifdef FEAT_MBYTE
3323     ireg_icombine = FALSE;
3324 #endif
3325     ireg_maxcol = rmp->rmm_maxcol;
3326
3327     /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3328     curbuf = buf;
3329     r = vim_regexec_both(NULL, col);
3330     curbuf = save_curbuf;
3331
3332     return r;
3333 }
3334
3335 /*
3336  * Match a regexp against a string ("line" points to the string) or multiple
3337  * lines ("line" is NULL, use reg_getline()).
3338  */
3339     static long
3340 vim_regexec_both(line, col)
3341     char_u      *line;
3342     colnr_T     col;            /* column to start looking for match */
3343 {
3344     regprog_T   *prog;
3345     char_u      *s;
3346     long        retval = 0L;
3347
3348     reg_tofree = NULL;
3349
3350     /* Init the regstack empty.  Use an item size of 1 byte, since we push
3351      * different things onto it.  Use a large grow size to avoid reallocating
3352      * it too often. */
3353     ga_init2(&regstack, 1, 10000);
3354
3355     /* Init the backpos table empty. */
3356     ga_init2(&backpos, sizeof(backpos_T), 10);
3357
3358     if (REG_MULTI)
3359     {
3360         prog = reg_mmatch->regprog;
3361         line = reg_getline((linenr_T)0);
3362         reg_startpos = reg_mmatch->startpos;
3363         reg_endpos = reg_mmatch->endpos;
3364     }
3365     else
3366     {
3367         prog = reg_match->regprog;
3368         reg_startp = reg_match->startp;
3369         reg_endp = reg_match->endp;
3370     }
3371
3372     /* Be paranoid... */
3373     if (prog == NULL || line == NULL)
3374     {
3375         EMSG(_(e_null));
3376         goto theend;
3377     }
3378
3379     /* Check validity of program. */
3380     if (prog_magic_wrong())
3381         goto theend;
3382
3383     /* If the start column is past the maximum column: no need to try. */
3384     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3385         goto theend;
3386
3387     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3388     if (prog->regflags & RF_ICASE)
3389         ireg_ic = TRUE;
3390     else if (prog->regflags & RF_NOICASE)
3391         ireg_ic = FALSE;
3392
3393 #ifdef FEAT_MBYTE
3394     /* If pattern contains "\Z" overrule value of ireg_icombine */
3395     if (prog->regflags & RF_ICOMBINE)
3396         ireg_icombine = TRUE;
3397 #endif
3398
3399     /* If there is a "must appear" string, look for it. */
3400     if (prog->regmust != NULL)
3401     {
3402         int c;
3403
3404 #ifdef FEAT_MBYTE
3405         if (has_mbyte)
3406             c = (*mb_ptr2char)(prog->regmust);
3407         else
3408 #endif
3409             c = *prog->regmust;
3410         s = line + col;
3411
3412         /*
3413          * This is used very often, esp. for ":global".  Use three versions of
3414          * the loop to avoid overhead of conditions.
3415          */
3416         if (!ireg_ic
3417 #ifdef FEAT_MBYTE
3418                 && !has_mbyte
3419 #endif
3420                 )
3421             while ((s = vim_strbyte(s, c)) != NULL)
3422             {
3423                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3424                     break;              /* Found it. */
3425                 ++s;
3426             }
3427 #ifdef FEAT_MBYTE
3428         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3429             while ((s = vim_strchr(s, c)) != NULL)
3430             {
3431                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3432                     break;              /* Found it. */
3433                 mb_ptr_adv(s);
3434             }
3435 #endif
3436         else
3437             while ((s = cstrchr(s, c)) != NULL)
3438             {
3439                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3440                     break;              /* Found it. */
3441                 mb_ptr_adv(s);
3442             }
3443         if (s == NULL)          /* Not present. */
3444             goto theend;
3445     }
3446
3447     regline = line;
3448     reglnum = 0;
3449
3450     /* Simplest case: Anchored match need be tried only once. */
3451     if (prog->reganch)
3452     {
3453         int     c;
3454
3455 #ifdef FEAT_MBYTE
3456         if (has_mbyte)
3457             c = (*mb_ptr2char)(regline + col);
3458         else
3459 #endif
3460             c = regline[col];
3461         if (prog->regstart == NUL
3462                 || prog->regstart == c
3463                 || (ireg_ic && ((
3464 #ifdef FEAT_MBYTE
3465                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3466                         || (c < 255 && prog->regstart < 255 &&
3467 #endif
3468                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3469             retval = regtry(prog, col);
3470         else
3471             retval = 0;
3472     }
3473     else
3474     {
3475         /* Messy cases:  unanchored match. */
3476         while (!got_int)
3477         {
3478             if (prog->regstart != NUL)
3479             {
3480                 /* Skip until the char we know it must start with.
3481                  * Used often, do some work to avoid call overhead. */
3482                 if (!ireg_ic
3483 #ifdef FEAT_MBYTE
3484                             && !has_mbyte
3485 #endif
3486                             )
3487                     s = vim_strbyte(regline + col, prog->regstart);
3488                 else
3489                     s = cstrchr(regline + col, prog->regstart);
3490                 if (s == NULL)
3491                 {
3492                     retval = 0;
3493                     break;
3494                 }
3495                 col = (int)(s - regline);
3496             }
3497
3498             /* Check for maximum column to try. */
3499             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3500             {
3501                 retval = 0;
3502                 break;
3503             }
3504
3505             retval = regtry(prog, col);
3506             if (retval > 0)
3507                 break;
3508
3509             /* if not currently on the first line, get it again */
3510             if (reglnum != 0)
3511             {
3512                 reglnum = 0;
3513                 regline = reg_getline((linenr_T)0);
3514             }
3515             if (regline[col] == NUL)
3516                 break;
3517 #ifdef FEAT_MBYTE
3518             if (has_mbyte)
3519                 col += (*mb_ptr2len)(regline + col);
3520             else
3521 #endif
3522                 ++col;
3523         }
3524     }
3525
3526 theend:
3527     vim_free(reg_tofree);
3528     ga_clear(&regstack);
3529     ga_clear(&backpos);
3530
3531     return retval;
3532 }
3533
3534 #ifdef FEAT_SYN_HL
3535 static reg_extmatch_T *make_extmatch __ARGS((void));
3536
3537 /*
3538  * Create a new extmatch and mark it as referenced once.
3539  */
3540     static reg_extmatch_T *
3541 make_extmatch()
3542 {
3543     reg_extmatch_T      *em;
3544
3545     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3546     if (em != NULL)
3547         em->refcnt = 1;
3548     return em;
3549 }
3550
3551 /*
3552  * Add a reference to an extmatch.
3553  */
3554     reg_extmatch_T *
3555 ref_extmatch(em)
3556     reg_extmatch_T      *em;
3557 {
3558     if (em != NULL)
3559         em->refcnt++;
3560     return em;
3561 }
3562
3563 /*
3564  * Remove a reference to an extmatch.  If there are no references left, free
3565  * the info.
3566  */
3567     void
3568 unref_extmatch(em)
3569     reg_extmatch_T      *em;
3570 {
3571     int i;
3572
3573     if (em != NULL && --em->refcnt <= 0)
3574     {
3575         for (i = 0; i < NSUBEXP; ++i)
3576             vim_free(em->matches[i]);
3577         vim_free(em);
3578     }
3579 }
3580 #endif
3581
3582 /*
3583  * regtry - try match of "prog" with at regline["col"].
3584  * Returns 0 for failure, number of lines contained in the match otherwise.
3585  */
3586     static long
3587 regtry(prog, col)
3588     regprog_T   *prog;
3589     colnr_T     col;
3590 {
3591     reginput = regline + col;
3592     need_clear_subexpr = TRUE;
3593 #ifdef FEAT_SYN_HL
3594     /* Clear the external match subpointers if necessary. */
3595     if (prog->reghasz == REX_SET)
3596         need_clear_zsubexpr = TRUE;
3597 #endif
3598
3599     if (regmatch(prog->program + 1) == 0)
3600         return 0;
3601
3602     cleanup_subexpr();
3603     if (REG_MULTI)
3604     {
3605         if (reg_startpos[0].lnum < 0)
3606         {
3607             reg_startpos[0].lnum = 0;
3608             reg_startpos[0].col = col;
3609         }
3610         if (reg_endpos[0].lnum < 0)
3611         {
3612             reg_endpos[0].lnum = reglnum;
3613             reg_endpos[0].col = (int)(reginput - regline);
3614         }
3615         else
3616             /* Use line number of "\ze". */
3617             reglnum = reg_endpos[0].lnum;
3618     }
3619     else
3620     {
3621         if (reg_startp[0] == NULL)
3622             reg_startp[0] = regline + col;
3623         if (reg_endp[0] == NULL)
3624             reg_endp[0] = reginput;
3625     }
3626 #ifdef FEAT_SYN_HL
3627     /* Package any found \z(...\) matches for export. Default is none. */
3628     unref_extmatch(re_extmatch_out);
3629     re_extmatch_out = NULL;
3630
3631     if (prog->reghasz == REX_SET)
3632     {
3633         int             i;
3634
3635         cleanup_zsubexpr();
3636         re_extmatch_out = make_extmatch();
3637         for (i = 0; i < NSUBEXP; i++)
3638         {
3639             if (REG_MULTI)
3640             {
3641                 /* Only accept single line matches. */
3642                 if (reg_startzpos[i].lnum >= 0
3643                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3644                     re_extmatch_out->matches[i] =
3645                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3646                                                        + reg_startzpos[i].col,
3647                                    reg_endzpos[i].col - reg_startzpos[i].col);
3648             }
3649             else
3650             {
3651                 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3652                     re_extmatch_out->matches[i] =
3653                             vim_strnsave(reg_startzp[i],
3654                                         (int)(reg_endzp[i] - reg_startzp[i]));
3655             }
3656         }
3657     }
3658 #endif
3659     return 1 + reglnum;
3660 }
3661
3662 #ifdef FEAT_MBYTE
3663 static int reg_prev_class __ARGS((void));
3664
3665 /*
3666  * Get class of previous character.
3667  */
3668     static int
3669 reg_prev_class()
3670 {
3671     if (reginput > regline)
3672         return mb_get_class(reginput - 1
3673                                      - (*mb_head_off)(regline, reginput - 1));
3674     return -1;
3675 }
3676
3677 #endif
/* Advance "reginput" with mb_ptr_adv(); presumably that moves it forward by
 * one (possibly multi-byte) character. */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;      /* presumably the minimum of BRACE_LIMITS */
static long     bl_maxval;      /* presumably the maximum of BRACE_LIMITS */
3687
3688 /*
3689  * regmatch - main matching routine
3690  *
3691  * Conceptually the strategy is simple: Check to see whether the current node
3692  * matches, push an item onto the regstack and loop to see whether the rest
3693  * matches, and then act accordingly.  In practice we make some effort to
3694  * avoid using the regstack, in particular by going through "ordinary" nodes
3695  * (that don't need to know whether the rest of the match failed) by a nested
3696  * loop.
3697  *
3698  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3699  * the last matched character.
3700  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3701  * undefined state!
3702  */
3703     static int
3704 regmatch(scan)
3705     char_u      *scan;          /* Current node. */
3706 {
3707   char_u        *next;          /* Next node. */
3708   int           op;
3709   int           c;
3710   regitem_T     *rp;
3711   int           no;
3712   int           status;         /* one of the RA_ values: */
3713 #define RA_FAIL         1       /* something failed, abort */
3714 #define RA_CONT         2       /* continue in inner loop */
3715 #define RA_BREAK        3       /* break inner loop */
3716 #define RA_MATCH        4       /* successful match */
3717 #define RA_NOMATCH      5       /* didn't match */
3718
3719   /* Init the regstack and backpos table empty.  They are initialized and
3720    * freed in vim_regexec_both() to reduce malloc()/free() calls. */
3721   regstack.ga_len = 0;
3722   backpos.ga_len = 0;
3723
3724   /*
3725    * Repeat until "regstack" is empty.
3726    */
3727   for (;;)
3728   {
    /* Some patterns may take a long time to match, even though they are not
     * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3731     fast_breakcheck();
3732
3733 #ifdef DEBUG
3734     if (scan != NULL && regnarrate)
3735     {
3736         mch_errmsg(regprop(scan));
3737         mch_errmsg("(\n");
3738     }
3739 #endif
3740
3741     /*
3742      * Repeat for items that can be matched sequentially, without using the
3743      * regstack.
3744      */
3745     for (;;)
3746     {
3747         if (got_int || scan == NULL)
3748         {
3749             status = RA_FAIL;
3750             break;
3751         }
3752         status = RA_CONT;
3753
3754 #ifdef DEBUG
3755         if (regnarrate)
3756         {
3757             mch_errmsg(regprop(scan));
3758             mch_errmsg("...\n");
3759 # ifdef FEAT_SYN_HL
3760             if (re_extmatch_in != NULL)
3761             {
3762                 int i;
3763
3764                 mch_errmsg(_("External submatches:\n"));
3765                 for (i = 0; i < NSUBEXP; i++)
3766                 {
3767                     mch_errmsg("    \"");
3768                     if (re_extmatch_in->matches[i] != NULL)
3769                         mch_errmsg(re_extmatch_in->matches[i]);
3770                     mch_errmsg("\"\n");
3771                 }
3772             }
3773 # endif
3774         }
3775 #endif
3776         next = regnext(scan);
3777
3778         op = OP(scan);
3779         /* Check for character class with NL added. */
3780         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3781                                 && *reginput == NUL && reglnum <= reg_maxline)
3782         {
3783             reg_nextline();
3784         }
3785         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3786         {
3787             ADVANCE_REGINPUT();
3788         }
3789         else
3790         {
3791           if (WITH_NL(op))
3792               op -= ADD_NL;
3793 #ifdef FEAT_MBYTE
3794           if (has_mbyte)
3795               c = (*mb_ptr2char)(reginput);
3796           else
3797 #endif
3798               c = *reginput;
3799           switch (op)
3800           {
3801           case BOL:
3802             if (reginput != regline)
3803                 status = RA_NOMATCH;
3804             break;
3805
3806           case EOL:
3807             if (c != NUL)
3808                 status = RA_NOMATCH;
3809             break;
3810
3811           case RE_BOF:
3812             /* Passing -1 to the getline() function provided for the search
3813              * should always return NULL if the current line is the first
3814              * line of the file. */
3815             if (reglnum != 0 || reginput != regline
3816                         || (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
3817                 status = RA_NOMATCH;
3818             break;
3819
3820           case RE_EOF:
3821             if (reglnum != reg_maxline || c != NUL)
3822                 status = RA_NOMATCH;
3823             break;
3824
3825           case CURSOR:
3826             /* Check if the buffer is in a window and compare the
3827              * reg_win->w_cursor position to the match position. */
3828             if (reg_win == NULL
3829                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3830                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3831                 status = RA_NOMATCH;
3832             break;
3833
3834           case RE_MARK:
3835             /* Compare the mark position to the match position.  NOTE: Always
3836              * uses the current buffer. */
3837             {
3838                 int     mark = OPERAND(scan)[0];
3839                 int     cmp = OPERAND(scan)[1];
3840                 pos_T   *pos;
3841
3842                 pos = getmark(mark, FALSE);
3843                 if (pos == NULL              /* mark doesn't exist */
3844                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3845                         || (pos->lnum == reglnum + reg_firstlnum
3846                                 ? (pos->col == (colnr_T)(reginput - regline)
3847                                     ? (cmp == '<' || cmp == '>')
3848                                     : (pos->col < (colnr_T)(reginput - regline)
3849                                         ? cmp != '>'
3850                                         : cmp != '<'))
3851                                 : (pos->lnum < reglnum + reg_firstlnum
3852                                     ? cmp != '>'
3853                                     : cmp != '<')))
3854                     status = RA_NOMATCH;
3855             }
3856             break;
3857
3858           case RE_VISUAL:
3859 #ifdef FEAT_VISUAL
            /* Check if the buffer is the current buffer and whether the
             * position is inside the Visual area. */
3862             if (reg_buf != curbuf || VIsual.lnum == 0)
3863                 status = RA_NOMATCH;
3864             else
3865             {
3866                 pos_T       top, bot;
3867                 linenr_T    lnum;
3868                 colnr_T     col;
3869                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3870                 int         mode;
3871
3872                 if (VIsual_active)
3873                 {
3874                     if (lt(VIsual, wp->w_cursor))
3875                     {
3876                         top = VIsual;
3877                         bot = wp->w_cursor;
3878                     }
3879                     else
3880                     {
3881                         top = wp->w_cursor;
3882                         bot = VIsual;
3883                     }
3884                     mode = VIsual_mode;
3885                 }
3886                 else
3887                 {
3888                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3889                     {
3890                         top = curbuf->b_visual.vi_start;
3891                         bot = curbuf->b_visual.vi_end;
3892                     }
3893                     else
3894                     {
3895                         top = curbuf->b_visual.vi_end;
3896                         bot = curbuf->b_visual.vi_start;
3897                     }
3898                     mode = curbuf->b_visual.vi_mode;
3899                 }
3900                 lnum = reglnum + reg_firstlnum;
3901                 col = (colnr_T)(reginput - regline);
3902                 if (lnum < top.lnum || lnum > bot.lnum)
3903                     status = RA_NOMATCH;
3904                 else if (mode == 'v')
3905                 {
3906                     if ((lnum == top.lnum && col < top.col)
3907                             || (lnum == bot.lnum
3908                                          && col >= bot.col + (*p_sel != 'e')))
3909                         status = RA_NOMATCH;
3910                 }
3911                 else if (mode == Ctrl_V)
3912                 {
3913                     colnr_T         start, end;
3914                     colnr_T         start2, end2;
3915                     colnr_T         cols;
3916
3917                     getvvcol(wp, &top, &start, NULL, &end);
3918                     getvvcol(wp, &bot, &start2, NULL, &end2);
3919                     if (start2 < start)
3920                         start = start2;
3921                     if (end2 > end)
3922                         end = end2;
3923                     if (top.col == MAXCOL || bot.col == MAXCOL)
3924                         end = MAXCOL;
3925                     cols = win_linetabsize(wp,
3926                                       regline, (colnr_T)(reginput - regline));
3927                     if (cols < start || cols > end - (*p_sel == 'e'))
3928                         status = RA_NOMATCH;
3929                 }
3930             }
3931 #else
3932             status = RA_NOMATCH;
3933 #endif
3934             break;
3935
3936           case RE_LNUM:
3937             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3938                                                                         scan))
3939                 status = RA_NOMATCH;
3940             break;
3941
3942           case RE_COL:
3943             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
3944                 status = RA_NOMATCH;
3945             break;
3946
3947           case RE_VCOL:
3948             if (!re_num_cmp((long_u)win_linetabsize(
3949                             reg_win == NULL ? curwin : reg_win,
3950                             regline, (colnr_T)(reginput - regline)) + 1, scan))
3951                 status = RA_NOMATCH;
3952             break;
3953
3954           case BOW:     /* \<word; reginput points to w */
3955             if (c == NUL)       /* Can't match at end of line */
3956                 status = RA_NOMATCH;
3957 #ifdef FEAT_MBYTE
3958             else if (has_mbyte)
3959             {
3960                 int this_class;
3961
3962                 /* Get class of current and previous char (if it exists). */
3963                 this_class = mb_get_class(reginput);
3964                 if (this_class <= 1)
3965                     status = RA_NOMATCH;  /* not on a word at all */
3966                 else if (reg_prev_class() == this_class)
3967                     status = RA_NOMATCH;  /* previous char is in same word */
3968             }
3969 #endif
3970             else
3971             {
3972                 if (!vim_iswordc(c)
3973                         || (reginput > regline && vim_iswordc(reginput[-1])))
3974                     status = RA_NOMATCH;
3975             }
3976             break;
3977
3978           case EOW:     /* word\>; reginput points after d */
3979             if (reginput == regline)    /* Can't match at start of line */
3980                 status = RA_NOMATCH;
3981 #ifdef FEAT_MBYTE
3982             else if (has_mbyte)
3983             {
3984                 int this_class, prev_class;
3985
3986                 /* Get class of current and previous char (if it exists). */
3987                 this_class = mb_get_class(reginput);
3988                 prev_class = reg_prev_class();
3989                 if (this_class == prev_class
3990                         || prev_class == 0 || prev_class == 1)
3991                     status = RA_NOMATCH;
3992             }
3993 #endif
3994             else
3995             {
3996                 if (!vim_iswordc(reginput[-1])
3997                         || (reginput[0] != NUL && vim_iswordc(c)))
3998                     status = RA_NOMATCH;
3999             }
4000             break; /* Matched with EOW */
4001
4002           case ANY:
4003             if (c == NUL)
4004                 status = RA_NOMATCH;
4005             else
4006                 ADVANCE_REGINPUT();
4007             break;
4008
4009           case IDENT:
4010             if (!vim_isIDc(c))
4011                 status = RA_NOMATCH;
4012             else
4013                 ADVANCE_REGINPUT();
4014             break;
4015
4016           case SIDENT:
4017             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4018                 status = RA_NOMATCH;
4019             else
4020                 ADVANCE_REGINPUT();
4021             break;
4022
4023           case KWORD:
4024             if (!vim_iswordp(reginput))
4025                 status = RA_NOMATCH;
4026             else
4027                 ADVANCE_REGINPUT();
4028             break;
4029
4030           case SKWORD:
4031             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4032                 status = RA_NOMATCH;
4033             else
4034                 ADVANCE_REGINPUT();
4035             break;
4036
4037           case FNAME:
4038             if (!vim_isfilec(c))
4039                 status = RA_NOMATCH;
4040             else
4041                 ADVANCE_REGINPUT();
4042             break;
4043
4044           case SFNAME:
4045             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4046                 status = RA_NOMATCH;
4047             else
4048                 ADVANCE_REGINPUT();
4049             break;
4050
4051           case PRINT:
4052             if (ptr2cells(reginput) != 1)
4053                 status = RA_NOMATCH;
4054             else
4055                 ADVANCE_REGINPUT();
4056             break;
4057
4058           case SPRINT:
4059             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4060                 status = RA_NOMATCH;
4061             else
4062                 ADVANCE_REGINPUT();
4063             break;
4064
4065           case WHITE:
4066             if (!vim_iswhite(c))
4067                 status = RA_NOMATCH;
4068             else
4069                 ADVANCE_REGINPUT();
4070             break;
4071
4072           case NWHITE:
4073             if (c == NUL || vim_iswhite(c))
4074                 status = RA_NOMATCH;
4075             else
4076                 ADVANCE_REGINPUT();
4077             break;
4078
4079           case DIGIT:
4080             if (!ri_digit(c))
4081                 status = RA_NOMATCH;
4082             else
4083                 ADVANCE_REGINPUT();
4084             break;
4085
4086           case NDIGIT:
4087             if (c == NUL || ri_digit(c))
4088                 status = RA_NOMATCH;
4089             else
4090                 ADVANCE_REGINPUT();
4091             break;
4092
4093           case HEX:
4094             if (!ri_hex(c))
4095                 status = RA_NOMATCH;
4096             else
4097                 ADVANCE_REGINPUT();
4098             break;
4099
4100           case NHEX:
4101             if (c == NUL || ri_hex(c))
4102                 status = RA_NOMATCH;
4103             else
4104                 ADVANCE_REGINPUT();
4105             break;
4106
4107           case OCTAL:
4108             if (!ri_octal(c))
4109                 status = RA_NOMATCH;
4110             else
4111                 ADVANCE_REGINPUT();
4112             break;
4113
4114           case NOCTAL:
4115             if (c == NUL || ri_octal(c))
4116                 status = RA_NOMATCH;
4117             else
4118                 ADVANCE_REGINPUT();
4119             break;
4120
4121           case WORD:
4122             if (!ri_word(c))
4123                 status = RA_NOMATCH;
4124             else
4125                 ADVANCE_REGINPUT();
4126             break;
4127
4128           case NWORD:
4129             if (c == NUL || ri_word(c))
4130                 status = RA_NOMATCH;
4131             else
4132                 ADVANCE_REGINPUT();
4133             break;
4134
4135           case HEAD:
4136             if (!ri_head(c))
4137                 status = RA_NOMATCH;
4138             else
4139                 ADVANCE_REGINPUT();
4140             break;
4141
4142           case NHEAD:
4143             if (c == NUL || ri_head(c))
4144                 status = RA_NOMATCH;
4145             else
4146                 ADVANCE_REGINPUT();
4147             break;
4148
4149           case ALPHA:
4150             if (!ri_alpha(c))
4151                 status = RA_NOMATCH;
4152             else
4153                 ADVANCE_REGINPUT();
4154             break;
4155
4156           case NALPHA:
4157             if (c == NUL || ri_alpha(c))
4158                 status = RA_NOMATCH;
4159             else
4160                 ADVANCE_REGINPUT();
4161             break;
4162
4163           case LOWER:
4164             if (!ri_lower(c))
4165                 status = RA_NOMATCH;
4166             else
4167                 ADVANCE_REGINPUT();
4168             break;
4169
4170           case NLOWER:
4171             if (c == NUL || ri_lower(c))
4172                 status = RA_NOMATCH;
4173             else
4174                 ADVANCE_REGINPUT();
4175             break;
4176
4177           case UPPER:
4178             if (!ri_upper(c))
4179                 status = RA_NOMATCH;
4180             else
4181                 ADVANCE_REGINPUT();
4182             break;
4183
4184           case NUPPER:
4185             if (c == NUL || ri_upper(c))
4186                 status = RA_NOMATCH;
4187             else
4188                 ADVANCE_REGINPUT();
4189             break;
4190
4191           case EXACTLY:
4192             {
4193                 int     len;
4194                 char_u  *opnd;
4195
4196                 opnd = OPERAND(scan);
4197                 /* Inline the first byte, for speed. */
4198                 if (*opnd != *reginput
4199                         && (!ireg_ic || (
4200 #ifdef FEAT_MBYTE
4201                             !enc_utf8 &&
4202 #endif
4203                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4204                     status = RA_NOMATCH;
4205                 else if (*opnd == NUL)
4206                 {
4207                     /* match empty string always works; happens when "~" is
4208                      * empty. */
4209                 }
4210                 else if (opnd[1] == NUL
4211 #ifdef FEAT_MBYTE
4212                             && !(enc_utf8 && ireg_ic)
4213 #endif
4214                         )
4215                     ++reginput;         /* matched a single char */
4216                 else
4217                 {
4218                     len = (int)STRLEN(opnd);
4219                     /* Need to match first byte again for multi-byte. */
4220                     if (cstrncmp(opnd, reginput, &len) != 0)
4221                         status = RA_NOMATCH;
4222 #ifdef FEAT_MBYTE
4223                     /* Check for following composing character. */
4224                     else if (enc_utf8
4225                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4226                     {
4227                         /* raaron: This code makes a composing character get
4228                          * ignored, which is the correct behavior (sometimes)
4229                          * for voweled Hebrew texts. */
4230                         if (!ireg_icombine)
4231                             status = RA_NOMATCH;
4232                     }
4233 #endif
4234                     else
4235                         reginput += len;
4236                 }
4237             }
4238             break;
4239
4240           case ANYOF:
4241           case ANYBUT:
4242             if (c == NUL)
4243                 status = RA_NOMATCH;
4244             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4245                 status = RA_NOMATCH;
4246             else
4247                 ADVANCE_REGINPUT();
4248             break;
4249
4250 #ifdef FEAT_MBYTE
4251           case MULTIBYTECODE:
4252             if (has_mbyte)
4253             {
4254                 int     i, len;
4255                 char_u  *opnd;
4256                 int     opndc = 0, inpc;
4257
4258                 opnd = OPERAND(scan);
4259                 /* Safety check (just in case 'encoding' was changed since
4260                  * compiling the program). */
4261                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4262                 {
4263                     status = RA_NOMATCH;
4264                     break;
4265                 }
4266                 if (enc_utf8)
4267                     opndc = mb_ptr2char(opnd);
4268                 if (enc_utf8 && utf_iscomposing(opndc))
4269                 {
4270                     /* When only a composing char is given match at any
4271                      * position where that composing char appears. */
4272                     status = RA_NOMATCH;
4273                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4274                     {
4275                         inpc = mb_ptr2char(reginput + i);
4276                         if (!utf_iscomposing(inpc))
4277                         {
4278                             if (i > 0)
4279                                 break;
4280                         }
4281                         else if (opndc == inpc)
4282                         {
4283                             /* Include all following composing chars. */
4284                             len = i + mb_ptr2len(reginput + i);
4285                             status = RA_MATCH;
4286                             break;
4287                         }
4288                     }
4289                 }
4290                 else
4291                     for (i = 0; i < len; ++i)
4292                         if (opnd[i] != reginput[i])
4293                         {
4294                             status = RA_NOMATCH;
4295                             break;
4296                         }
4297                 reginput += len;
4298             }
4299             else
4300                 status = RA_NOMATCH;
4301             break;
4302 #endif
4303
4304           case NOTHING:
4305             break;
4306
4307           case BACK:
4308             {
4309                 int             i;
4310                 backpos_T       *bp;
4311
4312                 /*
4313                  * When we run into BACK we need to check if we don't keep
4314                  * looping without matching any input.  The second and later
4315                  * times a BACK is encountered it fails if the input is still
4316                  * at the same position as the previous time.
4317                  * The positions are stored in "backpos" and found by the
4318                  * current value of "scan", the position in the RE program.
4319                  */
4320                 bp = (backpos_T *)backpos.ga_data;
4321                 for (i = 0; i < backpos.ga_len; ++i)
4322                     if (bp[i].bp_scan == scan)
4323                         break;
4324                 if (i == backpos.ga_len)
4325                 {
4326                     /* First time at this BACK, make room to store the pos. */
4327                     if (ga_grow(&backpos, 1) == FAIL)
4328                         status = RA_FAIL;
4329                     else
4330                     {
4331                         /* get "ga_data" again, it may have changed */
4332                         bp = (backpos_T *)backpos.ga_data;
4333                         bp[i].bp_scan = scan;
4334                         ++backpos.ga_len;
4335                     }
4336                 }
4337                 else if (reg_save_equal(&bp[i].bp_pos))
4338                     /* Still at same position as last time, fail. */
4339                     status = RA_NOMATCH;
4340
4341                 if (status != RA_FAIL && status != RA_NOMATCH)
4342                     reg_save(&bp[i].bp_pos, &backpos);
4343             }
4344             break;
4345
4346           case MOPEN + 0:   /* Match start: \zs */
4347           case MOPEN + 1:   /* \( */
4348           case MOPEN + 2:
4349           case MOPEN + 3:
4350           case MOPEN + 4:
4351           case MOPEN + 5:
4352           case MOPEN + 6:
4353           case MOPEN + 7:
4354           case MOPEN + 8:
4355           case MOPEN + 9:
4356             {
4357                 no = op - MOPEN;
4358                 cleanup_subexpr();
4359                 rp = regstack_push(RS_MOPEN, scan);
4360                 if (rp == NULL)
4361                     status = RA_FAIL;
4362                 else
4363                 {
4364                     rp->rs_no = no;
4365                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4366                                                              &reg_startp[no]);
4367                     /* We simply continue and handle the result when done. */
4368                 }
4369             }
4370             break;
4371
4372           case NOPEN:       /* \%( */
4373           case NCLOSE:      /* \) after \%( */
4374                 if (regstack_push(RS_NOPEN, scan) == NULL)
4375                     status = RA_FAIL;
4376                 /* We simply continue and handle the result when done. */
4377                 break;
4378
4379 #ifdef FEAT_SYN_HL
4380           case ZOPEN + 1:
4381           case ZOPEN + 2:
4382           case ZOPEN + 3:
4383           case ZOPEN + 4:
4384           case ZOPEN + 5:
4385           case ZOPEN + 6:
4386           case ZOPEN + 7:
4387           case ZOPEN + 8:
4388           case ZOPEN + 9:
4389             {
4390                 no = op - ZOPEN;
4391                 cleanup_zsubexpr();
4392                 rp = regstack_push(RS_ZOPEN, scan);
4393                 if (rp == NULL)
4394                     status = RA_FAIL;
4395                 else
4396                 {
4397                     rp->rs_no = no;
4398                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4399                                                              &reg_startzp[no]);
4400                     /* We simply continue and handle the result when done. */
4401                 }
4402             }
4403             break;
4404 #endif
4405
4406           case MCLOSE + 0:  /* Match end: \ze */
4407           case MCLOSE + 1:  /* \) */
4408           case MCLOSE + 2:
4409           case MCLOSE + 3:
4410           case MCLOSE + 4:
4411           case MCLOSE + 5:
4412           case MCLOSE + 6:
4413           case MCLOSE + 7:
4414           case MCLOSE + 8:
4415           case MCLOSE + 9:
4416             {
4417                 no = op - MCLOSE;
4418                 cleanup_subexpr();
4419                 rp = regstack_push(RS_MCLOSE, scan);
4420                 if (rp == NULL)
4421                     status = RA_FAIL;
4422                 else
4423                 {
4424                     rp->rs_no = no;
4425                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4426                     /* We simply continue and handle the result when done. */
4427                 }
4428             }
4429             break;
4430
4431 #ifdef FEAT_SYN_HL
4432           case ZCLOSE + 1:  /* \) after \z( */
4433           case ZCLOSE + 2:
4434           case ZCLOSE + 3:
4435           case ZCLOSE + 4:
4436           case ZCLOSE + 5:
4437           case ZCLOSE + 6:
4438           case ZCLOSE + 7:
4439           case ZCLOSE + 8:
4440           case ZCLOSE + 9:
4441             {
4442                 no = op - ZCLOSE;
4443                 cleanup_zsubexpr();
4444                 rp = regstack_push(RS_ZCLOSE, scan);
4445                 if (rp == NULL)
4446                     status = RA_FAIL;
4447                 else
4448                 {
4449                     rp->rs_no = no;
4450                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4451                                                               &reg_endzp[no]);
4452                     /* We simply continue and handle the result when done. */
4453                 }
4454             }
4455             break;
4456 #endif
4457
4458           case BACKREF + 1:
4459           case BACKREF + 2:
4460           case BACKREF + 3:
4461           case BACKREF + 4:
4462           case BACKREF + 5:
4463           case BACKREF + 6:
4464           case BACKREF + 7:
4465           case BACKREF + 8:
4466           case BACKREF + 9:
4467             {
4468                 int             len;
4469                 linenr_T        clnum;
4470                 colnr_T         ccol;
4471                 char_u          *p;
4472
4473                 no = op - BACKREF;
4474                 cleanup_subexpr();
4475                 if (!REG_MULTI)         /* Single-line regexp */
4476                 {
4477                     if (reg_endp[no] == NULL)
4478                     {
4479                         /* Backref was not set: Match an empty string. */
4480                         len = 0;
4481                     }
4482                     else
4483                     {
4484                         /* Compare current input with back-ref in the same
4485                          * line. */
4486                         len = (int)(reg_endp[no] - reg_startp[no]);
4487                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4488                             status = RA_NOMATCH;
4489                     }
4490                 }
4491                 else                            /* Multi-line regexp */
4492                 {
4493                     if (reg_endpos[no].lnum < 0)
4494                     {
4495                         /* Backref was not set: Match an empty string. */
4496                         len = 0;
4497                     }
4498                     else
4499                     {
4500                         if (reg_startpos[no].lnum == reglnum
4501                                 && reg_endpos[no].lnum == reglnum)
4502                         {
4503                             /* Compare back-ref within the current line. */
4504                             len = reg_endpos[no].col - reg_startpos[no].col;
4505                             if (cstrncmp(regline + reg_startpos[no].col,
4506                                                           reginput, &len) != 0)
4507                                 status = RA_NOMATCH;
4508                         }
4509                         else
4510                         {
4511                             /* Messy situation: Need to compare between two
4512                              * lines. */
4513                             ccol = reg_startpos[no].col;
4514                             clnum = reg_startpos[no].lnum;
4515                             for (;;)
4516                             {
4517                                 /* Since getting one line may invalidate
4518                                  * the other, need to make copy.  Slow! */
4519                                 if (regline != reg_tofree)
4520                                 {
4521                                     len = (int)STRLEN(regline);
4522                                     if (reg_tofree == NULL
4523                                                  || len >= (int)reg_tofreelen)
4524                                     {
4525                                         len += 50;      /* get some extra */
4526                                         vim_free(reg_tofree);
4527                                         reg_tofree = alloc(len);
4528                                         if (reg_tofree == NULL)
4529                                         {
4530                                             status = RA_FAIL; /* outof memory!*/
4531                                             break;
4532                                         }
4533                                         reg_tofreelen = len;
4534                                     }
4535                                     STRCPY(reg_tofree, regline);
4536                                     reginput = reg_tofree
4537                                                        + (reginput - regline);
4538                                     regline = reg_tofree;
4539                                 }
4540
4541                                 /* Get the line to compare with. */
4542                                 p = reg_getline(clnum);
4543                                 if (clnum == reg_endpos[no].lnum)
4544                                     len = reg_endpos[no].col - ccol;
4545                                 else
4546                                     len = (int)STRLEN(p + ccol);
4547
4548                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4549                                 {
4550                                     status = RA_NOMATCH;  /* doesn't match */
4551                                     break;
4552                                 }
4553                                 if (clnum == reg_endpos[no].lnum)
4554                                     break;              /* match and at end! */
4555                                 if (reglnum >= reg_maxline)
4556                                 {
4557                                     status = RA_NOMATCH;  /* text too short */
4558                                     break;
4559                                 }
4560
4561                                 /* Advance to next line. */
4562                                 reg_nextline();
4563                                 ++clnum;
4564                                 ccol = 0;
4565                                 if (got_int)
4566                                 {
4567                                     status = RA_FAIL;
4568                                     break;
4569                                 }
4570                             }
4571
4572                             /* found a match!  Note that regline may now point
4573                              * to a copy of the line, that should not matter. */
4574                         }
4575                     }
4576                 }
4577
4578                 /* Matched the backref, skip over it. */
4579                 reginput += len;
4580             }
4581             break;
4582
4583 #ifdef FEAT_SYN_HL
4584           case ZREF + 1:
4585           case ZREF + 2:
4586           case ZREF + 3:
4587           case ZREF + 4:
4588           case ZREF + 5:
4589           case ZREF + 6:
4590           case ZREF + 7:
4591           case ZREF + 8:
4592           case ZREF + 9:
4593             {
4594                 int     len;
4595
4596                 cleanup_zsubexpr();
4597                 no = op - ZREF;
4598                 if (re_extmatch_in != NULL
4599                         && re_extmatch_in->matches[no] != NULL)
4600                 {
4601                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4602                     if (cstrncmp(re_extmatch_in->matches[no],
4603                                                           reginput, &len) != 0)
4604                         status = RA_NOMATCH;
4605                     else
4606                         reginput += len;
4607                 }
4608                 else
4609                 {
4610                     /* Backref was not set: Match an empty string. */
4611                 }
4612             }
4613             break;
4614 #endif
4615
4616           case BRANCH:
4617             {
4618                 if (OP(next) != BRANCH) /* No choice. */
4619                     next = OPERAND(scan);       /* Avoid recursion. */
4620                 else
4621                 {
4622                     rp = regstack_push(RS_BRANCH, scan);
4623                     if (rp == NULL)
4624                         status = RA_FAIL;
4625                     else
4626                         status = RA_BREAK;      /* rest is below */
4627                 }
4628             }
4629             break;
4630
4631           case BRACE_LIMITS:
4632             {
4633                 if (OP(next) == BRACE_SIMPLE)
4634                 {
4635                     bl_minval = OPERAND_MIN(scan);
4636                     bl_maxval = OPERAND_MAX(scan);
4637                 }
4638                 else if (OP(next) >= BRACE_COMPLEX
4639                         && OP(next) < BRACE_COMPLEX + 10)
4640                 {
4641                     no = OP(next) - BRACE_COMPLEX;
4642                     brace_min[no] = OPERAND_MIN(scan);
4643                     brace_max[no] = OPERAND_MAX(scan);
4644                     brace_count[no] = 0;
4645                 }
4646                 else
4647                 {
4648                     EMSG(_(e_internal));            /* Shouldn't happen */
4649                     status = RA_FAIL;
4650                 }
4651             }
4652             break;
4653
4654           case BRACE_COMPLEX + 0:
4655           case BRACE_COMPLEX + 1:
4656           case BRACE_COMPLEX + 2:
4657           case BRACE_COMPLEX + 3:
4658           case BRACE_COMPLEX + 4:
4659           case BRACE_COMPLEX + 5:
4660           case BRACE_COMPLEX + 6:
4661           case BRACE_COMPLEX + 7:
4662           case BRACE_COMPLEX + 8:
4663           case BRACE_COMPLEX + 9:
4664             {
4665                 no = op - BRACE_COMPLEX;
4666                 ++brace_count[no];
4667
4668                 /* If not matched enough times yet, try one more */
4669                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4670                                              ? brace_min[no] : brace_max[no]))
4671                 {
4672                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4673                     if (rp == NULL)
4674                         status = RA_FAIL;
4675                     else
4676                     {
4677                         rp->rs_no = no;
4678                         reg_save(&rp->rs_un.regsave, &backpos);
4679                         next = OPERAND(scan);
4680                         /* We continue and handle the result when done. */
4681                     }
4682                     break;
4683                 }
4684
4685                 /* If matched enough times, may try matching some more */
4686                 if (brace_min[no] <= brace_max[no])
4687                 {
4688                     /* Range is the normal way around, use longest match */
4689                     if (brace_count[no] <= brace_max[no])
4690                     {
4691                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4692                         if (rp == NULL)
4693                             status = RA_FAIL;
4694                         else
4695                         {
4696                             rp->rs_no = no;
4697                             reg_save(&rp->rs_un.regsave, &backpos);
4698                             next = OPERAND(scan);
4699                             /* We continue and handle the result when done. */
4700                         }
4701                     }
4702                 }
4703                 else
4704                 {
4705                     /* Range is backwards, use shortest match first */
4706                     if (brace_count[no] <= brace_min[no])
4707                     {
4708                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4709                         if (rp == NULL)
4710                             status = RA_FAIL;
4711                         else
4712                         {
4713                             reg_save(&rp->rs_un.regsave, &backpos);
4714                             /* We continue and handle the result when done. */
4715                         }
4716                     }
4717                 }
4718             }
4719             break;
4720
4721           case BRACE_SIMPLE:
4722           case STAR:
4723           case PLUS:
4724             {
4725                 regstar_T       rst;
4726
4727                 /*
4728                  * Lookahead to avoid useless match attempts when we know
4729                  * what character comes next.
4730                  */
4731                 if (OP(next) == EXACTLY)
4732                 {
4733                     rst.nextb = *OPERAND(next);
4734                     if (ireg_ic)
4735                     {
4736                         if (MB_ISUPPER(rst.nextb))
4737                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4738                         else
4739                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4740                     }
4741                     else
4742                         rst.nextb_ic = rst.nextb;
4743                 }
4744                 else
4745                 {
4746                     rst.nextb = NUL;
4747                     rst.nextb_ic = NUL;
4748                 }
4749                 if (op != BRACE_SIMPLE)
4750                 {
4751                     rst.minval = (op == STAR) ? 0 : 1;
4752                     rst.maxval = MAX_LIMIT;
4753                 }
4754                 else
4755                 {
4756                     rst.minval = bl_minval;
4757                     rst.maxval = bl_maxval;
4758                 }
4759
4760                 /*
4761                  * When maxval > minval, try matching as much as possible, up
4762                  * to maxval.  When maxval < minval, try matching at least the
4763                  * minimal number (since the range is backwards, that's also
4764                  * maxval!).
4765                  */
4766                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4767                 if (got_int)
4768                 {
4769                     status = RA_FAIL;
4770                     break;
4771                 }
4772                 if (rst.minval <= rst.maxval
4773                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4774                 {
4775                     /* It could match.  Prepare for trying to match what
4776                      * follows.  The code is below.  Parameters are stored in
4777                      * a regstar_T on the regstack. */
4778                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4779                     {
4780                         EMSG(_(e_maxmempat));
4781                         status = RA_FAIL;
4782                     }
4783                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4784                         status = RA_FAIL;
4785                     else
4786                     {
4787                         regstack.ga_len += sizeof(regstar_T);
4788                         rp = regstack_push(rst.minval <= rst.maxval
4789                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4790                         if (rp == NULL)
4791                             status = RA_FAIL;
4792                         else
4793                         {
4794                             *(((regstar_T *)rp) - 1) = rst;
4795                             status = RA_BREAK;      /* skip the restore bits */
4796                         }
4797                     }
4798                 }
4799                 else
4800                     status = RA_NOMATCH;
4801
4802             }
4803             break;
4804
4805           case NOMATCH:
4806           case MATCH:
4807           case SUBPAT:
4808             rp = regstack_push(RS_NOMATCH, scan);
4809             if (rp == NULL)
4810                 status = RA_FAIL;
4811             else
4812             {
4813                 rp->rs_no = op;
4814                 reg_save(&rp->rs_un.regsave, &backpos);
4815                 next = OPERAND(scan);
4816                 /* We continue and handle the result when done. */
4817             }
4818             break;
4819
4820           case BEHIND:
4821           case NOBEHIND:
4822             /* Need a bit of room to store extra positions. */
4823             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4824             {
4825                 EMSG(_(e_maxmempat));
4826                 status = RA_FAIL;
4827             }
4828             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4829                 status = RA_FAIL;
4830             else
4831             {
4832                 regstack.ga_len += sizeof(regbehind_T);
4833                 rp = regstack_push(RS_BEHIND1, scan);
4834                 if (rp == NULL)
4835                     status = RA_FAIL;
4836                 else
4837                 {
4838                     rp->rs_no = op;
4839                     reg_save(&rp->rs_un.regsave, &backpos);
4840                     /* First try if what follows matches.  If it does then we
4841                      * check the behind match by looping. */
4842                 }
4843             }
4844             break;
4845
4846           case BHPOS:
4847             if (REG_MULTI)
4848             {
4849                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4850                         || behind_pos.rs_u.pos.lnum != reglnum)
4851                     status = RA_NOMATCH;
4852             }
4853             else if (behind_pos.rs_u.ptr != reginput)
4854                 status = RA_NOMATCH;
4855             break;
4856
4857           case NEWL:
4858             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4859                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4860                 status = RA_NOMATCH;
4861             else if (reg_line_lbr)
4862                 ADVANCE_REGINPUT();
4863             else
4864                 reg_nextline();
4865             break;
4866
4867           case END:
4868             status = RA_MATCH;  /* Success! */
4869             break;
4870
4871           default:
4872             EMSG(_(e_re_corr));
4873 #ifdef DEBUG
4874             printf("Illegal op code %d\n", op);
4875 #endif
4876             status = RA_FAIL;
4877             break;
4878           }
4879         }
4880
4881         /* If we can't continue sequentially, break the inner loop. */
4882         if (status != RA_CONT)
4883             break;
4884
4885         /* Continue in inner loop, advance to next item. */
4886         scan = next;
4887
4888     } /* end of inner loop */
4889
4890     /*
4891      * If there is something on the regstack execute the code for the state.
4892      * If the state is popped then loop and use the older state.
4893      */
4894     while (regstack.ga_len > 0 && status != RA_FAIL)
4895     {
4896         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4897         switch (rp->rs_state)
4898         {
4899           case RS_NOPEN:
4900             /* Result is passed on as-is, simply pop the state. */
4901             regstack_pop(&scan);
4902             break;
4903
4904           case RS_MOPEN:
4905             /* Pop the state.  Restore pointers when there is no match. */
4906             if (status == RA_NOMATCH)
4907                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4908                                                   &reg_startp[rp->rs_no]);
4909             regstack_pop(&scan);
4910             break;
4911
4912 #ifdef FEAT_SYN_HL
4913           case RS_ZOPEN:
4914             /* Pop the state.  Restore pointers when there is no match. */
4915             if (status == RA_NOMATCH)
4916                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4917                                                  &reg_startzp[rp->rs_no]);
4918             regstack_pop(&scan);
4919             break;
4920 #endif
4921
4922           case RS_MCLOSE:
4923             /* Pop the state.  Restore pointers when there is no match. */
4924             if (status == RA_NOMATCH)
4925                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4926                                                     &reg_endp[rp->rs_no]);
4927             regstack_pop(&scan);
4928             break;
4929
4930 #ifdef FEAT_SYN_HL
4931           case RS_ZCLOSE:
4932             /* Pop the state.  Restore pointers when there is no match. */
4933             if (status == RA_NOMATCH)
4934                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4935                                                    &reg_endzp[rp->rs_no]);
4936             regstack_pop(&scan);
4937             break;
4938 #endif
4939
4940           case RS_BRANCH:
4941             if (status == RA_MATCH)
4942                 /* this branch matched, use it */
4943                 regstack_pop(&scan);
4944             else
4945             {
4946                 if (status != RA_BREAK)
4947                 {
4948                     /* After a non-matching branch: try next one. */
4949                     reg_restore(&rp->rs_un.regsave, &backpos);
4950                     scan = rp->rs_scan;
4951                 }
4952                 if (scan == NULL || OP(scan) != BRANCH)
4953                 {
4954                     /* no more branches, didn't find a match */
4955                     status = RA_NOMATCH;
4956                     regstack_pop(&scan);
4957                 }
4958                 else
4959                 {
4960                     /* Prepare to try a branch. */
4961                     rp->rs_scan = regnext(scan);
4962                     reg_save(&rp->rs_un.regsave, &backpos);
4963                     scan = OPERAND(scan);
4964                 }
4965             }
4966             break;
4967
4968           case RS_BRCPLX_MORE:
4969             /* Pop the state.  Restore pointers when there is no match. */
4970             if (status == RA_NOMATCH)
4971             {
4972                 reg_restore(&rp->rs_un.regsave, &backpos);
4973                 --brace_count[rp->rs_no];       /* decrement match count */
4974             }
4975             regstack_pop(&scan);
4976             break;
4977
4978           case RS_BRCPLX_LONG:
4979             /* Pop the state.  Restore pointers when there is no match. */
4980             if (status == RA_NOMATCH)
4981             {
4982                 /* There was no match, but we did find enough matches. */
4983                 reg_restore(&rp->rs_un.regsave, &backpos);
4984                 --brace_count[rp->rs_no];
4985                 /* continue with the items after "\{}" */
4986                 status = RA_CONT;
4987             }
4988             regstack_pop(&scan);
4989             if (status == RA_CONT)
4990                 scan = regnext(scan);
4991             break;
4992
4993           case RS_BRCPLX_SHORT:
4994             /* Pop the state.  Restore pointers when there is no match. */
4995             if (status == RA_NOMATCH)
4996                 /* There was no match, try to match one more item. */
4997                 reg_restore(&rp->rs_un.regsave, &backpos);
4998             regstack_pop(&scan);
4999             if (status == RA_NOMATCH)
5000             {
5001                 scan = OPERAND(scan);
5002                 status = RA_CONT;
5003             }
5004             break;
5005
5006           case RS_NOMATCH:
5007             /* Pop the state.  If the operand matches for NOMATCH or
5008              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5009              * except for SUBPAT, and continue with the next item. */
5010             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5011                 status = RA_NOMATCH;
5012             else
5013             {
5014                 status = RA_CONT;
5015                 if (rp->rs_no != SUBPAT)        /* zero-width */
5016                     reg_restore(&rp->rs_un.regsave, &backpos);
5017             }
5018             regstack_pop(&scan);
5019             if (status == RA_CONT)
5020                 scan = regnext(scan);
5021             break;
5022
5023           case RS_BEHIND1:
5024             if (status == RA_NOMATCH)
5025             {
5026                 regstack_pop(&scan);
5027                 regstack.ga_len -= sizeof(regbehind_T);
5028             }
5029             else
5030             {
5031                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5032                  * the behind part does (not) match before the current
5033                  * position in the input.  This must be done at every
5034                  * position in the input and checking if the match ends at
5035                  * the current position. */
5036
5037                 /* save the position after the found match for next */
5038                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5039
5040                 /* start looking for a match with operand at the current
5041                  * position.  Go back one character until we find the
5042                  * result, hitting the start of the line or the previous
5043                  * line (for multi-line matching).
5044                  * Set behind_pos to where the match should end, BHPOS
5045                  * will match it.  Save the current value. */
5046                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5047                 behind_pos = rp->rs_un.regsave;
5048
5049                 rp->rs_state = RS_BEHIND2;
5050
5051                 reg_restore(&rp->rs_un.regsave, &backpos);
5052                 scan = OPERAND(rp->rs_scan);
5053             }
5054             break;
5055
5056           case RS_BEHIND2:
5057             /*
5058              * Looping for BEHIND / NOBEHIND match.
5059              */
5060             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5061             {
5062                 /* found a match that ends where "next" started */
5063                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5064                 if (rp->rs_no == BEHIND)
5065                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5066                                                                     &backpos);
5067                 else
5068                     /* But we didn't want a match. */
5069                     status = RA_NOMATCH;
5070                 regstack_pop(&scan);
5071                 regstack.ga_len -= sizeof(regbehind_T);
5072             }
5073             else
5074             {
5075                 /* No match: Go back one character.  May go to previous
5076                  * line once. */
5077                 no = OK;
5078                 if (REG_MULTI)
5079                 {
5080                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5081                     {
5082                         if (rp->rs_un.regsave.rs_u.pos.lnum
5083                                         < behind_pos.rs_u.pos.lnum
5084                                 || reg_getline(
5085                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5086                                                                   == NULL)
5087                             no = FAIL;
5088                         else
5089                         {
5090                             reg_restore(&rp->rs_un.regsave, &backpos);
5091                             rp->rs_un.regsave.rs_u.pos.col =
5092                                                  (colnr_T)STRLEN(regline);
5093                         }
5094                     }
5095                     else
5096                         --rp->rs_un.regsave.rs_u.pos.col;
5097                 }
5098                 else
5099                 {
5100                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5101                         no = FAIL;
5102                     else
5103                         --rp->rs_un.regsave.rs_u.ptr;
5104                 }
5105                 if (no == OK)
5106                 {
5107                     /* Advanced, prepare for finding match again. */
5108                     reg_restore(&rp->rs_un.regsave, &backpos);
5109                     scan = OPERAND(rp->rs_scan);
5110                 }
5111                 else
5112                 {
5113                     /* Can't advance.  For NOBEHIND that's a match. */
5114                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5115                     if (rp->rs_no == NOBEHIND)
5116                     {
5117                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5118                                                                     &backpos);
5119                         status = RA_MATCH;
5120                     }
5121                     else
5122                         status = RA_NOMATCH;
5123                     regstack_pop(&scan);
5124                     regstack.ga_len -= sizeof(regbehind_T);
5125                 }
5126             }
5127             break;
5128
5129           case RS_STAR_LONG:
5130           case RS_STAR_SHORT:
5131             {
5132                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5133
5134                 if (status == RA_MATCH)
5135                 {
5136                     regstack_pop(&scan);
5137                     regstack.ga_len -= sizeof(regstar_T);
5138                     break;
5139                 }
5140
5141                 /* Tried once already, restore input pointers. */
5142                 if (status != RA_BREAK)
5143                     reg_restore(&rp->rs_un.regsave, &backpos);
5144
5145                 /* Repeat until we found a position where it could match. */
5146                 for (;;)
5147                 {
5148                     if (status != RA_BREAK)
5149                     {
5150                         /* Tried first position already, advance. */
5151                         if (rp->rs_state == RS_STAR_LONG)
5152                         {
5153                             /* Trying for longest match, but couldn't or
5154                              * didn't match -- back up one char. */
5155                             if (--rst->count < rst->minval)
5156                                 break;
5157                             if (reginput == regline)
5158                             {
5159                                 /* backup to last char of previous line */
5160                                 --reglnum;
5161                                 regline = reg_getline(reglnum);
5162                                 /* Just in case regrepeat() didn't count
5163                                  * right. */
5164                                 if (regline == NULL)
5165                                     break;
5166                                 reginput = regline + STRLEN(regline);
5167                                 fast_breakcheck();
5168                             }
5169                             else
5170                                 mb_ptr_back(regline, reginput);
5171                         }
5172                         else
5173                         {
5174                             /* Range is backwards, use shortest match first.
5175                              * Careful: maxval and minval are exchanged!
5176                              * Couldn't or didn't match: try advancing one
5177                              * char. */
5178                             if (rst->count == rst->minval
5179                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5180                                 break;
5181                             ++rst->count;
5182                         }
5183                         if (got_int)
5184                             break;
5185                     }
5186                     else
5187                         status = RA_NOMATCH;
5188
5189                     /* If it could match, try it. */
5190                     if (rst->nextb == NUL || *reginput == rst->nextb
5191                                              || *reginput == rst->nextb_ic)
5192                     {
5193                         reg_save(&rp->rs_un.regsave, &backpos);
5194                         scan = regnext(rp->rs_scan);
5195                         status = RA_CONT;
5196                         break;
5197                     }
5198                 }
5199                 if (status != RA_CONT)
5200                 {
5201                     /* Failed. */
5202                     regstack_pop(&scan);
5203                     regstack.ga_len -= sizeof(regstar_T);
5204                     status = RA_NOMATCH;
5205                 }
5206             }
5207             break;
5208         }
5209
5210         /* If we want to continue the inner loop or didn't pop a state
5211          * continue matching loop */
5212         if (status == RA_CONT || rp == (regitem_T *)
5213                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5214             break;
5215     }
5216
5217     /* May need to continue with the inner loop, starting at "scan". */
5218     if (status == RA_CONT)
5219         continue;
5220
5221     /*
5222      * If the regstack is empty or something failed we are done.
5223      */
5224     if (regstack.ga_len == 0 || status == RA_FAIL)
5225     {
5226         if (scan == NULL)
5227         {
5228             /*
5229              * We get here only if there's trouble -- normally "case END" is
5230              * the terminating point.
5231              */
5232             EMSG(_(e_re_corr));
5233 #ifdef DEBUG
5234             printf("Premature EOL\n");
5235 #endif
5236         }
5237         if (status == RA_FAIL)
5238             got_int = TRUE;
5239         return (status == RA_MATCH);
5240     }
5241
5242   } /* End of loop until the regstack is empty. */
5243
5244   /* NOTREACHED */
5245 }
5246
5247 /*
5248  * Push an item onto the regstack.
5249  * Returns pointer to new item.  Returns NULL when out of memory.
5250  */
5251     static regitem_T *
5252 regstack_push(state, scan)
5253     regstate_T  state;
5254     char_u      *scan;
5255 {
5256     regitem_T   *rp;
5257
5258     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5259     {
5260         EMSG(_(e_maxmempat));
5261         return NULL;
5262     }
5263     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5264         return NULL;
5265
5266     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5267     rp->rs_state = state;
5268     rp->rs_scan = scan;
5269
5270     regstack.ga_len += sizeof(regitem_T);
5271     return rp;
5272 }
5273
5274 /*
5275  * Pop an item from the regstack.
5276  */
5277     static void
5278 regstack_pop(scan)
5279     char_u      **scan;
5280 {
5281     regitem_T   *rp;
5282
5283     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5284     *scan = rp->rs_scan;
5285
5286     regstack.ga_len -= sizeof(regitem_T);
5287 }
5288
5289 /*
5290  * regrepeat - repeatedly match something simple, return how many.
5291  * Advances reginput (and reglnum) to just after the matched chars.
5292  */
5293     static int
5294 regrepeat(p, maxcount)
5295     char_u      *p;
5296     long        maxcount;   /* maximum number of matches allowed */
5297 {
5298     long        count = 0;
5299     char_u      *scan;
5300     char_u      *opnd;
5301     int         mask;
5302     int         testval = 0;
5303
5304     scan = reginput;        /* Make local copy of reginput for speed. */
5305     opnd = OPERAND(p);
5306     switch (OP(p))
5307     {
5308       case ANY:
5309       case ANY + ADD_NL:
5310         while (count < maxcount)
5311         {
5312             /* Matching anything means we continue until end-of-line (or
5313              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5314             while (*scan != NUL && count < maxcount)
5315             {
5316                 ++count;
5317                 mb_ptr_adv(scan);
5318             }
5319             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5320                                          || reg_line_lbr || count == maxcount)
5321                 break;
5322             ++count;            /* count the line-break */
5323             reg_nextline();
5324             scan = reginput;
5325             if (got_int)
5326                 break;
5327         }
5328         break;
5329
5330       case IDENT:
5331       case IDENT + ADD_NL:
5332         testval = TRUE;
5333         /*FALLTHROUGH*/
5334       case SIDENT:
5335       case SIDENT + ADD_NL:
5336         while (count < maxcount)
5337         {
5338             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5339             {
5340                 mb_ptr_adv(scan);
5341             }
5342             else if (*scan == NUL)
5343             {
5344                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5345                                                               || reg_line_lbr)
5346                     break;
5347                 reg_nextline();
5348                 scan = reginput;
5349                 if (got_int)
5350                     break;
5351             }
5352             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5353                 ++scan;
5354             else
5355                 break;
5356             ++count;
5357         }
5358         break;
5359
5360       case KWORD:
5361       case KWORD + ADD_NL:
5362         testval = TRUE;
5363         /*FALLTHROUGH*/
5364       case SKWORD:
5365       case SKWORD + ADD_NL:
5366         while (count < maxcount)
5367         {
5368             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5369             {
5370                 mb_ptr_adv(scan);
5371             }
5372             else if (*scan == NUL)
5373             {
5374                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5375                                                               || reg_line_lbr)
5376                     break;
5377                 reg_nextline();
5378                 scan = reginput;
5379                 if (got_int)
5380                     break;
5381             }
5382             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5383                 ++scan;
5384             else
5385                 break;
5386             ++count;
5387         }
5388         break;
5389
5390       case FNAME:
5391       case FNAME + ADD_NL:
5392         testval = TRUE;
5393         /*FALLTHROUGH*/
5394       case SFNAME:
5395       case SFNAME + ADD_NL:
5396         while (count < maxcount)
5397         {
5398             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5399             {
5400                 mb_ptr_adv(scan);
5401             }
5402             else if (*scan == NUL)
5403             {
5404                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5405                                                               || reg_line_lbr)
5406                     break;
5407                 reg_nextline();
5408                 scan = reginput;
5409                 if (got_int)
5410                     break;
5411             }
5412             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5413                 ++scan;
5414             else
5415                 break;
5416             ++count;
5417         }
5418         break;
5419
5420       case PRINT:
5421       case PRINT + ADD_NL:
5422         testval = TRUE;
5423         /*FALLTHROUGH*/
5424       case SPRINT:
5425       case SPRINT + ADD_NL:
5426         while (count < maxcount)
5427         {
5428             if (*scan == NUL)
5429             {
5430                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5431                                                               || reg_line_lbr)
5432                     break;
5433                 reg_nextline();
5434                 scan = reginput;
5435                 if (got_int)
5436                     break;
5437             }
5438             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5439             {
5440                 mb_ptr_adv(scan);
5441             }
5442             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5443                 ++scan;
5444             else
5445                 break;
5446             ++count;
5447         }
5448         break;
5449
5450       case WHITE:
5451       case WHITE + ADD_NL:
5452         testval = mask = RI_WHITE;
5453 do_class:
5454         while (count < maxcount)
5455         {
5456 #ifdef FEAT_MBYTE
5457             int         l;
5458 #endif
5459             if (*scan == NUL)
5460             {
5461                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5462                                                               || reg_line_lbr)
5463                     break;
5464                 reg_nextline();
5465                 scan = reginput;
5466                 if (got_int)
5467                     break;
5468             }
5469 #ifdef FEAT_MBYTE
5470             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5471             {
5472                 if (testval != 0)
5473                     break;
5474                 scan += l;
5475             }
5476 #endif
5477             else if ((class_tab[*scan] & mask) == testval)
5478                 ++scan;
5479             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5480                 ++scan;
5481             else
5482                 break;
5483             ++count;
5484         }
5485         break;
5486
5487       case NWHITE:
5488       case NWHITE + ADD_NL:
5489         mask = RI_WHITE;
5490         goto do_class;
5491       case DIGIT:
5492       case DIGIT + ADD_NL:
5493         testval = mask = RI_DIGIT;
5494         goto do_class;
5495       case NDIGIT:
5496       case NDIGIT + ADD_NL:
5497         mask = RI_DIGIT;
5498         goto do_class;
5499       case HEX:
5500       case HEX + ADD_NL:
5501         testval = mask = RI_HEX;
5502         goto do_class;
5503       case NHEX:
5504       case NHEX + ADD_NL:
5505         mask = RI_HEX;
5506         goto do_class;
5507       case OCTAL:
5508       case OCTAL + ADD_NL:
5509         testval = mask = RI_OCTAL;
5510         goto do_class;
5511       case NOCTAL:
5512       case NOCTAL + ADD_NL:
5513         mask = RI_OCTAL;
5514         goto do_class;
5515       case WORD:
5516       case WORD + ADD_NL:
5517         testval = mask = RI_WORD;
5518         goto do_class;
5519       case NWORD:
5520       case NWORD + ADD_NL:
5521         mask = RI_WORD;
5522         goto do_class;
5523       case HEAD:
5524       case HEAD + ADD_NL:
5525         testval = mask = RI_HEAD;
5526         goto do_class;
5527       case NHEAD:
5528       case NHEAD + ADD_NL:
5529         mask = RI_HEAD;
5530         goto do_class;
5531       case ALPHA:
5532       case ALPHA + ADD_NL:
5533         testval = mask = RI_ALPHA;
5534         goto do_class;
5535       case NALPHA:
5536       case NALPHA + ADD_NL:
5537         mask = RI_ALPHA;
5538         goto do_class;
5539       case LOWER:
5540       case LOWER + ADD_NL:
5541         testval = mask = RI_LOWER;
5542         goto do_class;
5543       case NLOWER:
5544       case NLOWER + ADD_NL:
5545         mask = RI_LOWER;
5546         goto do_class;
5547       case UPPER:
5548       case UPPER + ADD_NL:
5549         testval = mask = RI_UPPER;
5550         goto do_class;
5551       case NUPPER:
5552       case NUPPER + ADD_NL:
5553         mask = RI_UPPER;
5554         goto do_class;
5555
5556       case EXACTLY:
5557         {
5558             int     cu, cl;
5559
5560             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5561              * would have been used for it.  It does handle single-byte
5562              * characters, such as latin1. */
5563             if (ireg_ic)
5564             {
5565                 cu = MB_TOUPPER(*opnd);
5566                 cl = MB_TOLOWER(*opnd);
5567                 while (count < maxcount && (*scan == cu || *scan == cl))
5568                 {
5569                     count++;
5570                     scan++;
5571                 }
5572             }
5573             else
5574             {
5575                 cu = *opnd;
5576                 while (count < maxcount && *scan == cu)
5577                 {
5578                     count++;
5579                     scan++;
5580                 }
5581             }
5582             break;
5583         }
5584
5585 #ifdef FEAT_MBYTE
5586       case MULTIBYTECODE:
5587         {
5588             int         i, len, cf = 0;
5589
5590             /* Safety check (just in case 'encoding' was changed since
5591              * compiling the program). */
5592             if ((len = (*mb_ptr2len)(opnd)) > 1)
5593             {
5594                 if (ireg_ic && enc_utf8)
5595                     cf = utf_fold(utf_ptr2char(opnd));
5596                 while (count < maxcount)
5597                 {
5598                     for (i = 0; i < len; ++i)
5599                         if (opnd[i] != scan[i])
5600                             break;
5601                     if (i < len && (!ireg_ic || !enc_utf8
5602                                         || utf_fold(utf_ptr2char(scan)) != cf))
5603                         break;
5604                     scan += len;
5605                     ++count;
5606                 }
5607             }
5608         }
5609         break;
5610 #endif
5611
5612       case ANYOF:
5613       case ANYOF + ADD_NL:
5614         testval = TRUE;
5615         /*FALLTHROUGH*/
5616
5617       case ANYBUT:
5618       case ANYBUT + ADD_NL:
5619         while (count < maxcount)
5620         {
5621 #ifdef FEAT_MBYTE
5622             int len;
5623 #endif
5624             if (*scan == NUL)
5625             {
5626                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5627                                                               || reg_line_lbr)
5628                     break;
5629                 reg_nextline();
5630                 scan = reginput;
5631                 if (got_int)
5632                     break;
5633             }
5634             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5635                 ++scan;
5636 #ifdef FEAT_MBYTE
5637             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5638             {
5639                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5640                     break;
5641                 scan += len;
5642             }
5643 #endif
5644             else
5645             {
5646                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5647                     break;
5648                 ++scan;
5649             }
5650             ++count;
5651         }
5652         break;
5653
5654       case NEWL:
5655         while (count < maxcount
5656                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5657                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5658         {
5659             count++;
5660             if (reg_line_lbr)
5661                 ADVANCE_REGINPUT();
5662             else
5663                 reg_nextline();
5664             scan = reginput;
5665             if (got_int)
5666                 break;
5667         }
5668         break;
5669
5670       default:                  /* Oh dear.  Called inappropriately. */
5671         EMSG(_(e_re_corr));
5672 #ifdef DEBUG
5673         printf("Called regrepeat with op code %d\n", OP(p));
5674 #endif
5675         break;
5676     }
5677
5678     reginput = scan;
5679
5680     return (int)count;
5681 }
5682
5683 /*
5684  * regnext - dig the "next" pointer out of a node
5685  */
5686     static char_u *
5687 regnext(p)
5688     char_u  *p;
5689 {
5690     int     offset;
5691
5692     if (p == JUST_CALC_SIZE)
5693         return NULL;
5694
5695     offset = NEXT(p);
5696     if (offset == 0)
5697         return NULL;
5698
5699     if (OP(p) == BACK)
5700         return p - offset;
5701     else
5702         return p + offset;
5703 }
5704
5705 /*
5706  * Check the regexp program for its magic number.
5707  * Return TRUE if it's wrong.
5708  */
5709     static int
5710 prog_magic_wrong()
5711 {
5712     if (UCHARAT(REG_MULTI
5713                 ? reg_mmatch->regprog->program
5714                 : reg_match->regprog->program) != REGMAGIC)
5715     {
5716         EMSG(_(e_re_corr));
5717         return TRUE;
5718     }
5719     return FALSE;
5720 }
5721
5722 /*
5723  * Cleanup the subexpressions, if this wasn't done yet.
5724  * This construction is used to clear the subexpressions only when they are
5725  * used (to increase speed).
5726  */
5727     static void
5728 cleanup_subexpr()
5729 {
5730     if (need_clear_subexpr)
5731     {
5732         if (REG_MULTI)
5733         {
5734             /* Use 0xff to set lnum to -1 */
5735             vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5736             vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5737         }
5738         else
5739         {
5740             vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5741             vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5742         }
5743         need_clear_subexpr = FALSE;
5744     }
5745 }
5746
#ifdef FEAT_SYN_HL
/*
 * Reset the start and end positions kept in the reg_startz* and reg_endz*
 * arrays, unless this was already done.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (!REG_MULTI)
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
        /* Filling with 0xff bytes makes every lnum -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5768
5769 /*
5770  * Advance reglnum, regline and reginput to the next line.
5771  */
5772     static void
5773 reg_nextline()
5774 {
5775     regline = reg_getline(++reglnum);
5776     reginput = regline;
5777     fast_breakcheck();
5778 }
5779
5780 /*
5781  * Save the input line and position in a regsave_T.
5782  */
5783     static void
5784 reg_save(save, gap)
5785     regsave_T   *save;
5786     garray_T    *gap;
5787 {
5788     if (REG_MULTI)
5789     {
5790         save->rs_u.pos.col = (colnr_T)(reginput - regline);
5791         save->rs_u.pos.lnum = reglnum;
5792     }
5793     else
5794         save->rs_u.ptr = reginput;
5795     save->rs_len = gap->ga_len;
5796 }
5797
5798 /*
5799  * Restore the input line and position from a regsave_T.
5800  */
5801     static void
5802 reg_restore(save, gap)
5803     regsave_T   *save;
5804     garray_T    *gap;
5805 {
5806     if (REG_MULTI)
5807     {
5808         if (reglnum != save->rs_u.pos.lnum)
5809         {
5810             /* only call reg_getline() when the line number changed to save
5811              * a bit of time */
5812             reglnum = save->rs_u.pos.lnum;
5813             regline = reg_getline(reglnum);
5814         }
5815         reginput = regline + save->rs_u.pos.col;
5816     }
5817     else
5818         reginput = save->rs_u.ptr;
5819     gap->ga_len = save->rs_len;
5820 }
5821
5822 /*
5823  * Return TRUE if current position is equal to saved position.
5824  */
5825     static int
5826 reg_save_equal(save)
5827     regsave_T   *save;
5828 {
5829     if (REG_MULTI)
5830         return reglnum == save->rs_u.pos.lnum
5831                                   && reginput == regline + save->rs_u.pos.col;
5832     return reginput == save->rs_u.ptr;
5833 }
5834
5835 /*
5836  * Tentatively set the sub-expression start to the current position (after
5837  * calling regmatch() they will have changed).  Need to save the existing
5838  * values for when there is no match.
5839  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5840  * depending on REG_MULTI.
5841  */
5842     static void
5843 save_se_multi(savep, posp)
5844     save_se_T   *savep;
5845     lpos_T      *posp;
5846 {
5847     savep->se_u.pos = *posp;
5848     posp->lnum = reglnum;
5849     posp->col = (colnr_T)(reginput - regline);
5850 }
5851
5852     static void
5853 save_se_one(savep, pp)
5854     save_se_T   *savep;
5855     char_u      **pp;
5856 {
5857     savep->se_u.ptr = *pp;
5858     *pp = reginput;
5859 }
5860
5861 /*
5862  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5863  */
5864     static int
5865 re_num_cmp(val, scan)
5866     long_u      val;
5867     char_u      *scan;
5868 {
5869     long_u  n = OPERAND_MIN(scan);
5870
5871     if (OPERAND_CMP(scan) == '>')
5872         return val > n;
5873     if (OPERAND_CMP(scan) == '<')
5874         return val < n;
5875     return val == n;
5876 }
5877
5878
5879 #ifdef DEBUG
5880
5881 /*
5882  * regdump - dump a regexp onto stdout in vaguely comprehensible form
5883  */
5884     static void
5885 regdump(pattern, r)
5886     char_u      *pattern;
5887     regprog_T   *r;
5888 {
5889     char_u  *s;
5890     int     op = EXACTLY;       /* Arbitrary non-END op. */
5891     char_u  *next;
5892     char_u  *end = NULL;
5893
5894     printf("\r\nregcomp(%s):\r\n", pattern);
5895
5896     s = r->program + 1;
5897     /*
5898      * Loop until we find the END that isn't before a referred next (an END
5899      * can also appear in a NOMATCH operand).
5900      */
5901     while (op != END || s <= end)
5902     {
5903         op = OP(s);
5904         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
5905         next = regnext(s);
5906         if (next == NULL)       /* Next ptr. */
5907             printf("(0)");
5908         else
5909             printf("(%d)", (int)((s - r->program) + (next - s)));
5910         if (end < next)
5911             end = next;
5912         if (op == BRACE_LIMITS)
5913         {
5914             /* Two short ints */
5915             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5916             s += 8;
5917         }
5918         s += 3;
5919         if (op == ANYOF || op == ANYOF + ADD_NL
5920                 || op == ANYBUT || op == ANYBUT + ADD_NL
5921                 || op == EXACTLY)
5922         {
5923             /* Literal string, where present. */
5924             while (*s != NUL)
5925                 printf("%c", *s++);
5926             s++;
5927         }
5928         printf("\r\n");
5929     }
5930
5931     /* Header fields of interest. */
5932     if (r->regstart != NUL)
5933         printf("start `%s' 0x%x; ", r->regstart < 256
5934                 ? (char *)transchar(r->regstart)
5935                 : "multibyte", r->regstart);
5936     if (r->reganch)
5937         printf("anchored; ");
5938     if (r->regmust != NULL)
5939         printf("must have \"%s\"", r->regmust);
5940     printf("\r\n");
5941 }
5942
5943 /*
5944  * regprop - printable representation of opcode
5945  */
5946     static char_u *
5947 regprop(op)
5948     char_u         *op;
5949 {
5950     char_u          *p;
5951     static char_u   buf[50];
5952
5953     (void) strcpy(buf, ":");
5954
5955     switch (OP(op))
5956     {
5957       case BOL:
5958         p = "BOL";
5959         break;
5960       case EOL:
5961         p = "EOL";
5962         break;
5963       case RE_BOF:
5964         p = "BOF";
5965         break;
5966       case RE_EOF:
5967         p = "EOF";
5968         break;
5969       case CURSOR:
5970         p = "CURSOR";
5971         break;
5972       case RE_VISUAL:
5973         p = "RE_VISUAL";
5974         break;
5975       case RE_LNUM:
5976         p = "RE_LNUM";
5977         break;
5978       case RE_MARK:
5979         p = "RE_MARK";
5980         break;
5981       case RE_COL:
5982         p = "RE_COL";
5983         break;
5984       case RE_VCOL:
5985         p = "RE_VCOL";
5986         break;
5987       case BOW:
5988         p = "BOW";
5989         break;
5990       case EOW:
5991         p = "EOW";
5992         break;
5993       case ANY:
5994         p = "ANY";
5995         break;
5996       case ANY + ADD_NL:
5997         p = "ANY+NL";
5998         break;
5999       case ANYOF:
6000         p = "ANYOF";
6001         break;
6002       case ANYOF + ADD_NL:
6003         p = "ANYOF+NL";
6004         break;
6005       case ANYBUT:
6006         p = "ANYBUT";
6007         break;
6008       case ANYBUT + ADD_NL:
6009         p = "ANYBUT+NL";
6010         break;
6011       case IDENT:
6012         p = "IDENT";
6013         break;
6014       case IDENT + ADD_NL:
6015         p = "IDENT+NL";
6016         break;
6017       case SIDENT:
6018         p = "SIDENT";
6019         break;
6020       case SIDENT + ADD_NL:
6021         p = "SIDENT+NL";
6022         break;
6023       case KWORD:
6024         p = "KWORD";
6025         break;
6026       case KWORD + ADD_NL:
6027         p = "KWORD+NL";
6028         break;
6029       case SKWORD:
6030         p = "SKWORD";
6031         break;
6032       case SKWORD + ADD_NL:
6033         p = "SKWORD+NL";
6034         break;
6035       case FNAME:
6036         p = "FNAME";
6037         break;
6038       case FNAME + ADD_NL:
6039         p = "FNAME+NL";
6040         break;
6041       case SFNAME:
6042         p = "SFNAME";
6043         break;
6044       case SFNAME + ADD_NL:
6045         p = "SFNAME+NL";
6046         break;
6047       case PRINT:
6048         p = "PRINT";
6049         break;
6050       case PRINT + ADD_NL:
6051         p = "PRINT+NL";
6052         break;
6053       case SPRINT:
6054         p = "SPRINT";
6055         break;
6056       case SPRINT + ADD_NL:
6057         p = "SPRINT+NL";
6058         break;
6059       case WHITE:
6060         p = "WHITE";
6061         break;
6062       case WHITE + ADD_NL:
6063         p = "WHITE+NL";
6064         break;
6065       case NWHITE:
6066         p = "NWHITE";
6067         break;
6068       case NWHITE + ADD_NL:
6069         p = "NWHITE+NL";
6070         break;
6071       case DIGIT:
6072         p = "DIGIT";
6073         break;
6074       case DIGIT + ADD_NL:
6075         p = "DIGIT+NL";
6076         break;
6077       case NDIGIT:
6078         p = "NDIGIT";
6079         break;
6080       case NDIGIT + ADD_NL:
6081         p = "NDIGIT+NL";
6082         break;
6083       case HEX:
6084         p = "HEX";
6085         break;
6086       case HEX + ADD_NL:
6087         p = "HEX+NL";
6088         break;
6089       case NHEX:
6090         p = "NHEX";
6091         break;
6092       case NHEX + ADD_NL:
6093         p = "NHEX+NL";
6094         break;
6095       case OCTAL:
6096         p = "OCTAL";
6097         break;
6098       case OCTAL + ADD_NL:
6099         p = "OCTAL+NL";
6100         break;
6101       case NOCTAL:
6102         p = "NOCTAL";
6103         break;
6104       case NOCTAL + ADD_NL:
6105         p = "NOCTAL+NL";
6106         break;
6107       case WORD:
6108         p = "WORD";
6109         break;
6110       case WORD + ADD_NL:
6111         p = "WORD+NL";
6112         break;
6113       case NWORD:
6114         p = "NWORD";
6115         break;
6116       case NWORD + ADD_NL:
6117         p = "NWORD+NL";
6118         break;
6119       case HEAD:
6120         p = "HEAD";
6121         break;
6122       case HEAD + ADD_NL:
6123         p = "HEAD+NL";
6124         break;
6125       case NHEAD:
6126         p = "NHEAD";
6127         break;
6128       case NHEAD + ADD_NL:
6129         p = "NHEAD+NL";
6130         break;
6131       case ALPHA:
6132         p = "ALPHA";
6133         break;
6134       case ALPHA + ADD_NL:
6135         p = "ALPHA+NL";
6136         break;
6137       case NALPHA:
6138         p = "NALPHA";
6139         break;
6140       case NALPHA + ADD_NL:
6141         p = "NALPHA+NL";
6142         break;
6143       case LOWER:
6144         p = "LOWER";
6145         break;
6146       case LOWER + ADD_NL:
6147         p = "LOWER+NL";
6148         break;
6149       case NLOWER:
6150         p = "NLOWER";
6151         break;
6152       case NLOWER + ADD_NL:
6153         p = "NLOWER+NL";
6154         break;
6155       case UPPER:
6156         p = "UPPER";
6157         break;
6158       case UPPER + ADD_NL:
6159         p = "UPPER+NL";
6160         break;
6161       case NUPPER:
6162         p = "NUPPER";
6163         break;
6164       case NUPPER + ADD_NL:
6165         p = "NUPPER+NL";
6166         break;
6167       case BRANCH:
6168         p = "BRANCH";
6169         break;
6170       case EXACTLY:
6171         p = "EXACTLY";
6172         break;
6173       case NOTHING:
6174         p = "NOTHING";
6175         break;
6176       case BACK:
6177         p = "BACK";
6178         break;
6179       case END:
6180         p = "END";
6181         break;
6182       case MOPEN + 0:
6183         p = "MATCH START";
6184         break;
6185       case MOPEN + 1:
6186       case MOPEN + 2:
6187       case MOPEN + 3:
6188       case MOPEN + 4:
6189       case MOPEN + 5:
6190       case MOPEN + 6:
6191       case MOPEN + 7:
6192       case MOPEN + 8:
6193       case MOPEN + 9:
6194         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6195         p = NULL;
6196         break;
6197       case MCLOSE + 0:
6198         p = "MATCH END";
6199         break;
6200       case MCLOSE + 1:
6201       case MCLOSE + 2:
6202       case MCLOSE + 3:
6203       case MCLOSE + 4:
6204       case MCLOSE + 5:
6205       case MCLOSE + 6:
6206       case MCLOSE + 7:
6207       case MCLOSE + 8:
6208       case MCLOSE + 9:
6209         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6210         p = NULL;
6211         break;
6212       case BACKREF + 1:
6213       case BACKREF + 2:
6214       case BACKREF + 3:
6215       case BACKREF + 4:
6216       case BACKREF + 5:
6217       case BACKREF + 6:
6218       case BACKREF + 7:
6219       case BACKREF + 8:
6220       case BACKREF + 9:
6221         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6222         p = NULL;
6223         break;
6224       case NOPEN:
6225         p = "NOPEN";
6226         break;
6227       case NCLOSE:
6228         p = "NCLOSE";
6229         break;
6230 #ifdef FEAT_SYN_HL
6231       case ZOPEN + 1:
6232       case ZOPEN + 2:
6233       case ZOPEN + 3:
6234       case ZOPEN + 4:
6235       case ZOPEN + 5:
6236       case ZOPEN + 6:
6237       case ZOPEN + 7:
6238       case ZOPEN + 8:
6239       case ZOPEN + 9:
6240         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6241         p = NULL;
6242         break;
6243       case ZCLOSE + 1:
6244       case ZCLOSE + 2:
6245       case ZCLOSE + 3:
6246       case ZCLOSE + 4:
6247       case ZCLOSE + 5:
6248       case ZCLOSE + 6:
6249       case ZCLOSE + 7:
6250       case ZCLOSE + 8:
6251       case ZCLOSE + 9:
6252         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6253         p = NULL;
6254         break;
6255       case ZREF + 1:
6256       case ZREF + 2:
6257       case ZREF + 3:
6258       case ZREF + 4:
6259       case ZREF + 5:
6260       case ZREF + 6:
6261       case ZREF + 7:
6262       case ZREF + 8:
6263       case ZREF + 9:
6264         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6265         p = NULL;
6266         break;
6267 #endif
6268       case STAR:
6269         p = "STAR";
6270         break;
6271       case PLUS:
6272         p = "PLUS";
6273         break;
6274       case NOMATCH:
6275         p = "NOMATCH";
6276         break;
6277       case MATCH:
6278         p = "MATCH";
6279         break;
6280       case BEHIND:
6281         p = "BEHIND";
6282         break;
6283       case NOBEHIND:
6284         p = "NOBEHIND";
6285         break;
6286       case SUBPAT:
6287         p = "SUBPAT";
6288         break;
6289       case BRACE_LIMITS:
6290         p = "BRACE_LIMITS";
6291         break;
6292       case BRACE_SIMPLE:
6293         p = "BRACE_SIMPLE";
6294         break;
6295       case BRACE_COMPLEX + 0:
6296       case BRACE_COMPLEX + 1:
6297       case BRACE_COMPLEX + 2:
6298       case BRACE_COMPLEX + 3:
6299       case BRACE_COMPLEX + 4:
6300       case BRACE_COMPLEX + 5:
6301       case BRACE_COMPLEX + 6:
6302       case BRACE_COMPLEX + 7:
6303       case BRACE_COMPLEX + 8:
6304       case BRACE_COMPLEX + 9:
6305         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6306         p = NULL;
6307         break;
6308 #ifdef FEAT_MBYTE
6309       case MULTIBYTECODE:
6310         p = "MULTIBYTECODE";
6311         break;
6312 #endif
6313       case NEWL:
6314         p = "NEWL";
6315         break;
6316       default:
6317         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6318         p = NULL;
6319         break;
6320     }
6321     if (p != NULL)
6322         (void) strcat(buf, p);
6323     return buf;
6324 }
6325 #endif
6326
6327 #ifdef FEAT_MBYTE
6328 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6329
/* First and last character covered by decomp_table[].  Used both for the
 * size of the table and for the range check in mb_decompose(), so that the
 * two cannot get out of sync. */
#define DECOMP_FIRST    0xfb20
#define DECOMP_LAST     0xfb4f

/* One decomposition: "a" is stored as the first character, "b" and "c" as
 * the second and third; a slot that is not used is zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/* 0xfb20 - 0xfb4f */
static decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters.
 * When "c" is in the range covered by decomp_table[] the table entry is
 * stored in "*c1", "*c2" and "*c3".  Any other character is returned
 * unchanged in "*c1", with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first table entry: a smaller bound would
     * let "c - DECOMP_FIRST" go negative and read before the table. */
    if (c >= DECOMP_FIRST && c <= DECOMP_LAST)
    {
        d = decomp_table[c - DECOMP_FIRST];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6408 #endif
6409
6410 /*
6411  * Compare two strings, ignore case if ireg_ic set.
6412  * Return 0 if strings match, non-zero otherwise.
6413  * Correct the length "*n" when composing characters are ignored.
6414  */
6415     static int
6416 cstrncmp(s1, s2, n)
6417     char_u      *s1, *s2;
6418     int         *n;
6419 {
6420     int         result;
6421
6422     if (!ireg_ic)
6423         result = STRNCMP(s1, s2, *n);
6424     else
6425         result = MB_STRNICMP(s1, s2, *n);
6426
6427 #ifdef FEAT_MBYTE
6428     /* if it failed and it's utf8 and we want to combineignore: */
6429     if (result != 0 && enc_utf8 && ireg_icombine)
6430     {
6431         char_u  *str1, *str2;
6432         int     c1, c2, c11, c12;
6433         int     junk;
6434
6435         /* we have to handle the strcmp ourselves, since it is necessary to
6436          * deal with the composing characters by ignoring them: */
6437         str1 = s1;
6438         str2 = s2;
6439         c1 = c2 = 0;
6440         while ((int)(str1 - s1) < *n)
6441         {
6442             c1 = mb_ptr2char_adv(&str1);
6443             c2 = mb_ptr2char_adv(&str2);
6444
6445             /* decompose the character if necessary, into 'base' characters
6446              * because I don't care about Arabic, I will hard-code the Hebrew
6447              * which I *do* care about!  So sue me... */
6448             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6449             {
6450                 /* decomposition necessary? */
6451                 mb_decompose(c1, &c11, &junk, &junk);
6452                 mb_decompose(c2, &c12, &junk, &junk);
6453                 c1 = c11;
6454                 c2 = c12;
6455                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6456                     break;
6457             }
6458         }
6459         result = c2 - c1;
6460         if (result == 0)
6461             *n = (int)(str2 - s2);
6462     }
6463 #endif
6464
6465     return result;
6466 }
6467
6468 /*
6469  * cstrchr: This function is used a lot for simple searches, keep it fast!
6470  */
6471     static char_u *
6472 cstrchr(s, c)
6473     char_u      *s;
6474     int         c;
6475 {
6476     char_u      *p;
6477     int         cc;
6478
6479     if (!ireg_ic
6480 #ifdef FEAT_MBYTE
6481             || (!enc_utf8 && mb_char2len(c) > 1)
6482 #endif
6483             )
6484         return vim_strchr(s, c);
6485
6486     /* tolower() and toupper() can be slow, comparing twice should be a lot
6487      * faster (esp. when using MS Visual C++!).
6488      * For UTF-8 need to use folded case. */
6489 #ifdef FEAT_MBYTE
6490     if (enc_utf8 && c > 0x80)
6491         cc = utf_fold(c);
6492     else
6493 #endif
6494          if (MB_ISUPPER(c))
6495         cc = MB_TOLOWER(c);
6496     else if (MB_ISLOWER(c))
6497         cc = MB_TOUPPER(c);
6498     else
6499         return vim_strchr(s, c);
6500
6501 #ifdef FEAT_MBYTE
6502     if (has_mbyte)
6503     {
6504         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6505         {
6506             if (enc_utf8 && c > 0x80)
6507             {
6508                 if (utf_fold(utf_ptr2char(p)) == cc)
6509                     return p;
6510             }
6511             else if (*p == c || *p == cc)
6512                 return p;
6513         }
6514     }
6515     else
6516 #endif
6517         /* Faster version for when there are no multi-byte characters. */
6518         for (p = s; *p != NUL; ++p)
6519             if (*p == c || *p == cc)
6520                 return p;
6521
6522     return NULL;
6523 }
6524
6525 /***************************************************************
6526  *                    regsub stuff                             *
6527  ***************************************************************/
6528
6529 /* This stuff below really confuses cc on an SGI -- webb */
6530 #ifdef __sgi
6531 # undef __ARGS
6532 # define __ARGS(x)  ()
6533 #endif
6534
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
6541 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6542
6543 static fptr_T do_upper __ARGS((int *, int));
6544 static fptr_T do_Upper __ARGS((int *, int));
6545 static fptr_T do_lower __ARGS((int *, int));
6546 static fptr_T do_Lower __ARGS((int *, int));
6547
6548 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6549
6550     static fptr_T
6551 do_upper(d, c)
6552     int         *d;
6553     int         c;
6554 {
6555     *d = MB_TOUPPER(c);
6556
6557     return (fptr_T)NULL;
6558 }
6559
6560     static fptr_T
6561 do_Upper(d, c)
6562     int         *d;
6563     int         c;
6564 {
6565     *d = MB_TOUPPER(c);
6566
6567     return (fptr_T)do_Upper;
6568 }
6569
6570     static fptr_T
6571 do_lower(d, c)
6572     int         *d;
6573     int         c;
6574 {
6575     *d = MB_TOLOWER(c);
6576
6577     return (fptr_T)NULL;
6578 }
6579
6580     static fptr_T
6581 do_Lower(d, c)
6582     int         *d;
6583     int         c;
6584 {
6585     *d = MB_TOLOWER(c);
6586
6587     return (fptr_T)do_Lower;
6588 }
6589
6590 /*
6591  * regtilde(): Replace tildes in the pattern by the old pattern.
6592  *
6593  * Short explanation of the tilde: It stands for the previous replacement
6594  * pattern.  If that previous pattern also contains a ~ we should go back a
6595  * step further...  But we insert the previous pattern into the current one
6596  * and remember that.
6597  * This still does not handle the case where "magic" changes.  So require the
6598  * user to keep his hands off of "magic".
6599  *
6600  * The tildes are parsed once before the first call to vim_regsub().
6601  */
6602     char_u *
6603 regtilde(source, magic)
6604     char_u      *source;
6605     int         magic;
6606 {
6607     char_u      *newsub = source;
6608     char_u      *tmpsub;
6609     char_u      *p;
6610     int         len;
6611     int         prevlen;
6612
6613     for (p = newsub; *p; ++p)
6614     {
6615         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6616         {
6617             if (reg_prev_sub != NULL)
6618             {
6619                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6620                 prevlen = (int)STRLEN(reg_prev_sub);
6621                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6622                 if (tmpsub != NULL)
6623                 {
6624                     /* copy prefix */
6625                     len = (int)(p - newsub);    /* not including ~ */
6626                     mch_memmove(tmpsub, newsub, (size_t)len);
6627                     /* interpret tilde */
6628                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6629                     /* copy postfix */
6630                     if (!magic)
6631                         ++p;                    /* back off \ */
6632                     STRCPY(tmpsub + len + prevlen, p + 1);
6633
6634                     if (newsub != source)       /* already allocated newsub */
6635                         vim_free(newsub);
6636                     newsub = tmpsub;
6637                     p = newsub + len + prevlen;
6638                 }
6639             }
6640             else if (magic)
6641                 mch_memmove(p, p + 1, STRLEN(p));       /* remove '~' */
6642             else
6643                 mch_memmove(p, p + 2, STRLEN(p) - 1);   /* remove '\~' */
6644             --p;
6645         }
6646         else
6647         {
6648             if (*p == '\\' && p[1])             /* skip escaped characters */
6649                 ++p;
6650 #ifdef FEAT_MBYTE
6651             if (has_mbyte)
6652                 p += (*mb_ptr2len)(p) - 1;
6653 #endif
6654         }
6655     }
6656
6657     vim_free(reg_prev_sub);
6658     if (newsub != source)       /* newsub was allocated, just keep it */
6659         reg_prev_sub = newsub;
6660     else                        /* no ~ found, need to save newsub  */
6661         reg_prev_sub = vim_strsave(newsub);
6662     return newsub;
6663 }
6664
6665 #ifdef FEAT_EVAL
6666 static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */
6667
6668 /* These pointers are used instead of reg_match and reg_mmatch for
6669  * reg_submatch().  Needed for when the substitution string is an expression
6670  * that contains a call to substitute() and submatch(). */
6671 static regmatch_T       *submatch_match;
6672 static regmmatch_T      *submatch_mmatch;
6673 #endif
6674
6675 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6676 /*
6677  * vim_regsub() - perform substitutions after a vim_regexec() or
6678  * vim_regexec_multi() match.
6679  *
6680  * If "copy" is TRUE really copy into "dest".
6681  * If "copy" is FALSE nothing is copied, this is just to find out the length
6682  * of the result.
6683  *
6684  * If "backslash" is TRUE, a backslash will be removed later, need to double
6685  * them to keep them, and insert a backslash before a CR to avoid it being
6686  * replaced with a line break later.
6687  *
6688  * Note: The matched text must not change between the call of
6689  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6690  * references invalid!
6691  *
6692  * Returns the size of the replacement, including terminating NUL.
6693  */
6694     int
6695 vim_regsub(rmp, source, dest, copy, magic, backslash)
6696     regmatch_T  *rmp;
6697     char_u      *source;
6698     char_u      *dest;
6699     int         copy;
6700     int         magic;
6701     int         backslash;
6702 {
6703     reg_match = rmp;
6704     reg_mmatch = NULL;
6705     reg_maxline = 0;
6706     return vim_regsub_both(source, dest, copy, magic, backslash);
6707 }
6708 #endif
6709
6710     int
6711 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6712     regmmatch_T *rmp;
6713     linenr_T    lnum;
6714     char_u      *source;
6715     char_u      *dest;
6716     int         copy;
6717     int         magic;
6718     int         backslash;
6719 {
6720     reg_match = NULL;
6721     reg_mmatch = rmp;
6722     reg_buf = curbuf;           /* always works on the current buffer! */
6723     reg_firstlnum = lnum;
6724     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6725     return vim_regsub_both(source, dest, copy, magic, backslash);
6726 }
6727
6728     static int
6729 vim_regsub_both(source, dest, copy, magic, backslash)
6730     char_u      *source;
6731     char_u      *dest;
6732     int         copy;
6733     int         magic;
6734     int         backslash;
6735 {
6736     char_u      *src;
6737     char_u      *dst;
6738     char_u      *s;
6739     int         c;
6740     int         cc;
6741     int         no = -1;
6742     fptr_T      func = (fptr_T)NULL;
6743     linenr_T    clnum = 0;      /* init for GCC */
6744     int         len = 0;        /* init for GCC */
6745 #ifdef FEAT_EVAL
6746     static char_u *eval_result = NULL;
6747 #endif
6748
6749     /* Be paranoid... */
6750     if (source == NULL || dest == NULL)
6751     {
6752         EMSG(_(e_null));
6753         return 0;
6754     }
6755     if (prog_magic_wrong())
6756         return 0;
6757     src = source;
6758     dst = dest;
6759
6760     /*
6761      * When the substitute part starts with "\=" evaluate it as an expression.
6762      */
6763     if (source[0] == '\\' && source[1] == '='
6764 #ifdef FEAT_EVAL
6765             && !can_f_submatch      /* can't do this recursively */
6766 #endif
6767             )
6768     {
6769 #ifdef FEAT_EVAL
6770         /* To make sure that the length doesn't change between checking the
6771          * length and copying the string, and to speed up things, the
6772          * resulting string is saved from the call with "copy" == FALSE to the
6773          * call with "copy" == TRUE. */
6774         if (copy)
6775         {
6776             if (eval_result != NULL)
6777             {
6778                 STRCPY(dest, eval_result);
6779                 dst += STRLEN(eval_result);
6780                 vim_free(eval_result);
6781                 eval_result = NULL;
6782             }
6783         }
6784         else
6785         {
6786             linenr_T    save_reg_maxline;
6787             win_T       *save_reg_win;
6788             int         save_ireg_ic;
6789
6790             vim_free(eval_result);
6791
6792             /* The expression may contain substitute(), which calls us
6793              * recursively.  Make sure submatch() gets the text from the first
6794              * level.  Don't need to save "reg_buf", because
6795              * vim_regexec_multi() can't be called recursively. */
6796             submatch_match = reg_match;
6797             submatch_mmatch = reg_mmatch;
6798             save_reg_maxline = reg_maxline;
6799             save_reg_win = reg_win;
6800             save_ireg_ic = ireg_ic;
6801             can_f_submatch = TRUE;
6802
6803             eval_result = eval_to_string(source + 2, NULL, TRUE);
6804             if (eval_result != NULL)
6805             {
6806                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6807                 {
6808                     /* Change NL to CR, so that it becomes a line break.
6809                      * Skip over a backslashed character. */
6810                     if (*s == NL)
6811                         *s = CAR;
6812                     else if (*s == '\\' && s[1] != NUL)
6813                         ++s;
6814                 }
6815
6816                 dst += STRLEN(eval_result);
6817             }
6818
6819             reg_match = submatch_match;
6820             reg_mmatch = submatch_mmatch;
6821             reg_maxline = save_reg_maxline;
6822             reg_win = save_reg_win;
6823             ireg_ic = save_ireg_ic;
6824             can_f_submatch = FALSE;
6825         }
6826 #endif
6827     }
6828     else
6829       while ((c = *src++) != NUL)
6830       {
6831         if (c == '&' && magic)
6832             no = 0;
6833         else if (c == '\\' && *src != NUL)
6834         {
6835             if (*src == '&' && !magic)
6836             {
6837                 ++src;
6838                 no = 0;
6839             }
6840             else if ('0' <= *src && *src <= '9')
6841             {
6842                 no = *src++ - '0';
6843             }
6844             else if (vim_strchr((char_u *)"uUlLeE", *src))
6845             {
6846                 switch (*src++)
6847                 {
6848                 case 'u':   func = (fptr_T)do_upper;
6849                             continue;
6850                 case 'U':   func = (fptr_T)do_Upper;
6851                             continue;
6852                 case 'l':   func = (fptr_T)do_lower;
6853                             continue;
6854                 case 'L':   func = (fptr_T)do_Lower;
6855                             continue;
6856                 case 'e':
6857                 case 'E':   func = (fptr_T)NULL;
6858                             continue;
6859                 }
6860             }
6861         }
6862         if (no < 0)           /* Ordinary character. */
6863         {
6864             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
6865             {
6866                 /* Copy a special key as-is. */
6867                 if (copy)
6868                 {
6869                     *dst++ = c;
6870                     *dst++ = *src++;
6871                     *dst++ = *src++;
6872                 }
6873                 else
6874                 {
6875                     dst += 3;
6876                     src += 2;
6877                 }
6878                 continue;
6879             }
6880
6881             if (c == '\\' && *src != NUL)
6882             {
6883                 /* Check for abbreviations -- webb */
6884                 switch (*src)
6885                 {
6886                     case 'r':   c = CAR;        ++src;  break;
6887                     case 'n':   c = NL;         ++src;  break;
6888                     case 't':   c = TAB;        ++src;  break;
6889                  /* Oh no!  \e already has meaning in subst pat :-( */
6890                  /* case 'e':   c = ESC;        ++src;  break; */
6891                     case 'b':   c = Ctrl_H;     ++src;  break;
6892
6893                     /* If "backslash" is TRUE the backslash will be removed
6894                      * later.  Used to insert a literal CR. */
6895                     default:    if (backslash)
6896                                 {
6897                                     if (copy)
6898                                         *dst = '\\';
6899                                     ++dst;
6900                                 }
6901                                 c = *src++;
6902                 }
6903             }
6904 #ifdef FEAT_MBYTE
6905             else if (has_mbyte)
6906                 c = mb_ptr2char(src - 1);
6907 #endif
6908
6909             /* Write to buffer, if copy is set. */
6910             if (func == (fptr_T)NULL)   /* just copy */
6911                 cc = c;
6912             else
6913                 /* Turbo C complains without the typecast */
6914                 func = (fptr_T)(func(&cc, c));
6915
6916 #ifdef FEAT_MBYTE
6917             if (has_mbyte)
6918             {
6919                 src += mb_ptr2len(src - 1) - 1;
6920                 if (copy)
6921                     mb_char2bytes(cc, dst);
6922                 dst += mb_char2len(cc) - 1;
6923             }
6924             else
6925 #endif
6926                 if (copy)
6927                     *dst = cc;
6928             dst++;
6929         }
6930         else
6931         {
6932             if (REG_MULTI)
6933             {
6934                 clnum = reg_mmatch->startpos[no].lnum;
6935                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6936                     s = NULL;
6937                 else
6938                 {
6939                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6940                     if (reg_mmatch->endpos[no].lnum == clnum)
6941                         len = reg_mmatch->endpos[no].col
6942                                                - reg_mmatch->startpos[no].col;
6943                     else
6944                         len = (int)STRLEN(s);
6945                 }
6946             }
6947             else
6948             {
6949                 s = reg_match->startp[no];
6950                 if (reg_match->endp[no] == NULL)
6951                     s = NULL;
6952                 else
6953                     len = (int)(reg_match->endp[no] - s);
6954             }
6955             if (s != NULL)
6956             {
6957                 for (;;)
6958                 {
6959                     if (len == 0)
6960                     {
6961                         if (REG_MULTI)
6962                         {
6963                             if (reg_mmatch->endpos[no].lnum == clnum)
6964                                 break;
6965                             if (copy)
6966                                 *dst = CAR;
6967                             ++dst;
6968                             s = reg_getline(++clnum);
6969                             if (reg_mmatch->endpos[no].lnum == clnum)
6970                                 len = reg_mmatch->endpos[no].col;
6971                             else
6972                                 len = (int)STRLEN(s);
6973                         }
6974                         else
6975                             break;
6976                     }
6977                     else if (*s == NUL) /* we hit NUL. */
6978                     {
6979                         if (copy)
6980                             EMSG(_(e_re_damg));
6981                         goto exit;
6982                     }
6983                     else
6984                     {
6985                         if (backslash && (*s == CAR || *s == '\\'))
6986                         {
6987                             /*
6988                              * Insert a backslash in front of a CR, otherwise
6989                              * it will be replaced by a line break.
6990                              * Number of backslashes will be halved later,
6991                              * double them here.
6992                              */
6993                             if (copy)
6994                             {
6995                                 dst[0] = '\\';
6996                                 dst[1] = *s;
6997                             }
6998                             dst += 2;
6999                         }
7000                         else
7001                         {
7002 #ifdef FEAT_MBYTE
7003                             if (has_mbyte)
7004                                 c = mb_ptr2char(s);
7005                             else
7006 #endif
7007                                 c = *s;
7008
7009                             if (func == (fptr_T)NULL)   /* just copy */
7010                                 cc = c;
7011                             else
7012                                 /* Turbo C complains without the typecast */
7013                                 func = (fptr_T)(func(&cc, c));
7014
7015 #ifdef FEAT_MBYTE
7016                             if (has_mbyte)
7017                             {
7018                                 int l;
7019
7020                                 /* Copy composing characters separately, one
7021                                  * at a time. */
7022                                 if (enc_utf8)
7023                                     l = utf_ptr2len(s) - 1;
7024                                 else
7025                                     l = mb_ptr2len(s) - 1;
7026
7027                                 s += l;
7028                                 len -= l;
7029                                 if (copy)
7030                                     mb_char2bytes(cc, dst);
7031                                 dst += mb_char2len(cc) - 1;
7032                             }
7033                             else
7034 #endif
7035                                 if (copy)
7036                                     *dst = cc;
7037                             dst++;
7038                         }
7039
7040                         ++s;
7041                         --len;
7042                     }
7043                 }
7044             }
7045             no = -1;
7046         }
7047       }
7048     if (copy)
7049         *dst = NUL;
7050
7051 exit:
7052     return (int)((dst - dest) + 1);
7053 }
7054
7055 #ifdef FEAT_EVAL
7056 /*
7057  * Used for the submatch() function: get the string from the n'th submatch in
7058  * allocated memory.
7059  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7060  */
7061     char_u *
7062 reg_submatch(no)
7063     int         no;
7064 {
7065     char_u      *retval = NULL;
7066     char_u      *s;
7067     int         len;
7068     int         round;
7069     linenr_T    lnum;
7070
7071     if (!can_f_submatch || no < 0)
7072         return NULL;
7073
7074     if (submatch_match == NULL)
7075     {
7076         /*
7077          * First round: compute the length and allocate memory.
7078          * Second round: copy the text.
7079          */
7080         for (round = 1; round <= 2; ++round)
7081         {
7082             lnum = submatch_mmatch->startpos[no].lnum;
7083             if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7084                 return NULL;
7085
7086             s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
7087             if (s == NULL)  /* anti-crash check, cannot happen? */
7088                 break;
7089             if (submatch_mmatch->endpos[no].lnum == lnum)
7090             {
7091                 /* Within one line: take form start to end col. */
7092                 len = submatch_mmatch->endpos[no].col
7093                                           - submatch_mmatch->startpos[no].col;
7094                 if (round == 2)
7095                     vim_strncpy(retval, s, len);
7096                 ++len;
7097             }
7098             else
7099             {
7100                 /* Multiple lines: take start line from start col, middle
7101                  * lines completely and end line up to end col. */
7102                 len = (int)STRLEN(s);
7103                 if (round == 2)
7104                 {
7105                     STRCPY(retval, s);
7106                     retval[len] = '\n';
7107                 }
7108                 ++len;
7109                 ++lnum;
7110                 while (lnum < submatch_mmatch->endpos[no].lnum)
7111                 {
7112                     s = reg_getline(lnum++);
7113                     if (round == 2)
7114                         STRCPY(retval + len, s);
7115                     len += (int)STRLEN(s);
7116                     if (round == 2)
7117                         retval[len] = '\n';
7118                     ++len;
7119                 }
7120                 if (round == 2)
7121                     STRNCPY(retval + len, reg_getline(lnum),
7122                                              submatch_mmatch->endpos[no].col);
7123                 len += submatch_mmatch->endpos[no].col;
7124                 if (round == 2)
7125                     retval[len] = NUL;
7126                 ++len;
7127             }
7128
7129             if (retval == NULL)
7130             {
7131                 retval = lalloc((long_u)len, TRUE);
7132                 if (retval == NULL)
7133                     return NULL;
7134             }
7135         }
7136     }
7137     else
7138     {
7139         if (submatch_match->endp[no] == NULL)
7140             retval = NULL;
7141         else
7142         {
7143             s = submatch_match->startp[no];
7144             retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7145         }
7146     }
7147
7148     return retval;
7149 }
7150 #endif