src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
  72  * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
  73  * pointer with a BRANCH on both ends of it is connecting two alternatives.
  74  * (Here we have one of the subtle syntax dependencies: an individual BRANCH
  75  * (as opposed to a collection of them) is never concatenated with anything
  76  * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
  77  * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
 139 /* definition   number             opnd?    meaning */
 140 #define END             0       /*      End of program or NOMATCH operand. */
 141 #define BOL             1       /*      Match "" at beginning of line. */
 142 #define EOL             2       /*      Match "" at end of line. */
 143 #define BRANCH          3       /* node Match this alternative, or the
 144                                  *      next... */
 145 #define BACK            4       /*      Match "", "next" ptr points backward. */
 146 #define EXACTLY         5       /* str  Match this string. */
 147 #define NOTHING         6       /*      Match empty string. */
 148 #define STAR            7       /* node Match this (simple) thing 0 or more
 149                                  *      times. */
 150 #define PLUS            8       /* node Match this (simple) thing 1 or more
 151                                  *      times. */
 152 #define MATCH           9       /* node match the operand zero-width */
 153 #define NOMATCH         10      /* node check for no match with operand */
 154 #define BEHIND          11      /* node look behind for a match with operand */
 155 #define NOBEHIND        12      /* node look behind for no match with operand */
 156 #define SUBPAT          13      /* node match the operand here */
 157 #define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
 158                                  *      n times (\{m,n\}). */
 159 #define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
 160 #define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
 161 #define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
 162                                  *      and BRACE_COMPLEX. */
 163 #define NEWL            18      /*      Match line-break */
 164 #define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
 203 #define MOPEN           80  /* -89       Mark this point in input as start of
 204                                  *       \( subexpr.  MOPEN + 0 marks start of
 205                                  *       match. */
 206 #define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
 207                                  *       end of match. */
 208 #define BACKREF         100 /* -109 node Match same string again \1-\9 */
 209
 210 #ifdef FEAT_SYN_HL
 211 # define ZOPEN          110 /* -119      Mark this point in input as start of
 212                                  *       \z( subexpr. */
 213 # define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
 214 # define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
 215 #endif
 216
 217 #define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */
 218
 219 #define NOPEN           150     /*      Mark this point in input as start of
 220                                         \%( subexpr. */
 221 #define NCLOSE          151     /*      Analogous to NOPEN. */
 222
 223 #define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
 224 #define RE_BOF          201     /*      Match "" at beginning of file. */
 225 #define RE_EOF          202     /*      Match "" at end of file. */
 226 #define CURSOR          203     /*      Match location of cursor. */
 227
 228 #define RE_LNUM         204     /* nr cmp  Match line number */
 229 #define RE_COL          205     /* nr cmp  Match column number */
 230 #define RE_VCOL         206     /* nr cmp  Match virtual column number */
 231
 232 #define RE_MARK         207     /* mark cmp  Match mark position */
 233 #define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
 240 #define Magic(x)        ((int)(x) - 256)
 241 #define un_Magic(x)     ((x) + 256)
 242 #define is_Magic(x)     ((x) < 0)
 243
 244 static int no_Magic __ARGS((int x));
 245 static int toggle_Magic __ARGS((int x));
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
 271 #define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
 310 #define OP(p)           ((int)*(p))
 311 #define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
 312 #define OPERAND(p)      ((p) + 3)
 313 /* Obtain an operand that was stored as four bytes, MSB first. */
 314 #define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 315                         + ((long)(p)[5] << 8) + (long)(p)[6])
 316 /* Obtain a second operand stored as four bytes. */
 317 #define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
 318 /* Obtain a second single-byte operand stored after a four bytes operand. */
 319 #define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
 324 #define UCHARAT(p)      ((int)*(char_u *)(p))
 325
 326 /* Used for an error (down from) vim_regcomp(): give the error message, set
 327  * rc_did_emsg and return NULL */
 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 332
 333 #define MAX_LIMIT       (32767L << 16L)
 334
 335 static int re_multi_type __ARGS((int));
 336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 337 static char_u *cstrchr __ARGS((char_u *, int));
 338
 339 #ifdef DEBUG
 340 static void     regdump __ARGS((char_u *, regprog_T *));
 341 static char_u   *regprop __ARGS((char_u *));
 342 #endif
 343
 344 #define NOT_MULTI       0
 345 #define MULTI_ONE       1
 346 #define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
 366 #define HASWIDTH        0x1     /* Known never to match null string. */
 367 #define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
 368 #define SPSTART         0x4     /* Starts with * or +. */
 369 #define HASNL           0x8     /* Contains some \n. */
 370 #define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
 371 #define WORST           0       /* Worst case. */
 372
 373 /*
 374  * When regcode is set to this value, code is not emitted and size is computed
 375  * instead.
 376  */
 377 #define JUST_CALC_SIZE  ((char_u *) -1)
 378
 379 static char_u           *reg_prev_sub = NULL;
 380
 381 #if defined(EXITFREE) || defined(PROTO)
 382     void
 383 free_regexp_stuff()
 384 {
 385     vim_free(reg_prev_sub);
 386 }
 387 #endif
 388
 389 /*
 390  * REGEXP_INRANGE contains all characters which are always special in a []
 391  * range after '\'.
 392  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 393  * These are:
 394  *  \n  - New line (NL).
 395  *  \r  - Carriage Return (CR).
 396  *  \t  - Tab (TAB).
 397  *  \e  - Escape (ESC).
 398  *  \b  - Backspace (Ctrl_H).
 399  *  \d  - Character code in decimal, eg \d123
 400  *  \o  - Character code in octal, eg \o80
 401  *  \x  - Character code in hex, eg \x4a
 402  *  \u  - Multibyte character code, eg \u20ac
 403  *  \U  - Long multibyte character code, eg \U12345678
 404  */
 405 static char_u REGEXP_INRANGE[] = "]^-n\\";
 406 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
 407
 408 static int      backslash_trans __ARGS((int c));
 409 static int      get_char_class __ARGS((char_u **pp));
 410 static int      get_equi_class __ARGS((char_u **pp));
 411 static void     reg_equi_class __ARGS((int c));
 412 static int      get_coll_element __ARGS((char_u **pp));
 413 static char_u   *skip_anyof __ARGS((char_u *p));
 414 static void     init_class_tab __ARGS((void));
 415
 416 /*
 417  * Translate '\x' to its control character, except "\n", which is Magic.
 418  */
 419     static int
 420 backslash_trans(c)
 421     int         c;
 422 {
 423     switch (c)
 424     {
 425         case 'r':   return CAR;
 426         case 't':   return TAB;
 427         case 'e':   return ESC;
 428         case 'b':   return BS;
 429     }
 430     return c;
 431 }
 432
 433 /*
 434  * Check for a character class name "[:name:]".  "pp" points to the '['.
 435  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 436  * recognized.  Otherwise "pp" is advanced to after the item.
 437  */
 438     static int
 439 get_char_class(pp)
 440     char_u      **pp;
 441 {
 442     static const char *(class_names[]) =
 443     {
 444         "alnum:]",
 445 #define CLASS_ALNUM 0
 446         "alpha:]",
 447 #define CLASS_ALPHA 1
 448         "blank:]",
 449 #define CLASS_BLANK 2
 450         "cntrl:]",
 451 #define CLASS_CNTRL 3
 452         "digit:]",
 453 #define CLASS_DIGIT 4
 454         "graph:]",
 455 #define CLASS_GRAPH 5
 456         "lower:]",
 457 #define CLASS_LOWER 6
 458         "print:]",
 459 #define CLASS_PRINT 7
 460         "punct:]",
 461 #define CLASS_PUNCT 8
 462         "space:]",
 463 #define CLASS_SPACE 9
 464         "upper:]",
 465 #define CLASS_UPPER 10
 466         "xdigit:]",
 467 #define CLASS_XDIGIT 11
 468         "tab:]",
 469 #define CLASS_TAB 12
 470         "return:]",
 471 #define CLASS_RETURN 13
 472         "backspace:]",
 473 #define CLASS_BACKSPACE 14
 474         "escape:]",
 475 #define CLASS_ESCAPE 15
 476     };
 477 #define CLASS_NONE 99
 478     int i;
 479
 480     if ((*pp)[1] == ':')
 481     {
 482         for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
 483             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 484             {
 485                 *pp += STRLEN(class_names[i]) + 2;
 486                 return i;
 487             }
 488     }
 489     return CLASS_NONE;
 490 }
 491
 492 /*
 493  * Specific version of character class functions.
 494  * Using a table to keep this fast.
 495  */
 496 static short    class_tab[256];
 497
 498 #define     RI_DIGIT    0x01
 499 #define     RI_HEX      0x02
 500 #define     RI_OCTAL    0x04
 501 #define     RI_WORD     0x08
 502 #define     RI_HEAD     0x10
 503 #define     RI_ALPHA    0x20
 504 #define     RI_LOWER    0x40
 505 #define     RI_UPPER    0x80
 506 #define     RI_WHITE    0x100
 507
 508     static void
 509 init_class_tab()
 510 {
 511     int         i;
 512     static int  done = FALSE;
 513
 514     if (done)
 515         return;
 516
 517     for (i = 0; i < 256; ++i)
 518     {
 519         if (i >= '0' && i <= '7')
 520             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 521         else if (i >= '8' && i <= '9')
 522             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 523         else if (i >= 'a' && i <= 'f')
 524             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 525 #ifdef EBCDIC
 526         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 527                                                     || (i >= 's' && i <= 'z'))
 528 #else
 529         else if (i >= 'g' && i <= 'z')
 530 #endif
 531             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 532         else if (i >= 'A' && i <= 'F')
 533             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 534 #ifdef EBCDIC
 535         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 536                                                     || (i >= 'S' && i <= 'Z'))
 537 #else
 538         else if (i >= 'G' && i <= 'Z')
 539 #endif
 540             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 541         else if (i == '_')
 542             class_tab[i] = RI_WORD + RI_HEAD;
 543         else
 544             class_tab[i] = 0;
 545     }
 546     class_tab[' '] |= RI_WHITE;
 547     class_tab['\t'] |= RI_WHITE;
 548     done = TRUE;
 549 }
 550
 551 #ifdef FEAT_MBYTE
 552 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 553 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 554 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 555 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 556 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 557 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 558 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 559 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 560 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 561 #else
 562 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 563 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 564 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 565 # define ri_word(c)     (class_tab[c] & RI_WORD)
 566 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 567 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 568 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 569 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 570 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 571 #endif
 572
 573 /* flags for regflags */
 574 #define RF_ICASE    1   /* ignore case */
 575 #define RF_NOICASE  2   /* don't ignore case */
 576 #define RF_HASNL    4   /* can match a NL */
 577 #define RF_ICOMBINE 8   /* ignore combining characters */
 578 #define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 579
 580 /*
 581  * Global work variables for vim_regcomp().
 582  */
 583
 584 static char_u   *regparse;      /* Input-scan pointer. */
 585 static int      prevchr_len;    /* byte length of previous char */
 586 static int      num_complex_braces; /* Complex \{...} count */
 587 static int      regnpar;        /* () count. */
 588 #ifdef FEAT_SYN_HL
 589 static int      regnzpar;       /* \z() count. */
 590 static int      re_has_z;       /* \z item detected */
 591 #endif
 592 static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
 593 static long     regsize;        /* Code size. */
 594 static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
 595 static unsigned regflags;       /* RF_ flags for prog */
 596 static long     brace_min[10];  /* Minimums for complex brace repeats */
 597 static long     brace_max[10];  /* Maximums for complex brace repeats */
 598 static int      brace_count[10]; /* Current counts for complex brace repeats */
 599 #if defined(FEAT_SYN_HL) || defined(PROTO)
 600 static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
 601 #endif
 602 static int      one_exactly = FALSE;    /* only do one char for EXACTLY */
 603
 604 static int      reg_magic;      /* magicness of the pattern: */
 605 #define MAGIC_NONE      1       /* "\V" very unmagic */
 606 #define MAGIC_OFF       2       /* "\M" or 'magic' off */
 607 #define MAGIC_ON        3       /* "\m" or 'magic' */
 608 #define MAGIC_ALL       4       /* "\v" very magic */
 609
 610 static int      reg_string;     /* matching with a string instead of a buffer
 611                                    line */
 612 static int      reg_strict;     /* "[abc" is illegal */
 613
 614 /*
 615  * META contains all characters that may be magic, except '^' and '$'.
 616  */
 617
 618 #ifdef EBCDIC
 619 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
 620 #else
 621 /* META[] is used often enough to justify turning it into a table. */
 622 static char_u META_flags[] = {
 623     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 624     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 625 /*                 %  &     (  )  *  +        .    */
 626     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
 627 /*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
 628     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
 629 /*  @  A     C  D     F     H  I     K  L  M     O */
 630     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
 631 /*  P        S     U  V  W  X     Z  [           _ */
 632     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
 633 /*     a     c  d     f     h  i     k  l  m  n  o */
 634     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
 635 /*  p        s     u  v  w  x     z  {  |     ~    */
 636     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 637 };
 638 #endif
 639
 640 static int      curchr;
 641
 642 /* arguments for reg() */
 643 #define REG_NOPAREN     0       /* toplevel reg() */
 644 #define REG_PAREN       1       /* \(\) */
 645 #define REG_ZPAREN      2       /* \z(\) */
 646 #define REG_NPAREN      3       /* \%(\) */
 647
 648 /*
 649  * Forward declarations for vim_regcomp()'s friends.
 650  */
 651 static void     initchr __ARGS((char_u *));
 652 static int      getchr __ARGS((void));
 653 static void     skipchr_keepstart __ARGS((void));
 654 static int      peekchr __ARGS((void));
 655 static void     skipchr __ARGS((void));
 656 static void     ungetchr __ARGS((void));
 657 static int      gethexchrs __ARGS((int maxinputlen));
 658 static int      getoctchrs __ARGS((void));
 659 static int      getdecchrs __ARGS((void));
 660 static int      coll_get_char __ARGS((void));
 661 static void     regcomp_start __ARGS((char_u *expr, int flags));
 662 static char_u   *reg __ARGS((int, int *));
 663 static char_u   *regbranch __ARGS((int *flagp));
 664 static char_u   *regconcat __ARGS((int *flagp));
 665 static char_u   *regpiece __ARGS((int *));
 666 static char_u   *regatom __ARGS((int *));
 667 static char_u   *regnode __ARGS((int));
 668 #ifdef FEAT_MBYTE
 669 static int      use_multibytecode __ARGS((int c));
 670 #endif
 671 static int      prog_magic_wrong __ARGS((void));
 672 static char_u   *regnext __ARGS((char_u *));
 673 static void     regc __ARGS((int b));
 674 #ifdef FEAT_MBYTE
 675 static void     regmbc __ARGS((int c));
 676 #else
 677 # define regmbc(c) regc(c)
 678 #endif
 679 static void     reginsert __ARGS((int, char_u *));
 680 static void     reginsert_limits __ARGS((int, long, long, char_u *));
 681 static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
 682 static int      read_limits __ARGS((long *, long *));
 683 static void     regtail __ARGS((char_u *, char_u *));
 684 static void     regoptail __ARGS((char_u *, char_u *));
 685
 686 /*
 687  * Return TRUE if compiled regular expression "prog" can match a line break.
 688  */
 689     int
 690 re_multiline(prog)
 691     regprog_T *prog;
 692 {
 693     return (prog->regflags & RF_HASNL);
 694 }
 695
 696 /*
 697  * Return TRUE if compiled regular expression "prog" looks before the start
 698  * position (pattern contains "\@<=" or "\@<!").
 699  */
 700     int
 701 re_lookbehind(prog)
 702     regprog_T *prog;
 703 {
 704     return (prog->regflags & RF_LOOKBH);
 705 }
 706
 707 /*
 708  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 709  * Returns a character representing the class. Zero means that no item was
 710  * recognized.  Otherwise "pp" is advanced to after the item.
 711  */
 712     static int
 713 get_equi_class(pp)
 714     char_u      **pp;
 715 {
 716     int         c;
 717     int         l = 1;
 718     char_u      *p = *pp;
 719
 720     if (p[1] == '=')
 721     {
 722 #ifdef FEAT_MBYTE
 723         if (has_mbyte)
 724             l = (*mb_ptr2len)(p + 2);
 725 #endif
 726         if (p[l + 2] == '=' && p[l + 3] == ']')
 727         {
 728 #ifdef FEAT_MBYTE
 729             if (has_mbyte)
 730                 c = mb_ptr2char(p + 2);
 731             else
 732 #endif
 733                 c = p[2];
 734             *pp += l + 4;
 735             return c;
 736         }
 737     }
 738     return 0;
 739 }
 740
 741 /*
 742  * Produce the bytes for equivalence class "c".
 743  * Currently only handles latin1, latin9 and utf-8.
 744  */
 745     static void
 746 reg_equi_class(c)
 747     int     c;
 748 {
 749 #ifdef FEAT_MBYTE
 750     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 751                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 752 #endif
 753     {
 754         switch (c)
 755         {
 756             case 'A': case '\300': case '\301': case '\302':
 757             case '\303': case '\304': case '\305':
 758                       regmbc('A'); regmbc('\300'); regmbc('\301');
 759                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 760                       regmbc('\305');
 761                       return;
 762             case 'C': case '\307':
 763                       regmbc('C'); regmbc('\307');
 764                       return;
 765             case 'E': case '\310': case '\311': case '\312': case '\313':
 766                       regmbc('E'); regmbc('\310'); regmbc('\311');
 767                       regmbc('\312'); regmbc('\313');
 768                       return;
 769             case 'I': case '\314': case '\315': case '\316': case '\317':
 770                       regmbc('I'); regmbc('\314'); regmbc('\315');
 771                       regmbc('\316'); regmbc('\317');
 772                       return;
 773             case 'N': case '\321':
 774                       regmbc('N'); regmbc('\321');
 775                       return;
 776             case 'O': case '\322': case '\323': case '\324': case '\325':
 777             case '\326':
 778                       regmbc('O'); regmbc('\322'); regmbc('\323');
 779                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 780                       return;
 781             case 'U': case '\331': case '\332': case '\333': case '\334':
 782                       regmbc('U'); regmbc('\331'); regmbc('\332');
 783                       regmbc('\333'); regmbc('\334');
 784                       return;
 785             case 'Y': case '\335':
 786                       regmbc('Y'); regmbc('\335');
 787                       return;
 788             case 'a': case '\340': case '\341': case '\342':
 789             case '\343': case '\344': case '\345':
 790                       regmbc('a'); regmbc('\340'); regmbc('\341');
 791                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 792                       regmbc('\345');
 793                       return;
 794             case 'c': case '\347':
 795                       regmbc('c'); regmbc('\347');
 796                       return;
 797             case 'e': case '\350': case '\351': case '\352': case '\353':
 798                       regmbc('e'); regmbc('\350'); regmbc('\351');
 799                       regmbc('\352'); regmbc('\353');
 800                       return;
 801             case 'i': case '\354': case '\355': case '\356': case '\357':
 802                       regmbc('i'); regmbc('\354'); regmbc('\355');
 803                       regmbc('\356'); regmbc('\357');
 804                       return;
 805             case 'n': case '\361':
 806                       regmbc('n'); regmbc('\361');
 807                       return;
 808             case 'o': case '\362': case '\363': case '\364': case '\365':
 809             case '\366':
 810                       regmbc('o'); regmbc('\362'); regmbc('\363');
 811                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 812                       return;
 813             case 'u': case '\371': case '\372': case '\373': case '\374':
 814                       regmbc('u'); regmbc('\371'); regmbc('\372');
 815                       regmbc('\373'); regmbc('\374');
 816                       return;
 817             case 'y': case '\375': case '\377':
 818                       regmbc('y'); regmbc('\375'); regmbc('\377');
 819                       return;
 820         }
 821     }
 822     regmbc(c);
 823 }
 824
 825 /*
 826  * Check for a collating element "[.a.]".  "pp" points to the '['.
 827  * Returns a character. Zero means that no item was recognized.  Otherwise
 828  * "pp" is advanced to after the item.
 829  * Currently only single characters are recognized!
 830  */
 831     static int
 832 get_coll_element(pp)
 833     char_u      **pp;
 834 {
 835     int         c;
 836     int         l = 1;
 837     char_u      *p = *pp;
 838
 839     if (p[1] == '.')
 840     {
 841 #ifdef FEAT_MBYTE
 842         if (has_mbyte)
 843             l = (*mb_ptr2len)(p + 2);
 844 #endif
 845         if (p[l + 2] == '.' && p[l + 3] == ']')
 846         {
 847 #ifdef FEAT_MBYTE
 848             if (has_mbyte)
 849                 c = mb_ptr2char(p + 2);
 850             else
 851 #endif
 852                 c = p[2];
 853             *pp += l + 4;
 854             return c;
 855         }
 856     }
 857     return 0;
 858 }
 859
 860
 861 /*
 862  * Skip over a "[]" range.
 863  * "p" must point to the character after the '['.
 864  * The returned pointer is on the matching ']', or the terminating NUL.
 865  */
 866     static char_u *
 867 skip_anyof(p)
 868     char_u      *p;
 869 {
 870     int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
 871     int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
 872 #ifdef FEAT_MBYTE
 873     int         l;
 874 #endif
 875
 876     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 877     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 878
 879     if (*p == '^')      /* Complement of range. */
 880         ++p;
 881     if (*p == ']' || *p == '-')
 882         ++p;
 883     while (*p != NUL && *p != ']')
 884     {
 885 #ifdef FEAT_MBYTE
 886         if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 887             p += l;
 888         else
 889 #endif
 890             if (*p == '-')
 891             {
 892                 ++p;
 893                 if (*p != ']' && *p != NUL)
 894                     mb_ptr_adv(p);
 895             }
 896         else if (*p == '\\'
 897                 && !cpo_bsl
 898                 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
 899                     || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
 900             p += 2;
 901         else if (*p == '[')
 902         {
 903             if (get_char_class(&p) == CLASS_NONE
 904                     && get_equi_class(&p) == 0
 905                     && get_coll_element(&p) == 0)
 906                 ++p; /* It was not a class name */
 907         }
 908         else
 909             ++p;
 910     }
 911
 912     return p;
 913 }
 914
 915 /*
 916  * Skip past regular expression.
 917  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 918  * Take care of characters with a backslash in front of it.
 919  * Skip strings inside [ and ].
 920  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 921  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 922  * is changed in-place.
 923  */
 924     char_u *
 925 skip_regexp(startp, dirc, magic, newp)
 926     char_u      *startp;
 927     int         dirc;
 928     int         magic;
 929     char_u      **newp;
 930 {
 931     int         mymagic;
 932     char_u      *p = startp;
 933
 934     if (magic)
 935         mymagic = MAGIC_ON;
 936     else
 937         mymagic = MAGIC_OFF;
 938
 939     for (; p[0] != NUL; mb_ptr_adv(p))
 940     {
 941         if (p[0] == dirc)       /* found end of regexp */
 942             break;
 943         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 944                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 945         {
 946             p = skip_anyof(p + 1);
 947             if (p[0] == NUL)
 948                 break;
 949         }
 950         else if (p[0] == '\\' && p[1] != NUL)
 951         {
 952             if (dirc == '?' && newp != NULL && p[1] == '?')
 953             {
 954                 /* change "\?" to "?", make a copy first. */
 955                 if (*newp == NULL)
 956                 {
 957                     *newp = vim_strsave(startp);
 958                     if (*newp != NULL)
 959                         p = *newp + (p - startp);
 960                 }
 961                 if (*newp != NULL)
 962                     mch_memmove(p, p + 1, STRLEN(p));
 963                 else
 964                     ++p;
 965             }
 966             else
 967                 ++p;    /* skip next character */
 968             if (*p == 'v')
 969                 mymagic = MAGIC_ALL;
 970             else if (*p == 'V')
 971                 mymagic = MAGIC_NONE;
 972         }
 973     }
 974     return p;
 975 }
 976
 977 /*
 978  * vim_regcomp() - compile a regular expression into internal code
 979  * Returns the program in allocated space.  Returns NULL for an error.
 980  *
 981  * We can't allocate space until we know how big the compiled form will be,
 982  * but we can't compile it (and thus know how big it is) until we've got a
 983  * place to put the code.  So we cheat:  we compile it twice, once with code
 984  * generation turned off and size counting turned on, and once "for real".
 985  * This also means that we don't allocate space until we are sure that the
 986  * thing really will compile successfully, and we never have to move the
 987  * code and thus invalidate pointers into it.  (Note that it has to be in
 988  * one piece because vim_free() must be able to free it all.)
 989  *
 990  * Whether upper/lower case is to be ignored is decided when executing the
 991  * program, it does not matter here.
 992  *
 993  * Beware that the optimization-preparation code in here knows about some
 994  * of the structure of the compiled regexp.
 995  * "re_flags": RE_MAGIC and/or RE_STRING.
 996  */
 997     regprog_T *
 998 vim_regcomp(expr, re_flags)
 999     char_u      *expr;
1000     int         re_flags;
1001 {
1002     regprog_T   *r;
1003     char_u      *scan;
1004     char_u      *longest;
1005     int         len;
1006     int         flags;
1007
1008     if (expr == NULL)
1009         EMSG_RET_NULL(_(e_null));
1010
1011     init_class_tab();
1012
1013     /*
1014      * First pass: determine size, legality.
1015      */
1016     regcomp_start(expr, re_flags);
1017     regcode = JUST_CALC_SIZE;
1018     regc(REGMAGIC);
1019     if (reg(REG_NOPAREN, &flags) == NULL)
1020         return NULL;
1021
1022     /* Small enough for pointer-storage convention? */
1023 #ifdef SMALL_MALLOC             /* 16 bit storage allocation */
1024     if (regsize >= 65536L - 256L)
1025         EMSG_RET_NULL(_("E339: Pattern too long"));
1026 #endif
1027
1028     /* Allocate space. */
1029     r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1030     if (r == NULL)
1031         return NULL;
1032
1033     /*
1034      * Second pass: emit code.
1035      */
1036     regcomp_start(expr, re_flags);
1037     regcode = r->program;
1038     regc(REGMAGIC);
1039     if (reg(REG_NOPAREN, &flags) == NULL)
1040     {
1041         vim_free(r);
1042         return NULL;
1043     }
1044
1045     /* Dig out information for optimizations. */
1046     r->regstart = NUL;          /* Worst-case defaults. */
1047     r->reganch = 0;
1048     r->regmust = NULL;
1049     r->regmlen = 0;
1050     r->regflags = regflags;
1051     if (flags & HASNL)
1052         r->regflags |= RF_HASNL;
1053     if (flags & HASLOOKBH)
1054         r->regflags |= RF_LOOKBH;
1055 #ifdef FEAT_SYN_HL
1056     /* Remember whether this pattern has any \z specials in it. */
1057     r->reghasz = re_has_z;
1058 #endif
1059     scan = r->program + 1;      /* First BRANCH. */
1060     if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1061     {
1062         scan = OPERAND(scan);
1063
1064         /* Starting-point info. */
1065         if (OP(scan) == BOL || OP(scan) == RE_BOF)
1066         {
1067             r->reganch++;
1068             scan = regnext(scan);
1069         }
1070
1071         if (OP(scan) == EXACTLY)
1072         {
1073 #ifdef FEAT_MBYTE
1074             if (has_mbyte)
1075                 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1076             else
1077 #endif
1078                 r->regstart = *OPERAND(scan);
1079         }
1080         else if ((OP(scan) == BOW
1081                     || OP(scan) == EOW
1082                     || OP(scan) == NOTHING
1083                     || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1084                     || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1085                  && OP(regnext(scan)) == EXACTLY)
1086         {
1087 #ifdef FEAT_MBYTE
1088             if (has_mbyte)
1089                 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1090             else
1091 #endif
1092                 r->regstart = *OPERAND(regnext(scan));
1093         }
1094
1095         /*
1096          * If there's something expensive in the r.e., find the longest
1097          * literal string that must appear and make it the regmust.  Resolve
1098          * ties in favor of later strings, since the regstart check works
1099          * with the beginning of the r.e. and avoiding duplication
1100          * strengthens checking.  Not a strong reason, but sufficient in the
1101          * absence of others.
1102          */
1103         /*
1104          * When the r.e. starts with BOW, it is faster to look for a regmust
1105          * first. Used a lot for "#" and "*" commands. (Added by mool).
1106          */
1107         if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1108                                                           && !(flags & HASNL))
1109         {
1110             longest = NULL;
1111             len = 0;
1112             for (; scan != NULL; scan = regnext(scan))
1113                 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1114                 {
1115                     longest = OPERAND(scan);
1116                     len = (int)STRLEN(OPERAND(scan));
1117                 }
1118             r->regmust = longest;
1119             r->regmlen = len;
1120         }
1121     }
1122 #ifdef DEBUG
1123     regdump(expr, r);
1124 #endif
1125     return r;
1126 }
1127
1128 /*
1129  * Setup to parse the regexp.  Used once to get the length and once to do it.
1130  */
1131     static void
1132 regcomp_start(expr, re_flags)
1133     char_u      *expr;
1134     int         re_flags;           /* see vim_regcomp() */
1135 {
1136     initchr(expr);
1137     if (re_flags & RE_MAGIC)
1138         reg_magic = MAGIC_ON;
1139     else
1140         reg_magic = MAGIC_OFF;
1141     reg_string = (re_flags & RE_STRING);
1142     reg_strict = (re_flags & RE_STRICT);
1143
1144     num_complex_braces = 0;
1145     regnpar = 1;
1146     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1147 #ifdef FEAT_SYN_HL
1148     regnzpar = 1;
1149     re_has_z = 0;
1150 #endif
1151     regsize = 0L;
1152     regflags = 0;
1153 #if defined(FEAT_SYN_HL) || defined(PROTO)
1154     had_eol = FALSE;
1155 #endif
1156 }
1157
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Report whether the EOL item "$" was encountered during the previous call
 * to vim_regcomp().  The answer comes from a file-level flag, which is
 * messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    int         seen_eol = had_eol;

    return seen_eol;
}
#endif
1169
1170 /*
1171  * reg - regular expression, i.e. main body or parenthesized thing
1172  *
1173  * Caller must absorb opening parenthesis.
1174  *
1175  * Combining parenthesis handling with the base level of regular expression
1176  * is a trifle forced, but the need to tie the tails of the branches to what
1177  * follows makes it hard to avoid.
1178  */
1179     static char_u *
1180 reg(paren, flagp)
1181     int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1182     int         *flagp;
1183 {
1184     char_u      *ret;
1185     char_u      *br;
1186     char_u      *ender;
1187     int         parno = 0;
1188     int         flags;
1189
1190     *flagp = HASWIDTH;          /* Tentatively. */
1191
1192 #ifdef FEAT_SYN_HL
1193     if (paren == REG_ZPAREN)
1194     {
1195         /* Make a ZOPEN node. */
1196         if (regnzpar >= NSUBEXP)
1197             EMSG_RET_NULL(_("E50: Too many \\z("));
1198         parno = regnzpar;
1199         regnzpar++;
1200         ret = regnode(ZOPEN + parno);
1201     }
1202     else
1203 #endif
1204         if (paren == REG_PAREN)
1205     {
1206         /* Make a MOPEN node. */
1207         if (regnpar >= NSUBEXP)
1208             EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1209         parno = regnpar;
1210         ++regnpar;
1211         ret = regnode(MOPEN + parno);
1212     }
1213     else if (paren == REG_NPAREN)
1214     {
1215         /* Make a NOPEN node. */
1216         ret = regnode(NOPEN);
1217     }
1218     else
1219         ret = NULL;
1220
1221     /* Pick up the branches, linking them together. */
1222     br = regbranch(&flags);
1223     if (br == NULL)
1224         return NULL;
1225     if (ret != NULL)
1226         regtail(ret, br);       /* [MZ]OPEN -> first. */
1227     else
1228         ret = br;
1229     /* If one of the branches can be zero-width, the whole thing can.
1230      * If one of the branches has * at start or matches a line-break, the
1231      * whole thing can. */
1232     if (!(flags & HASWIDTH))
1233         *flagp &= ~HASWIDTH;
1234     *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1235     while (peekchr() == Magic('|'))
1236     {
1237         skipchr();
1238         br = regbranch(&flags);
1239         if (br == NULL)
1240             return NULL;
1241         regtail(ret, br);       /* BRANCH -> BRANCH. */
1242         if (!(flags & HASWIDTH))
1243             *flagp &= ~HASWIDTH;
1244         *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1245     }
1246
1247     /* Make a closing node, and hook it on the end. */
1248     ender = regnode(
1249 #ifdef FEAT_SYN_HL
1250             paren == REG_ZPAREN ? ZCLOSE + parno :
1251 #endif
1252             paren == REG_PAREN ? MCLOSE + parno :
1253             paren == REG_NPAREN ? NCLOSE : END);
1254     regtail(ret, ender);
1255
1256     /* Hook the tails of the branches to the closing node. */
1257     for (br = ret; br != NULL; br = regnext(br))
1258         regoptail(br, ender);
1259
1260     /* Check for proper termination. */
1261     if (paren != REG_NOPAREN && getchr() != Magic(')'))
1262     {
1263 #ifdef FEAT_SYN_HL
1264         if (paren == REG_ZPAREN)
1265             EMSG_RET_NULL(_("E52: Unmatched \\z("));
1266         else
1267 #endif
1268             if (paren == REG_NPAREN)
1269             EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
1270         else
1271             EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
1272     }
1273     else if (paren == REG_NOPAREN && peekchr() != NUL)
1274     {
1275         if (curchr == Magic(')'))
1276             EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
1277         else
1278             EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
1279         /* NOTREACHED */
1280     }
1281     /*
1282      * Here we set the flag allowing back references to this set of
1283      * parentheses.
1284      */
1285     if (paren == REG_PAREN)
1286         had_endbrace[parno] = TRUE;     /* have seen the close paren */
1287     return ret;
1288 }
1289
1290 /*
1291  * Handle one alternative of an | operator.
1292  * Implements the & operator.
1293  */
1294     static char_u *
1295 regbranch(flagp)
1296     int         *flagp;
1297 {
1298     char_u      *ret;
1299     char_u      *chain = NULL;
1300     char_u      *latest;
1301     int         flags;
1302
1303     *flagp = WORST | HASNL;             /* Tentatively. */
1304
1305     ret = regnode(BRANCH);
1306     for (;;)
1307     {
1308         latest = regconcat(&flags);
1309         if (latest == NULL)
1310             return NULL;
1311         /* If one of the branches has width, the whole thing has.  If one of
1312          * the branches anchors at start-of-line, the whole thing does.
1313          * If one of the branches uses look-behind, the whole thing does. */
1314         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1315         /* If one of the branches doesn't match a line-break, the whole thing
1316          * doesn't. */
1317         *flagp &= ~HASNL | (flags & HASNL);
1318         if (chain != NULL)
1319             regtail(chain, latest);
1320         if (peekchr() != Magic('&'))
1321             break;
1322         skipchr();
1323         regtail(latest, regnode(END)); /* operand ends */
1324         reginsert(MATCH, latest);
1325         chain = latest;
1326     }
1327
1328     return ret;
1329 }
1330
1331 /*
1332  * Handle one alternative of an | or & operator.
1333  * Implements the concatenation operator.
1334  */
1335     static char_u *
1336 regconcat(flagp)
1337     int         *flagp;
1338 {
1339     char_u      *first = NULL;
1340     char_u      *chain = NULL;
1341     char_u      *latest;
1342     int         flags;
1343     int         cont = TRUE;
1344
1345     *flagp = WORST;             /* Tentatively. */
1346
1347     while (cont)
1348     {
1349         switch (peekchr())
1350         {
1351             case NUL:
1352             case Magic('|'):
1353             case Magic('&'):
1354             case Magic(')'):
1355                             cont = FALSE;
1356                             break;
1357             case Magic('Z'):
1358 #ifdef FEAT_MBYTE
1359                             regflags |= RF_ICOMBINE;
1360 #endif
1361                             skipchr_keepstart();
1362                             break;
1363             case Magic('c'):
1364                             regflags |= RF_ICASE;
1365                             skipchr_keepstart();
1366                             break;
1367             case Magic('C'):
1368                             regflags |= RF_NOICASE;
1369                             skipchr_keepstart();
1370                             break;
1371             case Magic('v'):
1372                             reg_magic = MAGIC_ALL;
1373                             skipchr_keepstart();
1374                             curchr = -1;
1375                             break;
1376             case Magic('m'):
1377                             reg_magic = MAGIC_ON;
1378                             skipchr_keepstart();
1379                             curchr = -1;
1380                             break;
1381             case Magic('M'):
1382                             reg_magic = MAGIC_OFF;
1383                             skipchr_keepstart();
1384                             curchr = -1;
1385                             break;
1386             case Magic('V'):
1387                             reg_magic = MAGIC_NONE;
1388                             skipchr_keepstart();
1389                             curchr = -1;
1390                             break;
1391             default:
1392                             latest = regpiece(&flags);
1393                             if (latest == NULL)
1394                                 return NULL;
1395                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1396                             if (chain == NULL)  /* First piece. */
1397                                 *flagp |= flags & SPSTART;
1398                             else
1399                                 regtail(chain, latest);
1400                             chain = latest;
1401                             if (first == NULL)
1402                                 first = latest;
1403                             break;
1404         }
1405     }
1406     if (first == NULL)          /* Loop ran zero times. */
1407         first = regnode(NOTHING);
1408     return first;
1409 }
1410
1411 /*
1412  * regpiece - something followed by possible [*+=]
1413  *
1414  * Note that the branching code sequences used for = and the general cases
1415  * of * and + are somewhat optimized:  they use the same NOTHING node as
1416  * both the endmarker for their branch list and the body of the last branch.
1417  * It might seem that this node could be dispensed with entirely, but the
1418  * endmarker role is not redundant.
1419  */
1420     static char_u *
1421 regpiece(flagp)
1422     int             *flagp;
1423 {
1424     char_u          *ret;
1425     int             op;
1426     char_u          *next;
1427     int             flags;
1428     long            minval;
1429     long            maxval;
1430
1431     ret = regatom(&flags);
1432     if (ret == NULL)
1433         return NULL;
1434
1435     op = peekchr();
1436     if (re_multi_type(op) == NOT_MULTI)
1437     {
1438         *flagp = flags;
1439         return ret;
1440     }
1441     /* default flags */
1442     *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1443
1444     skipchr();
1445     switch (op)
1446     {
1447         case Magic('*'):
1448             if (flags & SIMPLE)
1449                 reginsert(STAR, ret);
1450             else
1451             {
1452                 /* Emit x* as (x&|), where & means "self". */
1453                 reginsert(BRANCH, ret); /* Either x */
1454                 regoptail(ret, regnode(BACK));  /* and loop */
1455                 regoptail(ret, ret);    /* back */
1456                 regtail(ret, regnode(BRANCH));  /* or */
1457                 regtail(ret, regnode(NOTHING)); /* null. */
1458             }
1459             break;
1460
1461         case Magic('+'):
1462             if (flags & SIMPLE)
1463                 reginsert(PLUS, ret);
1464             else
1465             {
1466                 /* Emit x+ as x(&|), where & means "self". */
1467                 next = regnode(BRANCH); /* Either */
1468                 regtail(ret, next);
1469                 regtail(regnode(BACK), ret);    /* loop back */
1470                 regtail(next, regnode(BRANCH)); /* or */
1471                 regtail(ret, regnode(NOTHING)); /* null. */
1472             }
1473             *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1474             break;
1475
1476         case Magic('@'):
1477             {
1478                 int     lop = END;
1479
1480                 switch (no_Magic(getchr()))
1481                 {
1482                     case '=': lop = MATCH; break;                 /* \@= */
1483                     case '!': lop = NOMATCH; break;               /* \@! */
1484                     case '>': lop = SUBPAT; break;                /* \@> */
1485                     case '<': switch (no_Magic(getchr()))
1486                               {
1487                                   case '=': lop = BEHIND; break;   /* \@<= */
1488                                   case '!': lop = NOBEHIND; break; /* \@<! */
1489                               }
1490                 }
1491                 if (lop == END)
1492                     EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1493                                                       reg_magic == MAGIC_ALL);
1494                 /* Look behind must match with behind_pos. */
1495                 if (lop == BEHIND || lop == NOBEHIND)
1496                 {
1497                     regtail(ret, regnode(BHPOS));
1498                     *flagp |= HASLOOKBH;
1499                 }
1500                 regtail(ret, regnode(END)); /* operand ends */
1501                 reginsert(lop, ret);
1502                 break;
1503             }
1504
1505         case Magic('?'):
1506         case Magic('='):
1507             /* Emit x= as (x|) */
1508             reginsert(BRANCH, ret);             /* Either x */
1509             regtail(ret, regnode(BRANCH));      /* or */
1510             next = regnode(NOTHING);            /* null. */
1511             regtail(ret, next);
1512             regoptail(ret, next);
1513             break;
1514
1515         case Magic('{'):
1516             if (!read_limits(&minval, &maxval))
1517                 return NULL;
1518             if (flags & SIMPLE)
1519             {
1520                 reginsert(BRACE_SIMPLE, ret);
1521                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1522             }
1523             else
1524             {
1525                 if (num_complex_braces >= 10)
1526                     EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1527                                                       reg_magic == MAGIC_ALL);
1528                 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1529                 regoptail(ret, regnode(BACK));
1530                 regoptail(ret, ret);
1531                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1532                 ++num_complex_braces;
1533             }
1534             if (minval > 0 && maxval > 0)
1535                 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1536             break;
1537     }
1538     if (re_multi_type(peekchr()) != NOT_MULTI)
1539     {
1540         /* Can't have a multi follow a multi. */
1541         if (peekchr() == Magic('*'))
1542             sprintf((char *)IObuff, _("E61: Nested %s*"),
1543                                             reg_magic >= MAGIC_ON ? "" : "\\");
1544         else
1545             sprintf((char *)IObuff, _("E62: Nested %s%c"),
1546                 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1547         EMSG_RET_NULL(IObuff);
1548     }
1549
1550     return ret;
1551 }
1552
1553 /*
1554  * regatom - the lowest level
1555  *
1556  * Optimization:  gobbles an entire sequence of ordinary characters so that
1557  * it can turn them into a single node, which is smaller to store and
1558  * faster to run.  Don't do this when one_exactly is set.
1559  */
1560     static char_u *
1561 regatom(flagp)
1562     int            *flagp;
1563 {
1564     char_u          *ret;
1565     int             flags;
1566     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1567     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1568     int             c;
1569     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1570     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1571                                     FNAME, SFNAME, PRINT, SPRINT,
1572                                     WHITE, NWHITE, DIGIT, NDIGIT,
1573                                     HEX, NHEX, OCTAL, NOCTAL,
1574                                     WORD, NWORD, HEAD, NHEAD,
1575                                     ALPHA, NALPHA, LOWER, NLOWER,
1576                                     UPPER, NUPPER
1577                                     };
1578     char_u          *p;
1579     int             extra = 0;
1580
1581     *flagp = WORST;             /* Tentatively. */
1582     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1583     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1584
1585     c = getchr();
1586     switch (c)
1587     {
1588       case Magic('^'):
1589         ret = regnode(BOL);
1590         break;
1591
1592       case Magic('$'):
1593         ret = regnode(EOL);
1594 #if defined(FEAT_SYN_HL) || defined(PROTO)
1595         had_eol = TRUE;
1596 #endif
1597         break;
1598
1599       case Magic('<'):
1600         ret = regnode(BOW);
1601         break;
1602
1603       case Magic('>'):
1604         ret = regnode(EOW);
1605         break;
1606
1607       case Magic('_'):
1608         c = no_Magic(getchr());
1609         if (c == '^')           /* "\_^" is start-of-line */
1610         {
1611             ret = regnode(BOL);
1612             break;
1613         }
1614         if (c == '$')           /* "\_$" is end-of-line */
1615         {
1616             ret = regnode(EOL);
1617 #if defined(FEAT_SYN_HL) || defined(PROTO)
1618             had_eol = TRUE;
1619 #endif
1620             break;
1621         }
1622
1623         extra = ADD_NL;
1624         *flagp |= HASNL;
1625
1626         /* "\_[" is character range plus newline */
1627         if (c == '[')
1628             goto collection;
1629
1630         /* "\_x" is character class plus newline */
1631         /*FALLTHROUGH*/
1632
1633         /*
1634          * Character classes.
1635          */
1636       case Magic('.'):
1637       case Magic('i'):
1638       case Magic('I'):
1639       case Magic('k'):
1640       case Magic('K'):
1641       case Magic('f'):
1642       case Magic('F'):
1643       case Magic('p'):
1644       case Magic('P'):
1645       case Magic('s'):
1646       case Magic('S'):
1647       case Magic('d'):
1648       case Magic('D'):
1649       case Magic('x'):
1650       case Magic('X'):
1651       case Magic('o'):
1652       case Magic('O'):
1653       case Magic('w'):
1654       case Magic('W'):
1655       case Magic('h'):
1656       case Magic('H'):
1657       case Magic('a'):
1658       case Magic('A'):
1659       case Magic('l'):
1660       case Magic('L'):
1661       case Magic('u'):
1662       case Magic('U'):
1663         p = vim_strchr(classchars, no_Magic(c));
1664         if (p == NULL)
1665             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1666 #ifdef FEAT_MBYTE
1667         /* When '.' is followed by a composing char ignore the dot, so that
1668          * the composing char is matched here. */
1669         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1670         {
1671             c = getchr();
1672             goto do_multibyte;
1673         }
1674 #endif
1675         ret = regnode(classcodes[p - classchars] + extra);
1676         *flagp |= HASWIDTH | SIMPLE;
1677         break;
1678
1679       case Magic('n'):
1680         if (reg_string)
1681         {
1682             /* In a string "\n" matches a newline character. */
1683             ret = regnode(EXACTLY);
1684             regc(NL);
1685             regc(NUL);
1686             *flagp |= HASWIDTH | SIMPLE;
1687         }
1688         else
1689         {
1690             /* In buffer text "\n" matches the end of a line. */
1691             ret = regnode(NEWL);
1692             *flagp |= HASWIDTH | HASNL;
1693         }
1694         break;
1695
1696       case Magic('('):
1697         if (one_exactly)
1698             EMSG_ONE_RET_NULL;
1699         ret = reg(REG_PAREN, &flags);
1700         if (ret == NULL)
1701             return NULL;
1702         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1703         break;
1704
1705       case NUL:
1706       case Magic('|'):
1707       case Magic('&'):
1708       case Magic(')'):
1709         if (one_exactly)
1710             EMSG_ONE_RET_NULL;
1711         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1712         /* NOTREACHED */
1713
1714       case Magic('='):
1715       case Magic('?'):
1716       case Magic('+'):
1717       case Magic('@'):
1718       case Magic('{'):
1719       case Magic('*'):
1720         c = no_Magic(c);
1721         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1722                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1723                 ? "" : "\\", c);
1724         EMSG_RET_NULL(IObuff);
1725         /* NOTREACHED */
1726
1727       case Magic('~'):          /* previous substitute pattern */
1728             if (reg_prev_sub != NULL)
1729             {
1730                 char_u      *lp;
1731
1732                 ret = regnode(EXACTLY);
1733                 lp = reg_prev_sub;
1734                 while (*lp != NUL)
1735                     regc(*lp++);
1736                 regc(NUL);
1737                 if (*reg_prev_sub != NUL)
1738                 {
1739                     *flagp |= HASWIDTH;
1740                     if ((lp - reg_prev_sub) == 1)
1741                         *flagp |= SIMPLE;
1742                 }
1743             }
1744             else
1745                 EMSG_RET_NULL(_(e_nopresub));
1746             break;
1747
1748       case Magic('1'):
1749       case Magic('2'):
1750       case Magic('3'):
1751       case Magic('4'):
1752       case Magic('5'):
1753       case Magic('6'):
1754       case Magic('7'):
1755       case Magic('8'):
1756       case Magic('9'):
1757             {
1758                 int                 refnum;
1759
1760                 refnum = c - Magic('0');
1761                 /*
1762                  * Check if the back reference is legal. We must have seen the
1763                  * close brace.
1764                  * TODO: Should also check that we don't refer to something
1765                  * that is repeated (+*=): what instance of the repetition
1766                  * should we match?
1767                  */
1768                 if (!had_endbrace[refnum])
1769                 {
1770                     /* Trick: check if "@<=" or "@<!" follows, in which case
1771                      * the \1 can appear before the referenced match. */
1772                     for (p = regparse; *p != NUL; ++p)
1773                         if (p[0] == '@' && p[1] == '<'
1774                                               && (p[2] == '!' || p[2] == '='))
1775                             break;
1776                     if (*p == NUL)
1777                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1778                 }
1779                 ret = regnode(BACKREF + refnum);
1780             }
1781             break;
1782
1783       case Magic('z'):
1784         {
1785             c = no_Magic(getchr());
1786             switch (c)
1787             {
1788 #ifdef FEAT_SYN_HL
1789                 case '(': if (reg_do_extmatch != REX_SET)
1790                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1791                           if (one_exactly)
1792                               EMSG_ONE_RET_NULL;
1793                           ret = reg(REG_ZPAREN, &flags);
1794                           if (ret == NULL)
1795                               return NULL;
1796                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1797                           re_has_z = REX_SET;
1798                           break;
1799
1800                 case '1':
1801                 case '2':
1802                 case '3':
1803                 case '4':
1804                 case '5':
1805                 case '6':
1806                 case '7':
1807                 case '8':
1808                 case '9': if (reg_do_extmatch != REX_USE)
1809                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1810                           ret = regnode(ZREF + c - '0');
1811                           re_has_z = REX_USE;
1812                           break;
1813 #endif
1814
1815                 case 's': ret = regnode(MOPEN + 0);
1816                           break;
1817
1818                 case 'e': ret = regnode(MCLOSE + 0);
1819                           break;
1820
1821                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1822             }
1823         }
1824         break;
1825
1826       case Magic('%'):
1827         {
1828             c = no_Magic(getchr());
1829             switch (c)
1830             {
1831                 /* () without a back reference */
1832                 case '(':
1833                     if (one_exactly)
1834                         EMSG_ONE_RET_NULL;
1835                     ret = reg(REG_NPAREN, &flags);
1836                     if (ret == NULL)
1837                         return NULL;
1838                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1839                     break;
1840
1841                 /* Catch \%^ and \%$ regardless of where they appear in the
1842                  * pattern -- regardless of whether or not it makes sense. */
1843                 case '^':
1844                     ret = regnode(RE_BOF);
1845                     break;
1846
1847                 case '$':
1848                     ret = regnode(RE_EOF);
1849                     break;
1850
1851                 case '#':
1852                     ret = regnode(CURSOR);
1853                     break;
1854
1855                 case 'V':
1856                     ret = regnode(RE_VISUAL);
1857                     break;
1858
1859                 /* \%[abc]: Emit as a list of branches, all ending at the last
1860                  * branch which matches nothing. */
1861                 case '[':
1862                           if (one_exactly)      /* doesn't nest */
1863                               EMSG_ONE_RET_NULL;
1864                           {
1865                               char_u    *lastbranch;
1866                               char_u    *lastnode = NULL;
1867                               char_u    *br;
1868
1869                               ret = NULL;
1870                               while ((c = getchr()) != ']')
1871                               {
1872                                   if (c == NUL)
1873                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1874                                                       reg_magic == MAGIC_ALL);
1875                                   br = regnode(BRANCH);
1876                                   if (ret == NULL)
1877                                       ret = br;
1878                                   else
1879                                       regtail(lastnode, br);
1880
1881                                   ungetchr();
1882                                   one_exactly = TRUE;
1883                                   lastnode = regatom(flagp);
1884                                   one_exactly = FALSE;
1885                                   if (lastnode == NULL)
1886                                       return NULL;
1887                               }
1888                               if (ret == NULL)
1889                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1890                                                       reg_magic == MAGIC_ALL);
1891                               lastbranch = regnode(BRANCH);
1892                               br = regnode(NOTHING);
1893                               if (ret != JUST_CALC_SIZE)
1894                               {
1895                                   regtail(lastnode, br);
1896                                   regtail(lastbranch, br);
1897                                   /* connect all branches to the NOTHING
1898                                    * branch at the end */
1899                                   for (br = ret; br != lastnode; )
1900                                   {
1901                                       if (OP(br) == BRANCH)
1902                                       {
1903                                           regtail(br, lastbranch);
1904                                           br = OPERAND(br);
1905                                       }
1906                                       else
1907                                           br = regnext(br);
1908                                   }
1909                               }
1910                               *flagp &= ~HASWIDTH;
1911                               break;
1912                           }
1913
1914                 case 'd':   /* %d123 decimal */
1915                 case 'o':   /* %o123 octal */
1916                 case 'x':   /* %xab hex 2 */
1917                 case 'u':   /* %uabcd hex 4 */
1918                 case 'U':   /* %U1234abcd hex 8 */
1919                           {
1920                               int i;
1921
1922                               switch (c)
1923                               {
1924                                   case 'd': i = getdecchrs(); break;
1925                                   case 'o': i = getoctchrs(); break;
1926                                   case 'x': i = gethexchrs(2); break;
1927                                   case 'u': i = gethexchrs(4); break;
1928                                   case 'U': i = gethexchrs(8); break;
1929                                   default:  i = -1; break;
1930                               }
1931
1932                               if (i < 0)
1933                                   EMSG_M_RET_NULL(
1934                                         _("E678: Invalid character after %s%%[dxouU]"),
1935                                         reg_magic == MAGIC_ALL);
1936 #ifdef FEAT_MBYTE
1937                               if (use_multibytecode(i))
1938                                   ret = regnode(MULTIBYTECODE);
1939                               else
1940 #endif
1941                                   ret = regnode(EXACTLY);
1942                               if (i == 0)
1943                                   regc(0x0a);
1944                               else
1945 #ifdef FEAT_MBYTE
1946                                   regmbc(i);
1947 #else
1948                                   regc(i);
1949 #endif
1950                               regc(NUL);
1951                               *flagp |= HASWIDTH;
1952                               break;
1953                           }
1954
1955                 default:
1956                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1957                                                                  || c == '\'')
1958                           {
1959                               long_u    n = 0;
1960                               int       cmp;
1961
1962                               cmp = c;
1963                               if (cmp == '<' || cmp == '>')
1964                                   c = getchr();
1965                               while (VIM_ISDIGIT(c))
1966                               {
1967                                   n = n * 10 + (c - '0');
1968                                   c = getchr();
1969                               }
1970                               if (c == '\'' && n == 0)
1971                               {
1972                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1973                                   c = getchr();
1974                                   ret = regnode(RE_MARK);
1975                                   if (ret == JUST_CALC_SIZE)
1976                                       regsize += 2;
1977                                   else
1978                                   {
1979                                       *regcode++ = c;
1980                                       *regcode++ = cmp;
1981                                   }
1982                                   break;
1983                               }
1984                               else if (c == 'l' || c == 'c' || c == 'v')
1985                               {
1986                                   if (c == 'l')
1987                                       ret = regnode(RE_LNUM);
1988                                   else if (c == 'c')
1989                                       ret = regnode(RE_COL);
1990                                   else
1991                                       ret = regnode(RE_VCOL);
1992                                   if (ret == JUST_CALC_SIZE)
1993                                       regsize += 5;
1994                                   else
1995                                   {
1996                                       /* put the number and the optional
1997                                        * comparator after the opcode */
1998                                       regcode = re_put_long(regcode, n);
1999                                       *regcode++ = cmp;
2000                                   }
2001                                   break;
2002                               }
2003                           }
2004
2005                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
2006                                                       reg_magic == MAGIC_ALL);
2007             }
2008         }
2009         break;
2010
2011       case Magic('['):
2012 collection:
2013         {
2014             char_u      *lp;
2015
2016             /*
2017              * If there is no matching ']', we assume the '[' is a normal
2018              * character.  This makes 'incsearch' and ":help [" work.
2019              */
2020             lp = skip_anyof(regparse);
2021             if (*lp == ']')     /* there is a matching ']' */
2022             {
2023                 int     startc = -1;    /* > 0 when next '-' is a range */
2024                 int     endc;
2025
2026                 /*
2027                  * In a character class, different parsing rules apply.
2028                  * Not even \ is special anymore, nothing is.
2029                  */
2030                 if (*regparse == '^')       /* Complement of range. */
2031                 {
2032                     ret = regnode(ANYBUT + extra);
2033                     regparse++;
2034                 }
2035                 else
2036                     ret = regnode(ANYOF + extra);
2037
2038                 /* At the start ']' and '-' mean the literal character. */
2039                 if (*regparse == ']' || *regparse == '-')
2040                 {
2041                     startc = *regparse;
2042                     regc(*regparse++);
2043                 }
2044
2045                 while (*regparse != NUL && *regparse != ']')
2046                 {
2047                     if (*regparse == '-')
2048                     {
2049                         ++regparse;
2050                         /* The '-' is not used for a range at the end and
2051                          * after or before a '\n'. */
2052                         if (*regparse == ']' || *regparse == NUL
2053                                 || startc == -1
2054                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2055                         {
2056                             regc('-');
2057                             startc = '-';       /* [--x] is a range */
2058                         }
2059                         else
2060                         {
2061                             /* Also accept "a-[.z.]" */
2062                             endc = 0;
2063                             if (*regparse == '[')
2064                                 endc = get_coll_element(&regparse);
2065                             if (endc == 0)
2066                             {
2067 #ifdef FEAT_MBYTE
2068                                 if (has_mbyte)
2069                                     endc = mb_ptr2char_adv(&regparse);
2070                                 else
2071 #endif
2072                                     endc = *regparse++;
2073                             }
2074
2075                             /* Handle \o40, \x20 and \u20AC style sequences */
2076                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2077                                 endc = coll_get_char();
2078
2079                             if (startc > endc)
2080                                 EMSG_RET_NULL(_(e_invrange));
2081 #ifdef FEAT_MBYTE
2082                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2083                                                  || (*mb_char2len)(endc) > 1))
2084                             {
2085                                 /* Limit to a range of 256 chars */
2086                                 if (endc > startc + 256)
2087                                     EMSG_RET_NULL(_(e_invrange));
2088                                 while (++startc <= endc)
2089                                     regmbc(startc);
2090                             }
2091                             else
2092 #endif
2093                             {
2094 #ifdef EBCDIC
2095                                 int     alpha_only = FALSE;
2096
2097                                 /* for alphabetical range skip the gaps
2098                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2099                                 if (isalpha(startc) && isalpha(endc))
2100                                     alpha_only = TRUE;
2101 #endif
2102                                 while (++startc <= endc)
2103 #ifdef EBCDIC
2104                                     if (!alpha_only || isalpha(startc))
2105 #endif
2106                                         regc(startc);
2107                             }
2108                             startc = -1;
2109                         }
2110                     }
2111                     /*
2112                      * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2113                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2114                      * 'cpoptions' is not included.
2115                      * Posix doesn't recognize backslash at all.
2116                      */
2117                     else if (*regparse == '\\'
2118                             && !cpo_bsl
2119                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2120                                 || (!cpo_lit
2121                                     && vim_strchr(REGEXP_ABBR,
2122                                                        regparse[1]) != NULL)))
2123                     {
2124                         regparse++;
2125                         if (*regparse == 'n')
2126                         {
2127                             /* '\n' in range: also match NL */
2128                             if (ret != JUST_CALC_SIZE)
2129                             {
2130                                 if (*ret == ANYBUT)
2131                                     *ret = ANYBUT + ADD_NL;
2132                                 else if (*ret == ANYOF)
2133                                     *ret = ANYOF + ADD_NL;
2134                                 /* else: must have had a \n already */
2135                             }
2136                             *flagp |= HASNL;
2137                             regparse++;
2138                             startc = -1;
2139                         }
2140                         else if (*regparse == 'd'
2141                                 || *regparse == 'o'
2142                                 || *regparse == 'x'
2143                                 || *regparse == 'u'
2144                                 || *regparse == 'U')
2145                         {
2146                             startc = coll_get_char();
2147                             if (startc == 0)
2148                                 regc(0x0a);
2149                             else
2150 #ifdef FEAT_MBYTE
2151                                 regmbc(startc);
2152 #else
2153                                 regc(startc);
2154 #endif
2155                         }
2156                         else
2157                         {
2158                             startc = backslash_trans(*regparse++);
2159                             regc(startc);
2160                         }
2161                     }
2162                     else if (*regparse == '[')
2163                     {
2164                         int c_class;
2165                         int cu;
2166
2167                         c_class = get_char_class(&regparse);
2168                         startc = -1;
2169                         /* Characters assumed to be 8 bits! */
2170                         switch (c_class)
2171                         {
2172                             case CLASS_NONE:
2173                                 c_class = get_equi_class(&regparse);
2174                                 if (c_class != 0)
2175                                 {
2176                                     /* produce equivalence class */
2177                                     reg_equi_class(c_class);
2178                                 }
2179                                 else if ((c_class =
2180                                             get_coll_element(&regparse)) != 0)
2181                                 {
2182                                     /* produce a collating element */
2183                                     regmbc(c_class);
2184                                 }
2185                                 else
2186                                 {
2187                                     /* literal '[', allow [[-x] as a range */
2188                                     startc = *regparse++;
2189                                     regc(startc);
2190                                 }
2191                                 break;
2192                             case CLASS_ALNUM:
2193                                 for (cu = 1; cu <= 255; cu++)
2194                                     if (isalnum(cu))
2195                                         regc(cu);
2196                                 break;
2197                             case CLASS_ALPHA:
2198                                 for (cu = 1; cu <= 255; cu++)
2199                                     if (isalpha(cu))
2200                                         regc(cu);
2201                                 break;
2202                             case CLASS_BLANK:
2203                                 regc(' ');
2204                                 regc('\t');
2205                                 break;
2206                             case CLASS_CNTRL:
2207                                 for (cu = 1; cu <= 255; cu++)
2208                                     if (iscntrl(cu))
2209                                         regc(cu);
2210                                 break;
2211                             case CLASS_DIGIT:
2212                                 for (cu = 1; cu <= 255; cu++)
2213                                     if (VIM_ISDIGIT(cu))
2214                                         regc(cu);
2215                                 break;
2216                             case CLASS_GRAPH:
2217                                 for (cu = 1; cu <= 255; cu++)
2218                                     if (isgraph(cu))
2219                                         regc(cu);
2220                                 break;
2221                             case CLASS_LOWER:
2222                                 for (cu = 1; cu <= 255; cu++)
2223                                     if (MB_ISLOWER(cu))
2224                                         regc(cu);
2225                                 break;
2226                             case CLASS_PRINT:
2227                                 for (cu = 1; cu <= 255; cu++)
2228                                     if (vim_isprintc(cu))
2229                                         regc(cu);
2230                                 break;
2231                             case CLASS_PUNCT:
2232                                 for (cu = 1; cu <= 255; cu++)
2233                                     if (ispunct(cu))
2234                                         regc(cu);
2235                                 break;
2236                             case CLASS_SPACE:
2237                                 for (cu = 9; cu <= 13; cu++)
2238                                     regc(cu);
2239                                 regc(' ');
2240                                 break;
2241                             case CLASS_UPPER:
2242                                 for (cu = 1; cu <= 255; cu++)
2243                                     if (MB_ISUPPER(cu))
2244                                         regc(cu);
2245                                 break;
2246                             case CLASS_XDIGIT:
2247                                 for (cu = 1; cu <= 255; cu++)
2248                                     if (vim_isxdigit(cu))
2249                                         regc(cu);
2250                                 break;
2251                             case CLASS_TAB:
2252                                 regc('\t');
2253                                 break;
2254                             case CLASS_RETURN:
2255                                 regc('\r');
2256                                 break;
2257                             case CLASS_BACKSPACE:
2258                                 regc('\b');
2259                                 break;
2260                             case CLASS_ESCAPE:
2261                                 regc('\033');
2262                                 break;
2263                         }
2264                     }
2265                     else
2266                     {
2267 #ifdef FEAT_MBYTE
2268                         if (has_mbyte)
2269                         {
2270                             int len;
2271
2272                             /* produce a multibyte character, including any
2273                              * following composing characters */
2274                             startc = mb_ptr2char(regparse);
2275                             len = (*mb_ptr2len)(regparse);
2276                             if (enc_utf8 && utf_char2len(startc) != len)
2277                                 startc = -1;    /* composing chars */
2278                             while (--len >= 0)
2279                                 regc(*regparse++);
2280                         }
2281                         else
2282 #endif
2283                         {
2284                             startc = *regparse++;
2285                             regc(startc);
2286                         }
2287                     }
2288                 }
2289                 regc(NUL);
2290                 prevchr_len = 1;        /* last char was the ']' */
2291                 if (*regparse != ']')
2292                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2293                 skipchr();          /* let's be friends with the lexer again */
2294                 *flagp |= HASWIDTH | SIMPLE;
2295                 break;
2296             }
2297             else if (reg_strict)
2298                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2299                                                        reg_magic > MAGIC_OFF);
2300         }
2301         /* FALLTHROUGH */
2302
2303       default:
2304         {
2305             int         len;
2306
2307 #ifdef FEAT_MBYTE
2308             /* A multi-byte character is handled as a separate atom if it's
2309              * before a multi and when it's a composing char. */
2310             if (use_multibytecode(c))
2311             {
2312 do_multibyte:
2313                 ret = regnode(MULTIBYTECODE);
2314                 regmbc(c);
2315                 *flagp |= HASWIDTH | SIMPLE;
2316                 break;
2317             }
2318 #endif
2319
2320             ret = regnode(EXACTLY);
2321
2322             /*
2323              * Append characters as long as:
2324              * - there is no following multi, we then need the character in
2325              *   front of it as a single character operand
2326              * - not running into a Magic character
2327              * - "one_exactly" is not set
2328              * But always emit at least one character.  Might be a Multi,
2329              * e.g., a "[" without matching "]".
2330              */
2331             for (len = 0; c != NUL && (len == 0
2332                         || (re_multi_type(peekchr()) == NOT_MULTI
2333                             && !one_exactly
2334                             && !is_Magic(c))); ++len)
2335             {
2336                 c = no_Magic(c);
2337 #ifdef FEAT_MBYTE
2338                 if (has_mbyte)
2339                 {
2340                     regmbc(c);
2341                     if (enc_utf8)
2342                     {
2343                         int     l;
2344
2345                         /* Need to get composing character too. */
2346                         for (;;)
2347                         {
2348                             l = utf_ptr2len(regparse);
2349                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2350                                 break;
2351                             regmbc(utf_ptr2char(regparse));
2352                             skipchr();
2353                         }
2354                     }
2355                 }
2356                 else
2357 #endif
2358                     regc(c);
2359                 c = getchr();
2360             }
2361             ungetchr();
2362
2363             regc(NUL);
2364             *flagp |= HASWIDTH;
2365             if (len == 1)
2366                 *flagp |= SIMPLE;
2367         }
2368         break;
2369     }
2370
2371     return ret;
2372 }
2373
2374 #ifdef FEAT_MBYTE
2375 /*
2376  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2377  * character "c".
2378  */
2379     static int
2380 use_multibytecode(c)
2381     int c;
2382 {
2383     return has_mbyte && (*mb_char2len)(c) > 1
2384                      && (re_multi_type(peekchr()) != NOT_MULTI
2385                              || (enc_utf8 && utf_iscomposing(c)));
2386 }
2387 #endif
2388
2389 /*
2390  * emit a node
2391  * Return pointer to generated code.
2392  */
2393     static char_u *
2394 regnode(op)
2395     int         op;
2396 {
2397     char_u  *ret;
2398
2399     ret = regcode;
2400     if (ret == JUST_CALC_SIZE)
2401         regsize += 3;
2402     else
2403     {
2404         *regcode++ = op;
2405         *regcode++ = NUL;               /* Null "next" pointer. */
2406         *regcode++ = NUL;
2407     }
2408     return ret;
2409 }
2410
2411 /*
2412  * Emit (if appropriate) a byte of code
2413  */
2414     static void
2415 regc(b)
2416     int         b;
2417 {
2418     if (regcode == JUST_CALC_SIZE)
2419         regsize++;
2420     else
2421         *regcode++ = b;
2422 }
2423
2424 #ifdef FEAT_MBYTE
2425 /*
2426  * Emit (if appropriate) a multi-byte character of code
2427  */
2428     static void
2429 regmbc(c)
2430     int         c;
2431 {
2432     if (regcode == JUST_CALC_SIZE)
2433         regsize += (*mb_char2len)(c);
2434     else
2435         regcode += (*mb_char2bytes)(c, regcode);
2436 }
2437 #endif
2438
2439 /*
2440  * reginsert - insert an operator in front of already-emitted operand
2441  *
2442  * Means relocating the operand.
2443  */
2444     static void
2445 reginsert(op, opnd)
2446     int         op;
2447     char_u     *opnd;
2448 {
2449     char_u      *src;
2450     char_u      *dst;
2451     char_u      *place;
2452
2453     if (regcode == JUST_CALC_SIZE)
2454     {
2455         regsize += 3;
2456         return;
2457     }
2458     src = regcode;
2459     regcode += 3;
2460     dst = regcode;
2461     while (src > opnd)
2462         *--dst = *--src;
2463
2464     place = opnd;               /* Op node, where operand used to be. */
2465     *place++ = op;
2466     *place++ = NUL;
2467     *place = NUL;
2468 }
2469
2470 /*
2471  * reginsert_limits - insert an operator in front of already-emitted operand.
2472  * The operator has the given limit values as operands.  Also set next pointer.
2473  *
2474  * Means relocating the operand.
2475  */
2476     static void
2477 reginsert_limits(op, minval, maxval, opnd)
2478     int         op;
2479     long        minval;
2480     long        maxval;
2481     char_u      *opnd;
2482 {
2483     char_u      *src;
2484     char_u      *dst;
2485     char_u      *place;
2486
2487     if (regcode == JUST_CALC_SIZE)
2488     {
2489         regsize += 11;
2490         return;
2491     }
2492     src = regcode;
2493     regcode += 11;
2494     dst = regcode;
2495     while (src > opnd)
2496         *--dst = *--src;
2497
2498     place = opnd;               /* Op node, where operand used to be. */
2499     *place++ = op;
2500     *place++ = NUL;
2501     *place++ = NUL;
2502     place = re_put_long(place, (long_u)minval);
2503     place = re_put_long(place, (long_u)maxval);
2504     regtail(opnd, place);
2505 }
2506
2507 /*
2508  * Write a long as four bytes at "p" and return pointer to the next char.
2509  */
2510     static char_u *
2511 re_put_long(p, val)
2512     char_u      *p;
2513     long_u      val;
2514 {
2515     *p++ = (char_u) ((val >> 24) & 0377);
2516     *p++ = (char_u) ((val >> 16) & 0377);
2517     *p++ = (char_u) ((val >> 8) & 0377);
2518     *p++ = (char_u) (val & 0377);
2519     return p;
2520 }
2521
2522 /*
2523  * regtail - set the next-pointer at the end of a node chain
2524  */
2525     static void
2526 regtail(p, val)
2527     char_u      *p;
2528     char_u      *val;
2529 {
2530     char_u      *scan;
2531     char_u      *temp;
2532     int         offset;
2533
2534     if (p == JUST_CALC_SIZE)
2535         return;
2536
2537     /* Find last node. */
2538     scan = p;
2539     for (;;)
2540     {
2541         temp = regnext(scan);
2542         if (temp == NULL)
2543             break;
2544         scan = temp;
2545     }
2546
2547     if (OP(scan) == BACK)
2548         offset = (int)(scan - val);
2549     else
2550         offset = (int)(val - scan);
2551     *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2552     *(scan + 2) = (char_u) (offset & 0377);
2553 }
2554
2555 /*
2556  * regoptail - regtail on item after a BRANCH; nop if none
2557  */
2558     static void
2559 regoptail(p, val)
2560     char_u      *p;
2561     char_u      *val;
2562 {
2563     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2564     if (p == NULL || p == JUST_CALC_SIZE
2565             || (OP(p) != BRANCH
2566                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2567         return;
2568     regtail(OPERAND(p), val);
2569 }
2570
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The lexer consists of initchr(), peekchr(), skipchr(), getchr() and
 * ungetchr().  Its state is kept in the variables below, together with
 * "curchr", "prevchr_len" and "regparse", which are not declared in this
 * section.  A value of -1 in one of the character variables means "none";
 * that is what initchr() stores in all of them.
 */

/* "curchr" is not declared here; this line is only a reminder of its type: */
/* static int       curchr; */
static int      prevprevchr;    /* value of prevchr before the last skipchr() */
static int      prevchr;        /* value of curchr before the last skipchr() */
static int      nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 * This is why "at_start" is tracked separately instead of testing prevchr.
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2587
2588     static void
2589 initchr(str)
2590     char_u *str;
2591 {
2592     regparse = str;
2593     prevchr_len = 0;
2594     curchr = prevprevchr = prevchr = nextchr = -1;
2595     at_start = TRUE;
2596     prev_at_start = FALSE;
2597 }
2598
/*
 * Return the next lexed character of the pattern without consuming it.
 *
 * The result is cached in "curchr": the pattern at "regparse" is only
 * examined when curchr is -1, otherwise the cached value is returned as-is.
 * A character that is magic in the current context is returned as Magic(c).
 * Whether it is magic depends on "reg_magic", on the position in the pattern
 * (at_start, prev_at_start) and on what was lexed before (prevchr,
 * prevprevchr).
 * On return "regparse" is where it was on entry; skipchr() advances it.
 */
    static int
peekchr()
{
    /* Non-zero while lexing the character that follows a backslash, see the
     * '\\' case below, which calls this function recursively. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* "*" is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".
             * But when lexing the character after a backslash
             * ("after_slash") Magic('*') must be produced also after those,
             * because the magicness is toggled afterwards: that way "\(\*"
             * ends up being not magic. */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&" or "\n".  It is also magic after "\%(" and
             * always when "reg_magic" is MAGIC_ALL. */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* What follows the '^' is handled as the first character. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n".  It is always magic when
             * "reg_magic" is MAGIC_ALL. */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;

                /* ignore \c \C \m \M and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
                    p += 2;
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                /* The byte after the backslash decides what happens. */
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     *
                     * The next character is lexed by calling peekchr()
                     * recursively with "regparse" temporarily moved past the
                     * backslash and "after_slash" set.  curchr is reset to
                     * -1 first, otherwise the recursive call would return
                     * the cached backslash.
                     */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     * With multi-byte support get the whole character, not
                     * just its first byte.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        default:
            /* Any other character stands for itself.  With multi-byte
             * support get the whole character, not just its first byte. */
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
2757
2758 /*
2759  * Eat one lexed character.  Do this in a way that we can undo it.
2760  */
2761     static void
2762 skipchr()
2763 {
2764     /* peekchr() eats a backslash, do the same here */
2765     if (*regparse == '\\')
2766         prevchr_len = 1;
2767     else
2768         prevchr_len = 0;
2769     if (regparse[prevchr_len] != NUL)
2770     {
2771 #ifdef FEAT_MBYTE
2772         if (enc_utf8)
2773             /* exclude composing chars that mb_ptr2len does include */
2774             prevchr_len += utf_ptr2len(regparse + prevchr_len);
2775         else if (has_mbyte)
2776             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2777         else
2778 #endif
2779             ++prevchr_len;
2780     }
2781     regparse += prevchr_len;
2782     prev_at_start = at_start;
2783     at_start = FALSE;
2784     prevprevchr = prevchr;
2785     prevchr = curchr;
2786     curchr = nextchr;       /* use previously unget char, or -1 */
2787     nextchr = -1;
2788 }
2789
2790 /*
2791  * Skip a character while keeping the value of prev_at_start for at_start.
2792  * prevchr and prevprevchr are also kept.
2793  */
2794     static void
2795 skipchr_keepstart()
2796 {
2797     int as = prev_at_start;
2798     int pr = prevchr;
2799     int prpr = prevprevchr;
2800
2801     skipchr();
2802     at_start = as;
2803     prevchr = pr;
2804     prevprevchr = prpr;
2805 }
2806
/*
 * Return the next lexed character of the pattern and consume it.
 */
    static int
getchr()
{
    int c;

    c = peekchr();
    skipchr();
    return c;
}
2815
2816 /*
2817  * put character back.  Works only once!
2818  */
2819     static void
2820 ungetchr()
2821 {
2822     nextchr = curchr;
2823     curchr = prevchr;
2824     prevchr = prevprevchr;
2825     at_start = prev_at_start;
2826     prev_at_start = FALSE;
2827
2828     /* Backup regparse, so that it's at the same position as before the
2829      * getchr(). */
2830     regparse -= prevchr_len;
2831 }
2832
2833 /*
2834  * Get and return the value of the hex string at the current position.
2835  * Return -1 if there is no valid hex number.
2836  * The position is updated:
2837  *     blahblah\%x20asdf
2838  *         before-^ ^-after
2839  * The parameter controls the maximum number of input characters. This will be
2840  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2841  */
2842     static int
2843 gethexchrs(maxinputlen)
2844     int         maxinputlen;
2845 {
2846     int         nr = 0;
2847     int         c;
2848     int         i;
2849
2850     for (i = 0; i < maxinputlen; ++i)
2851     {
2852         c = regparse[0];
2853         if (!vim_isxdigit(c))
2854             break;
2855         nr <<= 4;
2856         nr |= hex2nr(c);
2857         ++regparse;
2858     }
2859
2860     if (i == 0)
2861         return -1;
2862     return nr;
2863 }
2864
2865 /*
2866  * get and return the value of the decimal string immediately after the
2867  * current position. Return -1 for invalid.  Consumes all digits.
2868  */
2869     static int
2870 getdecchrs()
2871 {
2872     int         nr = 0;
2873     int         c;
2874     int         i;
2875
2876     for (i = 0; ; ++i)
2877     {
2878         c = regparse[0];
2879         if (c < '0' || c > '9')
2880             break;
2881         nr *= 10;
2882         nr += c - '0';
2883         ++regparse;
2884     }
2885
2886     if (i == 0)
2887         return -1;
2888     return nr;
2889 }
2890
2891 /*
2892  * get and return the value of the octal string immediately after the current
2893  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2894  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2895  * treat 8 or 9 as recognised characters. Position is updated:
2896  *     blahblah\%o210asdf
2897  *         before-^  ^-after
2898  */
2899     static int
2900 getoctchrs()
2901 {
2902     int         nr = 0;
2903     int         c;
2904     int         i;
2905
2906     for (i = 0; i < 3 && nr < 040; ++i)
2907     {
2908         c = regparse[0];
2909         if (c < '0' || c > '7')
2910             break;
2911         nr <<= 3;
2912         nr |= hex2nr(c);
2913         ++regparse;
2914     }
2915
2916     if (i == 0)
2917         return -1;
2918     return nr;
2919 }
2920
2921 /*
2922  * Get a number after a backslash that is inside [].
2923  * When nothing is recognized return a backslash.
2924  */
2925     static int
2926 coll_get_char()
2927 {
2928     int     nr = -1;
2929
2930     switch (*regparse++)
2931     {
2932         case 'd': nr = getdecchrs(); break;
2933         case 'o': nr = getoctchrs(); break;
2934         case 'x': nr = gethexchrs(2); break;
2935         case 'u': nr = gethexchrs(4); break;
2936         case 'U': nr = gethexchrs(8); break;
2937     }
2938     if (nr < 0)
2939     {
2940         /* If getting the number fails be backwards compatible: the character
2941          * is a backslash. */
2942         --regparse;
2943         nr = '\\';
2944     }
2945     return nr;
2946 }
2947
2948 /*
2949  * read_limits - Read two integers to be taken as a minimum and maximum.
2950  * If the first character is '-', then the range is reversed.
2951  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2952  * missing, a very big number is the default.
2953  */
2954     static int
2955 read_limits(minval, maxval)
2956     long        *minval;
2957     long        *maxval;
2958 {
2959     int         reverse = FALSE;
2960     char_u      *first_char;
2961     long        tmp;
2962
2963     if (*regparse == '-')
2964     {
2965         /* Starts with '-', so reverse the range later */
2966         regparse++;
2967         reverse = TRUE;
2968     }
2969     first_char = regparse;
2970     *minval = getdigits(&regparse);
2971     if (*regparse == ',')           /* There is a comma */
2972     {
2973         if (vim_isdigit(*++regparse))
2974             *maxval = getdigits(&regparse);
2975         else
2976             *maxval = MAX_LIMIT;
2977     }
2978     else if (VIM_ISDIGIT(*first_char))
2979         *maxval = *minval;          /* It was \{n} or \{-n} */
2980     else
2981         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2982     if (*regparse == '\\')
2983         regparse++;     /* Allow either \{...} or \{...\} */
2984     if (*regparse != '}')
2985     {
2986         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2987                                           reg_magic == MAGIC_ALL ? "" : "\\");
2988         EMSG_RET_FAIL(IObuff);
2989     }
2990
2991     /*
2992      * Reverse the range if there was a '-', or make sure it is in the right
2993      * order otherwise.
2994      */
2995     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2996     {
2997         tmp = *minval;
2998         *minval = *maxval;
2999         *maxval = tmp;
3000     }
3001     skipchr();          /* let's be friends with the lexer again */
3002     return OK;
3003 }
3004
/*
 * vim_regexec and friends
 *
 * From here on the code is about executing a compiled pattern, what is above
 * is about compiling it.
 */

/*
 * Global work variables for vim_regexec().
 */

/* The current match-position is remembered with these variables: */
static linenr_T reglnum;        /* line number, relative to first line */
static char_u   *regline;       /* start of current line */
static char_u   *reginput;      /* current input, points into "regline" */

/* Flags telling that the stored positions of sub-expressions have not been
 * reset yet; see cleanup_subexpr() and cleanup_zsubexpr(). */
static int      need_clear_subexpr;     /* subexpressions still need to be
                                         * cleared */
#ifdef FEAT_SYN_HL
static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
                                                 * still need to be cleared */
#endif
3024
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of the union is in use, depending on whether a single-line
 * or a multi-line regexp is being executed.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;
    int         rs_len; /* length of "backpos" */
} regsave_T;

/*
 * struct to save start/end pointer/position in for \(\)
 * Filled by save_se() and used by restore_se(): "pos" is used for a
 * multi-line regexp, "ptr" otherwise.
 */
typedef struct
{
    union
    {
        char_u  *ptr;
        lpos_T  pos;
    } se_u;
} save_se_T;
3049
/* Prototypes for the local functions used for executing a regexp. */
static char_u   *reg_getline __ARGS((linenr_T lnum));
static long     vim_regexec_both __ARGS((char_u *line, colnr_T col));
static long     regtry __ARGS((regprog_T *prog, colnr_T col));
static void     cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void     cleanup_zsubexpr __ARGS((void));
#endif
static void     reg_nextline __ARGS((void));
static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
static int      reg_save_equal __ARGS((regsave_T *save));
static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));

/*
 * Save the sub-expressions before attempting a match.
 * "posp" is used for a multi-line regexp, "pp" otherwise.
 * Note: this expands to a conditional expression without parentheses around
 * it, it should only be used as a statement of its own.
 */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/*
 * After a failed match restore the sub-expressions.
 * "posp" is used for a multi-line regexp, "pp" otherwise.
 * Note: this expands to a block in braces, not to an expression.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }

static int      re_num_cmp __ARGS((long_u val, char_u *scan));
static int      regmatch __ARGS((char_u *prog));
static int      regrepeat __ARGS((char_u *p, long maxcount));

#ifdef DEBUG
/* Only available when DEBUG is defined. */
int             regnarrate = 0;
#endif
3082
/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec(),
 * vim_regexec_nl() and vim_regexec_multi().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int      ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always: it is reset at each call to
 * vim_regexec(), vim_regexec_nl() and vim_regexec_multi().
 */
static int      ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum, which is always so for a single-line match.
 */
static colnr_T  ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in vim_regexec_both() when finished.
 */
static char_u   *reg_tofree = NULL;
static unsigned reg_tofreelen;

/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              <invalid>               buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 *
 * For a multi-line match "reg_maxline" is relative to "reg_firstlnum".
 * The REG_MULTI macro tests "reg_match" to find out which kind of match is
 * being done.
 */
static regmatch_T       *reg_match;
static regmmatch_T      *reg_mmatch;
static char_u           **reg_startp = NULL;
static char_u           **reg_endp = NULL;
static lpos_T           *reg_startpos = NULL;
static lpos_T           *reg_endpos = NULL;
static win_T            *reg_win;
static buf_T            *reg_buf;
static linenr_T         reg_firstlnum;
static linenr_T         reg_maxline;
static int              reg_line_lbr;       /* "\n" in string is line break */
3140
/*
 * Values for rs_state in regitem_T.
 * The comma is put in front of each entry, so that the entries inside the
 * #ifdef can be left out without further changes.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0        /* NOPEN and NCLOSE */
    , RS_MOPEN          /* MOPEN + [0-9] */
    , RS_MCLOSE         /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN          /* ZOPEN + [0-9] */
    , RS_ZCLOSE         /* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH         /* BRANCH */
    , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH        /* NOMATCH */
    , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;

/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;
        regsave_T  regsave;
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr */
} regitem_T;

/* Functions to add an item to and to remove an item from the regstack. */
static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
static void regstack_pop __ARGS((char_u **scan));
3182
/* used for BEHIND and NOBEHIND matching */
typedef struct regbehind_S
{
    regsave_T   save_after;
    regsave_T   save_behind;
} regbehind_T;

/* used for STAR, PLUS and BRACE_SIMPLE matching */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;

/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;

/*
 * regstack and backpos are used by regmatch().  They are kept over calls to
 * avoid invoking malloc() and free() often.
 * Both are initialized in vim_regexec_both().  The regstack uses an item size
 * of one byte, because items of different types are pushed onto it.
 */
static garray_T regstack;       /* stack with regitem_T items, sometimes
                                   preceded by regstar_T or regbehind_T. */
static garray_T backpos;        /* table with backpos_T for BACK */
3215
3216 /*
3217  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3218  */
3219     static char_u *
3220 reg_getline(lnum)
3221     linenr_T    lnum;
3222 {
3223     /* when looking behind for a match/no-match lnum is negative.  But we
3224      * can't go before line 1 */
3225     if (reg_firstlnum + lnum < 1)
3226         return NULL;
3227     if (lnum > reg_maxline)
3228         /* Must have matched the "\n" in the last line. */
3229         return (char_u *)"";
3230     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3231 }
3232
/* NOTE(review): presumably the input position used for look-behind matching;
 * it is not used in this part of the file, confirm in regmatch(). */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
/* As the table above "reg_match" shows for the normal sub-expressions, the
 * pointers are presumably for a single-line and the positions for a
 * multi-line match -- confirm where they are used. */
static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
#endif

/* TRUE if using multi-line regexp.  vim_regexec_multi() sets "reg_match" to
 * NULL, vim_regexec() and vim_regexec_nl() set it to their "rmp" argument. */
#define REG_MULTI       (reg_match == NULL)
3244
3245 /*
3246  * Match a regexp against a string.
3247  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3248  * Uses curbuf for line count and 'iskeyword'.
3249  *
3250  * Return TRUE if there is a match, FALSE if not.
3251  */
3252     int
3253 vim_regexec(rmp, line, col)
3254     regmatch_T  *rmp;
3255     char_u      *line;  /* string to match against */
3256     colnr_T     col;    /* column to start looking for match */
3257 {
3258     reg_match = rmp;
3259     reg_mmatch = NULL;
3260     reg_maxline = 0;
3261     reg_line_lbr = FALSE;
3262     reg_win = NULL;
3263     ireg_ic = rmp->rm_ic;
3264 #ifdef FEAT_MBYTE
3265     ireg_icombine = FALSE;
3266 #endif
3267     ireg_maxcol = 0;
3268     return (vim_regexec_both(line, col) != 0);
3269 }
3270
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        r;

    /* Set up for a single-line match: no window and no buffer lines. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;        /* the only difference with vim_regexec() */

    ireg_maxcol = 0;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    r = vim_regexec_both(line, col);
    return r != 0;
}
#endif
3295
3296 /*
3297  * Match a regexp against multiple lines.
3298  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3299  * Uses curbuf for line count and 'iskeyword'.
3300  *
3301  * Return zero if there is no match.  Return number of lines contained in the
3302  * match otherwise.
3303  */
3304     long
3305 vim_regexec_multi(rmp, win, buf, lnum, col)
3306     regmmatch_T *rmp;
3307     win_T       *win;           /* window in which to search or NULL */
3308     buf_T       *buf;           /* buffer in which to search */
3309     linenr_T    lnum;           /* nr of line to start looking for match */
3310     colnr_T     col;            /* column to start looking for match */
3311 {
3312     long        r;
3313     buf_T       *save_curbuf = curbuf;
3314
3315     reg_match = NULL;
3316     reg_mmatch = rmp;
3317     reg_buf = buf;
3318     reg_win = win;
3319     reg_firstlnum = lnum;
3320     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3321     reg_line_lbr = FALSE;
3322     ireg_ic = rmp->rmm_ic;
3323 #ifdef FEAT_MBYTE
3324     ireg_icombine = FALSE;
3325 #endif
3326     ireg_maxcol = rmp->rmm_maxcol;
3327
3328     /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3329     curbuf = buf;
3330     r = vim_regexec_both(NULL, col);
3331     curbuf = save_curbuf;
3332
3333     return r;
3334 }
3335
3336 /*
3337  * Match a regexp against a string ("line" points to the string) or multiple
3338  * lines ("line" is NULL, use reg_getline()).
3339  */
3340     static long
3341 vim_regexec_both(line, col)
3342     char_u      *line;
3343     colnr_T     col;            /* column to start looking for match */
3344 {
3345     regprog_T   *prog;
3346     char_u      *s;
3347     long        retval = 0L;
3348
3349     reg_tofree = NULL;
3350
3351     /* Init the regstack empty.  Use an item size of 1 byte, since we push
3352      * different things onto it.  Use a large grow size to avoid reallocating
3353      * it too often. */
3354     ga_init2(&regstack, 1, 10000);
3355
3356     /* Init the backpos table empty. */
3357     ga_init2(&backpos, sizeof(backpos_T), 10);
3358
3359     if (REG_MULTI)
3360     {
3361         prog = reg_mmatch->regprog;
3362         line = reg_getline((linenr_T)0);
3363         reg_startpos = reg_mmatch->startpos;
3364         reg_endpos = reg_mmatch->endpos;
3365     }
3366     else
3367     {
3368         prog = reg_match->regprog;
3369         reg_startp = reg_match->startp;
3370         reg_endp = reg_match->endp;
3371     }
3372
3373     /* Be paranoid... */
3374     if (prog == NULL || line == NULL)
3375     {
3376         EMSG(_(e_null));
3377         goto theend;
3378     }
3379
3380     /* Check validity of program. */
3381     if (prog_magic_wrong())
3382         goto theend;
3383
3384     /* If the start column is past the maximum column: no need to try. */
3385     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3386         goto theend;
3387
3388     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3389     if (prog->regflags & RF_ICASE)
3390         ireg_ic = TRUE;
3391     else if (prog->regflags & RF_NOICASE)
3392         ireg_ic = FALSE;
3393
3394 #ifdef FEAT_MBYTE
3395     /* If pattern contains "\Z" overrule value of ireg_icombine */
3396     if (prog->regflags & RF_ICOMBINE)
3397         ireg_icombine = TRUE;
3398 #endif
3399
3400     /* If there is a "must appear" string, look for it. */
3401     if (prog->regmust != NULL)
3402     {
3403         int c;
3404
3405 #ifdef FEAT_MBYTE
3406         if (has_mbyte)
3407             c = (*mb_ptr2char)(prog->regmust);
3408         else
3409 #endif
3410             c = *prog->regmust;
3411         s = line + col;
3412
3413         /*
3414          * This is used very often, esp. for ":global".  Use three versions of
3415          * the loop to avoid overhead of conditions.
3416          */
3417         if (!ireg_ic
3418 #ifdef FEAT_MBYTE
3419                 && !has_mbyte
3420 #endif
3421                 )
3422             while ((s = vim_strbyte(s, c)) != NULL)
3423             {
3424                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3425                     break;              /* Found it. */
3426                 ++s;
3427             }
3428 #ifdef FEAT_MBYTE
3429         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3430             while ((s = vim_strchr(s, c)) != NULL)
3431             {
3432                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3433                     break;              /* Found it. */
3434                 mb_ptr_adv(s);
3435             }
3436 #endif
3437         else
3438             while ((s = cstrchr(s, c)) != NULL)
3439             {
3440                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3441                     break;              /* Found it. */
3442                 mb_ptr_adv(s);
3443             }
3444         if (s == NULL)          /* Not present. */
3445             goto theend;
3446     }
3447
3448     regline = line;
3449     reglnum = 0;
3450
3451     /* Simplest case: Anchored match need be tried only once. */
3452     if (prog->reganch)
3453     {
3454         int     c;
3455
3456 #ifdef FEAT_MBYTE
3457         if (has_mbyte)
3458             c = (*mb_ptr2char)(regline + col);
3459         else
3460 #endif
3461             c = regline[col];
3462         if (prog->regstart == NUL
3463                 || prog->regstart == c
3464                 || (ireg_ic && ((
3465 #ifdef FEAT_MBYTE
3466                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3467                         || (c < 255 && prog->regstart < 255 &&
3468 #endif
3469                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3470             retval = regtry(prog, col);
3471         else
3472             retval = 0;
3473     }
3474     else
3475     {
3476         /* Messy cases:  unanchored match. */
3477         while (!got_int)
3478         {
3479             if (prog->regstart != NUL)
3480             {
3481                 /* Skip until the char we know it must start with.
3482                  * Used often, do some work to avoid call overhead. */
3483                 if (!ireg_ic
3484 #ifdef FEAT_MBYTE
3485                             && !has_mbyte
3486 #endif
3487                             )
3488                     s = vim_strbyte(regline + col, prog->regstart);
3489                 else
3490                     s = cstrchr(regline + col, prog->regstart);
3491                 if (s == NULL)
3492                 {
3493                     retval = 0;
3494                     break;
3495                 }
3496                 col = (int)(s - regline);
3497             }
3498
3499             /* Check for maximum column to try. */
3500             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3501             {
3502                 retval = 0;
3503                 break;
3504             }
3505
3506             retval = regtry(prog, col);
3507             if (retval > 0)
3508                 break;
3509
3510             /* if not currently on the first line, get it again */
3511             if (reglnum != 0)
3512             {
3513                 reglnum = 0;
3514                 regline = reg_getline((linenr_T)0);
3515             }
3516             if (regline[col] == NUL)
3517                 break;
3518 #ifdef FEAT_MBYTE
3519             if (has_mbyte)
3520                 col += (*mb_ptr2len)(regline + col);
3521             else
3522 #endif
3523                 ++col;
3524         }
3525     }
3526
3527 theend:
3528     vim_free(reg_tofree);
3529     ga_clear(&regstack);
3530     ga_clear(&backpos);
3531
3532     return retval;
3533 }
3534
3535 #ifdef FEAT_SYN_HL
3536 static reg_extmatch_T *make_extmatch __ARGS((void));
3537
3538 /*
3539  * Create a new extmatch and mark it as referenced once.
3540  */
3541     static reg_extmatch_T *
3542 make_extmatch()
3543 {
3544     reg_extmatch_T      *em;
3545
3546     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3547     if (em != NULL)
3548         em->refcnt = 1;
3549     return em;
3550 }
3551
3552 /*
3553  * Add a reference to an extmatch.
3554  */
3555     reg_extmatch_T *
3556 ref_extmatch(em)
3557     reg_extmatch_T      *em;
3558 {
3559     if (em != NULL)
3560         em->refcnt++;
3561     return em;
3562 }
3563
3564 /*
3565  * Remove a reference to an extmatch.  If there are no references left, free
3566  * the info.
3567  */
3568     void
3569 unref_extmatch(em)
3570     reg_extmatch_T      *em;
3571 {
3572     int i;
3573
3574     if (em != NULL && --em->refcnt <= 0)
3575     {
3576         for (i = 0; i < NSUBEXP; ++i)
3577             vim_free(em->matches[i]);
3578         vim_free(em);
3579     }
3580 }
3581 #endif
3582
3583 /*
3584  * regtry - try match of "prog" with at regline["col"].
3585  * Returns 0 for failure, number of lines contained in the match otherwise.
3586  */
3587     static long
3588 regtry(prog, col)
3589     regprog_T   *prog;
3590     colnr_T     col;
3591 {
3592     reginput = regline + col;
3593     need_clear_subexpr = TRUE;
3594 #ifdef FEAT_SYN_HL
3595     /* Clear the external match subpointers if necessary. */
3596     if (prog->reghasz == REX_SET)
3597         need_clear_zsubexpr = TRUE;
3598 #endif
3599
3600     if (regmatch(prog->program + 1) == 0)
3601         return 0;
3602
3603     cleanup_subexpr();
3604     if (REG_MULTI)
3605     {
3606         if (reg_startpos[0].lnum < 0)
3607         {
3608             reg_startpos[0].lnum = 0;
3609             reg_startpos[0].col = col;
3610         }
3611         if (reg_endpos[0].lnum < 0)
3612         {
3613             reg_endpos[0].lnum = reglnum;
3614             reg_endpos[0].col = (int)(reginput - regline);
3615         }
3616         else
3617             /* Use line number of "\ze". */
3618             reglnum = reg_endpos[0].lnum;
3619     }
3620     else
3621     {
3622         if (reg_startp[0] == NULL)
3623             reg_startp[0] = regline + col;
3624         if (reg_endp[0] == NULL)
3625             reg_endp[0] = reginput;
3626     }
3627 #ifdef FEAT_SYN_HL
3628     /* Package any found \z(...\) matches for export. Default is none. */
3629     unref_extmatch(re_extmatch_out);
3630     re_extmatch_out = NULL;
3631
3632     if (prog->reghasz == REX_SET)
3633     {
3634         int             i;
3635
3636         cleanup_zsubexpr();
3637         re_extmatch_out = make_extmatch();
3638         for (i = 0; i < NSUBEXP; i++)
3639         {
3640             if (REG_MULTI)
3641             {
3642                 /* Only accept single line matches. */
3643                 if (reg_startzpos[i].lnum >= 0
3644                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3645                     re_extmatch_out->matches[i] =
3646                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3647                                                        + reg_startzpos[i].col,
3648                                    reg_endzpos[i].col - reg_startzpos[i].col);
3649             }
3650             else
3651             {
3652                 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3653                     re_extmatch_out->matches[i] =
3654                             vim_strnsave(reg_startzp[i],
3655                                         (int)(reg_endzp[i] - reg_startzp[i]));
3656             }
3657         }
3658     }
3659 #endif
3660     return 1 + reglnum;
3661 }
3662
3663 #ifdef FEAT_MBYTE
3664 static int reg_prev_class __ARGS((void));
3665
3666 /*
3667  * Get class of previous character.
3668  */
3669     static int
3670 reg_prev_class()
3671 {
3672     if (reginput > regline)
3673         return mb_get_class(reginput - 1
3674                                      - (*mb_head_off)(regline, reginput - 1));
3675     return -1;
3676 }
3677
3678 #endif
/* Advance "reginput" with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  Logically these belong to
 * regmatch(), but keeping them at file level reduces the stack space that
 * regmatch() needs (it can be called recursively many times).
 */
static long     bl_minval = 0L;
static long     bl_maxval = 0L;
3688
3689 /*
3690  * regmatch - main matching routine
3691  *
3692  * Conceptually the strategy is simple: Check to see whether the current node
3693  * matches, push an item onto the regstack and loop to see whether the rest
3694  * matches, and then act accordingly.  In practice we make some effort to
3695  * avoid using the regstack, in particular by going through "ordinary" nodes
3696  * (that don't need to know whether the rest of the match failed) by a nested
3697  * loop.
3698  *
3699  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3700  * the last matched character.
3701  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3702  * undefined state!
3703  */
3704     static int
3705 regmatch(scan)
3706     char_u      *scan;          /* Current node. */
3707 {
3708   char_u        *next;          /* Next node. */
3709   int           op;
3710   int           c;
3711   regitem_T     *rp;
3712   int           no;
3713   int           status;         /* one of the RA_ values: */
3714 #define RA_FAIL         1       /* something failed, abort */
3715 #define RA_CONT         2       /* continue in inner loop */
3716 #define RA_BREAK        3       /* break inner loop */
3717 #define RA_MATCH        4       /* successful match */
3718 #define RA_NOMATCH      5       /* didn't match */
3719
3720   /* Init the regstack and backpos table empty.  They are initialized and
3721    * freed in vim_regexec_both() to reduce malloc()/free() calls. */
3722   regstack.ga_len = 0;
3723   backpos.ga_len = 0;
3724
3725   /*
3726    * Repeat until "regstack" is empty.
3727    */
3728   for (;;)
3729   {
3730     /* Some patterns my cause a long time to match, even though they are not
3731      * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3732     fast_breakcheck();
3733
3734 #ifdef DEBUG
3735     if (scan != NULL && regnarrate)
3736     {
3737         mch_errmsg(regprop(scan));
3738         mch_errmsg("(\n");
3739     }
3740 #endif
3741
3742     /*
3743      * Repeat for items that can be matched sequentially, without using the
3744      * regstack.
3745      */
3746     for (;;)
3747     {
3748         if (got_int || scan == NULL)
3749         {
3750             status = RA_FAIL;
3751             break;
3752         }
3753         status = RA_CONT;
3754
3755 #ifdef DEBUG
3756         if (regnarrate)
3757         {
3758             mch_errmsg(regprop(scan));
3759             mch_errmsg("...\n");
3760 # ifdef FEAT_SYN_HL
3761             if (re_extmatch_in != NULL)
3762             {
3763                 int i;
3764
3765                 mch_errmsg(_("External submatches:\n"));
3766                 for (i = 0; i < NSUBEXP; i++)
3767                 {
3768                     mch_errmsg("    \"");
3769                     if (re_extmatch_in->matches[i] != NULL)
3770                         mch_errmsg(re_extmatch_in->matches[i]);
3771                     mch_errmsg("\"\n");
3772                 }
3773             }
3774 # endif
3775         }
3776 #endif
3777         next = regnext(scan);
3778
3779         op = OP(scan);
3780         /* Check for character class with NL added. */
3781         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3782                                 && *reginput == NUL && reglnum <= reg_maxline)
3783         {
3784             reg_nextline();
3785         }
3786         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3787         {
3788             ADVANCE_REGINPUT();
3789         }
3790         else
3791         {
3792           if (WITH_NL(op))
3793               op -= ADD_NL;
3794 #ifdef FEAT_MBYTE
3795           if (has_mbyte)
3796               c = (*mb_ptr2char)(reginput);
3797           else
3798 #endif
3799               c = *reginput;
3800           switch (op)
3801           {
3802           case BOL:
3803             if (reginput != regline)
3804                 status = RA_NOMATCH;
3805             break;
3806
3807           case EOL:
3808             if (c != NUL)
3809                 status = RA_NOMATCH;
3810             break;
3811
3812           case RE_BOF:
3813             /* We're not at the beginning of the file when below the first
3814              * line where we started, not at the start of the line or we
3815              * didn't start at the first line of the buffer. */
3816             if (reglnum != 0 || reginput != regline
3817                                           || (REG_MULTI && reg_firstlnum > 1))
3818                 status = RA_NOMATCH;
3819             break;
3820
3821           case RE_EOF:
3822             if (reglnum != reg_maxline || c != NUL)
3823                 status = RA_NOMATCH;
3824             break;
3825
3826           case CURSOR:
3827             /* Check if the buffer is in a window and compare the
3828              * reg_win->w_cursor position to the match position. */
3829             if (reg_win == NULL
3830                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3831                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3832                 status = RA_NOMATCH;
3833             break;
3834
3835           case RE_MARK:
3836             /* Compare the mark position to the match position.  NOTE: Always
3837              * uses the current buffer. */
3838             {
3839                 int     mark = OPERAND(scan)[0];
3840                 int     cmp = OPERAND(scan)[1];
3841                 pos_T   *pos;
3842
3843                 pos = getmark(mark, FALSE);
3844                 if (pos == NULL              /* mark doesn't exist */
3845                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3846                         || (pos->lnum == reglnum + reg_firstlnum
3847                                 ? (pos->col == (colnr_T)(reginput - regline)
3848                                     ? (cmp == '<' || cmp == '>')
3849                                     : (pos->col < (colnr_T)(reginput - regline)
3850                                         ? cmp != '>'
3851                                         : cmp != '<'))
3852                                 : (pos->lnum < reglnum + reg_firstlnum
3853                                     ? cmp != '>'
3854                                     : cmp != '<')))
3855                     status = RA_NOMATCH;
3856             }
3857             break;
3858
3859           case RE_VISUAL:
3860 #ifdef FEAT_VISUAL
3861             /* Check if the buffer is the current buffer. and whether the
3862              * position is inside the Visual area. */
3863             if (reg_buf != curbuf || VIsual.lnum == 0)
3864                 status = RA_NOMATCH;
3865             else
3866             {
3867                 pos_T       top, bot;
3868                 linenr_T    lnum;
3869                 colnr_T     col;
3870                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3871                 int         mode;
3872
3873                 if (VIsual_active)
3874                 {
3875                     if (lt(VIsual, wp->w_cursor))
3876                     {
3877                         top = VIsual;
3878                         bot = wp->w_cursor;
3879                     }
3880                     else
3881                     {
3882                         top = wp->w_cursor;
3883                         bot = VIsual;
3884                     }
3885                     mode = VIsual_mode;
3886                 }
3887                 else
3888                 {
3889                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3890                     {
3891                         top = curbuf->b_visual.vi_start;
3892                         bot = curbuf->b_visual.vi_end;
3893                     }
3894                     else
3895                     {
3896                         top = curbuf->b_visual.vi_end;
3897                         bot = curbuf->b_visual.vi_start;
3898                     }
3899                     mode = curbuf->b_visual.vi_mode;
3900                 }
3901                 lnum = reglnum + reg_firstlnum;
3902                 col = (colnr_T)(reginput - regline);
3903                 if (lnum < top.lnum || lnum > bot.lnum)
3904                     status = RA_NOMATCH;
3905                 else if (mode == 'v')
3906                 {
3907                     if ((lnum == top.lnum && col < top.col)
3908                             || (lnum == bot.lnum
3909                                          && col >= bot.col + (*p_sel != 'e')))
3910                         status = RA_NOMATCH;
3911                 }
3912                 else if (mode == Ctrl_V)
3913                 {
3914                     colnr_T         start, end;
3915                     colnr_T         start2, end2;
3916                     colnr_T         cols;
3917
3918                     getvvcol(wp, &top, &start, NULL, &end);
3919                     getvvcol(wp, &bot, &start2, NULL, &end2);
3920                     if (start2 < start)
3921                         start = start2;
3922                     if (end2 > end)
3923                         end = end2;
3924                     if (top.col == MAXCOL || bot.col == MAXCOL)
3925                         end = MAXCOL;
3926                     cols = win_linetabsize(wp,
3927                                       regline, (colnr_T)(reginput - regline));
3928                     if (cols < start || cols > end - (*p_sel == 'e'))
3929                         status = RA_NOMATCH;
3930                 }
3931             }
3932 #else
3933             status = RA_NOMATCH;
3934 #endif
3935             break;
3936
3937           case RE_LNUM:
3938             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3939                                                                         scan))
3940                 status = RA_NOMATCH;
3941             break;
3942
3943           case RE_COL:
3944             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
3945                 status = RA_NOMATCH;
3946             break;
3947
3948           case RE_VCOL:
3949             if (!re_num_cmp((long_u)win_linetabsize(
3950                             reg_win == NULL ? curwin : reg_win,
3951                             regline, (colnr_T)(reginput - regline)) + 1, scan))
3952                 status = RA_NOMATCH;
3953             break;
3954
3955           case BOW:     /* \<word; reginput points to w */
3956             if (c == NUL)       /* Can't match at end of line */
3957                 status = RA_NOMATCH;
3958 #ifdef FEAT_MBYTE
3959             else if (has_mbyte)
3960             {
3961                 int this_class;
3962
3963                 /* Get class of current and previous char (if it exists). */
3964                 this_class = mb_get_class(reginput);
3965                 if (this_class <= 1)
3966                     status = RA_NOMATCH;  /* not on a word at all */
3967                 else if (reg_prev_class() == this_class)
3968                     status = RA_NOMATCH;  /* previous char is in same word */
3969             }
3970 #endif
3971             else
3972             {
3973                 if (!vim_iswordc(c)
3974                         || (reginput > regline && vim_iswordc(reginput[-1])))
3975                     status = RA_NOMATCH;
3976             }
3977             break;
3978
3979           case EOW:     /* word\>; reginput points after d */
3980             if (reginput == regline)    /* Can't match at start of line */
3981                 status = RA_NOMATCH;
3982 #ifdef FEAT_MBYTE
3983             else if (has_mbyte)
3984             {
3985                 int this_class, prev_class;
3986
3987                 /* Get class of current and previous char (if it exists). */
3988                 this_class = mb_get_class(reginput);
3989                 prev_class = reg_prev_class();
3990                 if (this_class == prev_class
3991                         || prev_class == 0 || prev_class == 1)
3992                     status = RA_NOMATCH;
3993             }
3994 #endif
3995             else
3996             {
3997                 if (!vim_iswordc(reginput[-1])
3998                         || (reginput[0] != NUL && vim_iswordc(c)))
3999                     status = RA_NOMATCH;
4000             }
4001             break; /* Matched with EOW */
4002
4003           case ANY:
4004             if (c == NUL)
4005                 status = RA_NOMATCH;
4006             else
4007                 ADVANCE_REGINPUT();
4008             break;
4009
4010           case IDENT:
4011             if (!vim_isIDc(c))
4012                 status = RA_NOMATCH;
4013             else
4014                 ADVANCE_REGINPUT();
4015             break;
4016
4017           case SIDENT:
4018             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4019                 status = RA_NOMATCH;
4020             else
4021                 ADVANCE_REGINPUT();
4022             break;
4023
4024           case KWORD:
4025             if (!vim_iswordp(reginput))
4026                 status = RA_NOMATCH;
4027             else
4028                 ADVANCE_REGINPUT();
4029             break;
4030
4031           case SKWORD:
4032             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4033                 status = RA_NOMATCH;
4034             else
4035                 ADVANCE_REGINPUT();
4036             break;
4037
4038           case FNAME:
4039             if (!vim_isfilec(c))
4040                 status = RA_NOMATCH;
4041             else
4042                 ADVANCE_REGINPUT();
4043             break;
4044
4045           case SFNAME:
4046             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4047                 status = RA_NOMATCH;
4048             else
4049                 ADVANCE_REGINPUT();
4050             break;
4051
4052           case PRINT:
4053             if (ptr2cells(reginput) != 1)
4054                 status = RA_NOMATCH;
4055             else
4056                 ADVANCE_REGINPUT();
4057             break;
4058
4059           case SPRINT:
4060             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4061                 status = RA_NOMATCH;
4062             else
4063                 ADVANCE_REGINPUT();
4064             break;
4065
4066           case WHITE:
4067             if (!vim_iswhite(c))
4068                 status = RA_NOMATCH;
4069             else
4070                 ADVANCE_REGINPUT();
4071             break;
4072
4073           case NWHITE:
4074             if (c == NUL || vim_iswhite(c))
4075                 status = RA_NOMATCH;
4076             else
4077                 ADVANCE_REGINPUT();
4078             break;
4079
4080           case DIGIT:
4081             if (!ri_digit(c))
4082                 status = RA_NOMATCH;
4083             else
4084                 ADVANCE_REGINPUT();
4085             break;
4086
4087           case NDIGIT:
4088             if (c == NUL || ri_digit(c))
4089                 status = RA_NOMATCH;
4090             else
4091                 ADVANCE_REGINPUT();
4092             break;
4093
4094           case HEX:
4095             if (!ri_hex(c))
4096                 status = RA_NOMATCH;
4097             else
4098                 ADVANCE_REGINPUT();
4099             break;
4100
4101           case NHEX:
4102             if (c == NUL || ri_hex(c))
4103                 status = RA_NOMATCH;
4104             else
4105                 ADVANCE_REGINPUT();
4106             break;
4107
4108           case OCTAL:
4109             if (!ri_octal(c))
4110                 status = RA_NOMATCH;
4111             else
4112                 ADVANCE_REGINPUT();
4113             break;
4114
4115           case NOCTAL:
4116             if (c == NUL || ri_octal(c))
4117                 status = RA_NOMATCH;
4118             else
4119                 ADVANCE_REGINPUT();
4120             break;
4121
4122           case WORD:
4123             if (!ri_word(c))
4124                 status = RA_NOMATCH;
4125             else
4126                 ADVANCE_REGINPUT();
4127             break;
4128
4129           case NWORD:
4130             if (c == NUL || ri_word(c))
4131                 status = RA_NOMATCH;
4132             else
4133                 ADVANCE_REGINPUT();
4134             break;
4135
4136           case HEAD:
4137             if (!ri_head(c))
4138                 status = RA_NOMATCH;
4139             else
4140                 ADVANCE_REGINPUT();
4141             break;
4142
4143           case NHEAD:
4144             if (c == NUL || ri_head(c))
4145                 status = RA_NOMATCH;
4146             else
4147                 ADVANCE_REGINPUT();
4148             break;
4149
4150           case ALPHA:
4151             if (!ri_alpha(c))
4152                 status = RA_NOMATCH;
4153             else
4154                 ADVANCE_REGINPUT();
4155             break;
4156
4157           case NALPHA:
4158             if (c == NUL || ri_alpha(c))
4159                 status = RA_NOMATCH;
4160             else
4161                 ADVANCE_REGINPUT();
4162             break;
4163
4164           case LOWER:
4165             if (!ri_lower(c))
4166                 status = RA_NOMATCH;
4167             else
4168                 ADVANCE_REGINPUT();
4169             break;
4170
4171           case NLOWER:
4172             if (c == NUL || ri_lower(c))
4173                 status = RA_NOMATCH;
4174             else
4175                 ADVANCE_REGINPUT();
4176             break;
4177
4178           case UPPER:
4179             if (!ri_upper(c))
4180                 status = RA_NOMATCH;
4181             else
4182                 ADVANCE_REGINPUT();
4183             break;
4184
4185           case NUPPER:
4186             if (c == NUL || ri_upper(c))
4187                 status = RA_NOMATCH;
4188             else
4189                 ADVANCE_REGINPUT();
4190             break;
4191
4192           case EXACTLY:
4193             {
4194                 int     len;
4195                 char_u  *opnd;
4196
4197                 opnd = OPERAND(scan);
4198                 /* Inline the first byte, for speed. */
4199                 if (*opnd != *reginput
4200                         && (!ireg_ic || (
4201 #ifdef FEAT_MBYTE
4202                             !enc_utf8 &&
4203 #endif
4204                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4205                     status = RA_NOMATCH;
4206                 else if (*opnd == NUL)
4207                 {
4208                     /* match empty string always works; happens when "~" is
4209                      * empty. */
4210                 }
4211                 else if (opnd[1] == NUL
4212 #ifdef FEAT_MBYTE
4213                             && !(enc_utf8 && ireg_ic)
4214 #endif
4215                         )
4216                     ++reginput;         /* matched a single char */
4217                 else
4218                 {
4219                     len = (int)STRLEN(opnd);
4220                     /* Need to match first byte again for multi-byte. */
4221                     if (cstrncmp(opnd, reginput, &len) != 0)
4222                         status = RA_NOMATCH;
4223 #ifdef FEAT_MBYTE
4224                     /* Check for following composing character. */
4225                     else if (enc_utf8
4226                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4227                     {
4228                         /* raaron: This code makes a composing character get
4229                          * ignored, which is the correct behavior (sometimes)
4230                          * for voweled Hebrew texts. */
4231                         if (!ireg_icombine)
4232                             status = RA_NOMATCH;
4233                     }
4234 #endif
4235                     else
4236                         reginput += len;
4237                 }
4238             }
4239             break;
4240
4241           case ANYOF:
4242           case ANYBUT:
4243             if (c == NUL)
4244                 status = RA_NOMATCH;
4245             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4246                 status = RA_NOMATCH;
4247             else
4248                 ADVANCE_REGINPUT();
4249             break;
4250
4251 #ifdef FEAT_MBYTE
4252           case MULTIBYTECODE:
4253             if (has_mbyte)
4254             {
4255                 int     i, len;
4256                 char_u  *opnd;
4257                 int     opndc = 0, inpc;
4258
4259                 opnd = OPERAND(scan);
4260                 /* Safety check (just in case 'encoding' was changed since
4261                  * compiling the program). */
4262                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4263                 {
4264                     status = RA_NOMATCH;
4265                     break;
4266                 }
4267                 if (enc_utf8)
4268                     opndc = mb_ptr2char(opnd);
4269                 if (enc_utf8 && utf_iscomposing(opndc))
4270                 {
4271                     /* When only a composing char is given match at any
4272                      * position where that composing char appears. */
4273                     status = RA_NOMATCH;
4274                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4275                     {
4276                         inpc = mb_ptr2char(reginput + i);
4277                         if (!utf_iscomposing(inpc))
4278                         {
4279                             if (i > 0)
4280                                 break;
4281                         }
4282                         else if (opndc == inpc)
4283                         {
4284                             /* Include all following composing chars. */
4285                             len = i + mb_ptr2len(reginput + i);
4286                             status = RA_MATCH;
4287                             break;
4288                         }
4289                     }
4290                 }
4291                 else
4292                     for (i = 0; i < len; ++i)
4293                         if (opnd[i] != reginput[i])
4294                         {
4295                             status = RA_NOMATCH;
4296                             break;
4297                         }
4298                 reginput += len;
4299             }
4300             else
4301                 status = RA_NOMATCH;
4302             break;
4303 #endif
4304
4305           case NOTHING:
4306             break;
4307
4308           case BACK:
4309             {
4310                 int             i;
4311                 backpos_T       *bp;
4312
4313                 /*
4314                  * When we run into BACK we need to check if we don't keep
4315                  * looping without matching any input.  The second and later
4316                  * times a BACK is encountered it fails if the input is still
4317                  * at the same position as the previous time.
4318                  * The positions are stored in "backpos" and found by the
4319                  * current value of "scan", the position in the RE program.
4320                  */
4321                 bp = (backpos_T *)backpos.ga_data;
4322                 for (i = 0; i < backpos.ga_len; ++i)
4323                     if (bp[i].bp_scan == scan)
4324                         break;
4325                 if (i == backpos.ga_len)
4326                 {
4327                     /* First time at this BACK, make room to store the pos. */
4328                     if (ga_grow(&backpos, 1) == FAIL)
4329                         status = RA_FAIL;
4330                     else
4331                     {
4332                         /* get "ga_data" again, it may have changed */
4333                         bp = (backpos_T *)backpos.ga_data;
4334                         bp[i].bp_scan = scan;
4335                         ++backpos.ga_len;
4336                     }
4337                 }
4338                 else if (reg_save_equal(&bp[i].bp_pos))
4339                     /* Still at same position as last time, fail. */
4340                     status = RA_NOMATCH;
4341
4342                 if (status != RA_FAIL && status != RA_NOMATCH)
4343                     reg_save(&bp[i].bp_pos, &backpos);
4344             }
4345             break;
4346
4347           case MOPEN + 0:   /* Match start: \zs */
4348           case MOPEN + 1:   /* \( */
4349           case MOPEN + 2:
4350           case MOPEN + 3:
4351           case MOPEN + 4:
4352           case MOPEN + 5:
4353           case MOPEN + 6:
4354           case MOPEN + 7:
4355           case MOPEN + 8:
4356           case MOPEN + 9:
4357             {
4358                 no = op - MOPEN;
4359                 cleanup_subexpr();
4360                 rp = regstack_push(RS_MOPEN, scan);
4361                 if (rp == NULL)
4362                     status = RA_FAIL;
4363                 else
4364                 {
4365                     rp->rs_no = no;
4366                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4367                                                              &reg_startp[no]);
4368                     /* We simply continue and handle the result when done. */
4369                 }
4370             }
4371             break;
4372
4373           case NOPEN:       /* \%( */
4374           case NCLOSE:      /* \) after \%( */
4375                 if (regstack_push(RS_NOPEN, scan) == NULL)
4376                     status = RA_FAIL;
4377                 /* We simply continue and handle the result when done. */
4378                 break;
4379
4380 #ifdef FEAT_SYN_HL
4381           case ZOPEN + 1:
4382           case ZOPEN + 2:
4383           case ZOPEN + 3:
4384           case ZOPEN + 4:
4385           case ZOPEN + 5:
4386           case ZOPEN + 6:
4387           case ZOPEN + 7:
4388           case ZOPEN + 8:
4389           case ZOPEN + 9:
4390             {
4391                 no = op - ZOPEN;
4392                 cleanup_zsubexpr();
4393                 rp = regstack_push(RS_ZOPEN, scan);
4394                 if (rp == NULL)
4395                     status = RA_FAIL;
4396                 else
4397                 {
4398                     rp->rs_no = no;
4399                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4400                                                              &reg_startzp[no]);
4401                     /* We simply continue and handle the result when done. */
4402                 }
4403             }
4404             break;
4405 #endif
4406
4407           case MCLOSE + 0:  /* Match end: \ze */
4408           case MCLOSE + 1:  /* \) */
4409           case MCLOSE + 2:
4410           case MCLOSE + 3:
4411           case MCLOSE + 4:
4412           case MCLOSE + 5:
4413           case MCLOSE + 6:
4414           case MCLOSE + 7:
4415           case MCLOSE + 8:
4416           case MCLOSE + 9:
4417             {
4418                 no = op - MCLOSE;
4419                 cleanup_subexpr();
4420                 rp = regstack_push(RS_MCLOSE, scan);
4421                 if (rp == NULL)
4422                     status = RA_FAIL;
4423                 else
4424                 {
4425                     rp->rs_no = no;
4426                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4427                     /* We simply continue and handle the result when done. */
4428                 }
4429             }
4430             break;
4431
4432 #ifdef FEAT_SYN_HL
4433           case ZCLOSE + 1:  /* \) after \z( */
4434           case ZCLOSE + 2:
4435           case ZCLOSE + 3:
4436           case ZCLOSE + 4:
4437           case ZCLOSE + 5:
4438           case ZCLOSE + 6:
4439           case ZCLOSE + 7:
4440           case ZCLOSE + 8:
4441           case ZCLOSE + 9:
4442             {
4443                 no = op - ZCLOSE;
4444                 cleanup_zsubexpr();
4445                 rp = regstack_push(RS_ZCLOSE, scan);
4446                 if (rp == NULL)
4447                     status = RA_FAIL;
4448                 else
4449                 {
4450                     rp->rs_no = no;
4451                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4452                                                               &reg_endzp[no]);
4453                     /* We simply continue and handle the result when done. */
4454                 }
4455             }
4456             break;
4457 #endif
4458
4459           case BACKREF + 1:
4460           case BACKREF + 2:
4461           case BACKREF + 3:
4462           case BACKREF + 4:
4463           case BACKREF + 5:
4464           case BACKREF + 6:
4465           case BACKREF + 7:
4466           case BACKREF + 8:
4467           case BACKREF + 9:
4468             {
4469                 int             len;
4470                 linenr_T        clnum;
4471                 colnr_T         ccol;
4472                 char_u          *p;
4473
4474                 no = op - BACKREF;
4475                 cleanup_subexpr();
4476                 if (!REG_MULTI)         /* Single-line regexp */
4477                 {
4478                     if (reg_endp[no] == NULL)
4479                     {
4480                         /* Backref was not set: Match an empty string. */
4481                         len = 0;
4482                     }
4483                     else
4484                     {
4485                         /* Compare current input with back-ref in the same
4486                          * line. */
4487                         len = (int)(reg_endp[no] - reg_startp[no]);
4488                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4489                             status = RA_NOMATCH;
4490                     }
4491                 }
4492                 else                            /* Multi-line regexp */
4493                 {
4494                     if (reg_endpos[no].lnum < 0)
4495                     {
4496                         /* Backref was not set: Match an empty string. */
4497                         len = 0;
4498                     }
4499                     else
4500                     {
4501                         if (reg_startpos[no].lnum == reglnum
4502                                 && reg_endpos[no].lnum == reglnum)
4503                         {
4504                             /* Compare back-ref within the current line. */
4505                             len = reg_endpos[no].col - reg_startpos[no].col;
4506                             if (cstrncmp(regline + reg_startpos[no].col,
4507                                                           reginput, &len) != 0)
4508                                 status = RA_NOMATCH;
4509                         }
4510                         else
4511                         {
4512                             /* Messy situation: Need to compare between two
4513                              * lines. */
4514                             ccol = reg_startpos[no].col;
4515                             clnum = reg_startpos[no].lnum;
4516                             for (;;)
4517                             {
4518                                 /* Since getting one line may invalidate
4519                                  * the other, need to make copy.  Slow! */
4520                                 if (regline != reg_tofree)
4521                                 {
4522                                     len = (int)STRLEN(regline);
4523                                     if (reg_tofree == NULL
4524                                                  || len >= (int)reg_tofreelen)
4525                                     {
4526                                         len += 50;      /* get some extra */
4527                                         vim_free(reg_tofree);
4528                                         reg_tofree = alloc(len);
4529                                         if (reg_tofree == NULL)
4530                                         {
4531                                             status = RA_FAIL; /* outof memory!*/
4532                                             break;
4533                                         }
4534                                         reg_tofreelen = len;
4535                                     }
4536                                     STRCPY(reg_tofree, regline);
4537                                     reginput = reg_tofree
4538                                                        + (reginput - regline);
4539                                     regline = reg_tofree;
4540                                 }
4541
4542                                 /* Get the line to compare with. */
4543                                 p = reg_getline(clnum);
4544                                 if (clnum == reg_endpos[no].lnum)
4545                                     len = reg_endpos[no].col - ccol;
4546                                 else
4547                                     len = (int)STRLEN(p + ccol);
4548
4549                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4550                                 {
4551                                     status = RA_NOMATCH;  /* doesn't match */
4552                                     break;
4553                                 }
4554                                 if (clnum == reg_endpos[no].lnum)
4555                                     break;              /* match and at end! */
4556                                 if (reglnum >= reg_maxline)
4557                                 {
4558                                     status = RA_NOMATCH;  /* text too short */
4559                                     break;
4560                                 }
4561
4562                                 /* Advance to next line. */
4563                                 reg_nextline();
4564                                 ++clnum;
4565                                 ccol = 0;
4566                                 if (got_int)
4567                                 {
4568                                     status = RA_FAIL;
4569                                     break;
4570                                 }
4571                             }
4572
4573                             /* found a match!  Note that regline may now point
4574                              * to a copy of the line, that should not matter. */
4575                         }
4576                     }
4577                 }
4578
4579                 /* Matched the backref, skip over it. */
4580                 reginput += len;
4581             }
4582             break;
4583
4584 #ifdef FEAT_SYN_HL
4585           case ZREF + 1:
4586           case ZREF + 2:
4587           case ZREF + 3:
4588           case ZREF + 4:
4589           case ZREF + 5:
4590           case ZREF + 6:
4591           case ZREF + 7:
4592           case ZREF + 8:
4593           case ZREF + 9:
4594             {
4595                 int     len;
4596
4597                 cleanup_zsubexpr();
4598                 no = op - ZREF;
4599                 if (re_extmatch_in != NULL
4600                         && re_extmatch_in->matches[no] != NULL)
4601                 {
4602                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4603                     if (cstrncmp(re_extmatch_in->matches[no],
4604                                                           reginput, &len) != 0)
4605                         status = RA_NOMATCH;
4606                     else
4607                         reginput += len;
4608                 }
4609                 else
4610                 {
4611                     /* Backref was not set: Match an empty string. */
4612                 }
4613             }
4614             break;
4615 #endif
4616
4617           case BRANCH:
4618             {
4619                 if (OP(next) != BRANCH) /* No choice. */
4620                     next = OPERAND(scan);       /* Avoid recursion. */
4621                 else
4622                 {
4623                     rp = regstack_push(RS_BRANCH, scan);
4624                     if (rp == NULL)
4625                         status = RA_FAIL;
4626                     else
4627                         status = RA_BREAK;      /* rest is below */
4628                 }
4629             }
4630             break;
4631
4632           case BRACE_LIMITS:
4633             {
4634                 if (OP(next) == BRACE_SIMPLE)
4635                 {
4636                     bl_minval = OPERAND_MIN(scan);
4637                     bl_maxval = OPERAND_MAX(scan);
4638                 }
4639                 else if (OP(next) >= BRACE_COMPLEX
4640                         && OP(next) < BRACE_COMPLEX + 10)
4641                 {
4642                     no = OP(next) - BRACE_COMPLEX;
4643                     brace_min[no] = OPERAND_MIN(scan);
4644                     brace_max[no] = OPERAND_MAX(scan);
4645                     brace_count[no] = 0;
4646                 }
4647                 else
4648                 {
4649                     EMSG(_(e_internal));            /* Shouldn't happen */
4650                     status = RA_FAIL;
4651                 }
4652             }
4653             break;
4654
4655           case BRACE_COMPLEX + 0:
4656           case BRACE_COMPLEX + 1:
4657           case BRACE_COMPLEX + 2:
4658           case BRACE_COMPLEX + 3:
4659           case BRACE_COMPLEX + 4:
4660           case BRACE_COMPLEX + 5:
4661           case BRACE_COMPLEX + 6:
4662           case BRACE_COMPLEX + 7:
4663           case BRACE_COMPLEX + 8:
4664           case BRACE_COMPLEX + 9:
4665             {
4666                 no = op - BRACE_COMPLEX;
4667                 ++brace_count[no];
4668
4669                 /* If not matched enough times yet, try one more */
4670                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4671                                              ? brace_min[no] : brace_max[no]))
4672                 {
4673                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4674                     if (rp == NULL)
4675                         status = RA_FAIL;
4676                     else
4677                     {
4678                         rp->rs_no = no;
4679                         reg_save(&rp->rs_un.regsave, &backpos);
4680                         next = OPERAND(scan);
4681                         /* We continue and handle the result when done. */
4682                     }
4683                     break;
4684                 }
4685
4686                 /* If matched enough times, may try matching some more */
4687                 if (brace_min[no] <= brace_max[no])
4688                 {
4689                     /* Range is the normal way around, use longest match */
4690                     if (brace_count[no] <= brace_max[no])
4691                     {
4692                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4693                         if (rp == NULL)
4694                             status = RA_FAIL;
4695                         else
4696                         {
4697                             rp->rs_no = no;
4698                             reg_save(&rp->rs_un.regsave, &backpos);
4699                             next = OPERAND(scan);
4700                             /* We continue and handle the result when done. */
4701                         }
4702                     }
4703                 }
4704                 else
4705                 {
4706                     /* Range is backwards, use shortest match first */
4707                     if (brace_count[no] <= brace_min[no])
4708                     {
4709                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4710                         if (rp == NULL)
4711                             status = RA_FAIL;
4712                         else
4713                         {
4714                             reg_save(&rp->rs_un.regsave, &backpos);
4715                             /* We continue and handle the result when done. */
4716                         }
4717                     }
4718                 }
4719             }
4720             break;
4721
4722           case BRACE_SIMPLE:
4723           case STAR:
4724           case PLUS:
4725             {
4726                 regstar_T       rst;
4727
4728                 /*
4729                  * Lookahead to avoid useless match attempts when we know
4730                  * what character comes next.
4731                  */
4732                 if (OP(next) == EXACTLY)
4733                 {
4734                     rst.nextb = *OPERAND(next);
4735                     if (ireg_ic)
4736                     {
4737                         if (MB_ISUPPER(rst.nextb))
4738                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4739                         else
4740                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4741                     }
4742                     else
4743                         rst.nextb_ic = rst.nextb;
4744                 }
4745                 else
4746                 {
4747                     rst.nextb = NUL;
4748                     rst.nextb_ic = NUL;
4749                 }
4750                 if (op != BRACE_SIMPLE)
4751                 {
4752                     rst.minval = (op == STAR) ? 0 : 1;
4753                     rst.maxval = MAX_LIMIT;
4754                 }
4755                 else
4756                 {
4757                     rst.minval = bl_minval;
4758                     rst.maxval = bl_maxval;
4759                 }
4760
4761                 /*
4762                  * When maxval > minval, try matching as much as possible, up
4763                  * to maxval.  When maxval < minval, try matching at least the
4764                  * minimal number (since the range is backwards, that's also
4765                  * maxval!).
4766                  */
4767                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4768                 if (got_int)
4769                 {
4770                     status = RA_FAIL;
4771                     break;
4772                 }
4773                 if (rst.minval <= rst.maxval
4774                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4775                 {
4776                     /* It could match.  Prepare for trying to match what
4777                      * follows.  The code is below.  Parameters are stored in
4778                      * a regstar_T on the regstack. */
4779                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4780                     {
4781                         EMSG(_(e_maxmempat));
4782                         status = RA_FAIL;
4783                     }
4784                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4785                         status = RA_FAIL;
4786                     else
4787                     {
4788                         regstack.ga_len += sizeof(regstar_T);
4789                         rp = regstack_push(rst.minval <= rst.maxval
4790                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4791                         if (rp == NULL)
4792                             status = RA_FAIL;
4793                         else
4794                         {
4795                             *(((regstar_T *)rp) - 1) = rst;
4796                             status = RA_BREAK;      /* skip the restore bits */
4797                         }
4798                     }
4799                 }
4800                 else
4801                     status = RA_NOMATCH;
4802
4803             }
4804             break;
4805
4806           case NOMATCH:
4807           case MATCH:
4808           case SUBPAT:
4809             rp = regstack_push(RS_NOMATCH, scan);
4810             if (rp == NULL)
4811                 status = RA_FAIL;
4812             else
4813             {
4814                 rp->rs_no = op;
4815                 reg_save(&rp->rs_un.regsave, &backpos);
4816                 next = OPERAND(scan);
4817                 /* We continue and handle the result when done. */
4818             }
4819             break;
4820
4821           case BEHIND:
4822           case NOBEHIND:
4823             /* Need a bit of room to store extra positions. */
4824             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4825             {
4826                 EMSG(_(e_maxmempat));
4827                 status = RA_FAIL;
4828             }
4829             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4830                 status = RA_FAIL;
4831             else
4832             {
4833                 regstack.ga_len += sizeof(regbehind_T);
4834                 rp = regstack_push(RS_BEHIND1, scan);
4835                 if (rp == NULL)
4836                     status = RA_FAIL;
4837                 else
4838                 {
4839                     rp->rs_no = op;
4840                     reg_save(&rp->rs_un.regsave, &backpos);
4841                     /* First try if what follows matches.  If it does then we
4842                      * check the behind match by looping. */
4843                 }
4844             }
4845             break;
4846
4847           case BHPOS:
4848             if (REG_MULTI)
4849             {
4850                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4851                         || behind_pos.rs_u.pos.lnum != reglnum)
4852                     status = RA_NOMATCH;
4853             }
4854             else if (behind_pos.rs_u.ptr != reginput)
4855                 status = RA_NOMATCH;
4856             break;
4857
4858           case NEWL:
4859             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4860                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4861                 status = RA_NOMATCH;
4862             else if (reg_line_lbr)
4863                 ADVANCE_REGINPUT();
4864             else
4865                 reg_nextline();
4866             break;
4867
4868           case END:
4869             status = RA_MATCH;  /* Success! */
4870             break;
4871
4872           default:
4873             EMSG(_(e_re_corr));
4874 #ifdef DEBUG
4875             printf("Illegal op code %d\n", op);
4876 #endif
4877             status = RA_FAIL;
4878             break;
4879           }
4880         }
4881
4882         /* If we can't continue sequentially, break the inner loop. */
4883         if (status != RA_CONT)
4884             break;
4885
4886         /* Continue in inner loop, advance to next item. */
4887         scan = next;
4888
4889     } /* end of inner loop */
4890
4891     /*
4892      * If there is something on the regstack execute the code for the state.
4893      * If the state is popped then loop and use the older state.
4894      */
4895     while (regstack.ga_len > 0 && status != RA_FAIL)
4896     {
4897         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4898         switch (rp->rs_state)
4899         {
4900           case RS_NOPEN:
4901             /* Result is passed on as-is, simply pop the state. */
4902             regstack_pop(&scan);
4903             break;
4904
4905           case RS_MOPEN:
4906             /* Pop the state.  Restore pointers when there is no match. */
4907             if (status == RA_NOMATCH)
4908                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4909                                                   &reg_startp[rp->rs_no]);
4910             regstack_pop(&scan);
4911             break;
4912
4913 #ifdef FEAT_SYN_HL
4914           case RS_ZOPEN:
4915             /* Pop the state.  Restore pointers when there is no match. */
4916             if (status == RA_NOMATCH)
4917                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4918                                                  &reg_startzp[rp->rs_no]);
4919             regstack_pop(&scan);
4920             break;
4921 #endif
4922
4923           case RS_MCLOSE:
4924             /* Pop the state.  Restore pointers when there is no match. */
4925             if (status == RA_NOMATCH)
4926                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4927                                                     &reg_endp[rp->rs_no]);
4928             regstack_pop(&scan);
4929             break;
4930
4931 #ifdef FEAT_SYN_HL
4932           case RS_ZCLOSE:
4933             /* Pop the state.  Restore pointers when there is no match. */
4934             if (status == RA_NOMATCH)
4935                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4936                                                    &reg_endzp[rp->rs_no]);
4937             regstack_pop(&scan);
4938             break;
4939 #endif
4940
4941           case RS_BRANCH:
4942             if (status == RA_MATCH)
4943                 /* this branch matched, use it */
4944                 regstack_pop(&scan);
4945             else
4946             {
4947                 if (status != RA_BREAK)
4948                 {
4949                     /* After a non-matching branch: try next one. */
4950                     reg_restore(&rp->rs_un.regsave, &backpos);
4951                     scan = rp->rs_scan;
4952                 }
4953                 if (scan == NULL || OP(scan) != BRANCH)
4954                 {
4955                     /* no more branches, didn't find a match */
4956                     status = RA_NOMATCH;
4957                     regstack_pop(&scan);
4958                 }
4959                 else
4960                 {
4961                     /* Prepare to try a branch. */
4962                     rp->rs_scan = regnext(scan);
4963                     reg_save(&rp->rs_un.regsave, &backpos);
4964                     scan = OPERAND(scan);
4965                 }
4966             }
4967             break;
4968
4969           case RS_BRCPLX_MORE:
4970             /* Pop the state.  Restore pointers when there is no match. */
4971             if (status == RA_NOMATCH)
4972             {
4973                 reg_restore(&rp->rs_un.regsave, &backpos);
4974                 --brace_count[rp->rs_no];       /* decrement match count */
4975             }
4976             regstack_pop(&scan);
4977             break;
4978
4979           case RS_BRCPLX_LONG:
4980             /* Pop the state.  Restore pointers when there is no match. */
4981             if (status == RA_NOMATCH)
4982             {
4983                 /* There was no match, but we did find enough matches. */
4984                 reg_restore(&rp->rs_un.regsave, &backpos);
4985                 --brace_count[rp->rs_no];
4986                 /* continue with the items after "\{}" */
4987                 status = RA_CONT;
4988             }
4989             regstack_pop(&scan);
4990             if (status == RA_CONT)
4991                 scan = regnext(scan);
4992             break;
4993
4994           case RS_BRCPLX_SHORT:
4995             /* Pop the state.  Restore pointers when there is no match. */
4996             if (status == RA_NOMATCH)
4997                 /* There was no match, try to match one more item. */
4998                 reg_restore(&rp->rs_un.regsave, &backpos);
4999             regstack_pop(&scan);
5000             if (status == RA_NOMATCH)
5001             {
5002                 scan = OPERAND(scan);
5003                 status = RA_CONT;
5004             }
5005             break;
5006
5007           case RS_NOMATCH:
5008             /* Pop the state.  If the operand matches for NOMATCH or
5009              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5010              * except for SUBPAT, and continue with the next item. */
5011             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5012                 status = RA_NOMATCH;
5013             else
5014             {
5015                 status = RA_CONT;
5016                 if (rp->rs_no != SUBPAT)        /* zero-width */
5017                     reg_restore(&rp->rs_un.regsave, &backpos);
5018             }
5019             regstack_pop(&scan);
5020             if (status == RA_CONT)
5021                 scan = regnext(scan);
5022             break;
5023
5024           case RS_BEHIND1:
5025             if (status == RA_NOMATCH)
5026             {
5027                 regstack_pop(&scan);
5028                 regstack.ga_len -= sizeof(regbehind_T);
5029             }
5030             else
5031             {
5032                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5033                  * the behind part does (not) match before the current
5034                  * position in the input.  This must be done at every
5035                  * position in the input and checking if the match ends at
5036                  * the current position. */
5037
5038                 /* save the position after the found match for next */
5039                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5040
5041                 /* start looking for a match with operand at the current
5042                  * position.  Go back one character until we find the
5043                  * result, hitting the start of the line or the previous
5044                  * line (for multi-line matching).
5045                  * Set behind_pos to where the match should end, BHPOS
5046                  * will match it.  Save the current value. */
5047                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5048                 behind_pos = rp->rs_un.regsave;
5049
5050                 rp->rs_state = RS_BEHIND2;
5051
5052                 reg_restore(&rp->rs_un.regsave, &backpos);
5053                 scan = OPERAND(rp->rs_scan);
5054             }
5055             break;
5056
5057           case RS_BEHIND2:
5058             /*
5059              * Looping for BEHIND / NOBEHIND match.
5060              */
5061             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5062             {
5063                 /* found a match that ends where "next" started */
5064                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5065                 if (rp->rs_no == BEHIND)
5066                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5067                                                                     &backpos);
5068                 else
5069                     /* But we didn't want a match. */
5070                     status = RA_NOMATCH;
5071                 regstack_pop(&scan);
5072                 regstack.ga_len -= sizeof(regbehind_T);
5073             }
5074             else
5075             {
5076                 /* No match: Go back one character.  May go to previous
5077                  * line once. */
5078                 no = OK;
5079                 if (REG_MULTI)
5080                 {
5081                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5082                     {
5083                         if (rp->rs_un.regsave.rs_u.pos.lnum
5084                                         < behind_pos.rs_u.pos.lnum
5085                                 || reg_getline(
5086                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5087                                                                   == NULL)
5088                             no = FAIL;
5089                         else
5090                         {
5091                             reg_restore(&rp->rs_un.regsave, &backpos);
5092                             rp->rs_un.regsave.rs_u.pos.col =
5093                                                  (colnr_T)STRLEN(regline);
5094                         }
5095                     }
5096                     else
5097                         --rp->rs_un.regsave.rs_u.pos.col;
5098                 }
5099                 else
5100                 {
5101                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5102                         no = FAIL;
5103                     else
5104                         --rp->rs_un.regsave.rs_u.ptr;
5105                 }
5106                 if (no == OK)
5107                 {
5108                     /* Advanced, prepare for finding match again. */
5109                     reg_restore(&rp->rs_un.regsave, &backpos);
5110                     scan = OPERAND(rp->rs_scan);
5111                 }
5112                 else
5113                 {
5114                     /* Can't advance.  For NOBEHIND that's a match. */
5115                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5116                     if (rp->rs_no == NOBEHIND)
5117                     {
5118                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5119                                                                     &backpos);
5120                         status = RA_MATCH;
5121                     }
5122                     else
5123                         status = RA_NOMATCH;
5124                     regstack_pop(&scan);
5125                     regstack.ga_len -= sizeof(regbehind_T);
5126                 }
5127             }
5128             break;
5129
5130           case RS_STAR_LONG:
5131           case RS_STAR_SHORT:
5132             {
5133                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5134
5135                 if (status == RA_MATCH)
5136                 {
5137                     regstack_pop(&scan);
5138                     regstack.ga_len -= sizeof(regstar_T);
5139                     break;
5140                 }
5141
5142                 /* Tried once already, restore input pointers. */
5143                 if (status != RA_BREAK)
5144                     reg_restore(&rp->rs_un.regsave, &backpos);
5145
5146                 /* Repeat until we found a position where it could match. */
5147                 for (;;)
5148                 {
5149                     if (status != RA_BREAK)
5150                     {
5151                         /* Tried first position already, advance. */
5152                         if (rp->rs_state == RS_STAR_LONG)
5153                         {
5154                             /* Trying for longest match, but couldn't or
5155                              * didn't match -- back up one char. */
5156                             if (--rst->count < rst->minval)
5157                                 break;
5158                             if (reginput == regline)
5159                             {
5160                                 /* backup to last char of previous line */
5161                                 --reglnum;
5162                                 regline = reg_getline(reglnum);
5163                                 /* Just in case regrepeat() didn't count
5164                                  * right. */
5165                                 if (regline == NULL)
5166                                     break;
5167                                 reginput = regline + STRLEN(regline);
5168                                 fast_breakcheck();
5169                             }
5170                             else
5171                                 mb_ptr_back(regline, reginput);
5172                         }
5173                         else
5174                         {
5175                             /* Range is backwards, use shortest match first.
5176                              * Careful: maxval and minval are exchanged!
5177                              * Couldn't or didn't match: try advancing one
5178                              * char. */
5179                             if (rst->count == rst->minval
5180                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5181                                 break;
5182                             ++rst->count;
5183                         }
5184                         if (got_int)
5185                             break;
5186                     }
5187                     else
5188                         status = RA_NOMATCH;
5189
5190                     /* If it could match, try it. */
5191                     if (rst->nextb == NUL || *reginput == rst->nextb
5192                                              || *reginput == rst->nextb_ic)
5193                     {
5194                         reg_save(&rp->rs_un.regsave, &backpos);
5195                         scan = regnext(rp->rs_scan);
5196                         status = RA_CONT;
5197                         break;
5198                     }
5199                 }
5200                 if (status != RA_CONT)
5201                 {
5202                     /* Failed. */
5203                     regstack_pop(&scan);
5204                     regstack.ga_len -= sizeof(regstar_T);
5205                     status = RA_NOMATCH;
5206                 }
5207             }
5208             break;
5209         }
5210
5211         /* If we want to continue the inner loop or didn't pop a state
5212          * continue matching loop */
5213         if (status == RA_CONT || rp == (regitem_T *)
5214                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5215             break;
5216     }
5217
5218     /* May need to continue with the inner loop, starting at "scan". */
5219     if (status == RA_CONT)
5220         continue;
5221
5222     /*
5223      * If the regstack is empty or something failed we are done.
5224      */
5225     if (regstack.ga_len == 0 || status == RA_FAIL)
5226     {
5227         if (scan == NULL)
5228         {
5229             /*
5230              * We get here only if there's trouble -- normally "case END" is
5231              * the terminating point.
5232              */
5233             EMSG(_(e_re_corr));
5234 #ifdef DEBUG
5235             printf("Premature EOL\n");
5236 #endif
5237         }
5238         if (status == RA_FAIL)
5239             got_int = TRUE;
5240         return (status == RA_MATCH);
5241     }
5242
5243   } /* End of loop until the regstack is empty. */
5244
5245   /* NOTREACHED */
5246 }
5247
5248 /*
5249  * Push an item onto the regstack.
5250  * Returns pointer to new item.  Returns NULL when out of memory.
5251  */
5252     static regitem_T *
5253 regstack_push(state, scan)
5254     regstate_T  state;
5255     char_u      *scan;
5256 {
5257     regitem_T   *rp;
5258
5259     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5260     {
5261         EMSG(_(e_maxmempat));
5262         return NULL;
5263     }
5264     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5265         return NULL;
5266
5267     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5268     rp->rs_state = state;
5269     rp->rs_scan = scan;
5270
5271     regstack.ga_len += sizeof(regitem_T);
5272     return rp;
5273 }
5274
5275 /*
5276  * Pop an item from the regstack.
5277  */
5278     static void
5279 regstack_pop(scan)
5280     char_u      **scan;
5281 {
5282     regitem_T   *rp;
5283
5284     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5285     *scan = rp->rs_scan;
5286
5287     regstack.ga_len -= sizeof(regitem_T);
5288 }
5289
5290 /*
5291  * regrepeat - repeatedly match something simple, return how many.
5292  * Advances reginput (and reglnum) to just after the matched chars.
5293  */
5294     static int
5295 regrepeat(p, maxcount)
5296     char_u      *p;
5297     long        maxcount;   /* maximum number of matches allowed */
5298 {
5299     long        count = 0;
5300     char_u      *scan;
5301     char_u      *opnd;
5302     int         mask;
5303     int         testval = 0;
5304
5305     scan = reginput;        /* Make local copy of reginput for speed. */
5306     opnd = OPERAND(p);
5307     switch (OP(p))
5308     {
5309       case ANY:
5310       case ANY + ADD_NL:
5311         while (count < maxcount)
5312         {
5313             /* Matching anything means we continue until end-of-line (or
5314              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5315             while (*scan != NUL && count < maxcount)
5316             {
5317                 ++count;
5318                 mb_ptr_adv(scan);
5319             }
5320             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5321                                          || reg_line_lbr || count == maxcount)
5322                 break;
5323             ++count;            /* count the line-break */
5324             reg_nextline();
5325             scan = reginput;
5326             if (got_int)
5327                 break;
5328         }
5329         break;
5330
5331       case IDENT:
5332       case IDENT + ADD_NL:
5333         testval = TRUE;
5334         /*FALLTHROUGH*/
5335       case SIDENT:
5336       case SIDENT + ADD_NL:
5337         while (count < maxcount)
5338         {
5339             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5340             {
5341                 mb_ptr_adv(scan);
5342             }
5343             else if (*scan == NUL)
5344             {
5345                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5346                                                               || reg_line_lbr)
5347                     break;
5348                 reg_nextline();
5349                 scan = reginput;
5350                 if (got_int)
5351                     break;
5352             }
5353             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5354                 ++scan;
5355             else
5356                 break;
5357             ++count;
5358         }
5359         break;
5360
5361       case KWORD:
5362       case KWORD + ADD_NL:
5363         testval = TRUE;
5364         /*FALLTHROUGH*/
5365       case SKWORD:
5366       case SKWORD + ADD_NL:
5367         while (count < maxcount)
5368         {
5369             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5370             {
5371                 mb_ptr_adv(scan);
5372             }
5373             else if (*scan == NUL)
5374             {
5375                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5376                                                               || reg_line_lbr)
5377                     break;
5378                 reg_nextline();
5379                 scan = reginput;
5380                 if (got_int)
5381                     break;
5382             }
5383             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5384                 ++scan;
5385             else
5386                 break;
5387             ++count;
5388         }
5389         break;
5390
5391       case FNAME:
5392       case FNAME + ADD_NL:
5393         testval = TRUE;
5394         /*FALLTHROUGH*/
5395       case SFNAME:
5396       case SFNAME + ADD_NL:
5397         while (count < maxcount)
5398         {
5399             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5400             {
5401                 mb_ptr_adv(scan);
5402             }
5403             else if (*scan == NUL)
5404             {
5405                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5406                                                               || reg_line_lbr)
5407                     break;
5408                 reg_nextline();
5409                 scan = reginput;
5410                 if (got_int)
5411                     break;
5412             }
5413             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5414                 ++scan;
5415             else
5416                 break;
5417             ++count;
5418         }
5419         break;
5420
5421       case PRINT:
5422       case PRINT + ADD_NL:
5423         testval = TRUE;
5424         /*FALLTHROUGH*/
5425       case SPRINT:
5426       case SPRINT + ADD_NL:
5427         while (count < maxcount)
5428         {
5429             if (*scan == NUL)
5430             {
5431                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5432                                                               || reg_line_lbr)
5433                     break;
5434                 reg_nextline();
5435                 scan = reginput;
5436                 if (got_int)
5437                     break;
5438             }
5439             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5440             {
5441                 mb_ptr_adv(scan);
5442             }
5443             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5444                 ++scan;
5445             else
5446                 break;
5447             ++count;
5448         }
5449         break;
5450
5451       case WHITE:
5452       case WHITE + ADD_NL:
5453         testval = mask = RI_WHITE;
5454 do_class:
5455         while (count < maxcount)
5456         {
5457 #ifdef FEAT_MBYTE
5458             int         l;
5459 #endif
5460             if (*scan == NUL)
5461             {
5462                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5463                                                               || reg_line_lbr)
5464                     break;
5465                 reg_nextline();
5466                 scan = reginput;
5467                 if (got_int)
5468                     break;
5469             }
5470 #ifdef FEAT_MBYTE
5471             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5472             {
5473                 if (testval != 0)
5474                     break;
5475                 scan += l;
5476             }
5477 #endif
5478             else if ((class_tab[*scan] & mask) == testval)
5479                 ++scan;
5480             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5481                 ++scan;
5482             else
5483                 break;
5484             ++count;
5485         }
5486         break;
5487
5488       case NWHITE:
5489       case NWHITE + ADD_NL:
5490         mask = RI_WHITE;
5491         goto do_class;
5492       case DIGIT:
5493       case DIGIT + ADD_NL:
5494         testval = mask = RI_DIGIT;
5495         goto do_class;
5496       case NDIGIT:
5497       case NDIGIT + ADD_NL:
5498         mask = RI_DIGIT;
5499         goto do_class;
5500       case HEX:
5501       case HEX + ADD_NL:
5502         testval = mask = RI_HEX;
5503         goto do_class;
5504       case NHEX:
5505       case NHEX + ADD_NL:
5506         mask = RI_HEX;
5507         goto do_class;
5508       case OCTAL:
5509       case OCTAL + ADD_NL:
5510         testval = mask = RI_OCTAL;
5511         goto do_class;
5512       case NOCTAL:
5513       case NOCTAL + ADD_NL:
5514         mask = RI_OCTAL;
5515         goto do_class;
5516       case WORD:
5517       case WORD + ADD_NL:
5518         testval = mask = RI_WORD;
5519         goto do_class;
5520       case NWORD:
5521       case NWORD + ADD_NL:
5522         mask = RI_WORD;
5523         goto do_class;
5524       case HEAD:
5525       case HEAD + ADD_NL:
5526         testval = mask = RI_HEAD;
5527         goto do_class;
5528       case NHEAD:
5529       case NHEAD + ADD_NL:
5530         mask = RI_HEAD;
5531         goto do_class;
5532       case ALPHA:
5533       case ALPHA + ADD_NL:
5534         testval = mask = RI_ALPHA;
5535         goto do_class;
5536       case NALPHA:
5537       case NALPHA + ADD_NL:
5538         mask = RI_ALPHA;
5539         goto do_class;
5540       case LOWER:
5541       case LOWER + ADD_NL:
5542         testval = mask = RI_LOWER;
5543         goto do_class;
5544       case NLOWER:
5545       case NLOWER + ADD_NL:
5546         mask = RI_LOWER;
5547         goto do_class;
5548       case UPPER:
5549       case UPPER + ADD_NL:
5550         testval = mask = RI_UPPER;
5551         goto do_class;
5552       case NUPPER:
5553       case NUPPER + ADD_NL:
5554         mask = RI_UPPER;
5555         goto do_class;
5556
5557       case EXACTLY:
5558         {
5559             int     cu, cl;
5560
5561             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5562              * would have been used for it.  It does handle single-byte
5563              * characters, such as latin1. */
5564             if (ireg_ic)
5565             {
5566                 cu = MB_TOUPPER(*opnd);
5567                 cl = MB_TOLOWER(*opnd);
5568                 while (count < maxcount && (*scan == cu || *scan == cl))
5569                 {
5570                     count++;
5571                     scan++;
5572                 }
5573             }
5574             else
5575             {
5576                 cu = *opnd;
5577                 while (count < maxcount && *scan == cu)
5578                 {
5579                     count++;
5580                     scan++;
5581                 }
5582             }
5583             break;
5584         }
5585
5586 #ifdef FEAT_MBYTE
5587       case MULTIBYTECODE:
5588         {
5589             int         i, len, cf = 0;
5590
5591             /* Safety check (just in case 'encoding' was changed since
5592              * compiling the program). */
5593             if ((len = (*mb_ptr2len)(opnd)) > 1)
5594             {
5595                 if (ireg_ic && enc_utf8)
5596                     cf = utf_fold(utf_ptr2char(opnd));
5597                 while (count < maxcount)
5598                 {
5599                     for (i = 0; i < len; ++i)
5600                         if (opnd[i] != scan[i])
5601                             break;
5602                     if (i < len && (!ireg_ic || !enc_utf8
5603                                         || utf_fold(utf_ptr2char(scan)) != cf))
5604                         break;
5605                     scan += len;
5606                     ++count;
5607                 }
5608             }
5609         }
5610         break;
5611 #endif
5612
5613       case ANYOF:
5614       case ANYOF + ADD_NL:
5615         testval = TRUE;
5616         /*FALLTHROUGH*/
5617
5618       case ANYBUT:
5619       case ANYBUT + ADD_NL:
5620         while (count < maxcount)
5621         {
5622 #ifdef FEAT_MBYTE
5623             int len;
5624 #endif
5625             if (*scan == NUL)
5626             {
5627                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5628                                                               || reg_line_lbr)
5629                     break;
5630                 reg_nextline();
5631                 scan = reginput;
5632                 if (got_int)
5633                     break;
5634             }
5635             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5636                 ++scan;
5637 #ifdef FEAT_MBYTE
5638             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5639             {
5640                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5641                     break;
5642                 scan += len;
5643             }
5644 #endif
5645             else
5646             {
5647                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5648                     break;
5649                 ++scan;
5650             }
5651             ++count;
5652         }
5653         break;
5654
5655       case NEWL:
5656         while (count < maxcount
5657                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5658                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5659         {
5660             count++;
5661             if (reg_line_lbr)
5662                 ADVANCE_REGINPUT();
5663             else
5664                 reg_nextline();
5665             scan = reginput;
5666             if (got_int)
5667                 break;
5668         }
5669         break;
5670
5671       default:                  /* Oh dear.  Called inappropriately. */
5672         EMSG(_(e_re_corr));
5673 #ifdef DEBUG
5674         printf("Called regrepeat with op code %d\n", OP(p));
5675 #endif
5676         break;
5677     }
5678
5679     reginput = scan;
5680
5681     return (int)count;
5682 }
5683
5684 /*
5685  * regnext - dig the "next" pointer out of a node
5686  */
5687     static char_u *
5688 regnext(p)
5689     char_u  *p;
5690 {
5691     int     offset;
5692
5693     if (p == JUST_CALC_SIZE)
5694         return NULL;
5695
5696     offset = NEXT(p);
5697     if (offset == 0)
5698         return NULL;
5699
5700     if (OP(p) == BACK)
5701         return p - offset;
5702     else
5703         return p + offset;
5704 }
5705
5706 /*
5707  * Check the regexp program for its magic number.
5708  * Return TRUE if it's wrong.
5709  */
5710     static int
5711 prog_magic_wrong()
5712 {
5713     if (UCHARAT(REG_MULTI
5714                 ? reg_mmatch->regprog->program
5715                 : reg_match->regprog->program) != REGMAGIC)
5716     {
5717         EMSG(_(e_re_corr));
5718         return TRUE;
5719     }
5720     return FALSE;
5721 }
5722
5723 /*
5724  * Cleanup the subexpressions, if this wasn't done yet.
5725  * This construction is used to clear the subexpressions only when they are
5726  * used (to increase speed).
5727  */
5728     static void
5729 cleanup_subexpr()
5730 {
5731     if (need_clear_subexpr)
5732     {
5733         if (REG_MULTI)
5734         {
5735             /* Use 0xff to set lnum to -1 */
5736             vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5737             vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5738         }
5739         else
5740         {
5741             vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5742             vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5743         }
5744         need_clear_subexpr = FALSE;
5745     }
5746 }
5747
#ifdef FEAT_SYN_HL
/*
 * Like cleanup_subexpr(), but for the "z" sub-expressions: reset their start
 * and end when need_clear_zsubexpr is set.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (!REG_MULTI)
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
        /* Filling with 0xff bytes makes every lnum -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5769
5770 /*
5771  * Advance reglnum, regline and reginput to the next line.
5772  */
5773     static void
5774 reg_nextline()
5775 {
5776     regline = reg_getline(++reglnum);
5777     reginput = regline;
5778     fast_breakcheck();
5779 }
5780
5781 /*
5782  * Save the input line and position in a regsave_T.
5783  */
5784     static void
5785 reg_save(save, gap)
5786     regsave_T   *save;
5787     garray_T    *gap;
5788 {
5789     if (REG_MULTI)
5790     {
5791         save->rs_u.pos.col = (colnr_T)(reginput - regline);
5792         save->rs_u.pos.lnum = reglnum;
5793     }
5794     else
5795         save->rs_u.ptr = reginput;
5796     save->rs_len = gap->ga_len;
5797 }
5798
5799 /*
5800  * Restore the input line and position from a regsave_T.
5801  */
5802     static void
5803 reg_restore(save, gap)
5804     regsave_T   *save;
5805     garray_T    *gap;
5806 {
5807     if (REG_MULTI)
5808     {
5809         if (reglnum != save->rs_u.pos.lnum)
5810         {
5811             /* only call reg_getline() when the line number changed to save
5812              * a bit of time */
5813             reglnum = save->rs_u.pos.lnum;
5814             regline = reg_getline(reglnum);
5815         }
5816         reginput = regline + save->rs_u.pos.col;
5817     }
5818     else
5819         reginput = save->rs_u.ptr;
5820     gap->ga_len = save->rs_len;
5821 }
5822
5823 /*
5824  * Return TRUE if current position is equal to saved position.
5825  */
5826     static int
5827 reg_save_equal(save)
5828     regsave_T   *save;
5829 {
5830     if (REG_MULTI)
5831         return reglnum == save->rs_u.pos.lnum
5832                                   && reginput == regline + save->rs_u.pos.col;
5833     return reginput == save->rs_u.ptr;
5834 }
5835
5836 /*
5837  * Tentatively set the sub-expression start to the current position (after
5838  * calling regmatch() they will have changed).  Need to save the existing
5839  * values for when there is no match.
5840  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5841  * depending on REG_MULTI.
5842  */
5843     static void
5844 save_se_multi(savep, posp)
5845     save_se_T   *savep;
5846     lpos_T      *posp;
5847 {
5848     savep->se_u.pos = *posp;
5849     posp->lnum = reglnum;
5850     posp->col = (colnr_T)(reginput - regline);
5851 }
5852
5853     static void
5854 save_se_one(savep, pp)
5855     save_se_T   *savep;
5856     char_u      **pp;
5857 {
5858     savep->se_u.ptr = *pp;
5859     *pp = reginput;
5860 }
5861
5862 /*
5863  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5864  */
5865     static int
5866 re_num_cmp(val, scan)
5867     long_u      val;
5868     char_u      *scan;
5869 {
5870     long_u  n = OPERAND_MIN(scan);
5871
5872     if (OPERAND_CMP(scan) == '>')
5873         return val > n;
5874     if (OPERAND_CMP(scan) == '<')
5875         return val < n;
5876     return val == n;
5877 }
5878
5879
5880 #ifdef DEBUG
5881
5882 /*
5883  * regdump - dump a regexp onto stdout in vaguely comprehensible form
5884  */
5885     static void
5886 regdump(pattern, r)
5887     char_u      *pattern;
5888     regprog_T   *r;
5889 {
5890     char_u  *s;
5891     int     op = EXACTLY;       /* Arbitrary non-END op. */
5892     char_u  *next;
5893     char_u  *end = NULL;
5894
5895     printf("\r\nregcomp(%s):\r\n", pattern);
5896
5897     s = r->program + 1;
5898     /*
5899      * Loop until we find the END that isn't before a referred next (an END
5900      * can also appear in a NOMATCH operand).
5901      */
5902     while (op != END || s <= end)
5903     {
5904         op = OP(s);
5905         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
5906         next = regnext(s);
5907         if (next == NULL)       /* Next ptr. */
5908             printf("(0)");
5909         else
5910             printf("(%d)", (int)((s - r->program) + (next - s)));
5911         if (end < next)
5912             end = next;
5913         if (op == BRACE_LIMITS)
5914         {
5915             /* Two short ints */
5916             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5917             s += 8;
5918         }
5919         s += 3;
5920         if (op == ANYOF || op == ANYOF + ADD_NL
5921                 || op == ANYBUT || op == ANYBUT + ADD_NL
5922                 || op == EXACTLY)
5923         {
5924             /* Literal string, where present. */
5925             while (*s != NUL)
5926                 printf("%c", *s++);
5927             s++;
5928         }
5929         printf("\r\n");
5930     }
5931
5932     /* Header fields of interest. */
5933     if (r->regstart != NUL)
5934         printf("start `%s' 0x%x; ", r->regstart < 256
5935                 ? (char *)transchar(r->regstart)
5936                 : "multibyte", r->regstart);
5937     if (r->reganch)
5938         printf("anchored; ");
5939     if (r->regmust != NULL)
5940         printf("must have \"%s\"", r->regmust);
5941     printf("\r\n");
5942 }
5943
5944 /*
5945  * regprop - printable representation of opcode
5946  */
5947     static char_u *
5948 regprop(op)
5949     char_u         *op;
5950 {
5951     char_u          *p;
5952     static char_u   buf[50];
5953
5954     (void) strcpy(buf, ":");
5955
5956     switch (OP(op))
5957     {
5958       case BOL:
5959         p = "BOL";
5960         break;
5961       case EOL:
5962         p = "EOL";
5963         break;
5964       case RE_BOF:
5965         p = "BOF";
5966         break;
5967       case RE_EOF:
5968         p = "EOF";
5969         break;
5970       case CURSOR:
5971         p = "CURSOR";
5972         break;
5973       case RE_VISUAL:
5974         p = "RE_VISUAL";
5975         break;
5976       case RE_LNUM:
5977         p = "RE_LNUM";
5978         break;
5979       case RE_MARK:
5980         p = "RE_MARK";
5981         break;
5982       case RE_COL:
5983         p = "RE_COL";
5984         break;
5985       case RE_VCOL:
5986         p = "RE_VCOL";
5987         break;
5988       case BOW:
5989         p = "BOW";
5990         break;
5991       case EOW:
5992         p = "EOW";
5993         break;
5994       case ANY:
5995         p = "ANY";
5996         break;
5997       case ANY + ADD_NL:
5998         p = "ANY+NL";
5999         break;
6000       case ANYOF:
6001         p = "ANYOF";
6002         break;
6003       case ANYOF + ADD_NL:
6004         p = "ANYOF+NL";
6005         break;
6006       case ANYBUT:
6007         p = "ANYBUT";
6008         break;
6009       case ANYBUT + ADD_NL:
6010         p = "ANYBUT+NL";
6011         break;
6012       case IDENT:
6013         p = "IDENT";
6014         break;
6015       case IDENT + ADD_NL:
6016         p = "IDENT+NL";
6017         break;
6018       case SIDENT:
6019         p = "SIDENT";
6020         break;
6021       case SIDENT + ADD_NL:
6022         p = "SIDENT+NL";
6023         break;
6024       case KWORD:
6025         p = "KWORD";
6026         break;
6027       case KWORD + ADD_NL:
6028         p = "KWORD+NL";
6029         break;
6030       case SKWORD:
6031         p = "SKWORD";
6032         break;
6033       case SKWORD + ADD_NL:
6034         p = "SKWORD+NL";
6035         break;
6036       case FNAME:
6037         p = "FNAME";
6038         break;
6039       case FNAME + ADD_NL:
6040         p = "FNAME+NL";
6041         break;
6042       case SFNAME:
6043         p = "SFNAME";
6044         break;
6045       case SFNAME + ADD_NL:
6046         p = "SFNAME+NL";
6047         break;
6048       case PRINT:
6049         p = "PRINT";
6050         break;
6051       case PRINT + ADD_NL:
6052         p = "PRINT+NL";
6053         break;
6054       case SPRINT:
6055         p = "SPRINT";
6056         break;
6057       case SPRINT + ADD_NL:
6058         p = "SPRINT+NL";
6059         break;
6060       case WHITE:
6061         p = "WHITE";
6062         break;
6063       case WHITE + ADD_NL:
6064         p = "WHITE+NL";
6065         break;
6066       case NWHITE:
6067         p = "NWHITE";
6068         break;
6069       case NWHITE + ADD_NL:
6070         p = "NWHITE+NL";
6071         break;
6072       case DIGIT:
6073         p = "DIGIT";
6074         break;
6075       case DIGIT + ADD_NL:
6076         p = "DIGIT+NL";
6077         break;
6078       case NDIGIT:
6079         p = "NDIGIT";
6080         break;
6081       case NDIGIT + ADD_NL:
6082         p = "NDIGIT+NL";
6083         break;
6084       case HEX:
6085         p = "HEX";
6086         break;
6087       case HEX + ADD_NL:
6088         p = "HEX+NL";
6089         break;
6090       case NHEX:
6091         p = "NHEX";
6092         break;
6093       case NHEX + ADD_NL:
6094         p = "NHEX+NL";
6095         break;
6096       case OCTAL:
6097         p = "OCTAL";
6098         break;
6099       case OCTAL + ADD_NL:
6100         p = "OCTAL+NL";
6101         break;
6102       case NOCTAL:
6103         p = "NOCTAL";
6104         break;
6105       case NOCTAL + ADD_NL:
6106         p = "NOCTAL+NL";
6107         break;
6108       case WORD:
6109         p = "WORD";
6110         break;
6111       case WORD + ADD_NL:
6112         p = "WORD+NL";
6113         break;
6114       case NWORD:
6115         p = "NWORD";
6116         break;
6117       case NWORD + ADD_NL:
6118         p = "NWORD+NL";
6119         break;
6120       case HEAD:
6121         p = "HEAD";
6122         break;
6123       case HEAD + ADD_NL:
6124         p = "HEAD+NL";
6125         break;
6126       case NHEAD:
6127         p = "NHEAD";
6128         break;
6129       case NHEAD + ADD_NL:
6130         p = "NHEAD+NL";
6131         break;
6132       case ALPHA:
6133         p = "ALPHA";
6134         break;
6135       case ALPHA + ADD_NL:
6136         p = "ALPHA+NL";
6137         break;
6138       case NALPHA:
6139         p = "NALPHA";
6140         break;
6141       case NALPHA + ADD_NL:
6142         p = "NALPHA+NL";
6143         break;
6144       case LOWER:
6145         p = "LOWER";
6146         break;
6147       case LOWER + ADD_NL:
6148         p = "LOWER+NL";
6149         break;
6150       case NLOWER:
6151         p = "NLOWER";
6152         break;
6153       case NLOWER + ADD_NL:
6154         p = "NLOWER+NL";
6155         break;
6156       case UPPER:
6157         p = "UPPER";
6158         break;
6159       case UPPER + ADD_NL:
6160         p = "UPPER+NL";
6161         break;
6162       case NUPPER:
6163         p = "NUPPER";
6164         break;
6165       case NUPPER + ADD_NL:
6166         p = "NUPPER+NL";
6167         break;
6168       case BRANCH:
6169         p = "BRANCH";
6170         break;
6171       case EXACTLY:
6172         p = "EXACTLY";
6173         break;
6174       case NOTHING:
6175         p = "NOTHING";
6176         break;
6177       case BACK:
6178         p = "BACK";
6179         break;
6180       case END:
6181         p = "END";
6182         break;
6183       case MOPEN + 0:
6184         p = "MATCH START";
6185         break;
6186       case MOPEN + 1:
6187       case MOPEN + 2:
6188       case MOPEN + 3:
6189       case MOPEN + 4:
6190       case MOPEN + 5:
6191       case MOPEN + 6:
6192       case MOPEN + 7:
6193       case MOPEN + 8:
6194       case MOPEN + 9:
6195         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6196         p = NULL;
6197         break;
6198       case MCLOSE + 0:
6199         p = "MATCH END";
6200         break;
6201       case MCLOSE + 1:
6202       case MCLOSE + 2:
6203       case MCLOSE + 3:
6204       case MCLOSE + 4:
6205       case MCLOSE + 5:
6206       case MCLOSE + 6:
6207       case MCLOSE + 7:
6208       case MCLOSE + 8:
6209       case MCLOSE + 9:
6210         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6211         p = NULL;
6212         break;
6213       case BACKREF + 1:
6214       case BACKREF + 2:
6215       case BACKREF + 3:
6216       case BACKREF + 4:
6217       case BACKREF + 5:
6218       case BACKREF + 6:
6219       case BACKREF + 7:
6220       case BACKREF + 8:
6221       case BACKREF + 9:
6222         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6223         p = NULL;
6224         break;
6225       case NOPEN:
6226         p = "NOPEN";
6227         break;
6228       case NCLOSE:
6229         p = "NCLOSE";
6230         break;
6231 #ifdef FEAT_SYN_HL
6232       case ZOPEN + 1:
6233       case ZOPEN + 2:
6234       case ZOPEN + 3:
6235       case ZOPEN + 4:
6236       case ZOPEN + 5:
6237       case ZOPEN + 6:
6238       case ZOPEN + 7:
6239       case ZOPEN + 8:
6240       case ZOPEN + 9:
6241         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6242         p = NULL;
6243         break;
6244       case ZCLOSE + 1:
6245       case ZCLOSE + 2:
6246       case ZCLOSE + 3:
6247       case ZCLOSE + 4:
6248       case ZCLOSE + 5:
6249       case ZCLOSE + 6:
6250       case ZCLOSE + 7:
6251       case ZCLOSE + 8:
6252       case ZCLOSE + 9:
6253         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6254         p = NULL;
6255         break;
6256       case ZREF + 1:
6257       case ZREF + 2:
6258       case ZREF + 3:
6259       case ZREF + 4:
6260       case ZREF + 5:
6261       case ZREF + 6:
6262       case ZREF + 7:
6263       case ZREF + 8:
6264       case ZREF + 9:
6265         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6266         p = NULL;
6267         break;
6268 #endif
6269       case STAR:
6270         p = "STAR";
6271         break;
6272       case PLUS:
6273         p = "PLUS";
6274         break;
6275       case NOMATCH:
6276         p = "NOMATCH";
6277         break;
6278       case MATCH:
6279         p = "MATCH";
6280         break;
6281       case BEHIND:
6282         p = "BEHIND";
6283         break;
6284       case NOBEHIND:
6285         p = "NOBEHIND";
6286         break;
6287       case SUBPAT:
6288         p = "SUBPAT";
6289         break;
6290       case BRACE_LIMITS:
6291         p = "BRACE_LIMITS";
6292         break;
6293       case BRACE_SIMPLE:
6294         p = "BRACE_SIMPLE";
6295         break;
6296       case BRACE_COMPLEX + 0:
6297       case BRACE_COMPLEX + 1:
6298       case BRACE_COMPLEX + 2:
6299       case BRACE_COMPLEX + 3:
6300       case BRACE_COMPLEX + 4:
6301       case BRACE_COMPLEX + 5:
6302       case BRACE_COMPLEX + 6:
6303       case BRACE_COMPLEX + 7:
6304       case BRACE_COMPLEX + 8:
6305       case BRACE_COMPLEX + 9:
6306         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6307         p = NULL;
6308         break;
6309 #ifdef FEAT_MBYTE
6310       case MULTIBYTECODE:
6311         p = "MULTIBYTECODE";
6312         break;
6313 #endif
6314       case NEWL:
6315         p = "NEWL";
6316         break;
6317       default:
6318         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6319         p = NULL;
6320         break;
6321     }
6322     if (p != NULL)
6323         (void) strcat(buf, p);
6324     return buf;
6325 }
6326 #endif
6327
6328 #ifdef FEAT_MBYTE
6329 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6330
/* The (up to three) characters that a precomposed character stands for;
 * unused positions are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f, indexed by
 * "character - 0xfb20".  Code points marked UNUSED map to themselves.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * mb_decompose() - split character "c" into its base character and up to two
 * following characters.
 *
 * For a character in the range 0xfb20 - 0xfb4f the three values from
 * decomp_table[] are stored in "*c1", "*c2" and "*c3".
 * Any other character is returned unchanged in "*c1", while "*c2" and "*c3"
 * are set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character of the table, otherwise
     * the index below becomes negative and reads outside of the table. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6409 #endif
6410
6411 /*
6412  * Compare two strings, ignore case if ireg_ic set.
6413  * Return 0 if strings match, non-zero otherwise.
6414  * Correct the length "*n" when composing characters are ignored.
6415  */
6416     static int
6417 cstrncmp(s1, s2, n)
6418     char_u      *s1, *s2;
6419     int         *n;
6420 {
6421     int         result;
6422
6423     if (!ireg_ic)
6424         result = STRNCMP(s1, s2, *n);
6425     else
6426         result = MB_STRNICMP(s1, s2, *n);
6427
6428 #ifdef FEAT_MBYTE
6429     /* if it failed and it's utf8 and we want to combineignore: */
6430     if (result != 0 && enc_utf8 && ireg_icombine)
6431     {
6432         char_u  *str1, *str2;
6433         int     c1, c2, c11, c12;
6434         int     junk;
6435
6436         /* we have to handle the strcmp ourselves, since it is necessary to
6437          * deal with the composing characters by ignoring them: */
6438         str1 = s1;
6439         str2 = s2;
6440         c1 = c2 = 0;
6441         while ((int)(str1 - s1) < *n)
6442         {
6443             c1 = mb_ptr2char_adv(&str1);
6444             c2 = mb_ptr2char_adv(&str2);
6445
6446             /* decompose the character if necessary, into 'base' characters
6447              * because I don't care about Arabic, I will hard-code the Hebrew
6448              * which I *do* care about!  So sue me... */
6449             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6450             {
6451                 /* decomposition necessary? */
6452                 mb_decompose(c1, &c11, &junk, &junk);
6453                 mb_decompose(c2, &c12, &junk, &junk);
6454                 c1 = c11;
6455                 c2 = c12;
6456                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6457                     break;
6458             }
6459         }
6460         result = c2 - c1;
6461         if (result == 0)
6462             *n = (int)(str2 - s2);
6463     }
6464 #endif
6465
6466     return result;
6467 }
6468
6469 /*
6470  * cstrchr: This function is used a lot for simple searches, keep it fast!
6471  */
6472     static char_u *
6473 cstrchr(s, c)
6474     char_u      *s;
6475     int         c;
6476 {
6477     char_u      *p;
6478     int         cc;
6479
6480     if (!ireg_ic
6481 #ifdef FEAT_MBYTE
6482             || (!enc_utf8 && mb_char2len(c) > 1)
6483 #endif
6484             )
6485         return vim_strchr(s, c);
6486
6487     /* tolower() and toupper() can be slow, comparing twice should be a lot
6488      * faster (esp. when using MS Visual C++!).
6489      * For UTF-8 need to use folded case. */
6490 #ifdef FEAT_MBYTE
6491     if (enc_utf8 && c > 0x80)
6492         cc = utf_fold(c);
6493     else
6494 #endif
6495          if (MB_ISUPPER(c))
6496         cc = MB_TOLOWER(c);
6497     else if (MB_ISLOWER(c))
6498         cc = MB_TOUPPER(c);
6499     else
6500         return vim_strchr(s, c);
6501
6502 #ifdef FEAT_MBYTE
6503     if (has_mbyte)
6504     {
6505         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6506         {
6507             if (enc_utf8 && c > 0x80)
6508             {
6509                 if (utf_fold(utf_ptr2char(p)) == cc)
6510                     return p;
6511             }
6512             else if (*p == c || *p == cc)
6513                 return p;
6514         }
6515     }
6516     else
6517 #endif
6518         /* Faster version for when there are no multi-byte characters. */
6519         for (p = s; *p != NUL; ++p)
6520             if (*p == c || *p == cc)
6521                 return p;
6522
6523     return NULL;
6524 }
6525
6526 /***************************************************************
6527  *                    regsub stuff                             *
6528  ***************************************************************/
6529
6530 /* This stuff below really confuses cc on an SGI -- webb */
6531 #ifdef __sgi
6532 # undef __ARGS
6533 # define __ARGS(x)  ()
6534 #endif
6535
/*
 * Ideally fptr_T would be a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * Such a recursive type cannot be written in C, so we declare a pointer to a
 * function returning a pointer to a function returning void instead.  This
 * should work for all compilers.
 */
6542 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6543
6544 static fptr_T do_upper __ARGS((int *, int));
6545 static fptr_T do_Upper __ARGS((int *, int));
6546 static fptr_T do_lower __ARGS((int *, int));
6547 static fptr_T do_Lower __ARGS((int *, int));
6548
6549 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6550
6551     static fptr_T
6552 do_upper(d, c)
6553     int         *d;
6554     int         c;
6555 {
6556     *d = MB_TOUPPER(c);
6557
6558     return (fptr_T)NULL;
6559 }
6560
6561     static fptr_T
6562 do_Upper(d, c)
6563     int         *d;
6564     int         c;
6565 {
6566     *d = MB_TOUPPER(c);
6567
6568     return (fptr_T)do_Upper;
6569 }
6570
6571     static fptr_T
6572 do_lower(d, c)
6573     int         *d;
6574     int         c;
6575 {
6576     *d = MB_TOLOWER(c);
6577
6578     return (fptr_T)NULL;
6579 }
6580
6581     static fptr_T
6582 do_Lower(d, c)
6583     int         *d;
6584     int         c;
6585 {
6586     *d = MB_TOLOWER(c);
6587
6588     return (fptr_T)do_Lower;
6589 }
6590
6591 /*
6592  * regtilde(): Replace tildes in the pattern by the old pattern.
6593  *
6594  * Short explanation of the tilde: It stands for the previous replacement
6595  * pattern.  If that previous pattern also contains a ~ we should go back a
6596  * step further...  But we insert the previous pattern into the current one
6597  * and remember that.
6598  * This still does not handle the case where "magic" changes.  So require the
6599  * user to keep his hands off of "magic".
6600  *
6601  * The tildes are parsed once before the first call to vim_regsub().
6602  */
6603     char_u *
6604 regtilde(source, magic)
6605     char_u      *source;
6606     int         magic;
6607 {
6608     char_u      *newsub = source;
6609     char_u      *tmpsub;
6610     char_u      *p;
6611     int         len;
6612     int         prevlen;
6613
6614     for (p = newsub; *p; ++p)
6615     {
6616         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6617         {
6618             if (reg_prev_sub != NULL)
6619             {
6620                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6621                 prevlen = (int)STRLEN(reg_prev_sub);
6622                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6623                 if (tmpsub != NULL)
6624                 {
6625                     /* copy prefix */
6626                     len = (int)(p - newsub);    /* not including ~ */
6627                     mch_memmove(tmpsub, newsub, (size_t)len);
6628                     /* interpret tilde */
6629                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6630                     /* copy postfix */
6631                     if (!magic)
6632                         ++p;                    /* back off \ */
6633                     STRCPY(tmpsub + len + prevlen, p + 1);
6634
6635                     if (newsub != source)       /* already allocated newsub */
6636                         vim_free(newsub);
6637                     newsub = tmpsub;
6638                     p = newsub + len + prevlen;
6639                 }
6640             }
6641             else if (magic)
6642                 mch_memmove(p, p + 1, STRLEN(p));       /* remove '~' */
6643             else
6644                 mch_memmove(p, p + 2, STRLEN(p) - 1);   /* remove '\~' */
6645             --p;
6646         }
6647         else
6648         {
6649             if (*p == '\\' && p[1])             /* skip escaped characters */
6650                 ++p;
6651 #ifdef FEAT_MBYTE
6652             if (has_mbyte)
6653                 p += (*mb_ptr2len)(p) - 1;
6654 #endif
6655         }
6656     }
6657
6658     vim_free(reg_prev_sub);
6659     if (newsub != source)       /* newsub was allocated, just keep it */
6660         reg_prev_sub = newsub;
6661     else                        /* no ~ found, need to save newsub  */
6662         reg_prev_sub = vim_strsave(newsub);
6663     return newsub;
6664 }
6665
6666 #ifdef FEAT_EVAL
6667 static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */
6668
6669 /* These pointers are used instead of reg_match and reg_mmatch for
6670  * reg_submatch().  Needed for when the substitution string is an expression
6671  * that contains a call to substitute() and submatch(). */
6672 static regmatch_T       *submatch_match;
6673 static regmmatch_T      *submatch_mmatch;
6674 #endif
6675
6676 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6677 /*
6678  * vim_regsub() - perform substitutions after a vim_regexec() or
6679  * vim_regexec_multi() match.
6680  *
6681  * If "copy" is TRUE really copy into "dest".
6682  * If "copy" is FALSE nothing is copied, this is just to find out the length
6683  * of the result.
6684  *
6685  * If "backslash" is TRUE, a backslash will be removed later, need to double
6686  * them to keep them, and insert a backslash before a CR to avoid it being
6687  * replaced with a line break later.
6688  *
6689  * Note: The matched text must not change between the call of
6690  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6691  * references invalid!
6692  *
6693  * Returns the size of the replacement, including terminating NUL.
6694  */
6695     int
6696 vim_regsub(rmp, source, dest, copy, magic, backslash)
6697     regmatch_T  *rmp;
6698     char_u      *source;
6699     char_u      *dest;
6700     int         copy;
6701     int         magic;
6702     int         backslash;
6703 {
6704     reg_match = rmp;
6705     reg_mmatch = NULL;
6706     reg_maxline = 0;
6707     return vim_regsub_both(source, dest, copy, magic, backslash);
6708 }
6709 #endif
6710
6711     int
6712 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6713     regmmatch_T *rmp;
6714     linenr_T    lnum;
6715     char_u      *source;
6716     char_u      *dest;
6717     int         copy;
6718     int         magic;
6719     int         backslash;
6720 {
6721     reg_match = NULL;
6722     reg_mmatch = rmp;
6723     reg_buf = curbuf;           /* always works on the current buffer! */
6724     reg_firstlnum = lnum;
6725     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6726     return vim_regsub_both(source, dest, copy, magic, backslash);
6727 }
6728
6729     static int
6730 vim_regsub_both(source, dest, copy, magic, backslash)
6731     char_u      *source;
6732     char_u      *dest;
6733     int         copy;
6734     int         magic;
6735     int         backslash;
6736 {
6737     char_u      *src;
6738     char_u      *dst;
6739     char_u      *s;
6740     int         c;
6741     int         cc;
6742     int         no = -1;
6743     fptr_T      func = (fptr_T)NULL;
6744     linenr_T    clnum = 0;      /* init for GCC */
6745     int         len = 0;        /* init for GCC */
6746 #ifdef FEAT_EVAL
6747     static char_u *eval_result = NULL;
6748 #endif
6749
6750     /* Be paranoid... */
6751     if (source == NULL || dest == NULL)
6752     {
6753         EMSG(_(e_null));
6754         return 0;
6755     }
6756     if (prog_magic_wrong())
6757         return 0;
6758     src = source;
6759     dst = dest;
6760
6761     /*
6762      * When the substitute part starts with "\=" evaluate it as an expression.
6763      */
6764     if (source[0] == '\\' && source[1] == '='
6765 #ifdef FEAT_EVAL
6766             && !can_f_submatch      /* can't do this recursively */
6767 #endif
6768             )
6769     {
6770 #ifdef FEAT_EVAL
6771         /* To make sure that the length doesn't change between checking the
6772          * length and copying the string, and to speed up things, the
6773          * resulting string is saved from the call with "copy" == FALSE to the
6774          * call with "copy" == TRUE. */
6775         if (copy)
6776         {
6777             if (eval_result != NULL)
6778             {
6779                 STRCPY(dest, eval_result);
6780                 dst += STRLEN(eval_result);
6781                 vim_free(eval_result);
6782                 eval_result = NULL;
6783             }
6784         }
6785         else
6786         {
6787             linenr_T    save_reg_maxline;
6788             win_T       *save_reg_win;
6789             int         save_ireg_ic;
6790
6791             vim_free(eval_result);
6792
6793             /* The expression may contain substitute(), which calls us
6794              * recursively.  Make sure submatch() gets the text from the first
6795              * level.  Don't need to save "reg_buf", because
6796              * vim_regexec_multi() can't be called recursively. */
6797             submatch_match = reg_match;
6798             submatch_mmatch = reg_mmatch;
6799             save_reg_maxline = reg_maxline;
6800             save_reg_win = reg_win;
6801             save_ireg_ic = ireg_ic;
6802             can_f_submatch = TRUE;
6803
6804             eval_result = eval_to_string(source + 2, NULL, TRUE);
6805             if (eval_result != NULL)
6806             {
6807                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6808                 {
6809                     /* Change NL to CR, so that it becomes a line break.
6810                      * Skip over a backslashed character. */
6811                     if (*s == NL)
6812                         *s = CAR;
6813                     else if (*s == '\\' && s[1] != NUL)
6814                         ++s;
6815                 }
6816
6817                 dst += STRLEN(eval_result);
6818             }
6819
6820             reg_match = submatch_match;
6821             reg_mmatch = submatch_mmatch;
6822             reg_maxline = save_reg_maxline;
6823             reg_win = save_reg_win;
6824             ireg_ic = save_ireg_ic;
6825             can_f_submatch = FALSE;
6826         }
6827 #endif
6828     }
6829     else
6830       while ((c = *src++) != NUL)
6831       {
6832         if (c == '&' && magic)
6833             no = 0;
6834         else if (c == '\\' && *src != NUL)
6835         {
6836             if (*src == '&' && !magic)
6837             {
6838                 ++src;
6839                 no = 0;
6840             }
6841             else if ('0' <= *src && *src <= '9')
6842             {
6843                 no = *src++ - '0';
6844             }
6845             else if (vim_strchr((char_u *)"uUlLeE", *src))
6846             {
6847                 switch (*src++)
6848                 {
6849                 case 'u':   func = (fptr_T)do_upper;
6850                             continue;
6851                 case 'U':   func = (fptr_T)do_Upper;
6852                             continue;
6853                 case 'l':   func = (fptr_T)do_lower;
6854                             continue;
6855                 case 'L':   func = (fptr_T)do_Lower;
6856                             continue;
6857                 case 'e':
6858                 case 'E':   func = (fptr_T)NULL;
6859                             continue;
6860                 }
6861             }
6862         }
6863         if (no < 0)           /* Ordinary character. */
6864         {
6865             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
6866             {
6867                 /* Copy a special key as-is. */
6868                 if (copy)
6869                 {
6870                     *dst++ = c;
6871                     *dst++ = *src++;
6872                     *dst++ = *src++;
6873                 }
6874                 else
6875                 {
6876                     dst += 3;
6877                     src += 2;
6878                 }
6879                 continue;
6880             }
6881
6882             if (c == '\\' && *src != NUL)
6883             {
6884                 /* Check for abbreviations -- webb */
6885                 switch (*src)
6886                 {
6887                     case 'r':   c = CAR;        ++src;  break;
6888                     case 'n':   c = NL;         ++src;  break;
6889                     case 't':   c = TAB;        ++src;  break;
6890                  /* Oh no!  \e already has meaning in subst pat :-( */
6891                  /* case 'e':   c = ESC;        ++src;  break; */
6892                     case 'b':   c = Ctrl_H;     ++src;  break;
6893
6894                     /* If "backslash" is TRUE the backslash will be removed
6895                      * later.  Used to insert a literal CR. */
6896                     default:    if (backslash)
6897                                 {
6898                                     if (copy)
6899                                         *dst = '\\';
6900                                     ++dst;
6901                                 }
6902                                 c = *src++;
6903                 }
6904             }
6905 #ifdef FEAT_MBYTE
6906             else if (has_mbyte)
6907                 c = mb_ptr2char(src - 1);
6908 #endif
6909
6910             /* Write to buffer, if copy is set. */
6911             if (func == (fptr_T)NULL)   /* just copy */
6912                 cc = c;
6913             else
6914                 /* Turbo C complains without the typecast */
6915                 func = (fptr_T)(func(&cc, c));
6916
6917 #ifdef FEAT_MBYTE
6918             if (has_mbyte)
6919             {
6920                 src += mb_ptr2len(src - 1) - 1;
6921                 if (copy)
6922                     mb_char2bytes(cc, dst);
6923                 dst += mb_char2len(cc) - 1;
6924             }
6925             else
6926 #endif
6927                 if (copy)
6928                     *dst = cc;
6929             dst++;
6930         }
6931         else
6932         {
6933             if (REG_MULTI)
6934             {
6935                 clnum = reg_mmatch->startpos[no].lnum;
6936                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6937                     s = NULL;
6938                 else
6939                 {
6940                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6941                     if (reg_mmatch->endpos[no].lnum == clnum)
6942                         len = reg_mmatch->endpos[no].col
6943                                                - reg_mmatch->startpos[no].col;
6944                     else
6945                         len = (int)STRLEN(s);
6946                 }
6947             }
6948             else
6949             {
6950                 s = reg_match->startp[no];
6951                 if (reg_match->endp[no] == NULL)
6952                     s = NULL;
6953                 else
6954                     len = (int)(reg_match->endp[no] - s);
6955             }
6956             if (s != NULL)
6957             {
6958                 for (;;)
6959                 {
6960                     if (len == 0)
6961                     {
6962                         if (REG_MULTI)
6963                         {
6964                             if (reg_mmatch->endpos[no].lnum == clnum)
6965                                 break;
6966                             if (copy)
6967                                 *dst = CAR;
6968                             ++dst;
6969                             s = reg_getline(++clnum);
6970                             if (reg_mmatch->endpos[no].lnum == clnum)
6971                                 len = reg_mmatch->endpos[no].col;
6972                             else
6973                                 len = (int)STRLEN(s);
6974                         }
6975                         else
6976                             break;
6977                     }
6978                     else if (*s == NUL) /* we hit NUL. */
6979                     {
6980                         if (copy)
6981                             EMSG(_(e_re_damg));
6982                         goto exit;
6983                     }
6984                     else
6985                     {
6986                         if (backslash && (*s == CAR || *s == '\\'))
6987                         {
6988                             /*
6989                              * Insert a backslash in front of a CR, otherwise
6990                              * it will be replaced by a line break.
6991                              * Number of backslashes will be halved later,
6992                              * double them here.
6993                              */
6994                             if (copy)
6995                             {
6996                                 dst[0] = '\\';
6997                                 dst[1] = *s;
6998                             }
6999                             dst += 2;
7000                         }
7001                         else
7002                         {
7003 #ifdef FEAT_MBYTE
7004                             if (has_mbyte)
7005                                 c = mb_ptr2char(s);
7006                             else
7007 #endif
7008                                 c = *s;
7009
7010                             if (func == (fptr_T)NULL)   /* just copy */
7011                                 cc = c;
7012                             else
7013                                 /* Turbo C complains without the typecast */
7014                                 func = (fptr_T)(func(&cc, c));
7015
7016 #ifdef FEAT_MBYTE
7017                             if (has_mbyte)
7018                             {
7019                                 int l;
7020
7021                                 /* Copy composing characters separately, one
7022                                  * at a time. */
7023                                 if (enc_utf8)
7024                                     l = utf_ptr2len(s) - 1;
7025                                 else
7026                                     l = mb_ptr2len(s) - 1;
7027
7028                                 s += l;
7029                                 len -= l;
7030                                 if (copy)
7031                                     mb_char2bytes(cc, dst);
7032                                 dst += mb_char2len(cc) - 1;
7033                             }
7034                             else
7035 #endif
7036                                 if (copy)
7037                                     *dst = cc;
7038                             dst++;
7039                         }
7040
7041                         ++s;
7042                         --len;
7043                     }
7044                 }
7045             }
7046             no = -1;
7047         }
7048       }
7049     if (copy)
7050         *dst = NUL;
7051
7052 exit:
7053     return (int)((dst - dest) + 1);
7054 }
7055
7056 #ifdef FEAT_EVAL
7057 /*
7058  * Used for the submatch() function: get the string from the n'th submatch in
7059  * allocated memory.
7060  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7061  */
7062     char_u *
7063 reg_submatch(no)
7064     int         no;
7065 {
7066     char_u      *retval = NULL;
7067     char_u      *s;
7068     int         len;
7069     int         round;
7070     linenr_T    lnum;
7071
7072     if (!can_f_submatch || no < 0)
7073         return NULL;
7074
7075     if (submatch_match == NULL)
7076     {
7077         /*
7078          * First round: compute the length and allocate memory.
7079          * Second round: copy the text.
7080          */
7081         for (round = 1; round <= 2; ++round)
7082         {
7083             lnum = submatch_mmatch->startpos[no].lnum;
7084             if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7085                 return NULL;
7086
7087             s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
7088             if (s == NULL)  /* anti-crash check, cannot happen? */
7089                 break;
7090             if (submatch_mmatch->endpos[no].lnum == lnum)
7091             {
7092                 /* Within one line: take form start to end col. */
7093                 len = submatch_mmatch->endpos[no].col
7094                                           - submatch_mmatch->startpos[no].col;
7095                 if (round == 2)
7096                     vim_strncpy(retval, s, len);
7097                 ++len;
7098             }
7099             else
7100             {
7101                 /* Multiple lines: take start line from start col, middle
7102                  * lines completely and end line up to end col. */
7103                 len = (int)STRLEN(s);
7104                 if (round == 2)
7105                 {
7106                     STRCPY(retval, s);
7107                     retval[len] = '\n';
7108                 }
7109                 ++len;
7110                 ++lnum;
7111                 while (lnum < submatch_mmatch->endpos[no].lnum)
7112                 {
7113                     s = reg_getline(lnum++);
7114                     if (round == 2)
7115                         STRCPY(retval + len, s);
7116                     len += (int)STRLEN(s);
7117                     if (round == 2)
7118                         retval[len] = '\n';
7119                     ++len;
7120                 }
7121                 if (round == 2)
7122                     STRNCPY(retval + len, reg_getline(lnum),
7123                                              submatch_mmatch->endpos[no].col);
7124                 len += submatch_mmatch->endpos[no].col;
7125                 if (round == 2)
7126                     retval[len] = NUL;
7127                 ++len;
7128             }
7129
7130             if (retval == NULL)
7131             {
7132                 retval = lalloc((long_u)len, TRUE);
7133                 if (retval == NULL)
7134                     return NULL;
7135             }
7136         }
7137     }
7138     else
7139     {
7140         if (submatch_match->endp[no] == NULL)
7141             retval = NULL;
7142         else
7143         {
7144             s = submatch_match->startp[no];
7145             retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7146         }
7147     }
7148
7149     return retval;
7150 }
7151 #endif