src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
  72  * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
  73  * pointer with a BRANCH on both ends of it is connecting two alternatives.
  74  * (Here we have one of the subtle syntax dependencies: an individual BRANCH
  75  * (as opposed to a collection of them) is never concatenated with anything
  76  * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
  77  * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
 139 /* definition   number             opnd?    meaning */
 140 #define END             0       /*      End of program or NOMATCH operand. */
 141 #define BOL             1       /*      Match "" at beginning of line. */
 142 #define EOL             2       /*      Match "" at end of line. */
 143 #define BRANCH          3       /* node Match this alternative, or the
 144                                  *      next... */
 145 #define BACK            4       /*      Match "", "next" ptr points backward. */
 146 #define EXACTLY         5       /* str  Match this string. */
 147 #define NOTHING         6       /*      Match empty string. */
 148 #define STAR            7       /* node Match this (simple) thing 0 or more
 149                                  *      times. */
 150 #define PLUS            8       /* node Match this (simple) thing 1 or more
 151                                  *      times. */
 152 #define MATCH           9       /* node match the operand zero-width */
 153 #define NOMATCH         10      /* node check for no match with operand */
 154 #define BEHIND          11      /* node look behind for a match with operand */
 155 #define NOBEHIND        12      /* node look behind for no match with operand */
 156 #define SUBPAT          13      /* node match the operand here */
 157 #define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
 158                                  *      n times (\{m,n\}). */
 159 #define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
 160 #define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
 161 #define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
 162                                  *      and BRACE_COMPLEX. */
 163 #define NEWL            18      /*      Match line-break */
 164 #define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
 203 #define MOPEN           80  /* -89       Mark this point in input as start of
 204                                  *       \( subexpr.  MOPEN + 0 marks start of
 205                                  *       match. */
 206 #define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
 207                                  *       end of match. */
 208 #define BACKREF         100 /* -109 node Match same string again \1-\9 */
 209
 210 #ifdef FEAT_SYN_HL
 211 # define ZOPEN          110 /* -119      Mark this point in input as start of
 212                                  *       \z( subexpr. */
 213 # define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
 214 # define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
 215 #endif
 216
 217 #define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */
 218
 219 #define NOPEN           150     /*      Mark this point in input as start of
 220                                         \%( subexpr. */
 221 #define NCLOSE          151     /*      Analogous to NOPEN. */
 222
 223 #define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
 224 #define RE_BOF          201     /*      Match "" at beginning of file. */
 225 #define RE_EOF          202     /*      Match "" at end of file. */
 226 #define CURSOR          203     /*      Match location of cursor. */
 227
 228 #define RE_LNUM         204     /* nr cmp  Match line number */
 229 #define RE_COL          205     /* nr cmp  Match column number */
 230 #define RE_VCOL         206     /* nr cmp  Match virtual column number */
 231
 232 #define RE_MARK         207     /* mark cmp  Match mark position */
 233 #define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
 240 #define Magic(x)        ((int)(x) - 256)    /* make x Magic: subtract 256 */
 241 #define un_Magic(x)     ((x) + 256)         /* undo Magic(): add 256 back */
 242 #define is_Magic(x)     ((x) < 0)           /* non-zero when x is negative */
 243
 244 static int no_Magic __ARGS((int x));        /* x with Magic-ness removed */
 245 static int toggle_Magic __ARGS((int x));    /* Magic <-> not Magic */
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
 271 #define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
 310 #define OP(p)           ((int)*(p))         /* opcode: byte 0 of the node */
 311 #define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) /* "next" offset: bytes 1-2, high byte first */
 312 #define OPERAND(p)      ((p) + 3)           /* operand starts at byte 3 */
 313 /* Obtain an operand that was stored as four bytes, MSB first (bytes 3-6). */
 314 #define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 315                         + ((long)(p)[5] << 8) + (long)(p)[6])
 316 /* Obtain a second operand stored as four bytes (bytes 7-10 of the node). */
 317 #define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
 318 /* Obtain a second single-byte operand stored after a four bytes operand
 319  * (byte 7 of the node). */
 319 #define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
 324 #define UCHARAT(p)      ((int)*(char_u *)(p))   /* byte at "p", read through a char_u pointer */
 325
 326 /* Used for an error (down from) vim_regcomp(): give the error message, set
 327  * rc_did_emsg and return NULL.  Each macro expands to a complete "return"
 327  * statement, thus it leaves the function it is used in. */
 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) /* message argument is "" when "c" is non-zero, "\\" otherwise */
 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) /* like EMSG_RET_NULL but returns FAIL */
 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 332
 333 #define MAX_LIMIT       (32767L << 16L)     /* 32767 shifted left by 16 bits */
 334
 335 static int re_multi_type __ARGS((int));
 336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 337 static char_u *cstrchr __ARGS((char_u *, int));
 338
 339 #ifdef DEBUG
 340 static void     regdump __ARGS((char_u *, regprog_T *));
 341 static char_u   *regprop __ARGS((char_u *));
 342 #endif
 343
 344 #define NOT_MULTI       0
 345 #define MULTI_ONE       1
 346 #define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
 366 #define HASWIDTH        0x1     /* Known never to match null string. */
 367 #define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
 368 #define SPSTART         0x4     /* Starts with * or +. */
 369 #define HASNL           0x8     /* Contains some \n. */
 370 #define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
 371 #define WORST           0       /* Worst case. */
 372
 373 /*
 374  * When regcode is set to this value, code is not emitted and size is computed
 375  * instead.
 376  */
 377 #define JUST_CALC_SIZE  ((char_u *) -1)
 378
 379 static char_u           *reg_prev_sub = NULL;
 380
 381 /*
 382  * REGEXP_INRANGE contains all characters which are always special in a []
 383  * range after '\'.
 384  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 385  * These are:
 386  *  \n  - New line (NL).
 387  *  \r  - Carriage Return (CR).
 388  *  \t  - Tab (TAB).
 389  *  \e  - Escape (ESC).
 390  *  \b  - Backspace (Ctrl_H).
 391  *  \d  - Character code in decimal, eg \d123
 392  *  \o  - Character code in octal, eg \o40
 393  *  \x  - Character code in hex, eg \x4a
 394  *  \u  - Multibyte character code, eg \u20ac
 395  *  \U  - Long multibyte character code, eg \U12345678
 396  */
 397 static char_u REGEXP_INRANGE[] = "]^-n\\";
 398 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
 399
 400 static int      backslash_trans __ARGS((int c));
 401 static int      get_char_class __ARGS((char_u **pp));
 402 static int      get_equi_class __ARGS((char_u **pp));
 403 static void     reg_equi_class __ARGS((int c));
 404 static int      get_coll_element __ARGS((char_u **pp));
 405 static char_u   *skip_anyof __ARGS((char_u *p));
 406 static void     init_class_tab __ARGS((void));
 407
 408 /*
 409  * Translate '\x' to its control character, except "\n", which is Magic.
 410  */
 411     static int
 412 backslash_trans(c)
 413     int         c;
 414 {
 415     switch (c)
 416     {
 417         case 'r':   return CAR;
 418         case 't':   return TAB;
 419         case 'e':   return ESC;
 420         case 'b':   return BS;
 421     }
 422     return c;
 423 }
 424
 425 /*
 426  * Check for a character class name "[:name:]".  "pp" points to the '['.
 427  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 428  * recognized.  Otherwise "pp" is advanced to after the item.
 429  */
 430     static int
 431 get_char_class(pp)
 432     char_u      **pp;
 433 {
 434     static const char *(class_names[]) =
 435     {
 436         "alnum:]",
 437 #define CLASS_ALNUM 0
 438         "alpha:]",
 439 #define CLASS_ALPHA 1
 440         "blank:]",
 441 #define CLASS_BLANK 2
 442         "cntrl:]",
 443 #define CLASS_CNTRL 3
 444         "digit:]",
 445 #define CLASS_DIGIT 4
 446         "graph:]",
 447 #define CLASS_GRAPH 5
 448         "lower:]",
 449 #define CLASS_LOWER 6
 450         "print:]",
 451 #define CLASS_PRINT 7
 452         "punct:]",
 453 #define CLASS_PUNCT 8
 454         "space:]",
 455 #define CLASS_SPACE 9
 456         "upper:]",
 457 #define CLASS_UPPER 10
 458         "xdigit:]",
 459 #define CLASS_XDIGIT 11
 460         "tab:]",
 461 #define CLASS_TAB 12
 462         "return:]",
 463 #define CLASS_RETURN 13
 464         "backspace:]",
 465 #define CLASS_BACKSPACE 14
 466         "escape:]",
 467 #define CLASS_ESCAPE 15
 468     };
 469 #define CLASS_NONE 99
 470     int i;
 471
 472     if ((*pp)[1] == ':')
 473     {
 474         for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
 475             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 476             {
 477                 *pp += STRLEN(class_names[i]) + 2;
 478                 return i;
 479             }
 480     }
 481     return CLASS_NONE;
 482 }
 483
 484 /*
 485  * Specific version of character class functions.
 486  * Using a table to keep this fast.
 487  */
 488 static short    class_tab[256];     /* RI_ flags per byte value, filled by init_class_tab() */
 489
 490 #define     RI_DIGIT    0x01        /* '0' - '9' */
 491 #define     RI_HEX      0x02        /* '0' - '9', 'a' - 'f', 'A' - 'F' */
 492 #define     RI_OCTAL    0x04        /* '0' - '7' */
 493 #define     RI_WORD     0x08        /* digits, letters and '_' */
 494 #define     RI_HEAD     0x10        /* letters and '_' */
 495 #define     RI_ALPHA    0x20        /* letters */
 496 #define     RI_LOWER    0x40        /* 'a' - 'z' */
 497 #define     RI_UPPER    0x80        /* 'A' - 'Z' */
 498 #define     RI_WHITE    0x100       /* ' ' and '\t' */
 499
 500     static void
 501 init_class_tab()
 502 {
 503     int         i;
 504     static int  done = FALSE;
 505
 506     if (done)
 507         return;
 508
 509     for (i = 0; i < 256; ++i)
 510     {
 511         if (i >= '0' && i <= '7')
 512             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 513         else if (i >= '8' && i <= '9')
 514             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 515         else if (i >= 'a' && i <= 'f')
 516             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 517 #ifdef EBCDIC
 518         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 519                                                     || (i >= 's' && i <= 'z'))
 520 #else
 521         else if (i >= 'g' && i <= 'z')
 522 #endif
 523             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 524         else if (i >= 'A' && i <= 'F')
 525             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 526 #ifdef EBCDIC
 527         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 528                                                     || (i >= 'S' && i <= 'Z'))
 529 #else
 530         else if (i >= 'G' && i <= 'Z')
 531 #endif
 532             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 533         else if (i == '_')
 534             class_tab[i] = RI_WORD + RI_HEAD;
 535         else
 536             class_tab[i] = 0;
 537     }
 538     class_tab[' '] |= RI_WHITE;
 539     class_tab['\t'] |= RI_WHITE;
 540     done = TRUE;
 541 }
 542
 543 #ifdef FEAT_MBYTE
 544 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 545 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 546 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 547 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 548 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 549 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 550 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 551 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 552 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 553 #else
 554 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 555 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 556 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 557 # define ri_word(c)     (class_tab[c] & RI_WORD)
 558 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 559 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 560 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 561 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 562 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 563 #endif
 564
 565 /* flags for regflags */
 566 #define RF_ICASE    1   /* ignore case */
 567 #define RF_NOICASE  2   /* don't ignore case */
 568 #define RF_HASNL    4   /* can match a NL */
 569 #define RF_ICOMBINE 8   /* ignore combining characters */
 570 #define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 571
 572 /*
 573  * Global work variables for vim_regcomp().
 574  */
 575
 576 static char_u   *regparse;      /* Input-scan pointer. */
 577 static int      prevchr_len;    /* byte length of previous char */
 578 static int      num_complex_braces; /* Complex \{...} count */
 579 static int      regnpar;        /* () count. */
 580 #ifdef FEAT_SYN_HL
 581 static int      regnzpar;       /* \z() count. */
 582 static int      re_has_z;       /* \z item detected */
 583 #endif
 584 static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
 585 static long     regsize;        /* Code size. */
 586 static int      reg_toolong;    /* TRUE when offset out of range */
 587 static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
 588 static unsigned regflags;       /* RF_ flags for prog */
 589 static long     brace_min[10];  /* Minimums for complex brace repeats */
 590 static long     brace_max[10];  /* Maximums for complex brace repeats */
 591 static int      brace_count[10]; /* Current counts for complex brace repeats */
 592 #if defined(FEAT_SYN_HL) || defined(PROTO)
 593 static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
 594 #endif
 595 static int      one_exactly = FALSE;    /* only do one char for EXACTLY */
 596
 597 static int      reg_magic;      /* magicness of the pattern: */
 598 #define MAGIC_NONE      1       /* "\V" very unmagic */
 599 #define MAGIC_OFF       2       /* "\M" or 'magic' off */
 600 #define MAGIC_ON        3       /* "\m" or 'magic' */
 601 #define MAGIC_ALL       4       /* "\v" very magic */
 602
 603 static int      reg_string;     /* matching with a string instead of a buffer
 604                                    line */
 605 static int      reg_strict;     /* "[abc" is illegal */
 606
 607 /*
 608  * META contains all characters that may be magic, except '^' and '$'.
 609  */
 610
 611 #ifdef EBCDIC
 612 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
 613 #else
 614 /* META[] is used often enough to justify turning it into a table. */
 615 static char_u META_flags[] = {
 616     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 617     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 618 /*                 %  &     (  )  *  +        .    */
 619     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
 620 /*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
 621     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
 622 /*  @  A     C  D     F     H  I     K  L  M     O */
 623     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
 624 /*  P        S     U  V  W  X     Z  [           _ */
 625     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
 626 /*     a     c  d     f     h  i     k  l  m  n  o */
 627     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
 628 /*  p        s     u  v  w  x     z  {  |     ~    */
 629     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 630 };
 631 #endif
 632
 633 static int      curchr;
 634
 635 /* arguments for reg() */
 636 #define REG_NOPAREN     0       /* toplevel reg() */
 637 #define REG_PAREN       1       /* \(\) */
 638 #define REG_ZPAREN      2       /* \z(\) */
 639 #define REG_NPAREN      3       /* \%(\) */
 640
 641 /*
 642  * Forward declarations for vim_regcomp()'s friends.
 643  */
 644 static void     initchr __ARGS((char_u *));
 645 static int      getchr __ARGS((void));
 646 static void     skipchr_keepstart __ARGS((void));
 647 static int      peekchr __ARGS((void));
 648 static void     skipchr __ARGS((void));
 649 static void     ungetchr __ARGS((void));
 650 static int      gethexchrs __ARGS((int maxinputlen));
 651 static int      getoctchrs __ARGS((void));
 652 static int      getdecchrs __ARGS((void));
 653 static int      coll_get_char __ARGS((void));
 654 static void     regcomp_start __ARGS((char_u *expr, int flags));
 655 static char_u   *reg __ARGS((int, int *));
 656 static char_u   *regbranch __ARGS((int *flagp));
 657 static char_u   *regconcat __ARGS((int *flagp));
 658 static char_u   *regpiece __ARGS((int *));
 659 static char_u   *regatom __ARGS((int *));
 660 static char_u   *regnode __ARGS((int));
 661 #ifdef FEAT_MBYTE
 662 static int      use_multibytecode __ARGS((int c));
 663 #endif
 664 static int      prog_magic_wrong __ARGS((void));
 665 static char_u   *regnext __ARGS((char_u *));
 666 static void     regc __ARGS((int b));
 667 #ifdef FEAT_MBYTE
 668 static void     regmbc __ARGS((int c));
 669 #else
 670 # define regmbc(c) regc(c)
 671 #endif
 672 static void     reginsert __ARGS((int, char_u *));
 673 static void     reginsert_limits __ARGS((int, long, long, char_u *));
 674 static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
 675 static int      read_limits __ARGS((long *, long *));
 676 static void     regtail __ARGS((char_u *, char_u *));
 677 static void     regoptail __ARGS((char_u *, char_u *));
 678
 679 /*
 680  * Return TRUE if compiled regular expression "prog" can match a line break.
 681  */
 682     int
 683 re_multiline(prog)
 684     regprog_T *prog;
 685 {
 686     return (prog->regflags & RF_HASNL);
 687 }
 688
 689 /*
 690  * Return TRUE if compiled regular expression "prog" looks before the start
 691  * position (pattern contains "\@<=" or "\@<!").
 692  */
 693     int
 694 re_lookbehind(prog)
 695     regprog_T *prog;
 696 {
 697     return (prog->regflags & RF_LOOKBH);
 698 }
 699
 700 /*
 701  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 702  * Returns a character representing the class. Zero means that no item was
 703  * recognized.  Otherwise "pp" is advanced to after the item.
 704  */
 705     static int
 706 get_equi_class(pp)
 707     char_u      **pp;
 708 {
 709     int         c;
 710     int         l = 1;
 711     char_u      *p = *pp;
 712
 713     if (p[1] == '=')
 714     {
 715 #ifdef FEAT_MBYTE
 716         if (has_mbyte)
 717             l = (*mb_ptr2len)(p + 2);
 718 #endif
 719         if (p[l + 2] == '=' && p[l + 3] == ']')
 720         {
 721 #ifdef FEAT_MBYTE
 722             if (has_mbyte)
 723                 c = mb_ptr2char(p + 2);
 724             else
 725 #endif
 726                 c = p[2];
 727             *pp += l + 4;
 728             return c;
 729         }
 730     }
 731     return 0;
 732 }
 733
 734 /*
 735  * Produce the bytes for equivalence class "c".
 736  * Currently only handles latin1, latin9 and utf-8.
 737  */
 738     static void
 739 reg_equi_class(c)
 740     int     c;
 741 {
 742 #ifdef FEAT_MBYTE
 743     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 744                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 745 #endif
 746     {
 747         switch (c)
 748         {
 749             case 'A': case '\300': case '\301': case '\302':
 750             case '\303': case '\304': case '\305':
 751                       regmbc('A'); regmbc('\300'); regmbc('\301');
 752                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 753                       regmbc('\305');
 754                       return;
 755             case 'C': case '\307':
 756                       regmbc('C'); regmbc('\307');
 757                       return;
 758             case 'E': case '\310': case '\311': case '\312': case '\313':
 759                       regmbc('E'); regmbc('\310'); regmbc('\311');
 760                       regmbc('\312'); regmbc('\313');
 761                       return;
 762             case 'I': case '\314': case '\315': case '\316': case '\317':
 763                       regmbc('I'); regmbc('\314'); regmbc('\315');
 764                       regmbc('\316'); regmbc('\317');
 765                       return;
 766             case 'N': case '\321':
 767                       regmbc('N'); regmbc('\321');
 768                       return;
 769             case 'O': case '\322': case '\323': case '\324': case '\325':
 770             case '\326':
 771                       regmbc('O'); regmbc('\322'); regmbc('\323');
 772                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 773                       return;
 774             case 'U': case '\331': case '\332': case '\333': case '\334':
 775                       regmbc('U'); regmbc('\331'); regmbc('\332');
 776                       regmbc('\333'); regmbc('\334');
 777                       return;
 778             case 'Y': case '\335':
 779                       regmbc('Y'); regmbc('\335');
 780                       return;
 781             case 'a': case '\340': case '\341': case '\342':
 782             case '\343': case '\344': case '\345':
 783                       regmbc('a'); regmbc('\340'); regmbc('\341');
 784                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 785                       regmbc('\345');
 786                       return;
 787             case 'c': case '\347':
 788                       regmbc('c'); regmbc('\347');
 789                       return;
 790             case 'e': case '\350': case '\351': case '\352': case '\353':
 791                       regmbc('e'); regmbc('\350'); regmbc('\351');
 792                       regmbc('\352'); regmbc('\353');
 793                       return;
 794             case 'i': case '\354': case '\355': case '\356': case '\357':
 795                       regmbc('i'); regmbc('\354'); regmbc('\355');
 796                       regmbc('\356'); regmbc('\357');
 797                       return;
 798             case 'n': case '\361':
 799                       regmbc('n'); regmbc('\361');
 800                       return;
 801             case 'o': case '\362': case '\363': case '\364': case '\365':
 802             case '\366':
 803                       regmbc('o'); regmbc('\362'); regmbc('\363');
 804                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 805                       return;
 806             case 'u': case '\371': case '\372': case '\373': case '\374':
 807                       regmbc('u'); regmbc('\371'); regmbc('\372');
 808                       regmbc('\373'); regmbc('\374');
 809                       return;
 810             case 'y': case '\375': case '\377':
 811                       regmbc('y'); regmbc('\375'); regmbc('\377');
 812                       return;
 813         }
 814     }
 815     regmbc(c);
 816 }
 817
 818 /*
 819  * Check for a collating element "[.a.]".  "pp" points to the '['.
 820  * Returns a character. Zero means that no item was recognized.  Otherwise
 821  * "pp" is advanced to after the item.
 822  * Currently only single characters are recognized!
 823  */
 824     static int
 825 get_coll_element(pp)
 826     char_u      **pp;
 827 {
 828     int         c;
 829     int         l = 1;
 830     char_u      *p = *pp;
 831
 832     if (p[1] == '.')
 833     {
 834 #ifdef FEAT_MBYTE
 835         if (has_mbyte)
 836             l = (*mb_ptr2len)(p + 2);
 837 #endif
 838         if (p[l + 2] == '.' && p[l + 3] == ']')
 839         {
 840 #ifdef FEAT_MBYTE
 841             if (has_mbyte)
 842                 c = mb_ptr2char(p + 2);
 843             else
 844 #endif
 845                 c = p[2];
 846             *pp += l + 4;
 847             return c;
 848         }
 849     }
 850     return 0;
 851 }
 852
 853
/*
 * Skip over a "[]" range.
 * "p" must point to the character after the '['.
 * The returned pointer is on the matching ']', or the terminating NUL.
 *
 * Items that are stepped over as a unit, so that a ']' inside them does not
 * end the range: a leading '^', a ']' or '-' in first position, a multi-byte
 * character, the character following a '-', a recognized backslash item and
 * a character class, equivalence class or collating element.
 */
    static char_u *
skip_anyof(p)
    char_u      *p;
{
    int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
    int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
#ifdef FEAT_MBYTE
    int         l;
#endif

    cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
    cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;

    if (*p == '^')      /* Complement of range. */
        ++p;
    /* A ']' or '-' in first position does not end the range, step over it. */
    if (*p == ']' || *p == '-')
        ++p;
    while (*p != NUL && *p != ']')
    {
#ifdef FEAT_MBYTE
        /* A character of more than one byte is skipped as a whole. */
        if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            p += l;
        else
#endif
            if (*p == '-')
            {
                /* Also skip the character after the '-', unless it is the
                 * closing ']' or the end of the text. */
                ++p;
                if (*p != ']' && *p != NUL)
                    mb_ptr_adv(p);
            }
        /* A backslash item is only recognized when 'cpoptions' does not
         * contain the '\' flag; the REGEXP_ABBR items additionally require
         * that the 'l' flag is absent. */
        else if (*p == '\\'
                && !cpo_bsl
                && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
                    || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
            p += 2;
        else if (*p == '[')
        {
            /* Try "[:name:]", "[=a=]" and "[.a.]" in turn.  The equivalence
             * class and collating element checks advance "p" themselves when
             * they recognize an item; presumably get_char_class() does the
             * same (its definition is not in this part of the file). */
            if (get_char_class(&p) == CLASS_NONE
                    && get_equi_class(&p) == 0
                    && get_coll_element(&p) == 0)
                ++p; /* It was not a class name */
        }
        else
            ++p;
    }

    return p;
}
 907
 908 /*
 909  * Skip past regular expression.
 910  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 911  * Take care of characters with a backslash in front of it.
 912  * Skip strings inside [ and ].
 913  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 914  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 915  * is changed in-place.
 916  */
 917     char_u *
 918 skip_regexp(startp, dirc, magic, newp)
 919     char_u      *startp;
 920     int         dirc;
 921     int         magic;
 922     char_u      **newp;
 923 {
 924     int         mymagic;
 925     char_u      *p = startp;
 926
 927     if (magic)
 928         mymagic = MAGIC_ON;
 929     else
 930         mymagic = MAGIC_OFF;
 931
 932     for (; p[0] != NUL; mb_ptr_adv(p))
 933     {
 934         if (p[0] == dirc)       /* found end of regexp */
 935             break;
 936         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 937                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 938         {
 939             p = skip_anyof(p + 1);
 940             if (p[0] == NUL)
 941                 break;
 942         }
 943         else if (p[0] == '\\' && p[1] != NUL)
 944         {
 945             if (dirc == '?' && newp != NULL && p[1] == '?')
 946             {
 947                 /* change "\?" to "?", make a copy first. */
 948                 if (*newp == NULL)
 949                 {
 950                     *newp = vim_strsave(startp);
 951                     if (*newp != NULL)
 952                         p = *newp + (p - startp);
 953                 }
 954                 if (*newp != NULL)
 955                     STRMOVE(p, p + 1);
 956                 else
 957                     ++p;
 958             }
 959             else
 960                 ++p;    /* skip next character */
 961             if (*p == 'v')
 962                 mymagic = MAGIC_ALL;
 963             else if (*p == 'V')
 964                 mymagic = MAGIC_NONE;
 965         }
 966     }
 967     return p;
 968 }
 969
/*
 * vim_regcomp() - compile a regular expression into internal code
 * Returns the program in allocated space.  Returns NULL for an error,
 * including when "expr" is NULL or when allocating the program fails.
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because vim_free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 * "re_flags": RE_MAGIC, RE_STRING and/or RE_STRICT, passed on to
 * regcomp_start().
 */
    regprog_T *
vim_regcomp(expr, re_flags)
    char_u      *expr;
    int         re_flags;
{
    regprog_T   *r;
    char_u      *scan;
    char_u      *longest;
    int         len;
    int         flags;

    if (expr == NULL)
        EMSG_RET_NULL(_(e_null));

    init_class_tab();

    /*
     * First pass: determine size, legality.
     * Nothing has been allocated yet, thus a failure can return directly.
     */
    regcomp_start(expr, re_flags);
    regcode = JUST_CALC_SIZE;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
        return NULL;

    /* Small enough for pointer-storage convention? */
#ifdef SMALL_MALLOC             /* 16 bit storage allocation */
    if (regsize >= 65536L - 256L)
        EMSG_RET_NULL(_("E339: Pattern too long"));
#endif

    /* Allocate space. */
    r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
    if (r == NULL)
        return NULL;

    /*
     * Second pass: emit code.
     * The parse state is reset and the same pattern is parsed again, this
     * time with "regcode" pointing into the allocated program.
     */
    regcomp_start(expr, re_flags);
    regcode = r->program;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
    {
        /* The program is incomplete, free it before returning. */
        vim_free(r);
        if (reg_toolong)
            EMSG_RET_NULL(_("E339: Pattern too long"));
        return NULL;
    }

    /* Dig out information for optimizations. */
    r->regstart = NUL;          /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    r->regflags = regflags;
    if (flags & HASNL)
        r->regflags |= RF_HASNL;
    if (flags & HASLOOKBH)
        r->regflags |= RF_LOOKBH;
#ifdef FEAT_SYN_HL
    /* Remember whether this pattern has any \z specials in it. */
    r->reghasz = re_has_z;
#endif
    scan = r->program + 1;      /* First BRANCH. */
    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
    {
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == BOL || OP(scan) == RE_BOF)
        {
            r->reganch++;
            scan = regnext(scan);
        }

        /* When the pattern starts with literal text, possibly after one
         * item from the list below, remember its first character in
         * "regstart". */
        if (OP(scan) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(scan));
            else
#endif
                r->regstart = *OPERAND(scan);
        }
        else if ((OP(scan) == BOW
                    || OP(scan) == EOW
                    || OP(scan) == NOTHING
                    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
                    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
                 && OP(regnext(scan)) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
            else
#endif
                r->regstart = *OPERAND(regnext(scan));
        }

        /*
         * If there's something expensive in the r.e., find the longest
         * literal string that must appear and make it the regmust.  Resolve
         * ties in favor of later strings, since the regstart check works
         * with the beginning of the r.e. and avoiding duplication
         * strengthens checking.  Not a strong reason, but sufficient in the
         * absence of others.
         */
        /*
         * When the r.e. starts with BOW, it is faster to look for a regmust
         * first. Used a lot for "#" and "*" commands. (Added by mool).
         */
        if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
                                                          && !(flags & HASNL))
        {
            longest = NULL;
            len = 0;
            /* Using ">=" makes a later string of equal length win. */
            for (; scan != NULL; scan = regnext(scan))
                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
                {
                    longest = OPERAND(scan);
                    len = (int)STRLEN(OPERAND(scan));
                }
            r->regmust = longest;
            r->regmlen = len;
        }
    }
#ifdef DEBUG
    regdump(expr, r);
#endif
    return r;
}
1122
1123 /*
1124  * Setup to parse the regexp.  Used once to get the length and once to do it.
1125  */
1126     static void
1127 regcomp_start(expr, re_flags)
1128     char_u      *expr;
1129     int         re_flags;           /* see vim_regcomp() */
1130 {
1131     initchr(expr);
1132     if (re_flags & RE_MAGIC)
1133         reg_magic = MAGIC_ON;
1134     else
1135         reg_magic = MAGIC_OFF;
1136     reg_string = (re_flags & RE_STRING);
1137     reg_strict = (re_flags & RE_STRICT);
1138
1139     num_complex_braces = 0;
1140     regnpar = 1;
1141     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1142 #ifdef FEAT_SYN_HL
1143     regnzpar = 1;
1144     re_has_z = 0;
1145 #endif
1146     regsize = 0L;
1147     reg_toolong = FALSE;
1148     regflags = 0;
1149 #if defined(FEAT_SYN_HL) || defined(PROTO)
1150     had_eol = FALSE;
1151 #endif
1152 }
1153
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Report whether the EOL item "$" was found while parsing a pattern.
 * The flag is cleared by regcomp_start() and set by the parser when it
 * encounters "$" or "\_$", thus the value belongs to the most recently
 * compiled pattern.  This is messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return (had_eol);
}
#endif
1165
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 *
 * "paren" tells what kind of group is being parsed, it decides which open
 * and close nodes are emitted around the branches.
 * "flagp" receives the combined flags of the branches: HASWIDTH only when
 * every branch has it; SPSTART, HASNL and HASLOOKBH when any branch has it.
 * Returns the first node of the group, NULL when something is wrong.
 */
    static char_u *
reg(paren, flagp)
    int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
    int         *flagp;
{
    char_u      *ret;
    char_u      *br;
    char_u      *ender;
    int         parno = 0;
    int         flags;

    *flagp = HASWIDTH;          /* Tentatively. */

#ifdef FEAT_SYN_HL
    if (paren == REG_ZPAREN)
    {
        /* Make a ZOPEN node. */
        if (regnzpar >= NSUBEXP)
            EMSG_RET_NULL(_("E50: Too many \\z("));
        parno = regnzpar;
        regnzpar++;
        ret = regnode(ZOPEN + parno);
    }
    else
#endif
        if (paren == REG_PAREN)
    {
        /* Make a MOPEN node. */
        if (regnpar >= NSUBEXP)
            EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
        parno = regnpar;
        ++regnpar;
        ret = regnode(MOPEN + parno);
    }
    else if (paren == REG_NPAREN)
    {
        /* Make a NOPEN node. */
        ret = regnode(NOPEN);
    }
    else
        ret = NULL;             /* REG_NOPAREN: there is no open node */

    /* Pick up the branches, linking them together. */
    br = regbranch(&flags);
    if (br == NULL)
        return NULL;
    if (ret != NULL)
        regtail(ret, br);       /* [MZ]OPEN -> first. */
    else
        ret = br;
    /* If one of the branches can be zero-width, the whole thing can.
     * If one of the branches has * at start or matches a line-break, the
     * whole thing can. */
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    /* Each further alternative is handled like the first one. */
    while (peekchr() == Magic('|'))
    {
        skipchr();
        br = regbranch(&flags);
        if (br == NULL || reg_toolong)
            return NULL;
        regtail(ret, br);       /* BRANCH -> BRANCH. */
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    }

    /* Make a closing node, and hook it on the end.  The kind of node
     * matches the open node made above; END is used at the top level. */
    ender = regnode(
#ifdef FEAT_SYN_HL
            paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
            paren == REG_PAREN ? MCLOSE + parno :
            paren == REG_NPAREN ? NCLOSE : END);
    regtail(ret, ender);

    /* Hook the tails of the branches to the closing node. */
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    /* Check for proper termination: a group must be followed by its closing
     * parenthesis, the top level must have consumed the whole pattern. */
    if (paren != REG_NOPAREN && getchr() != Magic(')'))
    {
#ifdef FEAT_SYN_HL
        if (paren == REG_ZPAREN)
            EMSG_RET_NULL(_("E52: Unmatched \\z("));
        else
#endif
            if (paren == REG_NPAREN)
            EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
        else
            EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
    }
    else if (paren == REG_NOPAREN && peekchr() != NUL)
    {
        if (curchr == Magic(')'))
            EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
        else
            EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
        /* NOTREACHED */
    }
    /*
     * Here we set the flag allowing back references to this set of
     * parentheses.
     */
    if (paren == REG_PAREN)
        had_endbrace[parno] = TRUE;     /* have seen the close paren */
    return ret;
}
1285
1286 /*
1287  * Handle one alternative of an | operator.
1288  * Implements the & operator.
1289  */
1290     static char_u *
1291 regbranch(flagp)
1292     int         *flagp;
1293 {
1294     char_u      *ret;
1295     char_u      *chain = NULL;
1296     char_u      *latest;
1297     int         flags;
1298
1299     *flagp = WORST | HASNL;             /* Tentatively. */
1300
1301     ret = regnode(BRANCH);
1302     for (;;)
1303     {
1304         latest = regconcat(&flags);
1305         if (latest == NULL)
1306             return NULL;
1307         /* If one of the branches has width, the whole thing has.  If one of
1308          * the branches anchors at start-of-line, the whole thing does.
1309          * If one of the branches uses look-behind, the whole thing does. */
1310         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1311         /* If one of the branches doesn't match a line-break, the whole thing
1312          * doesn't. */
1313         *flagp &= ~HASNL | (flags & HASNL);
1314         if (chain != NULL)
1315             regtail(chain, latest);
1316         if (peekchr() != Magic('&'))
1317             break;
1318         skipchr();
1319         regtail(latest, regnode(END)); /* operand ends */
1320         if (reg_toolong)
1321             break;
1322         reginsert(MATCH, latest);
1323         chain = latest;
1324     }
1325
1326     return ret;
1327 }
1328
1329 /*
1330  * Handle one alternative of an | or & operator.
1331  * Implements the concatenation operator.
1332  */
1333     static char_u *
1334 regconcat(flagp)
1335     int         *flagp;
1336 {
1337     char_u      *first = NULL;
1338     char_u      *chain = NULL;
1339     char_u      *latest;
1340     int         flags;
1341     int         cont = TRUE;
1342
1343     *flagp = WORST;             /* Tentatively. */
1344
1345     while (cont)
1346     {
1347         switch (peekchr())
1348         {
1349             case NUL:
1350             case Magic('|'):
1351             case Magic('&'):
1352             case Magic(')'):
1353                             cont = FALSE;
1354                             break;
1355             case Magic('Z'):
1356 #ifdef FEAT_MBYTE
1357                             regflags |= RF_ICOMBINE;
1358 #endif
1359                             skipchr_keepstart();
1360                             break;
1361             case Magic('c'):
1362                             regflags |= RF_ICASE;
1363                             skipchr_keepstart();
1364                             break;
1365             case Magic('C'):
1366                             regflags |= RF_NOICASE;
1367                             skipchr_keepstart();
1368                             break;
1369             case Magic('v'):
1370                             reg_magic = MAGIC_ALL;
1371                             skipchr_keepstart();
1372                             curchr = -1;
1373                             break;
1374             case Magic('m'):
1375                             reg_magic = MAGIC_ON;
1376                             skipchr_keepstart();
1377                             curchr = -1;
1378                             break;
1379             case Magic('M'):
1380                             reg_magic = MAGIC_OFF;
1381                             skipchr_keepstart();
1382                             curchr = -1;
1383                             break;
1384             case Magic('V'):
1385                             reg_magic = MAGIC_NONE;
1386                             skipchr_keepstart();
1387                             curchr = -1;
1388                             break;
1389             default:
1390                             latest = regpiece(&flags);
1391                             if (latest == NULL || reg_toolong)
1392                                 return NULL;
1393                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1394                             if (chain == NULL)  /* First piece. */
1395                                 *flagp |= flags & SPSTART;
1396                             else
1397                                 regtail(chain, latest);
1398                             chain = latest;
1399                             if (first == NULL)
1400                                 first = latest;
1401                             break;
1402         }
1403     }
1404     if (first == NULL)          /* Loop ran zero times. */
1405         first = regnode(NOTHING);
1406     return first;
1407 }
1408
/*
 * regpiece - an atom followed by an optional multi: "*", "+", "=", "?",
 * "@..." or "{...}"
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 *
 * "flagp" receives the flags of the atom when no multi follows, otherwise
 * flags that take the multi into account.
 * Returns the first node of the piece, NULL when something is wrong.
 */
    static char_u *
regpiece(flagp)
    int             *flagp;
{
    char_u          *ret;
    int             op;
    char_u          *next;
    int             flags;
    long            minval;
    long            maxval;

    ret = regatom(&flags);
    if (ret == NULL)
        return NULL;

    /* Without a multi the piece is just the atom. */
    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
        *flagp = flags;
        return ret;
    }
    /* default flags */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
        case Magic('*'):
            if (flags & SIMPLE)
                reginsert(STAR, ret);
            else
            {
                /* Emit x* as (x&|), where & means "self". */
                reginsert(BRANCH, ret); /* Either x */
                regoptail(ret, regnode(BACK));  /* and loop */
                regoptail(ret, ret);    /* back */
                regtail(ret, regnode(BRANCH));  /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            break;

        case Magic('+'):
            if (flags & SIMPLE)
                reginsert(PLUS, ret);
            else
            {
                /* Emit x+ as x(&|), where & means "self". */
                next = regnode(BRANCH); /* Either */
                regtail(ret, next);
                regtail(regnode(BACK), ret);    /* loop back */
                regtail(next, regnode(BRANCH)); /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            /* The atom is matched at least once: replace the default flags,
             * which include SPSTART, by flags that include HASWIDTH. */
            *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;

        case Magic('@'):
            {
                int     lop = END;      /* END: no valid item found */

                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH; break;                 /* \@= */
                    case '!': lop = NOMATCH; break;               /* \@! */
                    case '>': lop = SUBPAT; break;                /* \@> */
                    case '<': switch (no_Magic(getchr()))
                              {
                                  case '=': lop = BEHIND; break;   /* \@<= */
                                  case '!': lop = NOBEHIND; break; /* \@<! */
                              }
                }
                if (lop == END)
                    EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
                                                      reg_magic == MAGIC_ALL);
                /* Look behind must match with behind_pos. */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    regtail(ret, regnode(BHPOS));
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
                reginsert(lop, ret);
                break;
            }

        case Magic('?'):
        case Magic('='):
            /* Emit x= as (x|) */
            reginsert(BRANCH, ret);             /* Either x */
            regtail(ret, regnode(BRANCH));      /* or */
            next = regnode(NOTHING);            /* null. */
            regtail(ret, next);
            regoptail(ret, next);
            break;

        case Magic('{'):
            if (!read_limits(&minval, &maxval))
                return NULL;
            if (flags & SIMPLE)
            {
                reginsert(BRACE_SIMPLE, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
            }
            else
            {
                /* Every complex brace item gets its own BRACE_COMPLEX
                 * opcode, no more than ten can be used in one pattern. */
                if (num_complex_braces >= 10)
                    EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
                                                      reg_magic == MAGIC_ALL);
                reginsert(BRACE_COMPLEX + num_complex_braces, ret);
                regoptail(ret, regnode(BACK));
                regoptail(ret, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
                ++num_complex_braces;
            }
            /* With both limits above zero the atom is matched at least
             * once. */
            if (minval > 0 && maxval > 0)
                *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
        /* Can't have a multi follow a multi. */
        if (peekchr() == Magic('*'))
            sprintf((char *)IObuff, _("E61: Nested %s*"),
                                            reg_magic >= MAGIC_ON ? "" : "\\");
        else
            sprintf((char *)IObuff, _("E62: Nested %s%c"),
                reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
        EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1550
1551 /*
1552  * regatom - the lowest level
1553  *
1554  * Optimization:  gobbles an entire sequence of ordinary characters so that
1555  * it can turn them into a single node, which is smaller to store and
1556  * faster to run.  Don't do this when one_exactly is set.
1557  */
1558     static char_u *
1559 regatom(flagp)
1560     int            *flagp;         /* out: flags describing the atom */
1561 {
1562     char_u          *ret;
1563     int             flags;
1564     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1565     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1566     int             c;
1567     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1568     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1569                                     FNAME, SFNAME, PRINT, SPRINT,
1570                                     WHITE, NWHITE, DIGIT, NDIGIT,
1571                                     HEX, NHEX, OCTAL, NOCTAL,
1572                                     WORD, NWORD, HEAD, NHEAD,
1573                                     ALPHA, NALPHA, LOWER, NLOWER,
1574                                     UPPER, NUPPER
1575                                     };      /* indexed like classchars */
1576     char_u          *p;
1577     int             extra = 0;      /* ADD_NL once "\_" was seen */
1578     /* Parse one atom at the lexer position and emit the node(s) for it. */
1579     *flagp = WORST;             /* Tentatively. */
1580     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1581     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1582     /* The first character decides what kind of atom this is. */
1583     c = getchr();
1584     switch (c)
1585     {
1586       case Magic('^'):
1587         ret = regnode(BOL);
1588         break;
1589
1590       case Magic('$'):
1591         ret = regnode(EOL);
1592 #if defined(FEAT_SYN_HL) || defined(PROTO)
1593         had_eol = TRUE;
1594 #endif
1595         break;
1596
1597       case Magic('<'):
1598         ret = regnode(BOW);
1599         break;
1600
1601       case Magic('>'):
1602         ret = regnode(EOW);
1603         break;
1604
1605       case Magic('_'):
1606         c = no_Magic(getchr());
1607         if (c == '^')           /* "\_^" is start-of-line */
1608         {
1609             ret = regnode(BOL);
1610             break;
1611         }
1612         if (c == '$')           /* "\_$" is end-of-line */
1613         {
1614             ret = regnode(EOL);
1615 #if defined(FEAT_SYN_HL) || defined(PROTO)
1616             had_eol = TRUE;
1617 #endif
1618             break;
1619         }
1620         /* Any other "\_x" uses the ADD_NL variant of the node for "x". */
1621         extra = ADD_NL;
1622         *flagp |= HASNL;
1623
1624         /* "\_[" is character range plus newline */
1625         if (c == '[')
1626             goto collection;
1627
1628         /* "\_x" is character class plus newline */
1629         /*FALLTHROUGH*/
1630
1631         /*
1632          * Character classes.
1633          */
1634       case Magic('.'):
1635       case Magic('i'):
1636       case Magic('I'):
1637       case Magic('k'):
1638       case Magic('K'):
1639       case Magic('f'):
1640       case Magic('F'):
1641       case Magic('p'):
1642       case Magic('P'):
1643       case Magic('s'):
1644       case Magic('S'):
1645       case Magic('d'):
1646       case Magic('D'):
1647       case Magic('x'):
1648       case Magic('X'):
1649       case Magic('o'):
1650       case Magic('O'):
1651       case Magic('w'):
1652       case Magic('W'):
1653       case Magic('h'):
1654       case Magic('H'):
1655       case Magic('a'):
1656       case Magic('A'):
1657       case Magic('l'):
1658       case Magic('L'):
1659       case Magic('u'):
1660       case Magic('U'):
1661         p = vim_strchr(classchars, no_Magic(c));
1662         if (p == NULL)
1663             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1664 #ifdef FEAT_MBYTE
1665         /* When '.' is followed by a composing char ignore the dot, so that
1666          * the composing char is matched here. */
1667         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1668         {
1669             c = getchr();
1670             goto do_multibyte;
1671         }
1672 #endif
1673         ret = regnode(classcodes[p - classchars] + extra);
1674         *flagp |= HASWIDTH | SIMPLE;
1675         break;
1676
1677       case Magic('n'):
1678         if (reg_string)
1679         {
1680             /* In a string "\n" matches a newline character. */
1681             ret = regnode(EXACTLY);
1682             regc(NL);
1683             regc(NUL);
1684             *flagp |= HASWIDTH | SIMPLE;
1685         }
1686         else
1687         {
1688             /* In buffer text "\n" matches the end of a line. */
1689             ret = regnode(NEWL);
1690             *flagp |= HASWIDTH | HASNL;
1691         }
1692         break;
1693       /* Parenthesized group, parsed recursively by reg(). */
1694       case Magic('('):
1695         if (one_exactly)
1696             EMSG_ONE_RET_NULL;
1697         ret = reg(REG_PAREN, &flags);
1698         if (ret == NULL)
1699             return NULL;
1700         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1701         break;
1702       /* These end a sequence of atoms and should never get here. */
1703       case NUL:
1704       case Magic('|'):
1705       case Magic('&'):
1706       case Magic(')'):
1707         if (one_exactly)
1708             EMSG_ONE_RET_NULL;
1709         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1710         /* NOTREACHED */
1711       /* A multi at the start of an atom has nothing to repeat. */
1712       case Magic('='):
1713       case Magic('?'):
1714       case Magic('+'):
1715       case Magic('@'):
1716       case Magic('{'):
1717       case Magic('*'):
1718         c = no_Magic(c);
1719         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1720                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1721                 ? "" : "\\", c);
1722         EMSG_RET_NULL(IObuff);
1723         /* NOTREACHED */
1724
1725       case Magic('~'):          /* previous substitute pattern */
1726             if (reg_prev_sub != NULL)
1727             {
1728                 char_u      *lp;
1729
1730                 ret = regnode(EXACTLY);
1731                 lp = reg_prev_sub;
1732                 while (*lp != NUL)
1733                     regc(*lp++);
1734                 regc(NUL);
1735                 if (*reg_prev_sub != NUL)
1736                 {
1737                     *flagp |= HASWIDTH;
1738                     if ((lp - reg_prev_sub) == 1)
1739                         *flagp |= SIMPLE;
1740                 }
1741             }
1742             else
1743                 EMSG_RET_NULL(_(e_nopresub));
1744             break;
1745       /* Back reference to sub-expression 1 to 9. */
1746       case Magic('1'):
1747       case Magic('2'):
1748       case Magic('3'):
1749       case Magic('4'):
1750       case Magic('5'):
1751       case Magic('6'):
1752       case Magic('7'):
1753       case Magic('8'):
1754       case Magic('9'):
1755             {
1756                 int                 refnum;
1757
1758                 refnum = c - Magic('0');
1759                 /*
1760                  * Check if the back reference is legal. We must have seen the
1761                  * close brace.
1762                  * TODO: Should also check that we don't refer to something
1763                  * that is repeated (+*=): what instance of the repetition
1764                  * should we match?
1765                  */
1766                 if (!had_endbrace[refnum])
1767                 {
1768                     /* Trick: check if "@<=" or "@<!" follows, in which case
1769                      * the \1 can appear before the referenced match. */
1770                     for (p = regparse; *p != NUL; ++p)
1771                         if (p[0] == '@' && p[1] == '<'
1772                                               && (p[2] == '!' || p[2] == '='))
1773                             break;
1774                     if (*p == NUL)
1775                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1776                 }
1777                 ret = regnode(BACKREF + refnum);
1778             }
1779             break;
1780       /* Items that start with "\z"; the next character selects which. */
1781       case Magic('z'):
1782         {
1783             c = no_Magic(getchr());
1784             switch (c)
1785             {
1786 #ifdef FEAT_SYN_HL
1787                 case '(': if (reg_do_extmatch != REX_SET)
1788                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1789                           if (one_exactly)
1790                               EMSG_ONE_RET_NULL;
1791                           ret = reg(REG_ZPAREN, &flags);
1792                           if (ret == NULL)
1793                               return NULL;
1794                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1795                           re_has_z = REX_SET;
1796                           break;
1797
1798                 case '1':
1799                 case '2':
1800                 case '3':
1801                 case '4':
1802                 case '5':
1803                 case '6':
1804                 case '7':
1805                 case '8':
1806                 case '9': if (reg_do_extmatch != REX_USE)
1807                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1808                           ret = regnode(ZREF + c - '0');
1809                           re_has_z = REX_USE;
1810                           break;
1811 #endif
1812                 /* "\zs" and "\ze" are emitted as MOPEN/MCLOSE number 0. */
1813                 case 's': ret = regnode(MOPEN + 0);
1814                           break;
1815
1816                 case 'e': ret = regnode(MCLOSE + 0);
1817                           break;
1818
1819                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1820             }
1821         }
1822         break;
1823       /* Items that start with "\%"; the next character selects which. */
1824       case Magic('%'):
1825         {
1826             c = no_Magic(getchr());
1827             switch (c)
1828             {
1829                 /* () without a back reference */
1830                 case '(':
1831                     if (one_exactly)
1832                         EMSG_ONE_RET_NULL;
1833                     ret = reg(REG_NPAREN, &flags);
1834                     if (ret == NULL)
1835                         return NULL;
1836                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1837                     break;
1838
1839                 /* Catch \%^ and \%$ regardless of where they appear in the
1840                  * pattern -- regardless of whether or not it makes sense. */
1841                 case '^':
1842                     ret = regnode(RE_BOF);
1843                     break;
1844
1845                 case '$':
1846                     ret = regnode(RE_EOF);
1847                     break;
1848
1849                 case '#':
1850                     ret = regnode(CURSOR);
1851                     break;
1852
1853                 case 'V':
1854                     ret = regnode(RE_VISUAL);
1855                     break;
1856
1857                 /* \%[abc]: Emit as a list of branches, all ending at the last
1858                  * branch which matches nothing. */
1859                 case '[':
1860                           if (one_exactly)      /* doesn't nest */
1861                               EMSG_ONE_RET_NULL;
1862                           {
1863                               char_u    *lastbranch;
1864                               char_u    *lastnode = NULL;
1865                               char_u    *br;
1866
1867                               ret = NULL;
1868                               while ((c = getchr()) != ']')
1869                               {
1870                                   if (c == NUL)
1871                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1872                                                       reg_magic == MAGIC_ALL);
1873                                   br = regnode(BRANCH);
1874                                   if (ret == NULL)
1875                                       ret = br;
1876                                   else
1877                                       regtail(lastnode, br);
1878                                   /* re-read it as a one_exactly atom */
1879                                   ungetchr();
1880                                   one_exactly = TRUE;
1881                                   lastnode = regatom(flagp);
1882                                   one_exactly = FALSE;
1883                                   if (lastnode == NULL)
1884                                       return NULL;
1885                               }
1886                               if (ret == NULL)
1887                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1888                                                       reg_magic == MAGIC_ALL);
1889                               lastbranch = regnode(BRANCH);
1890                               br = regnode(NOTHING);
1891                               if (ret != JUST_CALC_SIZE)
1892                               {
1893                                   regtail(lastnode, br);
1894                                   regtail(lastbranch, br);
1895                                   /* connect all branches to the NOTHING
1896                                    * branch at the end */
1897                                   for (br = ret; br != lastnode; )
1898                                   {
1899                                       if (OP(br) == BRANCH)
1900                                       {
1901                                           regtail(br, lastbranch);
1902                                           br = OPERAND(br);
1903                                       }
1904                                       else
1905                                           br = regnext(br);
1906                                   }
1907                               }
1908                               *flagp &= ~(HASWIDTH | SIMPLE);
1909                               break;
1910                           }
1911
1912                 case 'd':   /* %d123 decimal */
1913                 case 'o':   /* %o123 octal */
1914                 case 'x':   /* %xab hex 2 */
1915                 case 'u':   /* %uabcd hex 4 */
1916                 case 'U':   /* %U1234abcd hex 8 */
1917                           {
1918                               int i;
1919
1920                               switch (c)
1921                               {
1922                                   case 'd': i = getdecchrs(); break;
1923                                   case 'o': i = getoctchrs(); break;
1924                                   case 'x': i = gethexchrs(2); break;
1925                                   case 'u': i = gethexchrs(4); break;
1926                                   case 'U': i = gethexchrs(8); break;
1927                                   default:  i = -1; break;
1928                               }
1929                               /* Negative: no valid number followed. */
1930                               if (i < 0)
1931                                   EMSG_M_RET_NULL(
1932                                         _("E678: Invalid character after %s%%[dxouU]"),
1933                                         reg_magic == MAGIC_ALL);
1934 #ifdef FEAT_MBYTE
1935                               if (use_multibytecode(i))
1936                                   ret = regnode(MULTIBYTECODE);
1937                               else
1938 #endif
1939                                   ret = regnode(EXACTLY);
1940                               if (i == 0)
1941                                   regc(0x0a);   /* 0 can't be stored: NUL ends the operand */
1942                               else
1943 #ifdef FEAT_MBYTE
1944                                   regmbc(i);
1945 #else
1946                                   regc(i);
1947 #endif
1948                               regc(NUL);
1949                               *flagp |= HASWIDTH;
1950                               break;
1951                           }
1952
1953                 default:
1954                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1955                                                                  || c == '\'')
1956                           {
1957                               long_u    n = 0;
1958                               int       cmp;
1959                               /* first char: '<', '>', '\'' or a digit */
1960                               cmp = c;
1961                               if (cmp == '<' || cmp == '>')
1962                                   c = getchr();
1963                               while (VIM_ISDIGIT(c))
1964                               {
1965                                   n = n * 10 + (c - '0');
1966                                   c = getchr();
1967                               }
1968                               if (c == '\'' && n == 0)
1969                               {
1970                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1971                                   c = getchr();
1972                                   ret = regnode(RE_MARK);
1973                                   if (ret == JUST_CALC_SIZE)
1974                                       regsize += 2;
1975                                   else
1976                                   {
1977                                       *regcode++ = c;
1978                                       *regcode++ = cmp;
1979                                   }
1980                                   break;
1981                               }
1982                               else if (c == 'l' || c == 'c' || c == 'v')
1983                               {
1984                                   if (c == 'l')
1985                                       ret = regnode(RE_LNUM);
1986                                   else if (c == 'c')
1987                                       ret = regnode(RE_COL);
1988                                   else
1989                                       ret = regnode(RE_VCOL);
1990                                   if (ret == JUST_CALC_SIZE)
1991                                       regsize += 5;
1992                                   else
1993                                   {
1994                                       /* put the number and the optional
1995                                        * comparator after the opcode */
1996                                       regcode = re_put_long(regcode, n);
1997                                       *regcode++ = cmp;
1998                                   }
1999                                   break;
2000                               }
2001                           }
2002                           /* None of the "\%" items matched. */
2003                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
2004                                                       reg_magic == MAGIC_ALL);
2005             }
2006         }
2007         break;
2008       /* Collection; "\_[" jumps to the label with "extra" set. */
2009       case Magic('['):
2010 collection:
2011         {
2012             char_u      *lp;
2013
2014             /*
2015              * If there is no matching ']', we assume the '[' is a normal
2016              * character.  This makes 'incsearch' and ":help [" work.
2017              */
2018             lp = skip_anyof(regparse);
2019             if (*lp == ']')     /* there is a matching ']' */
2020             {
2021                 int     startc = -1;    /* > 0 when next '-' is a range */
2022                 int     endc;
2023
2024                 /*
2025                  * In a character class, different parsing rules apply.
2026                  * Not even \ is special anymore, nothing is.
2027                  */
2028                 if (*regparse == '^')       /* Complement of range. */
2029                 {
2030                     ret = regnode(ANYBUT + extra);
2031                     regparse++;
2032                 }
2033                 else
2034                     ret = regnode(ANYOF + extra);
2035
2036                 /* At the start ']' and '-' mean the literal character. */
2037                 if (*regparse == ']' || *regparse == '-')
2038                 {
2039                     startc = *regparse;
2040                     regc(*regparse++);
2041                 }
2042                 /* Emit the items up to the closing ']'. */
2043                 while (*regparse != NUL && *regparse != ']')
2044                 {
2045                     if (*regparse == '-')
2046                     {
2047                         ++regparse;
2048                         /* The '-' is not used for a range at the end and
2049                          * after or before a '\n'. */
2050                         if (*regparse == ']' || *regparse == NUL
2051                                 || startc == -1
2052                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2053                         {
2054                             regc('-');
2055                             startc = '-';       /* [--x] is a range */
2056                         }
2057                         else
2058                         {
2059                             /* Also accept "a-[.z.]" */
2060                             endc = 0;
2061                             if (*regparse == '[')
2062                                 endc = get_coll_element(&regparse);
2063                             if (endc == 0)
2064                             {
2065 #ifdef FEAT_MBYTE
2066                                 if (has_mbyte)
2067                                     endc = mb_ptr2char_adv(&regparse);
2068                                 else
2069 #endif
2070                                     endc = *regparse++;
2071                             }
2072
2073                             /* Handle \o40, \x20 and \u20AC style sequences */
2074                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2075                                 endc = coll_get_char();
2076                             /* The range must not run backwards. */
2077                             if (startc > endc)
2078                                 EMSG_RET_NULL(_(e_invrange));
2079 #ifdef FEAT_MBYTE
2080                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2081                                                  || (*mb_char2len)(endc) > 1))
2082                             {
2083                                 /* Limit to a range of 256 chars */
2084                                 if (endc > startc + 256)
2085                                     EMSG_RET_NULL(_(e_invrange));
2086                                 while (++startc <= endc)
2087                                     regmbc(startc);
2088                             }
2089                             else
2090 #endif
2091                             {
2092 #ifdef EBCDIC
2093                                 int     alpha_only = FALSE;
2094
2095                                 /* for alphabetical range skip the gaps
2096                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2097                                 if (isalpha(startc) && isalpha(endc))
2098                                     alpha_only = TRUE;
2099 #endif
2100                                 while (++startc <= endc)
2101 #ifdef EBCDIC
2102                                     if (!alpha_only || isalpha(startc))
2103 #endif
2104                                         regc(startc);
2105                             }
2106                             startc = -1;    /* a following '-' is literal */
2107                         }
2108                     }
2109                     /*
2110                      * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2111                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2112                      * 'cpoptions' is not included.
2113                      * Posix doesn't recognize backslash at all.
2114                      */
2115                     else if (*regparse == '\\'
2116                             && !cpo_bsl
2117                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2118                                 || (!cpo_lit
2119                                     && vim_strchr(REGEXP_ABBR,
2120                                                        regparse[1]) != NULL)))
2121                     {
2122                         regparse++;
2123                         if (*regparse == 'n')
2124                         {
2125                             /* '\n' in range: also match NL */
2126                             if (ret != JUST_CALC_SIZE)
2127                             {
2128                                 if (*ret == ANYBUT)
2129                                     *ret = ANYBUT + ADD_NL;
2130                                 else if (*ret == ANYOF)
2131                                     *ret = ANYOF + ADD_NL;
2132                                 /* else: must have had a \n already */
2133                             }
2134                             *flagp |= HASNL;
2135                             regparse++;
2136                             startc = -1;
2137                         }
2138                         else if (*regparse == 'd'
2139                                 || *regparse == 'o'
2140                                 || *regparse == 'x'
2141                                 || *regparse == 'u'
2142                                 || *regparse == 'U')
2143                         {
2144                             startc = coll_get_char();
2145                             if (startc == 0)
2146                                 regc(0x0a);
2147                             else
2148 #ifdef FEAT_MBYTE
2149                                 regmbc(startc);
2150 #else
2151                                 regc(startc);
2152 #endif
2153                         }
2154                         else
2155                         {
2156                             startc = backslash_trans(*regparse++);
2157                             regc(startc);
2158                         }
2159                     }
2160                     else if (*regparse == '[')
2161                     {
2162                         int c_class;
2163                         int cu;
2164
2165                         c_class = get_char_class(&regparse);
2166                         startc = -1;
2167                         /* Characters assumed to be 8 bits! */
2168                         switch (c_class)
2169                         {
2170                             case CLASS_NONE:
2171                                 c_class = get_equi_class(&regparse);
2172                                 if (c_class != 0)
2173                                 {
2174                                     /* produce equivalence class */
2175                                     reg_equi_class(c_class);
2176                                 }
2177                                 else if ((c_class =
2178                                             get_coll_element(&regparse)) != 0)
2179                                 {
2180                                     /* produce a collating element */
2181                                     regmbc(c_class);
2182                                 }
2183                                 else
2184                                 {
2185                                     /* literal '[', allow [[-x] as a range */
2186                                     startc = *regparse++;
2187                                     regc(startc);
2188                                 }
2189                                 break;
2190                             case CLASS_ALNUM:
2191                                 for (cu = 1; cu <= 255; cu++)
2192                                     if (isalnum(cu))
2193                                         regc(cu);
2194                                 break;
2195                             case CLASS_ALPHA:
2196                                 for (cu = 1; cu <= 255; cu++)
2197                                     if (isalpha(cu))
2198                                         regc(cu);
2199                                 break;
2200                             case CLASS_BLANK:
2201                                 regc(' ');
2202                                 regc('\t');
2203                                 break;
2204                             case CLASS_CNTRL:
2205                                 for (cu = 1; cu <= 255; cu++)
2206                                     if (iscntrl(cu))
2207                                         regc(cu);
2208                                 break;
2209                             case CLASS_DIGIT:
2210                                 for (cu = 1; cu <= 255; cu++)
2211                                     if (VIM_ISDIGIT(cu))
2212                                         regc(cu);
2213                                 break;
2214                             case CLASS_GRAPH:
2215                                 for (cu = 1; cu <= 255; cu++)
2216                                     if (isgraph(cu))
2217                                         regc(cu);
2218                                 break;
2219                             case CLASS_LOWER:
2220                                 for (cu = 1; cu <= 255; cu++)
2221                                     if (MB_ISLOWER(cu))
2222                                         regc(cu);
2223                                 break;
2224                             case CLASS_PRINT:
2225                                 for (cu = 1; cu <= 255; cu++)
2226                                     if (vim_isprintc(cu))
2227                                         regc(cu);
2228                                 break;
2229                             case CLASS_PUNCT:
2230                                 for (cu = 1; cu <= 255; cu++)
2231                                     if (ispunct(cu))
2232                                         regc(cu);
2233                                 break;
2234                             case CLASS_SPACE:
2235                                 for (cu = 9; cu <= 13; cu++) /* TAB..CR in ASCII */
2236                                     regc(cu);
2237                                 regc(' ');
2238                                 break;
2239                             case CLASS_UPPER:
2240                                 for (cu = 1; cu <= 255; cu++)
2241                                     if (MB_ISUPPER(cu))
2242                                         regc(cu);
2243                                 break;
2244                             case CLASS_XDIGIT:
2245                                 for (cu = 1; cu <= 255; cu++)
2246                                     if (vim_isxdigit(cu))
2247                                         regc(cu);
2248                                 break;
2249                             case CLASS_TAB:
2250                                 regc('\t');
2251                                 break;
2252                             case CLASS_RETURN:
2253                                 regc('\r');
2254                                 break;
2255                             case CLASS_BACKSPACE:
2256                                 regc('\b');
2257                                 break;
2258                             case CLASS_ESCAPE:
2259                                 regc('\033');
2260                                 break;
2261                         }
2262                     }
2263                     else
2264                     {
2265 #ifdef FEAT_MBYTE
2266                         if (has_mbyte)
2267                         {
2268                             int len;
2269
2270                             /* produce a multibyte character, including any
2271                              * following composing characters */
2272                             startc = mb_ptr2char(regparse);
2273                             len = (*mb_ptr2len)(regparse);
2274                             if (enc_utf8 && utf_char2len(startc) != len)
2275                                 startc = -1;    /* composing chars */
2276                             while (--len >= 0)
2277                                 regc(*regparse++);
2278                         }
2279                         else
2280 #endif
2281                         {
2282                             startc = *regparse++;
2283                             regc(startc);
2284                         }
2285                     }
2286                 }
2287                 regc(NUL);
2288                 prevchr_len = 1;        /* last char was the ']' */
2289                 if (*regparse != ']')
2290                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2291                 skipchr();          /* let's be friends with the lexer again */
2292                 *flagp |= HASWIDTH | SIMPLE;
2293                 break;
2294             }
2295             else if (reg_strict)
2296                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2297                                                        reg_magic > MAGIC_OFF);
2298         }
2299         /* FALLTHROUGH */
2300       /* Ordinary character, or a '[' that has no matching ']'. */
2301       default:
2302         {
2303             int         len;
2304
2305 #ifdef FEAT_MBYTE
2306             /* A multi-byte character is handled as a separate atom if it's
2307              * before a multi and when it's a composing char. */
2308             if (use_multibytecode(c))
2309             {
2310 do_multibyte:
2311                 ret = regnode(MULTIBYTECODE);
2312                 regmbc(c);
2313                 *flagp |= HASWIDTH | SIMPLE;
2314                 break;
2315             }
2316 #endif
2317
2318             ret = regnode(EXACTLY);
2319
2320             /*
2321              * Append characters as long as:
2322              * - there is no following multi, we then need the character in
2323              *   front of it as a single character operand
2324              * - not running into a Magic character
2325              * - "one_exactly" is not set
2326              * But always emit at least one character.  Might be a Multi,
2327              * e.g., a "[" without matching "]".
2328              */
2329             for (len = 0; c != NUL && (len == 0
2330                         || (re_multi_type(peekchr()) == NOT_MULTI
2331                             && !one_exactly
2332                             && !is_Magic(c))); ++len)
2333             {
2334                 c = no_Magic(c);
2335 #ifdef FEAT_MBYTE
2336                 if (has_mbyte)
2337                 {
2338                     regmbc(c);
2339                     if (enc_utf8)
2340                     {
2341                         int     l;
2342
2343                         /* Need to get composing character too. */
2344                         for (;;)
2345                         {
2346                             l = utf_ptr2len(regparse);
2347                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2348                                 break;
2349                             regmbc(utf_ptr2char(regparse));
2350                             skipchr();
2351                         }
2352                     }
2353                 }
2354                 else
2355 #endif
2356                     regc(c);
2357                 c = getchr();
2358             }
2359             ungetchr();
2360             /* The char that ended the loop was put back; close the operand. */
2361             regc(NUL);
2362             *flagp |= HASWIDTH;
2363             if (len == 1)
2364                 *flagp |= SIMPLE;
2365         }
2366         break;
2367     }
2368     /* "ret" may be JUST_CALC_SIZE (size calculation) instead of a real node. */
2369     return ret;
2370 }
2371
2372 #ifdef FEAT_MBYTE
2373 /*
2374  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2375  * character "c".
2376  */
2377     static int
2378 use_multibytecode(c)
2379     int c;
2380 {
2381     return has_mbyte && (*mb_char2len)(c) > 1
2382                      && (re_multi_type(peekchr()) != NOT_MULTI
2383                              || (enc_utf8 && utf_iscomposing(c)));
2384 }
2385 #endif
2386
2387 /*
2388  * emit a node
2389  * Return pointer to generated code.
2390  */
2391     static char_u *
2392 regnode(op)
2393     int         op;
2394 {
2395     char_u  *ret;
2396
2397     ret = regcode;
2398     if (ret == JUST_CALC_SIZE)
2399         regsize += 3;
2400     else
2401     {
2402         *regcode++ = op;
2403         *regcode++ = NUL;               /* Null "next" pointer. */
2404         *regcode++ = NUL;
2405     }
2406     return ret;
2407 }
2408
2409 /*
2410  * Emit (if appropriate) a byte of code
2411  */
2412     static void
2413 regc(b)
2414     int         b;
2415 {
2416     if (regcode == JUST_CALC_SIZE)
2417         regsize++;
2418     else
2419         *regcode++ = b;
2420 }
2421
2422 #ifdef FEAT_MBYTE
2423 /*
2424  * Emit (if appropriate) a multi-byte character of code
2425  */
2426     static void
2427 regmbc(c)
2428     int         c;
2429 {
2430     if (regcode == JUST_CALC_SIZE)
2431         regsize += (*mb_char2len)(c);
2432     else
2433         regcode += (*mb_char2bytes)(c, regcode);
2434 }
2435 #endif
2436
2437 /*
2438  * reginsert - insert an operator in front of already-emitted operand
2439  *
2440  * Means relocating the operand.
2441  */
2442     static void
2443 reginsert(op, opnd)
2444     int         op;
2445     char_u     *opnd;
2446 {
2447     char_u      *src;
2448     char_u      *dst;
2449     char_u      *place;
2450
2451     if (regcode == JUST_CALC_SIZE)
2452     {
2453         regsize += 3;
2454         return;
2455     }
2456     src = regcode;
2457     regcode += 3;
2458     dst = regcode;
2459     while (src > opnd)
2460         *--dst = *--src;
2461
2462     place = opnd;               /* Op node, where operand used to be. */
2463     *place++ = op;
2464     *place++ = NUL;
2465     *place = NUL;
2466 }
2467
2468 /*
2469  * reginsert_limits - insert an operator in front of already-emitted operand.
2470  * The operator has the given limit values as operands.  Also set next pointer.
2471  *
2472  * Means relocating the operand.
2473  */
2474     static void
2475 reginsert_limits(op, minval, maxval, opnd)
2476     int         op;
2477     long        minval;
2478     long        maxval;
2479     char_u      *opnd;
2480 {
2481     char_u      *src;
2482     char_u      *dst;
2483     char_u      *place;
2484
2485     if (regcode == JUST_CALC_SIZE)
2486     {
2487         regsize += 11;
2488         return;
2489     }
2490     src = regcode;
2491     regcode += 11;
2492     dst = regcode;
2493     while (src > opnd)
2494         *--dst = *--src;
2495
2496     place = opnd;               /* Op node, where operand used to be. */
2497     *place++ = op;
2498     *place++ = NUL;
2499     *place++ = NUL;
2500     place = re_put_long(place, (long_u)minval);
2501     place = re_put_long(place, (long_u)maxval);
2502     regtail(opnd, place);
2503 }
2504
2505 /*
2506  * Write a long as four bytes at "p" and return pointer to the next char.
2507  */
2508     static char_u *
2509 re_put_long(p, val)
2510     char_u      *p;
2511     long_u      val;
2512 {
2513     *p++ = (char_u) ((val >> 24) & 0377);
2514     *p++ = (char_u) ((val >> 16) & 0377);
2515     *p++ = (char_u) ((val >> 8) & 0377);
2516     *p++ = (char_u) (val & 0377);
2517     return p;
2518 }
2519
2520 /*
2521  * regtail - set the next-pointer at the end of a node chain
2522  */
2523     static void
2524 regtail(p, val)
2525     char_u      *p;
2526     char_u      *val;
2527 {
2528     char_u      *scan;
2529     char_u      *temp;
2530     int         offset;
2531
2532     if (p == JUST_CALC_SIZE)
2533         return;
2534
2535     /* Find last node. */
2536     scan = p;
2537     for (;;)
2538     {
2539         temp = regnext(scan);
2540         if (temp == NULL)
2541             break;
2542         scan = temp;
2543     }
2544
2545     if (OP(scan) == BACK)
2546         offset = (int)(scan - val);
2547     else
2548         offset = (int)(val - scan);
2549     /* When the offset uses more than 16 bits it can no longer fit in the two
2550      * bytes avaliable.  Use a global flag to avoid having to check return
2551      * values in too many places. */
2552     if (offset > 0xffff)
2553         reg_toolong = TRUE;
2554     else
2555     {
2556         *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2557         *(scan + 2) = (char_u) (offset & 0377);
2558     }
2559 }
2560
2561 /*
2562  * regoptail - regtail on item after a BRANCH; nop if none
2563  */
2564     static void
2565 regoptail(p, val)
2566     char_u      *p;
2567     char_u      *val;
2568 {
2569     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2570     if (p == NULL || p == JUST_CALC_SIZE
2571             || (OP(p) != BRANCH
2572                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2573         return;
2574     regtail(OPERAND(p), val);
2575 }
2576
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below hold the state of that lexer.  They are reset by
 * initchr(), shifted by skipchr() and shifted back by ungetchr().
 */

/* static int       curchr; */
static int      prevprevchr;    /* value "prevchr" had before the last
                                 * skipchr() */
static int      prevchr;        /* value "curchr" had before the last
                                 * skipchr() */
static int      nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2593
2594     static void
2595 initchr(str)
2596     char_u *str;
2597 {
2598     regparse = str;
2599     prevchr_len = 0;
2600     curchr = prevprevchr = prevchr = nextchr = -1;
2601     at_start = TRUE;
2602     prev_at_start = FALSE;
2603 }
2604
/*
 * Return the next character of the pattern without consuming it.
 * The result is cached in "curchr": it is only computed when "curchr" is -1.
 * A character that is magic at this position, depending on "reg_magic" and
 * on what came before it, is returned as Magic(c).  A backslash toggles the
 * magicness of the character after it, or is translated together with it.
 * "regparse" is not advanced, that is done by skipchr().
 */
    static int
peekchr()
{
    /* Non-zero while handling the character after a backslash, see the
     * recursive call below. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* * is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
             * "\(\*" is not magic, thus must be magic if "after_slash" */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&", "\n" or "\%(".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* What follows the '^' is at the start again. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;

                /* ignore \c, \C, \m, \M and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
                    p += 2;
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     */
                    /* Call ourselves for the character after the backslash.
                     * "regparse" and "after_slash" are only changed for the
                     * duration of that call. */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        default:
            /* Any other byte may be the start of a multi-byte character. */
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
2763
2764 /*
2765  * Eat one lexed character.  Do this in a way that we can undo it.
2766  */
2767     static void
2768 skipchr()
2769 {
2770     /* peekchr() eats a backslash, do the same here */
2771     if (*regparse == '\\')
2772         prevchr_len = 1;
2773     else
2774         prevchr_len = 0;
2775     if (regparse[prevchr_len] != NUL)
2776     {
2777 #ifdef FEAT_MBYTE
2778         if (enc_utf8)
2779             /* exclude composing chars that mb_ptr2len does include */
2780             prevchr_len += utf_ptr2len(regparse + prevchr_len);
2781         else if (has_mbyte)
2782             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2783         else
2784 #endif
2785             ++prevchr_len;
2786     }
2787     regparse += prevchr_len;
2788     prev_at_start = at_start;
2789     at_start = FALSE;
2790     prevprevchr = prevchr;
2791     prevchr = curchr;
2792     curchr = nextchr;       /* use previously unget char, or -1 */
2793     nextchr = -1;
2794 }
2795
2796 /*
2797  * Skip a character while keeping the value of prev_at_start for at_start.
2798  * prevchr and prevprevchr are also kept.
2799  */
2800     static void
2801 skipchr_keepstart()
2802 {
2803     int as = prev_at_start;
2804     int pr = prevchr;
2805     int prpr = prevprevchr;
2806
2807     skipchr();
2808     at_start = as;
2809     prevchr = pr;
2810     prevprevchr = prpr;
2811 }
2812
/*
 * Return the next character of the pattern, as peekchr() does, and eat it.
 */
    static int
getchr()
{
    int c;

    c = peekchr();
    skipchr();
    return c;
}
2821
2822 /*
2823  * put character back.  Works only once!
2824  */
2825     static void
2826 ungetchr()
2827 {
2828     nextchr = curchr;
2829     curchr = prevchr;
2830     prevchr = prevprevchr;
2831     at_start = prev_at_start;
2832     prev_at_start = FALSE;
2833
2834     /* Backup regparse, so that it's at the same position as before the
2835      * getchr(). */
2836     regparse -= prevchr_len;
2837 }
2838
2839 /*
2840  * Get and return the value of the hex string at the current position.
2841  * Return -1 if there is no valid hex number.
2842  * The position is updated:
2843  *     blahblah\%x20asdf
2844  *         before-^ ^-after
2845  * The parameter controls the maximum number of input characters. This will be
2846  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2847  */
2848     static int
2849 gethexchrs(maxinputlen)
2850     int         maxinputlen;
2851 {
2852     int         nr = 0;
2853     int         c;
2854     int         i;
2855
2856     for (i = 0; i < maxinputlen; ++i)
2857     {
2858         c = regparse[0];
2859         if (!vim_isxdigit(c))
2860             break;
2861         nr <<= 4;
2862         nr |= hex2nr(c);
2863         ++regparse;
2864     }
2865
2866     if (i == 0)
2867         return -1;
2868     return nr;
2869 }
2870
2871 /*
2872  * get and return the value of the decimal string immediately after the
2873  * current position. Return -1 for invalid.  Consumes all digits.
2874  */
2875     static int
2876 getdecchrs()
2877 {
2878     int         nr = 0;
2879     int         c;
2880     int         i;
2881
2882     for (i = 0; ; ++i)
2883     {
2884         c = regparse[0];
2885         if (c < '0' || c > '9')
2886             break;
2887         nr *= 10;
2888         nr += c - '0';
2889         ++regparse;
2890     }
2891
2892     if (i == 0)
2893         return -1;
2894     return nr;
2895 }
2896
2897 /*
2898  * get and return the value of the octal string immediately after the current
2899  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2900  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2901  * treat 8 or 9 as recognised characters. Position is updated:
2902  *     blahblah\%o210asdf
2903  *         before-^  ^-after
2904  */
2905     static int
2906 getoctchrs()
2907 {
2908     int         nr = 0;
2909     int         c;
2910     int         i;
2911
2912     for (i = 0; i < 3 && nr < 040; ++i)
2913     {
2914         c = regparse[0];
2915         if (c < '0' || c > '7')
2916             break;
2917         nr <<= 3;
2918         nr |= hex2nr(c);
2919         ++regparse;
2920     }
2921
2922     if (i == 0)
2923         return -1;
2924     return nr;
2925 }
2926
2927 /*
2928  * Get a number after a backslash that is inside [].
2929  * When nothing is recognized return a backslash.
2930  */
2931     static int
2932 coll_get_char()
2933 {
2934     int     nr = -1;
2935
2936     switch (*regparse++)
2937     {
2938         case 'd': nr = getdecchrs(); break;
2939         case 'o': nr = getoctchrs(); break;
2940         case 'x': nr = gethexchrs(2); break;
2941         case 'u': nr = gethexchrs(4); break;
2942         case 'U': nr = gethexchrs(8); break;
2943     }
2944     if (nr < 0)
2945     {
2946         /* If getting the number fails be backwards compatible: the character
2947          * is a backslash. */
2948         --regparse;
2949         nr = '\\';
2950     }
2951     return nr;
2952 }
2953
2954 /*
2955  * read_limits - Read two integers to be taken as a minimum and maximum.
2956  * If the first character is '-', then the range is reversed.
2957  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2958  * missing, a very big number is the default.
2959  */
2960     static int
2961 read_limits(minval, maxval)
2962     long        *minval;
2963     long        *maxval;
2964 {
2965     int         reverse = FALSE;
2966     char_u      *first_char;
2967     long        tmp;
2968
2969     if (*regparse == '-')
2970     {
2971         /* Starts with '-', so reverse the range later */
2972         regparse++;
2973         reverse = TRUE;
2974     }
2975     first_char = regparse;
2976     *minval = getdigits(&regparse);
2977     if (*regparse == ',')           /* There is a comma */
2978     {
2979         if (vim_isdigit(*++regparse))
2980             *maxval = getdigits(&regparse);
2981         else
2982             *maxval = MAX_LIMIT;
2983     }
2984     else if (VIM_ISDIGIT(*first_char))
2985         *maxval = *minval;          /* It was \{n} or \{-n} */
2986     else
2987         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2988     if (*regparse == '\\')
2989         regparse++;     /* Allow either \{...} or \{...\} */
2990     if (*regparse != '}')
2991     {
2992         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2993                                           reg_magic == MAGIC_ALL ? "" : "\\");
2994         EMSG_RET_FAIL(IObuff);
2995     }
2996
2997     /*
2998      * Reverse the range if there was a '-', or make sure it is in the right
2999      * order otherwise.
3000      */
3001     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3002     {
3003         tmp = *minval;
3004         *minval = *maxval;
3005         *maxval = tmp;
3006     }
3007     skipchr();          /* let's be friends with the lexer again */
3008     return OK;
3009 }
3010
3011 /*
3012  * vim_regexec and friends
3013  */
3014
3015 /*
3016  * Global work variables for vim_regexec().
3017  */
3018
3019 /* The current match-position is remembered with these variables: */
3020 static linenr_T reglnum;        /* line number, relative to first line */
3021 static char_u   *regline;       /* start of current line */
3022 static char_u   *reginput;      /* current input, points into "regline" */
3023
3024 static int      need_clear_subexpr;     /* subexpressions still need to be
3025                                          * cleared */
3026 #ifdef FEAT_SYN_HL
3027 static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
3028                                                  * still need to be cleared */
3029 #endif
3030
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;             /* only one of the two is in use at a time */
    int         rs_len; /* length of "backpos" when the state was saved */
} regsave_T;
3045
/* struct to save start/end pointer/position in for \(\) */
typedef struct
{
    union
    {
        char_u  *ptr;   /* saved pointer, used when not REG_MULTI */
        lpos_T  pos;    /* saved position, used when REG_MULTI */
    } se_u;
} save_se_T;
3055
/*
 * used for BEHIND and NOBEHIND matching
 * NOTE(review): the member descriptions are derived from the names and the
 * save_subexpr()/restore_subexpr() prototypes only - confirm in those
 * functions.
 */
typedef struct regbehind_S
{
    regsave_T   save_after;     /* saved input state */
    regsave_T   save_behind;    /* saved input state */
    int         save_need_clear_subexpr; /* presumably a copy of
                                          * "need_clear_subexpr" */
    save_se_T   save_start[NSUBEXP];    /* one entry per sub-expression */
    save_se_T   save_end[NSUBEXP];      /* one entry per sub-expression */
} regbehind_T;
3065
3066 static char_u   *reg_getline __ARGS((linenr_T lnum));
3067 static long     vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3068 static long     regtry __ARGS((regprog_T *prog, colnr_T col));
3069 static void     cleanup_subexpr __ARGS((void));
3070 #ifdef FEAT_SYN_HL
3071 static void     cleanup_zsubexpr __ARGS((void));
3072 #endif
3073 static void     save_subexpr __ARGS((regbehind_T *bp));
3074 static void     restore_subexpr __ARGS((regbehind_T *bp));
3075 static void     reg_nextline __ARGS((void));
3076 static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
3077 static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
3078 static int      reg_save_equal __ARGS((regsave_T *save));
3079 static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3080 static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));
3081
/*
 * Save the sub-expressions before attempting a match.
 * Uses "posp" for a multi-line regexp and "pp" otherwise.
 * The expansion is in parentheses, so that it remains a single expression
 * wherever the macro is used.
 */
#define save_se(savep, posp, pp) \
    (REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)))
3085
/*
 * After a failed match restore the sub-expressions.
 * Restores "*posp" for a multi-line regexp and "*pp" otherwise.
 * Note that this expands to a block in braces and not to an expression: a
 * ';' after the invocation is an extra, empty statement.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3092
3093 static int      re_num_cmp __ARGS((long_u val, char_u *scan));
3094 static int      regmatch __ARGS((char_u *prog));
3095 static int      regrepeat __ARGS((char_u *p, long maxcount));
3096
3097 #ifdef DEBUG
3098 int             regnarrate = 0;
3099 #endif
3100
3101 /*
3102  * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
3103  * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3104  * contains '\c' or '\C' the value is overruled.
3105  */
3106 static int      ireg_ic;
3107
3108 #ifdef FEAT_MBYTE
3109 /*
3110  * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
3111  * in the regexp.  Defaults to false, always.
3112  */
3113 static int      ireg_icombine;
3114 #endif
3115
3116 /*
3117  * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
3118  * there is no maximum.
3119  */
3120 static colnr_T  ireg_maxcol;
3121
3122 /*
3123  * Sometimes need to save a copy of a line.  Since alloc()/free() is very
3124  * slow, we keep one allocated piece of memory and only re-allocate it when
3125  * it's too small.  It's freed in vim_regexec_both() when finished.
3126  */
3127 static char_u   *reg_tofree = NULL;
3128 static unsigned reg_tofreelen;
3129
3130 /*
3131  * These variables are set when executing a regexp to speed up the execution.
3132  * Which ones are set depends on whether a single-line or multi-line match is
3133  * done:
3134  *                      single-line             multi-line
3135  * reg_match            &regmatch_T             NULL
3136  * reg_mmatch           NULL                    &regmmatch_T
3137  * reg_startp           reg_match->startp       <invalid>
3138  * reg_endp             reg_match->endp         <invalid>
3139  * reg_startpos         <invalid>               reg_mmatch->startpos
3140  * reg_endpos           <invalid>               reg_mmatch->endpos
3141  * reg_win              NULL                    window in which to search
3142  * reg_buf              <invalid>               buffer in which to search
3143  * reg_firstlnum        <invalid>               first line in which to search
3144  * reg_maxline          0                       last line nr
3145  * reg_line_lbr         FALSE or TRUE           FALSE
3146  */
3147 static regmatch_T       *reg_match;
3148 static regmmatch_T      *reg_mmatch;
3149 static char_u           **reg_startp = NULL;
3150 static char_u           **reg_endp = NULL;
3151 static lpos_T           *reg_startpos = NULL;
3152 static lpos_T           *reg_endpos = NULL;
3153 static win_T            *reg_win;
3154 static buf_T            *reg_buf;
3155 static linenr_T         reg_firstlnum;
3156 static linenr_T         reg_maxline;
3157 static int              reg_line_lbr;       /* "\n" in string is line break */
3158
/*
 * Values for rs_state in regitem_T.
 * The two RS_Z* values only exist when FEAT_SYN_HL is defined.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3179
/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;
        regsave_T  regsave;
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr or BEHIND/NOBEHIND */
} regitem_T;
3197
3198 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3199 static void regstack_pop __ARGS((char_u **scan));
3200
/*
 * used for STAR, PLUS and BRACE_SIMPLE matching
 * NOTE(review): the descriptions of "count", "minval" and "maxval" are
 * derived from the names only - confirm in regmatch().
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;          /* presumably the number of matches */
    long        minval;         /* presumably the lowest count allowed */
    long        maxval;         /* presumably the highest count allowed */
} regstar_T;
3210
/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3218
3219 /*
3220  * "regstack" and "backpos" are used by regmatch().  They are kept over calls
3221  * to avoid invoking malloc() and free() often.
3222  * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3223  * or regbehind_T.
 * "backpos" is a table with backpos_T items, used for BACK
3225  */
3226 static garray_T regstack = {0, 0, 0, 0, NULL};
3227 static garray_T backpos = {0, 0, 0, 0, NULL};
3228
3229 /*
3230  * Both for regstack and backpos tables we use the following strategy of
3231  * allocation (to reduce malloc/free calls):
3232  * - Initial size is fairly small.
3233  * - When needed, the tables are grown bigger (8 times at first, double after
3234  *   that).
3235  * - After executing the match we free the memory only if the array has grown.
3236  *   Thus the memory is kept allocated when it's at the initial size.
3237  * This makes it fast while not keeping a lot of memory allocated.
3238  * A three times speed increase was observed when using many simple patterns.
3239  */
3240 #define REGSTACK_INITIAL        2048
3241 #define BACKPOS_INITIAL         64
3242
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that the regexp code keeps allocated: the "regstack" and
 * "backpos" arrays, "reg_tofree" and "reg_prev_sub".
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
3253
3254 /*
3255  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3256  */
3257     static char_u *
3258 reg_getline(lnum)
3259     linenr_T    lnum;
3260 {
3261     /* when looking behind for a match/no-match lnum is negative.  But we
3262      * can't go before line 1 */
3263     if (reg_firstlnum + lnum < 1)
3264         return NULL;
3265     if (lnum > reg_maxline)
3266         /* Must have matched the "\n" in the last line. */
3267         return (char_u *)"";
3268     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3269 }
3270
3271 static regsave_T behind_pos;
3272
3273 #ifdef FEAT_SYN_HL
3274 static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
3275 static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
3276 static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3277 static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
3278 #endif
3279
3280 /* TRUE if using multi-line regexp. */
3281 #define REG_MULTI       (reg_match == NULL)
3282
3283 /*
3284  * Match a regexp against a string.
3285  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3286  * Uses curbuf for line count and 'iskeyword'.
3287  *
3288  * Return TRUE if there is a match, FALSE if not.
3289  */
3290     int
3291 vim_regexec(rmp, line, col)
3292     regmatch_T  *rmp;
3293     char_u      *line;  /* string to match against */
3294     colnr_T     col;    /* column to start looking for match */
3295 {
3296     reg_match = rmp;
3297     reg_mmatch = NULL;
3298     reg_maxline = 0;
3299     reg_line_lbr = FALSE;
3300     reg_win = NULL;
3301     ireg_ic = rmp->rm_ic;
3302 #ifdef FEAT_MBYTE
3303     ireg_icombine = FALSE;
3304 #endif
3305     ireg_maxcol = 0;
3306     return (vim_regexec_both(line, col, NULL) != 0);
3307 }
3308
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        nlines;

    /* Set up the work variables for single-line matching. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;        /* the only difference with vim_regexec() */

    ireg_maxcol = 0;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    nlines = vim_regexec_both(line, col, NULL);
    return (nlines != 0);
}
#endif
3333
3334 /*
3335  * Match a regexp against multiple lines.
3336  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3337  * Uses curbuf for line count and 'iskeyword'.
3338  *
3339  * Return zero if there is no match.  Return number of lines contained in the
3340  * match otherwise.
3341  */
3342     long
3343 vim_regexec_multi(rmp, win, buf, lnum, col, tm)
3344     regmmatch_T *rmp;
3345     win_T       *win;           /* window in which to search or NULL */
3346     buf_T       *buf;           /* buffer in which to search */
3347     linenr_T    lnum;           /* nr of line to start looking for match */
3348     colnr_T     col;            /* column to start looking for match */
3349     proftime_T  *tm;            /* timeout limit or NULL */
3350 {
3351     long        r;
3352     buf_T       *save_curbuf = curbuf;
3353
3354     reg_match = NULL;
3355     reg_mmatch = rmp;
3356     reg_buf = buf;
3357     reg_win = win;
3358     reg_firstlnum = lnum;
3359     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3360     reg_line_lbr = FALSE;
3361     ireg_ic = rmp->rmm_ic;
3362 #ifdef FEAT_MBYTE
3363     ireg_icombine = FALSE;
3364 #endif
3365     ireg_maxcol = rmp->rmm_maxcol;
3366
3367     /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3368     curbuf = buf;
3369     r = vim_regexec_both(NULL, col, tm);
3370     curbuf = save_curbuf;
3371
3372     return r;
3373 }
3374
3375 /*
3376  * Match a regexp against a string ("line" points to the string) or multiple
3377  * lines ("line" is NULL, use reg_getline()).
3378  */
3379     static long
3380 vim_regexec_both(line, col, tm)
3381     char_u      *line;
3382     colnr_T     col;            /* column to start looking for match */
3383     proftime_T  *tm UNUSED;     /* timeout limit or NULL */
3384 {
3385     regprog_T   *prog;
3386     char_u      *s;
3387     long        retval = 0L;
3388
3389     /* Create "regstack" and "backpos" if they are not allocated yet.
3390      * We allocate *_INITIAL amount of bytes first and then set the grow size
3391      * to much bigger value to avoid many malloc calls in case of deep regular
3392      * expressions.  */
3393     if (regstack.ga_data == NULL)
3394     {
3395         /* Use an item size of 1 byte, since we push different things
3396          * onto the regstack. */
3397         ga_init2(&regstack, 1, REGSTACK_INITIAL);
3398         ga_grow(&regstack, REGSTACK_INITIAL);
3399         regstack.ga_growsize = REGSTACK_INITIAL * 8;
3400     }
3401
3402     if (backpos.ga_data == NULL)
3403     {
3404         ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3405         ga_grow(&backpos, BACKPOS_INITIAL);
3406         backpos.ga_growsize = BACKPOS_INITIAL * 8;
3407     }
3408
3409     if (REG_MULTI)
3410     {
3411         prog = reg_mmatch->regprog;
3412         line = reg_getline((linenr_T)0);
3413         reg_startpos = reg_mmatch->startpos;
3414         reg_endpos = reg_mmatch->endpos;
3415     }
3416     else
3417     {
3418         prog = reg_match->regprog;
3419         reg_startp = reg_match->startp;
3420         reg_endp = reg_match->endp;
3421     }
3422
3423     /* Be paranoid... */
3424     if (prog == NULL || line == NULL)
3425     {
3426         EMSG(_(e_null));
3427         goto theend;
3428     }
3429
3430     /* Check validity of program. */
3431     if (prog_magic_wrong())
3432         goto theend;
3433
3434     /* If the start column is past the maximum column: no need to try. */
3435     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3436         goto theend;
3437
3438     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3439     if (prog->regflags & RF_ICASE)
3440         ireg_ic = TRUE;
3441     else if (prog->regflags & RF_NOICASE)
3442         ireg_ic = FALSE;
3443
3444 #ifdef FEAT_MBYTE
3445     /* If pattern contains "\Z" overrule value of ireg_icombine */
3446     if (prog->regflags & RF_ICOMBINE)
3447         ireg_icombine = TRUE;
3448 #endif
3449
3450     /* If there is a "must appear" string, look for it. */
3451     if (prog->regmust != NULL)
3452     {
3453         int c;
3454
3455 #ifdef FEAT_MBYTE
3456         if (has_mbyte)
3457             c = (*mb_ptr2char)(prog->regmust);
3458         else
3459 #endif
3460             c = *prog->regmust;
3461         s = line + col;
3462
3463         /*
3464          * This is used very often, esp. for ":global".  Use three versions of
3465          * the loop to avoid overhead of conditions.
3466          */
3467         if (!ireg_ic
3468 #ifdef FEAT_MBYTE
3469                 && !has_mbyte
3470 #endif
3471                 )
3472             while ((s = vim_strbyte(s, c)) != NULL)
3473             {
3474                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3475                     break;              /* Found it. */
3476                 ++s;
3477             }
3478 #ifdef FEAT_MBYTE
3479         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3480             while ((s = vim_strchr(s, c)) != NULL)
3481             {
3482                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3483                     break;              /* Found it. */
3484                 mb_ptr_adv(s);
3485             }
3486 #endif
3487         else
3488             while ((s = cstrchr(s, c)) != NULL)
3489             {
3490                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3491                     break;              /* Found it. */
3492                 mb_ptr_adv(s);
3493             }
3494         if (s == NULL)          /* Not present. */
3495             goto theend;
3496     }
3497
3498     regline = line;
3499     reglnum = 0;
3500
3501     /* Simplest case: Anchored match need be tried only once. */
3502     if (prog->reganch)
3503     {
3504         int     c;
3505
3506 #ifdef FEAT_MBYTE
3507         if (has_mbyte)
3508             c = (*mb_ptr2char)(regline + col);
3509         else
3510 #endif
3511             c = regline[col];
3512         if (prog->regstart == NUL
3513                 || prog->regstart == c
3514                 || (ireg_ic && ((
3515 #ifdef FEAT_MBYTE
3516                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3517                         || (c < 255 && prog->regstart < 255 &&
3518 #endif
3519                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3520             retval = regtry(prog, col);
3521         else
3522             retval = 0;
3523     }
3524     else
3525     {
3526 #ifdef FEAT_RELTIME
3527         int tm_count = 0;
3528 #endif
3529         /* Messy cases:  unanchored match. */
3530         while (!got_int)
3531         {
3532             if (prog->regstart != NUL)
3533             {
3534                 /* Skip until the char we know it must start with.
3535                  * Used often, do some work to avoid call overhead. */
3536                 if (!ireg_ic
3537 #ifdef FEAT_MBYTE
3538                             && !has_mbyte
3539 #endif
3540                             )
3541                     s = vim_strbyte(regline + col, prog->regstart);
3542                 else
3543                     s = cstrchr(regline + col, prog->regstart);
3544                 if (s == NULL)
3545                 {
3546                     retval = 0;
3547                     break;
3548                 }
3549                 col = (int)(s - regline);
3550             }
3551
3552             /* Check for maximum column to try. */
3553             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3554             {
3555                 retval = 0;
3556                 break;
3557             }
3558
3559             retval = regtry(prog, col);
3560             if (retval > 0)
3561                 break;
3562
3563             /* if not currently on the first line, get it again */
3564             if (reglnum != 0)
3565             {
3566                 reglnum = 0;
3567                 regline = reg_getline((linenr_T)0);
3568             }
3569             if (regline[col] == NUL)
3570                 break;
3571 #ifdef FEAT_MBYTE
3572             if (has_mbyte)
3573                 col += (*mb_ptr2len)(regline + col);
3574             else
3575 #endif
3576                 ++col;
3577 #ifdef FEAT_RELTIME
3578             /* Check for timeout once in a twenty times to avoid overhead. */
3579             if (tm != NULL && ++tm_count == 20)
3580             {
3581                 tm_count = 0;
3582                 if (profile_passed_limit(tm))
3583                     break;
3584             }
3585 #endif
3586         }
3587     }
3588
3589 theend:
3590     /* Free "reg_tofree" when it's a bit big.
3591      * Free regstack and backpos if they are bigger than their initial size. */
3592     if (reg_tofreelen > 400)
3593     {
3594         vim_free(reg_tofree);
3595         reg_tofree = NULL;
3596     }
3597     if (regstack.ga_maxlen > REGSTACK_INITIAL)
3598         ga_clear(&regstack);
3599     if (backpos.ga_maxlen > BACKPOS_INITIAL)
3600         ga_clear(&backpos);
3601
3602     return retval;
3603 }
3604
3605 #ifdef FEAT_SYN_HL
3606 static reg_extmatch_T *make_extmatch __ARGS((void));
3607
3608 /*
3609  * Create a new extmatch and mark it as referenced once.
3610  */
3611     static reg_extmatch_T *
3612 make_extmatch()
3613 {
3614     reg_extmatch_T      *em;
3615
3616     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3617     if (em != NULL)
3618         em->refcnt = 1;
3619     return em;
3620 }
3621
3622 /*
3623  * Add a reference to an extmatch.
3624  */
3625     reg_extmatch_T *
3626 ref_extmatch(em)
3627     reg_extmatch_T      *em;
3628 {
3629     if (em != NULL)
3630         em->refcnt++;
3631     return em;
3632 }
3633
3634 /*
3635  * Remove a reference to an extmatch.  If there are no references left, free
3636  * the info.
3637  */
3638     void
3639 unref_extmatch(em)
3640     reg_extmatch_T      *em;
3641 {
3642     int i;
3643
3644     if (em != NULL && --em->refcnt <= 0)
3645     {
3646         for (i = 0; i < NSUBEXP; ++i)
3647             vim_free(em->matches[i]);
3648         vim_free(em);
3649     }
3650 }
3651 #endif
3652
3653 /*
3654  * regtry - try match of "prog" with at regline["col"].
3655  * Returns 0 for failure, number of lines contained in the match otherwise.
3656  */
3657     static long
3658 regtry(prog, col)
3659     regprog_T   *prog;
3660     colnr_T     col;
3661 {
3662     reginput = regline + col;
3663     need_clear_subexpr = TRUE;
3664 #ifdef FEAT_SYN_HL
3665     /* Clear the external match subpointers if necessary. */
3666     if (prog->reghasz == REX_SET)
3667         need_clear_zsubexpr = TRUE;
3668 #endif
3669
3670     if (regmatch(prog->program + 1) == 0)
3671         return 0;
3672
3673     cleanup_subexpr();
3674     if (REG_MULTI)
3675     {
3676         if (reg_startpos[0].lnum < 0)
3677         {
3678             reg_startpos[0].lnum = 0;
3679             reg_startpos[0].col = col;
3680         }
3681         if (reg_endpos[0].lnum < 0)
3682         {
3683             reg_endpos[0].lnum = reglnum;
3684             reg_endpos[0].col = (int)(reginput - regline);
3685         }
3686         else
3687             /* Use line number of "\ze". */
3688             reglnum = reg_endpos[0].lnum;
3689     }
3690     else
3691     {
3692         if (reg_startp[0] == NULL)
3693             reg_startp[0] = regline + col;
3694         if (reg_endp[0] == NULL)
3695             reg_endp[0] = reginput;
3696     }
3697 #ifdef FEAT_SYN_HL
3698     /* Package any found \z(...\) matches for export. Default is none. */
3699     unref_extmatch(re_extmatch_out);
3700     re_extmatch_out = NULL;
3701
3702     if (prog->reghasz == REX_SET)
3703     {
3704         int             i;
3705
3706         cleanup_zsubexpr();
3707         re_extmatch_out = make_extmatch();
3708         for (i = 0; i < NSUBEXP; i++)
3709         {
3710             if (REG_MULTI)
3711             {
3712                 /* Only accept single line matches. */
3713                 if (reg_startzpos[i].lnum >= 0
3714                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3715                     re_extmatch_out->matches[i] =
3716                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3717                                                        + reg_startzpos[i].col,
3718                                    reg_endzpos[i].col - reg_startzpos[i].col);
3719             }
3720             else
3721             {
3722                 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3723                     re_extmatch_out->matches[i] =
3724                             vim_strnsave(reg_startzp[i],
3725                                         (int)(reg_endzp[i] - reg_startzp[i]));
3726             }
3727         }
3728     }
3729 #endif
3730     return 1 + reglnum;
3731 }
3732
3733 #ifdef FEAT_MBYTE
3734 static int reg_prev_class __ARGS((void));
3735
3736 /*
3737  * Get class of previous character.
3738  */
3739     static int
3740 reg_prev_class()
3741 {
3742     if (reginput > regline)
3743         return mb_get_class(reginput - 1
3744                                      - (*mb_head_off)(regline, reginput - 1));
3745     return -1;
3746 }
3747
3748 #endif
/* Move "reginput" forward by applying mb_ptr_adv() to it.  regmatch() uses
 * this after an item matched the character at "reginput". */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 * NOTE(review): going by the names these are the minimum and maximum of the
 * limits -- confirm where BRACE_LIMITS is handled.
 */
static long     bl_minval;
static long     bl_maxval;
3758
3759 /*
3760  * regmatch - main matching routine
3761  *
3762  * Conceptually the strategy is simple: Check to see whether the current node
3763  * matches, push an item onto the regstack and loop to see whether the rest
3764  * matches, and then act accordingly.  In practice we make some effort to
3765  * avoid using the regstack, in particular by going through "ordinary" nodes
3766  * (that don't need to know whether the rest of the match failed) by a nested
3767  * loop.
3768  *
3769  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3770  * the last matched character.
3771  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3772  * undefined state!
3773  */
3774     static int
3775 regmatch(scan)
3776     char_u      *scan;          /* Current node. */
3777 {
3778   char_u        *next;          /* Next node. */
3779   int           op;
3780   int           c;
3781   regitem_T     *rp;
3782   int           no;
3783   int           status;         /* one of the RA_ values: */
3784 #define RA_FAIL         1       /* something failed, abort */
3785 #define RA_CONT         2       /* continue in inner loop */
3786 #define RA_BREAK        3       /* break inner loop */
3787 #define RA_MATCH        4       /* successful match */
3788 #define RA_NOMATCH      5       /* didn't match */
3789
3790   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
3791    * vim_regexec_both() to reduce malloc()/free() calls. */
3792   regstack.ga_len = 0;
3793   backpos.ga_len = 0;
3794
3795   /*
3796    * Repeat until "regstack" is empty.
3797    */
3798   for (;;)
3799   {
    /* Some patterns may take a long time to match, even though they are not
     * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3802     fast_breakcheck();
3803
3804 #ifdef DEBUG
3805     if (scan != NULL && regnarrate)
3806     {
3807         mch_errmsg(regprop(scan));
3808         mch_errmsg("(\n");
3809     }
3810 #endif
3811
3812     /*
3813      * Repeat for items that can be matched sequentially, without using the
3814      * regstack.
3815      */
3816     for (;;)
3817     {
3818         if (got_int || scan == NULL)
3819         {
3820             status = RA_FAIL;
3821             break;
3822         }
3823         status = RA_CONT;
3824
3825 #ifdef DEBUG
3826         if (regnarrate)
3827         {
3828             mch_errmsg(regprop(scan));
3829             mch_errmsg("...\n");
3830 # ifdef FEAT_SYN_HL
3831             if (re_extmatch_in != NULL)
3832             {
3833                 int i;
3834
3835                 mch_errmsg(_("External submatches:\n"));
3836                 for (i = 0; i < NSUBEXP; i++)
3837                 {
3838                     mch_errmsg("    \"");
3839                     if (re_extmatch_in->matches[i] != NULL)
3840                         mch_errmsg(re_extmatch_in->matches[i]);
3841                     mch_errmsg("\"\n");
3842                 }
3843             }
3844 # endif
3845         }
3846 #endif
3847         next = regnext(scan);
3848
3849         op = OP(scan);
3850         /* Check for character class with NL added. */
3851         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3852                                 && *reginput == NUL && reglnum <= reg_maxline)
3853         {
3854             reg_nextline();
3855         }
3856         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3857         {
3858             ADVANCE_REGINPUT();
3859         }
3860         else
3861         {
3862           if (WITH_NL(op))
3863               op -= ADD_NL;
3864 #ifdef FEAT_MBYTE
3865           if (has_mbyte)
3866               c = (*mb_ptr2char)(reginput);
3867           else
3868 #endif
3869               c = *reginput;
3870           switch (op)
3871           {
3872           case BOL:
3873             if (reginput != regline)
3874                 status = RA_NOMATCH;
3875             break;
3876
3877           case EOL:
3878             if (c != NUL)
3879                 status = RA_NOMATCH;
3880             break;
3881
3882           case RE_BOF:
3883             /* We're not at the beginning of the file when below the first
3884              * line where we started, not at the start of the line or we
3885              * didn't start at the first line of the buffer. */
3886             if (reglnum != 0 || reginput != regline
3887                                           || (REG_MULTI && reg_firstlnum > 1))
3888                 status = RA_NOMATCH;
3889             break;
3890
3891           case RE_EOF:
3892             if (reglnum != reg_maxline || c != NUL)
3893                 status = RA_NOMATCH;
3894             break;
3895
3896           case CURSOR:
3897             /* Check if the buffer is in a window and compare the
3898              * reg_win->w_cursor position to the match position. */
3899             if (reg_win == NULL
3900                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3901                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3902                 status = RA_NOMATCH;
3903             break;
3904
3905           case RE_MARK:
3906             /* Compare the mark position to the match position.  NOTE: Always
3907              * uses the current buffer. */
3908             {
3909                 int     mark = OPERAND(scan)[0];
3910                 int     cmp = OPERAND(scan)[1];
3911                 pos_T   *pos;
3912
3913                 pos = getmark(mark, FALSE);
3914                 if (pos == NULL              /* mark doesn't exist */
3915                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3916                         || (pos->lnum == reglnum + reg_firstlnum
3917                                 ? (pos->col == (colnr_T)(reginput - regline)
3918                                     ? (cmp == '<' || cmp == '>')
3919                                     : (pos->col < (colnr_T)(reginput - regline)
3920                                         ? cmp != '>'
3921                                         : cmp != '<'))
3922                                 : (pos->lnum < reglnum + reg_firstlnum
3923                                     ? cmp != '>'
3924                                     : cmp != '<')))
3925                     status = RA_NOMATCH;
3926             }
3927             break;
3928
3929           case RE_VISUAL:
3930 #ifdef FEAT_VISUAL
3931             /* Check if the buffer is the current buffer. and whether the
3932              * position is inside the Visual area. */
3933             if (reg_buf != curbuf || VIsual.lnum == 0)
3934                 status = RA_NOMATCH;
3935             else
3936             {
3937                 pos_T       top, bot;
3938                 linenr_T    lnum;
3939                 colnr_T     col;
3940                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3941                 int         mode;
3942
3943                 if (VIsual_active)
3944                 {
3945                     if (lt(VIsual, wp->w_cursor))
3946                     {
3947                         top = VIsual;
3948                         bot = wp->w_cursor;
3949                     }
3950                     else
3951                     {
3952                         top = wp->w_cursor;
3953                         bot = VIsual;
3954                     }
3955                     mode = VIsual_mode;
3956                 }
3957                 else
3958                 {
3959                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3960                     {
3961                         top = curbuf->b_visual.vi_start;
3962                         bot = curbuf->b_visual.vi_end;
3963                     }
3964                     else
3965                     {
3966                         top = curbuf->b_visual.vi_end;
3967                         bot = curbuf->b_visual.vi_start;
3968                     }
3969                     mode = curbuf->b_visual.vi_mode;
3970                 }
3971                 lnum = reglnum + reg_firstlnum;
3972                 col = (colnr_T)(reginput - regline);
3973                 if (lnum < top.lnum || lnum > bot.lnum)
3974                     status = RA_NOMATCH;
3975                 else if (mode == 'v')
3976                 {
3977                     if ((lnum == top.lnum && col < top.col)
3978                             || (lnum == bot.lnum
3979                                          && col >= bot.col + (*p_sel != 'e')))
3980                         status = RA_NOMATCH;
3981                 }
3982                 else if (mode == Ctrl_V)
3983                 {
3984                     colnr_T         start, end;
3985                     colnr_T         start2, end2;
3986                     colnr_T         cols;
3987
3988                     getvvcol(wp, &top, &start, NULL, &end);
3989                     getvvcol(wp, &bot, &start2, NULL, &end2);
3990                     if (start2 < start)
3991                         start = start2;
3992                     if (end2 > end)
3993                         end = end2;
3994                     if (top.col == MAXCOL || bot.col == MAXCOL)
3995                         end = MAXCOL;
3996                     cols = win_linetabsize(wp,
3997                                       regline, (colnr_T)(reginput - regline));
3998                     if (cols < start || cols > end - (*p_sel == 'e'))
3999                         status = RA_NOMATCH;
4000                 }
4001             }
4002 #else
4003             status = RA_NOMATCH;
4004 #endif
4005             break;
4006
4007           case RE_LNUM:
4008             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4009                                                                         scan))
4010                 status = RA_NOMATCH;
4011             break;
4012
4013           case RE_COL:
4014             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
4015                 status = RA_NOMATCH;
4016             break;
4017
4018           case RE_VCOL:
4019             if (!re_num_cmp((long_u)win_linetabsize(
4020                             reg_win == NULL ? curwin : reg_win,
4021                             regline, (colnr_T)(reginput - regline)) + 1, scan))
4022                 status = RA_NOMATCH;
4023             break;
4024
4025           case BOW:     /* \<word; reginput points to w */
4026             if (c == NUL)       /* Can't match at end of line */
4027                 status = RA_NOMATCH;
4028 #ifdef FEAT_MBYTE
4029             else if (has_mbyte)
4030             {
4031                 int this_class;
4032
4033                 /* Get class of current and previous char (if it exists). */
4034                 this_class = mb_get_class(reginput);
4035                 if (this_class <= 1)
4036                     status = RA_NOMATCH;  /* not on a word at all */
4037                 else if (reg_prev_class() == this_class)
4038                     status = RA_NOMATCH;  /* previous char is in same word */
4039             }
4040 #endif
4041             else
4042             {
4043                 if (!vim_iswordc(c)
4044                         || (reginput > regline && vim_iswordc(reginput[-1])))
4045                     status = RA_NOMATCH;
4046             }
4047             break;
4048
4049           case EOW:     /* word\>; reginput points after d */
4050             if (reginput == regline)    /* Can't match at start of line */
4051                 status = RA_NOMATCH;
4052 #ifdef FEAT_MBYTE
4053             else if (has_mbyte)
4054             {
4055                 int this_class, prev_class;
4056
4057                 /* Get class of current and previous char (if it exists). */
4058                 this_class = mb_get_class(reginput);
4059                 prev_class = reg_prev_class();
4060                 if (this_class == prev_class
4061                         || prev_class == 0 || prev_class == 1)
4062                     status = RA_NOMATCH;
4063             }
4064 #endif
4065             else
4066             {
4067                 if (!vim_iswordc(reginput[-1])
4068                         || (reginput[0] != NUL && vim_iswordc(c)))
4069                     status = RA_NOMATCH;
4070             }
4071             break; /* Matched with EOW */
4072
4073           case ANY:
4074             if (c == NUL)
4075                 status = RA_NOMATCH;
4076             else
4077                 ADVANCE_REGINPUT();
4078             break;
4079
4080           case IDENT:
4081             if (!vim_isIDc(c))
4082                 status = RA_NOMATCH;
4083             else
4084                 ADVANCE_REGINPUT();
4085             break;
4086
4087           case SIDENT:
4088             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4089                 status = RA_NOMATCH;
4090             else
4091                 ADVANCE_REGINPUT();
4092             break;
4093
4094           case KWORD:
4095             if (!vim_iswordp(reginput))
4096                 status = RA_NOMATCH;
4097             else
4098                 ADVANCE_REGINPUT();
4099             break;
4100
4101           case SKWORD:
4102             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4103                 status = RA_NOMATCH;
4104             else
4105                 ADVANCE_REGINPUT();
4106             break;
4107
4108           case FNAME:
4109             if (!vim_isfilec(c))
4110                 status = RA_NOMATCH;
4111             else
4112                 ADVANCE_REGINPUT();
4113             break;
4114
4115           case SFNAME:
4116             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4117                 status = RA_NOMATCH;
4118             else
4119                 ADVANCE_REGINPUT();
4120             break;
4121
4122           case PRINT:
4123             if (ptr2cells(reginput) != 1)
4124                 status = RA_NOMATCH;
4125             else
4126                 ADVANCE_REGINPUT();
4127             break;
4128
4129           case SPRINT:
4130             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4131                 status = RA_NOMATCH;
4132             else
4133                 ADVANCE_REGINPUT();
4134             break;
4135
4136           case WHITE:
4137             if (!vim_iswhite(c))
4138                 status = RA_NOMATCH;
4139             else
4140                 ADVANCE_REGINPUT();
4141             break;
4142
4143           case NWHITE:
4144             if (c == NUL || vim_iswhite(c))
4145                 status = RA_NOMATCH;
4146             else
4147                 ADVANCE_REGINPUT();
4148             break;
4149
4150           case DIGIT:
4151             if (!ri_digit(c))
4152                 status = RA_NOMATCH;
4153             else
4154                 ADVANCE_REGINPUT();
4155             break;
4156
4157           case NDIGIT:
4158             if (c == NUL || ri_digit(c))
4159                 status = RA_NOMATCH;
4160             else
4161                 ADVANCE_REGINPUT();
4162             break;
4163
4164           case HEX:
4165             if (!ri_hex(c))
4166                 status = RA_NOMATCH;
4167             else
4168                 ADVANCE_REGINPUT();
4169             break;
4170
4171           case NHEX:
4172             if (c == NUL || ri_hex(c))
4173                 status = RA_NOMATCH;
4174             else
4175                 ADVANCE_REGINPUT();
4176             break;
4177
4178           case OCTAL:
4179             if (!ri_octal(c))
4180                 status = RA_NOMATCH;
4181             else
4182                 ADVANCE_REGINPUT();
4183             break;
4184
4185           case NOCTAL:
4186             if (c == NUL || ri_octal(c))
4187                 status = RA_NOMATCH;
4188             else
4189                 ADVANCE_REGINPUT();
4190             break;
4191
4192           case WORD:
4193             if (!ri_word(c))
4194                 status = RA_NOMATCH;
4195             else
4196                 ADVANCE_REGINPUT();
4197             break;
4198
4199           case NWORD:
4200             if (c == NUL || ri_word(c))
4201                 status = RA_NOMATCH;
4202             else
4203                 ADVANCE_REGINPUT();
4204             break;
4205
4206           case HEAD:
4207             if (!ri_head(c))
4208                 status = RA_NOMATCH;
4209             else
4210                 ADVANCE_REGINPUT();
4211             break;
4212
4213           case NHEAD:
4214             if (c == NUL || ri_head(c))
4215                 status = RA_NOMATCH;
4216             else
4217                 ADVANCE_REGINPUT();
4218             break;
4219
4220           case ALPHA:
4221             if (!ri_alpha(c))
4222                 status = RA_NOMATCH;
4223             else
4224                 ADVANCE_REGINPUT();
4225             break;
4226
4227           case NALPHA:
4228             if (c == NUL || ri_alpha(c))
4229                 status = RA_NOMATCH;
4230             else
4231                 ADVANCE_REGINPUT();
4232             break;
4233
4234           case LOWER:
4235             if (!ri_lower(c))
4236                 status = RA_NOMATCH;
4237             else
4238                 ADVANCE_REGINPUT();
4239             break;
4240
4241           case NLOWER:
4242             if (c == NUL || ri_lower(c))
4243                 status = RA_NOMATCH;
4244             else
4245                 ADVANCE_REGINPUT();
4246             break;
4247
4248           case UPPER:
4249             if (!ri_upper(c))
4250                 status = RA_NOMATCH;
4251             else
4252                 ADVANCE_REGINPUT();
4253             break;
4254
4255           case NUPPER:
4256             if (c == NUL || ri_upper(c))
4257                 status = RA_NOMATCH;
4258             else
4259                 ADVANCE_REGINPUT();
4260             break;
4261
4262           case EXACTLY:
4263             {
4264                 int     len;
4265                 char_u  *opnd;
4266
4267                 opnd = OPERAND(scan);
4268                 /* Inline the first byte, for speed. */
4269                 if (*opnd != *reginput
4270                         && (!ireg_ic || (
4271 #ifdef FEAT_MBYTE
4272                             !enc_utf8 &&
4273 #endif
4274                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4275                     status = RA_NOMATCH;
4276                 else if (*opnd == NUL)
4277                 {
4278                     /* match empty string always works; happens when "~" is
4279                      * empty. */
4280                 }
4281                 else if (opnd[1] == NUL
4282 #ifdef FEAT_MBYTE
4283                             && !(enc_utf8 && ireg_ic)
4284 #endif
4285                         )
4286                     ++reginput;         /* matched a single char */
4287                 else
4288                 {
4289                     len = (int)STRLEN(opnd);
4290                     /* Need to match first byte again for multi-byte. */
4291                     if (cstrncmp(opnd, reginput, &len) != 0)
4292                         status = RA_NOMATCH;
4293 #ifdef FEAT_MBYTE
4294                     /* Check for following composing character. */
4295                     else if (enc_utf8
4296                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4297                     {
4298                         /* raaron: This code makes a composing character get
4299                          * ignored, which is the correct behavior (sometimes)
4300                          * for voweled Hebrew texts. */
4301                         if (!ireg_icombine)
4302                             status = RA_NOMATCH;
4303                     }
4304 #endif
4305                     else
4306                         reginput += len;
4307                 }
4308             }
4309             break;
4310
4311           case ANYOF:
4312           case ANYBUT:
4313             if (c == NUL)
4314                 status = RA_NOMATCH;
4315             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4316                 status = RA_NOMATCH;
4317             else
4318                 ADVANCE_REGINPUT();
4319             break;
4320
4321 #ifdef FEAT_MBYTE
4322           case MULTIBYTECODE:
4323             if (has_mbyte)
4324             {
4325                 int     i, len;
4326                 char_u  *opnd;
4327                 int     opndc = 0, inpc;
4328
4329                 opnd = OPERAND(scan);
4330                 /* Safety check (just in case 'encoding' was changed since
4331                  * compiling the program). */
4332                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4333                 {
4334                     status = RA_NOMATCH;
4335                     break;
4336                 }
4337                 if (enc_utf8)
4338                     opndc = mb_ptr2char(opnd);
4339                 if (enc_utf8 && utf_iscomposing(opndc))
4340                 {
4341                     /* When only a composing char is given match at any
4342                      * position where that composing char appears. */
4343                     status = RA_NOMATCH;
4344                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4345                     {
4346                         inpc = mb_ptr2char(reginput + i);
4347                         if (!utf_iscomposing(inpc))
4348                         {
4349                             if (i > 0)
4350                                 break;
4351                         }
4352                         else if (opndc == inpc)
4353                         {
4354                             /* Include all following composing chars. */
4355                             len = i + mb_ptr2len(reginput + i);
4356                             status = RA_MATCH;
4357                             break;
4358                         }
4359                     }
4360                 }
4361                 else
4362                     for (i = 0; i < len; ++i)
4363                         if (opnd[i] != reginput[i])
4364                         {
4365                             status = RA_NOMATCH;
4366                             break;
4367                         }
4368                 reginput += len;
4369             }
4370             else
4371                 status = RA_NOMATCH;
4372             break;
4373 #endif
4374
4375           case NOTHING:
4376             break;
4377
4378           case BACK:
4379             {
4380                 int             i;
4381                 backpos_T       *bp;
4382
4383                 /*
4384                  * When we run into BACK we need to check if we don't keep
4385                  * looping without matching any input.  The second and later
4386                  * times a BACK is encountered it fails if the input is still
4387                  * at the same position as the previous time.
4388                  * The positions are stored in "backpos" and found by the
4389                  * current value of "scan", the position in the RE program.
4390                  */
4391                 bp = (backpos_T *)backpos.ga_data;
4392                 for (i = 0; i < backpos.ga_len; ++i)
4393                     if (bp[i].bp_scan == scan)
4394                         break;
4395                 if (i == backpos.ga_len)
4396                 {
4397                     /* First time at this BACK, make room to store the pos. */
4398                     if (ga_grow(&backpos, 1) == FAIL)
4399                         status = RA_FAIL;
4400                     else
4401                     {
4402                         /* get "ga_data" again, it may have changed */
4403                         bp = (backpos_T *)backpos.ga_data;
4404                         bp[i].bp_scan = scan;
4405                         ++backpos.ga_len;
4406                     }
4407                 }
4408                 else if (reg_save_equal(&bp[i].bp_pos))
4409                     /* Still at same position as last time, fail. */
4410                     status = RA_NOMATCH;
4411
4412                 if (status != RA_FAIL && status != RA_NOMATCH)
4413                     reg_save(&bp[i].bp_pos, &backpos);
4414             }
4415             break;
4416
4417           case MOPEN + 0:   /* Match start: \zs */
4418           case MOPEN + 1:   /* \( */
4419           case MOPEN + 2:
4420           case MOPEN + 3:
4421           case MOPEN + 4:
4422           case MOPEN + 5:
4423           case MOPEN + 6:
4424           case MOPEN + 7:
4425           case MOPEN + 8:
4426           case MOPEN + 9:
4427             {
4428                 no = op - MOPEN;
4429                 cleanup_subexpr();
4430                 rp = regstack_push(RS_MOPEN, scan);
4431                 if (rp == NULL)
4432                     status = RA_FAIL;
4433                 else
4434                 {
4435                     rp->rs_no = no;
4436                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4437                                                              &reg_startp[no]);
4438                     /* We simply continue and handle the result when done. */
4439                 }
4440             }
4441             break;
4442
4443           case NOPEN:       /* \%( */
4444           case NCLOSE:      /* \) after \%( */
4445                 if (regstack_push(RS_NOPEN, scan) == NULL)
4446                     status = RA_FAIL;
4447                 /* We simply continue and handle the result when done. */
4448                 break;
4449
4450 #ifdef FEAT_SYN_HL
4451           case ZOPEN + 1:
4452           case ZOPEN + 2:
4453           case ZOPEN + 3:
4454           case ZOPEN + 4:
4455           case ZOPEN + 5:
4456           case ZOPEN + 6:
4457           case ZOPEN + 7:
4458           case ZOPEN + 8:
4459           case ZOPEN + 9:
4460             {
4461                 no = op - ZOPEN;
4462                 cleanup_zsubexpr();
4463                 rp = regstack_push(RS_ZOPEN, scan);
4464                 if (rp == NULL)
4465                     status = RA_FAIL;
4466                 else
4467                 {
4468                     rp->rs_no = no;
4469                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4470                                                              &reg_startzp[no]);
4471                     /* We simply continue and handle the result when done. */
4472                 }
4473             }
4474             break;
4475 #endif
4476
4477           case MCLOSE + 0:  /* Match end: \ze */
4478           case MCLOSE + 1:  /* \) */
4479           case MCLOSE + 2:
4480           case MCLOSE + 3:
4481           case MCLOSE + 4:
4482           case MCLOSE + 5:
4483           case MCLOSE + 6:
4484           case MCLOSE + 7:
4485           case MCLOSE + 8:
4486           case MCLOSE + 9:
4487             {
4488                 no = op - MCLOSE;
4489                 cleanup_subexpr();
4490                 rp = regstack_push(RS_MCLOSE, scan);
4491                 if (rp == NULL)
4492                     status = RA_FAIL;
4493                 else
4494                 {
4495                     rp->rs_no = no;
4496                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4497                     /* We simply continue and handle the result when done. */
4498                 }
4499             }
4500             break;
4501
4502 #ifdef FEAT_SYN_HL
4503           case ZCLOSE + 1:  /* \) after \z( */
4504           case ZCLOSE + 2:
4505           case ZCLOSE + 3:
4506           case ZCLOSE + 4:
4507           case ZCLOSE + 5:
4508           case ZCLOSE + 6:
4509           case ZCLOSE + 7:
4510           case ZCLOSE + 8:
4511           case ZCLOSE + 9:
4512             {
4513                 no = op - ZCLOSE;
4514                 cleanup_zsubexpr();
4515                 rp = regstack_push(RS_ZCLOSE, scan);
4516                 if (rp == NULL)
4517                     status = RA_FAIL;
4518                 else
4519                 {
4520                     rp->rs_no = no;
4521                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4522                                                               &reg_endzp[no]);
4523                     /* We simply continue and handle the result when done. */
4524                 }
4525             }
4526             break;
4527 #endif
4528
4529           case BACKREF + 1:
4530           case BACKREF + 2:
4531           case BACKREF + 3:
4532           case BACKREF + 4:
4533           case BACKREF + 5:
4534           case BACKREF + 6:
4535           case BACKREF + 7:
4536           case BACKREF + 8:
4537           case BACKREF + 9:
4538             {
4539                 int             len;
4540                 linenr_T        clnum;
4541                 colnr_T         ccol;
4542                 char_u          *p;
4543
4544                 no = op - BACKREF;
4545                 cleanup_subexpr();
4546                 if (!REG_MULTI)         /* Single-line regexp */
4547                 {
4548                     if (reg_startp[no] == NULL || reg_endp[no] == NULL)
4549                     {
4550                         /* Backref was not set: Match an empty string. */
4551                         len = 0;
4552                     }
4553                     else
4554                     {
4555                         /* Compare current input with back-ref in the same
4556                          * line. */
4557                         len = (int)(reg_endp[no] - reg_startp[no]);
4558                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4559                             status = RA_NOMATCH;
4560                     }
4561                 }
4562                 else                            /* Multi-line regexp */
4563                 {
4564                     if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
4565                     {
4566                         /* Backref was not set: Match an empty string. */
4567                         len = 0;
4568                     }
4569                     else
4570                     {
4571                         if (reg_startpos[no].lnum == reglnum
4572                                 && reg_endpos[no].lnum == reglnum)
4573                         {
4574                             /* Compare back-ref within the current line. */
4575                             len = reg_endpos[no].col - reg_startpos[no].col;
4576                             if (cstrncmp(regline + reg_startpos[no].col,
4577                                                           reginput, &len) != 0)
4578                                 status = RA_NOMATCH;
4579                         }
4580                         else
4581                         {
4582                             /* Messy situation: Need to compare between two
4583                              * lines. */
4584                             ccol = reg_startpos[no].col;
4585                             clnum = reg_startpos[no].lnum;
4586                             for (;;)
4587                             {
4588                                 /* Since getting one line may invalidate
4589                                  * the other, need to make copy.  Slow! */
4590                                 if (regline != reg_tofree)
4591                                 {
4592                                     len = (int)STRLEN(regline);
4593                                     if (reg_tofree == NULL
4594                                                  || len >= (int)reg_tofreelen)
4595                                     {
4596                                         len += 50;      /* get some extra */
4597                                         vim_free(reg_tofree);
4598                                         reg_tofree = alloc(len);
4599                                         if (reg_tofree == NULL)
4600                                         {
4601                                             status = RA_FAIL; /* outof memory!*/
4602                                             break;
4603                                         }
4604                                         reg_tofreelen = len;
4605                                     }
4606                                     STRCPY(reg_tofree, regline);
4607                                     reginput = reg_tofree
4608                                                        + (reginput - regline);
4609                                     regline = reg_tofree;
4610                                 }
4611
4612                                 /* Get the line to compare with. */
4613                                 p = reg_getline(clnum);
4614                                 if (clnum == reg_endpos[no].lnum)
4615                                     len = reg_endpos[no].col - ccol;
4616                                 else
4617                                     len = (int)STRLEN(p + ccol);
4618
4619                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4620                                 {
4621                                     status = RA_NOMATCH;  /* doesn't match */
4622                                     break;
4623                                 }
4624                                 if (clnum == reg_endpos[no].lnum)
4625                                     break;              /* match and at end! */
4626                                 if (reglnum >= reg_maxline)
4627                                 {
4628                                     status = RA_NOMATCH;  /* text too short */
4629                                     break;
4630                                 }
4631
4632                                 /* Advance to next line. */
4633                                 reg_nextline();
4634                                 ++clnum;
4635                                 ccol = 0;
4636                                 if (got_int)
4637                                 {
4638                                     status = RA_FAIL;
4639                                     break;
4640                                 }
4641                             }
4642
4643                             /* found a match!  Note that regline may now point
4644                              * to a copy of the line, that should not matter. */
4645                         }
4646                     }
4647                 }
4648
4649                 /* Matched the backref, skip over it. */
4650                 reginput += len;
4651             }
4652             break;
4653
4654 #ifdef FEAT_SYN_HL
4655           case ZREF + 1:
4656           case ZREF + 2:
4657           case ZREF + 3:
4658           case ZREF + 4:
4659           case ZREF + 5:
4660           case ZREF + 6:
4661           case ZREF + 7:
4662           case ZREF + 8:
4663           case ZREF + 9:
4664             {
4665                 int     len;
4666
4667                 cleanup_zsubexpr();
4668                 no = op - ZREF;
4669                 if (re_extmatch_in != NULL
4670                         && re_extmatch_in->matches[no] != NULL)
4671                 {
4672                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4673                     if (cstrncmp(re_extmatch_in->matches[no],
4674                                                           reginput, &len) != 0)
4675                         status = RA_NOMATCH;
4676                     else
4677                         reginput += len;
4678                 }
4679                 else
4680                 {
4681                     /* Backref was not set: Match an empty string. */
4682                 }
4683             }
4684             break;
4685 #endif
4686
4687           case BRANCH:
4688             {
4689                 if (OP(next) != BRANCH) /* No choice. */
4690                     next = OPERAND(scan);       /* Avoid recursion. */
4691                 else
4692                 {
4693                     rp = regstack_push(RS_BRANCH, scan);
4694                     if (rp == NULL)
4695                         status = RA_FAIL;
4696                     else
4697                         status = RA_BREAK;      /* rest is below */
4698                 }
4699             }
4700             break;
4701
4702           case BRACE_LIMITS:
4703             {
4704                 if (OP(next) == BRACE_SIMPLE)
4705                 {
4706                     bl_minval = OPERAND_MIN(scan);
4707                     bl_maxval = OPERAND_MAX(scan);
4708                 }
4709                 else if (OP(next) >= BRACE_COMPLEX
4710                         && OP(next) < BRACE_COMPLEX + 10)
4711                 {
4712                     no = OP(next) - BRACE_COMPLEX;
4713                     brace_min[no] = OPERAND_MIN(scan);
4714                     brace_max[no] = OPERAND_MAX(scan);
4715                     brace_count[no] = 0;
4716                 }
4717                 else
4718                 {
4719                     EMSG(_(e_internal));            /* Shouldn't happen */
4720                     status = RA_FAIL;
4721                 }
4722             }
4723             break;
4724
4725           case BRACE_COMPLEX + 0:
4726           case BRACE_COMPLEX + 1:
4727           case BRACE_COMPLEX + 2:
4728           case BRACE_COMPLEX + 3:
4729           case BRACE_COMPLEX + 4:
4730           case BRACE_COMPLEX + 5:
4731           case BRACE_COMPLEX + 6:
4732           case BRACE_COMPLEX + 7:
4733           case BRACE_COMPLEX + 8:
4734           case BRACE_COMPLEX + 9:
4735             {
4736                 no = op - BRACE_COMPLEX;
4737                 ++brace_count[no];
4738
4739                 /* If not matched enough times yet, try one more */
4740                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4741                                              ? brace_min[no] : brace_max[no]))
4742                 {
4743                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4744                     if (rp == NULL)
4745                         status = RA_FAIL;
4746                     else
4747                     {
4748                         rp->rs_no = no;
4749                         reg_save(&rp->rs_un.regsave, &backpos);
4750                         next = OPERAND(scan);
4751                         /* We continue and handle the result when done. */
4752                     }
4753                     break;
4754                 }
4755
4756                 /* If matched enough times, may try matching some more */
4757                 if (brace_min[no] <= brace_max[no])
4758                 {
4759                     /* Range is the normal way around, use longest match */
4760                     if (brace_count[no] <= brace_max[no])
4761                     {
4762                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4763                         if (rp == NULL)
4764                             status = RA_FAIL;
4765                         else
4766                         {
4767                             rp->rs_no = no;
4768                             reg_save(&rp->rs_un.regsave, &backpos);
4769                             next = OPERAND(scan);
4770                             /* We continue and handle the result when done. */
4771                         }
4772                     }
4773                 }
4774                 else
4775                 {
4776                     /* Range is backwards, use shortest match first */
4777                     if (brace_count[no] <= brace_min[no])
4778                     {
4779                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4780                         if (rp == NULL)
4781                             status = RA_FAIL;
4782                         else
4783                         {
4784                             reg_save(&rp->rs_un.regsave, &backpos);
4785                             /* We continue and handle the result when done. */
4786                         }
4787                     }
4788                 }
4789             }
4790             break;
4791
4792           case BRACE_SIMPLE:
4793           case STAR:
4794           case PLUS:
4795             {
4796                 regstar_T       rst;
4797
4798                 /*
4799                  * Lookahead to avoid useless match attempts when we know
4800                  * what character comes next.
4801                  */
4802                 if (OP(next) == EXACTLY)
4803                 {
4804                     rst.nextb = *OPERAND(next);
4805                     if (ireg_ic)
4806                     {
4807                         if (MB_ISUPPER(rst.nextb))
4808                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4809                         else
4810                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4811                     }
4812                     else
4813                         rst.nextb_ic = rst.nextb;
4814                 }
4815                 else
4816                 {
4817                     rst.nextb = NUL;
4818                     rst.nextb_ic = NUL;
4819                 }
4820                 if (op != BRACE_SIMPLE)
4821                 {
4822                     rst.minval = (op == STAR) ? 0 : 1;
4823                     rst.maxval = MAX_LIMIT;
4824                 }
4825                 else
4826                 {
4827                     rst.minval = bl_minval;
4828                     rst.maxval = bl_maxval;
4829                 }
4830
4831                 /*
4832                  * When maxval > minval, try matching as much as possible, up
4833                  * to maxval.  When maxval < minval, try matching at least the
4834                  * minimal number (since the range is backwards, that's also
4835                  * maxval!).
4836                  */
4837                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4838                 if (got_int)
4839                 {
4840                     status = RA_FAIL;
4841                     break;
4842                 }
4843                 if (rst.minval <= rst.maxval
4844                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4845                 {
4846                     /* It could match.  Prepare for trying to match what
4847                      * follows.  The code is below.  Parameters are stored in
4848                      * a regstar_T on the regstack. */
4849                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4850                     {
4851                         EMSG(_(e_maxmempat));
4852                         status = RA_FAIL;
4853                     }
4854                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4855                         status = RA_FAIL;
4856                     else
4857                     {
4858                         regstack.ga_len += sizeof(regstar_T);
4859                         rp = regstack_push(rst.minval <= rst.maxval
4860                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4861                         if (rp == NULL)
4862                             status = RA_FAIL;
4863                         else
4864                         {
4865                             *(((regstar_T *)rp) - 1) = rst;
4866                             status = RA_BREAK;      /* skip the restore bits */
4867                         }
4868                     }
4869                 }
4870                 else
4871                     status = RA_NOMATCH;
4872
4873             }
4874             break;
4875
4876           case NOMATCH:
4877           case MATCH:
4878           case SUBPAT:
4879             rp = regstack_push(RS_NOMATCH, scan);
4880             if (rp == NULL)
4881                 status = RA_FAIL;
4882             else
4883             {
4884                 rp->rs_no = op;
4885                 reg_save(&rp->rs_un.regsave, &backpos);
4886                 next = OPERAND(scan);
4887                 /* We continue and handle the result when done. */
4888             }
4889             break;
4890
4891           case BEHIND:
4892           case NOBEHIND:
4893             /* Need a bit of room to store extra positions. */
4894             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4895             {
4896                 EMSG(_(e_maxmempat));
4897                 status = RA_FAIL;
4898             }
4899             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4900                 status = RA_FAIL;
4901             else
4902             {
4903                 regstack.ga_len += sizeof(regbehind_T);
4904                 rp = regstack_push(RS_BEHIND1, scan);
4905                 if (rp == NULL)
4906                     status = RA_FAIL;
4907                 else
4908                 {
4909                     /* Need to save the subexpr to be able to restore them
4910                      * when there is a match but we don't use it. */
4911                     save_subexpr(((regbehind_T *)rp) - 1);
4912
4913                     rp->rs_no = op;
4914                     reg_save(&rp->rs_un.regsave, &backpos);
4915                     /* First try if what follows matches.  If it does then we
4916                      * check the behind match by looping. */
4917                 }
4918             }
4919             break;
4920
4921           case BHPOS:
4922             if (REG_MULTI)
4923             {
4924                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4925                         || behind_pos.rs_u.pos.lnum != reglnum)
4926                     status = RA_NOMATCH;
4927             }
4928             else if (behind_pos.rs_u.ptr != reginput)
4929                 status = RA_NOMATCH;
4930             break;
4931
4932           case NEWL:
4933             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4934                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4935                 status = RA_NOMATCH;
4936             else if (reg_line_lbr)
4937                 ADVANCE_REGINPUT();
4938             else
4939                 reg_nextline();
4940             break;
4941
4942           case END:
4943             status = RA_MATCH;  /* Success! */
4944             break;
4945
4946           default:
4947             EMSG(_(e_re_corr));
4948 #ifdef DEBUG
4949             printf("Illegal op code %d\n", op);
4950 #endif
4951             status = RA_FAIL;
4952             break;
4953           }
4954         }
4955
4956         /* If we can't continue sequentially, break the inner loop. */
4957         if (status != RA_CONT)
4958             break;
4959
4960         /* Continue in inner loop, advance to next item. */
4961         scan = next;
4962
4963     } /* end of inner loop */
4964
4965     /*
4966      * If there is something on the regstack execute the code for the state.
4967      * If the state is popped then loop and use the older state.
4968      */
4969     while (regstack.ga_len > 0 && status != RA_FAIL)
4970     {
4971         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4972         switch (rp->rs_state)
4973         {
4974           case RS_NOPEN:
4975             /* Result is passed on as-is, simply pop the state. */
4976             regstack_pop(&scan);
4977             break;
4978
4979           case RS_MOPEN:
4980             /* Pop the state.  Restore pointers when there is no match. */
4981             if (status == RA_NOMATCH)
4982                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4983                                                   &reg_startp[rp->rs_no]);
4984             regstack_pop(&scan);
4985             break;
4986
4987 #ifdef FEAT_SYN_HL
4988           case RS_ZOPEN:
4989             /* Pop the state.  Restore pointers when there is no match. */
4990             if (status == RA_NOMATCH)
4991                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4992                                                  &reg_startzp[rp->rs_no]);
4993             regstack_pop(&scan);
4994             break;
4995 #endif
4996
4997           case RS_MCLOSE:
4998             /* Pop the state.  Restore pointers when there is no match. */
4999             if (status == RA_NOMATCH)
5000                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5001                                                     &reg_endp[rp->rs_no]);
5002             regstack_pop(&scan);
5003             break;
5004
5005 #ifdef FEAT_SYN_HL
5006           case RS_ZCLOSE:
5007             /* Pop the state.  Restore pointers when there is no match. */
5008             if (status == RA_NOMATCH)
5009                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5010                                                    &reg_endzp[rp->rs_no]);
5011             regstack_pop(&scan);
5012             break;
5013 #endif
5014
5015           case RS_BRANCH:
5016             if (status == RA_MATCH)
5017                 /* this branch matched, use it */
5018                 regstack_pop(&scan);
5019             else
5020             {
5021                 if (status != RA_BREAK)
5022                 {
5023                     /* After a non-matching branch: try next one. */
5024                     reg_restore(&rp->rs_un.regsave, &backpos);
5025                     scan = rp->rs_scan;
5026                 }
5027                 if (scan == NULL || OP(scan) != BRANCH)
5028                 {
5029                     /* no more branches, didn't find a match */
5030                     status = RA_NOMATCH;
5031                     regstack_pop(&scan);
5032                 }
5033                 else
5034                 {
5035                     /* Prepare to try a branch. */
5036                     rp->rs_scan = regnext(scan);
5037                     reg_save(&rp->rs_un.regsave, &backpos);
5038                     scan = OPERAND(scan);
5039                 }
5040             }
5041             break;
5042
5043           case RS_BRCPLX_MORE:
5044             /* Pop the state.  Restore pointers when there is no match. */
5045             if (status == RA_NOMATCH)
5046             {
5047                 reg_restore(&rp->rs_un.regsave, &backpos);
5048                 --brace_count[rp->rs_no];       /* decrement match count */
5049             }
5050             regstack_pop(&scan);
5051             break;
5052
5053           case RS_BRCPLX_LONG:
5054             /* Pop the state.  Restore pointers when there is no match. */
5055             if (status == RA_NOMATCH)
5056             {
5057                 /* There was no match, but we did find enough matches. */
5058                 reg_restore(&rp->rs_un.regsave, &backpos);
5059                 --brace_count[rp->rs_no];
5060                 /* continue with the items after "\{}" */
5061                 status = RA_CONT;
5062             }
5063             regstack_pop(&scan);
5064             if (status == RA_CONT)
5065                 scan = regnext(scan);
5066             break;
5067
5068           case RS_BRCPLX_SHORT:
5069             /* Pop the state.  Restore pointers when there is no match. */
5070             if (status == RA_NOMATCH)
5071                 /* There was no match, try to match one more item. */
5072                 reg_restore(&rp->rs_un.regsave, &backpos);
5073             regstack_pop(&scan);
5074             if (status == RA_NOMATCH)
5075             {
5076                 scan = OPERAND(scan);
5077                 status = RA_CONT;
5078             }
5079             break;
5080
5081           case RS_NOMATCH:
5082             /* Pop the state.  If the operand matches for NOMATCH or
5083              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5084              * except for SUBPAT, and continue with the next item. */
5085             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5086                 status = RA_NOMATCH;
5087             else
5088             {
5089                 status = RA_CONT;
5090                 if (rp->rs_no != SUBPAT)        /* zero-width */
5091                     reg_restore(&rp->rs_un.regsave, &backpos);
5092             }
5093             regstack_pop(&scan);
5094             if (status == RA_CONT)
5095                 scan = regnext(scan);
5096             break;
5097
5098           case RS_BEHIND1:
5099             if (status == RA_NOMATCH)
5100             {
5101                 regstack_pop(&scan);
5102                 regstack.ga_len -= sizeof(regbehind_T);
5103             }
5104             else
5105             {
5106                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5107                  * the behind part does (not) match before the current
5108                  * position in the input.  This must be done at every
5109                  * position in the input and checking if the match ends at
5110                  * the current position. */
5111
5112                 /* save the position after the found match for next */
5113                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5114
5115                 /* start looking for a match with operand at the current
5116                  * position.  Go back one character until we find the
5117                  * result, hitting the start of the line or the previous
5118                  * line (for multi-line matching).
5119                  * Set behind_pos to where the match should end, BHPOS
5120                  * will match it.  Save the current value. */
5121                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5122                 behind_pos = rp->rs_un.regsave;
5123
5124                 rp->rs_state = RS_BEHIND2;
5125
5126                 reg_restore(&rp->rs_un.regsave, &backpos);
5127                 scan = OPERAND(rp->rs_scan);
5128             }
5129             break;
5130
5131           case RS_BEHIND2:
5132             /*
5133              * Looping for BEHIND / NOBEHIND match.
5134              */
5135             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5136             {
5137                 /* found a match that ends where "next" started */
5138                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5139                 if (rp->rs_no == BEHIND)
5140                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5141                                                                     &backpos);
5142                 else
5143                 {
5144                     /* But we didn't want a match.  Need to restore the
5145                      * subexpr, because what follows matched, so they have
5146                      * been set. */
5147                     status = RA_NOMATCH;
5148                     restore_subexpr(((regbehind_T *)rp) - 1);
5149                 }
5150                 regstack_pop(&scan);
5151                 regstack.ga_len -= sizeof(regbehind_T);
5152             }
5153             else
5154             {
5155                 /* No match or a match that doesn't end where we want it: Go
5156                  * back one character.  May go to previous line once. */
5157                 no = OK;
5158                 if (REG_MULTI)
5159                 {
5160                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5161                     {
5162                         if (rp->rs_un.regsave.rs_u.pos.lnum
5163                                         < behind_pos.rs_u.pos.lnum
5164                                 || reg_getline(
5165                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5166                                                                   == NULL)
5167                             no = FAIL;
5168                         else
5169                         {
5170                             reg_restore(&rp->rs_un.regsave, &backpos);
5171                             rp->rs_un.regsave.rs_u.pos.col =
5172                                                  (colnr_T)STRLEN(regline);
5173                         }
5174                     }
5175                     else
5176                         --rp->rs_un.regsave.rs_u.pos.col;
5177                 }
5178                 else
5179                 {
5180                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5181                         no = FAIL;
5182                     else
5183                         --rp->rs_un.regsave.rs_u.ptr;
5184                 }
5185                 if (no == OK)
5186                 {
5187                     /* Advanced, prepare for finding match again. */
5188                     reg_restore(&rp->rs_un.regsave, &backpos);
5189                     scan = OPERAND(rp->rs_scan);
5190                     if (status == RA_MATCH)
5191                     {
5192                         /* We did match, so subexpr may have been changed,
5193                          * need to restore them for the next try. */
5194                         status = RA_NOMATCH;
5195                         restore_subexpr(((regbehind_T *)rp) - 1);
5196                     }
5197                 }
5198                 else
5199                 {
5200                     /* Can't advance.  For NOBEHIND that's a match. */
5201                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5202                     if (rp->rs_no == NOBEHIND)
5203                     {
5204                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5205                                                                     &backpos);
5206                         status = RA_MATCH;
5207                     }
5208                     else
5209                     {
5210                         /* We do want a proper match.  Need to restore the
5211                          * subexpr if we had a match, because they may have
5212                          * been set. */
5213                         if (status == RA_MATCH)
5214                         {
5215                             status = RA_NOMATCH;
5216                             restore_subexpr(((regbehind_T *)rp) - 1);
5217                         }
5218                     }
5219                     regstack_pop(&scan);
5220                     regstack.ga_len -= sizeof(regbehind_T);
5221                 }
5222             }
5223             break;
5224
5225           case RS_STAR_LONG:
5226           case RS_STAR_SHORT:
5227             {
5228                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5229
5230                 if (status == RA_MATCH)
5231                 {
5232                     regstack_pop(&scan);
5233                     regstack.ga_len -= sizeof(regstar_T);
5234                     break;
5235                 }
5236
5237                 /* Tried once already, restore input pointers. */
5238                 if (status != RA_BREAK)
5239                     reg_restore(&rp->rs_un.regsave, &backpos);
5240
5241                 /* Repeat until we found a position where it could match. */
5242                 for (;;)
5243                 {
5244                     if (status != RA_BREAK)
5245                     {
5246                         /* Tried first position already, advance. */
5247                         if (rp->rs_state == RS_STAR_LONG)
5248                         {
5249                             /* Trying for longest match, but couldn't or
5250                              * didn't match -- back up one char. */
5251                             if (--rst->count < rst->minval)
5252                                 break;
5253                             if (reginput == regline)
5254                             {
5255                                 /* backup to last char of previous line */
5256                                 --reglnum;
5257                                 regline = reg_getline(reglnum);
5258                                 /* Just in case regrepeat() didn't count
5259                                  * right. */
5260                                 if (regline == NULL)
5261                                     break;
5262                                 reginput = regline + STRLEN(regline);
5263                                 fast_breakcheck();
5264                             }
5265                             else
5266                                 mb_ptr_back(regline, reginput);
5267                         }
5268                         else
5269                         {
5270                             /* Range is backwards, use shortest match first.
5271                              * Careful: maxval and minval are exchanged!
5272                              * Couldn't or didn't match: try advancing one
5273                              * char. */
5274                             if (rst->count == rst->minval
5275                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5276                                 break;
5277                             ++rst->count;
5278                         }
5279                         if (got_int)
5280                             break;
5281                     }
5282                     else
5283                         status = RA_NOMATCH;
5284
5285                     /* If it could match, try it. */
5286                     if (rst->nextb == NUL || *reginput == rst->nextb
5287                                              || *reginput == rst->nextb_ic)
5288                     {
5289                         reg_save(&rp->rs_un.regsave, &backpos);
5290                         scan = regnext(rp->rs_scan);
5291                         status = RA_CONT;
5292                         break;
5293                     }
5294                 }
5295                 if (status != RA_CONT)
5296                 {
5297                     /* Failed. */
5298                     regstack_pop(&scan);
5299                     regstack.ga_len -= sizeof(regstar_T);
5300                     status = RA_NOMATCH;
5301                 }
5302             }
5303             break;
5304         }
5305
5306         /* If we want to continue the inner loop or didn't pop a state
5307          * continue matching loop */
5308         if (status == RA_CONT || rp == (regitem_T *)
5309                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5310             break;
5311     }
5312
5313     /* May need to continue with the inner loop, starting at "scan". */
5314     if (status == RA_CONT)
5315         continue;
5316
5317     /*
5318      * If the regstack is empty or something failed we are done.
5319      */
5320     if (regstack.ga_len == 0 || status == RA_FAIL)
5321     {
5322         if (scan == NULL)
5323         {
5324             /*
5325              * We get here only if there's trouble -- normally "case END" is
5326              * the terminating point.
5327              */
5328             EMSG(_(e_re_corr));
5329 #ifdef DEBUG
5330             printf("Premature EOL\n");
5331 #endif
5332         }
5333         if (status == RA_FAIL)
5334             got_int = TRUE;
5335         return (status == RA_MATCH);
5336     }
5337
5338   } /* End of loop until the regstack is empty. */
5339
5340   /* NOTREACHED */
5341 }
5342
5343 /*
5344  * Append a new item to the regstack and store "state" and "scan" in it.
5345  * Returns the new item, or NULL when the size limit is hit or growing fails.
5346  */
5347     static regitem_T *
5348 regstack_push(state, scan)
5349     regstate_T  state;
5350     char_u      *scan;
5351 {
5352     regitem_T   *item = NULL;
5353
5354     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5355     {
5356         /* the used size, counted in units of 1024 bytes, reached p_mmp */
5357         EMSG(_(e_maxmempat));
5358     }
5359     else if (ga_grow(&regstack, sizeof(regitem_T)) != FAIL)
5360     {
5361         /* The new item starts right behind the bytes already in use. */
5362         item = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5363         regstack.ga_len += sizeof(regitem_T);
5364         item->rs_state = state;
5365         item->rs_scan = scan;
5366     }
5367     return item;
5368 }
5369
5370 /*
5371  * Take the top item off the regstack; its scan pointer goes into "*scan".
5372  */
5373     static void
5374 regstack_pop(scan)
5375     char_u      **scan;
5376 {
5377     regitem_T   *top;
5378
5379     /* Shrink the stack first, the popped item then starts at the new end. */
5380     regstack.ga_len -= sizeof(regitem_T);
5381     top = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5382     *scan = top->rs_scan;
5383 }
5384
5385 /*
5386  * regrepeat - repeatedly match something simple, return how many.
5387  * Advances reginput (and reglnum) to just after the matched chars.
      *
      * "p" is the node to be matched: OP(p) selects what is matched and
      * OPERAND(p) holds the character(s) for EXACTLY, MULTIBYTECODE, ANYOF and
      * ANYBUT.  Matching stops after "maxcount" matches, at the first character
      * that doesn't match and when got_int is found set after moving to the
      * next line.  For the items that can include a line break (the ADD_NL
      * variants and NEWL) each line break that is passed counts as one match.
      * An opcode that is not handled here results in an error message and a
      * count of zero.
5388  */
5389     static int
5390 regrepeat(p, maxcount)
5391     char_u      *p;         /* node to match repeatedly */
5392     long        maxcount;   /* maximum number of matches allowed */
5393 {
5394     long        count = 0;
5395     char_u      *scan;
5396     char_u      *opnd;
5397     int         mask;           /* class_tab[] bit, set before do_class */
5398     int         testval = 0;    /* meaning depends on the item, see below */
5399
5400     scan = reginput;        /* Make local copy of reginput for speed. */
5401     opnd = OPERAND(p);
5402     switch (OP(p))
5403     {
5404       case ANY:
5405       case ANY + ADD_NL:
5406         while (count < maxcount)
5407         {
5408             /* Matching anything means we continue until end-of-line (or
5409              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5410             while (*scan != NUL && count < maxcount)
5411             {
5412                 ++count;
5413                 mb_ptr_adv(scan);
5414             }
5415             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5416                                          || reg_line_lbr || count == maxcount)
5417                 break;
5418             ++count;            /* count the line-break */
5419             reg_nextline();
5420             scan = reginput;
5421             if (got_int)
5422                 break;
5423         }
5424         break;
5425
5426       case IDENT:
5427       case IDENT + ADD_NL:
5428         testval = TRUE;         /* digits are accepted as well */
5429         /*FALLTHROUGH*/
5430       case SIDENT:
5431       case SIDENT + ADD_NL:
5432         while (count < maxcount)
5433         {
5434             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5435             {
5436                 mb_ptr_adv(scan);
5437             }
5438             else if (*scan == NUL)
5439             {
5440                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5441                                                               || reg_line_lbr)
5442                     break;
5443                 reg_nextline();
5444                 scan = reginput;
5445                 if (got_int)
5446                     break;
5447             }
5448             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5449                 ++scan;
5450             else
5451                 break;
5452             ++count;
5453         }
5454         break;
5455
5456       case KWORD:
5457       case KWORD + ADD_NL:
5458         testval = TRUE;         /* digits are accepted as well */
5459         /*FALLTHROUGH*/
5460       case SKWORD:
5461       case SKWORD + ADD_NL:
5462         while (count < maxcount)
5463         {
5464             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5465             {
5466                 mb_ptr_adv(scan);
5467             }
5468             else if (*scan == NUL)
5469             {
5470                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5471                                                               || reg_line_lbr)
5472                     break;
5473                 reg_nextline();
5474                 scan = reginput;
5475                 if (got_int)
5476                     break;
5477             }
5478             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5479                 ++scan;
5480             else
5481                 break;
5482             ++count;
5483         }
5484         break;
5485
5486       case FNAME:
5487       case FNAME + ADD_NL:
5488         testval = TRUE;         /* digits are accepted as well */
5489         /*FALLTHROUGH*/
5490       case SFNAME:
5491       case SFNAME + ADD_NL:
5492         while (count < maxcount)
5493         {
5494             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5495             {
5496                 mb_ptr_adv(scan);
5497             }
5498             else if (*scan == NUL)
5499             {
5500                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5501                                                               || reg_line_lbr)
5502                     break;
5503                 reg_nextline();
5504                 scan = reginput;
5505                 if (got_int)
5506                     break;
5507             }
5508             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5509                 ++scan;
5510             else
5511                 break;
5512             ++count;
5513         }
5514         break;
5515
5516       case PRINT:
5517       case PRINT + ADD_NL:
5518         testval = TRUE;         /* digits are accepted as well */
5519         /*FALLTHROUGH*/
5520       case SPRINT:
5521       case SPRINT + ADD_NL:
5522         while (count < maxcount)
5523         {
5524             if (*scan == NUL)
5525             {
5526                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5527                                                               || reg_line_lbr)
5528                     break;
5529                 reg_nextline();
5530                 scan = reginput;
5531                 if (got_int)
5532                     break;
5533             }
5534             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5535             {
5536                 mb_ptr_adv(scan);
5537             }
5538             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5539                 ++scan;
5540             else
5541                 break;
5542             ++count;
5543         }
5544         break;
5545
5546       case WHITE:
5547       case WHITE + ADD_NL:
5548         testval = mask = RI_WHITE;
5549 do_class:
             /* A single-byte character matches when its "mask" bit in class_tab[]
              * equals "testval": the bit must be set for the class itself and
              * must be clear for the negated class ("testval" is zero then). */
5550         while (count < maxcount)
5551         {
5552 #ifdef FEAT_MBYTE
5553             int         l;
5554 #endif
5555             if (*scan == NUL)
5556             {
5557                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5558                                                               || reg_line_lbr)
5559                     break;
5560                 reg_nextline();
5561                 scan = reginput;
5562                 if (got_int)
5563                     break;
5564             }
5565 #ifdef FEAT_MBYTE
5566             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5567             {
                     /* A multi-byte character only matches a negated class. */
5568                 if (testval != 0)
5569                     break;
5570                 scan += l;
5571             }
5572 #endif
5573             else if ((class_tab[*scan] & mask) == testval)
5574                 ++scan;
5575             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5576                 ++scan;
5577             else
5578                 break;
5579             ++count;
5580         }
5581         break;
5582
5583       case NWHITE:
5584       case NWHITE + ADD_NL:
5585         mask = RI_WHITE;
5586         goto do_class;
5587       case DIGIT:
5588       case DIGIT + ADD_NL:
5589         testval = mask = RI_DIGIT;
5590         goto do_class;
5591       case NDIGIT:
5592       case NDIGIT + ADD_NL:
5593         mask = RI_DIGIT;
5594         goto do_class;
5595       case HEX:
5596       case HEX + ADD_NL:
5597         testval = mask = RI_HEX;
5598         goto do_class;
5599       case NHEX:
5600       case NHEX + ADD_NL:
5601         mask = RI_HEX;
5602         goto do_class;
5603       case OCTAL:
5604       case OCTAL + ADD_NL:
5605         testval = mask = RI_OCTAL;
5606         goto do_class;
5607       case NOCTAL:
5608       case NOCTAL + ADD_NL:
5609         mask = RI_OCTAL;
5610         goto do_class;
5611       case WORD:
5612       case WORD + ADD_NL:
5613         testval = mask = RI_WORD;
5614         goto do_class;
5615       case NWORD:
5616       case NWORD + ADD_NL:
5617         mask = RI_WORD;
5618         goto do_class;
5619       case HEAD:
5620       case HEAD + ADD_NL:
5621         testval = mask = RI_HEAD;
5622         goto do_class;
5623       case NHEAD:
5624       case NHEAD + ADD_NL:
5625         mask = RI_HEAD;
5626         goto do_class;
5627       case ALPHA:
5628       case ALPHA + ADD_NL:
5629         testval = mask = RI_ALPHA;
5630         goto do_class;
5631       case NALPHA:
5632       case NALPHA + ADD_NL:
5633         mask = RI_ALPHA;
5634         goto do_class;
5635       case LOWER:
5636       case LOWER + ADD_NL:
5637         testval = mask = RI_LOWER;
5638         goto do_class;
5639       case NLOWER:
5640       case NLOWER + ADD_NL:
5641         mask = RI_LOWER;
5642         goto do_class;
5643       case UPPER:
5644       case UPPER + ADD_NL:
5645         testval = mask = RI_UPPER;
5646         goto do_class;
5647       case NUPPER:
5648       case NUPPER + ADD_NL:
5649         mask = RI_UPPER;
5650         goto do_class;
5651
5652       case EXACTLY:
5653         {
5654             int     cu, cl;
5655
5656             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5657              * would have been used for it.  It does handle single-byte
5658              * characters, such as latin1. */
5659             if (ireg_ic)
5660             {
5661                 cu = MB_TOUPPER(*opnd);
5662                 cl = MB_TOLOWER(*opnd);
5663                 while (count < maxcount && (*scan == cu || *scan == cl))
5664                 {
5665                     count++;
5666                     scan++;
5667                 }
5668             }
5669             else
5670             {
5671                 cu = *opnd;
5672                 while (count < maxcount && *scan == cu)
5673                 {
5674                     count++;
5675                     scan++;
5676                 }
5677             }
5678             break;
5679         }
5680
5681 #ifdef FEAT_MBYTE
5682       case MULTIBYTECODE:
5683         {
5684             int         i, len, cf = 0;
5685
5686             /* Safety check (just in case 'encoding' was changed since
5687              * compiling the program). */
5688             if ((len = (*mb_ptr2len)(opnd)) > 1)
5689             {
5690                 if (ireg_ic && enc_utf8)
5691                     cf = utf_fold(utf_ptr2char(opnd));
                     /* Only try a character that has at least "len" bytes.
                      * When ignoring case a shorter character can fold to the
                      * same value as the operand; adding "len" to "scan" would
                      * then skip bytes that don't belong to that character and
                      * could move past the end of the line. */
5692                 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
5693                 {
5694                     for (i = 0; i < len; ++i)
5695                         if (opnd[i] != scan[i])
5696                             break;
                         /* When the bytes differ it can still be a match when
                          * ignoring case for utf-8: compare the folded chars. */
5697                     if (i < len && (!ireg_ic || !enc_utf8
5698                                         || utf_fold(utf_ptr2char(scan)) != cf))
5699                         break;
5700                     scan += len;
5701                     ++count;
5702                 }
5703             }
5704         }
5705         break;
5706 #endif
5707
5708       case ANYOF:
5709       case ANYOF + ADD_NL:
5710         testval = TRUE;         /* stop at a char that is not in the operand */
5711         /*FALLTHROUGH*/
5712
5713       case ANYBUT:
5714       case ANYBUT + ADD_NL:
             /* For ANYBUT "testval" is zero: stop at a char that is in the
              * operand. */
5715         while (count < maxcount)
5716         {
5717 #ifdef FEAT_MBYTE
5718             int len;
5719 #endif
5720             if (*scan == NUL)
5721             {
5722                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5723                                                               || reg_line_lbr)
5724                     break;
5725                 reg_nextline();
5726                 scan = reginput;
5727                 if (got_int)
5728                     break;
5729             }
5730             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5731                 ++scan;
5732 #ifdef FEAT_MBYTE
5733             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5734             {
5735                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5736                     break;
5737                 scan += len;
5738             }
5739 #endif
5740             else
5741             {
5742                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5743                     break;
5744                 ++scan;
5745             }
5746             ++count;
5747         }
5748         break;
5749
5750       case NEWL:
             /* A line break is the end of the line when matching multi-line
              * and not using reg_line_lbr, a '\n' when reg_line_lbr is set. */
5751         while (count < maxcount
5752                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5753                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5754         {
5755             count++;
5756             if (reg_line_lbr)
5757                 ADVANCE_REGINPUT();
5758             else
5759                 reg_nextline();
5760             scan = reginput;
5761             if (got_int)
5762                 break;
5763         }
5764         break;
5765
5766       default:                  /* Oh dear.  Called inappropriately. */
5767         EMSG(_(e_re_corr));
5768 #ifdef DEBUG
5769         printf("Called regrepeat with op code %d\n", OP(p));
5770 #endif
5771         break;
5772     }
5773
5774     reginput = scan;        /* store the local copy back */
5775
5776     return (int)count;
5777 }
5778
5779 /*
5780  * regnext - find the node that comes after node "p" in the program.
5781  * Returns NULL when only calculating the size, when reg_toolong is set and
5782  * when the node has no next item.
5783  */
5784     static char_u *
5785 regnext(p)
5786     char_u  *p;
5787 {
5788     int     dist;
5789
5790     /* Nothing to follow when calculating the size or after an error. */
5791     if (p == JUST_CALC_SIZE || reg_toolong)
5792         return NULL;
5793
5794     /* NEXT() gives the offset to the next node, zero means there is none. */
5795     dist = NEXT(p);
5796     if (dist == 0)
5797         return NULL;
5798
5799     /* Only for a BACK node the offset goes backwards in the program. */
5800     return (OP(p) == BACK) ? p - dist : p + dist;
5801 }
5802
5803 /*
5804  * Check that the regexp program in use starts with the magic number.
5805  * Gives an error message and returns TRUE when it does not.
5806  */
5807     static int
5808 prog_magic_wrong()
5809 {
5810     int     magic = REG_MULTI ? UCHARAT(reg_mmatch->regprog->program)
5811                               : UCHARAT(reg_match->regprog->program);
5812
5813     if (magic == REGMAGIC)
5814         return FALSE;
5815
5816     EMSG(_(e_re_corr));
5817     return TRUE;
5818 }
5819
5820 /*
5821  * Reset the start and end of all sub-expressions, unless this was done
5822  * already.  The clearing is postponed until the sub-expressions are used,
5823  * which is faster when they are not used at all.
5824  */
5825     static void
5826 cleanup_subexpr()
5827 {
5828     if (!need_clear_subexpr)
5829         return;
5830     need_clear_subexpr = FALSE;
5831     if (!REG_MULTI)
5832     {
5833         /* not multi-line: pointers are used, zero all of them */
5834         vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5835         vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5836     }
5837     else
5838     {
5839         /* filling the positions with 0xff bytes sets every lnum to -1 */
5840         vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5841         vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5842     }
5843 }
5844
5845 #ifdef FEAT_SYN_HL
5846 /*
5847  * Reset the reg_startz* and reg_endz* values, unless this was done already.
5848  */
5849     static void
5850 cleanup_zsubexpr()
5851 {
5852     if (!need_clear_zsubexpr)
5853         return;
5854     need_clear_zsubexpr = FALSE;
5855     if (!REG_MULTI)
5856     {
5857         vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
5858         vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
5859         return;
5860     }
5861     /* Filling the positions with 0xff bytes sets every lnum to -1. */
5862     vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5863     vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5864 }
5865 #endif
5866
5867 /*
5868  * Store the start and end of every sub-expression in "bp", so that
5869  * restore_subexpr() can put them back later.
5870  */
5871     static void
5872 save_subexpr(bp)
5873     regbehind_T *bp;
5874 {
5875     int idx;
5876
5877     /* The flag is always remembered.  When it is set the values don't need
5878      * to be copied, restoring the flag is sufficient then. */
5879     bp->save_need_clear_subexpr = need_clear_subexpr;
5880     if (need_clear_subexpr)
5881         return;
5882
5883     /* Copy positions when matching multi-line, pointers otherwise. */
5884     if (REG_MULTI)
5885         for (idx = 0; idx < NSUBEXP; ++idx)
5886         {
5887             bp->save_start[idx].se_u.pos = reg_startpos[idx];
5888             bp->save_end[idx].se_u.pos = reg_endpos[idx];
5889         }
5890     else
5891         for (idx = 0; idx < NSUBEXP; ++idx)
5892         {
5893             bp->save_start[idx].se_u.ptr = reg_startp[idx];
5894             bp->save_end[idx].se_u.ptr = reg_endp[idx];
5895         }
5896 }
5897
5898 /*
5899  * Put the sub-expression start and end values saved in "bp" back in place.
5900  */
5901     static void
5902 restore_subexpr(bp)
5903     regbehind_T *bp;
5904 {
5905     int idx;
5906
5907     /* The flag always comes back.  When it is set the values are going to be
5908      * cleared before they are used, nothing else needs to be restored. */
5909     need_clear_subexpr = bp->save_need_clear_subexpr;
5910     if (need_clear_subexpr)
5911         return;
5912
5913     if (REG_MULTI)
5914         for (idx = 0; idx < NSUBEXP; ++idx)
5915         {
5916             reg_startpos[idx] = bp->save_start[idx].se_u.pos;
5917             reg_endpos[idx] = bp->save_end[idx].se_u.pos;
5918         }
5919     else
5920         for (idx = 0; idx < NSUBEXP; ++idx)
5921         {
5922             reg_startp[idx] = bp->save_start[idx].se_u.ptr;
5923             reg_endp[idx] = bp->save_end[idx].se_u.ptr;
5924         }
5925 }
5926
5927 /*
5928  * Step to the next line: bump reglnum, point regline and reginput at it.
5929  */
5930     static void
5931 reg_nextline()
5932 {
5933     ++reglnum;
5934     reginput = regline = reg_getline(reglnum);
5935     fast_breakcheck();
5936 }
5937
5938 /*
5939  * Store the current input position and the length of "gap" in "save".
5940  */
5941     static void
5942 reg_save(save, gap)
5943     regsave_T   *save;
5944     garray_T    *gap;
5945 {
5946     save->rs_len = gap->ga_len;
5947     if (!REG_MULTI)
5948     {
5949         save->rs_u.ptr = reginput;      /* the pointer is sufficient */
5950         return;
5951     }
5952     save->rs_u.pos.lnum = reglnum;      /* multi-line: line number ... */
5953     save->rs_u.pos.col = (colnr_T)(reginput - regline); /* ... and column */
5954 }
5955
5956 /*
5957  * Restore the input line and position from a regsave_T.
5958  */
5959     static void
5960 reg_restore(save, gap)
5961     regsave_T   *save;
5962     garray_T    *gap;
5963 {
5964     if (REG_MULTI)
5965     {
5966         if (reglnum != save->rs_u.pos.lnum)
5967         {
5968             /* only call reg_getline() when the line number changed to save
5969              * a bit of time */
5970             reglnum = save->rs_u.pos.lnum;
5971             regline = reg_getline(reglnum);
5972         }
5973         reginput = regline + save->rs_u.pos.col;
5974     }
5975     else
5976         reginput = save->rs_u.ptr;
5977     gap->ga_len = save->rs_len;
5978 }
5979
5980 /*
5981  * Return TRUE if current position is equal to saved position.
5982  */
5983     static int
5984 reg_save_equal(save)
5985     regsave_T   *save;
5986 {
5987     if (REG_MULTI)
5988         return reglnum == save->rs_u.pos.lnum
5989                                   && reginput == regline + save->rs_u.pos.col;
5990     return reginput == save->rs_u.ptr;
5991 }
5992
5993 /*
5994  * Tentatively set the sub-expression start to the current position (after
5995  * calling regmatch() they will have changed).  Need to save the existing
5996  * values for when there is no match.
5997  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5998  * depending on REG_MULTI.
5999  */
6000     static void
6001 save_se_multi(savep, posp)
6002     save_se_T   *savep;
6003     lpos_T      *posp;
6004 {
6005     savep->se_u.pos = *posp;
6006     posp->lnum = reglnum;
6007     posp->col = (colnr_T)(reginput - regline);
6008 }
6009
6010     static void
6011 save_se_one(savep, pp)
6012     save_se_T   *savep;
6013     char_u      **pp;
6014 {
6015     savep->se_u.ptr = *pp;
6016     *pp = reginput;
6017 }
6018
6019 /*
6020  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6021  */
6022     static int
6023 re_num_cmp(val, scan)
6024     long_u      val;
6025     char_u      *scan;
6026 {
6027     long_u  n = OPERAND_MIN(scan);
6028
6029     if (OPERAND_CMP(scan) == '>')
6030         return val > n;
6031     if (OPERAND_CMP(scan) == '<')
6032         return val < n;
6033     return val == n;
6034 }
6035
6036
6037 #ifdef DEBUG
6038
6039 /*
6040  * regdump - dump a regexp onto stdout in vaguely comprehensible form
6041  */
6042     static void
6043 regdump(pattern, r)
6044     char_u      *pattern;
6045     regprog_T   *r;
6046 {
6047     char_u  *s;
6048     int     op = EXACTLY;       /* Arbitrary non-END op. */
6049     char_u  *next;
6050     char_u  *end = NULL;
6051
6052     printf("\r\nregcomp(%s):\r\n", pattern);
6053
6054     s = r->program + 1;
6055     /*
6056      * Loop until we find the END that isn't before a referred next (an END
6057      * can also appear in a NOMATCH operand).
6058      */
6059     while (op != END || s <= end)
6060     {
6061         op = OP(s);
6062         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
6063         next = regnext(s);
6064         if (next == NULL)       /* Next ptr. */
6065             printf("(0)");
6066         else
6067             printf("(%d)", (int)((s - r->program) + (next - s)));
6068         if (end < next)
6069             end = next;
6070         if (op == BRACE_LIMITS)
6071         {
6072             /* Two short ints */
6073             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
6074             s += 8;
6075         }
6076         s += 3;
6077         if (op == ANYOF || op == ANYOF + ADD_NL
6078                 || op == ANYBUT || op == ANYBUT + ADD_NL
6079                 || op == EXACTLY)
6080         {
6081             /* Literal string, where present. */
6082             while (*s != NUL)
6083                 printf("%c", *s++);
6084             s++;
6085         }
6086         printf("\r\n");
6087     }
6088
6089     /* Header fields of interest. */
6090     if (r->regstart != NUL)
6091         printf("start `%s' 0x%x; ", r->regstart < 256
6092                 ? (char *)transchar(r->regstart)
6093                 : "multibyte", r->regstart);
6094     if (r->reganch)
6095         printf("anchored; ");
6096     if (r->regmust != NULL)
6097         printf("must have \"%s\"", r->regmust);
6098     printf("\r\n");
6099 }
6100
6101 /*
6102  * regprop - printable representation of opcode
6103  */
6104     static char_u *
6105 regprop(op)
6106     char_u         *op;
6107 {
6108     char_u          *p;
6109     static char_u   buf[50];
6110
6111     (void) strcpy(buf, ":");
6112
6113     switch (OP(op))
6114     {
6115       case BOL:
6116         p = "BOL";
6117         break;
6118       case EOL:
6119         p = "EOL";
6120         break;
6121       case RE_BOF:
6122         p = "BOF";
6123         break;
6124       case RE_EOF:
6125         p = "EOF";
6126         break;
6127       case CURSOR:
6128         p = "CURSOR";
6129         break;
6130       case RE_VISUAL:
6131         p = "RE_VISUAL";
6132         break;
6133       case RE_LNUM:
6134         p = "RE_LNUM";
6135         break;
6136       case RE_MARK:
6137         p = "RE_MARK";
6138         break;
6139       case RE_COL:
6140         p = "RE_COL";
6141         break;
6142       case RE_VCOL:
6143         p = "RE_VCOL";
6144         break;
6145       case BOW:
6146         p = "BOW";
6147         break;
6148       case EOW:
6149         p = "EOW";
6150         break;
6151       case ANY:
6152         p = "ANY";
6153         break;
6154       case ANY + ADD_NL:
6155         p = "ANY+NL";
6156         break;
6157       case ANYOF:
6158         p = "ANYOF";
6159         break;
6160       case ANYOF + ADD_NL:
6161         p = "ANYOF+NL";
6162         break;
6163       case ANYBUT:
6164         p = "ANYBUT";
6165         break;
6166       case ANYBUT + ADD_NL:
6167         p = "ANYBUT+NL";
6168         break;
6169       case IDENT:
6170         p = "IDENT";
6171         break;
6172       case IDENT + ADD_NL:
6173         p = "IDENT+NL";
6174         break;
6175       case SIDENT:
6176         p = "SIDENT";
6177         break;
6178       case SIDENT + ADD_NL:
6179         p = "SIDENT+NL";
6180         break;
6181       case KWORD:
6182         p = "KWORD";
6183         break;
6184       case KWORD + ADD_NL:
6185         p = "KWORD+NL";
6186         break;
6187       case SKWORD:
6188         p = "SKWORD";
6189         break;
6190       case SKWORD + ADD_NL:
6191         p = "SKWORD+NL";
6192         break;
6193       case FNAME:
6194         p = "FNAME";
6195         break;
6196       case FNAME + ADD_NL:
6197         p = "FNAME+NL";
6198         break;
6199       case SFNAME:
6200         p = "SFNAME";
6201         break;
6202       case SFNAME + ADD_NL:
6203         p = "SFNAME+NL";
6204         break;
6205       case PRINT:
6206         p = "PRINT";
6207         break;
6208       case PRINT + ADD_NL:
6209         p = "PRINT+NL";
6210         break;
6211       case SPRINT:
6212         p = "SPRINT";
6213         break;
6214       case SPRINT + ADD_NL:
6215         p = "SPRINT+NL";
6216         break;
6217       case WHITE:
6218         p = "WHITE";
6219         break;
6220       case WHITE + ADD_NL:
6221         p = "WHITE+NL";
6222         break;
6223       case NWHITE:
6224         p = "NWHITE";
6225         break;
6226       case NWHITE + ADD_NL:
6227         p = "NWHITE+NL";
6228         break;
6229       case DIGIT:
6230         p = "DIGIT";
6231         break;
6232       case DIGIT + ADD_NL:
6233         p = "DIGIT+NL";
6234         break;
6235       case NDIGIT:
6236         p = "NDIGIT";
6237         break;
6238       case NDIGIT + ADD_NL:
6239         p = "NDIGIT+NL";
6240         break;
6241       case HEX:
6242         p = "HEX";
6243         break;
6244       case HEX + ADD_NL:
6245         p = "HEX+NL";
6246         break;
6247       case NHEX:
6248         p = "NHEX";
6249         break;
6250       case NHEX + ADD_NL:
6251         p = "NHEX+NL";
6252         break;
6253       case OCTAL:
6254         p = "OCTAL";
6255         break;
6256       case OCTAL + ADD_NL:
6257         p = "OCTAL+NL";
6258         break;
6259       case NOCTAL:
6260         p = "NOCTAL";
6261         break;
6262       case NOCTAL + ADD_NL:
6263         p = "NOCTAL+NL";
6264         break;
6265       case WORD:
6266         p = "WORD";
6267         break;
6268       case WORD + ADD_NL:
6269         p = "WORD+NL";
6270         break;
6271       case NWORD:
6272         p = "NWORD";
6273         break;
6274       case NWORD + ADD_NL:
6275         p = "NWORD+NL";
6276         break;
6277       case HEAD:
6278         p = "HEAD";
6279         break;
6280       case HEAD + ADD_NL:
6281         p = "HEAD+NL";
6282         break;
6283       case NHEAD:
6284         p = "NHEAD";
6285         break;
6286       case NHEAD + ADD_NL:
6287         p = "NHEAD+NL";
6288         break;
6289       case ALPHA:
6290         p = "ALPHA";
6291         break;
6292       case ALPHA + ADD_NL:
6293         p = "ALPHA+NL";
6294         break;
6295       case NALPHA:
6296         p = "NALPHA";
6297         break;
6298       case NALPHA + ADD_NL:
6299         p = "NALPHA+NL";
6300         break;
6301       case LOWER:
6302         p = "LOWER";
6303         break;
6304       case LOWER + ADD_NL:
6305         p = "LOWER+NL";
6306         break;
6307       case NLOWER:
6308         p = "NLOWER";
6309         break;
6310       case NLOWER + ADD_NL:
6311         p = "NLOWER+NL";
6312         break;
6313       case UPPER:
6314         p = "UPPER";
6315         break;
6316       case UPPER + ADD_NL:
6317         p = "UPPER+NL";
6318         break;
6319       case NUPPER:
6320         p = "NUPPER";
6321         break;
6322       case NUPPER + ADD_NL:
6323         p = "NUPPER+NL";
6324         break;
6325       case BRANCH:
6326         p = "BRANCH";
6327         break;
6328       case EXACTLY:
6329         p = "EXACTLY";
6330         break;
6331       case NOTHING:
6332         p = "NOTHING";
6333         break;
6334       case BACK:
6335         p = "BACK";
6336         break;
6337       case END:
6338         p = "END";
6339         break;
6340       case MOPEN + 0:
6341         p = "MATCH START";
6342         break;
6343       case MOPEN + 1:
6344       case MOPEN + 2:
6345       case MOPEN + 3:
6346       case MOPEN + 4:
6347       case MOPEN + 5:
6348       case MOPEN + 6:
6349       case MOPEN + 7:
6350       case MOPEN + 8:
6351       case MOPEN + 9:
6352         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6353         p = NULL;
6354         break;
6355       case MCLOSE + 0:
6356         p = "MATCH END";
6357         break;
6358       case MCLOSE + 1:
6359       case MCLOSE + 2:
6360       case MCLOSE + 3:
6361       case MCLOSE + 4:
6362       case MCLOSE + 5:
6363       case MCLOSE + 6:
6364       case MCLOSE + 7:
6365       case MCLOSE + 8:
6366       case MCLOSE + 9:
6367         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6368         p = NULL;
6369         break;
6370       case BACKREF + 1:
6371       case BACKREF + 2:
6372       case BACKREF + 3:
6373       case BACKREF + 4:
6374       case BACKREF + 5:
6375       case BACKREF + 6:
6376       case BACKREF + 7:
6377       case BACKREF + 8:
6378       case BACKREF + 9:
6379         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6380         p = NULL;
6381         break;
6382       case NOPEN:
6383         p = "NOPEN";
6384         break;
6385       case NCLOSE:
6386         p = "NCLOSE";
6387         break;
6388 #ifdef FEAT_SYN_HL
6389       case ZOPEN + 1:
6390       case ZOPEN + 2:
6391       case ZOPEN + 3:
6392       case ZOPEN + 4:
6393       case ZOPEN + 5:
6394       case ZOPEN + 6:
6395       case ZOPEN + 7:
6396       case ZOPEN + 8:
6397       case ZOPEN + 9:
6398         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6399         p = NULL;
6400         break;
6401       case ZCLOSE + 1:
6402       case ZCLOSE + 2:
6403       case ZCLOSE + 3:
6404       case ZCLOSE + 4:
6405       case ZCLOSE + 5:
6406       case ZCLOSE + 6:
6407       case ZCLOSE + 7:
6408       case ZCLOSE + 8:
6409       case ZCLOSE + 9:
6410         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6411         p = NULL;
6412         break;
6413       case ZREF + 1:
6414       case ZREF + 2:
6415       case ZREF + 3:
6416       case ZREF + 4:
6417       case ZREF + 5:
6418       case ZREF + 6:
6419       case ZREF + 7:
6420       case ZREF + 8:
6421       case ZREF + 9:
6422         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6423         p = NULL;
6424         break;
6425 #endif
6426       case STAR:
6427         p = "STAR";
6428         break;
6429       case PLUS:
6430         p = "PLUS";
6431         break;
6432       case NOMATCH:
6433         p = "NOMATCH";
6434         break;
6435       case MATCH:
6436         p = "MATCH";
6437         break;
6438       case BEHIND:
6439         p = "BEHIND";
6440         break;
6441       case NOBEHIND:
6442         p = "NOBEHIND";
6443         break;
6444       case SUBPAT:
6445         p = "SUBPAT";
6446         break;
6447       case BRACE_LIMITS:
6448         p = "BRACE_LIMITS";
6449         break;
6450       case BRACE_SIMPLE:
6451         p = "BRACE_SIMPLE";
6452         break;
6453       case BRACE_COMPLEX + 0:
6454       case BRACE_COMPLEX + 1:
6455       case BRACE_COMPLEX + 2:
6456       case BRACE_COMPLEX + 3:
6457       case BRACE_COMPLEX + 4:
6458       case BRACE_COMPLEX + 5:
6459       case BRACE_COMPLEX + 6:
6460       case BRACE_COMPLEX + 7:
6461       case BRACE_COMPLEX + 8:
6462       case BRACE_COMPLEX + 9:
6463         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6464         p = NULL;
6465         break;
6466 #ifdef FEAT_MBYTE
6467       case MULTIBYTECODE:
6468         p = "MULTIBYTECODE";
6469         break;
6470 #endif
6471       case NEWL:
6472         p = "NEWL";
6473         break;
6474       default:
6475         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6476         p = NULL;
6477         break;
6478     }
6479     if (p != NULL)
6480         (void) strcat(buf, p);
6481     return buf;
6482 }
6483 #endif
6484
6485 #ifdef FEAT_MBYTE
6486 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6487
typedef struct
{
    int a, b, c;
} decomp_T;


/* 0xfb20 - 0xfb4f */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "*c1", "*c2" and "*c3".
 * Only the characters 0xfb20 - 0xfb4f, the ones listed in decomp_table, are
 * decomposed; parts that are not used are set to zero.  Any other character
 * is returned unmodified in "*c1", with "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character of decomp_table, otherwise
     * "c - 0xfb20" becomes a negative index and the table is read out of
     * bounds. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6566 #endif
6567
6568 /*
6569  * Compare two strings, ignore case if ireg_ic set.
6570  * Return 0 if strings match, non-zero otherwise.
6571  * Correct the length "*n" when composing characters are ignored.
6572  */
6573     static int
6574 cstrncmp(s1, s2, n)
6575     char_u      *s1, *s2;
6576     int         *n;
6577 {
6578     int         result;
6579
6580     if (!ireg_ic)
6581         result = STRNCMP(s1, s2, *n);
6582     else
6583         result = MB_STRNICMP(s1, s2, *n);
6584
6585 #ifdef FEAT_MBYTE
6586     /* if it failed and it's utf8 and we want to combineignore: */
6587     if (result != 0 && enc_utf8 && ireg_icombine)
6588     {
6589         char_u  *str1, *str2;
6590         int     c1, c2, c11, c12;
6591         int     junk;
6592
6593         /* we have to handle the strcmp ourselves, since it is necessary to
6594          * deal with the composing characters by ignoring them: */
6595         str1 = s1;
6596         str2 = s2;
6597         c1 = c2 = 0;
6598         while ((int)(str1 - s1) < *n)
6599         {
6600             c1 = mb_ptr2char_adv(&str1);
6601             c2 = mb_ptr2char_adv(&str2);
6602
6603             /* decompose the character if necessary, into 'base' characters
6604              * because I don't care about Arabic, I will hard-code the Hebrew
6605              * which I *do* care about!  So sue me... */
6606             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6607             {
6608                 /* decomposition necessary? */
6609                 mb_decompose(c1, &c11, &junk, &junk);
6610                 mb_decompose(c2, &c12, &junk, &junk);
6611                 c1 = c11;
6612                 c2 = c12;
6613                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6614                     break;
6615             }
6616         }
6617         result = c2 - c1;
6618         if (result == 0)
6619             *n = (int)(str2 - s2);
6620     }
6621 #endif
6622
6623     return result;
6624 }
6625
6626 /*
6627  * cstrchr: This function is used a lot for simple searches, keep it fast!
6628  */
6629     static char_u *
6630 cstrchr(s, c)
6631     char_u      *s;
6632     int         c;
6633 {
6634     char_u      *p;
6635     int         cc;
6636
6637     if (!ireg_ic
6638 #ifdef FEAT_MBYTE
6639             || (!enc_utf8 && mb_char2len(c) > 1)
6640 #endif
6641             )
6642         return vim_strchr(s, c);
6643
6644     /* tolower() and toupper() can be slow, comparing twice should be a lot
6645      * faster (esp. when using MS Visual C++!).
6646      * For UTF-8 need to use folded case. */
6647 #ifdef FEAT_MBYTE
6648     if (enc_utf8 && c > 0x80)
6649         cc = utf_fold(c);
6650     else
6651 #endif
6652          if (MB_ISUPPER(c))
6653         cc = MB_TOLOWER(c);
6654     else if (MB_ISLOWER(c))
6655         cc = MB_TOUPPER(c);
6656     else
6657         return vim_strchr(s, c);
6658
6659 #ifdef FEAT_MBYTE
6660     if (has_mbyte)
6661     {
6662         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6663         {
6664             if (enc_utf8 && c > 0x80)
6665             {
6666                 if (utf_fold(utf_ptr2char(p)) == cc)
6667                     return p;
6668             }
6669             else if (*p == c || *p == cc)
6670                 return p;
6671         }
6672     }
6673     else
6674 #endif
6675         /* Faster version for when there are no multi-byte characters. */
6676         for (p = s; *p != NUL; ++p)
6677             if (*p == c || *p == cc)
6678                 return p;
6679
6680     return NULL;
6681 }
6682
6683 /***************************************************************
6684  *                    regsub stuff                             *
6685  ***************************************************************/
6686
6687 /* This stuff below really confuses cc on an SGI -- webb */
6688 #ifdef __sgi
6689 # undef __ARGS
6690 # define __ARGS(x)  ()
6691 #endif
6692
6693 /*
6694  * We should define ftpr as a pointer to a function returning a pointer to
6695  * a function returning a pointer to a function ...
6696  * This is impossible, so we declare a pointer to a function returning a
6697  * pointer to a function returning void. This should work for all compilers.
6698  */
6699 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6700
6701 static fptr_T do_upper __ARGS((int *, int));
6702 static fptr_T do_Upper __ARGS((int *, int));
6703 static fptr_T do_lower __ARGS((int *, int));
6704 static fptr_T do_Lower __ARGS((int *, int));
6705
6706 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6707
6708     static fptr_T
6709 do_upper(d, c)
6710     int         *d;
6711     int         c;
6712 {
6713     *d = MB_TOUPPER(c);
6714
6715     return (fptr_T)NULL;
6716 }
6717
6718     static fptr_T
6719 do_Upper(d, c)
6720     int         *d;
6721     int         c;
6722 {
6723     *d = MB_TOUPPER(c);
6724
6725     return (fptr_T)do_Upper;
6726 }
6727
6728     static fptr_T
6729 do_lower(d, c)
6730     int         *d;
6731     int         c;
6732 {
6733     *d = MB_TOLOWER(c);
6734
6735     return (fptr_T)NULL;
6736 }
6737
6738     static fptr_T
6739 do_Lower(d, c)
6740     int         *d;
6741     int         c;
6742 {
6743     *d = MB_TOLOWER(c);
6744
6745     return (fptr_T)do_Lower;
6746 }
6747
6748 /*
6749  * regtilde(): Replace tildes in the pattern by the old pattern.
6750  *
6751  * Short explanation of the tilde: It stands for the previous replacement
6752  * pattern.  If that previous pattern also contains a ~ we should go back a
6753  * step further...  But we insert the previous pattern into the current one
6754  * and remember that.
6755  * This still does not handle the case where "magic" changes.  So require the
6756  * user to keep his hands off of "magic".
6757  *
6758  * The tildes are parsed once before the first call to vim_regsub().
6759  */
6760     char_u *
6761 regtilde(source, magic)
6762     char_u      *source;
6763     int         magic;
6764 {
6765     char_u      *newsub = source;
6766     char_u      *tmpsub;
6767     char_u      *p;
6768     int         len;
6769     int         prevlen;
6770
6771     for (p = newsub; *p; ++p)
6772     {
6773         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6774         {
6775             if (reg_prev_sub != NULL)
6776             {
6777                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6778                 prevlen = (int)STRLEN(reg_prev_sub);
6779                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6780                 if (tmpsub != NULL)
6781                 {
6782                     /* copy prefix */
6783                     len = (int)(p - newsub);    /* not including ~ */
6784                     mch_memmove(tmpsub, newsub, (size_t)len);
6785                     /* interpret tilde */
6786                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6787                     /* copy postfix */
6788                     if (!magic)
6789                         ++p;                    /* back off \ */
6790                     STRCPY(tmpsub + len + prevlen, p + 1);
6791
6792                     if (newsub != source)       /* already allocated newsub */
6793                         vim_free(newsub);
6794                     newsub = tmpsub;
6795                     p = newsub + len + prevlen;
6796                 }
6797             }
6798             else if (magic)
6799                 STRMOVE(p, p + 1);      /* remove '~' */
6800             else
6801                 STRMOVE(p, p + 2);      /* remove '\~' */
6802             --p;
6803         }
6804         else
6805         {
6806             if (*p == '\\' && p[1])             /* skip escaped characters */
6807                 ++p;
6808 #ifdef FEAT_MBYTE
6809             if (has_mbyte)
6810                 p += (*mb_ptr2len)(p) - 1;
6811 #endif
6812         }
6813     }
6814
6815     vim_free(reg_prev_sub);
6816     if (newsub != source)       /* newsub was allocated, just keep it */
6817         reg_prev_sub = newsub;
6818     else                        /* no ~ found, need to save newsub  */
6819         reg_prev_sub = vim_strsave(newsub);
6820     return newsub;
6821 }
6822
#ifdef FEAT_EVAL
/* TRUE when submatch() can be used.  vim_regsub_both() sets it while it
 * evaluates a "\=" expression and resets it afterwards. */
static int              can_f_submatch = FALSE;

/* reg_submatch() uses these instead of reg_match and reg_mmatch.  This is
 * needed because the substitution string can be an expression that contains
 * a call to substitute() as well as submatch(). */
static regmatch_T       *submatch_match;
static regmmatch_T      *submatch_mmatch;
static linenr_T         submatch_firstlnum;
static linenr_T         submatch_maxline;
#endif
6834
6835 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6836 /*
6837  * vim_regsub() - perform substitutions after a vim_regexec() or
6838  * vim_regexec_multi() match.
6839  *
6840  * If "copy" is TRUE really copy into "dest".
6841  * If "copy" is FALSE nothing is copied, this is just to find out the length
6842  * of the result.
6843  *
6844  * If "backslash" is TRUE, a backslash will be removed later, need to double
6845  * them to keep them, and insert a backslash before a CR to avoid it being
6846  * replaced with a line break later.
6847  *
6848  * Note: The matched text must not change between the call of
6849  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6850  * references invalid!
6851  *
6852  * Returns the size of the replacement, including terminating NUL.
6853  */
6854     int
6855 vim_regsub(rmp, source, dest, copy, magic, backslash)
6856     regmatch_T  *rmp;
6857     char_u      *source;
6858     char_u      *dest;
6859     int         copy;
6860     int         magic;
6861     int         backslash;
6862 {
6863     reg_match = rmp;
6864     reg_mmatch = NULL;
6865     reg_maxline = 0;
6866     return vim_regsub_both(source, dest, copy, magic, backslash);
6867 }
6868 #endif
6869
6870     int
6871 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6872     regmmatch_T *rmp;
6873     linenr_T    lnum;
6874     char_u      *source;
6875     char_u      *dest;
6876     int         copy;
6877     int         magic;
6878     int         backslash;
6879 {
6880     reg_match = NULL;
6881     reg_mmatch = rmp;
6882     reg_buf = curbuf;           /* always works on the current buffer! */
6883     reg_firstlnum = lnum;
6884     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6885     return vim_regsub_both(source, dest, copy, magic, backslash);
6886 }
6887
6888     static int
6889 vim_regsub_both(source, dest, copy, magic, backslash)
6890     char_u      *source;
6891     char_u      *dest;
6892     int         copy;
6893     int         magic;
6894     int         backslash;
6895 {
6896     char_u      *src;
6897     char_u      *dst;
6898     char_u      *s;
6899     int         c;
6900     int         cc;
6901     int         no = -1;
6902     fptr_T      func = (fptr_T)NULL;
6903     linenr_T    clnum = 0;      /* init for GCC */
6904     int         len = 0;        /* init for GCC */
6905 #ifdef FEAT_EVAL
6906     static char_u *eval_result = NULL;
6907 #endif
6908
6909     /* Be paranoid... */
6910     if (source == NULL || dest == NULL)
6911     {
6912         EMSG(_(e_null));
6913         return 0;
6914     }
6915     if (prog_magic_wrong())
6916         return 0;
6917     src = source;
6918     dst = dest;
6919
6920     /*
6921      * When the substitute part starts with "\=" evaluate it as an expression.
6922      */
6923     if (source[0] == '\\' && source[1] == '='
6924 #ifdef FEAT_EVAL
6925             && !can_f_submatch      /* can't do this recursively */
6926 #endif
6927             )
6928     {
6929 #ifdef FEAT_EVAL
6930         /* To make sure that the length doesn't change between checking the
6931          * length and copying the string, and to speed up things, the
6932          * resulting string is saved from the call with "copy" == FALSE to the
6933          * call with "copy" == TRUE. */
6934         if (copy)
6935         {
6936             if (eval_result != NULL)
6937             {
6938                 STRCPY(dest, eval_result);
6939                 dst += STRLEN(eval_result);
6940                 vim_free(eval_result);
6941                 eval_result = NULL;
6942             }
6943         }
6944         else
6945         {
6946             win_T       *save_reg_win;
6947             int         save_ireg_ic;
6948
6949             vim_free(eval_result);
6950
6951             /* The expression may contain substitute(), which calls us
6952              * recursively.  Make sure submatch() gets the text from the first
6953              * level.  Don't need to save "reg_buf", because
6954              * vim_regexec_multi() can't be called recursively. */
6955             submatch_match = reg_match;
6956             submatch_mmatch = reg_mmatch;
6957             submatch_firstlnum = reg_firstlnum;
6958             submatch_maxline = reg_maxline;
6959             save_reg_win = reg_win;
6960             save_ireg_ic = ireg_ic;
6961             can_f_submatch = TRUE;
6962
6963             eval_result = eval_to_string(source + 2, NULL, TRUE);
6964             if (eval_result != NULL)
6965             {
6966                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6967                 {
6968                     /* Change NL to CR, so that it becomes a line break.
6969                      * Skip over a backslashed character. */
6970                     if (*s == NL)
6971                         *s = CAR;
6972                     else if (*s == '\\' && s[1] != NUL)
6973                         ++s;
6974                 }
6975
6976                 dst += STRLEN(eval_result);
6977             }
6978
6979             reg_match = submatch_match;
6980             reg_mmatch = submatch_mmatch;
6981             reg_firstlnum = submatch_firstlnum;
6982             reg_maxline = submatch_maxline;
6983             reg_win = save_reg_win;
6984             ireg_ic = save_ireg_ic;
6985             can_f_submatch = FALSE;
6986         }
6987 #endif
6988     }
6989     else
6990       while ((c = *src++) != NUL)
6991       {
6992         if (c == '&' && magic)
6993             no = 0;
6994         else if (c == '\\' && *src != NUL)
6995         {
6996             if (*src == '&' && !magic)
6997             {
6998                 ++src;
6999                 no = 0;
7000             }
7001             else if ('0' <= *src && *src <= '9')
7002             {
7003                 no = *src++ - '0';
7004             }
7005             else if (vim_strchr((char_u *)"uUlLeE", *src))
7006             {
7007                 switch (*src++)
7008                 {
7009                 case 'u':   func = (fptr_T)do_upper;
7010                             continue;
7011                 case 'U':   func = (fptr_T)do_Upper;
7012                             continue;
7013                 case 'l':   func = (fptr_T)do_lower;
7014                             continue;
7015                 case 'L':   func = (fptr_T)do_Lower;
7016                             continue;
7017                 case 'e':
7018                 case 'E':   func = (fptr_T)NULL;
7019                             continue;
7020                 }
7021             }
7022         }
7023         if (no < 0)           /* Ordinary character. */
7024         {
7025             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7026             {
7027                 /* Copy a special key as-is. */
7028                 if (copy)
7029                 {
7030                     *dst++ = c;
7031                     *dst++ = *src++;
7032                     *dst++ = *src++;
7033                 }
7034                 else
7035                 {
7036                     dst += 3;
7037                     src += 2;
7038                 }
7039                 continue;
7040             }
7041
7042             if (c == '\\' && *src != NUL)
7043             {
7044                 /* Check for abbreviations -- webb */
7045                 switch (*src)
7046                 {
7047                     case 'r':   c = CAR;        ++src;  break;
7048                     case 'n':   c = NL;         ++src;  break;
7049                     case 't':   c = TAB;        ++src;  break;
7050                  /* Oh no!  \e already has meaning in subst pat :-( */
7051                  /* case 'e':   c = ESC;        ++src;  break; */
7052                     case 'b':   c = Ctrl_H;     ++src;  break;
7053
7054                     /* If "backslash" is TRUE the backslash will be removed
7055                      * later.  Used to insert a literal CR. */
7056                     default:    if (backslash)
7057                                 {
7058                                     if (copy)
7059                                         *dst = '\\';
7060                                     ++dst;
7061                                 }
7062                                 c = *src++;
7063                 }
7064             }
7065 #ifdef FEAT_MBYTE
7066             else if (has_mbyte)
7067                 c = mb_ptr2char(src - 1);
7068 #endif
7069
7070             /* Write to buffer, if copy is set. */
7071             if (func == (fptr_T)NULL)   /* just copy */
7072                 cc = c;
7073             else
7074                 /* Turbo C complains without the typecast */
7075                 func = (fptr_T)(func(&cc, c));
7076
7077 #ifdef FEAT_MBYTE
7078             if (has_mbyte)
7079             {
7080                 src += mb_ptr2len(src - 1) - 1;
7081                 if (copy)
7082                     mb_char2bytes(cc, dst);
7083                 dst += mb_char2len(cc) - 1;
7084             }
7085             else
7086 #endif
7087                 if (copy)
7088                     *dst = cc;
7089             dst++;
7090         }
7091         else
7092         {
7093             if (REG_MULTI)
7094             {
7095                 clnum = reg_mmatch->startpos[no].lnum;
7096                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7097                     s = NULL;
7098                 else
7099                 {
7100                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7101                     if (reg_mmatch->endpos[no].lnum == clnum)
7102                         len = reg_mmatch->endpos[no].col
7103                                                - reg_mmatch->startpos[no].col;
7104                     else
7105                         len = (int)STRLEN(s);
7106                 }
7107             }
7108             else
7109             {
7110                 s = reg_match->startp[no];
7111                 if (reg_match->endp[no] == NULL)
7112                     s = NULL;
7113                 else
7114                     len = (int)(reg_match->endp[no] - s);
7115             }
7116             if (s != NULL)
7117             {
7118                 for (;;)
7119                 {
7120                     if (len == 0)
7121                     {
7122                         if (REG_MULTI)
7123                         {
7124                             if (reg_mmatch->endpos[no].lnum == clnum)
7125                                 break;
7126                             if (copy)
7127                                 *dst = CAR;
7128                             ++dst;
7129                             s = reg_getline(++clnum);
7130                             if (reg_mmatch->endpos[no].lnum == clnum)
7131                                 len = reg_mmatch->endpos[no].col;
7132                             else
7133                                 len = (int)STRLEN(s);
7134                         }
7135                         else
7136                             break;
7137                     }
7138                     else if (*s == NUL) /* we hit NUL. */
7139                     {
7140                         if (copy)
7141                             EMSG(_(e_re_damg));
7142                         goto exit;
7143                     }
7144                     else
7145                     {
7146                         if (backslash && (*s == CAR || *s == '\\'))
7147                         {
7148                             /*
7149                              * Insert a backslash in front of a CR, otherwise
7150                              * it will be replaced by a line break.
7151                              * Number of backslashes will be halved later,
7152                              * double them here.
7153                              */
7154                             if (copy)
7155                             {
7156                                 dst[0] = '\\';
7157                                 dst[1] = *s;
7158                             }
7159                             dst += 2;
7160                         }
7161                         else
7162                         {
7163 #ifdef FEAT_MBYTE
7164                             if (has_mbyte)
7165                                 c = mb_ptr2char(s);
7166                             else
7167 #endif
7168                                 c = *s;
7169
7170                             if (func == (fptr_T)NULL)   /* just copy */
7171                                 cc = c;
7172                             else
7173                                 /* Turbo C complains without the typecast */
7174                                 func = (fptr_T)(func(&cc, c));
7175
7176 #ifdef FEAT_MBYTE
7177                             if (has_mbyte)
7178                             {
7179                                 int l;
7180
7181                                 /* Copy composing characters separately, one
7182                                  * at a time. */
7183                                 if (enc_utf8)
7184                                     l = utf_ptr2len(s) - 1;
7185                                 else
7186                                     l = mb_ptr2len(s) - 1;
7187
7188                                 s += l;
7189                                 len -= l;
7190                                 if (copy)
7191                                     mb_char2bytes(cc, dst);
7192                                 dst += mb_char2len(cc) - 1;
7193                             }
7194                             else
7195 #endif
7196                                 if (copy)
7197                                     *dst = cc;
7198                             dst++;
7199                         }
7200
7201                         ++s;
7202                         --len;
7203                     }
7204                 }
7205             }
7206             no = -1;
7207         }
7208       }
7209     if (copy)
7210         *dst = NUL;
7211
7212 exit:
7213     return (int)((dst - dest) + 1);
7214 }
7215
7216 #ifdef FEAT_EVAL
7217 static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7218
7219 /*
7220  * Call reg_getline() with the line numbers from the submatch.  If a
7221  * substitute() was used the reg_maxline and other values have been
7222  * overwritten.
7223  */
7224     static char_u *
7225 reg_getline_submatch(lnum)
7226     linenr_T    lnum;
7227 {
7228     char_u *s;
7229     linenr_T save_first = reg_firstlnum;
7230     linenr_T save_max = reg_maxline;
7231
7232     reg_firstlnum = submatch_firstlnum;
7233     reg_maxline = submatch_maxline;
7234
7235     s = reg_getline(lnum);
7236
7237     reg_firstlnum = save_first;
7238     reg_maxline = save_max;
7239     return s;
7240 }
7241
7242 /*
7243  * Used for the submatch() function: get the string from the n'th submatch in
7244  * allocated memory.
7245  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7246  */
7247     char_u *
7248 reg_submatch(no)
7249     int         no;
7250 {
7251     char_u      *retval = NULL;
7252     char_u      *s;
7253     int         len;
7254     int         round;
7255     linenr_T    lnum;
7256
7257     if (!can_f_submatch || no < 0)
7258         return NULL;
7259
7260     if (submatch_match == NULL)
7261     {
7262         /*
7263          * First round: compute the length and allocate memory.
7264          * Second round: copy the text.
7265          */
7266         for (round = 1; round <= 2; ++round)
7267         {
7268             lnum = submatch_mmatch->startpos[no].lnum;
7269             if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7270                 return NULL;
7271
7272             s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
7273             if (s == NULL)  /* anti-crash check, cannot happen? */
7274                 break;
7275             if (submatch_mmatch->endpos[no].lnum == lnum)
7276             {
7277                 /* Within one line: take form start to end col. */
7278                 len = submatch_mmatch->endpos[no].col
7279                                           - submatch_mmatch->startpos[no].col;
7280                 if (round == 2)
7281                     vim_strncpy(retval, s, len);
7282                 ++len;
7283             }
7284             else
7285             {
7286                 /* Multiple lines: take start line from start col, middle
7287                  * lines completely and end line up to end col. */
7288                 len = (int)STRLEN(s);
7289                 if (round == 2)
7290                 {
7291                     STRCPY(retval, s);
7292                     retval[len] = '\n';
7293                 }
7294                 ++len;
7295                 ++lnum;
7296                 while (lnum < submatch_mmatch->endpos[no].lnum)
7297                 {
7298                     s = reg_getline_submatch(lnum++);
7299                     if (round == 2)
7300                         STRCPY(retval + len, s);
7301                     len += (int)STRLEN(s);
7302                     if (round == 2)
7303                         retval[len] = '\n';
7304                     ++len;
7305                 }
7306                 if (round == 2)
7307                     STRNCPY(retval + len, reg_getline_submatch(lnum),
7308                                              submatch_mmatch->endpos[no].col);
7309                 len += submatch_mmatch->endpos[no].col;
7310                 if (round == 2)
7311                     retval[len] = NUL;
7312                 ++len;
7313             }
7314
7315             if (retval == NULL)
7316             {
7317                 retval = lalloc((long_u)len, TRUE);
7318                 if (retval == NULL)
7319                     return NULL;
7320             }
7321         }
7322     }
7323     else
7324     {
7325         s = submatch_match->startp[no];
7326         if (s == NULL || submatch_match->endp[no] == NULL)
7327             retval = NULL;
7328         else
7329             retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7330     }
7331
7332     return retval;
7333 }
7334 #endif