src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
   72  * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
  73  * pointer with a BRANCH on both ends of it is connecting two alternatives.
  74  * (Here we have one of the subtle syntax dependencies: an individual BRANCH
  75  * (as opposed to a collection of them) is never concatenated with anything
   76  * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
  77  * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
 139 /* definition   number             opnd?    meaning */
 140 #define END             0       /*      End of program or NOMATCH operand. */
 141 #define BOL             1       /*      Match "" at beginning of line. */
 142 #define EOL             2       /*      Match "" at end of line. */
 143 #define BRANCH          3       /* node Match this alternative, or the
 144                                  *      next... */
 145 #define BACK            4       /*      Match "", "next" ptr points backward. */
 146 #define EXACTLY         5       /* str  Match this string. */
 147 #define NOTHING         6       /*      Match empty string. */
 148 #define STAR            7       /* node Match this (simple) thing 0 or more
 149                                  *      times. */
 150 #define PLUS            8       /* node Match this (simple) thing 1 or more
 151                                  *      times. */
 152 #define MATCH           9       /* node match the operand zero-width */
 153 #define NOMATCH         10      /* node check for no match with operand */
 154 #define BEHIND          11      /* node look behind for a match with operand */
 155 #define NOBEHIND        12      /* node look behind for no match with operand */
 156 #define SUBPAT          13      /* node match the operand here */
 157 #define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
 158                                  *      n times (\{m,n\}). */
 159 #define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
 160 #define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
 161 #define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
 162                                  *      and BRACE_COMPLEX. */
 163 #define NEWL            18      /*      Match line-break */
 164 #define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
 203 #define MOPEN           80  /* -89       Mark this point in input as start of
 204                                  *       \( subexpr.  MOPEN + 0 marks start of
 205                                  *       match. */
 206 #define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
 207                                  *       end of match. */
 208 #define BACKREF         100 /* -109 node Match same string again \1-\9 */
 209
 210 #ifdef FEAT_SYN_HL
 211 # define ZOPEN          110 /* -119      Mark this point in input as start of
 212                                  *       \z( subexpr. */
 213 # define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
 214 # define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
 215 #endif
 216
 217 #define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */
 218
 219 #define NOPEN           150     /*      Mark this point in input as start of
 220                                         \%( subexpr. */
 221 #define NCLOSE          151     /*      Analogous to NOPEN. */
 222
 223 #define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
 224 #define RE_BOF          201     /*      Match "" at beginning of file. */
 225 #define RE_EOF          202     /*      Match "" at end of file. */
 226 #define CURSOR          203     /*      Match location of cursor. */
 227
 228 #define RE_LNUM         204     /* nr cmp  Match line number */
 229 #define RE_COL          205     /* nr cmp  Match column number */
 230 #define RE_VCOL         206     /* nr cmp  Match virtual column number */
 231
 232 #define RE_MARK         207     /* mark cmp  Match mark position */
 233 #define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
 240 #define Magic(x)        ((int)(x) - 256)
 241 #define un_Magic(x)     ((x) + 256)
 242 #define is_Magic(x)     ((x) < 0)
 243
 244 static int no_Magic __ARGS((int x));
 245 static int toggle_Magic __ARGS((int x));
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
 271 #define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
 310 #define OP(p)           ((int)*(p))
 311 #define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
 312 #define OPERAND(p)      ((p) + 3)
 313 /* Obtain an operand that was stored as four bytes, MSB first. */
 314 #define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 315                         + ((long)(p)[5] << 8) + (long)(p)[6])
 316 /* Obtain a second operand stored as four bytes. */
 317 #define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
 318 /* Obtain a second single-byte operand stored after a four bytes operand. */
 319 #define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
 324 #define UCHARAT(p)      ((int)*(char_u *)(p))
 325
 326 /* Used for an error (down from) vim_regcomp(): give the error message, set
 327  * rc_did_emsg and return NULL */
 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 332
 333 #define MAX_LIMIT       (32767L << 16L)
 334
 335 static int re_multi_type __ARGS((int));
 336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 337 static char_u *cstrchr __ARGS((char_u *, int));
 338
 339 #ifdef DEBUG
 340 static void     regdump __ARGS((char_u *, regprog_T *));
 341 static char_u   *regprop __ARGS((char_u *));
 342 #endif
 343
 344 #define NOT_MULTI       0
 345 #define MULTI_ONE       1
 346 #define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
 366 #define HASWIDTH        0x1     /* Known never to match null string. */
 367 #define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
 368 #define SPSTART         0x4     /* Starts with * or +. */
 369 #define HASNL           0x8     /* Contains some \n. */
 370 #define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
 371 #define WORST           0       /* Worst case. */
 372
 373 /*
 374  * When regcode is set to this value, code is not emitted and size is computed
 375  * instead.
 376  */
 377 #define JUST_CALC_SIZE  ((char_u *) -1)
 378
 379 static char_u           *reg_prev_sub = NULL;
 380
 381 /*
 382  * REGEXP_INRANGE contains all characters which are always special in a []
 383  * range after '\'.
 384  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 385  * These are:
 386  *  \n  - New line (NL).
 387  *  \r  - Carriage Return (CR).
 388  *  \t  - Tab (TAB).
 389  *  \e  - Escape (ESC).
 390  *  \b  - Backspace (Ctrl_H).
 391  *  \d  - Character code in decimal, eg \d123
  392  *  \o  - Character code in octal, eg \o40
 393  *  \x  - Character code in hex, eg \x4a
 394  *  \u  - Multibyte character code, eg \u20ac
 395  *  \U  - Long multibyte character code, eg \U12345678
 396  */
 397 static char_u REGEXP_INRANGE[] = "]^-n\\";
 398 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
 399
 400 static int      backslash_trans __ARGS((int c));
 401 static int      get_char_class __ARGS((char_u **pp));
 402 static int      get_equi_class __ARGS((char_u **pp));
 403 static void     reg_equi_class __ARGS((int c));
 404 static int      get_coll_element __ARGS((char_u **pp));
 405 static char_u   *skip_anyof __ARGS((char_u *p));
 406 static void     init_class_tab __ARGS((void));
 407
 408 /*
 409  * Translate '\x' to its control character, except "\n", which is Magic.
 410  */
 411     static int
 412 backslash_trans(c)
 413     int         c;
 414 {
 415     switch (c)
 416     {
 417         case 'r':   return CAR;
 418         case 't':   return TAB;
 419         case 'e':   return ESC;
 420         case 'b':   return BS;
 421     }
 422     return c;
 423 }
 424
 425 /*
 426  * Check for a character class name "[:name:]".  "pp" points to the '['.
 427  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 428  * recognized.  Otherwise "pp" is advanced to after the item.
 429  */
 430     static int
 431 get_char_class(pp)
 432     char_u      **pp;
 433 {
 434     static const char *(class_names[]) =
 435     {
 436         "alnum:]",
 437 #define CLASS_ALNUM 0
 438         "alpha:]",
 439 #define CLASS_ALPHA 1
 440         "blank:]",
 441 #define CLASS_BLANK 2
 442         "cntrl:]",
 443 #define CLASS_CNTRL 3
 444         "digit:]",
 445 #define CLASS_DIGIT 4
 446         "graph:]",
 447 #define CLASS_GRAPH 5
 448         "lower:]",
 449 #define CLASS_LOWER 6
 450         "print:]",
 451 #define CLASS_PRINT 7
 452         "punct:]",
 453 #define CLASS_PUNCT 8
 454         "space:]",
 455 #define CLASS_SPACE 9
 456         "upper:]",
 457 #define CLASS_UPPER 10
 458         "xdigit:]",
 459 #define CLASS_XDIGIT 11
 460         "tab:]",
 461 #define CLASS_TAB 12
 462         "return:]",
 463 #define CLASS_RETURN 13
 464         "backspace:]",
 465 #define CLASS_BACKSPACE 14
 466         "escape:]",
 467 #define CLASS_ESCAPE 15
 468     };
 469 #define CLASS_NONE 99
 470     int i;
 471
 472     if ((*pp)[1] == ':')
 473     {
 474         for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
 475             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 476             {
 477                 *pp += STRLEN(class_names[i]) + 2;
 478                 return i;
 479             }
 480     }
 481     return CLASS_NONE;
 482 }
 483
 484 /*
 485  * Specific version of character class functions.
 486  * Using a table to keep this fast.
 487  */
 488 static short    class_tab[256];
 489
 490 #define     RI_DIGIT    0x01
 491 #define     RI_HEX      0x02
 492 #define     RI_OCTAL    0x04
 493 #define     RI_WORD     0x08
 494 #define     RI_HEAD     0x10
 495 #define     RI_ALPHA    0x20
 496 #define     RI_LOWER    0x40
 497 #define     RI_UPPER    0x80
 498 #define     RI_WHITE    0x100
 499
 500     static void
 501 init_class_tab()
 502 {
 503     int         i;
 504     static int  done = FALSE;
 505
 506     if (done)
 507         return;
 508
 509     for (i = 0; i < 256; ++i)
 510     {
 511         if (i >= '0' && i <= '7')
 512             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 513         else if (i >= '8' && i <= '9')
 514             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 515         else if (i >= 'a' && i <= 'f')
 516             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 517 #ifdef EBCDIC
 518         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 519                                                     || (i >= 's' && i <= 'z'))
 520 #else
 521         else if (i >= 'g' && i <= 'z')
 522 #endif
 523             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 524         else if (i >= 'A' && i <= 'F')
 525             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 526 #ifdef EBCDIC
 527         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 528                                                     || (i >= 'S' && i <= 'Z'))
 529 #else
 530         else if (i >= 'G' && i <= 'Z')
 531 #endif
 532             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 533         else if (i == '_')
 534             class_tab[i] = RI_WORD + RI_HEAD;
 535         else
 536             class_tab[i] = 0;
 537     }
 538     class_tab[' '] |= RI_WHITE;
 539     class_tab['\t'] |= RI_WHITE;
 540     done = TRUE;
 541 }
 542
 543 #ifdef FEAT_MBYTE
 544 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 545 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 546 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 547 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 548 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 549 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 550 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 551 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 552 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 553 #else
 554 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 555 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 556 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 557 # define ri_word(c)     (class_tab[c] & RI_WORD)
 558 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 559 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 560 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 561 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 562 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 563 #endif
 564
 565 /* flags for regflags */
 566 #define RF_ICASE    1   /* ignore case */
 567 #define RF_NOICASE  2   /* don't ignore case */
 568 #define RF_HASNL    4   /* can match a NL */
 569 #define RF_ICOMBINE 8   /* ignore combining characters */
 570 #define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 571
 572 /*
 573  * Global work variables for vim_regcomp().
 574  */
 575
 576 static char_u   *regparse;      /* Input-scan pointer. */
 577 static int      prevchr_len;    /* byte length of previous char */
 578 static int      num_complex_braces; /* Complex \{...} count */
 579 static int      regnpar;        /* () count. */
 580 #ifdef FEAT_SYN_HL
 581 static int      regnzpar;       /* \z() count. */
 582 static int      re_has_z;       /* \z item detected */
 583 #endif
 584 static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
 585 static long     regsize;        /* Code size. */
 586 static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
 587 static unsigned regflags;       /* RF_ flags for prog */
 588 static long     brace_min[10];  /* Minimums for complex brace repeats */
 589 static long     brace_max[10];  /* Maximums for complex brace repeats */
 590 static int      brace_count[10]; /* Current counts for complex brace repeats */
 591 #if defined(FEAT_SYN_HL) || defined(PROTO)
 592 static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
 593 #endif
 594 static int      one_exactly = FALSE;    /* only do one char for EXACTLY */
 595
 596 static int      reg_magic;      /* magicness of the pattern: */
 597 #define MAGIC_NONE      1       /* "\V" very unmagic */
 598 #define MAGIC_OFF       2       /* "\M" or 'magic' off */
 599 #define MAGIC_ON        3       /* "\m" or 'magic' */
 600 #define MAGIC_ALL       4       /* "\v" very magic */
 601
 602 static int      reg_string;     /* matching with a string instead of a buffer
 603                                    line */
 604 static int      reg_strict;     /* "[abc" is illegal */
 605
 606 /*
 607  * META contains all characters that may be magic, except '^' and '$'.
 608  */
 609
 610 #ifdef EBCDIC
 611 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
 612 #else
 613 /* META[] is used often enough to justify turning it into a table. */
 614 static char_u META_flags[] = {
 615     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 616     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 617 /*                 %  &     (  )  *  +        .    */
 618     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
 619 /*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
 620     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
 621 /*  @  A     C  D     F     H  I     K  L  M     O */
 622     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
 623 /*  P        S     U  V  W  X     Z  [           _ */
 624     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
 625 /*     a     c  d     f     h  i     k  l  m  n  o */
 626     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
 627 /*  p        s     u  v  w  x     z  {  |     ~    */
 628     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 629 };
 630 #endif
 631
 632 static int      curchr;
 633
 634 /* arguments for reg() */
 635 #define REG_NOPAREN     0       /* toplevel reg() */
 636 #define REG_PAREN       1       /* \(\) */
 637 #define REG_ZPAREN      2       /* \z(\) */
 638 #define REG_NPAREN      3       /* \%(\) */
 639
 640 /*
 641  * Forward declarations for vim_regcomp()'s friends.
 642  */
 643 static void     initchr __ARGS((char_u *));
 644 static int      getchr __ARGS((void));
 645 static void     skipchr_keepstart __ARGS((void));
 646 static int      peekchr __ARGS((void));
 647 static void     skipchr __ARGS((void));
 648 static void     ungetchr __ARGS((void));
 649 static int      gethexchrs __ARGS((int maxinputlen));
 650 static int      getoctchrs __ARGS((void));
 651 static int      getdecchrs __ARGS((void));
 652 static int      coll_get_char __ARGS((void));
 653 static void     regcomp_start __ARGS((char_u *expr, int flags));
 654 static char_u   *reg __ARGS((int, int *));
 655 static char_u   *regbranch __ARGS((int *flagp));
 656 static char_u   *regconcat __ARGS((int *flagp));
 657 static char_u   *regpiece __ARGS((int *));
 658 static char_u   *regatom __ARGS((int *));
 659 static char_u   *regnode __ARGS((int));
 660 #ifdef FEAT_MBYTE
 661 static int      use_multibytecode __ARGS((int c));
 662 #endif
 663 static int      prog_magic_wrong __ARGS((void));
 664 static char_u   *regnext __ARGS((char_u *));
 665 static void     regc __ARGS((int b));
 666 #ifdef FEAT_MBYTE
 667 static void     regmbc __ARGS((int c));
 668 #else
 669 # define regmbc(c) regc(c)
 670 #endif
 671 static void     reginsert __ARGS((int, char_u *));
 672 static void     reginsert_limits __ARGS((int, long, long, char_u *));
 673 static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
 674 static int      read_limits __ARGS((long *, long *));
 675 static void     regtail __ARGS((char_u *, char_u *));
 676 static void     regoptail __ARGS((char_u *, char_u *));
 677
 678 /*
 679  * Return TRUE if compiled regular expression "prog" can match a line break.
 680  */
 681     int
 682 re_multiline(prog)
 683     regprog_T *prog;
 684 {
 685     return (prog->regflags & RF_HASNL);
 686 }
 687
 688 /*
 689  * Return TRUE if compiled regular expression "prog" looks before the start
 690  * position (pattern contains "\@<=" or "\@<!").
 691  */
 692     int
 693 re_lookbehind(prog)
 694     regprog_T *prog;
 695 {
 696     return (prog->regflags & RF_LOOKBH);
 697 }
 698
 699 /*
 700  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 701  * Returns a character representing the class. Zero means that no item was
 702  * recognized.  Otherwise "pp" is advanced to after the item.
 703  */
 704     static int
 705 get_equi_class(pp)
 706     char_u      **pp;
 707 {
 708     int         c;
 709     int         l = 1;
 710     char_u      *p = *pp;
 711
 712     if (p[1] == '=')
 713     {
 714 #ifdef FEAT_MBYTE
 715         if (has_mbyte)
 716             l = (*mb_ptr2len)(p + 2);
 717 #endif
 718         if (p[l + 2] == '=' && p[l + 3] == ']')
 719         {
 720 #ifdef FEAT_MBYTE
 721             if (has_mbyte)
 722                 c = mb_ptr2char(p + 2);
 723             else
 724 #endif
 725                 c = p[2];
 726             *pp += l + 4;
 727             return c;
 728         }
 729     }
 730     return 0;
 731 }
 732
 733 /*
 734  * Produce the bytes for equivalence class "c".
 735  * Currently only handles latin1, latin9 and utf-8.
 736  */
 737     static void
 738 reg_equi_class(c)
 739     int     c;
 740 {
 741 #ifdef FEAT_MBYTE
 742     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 743                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 744 #endif
 745     {
 746         switch (c)
 747         {
 748             case 'A': case '\300': case '\301': case '\302':
 749             case '\303': case '\304': case '\305':
 750                       regmbc('A'); regmbc('\300'); regmbc('\301');
 751                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 752                       regmbc('\305');
 753                       return;
 754             case 'C': case '\307':
 755                       regmbc('C'); regmbc('\307');
 756                       return;
 757             case 'E': case '\310': case '\311': case '\312': case '\313':
 758                       regmbc('E'); regmbc('\310'); regmbc('\311');
 759                       regmbc('\312'); regmbc('\313');
 760                       return;
 761             case 'I': case '\314': case '\315': case '\316': case '\317':
 762                       regmbc('I'); regmbc('\314'); regmbc('\315');
 763                       regmbc('\316'); regmbc('\317');
 764                       return;
 765             case 'N': case '\321':
 766                       regmbc('N'); regmbc('\321');
 767                       return;
 768             case 'O': case '\322': case '\323': case '\324': case '\325':
 769             case '\326':
 770                       regmbc('O'); regmbc('\322'); regmbc('\323');
 771                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 772                       return;
 773             case 'U': case '\331': case '\332': case '\333': case '\334':
 774                       regmbc('U'); regmbc('\331'); regmbc('\332');
 775                       regmbc('\333'); regmbc('\334');
 776                       return;
 777             case 'Y': case '\335':
 778                       regmbc('Y'); regmbc('\335');
 779                       return;
 780             case 'a': case '\340': case '\341': case '\342':
 781             case '\343': case '\344': case '\345':
 782                       regmbc('a'); regmbc('\340'); regmbc('\341');
 783                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 784                       regmbc('\345');
 785                       return;
 786             case 'c': case '\347':
 787                       regmbc('c'); regmbc('\347');
 788                       return;
 789             case 'e': case '\350': case '\351': case '\352': case '\353':
 790                       regmbc('e'); regmbc('\350'); regmbc('\351');
 791                       regmbc('\352'); regmbc('\353');
 792                       return;
 793             case 'i': case '\354': case '\355': case '\356': case '\357':
 794                       regmbc('i'); regmbc('\354'); regmbc('\355');
 795                       regmbc('\356'); regmbc('\357');
 796                       return;
 797             case 'n': case '\361':
 798                       regmbc('n'); regmbc('\361');
 799                       return;
 800             case 'o': case '\362': case '\363': case '\364': case '\365':
 801             case '\366':
 802                       regmbc('o'); regmbc('\362'); regmbc('\363');
 803                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 804                       return;
 805             case 'u': case '\371': case '\372': case '\373': case '\374':
 806                       regmbc('u'); regmbc('\371'); regmbc('\372');
 807                       regmbc('\373'); regmbc('\374');
 808                       return;
 809             case 'y': case '\375': case '\377':
 810                       regmbc('y'); regmbc('\375'); regmbc('\377');
 811                       return;
 812         }
 813     }
 814     regmbc(c);
 815 }
 816
 817 /*
 818  * Check for a collating element "[.a.]".  "pp" points to the '['.
 819  * Returns a character. Zero means that no item was recognized.  Otherwise
 820  * "pp" is advanced to after the item.
 821  * Currently only single characters are recognized!
 822  */
 823     static int
 824 get_coll_element(pp)
 825     char_u      **pp;
 826 {
 827     int         c;
 828     int         l = 1;
 829     char_u      *p = *pp;
 830
 831     if (p[1] == '.')
 832     {
 833 #ifdef FEAT_MBYTE
 834         if (has_mbyte)
 835             l = (*mb_ptr2len)(p + 2);
 836 #endif
 837         if (p[l + 2] == '.' && p[l + 3] == ']')
 838         {
 839 #ifdef FEAT_MBYTE
 840             if (has_mbyte)
 841                 c = mb_ptr2char(p + 2);
 842             else
 843 #endif
 844                 c = p[2];
 845             *pp += l + 4;
 846             return c;
 847         }
 848     }
 849     return 0;
 850 }
 851
 852
 853 /*
 854  * Skip over a "[]" range.
 855  * "p" must point to the character after the '['.
 856  * The returned pointer is on the matching ']', or the terminating NUL.
 857  */
 858     static char_u *
 859 skip_anyof(p)
 860     char_u      *p;
 861 {
 862     int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
 863     int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
 864 #ifdef FEAT_MBYTE
 865     int         l;
 866 #endif
 867
 868     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 869     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 870
 871     if (*p == '^')      /* Complement of range. */
 872         ++p;
 873     if (*p == ']' || *p == '-')
 874         ++p;
 875     while (*p != NUL && *p != ']')
 876     {
 877 #ifdef FEAT_MBYTE
 878         if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 879             p += l;
 880         else
 881 #endif
 882             if (*p == '-')
 883             {
 884                 ++p;
 885                 if (*p != ']' && *p != NUL)
 886                     mb_ptr_adv(p);
 887             }
 888         else if (*p == '\\'
 889                 && !cpo_bsl
 890                 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
 891                     || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
 892             p += 2;
 893         else if (*p == '[')
 894         {
 895             if (get_char_class(&p) == CLASS_NONE
 896                     && get_equi_class(&p) == 0
 897                     && get_coll_element(&p) == 0)
 898                 ++p; /* It was not a class name */
 899         }
 900         else
 901             ++p;
 902     }
 903
 904     return p;
 905 }
 906
 907 /*
 908  * Skip past regular expression.
 909  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 910  * Take care of characters with a backslash in front of it.
 911  * Skip strings inside [ and ].
 912  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 913  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 914  * is changed in-place.
 915  */
 916     char_u *
 917 skip_regexp(startp, dirc, magic, newp)
 918     char_u      *startp;
 919     int         dirc;
 920     int         magic;
 921     char_u      **newp;
 922 {
 923     int         mymagic;
 924     char_u      *p = startp;
 925
 926     if (magic)
 927         mymagic = MAGIC_ON;
 928     else
 929         mymagic = MAGIC_OFF;
 930
 931     for (; p[0] != NUL; mb_ptr_adv(p))
 932     {
 933         if (p[0] == dirc)       /* found end of regexp */
 934             break;
 935         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 936                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 937         {
 938             p = skip_anyof(p + 1);
 939             if (p[0] == NUL)
 940                 break;
 941         }
 942         else if (p[0] == '\\' && p[1] != NUL)
 943         {
 944             if (dirc == '?' && newp != NULL && p[1] == '?')
 945             {
 946                 /* change "\?" to "?", make a copy first. */
 947                 if (*newp == NULL)
 948                 {
 949                     *newp = vim_strsave(startp);
 950                     if (*newp != NULL)
 951                         p = *newp + (p - startp);
 952                 }
 953                 if (*newp != NULL)
 954                     STRMOVE(p, p + 1);
 955                 else
 956                     ++p;
 957             }
 958             else
 959                 ++p;    /* skip next character */
 960             if (*p == 'v')
 961                 mymagic = MAGIC_ALL;
 962             else if (*p == 'V')
 963                 mymagic = MAGIC_NONE;
 964         }
 965     }
 966     return p;
 967 }
 968
/*
 * vim_regcomp() - compile a regular expression into internal code
 * Returns the program in allocated space.  Returns NULL for an error.
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because vim_free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 *
 * "expr" is the pattern text; NULL is rejected with the e_null message.
 * "re_flags": RE_MAGIC and/or RE_STRING.  regcomp_start() additionally
 * tests RE_STRICT.
 *
 * Besides the code itself these fields of the result are filled in:
 * regstart, reganch, regmust, regmlen, regflags and, with FEAT_SYN_HL,
 * reghasz.
 */
    regprog_T *
vim_regcomp(expr, re_flags)
    char_u      *expr;
    int         re_flags;
{
    regprog_T   *r;
    char_u      *scan;
    char_u      *longest;
    int         len;
    int         flags;

    if (expr == NULL)
        EMSG_RET_NULL(_(e_null));

    init_class_tab();

    /*
     * First pass: determine size, legality.
     * Nothing has been allocated yet, thus a failure needs no cleanup.
     */
    regcomp_start(expr, re_flags);
    regcode = JUST_CALC_SIZE;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
        return NULL;

    /* Small enough for pointer-storage convention? */
#ifdef SMALL_MALLOC             /* 16 bit storage allocation */
    if (regsize >= 65536L - 256L)
        EMSG_RET_NULL(_("E339: Pattern too long"));
#endif

    /* Allocate space: the header plus the size counted in the first pass. */
    r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
    if (r == NULL)
        return NULL;

    /*
     * Second pass: emit code.
     * The parse state is reset and the same steps are repeated, this time
     * writing into r->program.
     */
    regcomp_start(expr, re_flags);
    regcode = r->program;
    regc(REGMAGIC);
    if (reg(REG_NOPAREN, &flags) == NULL)
    {
        vim_free(r);
        return NULL;
    }

    /* Dig out information for optimizations. */
    r->regstart = NUL;          /* Worst-case defaults. */
    r->reganch = 0;
    r->regmust = NULL;
    r->regmlen = 0;
    /* Start with the flags collected while parsing, then add what reg()
     * reported for the whole pattern. */
    r->regflags = regflags;
    if (flags & HASNL)
        r->regflags |= RF_HASNL;
    if (flags & HASLOOKBH)
        r->regflags |= RF_LOOKBH;
#ifdef FEAT_SYN_HL
    /* Remember whether this pattern has any \z specials in it. */
    r->reghasz = re_has_z;
#endif
    scan = r->program + 1;      /* First BRANCH. */
    if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
    {
        scan = OPERAND(scan);

        /* Starting-point info. */
        if (OP(scan) == BOL || OP(scan) == RE_BOF)
        {
            r->reganch++;
            scan = regnext(scan);
        }

        /* The pattern starts with a literal string: remember its first
         * character. */
        if (OP(scan) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(scan));
            else
#endif
                r->regstart = *OPERAND(scan);
        }
        /* Same when exactly one of these nodes precedes the literal
         * string. */
        else if ((OP(scan) == BOW
                    || OP(scan) == EOW
                    || OP(scan) == NOTHING
                    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
                    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
                 && OP(regnext(scan)) == EXACTLY)
        {
#ifdef FEAT_MBYTE
            if (has_mbyte)
                r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
            else
#endif
                r->regstart = *OPERAND(regnext(scan));
        }

        /*
         * If there's something expensive in the r.e., find the longest
         * literal string that must appear and make it the regmust.  Resolve
         * ties in favor of later strings, since the regstart check works
         * with the beginning of the r.e. and avoiding duplication
         * strengthens checking.  Not a strong reason, but sufficient in the
         * absence of others.
         */
        /*
         * When the r.e. starts with BOW, it is faster to look for a regmust
         * first. Used a lot for "#" and "*" commands. (Added by mool).
         */
        if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
                                                          && !(flags & HASNL))
        {
            longest = NULL;
            len = 0;
            /* ">=" makes a later string of equal length win. */
            for (; scan != NULL; scan = regnext(scan))
                if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
                {
                    longest = OPERAND(scan);
                    len = (int)STRLEN(OPERAND(scan));
                }
            r->regmust = longest;
            r->regmlen = len;
        }
    }
#ifdef DEBUG
    regdump(expr, r);
#endif
    return r;
}
1119
1120 /*
1121  * Setup to parse the regexp.  Used once to get the length and once to do it.
1122  */
1123     static void
1124 regcomp_start(expr, re_flags)
1125     char_u      *expr;
1126     int         re_flags;           /* see vim_regcomp() */
1127 {
1128     initchr(expr);
1129     if (re_flags & RE_MAGIC)
1130         reg_magic = MAGIC_ON;
1131     else
1132         reg_magic = MAGIC_OFF;
1133     reg_string = (re_flags & RE_STRING);
1134     reg_strict = (re_flags & RE_STRICT);
1135
1136     num_complex_braces = 0;
1137     regnpar = 1;
1138     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1139 #ifdef FEAT_SYN_HL
1140     regnzpar = 1;
1141     re_has_z = 0;
1142 #endif
1143     regsize = 0L;
1144     regflags = 0;
1145 #if defined(FEAT_SYN_HL) || defined(PROTO)
1146     had_eol = FALSE;
1147 #endif
1148 }
1149
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Report whether the EOL item "$" was found during the previous call to
 * vim_regcomp().  Passing this through a file-level flag is messy, but it
 * works fine.
 */
    int
vim_regcomp_had_eol()
{
    int         seen_eol = had_eol;

    return seen_eol;
}
#endif
1161
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 *
 * "paren" tells what is being parsed: REG_NOPAREN for the whole pattern,
 * REG_PAREN, REG_NPAREN or REG_ZPAREN for a group.
 * "flagp" receives HASWIDTH unless one of the branches lacks it, plus the
 * SPSTART, HASNL and HASLOOKBH flags of any branch.
 * Returns the opening node of a group, or the first branch when there is no
 * opening node.  Returns NULL for an error.
 */
    static char_u *
reg(paren, flagp)
    int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
    int         *flagp;
{
    char_u      *ret;
    char_u      *br;
    char_u      *ender;
    int         parno = 0;
    int         flags;

    *flagp = HASWIDTH;          /* Tentatively. */

#ifdef FEAT_SYN_HL
    if (paren == REG_ZPAREN)
    {
        /* Make a ZOPEN node. */
        if (regnzpar >= NSUBEXP)
            EMSG_RET_NULL(_("E50: Too many \\z("));
        parno = regnzpar;
        regnzpar++;
        ret = regnode(ZOPEN + parno);
    }
    else
#endif
        if (paren == REG_PAREN)
    {
        /* Make a MOPEN node. */
        if (regnpar >= NSUBEXP)
            EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
        parno = regnpar;
        ++regnpar;
        ret = regnode(MOPEN + parno);
    }
    else if (paren == REG_NPAREN)
    {
        /* Make a NOPEN node. */
        ret = regnode(NOPEN);
    }
    else
        ret = NULL;             /* REG_NOPAREN: there is no opening node */

    /* Pick up the branches, linking them together. */
    br = regbranch(&flags);
    if (br == NULL)
        return NULL;
    if (ret != NULL)
        regtail(ret, br);       /* [MZ]OPEN -> first. */
    else
        ret = br;
    /* If one of the branches can be zero-width, the whole thing can.
     * If one of the branches has * at start or matches a line-break, the
     * whole thing can. */
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    /* Every "|" starts another branch; its flags are merged the same way. */
    while (peekchr() == Magic('|'))
    {
        skipchr();
        br = regbranch(&flags);
        if (br == NULL)
            return NULL;
        regtail(ret, br);       /* BRANCH -> BRANCH. */
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    }

    /* Make a closing node, and hook it on the end.  The kind of node
     * matches the kind of opening: ZCLOSE, MCLOSE, NCLOSE or, for the whole
     * pattern, END. */
    ender = regnode(
#ifdef FEAT_SYN_HL
            paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
            paren == REG_PAREN ? MCLOSE + parno :
            paren == REG_NPAREN ? NCLOSE : END);
    regtail(ret, ender);

    /* Hook the tails of the branches to the closing node. */
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    /* Check for proper termination: a group must be followed by ")", the
     * whole pattern by the end of the text. */
    if (paren != REG_NOPAREN && getchr() != Magic(')'))
    {
#ifdef FEAT_SYN_HL
        if (paren == REG_ZPAREN)
            EMSG_RET_NULL(_("E52: Unmatched \\z("));
        else
#endif
            if (paren == REG_NPAREN)
            EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
        else
            EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
    }
    else if (paren == REG_NOPAREN && peekchr() != NUL)
    {
        if (curchr == Magic(')'))
            EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
        else
            EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
        /* NOTREACHED */
    }
    /*
     * Here we set the flag allowing back references to this set of
     * parentheses.
     */
    if (paren == REG_PAREN)
        had_endbrace[parno] = TRUE;     /* have seen the close paren */
    return ret;
}
1281
1282 /*
1283  * Handle one alternative of an | operator.
1284  * Implements the & operator.
1285  */
1286     static char_u *
1287 regbranch(flagp)
1288     int         *flagp;
1289 {
1290     char_u      *ret;
1291     char_u      *chain = NULL;
1292     char_u      *latest;
1293     int         flags;
1294
1295     *flagp = WORST | HASNL;             /* Tentatively. */
1296
1297     ret = regnode(BRANCH);
1298     for (;;)
1299     {
1300         latest = regconcat(&flags);
1301         if (latest == NULL)
1302             return NULL;
1303         /* If one of the branches has width, the whole thing has.  If one of
1304          * the branches anchors at start-of-line, the whole thing does.
1305          * If one of the branches uses look-behind, the whole thing does. */
1306         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1307         /* If one of the branches doesn't match a line-break, the whole thing
1308          * doesn't. */
1309         *flagp &= ~HASNL | (flags & HASNL);
1310         if (chain != NULL)
1311             regtail(chain, latest);
1312         if (peekchr() != Magic('&'))
1313             break;
1314         skipchr();
1315         regtail(latest, regnode(END)); /* operand ends */
1316         reginsert(MATCH, latest);
1317         chain = latest;
1318     }
1319
1320     return ret;
1321 }
1322
1323 /*
1324  * Handle one alternative of an | or & operator.
1325  * Implements the concatenation operator.
1326  */
1327     static char_u *
1328 regconcat(flagp)
1329     int         *flagp;
1330 {
1331     char_u      *first = NULL;
1332     char_u      *chain = NULL;
1333     char_u      *latest;
1334     int         flags;
1335     int         cont = TRUE;
1336
1337     *flagp = WORST;             /* Tentatively. */
1338
1339     while (cont)
1340     {
1341         switch (peekchr())
1342         {
1343             case NUL:
1344             case Magic('|'):
1345             case Magic('&'):
1346             case Magic(')'):
1347                             cont = FALSE;
1348                             break;
1349             case Magic('Z'):
1350 #ifdef FEAT_MBYTE
1351                             regflags |= RF_ICOMBINE;
1352 #endif
1353                             skipchr_keepstart();
1354                             break;
1355             case Magic('c'):
1356                             regflags |= RF_ICASE;
1357                             skipchr_keepstart();
1358                             break;
1359             case Magic('C'):
1360                             regflags |= RF_NOICASE;
1361                             skipchr_keepstart();
1362                             break;
1363             case Magic('v'):
1364                             reg_magic = MAGIC_ALL;
1365                             skipchr_keepstart();
1366                             curchr = -1;
1367                             break;
1368             case Magic('m'):
1369                             reg_magic = MAGIC_ON;
1370                             skipchr_keepstart();
1371                             curchr = -1;
1372                             break;
1373             case Magic('M'):
1374                             reg_magic = MAGIC_OFF;
1375                             skipchr_keepstart();
1376                             curchr = -1;
1377                             break;
1378             case Magic('V'):
1379                             reg_magic = MAGIC_NONE;
1380                             skipchr_keepstart();
1381                             curchr = -1;
1382                             break;
1383             default:
1384                             latest = regpiece(&flags);
1385                             if (latest == NULL)
1386                                 return NULL;
1387                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1388                             if (chain == NULL)  /* First piece. */
1389                                 *flagp |= flags & SPSTART;
1390                             else
1391                                 regtail(chain, latest);
1392                             chain = latest;
1393                             if (first == NULL)
1394                                 first = latest;
1395                             break;
1396         }
1397     }
1398     if (first == NULL)          /* Loop ran zero times. */
1399         first = regnode(NOTHING);
1400     return first;
1401 }
1402
/*
 * regpiece - something followed by possible [*+=]
 *
 * Besides "*", "+" and "=" the items "?", "@" (followed by "=", "!", ">",
 * "<=" or "<!") and "{" are handled here.
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 *
 * "flagp" receives the flags of the atom when no multi follows, otherwise
 * flags adjusted for the multi.
 * Returns the first node of the piece or NULL for an error.
 */
    static char_u *
regpiece(flagp)
    int             *flagp;
{
    char_u          *ret;
    int             op;
    char_u          *next;
    int             flags;
    long            minval;
    long            maxval;

    ret = regatom(&flags);
    if (ret == NULL)
        return NULL;

    /* Without a multi the atom is the whole piece. */
    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
        *flagp = flags;
        return ret;
    }
    /* default flags */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
        case Magic('*'):
            if (flags & SIMPLE)
                reginsert(STAR, ret);
            else
            {
                /* Emit x* as (x&|), where & means "self". */
                reginsert(BRANCH, ret); /* Either x */
                regoptail(ret, regnode(BACK));  /* and loop */
                regoptail(ret, ret);    /* back */
                regtail(ret, regnode(BRANCH));  /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            break;

        case Magic('+'):
            if (flags & SIMPLE)
                reginsert(PLUS, ret);
            else
            {
                /* Emit x+ as x(&|), where & means "self". */
                next = regnode(BRANCH); /* Either */
                regtail(ret, next);
                regtail(regnode(BACK), ret);    /* loop back */
                regtail(next, regnode(BRANCH)); /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            /* Unlike the default flags this includes HASWIDTH and leaves
             * out SPSTART. */
            *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;

        case Magic('@'):
            {
                /* END means that no valid character followed the "@". */
                int     lop = END;

                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH; break;                 /* \@= */
                    case '!': lop = NOMATCH; break;               /* \@! */
                    case '>': lop = SUBPAT; break;                /* \@> */
                    case '<': switch (no_Magic(getchr()))
                              {
                                  case '=': lop = BEHIND; break;   /* \@<= */
                                  case '!': lop = NOBEHIND; break; /* \@<! */
                              }
                }
                if (lop == END)
                    EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
                                                      reg_magic == MAGIC_ALL);
                /* Look behind must match with behind_pos. */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    regtail(ret, regnode(BHPOS));
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
                reginsert(lop, ret);
                break;
            }

        case Magic('?'):
        case Magic('='):
            /* Emit x= as (x|) */
            reginsert(BRANCH, ret);             /* Either x */
            regtail(ret, regnode(BRANCH));      /* or */
            next = regnode(NOTHING);            /* null. */
            regtail(ret, next);
            regoptail(ret, next);
            break;

        case Magic('{'):
            if (!read_limits(&minval, &maxval))
                return NULL;
            if (flags & SIMPLE)
            {
                reginsert(BRACE_SIMPLE, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
            }
            else
            {
                /* Each complex brace item uses its own BRACE_COMPLEX + N
                 * opcode; N must stay below 10. */
                if (num_complex_braces >= 10)
                    EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
                                                      reg_magic == MAGIC_ALL);
                reginsert(BRACE_COMPLEX + num_complex_braces, ret);
                regoptail(ret, regnode(BACK));
                regoptail(ret, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
                ++num_complex_braces;
            }
            /* When both limits are above zero HASWIDTH is set and SPSTART
             * is dropped. */
            if (minval > 0 && maxval > 0)
                *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
        /* Can't have a multi follow a multi. */
        if (peekchr() == Magic('*'))
            sprintf((char *)IObuff, _("E61: Nested %s*"),
                                            reg_magic >= MAGIC_ON ? "" : "\\");
        else
            sprintf((char *)IObuff, _("E62: Nested %s%c"),
                reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
        EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1544
1545 /*
1546  * regatom - the lowest level
1547  *
1548  * Optimization:  gobbles an entire sequence of ordinary characters so that
1549  * it can turn them into a single node, which is smaller to store and
1550  * faster to run.  Don't do this when one_exactly is set.
1551  */
1552     static char_u *
1553 regatom(flagp)
1554     int            *flagp;
1555 {
1556     char_u          *ret;
1557     int             flags;
1558     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1559     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1560     int             c;
1561     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1562     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1563                                     FNAME, SFNAME, PRINT, SPRINT,
1564                                     WHITE, NWHITE, DIGIT, NDIGIT,
1565                                     HEX, NHEX, OCTAL, NOCTAL,
1566                                     WORD, NWORD, HEAD, NHEAD,
1567                                     ALPHA, NALPHA, LOWER, NLOWER,
1568                                     UPPER, NUPPER
1569                                     };
1570     char_u          *p;
1571     int             extra = 0;
1572
1573     *flagp = WORST;             /* Tentatively. */
1574     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1575     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1576
1577     c = getchr();
1578     switch (c)
1579     {
1580       case Magic('^'):
1581         ret = regnode(BOL);
1582         break;
1583
1584       case Magic('$'):
1585         ret = regnode(EOL);
1586 #if defined(FEAT_SYN_HL) || defined(PROTO)
1587         had_eol = TRUE;
1588 #endif
1589         break;
1590
1591       case Magic('<'):
1592         ret = regnode(BOW);
1593         break;
1594
1595       case Magic('>'):
1596         ret = regnode(EOW);
1597         break;
1598
1599       case Magic('_'):
1600         c = no_Magic(getchr());
1601         if (c == '^')           /* "\_^" is start-of-line */
1602         {
1603             ret = regnode(BOL);
1604             break;
1605         }
1606         if (c == '$')           /* "\_$" is end-of-line */
1607         {
1608             ret = regnode(EOL);
1609 #if defined(FEAT_SYN_HL) || defined(PROTO)
1610             had_eol = TRUE;
1611 #endif
1612             break;
1613         }
1614
1615         extra = ADD_NL;
1616         *flagp |= HASNL;
1617
1618         /* "\_[" is character range plus newline */
1619         if (c == '[')
1620             goto collection;
1621
1622         /* "\_x" is character class plus newline */
1623         /*FALLTHROUGH*/
1624
1625         /*
1626          * Character classes.
1627          */
1628       case Magic('.'):
1629       case Magic('i'):
1630       case Magic('I'):
1631       case Magic('k'):
1632       case Magic('K'):
1633       case Magic('f'):
1634       case Magic('F'):
1635       case Magic('p'):
1636       case Magic('P'):
1637       case Magic('s'):
1638       case Magic('S'):
1639       case Magic('d'):
1640       case Magic('D'):
1641       case Magic('x'):
1642       case Magic('X'):
1643       case Magic('o'):
1644       case Magic('O'):
1645       case Magic('w'):
1646       case Magic('W'):
1647       case Magic('h'):
1648       case Magic('H'):
1649       case Magic('a'):
1650       case Magic('A'):
1651       case Magic('l'):
1652       case Magic('L'):
1653       case Magic('u'):
1654       case Magic('U'):
1655         p = vim_strchr(classchars, no_Magic(c));
1656         if (p == NULL)
1657             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1658 #ifdef FEAT_MBYTE
1659         /* When '.' is followed by a composing char ignore the dot, so that
1660          * the composing char is matched here. */
1661         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1662         {
1663             c = getchr();
1664             goto do_multibyte;
1665         }
1666 #endif
1667         ret = regnode(classcodes[p - classchars] + extra);
1668         *flagp |= HASWIDTH | SIMPLE;
1669         break;
1670
1671       case Magic('n'):
1672         if (reg_string)
1673         {
1674             /* In a string "\n" matches a newline character. */
1675             ret = regnode(EXACTLY);
1676             regc(NL);
1677             regc(NUL);
1678             *flagp |= HASWIDTH | SIMPLE;
1679         }
1680         else
1681         {
1682             /* In buffer text "\n" matches the end of a line. */
1683             ret = regnode(NEWL);
1684             *flagp |= HASWIDTH | HASNL;
1685         }
1686         break;
1687
1688       case Magic('('):
1689         if (one_exactly)
1690             EMSG_ONE_RET_NULL;
1691         ret = reg(REG_PAREN, &flags);
1692         if (ret == NULL)
1693             return NULL;
1694         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1695         break;
1696
1697       case NUL:
1698       case Magic('|'):
1699       case Magic('&'):
1700       case Magic(')'):
1701         if (one_exactly)
1702             EMSG_ONE_RET_NULL;
1703         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1704         /* NOTREACHED */
1705
1706       case Magic('='):
1707       case Magic('?'):
1708       case Magic('+'):
1709       case Magic('@'):
1710       case Magic('{'):
1711       case Magic('*'):
1712         c = no_Magic(c);
1713         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1714                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1715                 ? "" : "\\", c);
1716         EMSG_RET_NULL(IObuff);
1717         /* NOTREACHED */
1718
1719       case Magic('~'):          /* previous substitute pattern */
1720             if (reg_prev_sub != NULL)
1721             {
1722                 char_u      *lp;
1723
1724                 ret = regnode(EXACTLY);
1725                 lp = reg_prev_sub;
1726                 while (*lp != NUL)
1727                     regc(*lp++);
1728                 regc(NUL);
1729                 if (*reg_prev_sub != NUL)
1730                 {
1731                     *flagp |= HASWIDTH;
1732                     if ((lp - reg_prev_sub) == 1)
1733                         *flagp |= SIMPLE;
1734                 }
1735             }
1736             else
1737                 EMSG_RET_NULL(_(e_nopresub));
1738             break;
1739
1740       case Magic('1'):
1741       case Magic('2'):
1742       case Magic('3'):
1743       case Magic('4'):
1744       case Magic('5'):
1745       case Magic('6'):
1746       case Magic('7'):
1747       case Magic('8'):
1748       case Magic('9'):
1749             {
1750                 int                 refnum;
1751
1752                 refnum = c - Magic('0');
1753                 /*
1754                  * Check if the back reference is legal. We must have seen the
1755                  * close brace.
1756                  * TODO: Should also check that we don't refer to something
1757                  * that is repeated (+*=): what instance of the repetition
1758                  * should we match?
1759                  */
1760                 if (!had_endbrace[refnum])
1761                 {
1762                     /* Trick: check if "@<=" or "@<!" follows, in which case
1763                      * the \1 can appear before the referenced match. */
1764                     for (p = regparse; *p != NUL; ++p)
1765                         if (p[0] == '@' && p[1] == '<'
1766                                               && (p[2] == '!' || p[2] == '='))
1767                             break;
1768                     if (*p == NUL)
1769                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1770                 }
1771                 ret = regnode(BACKREF + refnum);
1772             }
1773             break;
1774
1775       case Magic('z'):
1776         {
1777             c = no_Magic(getchr());
1778             switch (c)
1779             {
1780 #ifdef FEAT_SYN_HL
1781                 case '(': if (reg_do_extmatch != REX_SET)
1782                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1783                           if (one_exactly)
1784                               EMSG_ONE_RET_NULL;
1785                           ret = reg(REG_ZPAREN, &flags);
1786                           if (ret == NULL)
1787                               return NULL;
1788                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1789                           re_has_z = REX_SET;
1790                           break;
1791
1792                 case '1':
1793                 case '2':
1794                 case '3':
1795                 case '4':
1796                 case '5':
1797                 case '6':
1798                 case '7':
1799                 case '8':
1800                 case '9': if (reg_do_extmatch != REX_USE)
1801                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1802                           ret = regnode(ZREF + c - '0');
1803                           re_has_z = REX_USE;
1804                           break;
1805 #endif
1806
1807                 case 's': ret = regnode(MOPEN + 0);
1808                           break;
1809
1810                 case 'e': ret = regnode(MCLOSE + 0);
1811                           break;
1812
1813                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1814             }
1815         }
1816         break;
1817
1818       case Magic('%'):
1819         {
1820             c = no_Magic(getchr());
1821             switch (c)
1822             {
1823                 /* () without a back reference */
1824                 case '(':
1825                     if (one_exactly)
1826                         EMSG_ONE_RET_NULL;
1827                     ret = reg(REG_NPAREN, &flags);
1828                     if (ret == NULL)
1829                         return NULL;
1830                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1831                     break;
1832
1833                 /* Catch \%^ and \%$ regardless of where they appear in the
1834                  * pattern -- regardless of whether or not it makes sense. */
1835                 case '^':
1836                     ret = regnode(RE_BOF);
1837                     break;
1838
1839                 case '$':
1840                     ret = regnode(RE_EOF);
1841                     break;
1842
1843                 case '#':
1844                     ret = regnode(CURSOR);
1845                     break;
1846
1847                 case 'V':
1848                     ret = regnode(RE_VISUAL);
1849                     break;
1850
1851                 /* \%[abc]: Emit as a list of branches, all ending at the last
1852                  * branch which matches nothing. */
1853                 case '[':
1854                           if (one_exactly)      /* doesn't nest */
1855                               EMSG_ONE_RET_NULL;
1856                           {
1857                               char_u    *lastbranch;
1858                               char_u    *lastnode = NULL;
1859                               char_u    *br;
1860
1861                               ret = NULL;
1862                               while ((c = getchr()) != ']')
1863                               {
1864                                   if (c == NUL)
1865                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1866                                                       reg_magic == MAGIC_ALL);
1867                                   br = regnode(BRANCH);
1868                                   if (ret == NULL)
1869                                       ret = br;
1870                                   else
1871                                       regtail(lastnode, br);
1872
1873                                   ungetchr();
1874                                   one_exactly = TRUE;
1875                                   lastnode = regatom(flagp);
1876                                   one_exactly = FALSE;
1877                                   if (lastnode == NULL)
1878                                       return NULL;
1879                               }
1880                               if (ret == NULL)
1881                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1882                                                       reg_magic == MAGIC_ALL);
1883                               lastbranch = regnode(BRANCH);
1884                               br = regnode(NOTHING);
1885                               if (ret != JUST_CALC_SIZE)
1886                               {
1887                                   regtail(lastnode, br);
1888                                   regtail(lastbranch, br);
1889                                   /* connect all branches to the NOTHING
1890                                    * branch at the end */
1891                                   for (br = ret; br != lastnode; )
1892                                   {
1893                                       if (OP(br) == BRANCH)
1894                                       {
1895                                           regtail(br, lastbranch);
1896                                           br = OPERAND(br);
1897                                       }
1898                                       else
1899                                           br = regnext(br);
1900                                   }
1901                               }
1902                               *flagp &= ~(HASWIDTH | SIMPLE);
1903                               break;
1904                           }
1905
1906                 case 'd':   /* %d123 decimal */
1907                 case 'o':   /* %o123 octal */
1908                 case 'x':   /* %xab hex 2 */
1909                 case 'u':   /* %uabcd hex 4 */
1910                 case 'U':   /* %U1234abcd hex 8 */
1911                           {
1912                               int i;
1913
1914                               switch (c)
1915                               {
1916                                   case 'd': i = getdecchrs(); break;
1917                                   case 'o': i = getoctchrs(); break;
1918                                   case 'x': i = gethexchrs(2); break;
1919                                   case 'u': i = gethexchrs(4); break;
1920                                   case 'U': i = gethexchrs(8); break;
1921                                   default:  i = -1; break;
1922                               }
1923
1924                               if (i < 0)
1925                                   EMSG_M_RET_NULL(
1926                                         _("E678: Invalid character after %s%%[dxouU]"),
1927                                         reg_magic == MAGIC_ALL);
1928 #ifdef FEAT_MBYTE
1929                               if (use_multibytecode(i))
1930                                   ret = regnode(MULTIBYTECODE);
1931                               else
1932 #endif
1933                                   ret = regnode(EXACTLY);
1934                               if (i == 0)
1935                                   regc(0x0a);
1936                               else
1937 #ifdef FEAT_MBYTE
1938                                   regmbc(i);
1939 #else
1940                                   regc(i);
1941 #endif
1942                               regc(NUL);
1943                               *flagp |= HASWIDTH;
1944                               break;
1945                           }
1946
1947                 default:
1948                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1949                                                                  || c == '\'')
1950                           {
1951                               long_u    n = 0;
1952                               int       cmp;
1953
1954                               cmp = c;
1955                               if (cmp == '<' || cmp == '>')
1956                                   c = getchr();
1957                               while (VIM_ISDIGIT(c))
1958                               {
1959                                   n = n * 10 + (c - '0');
1960                                   c = getchr();
1961                               }
1962                               if (c == '\'' && n == 0)
1963                               {
1964                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1965                                   c = getchr();
1966                                   ret = regnode(RE_MARK);
1967                                   if (ret == JUST_CALC_SIZE)
1968                                       regsize += 2;
1969                                   else
1970                                   {
1971                                       *regcode++ = c;
1972                                       *regcode++ = cmp;
1973                                   }
1974                                   break;
1975                               }
1976                               else if (c == 'l' || c == 'c' || c == 'v')
1977                               {
1978                                   if (c == 'l')
1979                                       ret = regnode(RE_LNUM);
1980                                   else if (c == 'c')
1981                                       ret = regnode(RE_COL);
1982                                   else
1983                                       ret = regnode(RE_VCOL);
1984                                   if (ret == JUST_CALC_SIZE)
1985                                       regsize += 5;
1986                                   else
1987                                   {
1988                                       /* put the number and the optional
1989                                        * comparator after the opcode */
1990                                       regcode = re_put_long(regcode, n);
1991                                       *regcode++ = cmp;
1992                                   }
1993                                   break;
1994                               }
1995                           }
1996
1997                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1998                                                       reg_magic == MAGIC_ALL);
1999             }
2000         }
2001         break;
2002
2003       case Magic('['):
2004 collection:
2005         {
2006             char_u      *lp;
2007
2008             /*
2009              * If there is no matching ']', we assume the '[' is a normal
2010              * character.  This makes 'incsearch' and ":help [" work.
2011              */
2012             lp = skip_anyof(regparse);
2013             if (*lp == ']')     /* there is a matching ']' */
2014             {
2015                 int     startc = -1;    /* > 0 when next '-' is a range */
2016                 int     endc;
2017
2018                 /*
2019                  * In a character class, different parsing rules apply.
2020                  * Not even \ is special anymore, nothing is.
2021                  */
2022                 if (*regparse == '^')       /* Complement of range. */
2023                 {
2024                     ret = regnode(ANYBUT + extra);
2025                     regparse++;
2026                 }
2027                 else
2028                     ret = regnode(ANYOF + extra);
2029
2030                 /* At the start ']' and '-' mean the literal character. */
2031                 if (*regparse == ']' || *regparse == '-')
2032                 {
2033                     startc = *regparse;
2034                     regc(*regparse++);
2035                 }
2036
2037                 while (*regparse != NUL && *regparse != ']')
2038                 {
2039                     if (*regparse == '-')
2040                     {
2041                         ++regparse;
2042                         /* The '-' is not used for a range at the end and
2043                          * after or before a '\n'. */
2044                         if (*regparse == ']' || *regparse == NUL
2045                                 || startc == -1
2046                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2047                         {
2048                             regc('-');
2049                             startc = '-';       /* [--x] is a range */
2050                         }
2051                         else
2052                         {
2053                             /* Also accept "a-[.z.]" */
2054                             endc = 0;
2055                             if (*regparse == '[')
2056                                 endc = get_coll_element(&regparse);
2057                             if (endc == 0)
2058                             {
2059 #ifdef FEAT_MBYTE
2060                                 if (has_mbyte)
2061                                     endc = mb_ptr2char_adv(&regparse);
2062                                 else
2063 #endif
2064                                     endc = *regparse++;
2065                             }
2066
2067                             /* Handle \o40, \x20 and \u20AC style sequences */
2068                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2069                                 endc = coll_get_char();
2070
2071                             if (startc > endc)
2072                                 EMSG_RET_NULL(_(e_invrange));
2073 #ifdef FEAT_MBYTE
2074                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2075                                                  || (*mb_char2len)(endc) > 1))
2076                             {
2077                                 /* Limit to a range of 256 chars */
2078                                 if (endc > startc + 256)
2079                                     EMSG_RET_NULL(_(e_invrange));
2080                                 while (++startc <= endc)
2081                                     regmbc(startc);
2082                             }
2083                             else
2084 #endif
2085                             {
2086 #ifdef EBCDIC
2087                                 int     alpha_only = FALSE;
2088
2089                                 /* for alphabetical range skip the gaps
2090                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2091                                 if (isalpha(startc) && isalpha(endc))
2092                                     alpha_only = TRUE;
2093 #endif
2094                                 while (++startc <= endc)
2095 #ifdef EBCDIC
2096                                     if (!alpha_only || isalpha(startc))
2097 #endif
2098                                         regc(startc);
2099                             }
2100                             startc = -1;
2101                         }
2102                     }
2103                     /*
2104                      * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2105                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2106                      * 'cpoptions' is not included.
2107                      * Posix doesn't recognize backslash at all.
2108                      */
2109                     else if (*regparse == '\\'
2110                             && !cpo_bsl
2111                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2112                                 || (!cpo_lit
2113                                     && vim_strchr(REGEXP_ABBR,
2114                                                        regparse[1]) != NULL)))
2115                     {
2116                         regparse++;
2117                         if (*regparse == 'n')
2118                         {
2119                             /* '\n' in range: also match NL */
2120                             if (ret != JUST_CALC_SIZE)
2121                             {
2122                                 if (*ret == ANYBUT)
2123                                     *ret = ANYBUT + ADD_NL;
2124                                 else if (*ret == ANYOF)
2125                                     *ret = ANYOF + ADD_NL;
2126                                 /* else: must have had a \n already */
2127                             }
2128                             *flagp |= HASNL;
2129                             regparse++;
2130                             startc = -1;
2131                         }
2132                         else if (*regparse == 'd'
2133                                 || *regparse == 'o'
2134                                 || *regparse == 'x'
2135                                 || *regparse == 'u'
2136                                 || *regparse == 'U')
2137                         {
2138                             startc = coll_get_char();
2139                             if (startc == 0)
2140                                 regc(0x0a);
2141                             else
2142 #ifdef FEAT_MBYTE
2143                                 regmbc(startc);
2144 #else
2145                                 regc(startc);
2146 #endif
2147                         }
2148                         else
2149                         {
2150                             startc = backslash_trans(*regparse++);
2151                             regc(startc);
2152                         }
2153                     }
2154                     else if (*regparse == '[')
2155                     {
2156                         int c_class;
2157                         int cu;
2158
2159                         c_class = get_char_class(&regparse);
2160                         startc = -1;
2161                         /* Characters assumed to be 8 bits! */
2162                         switch (c_class)
2163                         {
2164                             case CLASS_NONE:
2165                                 c_class = get_equi_class(&regparse);
2166                                 if (c_class != 0)
2167                                 {
2168                                     /* produce equivalence class */
2169                                     reg_equi_class(c_class);
2170                                 }
2171                                 else if ((c_class =
2172                                             get_coll_element(&regparse)) != 0)
2173                                 {
2174                                     /* produce a collating element */
2175                                     regmbc(c_class);
2176                                 }
2177                                 else
2178                                 {
2179                                     /* literal '[', allow [[-x] as a range */
2180                                     startc = *regparse++;
2181                                     regc(startc);
2182                                 }
2183                                 break;
2184                             case CLASS_ALNUM:
2185                                 for (cu = 1; cu <= 255; cu++)
2186                                     if (isalnum(cu))
2187                                         regc(cu);
2188                                 break;
2189                             case CLASS_ALPHA:
2190                                 for (cu = 1; cu <= 255; cu++)
2191                                     if (isalpha(cu))
2192                                         regc(cu);
2193                                 break;
2194                             case CLASS_BLANK:
2195                                 regc(' ');
2196                                 regc('\t');
2197                                 break;
2198                             case CLASS_CNTRL:
2199                                 for (cu = 1; cu <= 255; cu++)
2200                                     if (iscntrl(cu))
2201                                         regc(cu);
2202                                 break;
2203                             case CLASS_DIGIT:
2204                                 for (cu = 1; cu <= 255; cu++)
2205                                     if (VIM_ISDIGIT(cu))
2206                                         regc(cu);
2207                                 break;
2208                             case CLASS_GRAPH:
2209                                 for (cu = 1; cu <= 255; cu++)
2210                                     if (isgraph(cu))
2211                                         regc(cu);
2212                                 break;
2213                             case CLASS_LOWER:
2214                                 for (cu = 1; cu <= 255; cu++)
2215                                     if (MB_ISLOWER(cu))
2216                                         regc(cu);
2217                                 break;
2218                             case CLASS_PRINT:
2219                                 for (cu = 1; cu <= 255; cu++)
2220                                     if (vim_isprintc(cu))
2221                                         regc(cu);
2222                                 break;
2223                             case CLASS_PUNCT:
2224                                 for (cu = 1; cu <= 255; cu++)
2225                                     if (ispunct(cu))
2226                                         regc(cu);
2227                                 break;
2228                             case CLASS_SPACE:
2229                                 for (cu = 9; cu <= 13; cu++)
2230                                     regc(cu);
2231                                 regc(' ');
2232                                 break;
2233                             case CLASS_UPPER:
2234                                 for (cu = 1; cu <= 255; cu++)
2235                                     if (MB_ISUPPER(cu))
2236                                         regc(cu);
2237                                 break;
2238                             case CLASS_XDIGIT:
2239                                 for (cu = 1; cu <= 255; cu++)
2240                                     if (vim_isxdigit(cu))
2241                                         regc(cu);
2242                                 break;
2243                             case CLASS_TAB:
2244                                 regc('\t');
2245                                 break;
2246                             case CLASS_RETURN:
2247                                 regc('\r');
2248                                 break;
2249                             case CLASS_BACKSPACE:
2250                                 regc('\b');
2251                                 break;
2252                             case CLASS_ESCAPE:
2253                                 regc('\033');
2254                                 break;
2255                         }
2256                     }
2257                     else
2258                     {
2259 #ifdef FEAT_MBYTE
2260                         if (has_mbyte)
2261                         {
2262                             int len;
2263
2264                             /* produce a multibyte character, including any
2265                              * following composing characters */
2266                             startc = mb_ptr2char(regparse);
2267                             len = (*mb_ptr2len)(regparse);
2268                             if (enc_utf8 && utf_char2len(startc) != len)
2269                                 startc = -1;    /* composing chars */
2270                             while (--len >= 0)
2271                                 regc(*regparse++);
2272                         }
2273                         else
2274 #endif
2275                         {
2276                             startc = *regparse++;
2277                             regc(startc);
2278                         }
2279                     }
2280                 }
2281                 regc(NUL);
2282                 prevchr_len = 1;        /* last char was the ']' */
2283                 if (*regparse != ']')
2284                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2285                 skipchr();          /* let's be friends with the lexer again */
2286                 *flagp |= HASWIDTH | SIMPLE;
2287                 break;
2288             }
2289             else if (reg_strict)
2290                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2291                                                        reg_magic > MAGIC_OFF);
2292         }
2293         /* FALLTHROUGH */
2294
2295       default:
2296         {
2297             int         len;
2298
2299 #ifdef FEAT_MBYTE
2300             /* A multi-byte character is handled as a separate atom if it's
2301              * before a multi and when it's a composing char. */
2302             if (use_multibytecode(c))
2303             {
2304 do_multibyte:
2305                 ret = regnode(MULTIBYTECODE);
2306                 regmbc(c);
2307                 *flagp |= HASWIDTH | SIMPLE;
2308                 break;
2309             }
2310 #endif
2311
2312             ret = regnode(EXACTLY);
2313
2314             /*
2315              * Append characters as long as:
2316              * - there is no following multi, we then need the character in
2317              *   front of it as a single character operand
2318              * - not running into a Magic character
2319              * - "one_exactly" is not set
2320              * But always emit at least one character.  Might be a Multi,
2321              * e.g., a "[" without matching "]".
2322              */
2323             for (len = 0; c != NUL && (len == 0
2324                         || (re_multi_type(peekchr()) == NOT_MULTI
2325                             && !one_exactly
2326                             && !is_Magic(c))); ++len)
2327             {
2328                 c = no_Magic(c);
2329 #ifdef FEAT_MBYTE
2330                 if (has_mbyte)
2331                 {
2332                     regmbc(c);
2333                     if (enc_utf8)
2334                     {
2335                         int     l;
2336
2337                         /* Need to get composing character too. */
2338                         for (;;)
2339                         {
2340                             l = utf_ptr2len(regparse);
2341                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2342                                 break;
2343                             regmbc(utf_ptr2char(regparse));
2344                             skipchr();
2345                         }
2346                     }
2347                 }
2348                 else
2349 #endif
2350                     regc(c);
2351                 c = getchr();
2352             }
2353             ungetchr();
2354
2355             regc(NUL);
2356             *flagp |= HASWIDTH;
2357             if (len == 1)
2358                 *flagp |= SIMPLE;
2359         }
2360         break;
2361     }
2362
2363     return ret;
2364 }
2365
2366 #ifdef FEAT_MBYTE
2367 /*
2368  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2369  * character "c".
2370  */
2371     static int
2372 use_multibytecode(c)
2373     int c;
2374 {
2375     return has_mbyte && (*mb_char2len)(c) > 1
2376                      && (re_multi_type(peekchr()) != NOT_MULTI
2377                              || (enc_utf8 && utf_iscomposing(c)));
2378 }
2379 #endif
2380
2381 /*
2382  * emit a node
2383  * Return pointer to generated code.
2384  */
2385     static char_u *
2386 regnode(op)
2387     int         op;
2388 {
2389     char_u  *ret;
2390
2391     ret = regcode;
2392     if (ret == JUST_CALC_SIZE)
2393         regsize += 3;
2394     else
2395     {
2396         *regcode++ = op;
2397         *regcode++ = NUL;               /* Null "next" pointer. */
2398         *regcode++ = NUL;
2399     }
2400     return ret;
2401 }
2402
2403 /*
2404  * Emit (if appropriate) a byte of code
2405  */
2406     static void
2407 regc(b)
2408     int         b;
2409 {
2410     if (regcode == JUST_CALC_SIZE)
2411         regsize++;
2412     else
2413         *regcode++ = b;
2414 }
2415
2416 #ifdef FEAT_MBYTE
2417 /*
2418  * Emit (if appropriate) a multi-byte character of code
2419  */
2420     static void
2421 regmbc(c)
2422     int         c;
2423 {
2424     if (regcode == JUST_CALC_SIZE)
2425         regsize += (*mb_char2len)(c);
2426     else
2427         regcode += (*mb_char2bytes)(c, regcode);
2428 }
2429 #endif
2430
2431 /*
2432  * reginsert - insert an operator in front of already-emitted operand
2433  *
2434  * Means relocating the operand.
2435  */
2436     static void
2437 reginsert(op, opnd)
2438     int         op;
2439     char_u     *opnd;
2440 {
2441     char_u      *src;
2442     char_u      *dst;
2443     char_u      *place;
2444
2445     if (regcode == JUST_CALC_SIZE)
2446     {
2447         regsize += 3;
2448         return;
2449     }
2450     src = regcode;
2451     regcode += 3;
2452     dst = regcode;
2453     while (src > opnd)
2454         *--dst = *--src;
2455
2456     place = opnd;               /* Op node, where operand used to be. */
2457     *place++ = op;
2458     *place++ = NUL;
2459     *place = NUL;
2460 }
2461
2462 /*
2463  * reginsert_limits - insert an operator in front of already-emitted operand.
2464  * The operator has the given limit values as operands.  Also set next pointer.
2465  *
2466  * Means relocating the operand.
2467  */
2468     static void
2469 reginsert_limits(op, minval, maxval, opnd)
2470     int         op;
2471     long        minval;
2472     long        maxval;
2473     char_u      *opnd;
2474 {
2475     char_u      *src;
2476     char_u      *dst;
2477     char_u      *place;
2478
2479     if (regcode == JUST_CALC_SIZE)
2480     {
2481         regsize += 11;
2482         return;
2483     }
2484     src = regcode;
2485     regcode += 11;
2486     dst = regcode;
2487     while (src > opnd)
2488         *--dst = *--src;
2489
2490     place = opnd;               /* Op node, where operand used to be. */
2491     *place++ = op;
2492     *place++ = NUL;
2493     *place++ = NUL;
2494     place = re_put_long(place, (long_u)minval);
2495     place = re_put_long(place, (long_u)maxval);
2496     regtail(opnd, place);
2497 }
2498
2499 /*
2500  * Write a long as four bytes at "p" and return pointer to the next char.
2501  */
2502     static char_u *
2503 re_put_long(p, val)
2504     char_u      *p;
2505     long_u      val;
2506 {
2507     *p++ = (char_u) ((val >> 24) & 0377);
2508     *p++ = (char_u) ((val >> 16) & 0377);
2509     *p++ = (char_u) ((val >> 8) & 0377);
2510     *p++ = (char_u) (val & 0377);
2511     return p;
2512 }
2513
2514 /*
2515  * regtail - set the next-pointer at the end of a node chain
2516  */
2517     static void
2518 regtail(p, val)
2519     char_u      *p;
2520     char_u      *val;
2521 {
2522     char_u      *scan;
2523     char_u      *temp;
2524     int         offset;
2525
2526     if (p == JUST_CALC_SIZE)
2527         return;
2528
2529     /* Find last node. */
2530     scan = p;
2531     for (;;)
2532     {
2533         temp = regnext(scan);
2534         if (temp == NULL)
2535             break;
2536         scan = temp;
2537     }
2538
2539     if (OP(scan) == BACK)
2540         offset = (int)(scan - val);
2541     else
2542         offset = (int)(val - scan);
2543     *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2544     *(scan + 2) = (char_u) (offset & 0377);
2545 }
2546
2547 /*
2548  * regoptail - regtail on item after a BRANCH; nop if none
2549  */
2550     static void
2551 regoptail(p, val)
2552     char_u      *p;
2553     char_u      *val;
2554 {
2555     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2556     if (p == NULL || p == JUST_CALC_SIZE
2557             || (OP(p) != BRANCH
2558                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2559         return;
2560     regtail(OPERAND(p), val);
2561 }
2562
2563 /*
2564  * getchr() - get the next character from the pattern. We know about
2565  * magic and such, so therefore we need a lexical analyzer.
2566  */
2567
/* static int       curchr; */  /* (kept as a reminder; "curchr" is used
                                 * below but not declared in this section) */
static int      prevprevchr;    /* "prevchr" before the last skipchr() */
static int      prevchr;        /* "curchr" before the last skipchr() */
static int      nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * e.g. in /[ ^I]^ the pattern was never found even if it existed, because ^
 * was taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2579
2580     static void
2581 initchr(str)
2582     char_u *str;
2583 {
2584     regparse = str;
2585     prevchr_len = 0;
2586     curchr = prevprevchr = prevchr = nextchr = -1;
2587     at_start = TRUE;
2588     prev_at_start = FALSE;
2589 }
2590
    static int
peekchr()
{
    /*
     * Return the next character of the pattern without advancing
     * "regparse".  A character that is magic at this position for the
     * current "reg_magic" level is returned as Magic(c).
     * The result is kept in "curchr"; it is only computed when "curchr"
     * is -1.
     */

    /* Non-zero while recursing to lex the character after a backslash. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* * is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
             * "\(\*" is not magic, thus must be magic if "after_slash" */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&", "\n" or "\%(".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* What follows a magic '^' is again at the start. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n".  After "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;

                /* ignore \c \C \m \M and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
                    p += 2;
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    /* Lex the character after the backslash with a recursive
                     * call; "regparse" is only advanced temporarily. */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        default:
            /* Any other character: with a multi-byte encoding get the whole
             * character, otherwise keep the byte assigned above. */
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
2749
2750 /*
2751  * Eat one lexed character.  Do this in a way that we can undo it.
2752  */
2753     static void
2754 skipchr()
2755 {
2756     /* peekchr() eats a backslash, do the same here */
2757     if (*regparse == '\\')
2758         prevchr_len = 1;
2759     else
2760         prevchr_len = 0;
2761     if (regparse[prevchr_len] != NUL)
2762     {
2763 #ifdef FEAT_MBYTE
2764         if (enc_utf8)
2765             /* exclude composing chars that mb_ptr2len does include */
2766             prevchr_len += utf_ptr2len(regparse + prevchr_len);
2767         else if (has_mbyte)
2768             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2769         else
2770 #endif
2771             ++prevchr_len;
2772     }
2773     regparse += prevchr_len;
2774     prev_at_start = at_start;
2775     at_start = FALSE;
2776     prevprevchr = prevchr;
2777     prevchr = curchr;
2778     curchr = nextchr;       /* use previously unget char, or -1 */
2779     nextchr = -1;
2780 }
2781
2782 /*
2783  * Skip a character while keeping the value of prev_at_start for at_start.
2784  * prevchr and prevprevchr are also kept.
2785  */
2786     static void
2787 skipchr_keepstart()
2788 {
2789     int as = prev_at_start;
2790     int pr = prevchr;
2791     int prpr = prevprevchr;
2792
2793     skipchr();
2794     at_start = as;
2795     prevchr = pr;
2796     prevprevchr = prpr;
2797 }
2798
2799     static int
2800 getchr()
2801 {
2802     int chr = peekchr();
2803
2804     skipchr();
2805     return chr;
2806 }
2807
2808 /*
2809  * put character back.  Works only once!
2810  */
2811     static void
2812 ungetchr()
2813 {
2814     nextchr = curchr;
2815     curchr = prevchr;
2816     prevchr = prevprevchr;
2817     at_start = prev_at_start;
2818     prev_at_start = FALSE;
2819
2820     /* Backup regparse, so that it's at the same position as before the
2821      * getchr(). */
2822     regparse -= prevchr_len;
2823 }
2824
2825 /*
2826  * Get and return the value of the hex string at the current position.
2827  * Return -1 if there is no valid hex number.
2828  * The position is updated:
2829  *     blahblah\%x20asdf
2830  *         before-^ ^-after
2831  * The parameter controls the maximum number of input characters. This will be
2832  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2833  */
2834     static int
2835 gethexchrs(maxinputlen)
2836     int         maxinputlen;
2837 {
2838     int         nr = 0;
2839     int         c;
2840     int         i;
2841
2842     for (i = 0; i < maxinputlen; ++i)
2843     {
2844         c = regparse[0];
2845         if (!vim_isxdigit(c))
2846             break;
2847         nr <<= 4;
2848         nr |= hex2nr(c);
2849         ++regparse;
2850     }
2851
2852     if (i == 0)
2853         return -1;
2854     return nr;
2855 }
2856
2857 /*
2858  * get and return the value of the decimal string immediately after the
2859  * current position. Return -1 for invalid.  Consumes all digits.
2860  */
2861     static int
2862 getdecchrs()
2863 {
2864     int         nr = 0;
2865     int         c;
2866     int         i;
2867
2868     for (i = 0; ; ++i)
2869     {
2870         c = regparse[0];
2871         if (c < '0' || c > '9')
2872             break;
2873         nr *= 10;
2874         nr += c - '0';
2875         ++regparse;
2876     }
2877
2878     if (i == 0)
2879         return -1;
2880     return nr;
2881 }
2882
2883 /*
2884  * get and return the value of the octal string immediately after the current
2885  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2886  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2887  * treat 8 or 9 as recognised characters. Position is updated:
2888  *     blahblah\%o210asdf
2889  *         before-^  ^-after
2890  */
2891     static int
2892 getoctchrs()
2893 {
2894     int         nr = 0;
2895     int         c;
2896     int         i;
2897
2898     for (i = 0; i < 3 && nr < 040; ++i)
2899     {
2900         c = regparse[0];
2901         if (c < '0' || c > '7')
2902             break;
2903         nr <<= 3;
2904         nr |= hex2nr(c);
2905         ++regparse;
2906     }
2907
2908     if (i == 0)
2909         return -1;
2910     return nr;
2911 }
2912
2913 /*
2914  * Get a number after a backslash that is inside [].
2915  * When nothing is recognized return a backslash.
2916  */
2917     static int
2918 coll_get_char()
2919 {
2920     int     nr = -1;
2921
2922     switch (*regparse++)
2923     {
2924         case 'd': nr = getdecchrs(); break;
2925         case 'o': nr = getoctchrs(); break;
2926         case 'x': nr = gethexchrs(2); break;
2927         case 'u': nr = gethexchrs(4); break;
2928         case 'U': nr = gethexchrs(8); break;
2929     }
2930     if (nr < 0)
2931     {
2932         /* If getting the number fails be backwards compatible: the character
2933          * is a backslash. */
2934         --regparse;
2935         nr = '\\';
2936     }
2937     return nr;
2938 }
2939
2940 /*
2941  * read_limits - Read two integers to be taken as a minimum and maximum.
2942  * If the first character is '-', then the range is reversed.
2943  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2944  * missing, a very big number is the default.
2945  */
2946     static int
2947 read_limits(minval, maxval)
2948     long        *minval;
2949     long        *maxval;
2950 {
2951     int         reverse = FALSE;
2952     char_u      *first_char;
2953     long        tmp;
2954
2955     if (*regparse == '-')
2956     {
2957         /* Starts with '-', so reverse the range later */
2958         regparse++;
2959         reverse = TRUE;
2960     }
2961     first_char = regparse;
2962     *minval = getdigits(&regparse);
2963     if (*regparse == ',')           /* There is a comma */
2964     {
2965         if (vim_isdigit(*++regparse))
2966             *maxval = getdigits(&regparse);
2967         else
2968             *maxval = MAX_LIMIT;
2969     }
2970     else if (VIM_ISDIGIT(*first_char))
2971         *maxval = *minval;          /* It was \{n} or \{-n} */
2972     else
2973         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2974     if (*regparse == '\\')
2975         regparse++;     /* Allow either \{...} or \{...\} */
2976     if (*regparse != '}')
2977     {
2978         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2979                                           reg_magic == MAGIC_ALL ? "" : "\\");
2980         EMSG_RET_FAIL(IObuff);
2981     }
2982
2983     /*
2984      * Reverse the range if there was a '-', or make sure it is in the right
2985      * order otherwise.
2986      */
2987     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2988     {
2989         tmp = *minval;
2990         *minval = *maxval;
2991         *maxval = tmp;
2992     }
2993     skipchr();          /* let's be friends with the lexer again */
2994     return OK;
2995 }
2996
2997 /*
2998  * vim_regexec and friends
2999  */
3000
3001 /*
3002  * Global work variables for vim_regexec().
3003  */
3004
3005 /* The current match-position is remembered with these variables: */
3006 static linenr_T reglnum;        /* line number, relative to first line */
3007 static char_u   *regline;       /* start of current line */
3008 static char_u   *reginput;      /* current input, points into "regline" */
3009
3010 static int      need_clear_subexpr;     /* subexpressions still need to be
3011                                          * cleared */
3012 #ifdef FEAT_SYN_HL
3013 static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
3014                                                  * still need to be cleared */
3015 #endif
3016
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of "rs_u" is meaningful, depending on whether a
 * single-line or a multi-line regexp is being matched.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;
    int         rs_len; /* length of "backpos" */
} regsave_T;
3031
/*
 * struct to save start/end pointer/position in for \(\)
 * restore_se() reads "pos" when REG_MULTI is set and "ptr" otherwise.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* pointer, for single-line regexp */
        lpos_T  pos;    /* position, for multi-line regexp */
    } se_u;
} save_se_T;
3041
/*
 * used for BEHIND and NOBEHIND matching
 * NOTE(review): the member notes below are derived from the names and from
 * the save_subexpr()/restore_subexpr() prototypes only -- confirm against
 * regmatch().
 */
typedef struct regbehind_S
{
    regsave_T   save_after;     /* presumably input state after the item */
    regsave_T   save_behind;    /* presumably input state for the behind
                                 * part */
    int         save_need_clear_subexpr; /* presumably a copy of
                                          * need_clear_subexpr */
    save_se_T   save_start[NSUBEXP];    /* presumably saved submatch starts */
    save_se_T   save_end[NSUBEXP];      /* presumably saved submatch ends */
} regbehind_T;
3051
3052 static char_u   *reg_getline __ARGS((linenr_T lnum));
3053 static long     vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3054 static long     regtry __ARGS((regprog_T *prog, colnr_T col));
3055 static void     cleanup_subexpr __ARGS((void));
3056 #ifdef FEAT_SYN_HL
3057 static void     cleanup_zsubexpr __ARGS((void));
3058 #endif
3059 static void     save_subexpr __ARGS((regbehind_T *bp));
3060 static void     restore_subexpr __ARGS((regbehind_T *bp));
3061 static void     reg_nextline __ARGS((void));
3062 static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
3063 static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
3064 static int      reg_save_equal __ARGS((regsave_T *save));
3065 static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3066 static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));
3067
/*
 * Save the sub-expressions before attempting a match.
 * Calls save_se_multi() with "posp" for a multi-line regexp and
 * save_se_one() with "pp" otherwise.
 * Note: expands to an unparenthesized "?:" expression, thus use it as a
 * complete statement only.
 */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/*
 * After a failed match restore the sub-expressions.
 * Copies the saved position to "*posp" for a multi-line regexp and the saved
 * pointer to "*pp" otherwise.
 * Note: expands to a braced block, not to an expression.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
3078
3079 static int      re_num_cmp __ARGS((long_u val, char_u *scan));
3080 static int      regmatch __ARGS((char_u *prog));
3081 static int      regrepeat __ARGS((char_u *p, long maxcount));
3082
3083 #ifdef DEBUG
3084 int             regnarrate = 0;
3085 #endif
3086
3087 /*
3088  * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
3089  * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3090  * contains '\c' or '\C' the value is overruled.
3091  */
3092 static int      ireg_ic;
3093
3094 #ifdef FEAT_MBYTE
3095 /*
3096  * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
3097  * in the regexp.  Defaults to false, always.
3098  */
3099 static int      ireg_icombine;
3100 #endif
3101
3102 /*
3103  * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
3104  * there is no maximum.
3105  */
3106 static colnr_T  ireg_maxcol;
3107
3108 /*
3109  * Sometimes need to save a copy of a line.  Since alloc()/free() is very
3110  * slow, we keep one allocated piece of memory and only re-allocate it when
3111  * it's too small.  It's freed in vim_regexec_both() when finished.
3112  */
3113 static char_u   *reg_tofree = NULL;
3114 static unsigned reg_tofreelen;
3115
3116 /*
3117  * These variables are set when executing a regexp to speed up the execution.
3118  * Which ones are set depends on whether a single-line or multi-line match is
3119  * done:
3120  *                      single-line             multi-line
3121  * reg_match            &regmatch_T             NULL
3122  * reg_mmatch           NULL                    &regmmatch_T
3123  * reg_startp           reg_match->startp       <invalid>
3124  * reg_endp             reg_match->endp         <invalid>
3125  * reg_startpos         <invalid>               reg_mmatch->startpos
3126  * reg_endpos           <invalid>               reg_mmatch->endpos
3127  * reg_win              NULL                    window in which to search
3128  * reg_buf              <invalid>               buffer in which to search
3129  * reg_firstlnum        <invalid>               first line in which to search
3130  * reg_maxline          0                       last line nr
3131  * reg_line_lbr         FALSE or TRUE           FALSE
3132  */
3133 static regmatch_T       *reg_match;
3134 static regmmatch_T      *reg_mmatch;
3135 static char_u           **reg_startp = NULL;
3136 static char_u           **reg_endp = NULL;
3137 static lpos_T           *reg_startpos = NULL;
3138 static lpos_T           *reg_endpos = NULL;
3139 static win_T            *reg_win;
3140 static buf_T            *reg_buf;
3141 static linenr_T         reg_firstlnum;
3142 static linenr_T         reg_maxline;
3143 static int              reg_line_lbr;       /* "\n" in string is line break */
3144
/*
 * Values for rs_state in regitem_T.
 * Each value names the kind of program node an item on the regstack belongs
 * to, see the comment after each value.
 */
typedef enum regstate_E
{
    RS_NOPEN = 0        /* NOPEN and NCLOSE */
    , RS_MOPEN          /* MOPEN + [0-9] */
    , RS_MCLOSE         /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN          /* ZOPEN + [0-9] */
    , RS_ZCLOSE         /* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH         /* BRANCH */
    , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH        /* NOMATCH */
    , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3165
/*
 * When there are alternatives a regitem_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;      /* saved submatch start/end */
        regsave_T  regsave;     /* saved input state */
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr or BEHIND/NOBEHIND */
} regitem_T;
3183
3184 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3185 static void regstack_pop __ARGS((char_u **scan));
3186
/*
 * used for STAR, PLUS and BRACE_SIMPLE matching
 * NOTE(review): the notes on "count", "minval" and "maxval" are derived from
 * the names only -- confirm against regmatch().
 */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;          /* presumably the current number of matches */
    long        minval;         /* presumably the minimum number of matches */
    long        maxval;         /* presumably the maximum number of matches */
} regstar_T;
3196
/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3204
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items, used for BACK.
 */
static garray_T regstack = {0, 0, 0, 0, NULL};
static garray_T backpos = {0, 0, 0, 0, NULL};
3214
3215 /*
3216  * Both for regstack and backpos tables we use the following strategy of
3217  * allocation (to reduce malloc/free calls):
3218  * - Initial size is fairly small.
3219  * - When needed, the tables are grown bigger (8 times at first, double after
3220  *   that).
3221  * - After executing the match we free the memory only if the array has grown.
3222  *   Thus the memory is kept allocated when it's at the initial size.
3223  * This makes it fast while not keeping a lot of memory allocated.
3224  * A three times speed increase was observed when using many simple patterns.
3225  */
3226 #define REGSTACK_INITIAL        2048
3227 #define BACKPOS_INITIAL         64
3228
3229 #if defined(EXITFREE) || defined(PROTO)
3230     void
3231 free_regexp_stuff()
3232 {
3233     ga_clear(&regstack);
3234     ga_clear(&backpos);
3235     vim_free(reg_tofree);
3236     vim_free(reg_prev_sub);
3237 }
3238 #endif
3239
3240 /*
3241  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3242  */
3243     static char_u *
3244 reg_getline(lnum)
3245     linenr_T    lnum;
3246 {
3247     /* when looking behind for a match/no-match lnum is negative.  But we
3248      * can't go before line 1 */
3249     if (reg_firstlnum + lnum < 1)
3250         return NULL;
3251     if (lnum > reg_maxline)
3252         /* Must have matched the "\n" in the last line. */
3253         return (char_u *)"";
3254     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3255 }
3256
3257 static regsave_T behind_pos;
3258
3259 #ifdef FEAT_SYN_HL
3260 static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
3261 static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
3262 static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3263 static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
3264 #endif
3265
/* TRUE if using multi-line regexp: vim_regexec_multi() sets "reg_match" to
 * NULL, vim_regexec() and vim_regexec_nl() set it to their match argument. */
#define REG_MULTI       (reg_match == NULL)
3268
3269 /*
3270  * Match a regexp against a string.
3271  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3272  * Uses curbuf for line count and 'iskeyword'.
3273  *
3274  * Return TRUE if there is a match, FALSE if not.
3275  */
3276     int
3277 vim_regexec(rmp, line, col)
3278     regmatch_T  *rmp;
3279     char_u      *line;  /* string to match against */
3280     colnr_T     col;    /* column to start looking for match */
3281 {
3282     reg_match = rmp;
3283     reg_mmatch = NULL;
3284     reg_maxline = 0;
3285     reg_line_lbr = FALSE;
3286     reg_win = NULL;
3287     ireg_ic = rmp->rm_ic;
3288 #ifdef FEAT_MBYTE
3289     ireg_icombine = FALSE;
3290 #endif
3291     ireg_maxcol = 0;
3292     return (vim_regexec_both(line, col, NULL) != 0);
3293 }
3294
3295 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
3296         || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
3297 /*
3298  * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
3299  */
3300     int
3301 vim_regexec_nl(rmp, line, col)
3302     regmatch_T  *rmp;
3303     char_u      *line;  /* string to match against */
3304     colnr_T     col;    /* column to start looking for match */
3305 {
3306     reg_match = rmp;
3307     reg_mmatch = NULL;
3308     reg_maxline = 0;
3309     reg_line_lbr = TRUE;
3310     reg_win = NULL;
3311     ireg_ic = rmp->rm_ic;
3312 #ifdef FEAT_MBYTE
3313     ireg_icombine = FALSE;
3314 #endif
3315     ireg_maxcol = 0;
3316     return (vim_regexec_both(line, col, NULL) != 0);
3317 }
3318 #endif
3319
3320 /*
3321  * Match a regexp against multiple lines.
3322  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3323  * Uses curbuf for line count and 'iskeyword'.
3324  *
3325  * Return zero if there is no match.  Return number of lines contained in the
3326  * match otherwise.
3327  */
3328     long
3329 vim_regexec_multi(rmp, win, buf, lnum, col, tm)
3330     regmmatch_T *rmp;
3331     win_T       *win;           /* window in which to search or NULL */
3332     buf_T       *buf;           /* buffer in which to search */
3333     linenr_T    lnum;           /* nr of line to start looking for match */
3334     colnr_T     col;            /* column to start looking for match */
3335     proftime_T  *tm;            /* timeout limit or NULL */
3336 {
3337     long        r;
3338     buf_T       *save_curbuf = curbuf;
3339
3340     reg_match = NULL;
3341     reg_mmatch = rmp;
3342     reg_buf = buf;
3343     reg_win = win;
3344     reg_firstlnum = lnum;
3345     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3346     reg_line_lbr = FALSE;
3347     ireg_ic = rmp->rmm_ic;
3348 #ifdef FEAT_MBYTE
3349     ireg_icombine = FALSE;
3350 #endif
3351     ireg_maxcol = rmp->rmm_maxcol;
3352
3353     /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3354     curbuf = buf;
3355     r = vim_regexec_both(NULL, col, tm);
3356     curbuf = save_curbuf;
3357
3358     return r;
3359 }
3360
3361 /*
3362  * Match a regexp against a string ("line" points to the string) or multiple
3363  * lines ("line" is NULL, use reg_getline()).
3364  */
3365     static long
3366 vim_regexec_both(line, col, tm)
3367     char_u      *line;
3368     colnr_T     col;            /* column to start looking for match */
3369     proftime_T  *tm UNUSED;     /* timeout limit or NULL */
3370 {
3371     regprog_T   *prog;
3372     char_u      *s;
3373     long        retval = 0L;
3374
3375     /* Create "regstack" and "backpos" if they are not allocated yet.
3376      * We allocate *_INITIAL amount of bytes first and then set the grow size
3377      * to much bigger value to avoid many malloc calls in case of deep regular
3378      * expressions.  */
3379     if (regstack.ga_data == NULL)
3380     {
3381         /* Use an item size of 1 byte, since we push different things
3382          * onto the regstack. */
3383         ga_init2(&regstack, 1, REGSTACK_INITIAL);
3384         ga_grow(&regstack, REGSTACK_INITIAL);
3385         regstack.ga_growsize = REGSTACK_INITIAL * 8;
3386     }
3387
3388     if (backpos.ga_data == NULL)
3389     {
3390         ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3391         ga_grow(&backpos, BACKPOS_INITIAL);
3392         backpos.ga_growsize = BACKPOS_INITIAL * 8;
3393     }
3394
3395     if (REG_MULTI)
3396     {
3397         prog = reg_mmatch->regprog;
3398         line = reg_getline((linenr_T)0);
3399         reg_startpos = reg_mmatch->startpos;
3400         reg_endpos = reg_mmatch->endpos;
3401     }
3402     else
3403     {
3404         prog = reg_match->regprog;
3405         reg_startp = reg_match->startp;
3406         reg_endp = reg_match->endp;
3407     }
3408
3409     /* Be paranoid... */
3410     if (prog == NULL || line == NULL)
3411     {
3412         EMSG(_(e_null));
3413         goto theend;
3414     }
3415
3416     /* Check validity of program. */
3417     if (prog_magic_wrong())
3418         goto theend;
3419
3420     /* If the start column is past the maximum column: no need to try. */
3421     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3422         goto theend;
3423
3424     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3425     if (prog->regflags & RF_ICASE)
3426         ireg_ic = TRUE;
3427     else if (prog->regflags & RF_NOICASE)
3428         ireg_ic = FALSE;
3429
3430 #ifdef FEAT_MBYTE
3431     /* If pattern contains "\Z" overrule value of ireg_icombine */
3432     if (prog->regflags & RF_ICOMBINE)
3433         ireg_icombine = TRUE;
3434 #endif
3435
3436     /* If there is a "must appear" string, look for it. */
3437     if (prog->regmust != NULL)
3438     {
3439         int c;
3440
3441 #ifdef FEAT_MBYTE
3442         if (has_mbyte)
3443             c = (*mb_ptr2char)(prog->regmust);
3444         else
3445 #endif
3446             c = *prog->regmust;
3447         s = line + col;
3448
3449         /*
3450          * This is used very often, esp. for ":global".  Use three versions of
3451          * the loop to avoid overhead of conditions.
3452          */
3453         if (!ireg_ic
3454 #ifdef FEAT_MBYTE
3455                 && !has_mbyte
3456 #endif
3457                 )
3458             while ((s = vim_strbyte(s, c)) != NULL)
3459             {
3460                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3461                     break;              /* Found it. */
3462                 ++s;
3463             }
3464 #ifdef FEAT_MBYTE
3465         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3466             while ((s = vim_strchr(s, c)) != NULL)
3467             {
3468                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3469                     break;              /* Found it. */
3470                 mb_ptr_adv(s);
3471             }
3472 #endif
3473         else
3474             while ((s = cstrchr(s, c)) != NULL)
3475             {
3476                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3477                     break;              /* Found it. */
3478                 mb_ptr_adv(s);
3479             }
3480         if (s == NULL)          /* Not present. */
3481             goto theend;
3482     }
3483
3484     regline = line;
3485     reglnum = 0;
3486
3487     /* Simplest case: Anchored match need be tried only once. */
3488     if (prog->reganch)
3489     {
3490         int     c;
3491
3492 #ifdef FEAT_MBYTE
3493         if (has_mbyte)
3494             c = (*mb_ptr2char)(regline + col);
3495         else
3496 #endif
3497             c = regline[col];
3498         if (prog->regstart == NUL
3499                 || prog->regstart == c
3500                 || (ireg_ic && ((
3501 #ifdef FEAT_MBYTE
3502                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3503                         || (c < 255 && prog->regstart < 255 &&
3504 #endif
3505                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3506             retval = regtry(prog, col);
3507         else
3508             retval = 0;
3509     }
3510     else
3511     {
3512 #ifdef FEAT_RELTIME
3513         int tm_count = 0;
3514 #endif
3515         /* Messy cases:  unanchored match. */
3516         while (!got_int)
3517         {
3518             if (prog->regstart != NUL)
3519             {
3520                 /* Skip until the char we know it must start with.
3521                  * Used often, do some work to avoid call overhead. */
3522                 if (!ireg_ic
3523 #ifdef FEAT_MBYTE
3524                             && !has_mbyte
3525 #endif
3526                             )
3527                     s = vim_strbyte(regline + col, prog->regstart);
3528                 else
3529                     s = cstrchr(regline + col, prog->regstart);
3530                 if (s == NULL)
3531                 {
3532                     retval = 0;
3533                     break;
3534                 }
3535                 col = (int)(s - regline);
3536             }
3537
3538             /* Check for maximum column to try. */
3539             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3540             {
3541                 retval = 0;
3542                 break;
3543             }
3544
3545             retval = regtry(prog, col);
3546             if (retval > 0)
3547                 break;
3548
3549             /* if not currently on the first line, get it again */
3550             if (reglnum != 0)
3551             {
3552                 reglnum = 0;
3553                 regline = reg_getline((linenr_T)0);
3554             }
3555             if (regline[col] == NUL)
3556                 break;
3557 #ifdef FEAT_MBYTE
3558             if (has_mbyte)
3559                 col += (*mb_ptr2len)(regline + col);
3560             else
3561 #endif
3562                 ++col;
3563 #ifdef FEAT_RELTIME
3564             /* Check for timeout once in a twenty times to avoid overhead. */
3565             if (tm != NULL && ++tm_count == 20)
3566             {
3567                 tm_count = 0;
3568                 if (profile_passed_limit(tm))
3569                     break;
3570             }
3571 #endif
3572         }
3573     }
3574
3575 theend:
3576     /* Free "reg_tofree" when it's a bit big.
3577      * Free regstack and backpos if they are bigger than their initial size. */
3578     if (reg_tofreelen > 400)
3579     {
3580         vim_free(reg_tofree);
3581         reg_tofree = NULL;
3582     }
3583     if (regstack.ga_maxlen > REGSTACK_INITIAL)
3584         ga_clear(&regstack);
3585     if (backpos.ga_maxlen > BACKPOS_INITIAL)
3586         ga_clear(&backpos);
3587
3588     return retval;
3589 }
3590
3591 #ifdef FEAT_SYN_HL
3592 static reg_extmatch_T *make_extmatch __ARGS((void));
3593
3594 /*
3595  * Create a new extmatch and mark it as referenced once.
3596  */
3597     static reg_extmatch_T *
3598 make_extmatch()
3599 {
3600     reg_extmatch_T      *em;
3601
3602     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3603     if (em != NULL)
3604         em->refcnt = 1;
3605     return em;
3606 }
3607
3608 /*
3609  * Add a reference to an extmatch.
3610  */
3611     reg_extmatch_T *
3612 ref_extmatch(em)
3613     reg_extmatch_T      *em;
3614 {
3615     if (em != NULL)
3616         em->refcnt++;
3617     return em;
3618 }
3619
3620 /*
3621  * Remove a reference to an extmatch.  If there are no references left, free
3622  * the info.
3623  */
3624     void
3625 unref_extmatch(em)
3626     reg_extmatch_T      *em;
3627 {
3628     int i;
3629
3630     if (em != NULL && --em->refcnt <= 0)
3631     {
3632         for (i = 0; i < NSUBEXP; ++i)
3633             vim_free(em->matches[i]);
3634         vim_free(em);
3635     }
3636 }
3637 #endif
3638
3639 /*
3640  * regtry - try match of "prog" with at regline["col"].
3641  * Returns 0 for failure, number of lines contained in the match otherwise.
3642  */
3643     static long
3644 regtry(prog, col)
3645     regprog_T   *prog;
3646     colnr_T     col;
3647 {
3648     reginput = regline + col;
3649     need_clear_subexpr = TRUE;
3650 #ifdef FEAT_SYN_HL
3651     /* Clear the external match subpointers if necessary. */
3652     if (prog->reghasz == REX_SET)
3653         need_clear_zsubexpr = TRUE;
3654 #endif
3655
3656     if (regmatch(prog->program + 1) == 0)
3657         return 0;
3658
3659     cleanup_subexpr();
3660     if (REG_MULTI)
3661     {
3662         if (reg_startpos[0].lnum < 0)
3663         {
3664             reg_startpos[0].lnum = 0;
3665             reg_startpos[0].col = col;
3666         }
3667         if (reg_endpos[0].lnum < 0)
3668         {
3669             reg_endpos[0].lnum = reglnum;
3670             reg_endpos[0].col = (int)(reginput - regline);
3671         }
3672         else
3673             /* Use line number of "\ze". */
3674             reglnum = reg_endpos[0].lnum;
3675     }
3676     else
3677     {
3678         if (reg_startp[0] == NULL)
3679             reg_startp[0] = regline + col;
3680         if (reg_endp[0] == NULL)
3681             reg_endp[0] = reginput;
3682     }
3683 #ifdef FEAT_SYN_HL
3684     /* Package any found \z(...\) matches for export. Default is none. */
3685     unref_extmatch(re_extmatch_out);
3686     re_extmatch_out = NULL;
3687
3688     if (prog->reghasz == REX_SET)
3689     {
3690         int             i;
3691
3692         cleanup_zsubexpr();
3693         re_extmatch_out = make_extmatch();
3694         for (i = 0; i < NSUBEXP; i++)
3695         {
3696             if (REG_MULTI)
3697             {
3698                 /* Only accept single line matches. */
3699                 if (reg_startzpos[i].lnum >= 0
3700                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3701                     re_extmatch_out->matches[i] =
3702                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3703                                                        + reg_startzpos[i].col,
3704                                    reg_endzpos[i].col - reg_startzpos[i].col);
3705             }
3706             else
3707             {
3708                 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3709                     re_extmatch_out->matches[i] =
3710                             vim_strnsave(reg_startzp[i],
3711                                         (int)(reg_endzp[i] - reg_startzp[i]));
3712             }
3713         }
3714     }
3715 #endif
3716     return 1 + reglnum;
3717 }
3718
3719 #ifdef FEAT_MBYTE
3720 static int reg_prev_class __ARGS((void));
3721
3722 /*
3723  * Get class of previous character.
3724  */
3725     static int
3726 reg_prev_class()
3727 {
3728     if (reginput > regline)
3729         return mb_get_class(reginput - 1
3730                                      - (*mb_head_off)(regline, reginput - 1));
3731     return -1;
3732 }
3733
3734 #endif
3735 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput)
3736
3737 /*
3738  * The arguments from BRACE_LIMITS are stored here.  They are actually local
3739  * to regmatch(), but they are here to reduce the amount of stack space used
3740  * (it can be called recursively many times).
3741  */
3742 static long     bl_minval;
3743 static long     bl_maxval;
3744
3745 /*
3746  * regmatch - main matching routine
3747  *
3748  * Conceptually the strategy is simple: Check to see whether the current node
3749  * matches, push an item onto the regstack and loop to see whether the rest
3750  * matches, and then act accordingly.  In practice we make some effort to
3751  * avoid using the regstack, in particular by going through "ordinary" nodes
3752  * (that don't need to know whether the rest of the match failed) by a nested
3753  * loop.
3754  *
3755  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3756  * the last matched character.
3757  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3758  * undefined state!
3759  */
3760     static int
3761 regmatch(scan)
3762     char_u      *scan;          /* Current node. */
3763 {
3764   char_u        *next;          /* Next node. */
3765   int           op;
3766   int           c;
3767   regitem_T     *rp;
3768   int           no;
3769   int           status;         /* one of the RA_ values: */
3770 #define RA_FAIL         1       /* something failed, abort */
3771 #define RA_CONT         2       /* continue in inner loop */
3772 #define RA_BREAK        3       /* break inner loop */
3773 #define RA_MATCH        4       /* successful match */
3774 #define RA_NOMATCH      5       /* didn't match */
3775
3776   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
3777    * vim_regexec_both() to reduce malloc()/free() calls. */
3778   regstack.ga_len = 0;
3779   backpos.ga_len = 0;
3780
3781   /*
3782    * Repeat until "regstack" is empty.
3783    */
3784   for (;;)
3785   {
3786     /* Some patterns my cause a long time to match, even though they are not
3787      * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3788     fast_breakcheck();
3789
3790 #ifdef DEBUG
3791     if (scan != NULL && regnarrate)
3792     {
3793         mch_errmsg(regprop(scan));
3794         mch_errmsg("(\n");
3795     }
3796 #endif
3797
3798     /*
3799      * Repeat for items that can be matched sequentially, without using the
3800      * regstack.
3801      */
3802     for (;;)
3803     {
3804         if (got_int || scan == NULL)
3805         {
3806             status = RA_FAIL;
3807             break;
3808         }
3809         status = RA_CONT;
3810
3811 #ifdef DEBUG
3812         if (regnarrate)
3813         {
3814             mch_errmsg(regprop(scan));
3815             mch_errmsg("...\n");
3816 # ifdef FEAT_SYN_HL
3817             if (re_extmatch_in != NULL)
3818             {
3819                 int i;
3820
3821                 mch_errmsg(_("External submatches:\n"));
3822                 for (i = 0; i < NSUBEXP; i++)
3823                 {
3824                     mch_errmsg("    \"");
3825                     if (re_extmatch_in->matches[i] != NULL)
3826                         mch_errmsg(re_extmatch_in->matches[i]);
3827                     mch_errmsg("\"\n");
3828                 }
3829             }
3830 # endif
3831         }
3832 #endif
3833         next = regnext(scan);
3834
3835         op = OP(scan);
3836         /* Check for character class with NL added. */
3837         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3838                                 && *reginput == NUL && reglnum <= reg_maxline)
3839         {
3840             reg_nextline();
3841         }
3842         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3843         {
3844             ADVANCE_REGINPUT();
3845         }
3846         else
3847         {
3848           if (WITH_NL(op))
3849               op -= ADD_NL;
3850 #ifdef FEAT_MBYTE
3851           if (has_mbyte)
3852               c = (*mb_ptr2char)(reginput);
3853           else
3854 #endif
3855               c = *reginput;
3856           switch (op)
3857           {
3858           case BOL:
3859             if (reginput != regline)
3860                 status = RA_NOMATCH;
3861             break;
3862
3863           case EOL:
3864             if (c != NUL)
3865                 status = RA_NOMATCH;
3866             break;
3867
3868           case RE_BOF:
3869             /* We're not at the beginning of the file when below the first
3870              * line where we started, not at the start of the line or we
3871              * didn't start at the first line of the buffer. */
3872             if (reglnum != 0 || reginput != regline
3873                                           || (REG_MULTI && reg_firstlnum > 1))
3874                 status = RA_NOMATCH;
3875             break;
3876
3877           case RE_EOF:
3878             if (reglnum != reg_maxline || c != NUL)
3879                 status = RA_NOMATCH;
3880             break;
3881
3882           case CURSOR:
3883             /* Check if the buffer is in a window and compare the
3884              * reg_win->w_cursor position to the match position. */
3885             if (reg_win == NULL
3886                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3887                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3888                 status = RA_NOMATCH;
3889             break;
3890
3891           case RE_MARK:
3892             /* Compare the mark position to the match position.  NOTE: Always
3893              * uses the current buffer. */
3894             {
3895                 int     mark = OPERAND(scan)[0];
3896                 int     cmp = OPERAND(scan)[1];
3897                 pos_T   *pos;
3898
3899                 pos = getmark(mark, FALSE);
3900                 if (pos == NULL              /* mark doesn't exist */
3901                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3902                         || (pos->lnum == reglnum + reg_firstlnum
3903                                 ? (pos->col == (colnr_T)(reginput - regline)
3904                                     ? (cmp == '<' || cmp == '>')
3905                                     : (pos->col < (colnr_T)(reginput - regline)
3906                                         ? cmp != '>'
3907                                         : cmp != '<'))
3908                                 : (pos->lnum < reglnum + reg_firstlnum
3909                                     ? cmp != '>'
3910                                     : cmp != '<')))
3911                     status = RA_NOMATCH;
3912             }
3913             break;
3914
3915           case RE_VISUAL:
3916 #ifdef FEAT_VISUAL
3917             /* Check if the buffer is the current buffer. and whether the
3918              * position is inside the Visual area. */
3919             if (reg_buf != curbuf || VIsual.lnum == 0)
3920                 status = RA_NOMATCH;
3921             else
3922             {
3923                 pos_T       top, bot;
3924                 linenr_T    lnum;
3925                 colnr_T     col;
3926                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3927                 int         mode;
3928
3929                 if (VIsual_active)
3930                 {
3931                     if (lt(VIsual, wp->w_cursor))
3932                     {
3933                         top = VIsual;
3934                         bot = wp->w_cursor;
3935                     }
3936                     else
3937                     {
3938                         top = wp->w_cursor;
3939                         bot = VIsual;
3940                     }
3941                     mode = VIsual_mode;
3942                 }
3943                 else
3944                 {
3945                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3946                     {
3947                         top = curbuf->b_visual.vi_start;
3948                         bot = curbuf->b_visual.vi_end;
3949                     }
3950                     else
3951                     {
3952                         top = curbuf->b_visual.vi_end;
3953                         bot = curbuf->b_visual.vi_start;
3954                     }
3955                     mode = curbuf->b_visual.vi_mode;
3956                 }
3957                 lnum = reglnum + reg_firstlnum;
3958                 col = (colnr_T)(reginput - regline);
3959                 if (lnum < top.lnum || lnum > bot.lnum)
3960                     status = RA_NOMATCH;
3961                 else if (mode == 'v')
3962                 {
3963                     if ((lnum == top.lnum && col < top.col)
3964                             || (lnum == bot.lnum
3965                                          && col >= bot.col + (*p_sel != 'e')))
3966                         status = RA_NOMATCH;
3967                 }
3968                 else if (mode == Ctrl_V)
3969                 {
3970                     colnr_T         start, end;
3971                     colnr_T         start2, end2;
3972                     colnr_T         cols;
3973
3974                     getvvcol(wp, &top, &start, NULL, &end);
3975                     getvvcol(wp, &bot, &start2, NULL, &end2);
3976                     if (start2 < start)
3977                         start = start2;
3978                     if (end2 > end)
3979                         end = end2;
3980                     if (top.col == MAXCOL || bot.col == MAXCOL)
3981                         end = MAXCOL;
3982                     cols = win_linetabsize(wp,
3983                                       regline, (colnr_T)(reginput - regline));
3984                     if (cols < start || cols > end - (*p_sel == 'e'))
3985                         status = RA_NOMATCH;
3986                 }
3987             }
3988 #else
3989             status = RA_NOMATCH;
3990 #endif
3991             break;
3992
3993           case RE_LNUM:
3994             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3995                                                                         scan))
3996                 status = RA_NOMATCH;
3997             break;
3998
3999           case RE_COL:
4000             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
4001                 status = RA_NOMATCH;
4002             break;
4003
4004           case RE_VCOL:
4005             if (!re_num_cmp((long_u)win_linetabsize(
4006                             reg_win == NULL ? curwin : reg_win,
4007                             regline, (colnr_T)(reginput - regline)) + 1, scan))
4008                 status = RA_NOMATCH;
4009             break;
4010
4011           case BOW:     /* \<word; reginput points to w */
4012             if (c == NUL)       /* Can't match at end of line */
4013                 status = RA_NOMATCH;
4014 #ifdef FEAT_MBYTE
4015             else if (has_mbyte)
4016             {
4017                 int this_class;
4018
4019                 /* Get class of current and previous char (if it exists). */
4020                 this_class = mb_get_class(reginput);
4021                 if (this_class <= 1)
4022                     status = RA_NOMATCH;  /* not on a word at all */
4023                 else if (reg_prev_class() == this_class)
4024                     status = RA_NOMATCH;  /* previous char is in same word */
4025             }
4026 #endif
4027             else
4028             {
4029                 if (!vim_iswordc(c)
4030                         || (reginput > regline && vim_iswordc(reginput[-1])))
4031                     status = RA_NOMATCH;
4032             }
4033             break;
4034
4035           case EOW:     /* word\>; reginput points after d */
4036             if (reginput == regline)    /* Can't match at start of line */
4037                 status = RA_NOMATCH;
4038 #ifdef FEAT_MBYTE
4039             else if (has_mbyte)
4040             {
4041                 int this_class, prev_class;
4042
4043                 /* Get class of current and previous char (if it exists). */
4044                 this_class = mb_get_class(reginput);
4045                 prev_class = reg_prev_class();
4046                 if (this_class == prev_class
4047                         || prev_class == 0 || prev_class == 1)
4048                     status = RA_NOMATCH;
4049             }
4050 #endif
4051             else
4052             {
4053                 if (!vim_iswordc(reginput[-1])
4054                         || (reginput[0] != NUL && vim_iswordc(c)))
4055                     status = RA_NOMATCH;
4056             }
4057             break; /* Matched with EOW */
4058
4059           case ANY:
4060             if (c == NUL)
4061                 status = RA_NOMATCH;
4062             else
4063                 ADVANCE_REGINPUT();
4064             break;
4065
4066           case IDENT:
4067             if (!vim_isIDc(c))
4068                 status = RA_NOMATCH;
4069             else
4070                 ADVANCE_REGINPUT();
4071             break;
4072
4073           case SIDENT:
4074             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4075                 status = RA_NOMATCH;
4076             else
4077                 ADVANCE_REGINPUT();
4078             break;
4079
4080           case KWORD:
4081             if (!vim_iswordp(reginput))
4082                 status = RA_NOMATCH;
4083             else
4084                 ADVANCE_REGINPUT();
4085             break;
4086
4087           case SKWORD:
4088             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4089                 status = RA_NOMATCH;
4090             else
4091                 ADVANCE_REGINPUT();
4092             break;
4093
4094           case FNAME:
4095             if (!vim_isfilec(c))
4096                 status = RA_NOMATCH;
4097             else
4098                 ADVANCE_REGINPUT();
4099             break;
4100
4101           case SFNAME:
4102             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4103                 status = RA_NOMATCH;
4104             else
4105                 ADVANCE_REGINPUT();
4106             break;
4107
4108           case PRINT:
4109             if (ptr2cells(reginput) != 1)
4110                 status = RA_NOMATCH;
4111             else
4112                 ADVANCE_REGINPUT();
4113             break;
4114
4115           case SPRINT:
4116             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4117                 status = RA_NOMATCH;
4118             else
4119                 ADVANCE_REGINPUT();
4120             break;
4121
4122           case WHITE:
4123             if (!vim_iswhite(c))
4124                 status = RA_NOMATCH;
4125             else
4126                 ADVANCE_REGINPUT();
4127             break;
4128
4129           case NWHITE:
4130             if (c == NUL || vim_iswhite(c))
4131                 status = RA_NOMATCH;
4132             else
4133                 ADVANCE_REGINPUT();
4134             break;
4135
4136           case DIGIT:
4137             if (!ri_digit(c))
4138                 status = RA_NOMATCH;
4139             else
4140                 ADVANCE_REGINPUT();
4141             break;
4142
4143           case NDIGIT:
4144             if (c == NUL || ri_digit(c))
4145                 status = RA_NOMATCH;
4146             else
4147                 ADVANCE_REGINPUT();
4148             break;
4149
4150           case HEX:
4151             if (!ri_hex(c))
4152                 status = RA_NOMATCH;
4153             else
4154                 ADVANCE_REGINPUT();
4155             break;
4156
4157           case NHEX:
4158             if (c == NUL || ri_hex(c))
4159                 status = RA_NOMATCH;
4160             else
4161                 ADVANCE_REGINPUT();
4162             break;
4163
4164           case OCTAL:
4165             if (!ri_octal(c))
4166                 status = RA_NOMATCH;
4167             else
4168                 ADVANCE_REGINPUT();
4169             break;
4170
4171           case NOCTAL:
4172             if (c == NUL || ri_octal(c))
4173                 status = RA_NOMATCH;
4174             else
4175                 ADVANCE_REGINPUT();
4176             break;
4177
4178           case WORD:
4179             if (!ri_word(c))
4180                 status = RA_NOMATCH;
4181             else
4182                 ADVANCE_REGINPUT();
4183             break;
4184
4185           case NWORD:
4186             if (c == NUL || ri_word(c))
4187                 status = RA_NOMATCH;
4188             else
4189                 ADVANCE_REGINPUT();
4190             break;
4191
4192           case HEAD:
4193             if (!ri_head(c))
4194                 status = RA_NOMATCH;
4195             else
4196                 ADVANCE_REGINPUT();
4197             break;
4198
4199           case NHEAD:
4200             if (c == NUL || ri_head(c))
4201                 status = RA_NOMATCH;
4202             else
4203                 ADVANCE_REGINPUT();
4204             break;
4205
4206           case ALPHA:
4207             if (!ri_alpha(c))
4208                 status = RA_NOMATCH;
4209             else
4210                 ADVANCE_REGINPUT();
4211             break;
4212
4213           case NALPHA:
4214             if (c == NUL || ri_alpha(c))
4215                 status = RA_NOMATCH;
4216             else
4217                 ADVANCE_REGINPUT();
4218             break;
4219
4220           case LOWER:
4221             if (!ri_lower(c))
4222                 status = RA_NOMATCH;
4223             else
4224                 ADVANCE_REGINPUT();
4225             break;
4226
4227           case NLOWER:
4228             if (c == NUL || ri_lower(c))
4229                 status = RA_NOMATCH;
4230             else
4231                 ADVANCE_REGINPUT();
4232             break;
4233
4234           case UPPER:
4235             if (!ri_upper(c))
4236                 status = RA_NOMATCH;
4237             else
4238                 ADVANCE_REGINPUT();
4239             break;
4240
4241           case NUPPER:
4242             if (c == NUL || ri_upper(c))
4243                 status = RA_NOMATCH;
4244             else
4245                 ADVANCE_REGINPUT();
4246             break;
4247
4248           case EXACTLY:
4249             {
4250                 int     len;
4251                 char_u  *opnd;
4252
4253                 opnd = OPERAND(scan);
4254                 /* Inline the first byte, for speed. */
4255                 if (*opnd != *reginput
4256                         && (!ireg_ic || (
4257 #ifdef FEAT_MBYTE
4258                             !enc_utf8 &&
4259 #endif
4260                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4261                     status = RA_NOMATCH;
4262                 else if (*opnd == NUL)
4263                 {
4264                     /* match empty string always works; happens when "~" is
4265                      * empty. */
4266                 }
4267                 else if (opnd[1] == NUL
4268 #ifdef FEAT_MBYTE
4269                             && !(enc_utf8 && ireg_ic)
4270 #endif
4271                         )
4272                     ++reginput;         /* matched a single char */
4273                 else
4274                 {
4275                     len = (int)STRLEN(opnd);
4276                     /* Need to match first byte again for multi-byte. */
4277                     if (cstrncmp(opnd, reginput, &len) != 0)
4278                         status = RA_NOMATCH;
4279 #ifdef FEAT_MBYTE
4280                     /* Check for following composing character. */
4281                     else if (enc_utf8
4282                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4283                     {
4284                         /* raaron: This code makes a composing character get
4285                          * ignored, which is the correct behavior (sometimes)
4286                          * for voweled Hebrew texts. */
4287                         if (!ireg_icombine)
4288                             status = RA_NOMATCH;
4289                     }
4290 #endif
4291                     else
4292                         reginput += len;
4293                 }
4294             }
4295             break;
4296
4297           case ANYOF:
4298           case ANYBUT:
4299             if (c == NUL)
4300                 status = RA_NOMATCH;
4301             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4302                 status = RA_NOMATCH;
4303             else
4304                 ADVANCE_REGINPUT();
4305             break;
4306
4307 #ifdef FEAT_MBYTE
4308           case MULTIBYTECODE:
4309             if (has_mbyte)
4310             {
4311                 int     i, len;
4312                 char_u  *opnd;
4313                 int     opndc = 0, inpc;
4314
4315                 opnd = OPERAND(scan);
4316                 /* Safety check (just in case 'encoding' was changed since
4317                  * compiling the program). */
4318                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4319                 {
4320                     status = RA_NOMATCH;
4321                     break;
4322                 }
4323                 if (enc_utf8)
4324                     opndc = mb_ptr2char(opnd);
4325                 if (enc_utf8 && utf_iscomposing(opndc))
4326                 {
4327                     /* When only a composing char is given match at any
4328                      * position where that composing char appears. */
4329                     status = RA_NOMATCH;
4330                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4331                     {
4332                         inpc = mb_ptr2char(reginput + i);
4333                         if (!utf_iscomposing(inpc))
4334                         {
4335                             if (i > 0)
4336                                 break;
4337                         }
4338                         else if (opndc == inpc)
4339                         {
4340                             /* Include all following composing chars. */
4341                             len = i + mb_ptr2len(reginput + i);
4342                             status = RA_MATCH;
4343                             break;
4344                         }
4345                     }
4346                 }
4347                 else
4348                     for (i = 0; i < len; ++i)
4349                         if (opnd[i] != reginput[i])
4350                         {
4351                             status = RA_NOMATCH;
4352                             break;
4353                         }
4354                 reginput += len;
4355             }
4356             else
4357                 status = RA_NOMATCH;
4358             break;
4359 #endif
4360
4361           case NOTHING:
4362             break;
4363
4364           case BACK:
4365             {
4366                 int             i;
4367                 backpos_T       *bp;
4368
4369                 /*
4370                  * When we run into BACK we need to check if we don't keep
4371                  * looping without matching any input.  The second and later
4372                  * times a BACK is encountered it fails if the input is still
4373                  * at the same position as the previous time.
4374                  * The positions are stored in "backpos" and found by the
4375                  * current value of "scan", the position in the RE program.
4376                  */
4377                 bp = (backpos_T *)backpos.ga_data;
4378                 for (i = 0; i < backpos.ga_len; ++i)
4379                     if (bp[i].bp_scan == scan)
4380                         break;
4381                 if (i == backpos.ga_len)
4382                 {
4383                     /* First time at this BACK, make room to store the pos. */
4384                     if (ga_grow(&backpos, 1) == FAIL)
4385                         status = RA_FAIL;
4386                     else
4387                     {
4388                         /* get "ga_data" again, it may have changed */
4389                         bp = (backpos_T *)backpos.ga_data;
4390                         bp[i].bp_scan = scan;
4391                         ++backpos.ga_len;
4392                     }
4393                 }
4394                 else if (reg_save_equal(&bp[i].bp_pos))
4395                     /* Still at same position as last time, fail. */
4396                     status = RA_NOMATCH;
4397
4398                 if (status != RA_FAIL && status != RA_NOMATCH)
4399                     reg_save(&bp[i].bp_pos, &backpos);
4400             }
4401             break;
4402
4403           case MOPEN + 0:   /* Match start: \zs */
4404           case MOPEN + 1:   /* \( */
4405           case MOPEN + 2:
4406           case MOPEN + 3:
4407           case MOPEN + 4:
4408           case MOPEN + 5:
4409           case MOPEN + 6:
4410           case MOPEN + 7:
4411           case MOPEN + 8:
4412           case MOPEN + 9:
4413             {
4414                 no = op - MOPEN;
4415                 cleanup_subexpr();
4416                 rp = regstack_push(RS_MOPEN, scan);
4417                 if (rp == NULL)
4418                     status = RA_FAIL;
4419                 else
4420                 {
4421                     rp->rs_no = no;
4422                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4423                                                              &reg_startp[no]);
4424                     /* We simply continue and handle the result when done. */
4425                 }
4426             }
4427             break;
4428
4429           case NOPEN:       /* \%( */
4430           case NCLOSE:      /* \) after \%( */
4431                 if (regstack_push(RS_NOPEN, scan) == NULL)
4432                     status = RA_FAIL;
4433                 /* We simply continue and handle the result when done. */
4434                 break;
4435
4436 #ifdef FEAT_SYN_HL
4437           case ZOPEN + 1:
4438           case ZOPEN + 2:
4439           case ZOPEN + 3:
4440           case ZOPEN + 4:
4441           case ZOPEN + 5:
4442           case ZOPEN + 6:
4443           case ZOPEN + 7:
4444           case ZOPEN + 8:
4445           case ZOPEN + 9:
4446             {
4447                 no = op - ZOPEN;
4448                 cleanup_zsubexpr();
4449                 rp = regstack_push(RS_ZOPEN, scan);
4450                 if (rp == NULL)
4451                     status = RA_FAIL;
4452                 else
4453                 {
4454                     rp->rs_no = no;
4455                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4456                                                              &reg_startzp[no]);
4457                     /* We simply continue and handle the result when done. */
4458                 }
4459             }
4460             break;
4461 #endif
4462
4463           case MCLOSE + 0:  /* Match end: \ze */
4464           case MCLOSE + 1:  /* \) */
4465           case MCLOSE + 2:
4466           case MCLOSE + 3:
4467           case MCLOSE + 4:
4468           case MCLOSE + 5:
4469           case MCLOSE + 6:
4470           case MCLOSE + 7:
4471           case MCLOSE + 8:
4472           case MCLOSE + 9:
4473             {
4474                 no = op - MCLOSE;
4475                 cleanup_subexpr();
4476                 rp = regstack_push(RS_MCLOSE, scan);
4477                 if (rp == NULL)
4478                     status = RA_FAIL;
4479                 else
4480                 {
4481                     rp->rs_no = no;
4482                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4483                     /* We simply continue and handle the result when done. */
4484                 }
4485             }
4486             break;
4487
4488 #ifdef FEAT_SYN_HL
4489           case ZCLOSE + 1:  /* \) after \z( */
4490           case ZCLOSE + 2:
4491           case ZCLOSE + 3:
4492           case ZCLOSE + 4:
4493           case ZCLOSE + 5:
4494           case ZCLOSE + 6:
4495           case ZCLOSE + 7:
4496           case ZCLOSE + 8:
4497           case ZCLOSE + 9:
4498             {
4499                 no = op - ZCLOSE;
4500                 cleanup_zsubexpr();
4501                 rp = regstack_push(RS_ZCLOSE, scan);
4502                 if (rp == NULL)
4503                     status = RA_FAIL;
4504                 else
4505                 {
4506                     rp->rs_no = no;
4507                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4508                                                               &reg_endzp[no]);
4509                     /* We simply continue and handle the result when done. */
4510                 }
4511             }
4512             break;
4513 #endif
4514
4515           case BACKREF + 1:
4516           case BACKREF + 2:
4517           case BACKREF + 3:
4518           case BACKREF + 4:
4519           case BACKREF + 5:
4520           case BACKREF + 6:
4521           case BACKREF + 7:
4522           case BACKREF + 8:
4523           case BACKREF + 9:
4524             {
4525                 int             len;
4526                 linenr_T        clnum;
4527                 colnr_T         ccol;
4528                 char_u          *p;
4529
4530                 no = op - BACKREF;
4531                 cleanup_subexpr();
4532                 if (!REG_MULTI)         /* Single-line regexp */
4533                 {
4534                     if (reg_startp[no] == NULL || reg_endp[no] == NULL)
4535                     {
4536                         /* Backref was not set: Match an empty string. */
4537                         len = 0;
4538                     }
4539                     else
4540                     {
4541                         /* Compare current input with back-ref in the same
4542                          * line. */
4543                         len = (int)(reg_endp[no] - reg_startp[no]);
4544                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4545                             status = RA_NOMATCH;
4546                     }
4547                 }
4548                 else                            /* Multi-line regexp */
4549                 {
4550                     if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
4551                     {
4552                         /* Backref was not set: Match an empty string. */
4553                         len = 0;
4554                     }
4555                     else
4556                     {
4557                         if (reg_startpos[no].lnum == reglnum
4558                                 && reg_endpos[no].lnum == reglnum)
4559                         {
4560                             /* Compare back-ref within the current line. */
4561                             len = reg_endpos[no].col - reg_startpos[no].col;
4562                             if (cstrncmp(regline + reg_startpos[no].col,
4563                                                           reginput, &len) != 0)
4564                                 status = RA_NOMATCH;
4565                         }
4566                         else
4567                         {
4568                             /* Messy situation: Need to compare between two
4569                              * lines. */
4570                             ccol = reg_startpos[no].col;
4571                             clnum = reg_startpos[no].lnum;
4572                             for (;;)
4573                             {
4574                                 /* Since getting one line may invalidate
4575                                  * the other, need to make copy.  Slow! */
4576                                 if (regline != reg_tofree)
4577                                 {
4578                                     len = (int)STRLEN(regline);
4579                                     if (reg_tofree == NULL
4580                                                  || len >= (int)reg_tofreelen)
4581                                     {
4582                                         len += 50;      /* get some extra */
4583                                         vim_free(reg_tofree);
4584                                         reg_tofree = alloc(len);
4585                                         if (reg_tofree == NULL)
4586                                         {
4587                                             status = RA_FAIL; /* outof memory!*/
4588                                             break;
4589                                         }
4590                                         reg_tofreelen = len;
4591                                     }
4592                                     STRCPY(reg_tofree, regline);
4593                                     reginput = reg_tofree
4594                                                        + (reginput - regline);
4595                                     regline = reg_tofree;
4596                                 }
4597
4598                                 /* Get the line to compare with. */
4599                                 p = reg_getline(clnum);
4600                                 if (clnum == reg_endpos[no].lnum)
4601                                     len = reg_endpos[no].col - ccol;
4602                                 else
4603                                     len = (int)STRLEN(p + ccol);
4604
4605                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4606                                 {
4607                                     status = RA_NOMATCH;  /* doesn't match */
4608                                     break;
4609                                 }
4610                                 if (clnum == reg_endpos[no].lnum)
4611                                     break;              /* match and at end! */
4612                                 if (reglnum >= reg_maxline)
4613                                 {
4614                                     status = RA_NOMATCH;  /* text too short */
4615                                     break;
4616                                 }
4617
4618                                 /* Advance to next line. */
4619                                 reg_nextline();
4620                                 ++clnum;
4621                                 ccol = 0;
4622                                 if (got_int)
4623                                 {
4624                                     status = RA_FAIL;
4625                                     break;
4626                                 }
4627                             }
4628
4629                             /* found a match!  Note that regline may now point
4630                              * to a copy of the line, that should not matter. */
4631                         }
4632                     }
4633                 }
4634
4635                 /* Matched the backref, skip over it. */
4636                 reginput += len;
4637             }
4638             break;
4639
4640 #ifdef FEAT_SYN_HL
4641           case ZREF + 1:
4642           case ZREF + 2:
4643           case ZREF + 3:
4644           case ZREF + 4:
4645           case ZREF + 5:
4646           case ZREF + 6:
4647           case ZREF + 7:
4648           case ZREF + 8:
4649           case ZREF + 9:
4650             {
4651                 int     len;
4652
4653                 cleanup_zsubexpr();
4654                 no = op - ZREF;
4655                 if (re_extmatch_in != NULL
4656                         && re_extmatch_in->matches[no] != NULL)
4657                 {
4658                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4659                     if (cstrncmp(re_extmatch_in->matches[no],
4660                                                           reginput, &len) != 0)
4661                         status = RA_NOMATCH;
4662                     else
4663                         reginput += len;
4664                 }
4665                 else
4666                 {
4667                     /* Backref was not set: Match an empty string. */
4668                 }
4669             }
4670             break;
4671 #endif
4672
4673           case BRANCH:
4674             {
4675                 if (OP(next) != BRANCH) /* No choice. */
4676                     next = OPERAND(scan);       /* Avoid recursion. */
4677                 else
4678                 {
4679                     rp = regstack_push(RS_BRANCH, scan);
4680                     if (rp == NULL)
4681                         status = RA_FAIL;
4682                     else
4683                         status = RA_BREAK;      /* rest is below */
4684                 }
4685             }
4686             break;
4687
4688           case BRACE_LIMITS:
4689             {
4690                 if (OP(next) == BRACE_SIMPLE)
4691                 {
4692                     bl_minval = OPERAND_MIN(scan);
4693                     bl_maxval = OPERAND_MAX(scan);
4694                 }
4695                 else if (OP(next) >= BRACE_COMPLEX
4696                         && OP(next) < BRACE_COMPLEX + 10)
4697                 {
4698                     no = OP(next) - BRACE_COMPLEX;
4699                     brace_min[no] = OPERAND_MIN(scan);
4700                     brace_max[no] = OPERAND_MAX(scan);
4701                     brace_count[no] = 0;
4702                 }
4703                 else
4704                 {
4705                     EMSG(_(e_internal));            /* Shouldn't happen */
4706                     status = RA_FAIL;
4707                 }
4708             }
4709             break;
4710
4711           case BRACE_COMPLEX + 0:
4712           case BRACE_COMPLEX + 1:
4713           case BRACE_COMPLEX + 2:
4714           case BRACE_COMPLEX + 3:
4715           case BRACE_COMPLEX + 4:
4716           case BRACE_COMPLEX + 5:
4717           case BRACE_COMPLEX + 6:
4718           case BRACE_COMPLEX + 7:
4719           case BRACE_COMPLEX + 8:
4720           case BRACE_COMPLEX + 9:
4721             {
4722                 no = op - BRACE_COMPLEX;
4723                 ++brace_count[no];
4724
4725                 /* If not matched enough times yet, try one more */
4726                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4727                                              ? brace_min[no] : brace_max[no]))
4728                 {
4729                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4730                     if (rp == NULL)
4731                         status = RA_FAIL;
4732                     else
4733                     {
4734                         rp->rs_no = no;
4735                         reg_save(&rp->rs_un.regsave, &backpos);
4736                         next = OPERAND(scan);
4737                         /* We continue and handle the result when done. */
4738                     }
4739                     break;
4740                 }
4741
4742                 /* If matched enough times, may try matching some more */
4743                 if (brace_min[no] <= brace_max[no])
4744                 {
4745                     /* Range is the normal way around, use longest match */
4746                     if (brace_count[no] <= brace_max[no])
4747                     {
4748                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4749                         if (rp == NULL)
4750                             status = RA_FAIL;
4751                         else
4752                         {
4753                             rp->rs_no = no;
4754                             reg_save(&rp->rs_un.regsave, &backpos);
4755                             next = OPERAND(scan);
4756                             /* We continue and handle the result when done. */
4757                         }
4758                     }
4759                 }
4760                 else
4761                 {
4762                     /* Range is backwards, use shortest match first */
4763                     if (brace_count[no] <= brace_min[no])
4764                     {
4765                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4766                         if (rp == NULL)
4767                             status = RA_FAIL;
4768                         else
4769                         {
4770                             reg_save(&rp->rs_un.regsave, &backpos);
4771                             /* We continue and handle the result when done. */
4772                         }
4773                     }
4774                 }
4775             }
4776             break;
4777
4778           case BRACE_SIMPLE:
4779           case STAR:
4780           case PLUS:
4781             {
4782                 regstar_T       rst;
4783
4784                 /*
4785                  * Lookahead to avoid useless match attempts when we know
4786                  * what character comes next.
4787                  */
4788                 if (OP(next) == EXACTLY)
4789                 {
4790                     rst.nextb = *OPERAND(next);
4791                     if (ireg_ic)
4792                     {
4793                         if (MB_ISUPPER(rst.nextb))
4794                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4795                         else
4796                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4797                     }
4798                     else
4799                         rst.nextb_ic = rst.nextb;
4800                 }
4801                 else
4802                 {
4803                     rst.nextb = NUL;
4804                     rst.nextb_ic = NUL;
4805                 }
4806                 if (op != BRACE_SIMPLE)
4807                 {
4808                     rst.minval = (op == STAR) ? 0 : 1;
4809                     rst.maxval = MAX_LIMIT;
4810                 }
4811                 else
4812                 {
4813                     rst.minval = bl_minval;
4814                     rst.maxval = bl_maxval;
4815                 }
4816
4817                 /*
4818                  * When maxval > minval, try matching as much as possible, up
4819                  * to maxval.  When maxval < minval, try matching at least the
4820                  * minimal number (since the range is backwards, that's also
4821                  * maxval!).
4822                  */
4823                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4824                 if (got_int)
4825                 {
4826                     status = RA_FAIL;
4827                     break;
4828                 }
4829                 if (rst.minval <= rst.maxval
4830                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4831                 {
4832                     /* It could match.  Prepare for trying to match what
4833                      * follows.  The code is below.  Parameters are stored in
4834                      * a regstar_T on the regstack. */
4835                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4836                     {
4837                         EMSG(_(e_maxmempat));
4838                         status = RA_FAIL;
4839                     }
4840                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4841                         status = RA_FAIL;
4842                     else
4843                     {
4844                         regstack.ga_len += sizeof(regstar_T);
4845                         rp = regstack_push(rst.minval <= rst.maxval
4846                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4847                         if (rp == NULL)
4848                             status = RA_FAIL;
4849                         else
4850                         {
4851                             *(((regstar_T *)rp) - 1) = rst;
4852                             status = RA_BREAK;      /* skip the restore bits */
4853                         }
4854                     }
4855                 }
4856                 else
4857                     status = RA_NOMATCH;
4858
4859             }
4860             break;
4861
4862           case NOMATCH:
4863           case MATCH:
4864           case SUBPAT:
4865             rp = regstack_push(RS_NOMATCH, scan);
4866             if (rp == NULL)
4867                 status = RA_FAIL;
4868             else
4869             {
4870                 rp->rs_no = op;
4871                 reg_save(&rp->rs_un.regsave, &backpos);
4872                 next = OPERAND(scan);
4873                 /* We continue and handle the result when done. */
4874             }
4875             break;
4876
4877           case BEHIND:
4878           case NOBEHIND:
4879             /* Need a bit of room to store extra positions. */
4880             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4881             {
4882                 EMSG(_(e_maxmempat));
4883                 status = RA_FAIL;
4884             }
4885             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4886                 status = RA_FAIL;
4887             else
4888             {
4889                 regstack.ga_len += sizeof(regbehind_T);
4890                 rp = regstack_push(RS_BEHIND1, scan);
4891                 if (rp == NULL)
4892                     status = RA_FAIL;
4893                 else
4894                 {
4895                     /* Need to save the subexpr to be able to restore them
4896                      * when there is a match but we don't use it. */
4897                     save_subexpr(((regbehind_T *)rp) - 1);
4898
4899                     rp->rs_no = op;
4900                     reg_save(&rp->rs_un.regsave, &backpos);
4901                     /* First try if what follows matches.  If it does then we
4902                      * check the behind match by looping. */
4903                 }
4904             }
4905             break;
4906
4907           case BHPOS:
4908             if (REG_MULTI)
4909             {
4910                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4911                         || behind_pos.rs_u.pos.lnum != reglnum)
4912                     status = RA_NOMATCH;
4913             }
4914             else if (behind_pos.rs_u.ptr != reginput)
4915                 status = RA_NOMATCH;
4916             break;
4917
4918           case NEWL:
4919             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4920                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4921                 status = RA_NOMATCH;
4922             else if (reg_line_lbr)
4923                 ADVANCE_REGINPUT();
4924             else
4925                 reg_nextline();
4926             break;
4927
4928           case END:
4929             status = RA_MATCH;  /* Success! */
4930             break;
4931
4932           default:
4933             EMSG(_(e_re_corr));
4934 #ifdef DEBUG
4935             printf("Illegal op code %d\n", op);
4936 #endif
4937             status = RA_FAIL;
4938             break;
4939           }
4940         }
4941
4942         /* If we can't continue sequentially, break the inner loop. */
4943         if (status != RA_CONT)
4944             break;
4945
4946         /* Continue in inner loop, advance to next item. */
4947         scan = next;
4948
4949     } /* end of inner loop */
4950
4951     /*
4952      * If there is something on the regstack execute the code for the state.
4953      * If the state is popped then loop and use the older state.
4954      */
4955     while (regstack.ga_len > 0 && status != RA_FAIL)
4956     {
4957         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4958         switch (rp->rs_state)
4959         {
4960           case RS_NOPEN:
4961             /* Result is passed on as-is, simply pop the state. */
4962             regstack_pop(&scan);
4963             break;
4964
4965           case RS_MOPEN:
4966             /* Pop the state.  Restore pointers when there is no match. */
4967             if (status == RA_NOMATCH)
4968                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4969                                                   &reg_startp[rp->rs_no]);
4970             regstack_pop(&scan);
4971             break;
4972
4973 #ifdef FEAT_SYN_HL
4974           case RS_ZOPEN:
4975             /* Pop the state.  Restore pointers when there is no match. */
4976             if (status == RA_NOMATCH)
4977                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4978                                                  &reg_startzp[rp->rs_no]);
4979             regstack_pop(&scan);
4980             break;
4981 #endif
4982
4983           case RS_MCLOSE:
4984             /* Pop the state.  Restore pointers when there is no match. */
4985             if (status == RA_NOMATCH)
4986                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4987                                                     &reg_endp[rp->rs_no]);
4988             regstack_pop(&scan);
4989             break;
4990
4991 #ifdef FEAT_SYN_HL
4992           case RS_ZCLOSE:
4993             /* Pop the state.  Restore pointers when there is no match. */
4994             if (status == RA_NOMATCH)
4995                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4996                                                    &reg_endzp[rp->rs_no]);
4997             regstack_pop(&scan);
4998             break;
4999 #endif
5000
5001           case RS_BRANCH:
5002             if (status == RA_MATCH)
5003                 /* this branch matched, use it */
5004                 regstack_pop(&scan);
5005             else
5006             {
5007                 if (status != RA_BREAK)
5008                 {
5009                     /* After a non-matching branch: try next one. */
5010                     reg_restore(&rp->rs_un.regsave, &backpos);
5011                     scan = rp->rs_scan;
5012                 }
5013                 if (scan == NULL || OP(scan) != BRANCH)
5014                 {
5015                     /* no more branches, didn't find a match */
5016                     status = RA_NOMATCH;
5017                     regstack_pop(&scan);
5018                 }
5019                 else
5020                 {
5021                     /* Prepare to try a branch. */
5022                     rp->rs_scan = regnext(scan);
5023                     reg_save(&rp->rs_un.regsave, &backpos);
5024                     scan = OPERAND(scan);
5025                 }
5026             }
5027             break;
5028
5029           case RS_BRCPLX_MORE:
5030             /* Pop the state.  Restore pointers when there is no match. */
5031             if (status == RA_NOMATCH)
5032             {
5033                 reg_restore(&rp->rs_un.regsave, &backpos);
5034                 --brace_count[rp->rs_no];       /* decrement match count */
5035             }
5036             regstack_pop(&scan);
5037             break;
5038
5039           case RS_BRCPLX_LONG:
5040             /* Pop the state.  Restore pointers when there is no match. */
5041             if (status == RA_NOMATCH)
5042             {
5043                 /* There was no match, but we did find enough matches. */
5044                 reg_restore(&rp->rs_un.regsave, &backpos);
5045                 --brace_count[rp->rs_no];
5046                 /* continue with the items after "\{}" */
5047                 status = RA_CONT;
5048             }
5049             regstack_pop(&scan);
5050             if (status == RA_CONT)
5051                 scan = regnext(scan);
5052             break;
5053
5054           case RS_BRCPLX_SHORT:
5055             /* Pop the state.  Restore pointers when there is no match. */
5056             if (status == RA_NOMATCH)
5057                 /* There was no match, try to match one more item. */
5058                 reg_restore(&rp->rs_un.regsave, &backpos);
5059             regstack_pop(&scan);
5060             if (status == RA_NOMATCH)
5061             {
5062                 scan = OPERAND(scan);
5063                 status = RA_CONT;
5064             }
5065             break;
5066
5067           case RS_NOMATCH:
5068             /* Pop the state.  If the operand matches for NOMATCH or
5069              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5070              * except for SUBPAT, and continue with the next item. */
5071             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5072                 status = RA_NOMATCH;
5073             else
5074             {
5075                 status = RA_CONT;
5076                 if (rp->rs_no != SUBPAT)        /* zero-width */
5077                     reg_restore(&rp->rs_un.regsave, &backpos);
5078             }
5079             regstack_pop(&scan);
5080             if (status == RA_CONT)
5081                 scan = regnext(scan);
5082             break;
5083
5084           case RS_BEHIND1:
5085             if (status == RA_NOMATCH)
5086             {
5087                 regstack_pop(&scan);
5088                 regstack.ga_len -= sizeof(regbehind_T);
5089             }
5090             else
5091             {
5092                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5093                  * the behind part does (not) match before the current
5094                  * position in the input.  This must be done at every
5095                  * position in the input and checking if the match ends at
5096                  * the current position. */
5097
5098                 /* save the position after the found match for next */
5099                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5100
5101                 /* start looking for a match with operand at the current
5102                  * position.  Go back one character until we find the
5103                  * result, hitting the start of the line or the previous
5104                  * line (for multi-line matching).
5105                  * Set behind_pos to where the match should end, BHPOS
5106                  * will match it.  Save the current value. */
5107                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5108                 behind_pos = rp->rs_un.regsave;
5109
5110                 rp->rs_state = RS_BEHIND2;
5111
5112                 reg_restore(&rp->rs_un.regsave, &backpos);
5113                 scan = OPERAND(rp->rs_scan);
5114             }
5115             break;
5116
5117           case RS_BEHIND2:
5118             /*
5119              * Looping for BEHIND / NOBEHIND match.
5120              */
5121             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5122             {
5123                 /* found a match that ends where "next" started */
5124                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5125                 if (rp->rs_no == BEHIND)
5126                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5127                                                                     &backpos);
5128                 else
5129                 {
5130                     /* But we didn't want a match.  Need to restore the
5131                      * subexpr, because what follows matched, so they have
5132                      * been set. */
5133                     status = RA_NOMATCH;
5134                     restore_subexpr(((regbehind_T *)rp) - 1);
5135                 }
5136                 regstack_pop(&scan);
5137                 regstack.ga_len -= sizeof(regbehind_T);
5138             }
5139             else
5140             {
5141                 /* No match or a match that doesn't end where we want it: Go
5142                  * back one character.  May go to previous line once. */
5143                 no = OK;
5144                 if (REG_MULTI)
5145                 {
5146                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5147                     {
5148                         if (rp->rs_un.regsave.rs_u.pos.lnum
5149                                         < behind_pos.rs_u.pos.lnum
5150                                 || reg_getline(
5151                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5152                                                                   == NULL)
5153                             no = FAIL;
5154                         else
5155                         {
5156                             reg_restore(&rp->rs_un.regsave, &backpos);
5157                             rp->rs_un.regsave.rs_u.pos.col =
5158                                                  (colnr_T)STRLEN(regline);
5159                         }
5160                     }
5161                     else
5162                         --rp->rs_un.regsave.rs_u.pos.col;
5163                 }
5164                 else
5165                 {
5166                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5167                         no = FAIL;
5168                     else
5169                         --rp->rs_un.regsave.rs_u.ptr;
5170                 }
5171                 if (no == OK)
5172                 {
5173                     /* Advanced, prepare for finding match again. */
5174                     reg_restore(&rp->rs_un.regsave, &backpos);
5175                     scan = OPERAND(rp->rs_scan);
5176                     if (status == RA_MATCH)
5177                     {
5178                         /* We did match, so subexpr may have been changed,
5179                          * need to restore them for the next try. */
5180                         status = RA_NOMATCH;
5181                         restore_subexpr(((regbehind_T *)rp) - 1);
5182                     }
5183                 }
5184                 else
5185                 {
5186                     /* Can't advance.  For NOBEHIND that's a match. */
5187                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5188                     if (rp->rs_no == NOBEHIND)
5189                     {
5190                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5191                                                                     &backpos);
5192                         status = RA_MATCH;
5193                     }
5194                     else
5195                     {
5196                         /* We do want a proper match.  Need to restore the
5197                          * subexpr if we had a match, because they may have
5198                          * been set. */
5199                         if (status == RA_MATCH)
5200                         {
5201                             status = RA_NOMATCH;
5202                             restore_subexpr(((regbehind_T *)rp) - 1);
5203                         }
5204                     }
5205                     regstack_pop(&scan);
5206                     regstack.ga_len -= sizeof(regbehind_T);
5207                 }
5208             }
5209             break;
5210
5211           case RS_STAR_LONG:
5212           case RS_STAR_SHORT:
5213             {
5214                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5215
5216                 if (status == RA_MATCH)
5217                 {
5218                     regstack_pop(&scan);
5219                     regstack.ga_len -= sizeof(regstar_T);
5220                     break;
5221                 }
5222
5223                 /* Tried once already, restore input pointers. */
5224                 if (status != RA_BREAK)
5225                     reg_restore(&rp->rs_un.regsave, &backpos);
5226
5227                 /* Repeat until we found a position where it could match. */
5228                 for (;;)
5229                 {
5230                     if (status != RA_BREAK)
5231                     {
5232                         /* Tried first position already, advance. */
5233                         if (rp->rs_state == RS_STAR_LONG)
5234                         {
5235                             /* Trying for longest match, but couldn't or
5236                              * didn't match -- back up one char. */
5237                             if (--rst->count < rst->minval)
5238                                 break;
5239                             if (reginput == regline)
5240                             {
5241                                 /* backup to last char of previous line */
5242                                 --reglnum;
5243                                 regline = reg_getline(reglnum);
5244                                 /* Just in case regrepeat() didn't count
5245                                  * right. */
5246                                 if (regline == NULL)
5247                                     break;
5248                                 reginput = regline + STRLEN(regline);
5249                                 fast_breakcheck();
5250                             }
5251                             else
5252                                 mb_ptr_back(regline, reginput);
5253                         }
5254                         else
5255                         {
5256                             /* Range is backwards, use shortest match first.
5257                              * Careful: maxval and minval are exchanged!
5258                              * Couldn't or didn't match: try advancing one
5259                              * char. */
5260                             if (rst->count == rst->minval
5261                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5262                                 break;
5263                             ++rst->count;
5264                         }
5265                         if (got_int)
5266                             break;
5267                     }
5268                     else
5269                         status = RA_NOMATCH;
5270
5271                     /* If it could match, try it. */
5272                     if (rst->nextb == NUL || *reginput == rst->nextb
5273                                              || *reginput == rst->nextb_ic)
5274                     {
5275                         reg_save(&rp->rs_un.regsave, &backpos);
5276                         scan = regnext(rp->rs_scan);
5277                         status = RA_CONT;
5278                         break;
5279                     }
5280                 }
5281                 if (status != RA_CONT)
5282                 {
5283                     /* Failed. */
5284                     regstack_pop(&scan);
5285                     regstack.ga_len -= sizeof(regstar_T);
5286                     status = RA_NOMATCH;
5287                 }
5288             }
5289             break;
5290         }
5291
5292         /* If we want to continue the inner loop or didn't pop a state
5293          * continue matching loop */
5294         if (status == RA_CONT || rp == (regitem_T *)
5295                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5296             break;
5297     }
5298
5299     /* May need to continue with the inner loop, starting at "scan". */
5300     if (status == RA_CONT)
5301         continue;
5302
5303     /*
5304      * If the regstack is empty or something failed we are done.
5305      */
5306     if (regstack.ga_len == 0 || status == RA_FAIL)
5307     {
5308         if (scan == NULL)
5309         {
5310             /*
5311              * We get here only if there's trouble -- normally "case END" is
5312              * the terminating point.
5313              */
5314             EMSG(_(e_re_corr));
5315 #ifdef DEBUG
5316             printf("Premature EOL\n");
5317 #endif
5318         }
5319         if (status == RA_FAIL)
5320             got_int = TRUE;
5321         return (status == RA_MATCH);
5322     }
5323
5324   } /* End of loop until the regstack is empty. */
5325
5326   /* NOTREACHED */
5327 }
5328
5329 /*
5330  * Push an item onto the regstack.
5331  * Returns pointer to new item.  Returns NULL when out of memory.
5332  */
5333     static regitem_T *
5334 regstack_push(state, scan)
5335     regstate_T  state;
5336     char_u      *scan;
5337 {
5338     regitem_T   *rp;
5339
5340     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5341     {
5342         EMSG(_(e_maxmempat));
5343         return NULL;
5344     }
5345     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5346         return NULL;
5347
5348     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5349     rp->rs_state = state;
5350     rp->rs_scan = scan;
5351
5352     regstack.ga_len += sizeof(regitem_T);
5353     return rp;
5354 }
5355
5356 /*
5357  * Pop an item from the regstack.
5358  */
5359     static void
5360 regstack_pop(scan)
5361     char_u      **scan;
5362 {
5363     regitem_T   *rp;
5364
5365     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5366     *scan = rp->rs_scan;
5367
5368     regstack.ga_len -= sizeof(regitem_T);
5369 }
5370
5371 /*
5372  * regrepeat - repeatedly match something simple, return how many.
5373  * Advances reginput (and reglnum) to just after the matched chars.
      *
      * "p" is the node of the item to match.  Matching starts at reginput and
      * stops at the first character that does not match or after "maxcount"
      * matches.
      * For the "+ ADD_NL" variants of an item, and for NEWL, a line break also
      * counts as a match.  That is the end of the line when REG_MULTI is set
      * and reg_line_lbr is not (the next line is then obtained with
      * reg_nextline()), or a '\n' character when reg_line_lbr is set.
      * Matching stops when got_int is set after going to the next line.
      * An op code that is not handled here results in the e_re_corr error and
      * zero matches.
5374  */
5375     static int
5376 regrepeat(p, maxcount)
5377     char_u      *p;
5378     long        maxcount;   /* maximum number of matches allowed */
5379 {
5380     long        count = 0;
5381     char_u      *scan;
5382     char_u      *opnd;
5383     int         mask;
         /* "testval" selects the variant of an item, see the cases below.  It
          * stays zero for the variants that do not set it. */
5384     int         testval = 0;
5385
5386     scan = reginput;        /* Make local copy of reginput for speed. */
5387     opnd = OPERAND(p);
5388     switch (OP(p))
5389     {
5390       case ANY:
5391       case ANY + ADD_NL:
5392         while (count < maxcount)
5393         {
5394             /* Matching anything means we continue until end-of-line (or
5395              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5396             while (*scan != NUL && count < maxcount)
5397             {
5398                 ++count;
5399                 mb_ptr_adv(scan);
5400             }
5401             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5402                                          || reg_line_lbr || count == maxcount)
5403                 break;
5404             ++count;            /* count the line-break */
5405             reg_nextline();
5406             scan = reginput;
5407             if (got_int)
5408                 break;
5409         }
5410         break;
5411
5412       case IDENT:
5413       case IDENT + ADD_NL:
5414         testval = TRUE;
5415         /*FALLTHROUGH*/
5416       case SIDENT:
5417       case SIDENT + ADD_NL:
             /* With "testval" set a digit matches as well, without it (SIDENT)
              * a digit does not match.  The KWORD, FNAME and PRINT groups
              * below use "testval" in the same way. */
5418         while (count < maxcount)
5419         {
5420             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5421             {
5422                 mb_ptr_adv(scan);
5423             }
5424             else if (*scan == NUL)
5425             {
5426                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5427                                                               || reg_line_lbr)
5428                     break;
5429                 reg_nextline();
5430                 scan = reginput;
5431                 if (got_int)
5432                     break;
5433             }
5434             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5435                 ++scan;
5436             else
5437                 break;
5438             ++count;
5439         }
5440         break;
5441
5442       case KWORD:
5443       case KWORD + ADD_NL:
5444         testval = TRUE;
5445         /*FALLTHROUGH*/
5446       case SKWORD:
5447       case SKWORD + ADD_NL:
5448         while (count < maxcount)
5449         {
5450             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5451             {
5452                 mb_ptr_adv(scan);
5453             }
5454             else if (*scan == NUL)
5455             {
5456                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5457                                                               || reg_line_lbr)
5458                     break;
5459                 reg_nextline();
5460                 scan = reginput;
5461                 if (got_int)
5462                     break;
5463             }
5464             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5465                 ++scan;
5466             else
5467                 break;
5468             ++count;
5469         }
5470         break;
5471
5472       case FNAME:
5473       case FNAME + ADD_NL:
5474         testval = TRUE;
5475         /*FALLTHROUGH*/
5476       case SFNAME:
5477       case SFNAME + ADD_NL:
5478         while (count < maxcount)
5479         {
5480             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5481             {
5482                 mb_ptr_adv(scan);
5483             }
5484             else if (*scan == NUL)
5485             {
5486                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5487                                                               || reg_line_lbr)
5488                     break;
5489                 reg_nextline();
5490                 scan = reginput;
5491                 if (got_int)
5492                     break;
5493             }
5494             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5495                 ++scan;
5496             else
5497                 break;
5498             ++count;
5499         }
5500         break;
5501
5502       case PRINT:
5503       case PRINT + ADD_NL:
5504         testval = TRUE;
5505         /*FALLTHROUGH*/
5506       case SPRINT:
5507       case SPRINT + ADD_NL:
5508         while (count < maxcount)
5509         {
5510             if (*scan == NUL)
5511             {
5512                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5513                                                               || reg_line_lbr)
5514                     break;
5515                 reg_nextline();
5516                 scan = reginput;
5517                 if (got_int)
5518                     break;
5519             }
5520             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5521             {
5522                 mb_ptr_adv(scan);
5523             }
5524             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5525                 ++scan;
5526             else
5527                 break;
5528             ++count;
5529         }
5530         break;
5531
5532       case WHITE:
5533       case WHITE + ADD_NL:
5534         testval = mask = RI_WHITE;
5535 do_class:
             /* Shared by all the character class items below, which jump here
              * after setting "mask" to the class bit.  "testval" equal to
              * "mask" requires the bit to be set in class_tab[], "testval" zero
              * (the N... items) requires the bit not to be set. */
5536         while (count < maxcount)
5537         {
5538 #ifdef FEAT_MBYTE
5539             int         l;
5540 #endif
5541             if (*scan == NUL)
5542             {
5543                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5544                                                               || reg_line_lbr)
5545                     break;
5546                 reg_nextline();
5547                 scan = reginput;
5548                 if (got_int)
5549                     break;
5550             }
5551 #ifdef FEAT_MBYTE
5552             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5553             {
                     /* A multi-byte character is not looked up in class_tab[],
                      * it only matches when "testval" is zero. */
5554                 if (testval != 0)
5555                     break;
5556                 scan += l;
5557             }
5558 #endif
5559             else if ((class_tab[*scan] & mask) == testval)
5560                 ++scan;
5561             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5562                 ++scan;
5563             else
5564                 break;
5565             ++count;
5566         }
5567         break;
5568
5569       case NWHITE:
5570       case NWHITE + ADD_NL:
5571         mask = RI_WHITE;
5572         goto do_class;
5573       case DIGIT:
5574       case DIGIT + ADD_NL:
5575         testval = mask = RI_DIGIT;
5576         goto do_class;
5577       case NDIGIT:
5578       case NDIGIT + ADD_NL:
5579         mask = RI_DIGIT;
5580         goto do_class;
5581       case HEX:
5582       case HEX + ADD_NL:
5583         testval = mask = RI_HEX;
5584         goto do_class;
5585       case NHEX:
5586       case NHEX + ADD_NL:
5587         mask = RI_HEX;
5588         goto do_class;
5589       case OCTAL:
5590       case OCTAL + ADD_NL:
5591         testval = mask = RI_OCTAL;
5592         goto do_class;
5593       case NOCTAL:
5594       case NOCTAL + ADD_NL:
5595         mask = RI_OCTAL;
5596         goto do_class;
5597       case WORD:
5598       case WORD + ADD_NL:
5599         testval = mask = RI_WORD;
5600         goto do_class;
5601       case NWORD:
5602       case NWORD + ADD_NL:
5603         mask = RI_WORD;
5604         goto do_class;
5605       case HEAD:
5606       case HEAD + ADD_NL:
5607         testval = mask = RI_HEAD;
5608         goto do_class;
5609       case NHEAD:
5610       case NHEAD + ADD_NL:
5611         mask = RI_HEAD;
5612         goto do_class;
5613       case ALPHA:
5614       case ALPHA + ADD_NL:
5615         testval = mask = RI_ALPHA;
5616         goto do_class;
5617       case NALPHA:
5618       case NALPHA + ADD_NL:
5619         mask = RI_ALPHA;
5620         goto do_class;
5621       case LOWER:
5622       case LOWER + ADD_NL:
5623         testval = mask = RI_LOWER;
5624         goto do_class;
5625       case NLOWER:
5626       case NLOWER + ADD_NL:
5627         mask = RI_LOWER;
5628         goto do_class;
5629       case UPPER:
5630       case UPPER + ADD_NL:
5631         testval = mask = RI_UPPER;
5632         goto do_class;
5633       case NUPPER:
5634       case NUPPER + ADD_NL:
5635         mask = RI_UPPER;
5636         goto do_class;
5637
5638       case EXACTLY:
5639         {
5640             int     cu, cl;
5641
5642             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5643              * would have been used for it.  It does handle single-byte
5644              * characters, such as latin1. */
5645             if (ireg_ic)
5646             {
5647                 cu = MB_TOUPPER(*opnd);
5648                 cl = MB_TOLOWER(*opnd);
5649                 while (count < maxcount && (*scan == cu || *scan == cl))
5650                 {
5651                     count++;
5652                     scan++;
5653                 }
5654             }
5655             else
5656             {
5657                 cu = *opnd;
5658                 while (count < maxcount && *scan == cu)
5659                 {
5660                     count++;
5661                     scan++;
5662                 }
5663             }
5664             break;
5665         }
5666
5667 #ifdef FEAT_MBYTE
5668       case MULTIBYTECODE:
5669         {
5670             int         i, len, cf = 0;
5671
5672             /* Safety check (just in case 'encoding' was changed since
5673              * compiling the program). */
5674             if ((len = (*mb_ptr2len)(opnd)) > 1)
5675             {
5676                 if (ireg_ic && enc_utf8)
5677                     cf = utf_fold(utf_ptr2char(opnd));
5678                 while (count < maxcount)
5679                 {
5680                     for (i = 0; i < len; ++i)
5681                         if (opnd[i] != scan[i])
5682                             break;
                         /* When the bytes differ: stop, unless ignoring case
                          * with UTF-8 and the folded characters are equal. */
5683                     if (i < len && (!ireg_ic || !enc_utf8
5684                                         || utf_fold(utf_ptr2char(scan)) != cf))
5685                         break;
5686                     scan += len;
5687                     ++count;
5688                 }
5689             }
5690         }
5691         break;
5692 #endif
5693
5694       case ANYOF:
5695       case ANYOF + ADD_NL:
5696         testval = TRUE;
5697         /*FALLTHROUGH*/
5698
5699       case ANYBUT:
5700       case ANYBUT + ADD_NL:
             /* With "testval" TRUE (ANYOF) stop at a character that cstrchr()
              * does not find in the operand.  With "testval" zero (ANYBUT)
              * stop at a character that is found in the operand. */
5701         while (count < maxcount)
5702         {
5703 #ifdef FEAT_MBYTE
5704             int len;
5705 #endif
5706             if (*scan == NUL)
5707             {
5708                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5709                                                               || reg_line_lbr)
5710                     break;
5711                 reg_nextline();
5712                 scan = reginput;
5713                 if (got_int)
5714                     break;
5715             }
5716             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5717                 ++scan;
5718 #ifdef FEAT_MBYTE
5719             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5720             {
5721                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5722                     break;
5723                 scan += len;
5724             }
5725 #endif
5726             else
5727             {
5728                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5729                     break;
5730                 ++scan;
5731             }
5732             ++count;
5733         }
5734         break;
5735
5736       case NEWL:
             /* Match line breaks: a '\n' character when reg_line_lbr is set,
              * otherwise the end of a line when REG_MULTI is set and reglnum
              * is not above reg_maxline. */
5737         while (count < maxcount
5738                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5739                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5740         {
5741             count++;
5742             if (reg_line_lbr)
5743                 ADVANCE_REGINPUT();
5744             else
5745                 reg_nextline();
5746             scan = reginput;
5747             if (got_int)
5748                 break;
5749         }
5750         break;
5751
5752       default:                  /* Oh dear.  Called inappropriately. */
5753         EMSG(_(e_re_corr));
5754 #ifdef DEBUG
5755         printf("Called regrepeat with op code %d\n", OP(p));
5756 #endif
5757         break;
5758     }
5759
         /* Store the local copy of the input position back. */
5760     reginput = scan;
5761
5762     return (int)count;
5763 }
5764
5765 /*
5766  * regnext - dig the "next" pointer out of a node
5767  */
5768     static char_u *
5769 regnext(p)
5770     char_u  *p;
5771 {
5772     int     offset;
5773
5774     if (p == JUST_CALC_SIZE)
5775         return NULL;
5776
5777     offset = NEXT(p);
5778     if (offset == 0)
5779         return NULL;
5780
5781     if (OP(p) == BACK)
5782         return p - offset;
5783     else
5784         return p + offset;
5785 }
5786
5787 /*
5788  * Check the regexp program for its magic number.
5789  * Return TRUE if it's wrong.
5790  */
5791     static int
5792 prog_magic_wrong()
5793 {
5794     if (UCHARAT(REG_MULTI
5795                 ? reg_mmatch->regprog->program
5796                 : reg_match->regprog->program) != REGMAGIC)
5797     {
5798         EMSG(_(e_re_corr));
5799         return TRUE;
5800     }
5801     return FALSE;
5802 }
5803
5804 /*
5805  * Cleanup the subexpressions, if this wasn't done yet.
5806  * This construction is used to clear the subexpressions only when they are
5807  * used (to increase speed).
5808  */
5809     static void
5810 cleanup_subexpr()
5811 {
5812     if (need_clear_subexpr)
5813     {
5814         if (REG_MULTI)
5815         {
5816             /* Use 0xff to set lnum to -1 */
5817             vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5818             vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5819         }
5820         else
5821         {
5822             vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5823             vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5824         }
5825         need_clear_subexpr = FALSE;
5826     }
5827 }
5828
#ifdef FEAT_SYN_HL
/*
 * cleanup_zsubexpr - clear the start and end info in the reg_startz and
 * reg_endz arrays, if "need_clear_zsubexpr" says that is still pending.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (!REG_MULTI)
    {
        /* Fill the pointer arrays with zero bytes. */
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
        /* Filling the positions with 0xff bytes makes lnum -1. */
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5850
5851 /*
5852  * Save the current subexpr to "bp", so that they can be restored
5853  * later by restore_subexpr().
5854  */
5855     static void
5856 save_subexpr(bp)
5857     regbehind_T *bp;
5858 {
5859     int i;
5860
5861     /* When "need_clear_subexpr" is set we don't need to save the values, only
5862      * remember that this flag needs to be set again when restoring. */
5863     bp->save_need_clear_subexpr = need_clear_subexpr;
5864     if (!need_clear_subexpr)
5865     {
5866         for (i = 0; i < NSUBEXP; ++i)
5867         {
5868             if (REG_MULTI)
5869             {
5870                 bp->save_start[i].se_u.pos = reg_startpos[i];
5871                 bp->save_end[i].se_u.pos = reg_endpos[i];
5872             }
5873             else
5874             {
5875                 bp->save_start[i].se_u.ptr = reg_startp[i];
5876                 bp->save_end[i].se_u.ptr = reg_endp[i];
5877             }
5878         }
5879     }
5880 }
5881
5882 /*
5883  * Restore the subexpr from "bp".
5884  */
5885     static void
5886 restore_subexpr(bp)
5887     regbehind_T *bp;
5888 {
5889     int i;
5890
5891     /* Only need to restore saved values when they are not to be cleared. */
5892     need_clear_subexpr = bp->save_need_clear_subexpr;
5893     if (!need_clear_subexpr)
5894     {
5895         for (i = 0; i < NSUBEXP; ++i)
5896         {
5897             if (REG_MULTI)
5898             {
5899                 reg_startpos[i] = bp->save_start[i].se_u.pos;
5900                 reg_endpos[i] = bp->save_end[i].se_u.pos;
5901             }
5902             else
5903             {
5904                 reg_startp[i] = bp->save_start[i].se_u.ptr;
5905                 reg_endp[i] = bp->save_end[i].se_u.ptr;
5906             }
5907         }
5908     }
5909 }
5910
5911 /*
5912  * Advance reglnum, regline and reginput to the next line.
5913  */
5914     static void
5915 reg_nextline()
5916 {
5917     regline = reg_getline(++reglnum);
5918     reginput = regline;
5919     fast_breakcheck();
5920 }
5921
5922 /*
5923  * Save the input line and position in a regsave_T.
5924  */
5925     static void
5926 reg_save(save, gap)
5927     regsave_T   *save;
5928     garray_T    *gap;
5929 {
5930     if (REG_MULTI)
5931     {
5932         save->rs_u.pos.col = (colnr_T)(reginput - regline);
5933         save->rs_u.pos.lnum = reglnum;
5934     }
5935     else
5936         save->rs_u.ptr = reginput;
5937     save->rs_len = gap->ga_len;
5938 }
5939
5940 /*
5941  * Restore the input line and position from a regsave_T.
5942  */
5943     static void
5944 reg_restore(save, gap)
5945     regsave_T   *save;
5946     garray_T    *gap;
5947 {
5948     if (REG_MULTI)
5949     {
5950         if (reglnum != save->rs_u.pos.lnum)
5951         {
5952             /* only call reg_getline() when the line number changed to save
5953              * a bit of time */
5954             reglnum = save->rs_u.pos.lnum;
5955             regline = reg_getline(reglnum);
5956         }
5957         reginput = regline + save->rs_u.pos.col;
5958     }
5959     else
5960         reginput = save->rs_u.ptr;
5961     gap->ga_len = save->rs_len;
5962 }
5963
5964 /*
5965  * Return TRUE if current position is equal to saved position.
5966  */
5967     static int
5968 reg_save_equal(save)
5969     regsave_T   *save;
5970 {
5971     if (REG_MULTI)
5972         return reglnum == save->rs_u.pos.lnum
5973                                   && reginput == regline + save->rs_u.pos.col;
5974     return reginput == save->rs_u.ptr;
5975 }
5976
5977 /*
5978  * Tentatively set the sub-expression start to the current position (after
5979  * calling regmatch() they will have changed).  Need to save the existing
5980  * values for when there is no match.
5981  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5982  * depending on REG_MULTI.
5983  */
5984     static void
5985 save_se_multi(savep, posp)
5986     save_se_T   *savep;
5987     lpos_T      *posp;
5988 {
5989     savep->se_u.pos = *posp;
5990     posp->lnum = reglnum;
5991     posp->col = (colnr_T)(reginput - regline);
5992 }
5993
5994     static void
5995 save_se_one(savep, pp)
5996     save_se_T   *savep;
5997     char_u      **pp;
5998 {
5999     savep->se_u.ptr = *pp;
6000     *pp = reginput;
6001 }
6002
6003 /*
6004  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6005  */
6006     static int
6007 re_num_cmp(val, scan)
6008     long_u      val;
6009     char_u      *scan;
6010 {
6011     long_u  n = OPERAND_MIN(scan);
6012
6013     if (OPERAND_CMP(scan) == '>')
6014         return val > n;
6015     if (OPERAND_CMP(scan) == '<')
6016         return val < n;
6017     return val == n;
6018 }
6019
6020
6021 #ifdef DEBUG
6022
6023 /*
6024  * regdump - dump a regexp onto stdout in vaguely comprehensible form
6025  */
6026     static void
6027 regdump(pattern, r)
6028     char_u      *pattern;
6029     regprog_T   *r;
6030 {
6031     char_u  *s;
6032     int     op = EXACTLY;       /* Arbitrary non-END op. */
6033     char_u  *next;
6034     char_u  *end = NULL;
6035
6036     printf("\r\nregcomp(%s):\r\n", pattern);
6037
6038     s = r->program + 1;
6039     /*
6040      * Loop until we find the END that isn't before a referred next (an END
6041      * can also appear in a NOMATCH operand).
6042      */
6043     while (op != END || s <= end)
6044     {
6045         op = OP(s);
6046         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
6047         next = regnext(s);
6048         if (next == NULL)       /* Next ptr. */
6049             printf("(0)");
6050         else
6051             printf("(%d)", (int)((s - r->program) + (next - s)));
6052         if (end < next)
6053             end = next;
6054         if (op == BRACE_LIMITS)
6055         {
6056             /* Two short ints */
6057             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
6058             s += 8;
6059         }
6060         s += 3;
6061         if (op == ANYOF || op == ANYOF + ADD_NL
6062                 || op == ANYBUT || op == ANYBUT + ADD_NL
6063                 || op == EXACTLY)
6064         {
6065             /* Literal string, where present. */
6066             while (*s != NUL)
6067                 printf("%c", *s++);
6068             s++;
6069         }
6070         printf("\r\n");
6071     }
6072
6073     /* Header fields of interest. */
6074     if (r->regstart != NUL)
6075         printf("start `%s' 0x%x; ", r->regstart < 256
6076                 ? (char *)transchar(r->regstart)
6077                 : "multibyte", r->regstart);
6078     if (r->reganch)
6079         printf("anchored; ");
6080     if (r->regmust != NULL)
6081         printf("must have \"%s\"", r->regmust);
6082     printf("\r\n");
6083 }
6084
6085 /*
6086  * regprop - printable representation of opcode
6087  */
6088     static char_u *
6089 regprop(op)
6090     char_u         *op;
6091 {
6092     char_u          *p;
6093     static char_u   buf[50];
6094
6095     (void) strcpy(buf, ":");
6096
6097     switch (OP(op))
6098     {
6099       case BOL:
6100         p = "BOL";
6101         break;
6102       case EOL:
6103         p = "EOL";
6104         break;
6105       case RE_BOF:
6106         p = "BOF";
6107         break;
6108       case RE_EOF:
6109         p = "EOF";
6110         break;
6111       case CURSOR:
6112         p = "CURSOR";
6113         break;
6114       case RE_VISUAL:
6115         p = "RE_VISUAL";
6116         break;
6117       case RE_LNUM:
6118         p = "RE_LNUM";
6119         break;
6120       case RE_MARK:
6121         p = "RE_MARK";
6122         break;
6123       case RE_COL:
6124         p = "RE_COL";
6125         break;
6126       case RE_VCOL:
6127         p = "RE_VCOL";
6128         break;
6129       case BOW:
6130         p = "BOW";
6131         break;
6132       case EOW:
6133         p = "EOW";
6134         break;
6135       case ANY:
6136         p = "ANY";
6137         break;
6138       case ANY + ADD_NL:
6139         p = "ANY+NL";
6140         break;
6141       case ANYOF:
6142         p = "ANYOF";
6143         break;
6144       case ANYOF + ADD_NL:
6145         p = "ANYOF+NL";
6146         break;
6147       case ANYBUT:
6148         p = "ANYBUT";
6149         break;
6150       case ANYBUT + ADD_NL:
6151         p = "ANYBUT+NL";
6152         break;
6153       case IDENT:
6154         p = "IDENT";
6155         break;
6156       case IDENT + ADD_NL:
6157         p = "IDENT+NL";
6158         break;
6159       case SIDENT:
6160         p = "SIDENT";
6161         break;
6162       case SIDENT + ADD_NL:
6163         p = "SIDENT+NL";
6164         break;
6165       case KWORD:
6166         p = "KWORD";
6167         break;
6168       case KWORD + ADD_NL:
6169         p = "KWORD+NL";
6170         break;
6171       case SKWORD:
6172         p = "SKWORD";
6173         break;
6174       case SKWORD + ADD_NL:
6175         p = "SKWORD+NL";
6176         break;
6177       case FNAME:
6178         p = "FNAME";
6179         break;
6180       case FNAME + ADD_NL:
6181         p = "FNAME+NL";
6182         break;
6183       case SFNAME:
6184         p = "SFNAME";
6185         break;
6186       case SFNAME + ADD_NL:
6187         p = "SFNAME+NL";
6188         break;
6189       case PRINT:
6190         p = "PRINT";
6191         break;
6192       case PRINT + ADD_NL:
6193         p = "PRINT+NL";
6194         break;
6195       case SPRINT:
6196         p = "SPRINT";
6197         break;
6198       case SPRINT + ADD_NL:
6199         p = "SPRINT+NL";
6200         break;
6201       case WHITE:
6202         p = "WHITE";
6203         break;
6204       case WHITE + ADD_NL:
6205         p = "WHITE+NL";
6206         break;
6207       case NWHITE:
6208         p = "NWHITE";
6209         break;
6210       case NWHITE + ADD_NL:
6211         p = "NWHITE+NL";
6212         break;
6213       case DIGIT:
6214         p = "DIGIT";
6215         break;
6216       case DIGIT + ADD_NL:
6217         p = "DIGIT+NL";
6218         break;
6219       case NDIGIT:
6220         p = "NDIGIT";
6221         break;
6222       case NDIGIT + ADD_NL:
6223         p = "NDIGIT+NL";
6224         break;
6225       case HEX:
6226         p = "HEX";
6227         break;
6228       case HEX + ADD_NL:
6229         p = "HEX+NL";
6230         break;
6231       case NHEX:
6232         p = "NHEX";
6233         break;
6234       case NHEX + ADD_NL:
6235         p = "NHEX+NL";
6236         break;
6237       case OCTAL:
6238         p = "OCTAL";
6239         break;
6240       case OCTAL + ADD_NL:
6241         p = "OCTAL+NL";
6242         break;
6243       case NOCTAL:
6244         p = "NOCTAL";
6245         break;
6246       case NOCTAL + ADD_NL:
6247         p = "NOCTAL+NL";
6248         break;
6249       case WORD:
6250         p = "WORD";
6251         break;
6252       case WORD + ADD_NL:
6253         p = "WORD+NL";
6254         break;
6255       case NWORD:
6256         p = "NWORD";
6257         break;
6258       case NWORD + ADD_NL:
6259         p = "NWORD+NL";
6260         break;
6261       case HEAD:
6262         p = "HEAD";
6263         break;
6264       case HEAD + ADD_NL:
6265         p = "HEAD+NL";
6266         break;
6267       case NHEAD:
6268         p = "NHEAD";
6269         break;
6270       case NHEAD + ADD_NL:
6271         p = "NHEAD+NL";
6272         break;
6273       case ALPHA:
6274         p = "ALPHA";
6275         break;
6276       case ALPHA + ADD_NL:
6277         p = "ALPHA+NL";
6278         break;
6279       case NALPHA:
6280         p = "NALPHA";
6281         break;
6282       case NALPHA + ADD_NL:
6283         p = "NALPHA+NL";
6284         break;
6285       case LOWER:
6286         p = "LOWER";
6287         break;
6288       case LOWER + ADD_NL:
6289         p = "LOWER+NL";
6290         break;
6291       case NLOWER:
6292         p = "NLOWER";
6293         break;
6294       case NLOWER + ADD_NL:
6295         p = "NLOWER+NL";
6296         break;
6297       case UPPER:
6298         p = "UPPER";
6299         break;
6300       case UPPER + ADD_NL:
6301         p = "UPPER+NL";
6302         break;
6303       case NUPPER:
6304         p = "NUPPER";
6305         break;
6306       case NUPPER + ADD_NL:
6307         p = "NUPPER+NL";
6308         break;
6309       case BRANCH:
6310         p = "BRANCH";
6311         break;
6312       case EXACTLY:
6313         p = "EXACTLY";
6314         break;
6315       case NOTHING:
6316         p = "NOTHING";
6317         break;
6318       case BACK:
6319         p = "BACK";
6320         break;
6321       case END:
6322         p = "END";
6323         break;
6324       case MOPEN + 0:
6325         p = "MATCH START";
6326         break;
6327       case MOPEN + 1:
6328       case MOPEN + 2:
6329       case MOPEN + 3:
6330       case MOPEN + 4:
6331       case MOPEN + 5:
6332       case MOPEN + 6:
6333       case MOPEN + 7:
6334       case MOPEN + 8:
6335       case MOPEN + 9:
6336         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6337         p = NULL;
6338         break;
6339       case MCLOSE + 0:
6340         p = "MATCH END";
6341         break;
6342       case MCLOSE + 1:
6343       case MCLOSE + 2:
6344       case MCLOSE + 3:
6345       case MCLOSE + 4:
6346       case MCLOSE + 5:
6347       case MCLOSE + 6:
6348       case MCLOSE + 7:
6349       case MCLOSE + 8:
6350       case MCLOSE + 9:
6351         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6352         p = NULL;
6353         break;
6354       case BACKREF + 1:
6355       case BACKREF + 2:
6356       case BACKREF + 3:
6357       case BACKREF + 4:
6358       case BACKREF + 5:
6359       case BACKREF + 6:
6360       case BACKREF + 7:
6361       case BACKREF + 8:
6362       case BACKREF + 9:
6363         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6364         p = NULL;
6365         break;
6366       case NOPEN:
6367         p = "NOPEN";
6368         break;
6369       case NCLOSE:
6370         p = "NCLOSE";
6371         break;
6372 #ifdef FEAT_SYN_HL
6373       case ZOPEN + 1:
6374       case ZOPEN + 2:
6375       case ZOPEN + 3:
6376       case ZOPEN + 4:
6377       case ZOPEN + 5:
6378       case ZOPEN + 6:
6379       case ZOPEN + 7:
6380       case ZOPEN + 8:
6381       case ZOPEN + 9:
6382         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6383         p = NULL;
6384         break;
6385       case ZCLOSE + 1:
6386       case ZCLOSE + 2:
6387       case ZCLOSE + 3:
6388       case ZCLOSE + 4:
6389       case ZCLOSE + 5:
6390       case ZCLOSE + 6:
6391       case ZCLOSE + 7:
6392       case ZCLOSE + 8:
6393       case ZCLOSE + 9:
6394         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6395         p = NULL;
6396         break;
6397       case ZREF + 1:
6398       case ZREF + 2:
6399       case ZREF + 3:
6400       case ZREF + 4:
6401       case ZREF + 5:
6402       case ZREF + 6:
6403       case ZREF + 7:
6404       case ZREF + 8:
6405       case ZREF + 9:
6406         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6407         p = NULL;
6408         break;
6409 #endif
6410       case STAR:
6411         p = "STAR";
6412         break;
6413       case PLUS:
6414         p = "PLUS";
6415         break;
6416       case NOMATCH:
6417         p = "NOMATCH";
6418         break;
6419       case MATCH:
6420         p = "MATCH";
6421         break;
6422       case BEHIND:
6423         p = "BEHIND";
6424         break;
6425       case NOBEHIND:
6426         p = "NOBEHIND";
6427         break;
6428       case SUBPAT:
6429         p = "SUBPAT";
6430         break;
6431       case BRACE_LIMITS:
6432         p = "BRACE_LIMITS";
6433         break;
6434       case BRACE_SIMPLE:
6435         p = "BRACE_SIMPLE";
6436         break;
6437       case BRACE_COMPLEX + 0:
6438       case BRACE_COMPLEX + 1:
6439       case BRACE_COMPLEX + 2:
6440       case BRACE_COMPLEX + 3:
6441       case BRACE_COMPLEX + 4:
6442       case BRACE_COMPLEX + 5:
6443       case BRACE_COMPLEX + 6:
6444       case BRACE_COMPLEX + 7:
6445       case BRACE_COMPLEX + 8:
6446       case BRACE_COMPLEX + 9:
6447         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6448         p = NULL;
6449         break;
6450 #ifdef FEAT_MBYTE
6451       case MULTIBYTECODE:
6452         p = "MULTIBYTECODE";
6453         break;
6454 #endif
6455       case NEWL:
6456         p = "NEWL";
6457         break;
6458       default:
6459         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6460         p = NULL;
6461         break;
6462     }
6463     if (p != NULL)
6464         (void) strcat(buf, p);
6465     return buf;
6466 }
6467 #endif
6468
6469 #ifdef FEAT_MBYTE
6470 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6471
/* One entry of decomp_table[]: up to three characters, unused ones are 0. */
typedef struct
{
    int a, b, c;
} decomp_T;

/* First and last character that have an entry in decomp_table[].  Both the
 * size of the table and the range check in mb_decompose() use these, so that
 * they cannot get out of sync. */
#define DECOMP_FIRST    0xfb20
#define DECOMP_LAST     0xfb4f

/* 0xfb20 - 0xfb4f */
static decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, stored in "*c1",
 * "*c2" and "*c3".
 * When "c" is in the range DECOMP_FIRST - DECOMP_LAST the values come from
 * decomp_table[].  Any other character is returned unchanged in "*c1", with
 * "*c2" and "*c3" set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character of the table, otherwise
     * the index below becomes negative and memory before the table is
     * read. */
    if (c >= DECOMP_FIRST && c <= DECOMP_LAST)
    {
        d = decomp_table[c - DECOMP_FIRST];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6550 #endif
6551
6552 /*
6553  * Compare two strings, ignore case if ireg_ic set.
6554  * Return 0 if strings match, non-zero otherwise.
6555  * Correct the length "*n" when composing characters are ignored.
6556  */
6557     static int
6558 cstrncmp(s1, s2, n)
6559     char_u      *s1, *s2;
6560     int         *n;
6561 {
6562     int         result;
6563
6564     if (!ireg_ic)
6565         result = STRNCMP(s1, s2, *n);
6566     else
6567         result = MB_STRNICMP(s1, s2, *n);
6568
6569 #ifdef FEAT_MBYTE
6570     /* if it failed and it's utf8 and we want to combineignore: */
6571     if (result != 0 && enc_utf8 && ireg_icombine)
6572     {
6573         char_u  *str1, *str2;
6574         int     c1, c2, c11, c12;
6575         int     junk;
6576
6577         /* we have to handle the strcmp ourselves, since it is necessary to
6578          * deal with the composing characters by ignoring them: */
6579         str1 = s1;
6580         str2 = s2;
6581         c1 = c2 = 0;
6582         while ((int)(str1 - s1) < *n)
6583         {
6584             c1 = mb_ptr2char_adv(&str1);
6585             c2 = mb_ptr2char_adv(&str2);
6586
6587             /* decompose the character if necessary, into 'base' characters
6588              * because I don't care about Arabic, I will hard-code the Hebrew
6589              * which I *do* care about!  So sue me... */
6590             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6591             {
6592                 /* decomposition necessary? */
6593                 mb_decompose(c1, &c11, &junk, &junk);
6594                 mb_decompose(c2, &c12, &junk, &junk);
6595                 c1 = c11;
6596                 c2 = c12;
6597                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6598                     break;
6599             }
6600         }
6601         result = c2 - c1;
6602         if (result == 0)
6603             *n = (int)(str2 - s2);
6604     }
6605 #endif
6606
6607     return result;
6608 }
6609
6610 /*
6611  * cstrchr: This function is used a lot for simple searches, keep it fast!
6612  */
6613     static char_u *
6614 cstrchr(s, c)
6615     char_u      *s;
6616     int         c;
6617 {
6618     char_u      *p;
6619     int         cc;
6620
6621     if (!ireg_ic
6622 #ifdef FEAT_MBYTE
6623             || (!enc_utf8 && mb_char2len(c) > 1)
6624 #endif
6625             )
6626         return vim_strchr(s, c);
6627
6628     /* tolower() and toupper() can be slow, comparing twice should be a lot
6629      * faster (esp. when using MS Visual C++!).
6630      * For UTF-8 need to use folded case. */
6631 #ifdef FEAT_MBYTE
6632     if (enc_utf8 && c > 0x80)
6633         cc = utf_fold(c);
6634     else
6635 #endif
6636          if (MB_ISUPPER(c))
6637         cc = MB_TOLOWER(c);
6638     else if (MB_ISLOWER(c))
6639         cc = MB_TOUPPER(c);
6640     else
6641         return vim_strchr(s, c);
6642
6643 #ifdef FEAT_MBYTE
6644     if (has_mbyte)
6645     {
6646         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6647         {
6648             if (enc_utf8 && c > 0x80)
6649             {
6650                 if (utf_fold(utf_ptr2char(p)) == cc)
6651                     return p;
6652             }
6653             else if (*p == c || *p == cc)
6654                 return p;
6655         }
6656     }
6657     else
6658 #endif
6659         /* Faster version for when there are no multi-byte characters. */
6660         for (p = s; *p != NUL; ++p)
6661             if (*p == c || *p == cc)
6662                 return p;
6663
6664     return NULL;
6665 }
6666
6667 /***************************************************************
6668  *                    regsub stuff                             *
6669  ***************************************************************/
6670
6671 /* This stuff below really confuses cc on an SGI -- webb */
6672 #ifdef __sgi
6673 # undef __ARGS
6674 # define __ARGS(x)  ()
6675 #endif
6676
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
6683 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6684
6685 static fptr_T do_upper __ARGS((int *, int));
6686 static fptr_T do_Upper __ARGS((int *, int));
6687 static fptr_T do_lower __ARGS((int *, int));
6688 static fptr_T do_Lower __ARGS((int *, int));
6689
6690 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6691
6692     static fptr_T
6693 do_upper(d, c)
6694     int         *d;
6695     int         c;
6696 {
6697     *d = MB_TOUPPER(c);
6698
6699     return (fptr_T)NULL;
6700 }
6701
6702     static fptr_T
6703 do_Upper(d, c)
6704     int         *d;
6705     int         c;
6706 {
6707     *d = MB_TOUPPER(c);
6708
6709     return (fptr_T)do_Upper;
6710 }
6711
6712     static fptr_T
6713 do_lower(d, c)
6714     int         *d;
6715     int         c;
6716 {
6717     *d = MB_TOLOWER(c);
6718
6719     return (fptr_T)NULL;
6720 }
6721
6722     static fptr_T
6723 do_Lower(d, c)
6724     int         *d;
6725     int         c;
6726 {
6727     *d = MB_TOLOWER(c);
6728
6729     return (fptr_T)do_Lower;
6730 }
6731
6732 /*
6733  * regtilde(): Replace tildes in the pattern by the old pattern.
6734  *
6735  * Short explanation of the tilde: It stands for the previous replacement
6736  * pattern.  If that previous pattern also contains a ~ we should go back a
6737  * step further...  But we insert the previous pattern into the current one
6738  * and remember that.
6739  * This still does not handle the case where "magic" changes.  So require the
6740  * user to keep his hands off of "magic".
6741  *
6742  * The tildes are parsed once before the first call to vim_regsub().
6743  */
6744     char_u *
6745 regtilde(source, magic)
6746     char_u      *source;
6747     int         magic;
6748 {
6749     char_u      *newsub = source;
6750     char_u      *tmpsub;
6751     char_u      *p;
6752     int         len;
6753     int         prevlen;
6754
6755     for (p = newsub; *p; ++p)
6756     {
6757         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6758         {
6759             if (reg_prev_sub != NULL)
6760             {
6761                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6762                 prevlen = (int)STRLEN(reg_prev_sub);
6763                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6764                 if (tmpsub != NULL)
6765                 {
6766                     /* copy prefix */
6767                     len = (int)(p - newsub);    /* not including ~ */
6768                     mch_memmove(tmpsub, newsub, (size_t)len);
6769                     /* interpret tilde */
6770                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6771                     /* copy postfix */
6772                     if (!magic)
6773                         ++p;                    /* back off \ */
6774                     STRCPY(tmpsub + len + prevlen, p + 1);
6775
6776                     if (newsub != source)       /* already allocated newsub */
6777                         vim_free(newsub);
6778                     newsub = tmpsub;
6779                     p = newsub + len + prevlen;
6780                 }
6781             }
6782             else if (magic)
6783                 STRMOVE(p, p + 1);      /* remove '~' */
6784             else
6785                 STRMOVE(p, p + 2);      /* remove '\~' */
6786             --p;
6787         }
6788         else
6789         {
6790             if (*p == '\\' && p[1])             /* skip escaped characters */
6791                 ++p;
6792 #ifdef FEAT_MBYTE
6793             if (has_mbyte)
6794                 p += (*mb_ptr2len)(p) - 1;
6795 #endif
6796         }
6797     }
6798
6799     vim_free(reg_prev_sub);
6800     if (newsub != source)       /* newsub was allocated, just keep it */
6801         reg_prev_sub = newsub;
6802     else                        /* no ~ found, need to save newsub  */
6803         reg_prev_sub = vim_strsave(newsub);
6804     return newsub;
6805 }
6806
6807 #ifdef FEAT_EVAL
/* TRUE when submatch() can be used.  vim_regsub_both() sets it while it
 * evaluates a "\=" expression and resets it afterwards.  It is also checked
 * there, so that such an expression is not evaluated recursively. */
static int can_f_submatch = FALSE;

/* These pointers are used instead of reg_match and reg_mmatch for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch().
 * vim_regsub_both() copies reg_match and reg_mmatch here before evaluating
 * the expression and restores them from here afterwards. */
static regmatch_T       *submatch_match;
static regmmatch_T      *submatch_mmatch;
6815 #endif
6816
6817 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6818 /*
6819  * vim_regsub() - perform substitutions after a vim_regexec() or
6820  * vim_regexec_multi() match.
6821  *
6822  * If "copy" is TRUE really copy into "dest".
6823  * If "copy" is FALSE nothing is copied, this is just to find out the length
6824  * of the result.
6825  *
6826  * If "backslash" is TRUE, a backslash will be removed later, need to double
6827  * them to keep them, and insert a backslash before a CR to avoid it being
6828  * replaced with a line break later.
6829  *
6830  * Note: The matched text must not change between the call of
6831  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6832  * references invalid!
6833  *
6834  * Returns the size of the replacement, including terminating NUL.
6835  */
6836     int
6837 vim_regsub(rmp, source, dest, copy, magic, backslash)
6838     regmatch_T  *rmp;
6839     char_u      *source;
6840     char_u      *dest;
6841     int         copy;
6842     int         magic;
6843     int         backslash;
6844 {
6845     reg_match = rmp;
6846     reg_mmatch = NULL;
6847     reg_maxline = 0;
6848     return vim_regsub_both(source, dest, copy, magic, backslash);
6849 }
6850 #endif
6851
6852     int
6853 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6854     regmmatch_T *rmp;
6855     linenr_T    lnum;
6856     char_u      *source;
6857     char_u      *dest;
6858     int         copy;
6859     int         magic;
6860     int         backslash;
6861 {
6862     reg_match = NULL;
6863     reg_mmatch = rmp;
6864     reg_buf = curbuf;           /* always works on the current buffer! */
6865     reg_firstlnum = lnum;
6866     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6867     return vim_regsub_both(source, dest, copy, magic, backslash);
6868 }
6869
6870     static int
6871 vim_regsub_both(source, dest, copy, magic, backslash)
6872     char_u      *source;
6873     char_u      *dest;
6874     int         copy;
6875     int         magic;
6876     int         backslash;
6877 {
6878     char_u      *src;
6879     char_u      *dst;
6880     char_u      *s;
6881     int         c;
6882     int         cc;
6883     int         no = -1;
6884     fptr_T      func = (fptr_T)NULL;
6885     linenr_T    clnum = 0;      /* init for GCC */
6886     int         len = 0;        /* init for GCC */
6887 #ifdef FEAT_EVAL
6888     static char_u *eval_result = NULL;
6889 #endif
6890
6891     /* Be paranoid... */
6892     if (source == NULL || dest == NULL)
6893     {
6894         EMSG(_(e_null));
6895         return 0;
6896     }
6897     if (prog_magic_wrong())
6898         return 0;
6899     src = source;
6900     dst = dest;
6901
6902     /*
6903      * When the substitute part starts with "\=" evaluate it as an expression.
6904      */
6905     if (source[0] == '\\' && source[1] == '='
6906 #ifdef FEAT_EVAL
6907             && !can_f_submatch      /* can't do this recursively */
6908 #endif
6909             )
6910     {
6911 #ifdef FEAT_EVAL
6912         /* To make sure that the length doesn't change between checking the
6913          * length and copying the string, and to speed up things, the
6914          * resulting string is saved from the call with "copy" == FALSE to the
6915          * call with "copy" == TRUE. */
6916         if (copy)
6917         {
6918             if (eval_result != NULL)
6919             {
6920                 STRCPY(dest, eval_result);
6921                 dst += STRLEN(eval_result);
6922                 vim_free(eval_result);
6923                 eval_result = NULL;
6924             }
6925         }
6926         else
6927         {
6928             linenr_T    save_reg_maxline;
6929             win_T       *save_reg_win;
6930             int         save_ireg_ic;
6931
6932             vim_free(eval_result);
6933
6934             /* The expression may contain substitute(), which calls us
6935              * recursively.  Make sure submatch() gets the text from the first
6936              * level.  Don't need to save "reg_buf", because
6937              * vim_regexec_multi() can't be called recursively. */
6938             submatch_match = reg_match;
6939             submatch_mmatch = reg_mmatch;
6940             save_reg_maxline = reg_maxline;
6941             save_reg_win = reg_win;
6942             save_ireg_ic = ireg_ic;
6943             can_f_submatch = TRUE;
6944
6945             eval_result = eval_to_string(source + 2, NULL, TRUE);
6946             if (eval_result != NULL)
6947             {
6948                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6949                 {
6950                     /* Change NL to CR, so that it becomes a line break.
6951                      * Skip over a backslashed character. */
6952                     if (*s == NL)
6953                         *s = CAR;
6954                     else if (*s == '\\' && s[1] != NUL)
6955                         ++s;
6956                 }
6957
6958                 dst += STRLEN(eval_result);
6959             }
6960
6961             reg_match = submatch_match;
6962             reg_mmatch = submatch_mmatch;
6963             reg_maxline = save_reg_maxline;
6964             reg_win = save_reg_win;
6965             ireg_ic = save_ireg_ic;
6966             can_f_submatch = FALSE;
6967         }
6968 #endif
6969     }
6970     else
6971       while ((c = *src++) != NUL)
6972       {
6973         if (c == '&' && magic)
6974             no = 0;
6975         else if (c == '\\' && *src != NUL)
6976         {
6977             if (*src == '&' && !magic)
6978             {
6979                 ++src;
6980                 no = 0;
6981             }
6982             else if ('0' <= *src && *src <= '9')
6983             {
6984                 no = *src++ - '0';
6985             }
6986             else if (vim_strchr((char_u *)"uUlLeE", *src))
6987             {
6988                 switch (*src++)
6989                 {
6990                 case 'u':   func = (fptr_T)do_upper;
6991                             continue;
6992                 case 'U':   func = (fptr_T)do_Upper;
6993                             continue;
6994                 case 'l':   func = (fptr_T)do_lower;
6995                             continue;
6996                 case 'L':   func = (fptr_T)do_Lower;
6997                             continue;
6998                 case 'e':
6999                 case 'E':   func = (fptr_T)NULL;
7000                             continue;
7001                 }
7002             }
7003         }
7004         if (no < 0)           /* Ordinary character. */
7005         {
7006             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7007             {
7008                 /* Copy a special key as-is. */
7009                 if (copy)
7010                 {
7011                     *dst++ = c;
7012                     *dst++ = *src++;
7013                     *dst++ = *src++;
7014                 }
7015                 else
7016                 {
7017                     dst += 3;
7018                     src += 2;
7019                 }
7020                 continue;
7021             }
7022
7023             if (c == '\\' && *src != NUL)
7024             {
7025                 /* Check for abbreviations -- webb */
7026                 switch (*src)
7027                 {
7028                     case 'r':   c = CAR;        ++src;  break;
7029                     case 'n':   c = NL;         ++src;  break;
7030                     case 't':   c = TAB;        ++src;  break;
7031                  /* Oh no!  \e already has meaning in subst pat :-( */
7032                  /* case 'e':   c = ESC;        ++src;  break; */
7033                     case 'b':   c = Ctrl_H;     ++src;  break;
7034
7035                     /* If "backslash" is TRUE the backslash will be removed
7036                      * later.  Used to insert a literal CR. */
7037                     default:    if (backslash)
7038                                 {
7039                                     if (copy)
7040                                         *dst = '\\';
7041                                     ++dst;
7042                                 }
7043                                 c = *src++;
7044                 }
7045             }
7046 #ifdef FEAT_MBYTE
7047             else if (has_mbyte)
7048                 c = mb_ptr2char(src - 1);
7049 #endif
7050
7051             /* Write to buffer, if copy is set. */
7052             if (func == (fptr_T)NULL)   /* just copy */
7053                 cc = c;
7054             else
7055                 /* Turbo C complains without the typecast */
7056                 func = (fptr_T)(func(&cc, c));
7057
7058 #ifdef FEAT_MBYTE
7059             if (has_mbyte)
7060             {
7061                 src += mb_ptr2len(src - 1) - 1;
7062                 if (copy)
7063                     mb_char2bytes(cc, dst);
7064                 dst += mb_char2len(cc) - 1;
7065             }
7066             else
7067 #endif
7068                 if (copy)
7069                     *dst = cc;
7070             dst++;
7071         }
7072         else
7073         {
7074             if (REG_MULTI)
7075             {
7076                 clnum = reg_mmatch->startpos[no].lnum;
7077                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7078                     s = NULL;
7079                 else
7080                 {
7081                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7082                     if (reg_mmatch->endpos[no].lnum == clnum)
7083                         len = reg_mmatch->endpos[no].col
7084                                                - reg_mmatch->startpos[no].col;
7085                     else
7086                         len = (int)STRLEN(s);
7087                 }
7088             }
7089             else
7090             {
7091                 s = reg_match->startp[no];
7092                 if (reg_match->endp[no] == NULL)
7093                     s = NULL;
7094                 else
7095                     len = (int)(reg_match->endp[no] - s);
7096             }
7097             if (s != NULL)
7098             {
7099                 for (;;)
7100                 {
7101                     if (len == 0)
7102                     {
7103                         if (REG_MULTI)
7104                         {
7105                             if (reg_mmatch->endpos[no].lnum == clnum)
7106                                 break;
7107                             if (copy)
7108                                 *dst = CAR;
7109                             ++dst;
7110                             s = reg_getline(++clnum);
7111                             if (reg_mmatch->endpos[no].lnum == clnum)
7112                                 len = reg_mmatch->endpos[no].col;
7113                             else
7114                                 len = (int)STRLEN(s);
7115                         }
7116                         else
7117                             break;
7118                     }
7119                     else if (*s == NUL) /* we hit NUL. */
7120                     {
7121                         if (copy)
7122                             EMSG(_(e_re_damg));
7123                         goto exit;
7124                     }
7125                     else
7126                     {
7127                         if (backslash && (*s == CAR || *s == '\\'))
7128                         {
7129                             /*
7130                              * Insert a backslash in front of a CR, otherwise
7131                              * it will be replaced by a line break.
7132                              * Number of backslashes will be halved later,
7133                              * double them here.
7134                              */
7135                             if (copy)
7136                             {
7137                                 dst[0] = '\\';
7138                                 dst[1] = *s;
7139                             }
7140                             dst += 2;
7141                         }
7142                         else
7143                         {
7144 #ifdef FEAT_MBYTE
7145                             if (has_mbyte)
7146                                 c = mb_ptr2char(s);
7147                             else
7148 #endif
7149                                 c = *s;
7150
7151                             if (func == (fptr_T)NULL)   /* just copy */
7152                                 cc = c;
7153                             else
7154                                 /* Turbo C complains without the typecast */
7155                                 func = (fptr_T)(func(&cc, c));
7156
7157 #ifdef FEAT_MBYTE
7158                             if (has_mbyte)
7159                             {
7160                                 int l;
7161
7162                                 /* Copy composing characters separately, one
7163                                  * at a time. */
7164                                 if (enc_utf8)
7165                                     l = utf_ptr2len(s) - 1;
7166                                 else
7167                                     l = mb_ptr2len(s) - 1;
7168
7169                                 s += l;
7170                                 len -= l;
7171                                 if (copy)
7172                                     mb_char2bytes(cc, dst);
7173                                 dst += mb_char2len(cc) - 1;
7174                             }
7175                             else
7176 #endif
7177                                 if (copy)
7178                                     *dst = cc;
7179                             dst++;
7180                         }
7181
7182                         ++s;
7183                         --len;
7184                     }
7185                 }
7186             }
7187             no = -1;
7188         }
7189       }
7190     if (copy)
7191         *dst = NUL;
7192
7193 exit:
7194     return (int)((dst - dest) + 1);
7195 }
7196
#ifdef FEAT_EVAL
/*
 * Used for the submatch() function: get the string from the n'th submatch in
 * allocated memory.
 * "no" is the index of the submatch; a negative index is rejected.
 * When the submatch spans several lines, the lines are joined with '\n' in
 * the result.
 * Returns NULL when not in a ":s" command, for a non-existing submatch, when
 * a line of the match cannot be obtained and when allocating memory fails.
 */
    char_u *
reg_submatch(no)
    int         no;
{
    char_u      *retval = NULL;
    char_u      *s;
    int         len;
    int         round;
    linenr_T    lnum;

    if (!can_f_submatch || no < 0)
        return NULL;

    if (submatch_match == NULL)
    {
        /*
         * Multi-line match: positions are line number plus column.
         * First round: compute the length and allocate memory.
         * Second round: copy the text.
         */
        for (round = 1; round <= 2; ++round)
        {
            lnum = submatch_mmatch->startpos[no].lnum;
            if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
                goto fail;

            /* Check the line pointer before adding the start column: once
             * the offset has been added a NULL result can no longer be
             * recognized. */
            s = reg_getline(lnum);
            if (s == NULL)  /* anti-crash check, cannot happen? */
                goto fail;
            s += submatch_mmatch->startpos[no].col;

            if (submatch_mmatch->endpos[no].lnum == lnum)
            {
                /* Within one line: take from start to end col. */
                len = submatch_mmatch->endpos[no].col
                                          - submatch_mmatch->startpos[no].col;
                if (round == 2)
                    vim_strncpy(retval, s, len);
                ++len;      /* count the terminating NUL */
            }
            else
            {
                /* Multiple lines: take start line from start col, middle
                 * lines completely and end line up to end col. */
                len = (int)STRLEN(s);
                if (round == 2)
                {
                    STRCPY(retval, s);
                    retval[len] = '\n';
                }
                ++len;      /* count the line separator */
                ++lnum;
                while (lnum < submatch_mmatch->endpos[no].lnum)
                {
                    s = reg_getline(lnum++);
                    if (s == NULL)  /* anti-crash check */
                        goto fail;
                    if (round == 2)
                        STRCPY(retval + len, s);
                    len += (int)STRLEN(s);
                    if (round == 2)
                        retval[len] = '\n';
                    ++len;
                }
                if (round == 2)
                {
                    s = reg_getline(lnum);
                    if (s == NULL)  /* anti-crash check */
                        goto fail;
                    /* STRNCPY() need not add a NUL here, the string is
                     * terminated explicitly below. */
                    STRNCPY(retval + len, s,
                                             submatch_mmatch->endpos[no].col);
                }
                len += submatch_mmatch->endpos[no].col;
                if (round == 2)
                    retval[len] = NUL;
                ++len;      /* count the terminating NUL */
            }

            if (retval == NULL)
            {
                /* End of the first round: "len" includes the NUL. */
                retval = lalloc((long_u)len, TRUE);
                if (retval == NULL)
                    return NULL;
            }
        }
    }
    else
    {
        /* Single-line match: start and end are pointers into the text. */
        s = submatch_match->startp[no];
        if (s == NULL || submatch_match->endp[no] == NULL)
            retval = NULL;
        else
            retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
    }

    return retval;

fail:
    /* Never hand out a partly filled buffer; "retval" is still NULL when
     * failing in the first round. */
    vim_free(retval);
    return NULL;
}
#endif