src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
 * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
/*
 * Basic opcodes, values 0-19.  In the table below the "opnd?" column names
 * the kind of operand that follows the node ("node", "str" or "nr"); when it
 * is blank the node has no operand.
 */
/* definition   number             opnd?    meaning */
#define END             0       /*      End of program or NOMATCH operand. */
#define BOL             1       /*      Match "" at beginning of line. */
#define EOL             2       /*      Match "" at end of line. */
#define BRANCH          3       /* node Match this alternative, or the
                                 *      next... */
#define BACK            4       /*      Match "", "next" ptr points backward. */
#define EXACTLY         5       /* str  Match this string. */
#define NOTHING         6       /*      Match empty string. */
#define STAR            7       /* node Match this (simple) thing 0 or more
                                 *      times. */
#define PLUS            8       /* node Match this (simple) thing 1 or more
                                 *      times. */
#define MATCH           9       /* node match the operand zero-width */
#define NOMATCH         10      /* node check for no match with operand */
#define BEHIND          11      /* node look behind for a match with operand */
#define NOBEHIND        12      /* node look behind for no match with operand */
#define SUBPAT          13      /* node match the operand here */
#define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
                                 *      n times (\{m,n\}). */
#define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
#define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
#define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
                                 *      and BRACE_COMPLEX. */
#define NEWL            18      /*      Match line-break */
#define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
/*
 * Opcodes that come in groups of ten: the value given is the first of the
 * group, the "-NN" in the comment is the last one.
 */
#define MOPEN           80  /* -89       Mark this point in input as start of
                                 *       \( subexpr.  MOPEN + 0 marks start of
                                 *       match. */
#define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
                                 *       end of match. */
#define BACKREF         100 /* -109 node Match same string again \1-\9 */

#ifdef FEAT_SYN_HL
# define ZOPEN          110 /* -119      Mark this point in input as start of
                                 *       \z( subexpr. */
# define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
# define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
#endif

#define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */

#define NOPEN           150     /*      Mark this point in input as start of
                                 *      \%( subexpr. */
#define NCLOSE          151     /*      Analogous to NOPEN. */

#define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
#define RE_BOF          201     /*      Match "" at beginning of file. */
#define RE_EOF          202     /*      Match "" at end of file. */
#define CURSOR          203     /*      Match location of cursor. */

/* "nr cmp": a number operand followed by a comparison operand; presumably
 * read back with OPERAND_MIN() and OPERAND_CMP() -- TODO confirm. */
#define RE_LNUM         204     /* nr cmp  Match line number */
#define RE_COL          205     /* nr cmp  Match column number */
#define RE_VCOL         206     /* nr cmp  Match virtual column number */

#define RE_MARK         207     /* mark cmp  Match mark position */
#define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
/* Turn character "x" into its Magic form by moving it 256 down. */
#define Magic(x)        ((int)(x) - 256)
/* Undo Magic(): move "x" 256 up again. */
#define un_Magic(x)     ((x) + 256)
/* TRUE when "x" is a Magic character, i.e. negative. */
#define is_Magic(x)     ((x) < 0)

/* Helpers built on the macros above; defined right below. */
static int no_Magic __ARGS((int x));
static int toggle_Magic __ARGS((int x));
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
/* Note: written in octal; 0234 is 156 decimal (0x9c), it fits in one byte. */
#define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
/* Opcode of the node at "p": its first byte. */
#define OP(p)           ((int)*(p))
/* "Next" offset of the node at "p": bytes 1 and 2, high order byte first. */
#define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* Start of the operand: the byte right after the three byte node header. */
#define OPERAND(p)      ((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
                        + ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes (bytes 7 to 10 of the node). */
#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
/* The byte at "p", read as an unsigned character and converted to int. */
#define UCHARAT(p)      ((int)*(char_u *)(p))

/* Used for an error (down from) vim_regcomp(): give the error message, set
 * rc_did_emsg and return NULL */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
/* Same, for a message "m" with one string argument: "" when "c" is non-zero,
 * a backslash otherwise. */
#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
/* Same as EMSG_RET_NULL, but return FAIL instead of NULL. */
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
/* Error E369; the backslash is left out of the message when the pattern is
 * "very magic" (reg_magic == MAGIC_ALL). */
#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)

/* 32767 << 16 == 0x7fff0000; presumably the upper bound used for repeat
 * counts -- TODO confirm against read_limits(). */
#define MAX_LIMIT       (32767L << 16L)
 334
static int re_multi_type __ARGS((int));
static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
static char_u *cstrchr __ARGS((char_u *, int));

/* Debugging helpers, only declared when DEBUG is defined. */
#ifdef DEBUG
static void     regdump __ARGS((char_u *, regprog_T *));
static char_u   *regprop __ARGS((char_u *));
#endif

/* Return values of re_multi_type(). */
#define NOT_MULTI       0
#define MULTI_ONE       1
#define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
/* Each flag is a separate bit, so that they can be combined. */
#define HASWIDTH        0x1     /* Known never to match null string. */
#define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
#define SPSTART         0x4     /* Starts with * or +. */
#define HASNL           0x8     /* Contains some \n. */
#define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
#define WORST           0       /* Worst case. */

/*
 * When regcode is set to this value, code is not emitted and size is computed
 * instead.
 */
#define JUST_CALC_SIZE  ((char_u *) -1)

/* NOTE(review): not used in this part of the file; presumably the previous
 * substitute string -- confirm where it is set. */
static char_u           *reg_prev_sub = NULL;
 380
 381 /*
 382  * REGEXP_INRANGE contains all characters which are always special in a []
 383  * range after '\'.
 384  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 385  * These are:
 386  *  \n  - New line (NL).
 387  *  \r  - Carriage Return (CR).
 388  *  \t  - Tab (TAB).
 389  *  \e  - Escape (ESC).
 390  *  \b  - Backspace (Ctrl_H).
 391  *  \d  - Character code in decimal, eg \d123
 *  \o  - Character code in octal, eg \o40
 393  *  \x  - Character code in hex, eg \x4a
 394  *  \u  - Multibyte character code, eg \u20ac
 395  *  \U  - Long multibyte character code, eg \U12345678
 396  */
/* Characters that are always special in a [] range after '\'. */
static char_u REGEXP_INRANGE[] = "]^-n\\";
/* Characters that act as abbreviations after '\'. */
static char_u REGEXP_ABBR[] = "nrtebdoxuU";

/* Helpers for parsing [] ranges and the character class table. */
static int      backslash_trans __ARGS((int c));
static int      get_char_class __ARGS((char_u **pp));
static int      get_equi_class __ARGS((char_u **pp));
static void     reg_equi_class __ARGS((int c));
static int      get_coll_element __ARGS((char_u **pp));
static char_u   *skip_anyof __ARGS((char_u *p));
static void     init_class_tab __ARGS((void));
 406 static void     init_class_tab __ARGS((void));
 407
 408 /*
 409  * Translate '\x' to its control character, except "\n", which is Magic.
 410  */
 411     static int
 412 backslash_trans(c)
 413     int         c;
 414 {
 415     switch (c)
 416     {
 417         case 'r':   return CAR;
 418         case 't':   return TAB;
 419         case 'e':   return ESC;
 420         case 'b':   return BS;
 421     }
 422     return c;
 423 }
 424
 425 /*
 426  * Check for a character class name "[:name:]".  "pp" points to the '['.
 427  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 428  * recognized.  Otherwise "pp" is advanced to after the item.
 429  */
 430     static int
 431 get_char_class(pp)
 432     char_u      **pp;
 433 {
 434     static const char *(class_names[]) =
 435     {
 436         "alnum:]",
 437 #define CLASS_ALNUM 0
 438         "alpha:]",
 439 #define CLASS_ALPHA 1
 440         "blank:]",
 441 #define CLASS_BLANK 2
 442         "cntrl:]",
 443 #define CLASS_CNTRL 3
 444         "digit:]",
 445 #define CLASS_DIGIT 4
 446         "graph:]",
 447 #define CLASS_GRAPH 5
 448         "lower:]",
 449 #define CLASS_LOWER 6
 450         "print:]",
 451 #define CLASS_PRINT 7
 452         "punct:]",
 453 #define CLASS_PUNCT 8
 454         "space:]",
 455 #define CLASS_SPACE 9
 456         "upper:]",
 457 #define CLASS_UPPER 10
 458         "xdigit:]",
 459 #define CLASS_XDIGIT 11
 460         "tab:]",
 461 #define CLASS_TAB 12
 462         "return:]",
 463 #define CLASS_RETURN 13
 464         "backspace:]",
 465 #define CLASS_BACKSPACE 14
 466         "escape:]",
 467 #define CLASS_ESCAPE 15
 468     };
 469 #define CLASS_NONE 99
 470     int i;
 471
 472     if ((*pp)[1] == ':')
 473     {
 474         for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
 475             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 476             {
 477                 *pp += STRLEN(class_names[i]) + 2;
 478                 return i;
 479             }
 480     }
 481     return CLASS_NONE;
 482 }
 483
/*
 * Specific version of character class functions.
 * Using a table to keep this fast.
 * class_tab[] has one entry for every byte value; an entry holds the RI_
 * flags of that character.  It is filled by init_class_tab().
 * The entries are "short" because RI_WHITE (0x100) does not fit in 8 bits.
 */
static short    class_tab[256];

#define     RI_DIGIT    0x01
#define     RI_HEX      0x02
#define     RI_OCTAL    0x04
#define     RI_WORD     0x08
#define     RI_HEAD     0x10
#define     RI_ALPHA    0x20
#define     RI_LOWER    0x40
#define     RI_UPPER    0x80
#define     RI_WHITE    0x100
 498 #define     RI_WHITE    0x100
 499
 500     static void
 501 init_class_tab()
 502 {
 503     int         i;
 504     static int  done = FALSE;
 505
 506     if (done)
 507         return;
 508
 509     for (i = 0; i < 256; ++i)
 510     {
 511         if (i >= '0' && i <= '7')
 512             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 513         else if (i >= '8' && i <= '9')
 514             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 515         else if (i >= 'a' && i <= 'f')
 516             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 517 #ifdef EBCDIC
 518         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 519                                                     || (i >= 's' && i <= 'z'))
 520 #else
 521         else if (i >= 'g' && i <= 'z')
 522 #endif
 523             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 524         else if (i >= 'A' && i <= 'F')
 525             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 526 #ifdef EBCDIC
 527         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 528                                                     || (i >= 'S' && i <= 'Z'))
 529 #else
 530         else if (i >= 'G' && i <= 'Z')
 531 #endif
 532             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 533         else if (i == '_')
 534             class_tab[i] = RI_WORD + RI_HEAD;
 535         else
 536             class_tab[i] = 0;
 537     }
 538     class_tab[' '] |= RI_WHITE;
 539     class_tab['\t'] |= RI_WHITE;
 540     done = TRUE;
 541 }
 542
 543 #ifdef FEAT_MBYTE
 544 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 545 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 546 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 547 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 548 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 549 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 550 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 551 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 552 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 553 #else
 554 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 555 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 556 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 557 # define ri_word(c)     (class_tab[c] & RI_WORD)
 558 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 559 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 560 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 561 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 562 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 563 #endif
 564
/* flags for regflags; each one is a separate bit, they are or'ed together */
#define RF_ICASE    1   /* ignore case */
#define RF_NOICASE  2   /* don't ignore case */
#define RF_HASNL    4   /* can match a NL */
#define RF_ICOMBINE 8   /* ignore combining characters */
#define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 571
 572 /*
 573  * Global work variables for vim_regcomp().
 574  */
 575
static char_u   *regparse;      /* Input-scan pointer. */
static int      prevchr_len;    /* byte length of previous char */
static int      num_complex_braces; /* Complex \{...} count */
static int      regnpar;        /* () count. */
#ifdef FEAT_SYN_HL
static int      regnzpar;       /* \z() count. */
static int      re_has_z;       /* \z item detected */
#endif
static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
static long     regsize;        /* Code size. */
static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
static unsigned regflags;       /* RF_ flags for prog */
/* The three arrays below have ten entries, the same number as there are
 * BRACE_COMPLEX opcodes (140-149); presumably they are indexed by
 * "opcode - BRACE_COMPLEX" -- TODO confirm. */
static long     brace_min[10];  /* Minimums for complex brace repeats */
static long     brace_max[10];  /* Maximums for complex brace repeats */
static int      brace_count[10]; /* Current counts for complex brace repeats */
#if defined(FEAT_SYN_HL) || defined(PROTO)
static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
#endif
static int      one_exactly = FALSE;    /* only do one char for EXACTLY */

/* The MAGIC_ values go up with the magicness, from "\V" to "\v". */
static int      reg_magic;      /* magicness of the pattern: */
#define MAGIC_NONE      1       /* "\V" very unmagic */
#define MAGIC_OFF       2       /* "\M" or 'magic' off */
#define MAGIC_ON        3       /* "\m" or 'magic' */
#define MAGIC_ALL       4       /* "\v" very magic */

static int      reg_string;     /* matching with a string instead of a buffer
                                   line */
static int      reg_strict;     /* "[abc" is illegal */
 605
 606 /*
 607  * META contains all characters that may be magic, except '^' and '$'.
 608  */
 609
#ifdef EBCDIC
/* NOTE(review): 'Z' is set in META_flags[] below but is missing from this
 * string -- confirm whether "\Z" should be magic in an EBCDIC build. */
static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
#else
/* META[] is used often enough to justify turning it into a table. */
/* The table is indexed by ASCII value, an entry is 1 for a character that may
 * be magic.  It has 127 entries: the last one is for '~' (0x7e), there is no
 * entry for 0x7f and higher. */
static char_u META_flags[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*                 %  &     (  )  *  +        .    */
    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
/*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
/*  @  A     C  D     F     H  I     K  L  M     O */
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
/*  P        S     U  V  W  X     Z  [           _ */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
/*     a     c  d     f     h  i     k  l  m  n  o */
    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
/*  p        s     u  v  w  x     z  {  |     ~    */
    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
};
#endif
 631
/* NOTE(review): presumably the current character of the pattern, as used by
 * peekchr() and friends -- confirm, its use is not in this part of the file. */
static int      curchr;

/* arguments for reg() */
#define REG_NOPAREN     0       /* toplevel reg() */
#define REG_PAREN       1       /* \(\) */
#define REG_ZPAREN      2       /* \z(\) */
#define REG_NPAREN      3       /* \%(\) */
 639
 640 /*
 641  * Forward declarations for vim_regcomp()'s friends.
 642  */
static void     initchr __ARGS((char_u *));
static int      getchr __ARGS((void));
static void     skipchr_keepstart __ARGS((void));
static int      peekchr __ARGS((void));
static void     skipchr __ARGS((void));
static void     ungetchr __ARGS((void));
static int      gethexchrs __ARGS((int maxinputlen));
static int      getoctchrs __ARGS((void));
static int      getdecchrs __ARGS((void));
static int      coll_get_char __ARGS((void));
static void     regcomp_start __ARGS((char_u *expr, int flags));
static char_u   *reg __ARGS((int, int *));
static char_u   *regbranch __ARGS((int *flagp));
static char_u   *regconcat __ARGS((int *flagp));
static char_u   *regpiece __ARGS((int *));
static char_u   *regatom __ARGS((int *));
static char_u   *regnode __ARGS((int));
#ifdef FEAT_MBYTE
static int      use_multibytecode __ARGS((int c));
#endif
static int      prog_magic_wrong __ARGS((void));
static char_u   *regnext __ARGS((char_u *));
static void     regc __ARGS((int b));
/* Without multi-byte support regmbc() is just another name for regc(). */
#ifdef FEAT_MBYTE
static void     regmbc __ARGS((int c));
#else
# define regmbc(c) regc(c)
#endif
static void     reginsert __ARGS((int, char_u *));
static void     reginsert_limits __ARGS((int, long, long, char_u *));
static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
static int      read_limits __ARGS((long *, long *));
static void     regtail __ARGS((char_u *, char_u *));
static void     regoptail __ARGS((char_u *, char_u *));
 677
 678 /*
 679  * Return TRUE if compiled regular expression "prog" can match a line break.
 680  */
 681     int
 682 re_multiline(prog)
 683     regprog_T *prog;
 684 {
 685     return (prog->regflags & RF_HASNL);
 686 }
 687
 688 /*
 689  * Return TRUE if compiled regular expression "prog" looks before the start
 690  * position (pattern contains "\@<=" or "\@<!").
 691  */
 692     int
 693 re_lookbehind(prog)
 694     regprog_T *prog;
 695 {
 696     return (prog->regflags & RF_LOOKBH);
 697 }
 698
 699 /*
 700  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 701  * Returns a character representing the class. Zero means that no item was
 702  * recognized.  Otherwise "pp" is advanced to after the item.
 703  */
 704     static int
 705 get_equi_class(pp)
 706     char_u      **pp;
 707 {
 708     int         c;
 709     int         l = 1;
 710     char_u      *p = *pp;
 711
 712     if (p[1] == '=')
 713     {
 714 #ifdef FEAT_MBYTE
 715         if (has_mbyte)
 716             l = (*mb_ptr2len)(p + 2);
 717 #endif
 718         if (p[l + 2] == '=' && p[l + 3] == ']')
 719         {
 720 #ifdef FEAT_MBYTE
 721             if (has_mbyte)
 722                 c = mb_ptr2char(p + 2);
 723             else
 724 #endif
 725                 c = p[2];
 726             *pp += l + 4;
 727             return c;
 728         }
 729     }
 730     return 0;
 731 }
 732
 733 /*
 734  * Produce the bytes for equivalence class "c".
 735  * Currently only handles latin1, latin9 and utf-8.
 736  */
 737     static void
 738 reg_equi_class(c)
 739     int     c;
 740 {
 741 #ifdef FEAT_MBYTE
 742     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 743                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 744 #endif
 745     {
 746         switch (c)
 747         {
 748             case 'A': case '\300': case '\301': case '\302':
 749             case '\303': case '\304': case '\305':
 750                       regmbc('A'); regmbc('\300'); regmbc('\301');
 751                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 752                       regmbc('\305');
 753                       return;
 754             case 'C': case '\307':
 755                       regmbc('C'); regmbc('\307');
 756                       return;
 757             case 'E': case '\310': case '\311': case '\312': case '\313':
 758                       regmbc('E'); regmbc('\310'); regmbc('\311');
 759                       regmbc('\312'); regmbc('\313');
 760                       return;
 761             case 'I': case '\314': case '\315': case '\316': case '\317':
 762                       regmbc('I'); regmbc('\314'); regmbc('\315');
 763                       regmbc('\316'); regmbc('\317');
 764                       return;
 765             case 'N': case '\321':
 766                       regmbc('N'); regmbc('\321');
 767                       return;
 768             case 'O': case '\322': case '\323': case '\324': case '\325':
 769             case '\326':
 770                       regmbc('O'); regmbc('\322'); regmbc('\323');
 771                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 772                       return;
 773             case 'U': case '\331': case '\332': case '\333': case '\334':
 774                       regmbc('U'); regmbc('\331'); regmbc('\332');
 775                       regmbc('\333'); regmbc('\334');
 776                       return;
 777             case 'Y': case '\335':
 778                       regmbc('Y'); regmbc('\335');
 779                       return;
 780             case 'a': case '\340': case '\341': case '\342':
 781             case '\343': case '\344': case '\345':
 782                       regmbc('a'); regmbc('\340'); regmbc('\341');
 783                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 784                       regmbc('\345');
 785                       return;
 786             case 'c': case '\347':
 787                       regmbc('c'); regmbc('\347');
 788                       return;
 789             case 'e': case '\350': case '\351': case '\352': case '\353':
 790                       regmbc('e'); regmbc('\350'); regmbc('\351');
 791                       regmbc('\352'); regmbc('\353');
 792                       return;
 793             case 'i': case '\354': case '\355': case '\356': case '\357':
 794                       regmbc('i'); regmbc('\354'); regmbc('\355');
 795                       regmbc('\356'); regmbc('\357');
 796                       return;
 797             case 'n': case '\361':
 798                       regmbc('n'); regmbc('\361');
 799                       return;
 800             case 'o': case '\362': case '\363': case '\364': case '\365':
 801             case '\366':
 802                       regmbc('o'); regmbc('\362'); regmbc('\363');
 803                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 804                       return;
 805             case 'u': case '\371': case '\372': case '\373': case '\374':
 806                       regmbc('u'); regmbc('\371'); regmbc('\372');
 807                       regmbc('\373'); regmbc('\374');
 808                       return;
 809             case 'y': case '\375': case '\377':
 810                       regmbc('y'); regmbc('\375'); regmbc('\377');
 811                       return;
 812         }
 813     }
 814     regmbc(c);
 815 }
 816
 817 /*
 818  * Check for a collating element "[.a.]".  "pp" points to the '['.
 819  * Returns a character. Zero means that no item was recognized.  Otherwise
 820  * "pp" is advanced to after the item.
 821  * Currently only single characters are recognized!
 822  */
 823     static int
 824 get_coll_element(pp)
 825     char_u      **pp;
 826 {
 827     int         c;
 828     int         l = 1;
 829     char_u      *p = *pp;
 830
 831     if (p[1] == '.')
 832     {
 833 #ifdef FEAT_MBYTE
 834         if (has_mbyte)
 835             l = (*mb_ptr2len)(p + 2);
 836 #endif
 837         if (p[l + 2] == '.' && p[l + 3] == ']')
 838         {
 839 #ifdef FEAT_MBYTE
 840             if (has_mbyte)
 841                 c = mb_ptr2char(p + 2);
 842             else
 843 #endif
 844                 c = p[2];
 845             *pp += l + 4;
 846             return c;
 847         }
 848     }
 849     return 0;
 850 }
 851
 852
 853 /*
 854  * Skip over a "[]" range.
 855  * "p" must point to the character after the '['.
 856  * The returned pointer is on the matching ']', or the terminating NUL.
 857  */
 858     static char_u *
 859 skip_anyof(p)
 860     char_u      *p;
 861 {
 862     int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
 863     int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
 864 #ifdef FEAT_MBYTE
 865     int         l;
 866 #endif
 867
 868     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 869     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 870
 871     if (*p == '^')      /* Complement of range. */
 872         ++p;
 873     if (*p == ']' || *p == '-')
 874         ++p;
 875     while (*p != NUL && *p != ']')
 876     {
 877 #ifdef FEAT_MBYTE
 878         if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 879             p += l;
 880         else
 881 #endif
 882             if (*p == '-')
 883             {
 884                 ++p;
 885                 if (*p != ']' && *p != NUL)
 886                     mb_ptr_adv(p);
 887             }
 888         else if (*p == '\\'
 889                 && !cpo_bsl
 890                 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
 891                     || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
 892             p += 2;
 893         else if (*p == '[')
 894         {
 895             if (get_char_class(&p) == CLASS_NONE
 896                     && get_equi_class(&p) == 0
 897                     && get_coll_element(&p) == 0)
 898                 ++p; /* It was not a class name */
 899         }
 900         else
 901             ++p;
 902     }
 903
 904     return p;
 905 }
 906
 907 /*
 908  * Skip past regular expression.
 909  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 910  * Take care of characters with a backslash in front of it.
 911  * Skip strings inside [ and ].
 912  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 913  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 914  * is changed in-place.
 915  */
 916     char_u *
 917 skip_regexp(startp, dirc, magic, newp)
 918     char_u      *startp;
 919     int         dirc;
 920     int         magic;
 921     char_u      **newp;
 922 {
 923     int         mymagic;
 924     char_u      *p = startp;
 925
 926     if (magic)
 927         mymagic = MAGIC_ON;
 928     else
 929         mymagic = MAGIC_OFF;
 930
 931     for (; p[0] != NUL; mb_ptr_adv(p))
 932     {
 933         if (p[0] == dirc)       /* found end of regexp */
 934             break;
 935         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 936                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 937         {
 938             p = skip_anyof(p + 1);
 939             if (p[0] == NUL)
 940                 break;
 941         }
 942         else if (p[0] == '\\' && p[1] != NUL)
 943         {
 944             if (dirc == '?' && newp != NULL && p[1] == '?')
 945             {
 946                 /* change "\?" to "?", make a copy first. */
 947                 if (*newp == NULL)
 948                 {
 949                     *newp = vim_strsave(startp);
 950                     if (*newp != NULL)
 951                         p = *newp + (p - startp);
 952                 }
 953                 if (*newp != NULL)
 954                     mch_memmove(p, p + 1, STRLEN(p));
 955                 else
 956                     ++p;
 957             }
 958             else
 959                 ++p;    /* skip next character */
 960             if (*p == 'v')
 961                 mymagic = MAGIC_ALL;
 962             else if (*p == 'V')
 963                 mymagic = MAGIC_NONE;
 964         }
 965     }
 966     return p;
 967 }
 968
 969 /*
 970  * vim_regcomp() - compile a regular expression into internal code
 971  * Returns the program in allocated space.  Returns NULL for an error.
 972  *
 973  * We can't allocate space until we know how big the compiled form will be,
 974  * but we can't compile it (and thus know how big it is) until we've got a
 975  * place to put the code.  So we cheat:  we compile it twice, once with code
 976  * generation turned off and size counting turned on, and once "for real".
 977  * This also means that we don't allocate space until we are sure that the
 978  * thing really will compile successfully, and we never have to move the
 979  * code and thus invalidate pointers into it.  (Note that it has to be in
 980  * one piece because vim_free() must be able to free it all.)
 981  *
 982  * Whether upper/lower case is to be ignored is decided when executing the
 983  * program, it does not matter here.
 984  *
 985  * Beware that the optimization-preparation code in here knows about some
 986  * of the structure of the compiled regexp.
 987  * "re_flags": RE_MAGIC and/or RE_STRING.
 988  */
 989     regprog_T *
 990 vim_regcomp(expr, re_flags)
 991     char_u      *expr;
 992     int         re_flags;
 993 {
 994     regprog_T   *r;
 995     char_u      *scan;
 996     char_u      *longest;
 997     int         len;
 998     int         flags;
 999
1000     if (expr == NULL)
1001         EMSG_RET_NULL(_(e_null));
1002
1003     init_class_tab();
1004
1005     /*
1006      * First pass: determine size, legality.
1007      */
1008     regcomp_start(expr, re_flags);
1009     regcode = JUST_CALC_SIZE;
1010     regc(REGMAGIC);
1011     if (reg(REG_NOPAREN, &flags) == NULL)
1012         return NULL;
1013
1014     /* Small enough for pointer-storage convention? */
1015 #ifdef SMALL_MALLOC             /* 16 bit storage allocation */
1016     if (regsize >= 65536L - 256L)
1017         EMSG_RET_NULL(_("E339: Pattern too long"));
1018 #endif
1019
1020     /* Allocate space. */
1021     r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1022     if (r == NULL)
1023         return NULL;
1024
1025     /*
1026      * Second pass: emit code.
1027      */
1028     regcomp_start(expr, re_flags);
1029     regcode = r->program;
1030     regc(REGMAGIC);
1031     if (reg(REG_NOPAREN, &flags) == NULL)
1032     {
1033         vim_free(r);
1034         return NULL;
1035     }
1036
1037     /* Dig out information for optimizations. */
1038     r->regstart = NUL;          /* Worst-case defaults. */
1039     r->reganch = 0;
1040     r->regmust = NULL;
1041     r->regmlen = 0;
1042     r->regflags = regflags;
1043     if (flags & HASNL)
1044         r->regflags |= RF_HASNL;
1045     if (flags & HASLOOKBH)
1046         r->regflags |= RF_LOOKBH;
1047 #ifdef FEAT_SYN_HL
1048     /* Remember whether this pattern has any \z specials in it. */
1049     r->reghasz = re_has_z;
1050 #endif
1051     scan = r->program + 1;      /* First BRANCH. */
1052     if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1053     {
1054         scan = OPERAND(scan);
1055
1056         /* Starting-point info. */
1057         if (OP(scan) == BOL || OP(scan) == RE_BOF)
1058         {
1059             r->reganch++;
1060             scan = regnext(scan);
1061         }
1062
1063         if (OP(scan) == EXACTLY)
1064         {
1065 #ifdef FEAT_MBYTE
1066             if (has_mbyte)
1067                 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1068             else
1069 #endif
1070                 r->regstart = *OPERAND(scan);
1071         }
1072         else if ((OP(scan) == BOW
1073                     || OP(scan) == EOW
1074                     || OP(scan) == NOTHING
1075                     || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1076                     || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1077                  && OP(regnext(scan)) == EXACTLY)
1078         {
1079 #ifdef FEAT_MBYTE
1080             if (has_mbyte)
1081                 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1082             else
1083 #endif
1084                 r->regstart = *OPERAND(regnext(scan));
1085         }
1086
1087         /*
1088          * If there's something expensive in the r.e., find the longest
1089          * literal string that must appear and make it the regmust.  Resolve
1090          * ties in favor of later strings, since the regstart check works
1091          * with the beginning of the r.e. and avoiding duplication
1092          * strengthens checking.  Not a strong reason, but sufficient in the
1093          * absence of others.
1094          */
1095         /*
1096          * When the r.e. starts with BOW, it is faster to look for a regmust
1097          * first. Used a lot for "#" and "*" commands. (Added by mool).
1098          */
1099         if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1100                                                           && !(flags & HASNL))
1101         {
1102             longest = NULL;
1103             len = 0;
1104             for (; scan != NULL; scan = regnext(scan))
1105                 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1106                 {
1107                     longest = OPERAND(scan);
1108                     len = (int)STRLEN(OPERAND(scan));
1109                 }
1110             r->regmust = longest;
1111             r->regmlen = len;
1112         }
1113     }
1114 #ifdef DEBUG
1115     regdump(expr, r);
1116 #endif
1117     return r;
1118 }
1119
1120 /*
1121  * Setup to parse the regexp.  Used once to get the length and once to do it.
1122  */
1123     static void
1124 regcomp_start(expr, re_flags)
1125     char_u      *expr;
1126     int         re_flags;           /* see vim_regcomp() */
1127 {
1128     initchr(expr);
1129     if (re_flags & RE_MAGIC)
1130         reg_magic = MAGIC_ON;
1131     else
1132         reg_magic = MAGIC_OFF;
1133     reg_string = (re_flags & RE_STRING);
1134     reg_strict = (re_flags & RE_STRICT);
1135
1136     num_complex_braces = 0;
1137     regnpar = 1;
1138     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1139 #ifdef FEAT_SYN_HL
1140     regnzpar = 1;
1141     re_has_z = 0;
1142 #endif
1143     regsize = 0L;
1144     regflags = 0;
1145 #if defined(FEAT_SYN_HL) || defined(PROTO)
1146     had_eol = FALSE;
1147 #endif
1148 }
1149
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the value of "had_eol": TRUE when the end-of-line item "$" was
 * found since the flag was last reset at the start of a compile pass.
 * Meant to be used right after vim_regcomp().  This is messy, but it works
 * fine.
 */
    int
vim_regcomp_had_eol()
{
    int         seen_eol = had_eol;

    return seen_eol;
}
#endif
1161
/*
 * reg - regular expression, i.e. main body or parenthesized thing
 *
 * Caller must absorb opening parenthesis.
 *
 * Combining parenthesis handling with the base level of regular expression
 * is a trifle forced, but the need to tie the tails of the branches to what
 * follows makes it hard to avoid.
 *
 * "paren" selects the nodes put around the branches: MOPEN/MCLOSE plus the
 * group number for REG_PAREN, NOPEN/NCLOSE for REG_NPAREN, ZOPEN/ZCLOSE plus
 * the group number for REG_ZPAREN, and only a closing END node for
 * REG_NOPAREN.
 * "*flagp" is set to HASWIDTH unless one of the branches lacks it, plus the
 * SPSTART, HASNL and HASLOOKBH flags of any of the branches.
 * Returns the opening node, or the first branch when there is no opening
 * node.  Returns NULL when something is wrong.
 */
    static char_u *
reg(paren, flagp)
    int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
    int         *flagp;
{
    char_u      *ret;
    char_u      *br;
    char_u      *ender;
    int         parno = 0;      /* group number, stays zero for REG_NPAREN
                                   and REG_NOPAREN */
    int         flags;

    *flagp = HASWIDTH;          /* Tentatively. */

#ifdef FEAT_SYN_HL
    if (paren == REG_ZPAREN)
    {
        /* Make a ZOPEN node. */
        if (regnzpar >= NSUBEXP)
            EMSG_RET_NULL(_("E50: Too many \\z("));
        parno = regnzpar;
        regnzpar++;
        ret = regnode(ZOPEN + parno);
    }
    else
#endif
        if (paren == REG_PAREN)
    {
        /* Make a MOPEN node. */
        if (regnpar >= NSUBEXP)
            EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
        parno = regnpar;
        ++regnpar;
        ret = regnode(MOPEN + parno);
    }
    else if (paren == REG_NPAREN)
    {
        /* Make a NOPEN node. */
        ret = regnode(NOPEN);
    }
    else
        ret = NULL;             /* REG_NOPAREN: no opening node */

    /* Pick up the branches, linking them together. */
    br = regbranch(&flags);
    if (br == NULL)
        return NULL;
    if (ret != NULL)
        regtail(ret, br);       /* [MZ]OPEN -> first. */
    else
        ret = br;
    /* If one of the branches can be zero-width, the whole thing can.
     * If one of the branches has * at start or matches a line-break, the
     * whole thing can. */
    if (!(flags & HASWIDTH))
        *flagp &= ~HASWIDTH;
    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    /* Every "|" starts another branch, handled like the first one. */
    while (peekchr() == Magic('|'))
    {
        skipchr();
        br = regbranch(&flags);
        if (br == NULL)
            return NULL;
        regtail(ret, br);       /* BRANCH -> BRANCH. */
        if (!(flags & HASWIDTH))
            *flagp &= ~HASWIDTH;
        *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    }

    /* Make a closing node, and hook it on the end.  The kind of node matches
     * the opening node; without parenthesis it is END. */
    ender = regnode(
#ifdef FEAT_SYN_HL
            paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
            paren == REG_PAREN ? MCLOSE + parno :
            paren == REG_NPAREN ? NCLOSE : END);
    regtail(ret, ender);

    /* Hook the tails of the branches to the closing node. */
    for (br = ret; br != NULL; br = regnext(br))
        regoptail(br, ender);

    /* Check for proper termination: a group must be followed by ')', the
     * main body by the end of the pattern. */
    if (paren != REG_NOPAREN && getchr() != Magic(')'))
    {
#ifdef FEAT_SYN_HL
        if (paren == REG_ZPAREN)
            EMSG_RET_NULL(_("E52: Unmatched \\z("));
        else
#endif
            if (paren == REG_NPAREN)
            EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
        else
            EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
    }
    else if (paren == REG_NOPAREN && peekchr() != NUL)
    {
        if (curchr == Magic(')'))
            EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
        else
            EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
        /* NOTREACHED */
    }
    /*
     * Here we set the flag allowing back references to this set of
     * parentheses.
     */
    if (paren == REG_PAREN)
        had_endbrace[parno] = TRUE;     /* have seen the close paren */
    return ret;
}
1281
1282 /*
1283  * Handle one alternative of an | operator.
1284  * Implements the & operator.
1285  */
1286     static char_u *
1287 regbranch(flagp)
1288     int         *flagp;
1289 {
1290     char_u      *ret;
1291     char_u      *chain = NULL;
1292     char_u      *latest;
1293     int         flags;
1294
1295     *flagp = WORST | HASNL;             /* Tentatively. */
1296
1297     ret = regnode(BRANCH);
1298     for (;;)
1299     {
1300         latest = regconcat(&flags);
1301         if (latest == NULL)
1302             return NULL;
1303         /* If one of the branches has width, the whole thing has.  If one of
1304          * the branches anchors at start-of-line, the whole thing does.
1305          * If one of the branches uses look-behind, the whole thing does. */
1306         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1307         /* If one of the branches doesn't match a line-break, the whole thing
1308          * doesn't. */
1309         *flagp &= ~HASNL | (flags & HASNL);
1310         if (chain != NULL)
1311             regtail(chain, latest);
1312         if (peekchr() != Magic('&'))
1313             break;
1314         skipchr();
1315         regtail(latest, regnode(END)); /* operand ends */
1316         reginsert(MATCH, latest);
1317         chain = latest;
1318     }
1319
1320     return ret;
1321 }
1322
1323 /*
1324  * Handle one alternative of an | or & operator.
1325  * Implements the concatenation operator.
1326  */
1327     static char_u *
1328 regconcat(flagp)
1329     int         *flagp;
1330 {
1331     char_u      *first = NULL;
1332     char_u      *chain = NULL;
1333     char_u      *latest;
1334     int         flags;
1335     int         cont = TRUE;
1336
1337     *flagp = WORST;             /* Tentatively. */
1338
1339     while (cont)
1340     {
1341         switch (peekchr())
1342         {
1343             case NUL:
1344             case Magic('|'):
1345             case Magic('&'):
1346             case Magic(')'):
1347                             cont = FALSE;
1348                             break;
1349             case Magic('Z'):
1350 #ifdef FEAT_MBYTE
1351                             regflags |= RF_ICOMBINE;
1352 #endif
1353                             skipchr_keepstart();
1354                             break;
1355             case Magic('c'):
1356                             regflags |= RF_ICASE;
1357                             skipchr_keepstart();
1358                             break;
1359             case Magic('C'):
1360                             regflags |= RF_NOICASE;
1361                             skipchr_keepstart();
1362                             break;
1363             case Magic('v'):
1364                             reg_magic = MAGIC_ALL;
1365                             skipchr_keepstart();
1366                             curchr = -1;
1367                             break;
1368             case Magic('m'):
1369                             reg_magic = MAGIC_ON;
1370                             skipchr_keepstart();
1371                             curchr = -1;
1372                             break;
1373             case Magic('M'):
1374                             reg_magic = MAGIC_OFF;
1375                             skipchr_keepstart();
1376                             curchr = -1;
1377                             break;
1378             case Magic('V'):
1379                             reg_magic = MAGIC_NONE;
1380                             skipchr_keepstart();
1381                             curchr = -1;
1382                             break;
1383             default:
1384                             latest = regpiece(&flags);
1385                             if (latest == NULL)
1386                                 return NULL;
1387                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1388                             if (chain == NULL)  /* First piece. */
1389                                 *flagp |= flags & SPSTART;
1390                             else
1391                                 regtail(chain, latest);
1392                             chain = latest;
1393                             if (first == NULL)
1394                                 first = latest;
1395                             break;
1396         }
1397     }
1398     if (first == NULL)          /* Loop ran zero times. */
1399         first = regnode(NOTHING);
1400     return first;
1401 }
1402
/*
 * regpiece - something followed by possible [*+=]
 *
 * Parses one atom with regatom() and then the multi that may follow it:
 * "*", "+", "=" or "?", "@" followed by "=", "!", ">", "<=" or "<!", and
 * "{...}".
 *
 * Note that the branching code sequences used for = and the general cases
 * of * and + are somewhat optimized:  they use the same NOTHING node as
 * both the endmarker for their branch list and the body of the last branch.
 * It might seem that this node could be dispensed with entirely, but the
 * endmarker role is not redundant.
 *
 * "*flagp" gets the flags for the piece.
 * Returns the first node of the piece.  Returns NULL when parsing the atom
 * or the multi failed or when a multi directly follows another multi.
 */
    static char_u *
regpiece(flagp)
    int             *flagp;
{
    char_u          *ret;
    int             op;
    char_u          *next;
    int             flags;
    long            minval;
    long            maxval;

    ret = regatom(&flags);
    if (ret == NULL)
        return NULL;

    op = peekchr();
    if (re_multi_type(op) == NOT_MULTI)
    {
        /* No multi follows: the piece is just the atom. */
        *flagp = flags;
        return ret;
    }
    /* default flags, overruled below for "+" and some "{...}" */
    *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));

    skipchr();
    switch (op)
    {
        case Magic('*'):
            if (flags & SIMPLE)
                reginsert(STAR, ret);
            else
            {
                /* Emit x* as (x&|), where & means "self". */
                reginsert(BRANCH, ret); /* Either x */
                regoptail(ret, regnode(BACK));  /* and loop */
                regoptail(ret, ret);    /* back */
                regtail(ret, regnode(BRANCH));  /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            break;

        case Magic('+'):
            if (flags & SIMPLE)
                reginsert(PLUS, ret);
            else
            {
                /* Emit x+ as x(&|), where & means "self". */
                next = regnode(BRANCH); /* Either */
                regtail(ret, next);
                regtail(regnode(BACK), ret);    /* loop back */
                regtail(next, regnode(BRANCH)); /* or */
                regtail(ret, regnode(NOTHING)); /* null. */
            }
            /* Unlike the default flags: has width, no SPSTART. */
            *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;

        case Magic('@'):
            {
                int     lop = END;      /* END means: nothing valid found */

                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH; break;                 /* \@= */
                    case '!': lop = NOMATCH; break;               /* \@! */
                    case '>': lop = SUBPAT; break;                /* \@> */
                    case '<': switch (no_Magic(getchr()))
                              {
                                  case '=': lop = BEHIND; break;   /* \@<= */
                                  case '!': lop = NOBEHIND; break; /* \@<! */
                              }
                }
                if (lop == END)
                    EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
                                                      reg_magic == MAGIC_ALL);
                /* Look behind must match with behind_pos. */
                if (lop == BEHIND || lop == NOBEHIND)
                {
                    regtail(ret, regnode(BHPOS));
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
                reginsert(lop, ret);
                break;
            }

        case Magic('?'):
        case Magic('='):
            /* Emit x= as (x|) */
            reginsert(BRANCH, ret);             /* Either x */
            regtail(ret, regnode(BRANCH));      /* or */
            next = regnode(NOTHING);            /* null. */
            regtail(ret, next);
            regoptail(ret, next);
            break;

        case Magic('{'):
            if (!read_limits(&minval, &maxval))
                return NULL;
            if (flags & SIMPLE)
            {
                reginsert(BRACE_SIMPLE, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
            }
            else
            {
                /* Each complex brace uses its own BRACE_COMPLEX + N opcode,
                 * at most ten of them are accepted in one pattern. */
                if (num_complex_braces >= 10)
                    EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
                                                      reg_magic == MAGIC_ALL);
                reginsert(BRACE_COMPLEX + num_complex_braces, ret);
                regoptail(ret, regnode(BACK));
                regoptail(ret, ret);
                reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
                ++num_complex_braces;
            }
            /* With both limits above zero the piece gets HASWIDTH;
             * presumably because at least one match is then required. */
            if (minval > 0 && maxval > 0)
                *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
            break;
    }
    if (re_multi_type(peekchr()) != NOT_MULTI)
    {
        /* Can't have a multi follow a multi. */
        if (peekchr() == Magic('*'))
            sprintf((char *)IObuff, _("E61: Nested %s*"),
                                            reg_magic >= MAGIC_ON ? "" : "\\");
        else
            sprintf((char *)IObuff, _("E62: Nested %s%c"),
                reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
        EMSG_RET_NULL(IObuff);
    }

    return ret;
}
1544
1545 /*
1546  * regatom - the lowest level
1547  *
1548  * Optimization:  gobbles an entire sequence of ordinary characters so that
1549  * it can turn them into a single node, which is smaller to store and
1550  * faster to run.  Don't do this when one_exactly is set.
1551  */
1552     static char_u *
1553 regatom(flagp)
1554     int            *flagp;
1555 {
1556     char_u          *ret;
1557     int             flags;
1558     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1559     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1560     int             c;
1561     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1562     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1563                                     FNAME, SFNAME, PRINT, SPRINT,
1564                                     WHITE, NWHITE, DIGIT, NDIGIT,
1565                                     HEX, NHEX, OCTAL, NOCTAL,
1566                                     WORD, NWORD, HEAD, NHEAD,
1567                                     ALPHA, NALPHA, LOWER, NLOWER,
1568                                     UPPER, NUPPER
1569                                     };
1570     char_u          *p;
1571     int             extra = 0;
1572
1573     *flagp = WORST;             /* Tentatively. */
1574     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1575     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1576
1577     c = getchr();
1578     switch (c)
1579     {
1580       case Magic('^'):
1581         ret = regnode(BOL);
1582         break;
1583
1584       case Magic('$'):
1585         ret = regnode(EOL);
1586 #if defined(FEAT_SYN_HL) || defined(PROTO)
1587         had_eol = TRUE;
1588 #endif
1589         break;
1590
1591       case Magic('<'):
1592         ret = regnode(BOW);
1593         break;
1594
1595       case Magic('>'):
1596         ret = regnode(EOW);
1597         break;
1598
1599       case Magic('_'):
1600         c = no_Magic(getchr());
1601         if (c == '^')           /* "\_^" is start-of-line */
1602         {
1603             ret = regnode(BOL);
1604             break;
1605         }
1606         if (c == '$')           /* "\_$" is end-of-line */
1607         {
1608             ret = regnode(EOL);
1609 #if defined(FEAT_SYN_HL) || defined(PROTO)
1610             had_eol = TRUE;
1611 #endif
1612             break;
1613         }
1614
1615         extra = ADD_NL;
1616         *flagp |= HASNL;
1617
1618         /* "\_[" is character range plus newline */
1619         if (c == '[')
1620             goto collection;
1621
1622         /* "\_x" is character class plus newline */
1623         /*FALLTHROUGH*/
1624
1625         /*
1626          * Character classes.
1627          */
1628       case Magic('.'):
1629       case Magic('i'):
1630       case Magic('I'):
1631       case Magic('k'):
1632       case Magic('K'):
1633       case Magic('f'):
1634       case Magic('F'):
1635       case Magic('p'):
1636       case Magic('P'):
1637       case Magic('s'):
1638       case Magic('S'):
1639       case Magic('d'):
1640       case Magic('D'):
1641       case Magic('x'):
1642       case Magic('X'):
1643       case Magic('o'):
1644       case Magic('O'):
1645       case Magic('w'):
1646       case Magic('W'):
1647       case Magic('h'):
1648       case Magic('H'):
1649       case Magic('a'):
1650       case Magic('A'):
1651       case Magic('l'):
1652       case Magic('L'):
1653       case Magic('u'):
1654       case Magic('U'):
1655         p = vim_strchr(classchars, no_Magic(c));
1656         if (p == NULL)
1657             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1658 #ifdef FEAT_MBYTE
1659         /* When '.' is followed by a composing char ignore the dot, so that
1660          * the composing char is matched here. */
1661         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1662         {
1663             c = getchr();
1664             goto do_multibyte;
1665         }
1666 #endif
1667         ret = regnode(classcodes[p - classchars] + extra);
1668         *flagp |= HASWIDTH | SIMPLE;
1669         break;
1670
1671       case Magic('n'):
1672         if (reg_string)
1673         {
1674             /* In a string "\n" matches a newline character. */
1675             ret = regnode(EXACTLY);
1676             regc(NL);
1677             regc(NUL);
1678             *flagp |= HASWIDTH | SIMPLE;
1679         }
1680         else
1681         {
1682             /* In buffer text "\n" matches the end of a line. */
1683             ret = regnode(NEWL);
1684             *flagp |= HASWIDTH | HASNL;
1685         }
1686         break;
1687
1688       case Magic('('):
1689         if (one_exactly)
1690             EMSG_ONE_RET_NULL;
1691         ret = reg(REG_PAREN, &flags);
1692         if (ret == NULL)
1693             return NULL;
1694         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1695         break;
1696
1697       case NUL:
1698       case Magic('|'):
1699       case Magic('&'):
1700       case Magic(')'):
1701         if (one_exactly)
1702             EMSG_ONE_RET_NULL;
1703         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1704         /* NOTREACHED */
1705
1706       case Magic('='):
1707       case Magic('?'):
1708       case Magic('+'):
1709       case Magic('@'):
1710       case Magic('{'):
1711       case Magic('*'):
1712         c = no_Magic(c);
1713         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1714                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1715                 ? "" : "\\", c);
1716         EMSG_RET_NULL(IObuff);
1717         /* NOTREACHED */
1718
1719       case Magic('~'):          /* previous substitute pattern */
1720             if (reg_prev_sub != NULL)
1721             {
1722                 char_u      *lp;
1723
1724                 ret = regnode(EXACTLY);
1725                 lp = reg_prev_sub;
1726                 while (*lp != NUL)
1727                     regc(*lp++);
1728                 regc(NUL);
1729                 if (*reg_prev_sub != NUL)
1730                 {
1731                     *flagp |= HASWIDTH;
1732                     if ((lp - reg_prev_sub) == 1)
1733                         *flagp |= SIMPLE;
1734                 }
1735             }
1736             else
1737                 EMSG_RET_NULL(_(e_nopresub));
1738             break;
1739
1740       case Magic('1'):
1741       case Magic('2'):
1742       case Magic('3'):
1743       case Magic('4'):
1744       case Magic('5'):
1745       case Magic('6'):
1746       case Magic('7'):
1747       case Magic('8'):
1748       case Magic('9'):
1749             {
1750                 int                 refnum;
1751
1752                 refnum = c - Magic('0');
1753                 /*
1754                  * Check if the back reference is legal. We must have seen the
1755                  * close brace.
1756                  * TODO: Should also check that we don't refer to something
1757                  * that is repeated (+*=): what instance of the repetition
1758                  * should we match?
1759                  */
1760                 if (!had_endbrace[refnum])
1761                 {
1762                     /* Trick: check if "@<=" or "@<!" follows, in which case
1763                      * the \1 can appear before the referenced match. */
1764                     for (p = regparse; *p != NUL; ++p)
1765                         if (p[0] == '@' && p[1] == '<'
1766                                               && (p[2] == '!' || p[2] == '='))
1767                             break;
1768                     if (*p == NUL)
1769                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1770                 }
1771                 ret = regnode(BACKREF + refnum);
1772             }
1773             break;
1774
1775       case Magic('z'):
1776         {
1777             c = no_Magic(getchr());
1778             switch (c)
1779             {
1780 #ifdef FEAT_SYN_HL
1781                 case '(': if (reg_do_extmatch != REX_SET)
1782                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1783                           if (one_exactly)
1784                               EMSG_ONE_RET_NULL;
1785                           ret = reg(REG_ZPAREN, &flags);
1786                           if (ret == NULL)
1787                               return NULL;
1788                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1789                           re_has_z = REX_SET;
1790                           break;
1791
1792                 case '1':
1793                 case '2':
1794                 case '3':
1795                 case '4':
1796                 case '5':
1797                 case '6':
1798                 case '7':
1799                 case '8':
1800                 case '9': if (reg_do_extmatch != REX_USE)
1801                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1802                           ret = regnode(ZREF + c - '0');
1803                           re_has_z = REX_USE;
1804                           break;
1805 #endif
1806
1807                 case 's': ret = regnode(MOPEN + 0);
1808                           break;
1809
1810                 case 'e': ret = regnode(MCLOSE + 0);
1811                           break;
1812
1813                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1814             }
1815         }
1816         break;
1817
1818       case Magic('%'):
1819         {
1820             c = no_Magic(getchr());
1821             switch (c)
1822             {
1823                 /* () without a back reference */
1824                 case '(':
1825                     if (one_exactly)
1826                         EMSG_ONE_RET_NULL;
1827                     ret = reg(REG_NPAREN, &flags);
1828                     if (ret == NULL)
1829                         return NULL;
1830                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1831                     break;
1832
1833                 /* Catch \%^ and \%$ regardless of where they appear in the
1834                  * pattern -- regardless of whether or not it makes sense. */
1835                 case '^':
1836                     ret = regnode(RE_BOF);
1837                     break;
1838
1839                 case '$':
1840                     ret = regnode(RE_EOF);
1841                     break;
1842
1843                 case '#':
1844                     ret = regnode(CURSOR);
1845                     break;
1846
1847                 case 'V':
1848                     ret = regnode(RE_VISUAL);
1849                     break;
1850
1851                 /* \%[abc]: Emit as a list of branches, all ending at the last
1852                  * branch which matches nothing. */
1853                 case '[':
1854                           if (one_exactly)      /* doesn't nest */
1855                               EMSG_ONE_RET_NULL;
1856                           {
1857                               char_u    *lastbranch;
1858                               char_u    *lastnode = NULL;
1859                               char_u    *br;
1860
1861                               ret = NULL;
1862                               while ((c = getchr()) != ']')
1863                               {
1864                                   if (c == NUL)
1865                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1866                                                       reg_magic == MAGIC_ALL);
1867                                   br = regnode(BRANCH);
1868                                   if (ret == NULL)
1869                                       ret = br;
1870                                   else
1871                                       regtail(lastnode, br);
1872
1873                                   ungetchr();
1874                                   one_exactly = TRUE;
1875                                   lastnode = regatom(flagp);
1876                                   one_exactly = FALSE;
1877                                   if (lastnode == NULL)
1878                                       return NULL;
1879                               }
1880                               if (ret == NULL)
1881                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1882                                                       reg_magic == MAGIC_ALL);
1883                               lastbranch = regnode(BRANCH);
1884                               br = regnode(NOTHING);
1885                               if (ret != JUST_CALC_SIZE)
1886                               {
1887                                   regtail(lastnode, br);
1888                                   regtail(lastbranch, br);
1889                                   /* connect all branches to the NOTHING
1890                                    * branch at the end */
1891                                   for (br = ret; br != lastnode; )
1892                                   {
1893                                       if (OP(br) == BRANCH)
1894                                       {
1895                                           regtail(br, lastbranch);
1896                                           br = OPERAND(br);
1897                                       }
1898                                       else
1899                                           br = regnext(br);
1900                                   }
1901                               }
1902                               *flagp &= ~HASWIDTH;
1903                               break;
1904                           }
1905
1906                 case 'd':   /* %d123 decimal */
1907                 case 'o':   /* %o123 octal */
1908                 case 'x':   /* %xab hex 2 */
1909                 case 'u':   /* %uabcd hex 4 */
1910                 case 'U':   /* %U1234abcd hex 8 */
1911                           {
1912                               int i;
1913
1914                               switch (c)
1915                               {
1916                                   case 'd': i = getdecchrs(); break;
1917                                   case 'o': i = getoctchrs(); break;
1918                                   case 'x': i = gethexchrs(2); break;
1919                                   case 'u': i = gethexchrs(4); break;
1920                                   case 'U': i = gethexchrs(8); break;
1921                                   default:  i = -1; break;
1922                               }
1923
1924                               if (i < 0)
1925                                   EMSG_M_RET_NULL(
1926                                         _("E678: Invalid character after %s%%[dxouU]"),
1927                                         reg_magic == MAGIC_ALL);
1928 #ifdef FEAT_MBYTE
1929                               if (use_multibytecode(i))
1930                                   ret = regnode(MULTIBYTECODE);
1931                               else
1932 #endif
1933                                   ret = regnode(EXACTLY);
1934                               if (i == 0)
1935                                   regc(0x0a);
1936                               else
1937 #ifdef FEAT_MBYTE
1938                                   regmbc(i);
1939 #else
1940                                   regc(i);
1941 #endif
1942                               regc(NUL);
1943                               *flagp |= HASWIDTH;
1944                               break;
1945                           }
1946
1947                 default:
1948                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1949                                                                  || c == '\'')
1950                           {
1951                               long_u    n = 0;
1952                               int       cmp;
1953
1954                               cmp = c;
1955                               if (cmp == '<' || cmp == '>')
1956                                   c = getchr();
1957                               while (VIM_ISDIGIT(c))
1958                               {
1959                                   n = n * 10 + (c - '0');
1960                                   c = getchr();
1961                               }
1962                               if (c == '\'' && n == 0)
1963                               {
1964                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1965                                   c = getchr();
1966                                   ret = regnode(RE_MARK);
1967                                   if (ret == JUST_CALC_SIZE)
1968                                       regsize += 2;
1969                                   else
1970                                   {
1971                                       *regcode++ = c;
1972                                       *regcode++ = cmp;
1973                                   }
1974                                   break;
1975                               }
1976                               else if (c == 'l' || c == 'c' || c == 'v')
1977                               {
1978                                   if (c == 'l')
1979                                       ret = regnode(RE_LNUM);
1980                                   else if (c == 'c')
1981                                       ret = regnode(RE_COL);
1982                                   else
1983                                       ret = regnode(RE_VCOL);
1984                                   if (ret == JUST_CALC_SIZE)
1985                                       regsize += 5;
1986                                   else
1987                                   {
1988                                       /* put the number and the optional
1989                                        * comparator after the opcode */
1990                                       regcode = re_put_long(regcode, n);
1991                                       *regcode++ = cmp;
1992                                   }
1993                                   break;
1994                               }
1995                           }
1996
1997                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1998                                                       reg_magic == MAGIC_ALL);
1999             }
2000         }
2001         break;
2002
2003       case Magic('['):
2004 collection:
2005         {
2006             char_u      *lp;
2007
2008             /*
2009              * If there is no matching ']', we assume the '[' is a normal
2010              * character.  This makes 'incsearch' and ":help [" work.
2011              */
2012             lp = skip_anyof(regparse);
2013             if (*lp == ']')     /* there is a matching ']' */
2014             {
2015                 int     startc = -1;    /* > 0 when next '-' is a range */
2016                 int     endc;
2017
2018                 /*
2019                  * In a character class, different parsing rules apply.
2020                  * Not even \ is special anymore, nothing is.
2021                  */
2022                 if (*regparse == '^')       /* Complement of range. */
2023                 {
2024                     ret = regnode(ANYBUT + extra);
2025                     regparse++;
2026                 }
2027                 else
2028                     ret = regnode(ANYOF + extra);
2029
2030                 /* At the start ']' and '-' mean the literal character. */
2031                 if (*regparse == ']' || *regparse == '-')
2032                 {
2033                     startc = *regparse;
2034                     regc(*regparse++);
2035                 }
2036
2037                 while (*regparse != NUL && *regparse != ']')
2038                 {
2039                     if (*regparse == '-')
2040                     {
2041                         ++regparse;
2042                         /* The '-' is not used for a range at the end and
2043                          * after or before a '\n'. */
2044                         if (*regparse == ']' || *regparse == NUL
2045                                 || startc == -1
2046                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2047                         {
2048                             regc('-');
2049                             startc = '-';       /* [--x] is a range */
2050                         }
2051                         else
2052                         {
2053                             /* Also accept "a-[.z.]" */
2054                             endc = 0;
2055                             if (*regparse == '[')
2056                                 endc = get_coll_element(&regparse);
2057                             if (endc == 0)
2058                             {
2059 #ifdef FEAT_MBYTE
2060                                 if (has_mbyte)
2061                                     endc = mb_ptr2char_adv(&regparse);
2062                                 else
2063 #endif
2064                                     endc = *regparse++;
2065                             }
2066
2067                             /* Handle \o40, \x20 and \u20AC style sequences */
2068                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2069                                 endc = coll_get_char();
2070
2071                             if (startc > endc)
2072                                 EMSG_RET_NULL(_(e_invrange));
2073 #ifdef FEAT_MBYTE
2074                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2075                                                  || (*mb_char2len)(endc) > 1))
2076                             {
2077                                 /* Limit to a range of 256 chars */
2078                                 if (endc > startc + 256)
2079                                     EMSG_RET_NULL(_(e_invrange));
2080                                 while (++startc <= endc)
2081                                     regmbc(startc);
2082                             }
2083                             else
2084 #endif
2085                             {
2086 #ifdef EBCDIC
2087                                 int     alpha_only = FALSE;
2088
2089                                 /* for alphabetical range skip the gaps
2090                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2091                                 if (isalpha(startc) && isalpha(endc))
2092                                     alpha_only = TRUE;
2093 #endif
2094                                 while (++startc <= endc)
2095 #ifdef EBCDIC
2096                                     if (!alpha_only || isalpha(startc))
2097 #endif
2098                                         regc(startc);
2099                             }
2100                             startc = -1;
2101                         }
2102                     }
2103                     /*
2104                      * Only "\]", "\^", "\-" and "\\" are special in Vi.  Vim
2105                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2106                      * 'cpoptions' is not included.
2107                      * Posix doesn't recognize backslash at all.
2108                      */
2109                     else if (*regparse == '\\'
2110                             && !cpo_bsl
2111                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2112                                 || (!cpo_lit
2113                                     && vim_strchr(REGEXP_ABBR,
2114                                                        regparse[1]) != NULL)))
2115                     {
2116                         regparse++;
2117                         if (*regparse == 'n')
2118                         {
2119                             /* '\n' in range: also match NL */
2120                             if (ret != JUST_CALC_SIZE)
2121                             {
2122                                 if (*ret == ANYBUT)
2123                                     *ret = ANYBUT + ADD_NL;
2124                                 else if (*ret == ANYOF)
2125                                     *ret = ANYOF + ADD_NL;
2126                                 /* else: must have had a \n already */
2127                             }
2128                             *flagp |= HASNL;
2129                             regparse++;
2130                             startc = -1;
2131                         }
2132                         else if (*regparse == 'd'
2133                                 || *regparse == 'o'
2134                                 || *regparse == 'x'
2135                                 || *regparse == 'u'
2136                                 || *regparse == 'U')
2137                         {
2138                             startc = coll_get_char();
2139                             if (startc == 0)
2140                                 regc(0x0a);
2141                             else
2142 #ifdef FEAT_MBYTE
2143                                 regmbc(startc);
2144 #else
2145                                 regc(startc);
2146 #endif
2147                         }
2148                         else
2149                         {
2150                             startc = backslash_trans(*regparse++);
2151                             regc(startc);
2152                         }
2153                     }
2154                     else if (*regparse == '[')
2155                     {
2156                         int c_class;
2157                         int cu;
2158
2159                         c_class = get_char_class(&regparse);
2160                         startc = -1;
2161                         /* Characters assumed to be 8 bits! */
2162                         switch (c_class)
2163                         {
2164                             case CLASS_NONE:
2165                                 c_class = get_equi_class(&regparse);
2166                                 if (c_class != 0)
2167                                 {
2168                                     /* produce equivalence class */
2169                                     reg_equi_class(c_class);
2170                                 }
2171                                 else if ((c_class =
2172                                             get_coll_element(&regparse)) != 0)
2173                                 {
2174                                     /* produce a collating element */
2175                                     regmbc(c_class);
2176                                 }
2177                                 else
2178                                 {
2179                                     /* literal '[', allow [[-x] as a range */
2180                                     startc = *regparse++;
2181                                     regc(startc);
2182                                 }
2183                                 break;
2184                             case CLASS_ALNUM:
2185                                 for (cu = 1; cu <= 255; cu++)
2186                                     if (isalnum(cu))
2187                                         regc(cu);
2188                                 break;
2189                             case CLASS_ALPHA:
2190                                 for (cu = 1; cu <= 255; cu++)
2191                                     if (isalpha(cu))
2192                                         regc(cu);
2193                                 break;
2194                             case CLASS_BLANK:
2195                                 regc(' ');
2196                                 regc('\t');
2197                                 break;
2198                             case CLASS_CNTRL:
2199                                 for (cu = 1; cu <= 255; cu++)
2200                                     if (iscntrl(cu))
2201                                         regc(cu);
2202                                 break;
2203                             case CLASS_DIGIT:
2204                                 for (cu = 1; cu <= 255; cu++)
2205                                     if (VIM_ISDIGIT(cu))
2206                                         regc(cu);
2207                                 break;
2208                             case CLASS_GRAPH:
2209                                 for (cu = 1; cu <= 255; cu++)
2210                                     if (isgraph(cu))
2211                                         regc(cu);
2212                                 break;
2213                             case CLASS_LOWER:
2214                                 for (cu = 1; cu <= 255; cu++)
2215                                     if (MB_ISLOWER(cu))
2216                                         regc(cu);
2217                                 break;
2218                             case CLASS_PRINT:
2219                                 for (cu = 1; cu <= 255; cu++)
2220                                     if (vim_isprintc(cu))
2221                                         regc(cu);
2222                                 break;
2223                             case CLASS_PUNCT:
2224                                 for (cu = 1; cu <= 255; cu++)
2225                                     if (ispunct(cu))
2226                                         regc(cu);
2227                                 break;
2228                             case CLASS_SPACE:
2229                                 for (cu = 9; cu <= 13; cu++)
2230                                     regc(cu);
2231                                 regc(' ');
2232                                 break;
2233                             case CLASS_UPPER:
2234                                 for (cu = 1; cu <= 255; cu++)
2235                                     if (MB_ISUPPER(cu))
2236                                         regc(cu);
2237                                 break;
2238                             case CLASS_XDIGIT:
2239                                 for (cu = 1; cu <= 255; cu++)
2240                                     if (vim_isxdigit(cu))
2241                                         regc(cu);
2242                                 break;
2243                             case CLASS_TAB:
2244                                 regc('\t');
2245                                 break;
2246                             case CLASS_RETURN:
2247                                 regc('\r');
2248                                 break;
2249                             case CLASS_BACKSPACE:
2250                                 regc('\b');
2251                                 break;
2252                             case CLASS_ESCAPE:
2253                                 regc('\033');
2254                                 break;
2255                         }
2256                     }
2257                     else
2258                     {
2259 #ifdef FEAT_MBYTE
2260                         if (has_mbyte)
2261                         {
2262                             int len;
2263
2264                             /* produce a multibyte character, including any
2265                              * following composing characters */
2266                             startc = mb_ptr2char(regparse);
2267                             len = (*mb_ptr2len)(regparse);
2268                             if (enc_utf8 && utf_char2len(startc) != len)
2269                                 startc = -1;    /* composing chars */
2270                             while (--len >= 0)
2271                                 regc(*regparse++);
2272                         }
2273                         else
2274 #endif
2275                         {
2276                             startc = *regparse++;
2277                             regc(startc);
2278                         }
2279                     }
2280                 }
2281                 regc(NUL);
2282                 prevchr_len = 1;        /* last char was the ']' */
2283                 if (*regparse != ']')
2284                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2285                 skipchr();          /* let's be friends with the lexer again */
2286                 *flagp |= HASWIDTH | SIMPLE;
2287                 break;
2288             }
2289             else if (reg_strict)
2290                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2291                                                        reg_magic > MAGIC_OFF);
2292         }
2293         /* FALLTHROUGH */
2294
2295       default:
2296         {
2297             int         len;
2298
2299 #ifdef FEAT_MBYTE
2300             /* A multi-byte character is handled as a separate atom if it's
2301              * before a multi and when it's a composing char. */
2302             if (use_multibytecode(c))
2303             {
2304 do_multibyte:
2305                 ret = regnode(MULTIBYTECODE);
2306                 regmbc(c);
2307                 *flagp |= HASWIDTH | SIMPLE;
2308                 break;
2309             }
2310 #endif
2311
2312             ret = regnode(EXACTLY);
2313
2314             /*
2315              * Append characters as long as:
2316              * - there is no following multi, we then need the character in
2317              *   front of it as a single character operand
2318              * - not running into a Magic character
2319              * - "one_exactly" is not set
2320              * But always emit at least one character.  Might be a Multi,
2321              * e.g., a "[" without matching "]".
2322              */
2323             for (len = 0; c != NUL && (len == 0
2324                         || (re_multi_type(peekchr()) == NOT_MULTI
2325                             && !one_exactly
2326                             && !is_Magic(c))); ++len)
2327             {
2328                 c = no_Magic(c);
2329 #ifdef FEAT_MBYTE
2330                 if (has_mbyte)
2331                 {
2332                     regmbc(c);
2333                     if (enc_utf8)
2334                     {
2335                         int     l;
2336
2337                         /* Need to get composing character too. */
2338                         for (;;)
2339                         {
2340                             l = utf_ptr2len(regparse);
2341                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2342                                 break;
2343                             regmbc(utf_ptr2char(regparse));
2344                             skipchr();
2345                         }
2346                     }
2347                 }
2348                 else
2349 #endif
2350                     regc(c);
2351                 c = getchr();
2352             }
2353             ungetchr();
2354
2355             regc(NUL);
2356             *flagp |= HASWIDTH;
2357             if (len == 1)
2358                 *flagp |= SIMPLE;
2359         }
2360         break;
2361     }
2362
2363     return ret;
2364 }
2365
2366 #ifdef FEAT_MBYTE
2367 /*
2368  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2369  * character "c": "c" is longer than one byte and is either followed by a
2370  * multi or, when using utf-8, is a composing character. */
2371     static int
2372 use_multibytecode(c)
2373     int c;
2374 {
2375     return has_mbyte && (*mb_char2len)(c) > 1    /* only multi-byte chars */
2376                      && (re_multi_type(peekchr()) != NOT_MULTI
2377                              || (enc_utf8 && utf_iscomposing(c)));
2378 }
2379 #endif
2380
2381 /*
2382  * Emit a node with opcode "op": the opcode byte followed by a two-byte "next"
2383  * pointer, both bytes NUL.  While only computing the size nothing is written.
2384  * Return pointer to generated code (the value of "regcode" on entry). */
2385     static char_u *
2386 regnode(op)
2387     int         op;
2388 {
2389     char_u  *ret;
2390 
2391     ret = regcode;
2392     if (ret == JUST_CALC_SIZE)
2393         regsize += 3;                   /* only count the three node bytes */
2394     else
2395     {
2396         *regcode++ = op;
2397         *regcode++ = NUL;               /* Null "next" pointer. */
2398         *regcode++ = NUL;
2399     }
2400     return ret;
2401 }
2402
2403 /*
2404  * Emit a byte of code: store "b" at "regcode" and advance it.  When only the
2405  * size is being computed "regsize" is incremented and nothing is stored. */
2406     static void
2407 regc(b)
2408     int         b;
2409 {
2410     if (regcode == JUST_CALC_SIZE)
2411         regsize++;
2412     else
2413         *regcode++ = b;
2414 }
2415
2416 #ifdef FEAT_MBYTE
2417 /*
2418  * Emit a multi-byte character of code, using mb_char2bytes() to store "c".
2419  * When only computing the size, add the mb_char2len() of "c" to "regsize". */
2420     static void
2421 regmbc(c)
2422     int         c;
2423 {
2424     if (regcode == JUST_CALC_SIZE)
2425         regsize += (*mb_char2len)(c);
2426     else
2427         regcode += (*mb_char2bytes)(c, regcode); /* advance by returned count */
2428 }
2429 #endif
2430
2431 /*
2432  * reginsert - insert operator node "op" in front of the already-emitted
2433  * operand that starts at "opnd".
2434  * Means relocating the operand: everything from "opnd" up to "regcode" moves
2435  * up three bytes.  When only computing the size just count those bytes. */
2436     static void
2437 reginsert(op, opnd)
2438     int         op;
2439     char_u     *opnd;
2440 {
2441     char_u      *src;
2442     char_u      *dst;
2443     char_u      *place;
2444 
2445     if (regcode == JUST_CALC_SIZE)
2446     {
2447         regsize += 3;
2448         return;
2449     }
2450     src = regcode;
2451     regcode += 3;
2452     dst = regcode;
2453     while (src > opnd)          /* copy backwards, the areas overlap */
2454         *--dst = *--src;
2455 
2456     place = opnd;               /* Op node, where operand used to be. */
2457     *place++ = op;
2458     *place++ = NUL;             /* "next" pointer is left NUL */
2459     *place = NUL;
2460 }
2461
2462 /*
2463  * reginsert_limits - insert an operator in front of already-emitted operand.
2464  * The operator has the given limit values as operands.  Also set next pointer.
2465  * The new node takes 11 bytes: opcode, two "next" bytes and four bytes each
2466  * for "minval" and "maxval".  Means relocating the operand by that amount.
2467  */
2468     static void
2469 reginsert_limits(op, minval, maxval, opnd)
2470     int         op;
2471     long        minval;
2472     long        maxval;
2473     char_u      *opnd;
2474 {
2475     char_u      *src;
2476     char_u      *dst;
2477     char_u      *place;
2478 
2479     if (regcode == JUST_CALC_SIZE)
2480     {
2481         regsize += 11;
2482         return;
2483     }
2484     src = regcode;
2485     regcode += 11;
2486     dst = regcode;
2487     while (src > opnd)          /* copy backwards, the areas overlap */
2488         *--dst = *--src;
2489 
2490     place = opnd;               /* Op node, where operand used to be. */
2491     *place++ = op;
2492     *place++ = NUL;
2493     *place++ = NUL;
2494     place = re_put_long(place, (long_u)minval);
2495     place = re_put_long(place, (long_u)maxval);
2496     regtail(opnd, place);       /* chain at "opnd" now ends just past the limits */
2497 }
2498
2499 /*
2500  * Write the low 32 bits of "val" as four bytes at "p", most significant byte
2501  * first, and return a pointer to the byte after them. */
2502     static char_u *
2503 re_put_long(p, val)
2504     char_u      *p;
2505     long_u      val;
2506 {
2507     *p++ = (char_u) ((val >> 24) & 0377);
2508     *p++ = (char_u) ((val >> 16) & 0377);
2509     *p++ = (char_u) ((val >> 8) & 0377);
2510     *p++ = (char_u) (val & 0377);
2511     return p;
2512 }
2513
2514 /*
2515  * regtail - set the next-pointer at the end of the node chain starting at "p"
2516  * to "val".  Does nothing when "p" is JUST_CALC_SIZE. */
2517     static void
2518 regtail(p, val)
2519     char_u      *p;
2520     char_u      *val;
2521 {
2522     char_u      *scan;
2523     char_u      *temp;
2524     int         offset;
2525 
2526     if (p == JUST_CALC_SIZE)
2527         return;
2528 
2529     /* Find last node. */
2530     scan = p;
2531     for (;;)
2532     {
2533         temp = regnext(scan);
2534         if (temp == NULL)       /* "scan" is the last node of the chain */
2535             break;
2536         scan = temp;
2537     }
2538 
2539     if (OP(scan) == BACK)
2540         offset = (int)(scan - val);     /* BACK: distance is taken backwards */
2541     else
2542         offset = (int)(val - scan);
2543     *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);    /* high byte */
2544     *(scan + 2) = (char_u) (offset & 0377); /* low byte; higher bits are lost */
2545 }
2546
2547 /*
2548  * regoptail - regtail on the operand of "p", when "p" is a BRANCH or one of
2549  * the BRACE_COMPLEX nodes; nop for NULL, JUST_CALC_SIZE and other nodes. */
2550     static void
2551 regoptail(p, val)
2552     char_u      *p;
2553     char_u      *val;
2554 {
2555     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2556     if (p == NULL || p == JUST_CALC_SIZE
2557             || (OP(p) != BRANCH
2558                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2559         return;
2560     regtail(OPERAND(p), val);
2561 }
2562
2563 /*
2564  * getchr() - get the next character from the pattern. We know about
2565  * magic and such, so therefore we need a lexical analyzer.
2566  */
2567 
2568 /* static int       curchr; */
2569 static int      prevprevchr;    /* skipchr() stores the old "prevchr" here */
2570 static int      prevchr;        /* skipchr() stores the old "curchr" here */
2571 static int      nextchr;    /* used for ungetchr() */
2572 /*
2573  * Note: prevchr is sometimes -1 when we are not at the start,
2574  * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
2575  * taken to be magic -- webb
2576  */
2577 static int      at_start;       /* True when on the first character */
2578 static int      prev_at_start;  /* True when on the second character */
2579
2580     static void
2581 initchr(str)                /* start lexing the pattern at "str" */
2582     char_u *str;
2583 {
2584     regparse = str;
2585     prevchr_len = 0;
2586     curchr = prevprevchr = prevchr = nextchr = -1;      /* -1: not set */
2587     at_start = TRUE;
2588     prev_at_start = FALSE;
2589 }
2590
2591     static int
2592 peekchr()           /* return the next lexed char without consuming it */
2593 {
2594     static int  after_slash = FALSE;    /* nonzero while lexing after a '\' */
2595 
2596     if (curchr == -1)   /* nothing cached: lex the char at "regparse" */
2597     {
2598         switch (curchr = regparse[0])
2599         {
2600         case '.':
2601         case '[':
2602         case '~':
2603             /* magic when 'magic' is on */
2604             if (reg_magic >= MAGIC_ON)
2605                 curchr = Magic(curchr);
2606             break;
2607         case '(':
2608         case ')':
2609         case '{':
2610         case '%':
2611         case '+':
2612         case '=':
2613         case '?':
2614         case '@':
2615         case '!':
2616         case '&':
2617         case '|':
2618         case '<':
2619         case '>':
2620         case '#':       /* future ext. */
2621         case '"':       /* future ext. */
2622         case '\'':      /* future ext. */
2623         case ',':       /* future ext. */
2624         case '-':       /* future ext. */
2625         case ':':       /* future ext. */
2626         case ';':       /* future ext. */
2627         case '`':       /* future ext. */
2628         case '/':       /* Can't be used in / command */
2629             /* magic only after "\v" */
2630             if (reg_magic == MAGIC_ALL)
2631                 curchr = Magic(curchr);
2632             break;
2633         case '*':
2634             /* * is not magic as the very first character, eg "?*ptr", when
2635              * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
2636              * "\(\*" is not magic, thus must be magic if "after_slash" */
2637             if (reg_magic >= MAGIC_ON
2638                     && !at_start
2639                     && !(prev_at_start && prevchr == Magic('^'))
2640                     && (after_slash
2641                         || (prevchr != Magic('(')
2642                             && prevchr != Magic('&')
2643                             && prevchr != Magic('|'))))
2644                 curchr = Magic('*');
2645             break;
2646         case '^':
2647             /* '^' is only magic as the very first character, after "\v" and
2648              * if it's after "\(", "\|", "\&", "\n" or "\%(" */
2649             if (reg_magic >= MAGIC_OFF
2650                     && (at_start
2651                         || reg_magic == MAGIC_ALL
2652                         || prevchr == Magic('(')
2653                         || prevchr == Magic('|')
2654                         || prevchr == Magic('&')
2655                         || prevchr == Magic('n')
2656                         || (no_Magic(prevchr) == '('
2657                             && prevprevchr == Magic('%'))))
2658             {
2659                 curchr = Magic('^');
2660                 at_start = TRUE;
2661                 prev_at_start = FALSE;
2662             }
2663             break;
2664         case '$':
2665             /* '$' is only magic as the very last char, after "\v" and if it's
2666              * in front of either "\|", "\)", "\&", or "\n" */
2667             if (reg_magic >= MAGIC_OFF)
2668             {
2669                 char_u *p = regparse + 1;
2670 
2671                 /* ignore \c, \C, \m, \M and \Z after '$' */
2672                 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
2673                                 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
2674                     p += 2;
2675                 if (p[0] == NUL
2676                         || (p[0] == '\\'
2677                             && (p[1] == '|' || p[1] == '&' || p[1] == ')'
2678                                 || p[1] == 'n'))
2679                         || reg_magic == MAGIC_ALL)
2680                     curchr = Magic('$');
2681             }
2682             break;
2683         case '\\':
2684             {
2685                 int c = regparse[1];
2686 
2687                 if (c == NUL)
2688                     curchr = '\\';      /* trailing '\' */
2689                 else if (
2690 #ifdef EBCDIC
2691                         vim_strchr(META, c)
2692 #else
2693                         c <= '~' && META_flags[c]
2694 #endif
2695                         )
2696                 {
2697                     /*
2698                      * META contains everything that may be magic sometimes,
2699                      * except ^ and $ ("\^" and "\$" are only magic after
2700                      * "\v").  We now fetch the next character and toggle its
2701                      * magicness.  Therefore, \ is so meta-magic that it is
2702                      * not in META.
2703                      */
2704                     curchr = -1;
2705                     prev_at_start = at_start;
2706                     at_start = FALSE;   /* be able to say "/\*ptr" */
2707                     ++regparse;         /* lex the char after the '\' ... */
2708                     ++after_slash;
2709                     peekchr();
2710                     --regparse;         /* ... but stay on the '\' itself */
2711                     --after_slash;
2712                     curchr = toggle_Magic(curchr);
2713                 }
2714                 else if (vim_strchr(REGEXP_ABBR, c))
2715                 {
2716                     /*
2717                      * Handle abbreviations, like "\t" for TAB -- webb
2718                      */
2719                     curchr = backslash_trans(c);
2720                 }
2721                 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
2722                     curchr = toggle_Magic(c);
2723                 else
2724                 {
2725                     /*
2726                      * Next character can never be (made) magic?
2727                      * Then backslashing it won't do anything.
2728                      */
2729 #ifdef FEAT_MBYTE
2730                     if (has_mbyte)
2731                         curchr = (*mb_ptr2char)(regparse + 1);
2732                     else
2733 #endif
2734                         curchr = c;
2735                 }
2736                 break;
2737             }
2738 
2739 #ifdef FEAT_MBYTE
2740         default:
2741             if (has_mbyte)      /* otherwise "curchr" stays the single byte */
2742                 curchr = (*mb_ptr2char)(regparse);
2743 #endif
2744         }
2745     }
2746 
2747     return curchr;
2748 }
2749
2750 /*
2751  * Eat one lexed character.  Do this in a way that we can undo it: the number
2752  * of bytes skipped is kept in "prevchr_len", which ungetchr() uses. */
2753     static void
2754 skipchr()
2755 {
2756     /* peekchr() eats a backslash, do the same here */
2757     if (*regparse == '\\')
2758         prevchr_len = 1;
2759     else
2760         prevchr_len = 0;
2761     if (regparse[prevchr_len] != NUL)   /* never step over the final NUL */
2762     {
2763 #ifdef FEAT_MBYTE
2764         if (enc_utf8)
2765             /* exclude composing chars that mb_ptr2len does include */
2766             prevchr_len += utf_ptr2len(regparse + prevchr_len);
2767         else if (has_mbyte)
2768             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2769         else
2770 #endif
2771             ++prevchr_len;
2772     }
2773     regparse += prevchr_len;
2774     prev_at_start = at_start;   /* shift the lexer history by one char */
2775     at_start = FALSE;
2776     prevprevchr = prevchr;
2777     prevchr = curchr;
2778     curchr = nextchr;       /* use previously unget char, or -1 */
2779     nextchr = -1;
2780 }
2781
2782 /*
2783  * Skip a character while keeping the value of prev_at_start for at_start.
2784  * prevchr and prevprevchr are also kept.
2785  */
2786     static void
2787 skipchr_keepstart()
2788 {
2789     int as = prev_at_start;     /* remember what skipchr() overwrites */
2790     int pr = prevchr;
2791     int prpr = prevprevchr;
2792 
2793     skipchr();
2794     at_start = as;              /* note: set from the old "prev_at_start" */
2795     prevchr = pr;
2796     prevprevchr = prpr;
2797 }
2798
2799     static int
2800 getchr()                /* return the next lexed char and consume it */
2801 {
2802     int chr = peekchr();
2803 
2804     skipchr();
2805     return chr;
2806 }
2807
2808 /*
2809  * put character back.  Works only once!  Undoes the last skipchr().
2810  */
2811     static void
2812 ungetchr()
2813 {
2814     nextchr = curchr;           /* skipchr() will hand this back as "curchr" */
2815     curchr = prevchr;
2816     prevchr = prevprevchr;
2817     at_start = prev_at_start;
2818     prev_at_start = FALSE;
2819 
2820     /* Backup regparse, so that it's at the same position as before the
2821      * getchr(). */
2822     regparse -= prevchr_len;
2823 }
2824
2825 /*
2826  * Get and return the value of the hex string at the current position.
2827  * Return -1 if there is no valid hex number.
2828  * The position is updated:
2829  *     blahblah\%x20asdf
2830  *         before-^ ^-after
2831  * The parameter controls the maximum number of input characters. This will be
2832  * 2 when reading a \%x20 sequence, 4 for \%u20AC and 8 when called for 'U'.
2833  * The value is built up unsigned: eight digits do not fit in a signed int. */
2834     static int
2835 gethexchrs(maxinputlen)
2836     int         maxinputlen;
2837 {
2838     long_u      nr = 0;         /* unsigned, shifting into the top bit is OK */
2839     int         c;
2840     int         i;
2841 
2842     for (i = 0; i < maxinputlen; ++i)
2843     {
2844         c = regparse[0];
2845         if (!vim_isxdigit(c))
2846             break;
2847         nr <<= 4;
2848         nr |= hex2nr(c);
2849         ++regparse;
2850     }
2851 
2852     if (i == 0)                 /* not a single hex digit was found */
2853         return -1;
2854     return (int)nr;
2855 }
2856
2857 /*
2858  * get and return the value of the decimal string immediately after the
2859  * current position. Return -1 for invalid.  Consumes all digits.
2860  * The value is built up unsigned, so that many digits wrap around instead
2860  * of overflowing a signed int. */
2861     static int
2862 getdecchrs()
2863 {
2864     long_u      nr = 0;         /* unsigned: wraps, no signed overflow */
2865     int         c;
2866     int         i;
2867 
2868     for (i = 0; ; ++i)
2869     {
2870         c = regparse[0];
2871         if (c < '0' || c > '9')
2872             break;
2873         nr *= 10;
2874         nr += c - '0';
2875         ++regparse;
2876     }
2877 
2878     if (i == 0)                 /* not a single digit was found */
2879         return -1;
2880     return (int)nr;
2881 }
2882
2883 /*
2884  * get and return the value of the octal string immediately after the current
2885  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2886  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2887  * treat 8 or 9 as recognised characters. Position is updated:
2888  *     blahblah\%o210asdf
2889  *         before-^  ^-after
2890  */
2891     static int
2892 getoctchrs()
2893 {
2894     int         nr = 0;
2895     int         c;
2896     int         i;
2897 
2898     for (i = 0; i < 3 && nr < 040; ++i) /* a digit more must not exceed 0377 */
2899     {
2900         c = regparse[0];
2901         if (c < '0' || c > '7')
2902             break;
2903         nr <<= 3;
2904         nr |= hex2nr(c);        /* "c" is '0' - '7' here */
2905         ++regparse;
2906     }
2907 
2908     if (i == 0)                 /* not a single octal digit was found */
2909         return -1;
2910     return nr;
2911 }
2912
2913 /*
2914  * Get a number after a backslash that is inside [].  "regparse" is on the
2915  * letter that selects the base.  When nothing is recognized return a backslash.
2916  */
2917     static int
2918 coll_get_char()
2919 {
2920     int     nr = -1;
2921 
2922     switch (*regparse++)
2923     {
2924         case 'd': nr = getdecchrs(); break;     /* decimal */
2925         case 'o': nr = getoctchrs(); break;     /* octal */
2926         case 'x': nr = gethexchrs(2); break;    /* hex, up to 2 digits */
2927         case 'u': nr = gethexchrs(4); break;    /* hex, up to 4 digits */
2928         case 'U': nr = gethexchrs(8); break;    /* hex, up to 8 digits */
2929     }
2930     if (nr < 0)
2931     {
2932         /* If getting the number fails be backwards compatible: the character
2933          * is a backslash. */
2934         --regparse;             /* undo the "++" of the switch above */
2935         nr = '\\';
2936     }
2937     return nr;
2938 }
2939
2940 /*
2941  * read_limits - Read two integers to be taken as a minimum and maximum.
2942  * If the first character is '-', then the range is reversed.
2943  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2944  * missing, a very big number is the default.
2945  * Returns OK after skipping the closing '}'; for anything else E554 is given. */
2946     static int
2947 read_limits(minval, maxval)
2948     long        *minval;
2949     long        *maxval;
2950 {
2951     int         reverse = FALSE;
2952     char_u      *first_char;
2953     long        tmp;
2954 
2955     if (*regparse == '-')
2956     {
2957         /* Starts with '-', so reverse the range later */
2958         regparse++;
2959         reverse = TRUE;
2960     }
2961     first_char = regparse;      /* to see later if a minimum was given */
2962     *minval = getdigits(&regparse);
2963     if (*regparse == ',')           /* There is a comma */
2964     {
2965         if (vim_isdigit(*++regparse))
2966             *maxval = getdigits(&regparse);
2967         else
2968             *maxval = MAX_LIMIT;
2969     }
2970     else if (VIM_ISDIGIT(*first_char))
2971         *maxval = *minval;          /* It was \{n} or \{-n} */
2972     else
2973         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2974     if (*regparse == '\\')
2975         regparse++;     /* Allow either \{...} or \{...\} */
2976     if (*regparse != '}')
2977     {
2978         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2979                                           reg_magic == MAGIC_ALL ? "" : "\\");
2980         EMSG_RET_FAIL(IObuff);  /* NOTE(review): presumably returns FAIL */
2981     }
2982 
2983     /*
2984      * Reverse the range if there was a '-', or make sure it is in the right
2985      * order otherwise.
2986      */
2987     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2988     {
2989         tmp = *minval;
2990         *minval = *maxval;
2991         *maxval = tmp;
2992     }
2993     skipchr();          /* let's be friends with the lexer again */
2994     return OK;
2995 }
2996
2997 /*
2998  * vim_regexec and friends
2999  */
3000
3001 /*
3002  * Global work variables for vim_regexec().
3003  */
3004
3005 /* The current match-position is remembered with these variables: */
3006 static linenr_T reglnum;        /* line number, relative to first line */
3007 static char_u   *regline;       /* start of current line */
3008 static char_u   *reginput;      /* current input, points into "regline" */
3009 
3010 static int      need_clear_subexpr;     /* flag: subexpressions still need to
3011                                          * be cleared */
3012 #ifdef FEAT_SYN_HL
3013 static int      need_clear_zsubexpr = FALSE;    /* flag: extmatch subexpressions
3014                                                  * still need to be cleared */
3015 #endif
3016
3017 /*
3018  * Structure used to save the current input state, when it needs to be
3019  * restored after trying a match.  Used by reg_save() and reg_restore().
3020  * Also stores the length of "backpos" (in "rs_len").
3021  */
3022 typedef struct
3023 {
3024     union
3025     {
3026         char_u  *ptr;   /* reginput pointer, for single-line regexp */
3027         lpos_T  pos;    /* reginput pos, for multi-line regexp */
3028     } rs_u;
3029     int         rs_len;
3030 } regsave_T;
3031
3032 /* struct to save start/end pointer/position in for \(\) */
3033 typedef struct
3034 {
3035     union
3036     {
3037         char_u  *ptr;   /* used when not REG_MULTI, see restore_se() */
3038         lpos_T  pos;    /* used when REG_MULTI, see restore_se() */
3039     } se_u;
3040 } save_se_T;
3041
3042 /* used for BEHIND and NOBEHIND matching */
3043 typedef struct regbehind_S
3044 {
3045     regsave_T   save_after;
3046     regsave_T   save_behind;
3047     int         save_need_clear_subexpr;
3048     save_se_T   save_start[NSUBEXP];    /* one entry per subexpression */
3049     save_se_T   save_end[NSUBEXP];      /* one entry per subexpression */
3050 } regbehind_T;
3051
3052 static char_u   *reg_getline __ARGS((linenr_T lnum));
3053 static long     vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3054 static long     regtry __ARGS((regprog_T *prog, colnr_T col));
3055 static void     cleanup_subexpr __ARGS((void));
3056 #ifdef FEAT_SYN_HL
3057 static void     cleanup_zsubexpr __ARGS((void));
3058 #endif
3059 static void     save_subexpr __ARGS((regbehind_T *bp));
3060 static void     restore_subexpr __ARGS((regbehind_T *bp));
3061 static void     reg_nextline __ARGS((void));
3062 static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
3063 static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
3064 static int      reg_save_equal __ARGS((regsave_T *save));
3065 static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3066 static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));
3067 
3068 /* Save the sub-expressions before attempting a match.  NOTE(review): this
3068  * expands to a bare "?:" expression, only use it as a complete statement. */
3069 #define save_se(savep, posp, pp) \
3070     REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
3071 
3072 /* After a failed match restore the sub-expressions.  NOTE(review): this
3072  * expands to a { } block, not to a single statement. */
3073 #define restore_se(savep, posp, pp) { \
3074     if (REG_MULTI) \
3075         *(posp) = (savep)->se_u.pos; \
3076     else \
3077         *(pp) = (savep)->se_u.ptr; }
3078 
3079 static int      re_num_cmp __ARGS((long_u val, char_u *scan));
3080 static int      regmatch __ARGS((char_u *prog));
3081 static int      regrepeat __ARGS((char_u *p, long maxcount));
3082
3083 #ifdef DEBUG
3084 int             regnarrate = 0; /* only exists when DEBUG is defined */
3085 #endif
3086 
3087 /*
3088  * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
3089  * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3090  * contains '\c' or '\C' the value is overruled.
3091  */
3092 static int      ireg_ic;
3093 
3094 #ifdef FEAT_MBYTE
3095 /*
3096  * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
3097  * in the regexp.  Defaults to false, always: the vim_regexec*() entry
3097  * functions reset it to FALSE.
3098  */
3099 static int      ireg_icombine;
3100 #endif
3101 
3102 /*
3103  * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
3104  * there is no maximum.
3105  */
3106 static colnr_T  ireg_maxcol;
3107 
3108 /*
3109  * Sometimes need to save a copy of a line.  Since alloc()/free() is very
3110  * slow, we keep one allocated piece of memory and only re-allocate it when
3111  * it's too small.  It's freed in vim_regexec_both() when finished.
3112  */
3113 static char_u   *reg_tofree = NULL;
3114 static unsigned reg_tofreelen;
3115
3116 /*
3117  * These variables are set when executing a regexp to speed up the execution.
3118  * Which ones are set depends on whether a single-line or multi-line match is
3119  * done:
3120  *                      single-line             multi-line
3121  * reg_match            &regmatch_T             NULL
3122  * reg_mmatch           NULL                    &regmmatch_T
3123  * reg_startp           reg_match->startp       <invalid>
3124  * reg_endp             reg_match->endp         <invalid>
3125  * reg_startpos         <invalid>               reg_mmatch->startpos
3126  * reg_endpos           <invalid>               reg_mmatch->endpos
3127  * reg_win              NULL                    window in which to search
3128  * reg_buf              <invalid>               buffer in which to search
3129  * reg_firstlnum        <invalid>               first line in which to search
3130  * reg_maxline          0                       last line nr
3131  * reg_line_lbr         FALSE or TRUE           FALSE
3132  */
3133 static regmatch_T       *reg_match;     /* NULL means multi-line: REG_MULTI */
3134 static regmmatch_T      *reg_mmatch;
3135 static char_u           **reg_startp = NULL;
3136 static char_u           **reg_endp = NULL;
3137 static lpos_T           *reg_startpos = NULL;
3138 static lpos_T           *reg_endpos = NULL;
3139 static win_T            *reg_win;
3140 static buf_T            *reg_buf;
3141 static linenr_T         reg_firstlnum;
3142 static linenr_T         reg_maxline;    /* relative to "reg_firstlnum" */
3143 static int              reg_line_lbr;       /* "\n" in string is line break */
3144
3145 /* Values for rs_state in regitem_T: what the item was pushed for. */
3146 typedef enum regstate_E
3147 {
3148     RS_NOPEN = 0        /* NOPEN and NCLOSE */
3149     , RS_MOPEN          /* MOPEN + [0-9] */
3150     , RS_MCLOSE         /* MCLOSE + [0-9] */
3151 #ifdef FEAT_SYN_HL
3152     , RS_ZOPEN          /* ZOPEN + [0-9] */
3153     , RS_ZCLOSE         /* ZCLOSE + [0-9] */
3154 #endif
3155     , RS_BRANCH         /* BRANCH */
3156     , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
3157     , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
3158     , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
3159     , RS_NOMATCH        /* NOMATCH */
3160     , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
3161     , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
3162     , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
3163     , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
3164 } regstate_T;
3165
3166 /*
3167  * When there are alternatives a regitem_T is put on the regstack to remember
3168  * what we are doing.
3169  * Before it may be another type of item, depending on rs_state, to remember
3170  * more things.
3171  */
3172 typedef struct regitem_S
3173 {
3174     regstate_T  rs_state;       /* what we are doing, one of RS_ above */
3175     char_u      *rs_scan;       /* current node in program */
3176     union
3177     {
3178         save_se_T  sesave;
3179         regsave_T  regsave;
3180     } rs_un;                    /* room for saving reginput */
3181     short       rs_no;          /* submatch nr or BEHIND/NOBEHIND */
3182 } regitem_T;
3183 
3184 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3185 static void regstack_pop __ARGS((char_u **scan));
3186
3187 /* used for STAR, PLUS and BRACE_SIMPLE matching */
3188 typedef struct regstar_S
3189 {
3190     int         nextb;          /* next byte */
3191     int         nextb_ic;       /* next byte reverse case */
3192     long        count;
3193     long        minval;         /* NOTE(review): presumably the repeat limits */
3194     long        maxval;
3195 } regstar_T;
3196
3197 /* used to store input position when a BACK was encountered, so that we know
3198  * if we made any progress since the last time. */
3199 typedef struct backpos_S
3200 {
3201     char_u      *bp_scan;       /* "scan" where BACK was encountered */
3202     regsave_T   bp_pos;         /* last input position */
3203 } backpos_T;
3204
3205 /*
3206  * "regstack" and "backpos" are used by regmatch().  They are kept over calls
3207  * to avoid invoking malloc() and free() often.
3208  * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
3209  * or regbehind_T.
3210  * "backpos" is a table with backpos_T items for BACK
3211  */
3212 static garray_T regstack = {0, 0, 0, 0, NULL};
3213 static garray_T backpos = {0, 0, 0, 0, NULL};
3214 
3215 /*
3216  * Both for regstack and backpos tables we use the following strategy of
3217  * allocation (to reduce malloc/free calls):
3218  * - Initial size is fairly small.
3219  * - When needed, the tables are grown bigger (8 times at first, double after
3220  *   that).
3221  * - After executing the match we free the memory only if the array has grown.
3222  *   Thus the memory is kept allocated when it's at the initial size.
3223  * This makes it fast while not keeping a lot of memory allocated.
3224  * A three times speed increase was observed when using many simple patterns.
3225  */
3226 #define REGSTACK_INITIAL        2048    /* initial size of "regstack" */
3227 #define BACKPOS_INITIAL         64      /* initial size of "backpos" */
3228
3229 #if defined(EXITFREE) || defined(PROTO)
3230     void
3231 free_regexp_stuff()     /* free what this file keeps allocated over calls */
3232 {
3233     ga_clear(&regstack);
3234     ga_clear(&backpos);
3235     vim_free(reg_tofree);
3236     vim_free(reg_prev_sub);
3237 }
3238 #endif
3239
3240 /*
3241  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3241  * Returns NULL before the first line of the buffer, "" past "reg_maxline".
3242  */
3243     static char_u *
3244 reg_getline(lnum)
3245     linenr_T    lnum;
3246 {
3247     /* when looking behind for a match/no-match lnum is negative.  But we
3248      * can't go before line 1 */
3249     if (reg_firstlnum + lnum < 1)
3250         return NULL;
3251     if (lnum > reg_maxline)
3252         /* Must have matched the "\n" in the last line. */
3253         return (char_u *)"";
3254     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3255 }
3256
3257 static regsave_T behind_pos;
3258 
3259 #ifdef FEAT_SYN_HL
3260 static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
3261 static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
3262 static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
3263 static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
3264 #endif
3265 
3266 /* TRUE if using multi-line regexp: vim_regexec_multi() sets reg_match NULL. */
3267 #define REG_MULTI       (reg_match == NULL)
3268
3269 /*
3270  * Match a regexp against a string.
3271  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3272  * Uses curbuf for line count and 'iskeyword'.
3273  * A "\n" in "line" is not a line break here, see vim_regexec_nl() for that.
3274  * Return TRUE if there is a match, FALSE if not.
3275  */
3276     int
3277 vim_regexec(rmp, line, col)
3278     regmatch_T  *rmp;
3279     char_u      *line;  /* string to match against */
3280     colnr_T     col;    /* column to start looking for match */
3281 {
3282     reg_match = rmp;            /* not NULL: single-line matching */
3283     reg_mmatch = NULL;
3284     reg_maxline = 0;
3285     reg_line_lbr = FALSE;
3286     reg_win = NULL;
3287     ireg_ic = rmp->rm_ic;
3288 #ifdef FEAT_MBYTE
3289     ireg_icombine = FALSE;
3290 #endif
3291     ireg_maxcol = 0;            /* no maximum column */
3292     return (vim_regexec_both(line, col, NULL) != 0);
3293 }
3294
3295 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
3296         || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
3297 /*
3298  * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
3299  */
3300     int
3301 vim_regexec_nl(rmp, line, col)
3302     regmatch_T  *rmp;
3303     char_u      *line;  /* string to match against */
3304     colnr_T     col;    /* column to start looking for match */
3305 {
3306     reg_match = rmp;            /* not NULL: single-line matching */
3307     reg_mmatch = NULL;
3308     reg_maxline = 0;
3309     reg_line_lbr = TRUE;        /* the only difference with vim_regexec() */
3310     reg_win = NULL;
3311     ireg_ic = rmp->rm_ic;
3312 #ifdef FEAT_MBYTE
3313     ireg_icombine = FALSE;
3314 #endif
3315     ireg_maxcol = 0;            /* no maximum column */
3316     return (vim_regexec_both(line, col, NULL) != 0);
3317 }
3318 #endif
3319
3320 /*
3321  * Match a regexp against multiple lines.
3322  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3323  * Uses "buf" for the line count.  While matching curbuf is set to "buf", so
3324  * that vim_iswordc() works for that buffer; it is restored before returning.
3325  * Return zero if there is no match.  Return number of lines contained in the
3326  * match otherwise.
3327  */
3328     long
3329 vim_regexec_multi(rmp, win, buf, lnum, col, tm)
3330     regmmatch_T *rmp;
3331     win_T       *win;           /* window in which to search or NULL */
3332     buf_T       *buf;           /* buffer in which to search */
3333     linenr_T    lnum;           /* nr of line to start looking for match */
3334     colnr_T     col;            /* column to start looking for match */
3335     proftime_T  *tm;            /* timeout limit or NULL */
3336 {
3337     long        r;
3338     buf_T       *save_curbuf = curbuf;
3339 
3340     reg_match = NULL;           /* NULL: multi-line matching, see REG_MULTI */
3341     reg_mmatch = rmp;
3342     reg_buf = buf;
3343     reg_win = win;
3344     reg_firstlnum = lnum;
3345     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;   /* relative to "lnum" */
3346     reg_line_lbr = FALSE;
3347     ireg_ic = rmp->rmm_ic;
3348 #ifdef FEAT_MBYTE
3349     ireg_icombine = FALSE;
3350 #endif
3351     ireg_maxcol = rmp->rmm_maxcol;
3352 
3353     /* Need to switch to buffer "buf" to make vim_iswordc() work. */
3354     curbuf = buf;
3355     r = vim_regexec_both(NULL, col, tm);
3356     curbuf = save_curbuf;
3357 
3358     return r;
3359 }
3360
3361 /*
3362  * Match a regexp against a string ("line" points to the string) or multiple
3363  * lines ("line" is NULL, use reg_getline()).  Starts looking at column "col".
3364  * Returns 0 when there is no match, otherwise the result of regtry(). */
3365 /*ARGSUSED*/
3366     static long
3367 vim_regexec_both(line, col, tm)
3368     char_u      *line;          /* string to match or NULL for multi-line */
3369     colnr_T     col;            /* column to start looking for match */
3370     proftime_T  *tm;            /* timeout limit or NULL */
3371 {
3372     regprog_T   *prog;
3373     char_u      *s;
3374     long        retval = 0L;    /* only changed by the result of regtry() */
3375
3376     /* Create "regstack" and "backpos" if they are not allocated yet.
3377      * We allocate *_INITIAL amount of bytes first and then set the grow size
3378      * to much bigger value to avoid many malloc calls in case of deep regular
3379      * expressions.  */
3380     if (regstack.ga_data == NULL)
3381     {
3382         /* Use an item size of 1 byte, since we push different things
3383          * onto the regstack. */
3384         ga_init2(&regstack, 1, REGSTACK_INITIAL);
3385         ga_grow(&regstack, REGSTACK_INITIAL);
3386         regstack.ga_growsize = REGSTACK_INITIAL * 8;
3387     }
3388
3389     if (backpos.ga_data == NULL)
3390     {
3391         ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3392         ga_grow(&backpos, BACKPOS_INITIAL);
3393         backpos.ga_growsize = BACKPOS_INITIAL * 8;
3394     }
3395
3396     if (REG_MULTI)
3397     {
3398         prog = reg_mmatch->regprog;
3399         line = reg_getline((linenr_T)0);    /* the first line to search */
3400         reg_startpos = reg_mmatch->startpos;
3401         reg_endpos = reg_mmatch->endpos;
3402     }
3403     else
3404     {
3405         prog = reg_match->regprog;
3406         reg_startp = reg_match->startp;
3407         reg_endp = reg_match->endp;
3408     }
3409
3410     /* Be paranoid... */
3411     if (prog == NULL || line == NULL)
3412     {
3413         EMSG(_(e_null));
3414         goto theend;
3415     }
3416
3417     /* Check validity of program. */
3418     if (prog_magic_wrong())
3419         goto theend;
3420
3421     /* If the start column is past the maximum column: no need to try. */
3422     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3423         goto theend;
3424
3425     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3426     if (prog->regflags & RF_ICASE)
3427         ireg_ic = TRUE;
3428     else if (prog->regflags & RF_NOICASE)
3429         ireg_ic = FALSE;
3430
3431 #ifdef FEAT_MBYTE
3432     /* If pattern contains "\Z" overrule value of ireg_icombine */
3433     if (prog->regflags & RF_ICOMBINE)
3434         ireg_icombine = TRUE;
3435 #endif
3436
3437     /* If there is a "must appear" string, look for it. */
3438     if (prog->regmust != NULL)
3439     {
3440         int c;
3441
3442 #ifdef FEAT_MBYTE
3443         if (has_mbyte)
3444             c = (*mb_ptr2char)(prog->regmust);
3445         else
3446 #endif
3447             c = *prog->regmust;
3448         s = line + col;         /* only look from the start column on */
3449
3450         /*
3451          * This is used very often, esp. for ":global".  Use three versions of
3452          * the loop to avoid overhead of conditions.
3453          */
3454         if (!ireg_ic
3455 #ifdef FEAT_MBYTE
3456                 && !has_mbyte
3457 #endif
3458                 )
3459             while ((s = vim_strbyte(s, c)) != NULL)
3460             {
3461                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3462                     break;              /* Found it. */
3463                 ++s;
3464             }
3465 #ifdef FEAT_MBYTE
3466         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3467             while ((s = vim_strchr(s, c)) != NULL)
3468             {
3469                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3470                     break;              /* Found it. */
3471                 mb_ptr_adv(s);
3472             }
3473 #endif
3474         else
3475             while ((s = cstrchr(s, c)) != NULL)
3476             {
3477                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3478                     break;              /* Found it. */
3479                 mb_ptr_adv(s);
3480             }
3481         if (s == NULL)          /* Not present. */
3482             goto theend;
3483     }
3484
3485     regline = line;
3486     reglnum = 0;
3487
3488     /* Simplest case: Anchored match need be tried only once. */
3489     if (prog->reganch)
3490     {
3491         int     c;
3492
3493 #ifdef FEAT_MBYTE
3494         if (has_mbyte)
3495             c = (*mb_ptr2char)(regline + col);
3496         else
3497 #endif
3498             c = regline[col];
3499         if (prog->regstart == NUL
3500                 || prog->regstart == c
3501                 || (ireg_ic && ((
3502 #ifdef FEAT_MBYTE
3503                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3504                         || (c < 255 && prog->regstart < 255 &&
3505 #endif
3506                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3507             retval = regtry(prog, col);     /* the one and only attempt */
3508         else
3509             retval = 0;
3510     }
3511     else
3512     {
3513 #ifdef FEAT_RELTIME
3514         int tm_count = 0;
3515 #endif
3516         /* Messy cases:  unanchored match. */
3517         while (!got_int)
3518         {
3519             if (prog->regstart != NUL)
3520             {
3521                 /* Skip until the char we know it must start with.
3522                  * Used often, do some work to avoid call overhead. */
3523                 if (!ireg_ic
3524 #ifdef FEAT_MBYTE
3525                             && !has_mbyte
3526 #endif
3527                             )
3528                     s = vim_strbyte(regline + col, prog->regstart);
3529                 else
3530                     s = cstrchr(regline + col, prog->regstart);
3531                 if (s == NULL)
3532                 {
3533                     retval = 0;
3534                     break;
3535                 }
3536                 col = (int)(s - regline);   /* try matching from there */
3537             }
3538
3539             /* Check for maximum column to try. */
3540             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3541             {
3542                 retval = 0;
3543                 break;
3544             }
3545
3546             retval = regtry(prog, col);
3547             if (retval > 0)
3548                 break;
3549
3550             /* if not currently on the first line, get it again */
3551             if (reglnum != 0)
3552             {
3553                 reglnum = 0;
3554                 regline = reg_getline((linenr_T)0);
3555             }
3556             if (regline[col] == NUL)
3557                 break;                      /* no column left to try */
3558 #ifdef FEAT_MBYTE
3559             if (has_mbyte)
3560                 col += (*mb_ptr2len)(regline + col);
3561             else
3562 #endif
3563                 ++col;
3564 #ifdef FEAT_RELTIME
3565             /* Check for timeout only every 20th iteration to avoid overhead. */
3566             if (tm != NULL && ++tm_count == 20)
3567             {
3568                 tm_count = 0;
3569                 if (profile_passed_limit(tm))
3570                     break;                  /* time limit passed: give up */
3571             }
3572 #endif
3573         }
3574     }
3575
3576 theend:
3577     /* Free "reg_tofree" when it's a bit big.
3578      * Free regstack and backpos if they are bigger than their initial size. */
3579     if (reg_tofreelen > 400)
3580     {
3581         vim_free(reg_tofree);
3582         reg_tofree = NULL;
3583     }
3584     if (regstack.ga_maxlen > REGSTACK_INITIAL)
3585         ga_clear(&regstack);
3586     if (backpos.ga_maxlen > BACKPOS_INITIAL)
3587         ga_clear(&backpos);
3588
3589     return retval;
3590 }
3591
3592 #ifdef FEAT_SYN_HL
3593 static reg_extmatch_T *make_extmatch __ARGS((void));
3594
3595 /*
3596  * Create a new extmatch and mark it as referenced once.
3597  * Returns the result of alloc_clear(), thus NULL is possible. */
3598     static reg_extmatch_T *
3599 make_extmatch()
3600 {
3601     reg_extmatch_T      *em;
3602
3603     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
3604     if (em != NULL)
3605         em->refcnt = 1;         /* the caller holds the first reference */
3606     return em;
3607 }
3608
3609 /*
3610  * Add a reference to an extmatch.
3611  * Returns "em" itself.  A NULL "em" is passed through without any change. */
3612     reg_extmatch_T *
3613 ref_extmatch(em)
3614     reg_extmatch_T      *em;
3615 {
3616     if (em != NULL)
3617         em->refcnt++;
3618     return em;
3619 }
3620
3621 /*
3622  * Remove a reference to an extmatch.  If there are no references left, free
3623  * all the strings in "matches" and "em" itself.  A NULL "em" is ignored.
3624  */
3625     void
3626 unref_extmatch(em)
3627     reg_extmatch_T      *em;
3628 {
3629     int i;
3630
3631     if (em != NULL && --em->refcnt <= 0)    /* dropped the last reference */
3632     {
3633         for (i = 0; i < NSUBEXP; ++i)
3634             vim_free(em->matches[i]);
3635         vim_free(em);
3636     }
3637 }
3638 #endif
3639
3640 /*
3641  * regtry - try to match "prog" at regline["col"].
3642  * Returns 0 for failure, number of lines contained in the match otherwise.
3643  * Also returns 0 when the extmatch for \z(...\) matches can't be allocated. */
3644     static long
3645 regtry(prog, col)
3646     regprog_T   *prog;
3647     colnr_T     col;
3648 {
3649     reginput = regline + col;
3650     need_clear_subexpr = TRUE;
3651 #ifdef FEAT_SYN_HL
3652     /* Clear the external match subpointers if necessary. */
3653     if (prog->reghasz == REX_SET)
3654         need_clear_zsubexpr = TRUE;
3655 #endif
3656
3657     if (regmatch(prog->program + 1) == 0)
3658         return 0;
3659
3660     cleanup_subexpr();
3661     if (REG_MULTI)
3662     {
3663         if (reg_startpos[0].lnum < 0)       /* start was not set: use "col" */
3664         {
3665             reg_startpos[0].lnum = 0;
3666             reg_startpos[0].col = col;
3667         }
3668         if (reg_endpos[0].lnum < 0)         /* end was not set: use reginput */
3669         {
3670             reg_endpos[0].lnum = reglnum;
3671             reg_endpos[0].col = (int)(reginput - regline);
3672         }
3673         else
3674             /* Use line number of "\ze". */
3675             reglnum = reg_endpos[0].lnum;
3676     }
3677     else
3678     {
3679         if (reg_startp[0] == NULL)          /* start was not set: use "col" */
3680             reg_startp[0] = regline + col;
3681         if (reg_endp[0] == NULL)            /* end was not set: use reginput */
3682             reg_endp[0] = reginput;
3683     }
3684 #ifdef FEAT_SYN_HL
3685     /* Package any found \z(...\) matches for export. Default is none. */
3686     unref_extmatch(re_extmatch_out);
3687     re_extmatch_out = NULL;
3688
3689     if (prog->reghasz == REX_SET)
3690     {
3691         int             i;
3692
3693         cleanup_zsubexpr();
3694         re_extmatch_out = make_extmatch();
             if (re_extmatch_out == NULL)    /* allocation failed: don't use it */
                 return 0;
3695         for (i = 0; i < NSUBEXP; i++)
3696         {
3697             if (REG_MULTI)
3698             {
3699                 /* Only accept single line matches. */
3700                 if (reg_startzpos[i].lnum >= 0
3701                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3702                     re_extmatch_out->matches[i] =
3703                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3704                                                        + reg_startzpos[i].col,
3705                                    reg_endzpos[i].col - reg_startzpos[i].col);
3706             }
3707             else
3708             {
3709                 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3710                     re_extmatch_out->matches[i] =
3711                             vim_strnsave(reg_startzp[i],
3712                                         (int)(reg_endzp[i] - reg_startzp[i]));
3713             }
3714         }
3715     }
3716 #endif
3717     return 1 + reglnum;         /* reglnum is zero for a single-line match */
3718 }
3719
3720 #ifdef FEAT_MBYTE
3721 static int reg_prev_class __ARGS((void));
3722
3723 /*
3724  * Get class of previous character: mb_get_class() of the char before reginput.
3725  * Returns -1 when reginput is at the start of regline (no previous char). */
3726     static int
3727 reg_prev_class()
3728 {
3729     if (reginput > regline)
3730         return mb_get_class(reginput - 1
3731                                      - (*mb_head_off)(regline, reginput - 1));
3732     return -1;
3733 }
3734
3735 #endif
3736 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput) /* consume the matched char */
3737
3738 /*
3739  * The arguments from BRACE_LIMITS are stored here.  They are actually local
3740  * to regmatch(), but they are here to reduce the amount of stack space used
3741  * (it can be called recursively many times).
3742  * NOTE(review): regmatch() loops over "regstack"; confirm the recursion remark. */
3743 static long     bl_minval;      /* presumably the lower limit - TODO confirm */
3744 static long     bl_maxval;      /* presumably the upper limit - TODO confirm */
3745
3746 /*
3747  * regmatch - main matching routine
3748  *
3749  * Conceptually the strategy is simple: Check to see whether the current node
3750  * matches, push an item onto the regstack and loop to see whether the rest
3751  * matches, and then act accordingly.  In practice we make some effort to
3752  * avoid using the regstack, in particular by going through "ordinary" nodes
3753  * (that don't need to know whether the rest of the match failed) by a nested
3754  * loop.
3755  *
3756  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3757  * the last matched character.
3758  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3759  * undefined state!
3760  */
3761     static int
3762 regmatch(scan)
3763     char_u      *scan;          /* Current node. */
3764 {
3765   char_u        *next;          /* Next node. */
3766   int           op;
3767   int           c;
3768   regitem_T     *rp;
3769   int           no;
3770   int           status;         /* one of the RA_ values: */
3771 #define RA_FAIL         1       /* something failed, abort */
3772 #define RA_CONT         2       /* continue in inner loop */
3773 #define RA_BREAK        3       /* break inner loop */
3774 #define RA_MATCH        4       /* successful match */
3775 #define RA_NOMATCH      5       /* didn't match */
3776
3777   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
3778    * vim_regexec_both() to reduce malloc()/free() calls. */
3779   regstack.ga_len = 0;
3780   backpos.ga_len = 0;
3781
3782   /*
3783    * Repeat until "regstack" is empty.
3784    */
3785   for (;;)
3786   {
3787     /* Some patterns my cause a long time to match, even though they are not
3788      * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3789     fast_breakcheck();
3790
3791 #ifdef DEBUG
3792     if (scan != NULL && regnarrate)
3793     {
3794         mch_errmsg(regprop(scan));
3795         mch_errmsg("(\n");
3796     }
3797 #endif
3798
3799     /*
3800      * Repeat for items that can be matched sequentially, without using the
3801      * regstack.
3802      */
3803     for (;;)
3804     {
3805         if (got_int || scan == NULL)
3806         {
3807             status = RA_FAIL;
3808             break;
3809         }
3810         status = RA_CONT;
3811
3812 #ifdef DEBUG
3813         if (regnarrate)
3814         {
3815             mch_errmsg(regprop(scan));
3816             mch_errmsg("...\n");
3817 # ifdef FEAT_SYN_HL
3818             if (re_extmatch_in != NULL)
3819             {
3820                 int i;
3821
3822                 mch_errmsg(_("External submatches:\n"));
3823                 for (i = 0; i < NSUBEXP; i++)
3824                 {
3825                     mch_errmsg("    \"");
3826                     if (re_extmatch_in->matches[i] != NULL)
3827                         mch_errmsg(re_extmatch_in->matches[i]);
3828                     mch_errmsg("\"\n");
3829                 }
3830             }
3831 # endif
3832         }
3833 #endif
3834         next = regnext(scan);
3835
3836         op = OP(scan);
3837         /* Check for character class with NL added. */
3838         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3839                                 && *reginput == NUL && reglnum <= reg_maxline)
3840         {
3841             reg_nextline();
3842         }
3843         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3844         {
3845             ADVANCE_REGINPUT();
3846         }
3847         else
3848         {
3849           if (WITH_NL(op))
3850               op -= ADD_NL;
3851 #ifdef FEAT_MBYTE
3852           if (has_mbyte)
3853               c = (*mb_ptr2char)(reginput);
3854           else
3855 #endif
3856               c = *reginput;
3857           switch (op)
3858           {
3859           case BOL:
3860             if (reginput != regline)
3861                 status = RA_NOMATCH;
3862             break;
3863
3864           case EOL:
3865             if (c != NUL)
3866                 status = RA_NOMATCH;
3867             break;
3868
3869           case RE_BOF:
3870             /* We're not at the beginning of the file when below the first
3871              * line where we started, not at the start of the line or we
3872              * didn't start at the first line of the buffer. */
3873             if (reglnum != 0 || reginput != regline
3874                                           || (REG_MULTI && reg_firstlnum > 1))
3875                 status = RA_NOMATCH;
3876             break;
3877
3878           case RE_EOF:
3879             if (reglnum != reg_maxline || c != NUL)
3880                 status = RA_NOMATCH;
3881             break;
3882
3883           case CURSOR:
3884             /* Check if the buffer is in a window and compare the
3885              * reg_win->w_cursor position to the match position. */
3886             if (reg_win == NULL
3887                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3888                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3889                 status = RA_NOMATCH;
3890             break;
3891
3892           case RE_MARK:
3893             /* Compare the mark position to the match position.  NOTE: Always
3894              * uses the current buffer. */
3895             {
3896                 int     mark = OPERAND(scan)[0];
3897                 int     cmp = OPERAND(scan)[1];
3898                 pos_T   *pos;
3899
3900                 pos = getmark(mark, FALSE);
3901                 if (pos == NULL              /* mark doesn't exist */
3902                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3903                         || (pos->lnum == reglnum + reg_firstlnum
3904                                 ? (pos->col == (colnr_T)(reginput - regline)
3905                                     ? (cmp == '<' || cmp == '>')
3906                                     : (pos->col < (colnr_T)(reginput - regline)
3907                                         ? cmp != '>'
3908                                         : cmp != '<'))
3909                                 : (pos->lnum < reglnum + reg_firstlnum
3910                                     ? cmp != '>'
3911                                     : cmp != '<')))
3912                     status = RA_NOMATCH;
3913             }
3914             break;
3915
3916           case RE_VISUAL:
3917 #ifdef FEAT_VISUAL
3918             /* Check if the buffer is the current buffer. and whether the
3919              * position is inside the Visual area. */
3920             if (reg_buf != curbuf || VIsual.lnum == 0)
3921                 status = RA_NOMATCH;
3922             else
3923             {
3924                 pos_T       top, bot;
3925                 linenr_T    lnum;
3926                 colnr_T     col;
3927                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3928                 int         mode;
3929
3930                 if (VIsual_active)
3931                 {
3932                     if (lt(VIsual, wp->w_cursor))
3933                     {
3934                         top = VIsual;
3935                         bot = wp->w_cursor;
3936                     }
3937                     else
3938                     {
3939                         top = wp->w_cursor;
3940                         bot = VIsual;
3941                     }
3942                     mode = VIsual_mode;
3943                 }
3944                 else
3945                 {
3946                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3947                     {
3948                         top = curbuf->b_visual.vi_start;
3949                         bot = curbuf->b_visual.vi_end;
3950                     }
3951                     else
3952                     {
3953                         top = curbuf->b_visual.vi_end;
3954                         bot = curbuf->b_visual.vi_start;
3955                     }
3956                     mode = curbuf->b_visual.vi_mode;
3957                 }
3958                 lnum = reglnum + reg_firstlnum;
3959                 col = (colnr_T)(reginput - regline);
3960                 if (lnum < top.lnum || lnum > bot.lnum)
3961                     status = RA_NOMATCH;
3962                 else if (mode == 'v')
3963                 {
3964                     if ((lnum == top.lnum && col < top.col)
3965                             || (lnum == bot.lnum
3966                                          && col >= bot.col + (*p_sel != 'e')))
3967                         status = RA_NOMATCH;
3968                 }
3969                 else if (mode == Ctrl_V)
3970                 {
3971                     colnr_T         start, end;
3972                     colnr_T         start2, end2;
3973                     colnr_T         cols;
3974
3975                     getvvcol(wp, &top, &start, NULL, &end);
3976                     getvvcol(wp, &bot, &start2, NULL, &end2);
3977                     if (start2 < start)
3978                         start = start2;
3979                     if (end2 > end)
3980                         end = end2;
3981                     if (top.col == MAXCOL || bot.col == MAXCOL)
3982                         end = MAXCOL;
3983                     cols = win_linetabsize(wp,
3984                                       regline, (colnr_T)(reginput - regline));
3985                     if (cols < start || cols > end - (*p_sel == 'e'))
3986                         status = RA_NOMATCH;
3987                 }
3988             }
3989 #else
3990             status = RA_NOMATCH;
3991 #endif
3992             break;
3993
3994           case RE_LNUM:
3995             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3996                                                                         scan))
3997                 status = RA_NOMATCH;
3998             break;
3999
4000           case RE_COL:
4001             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
4002                 status = RA_NOMATCH;
4003             break;
4004
4005           case RE_VCOL:
4006             if (!re_num_cmp((long_u)win_linetabsize(
4007                             reg_win == NULL ? curwin : reg_win,
4008                             regline, (colnr_T)(reginput - regline)) + 1, scan))
4009                 status = RA_NOMATCH;
4010             break;
4011
4012           case BOW:     /* \<word; reginput points to w */
4013             if (c == NUL)       /* Can't match at end of line */
4014                 status = RA_NOMATCH;
4015 #ifdef FEAT_MBYTE
4016             else if (has_mbyte)
4017             {
4018                 int this_class;
4019
4020                 /* Get class of current and previous char (if it exists). */
4021                 this_class = mb_get_class(reginput);
4022                 if (this_class <= 1)
4023                     status = RA_NOMATCH;  /* not on a word at all */
4024                 else if (reg_prev_class() == this_class)
4025                     status = RA_NOMATCH;  /* previous char is in same word */
4026             }
4027 #endif
4028             else
4029             {
4030                 if (!vim_iswordc(c)
4031                         || (reginput > regline && vim_iswordc(reginput[-1])))
4032                     status = RA_NOMATCH;
4033             }
4034             break;
4035
4036           case EOW:     /* word\>; reginput points after d */
4037             if (reginput == regline)    /* Can't match at start of line */
4038                 status = RA_NOMATCH;
4039 #ifdef FEAT_MBYTE
4040             else if (has_mbyte)
4041             {
4042                 int this_class, prev_class;
4043
4044                 /* Get class of current and previous char (if it exists). */
4045                 this_class = mb_get_class(reginput);
4046                 prev_class = reg_prev_class();
4047                 if (this_class == prev_class
4048                         || prev_class == 0 || prev_class == 1)
4049                     status = RA_NOMATCH;
4050             }
4051 #endif
4052             else
4053             {
4054                 if (!vim_iswordc(reginput[-1])
4055                         || (reginput[0] != NUL && vim_iswordc(c)))
4056                     status = RA_NOMATCH;
4057             }
4058             break; /* Matched with EOW */
4059
4060           case ANY:
4061             if (c == NUL)
4062                 status = RA_NOMATCH;
4063             else
4064                 ADVANCE_REGINPUT();
4065             break;
4066
4067           case IDENT:
4068             if (!vim_isIDc(c))
4069                 status = RA_NOMATCH;
4070             else
4071                 ADVANCE_REGINPUT();
4072             break;
4073
4074           case SIDENT:
4075             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4076                 status = RA_NOMATCH;
4077             else
4078                 ADVANCE_REGINPUT();
4079             break;
4080
4081           case KWORD:
4082             if (!vim_iswordp(reginput))
4083                 status = RA_NOMATCH;
4084             else
4085                 ADVANCE_REGINPUT();
4086             break;
4087
4088           case SKWORD:
4089             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4090                 status = RA_NOMATCH;
4091             else
4092                 ADVANCE_REGINPUT();
4093             break;
4094
4095           case FNAME:
4096             if (!vim_isfilec(c))
4097                 status = RA_NOMATCH;
4098             else
4099                 ADVANCE_REGINPUT();
4100             break;
4101
4102           case SFNAME:
4103             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4104                 status = RA_NOMATCH;
4105             else
4106                 ADVANCE_REGINPUT();
4107             break;
4108
4109           case PRINT:
4110             if (ptr2cells(reginput) != 1)
4111                 status = RA_NOMATCH;
4112             else
4113                 ADVANCE_REGINPUT();
4114             break;
4115
4116           case SPRINT:
4117             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4118                 status = RA_NOMATCH;
4119             else
4120                 ADVANCE_REGINPUT();
4121             break;
4122
4123           case WHITE:
4124             if (!vim_iswhite(c))
4125                 status = RA_NOMATCH;
4126             else
4127                 ADVANCE_REGINPUT();
4128             break;
4129
4130           case NWHITE:
4131             if (c == NUL || vim_iswhite(c))
4132                 status = RA_NOMATCH;
4133             else
4134                 ADVANCE_REGINPUT();
4135             break;
4136
4137           case DIGIT:
4138             if (!ri_digit(c))
4139                 status = RA_NOMATCH;
4140             else
4141                 ADVANCE_REGINPUT();
4142             break;
4143
4144           case NDIGIT:
4145             if (c == NUL || ri_digit(c))
4146                 status = RA_NOMATCH;
4147             else
4148                 ADVANCE_REGINPUT();
4149             break;
4150
4151           case HEX:
4152             if (!ri_hex(c))
4153                 status = RA_NOMATCH;
4154             else
4155                 ADVANCE_REGINPUT();
4156             break;
4157
4158           case NHEX:
4159             if (c == NUL || ri_hex(c))
4160                 status = RA_NOMATCH;
4161             else
4162                 ADVANCE_REGINPUT();
4163             break;
4164
4165           case OCTAL:
4166             if (!ri_octal(c))
4167                 status = RA_NOMATCH;
4168             else
4169                 ADVANCE_REGINPUT();
4170             break;
4171
4172           case NOCTAL:
4173             if (c == NUL || ri_octal(c))
4174                 status = RA_NOMATCH;
4175             else
4176                 ADVANCE_REGINPUT();
4177             break;
4178
4179           case WORD:
4180             if (!ri_word(c))
4181                 status = RA_NOMATCH;
4182             else
4183                 ADVANCE_REGINPUT();
4184             break;
4185
4186           case NWORD:
4187             if (c == NUL || ri_word(c))
4188                 status = RA_NOMATCH;
4189             else
4190                 ADVANCE_REGINPUT();
4191             break;
4192
4193           case HEAD:
4194             if (!ri_head(c))
4195                 status = RA_NOMATCH;
4196             else
4197                 ADVANCE_REGINPUT();
4198             break;
4199
4200           case NHEAD:
4201             if (c == NUL || ri_head(c))
4202                 status = RA_NOMATCH;
4203             else
4204                 ADVANCE_REGINPUT();
4205             break;
4206
4207           case ALPHA:
4208             if (!ri_alpha(c))
4209                 status = RA_NOMATCH;
4210             else
4211                 ADVANCE_REGINPUT();
4212             break;
4213
4214           case NALPHA:
4215             if (c == NUL || ri_alpha(c))
4216                 status = RA_NOMATCH;
4217             else
4218                 ADVANCE_REGINPUT();
4219             break;
4220
4221           case LOWER:
4222             if (!ri_lower(c))
4223                 status = RA_NOMATCH;
4224             else
4225                 ADVANCE_REGINPUT();
4226             break;
4227
4228           case NLOWER:
4229             if (c == NUL || ri_lower(c))
4230                 status = RA_NOMATCH;
4231             else
4232                 ADVANCE_REGINPUT();
4233             break;
4234
4235           case UPPER:
4236             if (!ri_upper(c))
4237                 status = RA_NOMATCH;
4238             else
4239                 ADVANCE_REGINPUT();
4240             break;
4241
4242           case NUPPER:
4243             if (c == NUL || ri_upper(c))
4244                 status = RA_NOMATCH;
4245             else
4246                 ADVANCE_REGINPUT();
4247             break;
4248
4249           case EXACTLY:
4250             {
4251                 int     len;
4252                 char_u  *opnd;
4253
4254                 opnd = OPERAND(scan);
4255                 /* Inline the first byte, for speed. */
4256                 if (*opnd != *reginput
4257                         && (!ireg_ic || (
4258 #ifdef FEAT_MBYTE
4259                             !enc_utf8 &&
4260 #endif
4261                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4262                     status = RA_NOMATCH;
4263                 else if (*opnd == NUL)
4264                 {
4265                     /* match empty string always works; happens when "~" is
4266                      * empty. */
4267                 }
4268                 else if (opnd[1] == NUL
4269 #ifdef FEAT_MBYTE
4270                             && !(enc_utf8 && ireg_ic)
4271 #endif
4272                         )
4273                     ++reginput;         /* matched a single char */
4274                 else
4275                 {
4276                     len = (int)STRLEN(opnd);
4277                     /* Need to match first byte again for multi-byte. */
4278                     if (cstrncmp(opnd, reginput, &len) != 0)
4279                         status = RA_NOMATCH;
4280 #ifdef FEAT_MBYTE
4281                     /* Check for following composing character. */
4282                     else if (enc_utf8
4283                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4284                     {
4285                         /* raaron: This code makes a composing character get
4286                          * ignored, which is the correct behavior (sometimes)
4287                          * for voweled Hebrew texts. */
4288                         if (!ireg_icombine)
4289                             status = RA_NOMATCH;
4290                     }
4291 #endif
4292                     else
4293                         reginput += len;
4294                 }
4295             }
4296             break;
4297
4298           case ANYOF:
4299           case ANYBUT:
4300             if (c == NUL)
4301                 status = RA_NOMATCH;
4302             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4303                 status = RA_NOMATCH;
4304             else
4305                 ADVANCE_REGINPUT();
4306             break;
4307
4308 #ifdef FEAT_MBYTE
4309           case MULTIBYTECODE:
4310             if (has_mbyte)
4311             {
4312                 int     i, len;
4313                 char_u  *opnd;
4314                 int     opndc = 0, inpc;
4315
4316                 opnd = OPERAND(scan);
4317                 /* Safety check (just in case 'encoding' was changed since
4318                  * compiling the program). */
4319                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4320                 {
4321                     status = RA_NOMATCH;
4322                     break;
4323                 }
4324                 if (enc_utf8)
4325                     opndc = mb_ptr2char(opnd);
4326                 if (enc_utf8 && utf_iscomposing(opndc))
4327                 {
4328                     /* When only a composing char is given match at any
4329                      * position where that composing char appears. */
4330                     status = RA_NOMATCH;
4331                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4332                     {
4333                         inpc = mb_ptr2char(reginput + i);
4334                         if (!utf_iscomposing(inpc))
4335                         {
4336                             if (i > 0)
4337                                 break;
4338                         }
4339                         else if (opndc == inpc)
4340                         {
4341                             /* Include all following composing chars. */
4342                             len = i + mb_ptr2len(reginput + i);
4343                             status = RA_MATCH;
4344                             break;
4345                         }
4346                     }
4347                 }
4348                 else
4349                     for (i = 0; i < len; ++i)
4350                         if (opnd[i] != reginput[i])
4351                         {
4352                             status = RA_NOMATCH;
4353                             break;
4354                         }
4355                 reginput += len;
4356             }
4357             else
4358                 status = RA_NOMATCH;
4359             break;
4360 #endif
4361
4362           case NOTHING:
4363             break;
4364
4365           case BACK:
4366             {
4367                 int             i;
4368                 backpos_T       *bp;
4369
4370                 /*
4371                  * When we run into BACK we need to check if we don't keep
4372                  * looping without matching any input.  The second and later
4373                  * times a BACK is encountered it fails if the input is still
4374                  * at the same position as the previous time.
4375                  * The positions are stored in "backpos" and found by the
4376                  * current value of "scan", the position in the RE program.
4377                  */
4378                 bp = (backpos_T *)backpos.ga_data;
4379                 for (i = 0; i < backpos.ga_len; ++i)
4380                     if (bp[i].bp_scan == scan)
4381                         break;
4382                 if (i == backpos.ga_len)
4383                 {
4384                     /* First time at this BACK, make room to store the pos. */
4385                     if (ga_grow(&backpos, 1) == FAIL)
4386                         status = RA_FAIL;
4387                     else
4388                     {
4389                         /* get "ga_data" again, it may have changed */
4390                         bp = (backpos_T *)backpos.ga_data;
4391                         bp[i].bp_scan = scan;
4392                         ++backpos.ga_len;
4393                     }
4394                 }
4395                 else if (reg_save_equal(&bp[i].bp_pos))
4396                     /* Still at same position as last time, fail. */
4397                     status = RA_NOMATCH;
4398
4399                 if (status != RA_FAIL && status != RA_NOMATCH)
4400                     reg_save(&bp[i].bp_pos, &backpos);
4401             }
4402             break;
4403
4404           case MOPEN + 0:   /* Match start: \zs */
4405           case MOPEN + 1:   /* \( */
4406           case MOPEN + 2:
4407           case MOPEN + 3:
4408           case MOPEN + 4:
4409           case MOPEN + 5:
4410           case MOPEN + 6:
4411           case MOPEN + 7:
4412           case MOPEN + 8:
4413           case MOPEN + 9:
4414             {
4415                 no = op - MOPEN;
4416                 cleanup_subexpr();
4417                 rp = regstack_push(RS_MOPEN, scan);
4418                 if (rp == NULL)
4419                     status = RA_FAIL;
4420                 else
4421                 {
4422                     rp->rs_no = no;
4423                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4424                                                              &reg_startp[no]);
4425                     /* We simply continue and handle the result when done. */
4426                 }
4427             }
4428             break;
4429
4430           case NOPEN:       /* \%( */
4431           case NCLOSE:      /* \) after \%( */
4432                 if (regstack_push(RS_NOPEN, scan) == NULL)
4433                     status = RA_FAIL;
4434                 /* We simply continue and handle the result when done. */
4435                 break;
4436
4437 #ifdef FEAT_SYN_HL
4438           case ZOPEN + 1:
4439           case ZOPEN + 2:
4440           case ZOPEN + 3:
4441           case ZOPEN + 4:
4442           case ZOPEN + 5:
4443           case ZOPEN + 6:
4444           case ZOPEN + 7:
4445           case ZOPEN + 8:
4446           case ZOPEN + 9:
4447             {
4448                 no = op - ZOPEN;
4449                 cleanup_zsubexpr();
4450                 rp = regstack_push(RS_ZOPEN, scan);
4451                 if (rp == NULL)
4452                     status = RA_FAIL;
4453                 else
4454                 {
4455                     rp->rs_no = no;
4456                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4457                                                              &reg_startzp[no]);
4458                     /* We simply continue and handle the result when done. */
4459                 }
4460             }
4461             break;
4462 #endif
4463
4464           case MCLOSE + 0:  /* Match end: \ze */
4465           case MCLOSE + 1:  /* \) */
4466           case MCLOSE + 2:
4467           case MCLOSE + 3:
4468           case MCLOSE + 4:
4469           case MCLOSE + 5:
4470           case MCLOSE + 6:
4471           case MCLOSE + 7:
4472           case MCLOSE + 8:
4473           case MCLOSE + 9:
4474             {
4475                 no = op - MCLOSE;
4476                 cleanup_subexpr();
4477                 rp = regstack_push(RS_MCLOSE, scan);
4478                 if (rp == NULL)
4479                     status = RA_FAIL;
4480                 else
4481                 {
4482                     rp->rs_no = no;
4483                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4484                     /* We simply continue and handle the result when done. */
4485                 }
4486             }
4487             break;
4488
4489 #ifdef FEAT_SYN_HL
4490           case ZCLOSE + 1:  /* \) after \z( */
4491           case ZCLOSE + 2:
4492           case ZCLOSE + 3:
4493           case ZCLOSE + 4:
4494           case ZCLOSE + 5:
4495           case ZCLOSE + 6:
4496           case ZCLOSE + 7:
4497           case ZCLOSE + 8:
4498           case ZCLOSE + 9:
4499             {
4500                 no = op - ZCLOSE;
4501                 cleanup_zsubexpr();
4502                 rp = regstack_push(RS_ZCLOSE, scan);
4503                 if (rp == NULL)
4504                     status = RA_FAIL;
4505                 else
4506                 {
4507                     rp->rs_no = no;
4508                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4509                                                               &reg_endzp[no]);
4510                     /* We simply continue and handle the result when done. */
4511                 }
4512             }
4513             break;
4514 #endif
4515
4516           case BACKREF + 1:
4517           case BACKREF + 2:
4518           case BACKREF + 3:
4519           case BACKREF + 4:
4520           case BACKREF + 5:
4521           case BACKREF + 6:
4522           case BACKREF + 7:
4523           case BACKREF + 8:
4524           case BACKREF + 9:
4525             {
4526                 int             len;
4527                 linenr_T        clnum;
4528                 colnr_T         ccol;
4529                 char_u          *p;
4530
4531                 no = op - BACKREF;
4532                 cleanup_subexpr();
4533                 if (!REG_MULTI)         /* Single-line regexp */
4534                 {
4535                     if (reg_endp[no] == NULL)
4536                     {
4537                         /* Backref was not set: Match an empty string. */
4538                         len = 0;
4539                     }
4540                     else
4541                     {
4542                         /* Compare current input with back-ref in the same
4543                          * line. */
4544                         len = (int)(reg_endp[no] - reg_startp[no]);
4545                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4546                             status = RA_NOMATCH;
4547                     }
4548                 }
4549                 else                            /* Multi-line regexp */
4550                 {
4551                     if (reg_endpos[no].lnum < 0)
4552                     {
4553                         /* Backref was not set: Match an empty string. */
4554                         len = 0;
4555                     }
4556                     else
4557                     {
4558                         if (reg_startpos[no].lnum == reglnum
4559                                 && reg_endpos[no].lnum == reglnum)
4560                         {
4561                             /* Compare back-ref within the current line. */
4562                             len = reg_endpos[no].col - reg_startpos[no].col;
4563                             if (cstrncmp(regline + reg_startpos[no].col,
4564                                                           reginput, &len) != 0)
4565                                 status = RA_NOMATCH;
4566                         }
4567                         else
4568                         {
4569                             /* Messy situation: Need to compare between two
4570                              * lines. */
4571                             ccol = reg_startpos[no].col;
4572                             clnum = reg_startpos[no].lnum;
4573                             for (;;)
4574                             {
4575                                 /* Since getting one line may invalidate
4576                                  * the other, need to make copy.  Slow! */
4577                                 if (regline != reg_tofree)
4578                                 {
4579                                     len = (int)STRLEN(regline);
4580                                     if (reg_tofree == NULL
4581                                                  || len >= (int)reg_tofreelen)
4582                                     {
4583                                         len += 50;      /* get some extra */
4584                                         vim_free(reg_tofree);
4585                                         reg_tofree = alloc(len);
4586                                         if (reg_tofree == NULL)
4587                                         {
4588                                             status = RA_FAIL; /* outof memory!*/
4589                                             break;
4590                                         }
4591                                         reg_tofreelen = len;
4592                                     }
4593                                     STRCPY(reg_tofree, regline);
4594                                     reginput = reg_tofree
4595                                                        + (reginput - regline);
4596                                     regline = reg_tofree;
4597                                 }
4598
4599                                 /* Get the line to compare with. */
4600                                 p = reg_getline(clnum);
4601                                 if (clnum == reg_endpos[no].lnum)
4602                                     len = reg_endpos[no].col - ccol;
4603                                 else
4604                                     len = (int)STRLEN(p + ccol);
4605
4606                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4607                                 {
4608                                     status = RA_NOMATCH;  /* doesn't match */
4609                                     break;
4610                                 }
4611                                 if (clnum == reg_endpos[no].lnum)
4612                                     break;              /* match and at end! */
4613                                 if (reglnum >= reg_maxline)
4614                                 {
4615                                     status = RA_NOMATCH;  /* text too short */
4616                                     break;
4617                                 }
4618
4619                                 /* Advance to next line. */
4620                                 reg_nextline();
4621                                 ++clnum;
4622                                 ccol = 0;
4623                                 if (got_int)
4624                                 {
4625                                     status = RA_FAIL;
4626                                     break;
4627                                 }
4628                             }
4629
4630                             /* found a match!  Note that regline may now point
4631                              * to a copy of the line, that should not matter. */
4632                         }
4633                     }
4634                 }
4635
4636                 /* Matched the backref, skip over it. */
4637                 reginput += len;
4638             }
4639             break;
4640
4641 #ifdef FEAT_SYN_HL
4642           case ZREF + 1:
4643           case ZREF + 2:
4644           case ZREF + 3:
4645           case ZREF + 4:
4646           case ZREF + 5:
4647           case ZREF + 6:
4648           case ZREF + 7:
4649           case ZREF + 8:
4650           case ZREF + 9:
4651             {
4652                 int     len;
4653
4654                 cleanup_zsubexpr();
4655                 no = op - ZREF;
4656                 if (re_extmatch_in != NULL
4657                         && re_extmatch_in->matches[no] != NULL)
4658                 {
4659                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4660                     if (cstrncmp(re_extmatch_in->matches[no],
4661                                                           reginput, &len) != 0)
4662                         status = RA_NOMATCH;
4663                     else
4664                         reginput += len;
4665                 }
4666                 else
4667                 {
4668                     /* Backref was not set: Match an empty string. */
4669                 }
4670             }
4671             break;
4672 #endif
4673
4674           case BRANCH:
4675             {
4676                 if (OP(next) != BRANCH) /* No choice. */
4677                     next = OPERAND(scan);       /* Avoid recursion. */
4678                 else
4679                 {
4680                     rp = regstack_push(RS_BRANCH, scan);
4681                     if (rp == NULL)
4682                         status = RA_FAIL;
4683                     else
4684                         status = RA_BREAK;      /* rest is below */
4685                 }
4686             }
4687             break;
4688
4689           case BRACE_LIMITS:
4690             {
4691                 if (OP(next) == BRACE_SIMPLE)
4692                 {
4693                     bl_minval = OPERAND_MIN(scan);
4694                     bl_maxval = OPERAND_MAX(scan);
4695                 }
4696                 else if (OP(next) >= BRACE_COMPLEX
4697                         && OP(next) < BRACE_COMPLEX + 10)
4698                 {
4699                     no = OP(next) - BRACE_COMPLEX;
4700                     brace_min[no] = OPERAND_MIN(scan);
4701                     brace_max[no] = OPERAND_MAX(scan);
4702                     brace_count[no] = 0;
4703                 }
4704                 else
4705                 {
4706                     EMSG(_(e_internal));            /* Shouldn't happen */
4707                     status = RA_FAIL;
4708                 }
4709             }
4710             break;
4711
4712           case BRACE_COMPLEX + 0:
4713           case BRACE_COMPLEX + 1:
4714           case BRACE_COMPLEX + 2:
4715           case BRACE_COMPLEX + 3:
4716           case BRACE_COMPLEX + 4:
4717           case BRACE_COMPLEX + 5:
4718           case BRACE_COMPLEX + 6:
4719           case BRACE_COMPLEX + 7:
4720           case BRACE_COMPLEX + 8:
4721           case BRACE_COMPLEX + 9:
4722             {
4723                 no = op - BRACE_COMPLEX;
4724                 ++brace_count[no];
4725
4726                 /* If not matched enough times yet, try one more */
4727                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4728                                              ? brace_min[no] : brace_max[no]))
4729                 {
4730                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4731                     if (rp == NULL)
4732                         status = RA_FAIL;
4733                     else
4734                     {
4735                         rp->rs_no = no;
4736                         reg_save(&rp->rs_un.regsave, &backpos);
4737                         next = OPERAND(scan);
4738                         /* We continue and handle the result when done. */
4739                     }
4740                     break;
4741                 }
4742
4743                 /* If matched enough times, may try matching some more */
4744                 if (brace_min[no] <= brace_max[no])
4745                 {
4746                     /* Range is the normal way around, use longest match */
4747                     if (brace_count[no] <= brace_max[no])
4748                     {
4749                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4750                         if (rp == NULL)
4751                             status = RA_FAIL;
4752                         else
4753                         {
4754                             rp->rs_no = no;
4755                             reg_save(&rp->rs_un.regsave, &backpos);
4756                             next = OPERAND(scan);
4757                             /* We continue and handle the result when done. */
4758                         }
4759                     }
4760                 }
4761                 else
4762                 {
4763                     /* Range is backwards, use shortest match first */
4764                     if (brace_count[no] <= brace_min[no])
4765                     {
4766                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4767                         if (rp == NULL)
4768                             status = RA_FAIL;
4769                         else
4770                         {
4771                             reg_save(&rp->rs_un.regsave, &backpos);
4772                             /* We continue and handle the result when done. */
4773                         }
4774                     }
4775                 }
4776             }
4777             break;
4778
4779           case BRACE_SIMPLE:
4780           case STAR:
4781           case PLUS:
4782             {
4783                 regstar_T       rst;
4784
4785                 /*
4786                  * Lookahead to avoid useless match attempts when we know
4787                  * what character comes next.
4788                  */
4789                 if (OP(next) == EXACTLY)
4790                 {
4791                     rst.nextb = *OPERAND(next);
4792                     if (ireg_ic)
4793                     {
4794                         if (MB_ISUPPER(rst.nextb))
4795                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4796                         else
4797                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4798                     }
4799                     else
4800                         rst.nextb_ic = rst.nextb;
4801                 }
4802                 else
4803                 {
4804                     rst.nextb = NUL;
4805                     rst.nextb_ic = NUL;
4806                 }
4807                 if (op != BRACE_SIMPLE)
4808                 {
4809                     rst.minval = (op == STAR) ? 0 : 1;
4810                     rst.maxval = MAX_LIMIT;
4811                 }
4812                 else
4813                 {
4814                     rst.minval = bl_minval;
4815                     rst.maxval = bl_maxval;
4816                 }
4817
4818                 /*
4819                  * When maxval > minval, try matching as much as possible, up
4820                  * to maxval.  When maxval < minval, try matching at least the
4821                  * minimal number (since the range is backwards, that's also
4822                  * maxval!).
4823                  */
4824                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4825                 if (got_int)
4826                 {
4827                     status = RA_FAIL;
4828                     break;
4829                 }
4830                 if (rst.minval <= rst.maxval
4831                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4832                 {
4833                     /* It could match.  Prepare for trying to match what
4834                      * follows.  The code is below.  Parameters are stored in
4835                      * a regstar_T on the regstack. */
4836                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4837                     {
4838                         EMSG(_(e_maxmempat));
4839                         status = RA_FAIL;
4840                     }
4841                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4842                         status = RA_FAIL;
4843                     else
4844                     {
4845                         regstack.ga_len += sizeof(regstar_T);
4846                         rp = regstack_push(rst.minval <= rst.maxval
4847                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4848                         if (rp == NULL)
4849                             status = RA_FAIL;
4850                         else
4851                         {
4852                             *(((regstar_T *)rp) - 1) = rst;
4853                             status = RA_BREAK;      /* skip the restore bits */
4854                         }
4855                     }
4856                 }
4857                 else
4858                     status = RA_NOMATCH;
4859
4860             }
4861             break;
4862
4863           case NOMATCH:
4864           case MATCH:
4865           case SUBPAT:
4866             rp = regstack_push(RS_NOMATCH, scan);
4867             if (rp == NULL)
4868                 status = RA_FAIL;
4869             else
4870             {
4871                 rp->rs_no = op;
4872                 reg_save(&rp->rs_un.regsave, &backpos);
4873                 next = OPERAND(scan);
4874                 /* We continue and handle the result when done. */
4875             }
4876             break;
4877
4878           case BEHIND:
4879           case NOBEHIND:
4880             /* Need a bit of room to store extra positions. */
4881             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4882             {
4883                 EMSG(_(e_maxmempat));
4884                 status = RA_FAIL;
4885             }
4886             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4887                 status = RA_FAIL;
4888             else
4889             {
4890                 regstack.ga_len += sizeof(regbehind_T);
4891                 rp = regstack_push(RS_BEHIND1, scan);
4892                 if (rp == NULL)
4893                     status = RA_FAIL;
4894                 else
4895                 {
4896                     /* Need to save the subexpr to be able to restore them
4897                      * when there is a match but we don't use it. */
4898                     save_subexpr(((regbehind_T *)rp) - 1);
4899
4900                     rp->rs_no = op;
4901                     reg_save(&rp->rs_un.regsave, &backpos);
4902                     /* First try if what follows matches.  If it does then we
4903                      * check the behind match by looping. */
4904                 }
4905             }
4906             break;
4907
4908           case BHPOS:
4909             if (REG_MULTI)
4910             {
4911                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4912                         || behind_pos.rs_u.pos.lnum != reglnum)
4913                     status = RA_NOMATCH;
4914             }
4915             else if (behind_pos.rs_u.ptr != reginput)
4916                 status = RA_NOMATCH;
4917             break;
4918
4919           case NEWL:
4920             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4921                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4922                 status = RA_NOMATCH;
4923             else if (reg_line_lbr)
4924                 ADVANCE_REGINPUT();
4925             else
4926                 reg_nextline();
4927             break;
4928
4929           case END:
4930             status = RA_MATCH;  /* Success! */
4931             break;
4932
4933           default:
4934             EMSG(_(e_re_corr));
4935 #ifdef DEBUG
4936             printf("Illegal op code %d\n", op);
4937 #endif
4938             status = RA_FAIL;
4939             break;
4940           }
4941         }
4942
4943         /* If we can't continue sequentially, break the inner loop. */
4944         if (status != RA_CONT)
4945             break;
4946
4947         /* Continue in inner loop, advance to next item. */
4948         scan = next;
4949
4950     } /* end of inner loop */
4951
4952     /*
4953      * If there is something on the regstack execute the code for the state.
4954      * If the state is popped then loop and use the older state.
4955      */
4956     while (regstack.ga_len > 0 && status != RA_FAIL)
4957     {
4958         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4959         switch (rp->rs_state)
4960         {
4961           case RS_NOPEN:
4962             /* Result is passed on as-is, simply pop the state. */
4963             regstack_pop(&scan);
4964             break;
4965
4966           case RS_MOPEN:
4967             /* Pop the state.  Restore pointers when there is no match. */
4968             if (status == RA_NOMATCH)
4969                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4970                                                   &reg_startp[rp->rs_no]);
4971             regstack_pop(&scan);
4972             break;
4973
4974 #ifdef FEAT_SYN_HL
4975           case RS_ZOPEN:
4976             /* Pop the state.  Restore pointers when there is no match. */
4977             if (status == RA_NOMATCH)
4978                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4979                                                  &reg_startzp[rp->rs_no]);
4980             regstack_pop(&scan);
4981             break;
4982 #endif
4983
4984           case RS_MCLOSE:
4985             /* Pop the state.  Restore pointers when there is no match. */
4986             if (status == RA_NOMATCH)
4987                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4988                                                     &reg_endp[rp->rs_no]);
4989             regstack_pop(&scan);
4990             break;
4991
4992 #ifdef FEAT_SYN_HL
4993           case RS_ZCLOSE:
4994             /* Pop the state.  Restore pointers when there is no match. */
4995             if (status == RA_NOMATCH)
4996                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4997                                                    &reg_endzp[rp->rs_no]);
4998             regstack_pop(&scan);
4999             break;
5000 #endif
5001
5002           case RS_BRANCH:
5003             if (status == RA_MATCH)
5004                 /* this branch matched, use it */
5005                 regstack_pop(&scan);
5006             else
5007             {
5008                 if (status != RA_BREAK)
5009                 {
5010                     /* After a non-matching branch: try next one. */
5011                     reg_restore(&rp->rs_un.regsave, &backpos);
5012                     scan = rp->rs_scan;
5013                 }
5014                 if (scan == NULL || OP(scan) != BRANCH)
5015                 {
5016                     /* no more branches, didn't find a match */
5017                     status = RA_NOMATCH;
5018                     regstack_pop(&scan);
5019                 }
5020                 else
5021                 {
5022                     /* Prepare to try a branch. */
5023                     rp->rs_scan = regnext(scan);
5024                     reg_save(&rp->rs_un.regsave, &backpos);
5025                     scan = OPERAND(scan);
5026                 }
5027             }
5028             break;
5029
5030           case RS_BRCPLX_MORE:
5031             /* Pop the state.  Restore pointers when there is no match. */
5032             if (status == RA_NOMATCH)
5033             {
5034                 reg_restore(&rp->rs_un.regsave, &backpos);
5035                 --brace_count[rp->rs_no];       /* decrement match count */
5036             }
5037             regstack_pop(&scan);
5038             break;
5039
5040           case RS_BRCPLX_LONG:
5041             /* Pop the state.  Restore pointers when there is no match. */
5042             if (status == RA_NOMATCH)
5043             {
5044                 /* There was no match, but we did find enough matches. */
5045                 reg_restore(&rp->rs_un.regsave, &backpos);
5046                 --brace_count[rp->rs_no];
5047                 /* continue with the items after "\{}" */
5048                 status = RA_CONT;
5049             }
5050             regstack_pop(&scan);
5051             if (status == RA_CONT)
5052                 scan = regnext(scan);
5053             break;
5054
5055           case RS_BRCPLX_SHORT:
5056             /* Pop the state.  Restore pointers when there is no match. */
5057             if (status == RA_NOMATCH)
5058                 /* There was no match, try to match one more item. */
5059                 reg_restore(&rp->rs_un.regsave, &backpos);
5060             regstack_pop(&scan);
5061             if (status == RA_NOMATCH)
5062             {
5063                 scan = OPERAND(scan);
5064                 status = RA_CONT;
5065             }
5066             break;
5067
5068           case RS_NOMATCH:
5069             /* Pop the state.  If the operand matches for NOMATCH or
5070              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5071              * except for SUBPAT, and continue with the next item. */
5072             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5073                 status = RA_NOMATCH;
5074             else
5075             {
5076                 status = RA_CONT;
5077                 if (rp->rs_no != SUBPAT)        /* zero-width */
5078                     reg_restore(&rp->rs_un.regsave, &backpos);
5079             }
5080             regstack_pop(&scan);
5081             if (status == RA_CONT)
5082                 scan = regnext(scan);
5083             break;
5084
5085           case RS_BEHIND1:
5086             if (status == RA_NOMATCH)
5087             {
5088                 regstack_pop(&scan);
5089                 regstack.ga_len -= sizeof(regbehind_T);
5090             }
5091             else
5092             {
5093                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5094                  * the behind part does (not) match before the current
5095                  * position in the input.  This must be done at every
5096                  * position in the input and checking if the match ends at
5097                  * the current position. */
5098
5099                 /* save the position after the found match for next */
5100                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5101
5102                 /* start looking for a match with operand at the current
5103                  * position.  Go back one character until we find the
5104                  * result, hitting the start of the line or the previous
5105                  * line (for multi-line matching).
5106                  * Set behind_pos to where the match should end, BHPOS
5107                  * will match it.  Save the current value. */
5108                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5109                 behind_pos = rp->rs_un.regsave;
5110
5111                 rp->rs_state = RS_BEHIND2;
5112
5113                 reg_restore(&rp->rs_un.regsave, &backpos);
5114                 scan = OPERAND(rp->rs_scan);
5115             }
5116             break;
5117
5118           case RS_BEHIND2:
5119             /*
5120              * Looping for BEHIND / NOBEHIND match.
5121              */
5122             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5123             {
5124                 /* found a match that ends where "next" started */
5125                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5126                 if (rp->rs_no == BEHIND)
5127                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5128                                                                     &backpos);
5129                 else
5130                 {
5131                     /* But we didn't want a match.  Need to restore the
5132                      * subexpr, because what follows matched, so they have
5133                      * been set. */
5134                     status = RA_NOMATCH;
5135                     restore_subexpr(((regbehind_T *)rp) - 1);
5136                 }
5137                 regstack_pop(&scan);
5138                 regstack.ga_len -= sizeof(regbehind_T);
5139             }
5140             else
5141             {
5142                 /* No match or a match that doesn't end where we want it: Go
5143                  * back one character.  May go to previous line once. */
5144                 no = OK;
5145                 if (REG_MULTI)
5146                 {
5147                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5148                     {
5149                         if (rp->rs_un.regsave.rs_u.pos.lnum
5150                                         < behind_pos.rs_u.pos.lnum
5151                                 || reg_getline(
5152                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5153                                                                   == NULL)
5154                             no = FAIL;
5155                         else
5156                         {
5157                             reg_restore(&rp->rs_un.regsave, &backpos);
5158                             rp->rs_un.regsave.rs_u.pos.col =
5159                                                  (colnr_T)STRLEN(regline);
5160                         }
5161                     }
5162                     else
5163                         --rp->rs_un.regsave.rs_u.pos.col;
5164                 }
5165                 else
5166                 {
5167                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5168                         no = FAIL;
5169                     else
5170                         --rp->rs_un.regsave.rs_u.ptr;
5171                 }
5172                 if (no == OK)
5173                 {
5174                     /* Advanced, prepare for finding match again. */
5175                     reg_restore(&rp->rs_un.regsave, &backpos);
5176                     scan = OPERAND(rp->rs_scan);
5177                     if (status == RA_MATCH)
5178                     {
5179                         /* We did match, so subexpr may have been changed,
5180                          * need to restore them for the next try. */
5181                         status = RA_NOMATCH;
5182                         restore_subexpr(((regbehind_T *)rp) - 1);
5183                     }
5184                 }
5185                 else
5186                 {
5187                     /* Can't advance.  For NOBEHIND that's a match. */
5188                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5189                     if (rp->rs_no == NOBEHIND)
5190                     {
5191                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5192                                                                     &backpos);
5193                         status = RA_MATCH;
5194                     }
5195                     else
5196                     {
5197                         /* We do want a proper match.  Need to restore the
5198                          * subexpr if we had a match, because they may have
5199                          * been set. */
5200                         if (status == RA_MATCH)
5201                         {
5202                             status = RA_NOMATCH;
5203                             restore_subexpr(((regbehind_T *)rp) - 1);
5204                         }
5205                     }
5206                     regstack_pop(&scan);
5207                     regstack.ga_len -= sizeof(regbehind_T);
5208                 }
5209             }
5210             break;
5211
5212           case RS_STAR_LONG:
5213           case RS_STAR_SHORT:
5214             {
5215                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5216
5217                 if (status == RA_MATCH)
5218                 {
5219                     regstack_pop(&scan);
5220                     regstack.ga_len -= sizeof(regstar_T);
5221                     break;
5222                 }
5223
5224                 /* Tried once already, restore input pointers. */
5225                 if (status != RA_BREAK)
5226                     reg_restore(&rp->rs_un.regsave, &backpos);
5227
5228                 /* Repeat until we found a position where it could match. */
5229                 for (;;)
5230                 {
5231                     if (status != RA_BREAK)
5232                     {
5233                         /* Tried first position already, advance. */
5234                         if (rp->rs_state == RS_STAR_LONG)
5235                         {
5236                             /* Trying for longest match, but couldn't or
5237                              * didn't match -- back up one char. */
5238                             if (--rst->count < rst->minval)
5239                                 break;
5240                             if (reginput == regline)
5241                             {
5242                                 /* backup to last char of previous line */
5243                                 --reglnum;
5244                                 regline = reg_getline(reglnum);
5245                                 /* Just in case regrepeat() didn't count
5246                                  * right. */
5247                                 if (regline == NULL)
5248                                     break;
5249                                 reginput = regline + STRLEN(regline);
5250                                 fast_breakcheck();
5251                             }
5252                             else
5253                                 mb_ptr_back(regline, reginput);
5254                         }
5255                         else
5256                         {
5257                             /* Range is backwards, use shortest match first.
5258                              * Careful: maxval and minval are exchanged!
5259                              * Couldn't or didn't match: try advancing one
5260                              * char. */
5261                             if (rst->count == rst->minval
5262                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5263                                 break;
5264                             ++rst->count;
5265                         }
5266                         if (got_int)
5267                             break;
5268                     }
5269                     else
5270                         status = RA_NOMATCH;
5271
5272                     /* If it could match, try it. */
5273                     if (rst->nextb == NUL || *reginput == rst->nextb
5274                                              || *reginput == rst->nextb_ic)
5275                     {
5276                         reg_save(&rp->rs_un.regsave, &backpos);
5277                         scan = regnext(rp->rs_scan);
5278                         status = RA_CONT;
5279                         break;
5280                     }
5281                 }
5282                 if (status != RA_CONT)
5283                 {
5284                     /* Failed. */
5285                     regstack_pop(&scan);
5286                     regstack.ga_len -= sizeof(regstar_T);
5287                     status = RA_NOMATCH;
5288                 }
5289             }
5290             break;
5291         }
5292
5293         /* If we want to continue the inner loop or didn't pop a state
5294          * continue matching loop */
5295         if (status == RA_CONT || rp == (regitem_T *)
5296                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5297             break;
5298     }
5299
5300     /* May need to continue with the inner loop, starting at "scan". */
5301     if (status == RA_CONT)
5302         continue;
5303
5304     /*
5305      * If the regstack is empty or something failed we are done.
5306      */
5307     if (regstack.ga_len == 0 || status == RA_FAIL)
5308     {
5309         if (scan == NULL)
5310         {
5311             /*
5312              * We get here only if there's trouble -- normally "case END" is
5313              * the terminating point.
5314              */
5315             EMSG(_(e_re_corr));
5316 #ifdef DEBUG
5317             printf("Premature EOL\n");
5318 #endif
5319         }
5320         if (status == RA_FAIL)
5321             got_int = TRUE;
5322         return (status == RA_MATCH);
5323     }
5324
5325   } /* End of loop until the regstack is empty. */
5326
5327   /* NOTREACHED */
5328 }
5329
5330 /*
5331  * Push an item onto the regstack.
5332  * Returns pointer to new item.  Returns NULL when out of memory.
5333  */
5334     static regitem_T *
5335 regstack_push(state, scan)
5336     regstate_T  state;
5337     char_u      *scan;
5338 {
5339     regitem_T   *rp;
5340
5341     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5342     {
5343         EMSG(_(e_maxmempat));
5344         return NULL;
5345     }
5346     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5347         return NULL;
5348
5349     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5350     rp->rs_state = state;
5351     rp->rs_scan = scan;
5352
5353     regstack.ga_len += sizeof(regitem_T);
5354     return rp;
5355 }
5356
5357 /*
5358  * Pop an item from the regstack.
5359  */
5360     static void
5361 regstack_pop(scan)
5362     char_u      **scan;
5363 {
5364     regitem_T   *rp;
5365
5366     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5367     *scan = rp->rs_scan;
5368
5369     regstack.ga_len -= sizeof(regitem_T);
5370 }
5371
5372 /*
5373  * regrepeat - repeatedly match something simple, return how many.
5374  * Advances reginput (and reglnum) to just after the matched chars.
5375  */
5376     static int
5377 regrepeat(p, maxcount)
5378     char_u      *p;
5379     long        maxcount;   /* maximum number of matches allowed */
5380 {
5381     long        count = 0;
5382     char_u      *scan;
5383     char_u      *opnd;
5384     int         mask;
5385     int         testval = 0;
5386
5387     scan = reginput;        /* Make local copy of reginput for speed. */
5388     opnd = OPERAND(p);
5389     switch (OP(p))
5390     {
5391       case ANY:
5392       case ANY + ADD_NL:
5393         while (count < maxcount)
5394         {
5395             /* Matching anything means we continue until end-of-line (or
5396              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5397             while (*scan != NUL && count < maxcount)
5398             {
5399                 ++count;
5400                 mb_ptr_adv(scan);
5401             }
5402             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5403                                          || reg_line_lbr || count == maxcount)
5404                 break;
5405             ++count;            /* count the line-break */
5406             reg_nextline();
5407             scan = reginput;
5408             if (got_int)
5409                 break;
5410         }
5411         break;
5412
5413       case IDENT:
5414       case IDENT + ADD_NL:
5415         testval = TRUE;
5416         /*FALLTHROUGH*/
5417       case SIDENT:
5418       case SIDENT + ADD_NL:
5419         while (count < maxcount)
5420         {
5421             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5422             {
5423                 mb_ptr_adv(scan);
5424             }
5425             else if (*scan == NUL)
5426             {
5427                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5428                                                               || reg_line_lbr)
5429                     break;
5430                 reg_nextline();
5431                 scan = reginput;
5432                 if (got_int)
5433                     break;
5434             }
5435             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5436                 ++scan;
5437             else
5438                 break;
5439             ++count;
5440         }
5441         break;
5442
5443       case KWORD:
5444       case KWORD + ADD_NL:
5445         testval = TRUE;
5446         /*FALLTHROUGH*/
5447       case SKWORD:
5448       case SKWORD + ADD_NL:
5449         while (count < maxcount)
5450         {
5451             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5452             {
5453                 mb_ptr_adv(scan);
5454             }
5455             else if (*scan == NUL)
5456             {
5457                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5458                                                               || reg_line_lbr)
5459                     break;
5460                 reg_nextline();
5461                 scan = reginput;
5462                 if (got_int)
5463                     break;
5464             }
5465             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5466                 ++scan;
5467             else
5468                 break;
5469             ++count;
5470         }
5471         break;
5472
5473       case FNAME:
5474       case FNAME + ADD_NL:
5475         testval = TRUE;
5476         /*FALLTHROUGH*/
5477       case SFNAME:
5478       case SFNAME + ADD_NL:
5479         while (count < maxcount)
5480         {
5481             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5482             {
5483                 mb_ptr_adv(scan);
5484             }
5485             else if (*scan == NUL)
5486             {
5487                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5488                                                               || reg_line_lbr)
5489                     break;
5490                 reg_nextline();
5491                 scan = reginput;
5492                 if (got_int)
5493                     break;
5494             }
5495             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5496                 ++scan;
5497             else
5498                 break;
5499             ++count;
5500         }
5501         break;
5502
5503       case PRINT:
5504       case PRINT + ADD_NL:
5505         testval = TRUE;
5506         /*FALLTHROUGH*/
5507       case SPRINT:
5508       case SPRINT + ADD_NL:
5509         while (count < maxcount)
5510         {
5511             if (*scan == NUL)
5512             {
5513                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5514                                                               || reg_line_lbr)
5515                     break;
5516                 reg_nextline();
5517                 scan = reginput;
5518                 if (got_int)
5519                     break;
5520             }
5521             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5522             {
5523                 mb_ptr_adv(scan);
5524             }
5525             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5526                 ++scan;
5527             else
5528                 break;
5529             ++count;
5530         }
5531         break;
5532
5533       case WHITE:
5534       case WHITE + ADD_NL:
5535         testval = mask = RI_WHITE;
5536 do_class:
5537         while (count < maxcount)
5538         {
5539 #ifdef FEAT_MBYTE
5540             int         l;
5541 #endif
5542             if (*scan == NUL)
5543             {
5544                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5545                                                               || reg_line_lbr)
5546                     break;
5547                 reg_nextline();
5548                 scan = reginput;
5549                 if (got_int)
5550                     break;
5551             }
5552 #ifdef FEAT_MBYTE
5553             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5554             {
5555                 if (testval != 0)
5556                     break;
5557                 scan += l;
5558             }
5559 #endif
5560             else if ((class_tab[*scan] & mask) == testval)
5561                 ++scan;
5562             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5563                 ++scan;
5564             else
5565                 break;
5566             ++count;
5567         }
5568         break;
5569
5570       case NWHITE:
5571       case NWHITE + ADD_NL:
5572         mask = RI_WHITE;
5573         goto do_class;
5574       case DIGIT:
5575       case DIGIT + ADD_NL:
5576         testval = mask = RI_DIGIT;
5577         goto do_class;
5578       case NDIGIT:
5579       case NDIGIT + ADD_NL:
5580         mask = RI_DIGIT;
5581         goto do_class;
5582       case HEX:
5583       case HEX + ADD_NL:
5584         testval = mask = RI_HEX;
5585         goto do_class;
5586       case NHEX:
5587       case NHEX + ADD_NL:
5588         mask = RI_HEX;
5589         goto do_class;
5590       case OCTAL:
5591       case OCTAL + ADD_NL:
5592         testval = mask = RI_OCTAL;
5593         goto do_class;
5594       case NOCTAL:
5595       case NOCTAL + ADD_NL:
5596         mask = RI_OCTAL;
5597         goto do_class;
5598       case WORD:
5599       case WORD + ADD_NL:
5600         testval = mask = RI_WORD;
5601         goto do_class;
5602       case NWORD:
5603       case NWORD + ADD_NL:
5604         mask = RI_WORD;
5605         goto do_class;
5606       case HEAD:
5607       case HEAD + ADD_NL:
5608         testval = mask = RI_HEAD;
5609         goto do_class;
5610       case NHEAD:
5611       case NHEAD + ADD_NL:
5612         mask = RI_HEAD;
5613         goto do_class;
5614       case ALPHA:
5615       case ALPHA + ADD_NL:
5616         testval = mask = RI_ALPHA;
5617         goto do_class;
5618       case NALPHA:
5619       case NALPHA + ADD_NL:
5620         mask = RI_ALPHA;
5621         goto do_class;
5622       case LOWER:
5623       case LOWER + ADD_NL:
5624         testval = mask = RI_LOWER;
5625         goto do_class;
5626       case NLOWER:
5627       case NLOWER + ADD_NL:
5628         mask = RI_LOWER;
5629         goto do_class;
5630       case UPPER:
5631       case UPPER + ADD_NL:
5632         testval = mask = RI_UPPER;
5633         goto do_class;
5634       case NUPPER:
5635       case NUPPER + ADD_NL:
5636         mask = RI_UPPER;
5637         goto do_class;
5638
5639       case EXACTLY:
5640         {
5641             int     cu, cl;
5642
5643             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5644              * would have been used for it.  It does handle single-byte
5645              * characters, such as latin1. */
5646             if (ireg_ic)
5647             {
5648                 cu = MB_TOUPPER(*opnd);
5649                 cl = MB_TOLOWER(*opnd);
5650                 while (count < maxcount && (*scan == cu || *scan == cl))
5651                 {
5652                     count++;
5653                     scan++;
5654                 }
5655             }
5656             else
5657             {
5658                 cu = *opnd;
5659                 while (count < maxcount && *scan == cu)
5660                 {
5661                     count++;
5662                     scan++;
5663                 }
5664             }
5665             break;
5666         }
5667
5668 #ifdef FEAT_MBYTE
5669       case MULTIBYTECODE:
5670         {
5671             int         i, len, cf = 0;
5672
5673             /* Safety check (just in case 'encoding' was changed since
5674              * compiling the program). */
5675             if ((len = (*mb_ptr2len)(opnd)) > 1)
5676             {
5677                 if (ireg_ic && enc_utf8)
5678                     cf = utf_fold(utf_ptr2char(opnd));
5679                 while (count < maxcount)
5680                 {
5681                     for (i = 0; i < len; ++i)
5682                         if (opnd[i] != scan[i])
5683                             break;
5684                     if (i < len && (!ireg_ic || !enc_utf8
5685                                         || utf_fold(utf_ptr2char(scan)) != cf))
5686                         break;
5687                     scan += len;
5688                     ++count;
5689                 }
5690             }
5691         }
5692         break;
5693 #endif
5694
5695       case ANYOF:
5696       case ANYOF + ADD_NL:
5697         testval = TRUE;
5698         /*FALLTHROUGH*/
5699
5700       case ANYBUT:
5701       case ANYBUT + ADD_NL:
5702         while (count < maxcount)
5703         {
5704 #ifdef FEAT_MBYTE
5705             int len;
5706 #endif
5707             if (*scan == NUL)
5708             {
5709                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5710                                                               || reg_line_lbr)
5711                     break;
5712                 reg_nextline();
5713                 scan = reginput;
5714                 if (got_int)
5715                     break;
5716             }
5717             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5718                 ++scan;
5719 #ifdef FEAT_MBYTE
5720             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5721             {
5722                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5723                     break;
5724                 scan += len;
5725             }
5726 #endif
5727             else
5728             {
5729                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5730                     break;
5731                 ++scan;
5732             }
5733             ++count;
5734         }
5735         break;
5736
5737       case NEWL:
5738         while (count < maxcount
5739                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5740                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5741         {
5742             count++;
5743             if (reg_line_lbr)
5744                 ADVANCE_REGINPUT();
5745             else
5746                 reg_nextline();
5747             scan = reginput;
5748             if (got_int)
5749                 break;
5750         }
5751         break;
5752
5753       default:                  /* Oh dear.  Called inappropriately. */
5754         EMSG(_(e_re_corr));
5755 #ifdef DEBUG
5756         printf("Called regrepeat with op code %d\n", OP(p));
5757 #endif
5758         break;
5759     }
5760
5761     reginput = scan;
5762
5763     return (int)count;
5764 }
5765
5766 /*
5767  * regnext - dig the "next" pointer out of a node
5768  */
5769     static char_u *
5770 regnext(p)
5771     char_u  *p;
5772 {
5773     int     offset;
5774
5775     if (p == JUST_CALC_SIZE)
5776         return NULL;
5777
5778     offset = NEXT(p);
5779     if (offset == 0)
5780         return NULL;
5781
5782     if (OP(p) == BACK)
5783         return p - offset;
5784     else
5785         return p + offset;
5786 }
5787
5788 /*
5789  * Check the regexp program for its magic number.
5790  * Return TRUE if it's wrong.
5791  */
5792     static int
5793 prog_magic_wrong()
5794 {
5795     if (UCHARAT(REG_MULTI
5796                 ? reg_mmatch->regprog->program
5797                 : reg_match->regprog->program) != REGMAGIC)
5798     {
5799         EMSG(_(e_re_corr));
5800         return TRUE;
5801     }
5802     return FALSE;
5803 }
5804
5805 /*
5806  * Cleanup the subexpressions, if this wasn't done yet.
5807  * This construction is used to clear the subexpressions only when they are
5808  * used (to increase speed).
5809  */
5810     static void
5811 cleanup_subexpr()
5812 {
5813     if (need_clear_subexpr)
5814     {
5815         if (REG_MULTI)
5816         {
5817             /* Use 0xff to set lnum to -1 */
5818             vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5819             vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5820         }
5821         else
5822         {
5823             vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5824             vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5825         }
5826         need_clear_subexpr = FALSE;
5827     }
5828 }
5829
#ifdef FEAT_SYN_HL
/*
 * Reset the "z" sub-expression start and end info, but only when
 * "need_clear_zsubexpr" tells this was not done yet.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (!REG_MULTI)
    {
        /* The pointer arrays are filled with zero bytes. */
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
        /* Filling the positions with 0xff bytes sets every lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
5851
5852 /*
5853  * Save the current subexpr to "bp", so that they can be restored
5854  * later by restore_subexpr().
5855  */
5856     static void
5857 save_subexpr(bp)
5858     regbehind_T *bp;
5859 {
5860     int i;
5861
5862     /* When "need_clear_subexpr" is set we don't need to save the values, only
5863      * remember that this flag needs to be set again when restoring. */
5864     bp->save_need_clear_subexpr = need_clear_subexpr;
5865     if (!need_clear_subexpr)
5866     {
5867         for (i = 0; i < NSUBEXP; ++i)
5868         {
5869             if (REG_MULTI)
5870             {
5871                 bp->save_start[i].se_u.pos = reg_startpos[i];
5872                 bp->save_end[i].se_u.pos = reg_endpos[i];
5873             }
5874             else
5875             {
5876                 bp->save_start[i].se_u.ptr = reg_startp[i];
5877                 bp->save_end[i].se_u.ptr = reg_endp[i];
5878             }
5879         }
5880     }
5881 }
5882
5883 /*
5884  * Restore the subexpr from "bp".
5885  */
5886     static void
5887 restore_subexpr(bp)
5888     regbehind_T *bp;
5889 {
5890     int i;
5891
5892     /* Only need to restore saved values when they are not to be cleared. */
5893     need_clear_subexpr = bp->save_need_clear_subexpr;
5894     if (!need_clear_subexpr)
5895     {
5896         for (i = 0; i < NSUBEXP; ++i)
5897         {
5898             if (REG_MULTI)
5899             {
5900                 reg_startpos[i] = bp->save_start[i].se_u.pos;
5901                 reg_endpos[i] = bp->save_end[i].se_u.pos;
5902             }
5903             else
5904             {
5905                 reg_startp[i] = bp->save_start[i].se_u.ptr;
5906                 reg_endp[i] = bp->save_end[i].se_u.ptr;
5907             }
5908         }
5909     }
5910 }
5911
5912 /*
5913  * Advance reglnum, regline and reginput to the next line.
5914  */
5915     static void
5916 reg_nextline()
5917 {
5918     regline = reg_getline(++reglnum);
5919     reginput = regline;
5920     fast_breakcheck();
5921 }
5922
5923 /*
5924  * Save the input line and position in a regsave_T.
5925  */
5926     static void
5927 reg_save(save, gap)
5928     regsave_T   *save;
5929     garray_T    *gap;
5930 {
5931     if (REG_MULTI)
5932     {
5933         save->rs_u.pos.col = (colnr_T)(reginput - regline);
5934         save->rs_u.pos.lnum = reglnum;
5935     }
5936     else
5937         save->rs_u.ptr = reginput;
5938     save->rs_len = gap->ga_len;
5939 }
5940
5941 /*
5942  * Restore the input line and position from a regsave_T.
5943  */
5944     static void
5945 reg_restore(save, gap)
5946     regsave_T   *save;
5947     garray_T    *gap;
5948 {
5949     if (REG_MULTI)
5950     {
5951         if (reglnum != save->rs_u.pos.lnum)
5952         {
5953             /* only call reg_getline() when the line number changed to save
5954              * a bit of time */
5955             reglnum = save->rs_u.pos.lnum;
5956             regline = reg_getline(reglnum);
5957         }
5958         reginput = regline + save->rs_u.pos.col;
5959     }
5960     else
5961         reginput = save->rs_u.ptr;
5962     gap->ga_len = save->rs_len;
5963 }
5964
5965 /*
5966  * Return TRUE if current position is equal to saved position.
5967  */
5968     static int
5969 reg_save_equal(save)
5970     regsave_T   *save;
5971 {
5972     if (REG_MULTI)
5973         return reglnum == save->rs_u.pos.lnum
5974                                   && reginput == regline + save->rs_u.pos.col;
5975     return reginput == save->rs_u.ptr;
5976 }
5977
5978 /*
5979  * Tentatively set the sub-expression start to the current position (after
5980  * calling regmatch() they will have changed).  Need to save the existing
5981  * values for when there is no match.
5982  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
5983  * depending on REG_MULTI.
5984  */
5985     static void
5986 save_se_multi(savep, posp)
5987     save_se_T   *savep;
5988     lpos_T      *posp;
5989 {
5990     savep->se_u.pos = *posp;
5991     posp->lnum = reglnum;
5992     posp->col = (colnr_T)(reginput - regline);
5993 }
5994
5995     static void
5996 save_se_one(savep, pp)
5997     save_se_T   *savep;
5998     char_u      **pp;
5999 {
6000     savep->se_u.ptr = *pp;
6001     *pp = reginput;
6002 }
6003
6004 /*
6005  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6006  */
6007     static int
6008 re_num_cmp(val, scan)
6009     long_u      val;
6010     char_u      *scan;
6011 {
6012     long_u  n = OPERAND_MIN(scan);
6013
6014     if (OPERAND_CMP(scan) == '>')
6015         return val > n;
6016     if (OPERAND_CMP(scan) == '<')
6017         return val < n;
6018     return val == n;
6019 }
6020
6021
6022 #ifdef DEBUG
6023
6024 /*
6025  * regdump - dump a regexp onto stdout in vaguely comprehensible form
6026  */
6027     static void
6028 regdump(pattern, r)
6029     char_u      *pattern;
6030     regprog_T   *r;
6031 {
6032     char_u  *s;
6033     int     op = EXACTLY;       /* Arbitrary non-END op. */
6034     char_u  *next;
6035     char_u  *end = NULL;
6036
6037     printf("\r\nregcomp(%s):\r\n", pattern);
6038
6039     s = r->program + 1;
6040     /*
6041      * Loop until we find the END that isn't before a referred next (an END
6042      * can also appear in a NOMATCH operand).
6043      */
6044     while (op != END || s <= end)
6045     {
6046         op = OP(s);
6047         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
6048         next = regnext(s);
6049         if (next == NULL)       /* Next ptr. */
6050             printf("(0)");
6051         else
6052             printf("(%d)", (int)((s - r->program) + (next - s)));
6053         if (end < next)
6054             end = next;
6055         if (op == BRACE_LIMITS)
6056         {
6057             /* Two short ints */
6058             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
6059             s += 8;
6060         }
6061         s += 3;
6062         if (op == ANYOF || op == ANYOF + ADD_NL
6063                 || op == ANYBUT || op == ANYBUT + ADD_NL
6064                 || op == EXACTLY)
6065         {
6066             /* Literal string, where present. */
6067             while (*s != NUL)
6068                 printf("%c", *s++);
6069             s++;
6070         }
6071         printf("\r\n");
6072     }
6073
6074     /* Header fields of interest. */
6075     if (r->regstart != NUL)
6076         printf("start `%s' 0x%x; ", r->regstart < 256
6077                 ? (char *)transchar(r->regstart)
6078                 : "multibyte", r->regstart);
6079     if (r->reganch)
6080         printf("anchored; ");
6081     if (r->regmust != NULL)
6082         printf("must have \"%s\"", r->regmust);
6083     printf("\r\n");
6084 }
6085
6086 /*
6087  * regprop - printable representation of opcode
6088  */
6089     static char_u *
6090 regprop(op)
6091     char_u         *op;
6092 {
6093     char_u          *p;
6094     static char_u   buf[50];
6095
6096     (void) strcpy(buf, ":");
6097
6098     switch (OP(op))
6099     {
6100       case BOL:
6101         p = "BOL";
6102         break;
6103       case EOL:
6104         p = "EOL";
6105         break;
6106       case RE_BOF:
6107         p = "BOF";
6108         break;
6109       case RE_EOF:
6110         p = "EOF";
6111         break;
6112       case CURSOR:
6113         p = "CURSOR";
6114         break;
6115       case RE_VISUAL:
6116         p = "RE_VISUAL";
6117         break;
6118       case RE_LNUM:
6119         p = "RE_LNUM";
6120         break;
6121       case RE_MARK:
6122         p = "RE_MARK";
6123         break;
6124       case RE_COL:
6125         p = "RE_COL";
6126         break;
6127       case RE_VCOL:
6128         p = "RE_VCOL";
6129         break;
6130       case BOW:
6131         p = "BOW";
6132         break;
6133       case EOW:
6134         p = "EOW";
6135         break;
6136       case ANY:
6137         p = "ANY";
6138         break;
6139       case ANY + ADD_NL:
6140         p = "ANY+NL";
6141         break;
6142       case ANYOF:
6143         p = "ANYOF";
6144         break;
6145       case ANYOF + ADD_NL:
6146         p = "ANYOF+NL";
6147         break;
6148       case ANYBUT:
6149         p = "ANYBUT";
6150         break;
6151       case ANYBUT + ADD_NL:
6152         p = "ANYBUT+NL";
6153         break;
6154       case IDENT:
6155         p = "IDENT";
6156         break;
6157       case IDENT + ADD_NL:
6158         p = "IDENT+NL";
6159         break;
6160       case SIDENT:
6161         p = "SIDENT";
6162         break;
6163       case SIDENT + ADD_NL:
6164         p = "SIDENT+NL";
6165         break;
6166       case KWORD:
6167         p = "KWORD";
6168         break;
6169       case KWORD + ADD_NL:
6170         p = "KWORD+NL";
6171         break;
6172       case SKWORD:
6173         p = "SKWORD";
6174         break;
6175       case SKWORD + ADD_NL:
6176         p = "SKWORD+NL";
6177         break;
6178       case FNAME:
6179         p = "FNAME";
6180         break;
6181       case FNAME + ADD_NL:
6182         p = "FNAME+NL";
6183         break;
6184       case SFNAME:
6185         p = "SFNAME";
6186         break;
6187       case SFNAME + ADD_NL:
6188         p = "SFNAME+NL";
6189         break;
6190       case PRINT:
6191         p = "PRINT";
6192         break;
6193       case PRINT + ADD_NL:
6194         p = "PRINT+NL";
6195         break;
6196       case SPRINT:
6197         p = "SPRINT";
6198         break;
6199       case SPRINT + ADD_NL:
6200         p = "SPRINT+NL";
6201         break;
6202       case WHITE:
6203         p = "WHITE";
6204         break;
6205       case WHITE + ADD_NL:
6206         p = "WHITE+NL";
6207         break;
6208       case NWHITE:
6209         p = "NWHITE";
6210         break;
6211       case NWHITE + ADD_NL:
6212         p = "NWHITE+NL";
6213         break;
6214       case DIGIT:
6215         p = "DIGIT";
6216         break;
6217       case DIGIT + ADD_NL:
6218         p = "DIGIT+NL";
6219         break;
6220       case NDIGIT:
6221         p = "NDIGIT";
6222         break;
6223       case NDIGIT + ADD_NL:
6224         p = "NDIGIT+NL";
6225         break;
6226       case HEX:
6227         p = "HEX";
6228         break;
6229       case HEX + ADD_NL:
6230         p = "HEX+NL";
6231         break;
6232       case NHEX:
6233         p = "NHEX";
6234         break;
6235       case NHEX + ADD_NL:
6236         p = "NHEX+NL";
6237         break;
6238       case OCTAL:
6239         p = "OCTAL";
6240         break;
6241       case OCTAL + ADD_NL:
6242         p = "OCTAL+NL";
6243         break;
6244       case NOCTAL:
6245         p = "NOCTAL";
6246         break;
6247       case NOCTAL + ADD_NL:
6248         p = "NOCTAL+NL";
6249         break;
6250       case WORD:
6251         p = "WORD";
6252         break;
6253       case WORD + ADD_NL:
6254         p = "WORD+NL";
6255         break;
6256       case NWORD:
6257         p = "NWORD";
6258         break;
6259       case NWORD + ADD_NL:
6260         p = "NWORD+NL";
6261         break;
6262       case HEAD:
6263         p = "HEAD";
6264         break;
6265       case HEAD + ADD_NL:
6266         p = "HEAD+NL";
6267         break;
6268       case NHEAD:
6269         p = "NHEAD";
6270         break;
6271       case NHEAD + ADD_NL:
6272         p = "NHEAD+NL";
6273         break;
6274       case ALPHA:
6275         p = "ALPHA";
6276         break;
6277       case ALPHA + ADD_NL:
6278         p = "ALPHA+NL";
6279         break;
6280       case NALPHA:
6281         p = "NALPHA";
6282         break;
6283       case NALPHA + ADD_NL:
6284         p = "NALPHA+NL";
6285         break;
6286       case LOWER:
6287         p = "LOWER";
6288         break;
6289       case LOWER + ADD_NL:
6290         p = "LOWER+NL";
6291         break;
6292       case NLOWER:
6293         p = "NLOWER";
6294         break;
6295       case NLOWER + ADD_NL:
6296         p = "NLOWER+NL";
6297         break;
6298       case UPPER:
6299         p = "UPPER";
6300         break;
6301       case UPPER + ADD_NL:
6302         p = "UPPER+NL";
6303         break;
6304       case NUPPER:
6305         p = "NUPPER";
6306         break;
6307       case NUPPER + ADD_NL:
6308         p = "NUPPER+NL";
6309         break;
6310       case BRANCH:
6311         p = "BRANCH";
6312         break;
6313       case EXACTLY:
6314         p = "EXACTLY";
6315         break;
6316       case NOTHING:
6317         p = "NOTHING";
6318         break;
6319       case BACK:
6320         p = "BACK";
6321         break;
6322       case END:
6323         p = "END";
6324         break;
6325       case MOPEN + 0:
6326         p = "MATCH START";
6327         break;
6328       case MOPEN + 1:
6329       case MOPEN + 2:
6330       case MOPEN + 3:
6331       case MOPEN + 4:
6332       case MOPEN + 5:
6333       case MOPEN + 6:
6334       case MOPEN + 7:
6335       case MOPEN + 8:
6336       case MOPEN + 9:
6337         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6338         p = NULL;
6339         break;
6340       case MCLOSE + 0:
6341         p = "MATCH END";
6342         break;
6343       case MCLOSE + 1:
6344       case MCLOSE + 2:
6345       case MCLOSE + 3:
6346       case MCLOSE + 4:
6347       case MCLOSE + 5:
6348       case MCLOSE + 6:
6349       case MCLOSE + 7:
6350       case MCLOSE + 8:
6351       case MCLOSE + 9:
6352         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6353         p = NULL;
6354         break;
6355       case BACKREF + 1:
6356       case BACKREF + 2:
6357       case BACKREF + 3:
6358       case BACKREF + 4:
6359       case BACKREF + 5:
6360       case BACKREF + 6:
6361       case BACKREF + 7:
6362       case BACKREF + 8:
6363       case BACKREF + 9:
6364         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6365         p = NULL;
6366         break;
6367       case NOPEN:
6368         p = "NOPEN";
6369         break;
6370       case NCLOSE:
6371         p = "NCLOSE";
6372         break;
6373 #ifdef FEAT_SYN_HL
6374       case ZOPEN + 1:
6375       case ZOPEN + 2:
6376       case ZOPEN + 3:
6377       case ZOPEN + 4:
6378       case ZOPEN + 5:
6379       case ZOPEN + 6:
6380       case ZOPEN + 7:
6381       case ZOPEN + 8:
6382       case ZOPEN + 9:
6383         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6384         p = NULL;
6385         break;
6386       case ZCLOSE + 1:
6387       case ZCLOSE + 2:
6388       case ZCLOSE + 3:
6389       case ZCLOSE + 4:
6390       case ZCLOSE + 5:
6391       case ZCLOSE + 6:
6392       case ZCLOSE + 7:
6393       case ZCLOSE + 8:
6394       case ZCLOSE + 9:
6395         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6396         p = NULL;
6397         break;
6398       case ZREF + 1:
6399       case ZREF + 2:
6400       case ZREF + 3:
6401       case ZREF + 4:
6402       case ZREF + 5:
6403       case ZREF + 6:
6404       case ZREF + 7:
6405       case ZREF + 8:
6406       case ZREF + 9:
6407         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6408         p = NULL;
6409         break;
6410 #endif
6411       case STAR:
6412         p = "STAR";
6413         break;
6414       case PLUS:
6415         p = "PLUS";
6416         break;
6417       case NOMATCH:
6418         p = "NOMATCH";
6419         break;
6420       case MATCH:
6421         p = "MATCH";
6422         break;
6423       case BEHIND:
6424         p = "BEHIND";
6425         break;
6426       case NOBEHIND:
6427         p = "NOBEHIND";
6428         break;
6429       case SUBPAT:
6430         p = "SUBPAT";
6431         break;
6432       case BRACE_LIMITS:
6433         p = "BRACE_LIMITS";
6434         break;
6435       case BRACE_SIMPLE:
6436         p = "BRACE_SIMPLE";
6437         break;
6438       case BRACE_COMPLEX + 0:
6439       case BRACE_COMPLEX + 1:
6440       case BRACE_COMPLEX + 2:
6441       case BRACE_COMPLEX + 3:
6442       case BRACE_COMPLEX + 4:
6443       case BRACE_COMPLEX + 5:
6444       case BRACE_COMPLEX + 6:
6445       case BRACE_COMPLEX + 7:
6446       case BRACE_COMPLEX + 8:
6447       case BRACE_COMPLEX + 9:
6448         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6449         p = NULL;
6450         break;
6451 #ifdef FEAT_MBYTE
6452       case MULTIBYTECODE:
6453         p = "MULTIBYTECODE";
6454         break;
6455 #endif
6456       case NEWL:
6457         p = "NEWL";
6458         break;
6459       default:
6460         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6461         p = NULL;
6462         break;
6463     }
6464     if (p != NULL)
6465         (void) strcat(buf, p);
6466     return buf;
6467 }
6468 #endif
6469
6470 #ifdef FEAT_MBYTE
6471 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6472
/* The base character and up to two following characters of a decomposition;
 * unused entries are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;

/* First and last character that has an entry in decomp_table[].  The range
 * check in mb_decompose() must use the same values as the table size. */
#define DECOMP_FIRST    0xfb20
#define DECOMP_LAST     0xfb4f

/* 0xfb20 - 0xfb4f */
static decomp_T decomp_table[DECOMP_LAST - DECOMP_FIRST + 1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters.
 * When "c" has an entry in decomp_table[] (0xfb20 - 0xfb4f) the three values
 * of that entry are stored in "*c1", "*c2" and "*c3".
 * For every other character "*c1" is set to "c" and "*c2" and "*c3" to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character of the table, otherwise
     * the index below becomes negative and reads before the table. */
    if (c >= DECOMP_FIRST && c <= DECOMP_LAST)
    {
        d = decomp_table[c - DECOMP_FIRST];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6551 #endif
6552
6553 /*
6554  * Compare two strings, ignore case if ireg_ic set.
6555  * Return 0 if strings match, non-zero otherwise.
6556  * Correct the length "*n" when composing characters are ignored.
6557  */
6558     static int
6559 cstrncmp(s1, s2, n)
6560     char_u      *s1, *s2;
6561     int         *n;
6562 {
6563     int         result;
6564
6565     if (!ireg_ic)
6566         result = STRNCMP(s1, s2, *n);
6567     else
6568         result = MB_STRNICMP(s1, s2, *n);
6569
6570 #ifdef FEAT_MBYTE
6571     /* if it failed and it's utf8 and we want to combineignore: */
6572     if (result != 0 && enc_utf8 && ireg_icombine)
6573     {
6574         char_u  *str1, *str2;
6575         int     c1, c2, c11, c12;
6576         int     junk;
6577
6578         /* we have to handle the strcmp ourselves, since it is necessary to
6579          * deal with the composing characters by ignoring them: */
6580         str1 = s1;
6581         str2 = s2;
6582         c1 = c2 = 0;
6583         while ((int)(str1 - s1) < *n)
6584         {
6585             c1 = mb_ptr2char_adv(&str1);
6586             c2 = mb_ptr2char_adv(&str2);
6587
6588             /* decompose the character if necessary, into 'base' characters
6589              * because I don't care about Arabic, I will hard-code the Hebrew
6590              * which I *do* care about!  So sue me... */
6591             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6592             {
6593                 /* decomposition necessary? */
6594                 mb_decompose(c1, &c11, &junk, &junk);
6595                 mb_decompose(c2, &c12, &junk, &junk);
6596                 c1 = c11;
6597                 c2 = c12;
6598                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6599                     break;
6600             }
6601         }
6602         result = c2 - c1;
6603         if (result == 0)
6604             *n = (int)(str2 - s2);
6605     }
6606 #endif
6607
6608     return result;
6609 }
6610
6611 /*
6612  * cstrchr: This function is used a lot for simple searches, keep it fast!
6613  */
6614     static char_u *
6615 cstrchr(s, c)
6616     char_u      *s;
6617     int         c;
6618 {
6619     char_u      *p;
6620     int         cc;
6621
6622     if (!ireg_ic
6623 #ifdef FEAT_MBYTE
6624             || (!enc_utf8 && mb_char2len(c) > 1)
6625 #endif
6626             )
6627         return vim_strchr(s, c);
6628
6629     /* tolower() and toupper() can be slow, comparing twice should be a lot
6630      * faster (esp. when using MS Visual C++!).
6631      * For UTF-8 need to use folded case. */
6632 #ifdef FEAT_MBYTE
6633     if (enc_utf8 && c > 0x80)
6634         cc = utf_fold(c);
6635     else
6636 #endif
6637          if (MB_ISUPPER(c))
6638         cc = MB_TOLOWER(c);
6639     else if (MB_ISLOWER(c))
6640         cc = MB_TOUPPER(c);
6641     else
6642         return vim_strchr(s, c);
6643
6644 #ifdef FEAT_MBYTE
6645     if (has_mbyte)
6646     {
6647         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6648         {
6649             if (enc_utf8 && c > 0x80)
6650             {
6651                 if (utf_fold(utf_ptr2char(p)) == cc)
6652                     return p;
6653             }
6654             else if (*p == c || *p == cc)
6655                 return p;
6656         }
6657     }
6658     else
6659 #endif
6660         /* Faster version for when there are no multi-byte characters. */
6661         for (p = s; *p != NUL; ++p)
6662             if (*p == c || *p == cc)
6663                 return p;
6664
6665     return NULL;
6666 }
6667
6668 /***************************************************************
6669  *                    regsub stuff                             *
6670  ***************************************************************/
6671
6672 /* This stuff below really confuses cc on an SGI -- webb */
6673 #ifdef __sgi
6674 # undef __ARGS
6675 # define __ARGS(x)  ()
6676 #endif
6677
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void.  This should work for all compilers.
 */
6684 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6685
6686 static fptr_T do_upper __ARGS((int *, int));
6687 static fptr_T do_Upper __ARGS((int *, int));
6688 static fptr_T do_lower __ARGS((int *, int));
6689 static fptr_T do_Lower __ARGS((int *, int));
6690
6691 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6692
6693     static fptr_T
6694 do_upper(d, c)
6695     int         *d;
6696     int         c;
6697 {
6698     *d = MB_TOUPPER(c);
6699
6700     return (fptr_T)NULL;
6701 }
6702
6703     static fptr_T
6704 do_Upper(d, c)
6705     int         *d;
6706     int         c;
6707 {
6708     *d = MB_TOUPPER(c);
6709
6710     return (fptr_T)do_Upper;
6711 }
6712
6713     static fptr_T
6714 do_lower(d, c)
6715     int         *d;
6716     int         c;
6717 {
6718     *d = MB_TOLOWER(c);
6719
6720     return (fptr_T)NULL;
6721 }
6722
6723     static fptr_T
6724 do_Lower(d, c)
6725     int         *d;
6726     int         c;
6727 {
6728     *d = MB_TOLOWER(c);
6729
6730     return (fptr_T)do_Lower;
6731 }
6732
6733 /*
6734  * regtilde(): Replace tildes in the pattern by the old pattern.
6735  *
6736  * Short explanation of the tilde: It stands for the previous replacement
6737  * pattern.  If that previous pattern also contains a ~ we should go back a
6738  * step further...  But we insert the previous pattern into the current one
6739  * and remember that.
6740  * This still does not handle the case where "magic" changes.  So require the
6741  * user to keep his hands off of "magic".
6742  *
6743  * The tildes are parsed once before the first call to vim_regsub().
6744  */
6745     char_u *
6746 regtilde(source, magic)
6747     char_u      *source;
6748     int         magic;
6749 {
6750     char_u      *newsub = source;
6751     char_u      *tmpsub;
6752     char_u      *p;
6753     int         len;
6754     int         prevlen;
6755
6756     for (p = newsub; *p; ++p)
6757     {
6758         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6759         {
6760             if (reg_prev_sub != NULL)
6761             {
6762                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6763                 prevlen = (int)STRLEN(reg_prev_sub);
6764                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6765                 if (tmpsub != NULL)
6766                 {
6767                     /* copy prefix */
6768                     len = (int)(p - newsub);    /* not including ~ */
6769                     mch_memmove(tmpsub, newsub, (size_t)len);
6770                     /* interpret tilde */
6771                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6772                     /* copy postfix */
6773                     if (!magic)
6774                         ++p;                    /* back off \ */
6775                     STRCPY(tmpsub + len + prevlen, p + 1);
6776
6777                     if (newsub != source)       /* already allocated newsub */
6778                         vim_free(newsub);
6779                     newsub = tmpsub;
6780                     p = newsub + len + prevlen;
6781                 }
6782             }
6783             else if (magic)
6784                 mch_memmove(p, p + 1, STRLEN(p));       /* remove '~' */
6785             else
6786                 mch_memmove(p, p + 2, STRLEN(p) - 1);   /* remove '\~' */
6787             --p;
6788         }
6789         else
6790         {
6791             if (*p == '\\' && p[1])             /* skip escaped characters */
6792                 ++p;
6793 #ifdef FEAT_MBYTE
6794             if (has_mbyte)
6795                 p += (*mb_ptr2len)(p) - 1;
6796 #endif
6797         }
6798     }
6799
6800     vim_free(reg_prev_sub);
6801     if (newsub != source)       /* newsub was allocated, just keep it */
6802         reg_prev_sub = newsub;
6803     else                        /* no ~ found, need to save newsub  */
6804         reg_prev_sub = vim_strsave(newsub);
6805     return newsub;
6806 }
6807
6808 #ifdef FEAT_EVAL
6809 static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */
6810
6811 /* These pointers are used instead of reg_match and reg_mmatch for
6812  * reg_submatch().  Needed for when the substitution string is an expression
6813  * that contains a call to substitute() and submatch(). */
6814 static regmatch_T       *submatch_match;
6815 static regmmatch_T      *submatch_mmatch;
6816 #endif
6817
6818 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6819 /*
6820  * vim_regsub() - perform substitutions after a vim_regexec() or
6821  * vim_regexec_multi() match.
6822  *
6823  * If "copy" is TRUE really copy into "dest".
6824  * If "copy" is FALSE nothing is copied, this is just to find out the length
6825  * of the result.
6826  *
6827  * If "backslash" is TRUE, a backslash will be removed later, need to double
6828  * them to keep them, and insert a backslash before a CR to avoid it being
6829  * replaced with a line break later.
6830  *
6831  * Note: The matched text must not change between the call of
6832  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6833  * references invalid!
6834  *
6835  * Returns the size of the replacement, including terminating NUL.
6836  */
6837     int
6838 vim_regsub(rmp, source, dest, copy, magic, backslash)
6839     regmatch_T  *rmp;
6840     char_u      *source;
6841     char_u      *dest;
6842     int         copy;
6843     int         magic;
6844     int         backslash;
6845 {
6846     reg_match = rmp;
6847     reg_mmatch = NULL;
6848     reg_maxline = 0;
6849     return vim_regsub_both(source, dest, copy, magic, backslash);
6850 }
6851 #endif
6852
6853     int
6854 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6855     regmmatch_T *rmp;
6856     linenr_T    lnum;
6857     char_u      *source;
6858     char_u      *dest;
6859     int         copy;
6860     int         magic;
6861     int         backslash;
6862 {
6863     reg_match = NULL;
6864     reg_mmatch = rmp;
6865     reg_buf = curbuf;           /* always works on the current buffer! */
6866     reg_firstlnum = lnum;
6867     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6868     return vim_regsub_both(source, dest, copy, magic, backslash);
6869 }
6870
6871     static int
6872 vim_regsub_both(source, dest, copy, magic, backslash)
6873     char_u      *source;
6874     char_u      *dest;
6875     int         copy;
6876     int         magic;
6877     int         backslash;
6878 {
6879     char_u      *src;
6880     char_u      *dst;
6881     char_u      *s;
6882     int         c;
6883     int         cc;
6884     int         no = -1;
6885     fptr_T      func = (fptr_T)NULL;
6886     linenr_T    clnum = 0;      /* init for GCC */
6887     int         len = 0;        /* init for GCC */
6888 #ifdef FEAT_EVAL
6889     static char_u *eval_result = NULL;
6890 #endif
6891
6892     /* Be paranoid... */
6893     if (source == NULL || dest == NULL)
6894     {
6895         EMSG(_(e_null));
6896         return 0;
6897     }
6898     if (prog_magic_wrong())
6899         return 0;
6900     src = source;
6901     dst = dest;
6902
6903     /*
6904      * When the substitute part starts with "\=" evaluate it as an expression.
6905      */
6906     if (source[0] == '\\' && source[1] == '='
6907 #ifdef FEAT_EVAL
6908             && !can_f_submatch      /* can't do this recursively */
6909 #endif
6910             )
6911     {
6912 #ifdef FEAT_EVAL
6913         /* To make sure that the length doesn't change between checking the
6914          * length and copying the string, and to speed up things, the
6915          * resulting string is saved from the call with "copy" == FALSE to the
6916          * call with "copy" == TRUE. */
6917         if (copy)
6918         {
6919             if (eval_result != NULL)
6920             {
6921                 STRCPY(dest, eval_result);
6922                 dst += STRLEN(eval_result);
6923                 vim_free(eval_result);
6924                 eval_result = NULL;
6925             }
6926         }
6927         else
6928         {
6929             linenr_T    save_reg_maxline;
6930             win_T       *save_reg_win;
6931             int         save_ireg_ic;
6932
6933             vim_free(eval_result);
6934
6935             /* The expression may contain substitute(), which calls us
6936              * recursively.  Make sure submatch() gets the text from the first
6937              * level.  Don't need to save "reg_buf", because
6938              * vim_regexec_multi() can't be called recursively. */
6939             submatch_match = reg_match;
6940             submatch_mmatch = reg_mmatch;
6941             save_reg_maxline = reg_maxline;
6942             save_reg_win = reg_win;
6943             save_ireg_ic = ireg_ic;
6944             can_f_submatch = TRUE;
6945
6946             eval_result = eval_to_string(source + 2, NULL, TRUE);
6947             if (eval_result != NULL)
6948             {
6949                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6950                 {
6951                     /* Change NL to CR, so that it becomes a line break.
6952                      * Skip over a backslashed character. */
6953                     if (*s == NL)
6954                         *s = CAR;
6955                     else if (*s == '\\' && s[1] != NUL)
6956                         ++s;
6957                 }
6958
6959                 dst += STRLEN(eval_result);
6960             }
6961
6962             reg_match = submatch_match;
6963             reg_mmatch = submatch_mmatch;
6964             reg_maxline = save_reg_maxline;
6965             reg_win = save_reg_win;
6966             ireg_ic = save_ireg_ic;
6967             can_f_submatch = FALSE;
6968         }
6969 #endif
6970     }
6971     else
6972       while ((c = *src++) != NUL)
6973       {
6974         if (c == '&' && magic)
6975             no = 0;
6976         else if (c == '\\' && *src != NUL)
6977         {
6978             if (*src == '&' && !magic)
6979             {
6980                 ++src;
6981                 no = 0;
6982             }
6983             else if ('0' <= *src && *src <= '9')
6984             {
6985                 no = *src++ - '0';
6986             }
6987             else if (vim_strchr((char_u *)"uUlLeE", *src))
6988             {
6989                 switch (*src++)
6990                 {
6991                 case 'u':   func = (fptr_T)do_upper;
6992                             continue;
6993                 case 'U':   func = (fptr_T)do_Upper;
6994                             continue;
6995                 case 'l':   func = (fptr_T)do_lower;
6996                             continue;
6997                 case 'L':   func = (fptr_T)do_Lower;
6998                             continue;
6999                 case 'e':
7000                 case 'E':   func = (fptr_T)NULL;
7001                             continue;
7002                 }
7003             }
7004         }
7005         if (no < 0)           /* Ordinary character. */
7006         {
7007             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7008             {
7009                 /* Copy a special key as-is. */
7010                 if (copy)
7011                 {
7012                     *dst++ = c;
7013                     *dst++ = *src++;
7014                     *dst++ = *src++;
7015                 }
7016                 else
7017                 {
7018                     dst += 3;
7019                     src += 2;
7020                 }
7021                 continue;
7022             }
7023
7024             if (c == '\\' && *src != NUL)
7025             {
7026                 /* Check for abbreviations -- webb */
7027                 switch (*src)
7028                 {
7029                     case 'r':   c = CAR;        ++src;  break;
7030                     case 'n':   c = NL;         ++src;  break;
7031                     case 't':   c = TAB;        ++src;  break;
7032                  /* Oh no!  \e already has meaning in subst pat :-( */
7033                  /* case 'e':   c = ESC;        ++src;  break; */
7034                     case 'b':   c = Ctrl_H;     ++src;  break;
7035
7036                     /* If "backslash" is TRUE the backslash will be removed
7037                      * later.  Used to insert a literal CR. */
7038                     default:    if (backslash)
7039                                 {
7040                                     if (copy)
7041                                         *dst = '\\';
7042                                     ++dst;
7043                                 }
7044                                 c = *src++;
7045                 }
7046             }
7047 #ifdef FEAT_MBYTE
7048             else if (has_mbyte)
7049                 c = mb_ptr2char(src - 1);
7050 #endif
7051
7052             /* Write to buffer, if copy is set. */
7053             if (func == (fptr_T)NULL)   /* just copy */
7054                 cc = c;
7055             else
7056                 /* Turbo C complains without the typecast */
7057                 func = (fptr_T)(func(&cc, c));
7058
7059 #ifdef FEAT_MBYTE
7060             if (has_mbyte)
7061             {
7062                 src += mb_ptr2len(src - 1) - 1;
7063                 if (copy)
7064                     mb_char2bytes(cc, dst);
7065                 dst += mb_char2len(cc) - 1;
7066             }
7067             else
7068 #endif
7069                 if (copy)
7070                     *dst = cc;
7071             dst++;
7072         }
7073         else
7074         {
7075             if (REG_MULTI)
7076             {
7077                 clnum = reg_mmatch->startpos[no].lnum;
7078                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7079                     s = NULL;
7080                 else
7081                 {
7082                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7083                     if (reg_mmatch->endpos[no].lnum == clnum)
7084                         len = reg_mmatch->endpos[no].col
7085                                                - reg_mmatch->startpos[no].col;
7086                     else
7087                         len = (int)STRLEN(s);
7088                 }
7089             }
7090             else
7091             {
7092                 s = reg_match->startp[no];
7093                 if (reg_match->endp[no] == NULL)
7094                     s = NULL;
7095                 else
7096                     len = (int)(reg_match->endp[no] - s);
7097             }
7098             if (s != NULL)
7099             {
7100                 for (;;)
7101                 {
7102                     if (len == 0)
7103                     {
7104                         if (REG_MULTI)
7105                         {
7106                             if (reg_mmatch->endpos[no].lnum == clnum)
7107                                 break;
7108                             if (copy)
7109                                 *dst = CAR;
7110                             ++dst;
7111                             s = reg_getline(++clnum);
7112                             if (reg_mmatch->endpos[no].lnum == clnum)
7113                                 len = reg_mmatch->endpos[no].col;
7114                             else
7115                                 len = (int)STRLEN(s);
7116                         }
7117                         else
7118                             break;
7119                     }
7120                     else if (*s == NUL) /* we hit NUL. */
7121                     {
7122                         if (copy)
7123                             EMSG(_(e_re_damg));
7124                         goto exit;
7125                     }
7126                     else
7127                     {
7128                         if (backslash && (*s == CAR || *s == '\\'))
7129                         {
7130                             /*
7131                              * Insert a backslash in front of a CR, otherwise
7132                              * it will be replaced by a line break.
7133                              * Number of backslashes will be halved later,
7134                              * double them here.
7135                              */
7136                             if (copy)
7137                             {
7138                                 dst[0] = '\\';
7139                                 dst[1] = *s;
7140                             }
7141                             dst += 2;
7142                         }
7143                         else
7144                         {
7145 #ifdef FEAT_MBYTE
7146                             if (has_mbyte)
7147                                 c = mb_ptr2char(s);
7148                             else
7149 #endif
7150                                 c = *s;
7151
7152                             if (func == (fptr_T)NULL)   /* just copy */
7153                                 cc = c;
7154                             else
7155                                 /* Turbo C complains without the typecast */
7156                                 func = (fptr_T)(func(&cc, c));
7157
7158 #ifdef FEAT_MBYTE
7159                             if (has_mbyte)
7160                             {
7161                                 int l;
7162
7163                                 /* Copy composing characters separately, one
7164                                  * at a time. */
7165                                 if (enc_utf8)
7166                                     l = utf_ptr2len(s) - 1;
7167                                 else
7168                                     l = mb_ptr2len(s) - 1;
7169
7170                                 s += l;
7171                                 len -= l;
7172                                 if (copy)
7173                                     mb_char2bytes(cc, dst);
7174                                 dst += mb_char2len(cc) - 1;
7175                             }
7176                             else
7177 #endif
7178                                 if (copy)
7179                                     *dst = cc;
7180                             dst++;
7181                         }
7182
7183                         ++s;
7184                         --len;
7185                     }
7186                 }
7187             }
7188             no = -1;
7189         }
7190       }
7191     if (copy)
7192         *dst = NUL;
7193
7194 exit:
7195     return (int)((dst - dest) + 1);
7196 }
7197
7198 #ifdef FEAT_EVAL
7199 /*
7200  * Used for the submatch() function: get the string from the n'th submatch in
7201  * allocated memory.
7202  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7203  */
7204     char_u *
7205 reg_submatch(no)
7206     int         no;
7207 {
7208     char_u      *retval = NULL;
7209     char_u      *s;
7210     int         len;
7211     int         round;
7212     linenr_T    lnum;
7213
7214     if (!can_f_submatch || no < 0)
7215         return NULL;
7216
7217     if (submatch_match == NULL)
7218     {
7219         /*
7220          * First round: compute the length and allocate memory.
7221          * Second round: copy the text.
7222          */
7223         for (round = 1; round <= 2; ++round)
7224         {
7225             lnum = submatch_mmatch->startpos[no].lnum;
7226             if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7227                 return NULL;
7228
7229             s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
7230             if (s == NULL)  /* anti-crash check, cannot happen? */
7231                 break;
7232             if (submatch_mmatch->endpos[no].lnum == lnum)
7233             {
7234                 /* Within one line: take form start to end col. */
7235                 len = submatch_mmatch->endpos[no].col
7236                                           - submatch_mmatch->startpos[no].col;
7237                 if (round == 2)
7238                     vim_strncpy(retval, s, len);
7239                 ++len;
7240             }
7241             else
7242             {
7243                 /* Multiple lines: take start line from start col, middle
7244                  * lines completely and end line up to end col. */
7245                 len = (int)STRLEN(s);
7246                 if (round == 2)
7247                 {
7248                     STRCPY(retval, s);
7249                     retval[len] = '\n';
7250                 }
7251                 ++len;
7252                 ++lnum;
7253                 while (lnum < submatch_mmatch->endpos[no].lnum)
7254                 {
7255                     s = reg_getline(lnum++);
7256                     if (round == 2)
7257                         STRCPY(retval + len, s);
7258                     len += (int)STRLEN(s);
7259                     if (round == 2)
7260                         retval[len] = '\n';
7261                     ++len;
7262                 }
7263                 if (round == 2)
7264                     STRNCPY(retval + len, reg_getline(lnum),
7265                                              submatch_mmatch->endpos[no].col);
7266                 len += submatch_mmatch->endpos[no].col;
7267                 if (round == 2)
7268                     retval[len] = NUL;
7269                 ++len;
7270             }
7271
7272             if (retval == NULL)
7273             {
7274                 retval = lalloc((long_u)len, TRUE);
7275                 if (retval == NULL)
7276                     return NULL;
7277             }
7278         }
7279     }
7280     else
7281     {
7282         if (submatch_match->endp[no] == NULL)
7283             retval = NULL;
7284         else
7285         {
7286             s = submatch_match->startp[no];
7287             retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7288         }
7289     }
7290
7291     return retval;
7292 }
7293 #endif