src/regexp.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
   4  *
   5  * NOTICE:
   6  *
   7  * This is NOT the original regular expression code as written by Henry
   8  * Spencer.  This code has been modified specifically for use with the VIM
   9  * editor, and should not be used separately from Vim.  If you want a good
  10  * regular expression library, get the original code.  The copyright notice
  11  * that follows is from the original.
  12  *
  13  * END NOTICE
  14  *
  15  *      Copyright (c) 1986 by University of Toronto.
  16  *      Written by Henry Spencer.  Not derived from licensed software.
  17  *
  18  *      Permission is granted to anyone to use this software for any
  19  *      purpose on any computer system, and to redistribute it freely,
  20  *      subject to the following restrictions:
  21  *
  22  *      1. The author is not responsible for the consequences of use of
  23  *              this software, no matter how awful, even if they arise
  24  *              from defects in it.
  25  *
  26  *      2. The origin of this software must not be misrepresented, either
  27  *              by explicit claim or by omission.
  28  *
  29  *      3. Altered versions must be plainly marked as such, and must not
  30  *              be misrepresented as being the original software.
  31  *
  32  * Beware that some of this code is subtly aware of the way operator
  33  * precedence is structured in regular expressions.  Serious changes in
  34  * regular-expression syntax might require a total rethink.
  35  *
  36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
  37  * Webb, Ciaran McCreesh and Bram Moolenaar.
  38  * Named character class support added by Walter Briscoe (1998 Jul 01)
  39  */
  40
  41 #include "vim.h"
  42
  43 #undef DEBUG
  44
  45 /*
  46  * The "internal use only" fields in regexp.h are present to pass info from
  47  * compile to execute that permits the execute phase to run lots faster on
  48  * simple cases.  They are:
  49  *
  50  * regstart     char that must begin a match; NUL if none obvious; Can be a
  51  *              multi-byte character.
  52  * reganch      is the match anchored (at beginning-of-line only)?
  53  * regmust      string (pointer into program) that match must include, or NULL
  54  * regmlen      length of regmust string
  55  * regflags     RF_ values or'ed together
  56  *
  57  * Regstart and reganch permit very fast decisions on suitable starting points
  58  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  59  * of lines that cannot possibly match.  The regmust tests are costly enough
  60  * that vim_regcomp() supplies a regmust only if the r.e. contains something
  61  * potentially expensive (at present, the only such thing detected is * or +
  62  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  63  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
  64  * computing it anyway.
  65  */
  66
  67 /*
  68  * Structure for regexp "program".  This is essentially a linear encoding
  69  * of a nondeterministic finite-state machine (aka syntax charts or
  70  * "railroad normal form" in parsing technology).  Each node is an opcode
  71  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
   72  * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
  73  * pointer with a BRANCH on both ends of it is connecting two alternatives.
  74  * (Here we have one of the subtle syntax dependencies: an individual BRANCH
  75  * (as opposed to a collection of them) is never concatenated with anything
   76  * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
  77  * node points to the node after the stuff to be repeated.
  78  * The operand of some types of node is a literal string; for others, it is a
  79  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
  80  * is the first node of the branch.
  81  * (NB this is *not* a tree structure: the tail of the branch connects to the
  82  * thing following the set of BRANCHes.)
  83  *
  84  * pattern      is coded like:
  85  *
  86  *                        +-----------------+
  87  *                        |                 V
  88  * <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
  89  *                   |      ^    |          ^
  90  *                   +------+    +----------+
  91  *
  92  *
  93  *                     +------------------+
  94  *                     V                  |
  95  * <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
  96  *                   |      |               ^                      ^
  97  *                   |      +---------------+                      |
  98  *                   +---------------------------------------------+
  99  *
 100  *
 101  *                     +----------------------+
 102  *                     V                      |
 103  * <aa>\+       BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
 104  *                   |               |           ^                      ^
 105  *                   |               +-----------+                      |
 106  *                   +--------------------------------------------------+
 107  *
 108  *
 109  *                                      +-------------------------+
 110  *                                      V                         |
 111  * <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
 112  *                   |                              |                ^
 113  *                   |                              +----------------+
 114  *                   +-----------------------------------------------+
 115  *
 116  *
 117  * <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
 118  *                   |       |                ^       ^
 119  *                   |       +----------------+       |
 120  *                   +--------------------------------+
 121  *
 122  *                                                    +---------+
 123  *                                                    |         V
 124  * \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
 125  *                   |      |          |          |     ^                   ^
 126  *                   |      |          |          +-----+                   |
 127  *                   |      |          +----------------+                   |
 128  *                   |      +---------------------------+                   |
 129  *                   +------------------------------------------------------+
 130  *
 131  * They all start with a BRANCH for "\|" alternatives, even when there is only
 132  * one alternative.
 133  */
 134
 135 /*
 136  * The opcodes are:
 137  */
 138
 139 /* definition   number             opnd?    meaning */
 140 #define END             0       /*      End of program or NOMATCH operand. */
 141 #define BOL             1       /*      Match "" at beginning of line. */
 142 #define EOL             2       /*      Match "" at end of line. */
 143 #define BRANCH          3       /* node Match this alternative, or the
 144                                  *      next... */
 145 #define BACK            4       /*      Match "", "next" ptr points backward. */
 146 #define EXACTLY         5       /* str  Match this string. */
 147 #define NOTHING         6       /*      Match empty string. */
 148 #define STAR            7       /* node Match this (simple) thing 0 or more
 149                                  *      times. */
 150 #define PLUS            8       /* node Match this (simple) thing 1 or more
 151                                  *      times. */
 152 #define MATCH           9       /* node match the operand zero-width */
 153 #define NOMATCH         10      /* node check for no match with operand */
 154 #define BEHIND          11      /* node look behind for a match with operand */
 155 #define NOBEHIND        12      /* node look behind for no match with operand */
 156 #define SUBPAT          13      /* node match the operand here */
 157 #define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and
 158                                  *      n times (\{m,n\}). */
 159 #define BOW             15      /*      Match "" after [^a-zA-Z0-9_] */
 160 #define EOW             16      /*      Match "" at    [^a-zA-Z0-9_] */
 161 #define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE
 162                                  *      and BRACE_COMPLEX. */
 163 #define NEWL            18      /*      Match line-break */
 164 #define BHPOS           19      /*      End position for BEHIND or NOBEHIND */
 165
 166
 167 /* character classes: 20-48 normal, 50-78 include a line-break */
 168 #define ADD_NL          30
 169 #define FIRST_NL        ANY + ADD_NL
 170 #define ANY             20      /*      Match any one character. */
 171 #define ANYOF           21      /* str  Match any character in this string. */
 172 #define ANYBUT          22      /* str  Match any character not in this
 173                                  *      string. */
 174 #define IDENT           23      /*      Match identifier char */
 175 #define SIDENT          24      /*      Match identifier char but no digit */
 176 #define KWORD           25      /*      Match keyword char */
 177 #define SKWORD          26      /*      Match word char but no digit */
 178 #define FNAME           27      /*      Match file name char */
 179 #define SFNAME          28      /*      Match file name char but no digit */
 180 #define PRINT           29      /*      Match printable char */
 181 #define SPRINT          30      /*      Match printable char but no digit */
 182 #define WHITE           31      /*      Match whitespace char */
 183 #define NWHITE          32      /*      Match non-whitespace char */
 184 #define DIGIT           33      /*      Match digit char */
 185 #define NDIGIT          34      /*      Match non-digit char */
 186 #define HEX             35      /*      Match hex char */
 187 #define NHEX            36      /*      Match non-hex char */
 188 #define OCTAL           37      /*      Match octal char */
 189 #define NOCTAL          38      /*      Match non-octal char */
 190 #define WORD            39      /*      Match word char */
 191 #define NWORD           40      /*      Match non-word char */
 192 #define HEAD            41      /*      Match head char */
 193 #define NHEAD           42      /*      Match non-head char */
 194 #define ALPHA           43      /*      Match alpha char */
 195 #define NALPHA          44      /*      Match non-alpha char */
 196 #define LOWER           45      /*      Match lowercase char */
 197 #define NLOWER          46      /*      Match non-lowercase char */
 198 #define UPPER           47      /*      Match uppercase char */
 199 #define NUPPER          48      /*      Match non-uppercase char */
 200 #define LAST_NL         NUPPER + ADD_NL
 201 #define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
 202
 203 #define MOPEN           80  /* -89       Mark this point in input as start of
 204                                  *       \( subexpr.  MOPEN + 0 marks start of
 205                                  *       match. */
 206 #define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
 207                                  *       end of match. */
 208 #define BACKREF         100 /* -109 node Match same string again \1-\9 */
 209
 210 #ifdef FEAT_SYN_HL
 211 # define ZOPEN          110 /* -119      Mark this point in input as start of
 212                                  *       \z( subexpr. */
 213 # define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
 214 # define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
 215 #endif
 216
 217 #define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */
 218
 219 #define NOPEN           150     /*      Mark this point in input as start of
 220                                         \%( subexpr. */
 221 #define NCLOSE          151     /*      Analogous to NOPEN. */
 222
 223 #define MULTIBYTECODE   200     /* mbc  Match one multi-byte character */
 224 #define RE_BOF          201     /*      Match "" at beginning of file. */
 225 #define RE_EOF          202     /*      Match "" at end of file. */
 226 #define CURSOR          203     /*      Match location of cursor. */
 227
 228 #define RE_LNUM         204     /* nr cmp  Match line number */
 229 #define RE_COL          205     /* nr cmp  Match column number */
 230 #define RE_VCOL         206     /* nr cmp  Match virtual column number */
 231
 232 #define RE_MARK         207     /* mark cmp  Match mark position */
 233 #define RE_VISUAL       208     /*      Match Visual area */
 234
 235 /*
 236  * Magic characters have a special meaning, they don't match literally.
 237  * Magic characters are negative.  This separates them from literal characters
 238  * (possibly multi-byte).  Only ASCII characters can be Magic.
 239  */
 240 #define Magic(x)        ((int)(x) - 256)
 241 #define un_Magic(x)     ((x) + 256)
 242 #define is_Magic(x)     ((x) < 0)
 243
 244 static int no_Magic __ARGS((int x));
 245 static int toggle_Magic __ARGS((int x));
 246
 247     static int
 248 no_Magic(x)
 249     int         x;
 250 {
 251     if (is_Magic(x))
 252         return un_Magic(x);
 253     return x;
 254 }
 255
 256     static int
 257 toggle_Magic(x)
 258     int         x;
 259 {
 260     if (is_Magic(x))
 261         return un_Magic(x);
 262     return Magic(x);
 263 }
 264
 265 /*
 266  * The first byte of the regexp internal "program" is actually this magic
 267  * number; the start node begins in the second byte.  It's used to catch the
 268  * most severe mutilation of the program by the caller.
 269  */
 270
 271 #define REGMAGIC        0234
 272
 273 /*
 274  * Opcode notes:
 275  *
 276  * BRANCH       The set of branches constituting a single choice are hooked
 277  *              together with their "next" pointers, since precedence prevents
 278  *              anything being concatenated to any individual branch.  The
 279  *              "next" pointer of the last BRANCH in a choice points to the
 280  *              thing following the whole choice.  This is also where the
 281  *              final "next" pointer of each individual branch points; each
 282  *              branch starts with the operand node of a BRANCH node.
 283  *
 284  * BACK         Normal "next" pointers all implicitly point forward; BACK
 285  *              exists to make loop structures possible.
 286  *
 287  * STAR,PLUS    '=', and complex '*' and '+', are implemented as circular
 288  *              BRANCH structures using BACK.  Simple cases (one character
 289  *              per match) are implemented with STAR and PLUS for speed
 290  *              and to minimize recursive plunges.
 291  *
 292  * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
 293  *              node, and defines the min and max limits to be used for that
 294  *              node.
 295  *
 296  * MOPEN,MCLOSE ...are numbered at compile time.
 297  * ZOPEN,ZCLOSE ...ditto
 298  */
 299
 300 /*
 301  * A node is one char of opcode followed by two chars of "next" pointer.
 302  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
 303  * value is a positive offset from the opcode of the node containing it.
 304  * An operand, if any, simply follows the node.  (Note that much of the
 305  * code generation knows about this implicit relationship.)
 306  *
 307  * Using two bytes for the "next" pointer is vast overkill for most things,
 308  * but allows patterns to get big without disasters.
 309  */
 310 #define OP(p)           ((int)*(p))
 311 #define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
 312 #define OPERAND(p)      ((p) + 3)
 313 /* Obtain an operand that was stored as four bytes, MSB first. */
 314 #define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 315                         + ((long)(p)[5] << 8) + (long)(p)[6])
 316 /* Obtain a second operand stored as four bytes. */
 317 #define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
 318 /* Obtain a second single-byte operand stored after a four bytes operand. */
 319 #define OPERAND_CMP(p)  (p)[7]
 320
 321 /*
 322  * Utility definitions.
 323  */
 324 #define UCHARAT(p)      ((int)*(char_u *)(p))
 325
 326 /* Used for an error (down from) vim_regcomp(): give the error message, set
 327  * rc_did_emsg and return NULL */
 328 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
 329 #define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
 330 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
 331 #define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
 332
 333 #define MAX_LIMIT       (32767L << 16L)
 334
 335 static int re_multi_type __ARGS((int));
 336 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
 337 static char_u *cstrchr __ARGS((char_u *, int));
 338
 339 #ifdef DEBUG
 340 static void     regdump __ARGS((char_u *, regprog_T *));
 341 static char_u   *regprop __ARGS((char_u *));
 342 #endif
 343
 344 #define NOT_MULTI       0
 345 #define MULTI_ONE       1
 346 #define MULTI_MULT      2
 347 /*
 348  * Return NOT_MULTI if c is not a "multi" operator.
 349  * Return MULTI_ONE if c is a single "multi" operator.
 350  * Return MULTI_MULT if c is a multi "multi" operator.
 351  */
 352     static int
 353 re_multi_type(c)
 354     int c;
 355 {
 356     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
 357         return MULTI_ONE;
 358     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
 359         return MULTI_MULT;
 360     return NOT_MULTI;
 361 }
 362
 363 /*
 364  * Flags to be passed up and down.
 365  */
 366 #define HASWIDTH        0x1     /* Known never to match null string. */
 367 #define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
 368 #define SPSTART         0x4     /* Starts with * or +. */
 369 #define HASNL           0x8     /* Contains some \n. */
 370 #define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
 371 #define WORST           0       /* Worst case. */
 372
 373 /*
 374  * When regcode is set to this value, code is not emitted and size is computed
 375  * instead.
 376  */
 377 #define JUST_CALC_SIZE  ((char_u *) -1)
 378
 379 static char_u           *reg_prev_sub = NULL;
 380
 381 /*
 382  * REGEXP_INRANGE contains all characters which are always special in a []
 383  * range after '\'.
 384  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
 385  * These are:
 386  *  \n  - New line (NL).
 387  *  \r  - Carriage Return (CR).
 388  *  \t  - Tab (TAB).
 389  *  \e  - Escape (ESC).
 390  *  \b  - Backspace (Ctrl_H).
 391  *  \d  - Character code in decimal, eg \d123
  392  *  \o  - Character code in octal, eg \o40
 393  *  \x  - Character code in hex, eg \x4a
 394  *  \u  - Multibyte character code, eg \u20ac
 395  *  \U  - Long multibyte character code, eg \U12345678
 396  */
 397 static char_u REGEXP_INRANGE[] = "]^-n\\";
 398 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
 399
 400 static int      backslash_trans __ARGS((int c));
 401 static int      get_char_class __ARGS((char_u **pp));
 402 static int      get_equi_class __ARGS((char_u **pp));
 403 static void     reg_equi_class __ARGS((int c));
 404 static int      get_coll_element __ARGS((char_u **pp));
 405 static char_u   *skip_anyof __ARGS((char_u *p));
 406 static void     init_class_tab __ARGS((void));
 407
 408 /*
 409  * Translate '\x' to its control character, except "\n", which is Magic.
 410  */
 411     static int
 412 backslash_trans(c)
 413     int         c;
 414 {
 415     switch (c)
 416     {
 417         case 'r':   return CAR;
 418         case 't':   return TAB;
 419         case 'e':   return ESC;
 420         case 'b':   return BS;
 421     }
 422     return c;
 423 }
 424
 425 /*
 426  * Check for a character class name "[:name:]".  "pp" points to the '['.
 427  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
 428  * recognized.  Otherwise "pp" is advanced to after the item.
 429  */
 430     static int
 431 get_char_class(pp)
 432     char_u      **pp;
 433 {
 434     static const char *(class_names[]) =
 435     {
 436         "alnum:]",
 437 #define CLASS_ALNUM 0
 438         "alpha:]",
 439 #define CLASS_ALPHA 1
 440         "blank:]",
 441 #define CLASS_BLANK 2
 442         "cntrl:]",
 443 #define CLASS_CNTRL 3
 444         "digit:]",
 445 #define CLASS_DIGIT 4
 446         "graph:]",
 447 #define CLASS_GRAPH 5
 448         "lower:]",
 449 #define CLASS_LOWER 6
 450         "print:]",
 451 #define CLASS_PRINT 7
 452         "punct:]",
 453 #define CLASS_PUNCT 8
 454         "space:]",
 455 #define CLASS_SPACE 9
 456         "upper:]",
 457 #define CLASS_UPPER 10
 458         "xdigit:]",
 459 #define CLASS_XDIGIT 11
 460         "tab:]",
 461 #define CLASS_TAB 12
 462         "return:]",
 463 #define CLASS_RETURN 13
 464         "backspace:]",
 465 #define CLASS_BACKSPACE 14
 466         "escape:]",
 467 #define CLASS_ESCAPE 15
 468     };
 469 #define CLASS_NONE 99
 470     int i;
 471
 472     if ((*pp)[1] == ':')
 473     {
 474         for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
 475             if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
 476             {
 477                 *pp += STRLEN(class_names[i]) + 2;
 478                 return i;
 479             }
 480     }
 481     return CLASS_NONE;
 482 }
 483
 484 /*
 485  * Specific version of character class functions.
 486  * Using a table to keep this fast.
 487  */
 488 static short    class_tab[256];
 489
 490 #define     RI_DIGIT    0x01
 491 #define     RI_HEX      0x02
 492 #define     RI_OCTAL    0x04
 493 #define     RI_WORD     0x08
 494 #define     RI_HEAD     0x10
 495 #define     RI_ALPHA    0x20
 496 #define     RI_LOWER    0x40
 497 #define     RI_UPPER    0x80
 498 #define     RI_WHITE    0x100
 499
 500     static void
 501 init_class_tab()
 502 {
 503     int         i;
 504     static int  done = FALSE;
 505
 506     if (done)
 507         return;
 508
 509     for (i = 0; i < 256; ++i)
 510     {
 511         if (i >= '0' && i <= '7')
 512             class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
 513         else if (i >= '8' && i <= '9')
 514             class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
 515         else if (i >= 'a' && i <= 'f')
 516             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 517 #ifdef EBCDIC
 518         else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
 519                                                     || (i >= 's' && i <= 'z'))
 520 #else
 521         else if (i >= 'g' && i <= 'z')
 522 #endif
 523             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
 524         else if (i >= 'A' && i <= 'F')
 525             class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 526 #ifdef EBCDIC
 527         else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
 528                                                     || (i >= 'S' && i <= 'Z'))
 529 #else
 530         else if (i >= 'G' && i <= 'Z')
 531 #endif
 532             class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
 533         else if (i == '_')
 534             class_tab[i] = RI_WORD + RI_HEAD;
 535         else
 536             class_tab[i] = 0;
 537     }
 538     class_tab[' '] |= RI_WHITE;
 539     class_tab['\t'] |= RI_WHITE;
 540     done = TRUE;
 541 }
 542
 543 #ifdef FEAT_MBYTE
 544 # define ri_digit(c)    (c < 0x100 && (class_tab[c] & RI_DIGIT))
 545 # define ri_hex(c)      (c < 0x100 && (class_tab[c] & RI_HEX))
 546 # define ri_octal(c)    (c < 0x100 && (class_tab[c] & RI_OCTAL))
 547 # define ri_word(c)     (c < 0x100 && (class_tab[c] & RI_WORD))
 548 # define ri_head(c)     (c < 0x100 && (class_tab[c] & RI_HEAD))
 549 # define ri_alpha(c)    (c < 0x100 && (class_tab[c] & RI_ALPHA))
 550 # define ri_lower(c)    (c < 0x100 && (class_tab[c] & RI_LOWER))
 551 # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))
 552 # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE))
 553 #else
 554 # define ri_digit(c)    (class_tab[c] & RI_DIGIT)
 555 # define ri_hex(c)      (class_tab[c] & RI_HEX)
 556 # define ri_octal(c)    (class_tab[c] & RI_OCTAL)
 557 # define ri_word(c)     (class_tab[c] & RI_WORD)
 558 # define ri_head(c)     (class_tab[c] & RI_HEAD)
 559 # define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
 560 # define ri_lower(c)    (class_tab[c] & RI_LOWER)
 561 # define ri_upper(c)    (class_tab[c] & RI_UPPER)
 562 # define ri_white(c)    (class_tab[c] & RI_WHITE)
 563 #endif
 564
 565 /* flags for regflags */
 566 #define RF_ICASE    1   /* ignore case */
 567 #define RF_NOICASE  2   /* don't ignore case */
 568 #define RF_HASNL    4   /* can match a NL */
 569 #define RF_ICOMBINE 8   /* ignore combining characters */
 570 #define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
 571
 572 /*
 573  * Global work variables for vim_regcomp().
 574  */
 575
 576 static char_u   *regparse;      /* Input-scan pointer. */
 577 static int      prevchr_len;    /* byte length of previous char */
 578 static int      num_complex_braces; /* Complex \{...} count */
 579 static int      regnpar;        /* () count. */
 580 #ifdef FEAT_SYN_HL
 581 static int      regnzpar;       /* \z() count. */
 582 static int      re_has_z;       /* \z item detected */
 583 #endif
 584 static char_u   *regcode;       /* Code-emit pointer, or JUST_CALC_SIZE */
 585 static long     regsize;        /* Code size. */
 586 static char_u   had_endbrace[NSUBEXP];  /* flags, TRUE if end of () found */
 587 static unsigned regflags;       /* RF_ flags for prog */
 588 static long     brace_min[10];  /* Minimums for complex brace repeats */
 589 static long     brace_max[10];  /* Maximums for complex brace repeats */
 590 static int      brace_count[10]; /* Current counts for complex brace repeats */
 591 #if defined(FEAT_SYN_HL) || defined(PROTO)
 592 static int      had_eol;        /* TRUE when EOL found by vim_regcomp() */
 593 #endif
 594 static int      one_exactly = FALSE;    /* only do one char for EXACTLY */
 595
 596 static int      reg_magic;      /* magicness of the pattern: */
 597 #define MAGIC_NONE      1       /* "\V" very unmagic */
 598 #define MAGIC_OFF       2       /* "\M" or 'magic' off */
 599 #define MAGIC_ON        3       /* "\m" or 'magic' */
 600 #define MAGIC_ALL       4       /* "\v" very magic */
 601
 602 static int      reg_string;     /* matching with a string instead of a buffer
 603                                    line */
 604 static int      reg_strict;     /* "[abc" is illegal */
 605
 606 /*
 607  * META contains all characters that may be magic, except '^' and '$'.
 608  */
 609
 610 #ifdef EBCDIC
 611 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
 612 #else
 613 /* META[] is used often enough to justify turning it into a table. */
 614 static char_u META_flags[] = {
 615     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 616     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 617 /*                 %  &     (  )  *  +        .    */
 618     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
 619 /*     1  2  3  4  5  6  7  8  9        <  =  >  ? */
 620     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
 621 /*  @  A     C  D     F     H  I     K  L  M     O */
 622     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
 623 /*  P        S     U  V  W  X     Z  [           _ */
 624     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
 625 /*     a     c  d     f     h  i     k  l  m  n  o */
 626     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
 627 /*  p        s     u  v  w  x     z  {  |     ~    */
 628     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
 629 };
 630 #endif
 631
 632 static int      curchr;
 633
 634 /* arguments for reg() */
 635 #define REG_NOPAREN     0       /* toplevel reg() */
 636 #define REG_PAREN       1       /* \(\) */
 637 #define REG_ZPAREN      2       /* \z(\) */
 638 #define REG_NPAREN      3       /* \%(\) */
 639
 640 /*
 641  * Forward declarations for vim_regcomp()'s friends.
 642  */
 643 static void     initchr __ARGS((char_u *));
 644 static int      getchr __ARGS((void));
 645 static void     skipchr_keepstart __ARGS((void));
 646 static int      peekchr __ARGS((void));
 647 static void     skipchr __ARGS((void));
 648 static void     ungetchr __ARGS((void));
 649 static int      gethexchrs __ARGS((int maxinputlen));
 650 static int      getoctchrs __ARGS((void));
 651 static int      getdecchrs __ARGS((void));
 652 static int      coll_get_char __ARGS((void));
 653 static void     regcomp_start __ARGS((char_u *expr, int flags));
 654 static char_u   *reg __ARGS((int, int *));
 655 static char_u   *regbranch __ARGS((int *flagp));
 656 static char_u   *regconcat __ARGS((int *flagp));
 657 static char_u   *regpiece __ARGS((int *));
 658 static char_u   *regatom __ARGS((int *));
 659 static char_u   *regnode __ARGS((int));
 660 #ifdef FEAT_MBYTE
 661 static int      use_multibytecode __ARGS((int c));
 662 #endif
 663 static int      prog_magic_wrong __ARGS((void));
 664 static char_u   *regnext __ARGS((char_u *));
 665 static void     regc __ARGS((int b));
 666 #ifdef FEAT_MBYTE
 667 static void     regmbc __ARGS((int c));
 668 #else
 669 # define regmbc(c) regc(c)
 670 #endif
 671 static void     reginsert __ARGS((int, char_u *));
 672 static void     reginsert_limits __ARGS((int, long, long, char_u *));
 673 static char_u   *re_put_long __ARGS((char_u *pr, long_u val));
 674 static int      read_limits __ARGS((long *, long *));
 675 static void     regtail __ARGS((char_u *, char_u *));
 676 static void     regoptail __ARGS((char_u *, char_u *));
 677
 678 /*
 679  * Return TRUE if compiled regular expression "prog" can match a line break.
 680  */
 681     int
 682 re_multiline(prog)
 683     regprog_T *prog;
 684 {
 685     return (prog->regflags & RF_HASNL);
 686 }
 687
 688 /*
 689  * Return TRUE if compiled regular expression "prog" looks before the start
 690  * position (pattern contains "\@<=" or "\@<!").
 691  */
 692     int
 693 re_lookbehind(prog)
 694     regprog_T *prog;
 695 {
 696     return (prog->regflags & RF_LOOKBH);
 697 }
 698
 699 /*
 700  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
 701  * Returns a character representing the class. Zero means that no item was
 702  * recognized.  Otherwise "pp" is advanced to after the item.
 703  */
 704     static int
 705 get_equi_class(pp)
 706     char_u      **pp;
 707 {
 708     int         c;
 709     int         l = 1;
 710     char_u      *p = *pp;
 711
 712     if (p[1] == '=')
 713     {
 714 #ifdef FEAT_MBYTE
 715         if (has_mbyte)
 716             l = (*mb_ptr2len)(p + 2);
 717 #endif
 718         if (p[l + 2] == '=' && p[l + 3] == ']')
 719         {
 720 #ifdef FEAT_MBYTE
 721             if (has_mbyte)
 722                 c = mb_ptr2char(p + 2);
 723             else
 724 #endif
 725                 c = p[2];
 726             *pp += l + 4;
 727             return c;
 728         }
 729     }
 730     return 0;
 731 }
 732
 733 /*
 734  * Produce the bytes for equivalence class "c".
 735  * Currently only handles latin1, latin9 and utf-8.
 736  */
 737     static void
 738 reg_equi_class(c)
 739     int     c;
 740 {
 741 #ifdef FEAT_MBYTE
 742     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
 743                                          || STRCMP(p_enc, "iso-8859-15") == 0)
 744 #endif
 745     {
 746         switch (c)
 747         {
 748             case 'A': case '\300': case '\301': case '\302':
 749             case '\303': case '\304': case '\305':
 750                       regmbc('A'); regmbc('\300'); regmbc('\301');
 751                       regmbc('\302'); regmbc('\303'); regmbc('\304');
 752                       regmbc('\305');
 753                       return;
 754             case 'C': case '\307':
 755                       regmbc('C'); regmbc('\307');
 756                       return;
 757             case 'E': case '\310': case '\311': case '\312': case '\313':
 758                       regmbc('E'); regmbc('\310'); regmbc('\311');
 759                       regmbc('\312'); regmbc('\313');
 760                       return;
 761             case 'I': case '\314': case '\315': case '\316': case '\317':
 762                       regmbc('I'); regmbc('\314'); regmbc('\315');
 763                       regmbc('\316'); regmbc('\317');
 764                       return;
 765             case 'N': case '\321':
 766                       regmbc('N'); regmbc('\321');
 767                       return;
 768             case 'O': case '\322': case '\323': case '\324': case '\325':
 769             case '\326':
 770                       regmbc('O'); regmbc('\322'); regmbc('\323');
 771                       regmbc('\324'); regmbc('\325'); regmbc('\326');
 772                       return;
 773             case 'U': case '\331': case '\332': case '\333': case '\334':
 774                       regmbc('U'); regmbc('\331'); regmbc('\332');
 775                       regmbc('\333'); regmbc('\334');
 776                       return;
 777             case 'Y': case '\335':
 778                       regmbc('Y'); regmbc('\335');
 779                       return;
 780             case 'a': case '\340': case '\341': case '\342':
 781             case '\343': case '\344': case '\345':
 782                       regmbc('a'); regmbc('\340'); regmbc('\341');
 783                       regmbc('\342'); regmbc('\343'); regmbc('\344');
 784                       regmbc('\345');
 785                       return;
 786             case 'c': case '\347':
 787                       regmbc('c'); regmbc('\347');
 788                       return;
 789             case 'e': case '\350': case '\351': case '\352': case '\353':
 790                       regmbc('e'); regmbc('\350'); regmbc('\351');
 791                       regmbc('\352'); regmbc('\353');
 792                       return;
 793             case 'i': case '\354': case '\355': case '\356': case '\357':
 794                       regmbc('i'); regmbc('\354'); regmbc('\355');
 795                       regmbc('\356'); regmbc('\357');
 796                       return;
 797             case 'n': case '\361':
 798                       regmbc('n'); regmbc('\361');
 799                       return;
 800             case 'o': case '\362': case '\363': case '\364': case '\365':
 801             case '\366':
 802                       regmbc('o'); regmbc('\362'); regmbc('\363');
 803                       regmbc('\364'); regmbc('\365'); regmbc('\366');
 804                       return;
 805             case 'u': case '\371': case '\372': case '\373': case '\374':
 806                       regmbc('u'); regmbc('\371'); regmbc('\372');
 807                       regmbc('\373'); regmbc('\374');
 808                       return;
 809             case 'y': case '\375': case '\377':
 810                       regmbc('y'); regmbc('\375'); regmbc('\377');
 811                       return;
 812         }
 813     }
 814     regmbc(c);
 815 }
 816
 817 /*
 818  * Check for a collating element "[.a.]".  "pp" points to the '['.
 819  * Returns a character. Zero means that no item was recognized.  Otherwise
 820  * "pp" is advanced to after the item.
 821  * Currently only single characters are recognized!
 822  */
 823     static int
 824 get_coll_element(pp)
 825     char_u      **pp;
 826 {
 827     int         c;
 828     int         l = 1;
 829     char_u      *p = *pp;
 830
 831     if (p[1] == '.')
 832     {
 833 #ifdef FEAT_MBYTE
 834         if (has_mbyte)
 835             l = (*mb_ptr2len)(p + 2);
 836 #endif
 837         if (p[l + 2] == '.' && p[l + 3] == ']')
 838         {
 839 #ifdef FEAT_MBYTE
 840             if (has_mbyte)
 841                 c = mb_ptr2char(p + 2);
 842             else
 843 #endif
 844                 c = p[2];
 845             *pp += l + 4;
 846             return c;
 847         }
 848     }
 849     return 0;
 850 }
 851
 852
 853 /*
 854  * Skip over a "[]" range.
 855  * "p" must point to the character after the '['.
 856  * The returned pointer is on the matching ']', or the terminating NUL.
 857  */
 858     static char_u *
 859 skip_anyof(p)
 860     char_u      *p;
 861 {
 862     int         cpo_lit;        /* 'cpoptions' contains 'l' flag */
 863     int         cpo_bsl;        /* 'cpoptions' contains '\' flag */
 864 #ifdef FEAT_MBYTE
 865     int         l;
 866 #endif
 867
 868     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
 869     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
 870
 871     if (*p == '^')      /* Complement of range. */
 872         ++p;
 873     if (*p == ']' || *p == '-')
 874         ++p;
 875     while (*p != NUL && *p != ']')
 876     {
 877 #ifdef FEAT_MBYTE
 878         if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
 879             p += l;
 880         else
 881 #endif
 882             if (*p == '-')
 883             {
 884                 ++p;
 885                 if (*p != ']' && *p != NUL)
 886                     mb_ptr_adv(p);
 887             }
 888         else if (*p == '\\'
 889                 && !cpo_bsl
 890                 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
 891                     || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
 892             p += 2;
 893         else if (*p == '[')
 894         {
 895             if (get_char_class(&p) == CLASS_NONE
 896                     && get_equi_class(&p) == 0
 897                     && get_coll_element(&p) == 0)
 898                 ++p; /* It was not a class name */
 899         }
 900         else
 901             ++p;
 902     }
 903
 904     return p;
 905 }
 906
 907 /*
 908  * Skip past regular expression.
 909  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
 910  * Take care of characters with a backslash in front of it.
 911  * Skip strings inside [ and ].
 912  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
 913  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
 914  * is changed in-place.
 915  */
 916     char_u *
 917 skip_regexp(startp, dirc, magic, newp)
 918     char_u      *startp;
 919     int         dirc;
 920     int         magic;
 921     char_u      **newp;
 922 {
 923     int         mymagic;
 924     char_u      *p = startp;
 925
 926     if (magic)
 927         mymagic = MAGIC_ON;
 928     else
 929         mymagic = MAGIC_OFF;
 930
 931     for (; p[0] != NUL; mb_ptr_adv(p))
 932     {
 933         if (p[0] == dirc)       /* found end of regexp */
 934             break;
 935         if ((p[0] == '[' && mymagic >= MAGIC_ON)
 936                 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
 937         {
 938             p = skip_anyof(p + 1);
 939             if (p[0] == NUL)
 940                 break;
 941         }
 942         else if (p[0] == '\\' && p[1] != NUL)
 943         {
 944             if (dirc == '?' && newp != NULL && p[1] == '?')
 945             {
 946                 /* change "\?" to "?", make a copy first. */
 947                 if (*newp == NULL)
 948                 {
 949                     *newp = vim_strsave(startp);
 950                     if (*newp != NULL)
 951                         p = *newp + (p - startp);
 952                 }
 953                 if (*newp != NULL)
 954                     mch_memmove(p, p + 1, STRLEN(p));
 955                 else
 956                     ++p;
 957             }
 958             else
 959                 ++p;    /* skip next character */
 960             if (*p == 'v')
 961                 mymagic = MAGIC_ALL;
 962             else if (*p == 'V')
 963                 mymagic = MAGIC_NONE;
 964         }
 965     }
 966     return p;
 967 }
 968
 969 /*
 970  * vim_regcomp() - compile a regular expression into internal code
 971  * Returns the program in allocated space.  Returns NULL for an error.
 972  *
 973  * We can't allocate space until we know how big the compiled form will be,
 974  * but we can't compile it (and thus know how big it is) until we've got a
 975  * place to put the code.  So we cheat:  we compile it twice, once with code
 976  * generation turned off and size counting turned on, and once "for real".
 977  * This also means that we don't allocate space until we are sure that the
 978  * thing really will compile successfully, and we never have to move the
 979  * code and thus invalidate pointers into it.  (Note that it has to be in
 980  * one piece because vim_free() must be able to free it all.)
 981  *
 982  * Whether upper/lower case is to be ignored is decided when executing the
 983  * program, it does not matter here.
 984  *
 985  * Beware that the optimization-preparation code in here knows about some
 986  * of the structure of the compiled regexp.
 987  * "re_flags": RE_MAGIC and/or RE_STRING.
 988  */
 989     regprog_T *
 990 vim_regcomp(expr, re_flags)
 991     char_u      *expr;
 992     int         re_flags;
 993 {
 994     regprog_T   *r;
 995     char_u      *scan;
 996     char_u      *longest;
 997     int         len;
 998     int         flags;
 999
1000     if (expr == NULL)
1001         EMSG_RET_NULL(_(e_null));
1002
1003     init_class_tab();
1004
1005     /*
1006      * First pass: determine size, legality.
1007      */
1008     regcomp_start(expr, re_flags);
1009     regcode = JUST_CALC_SIZE;
1010     regc(REGMAGIC);
1011     if (reg(REG_NOPAREN, &flags) == NULL)
1012         return NULL;
1013
1014     /* Small enough for pointer-storage convention? */
1015 #ifdef SMALL_MALLOC             /* 16 bit storage allocation */
1016     if (regsize >= 65536L - 256L)
1017         EMSG_RET_NULL(_("E339: Pattern too long"));
1018 #endif
1019
1020     /* Allocate space. */
1021     r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
1022     if (r == NULL)
1023         return NULL;
1024
1025     /*
1026      * Second pass: emit code.
1027      */
1028     regcomp_start(expr, re_flags);
1029     regcode = r->program;
1030     regc(REGMAGIC);
1031     if (reg(REG_NOPAREN, &flags) == NULL)
1032     {
1033         vim_free(r);
1034         return NULL;
1035     }
1036
1037     /* Dig out information for optimizations. */
1038     r->regstart = NUL;          /* Worst-case defaults. */
1039     r->reganch = 0;
1040     r->regmust = NULL;
1041     r->regmlen = 0;
1042     r->regflags = regflags;
1043     if (flags & HASNL)
1044         r->regflags |= RF_HASNL;
1045     if (flags & HASLOOKBH)
1046         r->regflags |= RF_LOOKBH;
1047 #ifdef FEAT_SYN_HL
1048     /* Remember whether this pattern has any \z specials in it. */
1049     r->reghasz = re_has_z;
1050 #endif
1051     scan = r->program + 1;      /* First BRANCH. */
1052     if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1053     {
1054         scan = OPERAND(scan);
1055
1056         /* Starting-point info. */
1057         if (OP(scan) == BOL || OP(scan) == RE_BOF)
1058         {
1059             r->reganch++;
1060             scan = regnext(scan);
1061         }
1062
1063         if (OP(scan) == EXACTLY)
1064         {
1065 #ifdef FEAT_MBYTE
1066             if (has_mbyte)
1067                 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1068             else
1069 #endif
1070                 r->regstart = *OPERAND(scan);
1071         }
1072         else if ((OP(scan) == BOW
1073                     || OP(scan) == EOW
1074                     || OP(scan) == NOTHING
1075                     || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1076                     || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1077                  && OP(regnext(scan)) == EXACTLY)
1078         {
1079 #ifdef FEAT_MBYTE
1080             if (has_mbyte)
1081                 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1082             else
1083 #endif
1084                 r->regstart = *OPERAND(regnext(scan));
1085         }
1086
1087         /*
1088          * If there's something expensive in the r.e., find the longest
1089          * literal string that must appear and make it the regmust.  Resolve
1090          * ties in favor of later strings, since the regstart check works
1091          * with the beginning of the r.e. and avoiding duplication
1092          * strengthens checking.  Not a strong reason, but sufficient in the
1093          * absence of others.
1094          */
1095         /*
1096          * When the r.e. starts with BOW, it is faster to look for a regmust
1097          * first. Used a lot for "#" and "*" commands. (Added by mool).
1098          */
1099         if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1100                                                           && !(flags & HASNL))
1101         {
1102             longest = NULL;
1103             len = 0;
1104             for (; scan != NULL; scan = regnext(scan))
1105                 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1106                 {
1107                     longest = OPERAND(scan);
1108                     len = (int)STRLEN(OPERAND(scan));
1109                 }
1110             r->regmust = longest;
1111             r->regmlen = len;
1112         }
1113     }
1114 #ifdef DEBUG
1115     regdump(expr, r);
1116 #endif
1117     return r;
1118 }
1119
1120 /*
1121  * Setup to parse the regexp.  Used once to get the length and once to do it.
1122  */
1123     static void
1124 regcomp_start(expr, re_flags)
1125     char_u      *expr;
1126     int         re_flags;           /* see vim_regcomp() */
1127 {
1128     initchr(expr);
1129     if (re_flags & RE_MAGIC)
1130         reg_magic = MAGIC_ON;
1131     else
1132         reg_magic = MAGIC_OFF;
1133     reg_string = (re_flags & RE_STRING);
1134     reg_strict = (re_flags & RE_STRICT);
1135
1136     num_complex_braces = 0;
1137     regnpar = 1;
1138     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1139 #ifdef FEAT_SYN_HL
1140     regnzpar = 1;
1141     re_has_z = 0;
1142 #endif
1143     regsize = 0L;
1144     regflags = 0;
1145 #if defined(FEAT_SYN_HL) || defined(PROTO)
1146     had_eol = FALSE;
1147 #endif
1148 }
1149
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Return the "had_eol" flag, which tells whether the EOL item "$" was found
 * during the previous call to vim_regcomp().  This is messy, but it works
 * fine.
 */
    int
vim_regcomp_had_eol()
{
    int         saw_eol = had_eol;

    return saw_eol;
}
#endif
1161
1162 /*
1163  * reg - regular expression, i.e. main body or parenthesized thing
1164  *
1165  * Caller must absorb opening parenthesis.
1166  *
1167  * Combining parenthesis handling with the base level of regular expression
1168  * is a trifle forced, but the need to tie the tails of the branches to what
1169  * follows makes it hard to avoid.
1170  */
1171     static char_u *
1172 reg(paren, flagp)
1173     int         paren;  /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1174     int         *flagp;
1175 {
1176     char_u      *ret;
1177     char_u      *br;
1178     char_u      *ender;
1179     int         parno = 0;
1180     int         flags;
1181
1182     *flagp = HASWIDTH;          /* Tentatively. */
1183
1184 #ifdef FEAT_SYN_HL
1185     if (paren == REG_ZPAREN)
1186     {
1187         /* Make a ZOPEN node. */
1188         if (regnzpar >= NSUBEXP)
1189             EMSG_RET_NULL(_("E50: Too many \\z("));
1190         parno = regnzpar;
1191         regnzpar++;
1192         ret = regnode(ZOPEN + parno);
1193     }
1194     else
1195 #endif
1196         if (paren == REG_PAREN)
1197     {
1198         /* Make a MOPEN node. */
1199         if (regnpar >= NSUBEXP)
1200             EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1201         parno = regnpar;
1202         ++regnpar;
1203         ret = regnode(MOPEN + parno);
1204     }
1205     else if (paren == REG_NPAREN)
1206     {
1207         /* Make a NOPEN node. */
1208         ret = regnode(NOPEN);
1209     }
1210     else
1211         ret = NULL;
1212
1213     /* Pick up the branches, linking them together. */
1214     br = regbranch(&flags);
1215     if (br == NULL)
1216         return NULL;
1217     if (ret != NULL)
1218         regtail(ret, br);       /* [MZ]OPEN -> first. */
1219     else
1220         ret = br;
1221     /* If one of the branches can be zero-width, the whole thing can.
1222      * If one of the branches has * at start or matches a line-break, the
1223      * whole thing can. */
1224     if (!(flags & HASWIDTH))
1225         *flagp &= ~HASWIDTH;
1226     *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1227     while (peekchr() == Magic('|'))
1228     {
1229         skipchr();
1230         br = regbranch(&flags);
1231         if (br == NULL)
1232             return NULL;
1233         regtail(ret, br);       /* BRANCH -> BRANCH. */
1234         if (!(flags & HASWIDTH))
1235             *flagp &= ~HASWIDTH;
1236         *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1237     }
1238
1239     /* Make a closing node, and hook it on the end. */
1240     ender = regnode(
1241 #ifdef FEAT_SYN_HL
1242             paren == REG_ZPAREN ? ZCLOSE + parno :
1243 #endif
1244             paren == REG_PAREN ? MCLOSE + parno :
1245             paren == REG_NPAREN ? NCLOSE : END);
1246     regtail(ret, ender);
1247
1248     /* Hook the tails of the branches to the closing node. */
1249     for (br = ret; br != NULL; br = regnext(br))
1250         regoptail(br, ender);
1251
1252     /* Check for proper termination. */
1253     if (paren != REG_NOPAREN && getchr() != Magic(')'))
1254     {
1255 #ifdef FEAT_SYN_HL
1256         if (paren == REG_ZPAREN)
1257             EMSG_RET_NULL(_("E52: Unmatched \\z("));
1258         else
1259 #endif
1260             if (paren == REG_NPAREN)
1261             EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
1262         else
1263             EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
1264     }
1265     else if (paren == REG_NOPAREN && peekchr() != NUL)
1266     {
1267         if (curchr == Magic(')'))
1268             EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
1269         else
1270             EMSG_RET_NULL(_(e_trailing));       /* "Can't happen". */
1271         /* NOTREACHED */
1272     }
1273     /*
1274      * Here we set the flag allowing back references to this set of
1275      * parentheses.
1276      */
1277     if (paren == REG_PAREN)
1278         had_endbrace[parno] = TRUE;     /* have seen the close paren */
1279     return ret;
1280 }
1281
1282 /*
1283  * Handle one alternative of an | operator.
1284  * Implements the & operator.
1285  */
1286     static char_u *
1287 regbranch(flagp)
1288     int         *flagp;
1289 {
1290     char_u      *ret;
1291     char_u      *chain = NULL;
1292     char_u      *latest;
1293     int         flags;
1294
1295     *flagp = WORST | HASNL;             /* Tentatively. */
1296
1297     ret = regnode(BRANCH);
1298     for (;;)
1299     {
1300         latest = regconcat(&flags);
1301         if (latest == NULL)
1302             return NULL;
1303         /* If one of the branches has width, the whole thing has.  If one of
1304          * the branches anchors at start-of-line, the whole thing does.
1305          * If one of the branches uses look-behind, the whole thing does. */
1306         *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1307         /* If one of the branches doesn't match a line-break, the whole thing
1308          * doesn't. */
1309         *flagp &= ~HASNL | (flags & HASNL);
1310         if (chain != NULL)
1311             regtail(chain, latest);
1312         if (peekchr() != Magic('&'))
1313             break;
1314         skipchr();
1315         regtail(latest, regnode(END)); /* operand ends */
1316         reginsert(MATCH, latest);
1317         chain = latest;
1318     }
1319
1320     return ret;
1321 }
1322
1323 /*
1324  * Handle one alternative of an | or & operator.
1325  * Implements the concatenation operator.
1326  */
1327     static char_u *
1328 regconcat(flagp)
1329     int         *flagp;
1330 {
1331     char_u      *first = NULL;
1332     char_u      *chain = NULL;
1333     char_u      *latest;
1334     int         flags;
1335     int         cont = TRUE;
1336
1337     *flagp = WORST;             /* Tentatively. */
1338
1339     while (cont)
1340     {
1341         switch (peekchr())
1342         {
1343             case NUL:
1344             case Magic('|'):
1345             case Magic('&'):
1346             case Magic(')'):
1347                             cont = FALSE;
1348                             break;
1349             case Magic('Z'):
1350 #ifdef FEAT_MBYTE
1351                             regflags |= RF_ICOMBINE;
1352 #endif
1353                             skipchr_keepstart();
1354                             break;
1355             case Magic('c'):
1356                             regflags |= RF_ICASE;
1357                             skipchr_keepstart();
1358                             break;
1359             case Magic('C'):
1360                             regflags |= RF_NOICASE;
1361                             skipchr_keepstart();
1362                             break;
1363             case Magic('v'):
1364                             reg_magic = MAGIC_ALL;
1365                             skipchr_keepstart();
1366                             curchr = -1;
1367                             break;
1368             case Magic('m'):
1369                             reg_magic = MAGIC_ON;
1370                             skipchr_keepstart();
1371                             curchr = -1;
1372                             break;
1373             case Magic('M'):
1374                             reg_magic = MAGIC_OFF;
1375                             skipchr_keepstart();
1376                             curchr = -1;
1377                             break;
1378             case Magic('V'):
1379                             reg_magic = MAGIC_NONE;
1380                             skipchr_keepstart();
1381                             curchr = -1;
1382                             break;
1383             default:
1384                             latest = regpiece(&flags);
1385                             if (latest == NULL)
1386                                 return NULL;
1387                             *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1388                             if (chain == NULL)  /* First piece. */
1389                                 *flagp |= flags & SPSTART;
1390                             else
1391                                 regtail(chain, latest);
1392                             chain = latest;
1393                             if (first == NULL)
1394                                 first = latest;
1395                             break;
1396         }
1397     }
1398     if (first == NULL)          /* Loop ran zero times. */
1399         first = regnode(NOTHING);
1400     return first;
1401 }
1402
1403 /*
1404  * regpiece - something followed by possible [*+=]
1405  *
1406  * Note that the branching code sequences used for = and the general cases
1407  * of * and + are somewhat optimized:  they use the same NOTHING node as
1408  * both the endmarker for their branch list and the body of the last branch.
1409  * It might seem that this node could be dispensed with entirely, but the
1410  * endmarker role is not redundant.
1411  */
1412     static char_u *
1413 regpiece(flagp)
1414     int             *flagp;
1415 {
1416     char_u          *ret;
1417     int             op;
1418     char_u          *next;
1419     int             flags;
1420     long            minval;
1421     long            maxval;
1422
1423     ret = regatom(&flags);
1424     if (ret == NULL)
1425         return NULL;
1426
1427     op = peekchr();
1428     if (re_multi_type(op) == NOT_MULTI)
1429     {
1430         *flagp = flags;
1431         return ret;
1432     }
1433     /* default flags */
1434     *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1435
1436     skipchr();
1437     switch (op)
1438     {
1439         case Magic('*'):
1440             if (flags & SIMPLE)
1441                 reginsert(STAR, ret);
1442             else
1443             {
1444                 /* Emit x* as (x&|), where & means "self". */
1445                 reginsert(BRANCH, ret); /* Either x */
1446                 regoptail(ret, regnode(BACK));  /* and loop */
1447                 regoptail(ret, ret);    /* back */
1448                 regtail(ret, regnode(BRANCH));  /* or */
1449                 regtail(ret, regnode(NOTHING)); /* null. */
1450             }
1451             break;
1452
1453         case Magic('+'):
1454             if (flags & SIMPLE)
1455                 reginsert(PLUS, ret);
1456             else
1457             {
1458                 /* Emit x+ as x(&|), where & means "self". */
1459                 next = regnode(BRANCH); /* Either */
1460                 regtail(ret, next);
1461                 regtail(regnode(BACK), ret);    /* loop back */
1462                 regtail(next, regnode(BRANCH)); /* or */
1463                 regtail(ret, regnode(NOTHING)); /* null. */
1464             }
1465             *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1466             break;
1467
1468         case Magic('@'):
1469             {
1470                 int     lop = END;
1471
1472                 switch (no_Magic(getchr()))
1473                 {
1474                     case '=': lop = MATCH; break;                 /* \@= */
1475                     case '!': lop = NOMATCH; break;               /* \@! */
1476                     case '>': lop = SUBPAT; break;                /* \@> */
1477                     case '<': switch (no_Magic(getchr()))
1478                               {
1479                                   case '=': lop = BEHIND; break;   /* \@<= */
1480                                   case '!': lop = NOBEHIND; break; /* \@<! */
1481                               }
1482                 }
1483                 if (lop == END)
1484                     EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
1485                                                       reg_magic == MAGIC_ALL);
1486                 /* Look behind must match with behind_pos. */
1487                 if (lop == BEHIND || lop == NOBEHIND)
1488                 {
1489                     regtail(ret, regnode(BHPOS));
1490                     *flagp |= HASLOOKBH;
1491                 }
1492                 regtail(ret, regnode(END)); /* operand ends */
1493                 reginsert(lop, ret);
1494                 break;
1495             }
1496
1497         case Magic('?'):
1498         case Magic('='):
1499             /* Emit x= as (x|) */
1500             reginsert(BRANCH, ret);             /* Either x */
1501             regtail(ret, regnode(BRANCH));      /* or */
1502             next = regnode(NOTHING);            /* null. */
1503             regtail(ret, next);
1504             regoptail(ret, next);
1505             break;
1506
1507         case Magic('{'):
1508             if (!read_limits(&minval, &maxval))
1509                 return NULL;
1510             if (flags & SIMPLE)
1511             {
1512                 reginsert(BRACE_SIMPLE, ret);
1513                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1514             }
1515             else
1516             {
1517                 if (num_complex_braces >= 10)
1518                     EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
1519                                                       reg_magic == MAGIC_ALL);
1520                 reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1521                 regoptail(ret, regnode(BACK));
1522                 regoptail(ret, ret);
1523                 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1524                 ++num_complex_braces;
1525             }
1526             if (minval > 0 && maxval > 0)
1527                 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1528             break;
1529     }
1530     if (re_multi_type(peekchr()) != NOT_MULTI)
1531     {
1532         /* Can't have a multi follow a multi. */
1533         if (peekchr() == Magic('*'))
1534             sprintf((char *)IObuff, _("E61: Nested %s*"),
1535                                             reg_magic >= MAGIC_ON ? "" : "\\");
1536         else
1537             sprintf((char *)IObuff, _("E62: Nested %s%c"),
1538                 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1539         EMSG_RET_NULL(IObuff);
1540     }
1541
1542     return ret;
1543 }
1544
1545 /*
1546  * regatom - the lowest level
1547  *
1548  * Optimization:  gobbles an entire sequence of ordinary characters so that
1549  * it can turn them into a single node, which is smaller to store and
1550  * faster to run.  Don't do this when one_exactly is set.
1551  */
1552     static char_u *
1553 regatom(flagp)
1554     int            *flagp;
1555 {
1556     char_u          *ret;
1557     int             flags;
1558     int             cpo_lit;        /* 'cpoptions' contains 'l' flag */
1559     int             cpo_bsl;        /* 'cpoptions' contains '\' flag */
1560     int             c;
1561     static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1562     static int      classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1563                                     FNAME, SFNAME, PRINT, SPRINT,
1564                                     WHITE, NWHITE, DIGIT, NDIGIT,
1565                                     HEX, NHEX, OCTAL, NOCTAL,
1566                                     WORD, NWORD, HEAD, NHEAD,
1567                                     ALPHA, NALPHA, LOWER, NLOWER,
1568                                     UPPER, NUPPER
1569                                     };
1570     char_u          *p;
1571     int             extra = 0;
1572
1573     *flagp = WORST;             /* Tentatively. */
1574     cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1575     cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1576
1577     c = getchr();
1578     switch (c)
1579     {
1580       case Magic('^'):
1581         ret = regnode(BOL);
1582         break;
1583
1584       case Magic('$'):
1585         ret = regnode(EOL);
1586 #if defined(FEAT_SYN_HL) || defined(PROTO)
1587         had_eol = TRUE;
1588 #endif
1589         break;
1590
1591       case Magic('<'):
1592         ret = regnode(BOW);
1593         break;
1594
1595       case Magic('>'):
1596         ret = regnode(EOW);
1597         break;
1598
1599       case Magic('_'):
1600         c = no_Magic(getchr());
1601         if (c == '^')           /* "\_^" is start-of-line */
1602         {
1603             ret = regnode(BOL);
1604             break;
1605         }
1606         if (c == '$')           /* "\_$" is end-of-line */
1607         {
1608             ret = regnode(EOL);
1609 #if defined(FEAT_SYN_HL) || defined(PROTO)
1610             had_eol = TRUE;
1611 #endif
1612             break;
1613         }
1614
1615         extra = ADD_NL;
1616         *flagp |= HASNL;
1617
1618         /* "\_[" is character range plus newline */
1619         if (c == '[')
1620             goto collection;
1621
1622         /* "\_x" is character class plus newline */
1623         /*FALLTHROUGH*/
1624
1625         /*
1626          * Character classes.
1627          */
1628       case Magic('.'):
1629       case Magic('i'):
1630       case Magic('I'):
1631       case Magic('k'):
1632       case Magic('K'):
1633       case Magic('f'):
1634       case Magic('F'):
1635       case Magic('p'):
1636       case Magic('P'):
1637       case Magic('s'):
1638       case Magic('S'):
1639       case Magic('d'):
1640       case Magic('D'):
1641       case Magic('x'):
1642       case Magic('X'):
1643       case Magic('o'):
1644       case Magic('O'):
1645       case Magic('w'):
1646       case Magic('W'):
1647       case Magic('h'):
1648       case Magic('H'):
1649       case Magic('a'):
1650       case Magic('A'):
1651       case Magic('l'):
1652       case Magic('L'):
1653       case Magic('u'):
1654       case Magic('U'):
1655         p = vim_strchr(classchars, no_Magic(c));
1656         if (p == NULL)
1657             EMSG_RET_NULL(_("E63: invalid use of \\_"));
1658 #ifdef FEAT_MBYTE
1659         /* When '.' is followed by a composing char ignore the dot, so that
1660          * the composing char is matched here. */
1661         if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1662         {
1663             c = getchr();
1664             goto do_multibyte;
1665         }
1666 #endif
1667         ret = regnode(classcodes[p - classchars] + extra);
1668         *flagp |= HASWIDTH | SIMPLE;
1669         break;
1670
1671       case Magic('n'):
1672         if (reg_string)
1673         {
1674             /* In a string "\n" matches a newline character. */
1675             ret = regnode(EXACTLY);
1676             regc(NL);
1677             regc(NUL);
1678             *flagp |= HASWIDTH | SIMPLE;
1679         }
1680         else
1681         {
1682             /* In buffer text "\n" matches the end of a line. */
1683             ret = regnode(NEWL);
1684             *flagp |= HASWIDTH | HASNL;
1685         }
1686         break;
1687
1688       case Magic('('):
1689         if (one_exactly)
1690             EMSG_ONE_RET_NULL;
1691         ret = reg(REG_PAREN, &flags);
1692         if (ret == NULL)
1693             return NULL;
1694         *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1695         break;
1696
1697       case NUL:
1698       case Magic('|'):
1699       case Magic('&'):
1700       case Magic(')'):
1701         if (one_exactly)
1702             EMSG_ONE_RET_NULL;
1703         EMSG_RET_NULL(_(e_internal));   /* Supposed to be caught earlier. */
1704         /* NOTREACHED */
1705
1706       case Magic('='):
1707       case Magic('?'):
1708       case Magic('+'):
1709       case Magic('@'):
1710       case Magic('{'):
1711       case Magic('*'):
1712         c = no_Magic(c);
1713         sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
1714                 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
1715                 ? "" : "\\", c);
1716         EMSG_RET_NULL(IObuff);
1717         /* NOTREACHED */
1718
1719       case Magic('~'):          /* previous substitute pattern */
1720             if (reg_prev_sub != NULL)
1721             {
1722                 char_u      *lp;
1723
1724                 ret = regnode(EXACTLY);
1725                 lp = reg_prev_sub;
1726                 while (*lp != NUL)
1727                     regc(*lp++);
1728                 regc(NUL);
1729                 if (*reg_prev_sub != NUL)
1730                 {
1731                     *flagp |= HASWIDTH;
1732                     if ((lp - reg_prev_sub) == 1)
1733                         *flagp |= SIMPLE;
1734                 }
1735             }
1736             else
1737                 EMSG_RET_NULL(_(e_nopresub));
1738             break;
1739
1740       case Magic('1'):
1741       case Magic('2'):
1742       case Magic('3'):
1743       case Magic('4'):
1744       case Magic('5'):
1745       case Magic('6'):
1746       case Magic('7'):
1747       case Magic('8'):
1748       case Magic('9'):
1749             {
1750                 int                 refnum;
1751
1752                 refnum = c - Magic('0');
1753                 /*
1754                  * Check if the back reference is legal. We must have seen the
1755                  * close brace.
1756                  * TODO: Should also check that we don't refer to something
1757                  * that is repeated (+*=): what instance of the repetition
1758                  * should we match?
1759                  */
1760                 if (!had_endbrace[refnum])
1761                 {
1762                     /* Trick: check if "@<=" or "@<!" follows, in which case
1763                      * the \1 can appear before the referenced match. */
1764                     for (p = regparse; *p != NUL; ++p)
1765                         if (p[0] == '@' && p[1] == '<'
1766                                               && (p[2] == '!' || p[2] == '='))
1767                             break;
1768                     if (*p == NUL)
1769                         EMSG_RET_NULL(_("E65: Illegal back reference"));
1770                 }
1771                 ret = regnode(BACKREF + refnum);
1772             }
1773             break;
1774
1775       case Magic('z'):
1776         {
1777             c = no_Magic(getchr());
1778             switch (c)
1779             {
1780 #ifdef FEAT_SYN_HL
1781                 case '(': if (reg_do_extmatch != REX_SET)
1782                               EMSG_RET_NULL(_("E66: \\z( not allowed here"));
1783                           if (one_exactly)
1784                               EMSG_ONE_RET_NULL;
1785                           ret = reg(REG_ZPAREN, &flags);
1786                           if (ret == NULL)
1787                               return NULL;
1788                           *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1789                           re_has_z = REX_SET;
1790                           break;
1791
1792                 case '1':
1793                 case '2':
1794                 case '3':
1795                 case '4':
1796                 case '5':
1797                 case '6':
1798                 case '7':
1799                 case '8':
1800                 case '9': if (reg_do_extmatch != REX_USE)
1801                               EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
1802                           ret = regnode(ZREF + c - '0');
1803                           re_has_z = REX_USE;
1804                           break;
1805 #endif
1806
1807                 case 's': ret = regnode(MOPEN + 0);
1808                           break;
1809
1810                 case 'e': ret = regnode(MCLOSE + 0);
1811                           break;
1812
1813                 default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1814             }
1815         }
1816         break;
1817
1818       case Magic('%'):
1819         {
1820             c = no_Magic(getchr());
1821             switch (c)
1822             {
1823                 /* () without a back reference */
1824                 case '(':
1825                     if (one_exactly)
1826                         EMSG_ONE_RET_NULL;
1827                     ret = reg(REG_NPAREN, &flags);
1828                     if (ret == NULL)
1829                         return NULL;
1830                     *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1831                     break;
1832
1833                 /* Catch \%^ and \%$ regardless of where they appear in the
1834                  * pattern -- regardless of whether or not it makes sense. */
1835                 case '^':
1836                     ret = regnode(RE_BOF);
1837                     break;
1838
1839                 case '$':
1840                     ret = regnode(RE_EOF);
1841                     break;
1842
1843                 case '#':
1844                     ret = regnode(CURSOR);
1845                     break;
1846
1847                 case 'V':
1848                     ret = regnode(RE_VISUAL);
1849                     break;
1850
1851                 /* \%[abc]: Emit as a list of branches, all ending at the last
1852                  * branch which matches nothing. */
1853                 case '[':
1854                           if (one_exactly)      /* doesn't nest */
1855                               EMSG_ONE_RET_NULL;
1856                           {
1857                               char_u    *lastbranch;
1858                               char_u    *lastnode = NULL;
1859                               char_u    *br;
1860
1861                               ret = NULL;
1862                               while ((c = getchr()) != ']')
1863                               {
1864                                   if (c == NUL)
1865                                       EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
1866                                                       reg_magic == MAGIC_ALL);
1867                                   br = regnode(BRANCH);
1868                                   if (ret == NULL)
1869                                       ret = br;
1870                                   else
1871                                       regtail(lastnode, br);
1872
1873                                   ungetchr();
1874                                   one_exactly = TRUE;
1875                                   lastnode = regatom(flagp);
1876                                   one_exactly = FALSE;
1877                                   if (lastnode == NULL)
1878                                       return NULL;
1879                               }
1880                               if (ret == NULL)
1881                                   EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
1882                                                       reg_magic == MAGIC_ALL);
1883                               lastbranch = regnode(BRANCH);
1884                               br = regnode(NOTHING);
1885                               if (ret != JUST_CALC_SIZE)
1886                               {
1887                                   regtail(lastnode, br);
1888                                   regtail(lastbranch, br);
1889                                   /* connect all branches to the NOTHING
1890                                    * branch at the end */
1891                                   for (br = ret; br != lastnode; )
1892                                   {
1893                                       if (OP(br) == BRANCH)
1894                                       {
1895                                           regtail(br, lastbranch);
1896                                           br = OPERAND(br);
1897                                       }
1898                                       else
1899                                           br = regnext(br);
1900                                   }
1901                               }
1902                               *flagp &= ~HASWIDTH;
1903                               break;
1904                           }
1905
1906                 case 'd':   /* %d123 decimal */
1907                 case 'o':   /* %o123 octal */
1908                 case 'x':   /* %xab hex 2 */
1909                 case 'u':   /* %uabcd hex 4 */
1910                 case 'U':   /* %U1234abcd hex 8 */
1911                           {
1912                               int i;
1913
1914                               switch (c)
1915                               {
1916                                   case 'd': i = getdecchrs(); break;
1917                                   case 'o': i = getoctchrs(); break;
1918                                   case 'x': i = gethexchrs(2); break;
1919                                   case 'u': i = gethexchrs(4); break;
1920                                   case 'U': i = gethexchrs(8); break;
1921                                   default:  i = -1; break;
1922                               }
1923
1924                               if (i < 0)
1925                                   EMSG_M_RET_NULL(
1926                                         _("E678: Invalid character after %s%%[dxouU]"),
1927                                         reg_magic == MAGIC_ALL);
1928 #ifdef FEAT_MBYTE
1929                               if (use_multibytecode(i))
1930                                   ret = regnode(MULTIBYTECODE);
1931                               else
1932 #endif
1933                                   ret = regnode(EXACTLY);
1934                               if (i == 0)
1935                                   regc(0x0a);
1936                               else
1937 #ifdef FEAT_MBYTE
1938                                   regmbc(i);
1939 #else
1940                                   regc(i);
1941 #endif
1942                               regc(NUL);
1943                               *flagp |= HASWIDTH;
1944                               break;
1945                           }
1946
1947                 default:
1948                           if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1949                                                                  || c == '\'')
1950                           {
1951                               long_u    n = 0;
1952                               int       cmp;
1953
1954                               cmp = c;
1955                               if (cmp == '<' || cmp == '>')
1956                                   c = getchr();
1957                               while (VIM_ISDIGIT(c))
1958                               {
1959                                   n = n * 10 + (c - '0');
1960                                   c = getchr();
1961                               }
1962                               if (c == '\'' && n == 0)
1963                               {
1964                                   /* "\%'m", "\%<'m" and "\%>'m": Mark */
1965                                   c = getchr();
1966                                   ret = regnode(RE_MARK);
1967                                   if (ret == JUST_CALC_SIZE)
1968                                       regsize += 2;
1969                                   else
1970                                   {
1971                                       *regcode++ = c;
1972                                       *regcode++ = cmp;
1973                                   }
1974                                   break;
1975                               }
1976                               else if (c == 'l' || c == 'c' || c == 'v')
1977                               {
1978                                   if (c == 'l')
1979                                       ret = regnode(RE_LNUM);
1980                                   else if (c == 'c')
1981                                       ret = regnode(RE_COL);
1982                                   else
1983                                       ret = regnode(RE_VCOL);
1984                                   if (ret == JUST_CALC_SIZE)
1985                                       regsize += 5;
1986                                   else
1987                                   {
1988                                       /* put the number and the optional
1989                                        * comparator after the opcode */
1990                                       regcode = re_put_long(regcode, n);
1991                                       *regcode++ = cmp;
1992                                   }
1993                                   break;
1994                               }
1995                           }
1996
1997                           EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
1998                                                       reg_magic == MAGIC_ALL);
1999             }
2000         }
2001         break;
2002
2003       case Magic('['):
2004 collection:
2005         {
2006             char_u      *lp;
2007
2008             /*
2009              * If there is no matching ']', we assume the '[' is a normal
2010              * character.  This makes 'incsearch' and ":help [" work.
2011              */
2012             lp = skip_anyof(regparse);
2013             if (*lp == ']')     /* there is a matching ']' */
2014             {
2015                 int     startc = -1;    /* > 0 when next '-' is a range */
2016                 int     endc;
2017
2018                 /*
2019                  * In a character class, different parsing rules apply.
2020                  * Not even \ is special anymore, nothing is.
2021                  */
2022                 if (*regparse == '^')       /* Complement of range. */
2023                 {
2024                     ret = regnode(ANYBUT + extra);
2025                     regparse++;
2026                 }
2027                 else
2028                     ret = regnode(ANYOF + extra);
2029
2030                 /* At the start ']' and '-' mean the literal character. */
2031                 if (*regparse == ']' || *regparse == '-')
2032                 {
2033                     startc = *regparse;
2034                     regc(*regparse++);
2035                 }
2036
2037                 while (*regparse != NUL && *regparse != ']')
2038                 {
2039                     if (*regparse == '-')
2040                     {
2041                         ++regparse;
2042                         /* The '-' is not used for a range at the end and
2043                          * after or before a '\n'. */
2044                         if (*regparse == ']' || *regparse == NUL
2045                                 || startc == -1
2046                                 || (regparse[0] == '\\' && regparse[1] == 'n'))
2047                         {
2048                             regc('-');
2049                             startc = '-';       /* [--x] is a range */
2050                         }
2051                         else
2052                         {
2053                             /* Also accept "a-[.z.]" */
2054                             endc = 0;
2055                             if (*regparse == '[')
2056                                 endc = get_coll_element(&regparse);
2057                             if (endc == 0)
2058                             {
2059 #ifdef FEAT_MBYTE
2060                                 if (has_mbyte)
2061                                     endc = mb_ptr2char_adv(&regparse);
2062                                 else
2063 #endif
2064                                     endc = *regparse++;
2065                             }
2066
2067                             /* Handle \o40, \x20 and \u20AC style sequences */
2068                             if (endc == '\\' && !cpo_lit && !cpo_bsl)
2069                                 endc = coll_get_char();
2070
2071                             if (startc > endc)
2072                                 EMSG_RET_NULL(_(e_invrange));
2073 #ifdef FEAT_MBYTE
2074                             if (has_mbyte && ((*mb_char2len)(startc) > 1
2075                                                  || (*mb_char2len)(endc) > 1))
2076                             {
2077                                 /* Limit to a range of 256 chars */
2078                                 if (endc > startc + 256)
2079                                     EMSG_RET_NULL(_(e_invrange));
2080                                 while (++startc <= endc)
2081                                     regmbc(startc);
2082                             }
2083                             else
2084 #endif
2085                             {
2086 #ifdef EBCDIC
2087                                 int     alpha_only = FALSE;
2088
2089                                 /* for alphabetical range skip the gaps
2090                                  * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2091                                 if (isalpha(startc) && isalpha(endc))
2092                                     alpha_only = TRUE;
2093 #endif
2094                                 while (++startc <= endc)
2095 #ifdef EBCDIC
2096                                     if (!alpha_only || isalpha(startc))
2097 #endif
2098                                         regc(startc);
2099                             }
2100                             startc = -1;
2101                         }
2102                     }
2103                     /*
2104                      * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2105                      * accepts "\t", "\e", etc., but only when the 'l' flag in
2106                      * 'cpoptions' is not included.
2107                      * Posix doesn't recognize backslash at all.
2108                      */
2109                     else if (*regparse == '\\'
2110                             && !cpo_bsl
2111                             && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2112                                 || (!cpo_lit
2113                                     && vim_strchr(REGEXP_ABBR,
2114                                                        regparse[1]) != NULL)))
2115                     {
2116                         regparse++;
2117                         if (*regparse == 'n')
2118                         {
2119                             /* '\n' in range: also match NL */
2120                             if (ret != JUST_CALC_SIZE)
2121                             {
2122                                 if (*ret == ANYBUT)
2123                                     *ret = ANYBUT + ADD_NL;
2124                                 else if (*ret == ANYOF)
2125                                     *ret = ANYOF + ADD_NL;
2126                                 /* else: must have had a \n already */
2127                             }
2128                             *flagp |= HASNL;
2129                             regparse++;
2130                             startc = -1;
2131                         }
2132                         else if (*regparse == 'd'
2133                                 || *regparse == 'o'
2134                                 || *regparse == 'x'
2135                                 || *regparse == 'u'
2136                                 || *regparse == 'U')
2137                         {
2138                             startc = coll_get_char();
2139                             if (startc == 0)
2140                                 regc(0x0a);
2141                             else
2142 #ifdef FEAT_MBYTE
2143                                 regmbc(startc);
2144 #else
2145                                 regc(startc);
2146 #endif
2147                         }
2148                         else
2149                         {
2150                             startc = backslash_trans(*regparse++);
2151                             regc(startc);
2152                         }
2153                     }
2154                     else if (*regparse == '[')
2155                     {
2156                         int c_class;
2157                         int cu;
2158
2159                         c_class = get_char_class(&regparse);
2160                         startc = -1;
2161                         /* Characters assumed to be 8 bits! */
2162                         switch (c_class)
2163                         {
2164                             case CLASS_NONE:
2165                                 c_class = get_equi_class(&regparse);
2166                                 if (c_class != 0)
2167                                 {
2168                                     /* produce equivalence class */
2169                                     reg_equi_class(c_class);
2170                                 }
2171                                 else if ((c_class =
2172                                             get_coll_element(&regparse)) != 0)
2173                                 {
2174                                     /* produce a collating element */
2175                                     regmbc(c_class);
2176                                 }
2177                                 else
2178                                 {
2179                                     /* literal '[', allow [[-x] as a range */
2180                                     startc = *regparse++;
2181                                     regc(startc);
2182                                 }
2183                                 break;
2184                             case CLASS_ALNUM:
2185                                 for (cu = 1; cu <= 255; cu++)
2186                                     if (isalnum(cu))
2187                                         regc(cu);
2188                                 break;
2189                             case CLASS_ALPHA:
2190                                 for (cu = 1; cu <= 255; cu++)
2191                                     if (isalpha(cu))
2192                                         regc(cu);
2193                                 break;
2194                             case CLASS_BLANK:
2195                                 regc(' ');
2196                                 regc('\t');
2197                                 break;
2198                             case CLASS_CNTRL:
2199                                 for (cu = 1; cu <= 255; cu++)
2200                                     if (iscntrl(cu))
2201                                         regc(cu);
2202                                 break;
2203                             case CLASS_DIGIT:
2204                                 for (cu = 1; cu <= 255; cu++)
2205                                     if (VIM_ISDIGIT(cu))
2206                                         regc(cu);
2207                                 break;
2208                             case CLASS_GRAPH:
2209                                 for (cu = 1; cu <= 255; cu++)
2210                                     if (isgraph(cu))
2211                                         regc(cu);
2212                                 break;
2213                             case CLASS_LOWER:
2214                                 for (cu = 1; cu <= 255; cu++)
2215                                     if (MB_ISLOWER(cu))
2216                                         regc(cu);
2217                                 break;
2218                             case CLASS_PRINT:
2219                                 for (cu = 1; cu <= 255; cu++)
2220                                     if (vim_isprintc(cu))
2221                                         regc(cu);
2222                                 break;
2223                             case CLASS_PUNCT:
2224                                 for (cu = 1; cu <= 255; cu++)
2225                                     if (ispunct(cu))
2226                                         regc(cu);
2227                                 break;
2228                             case CLASS_SPACE:
2229                                 for (cu = 9; cu <= 13; cu++)
2230                                     regc(cu);
2231                                 regc(' ');
2232                                 break;
2233                             case CLASS_UPPER:
2234                                 for (cu = 1; cu <= 255; cu++)
2235                                     if (MB_ISUPPER(cu))
2236                                         regc(cu);
2237                                 break;
2238                             case CLASS_XDIGIT:
2239                                 for (cu = 1; cu <= 255; cu++)
2240                                     if (vim_isxdigit(cu))
2241                                         regc(cu);
2242                                 break;
2243                             case CLASS_TAB:
2244                                 regc('\t');
2245                                 break;
2246                             case CLASS_RETURN:
2247                                 regc('\r');
2248                                 break;
2249                             case CLASS_BACKSPACE:
2250                                 regc('\b');
2251                                 break;
2252                             case CLASS_ESCAPE:
2253                                 regc('\033');
2254                                 break;
2255                         }
2256                     }
2257                     else
2258                     {
2259 #ifdef FEAT_MBYTE
2260                         if (has_mbyte)
2261                         {
2262                             int len;
2263
2264                             /* produce a multibyte character, including any
2265                              * following composing characters */
2266                             startc = mb_ptr2char(regparse);
2267                             len = (*mb_ptr2len)(regparse);
2268                             if (enc_utf8 && utf_char2len(startc) != len)
2269                                 startc = -1;    /* composing chars */
2270                             while (--len >= 0)
2271                                 regc(*regparse++);
2272                         }
2273                         else
2274 #endif
2275                         {
2276                             startc = *regparse++;
2277                             regc(startc);
2278                         }
2279                     }
2280                 }
2281                 regc(NUL);
2282                 prevchr_len = 1;        /* last char was the ']' */
2283                 if (*regparse != ']')
2284                     EMSG_RET_NULL(_(e_toomsbra));       /* Cannot happen? */
2285                 skipchr();          /* let's be friends with the lexer again */
2286                 *flagp |= HASWIDTH | SIMPLE;
2287                 break;
2288             }
2289             else if (reg_strict)
2290                 EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
2291                                                        reg_magic > MAGIC_OFF);
2292         }
2293         /* FALLTHROUGH */
2294
2295       default:
2296         {
2297             int         len;
2298
2299 #ifdef FEAT_MBYTE
2300             /* A multi-byte character is handled as a separate atom if it's
2301              * before a multi and when it's a composing char. */
2302             if (use_multibytecode(c))
2303             {
2304 do_multibyte:
2305                 ret = regnode(MULTIBYTECODE);
2306                 regmbc(c);
2307                 *flagp |= HASWIDTH | SIMPLE;
2308                 break;
2309             }
2310 #endif
2311
2312             ret = regnode(EXACTLY);
2313
2314             /*
2315              * Append characters as long as:
2316              * - there is no following multi, we then need the character in
2317              *   front of it as a single character operand
2318              * - not running into a Magic character
2319              * - "one_exactly" is not set
2320              * But always emit at least one character.  Might be a Multi,
2321              * e.g., a "[" without matching "]".
2322              */
2323             for (len = 0; c != NUL && (len == 0
2324                         || (re_multi_type(peekchr()) == NOT_MULTI
2325                             && !one_exactly
2326                             && !is_Magic(c))); ++len)
2327             {
2328                 c = no_Magic(c);
2329 #ifdef FEAT_MBYTE
2330                 if (has_mbyte)
2331                 {
2332                     regmbc(c);
2333                     if (enc_utf8)
2334                     {
2335                         int     l;
2336
2337                         /* Need to get composing character too. */
2338                         for (;;)
2339                         {
2340                             l = utf_ptr2len(regparse);
2341                             if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2342                                 break;
2343                             regmbc(utf_ptr2char(regparse));
2344                             skipchr();
2345                         }
2346                     }
2347                 }
2348                 else
2349 #endif
2350                     regc(c);
2351                 c = getchr();
2352             }
2353             ungetchr();
2354
2355             regc(NUL);
2356             *flagp |= HASWIDTH;
2357             if (len == 1)
2358                 *flagp |= SIMPLE;
2359         }
2360         break;
2361     }
2362
2363     return ret;
2364 }
2365
2366 #ifdef FEAT_MBYTE
2367 /*
2368  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2369  * character "c".
2370  */
2371     static int
2372 use_multibytecode(c)
2373     int c;
2374 {
2375     return has_mbyte && (*mb_char2len)(c) > 1
2376                      && (re_multi_type(peekchr()) != NOT_MULTI
2377                              || (enc_utf8 && utf_iscomposing(c)));
2378 }
2379 #endif
2380
2381 /*
2382  * emit a node
2383  * Return pointer to generated code.
2384  */
2385     static char_u *
2386 regnode(op)
2387     int         op;
2388 {
2389     char_u  *ret;
2390
2391     ret = regcode;
2392     if (ret == JUST_CALC_SIZE)
2393         regsize += 3;
2394     else
2395     {
2396         *regcode++ = op;
2397         *regcode++ = NUL;               /* Null "next" pointer. */
2398         *regcode++ = NUL;
2399     }
2400     return ret;
2401 }
2402
2403 /*
2404  * Emit (if appropriate) a byte of code
2405  */
2406     static void
2407 regc(b)
2408     int         b;
2409 {
2410     if (regcode == JUST_CALC_SIZE)
2411         regsize++;
2412     else
2413         *regcode++ = b;
2414 }
2415
2416 #ifdef FEAT_MBYTE
2417 /*
2418  * Emit (if appropriate) a multi-byte character of code
2419  */
2420     static void
2421 regmbc(c)
2422     int         c;
2423 {
2424     if (regcode == JUST_CALC_SIZE)
2425         regsize += (*mb_char2len)(c);
2426     else
2427         regcode += (*mb_char2bytes)(c, regcode);
2428 }
2429 #endif
2430
2431 /*
2432  * reginsert - insert an operator in front of already-emitted operand
2433  *
2434  * Means relocating the operand.
2435  */
2436     static void
2437 reginsert(op, opnd)
2438     int         op;
2439     char_u     *opnd;
2440 {
2441     char_u      *src;
2442     char_u      *dst;
2443     char_u      *place;
2444
2445     if (regcode == JUST_CALC_SIZE)
2446     {
2447         regsize += 3;
2448         return;
2449     }
2450     src = regcode;
2451     regcode += 3;
2452     dst = regcode;
2453     while (src > opnd)
2454         *--dst = *--src;
2455
2456     place = opnd;               /* Op node, where operand used to be. */
2457     *place++ = op;
2458     *place++ = NUL;
2459     *place = NUL;
2460 }
2461
2462 /*
2463  * reginsert_limits - insert an operator in front of already-emitted operand.
2464  * The operator has the given limit values as operands.  Also set next pointer.
2465  *
2466  * Means relocating the operand.
2467  */
2468     static void
2469 reginsert_limits(op, minval, maxval, opnd)
2470     int         op;
2471     long        minval;
2472     long        maxval;
2473     char_u      *opnd;
2474 {
2475     char_u      *src;
2476     char_u      *dst;
2477     char_u      *place;
2478
2479     if (regcode == JUST_CALC_SIZE)
2480     {
2481         regsize += 11;
2482         return;
2483     }
2484     src = regcode;
2485     regcode += 11;
2486     dst = regcode;
2487     while (src > opnd)
2488         *--dst = *--src;
2489
2490     place = opnd;               /* Op node, where operand used to be. */
2491     *place++ = op;
2492     *place++ = NUL;
2493     *place++ = NUL;
2494     place = re_put_long(place, (long_u)minval);
2495     place = re_put_long(place, (long_u)maxval);
2496     regtail(opnd, place);
2497 }
2498
2499 /*
2500  * Write a long as four bytes at "p" and return pointer to the next char.
2501  */
2502     static char_u *
2503 re_put_long(p, val)
2504     char_u      *p;
2505     long_u      val;
2506 {
2507     *p++ = (char_u) ((val >> 24) & 0377);
2508     *p++ = (char_u) ((val >> 16) & 0377);
2509     *p++ = (char_u) ((val >> 8) & 0377);
2510     *p++ = (char_u) (val & 0377);
2511     return p;
2512 }
2513
2514 /*
2515  * regtail - set the next-pointer at the end of a node chain
2516  */
2517     static void
2518 regtail(p, val)
2519     char_u      *p;
2520     char_u      *val;
2521 {
2522     char_u      *scan;
2523     char_u      *temp;
2524     int         offset;
2525
2526     if (p == JUST_CALC_SIZE)
2527         return;
2528
2529     /* Find last node. */
2530     scan = p;
2531     for (;;)
2532     {
2533         temp = regnext(scan);
2534         if (temp == NULL)
2535             break;
2536         scan = temp;
2537     }
2538
2539     if (OP(scan) == BACK)
2540         offset = (int)(scan - val);
2541     else
2542         offset = (int)(val - scan);
2543     *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2544     *(scan + 2) = (char_u) (offset & 0377);
2545 }
2546
2547 /*
2548  * regoptail - regtail on item after a BRANCH; nop if none
2549  */
2550     static void
2551 regoptail(p, val)
2552     char_u      *p;
2553     char_u      *val;
2554 {
2555     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2556     if (p == NULL || p == JUST_CALC_SIZE
2557             || (OP(p) != BRANCH
2558                 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2559         return;
2560     regtail(OPERAND(p), val);
2561 }
2562
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below hold the lexer state.  initchr() resets them,
 * skipchr() shifts them one character forward and ungetchr() shifts them
 * back again.
 */

/* static int       curchr; */
static int      prevprevchr;    /* "prevchr" from before the last skipchr() */
static int      prevchr;        /* "curchr" from before the last skipchr() */
static int      nextchr;    /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
2579
2580     static void
2581 initchr(str)
2582     char_u *str;
2583 {
2584     regparse = str;
2585     prevchr_len = 0;
2586     curchr = prevprevchr = prevchr = nextchr = -1;
2587     at_start = TRUE;
2588     prev_at_start = FALSE;
2589 }

/*
 * Return the next lexed character of the pattern without consuming it.
 *
 * The result is cached in "curchr": the character is only worked out when
 * "curchr" is -1, otherwise the cached value is returned right away.
 * Depending on "reg_magic" and on what came before, the character is
 * returned either as itself or wrapped in Magic().
 * "regparse" is left where it was; for a backslash it is moved forward by
 * one only for the duration of a recursive call.
 */
    static int
peekchr()
{
    /* Non-zero while the character after a backslash is being looked at by
     * the recursive call below. */
    static int  after_slash = FALSE;

    if (curchr == -1)
    {
        switch (curchr = regparse[0])
        {
        case '.':
        case '[':
        case '~':
            /* magic when 'magic' is on */
            if (reg_magic >= MAGIC_ON)
                curchr = Magic(curchr);
            break;
        case '(':
        case ')':
        case '{':
        case '%':
        case '+':
        case '=':
        case '?':
        case '@':
        case '!':
        case '&':
        case '|':
        case '<':
        case '>':
        case '#':       /* future ext. */
        case '"':       /* future ext. */
        case '\'':      /* future ext. */
        case ',':       /* future ext. */
        case '-':       /* future ext. */
        case ':':       /* future ext. */
        case ';':       /* future ext. */
        case '`':       /* future ext. */
        case '/':       /* Can't be used in / command */
            /* magic only after "\v" */
            if (reg_magic == MAGIC_ALL)
                curchr = Magic(curchr);
            break;
        case '*':
            /* * is not magic as the very first character, eg "?*ptr", when
             * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
             * "\(\*" is not magic, thus must be magic if "after_slash" */
            if (reg_magic >= MAGIC_ON
                    && !at_start
                    && !(prev_at_start && prevchr == Magic('^'))
                    && (after_slash
                        || (prevchr != Magic('(')
                            && prevchr != Magic('&')
                            && prevchr != Magic('|'))))
                curchr = Magic('*');
            break;
        case '^':
            /* '^' is only magic as the very first character and if it's after
             * "\(", "\|", "\&" or "\n".  With "\v" it is always magic, and
             * also right after "\%(". */
            if (reg_magic >= MAGIC_OFF
                    && (at_start
                        || reg_magic == MAGIC_ALL
                        || prevchr == Magic('(')
                        || prevchr == Magic('|')
                        || prevchr == Magic('&')
                        || prevchr == Magic('n')
                        || (no_Magic(prevchr) == '('
                            && prevprevchr == Magic('%'))))
            {
                curchr = Magic('^');
                /* What follows a magic '^' counts as the first character
                 * again. */
                at_start = TRUE;
                prev_at_start = FALSE;
            }
            break;
        case '$':
            /* '$' is only magic as the very last char and if it's in front of
             * either "\|", "\)", "\&", or "\n".  With "\v" it is always
             * magic. */
            if (reg_magic >= MAGIC_OFF)
            {
                char_u *p = regparse + 1;

                /* ignore \c \C \m \M and \Z after '$' */
                while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
                                || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z'))
                    p += 2;
                if (p[0] == NUL
                        || (p[0] == '\\'
                            && (p[1] == '|' || p[1] == '&' || p[1] == ')'
                                || p[1] == 'n'))
                        || reg_magic == MAGIC_ALL)
                    curchr = Magic('$');
            }
            break;
        case '\\':
            {
                int c = regparse[1];

                if (c == NUL)
                    curchr = '\\';      /* trailing '\' */
                else if (
#ifdef EBCDIC
                        vim_strchr(META, c)
#else
                        c <= '~' && META_flags[c]
#endif
                        )
                {
                    /*
                     * META contains everything that may be magic sometimes,
                     * except ^ and $ ("\^" and "\$" are only magic after
                     * "\v").  We now fetch the next character and toggle its
                     * magicness.  Therefore, \ is so meta-magic that it is
                     * not in META.
                     */
                    /* Clear the cache so that the recursive call works out
                     * the character after the backslash; "regparse" and
                     * "after_slash" are put back right after the call. */
                    curchr = -1;
                    prev_at_start = at_start;
                    at_start = FALSE;   /* be able to say "/\*ptr" */
                    ++regparse;
                    ++after_slash;
                    peekchr();
                    --regparse;
                    --after_slash;
                    curchr = toggle_Magic(curchr);
                }
                else if (vim_strchr(REGEXP_ABBR, c))
                {
                    /*
                     * Handle abbreviations, like "\t" for TAB -- webb
                     */
                    curchr = backslash_trans(c);
                }
                else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
                    curchr = toggle_Magic(c);
                else
                {
                    /*
                     * Next character can never be (made) magic?
                     * Then backslashing it won't do anything.
                     */
#ifdef FEAT_MBYTE
                    if (has_mbyte)
                        curchr = (*mb_ptr2char)(regparse + 1);
                    else
#endif
                        curchr = c;
                }
                break;
            }

#ifdef FEAT_MBYTE
        default:
            /* Any other character: with multi-byte support get the whole
             * character instead of just its first byte. */
            if (has_mbyte)
                curchr = (*mb_ptr2char)(regparse);
#endif
        }
    }

    return curchr;
}
2749
2750 /*
2751  * Eat one lexed character.  Do this in a way that we can undo it.
2752  */
2753     static void
2754 skipchr()
2755 {
2756     /* peekchr() eats a backslash, do the same here */
2757     if (*regparse == '\\')
2758         prevchr_len = 1;
2759     else
2760         prevchr_len = 0;
2761     if (regparse[prevchr_len] != NUL)
2762     {
2763 #ifdef FEAT_MBYTE
2764         if (enc_utf8)
2765             /* exclude composing chars that mb_ptr2len does include */
2766             prevchr_len += utf_ptr2len(regparse + prevchr_len);
2767         else if (has_mbyte)
2768             prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
2769         else
2770 #endif
2771             ++prevchr_len;
2772     }
2773     regparse += prevchr_len;
2774     prev_at_start = at_start;
2775     at_start = FALSE;
2776     prevprevchr = prevchr;
2777     prevchr = curchr;
2778     curchr = nextchr;       /* use previously unget char, or -1 */
2779     nextchr = -1;
2780 }
2781
2782 /*
2783  * Skip a character while keeping the value of prev_at_start for at_start.
2784  * prevchr and prevprevchr are also kept.
2785  */
2786     static void
2787 skipchr_keepstart()
2788 {
2789     int as = prev_at_start;
2790     int pr = prevchr;
2791     int prpr = prevprevchr;
2792
2793     skipchr();
2794     at_start = as;
2795     prevchr = pr;
2796     prevprevchr = prpr;
2797 }

/*
 * Get the next lexed character from the pattern and consume it:
 * peekchr() followed by skipchr().
 */
    static int
getchr()
{
    int c;

    c = peekchr();
    skipchr();
    return c;
}
2807
2808 /*
2809  * put character back.  Works only once!
2810  */
2811     static void
2812 ungetchr()
2813 {
2814     nextchr = curchr;
2815     curchr = prevchr;
2816     prevchr = prevprevchr;
2817     at_start = prev_at_start;
2818     prev_at_start = FALSE;
2819
2820     /* Backup regparse, so that it's at the same position as before the
2821      * getchr(). */
2822     regparse -= prevchr_len;
2823 }
2824
2825 /*
2826  * Get and return the value of the hex string at the current position.
2827  * Return -1 if there is no valid hex number.
2828  * The position is updated:
2829  *     blahblah\%x20asdf
2830  *         before-^ ^-after
2831  * The parameter controls the maximum number of input characters. This will be
2832  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
2833  */
2834     static int
2835 gethexchrs(maxinputlen)
2836     int         maxinputlen;
2837 {
2838     int         nr = 0;
2839     int         c;
2840     int         i;
2841
2842     for (i = 0; i < maxinputlen; ++i)
2843     {
2844         c = regparse[0];
2845         if (!vim_isxdigit(c))
2846             break;
2847         nr <<= 4;
2848         nr |= hex2nr(c);
2849         ++regparse;
2850     }
2851
2852     if (i == 0)
2853         return -1;
2854     return nr;
2855 }
2856
2857 /*
2858  * get and return the value of the decimal string immediately after the
2859  * current position. Return -1 for invalid.  Consumes all digits.
2860  */
2861     static int
2862 getdecchrs()
2863 {
2864     int         nr = 0;
2865     int         c;
2866     int         i;
2867
2868     for (i = 0; ; ++i)
2869     {
2870         c = regparse[0];
2871         if (c < '0' || c > '9')
2872             break;
2873         nr *= 10;
2874         nr += c - '0';
2875         ++regparse;
2876     }
2877
2878     if (i == 0)
2879         return -1;
2880     return nr;
2881 }
2882
2883 /*
2884  * get and return the value of the octal string immediately after the current
2885  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
2886  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
2887  * treat 8 or 9 as recognised characters. Position is updated:
2888  *     blahblah\%o210asdf
2889  *         before-^  ^-after
2890  */
2891     static int
2892 getoctchrs()
2893 {
2894     int         nr = 0;
2895     int         c;
2896     int         i;
2897
2898     for (i = 0; i < 3 && nr < 040; ++i)
2899     {
2900         c = regparse[0];
2901         if (c < '0' || c > '7')
2902             break;
2903         nr <<= 3;
2904         nr |= hex2nr(c);
2905         ++regparse;
2906     }
2907
2908     if (i == 0)
2909         return -1;
2910     return nr;
2911 }
2912
2913 /*
2914  * Get a number after a backslash that is inside [].
2915  * When nothing is recognized return a backslash.
2916  */
2917     static int
2918 coll_get_char()
2919 {
2920     int     nr = -1;
2921
2922     switch (*regparse++)
2923     {
2924         case 'd': nr = getdecchrs(); break;
2925         case 'o': nr = getoctchrs(); break;
2926         case 'x': nr = gethexchrs(2); break;
2927         case 'u': nr = gethexchrs(4); break;
2928         case 'U': nr = gethexchrs(8); break;
2929     }
2930     if (nr < 0)
2931     {
2932         /* If getting the number fails be backwards compatible: the character
2933          * is a backslash. */
2934         --regparse;
2935         nr = '\\';
2936     }
2937     return nr;
2938 }
2939
2940 /*
2941  * read_limits - Read two integers to be taken as a minimum and maximum.
2942  * If the first character is '-', then the range is reversed.
2943  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
2944  * missing, a very big number is the default.
2945  */
2946     static int
2947 read_limits(minval, maxval)
2948     long        *minval;
2949     long        *maxval;
2950 {
2951     int         reverse = FALSE;
2952     char_u      *first_char;
2953     long        tmp;
2954
2955     if (*regparse == '-')
2956     {
2957         /* Starts with '-', so reverse the range later */
2958         regparse++;
2959         reverse = TRUE;
2960     }
2961     first_char = regparse;
2962     *minval = getdigits(&regparse);
2963     if (*regparse == ',')           /* There is a comma */
2964     {
2965         if (vim_isdigit(*++regparse))
2966             *maxval = getdigits(&regparse);
2967         else
2968             *maxval = MAX_LIMIT;
2969     }
2970     else if (VIM_ISDIGIT(*first_char))
2971         *maxval = *minval;          /* It was \{n} or \{-n} */
2972     else
2973         *maxval = MAX_LIMIT;        /* It was \{} or \{-} */
2974     if (*regparse == '\\')
2975         regparse++;     /* Allow either \{...} or \{...\} */
2976     if (*regparse != '}')
2977     {
2978         sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2979                                           reg_magic == MAGIC_ALL ? "" : "\\");
2980         EMSG_RET_FAIL(IObuff);
2981     }
2982
2983     /*
2984      * Reverse the range if there was a '-', or make sure it is in the right
2985      * order otherwise.
2986      */
2987     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
2988     {
2989         tmp = *minval;
2990         *minval = *maxval;
2991         *maxval = tmp;
2992     }
2993     skipchr();          /* let's be friends with the lexer again */
2994     return OK;
2995 }
2996
2997 /*
2998  * vim_regexec and friends
2999  */
3000
/*
 * Global work variables for vim_regexec().
 */

/* The current match-position is remembered with these variables: */
static linenr_T reglnum;        /* line number, relative to first line */
static char_u   *regline;       /* start of current line */
static char_u   *reginput;      /* current input, points into "regline" */

/* Flags telling that clearing the sub-expressions has been postponed. */
static int      need_clear_subexpr;     /* subexpressions still need to be
                                         * cleared */
#ifdef FEAT_SYN_HL
static int      need_clear_zsubexpr = FALSE;    /* extmatch subexpressions
                                                 * still need to be cleared */
#endif
3016
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Only one member of "rs_u" is meaningful, depending on whether a
 * single-line or a multi-line regexp is used.
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* reginput pointer, for single-line regexp */
        lpos_T  pos;    /* reginput pos, for multi-line regexp */
    } rs_u;
    int         rs_len; /* length of "backpos" */
} regsave_T;
3031
/* struct to save start/end pointer/position in for \(\) */
/* Filled through save_se() and put back with restore_se(). */
typedef struct
{
    union
    {
        char_u  *ptr;   /* used by restore_se() when not REG_MULTI */
        lpos_T  pos;    /* used by restore_se() when REG_MULTI */
    } se_u;
} save_se_T;
3041
/* Prototypes for the static functions used for executing a regexp. */
static char_u   *reg_getline __ARGS((linenr_T lnum));
static long     vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
static long     regtry __ARGS((regprog_T *prog, colnr_T col));
static void     cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void     cleanup_zsubexpr __ARGS((void));
#endif
static void     reg_nextline __ARGS((void));
static void     reg_save __ARGS((regsave_T *save, garray_T *gap));
static void     reg_restore __ARGS((regsave_T *save, garray_T *gap));
static int      reg_save_equal __ARGS((regsave_T *save));
static void     save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
static void     save_se_one __ARGS((save_se_T *savep, char_u **pp));

/* Save the sub-expressions before attempting a match. */
/* Note: expands to an unparenthesized "?:" expression, so only use it as a
 * statement of its own. */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/* After a failed match restore the sub-expressions. */
/* Note: expands to a block in braces, not to an expression. */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }

static int      re_num_cmp __ARGS((long_u val, char_u *scan));
static int      regmatch __ARGS((char_u *prog));
static int      regrepeat __ARGS((char_u *p, long maxcount));
3070
#ifdef DEBUG
/* Only present in a DEBUG build; DEBUG is #undef'ed near the top of this
 * file. */
int             regnarrate = 0;
#endif

/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int      ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always.
 */
static int      ireg_icombine;
#endif

/*
 * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
 * there is no maximum.
 */
static colnr_T  ireg_maxcol;

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in vim_regexec_both() when finished.
 */
static char_u   *reg_tofree = NULL;
static unsigned reg_tofreelen;  /* NOTE(review): presumably the allocated
                                 * size of "reg_tofree" -- confirm */
3103
/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              <invalid>               buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 *
 * "reg_match" being NULL is what REG_MULTI tests to tell the two cases
 * apart.
 */
static regmatch_T       *reg_match;
static regmmatch_T      *reg_mmatch;
static char_u           **reg_startp = NULL;
static char_u           **reg_endp = NULL;
static lpos_T           *reg_startpos = NULL;
static lpos_T           *reg_endpos = NULL;
static win_T            *reg_win;
static buf_T            *reg_buf;
static linenr_T         reg_firstlnum;
static linenr_T         reg_maxline;
static int              reg_line_lbr;       /* "\n" in string is line break */
3132
/* Values for rs_state in regitem_T. */
/* Each value is named after the node(s) it is used for, as given in the
 * comment behind it. */
typedef enum regstate_E
{
    RS_NOPEN = 0        /* NOPEN and NCLOSE */
    , RS_MOPEN          /* MOPEN + [0-9] */
    , RS_MCLOSE         /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    , RS_ZOPEN          /* ZOPEN + [0-9] */
    , RS_ZCLOSE         /* ZCLOSE + [0-9] */
#endif
    , RS_BRANCH         /* BRANCH */
    , RS_BRCPLX_MORE    /* BRACE_COMPLEX and trying one more match */
    , RS_BRCPLX_LONG    /* BRACE_COMPLEX and trying longest match */
    , RS_BRCPLX_SHORT   /* BRACE_COMPLEX and trying shortest match */
    , RS_NOMATCH        /* NOMATCH */
    , RS_BEHIND1        /* BEHIND / NOBEHIND matching rest */
    , RS_BEHIND2        /* BEHIND / NOBEHIND matching behind part */
    , RS_STAR_LONG      /* STAR/PLUS/BRACE_SIMPLE longest match */
    , RS_STAR_SHORT     /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3153
/*
 * When there are alternatives a regstate_T is put on the regstack to remember
 * what we are doing.
 * Before it may be another type of item, depending on rs_state, to remember
 * more things.
 */
typedef struct regitem_S
{
    regstate_T  rs_state;       /* what we are doing, one of RS_ above */
    char_u      *rs_scan;       /* current node in program */
    union
    {
        save_se_T  sesave;      /* saved start/end of a \(\) */
        regsave_T  regsave;     /* saved input state */
    } rs_un;                    /* room for saving reginput */
    short       rs_no;          /* submatch nr */
} regitem_T;

/* Push an item on / pop an item from the regstack. */
static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
static void regstack_pop __ARGS((char_u **scan));
3174
/* used for BEHIND and NOBEHIND matching */
typedef struct regbehind_S
{
    regsave_T   save_after;
    regsave_T   save_behind;
} regbehind_T;

/* used for STAR, PLUS and BRACE_SIMPLE matching */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;

/* used to store input position when a BACK was encountered, so that we know
 * if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u      *bp_scan;       /* "scan" where BACK was encountered */
    regsave_T   bp_pos;         /* last input position */
} backpos_T;
3199
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items for BACK
 */
static garray_T regstack = {0, 0, 0, 0, NULL};
static garray_T backpos = {0, 0, 0, 0, NULL};

/*
 * Both for regstack and backpos tables we use the following strategy of
 * allocation (to reduce malloc/free calls):
 * - Initial size is fairly small.
 * - When needed, the tables are grown bigger (8 times at first, double after
 *   that).
 * - After executing the match we free the memory only if the array has grown.
 *   Thus the memory is kept allocated when it's at the initial size.
 * This makes it fast while not keeping a lot of memory allocated.
 * A three times speed increase was observed when using many simple patterns.
 */
#define REGSTACK_INITIAL        2048    /* in bytes: regstack has an item
                                         * size of 1 */
#define BACKPOS_INITIAL         64
3223
#if defined(EXITFREE) || defined(PROTO)
/*
 * Free the memory that is kept allocated between calls: the saved line copy,
 * "reg_prev_sub" and the "regstack" and "backpos" arrays.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
3234
3235 /*
3236  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3237  */
3238     static char_u *
3239 reg_getline(lnum)
3240     linenr_T    lnum;
3241 {
3242     /* when looking behind for a match/no-match lnum is negative.  But we
3243      * can't go before line 1 */
3244     if (reg_firstlnum + lnum < 1)
3245         return NULL;
3246     if (lnum > reg_maxline)
3247         /* Must have matched the "\n" in the last line. */
3248         return (char_u *)"";
3249     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3250 }
3251
/* NOTE(review): presumably the input position saved for BEHIND / NOBEHIND
 * matching -- confirm where it is used. */
static regsave_T behind_pos;

#ifdef FEAT_SYN_HL
static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */
static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */
static lpos_T   reg_startzpos[NSUBEXP]; /* idem, beginning pos */
static lpos_T   reg_endzpos[NSUBEXP];   /* idem, end pos */
#endif

/* TRUE if using multi-line regexp. */
/* vim_regexec_multi() sets "reg_match" to NULL, vim_regexec() and
 * vim_regexec_nl() set it to their "rmp" argument. */
#define REG_MULTI       (reg_match == NULL)
3263
3264 /*
3265  * Match a regexp against a string.
3266  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3267  * Uses curbuf for line count and 'iskeyword'.
3268  *
3269  * Return TRUE if there is a match, FALSE if not.
3270  */
3271     int
3272 vim_regexec(rmp, line, col)
3273     regmatch_T  *rmp;
3274     char_u      *line;  /* string to match against */
3275     colnr_T     col;    /* column to start looking for match */
3276 {
3277     reg_match = rmp;
3278     reg_mmatch = NULL;
3279     reg_maxline = 0;
3280     reg_line_lbr = FALSE;
3281     reg_win = NULL;
3282     ireg_ic = rmp->rm_ic;
3283 #ifdef FEAT_MBYTE
3284     ireg_icombine = FALSE;
3285 #endif
3286     ireg_maxcol = 0;
3287     return (vim_regexec_both(line, col, NULL) != 0);
3288 }
3289
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    long        nmatched;

    /* Set up for a single-line match in which "\n" breaks a line. */
    reg_mmatch = NULL;
    reg_match = rmp;
    reg_win = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    ireg_maxcol = 0;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif

    nmatched = vim_regexec_both(line, col, NULL);
    return (nmatched != 0);
}
#endif
3314
3315 /*
3316  * Match a compiled regexp against the lines of buffer "buf".
3317  * "rmp->regprog" is the compiled regexp, as returned by vim_regcomp().
3318  * The line count comes from "buf"; curbuf is set to "buf" while matching and
3319  * restored before returning.
3320  * Returns zero when there is no match, otherwise the number of lines
3321  * contained in the match.
3322  */
3323     long
3324 vim_regexec_multi(rmp, win, buf, lnum, col, tm)
3325     regmmatch_T *rmp;           /* compiled program; receives match positions */
3326     win_T       *win;           /* window to search in, or NULL */
3327     buf_T       *buf;           /* buffer to search in */
3328     linenr_T    lnum;           /* line number where the search starts */
3329     colnr_T     col;            /* column where the search starts */
3330     proftime_T  *tm;            /* timeout limit, or NULL */
3331 {
3332     buf_T       *prev_curbuf;
3333     long        nlines;
3334
3335     /* Multi-line mode: positions go to "rmp", there is no single string. */
3336     reg_mmatch = rmp;
3337     reg_match = NULL;
3338     reg_win = win;
3339     reg_buf = buf;
3340     reg_firstlnum = lnum;
3341     reg_maxline = buf->b_ml.ml_line_count - lnum;
3342     reg_line_lbr = FALSE;
3343     ireg_maxcol = rmp->rmm_maxcol;
3344     ireg_ic = rmp->rmm_ic;
3345 #ifdef FEAT_MBYTE
3346     ireg_icombine = FALSE;
3347 #endif
3348     /* vim_iswordc() needs "buf" to be the current buffer: switch for a while. */
3349     prev_curbuf = curbuf;
3350     curbuf = buf;
3351     nlines = vim_regexec_both(NULL, col, tm);
3352     curbuf = prev_curbuf;
3353     return nlines;
3354 }
3355
3356 /*
3357  * Match a regexp against the string "line" or, when REG_MULTI is set, against
3358  * reg_getline() lines.  Returns 0 for no match/error, else regtry()'s result.
3359  */
3360 /*ARGSUSED*/
3361     static long
3362 vim_regexec_both(line, col, tm)
3363     char_u      *line;
3364     colnr_T     col;            /* column to start looking for match */
3365     proftime_T  *tm;            /* timeout limit or NULL */
3366 {
3367     regprog_T   *prog;
3368     char_u      *s;
3369     long        retval = 0L;    /* stays 0 for no match or an error */
3370
3371     /* Create "regstack" and "backpos" if they are not allocated yet.
3372      * We allocate *_INITIAL items first and then set the grow size to eight
3373      * times that, to avoid many malloc calls in case of deep regular
3374      * expressions.  */
3375     if (regstack.ga_data == NULL)
3376     {
3377         /* Use an item size of 1 byte, since we push different things
3378          * onto the regstack. */
3379         ga_init2(&regstack, 1, REGSTACK_INITIAL);
3380         ga_grow(&regstack, REGSTACK_INITIAL);
3381         regstack.ga_growsize = REGSTACK_INITIAL * 8;
3382     }
3383
3384     if (backpos.ga_data == NULL)
3385     {
3386         ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
3387         ga_grow(&backpos, BACKPOS_INITIAL);
3388         backpos.ga_growsize = BACKPOS_INITIAL * 8;
3389     }
3390
3391     if (REG_MULTI)              /* multi-line: start with the first line */
3392     {
3393         prog = reg_mmatch->regprog;
3394         line = reg_getline((linenr_T)0);
3395         reg_startpos = reg_mmatch->startpos;
3396         reg_endpos = reg_mmatch->endpos;
3397     }
3398     else                        /* single string: use "line" as passed in */
3399     {
3400         prog = reg_match->regprog;
3401         reg_startp = reg_match->startp;
3402         reg_endp = reg_match->endp;
3403     }
3404
3405     /* Be paranoid: without a program or a line there can be no match. */
3406     if (prog == NULL || line == NULL)
3407     {
3408         EMSG(_(e_null));
3409         goto theend;
3410     }
3411
3412     /* Check validity of program. */
3413     if (prog_magic_wrong())
3414         goto theend;
3415
3416     /* If the start column is past the maximum column: no need to try. */
3417     if (ireg_maxcol > 0 && col >= ireg_maxcol)
3418         goto theend;
3419
3420     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
3421     if (prog->regflags & RF_ICASE)
3422         ireg_ic = TRUE;
3423     else if (prog->regflags & RF_NOICASE)
3424         ireg_ic = FALSE;
3425
3426 #ifdef FEAT_MBYTE
3427     /* If pattern contains "\Z" overrule value of ireg_icombine */
3428     if (prog->regflags & RF_ICOMBINE)
3429         ireg_icombine = TRUE;
3430 #endif
3431
3432     /* If there is a "must appear" string, look for it. */
3433     if (prog->regmust != NULL)
3434     {
3435         int c;
3436
3437 #ifdef FEAT_MBYTE
3438         if (has_mbyte)
3439             c = (*mb_ptr2char)(prog->regmust);
3440         else
3441 #endif
3442             c = *prog->regmust;
3443         s = line + col;
3444
3445         /*
3446          * This is used very often, esp. for ":global".  Use three versions of
3447          * the loop to avoid overhead of conditions.
3448          */
3449         if (!ireg_ic
3450 #ifdef FEAT_MBYTE
3451                 && !has_mbyte
3452 #endif
3453                 )
3454             while ((s = vim_strbyte(s, c)) != NULL)
3455             {
3456                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3457                     break;              /* Found it. */
3458                 ++s;
3459             }
3460 #ifdef FEAT_MBYTE
3461         else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3462             while ((s = vim_strchr(s, c)) != NULL)
3463             {
3464                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3465                     break;              /* Found it. */
3466                 mb_ptr_adv(s);
3467             }
3468 #endif
3469         else
3470             while ((s = cstrchr(s, c)) != NULL)
3471             {
3472                 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3473                     break;              /* Found it. */
3474                 mb_ptr_adv(s);
3475             }
3476         if (s == NULL)          /* Not present. */
3477             goto theend;
3478     }
3479
3480     regline = line;
3481     reglnum = 0;
3482
3483     /* Simplest case: Anchored match need be tried only once. */
3484     if (prog->reganch)
3485     {
3486         int     c;
3487
3488 #ifdef FEAT_MBYTE
3489         if (has_mbyte)
3490             c = (*mb_ptr2char)(regline + col);
3491         else
3492 #endif
3493             c = regline[col];
3494         if (prog->regstart == NUL
3495                 || prog->regstart == c
3496                 || (ireg_ic && ((
3497 #ifdef FEAT_MBYTE
3498                         (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3499                         || (c < 255 && prog->regstart < 255 &&
3500 #endif
3501                             MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3502             retval = regtry(prog, col);
3503         else
3504             retval = 0;
3505     }
3506     else
3507     {
3508 #ifdef FEAT_RELTIME
3509         int tm_count = 0;
3510 #endif
3511         /* Messy cases:  unanchored match. */
3512         while (!got_int)
3513         {
3514             if (prog->regstart != NUL)
3515             {
3516                 /* Skip until the char we know it must start with.
3517                  * Used often, do some work to avoid call overhead. */
3518                 if (!ireg_ic
3519 #ifdef FEAT_MBYTE
3520                             && !has_mbyte
3521 #endif
3522                             )
3523                     s = vim_strbyte(regline + col, prog->regstart);
3524                 else
3525                     s = cstrchr(regline + col, prog->regstart);
3526                 if (s == NULL)
3527                 {
3528                     retval = 0;
3529                     break;
3530                 }
3531                 col = (int)(s - regline);
3532             }
3533
3534             /* Check for maximum column to try. */
3535             if (ireg_maxcol > 0 && col >= ireg_maxcol)
3536             {
3537                 retval = 0;
3538                 break;
3539             }
3540
3541             retval = regtry(prog, col);
3542             if (retval > 0)
3543                 break;
3544
3545             /* if not currently on the first line, get it again */
3546             if (reglnum != 0)
3547             {
3548                 reglnum = 0;
3549                 regline = reg_getline((linenr_T)0);
3550             }
3551             if (regline[col] == NUL)
3552                 break;          /* at end of the line: nothing left to try */
3553 #ifdef FEAT_MBYTE
3554             if (has_mbyte)
3555                 col += (*mb_ptr2len)(regline + col);
3556             else
3557 #endif
3558                 ++col;
3559 #ifdef FEAT_RELTIME
3560             /* Check for timeout once every twenty tries to avoid overhead. */
3561             if (tm != NULL && ++tm_count == 20)
3562             {
3563                 tm_count = 0;
3564                 if (profile_passed_limit(tm))
3565                     break;      /* timed out: stop trying */
3566             }
3567 #endif
3568         }
3569     }
3570
3571 theend:
3572     /* Free "reg_tofree" when it's a bit big.
3573      * Free regstack and backpos if they are bigger than their initial size. */
3574     if (reg_tofreelen > 400)
3575     {
3576         vim_free(reg_tofree);
3577         reg_tofree = NULL;
3578     }
3579     if (regstack.ga_maxlen > REGSTACK_INITIAL)
3580         ga_clear(&regstack);
3581     if (backpos.ga_maxlen > BACKPOS_INITIAL)
3582         ga_clear(&backpos);
3583
3584     return retval;
3585 }
3586
3587 #ifdef FEAT_SYN_HL
3588 static reg_extmatch_T *make_extmatch __ARGS((void));
3589
3590 /*
3591  * Create an extmatch with one reference; NULL when alloc_clear() gives NULL.
3592  */
3593     static reg_extmatch_T *
3594 make_extmatch()
3595 {
3596     reg_extmatch_T      *new_em = (reg_extmatch_T *)alloc_clear(
3597                                         (unsigned)sizeof(reg_extmatch_T));
3598     if (new_em == NULL)
3599         return NULL;
3600     new_em->refcnt = 1;         /* the caller holds the only reference */
3601     return new_em;
3602 }
3603
3604 /*
3605  * Take one more reference on "em" and return it.  "em" may be NULL.
3606  */
3607     reg_extmatch_T *
3608 ref_extmatch(em)
3609     reg_extmatch_T      *em;    /* extmatch to reference, or NULL */
3610 {
3611     if (em != NULL)
3612         ++em->refcnt;           /* a NULL "em" is passed through untouched */
3613     return em;
3614 }
3615
3616 /*
3617  * Drop one reference to "em" (NULL is allowed).  When the count reaches zero
3618  * or below, free every saved match string and then "em" itself.
3619  */
3620     void
3621 unref_extmatch(em)
3622     reg_extmatch_T      *em;
3623 {
3624     int         idx;
3625
3626     if (em == NULL || --em->refcnt > 0)
3627         return;                 /* nothing to do or still referenced */
3628
3629     for (idx = 0; idx < NSUBEXP; ++idx)
3630         vim_free(em->matches[idx]);
3631     vim_free(em);
3632 }
3633 #endif
3634
3635 /*
3636  * regtry - try to match "prog" at regline["col"].
3637  * Returns 0 for failure (also when the \z(...\) export cannot be allocated),
3638  * otherwise the number of lines contained in the match.
3639  */
3640     static long
3641 regtry(prog, col)
3642     regprog_T   *prog;          /* compiled program to run */
3643     colnr_T     col;            /* start offset in "regline" */
3644 {
3645     reginput = regline + col;
3646     need_clear_subexpr = TRUE;
3647 #ifdef FEAT_SYN_HL
3648     /* Clear the external match subpointers if necessary. */
3649     if (prog->reghasz == REX_SET)
3650         need_clear_zsubexpr = TRUE;
3651 #endif
3652
3653     if (regmatch(prog->program + 1) == 0)
3654         return 0;
3655
3656     cleanup_subexpr();
3657     if (REG_MULTI)
3658     {
3659         if (reg_startpos[0].lnum < 0)   /* start not set by the pattern */
3660         {
3661             reg_startpos[0].lnum = 0;
3662             reg_startpos[0].col = col;
3663         }
3664         if (reg_endpos[0].lnum < 0)     /* end not set by the pattern */
3665         {
3666             reg_endpos[0].lnum = reglnum;
3667             reg_endpos[0].col = (int)(reginput - regline);
3668         }
3669         else
3670             /* Use line number of "\ze". */
3671             reglnum = reg_endpos[0].lnum;
3672     }
3673     else
3674     {
3675         if (reg_startp[0] == NULL)
3676             reg_startp[0] = regline + col;
3677         if (reg_endp[0] == NULL)
3678             reg_endp[0] = reginput;
3679     }
3680 #ifdef FEAT_SYN_HL
3681     /* Package any found \z(...\) matches for export. Default is none. */
3682     unref_extmatch(re_extmatch_out);
3683     re_extmatch_out = NULL;
3684
3685     if (prog->reghasz == REX_SET)
3686     {
3687         int             i;
3688
3689         cleanup_zsubexpr();
3690         re_extmatch_out = make_extmatch();
3691         if (re_extmatch_out == NULL)    /* out of memory: report no match */
3692             return 0;
3693         for (i = 0; i < NSUBEXP; i++)
3694         {
3695             if (REG_MULTI)
3696             {
3697                 /* Only accept single line matches. */
3698                 if (reg_startzpos[i].lnum >= 0
3699                         && reg_endzpos[i].lnum == reg_startzpos[i].lnum)
3700                     re_extmatch_out->matches[i] =
3701                         vim_strnsave(reg_getline(reg_startzpos[i].lnum)
3702                                                        + reg_startzpos[i].col,
3703                                    reg_endzpos[i].col - reg_startzpos[i].col);
3704             }
3705             else if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
3706                 re_extmatch_out->matches[i] =
3707                             vim_strnsave(reg_startzp[i],
3708                                         (int)(reg_endzp[i] - reg_startzp[i]));
3709         }
3710     }
3711 #endif
3712     return 1 + reglnum;
3713 }
3714
3715 #ifdef FEAT_MBYTE
3716 static int reg_prev_class __ARGS((void));
3717
3718 /*
3719  * Class of the character before reginput; -1 when at the start of regline.
3720  */
3721     static int
3722 reg_prev_class()
3723 {
3724     if (reginput <= regline)
3725         return -1;              /* no previous character on this line */
3726     /* Back up one byte; mb_head_off() presumably finds the character start. */
3727     return mb_get_class(reginput - 1 - (*mb_head_off)(regline, reginput - 1));
3728 }
3729
3730 #endif
3731 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput) /* used in regmatch() */
3732
3733 /*
3734  * The arguments from BRACE_LIMITS are stored here.  They are actually local
3735  * to regmatch(), but they are kept at file level to reduce the amount of
3736  * stack space used (it can be called recursively many times).
3737  */
3738 static long     bl_minval;      /* presumably the lower limit -- TODO confirm */
3739 static long     bl_maxval;      /* presumably the upper limit -- TODO confirm */
3740
3741 /*
3742  * regmatch - main matching routine
3743  *
3744  * Conceptually the strategy is simple: Check to see whether the current node
3745  * matches, push an item onto the regstack and loop to see whether the rest
3746  * matches, and then act accordingly.  In practice we make some effort to
3747  * avoid using the regstack, in particular by going through "ordinary" nodes
3748  * (that don't need to know whether the rest of the match failed) by a nested
3749  * loop.
3750  *
3751  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
3752  * the last matched character.
3753  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
3754  * undefined state!
3755  */
3756     static int
3757 regmatch(scan)
3758     char_u      *scan;          /* Current node. */
3759 {
3760   char_u        *next;          /* Next node. */
3761   int           op;
3762   int           c;
3763   regitem_T     *rp;
3764   int           no;
3765   int           status;         /* one of the RA_ values: */
3766 #define RA_FAIL         1       /* something failed, abort */
3767 #define RA_CONT         2       /* continue in inner loop */
3768 #define RA_BREAK        3       /* break inner loop */
3769 #define RA_MATCH        4       /* successful match */
3770 #define RA_NOMATCH      5       /* didn't match */
3771
3772   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
3773    * vim_regexec_both() to reduce malloc()/free() calls. */
3774   regstack.ga_len = 0;
3775   backpos.ga_len = 0;
3776
3777   /*
3778    * Repeat until "regstack" is empty.
3779    */
3780   for (;;)
3781   {
3782     /* Some patterns my cause a long time to match, even though they are not
3783      * illegal.  E.g., "\([a-z]\+\)\+Q".  Allow breaking them with CTRL-C. */
3784     fast_breakcheck();
3785
3786 #ifdef DEBUG
3787     if (scan != NULL && regnarrate)
3788     {
3789         mch_errmsg(regprop(scan));
3790         mch_errmsg("(\n");
3791     }
3792 #endif
3793
3794     /*
3795      * Repeat for items that can be matched sequentially, without using the
3796      * regstack.
3797      */
3798     for (;;)
3799     {
3800         if (got_int || scan == NULL)
3801         {
3802             status = RA_FAIL;
3803             break;
3804         }
3805         status = RA_CONT;
3806
3807 #ifdef DEBUG
3808         if (regnarrate)
3809         {
3810             mch_errmsg(regprop(scan));
3811             mch_errmsg("...\n");
3812 # ifdef FEAT_SYN_HL
3813             if (re_extmatch_in != NULL)
3814             {
3815                 int i;
3816
3817                 mch_errmsg(_("External submatches:\n"));
3818                 for (i = 0; i < NSUBEXP; i++)
3819                 {
3820                     mch_errmsg("    \"");
3821                     if (re_extmatch_in->matches[i] != NULL)
3822                         mch_errmsg(re_extmatch_in->matches[i]);
3823                     mch_errmsg("\"\n");
3824                 }
3825             }
3826 # endif
3827         }
3828 #endif
3829         next = regnext(scan);
3830
3831         op = OP(scan);
3832         /* Check for character class with NL added. */
3833         if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
3834                                 && *reginput == NUL && reglnum <= reg_maxline)
3835         {
3836             reg_nextline();
3837         }
3838         else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
3839         {
3840             ADVANCE_REGINPUT();
3841         }
3842         else
3843         {
3844           if (WITH_NL(op))
3845               op -= ADD_NL;
3846 #ifdef FEAT_MBYTE
3847           if (has_mbyte)
3848               c = (*mb_ptr2char)(reginput);
3849           else
3850 #endif
3851               c = *reginput;
3852           switch (op)
3853           {
3854           case BOL:
3855             if (reginput != regline)
3856                 status = RA_NOMATCH;
3857             break;
3858
3859           case EOL:
3860             if (c != NUL)
3861                 status = RA_NOMATCH;
3862             break;
3863
3864           case RE_BOF:
3865             /* We're not at the beginning of the file when below the first
3866              * line where we started, not at the start of the line or we
3867              * didn't start at the first line of the buffer. */
3868             if (reglnum != 0 || reginput != regline
3869                                           || (REG_MULTI && reg_firstlnum > 1))
3870                 status = RA_NOMATCH;
3871             break;
3872
3873           case RE_EOF:
3874             if (reglnum != reg_maxline || c != NUL)
3875                 status = RA_NOMATCH;
3876             break;
3877
3878           case CURSOR:
3879             /* Check if the buffer is in a window and compare the
3880              * reg_win->w_cursor position to the match position. */
3881             if (reg_win == NULL
3882                     || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
3883                     || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
3884                 status = RA_NOMATCH;
3885             break;
3886
3887           case RE_MARK:
3888             /* Compare the mark position to the match position.  NOTE: Always
3889              * uses the current buffer. */
3890             {
3891                 int     mark = OPERAND(scan)[0];
3892                 int     cmp = OPERAND(scan)[1];
3893                 pos_T   *pos;
3894
3895                 pos = getmark(mark, FALSE);
3896                 if (pos == NULL              /* mark doesn't exist */
3897                         || pos->lnum <= 0    /* mark isn't set (in curbuf) */
3898                         || (pos->lnum == reglnum + reg_firstlnum
3899                                 ? (pos->col == (colnr_T)(reginput - regline)
3900                                     ? (cmp == '<' || cmp == '>')
3901                                     : (pos->col < (colnr_T)(reginput - regline)
3902                                         ? cmp != '>'
3903                                         : cmp != '<'))
3904                                 : (pos->lnum < reglnum + reg_firstlnum
3905                                     ? cmp != '>'
3906                                     : cmp != '<')))
3907                     status = RA_NOMATCH;
3908             }
3909             break;
3910
3911           case RE_VISUAL:
3912 #ifdef FEAT_VISUAL
3913             /* Check if the buffer is the current buffer. and whether the
3914              * position is inside the Visual area. */
3915             if (reg_buf != curbuf || VIsual.lnum == 0)
3916                 status = RA_NOMATCH;
3917             else
3918             {
3919                 pos_T       top, bot;
3920                 linenr_T    lnum;
3921                 colnr_T     col;
3922                 win_T       *wp = reg_win == NULL ? curwin : reg_win;
3923                 int         mode;
3924
3925                 if (VIsual_active)
3926                 {
3927                     if (lt(VIsual, wp->w_cursor))
3928                     {
3929                         top = VIsual;
3930                         bot = wp->w_cursor;
3931                     }
3932                     else
3933                     {
3934                         top = wp->w_cursor;
3935                         bot = VIsual;
3936                     }
3937                     mode = VIsual_mode;
3938                 }
3939                 else
3940                 {
3941                     if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
3942                     {
3943                         top = curbuf->b_visual.vi_start;
3944                         bot = curbuf->b_visual.vi_end;
3945                     }
3946                     else
3947                     {
3948                         top = curbuf->b_visual.vi_end;
3949                         bot = curbuf->b_visual.vi_start;
3950                     }
3951                     mode = curbuf->b_visual.vi_mode;
3952                 }
3953                 lnum = reglnum + reg_firstlnum;
3954                 col = (colnr_T)(reginput - regline);
3955                 if (lnum < top.lnum || lnum > bot.lnum)
3956                     status = RA_NOMATCH;
3957                 else if (mode == 'v')
3958                 {
3959                     if ((lnum == top.lnum && col < top.col)
3960                             || (lnum == bot.lnum
3961                                          && col >= bot.col + (*p_sel != 'e')))
3962                         status = RA_NOMATCH;
3963                 }
3964                 else if (mode == Ctrl_V)
3965                 {
3966                     colnr_T         start, end;
3967                     colnr_T         start2, end2;
3968                     colnr_T         cols;
3969
3970                     getvvcol(wp, &top, &start, NULL, &end);
3971                     getvvcol(wp, &bot, &start2, NULL, &end2);
3972                     if (start2 < start)
3973                         start = start2;
3974                     if (end2 > end)
3975                         end = end2;
3976                     if (top.col == MAXCOL || bot.col == MAXCOL)
3977                         end = MAXCOL;
3978                     cols = win_linetabsize(wp,
3979                                       regline, (colnr_T)(reginput - regline));
3980                     if (cols < start || cols > end - (*p_sel == 'e'))
3981                         status = RA_NOMATCH;
3982                 }
3983             }
3984 #else
3985             status = RA_NOMATCH;
3986 #endif
3987             break;
3988
3989           case RE_LNUM:
3990             if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
3991                                                                         scan))
3992                 status = RA_NOMATCH;
3993             break;
3994
3995           case RE_COL:
3996             if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
3997                 status = RA_NOMATCH;
3998             break;
3999
4000           case RE_VCOL:
4001             if (!re_num_cmp((long_u)win_linetabsize(
4002                             reg_win == NULL ? curwin : reg_win,
4003                             regline, (colnr_T)(reginput - regline)) + 1, scan))
4004                 status = RA_NOMATCH;
4005             break;
4006
4007           case BOW:     /* \<word; reginput points to w */
4008             if (c == NUL)       /* Can't match at end of line */
4009                 status = RA_NOMATCH;
4010 #ifdef FEAT_MBYTE
4011             else if (has_mbyte)
4012             {
4013                 int this_class;
4014
4015                 /* Get class of current and previous char (if it exists). */
4016                 this_class = mb_get_class(reginput);
4017                 if (this_class <= 1)
4018                     status = RA_NOMATCH;  /* not on a word at all */
4019                 else if (reg_prev_class() == this_class)
4020                     status = RA_NOMATCH;  /* previous char is in same word */
4021             }
4022 #endif
4023             else
4024             {
4025                 if (!vim_iswordc(c)
4026                         || (reginput > regline && vim_iswordc(reginput[-1])))
4027                     status = RA_NOMATCH;
4028             }
4029             break;
4030
4031           case EOW:     /* word\>; reginput points after d */
4032             if (reginput == regline)    /* Can't match at start of line */
4033                 status = RA_NOMATCH;
4034 #ifdef FEAT_MBYTE
4035             else if (has_mbyte)
4036             {
4037                 int this_class, prev_class;
4038
4039                 /* Get class of current and previous char (if it exists). */
4040                 this_class = mb_get_class(reginput);
4041                 prev_class = reg_prev_class();
4042                 if (this_class == prev_class
4043                         || prev_class == 0 || prev_class == 1)
4044                     status = RA_NOMATCH;
4045             }
4046 #endif
4047             else
4048             {
4049                 if (!vim_iswordc(reginput[-1])
4050                         || (reginput[0] != NUL && vim_iswordc(c)))
4051                     status = RA_NOMATCH;
4052             }
4053             break; /* Matched with EOW */
4054
4055           case ANY:
4056             if (c == NUL)
4057                 status = RA_NOMATCH;
4058             else
4059                 ADVANCE_REGINPUT();
4060             break;
4061
4062           case IDENT:
4063             if (!vim_isIDc(c))
4064                 status = RA_NOMATCH;
4065             else
4066                 ADVANCE_REGINPUT();
4067             break;
4068
4069           case SIDENT:
4070             if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4071                 status = RA_NOMATCH;
4072             else
4073                 ADVANCE_REGINPUT();
4074             break;
4075
4076           case KWORD:
4077             if (!vim_iswordp(reginput))
4078                 status = RA_NOMATCH;
4079             else
4080                 ADVANCE_REGINPUT();
4081             break;
4082
4083           case SKWORD:
4084             if (VIM_ISDIGIT(*reginput) || !vim_iswordp(reginput))
4085                 status = RA_NOMATCH;
4086             else
4087                 ADVANCE_REGINPUT();
4088             break;
4089
4090           case FNAME:
4091             if (!vim_isfilec(c))
4092                 status = RA_NOMATCH;
4093             else
4094                 ADVANCE_REGINPUT();
4095             break;
4096
4097           case SFNAME:
4098             if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4099                 status = RA_NOMATCH;
4100             else
4101                 ADVANCE_REGINPUT();
4102             break;
4103
4104           case PRINT:
4105             if (ptr2cells(reginput) != 1)
4106                 status = RA_NOMATCH;
4107             else
4108                 ADVANCE_REGINPUT();
4109             break;
4110
4111           case SPRINT:
4112             if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
4113                 status = RA_NOMATCH;
4114             else
4115                 ADVANCE_REGINPUT();
4116             break;
4117
4118           case WHITE:
4119             if (!vim_iswhite(c))
4120                 status = RA_NOMATCH;
4121             else
4122                 ADVANCE_REGINPUT();
4123             break;
4124
4125           case NWHITE:
4126             if (c == NUL || vim_iswhite(c))
4127                 status = RA_NOMATCH;
4128             else
4129                 ADVANCE_REGINPUT();
4130             break;
4131
4132           case DIGIT:
4133             if (!ri_digit(c))
4134                 status = RA_NOMATCH;
4135             else
4136                 ADVANCE_REGINPUT();
4137             break;
4138
4139           case NDIGIT:
4140             if (c == NUL || ri_digit(c))
4141                 status = RA_NOMATCH;
4142             else
4143                 ADVANCE_REGINPUT();
4144             break;
4145
4146           case HEX:
4147             if (!ri_hex(c))
4148                 status = RA_NOMATCH;
4149             else
4150                 ADVANCE_REGINPUT();
4151             break;
4152
4153           case NHEX:
4154             if (c == NUL || ri_hex(c))
4155                 status = RA_NOMATCH;
4156             else
4157                 ADVANCE_REGINPUT();
4158             break;
4159
4160           case OCTAL:
4161             if (!ri_octal(c))
4162                 status = RA_NOMATCH;
4163             else
4164                 ADVANCE_REGINPUT();
4165             break;
4166
4167           case NOCTAL:
4168             if (c == NUL || ri_octal(c))
4169                 status = RA_NOMATCH;
4170             else
4171                 ADVANCE_REGINPUT();
4172             break;
4173
4174           case WORD:
4175             if (!ri_word(c))
4176                 status = RA_NOMATCH;
4177             else
4178                 ADVANCE_REGINPUT();
4179             break;
4180
4181           case NWORD:
4182             if (c == NUL || ri_word(c))
4183                 status = RA_NOMATCH;
4184             else
4185                 ADVANCE_REGINPUT();
4186             break;
4187
4188           case HEAD:
4189             if (!ri_head(c))
4190                 status = RA_NOMATCH;
4191             else
4192                 ADVANCE_REGINPUT();
4193             break;
4194
4195           case NHEAD:
4196             if (c == NUL || ri_head(c))
4197                 status = RA_NOMATCH;
4198             else
4199                 ADVANCE_REGINPUT();
4200             break;
4201
4202           case ALPHA:
4203             if (!ri_alpha(c))
4204                 status = RA_NOMATCH;
4205             else
4206                 ADVANCE_REGINPUT();
4207             break;
4208
4209           case NALPHA:
4210             if (c == NUL || ri_alpha(c))
4211                 status = RA_NOMATCH;
4212             else
4213                 ADVANCE_REGINPUT();
4214             break;
4215
4216           case LOWER:
4217             if (!ri_lower(c))
4218                 status = RA_NOMATCH;
4219             else
4220                 ADVANCE_REGINPUT();
4221             break;
4222
4223           case NLOWER:
4224             if (c == NUL || ri_lower(c))
4225                 status = RA_NOMATCH;
4226             else
4227                 ADVANCE_REGINPUT();
4228             break;
4229
4230           case UPPER:
4231             if (!ri_upper(c))
4232                 status = RA_NOMATCH;
4233             else
4234                 ADVANCE_REGINPUT();
4235             break;
4236
4237           case NUPPER:
4238             if (c == NUL || ri_upper(c))
4239                 status = RA_NOMATCH;
4240             else
4241                 ADVANCE_REGINPUT();
4242             break;
4243
4244           case EXACTLY:
4245             {
4246                 int     len;
4247                 char_u  *opnd;
4248
4249                 opnd = OPERAND(scan);
4250                 /* Inline the first byte, for speed. */
4251                 if (*opnd != *reginput
4252                         && (!ireg_ic || (
4253 #ifdef FEAT_MBYTE
4254                             !enc_utf8 &&
4255 #endif
4256                             MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4257                     status = RA_NOMATCH;
4258                 else if (*opnd == NUL)
4259                 {
4260                     /* match empty string always works; happens when "~" is
4261                      * empty. */
4262                 }
4263                 else if (opnd[1] == NUL
4264 #ifdef FEAT_MBYTE
4265                             && !(enc_utf8 && ireg_ic)
4266 #endif
4267                         )
4268                     ++reginput;         /* matched a single char */
4269                 else
4270                 {
4271                     len = (int)STRLEN(opnd);
4272                     /* Need to match first byte again for multi-byte. */
4273                     if (cstrncmp(opnd, reginput, &len) != 0)
4274                         status = RA_NOMATCH;
4275 #ifdef FEAT_MBYTE
4276                     /* Check for following composing character. */
4277                     else if (enc_utf8
4278                                && UTF_COMPOSINGLIKE(reginput, reginput + len))
4279                     {
4280                         /* raaron: This code makes a composing character get
4281                          * ignored, which is the correct behavior (sometimes)
4282                          * for voweled Hebrew texts. */
4283                         if (!ireg_icombine)
4284                             status = RA_NOMATCH;
4285                     }
4286 #endif
4287                     else
4288                         reginput += len;
4289                 }
4290             }
4291             break;
4292
4293           case ANYOF:
4294           case ANYBUT:
4295             if (c == NUL)
4296                 status = RA_NOMATCH;
4297             else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4298                 status = RA_NOMATCH;
4299             else
4300                 ADVANCE_REGINPUT();
4301             break;
4302
4303 #ifdef FEAT_MBYTE
4304           case MULTIBYTECODE:
4305             if (has_mbyte)
4306             {
4307                 int     i, len;
4308                 char_u  *opnd;
4309                 int     opndc = 0, inpc;
4310
4311                 opnd = OPERAND(scan);
4312                 /* Safety check (just in case 'encoding' was changed since
4313                  * compiling the program). */
4314                 if ((len = (*mb_ptr2len)(opnd)) < 2)
4315                 {
4316                     status = RA_NOMATCH;
4317                     break;
4318                 }
4319                 if (enc_utf8)
4320                     opndc = mb_ptr2char(opnd);
4321                 if (enc_utf8 && utf_iscomposing(opndc))
4322                 {
4323                     /* When only a composing char is given match at any
4324                      * position where that composing char appears. */
4325                     status = RA_NOMATCH;
4326                     for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc))
4327                     {
4328                         inpc = mb_ptr2char(reginput + i);
4329                         if (!utf_iscomposing(inpc))
4330                         {
4331                             if (i > 0)
4332                                 break;
4333                         }
4334                         else if (opndc == inpc)
4335                         {
4336                             /* Include all following composing chars. */
4337                             len = i + mb_ptr2len(reginput + i);
4338                             status = RA_MATCH;
4339                             break;
4340                         }
4341                     }
4342                 }
4343                 else
4344                     for (i = 0; i < len; ++i)
4345                         if (opnd[i] != reginput[i])
4346                         {
4347                             status = RA_NOMATCH;
4348                             break;
4349                         }
4350                 reginput += len;
4351             }
4352             else
4353                 status = RA_NOMATCH;
4354             break;
4355 #endif
4356
4357           case NOTHING:
4358             break;
4359
4360           case BACK:
4361             {
4362                 int             i;
4363                 backpos_T       *bp;
4364
4365                 /*
4366                  * When we run into BACK we need to check if we don't keep
4367                  * looping without matching any input.  The second and later
4368                  * times a BACK is encountered it fails if the input is still
4369                  * at the same position as the previous time.
4370                  * The positions are stored in "backpos" and found by the
4371                  * current value of "scan", the position in the RE program.
4372                  */
4373                 bp = (backpos_T *)backpos.ga_data;
4374                 for (i = 0; i < backpos.ga_len; ++i)
4375                     if (bp[i].bp_scan == scan)
4376                         break;
4377                 if (i == backpos.ga_len)
4378                 {
4379                     /* First time at this BACK, make room to store the pos. */
4380                     if (ga_grow(&backpos, 1) == FAIL)
4381                         status = RA_FAIL;
4382                     else
4383                     {
4384                         /* get "ga_data" again, it may have changed */
4385                         bp = (backpos_T *)backpos.ga_data;
4386                         bp[i].bp_scan = scan;
4387                         ++backpos.ga_len;
4388                     }
4389                 }
4390                 else if (reg_save_equal(&bp[i].bp_pos))
4391                     /* Still at same position as last time, fail. */
4392                     status = RA_NOMATCH;
4393
4394                 if (status != RA_FAIL && status != RA_NOMATCH)
4395                     reg_save(&bp[i].bp_pos, &backpos);
4396             }
4397             break;
4398
4399           case MOPEN + 0:   /* Match start: \zs */
4400           case MOPEN + 1:   /* \( */
4401           case MOPEN + 2:
4402           case MOPEN + 3:
4403           case MOPEN + 4:
4404           case MOPEN + 5:
4405           case MOPEN + 6:
4406           case MOPEN + 7:
4407           case MOPEN + 8:
4408           case MOPEN + 9:
4409             {
4410                 no = op - MOPEN;
4411                 cleanup_subexpr();
4412                 rp = regstack_push(RS_MOPEN, scan);
4413                 if (rp == NULL)
4414                     status = RA_FAIL;
4415                 else
4416                 {
4417                     rp->rs_no = no;
4418                     save_se(&rp->rs_un.sesave, &reg_startpos[no],
4419                                                              &reg_startp[no]);
4420                     /* We simply continue and handle the result when done. */
4421                 }
4422             }
4423             break;
4424
4425           case NOPEN:       /* \%( */
4426           case NCLOSE:      /* \) after \%( */
4427                 if (regstack_push(RS_NOPEN, scan) == NULL)
4428                     status = RA_FAIL;
4429                 /* We simply continue and handle the result when done. */
4430                 break;
4431
4432 #ifdef FEAT_SYN_HL
4433           case ZOPEN + 1:
4434           case ZOPEN + 2:
4435           case ZOPEN + 3:
4436           case ZOPEN + 4:
4437           case ZOPEN + 5:
4438           case ZOPEN + 6:
4439           case ZOPEN + 7:
4440           case ZOPEN + 8:
4441           case ZOPEN + 9:
4442             {
4443                 no = op - ZOPEN;
4444                 cleanup_zsubexpr();
4445                 rp = regstack_push(RS_ZOPEN, scan);
4446                 if (rp == NULL)
4447                     status = RA_FAIL;
4448                 else
4449                 {
4450                     rp->rs_no = no;
4451                     save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4452                                                              &reg_startzp[no]);
4453                     /* We simply continue and handle the result when done. */
4454                 }
4455             }
4456             break;
4457 #endif
4458
4459           case MCLOSE + 0:  /* Match end: \ze */
4460           case MCLOSE + 1:  /* \) */
4461           case MCLOSE + 2:
4462           case MCLOSE + 3:
4463           case MCLOSE + 4:
4464           case MCLOSE + 5:
4465           case MCLOSE + 6:
4466           case MCLOSE + 7:
4467           case MCLOSE + 8:
4468           case MCLOSE + 9:
4469             {
4470                 no = op - MCLOSE;
4471                 cleanup_subexpr();
4472                 rp = regstack_push(RS_MCLOSE, scan);
4473                 if (rp == NULL)
4474                     status = RA_FAIL;
4475                 else
4476                 {
4477                     rp->rs_no = no;
4478                     save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4479                     /* We simply continue and handle the result when done. */
4480                 }
4481             }
4482             break;
4483
4484 #ifdef FEAT_SYN_HL
4485           case ZCLOSE + 1:  /* \) after \z( */
4486           case ZCLOSE + 2:
4487           case ZCLOSE + 3:
4488           case ZCLOSE + 4:
4489           case ZCLOSE + 5:
4490           case ZCLOSE + 6:
4491           case ZCLOSE + 7:
4492           case ZCLOSE + 8:
4493           case ZCLOSE + 9:
4494             {
4495                 no = op - ZCLOSE;
4496                 cleanup_zsubexpr();
4497                 rp = regstack_push(RS_ZCLOSE, scan);
4498                 if (rp == NULL)
4499                     status = RA_FAIL;
4500                 else
4501                 {
4502                     rp->rs_no = no;
4503                     save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4504                                                               &reg_endzp[no]);
4505                     /* We simply continue and handle the result when done. */
4506                 }
4507             }
4508             break;
4509 #endif
4510
4511           case BACKREF + 1:
4512           case BACKREF + 2:
4513           case BACKREF + 3:
4514           case BACKREF + 4:
4515           case BACKREF + 5:
4516           case BACKREF + 6:
4517           case BACKREF + 7:
4518           case BACKREF + 8:
4519           case BACKREF + 9:
4520             {
4521                 int             len;
4522                 linenr_T        clnum;
4523                 colnr_T         ccol;
4524                 char_u          *p;
4525
4526                 no = op - BACKREF;
4527                 cleanup_subexpr();
4528                 if (!REG_MULTI)         /* Single-line regexp */
4529                 {
4530                     if (reg_endp[no] == NULL)
4531                     {
4532                         /* Backref was not set: Match an empty string. */
4533                         len = 0;
4534                     }
4535                     else
4536                     {
4537                         /* Compare current input with back-ref in the same
4538                          * line. */
4539                         len = (int)(reg_endp[no] - reg_startp[no]);
4540                         if (cstrncmp(reg_startp[no], reginput, &len) != 0)
4541                             status = RA_NOMATCH;
4542                     }
4543                 }
4544                 else                            /* Multi-line regexp */
4545                 {
4546                     if (reg_endpos[no].lnum < 0)
4547                     {
4548                         /* Backref was not set: Match an empty string. */
4549                         len = 0;
4550                     }
4551                     else
4552                     {
4553                         if (reg_startpos[no].lnum == reglnum
4554                                 && reg_endpos[no].lnum == reglnum)
4555                         {
4556                             /* Compare back-ref within the current line. */
4557                             len = reg_endpos[no].col - reg_startpos[no].col;
4558                             if (cstrncmp(regline + reg_startpos[no].col,
4559                                                           reginput, &len) != 0)
4560                                 status = RA_NOMATCH;
4561                         }
4562                         else
4563                         {
4564                             /* Messy situation: Need to compare between two
4565                              * lines. */
4566                             ccol = reg_startpos[no].col;
4567                             clnum = reg_startpos[no].lnum;
4568                             for (;;)
4569                             {
4570                                 /* Since getting one line may invalidate
4571                                  * the other, need to make copy.  Slow! */
4572                                 if (regline != reg_tofree)
4573                                 {
4574                                     len = (int)STRLEN(regline);
4575                                     if (reg_tofree == NULL
4576                                                  || len >= (int)reg_tofreelen)
4577                                     {
4578                                         len += 50;      /* get some extra */
4579                                         vim_free(reg_tofree);
4580                                         reg_tofree = alloc(len);
4581                                         if (reg_tofree == NULL)
4582                                         {
4583                                             status = RA_FAIL; /* outof memory!*/
4584                                             break;
4585                                         }
4586                                         reg_tofreelen = len;
4587                                     }
4588                                     STRCPY(reg_tofree, regline);
4589                                     reginput = reg_tofree
4590                                                        + (reginput - regline);
4591                                     regline = reg_tofree;
4592                                 }
4593
4594                                 /* Get the line to compare with. */
4595                                 p = reg_getline(clnum);
4596                                 if (clnum == reg_endpos[no].lnum)
4597                                     len = reg_endpos[no].col - ccol;
4598                                 else
4599                                     len = (int)STRLEN(p + ccol);
4600
4601                                 if (cstrncmp(p + ccol, reginput, &len) != 0)
4602                                 {
4603                                     status = RA_NOMATCH;  /* doesn't match */
4604                                     break;
4605                                 }
4606                                 if (clnum == reg_endpos[no].lnum)
4607                                     break;              /* match and at end! */
4608                                 if (reglnum >= reg_maxline)
4609                                 {
4610                                     status = RA_NOMATCH;  /* text too short */
4611                                     break;
4612                                 }
4613
4614                                 /* Advance to next line. */
4615                                 reg_nextline();
4616                                 ++clnum;
4617                                 ccol = 0;
4618                                 if (got_int)
4619                                 {
4620                                     status = RA_FAIL;
4621                                     break;
4622                                 }
4623                             }
4624
4625                             /* found a match!  Note that regline may now point
4626                              * to a copy of the line, that should not matter. */
4627                         }
4628                     }
4629                 }
4630
4631                 /* Matched the backref, skip over it. */
4632                 reginput += len;
4633             }
4634             break;
4635
4636 #ifdef FEAT_SYN_HL
4637           case ZREF + 1:
4638           case ZREF + 2:
4639           case ZREF + 3:
4640           case ZREF + 4:
4641           case ZREF + 5:
4642           case ZREF + 6:
4643           case ZREF + 7:
4644           case ZREF + 8:
4645           case ZREF + 9:
4646             {
4647                 int     len;
4648
4649                 cleanup_zsubexpr();
4650                 no = op - ZREF;
4651                 if (re_extmatch_in != NULL
4652                         && re_extmatch_in->matches[no] != NULL)
4653                 {
4654                     len = (int)STRLEN(re_extmatch_in->matches[no]);
4655                     if (cstrncmp(re_extmatch_in->matches[no],
4656                                                           reginput, &len) != 0)
4657                         status = RA_NOMATCH;
4658                     else
4659                         reginput += len;
4660                 }
4661                 else
4662                 {
4663                     /* Backref was not set: Match an empty string. */
4664                 }
4665             }
4666             break;
4667 #endif
4668
4669           case BRANCH:
4670             {
4671                 if (OP(next) != BRANCH) /* No choice. */
4672                     next = OPERAND(scan);       /* Avoid recursion. */
4673                 else
4674                 {
4675                     rp = regstack_push(RS_BRANCH, scan);
4676                     if (rp == NULL)
4677                         status = RA_FAIL;
4678                     else
4679                         status = RA_BREAK;      /* rest is below */
4680                 }
4681             }
4682             break;
4683
4684           case BRACE_LIMITS:
4685             {
4686                 if (OP(next) == BRACE_SIMPLE)
4687                 {
4688                     bl_minval = OPERAND_MIN(scan);
4689                     bl_maxval = OPERAND_MAX(scan);
4690                 }
4691                 else if (OP(next) >= BRACE_COMPLEX
4692                         && OP(next) < BRACE_COMPLEX + 10)
4693                 {
4694                     no = OP(next) - BRACE_COMPLEX;
4695                     brace_min[no] = OPERAND_MIN(scan);
4696                     brace_max[no] = OPERAND_MAX(scan);
4697                     brace_count[no] = 0;
4698                 }
4699                 else
4700                 {
4701                     EMSG(_(e_internal));            /* Shouldn't happen */
4702                     status = RA_FAIL;
4703                 }
4704             }
4705             break;
4706
4707           case BRACE_COMPLEX + 0:
4708           case BRACE_COMPLEX + 1:
4709           case BRACE_COMPLEX + 2:
4710           case BRACE_COMPLEX + 3:
4711           case BRACE_COMPLEX + 4:
4712           case BRACE_COMPLEX + 5:
4713           case BRACE_COMPLEX + 6:
4714           case BRACE_COMPLEX + 7:
4715           case BRACE_COMPLEX + 8:
4716           case BRACE_COMPLEX + 9:
4717             {
4718                 no = op - BRACE_COMPLEX;
4719                 ++brace_count[no];
4720
4721                 /* If not matched enough times yet, try one more */
4722                 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4723                                              ? brace_min[no] : brace_max[no]))
4724                 {
4725                     rp = regstack_push(RS_BRCPLX_MORE, scan);
4726                     if (rp == NULL)
4727                         status = RA_FAIL;
4728                     else
4729                     {
4730                         rp->rs_no = no;
4731                         reg_save(&rp->rs_un.regsave, &backpos);
4732                         next = OPERAND(scan);
4733                         /* We continue and handle the result when done. */
4734                     }
4735                     break;
4736                 }
4737
4738                 /* If matched enough times, may try matching some more */
4739                 if (brace_min[no] <= brace_max[no])
4740                 {
4741                     /* Range is the normal way around, use longest match */
4742                     if (brace_count[no] <= brace_max[no])
4743                     {
4744                         rp = regstack_push(RS_BRCPLX_LONG, scan);
4745                         if (rp == NULL)
4746                             status = RA_FAIL;
4747                         else
4748                         {
4749                             rp->rs_no = no;
4750                             reg_save(&rp->rs_un.regsave, &backpos);
4751                             next = OPERAND(scan);
4752                             /* We continue and handle the result when done. */
4753                         }
4754                     }
4755                 }
4756                 else
4757                 {
4758                     /* Range is backwards, use shortest match first */
4759                     if (brace_count[no] <= brace_min[no])
4760                     {
4761                         rp = regstack_push(RS_BRCPLX_SHORT, scan);
4762                         if (rp == NULL)
4763                             status = RA_FAIL;
4764                         else
4765                         {
4766                             reg_save(&rp->rs_un.regsave, &backpos);
4767                             /* We continue and handle the result when done. */
4768                         }
4769                     }
4770                 }
4771             }
4772             break;
4773
4774           case BRACE_SIMPLE:
4775           case STAR:
4776           case PLUS:
4777             {
4778                 regstar_T       rst;
4779
4780                 /*
4781                  * Lookahead to avoid useless match attempts when we know
4782                  * what character comes next.
4783                  */
4784                 if (OP(next) == EXACTLY)
4785                 {
4786                     rst.nextb = *OPERAND(next);
4787                     if (ireg_ic)
4788                     {
4789                         if (MB_ISUPPER(rst.nextb))
4790                             rst.nextb_ic = MB_TOLOWER(rst.nextb);
4791                         else
4792                             rst.nextb_ic = MB_TOUPPER(rst.nextb);
4793                     }
4794                     else
4795                         rst.nextb_ic = rst.nextb;
4796                 }
4797                 else
4798                 {
4799                     rst.nextb = NUL;
4800                     rst.nextb_ic = NUL;
4801                 }
4802                 if (op != BRACE_SIMPLE)
4803                 {
4804                     rst.minval = (op == STAR) ? 0 : 1;
4805                     rst.maxval = MAX_LIMIT;
4806                 }
4807                 else
4808                 {
4809                     rst.minval = bl_minval;
4810                     rst.maxval = bl_maxval;
4811                 }
4812
4813                 /*
4814                  * When maxval > minval, try matching as much as possible, up
4815                  * to maxval.  When maxval < minval, try matching at least the
4816                  * minimal number (since the range is backwards, that's also
4817                  * maxval!).
4818                  */
4819                 rst.count = regrepeat(OPERAND(scan), rst.maxval);
4820                 if (got_int)
4821                 {
4822                     status = RA_FAIL;
4823                     break;
4824                 }
4825                 if (rst.minval <= rst.maxval
4826                           ? rst.count >= rst.minval : rst.count >= rst.maxval)
4827                 {
4828                     /* It could match.  Prepare for trying to match what
4829                      * follows.  The code is below.  Parameters are stored in
4830                      * a regstar_T on the regstack. */
4831                     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4832                     {
4833                         EMSG(_(e_maxmempat));
4834                         status = RA_FAIL;
4835                     }
4836                     else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4837                         status = RA_FAIL;
4838                     else
4839                     {
4840                         regstack.ga_len += sizeof(regstar_T);
4841                         rp = regstack_push(rst.minval <= rst.maxval
4842                                         ? RS_STAR_LONG : RS_STAR_SHORT, scan);
4843                         if (rp == NULL)
4844                             status = RA_FAIL;
4845                         else
4846                         {
4847                             *(((regstar_T *)rp) - 1) = rst;
4848                             status = RA_BREAK;      /* skip the restore bits */
4849                         }
4850                     }
4851                 }
4852                 else
4853                     status = RA_NOMATCH;
4854
4855             }
4856             break;
4857
4858           case NOMATCH:
4859           case MATCH:
4860           case SUBPAT:
4861             rp = regstack_push(RS_NOMATCH, scan);
4862             if (rp == NULL)
4863                 status = RA_FAIL;
4864             else
4865             {
4866                 rp->rs_no = op;
4867                 reg_save(&rp->rs_un.regsave, &backpos);
4868                 next = OPERAND(scan);
4869                 /* We continue and handle the result when done. */
4870             }
4871             break;
4872
4873           case BEHIND:
4874           case NOBEHIND:
4875             /* Need a bit of room to store extra positions. */
4876             if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4877             {
4878                 EMSG(_(e_maxmempat));
4879                 status = RA_FAIL;
4880             }
4881             else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4882                 status = RA_FAIL;
4883             else
4884             {
4885                 regstack.ga_len += sizeof(regbehind_T);
4886                 rp = regstack_push(RS_BEHIND1, scan);
4887                 if (rp == NULL)
4888                     status = RA_FAIL;
4889                 else
4890                 {
4891                     rp->rs_no = op;
4892                     reg_save(&rp->rs_un.regsave, &backpos);
4893                     /* First try if what follows matches.  If it does then we
4894                      * check the behind match by looping. */
4895                 }
4896             }
4897             break;
4898
4899           case BHPOS:
4900             if (REG_MULTI)
4901             {
4902                 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
4903                         || behind_pos.rs_u.pos.lnum != reglnum)
4904                     status = RA_NOMATCH;
4905             }
4906             else if (behind_pos.rs_u.ptr != reginput)
4907                 status = RA_NOMATCH;
4908             break;
4909
4910           case NEWL:
4911             if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
4912                              || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
4913                 status = RA_NOMATCH;
4914             else if (reg_line_lbr)
4915                 ADVANCE_REGINPUT();
4916             else
4917                 reg_nextline();
4918             break;
4919
4920           case END:
4921             status = RA_MATCH;  /* Success! */
4922             break;
4923
4924           default:
4925             EMSG(_(e_re_corr));
4926 #ifdef DEBUG
4927             printf("Illegal op code %d\n", op);
4928 #endif
4929             status = RA_FAIL;
4930             break;
4931           }
4932         }
4933
4934         /* If we can't continue sequentially, break the inner loop. */
4935         if (status != RA_CONT)
4936             break;
4937
4938         /* Continue in inner loop, advance to next item. */
4939         scan = next;
4940
4941     } /* end of inner loop */
4942
4943     /*
4944      * If there is something on the regstack execute the code for the state.
4945      * If the state is popped then loop and use the older state.
4946      */
4947     while (regstack.ga_len > 0 && status != RA_FAIL)
4948     {
4949         rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4950         switch (rp->rs_state)
4951         {
4952           case RS_NOPEN:
4953             /* Result is passed on as-is, simply pop the state. */
4954             regstack_pop(&scan);
4955             break;
4956
4957           case RS_MOPEN:
4958             /* Pop the state.  Restore pointers when there is no match. */
4959             if (status == RA_NOMATCH)
4960                 restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
4961                                                   &reg_startp[rp->rs_no]);
4962             regstack_pop(&scan);
4963             break;
4964
4965 #ifdef FEAT_SYN_HL
4966           case RS_ZOPEN:
4967             /* Pop the state.  Restore pointers when there is no match. */
4968             if (status == RA_NOMATCH)
4969                 restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4970                                                  &reg_startzp[rp->rs_no]);
4971             regstack_pop(&scan);
4972             break;
4973 #endif
4974
4975           case RS_MCLOSE:
4976             /* Pop the state.  Restore pointers when there is no match. */
4977             if (status == RA_NOMATCH)
4978                 restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
4979                                                     &reg_endp[rp->rs_no]);
4980             regstack_pop(&scan);
4981             break;
4982
4983 #ifdef FEAT_SYN_HL
4984           case RS_ZCLOSE:
4985             /* Pop the state.  Restore pointers when there is no match. */
4986             if (status == RA_NOMATCH)
4987                 restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4988                                                    &reg_endzp[rp->rs_no]);
4989             regstack_pop(&scan);
4990             break;
4991 #endif
4992
4993           case RS_BRANCH:
4994             if (status == RA_MATCH)
4995                 /* this branch matched, use it */
4996                 regstack_pop(&scan);
4997             else
4998             {
4999                 if (status != RA_BREAK)
5000                 {
5001                     /* After a non-matching branch: try next one. */
5002                     reg_restore(&rp->rs_un.regsave, &backpos);
5003                     scan = rp->rs_scan;
5004                 }
5005                 if (scan == NULL || OP(scan) != BRANCH)
5006                 {
5007                     /* no more branches, didn't find a match */
5008                     status = RA_NOMATCH;
5009                     regstack_pop(&scan);
5010                 }
5011                 else
5012                 {
5013                     /* Prepare to try a branch. */
5014                     rp->rs_scan = regnext(scan);
5015                     reg_save(&rp->rs_un.regsave, &backpos);
5016                     scan = OPERAND(scan);
5017                 }
5018             }
5019             break;
5020
5021           case RS_BRCPLX_MORE:
5022             /* Pop the state.  Restore pointers when there is no match. */
5023             if (status == RA_NOMATCH)
5024             {
5025                 reg_restore(&rp->rs_un.regsave, &backpos);
5026                 --brace_count[rp->rs_no];       /* decrement match count */
5027             }
5028             regstack_pop(&scan);
5029             break;
5030
5031           case RS_BRCPLX_LONG:
5032             /* Pop the state.  Restore pointers when there is no match. */
5033             if (status == RA_NOMATCH)
5034             {
5035                 /* There was no match, but we did find enough matches. */
5036                 reg_restore(&rp->rs_un.regsave, &backpos);
5037                 --brace_count[rp->rs_no];
5038                 /* continue with the items after "\{}" */
5039                 status = RA_CONT;
5040             }
5041             regstack_pop(&scan);
5042             if (status == RA_CONT)
5043                 scan = regnext(scan);
5044             break;
5045
5046           case RS_BRCPLX_SHORT:
5047             /* Pop the state.  Restore pointers when there is no match. */
5048             if (status == RA_NOMATCH)
5049                 /* There was no match, try to match one more item. */
5050                 reg_restore(&rp->rs_un.regsave, &backpos);
5051             regstack_pop(&scan);
5052             if (status == RA_NOMATCH)
5053             {
5054                 scan = OPERAND(scan);
5055                 status = RA_CONT;
5056             }
5057             break;
5058
5059           case RS_NOMATCH:
5060             /* Pop the state.  If the operand matches for NOMATCH or
5061              * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5062              * except for SUBPAT, and continue with the next item. */
5063             if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5064                 status = RA_NOMATCH;
5065             else
5066             {
5067                 status = RA_CONT;
5068                 if (rp->rs_no != SUBPAT)        /* zero-width */
5069                     reg_restore(&rp->rs_un.regsave, &backpos);
5070             }
5071             regstack_pop(&scan);
5072             if (status == RA_CONT)
5073                 scan = regnext(scan);
5074             break;
5075
5076           case RS_BEHIND1:
5077             if (status == RA_NOMATCH)
5078             {
5079                 regstack_pop(&scan);
5080                 regstack.ga_len -= sizeof(regbehind_T);
5081             }
5082             else
5083             {
5084                 /* The stuff after BEHIND/NOBEHIND matches.  Now try if
5085                  * the behind part does (not) match before the current
5086                  * position in the input.  This must be done at every
5087                  * position in the input and checking if the match ends at
5088                  * the current position. */
5089
5090                 /* save the position after the found match for next */
5091                 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5092
5093                 /* start looking for a match with operand at the current
5094                  * position.  Go back one character until we find the
5095                  * result, hitting the start of the line or the previous
5096                  * line (for multi-line matching).
5097                  * Set behind_pos to where the match should end, BHPOS
5098                  * will match it.  Save the current value. */
5099                 (((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5100                 behind_pos = rp->rs_un.regsave;
5101
5102                 rp->rs_state = RS_BEHIND2;
5103
5104                 reg_restore(&rp->rs_un.regsave, &backpos);
5105                 scan = OPERAND(rp->rs_scan);
5106             }
5107             break;
5108
5109           case RS_BEHIND2:
5110             /*
5111              * Looping for BEHIND / NOBEHIND match.
5112              */
5113             if (status == RA_MATCH && reg_save_equal(&behind_pos))
5114             {
5115                 /* found a match that ends where "next" started */
5116                 behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5117                 if (rp->rs_no == BEHIND)
5118                     reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5119                                                                     &backpos);
5120                 else
5121                     /* But we didn't want a match. */
5122                     status = RA_NOMATCH;
5123                 regstack_pop(&scan);
5124                 regstack.ga_len -= sizeof(regbehind_T);
5125             }
5126             else
5127             {
5128                 /* No match: Go back one character.  May go to previous
5129                  * line once. */
5130                 no = OK;
5131                 if (REG_MULTI)
5132                 {
5133                     if (rp->rs_un.regsave.rs_u.pos.col == 0)
5134                     {
5135                         if (rp->rs_un.regsave.rs_u.pos.lnum
5136                                         < behind_pos.rs_u.pos.lnum
5137                                 || reg_getline(
5138                                         --rp->rs_un.regsave.rs_u.pos.lnum)
5139                                                                   == NULL)
5140                             no = FAIL;
5141                         else
5142                         {
5143                             reg_restore(&rp->rs_un.regsave, &backpos);
5144                             rp->rs_un.regsave.rs_u.pos.col =
5145                                                  (colnr_T)STRLEN(regline);
5146                         }
5147                     }
5148                     else
5149                         --rp->rs_un.regsave.rs_u.pos.col;
5150                 }
5151                 else
5152                 {
5153                     if (rp->rs_un.regsave.rs_u.ptr == regline)
5154                         no = FAIL;
5155                     else
5156                         --rp->rs_un.regsave.rs_u.ptr;
5157                 }
5158                 if (no == OK)
5159                 {
5160                     /* Advanced, prepare for finding match again. */
5161                     reg_restore(&rp->rs_un.regsave, &backpos);
5162                     scan = OPERAND(rp->rs_scan);
5163                 }
5164                 else
5165                 {
5166                     /* Can't advance.  For NOBEHIND that's a match. */
5167                     behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5168                     if (rp->rs_no == NOBEHIND)
5169                     {
5170                         reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5171                                                                     &backpos);
5172                         status = RA_MATCH;
5173                     }
5174                     else
5175                         status = RA_NOMATCH;
5176                     regstack_pop(&scan);
5177                     regstack.ga_len -= sizeof(regbehind_T);
5178                 }
5179             }
5180             break;
5181
5182           case RS_STAR_LONG:
5183           case RS_STAR_SHORT:
5184             {
5185                 regstar_T           *rst = ((regstar_T *)rp) - 1;
5186
5187                 if (status == RA_MATCH)
5188                 {
5189                     regstack_pop(&scan);
5190                     regstack.ga_len -= sizeof(regstar_T);
5191                     break;
5192                 }
5193
5194                 /* Tried once already, restore input pointers. */
5195                 if (status != RA_BREAK)
5196                     reg_restore(&rp->rs_un.regsave, &backpos);
5197
5198                 /* Repeat until we found a position where it could match. */
5199                 for (;;)
5200                 {
5201                     if (status != RA_BREAK)
5202                     {
5203                         /* Tried first position already, advance. */
5204                         if (rp->rs_state == RS_STAR_LONG)
5205                         {
5206                             /* Trying for longest match, but couldn't or
5207                              * didn't match -- back up one char. */
5208                             if (--rst->count < rst->minval)
5209                                 break;
5210                             if (reginput == regline)
5211                             {
5212                                 /* backup to last char of previous line */
5213                                 --reglnum;
5214                                 regline = reg_getline(reglnum);
5215                                 /* Just in case regrepeat() didn't count
5216                                  * right. */
5217                                 if (regline == NULL)
5218                                     break;
5219                                 reginput = regline + STRLEN(regline);
5220                                 fast_breakcheck();
5221                             }
5222                             else
5223                                 mb_ptr_back(regline, reginput);
5224                         }
5225                         else
5226                         {
5227                             /* Range is backwards, use shortest match first.
5228                              * Careful: maxval and minval are exchanged!
5229                              * Couldn't or didn't match: try advancing one
5230                              * char. */
5231                             if (rst->count == rst->minval
5232                                   || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5233                                 break;
5234                             ++rst->count;
5235                         }
5236                         if (got_int)
5237                             break;
5238                     }
5239                     else
5240                         status = RA_NOMATCH;
5241
5242                     /* If it could match, try it. */
5243                     if (rst->nextb == NUL || *reginput == rst->nextb
5244                                              || *reginput == rst->nextb_ic)
5245                     {
5246                         reg_save(&rp->rs_un.regsave, &backpos);
5247                         scan = regnext(rp->rs_scan);
5248                         status = RA_CONT;
5249                         break;
5250                     }
5251                 }
5252                 if (status != RA_CONT)
5253                 {
5254                     /* Failed. */
5255                     regstack_pop(&scan);
5256                     regstack.ga_len -= sizeof(regstar_T);
5257                     status = RA_NOMATCH;
5258                 }
5259             }
5260             break;
5261         }
5262
5263         /* If we want to continue the inner loop or didn't pop a state
5264          * continue matching loop */
5265         if (status == RA_CONT || rp == (regitem_T *)
5266                              ((char *)regstack.ga_data + regstack.ga_len) - 1)
5267             break;
5268     }
5269
5270     /* May need to continue with the inner loop, starting at "scan". */
5271     if (status == RA_CONT)
5272         continue;
5273
5274     /*
5275      * If the regstack is empty or something failed we are done.
5276      */
5277     if (regstack.ga_len == 0 || status == RA_FAIL)
5278     {
5279         if (scan == NULL)
5280         {
5281             /*
5282              * We get here only if there's trouble -- normally "case END" is
5283              * the terminating point.
5284              */
5285             EMSG(_(e_re_corr));
5286 #ifdef DEBUG
5287             printf("Premature EOL\n");
5288 #endif
5289         }
5290         if (status == RA_FAIL)
5291             got_int = TRUE;
5292         return (status == RA_MATCH);
5293     }
5294
5295   } /* End of loop until the regstack is empty. */
5296
5297   /* NOTREACHED */
5298 }
5299
5300 /*
5301  * Push an item onto the regstack.
      * Only the rs_state and rs_scan fields of the new item are set here, from
      * "state" and "scan"; the other fields are left untouched.
5302  * Returns pointer to new item.  Returns NULL when out of memory.
      * Also returns NULL, after giving the e_maxmempat error, when the size of
      * the stack has reached the p_mmp limit.
5303  */
5304     static regitem_T *
5305 regstack_push(state, scan)
5306     regstate_T  state;
5307     char_u      *scan;
5308 {
5309     regitem_T   *rp;
5310
         /* regstack.ga_len is used as a byte count (it is advanced by
          * sizeof(regitem_T) below); ">> 10" divides it by 1024 before it is
          * compared with p_mmp. */
5311     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5312     {
5313         EMSG(_(e_maxmempat));
5314         return NULL;
5315     }
5316     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5317         return NULL;
5318
         /* The new item is placed right after the bytes that are in use. */
5319     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5320     rp->rs_state = state;
5321     rp->rs_scan = scan;
5322
5323     regstack.ga_len += sizeof(regitem_T);
5324     return rp;
5325 }
5326
5327 /*
5328  * Pop an item from the regstack.
      * The rs_scan value of the top item is stored in "*scan" and the used
      * length of the stack is reduced by sizeof(regitem_T).
      * There is no check for the stack being empty.
5329  */
5330     static void
5331 regstack_pop(scan)
5332     char_u      **scan;
5333 {
5334     regitem_T   *rp;
5335
         /* The top item is the last regitem_T before the end of the used part. */
5336     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5337     *scan = rp->rs_scan;
5338
5339     regstack.ga_len -= sizeof(regitem_T);
5340 }
5341
5342 /*
5343  * regrepeat - repeatedly match something simple, return how many.
5344  * Advances reginput (and reglnum) to just after the matched chars.
      * "p" is the node to match; its opcode selects which loop below is used.
      * At most "maxcount" matches are made.
      * For an opcode that is not handled here e_re_corr is reported and zero is
      * returned without moving reginput.
      * For MULTIBYTECODE matching stops at an input character that takes fewer
      * bytes than the operand character, so that the input pointer is never
      * advanced past the end of the character it points to.
5345  */
5346     static int
5347 regrepeat(p, maxcount)
5348     char_u      *p;
5349     long        maxcount;   /* maximum number of matches allowed */
5350 {
5351     long        count = 0;
5352     char_u      *scan;
5353     char_u      *opnd;
5354     int         mask;
         /* "testval" stays zero for the N* class items, for ANYBUT and for the
          * S* items (which reject digits); the other items set it.  Each loop
          * below uses it to select the wanted outcome of its test. */
5355     int         testval = 0;
5356
5357     scan = reginput;        /* Make local copy of reginput for speed. */
5358     opnd = OPERAND(p);
5359     switch (OP(p))
5360     {
5361       case ANY:
5362       case ANY + ADD_NL:
5363         while (count < maxcount)
5364         {
5365             /* Matching anything means we continue until end-of-line (or
5366              * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5367             while (*scan != NUL && count < maxcount)
5368             {
5369                 ++count;
5370                 mb_ptr_adv(scan);
5371             }
5372             if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5373                                          || reg_line_lbr || count == maxcount)
5374                 break;
5375             ++count;            /* count the line-break */
5376             reg_nextline();
5377             scan = reginput;
5378             if (got_int)
5379                 break;
5380         }
5381         break;
5382
5383       case IDENT:
5384       case IDENT + ADD_NL:
5385         testval = TRUE;
5386         /*FALLTHROUGH*/
5387       case SIDENT:
5388       case SIDENT + ADD_NL:
5389         while (count < maxcount)
5390         {
5391             if (vim_isIDc(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5392             {
5393                 mb_ptr_adv(scan);
5394             }
5395             else if (*scan == NUL)
5396             {
                     /* At the end of the line: only go on with the next line
                      * when matching multiple lines, the item includes a line
                      * break, reglnum is not past reg_maxline and line breaks
                      * are not '\n' characters in the text (reg_line_lbr).
                      * The loops of the other items below do the same. */
5397                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5398                                                               || reg_line_lbr)
5399                     break;
5400                 reg_nextline();
5401                 scan = reginput;
5402                 if (got_int)
5403                     break;
5404             }
                 /* With reg_line_lbr a '\n' in the text is the line break. */
5405             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5406                 ++scan;
5407             else
5408                 break;
5409             ++count;
5410         }
5411         break;
5412
5413       case KWORD:
5414       case KWORD + ADD_NL:
5415         testval = TRUE;
5416         /*FALLTHROUGH*/
5417       case SKWORD:
5418       case SKWORD + ADD_NL:
5419         while (count < maxcount)
5420         {
5421             if (vim_iswordp(scan) && (testval || !VIM_ISDIGIT(*scan)))
5422             {
5423                 mb_ptr_adv(scan);
5424             }
5425             else if (*scan == NUL)
5426             {
5427                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5428                                                               || reg_line_lbr)
5429                     break;
5430                 reg_nextline();
5431                 scan = reginput;
5432                 if (got_int)
5433                     break;
5434             }
5435             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5436                 ++scan;
5437             else
5438                 break;
5439             ++count;
5440         }
5441         break;
5442
5443       case FNAME:
5444       case FNAME + ADD_NL:
5445         testval = TRUE;
5446         /*FALLTHROUGH*/
5447       case SFNAME:
5448       case SFNAME + ADD_NL:
5449         while (count < maxcount)
5450         {
5451             if (vim_isfilec(*scan) && (testval || !VIM_ISDIGIT(*scan)))
5452             {
5453                 mb_ptr_adv(scan);
5454             }
5455             else if (*scan == NUL)
5456             {
5457                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5458                                                               || reg_line_lbr)
5459                     break;
5460                 reg_nextline();
5461                 scan = reginput;
5462                 if (got_int)
5463                     break;
5464             }
5465             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5466                 ++scan;
5467             else
5468                 break;
5469             ++count;
5470         }
5471         break;
5472
5473       case PRINT:
5474       case PRINT + ADD_NL:
5475         testval = TRUE;
5476         /*FALLTHROUGH*/
5477       case SPRINT:
5478       case SPRINT + ADD_NL:
5479         while (count < maxcount)
5480         {
5481             if (*scan == NUL)
5482             {
5483                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5484                                                               || reg_line_lbr)
5485                     break;
5486                 reg_nextline();
5487                 scan = reginput;
5488                 if (got_int)
5489                     break;
5490             }
5491             else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
5492             {
5493                 mb_ptr_adv(scan);
5494             }
5495             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5496                 ++scan;
5497             else
5498                 break;
5499             ++count;
5500         }
5501         break;
5502
5503       case WHITE:
5504       case WHITE + ADD_NL:
5505         testval = mask = RI_WHITE;
         /* Shared loop for all the class items: "mask" selects the class_tab
          * bit and "testval" is the value the masked entry must have, which is
          * the bit itself for the class and zero for the negated class. */
5506 do_class:
5507         while (count < maxcount)
5508         {
5509 #ifdef FEAT_MBYTE
5510             int         l;
5511 #endif
5512             if (*scan == NUL)
5513             {
5514                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5515                                                               || reg_line_lbr)
5516                     break;
5517                 reg_nextline();
5518                 scan = reginput;
5519                 if (got_int)
5520                     break;
5521             }
5522 #ifdef FEAT_MBYTE
                 /* A multi-byte character is not looked up in class_tab: it
                  * ends the match for a class and is skipped for a negated
                  * class. */
5523             else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5524             {
5525                 if (testval != 0)
5526                     break;
5527                 scan += l;
5528             }
5529 #endif
5530             else if ((class_tab[*scan] & mask) == testval)
5531                 ++scan;
5532             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5533                 ++scan;
5534             else
5535                 break;
5536             ++count;
5537         }
5538         break;
5539
5540       case NWHITE:
5541       case NWHITE + ADD_NL:
5542         mask = RI_WHITE;
5543         goto do_class;
5544       case DIGIT:
5545       case DIGIT + ADD_NL:
5546         testval = mask = RI_DIGIT;
5547         goto do_class;
5548       case NDIGIT:
5549       case NDIGIT + ADD_NL:
5550         mask = RI_DIGIT;
5551         goto do_class;
5552       case HEX:
5553       case HEX + ADD_NL:
5554         testval = mask = RI_HEX;
5555         goto do_class;
5556       case NHEX:
5557       case NHEX + ADD_NL:
5558         mask = RI_HEX;
5559         goto do_class;
5560       case OCTAL:
5561       case OCTAL + ADD_NL:
5562         testval = mask = RI_OCTAL;
5563         goto do_class;
5564       case NOCTAL:
5565       case NOCTAL + ADD_NL:
5566         mask = RI_OCTAL;
5567         goto do_class;
5568       case WORD:
5569       case WORD + ADD_NL:
5570         testval = mask = RI_WORD;
5571         goto do_class;
5572       case NWORD:
5573       case NWORD + ADD_NL:
5574         mask = RI_WORD;
5575         goto do_class;
5576       case HEAD:
5577       case HEAD + ADD_NL:
5578         testval = mask = RI_HEAD;
5579         goto do_class;
5580       case NHEAD:
5581       case NHEAD + ADD_NL:
5582         mask = RI_HEAD;
5583         goto do_class;
5584       case ALPHA:
5585       case ALPHA + ADD_NL:
5586         testval = mask = RI_ALPHA;
5587         goto do_class;
5588       case NALPHA:
5589       case NALPHA + ADD_NL:
5590         mask = RI_ALPHA;
5591         goto do_class;
5592       case LOWER:
5593       case LOWER + ADD_NL:
5594         testval = mask = RI_LOWER;
5595         goto do_class;
5596       case NLOWER:
5597       case NLOWER + ADD_NL:
5598         mask = RI_LOWER;
5599         goto do_class;
5600       case UPPER:
5601       case UPPER + ADD_NL:
5602         testval = mask = RI_UPPER;
5603         goto do_class;
5604       case NUPPER:
5605       case NUPPER + ADD_NL:
5606         mask = RI_UPPER;
5607         goto do_class;
5608
5609       case EXACTLY:
5610         {
5611             int     cu, cl;
5612
5613             /* This doesn't do a multi-byte character, because a MULTIBYTECODE
5614              * would have been used for it.  It does handle single-byte
5615              * characters, such as latin1. */
                 /* Only the first byte of the operand is compared. */
5616             if (ireg_ic)
5617             {
5618                 cu = MB_TOUPPER(*opnd);
5619                 cl = MB_TOLOWER(*opnd);
5620                 while (count < maxcount && (*scan == cu || *scan == cl))
5621                 {
5622                     count++;
5623                     scan++;
5624                 }
5625             }
5626             else
5627             {
5628                 cu = *opnd;
5629                 while (count < maxcount && *scan == cu)
5630                 {
5631                     count++;
5632                     scan++;
5633                 }
5634             }
5635             break;
5636         }
5637
5638 #ifdef FEAT_MBYTE
5639       case MULTIBYTECODE:
5640         {
5641             int         i, len, cf = 0;
5642
5643             /* Safety check (just in case 'encoding' was changed since
5644              * compiling the program). */
                 /* When the operand is not longer than one byte nothing is
                  * matched and zero is returned. */
5645             if ((len = (*mb_ptr2len)(opnd)) > 1)
5646             {
5647                 if (ireg_ic && enc_utf8)
5648                     cf = utf_fold(utf_ptr2char(opnd));
                     /* Stop when the input character takes fewer bytes than
                      * the operand.  When ignoring case such a character can
                      * still be equal after folding, and "scan += len" would
                      * then step past its end, possibly past the NUL that
                      * terminates the line. */
5649                 while (count < maxcount && (*mb_ptr2len)(scan) >= len)
5650                 {
5651                     for (i = 0; i < len; ++i)
5652                         if (opnd[i] != scan[i])
5653                             break;
                         /* Bytes differ: only accept the character when
                          * ignoring case with utf-8 and the folded characters
                          * are equal. */
5654                     if (i < len && (!ireg_ic || !enc_utf8
5655                                         || utf_fold(utf_ptr2char(scan)) != cf))
5656                         break;
5657                     scan += len;
5658                     ++count;
5659                 }
5660             }
5661         }
5662         break;
5663 #endif
5664
5665       case ANYOF:
5666       case ANYOF + ADD_NL:
5667         testval = TRUE;
5668         /*FALLTHROUGH*/
5669
5670       case ANYBUT:
5671       case ANYBUT + ADD_NL:
             /* ANYOF (testval set) stops at a character for which cstrchr()
              * returns NULL for the operand, ANYBUT stops at a character for
              * which it does not. */
5672         while (count < maxcount)
5673         {
5674 #ifdef FEAT_MBYTE
5675             int len;
5676 #endif
5677             if (*scan == NUL)
5678             {
5679                 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5680                                                               || reg_line_lbr)
5681                     break;
5682                 reg_nextline();
5683                 scan = reginput;
5684                 if (got_int)
5685                     break;
5686             }
5687             else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5688                 ++scan;
5689 #ifdef FEAT_MBYTE
5690             else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
5691             {
5692                 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
5693                     break;
5694                 scan += len;
5695             }
5696 #endif
5697             else
5698             {
5699                 if ((cstrchr(opnd, *scan) == NULL) == testval)
5700                     break;
5701                 ++scan;
5702             }
5703             ++count;
5704         }
5705         break;
5706
5707       case NEWL:
             /* A line break is either the end of a line while matching multiple
              * lines with reglnum not past reg_maxline, or a '\n' character
              * when reg_line_lbr is set. */
5708         while (count < maxcount
5709                 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
5710                             && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
5711         {
5712             count++;
5713             if (reg_line_lbr)
5714                 ADVANCE_REGINPUT();
5715             else
5716                 reg_nextline();
5717             scan = reginput;
5718             if (got_int)
5719                 break;
5720         }
5721         break;
5722
5723       default:                  /* Oh dear.  Called inappropriately. */
5724         EMSG(_(e_re_corr));
5725 #ifdef DEBUG
5726         printf("Called regrepeat with op code %d\n", OP(p));
5727 #endif
5728         break;
5729     }
5730
         /* Store the local copy of the input position back. */
5731     reginput = scan;
5732
5733     return (int)count;
5734 }
5735
5736 /*
5737  * regnext - dig the "next" pointer out of a node
      * Returns NULL when "p" is JUST_CALC_SIZE or when the NEXT() offset of the
      * node is zero.  For a BACK node the offset is subtracted from "p", for
      * all other nodes it is added to "p".
5738  */
5739     static char_u *
5740 regnext(p)
5741     char_u  *p;
5742 {
5743     int     offset;
5744
5745     if (p == JUST_CALC_SIZE)
5746         return NULL;
5747
5748     offset = NEXT(p);
5749     if (offset == 0)
5750         return NULL;
5751
5752     if (OP(p) == BACK)
5753         return p - offset;
5754     else
5755         return p + offset;
5756 }
5757
5758 /*
5759  * Check the regexp program for its magic number.
      * The program is taken from reg_mmatch when REG_MULTI is set, otherwise
      * from reg_match.  UCHARAT() of the program is compared with REGMAGIC.
5760  * Return TRUE if it's wrong.
      * In that case the e_re_corr error is given.  Return FALSE otherwise.
5761  */
5762     static int
5763 prog_magic_wrong()
5764 {
5765     if (UCHARAT(REG_MULTI
5766                 ? reg_mmatch->regprog->program
5767                 : reg_match->regprog->program) != REGMAGIC)
5768     {
5769         EMSG(_(e_re_corr));
5770         return TRUE;
5771     }
5772     return FALSE;
5773 }
5774
5775 /*
5776  * Cleanup the subexpressions, if this wasn't done yet.
5777  * This construction is used to clear the subexpressions only when they are
5778  * used (to increase speed).
      * Nothing is done unless need_clear_subexpr is set; the flag is reset after
      * clearing.  With REG_MULTI the reg_startpos and reg_endpos arrays are
      * filled with 0xff bytes, otherwise the reg_startp and reg_endp pointer
      * arrays are filled with zero bytes.  NSUBEXP entries are cleared.
5779  */
5780     static void
5781 cleanup_subexpr()
5782 {
5783     if (need_clear_subexpr)
5784     {
5785         if (REG_MULTI)
5786         {
5787             /* Use 0xff to set lnum to -1 */
5788             vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5789             vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5790         }
5791         else
5792         {
5793             vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
5794             vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
5795         }
5796         need_clear_subexpr = FALSE;
5797     }
5798 }
5799
5800 #ifdef FEAT_SYN_HL
     /*
      * Clear the reg_startzpos and reg_endzpos arrays (with REG_MULTI) or the
      * reg_startzp and reg_endzp arrays (otherwise), if this wasn't done yet.
      * Nothing is done unless need_clear_zsubexpr is set; the flag is reset
      * after clearing.  Only compiled when FEAT_SYN_HL is defined.
      */
5801     static void
5802 cleanup_zsubexpr()
5803 {
5804     if (need_clear_zsubexpr)
5805     {
5806         if (REG_MULTI)
5807         {
5808             /* Use 0xff to set lnum to -1 */
5809             vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5810             vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
5811         }
5812         else
5813         {
5814             vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
5815             vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
5816         }
5817         need_clear_zsubexpr = FALSE;
5818     }
5819 }
5820 #endif
5821
5822 /*
5823  * Advance reglnum, regline and reginput to the next line.
      * reglnum is incremented, regline is obtained with reg_getline() and
      * reginput is set to the start of that line.  The result of reg_getline()
      * is not checked here.  fast_breakcheck() is called afterwards.
5824  */
5825     static void
5826 reg_nextline()
5827 {
5828     regline = reg_getline(++reglnum);
5829     reginput = regline;
5830     fast_breakcheck();
5831 }
5832
5833 /*
5834  * Save the input line and position in a regsave_T.
      * With REG_MULTI the position is stored as the line number reglnum and the
      * column, which is the offset of reginput in regline.  Otherwise the
      * reginput pointer itself is stored.
      * The current length of "gap" is stored in rs_len as well.
5835  */
5836     static void
5837 reg_save(save, gap)
5838     regsave_T   *save;
5839     garray_T    *gap;
5840 {
5841     if (REG_MULTI)
5842     {
5843         save->rs_u.pos.col = (colnr_T)(reginput - regline);
5844         save->rs_u.pos.lnum = reglnum;
5845     }
5846     else
5847         save->rs_u.ptr = reginput;
5848     save->rs_len = gap->ga_len;
5849 }
5850
5851 /*
5852  * Restore the input line and position from a regsave_T.
      * With REG_MULTI reglnum, regline and reginput are set from the saved line
      * number and column, otherwise reginput is set to the saved pointer.
      * The length of "gap" is set back to the saved rs_len.
      * The result of reg_getline() is not checked here.
5853  */
5854     static void
5855 reg_restore(save, gap)
5856     regsave_T   *save;
5857     garray_T    *gap;
5858 {
5859     if (REG_MULTI)
5860     {
5861         if (reglnum != save->rs_u.pos.lnum)
5862         {
5863             /* only call reg_getline() when the line number changed to save
5864              * a bit of time */
5865             reglnum = save->rs_u.pos.lnum;
5866             regline = reg_getline(reglnum);
5867         }
5868         reginput = regline + save->rs_u.pos.col;
5869     }
5870     else
5871         reginput = save->rs_u.ptr;
5872     gap->ga_len = save->rs_len;
5873 }
5874
5875 /*
5876  * Return TRUE if current position is equal to saved position.
      * With REG_MULTI both the line number and the column must be equal,
      * otherwise the saved pointer must be equal to reginput.
      * The rs_len field of "save" is not used for the comparison.
5877  */
5878     static int
5879 reg_save_equal(save)
5880     regsave_T   *save;
5881 {
5882     if (REG_MULTI)
5883         return reglnum == save->rs_u.pos.lnum
5884                                   && reginput == regline + save->rs_u.pos.col;
5885     return reginput == save->rs_u.ptr;
5886 }
5887
5888 /*
5889  * Tentatively set the sub-expression start to the current position (after
5890  * calling regmatch() they will have changed).  Need to save the existing
5891  * values for when there is no match.
5892  * Use se_save() to use position (save_se_multi()) or pointer (save_se_one()),
5893  * depending on REG_MULTI.
      *
      * save_se_multi() copies the old "*posp" to "savep" and then sets "*posp"
      * to the current line number and the column of reginput in regline.
5894  */
5895     static void
5896 save_se_multi(savep, posp)
5897     save_se_T   *savep;
5898     lpos_T      *posp;
5899 {
5900     savep->se_u.pos = *posp;
5901     posp->lnum = reglnum;
5902     posp->col = (colnr_T)(reginput - regline);
5903 }
5904
     /*
      * save_se_one() copies the old "*pp" to "savep" and then sets "*pp" to
      * reginput.
      */
5905     static void
5906 save_se_one(savep, pp)
5907     save_se_T   *savep;
5908     char_u      **pp;
5909 {
5910     savep->se_u.ptr = *pp;
5911     *pp = reginput;
5912 }
5913
5914 /*
5915  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
      * "val" is compared with n, the value of OPERAND_MIN(scan).  The kind of
      * comparison is selected by OPERAND_CMP(scan): '>' tests "val > n", '<'
      * tests "val < n" and anything else tests "val == n".
      * Returns non-zero when the test holds, zero otherwise.
5916  */
5917     static int
5918 re_num_cmp(val, scan)
5919     long_u      val;
5920     char_u      *scan;
5921 {
5922     long_u  n = OPERAND_MIN(scan);
5923
5924     if (OPERAND_CMP(scan) == '>')
5925         return val > n;
5926     if (OPERAND_CMP(scan) == '<')
5927         return val < n;
5928     return val == n;
5929 }
5930
5931
5932 #ifdef DEBUG
5933
5934 /*
5935  * regdump - dump a regexp onto stdout in vaguely comprehensible form
      * "pattern" is only printed in the heading.  The nodes of "r->program" are
      * printed one per line: the offset of the node, the text from regprop() and
      * the offset of the next node in parentheses, "(0)" when there is none.
      * After the nodes the regstart, reganch and regmust header fields are
      * printed when they are set.
5936  */
5937     static void
5938 regdump(pattern, r)
5939     char_u      *pattern;
5940     regprog_T   *r;
5941 {
5942     char_u  *s;
5943     int     op = EXACTLY;       /* Arbitrary non-END op. */
5944     char_u  *next;
         /* Highest "next" position seen so far; an END at or before it is not
          * the final one. */
5945     char_u  *end = NULL;
5946
5947     printf("\r\nregcomp(%s):\r\n", pattern);
5948
         /* Start at the node after the first byte of the program. */
5949     s = r->program + 1;
5950     /*
5951      * Loop until we find the END that isn't before a referred next (an END
5952      * can also appear in a NOMATCH operand).
5953      */
5954     while (op != END || s <= end)
5955     {
5956         op = OP(s);
5957         printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
5958         next = regnext(s);
5959         if (next == NULL)       /* Next ptr. */
5960             printf("(0)");
5961         else
5962             printf("(%d)", (int)((s - r->program) + (next - s)));
5963         if (end < next)
5964             end = next;
5965         if (op == BRACE_LIMITS)
5966         {
5967             /* Two short ints.  NOTE(review): 8 bytes are skipped below, which
                  * suggests two 4-byte values rather than shorts -- confirm. */
5968             printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5969             s += 8;
5970         }
             /* Skip three bytes; presumably the opcode and the "next" offset of
              * the node -- confirm against the node layout. */
5971         s += 3;
5972         if (op == ANYOF || op == ANYOF + ADD_NL
5973                 || op == ANYBUT || op == ANYBUT + ADD_NL
5974                 || op == EXACTLY)
5975         {
5976             /* Literal string, where present. */
                 /* Printed up to the NUL, which is then skipped as well. */
5977             while (*s != NUL)
5978                 printf("%c", *s++);
5979             s++;
5980         }
5981         printf("\r\n");
5982     }
5983
5984     /* Header fields of interest. */
5985     if (r->regstart != NUL)
5986         printf("start `%s' 0x%x; ", r->regstart < 256
5987                 ? (char *)transchar(r->regstart)
5988                 : "multibyte", r->regstart);
5989     if (r->reganch)
5990         printf("anchored; ");
5991     if (r->regmust != NULL)
5992         printf("must have \"%s\"", r->regmust);
5993     printf("\r\n");
5994 }
5995
5996 /*
5997  * regprop - printable representation of opcode
5998  */
5999     static char_u *
6000 regprop(op)
6001     char_u         *op;
6002 {
6003     char_u          *p;
6004     static char_u   buf[50];
6005
6006     (void) strcpy(buf, ":");
6007
6008     switch (OP(op))
6009     {
6010       case BOL:
6011         p = "BOL";
6012         break;
6013       case EOL:
6014         p = "EOL";
6015         break;
6016       case RE_BOF:
6017         p = "BOF";
6018         break;
6019       case RE_EOF:
6020         p = "EOF";
6021         break;
6022       case CURSOR:
6023         p = "CURSOR";
6024         break;
6025       case RE_VISUAL:
6026         p = "RE_VISUAL";
6027         break;
6028       case RE_LNUM:
6029         p = "RE_LNUM";
6030         break;
6031       case RE_MARK:
6032         p = "RE_MARK";
6033         break;
6034       case RE_COL:
6035         p = "RE_COL";
6036         break;
6037       case RE_VCOL:
6038         p = "RE_VCOL";
6039         break;
6040       case BOW:
6041         p = "BOW";
6042         break;
6043       case EOW:
6044         p = "EOW";
6045         break;
6046       case ANY:
6047         p = "ANY";
6048         break;
6049       case ANY + ADD_NL:
6050         p = "ANY+NL";
6051         break;
6052       case ANYOF:
6053         p = "ANYOF";
6054         break;
6055       case ANYOF + ADD_NL:
6056         p = "ANYOF+NL";
6057         break;
6058       case ANYBUT:
6059         p = "ANYBUT";
6060         break;
6061       case ANYBUT + ADD_NL:
6062         p = "ANYBUT+NL";
6063         break;
6064       case IDENT:
6065         p = "IDENT";
6066         break;
6067       case IDENT + ADD_NL:
6068         p = "IDENT+NL";
6069         break;
6070       case SIDENT:
6071         p = "SIDENT";
6072         break;
6073       case SIDENT + ADD_NL:
6074         p = "SIDENT+NL";
6075         break;
6076       case KWORD:
6077         p = "KWORD";
6078         break;
6079       case KWORD + ADD_NL:
6080         p = "KWORD+NL";
6081         break;
6082       case SKWORD:
6083         p = "SKWORD";
6084         break;
6085       case SKWORD + ADD_NL:
6086         p = "SKWORD+NL";
6087         break;
6088       case FNAME:
6089         p = "FNAME";
6090         break;
6091       case FNAME + ADD_NL:
6092         p = "FNAME+NL";
6093         break;
6094       case SFNAME:
6095         p = "SFNAME";
6096         break;
6097       case SFNAME + ADD_NL:
6098         p = "SFNAME+NL";
6099         break;
6100       case PRINT:
6101         p = "PRINT";
6102         break;
6103       case PRINT + ADD_NL:
6104         p = "PRINT+NL";
6105         break;
6106       case SPRINT:
6107         p = "SPRINT";
6108         break;
6109       case SPRINT + ADD_NL:
6110         p = "SPRINT+NL";
6111         break;
6112       case WHITE:
6113         p = "WHITE";
6114         break;
6115       case WHITE + ADD_NL:
6116         p = "WHITE+NL";
6117         break;
6118       case NWHITE:
6119         p = "NWHITE";
6120         break;
6121       case NWHITE + ADD_NL:
6122         p = "NWHITE+NL";
6123         break;
6124       case DIGIT:
6125         p = "DIGIT";
6126         break;
6127       case DIGIT + ADD_NL:
6128         p = "DIGIT+NL";
6129         break;
6130       case NDIGIT:
6131         p = "NDIGIT";
6132         break;
6133       case NDIGIT + ADD_NL:
6134         p = "NDIGIT+NL";
6135         break;
6136       case HEX:
6137         p = "HEX";
6138         break;
6139       case HEX + ADD_NL:
6140         p = "HEX+NL";
6141         break;
6142       case NHEX:
6143         p = "NHEX";
6144         break;
6145       case NHEX + ADD_NL:
6146         p = "NHEX+NL";
6147         break;
6148       case OCTAL:
6149         p = "OCTAL";
6150         break;
6151       case OCTAL + ADD_NL:
6152         p = "OCTAL+NL";
6153         break;
6154       case NOCTAL:
6155         p = "NOCTAL";
6156         break;
6157       case NOCTAL + ADD_NL:
6158         p = "NOCTAL+NL";
6159         break;
6160       case WORD:
6161         p = "WORD";
6162         break;
6163       case WORD + ADD_NL:
6164         p = "WORD+NL";
6165         break;
6166       case NWORD:
6167         p = "NWORD";
6168         break;
6169       case NWORD + ADD_NL:
6170         p = "NWORD+NL";
6171         break;
6172       case HEAD:
6173         p = "HEAD";
6174         break;
6175       case HEAD + ADD_NL:
6176         p = "HEAD+NL";
6177         break;
6178       case NHEAD:
6179         p = "NHEAD";
6180         break;
6181       case NHEAD + ADD_NL:
6182         p = "NHEAD+NL";
6183         break;
6184       case ALPHA:
6185         p = "ALPHA";
6186         break;
6187       case ALPHA + ADD_NL:
6188         p = "ALPHA+NL";
6189         break;
6190       case NALPHA:
6191         p = "NALPHA";
6192         break;
6193       case NALPHA + ADD_NL:
6194         p = "NALPHA+NL";
6195         break;
6196       case LOWER:
6197         p = "LOWER";
6198         break;
6199       case LOWER + ADD_NL:
6200         p = "LOWER+NL";
6201         break;
6202       case NLOWER:
6203         p = "NLOWER";
6204         break;
6205       case NLOWER + ADD_NL:
6206         p = "NLOWER+NL";
6207         break;
6208       case UPPER:
6209         p = "UPPER";
6210         break;
6211       case UPPER + ADD_NL:
6212         p = "UPPER+NL";
6213         break;
6214       case NUPPER:
6215         p = "NUPPER";
6216         break;
6217       case NUPPER + ADD_NL:
6218         p = "NUPPER+NL";
6219         break;
6220       case BRANCH:
6221         p = "BRANCH";
6222         break;
6223       case EXACTLY:
6224         p = "EXACTLY";
6225         break;
6226       case NOTHING:
6227         p = "NOTHING";
6228         break;
6229       case BACK:
6230         p = "BACK";
6231         break;
6232       case END:
6233         p = "END";
6234         break;
6235       case MOPEN + 0:
6236         p = "MATCH START";
6237         break;
6238       case MOPEN + 1:
6239       case MOPEN + 2:
6240       case MOPEN + 3:
6241       case MOPEN + 4:
6242       case MOPEN + 5:
6243       case MOPEN + 6:
6244       case MOPEN + 7:
6245       case MOPEN + 8:
6246       case MOPEN + 9:
6247         sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6248         p = NULL;
6249         break;
6250       case MCLOSE + 0:
6251         p = "MATCH END";
6252         break;
6253       case MCLOSE + 1:
6254       case MCLOSE + 2:
6255       case MCLOSE + 3:
6256       case MCLOSE + 4:
6257       case MCLOSE + 5:
6258       case MCLOSE + 6:
6259       case MCLOSE + 7:
6260       case MCLOSE + 8:
6261       case MCLOSE + 9:
6262         sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6263         p = NULL;
6264         break;
6265       case BACKREF + 1:
6266       case BACKREF + 2:
6267       case BACKREF + 3:
6268       case BACKREF + 4:
6269       case BACKREF + 5:
6270       case BACKREF + 6:
6271       case BACKREF + 7:
6272       case BACKREF + 8:
6273       case BACKREF + 9:
6274         sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6275         p = NULL;
6276         break;
6277       case NOPEN:
6278         p = "NOPEN";
6279         break;
6280       case NCLOSE:
6281         p = "NCLOSE";
6282         break;
6283 #ifdef FEAT_SYN_HL
6284       case ZOPEN + 1:
6285       case ZOPEN + 2:
6286       case ZOPEN + 3:
6287       case ZOPEN + 4:
6288       case ZOPEN + 5:
6289       case ZOPEN + 6:
6290       case ZOPEN + 7:
6291       case ZOPEN + 8:
6292       case ZOPEN + 9:
6293         sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6294         p = NULL;
6295         break;
6296       case ZCLOSE + 1:
6297       case ZCLOSE + 2:
6298       case ZCLOSE + 3:
6299       case ZCLOSE + 4:
6300       case ZCLOSE + 5:
6301       case ZCLOSE + 6:
6302       case ZCLOSE + 7:
6303       case ZCLOSE + 8:
6304       case ZCLOSE + 9:
6305         sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6306         p = NULL;
6307         break;
6308       case ZREF + 1:
6309       case ZREF + 2:
6310       case ZREF + 3:
6311       case ZREF + 4:
6312       case ZREF + 5:
6313       case ZREF + 6:
6314       case ZREF + 7:
6315       case ZREF + 8:
6316       case ZREF + 9:
6317         sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6318         p = NULL;
6319         break;
6320 #endif
6321       case STAR:
6322         p = "STAR";
6323         break;
6324       case PLUS:
6325         p = "PLUS";
6326         break;
6327       case NOMATCH:
6328         p = "NOMATCH";
6329         break;
6330       case MATCH:
6331         p = "MATCH";
6332         break;
6333       case BEHIND:
6334         p = "BEHIND";
6335         break;
6336       case NOBEHIND:
6337         p = "NOBEHIND";
6338         break;
6339       case SUBPAT:
6340         p = "SUBPAT";
6341         break;
6342       case BRACE_LIMITS:
6343         p = "BRACE_LIMITS";
6344         break;
6345       case BRACE_SIMPLE:
6346         p = "BRACE_SIMPLE";
6347         break;
6348       case BRACE_COMPLEX + 0:
6349       case BRACE_COMPLEX + 1:
6350       case BRACE_COMPLEX + 2:
6351       case BRACE_COMPLEX + 3:
6352       case BRACE_COMPLEX + 4:
6353       case BRACE_COMPLEX + 5:
6354       case BRACE_COMPLEX + 6:
6355       case BRACE_COMPLEX + 7:
6356       case BRACE_COMPLEX + 8:
6357       case BRACE_COMPLEX + 9:
6358         sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6359         p = NULL;
6360         break;
6361 #ifdef FEAT_MBYTE
6362       case MULTIBYTECODE:
6363         p = "MULTIBYTECODE";
6364         break;
6365 #endif
6366       case NEWL:
6367         p = "NEWL";
6368         break;
6369       default:
6370         sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6371         p = NULL;
6372         break;
6373     }
6374     if (p != NULL)
6375         (void) strcat(buf, p);
6376     return buf;
6377 }
6378 #endif
6379
6380 #ifdef FEAT_MBYTE
6381 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
6382
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f (Hebrew presentation
 * forms).  The entry at index "c - 0xfb20" holds the base character of "c"
 * followed by up to two more characters; positions not used are zero.
 * Entries marked UNUSED hold the character itself.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "*c1", "*c2" and "*c3".
 * Only the characters 0xfb20 - 0xfb4f, which have an entry in
 * decomp_table[], are decomposed.  For any other character "*c1" is set to
 * "c" and "*c2" and "*c3" are set to zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    decomp_T d;

    /* The lower bound must be the first character of the table, otherwise
     * the index below becomes negative and reads before decomp_table[]. */
    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        d = decomp_table[c - 0xfb20];
        *c1 = d.a;
        *c2 = d.b;
        *c3 = d.c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
6461 #endif
6462
6463 /*
6464  * Compare two strings, ignore case if ireg_ic set.
6465  * Return 0 if strings match, non-zero otherwise.
6466  * Correct the length "*n" when composing characters are ignored.
6467  */
6468     static int
6469 cstrncmp(s1, s2, n)
6470     char_u      *s1, *s2;
6471     int         *n;
6472 {
6473     int         result;
6474
6475     if (!ireg_ic)
6476         result = STRNCMP(s1, s2, *n);
6477     else
6478         result = MB_STRNICMP(s1, s2, *n);
6479
6480 #ifdef FEAT_MBYTE
6481     /* if it failed and it's utf8 and we want to combineignore: */
6482     if (result != 0 && enc_utf8 && ireg_icombine)
6483     {
6484         char_u  *str1, *str2;
6485         int     c1, c2, c11, c12;
6486         int     junk;
6487
6488         /* we have to handle the strcmp ourselves, since it is necessary to
6489          * deal with the composing characters by ignoring them: */
6490         str1 = s1;
6491         str2 = s2;
6492         c1 = c2 = 0;
6493         while ((int)(str1 - s1) < *n)
6494         {
6495             c1 = mb_ptr2char_adv(&str1);
6496             c2 = mb_ptr2char_adv(&str2);
6497
6498             /* decompose the character if necessary, into 'base' characters
6499              * because I don't care about Arabic, I will hard-code the Hebrew
6500              * which I *do* care about!  So sue me... */
6501             if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
6502             {
6503                 /* decomposition necessary? */
6504                 mb_decompose(c1, &c11, &junk, &junk);
6505                 mb_decompose(c2, &c12, &junk, &junk);
6506                 c1 = c11;
6507                 c2 = c12;
6508                 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
6509                     break;
6510             }
6511         }
6512         result = c2 - c1;
6513         if (result == 0)
6514             *n = (int)(str2 - s2);
6515     }
6516 #endif
6517
6518     return result;
6519 }
6520
6521 /*
6522  * cstrchr: This function is used a lot for simple searches, keep it fast!
6523  */
6524     static char_u *
6525 cstrchr(s, c)
6526     char_u      *s;
6527     int         c;
6528 {
6529     char_u      *p;
6530     int         cc;
6531
6532     if (!ireg_ic
6533 #ifdef FEAT_MBYTE
6534             || (!enc_utf8 && mb_char2len(c) > 1)
6535 #endif
6536             )
6537         return vim_strchr(s, c);
6538
6539     /* tolower() and toupper() can be slow, comparing twice should be a lot
6540      * faster (esp. when using MS Visual C++!).
6541      * For UTF-8 need to use folded case. */
6542 #ifdef FEAT_MBYTE
6543     if (enc_utf8 && c > 0x80)
6544         cc = utf_fold(c);
6545     else
6546 #endif
6547          if (MB_ISUPPER(c))
6548         cc = MB_TOLOWER(c);
6549     else if (MB_ISLOWER(c))
6550         cc = MB_TOUPPER(c);
6551     else
6552         return vim_strchr(s, c);
6553
6554 #ifdef FEAT_MBYTE
6555     if (has_mbyte)
6556     {
6557         for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
6558         {
6559             if (enc_utf8 && c > 0x80)
6560             {
6561                 if (utf_fold(utf_ptr2char(p)) == cc)
6562                     return p;
6563             }
6564             else if (*p == c || *p == cc)
6565                 return p;
6566         }
6567     }
6568     else
6569 #endif
6570         /* Faster version for when there are no multi-byte characters. */
6571         for (p = s; *p != NUL; ++p)
6572             if (*p == c || *p == cc)
6573                 return p;
6574
6575     return NULL;
6576 }
6577
6578 /***************************************************************
6579  *                    regsub stuff                             *
6580  ***************************************************************/
6581
6582 /* This stuff below really confuses cc on an SGI -- webb */
6583 #ifdef __sgi
6584 # undef __ARGS
6585 # define __ARGS(x)  ()
6586 #endif
6587
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
6594 typedef void (*(*fptr_T) __ARGS((int *, int)))();
6595
6596 static fptr_T do_upper __ARGS((int *, int));
6597 static fptr_T do_Upper __ARGS((int *, int));
6598 static fptr_T do_lower __ARGS((int *, int));
6599 static fptr_T do_Lower __ARGS((int *, int));
6600
6601 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
6602
6603     static fptr_T
6604 do_upper(d, c)
6605     int         *d;
6606     int         c;
6607 {
6608     *d = MB_TOUPPER(c);
6609
6610     return (fptr_T)NULL;
6611 }
6612
6613     static fptr_T
6614 do_Upper(d, c)
6615     int         *d;
6616     int         c;
6617 {
6618     *d = MB_TOUPPER(c);
6619
6620     return (fptr_T)do_Upper;
6621 }
6622
6623     static fptr_T
6624 do_lower(d, c)
6625     int         *d;
6626     int         c;
6627 {
6628     *d = MB_TOLOWER(c);
6629
6630     return (fptr_T)NULL;
6631 }
6632
6633     static fptr_T
6634 do_Lower(d, c)
6635     int         *d;
6636     int         c;
6637 {
6638     *d = MB_TOLOWER(c);
6639
6640     return (fptr_T)do_Lower;
6641 }
6642
6643 /*
6644  * regtilde(): Replace tildes in the pattern by the old pattern.
6645  *
6646  * Short explanation of the tilde: It stands for the previous replacement
6647  * pattern.  If that previous pattern also contains a ~ we should go back a
6648  * step further...  But we insert the previous pattern into the current one
6649  * and remember that.
6650  * This still does not handle the case where "magic" changes.  So require the
6651  * user to keep his hands off of "magic".
6652  *
6653  * The tildes are parsed once before the first call to vim_regsub().
6654  */
6655     char_u *
6656 regtilde(source, magic)
6657     char_u      *source;
6658     int         magic;
6659 {
6660     char_u      *newsub = source;
6661     char_u      *tmpsub;
6662     char_u      *p;
6663     int         len;
6664     int         prevlen;
6665
6666     for (p = newsub; *p; ++p)
6667     {
6668         if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
6669         {
6670             if (reg_prev_sub != NULL)
6671             {
6672                 /* length = len(newsub) - 1 + len(prev_sub) + 1 */
6673                 prevlen = (int)STRLEN(reg_prev_sub);
6674                 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
6675                 if (tmpsub != NULL)
6676                 {
6677                     /* copy prefix */
6678                     len = (int)(p - newsub);    /* not including ~ */
6679                     mch_memmove(tmpsub, newsub, (size_t)len);
6680                     /* interpret tilde */
6681                     mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
6682                     /* copy postfix */
6683                     if (!magic)
6684                         ++p;                    /* back off \ */
6685                     STRCPY(tmpsub + len + prevlen, p + 1);
6686
6687                     if (newsub != source)       /* already allocated newsub */
6688                         vim_free(newsub);
6689                     newsub = tmpsub;
6690                     p = newsub + len + prevlen;
6691                 }
6692             }
6693             else if (magic)
6694                 mch_memmove(p, p + 1, STRLEN(p));       /* remove '~' */
6695             else
6696                 mch_memmove(p, p + 2, STRLEN(p) - 1);   /* remove '\~' */
6697             --p;
6698         }
6699         else
6700         {
6701             if (*p == '\\' && p[1])             /* skip escaped characters */
6702                 ++p;
6703 #ifdef FEAT_MBYTE
6704             if (has_mbyte)
6705                 p += (*mb_ptr2len)(p) - 1;
6706 #endif
6707         }
6708     }
6709
6710     vim_free(reg_prev_sub);
6711     if (newsub != source)       /* newsub was allocated, just keep it */
6712         reg_prev_sub = newsub;
6713     else                        /* no ~ found, need to save newsub  */
6714         reg_prev_sub = vim_strsave(newsub);
6715     return newsub;
6716 }
6717
6718 #ifdef FEAT_EVAL
6719 static int can_f_submatch = FALSE;      /* TRUE when submatch() can be used */
6720
6721 /* These pointers are used instead of reg_match and reg_mmatch for
6722  * reg_submatch().  Needed for when the substitution string is an expression
6723  * that contains a call to substitute() and submatch(). */
6724 static regmatch_T       *submatch_match;
6725 static regmmatch_T      *submatch_mmatch;
6726 #endif
6727
6728 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
6729 /*
6730  * vim_regsub() - perform substitutions after a vim_regexec() or
6731  * vim_regexec_multi() match.
6732  *
6733  * If "copy" is TRUE really copy into "dest".
6734  * If "copy" is FALSE nothing is copied, this is just to find out the length
6735  * of the result.
6736  *
6737  * If "backslash" is TRUE, a backslash will be removed later, need to double
6738  * them to keep them, and insert a backslash before a CR to avoid it being
6739  * replaced with a line break later.
6740  *
6741  * Note: The matched text must not change between the call of
6742  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
6743  * references invalid!
6744  *
6745  * Returns the size of the replacement, including terminating NUL.
6746  */
6747     int
6748 vim_regsub(rmp, source, dest, copy, magic, backslash)
6749     regmatch_T  *rmp;
6750     char_u      *source;
6751     char_u      *dest;
6752     int         copy;
6753     int         magic;
6754     int         backslash;
6755 {
6756     reg_match = rmp;
6757     reg_mmatch = NULL;
6758     reg_maxline = 0;
6759     return vim_regsub_both(source, dest, copy, magic, backslash);
6760 }
6761 #endif
6762
6763     int
6764 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
6765     regmmatch_T *rmp;
6766     linenr_T    lnum;
6767     char_u      *source;
6768     char_u      *dest;
6769     int         copy;
6770     int         magic;
6771     int         backslash;
6772 {
6773     reg_match = NULL;
6774     reg_mmatch = rmp;
6775     reg_buf = curbuf;           /* always works on the current buffer! */
6776     reg_firstlnum = lnum;
6777     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
6778     return vim_regsub_both(source, dest, copy, magic, backslash);
6779 }
6780
6781     static int
6782 vim_regsub_both(source, dest, copy, magic, backslash)
6783     char_u      *source;
6784     char_u      *dest;
6785     int         copy;
6786     int         magic;
6787     int         backslash;
6788 {
6789     char_u      *src;
6790     char_u      *dst;
6791     char_u      *s;
6792     int         c;
6793     int         cc;
6794     int         no = -1;
6795     fptr_T      func = (fptr_T)NULL;
6796     linenr_T    clnum = 0;      /* init for GCC */
6797     int         len = 0;        /* init for GCC */
6798 #ifdef FEAT_EVAL
6799     static char_u *eval_result = NULL;
6800 #endif
6801
6802     /* Be paranoid... */
6803     if (source == NULL || dest == NULL)
6804     {
6805         EMSG(_(e_null));
6806         return 0;
6807     }
6808     if (prog_magic_wrong())
6809         return 0;
6810     src = source;
6811     dst = dest;
6812
6813     /*
6814      * When the substitute part starts with "\=" evaluate it as an expression.
6815      */
6816     if (source[0] == '\\' && source[1] == '='
6817 #ifdef FEAT_EVAL
6818             && !can_f_submatch      /* can't do this recursively */
6819 #endif
6820             )
6821     {
6822 #ifdef FEAT_EVAL
6823         /* To make sure that the length doesn't change between checking the
6824          * length and copying the string, and to speed up things, the
6825          * resulting string is saved from the call with "copy" == FALSE to the
6826          * call with "copy" == TRUE. */
6827         if (copy)
6828         {
6829             if (eval_result != NULL)
6830             {
6831                 STRCPY(dest, eval_result);
6832                 dst += STRLEN(eval_result);
6833                 vim_free(eval_result);
6834                 eval_result = NULL;
6835             }
6836         }
6837         else
6838         {
6839             linenr_T    save_reg_maxline;
6840             win_T       *save_reg_win;
6841             int         save_ireg_ic;
6842
6843             vim_free(eval_result);
6844
6845             /* The expression may contain substitute(), which calls us
6846              * recursively.  Make sure submatch() gets the text from the first
6847              * level.  Don't need to save "reg_buf", because
6848              * vim_regexec_multi() can't be called recursively. */
6849             submatch_match = reg_match;
6850             submatch_mmatch = reg_mmatch;
6851             save_reg_maxline = reg_maxline;
6852             save_reg_win = reg_win;
6853             save_ireg_ic = ireg_ic;
6854             can_f_submatch = TRUE;
6855
6856             eval_result = eval_to_string(source + 2, NULL, TRUE);
6857             if (eval_result != NULL)
6858             {
6859                 for (s = eval_result; *s != NUL; mb_ptr_adv(s))
6860                 {
6861                     /* Change NL to CR, so that it becomes a line break.
6862                      * Skip over a backslashed character. */
6863                     if (*s == NL)
6864                         *s = CAR;
6865                     else if (*s == '\\' && s[1] != NUL)
6866                         ++s;
6867                 }
6868
6869                 dst += STRLEN(eval_result);
6870             }
6871
6872             reg_match = submatch_match;
6873             reg_mmatch = submatch_mmatch;
6874             reg_maxline = save_reg_maxline;
6875             reg_win = save_reg_win;
6876             ireg_ic = save_ireg_ic;
6877             can_f_submatch = FALSE;
6878         }
6879 #endif
6880     }
6881     else
6882       while ((c = *src++) != NUL)
6883       {
6884         if (c == '&' && magic)
6885             no = 0;
6886         else if (c == '\\' && *src != NUL)
6887         {
6888             if (*src == '&' && !magic)
6889             {
6890                 ++src;
6891                 no = 0;
6892             }
6893             else if ('0' <= *src && *src <= '9')
6894             {
6895                 no = *src++ - '0';
6896             }
6897             else if (vim_strchr((char_u *)"uUlLeE", *src))
6898             {
6899                 switch (*src++)
6900                 {
6901                 case 'u':   func = (fptr_T)do_upper;
6902                             continue;
6903                 case 'U':   func = (fptr_T)do_Upper;
6904                             continue;
6905                 case 'l':   func = (fptr_T)do_lower;
6906                             continue;
6907                 case 'L':   func = (fptr_T)do_Lower;
6908                             continue;
6909                 case 'e':
6910                 case 'E':   func = (fptr_T)NULL;
6911                             continue;
6912                 }
6913             }
6914         }
6915         if (no < 0)           /* Ordinary character. */
6916         {
6917             if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
6918             {
6919                 /* Copy a special key as-is. */
6920                 if (copy)
6921                 {
6922                     *dst++ = c;
6923                     *dst++ = *src++;
6924                     *dst++ = *src++;
6925                 }
6926                 else
6927                 {
6928                     dst += 3;
6929                     src += 2;
6930                 }
6931                 continue;
6932             }
6933
6934             if (c == '\\' && *src != NUL)
6935             {
6936                 /* Check for abbreviations -- webb */
6937                 switch (*src)
6938                 {
6939                     case 'r':   c = CAR;        ++src;  break;
6940                     case 'n':   c = NL;         ++src;  break;
6941                     case 't':   c = TAB;        ++src;  break;
6942                  /* Oh no!  \e already has meaning in subst pat :-( */
6943                  /* case 'e':   c = ESC;        ++src;  break; */
6944                     case 'b':   c = Ctrl_H;     ++src;  break;
6945
6946                     /* If "backslash" is TRUE the backslash will be removed
6947                      * later.  Used to insert a literal CR. */
6948                     default:    if (backslash)
6949                                 {
6950                                     if (copy)
6951                                         *dst = '\\';
6952                                     ++dst;
6953                                 }
6954                                 c = *src++;
6955                 }
6956             }
6957 #ifdef FEAT_MBYTE
6958             else if (has_mbyte)
6959                 c = mb_ptr2char(src - 1);
6960 #endif
6961
6962             /* Write to buffer, if copy is set. */
6963             if (func == (fptr_T)NULL)   /* just copy */
6964                 cc = c;
6965             else
6966                 /* Turbo C complains without the typecast */
6967                 func = (fptr_T)(func(&cc, c));
6968
6969 #ifdef FEAT_MBYTE
6970             if (has_mbyte)
6971             {
6972                 src += mb_ptr2len(src - 1) - 1;
6973                 if (copy)
6974                     mb_char2bytes(cc, dst);
6975                 dst += mb_char2len(cc) - 1;
6976             }
6977             else
6978 #endif
6979                 if (copy)
6980                     *dst = cc;
6981             dst++;
6982         }
6983         else
6984         {
6985             if (REG_MULTI)
6986             {
6987                 clnum = reg_mmatch->startpos[no].lnum;
6988                 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
6989                     s = NULL;
6990                 else
6991                 {
6992                     s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
6993                     if (reg_mmatch->endpos[no].lnum == clnum)
6994                         len = reg_mmatch->endpos[no].col
6995                                                - reg_mmatch->startpos[no].col;
6996                     else
6997                         len = (int)STRLEN(s);
6998                 }
6999             }
7000             else
7001             {
7002                 s = reg_match->startp[no];
7003                 if (reg_match->endp[no] == NULL)
7004                     s = NULL;
7005                 else
7006                     len = (int)(reg_match->endp[no] - s);
7007             }
7008             if (s != NULL)
7009             {
7010                 for (;;)
7011                 {
7012                     if (len == 0)
7013                     {
7014                         if (REG_MULTI)
7015                         {
7016                             if (reg_mmatch->endpos[no].lnum == clnum)
7017                                 break;
7018                             if (copy)
7019                                 *dst = CAR;
7020                             ++dst;
7021                             s = reg_getline(++clnum);
7022                             if (reg_mmatch->endpos[no].lnum == clnum)
7023                                 len = reg_mmatch->endpos[no].col;
7024                             else
7025                                 len = (int)STRLEN(s);
7026                         }
7027                         else
7028                             break;
7029                     }
7030                     else if (*s == NUL) /* we hit NUL. */
7031                     {
7032                         if (copy)
7033                             EMSG(_(e_re_damg));
7034                         goto exit;
7035                     }
7036                     else
7037                     {
7038                         if (backslash && (*s == CAR || *s == '\\'))
7039                         {
7040                             /*
7041                              * Insert a backslash in front of a CR, otherwise
7042                              * it will be replaced by a line break.
7043                              * Number of backslashes will be halved later,
7044                              * double them here.
7045                              */
7046                             if (copy)
7047                             {
7048                                 dst[0] = '\\';
7049                                 dst[1] = *s;
7050                             }
7051                             dst += 2;
7052                         }
7053                         else
7054                         {
7055 #ifdef FEAT_MBYTE
7056                             if (has_mbyte)
7057                                 c = mb_ptr2char(s);
7058                             else
7059 #endif
7060                                 c = *s;
7061
7062                             if (func == (fptr_T)NULL)   /* just copy */
7063                                 cc = c;
7064                             else
7065                                 /* Turbo C complains without the typecast */
7066                                 func = (fptr_T)(func(&cc, c));
7067
7068 #ifdef FEAT_MBYTE
7069                             if (has_mbyte)
7070                             {
7071                                 int l;
7072
7073                                 /* Copy composing characters separately, one
7074                                  * at a time. */
7075                                 if (enc_utf8)
7076                                     l = utf_ptr2len(s) - 1;
7077                                 else
7078                                     l = mb_ptr2len(s) - 1;
7079
7080                                 s += l;
7081                                 len -= l;
7082                                 if (copy)
7083                                     mb_char2bytes(cc, dst);
7084                                 dst += mb_char2len(cc) - 1;
7085                             }
7086                             else
7087 #endif
7088                                 if (copy)
7089                                     *dst = cc;
7090                             dst++;
7091                         }
7092
7093                         ++s;
7094                         --len;
7095                     }
7096                 }
7097             }
7098             no = -1;
7099         }
7100       }
7101     if (copy)
7102         *dst = NUL;
7103
7104 exit:
7105     return (int)((dst - dest) + 1);
7106 }
7107
7108 #ifdef FEAT_EVAL
7109 /*
7110  * Used for the submatch() function: get the string from the n'th submatch in
7111  * allocated memory.
7112  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7113  */
7114     char_u *
7115 reg_submatch(no)
7116     int         no;
7117 {
7118     char_u      *retval = NULL;
7119     char_u      *s;
7120     int         len;
7121     int         round;
7122     linenr_T    lnum;
7123
7124     if (!can_f_submatch || no < 0)
7125         return NULL;
7126
7127     if (submatch_match == NULL)
7128     {
7129         /*
7130          * First round: compute the length and allocate memory.
7131          * Second round: copy the text.
7132          */
7133         for (round = 1; round <= 2; ++round)
7134         {
7135             lnum = submatch_mmatch->startpos[no].lnum;
7136             if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7137                 return NULL;
7138
7139             s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
7140             if (s == NULL)  /* anti-crash check, cannot happen? */
7141                 break;
7142             if (submatch_mmatch->endpos[no].lnum == lnum)
7143             {
7144                 /* Within one line: take form start to end col. */
7145                 len = submatch_mmatch->endpos[no].col
7146                                           - submatch_mmatch->startpos[no].col;
7147                 if (round == 2)
7148                     vim_strncpy(retval, s, len);
7149                 ++len;
7150             }
7151             else
7152             {
7153                 /* Multiple lines: take start line from start col, middle
7154                  * lines completely and end line up to end col. */
7155                 len = (int)STRLEN(s);
7156                 if (round == 2)
7157                 {
7158                     STRCPY(retval, s);
7159                     retval[len] = '\n';
7160                 }
7161                 ++len;
7162                 ++lnum;
7163                 while (lnum < submatch_mmatch->endpos[no].lnum)
7164                 {
7165                     s = reg_getline(lnum++);
7166                     if (round == 2)
7167                         STRCPY(retval + len, s);
7168                     len += (int)STRLEN(s);
7169                     if (round == 2)
7170                         retval[len] = '\n';
7171                     ++len;
7172                 }
7173                 if (round == 2)
7174                     STRNCPY(retval + len, reg_getline(lnum),
7175                                              submatch_mmatch->endpos[no].col);
7176                 len += submatch_mmatch->endpos[no].col;
7177                 if (round == 2)
7178                     retval[len] = NUL;
7179                 ++len;
7180             }
7181
7182             if (retval == NULL)
7183             {
7184                 retval = lalloc((long_u)len, TRUE);
7185                 if (retval == NULL)
7186                     return NULL;
7187             }
7188         }
7189     }
7190     else
7191     {
7192         if (submatch_match->endp[no] == NULL)
7193             retval = NULL;
7194         else
7195         {
7196             s = submatch_match->startp[no];
7197             retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7198         }
7199     }
7200
7201     return retval;
7202 }
7203 #endif