Remove autosetup/jimsh0.exe on distclean
[jimtcl.git] / jimregexp.c
blob77254870d560e73de30cd26defd944ce931b7a52
1 /*
2 * regcomp and regexec -- regsub and regerror are elsewhere
4 * Copyright (c) 1986 by University of Toronto.
5 * Written by Henry Spencer. Not derived from licensed software.
7 * Permission is granted to anyone to use this software for any
8 * purpose on any computer system, and to redistribute it freely,
9 * subject to the following restrictions:
11 * 1. The author is not responsible for the consequences of use of
12 * this software, no matter how awful, even if they arise
13 * from defects in it.
15 * 2. The origin of this software must not be misrepresented, either
16 * by explicit claim or by omission.
18 * 3. Altered versions must be plainly marked as such, and must not
19 * be misrepresented as being the original software.
20 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
21 *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to |
22 *** to assist in implementing egrep.
23 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
24 *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
25 *** as in BSD grep and ex.
26 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
27 *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
28 *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods,
29 *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
30 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
31 *** seiwald@vix.com, on 28 August 1993, for use in jam. Regmagic.h
32 *** was moved into regexp.h, and the include of regexp.h now uses "'s
33 *** to avoid conflicting with the system regexp.h. Const, bless its
34 *** soul, was removed so it can compile everywhere. The declaration
35 *** of strchr() was in conflict on AIX, so it was removed (as it is
36 *** happily defined in string.h).
37 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
38 *** seiwald@perforce.com, on 20 January 2000, to use function prototypes.
39 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
40 *** seiwald@perforce.com, on 05 November 2002, to const string literals.
42 * THIS IS AN ALTERED VERSION. It was altered by Steve Bennett <steveb@workware.net.au>
43 * on 16 October 2010, to remove static state and add better Tcl ARE compatibility.
44 * This includes counted repetitions, UTF-8 support, character classes,
45 * shorthand character classes, increased number of parentheses to 100,
46 * backslash escape sequences. It also removes \n as an alternative to |.
48 * Beware that some of this code is subtly aware of the way operator
49 * precedence is structured in regular expressions. Serious changes in
50 * regular-expression syntax might require a total rethink.
52 #include <stdio.h>
53 #include <ctype.h>
54 #include <stdlib.h>
55 #include <string.h>
57 #include "jim.h"
58 #include "jimautoconf.h"
59 #include "jimregexp.h"
60 #include "utf8.h"
62 #if !defined(HAVE_REGCOMP) || defined(JIM_REGEXP)
65 * Structure for regexp "program". This is essentially a linear encoding
66 * of a nondeterministic finite-state machine (aka syntax charts or
67 * "railroad normal form" in parsing technology). Each node is an opcode
68 * plus a "next" pointer, possibly plus an operand. "Next" pointers of
69 * all nodes except BRANCH implement concatenation; a "next" pointer with
70 * a BRANCH on both ends of it is connecting two alternatives. (Here we
71 * have one of the subtle syntax dependencies: an individual BRANCH (as
72 * opposed to a collection of them) is never concatenated with anything
73 * because of operator precedence.) The operand of some types of node is
74 * a literal string; for others, it is a node leading into a sub-FSM. In
75 * particular, the operand of a BRANCH node is the first node of the branch.
76 * (NB this is *not* a tree structure: the tail of the branch connects
77 * to the thing following the set of BRANCHes.) The opcodes are:
80 /* This *MUST* be less than (255-20)/2=117 */
81 #define REG_MAX_PAREN 100
83 /* definition number opnd? meaning */
84 #define END 0 /* no End of program. */
85 #define BOL 1 /* no Match "" at beginning of line. */
86 #define EOL 2 /* no Match "" at end of line. */
87 #define ANY 3 /* no Match any one character. */
88 #define ANYOF 4 /* str Match any character in this string. */
89 #define ANYBUT 5 /* str Match any character not in this string. */
90 #define BRANCH 6 /* node Match this alternative, or the next... */
91 #define BACK 7 /* no Match "", "next" ptr points backward. */
92 #define EXACTLY 8 /* str Match this string. */
93 #define NOTHING 9 /* no Match empty string. */
94 #define REP 10 /* max,min Match this (simple) thing [min,max] times. */
95 #define REPMIN 11 /* max,min Match this (simple) thing [min,max] times, mininal match. */
96 #define REPX 12 /* max,min Match this (complex) thing [min,max] times. */
97 #define REPXMIN 13 /* max,min Match this (complex) thing [min,max] times, minimal match. */
99 #define WORDA 15 /* no Match "" at wordchar, where prev is nonword */
100 #define WORDZ 16 /* no Match "" at nonwordchar, where prev is word */
101 #define OPEN 20 /* no Mark this point in input as start of #n. */
102 /* OPEN+1 is number 1, etc. */
103 #define CLOSE (OPEN+REG_MAX_PAREN) /* no Analogous to OPEN. */
104 #define CLOSE_END (CLOSE+REG_MAX_PAREN)
107 * The first byte of the regexp internal "program" is actually this magic
108 * number; the start node begins in the second byte.
110 #define REG_MAGIC 0xFADED00D
113 * Opcode notes:
115 * BRANCH The set of branches constituting a single choice are hooked
116 * together with their "next" pointers, since precedence prevents
117 * anything being concatenated to any individual branch. The
118 * "next" pointer of the last BRANCH in a choice points to the
119 * thing following the whole choice. This is also where the
120 * final "next" pointer of each individual branch points; each
121 * branch starts with the operand node of a BRANCH node.
123 * BACK Normal "next" pointers all implicitly point forward; BACK
124 * exists to make loop structures possible.
126 * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
127 * BRANCH structures using BACK. Simple cases (one character
128 * per match) are implemented with STAR and PLUS for speed
129 * and to minimize recursive plunges.
131 * OPEN,CLOSE ...are numbered at compile time.
135 * A node is one char of opcode followed by two chars of "next" pointer.
136 * "Next" pointers are stored as two 8-bit pieces, high order first. The
137 * value is a positive offset from the opcode of the node containing it.
138 * An operand, if any, simply follows the node. (Note that much of the
139 * code generation knows about this implicit relationship.)
141 * Using two bytes for the "next" pointer is vast overkill for most things,
142 * but allows patterns to get big without disasters.
144 #define OP(p) ((p)[0])
145 #define NEXT(p) ((p)[1])
146 #define OPERAND(p) ((p) + 2)
149 * See regmagic.h for one further detail of program structure.
154 * Utility definitions.
157 #define FAIL(R,M) { (R)->err = (M); return (M); }
158 #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?' || (c) == '{')
159 #define META "^$.[()|?{+*"
162 * Flags to be passed up and down.
164 #define HASWIDTH 01 /* Known never to match null string. */
165 #define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
166 #define SPSTART 04 /* Starts with * or +. */
167 #define WORST 0 /* Worst case. */
169 #define MAX_REP_COUNT 1000000
172 * Forward declarations for regcomp()'s friends.
174 static int *reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp );
175 static int *regpiece(regex_t *preg, int *flagp );
176 static int *regbranch(regex_t *preg, int *flagp );
177 static int *regatom(regex_t *preg, int *flagp );
178 static int *regnode(regex_t *preg, int op );
179 static const int *regnext(regex_t *preg, const int *p );
180 static void regc(regex_t *preg, int b );
181 static int *reginsert(regex_t *preg, int op, int size, int *opnd );
182 static void regtail(regex_t *preg, int *p, const int *val );
183 static void regoptail(regex_t *preg, int *p, const int *val );
185 static int reg_range_find(const int *string, int c);
186 static const char *str_find(const char *string, int c, int nocase);
187 static int prefix_cmp(const int *prog, int proglen, const char *string, int nocase);
189 /*#define DEBUG*/
190 #ifdef DEBUG
191 int regnarrate = 0;
192 static void regdump(regex_t *preg);
193 static const char *regprop( const int *op );
194 #endif
197 static int regdummy;
200 * Returns the length of the null-terminated integer sequence.
202 static int str_int_len(const int *seq)
204 int n = 0;
205 while (*seq++) {
206 n++;
208 return n;
212 - regcomp - compile a regular expression into internal code
214 * We can't allocate space until we know how big the compiled form will be,
215 * but we can't compile it (and thus know how big it is) until we've got a
216 * place to put the code. So we cheat: we compile it twice, once with code
217 * generation turned off and size counting turned on, and once "for real".
218 * This also means that we don't allocate space until we are sure that the
219 * thing really will compile successfully, and we never have to move the
220 * code and thus invalidate pointers into it. (Note that it has to be in
221 * one piece because free() must be able to free it all.)
223 * Beware that the optimization-preparation code in here knows about some
224 * of the structure of the compiled regexp.
226 int regcomp(regex_t *preg, const char *exp, int cflags)
228 const int *scan;
229 const int *longest;
230 unsigned len;
231 int flags;
233 #ifdef DEBUG
234 fprintf(stderr, "Compiling: '%s'\n", exp);
235 #endif
236 memset(preg, 0, sizeof(*preg));
238 if (exp == NULL)
239 FAIL(preg, REG_ERR_NULL_ARGUMENT);
241 /* First pass: determine size, legality. */
242 preg->cflags = cflags;
243 preg->regparse = exp;
244 preg->re_nsub = 0;
245 preg->regsize = 0L;
246 preg->regcode = &regdummy;
247 regc(preg, REG_MAGIC);
248 if (reg(preg, 0, &flags) == NULL)
249 return preg->err;
251 /* Small enough for pointer-storage convention? */
252 if (preg->regsize >= 32767L || preg->re_nsub >= REG_MAX_PAREN) /* Probably could be 65535L. */
253 FAIL(preg,REG_ERR_TOO_BIG);
255 /* Allocate space. */
256 preg->program = malloc(preg->regsize * sizeof(*preg->program));
257 if (preg->program == NULL)
258 FAIL(preg, REG_ERR_NOMEM);
260 /* Second pass: emit code. */
261 preg->regparse = exp;
262 preg->re_nsub = 0;
263 preg->regsize = 0L;
264 preg->regcode = preg->program;
265 regc(preg, REG_MAGIC);
266 if (reg(preg, 0, &flags) == NULL)
267 return preg->err;
269 /* Dig out information for optimizations. */
270 preg->regstart = 0; /* Worst-case defaults. */
271 preg->reganch = 0;
272 preg->regmust = NULL;
273 preg->regmlen = 0;
274 scan = preg->program+1; /* First BRANCH. */
275 if (OP(regnext(preg, scan)) == END) { /* Only one top-level choice. */
276 scan = OPERAND(scan);
278 /* Starting-point info. */
279 if (OP(scan) == EXACTLY) {
280 preg->regstart = *OPERAND(scan);
282 else if (OP(scan) == BOL)
283 preg->reganch++;
286 * If there's something expensive in the r.e., find the
287 * longest literal string that must appear and make it the
288 * regmust. Resolve ties in favor of later strings, since
289 * the regstart check works with the beginning of the r.e.
290 * and avoiding duplication strengthens checking. Not a
291 * strong reason, but sufficient in the absence of others.
293 if (flags&SPSTART) {
294 longest = NULL;
295 len = 0;
296 for (; scan != NULL; scan = regnext(preg, scan)) {
297 if (OP(scan) == EXACTLY) {
298 int plen = str_int_len(OPERAND(scan));
299 if (plen >= len) {
300 longest = OPERAND(scan);
301 len = plen;
305 preg->regmust = longest;
306 preg->regmlen = len;
310 #ifdef DEBUG
311 regdump(preg);
312 #endif
314 return 0;
318 - reg - regular expression, i.e. main body or parenthesized thing
320 * Caller must absorb opening parenthesis.
322 * Combining parenthesis handling with the base level of regular expression
323 * is a trifle forced, but the need to tie the tails of the branches to what
324 * follows makes it hard to avoid.
326 static int *reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
328 int *ret;
329 int *br;
330 const int *ender;
331 int parno = 0;
332 int flags;
334 *flagp = HASWIDTH; /* Tentatively. */
336 /* Make an OPEN node, if parenthesized. */
337 if (paren) {
338 parno = ++preg->re_nsub;
339 ret = regnode(preg, OPEN+parno);
340 } else
341 ret = NULL;
343 /* Pick up the branches, linking them together. */
344 br = regbranch(preg, &flags);
345 if (br == NULL)
346 return(NULL);
347 if (ret != NULL)
348 regtail(preg, ret, br); /* OPEN -> first. */
349 else
350 ret = br;
351 if (!(flags&HASWIDTH))
352 *flagp &= ~HASWIDTH;
353 *flagp |= flags&SPSTART;
354 while (*preg->regparse == '|') {
355 preg->regparse++;
356 br = regbranch(preg, &flags);
357 if (br == NULL)
358 return(NULL);
359 regtail(preg, ret, br); /* BRANCH -> BRANCH. */
360 if (!(flags&HASWIDTH))
361 *flagp &= ~HASWIDTH;
362 *flagp |= flags&SPSTART;
365 /* Make a closing node, and hook it on the end. */
366 ender = regnode(preg, (paren) ? CLOSE+parno : END);
367 regtail(preg, ret, ender);
369 /* Hook the tails of the branches to the closing node. */
370 for (br = ret; br != NULL; br = (int *)regnext(preg, br))
371 regoptail(preg, br, ender);
373 /* Check for proper termination. */
374 if (paren && *preg->regparse++ != ')') {
375 preg->err = REG_ERR_UNMATCHED_PAREN;
376 return NULL;
377 } else if (!paren && *preg->regparse != '\0') {
378 if (*preg->regparse == ')') {
379 preg->err = REG_ERR_UNMATCHED_PAREN;
380 return NULL;
381 } else {
382 preg->err = REG_ERR_JUNK_ON_END;
383 return NULL;
387 return(ret);
391 - regbranch - one alternative of an | operator
393 * Implements the concatenation operator.
395 static int *regbranch(regex_t *preg, int *flagp )
397 int *ret;
398 int *chain;
399 int *latest;
400 int flags;
402 *flagp = WORST; /* Tentatively. */
404 ret = regnode(preg, BRANCH);
405 chain = NULL;
406 while (*preg->regparse != '\0' && *preg->regparse != ')' &&
407 *preg->regparse != '|') {
408 latest = regpiece(preg, &flags);
409 if (latest == NULL)
410 return(NULL);
411 *flagp |= flags&HASWIDTH;
412 if (chain == NULL) {/* First piece. */
413 *flagp |= flags&SPSTART;
415 else {
416 regtail(preg, chain, latest);
418 chain = latest;
420 if (chain == NULL) /* Loop ran zero times. */
421 (void) regnode(preg, NOTHING);
423 return(ret);
427 - regpiece - something followed by possible [*+?]
429 * Note that the branching code sequences used for ? and the general cases
430 * of * and + are somewhat optimized: they use the same NOTHING node as
431 * both the endmarker for their branch list and the body of the last branch.
432 * It might seem that this node could be dispensed with entirely, but the
433 * endmarker role is not redundant.
435 static int *regpiece(regex_t *preg, int *flagp)
437 int *ret;
438 char op;
439 int *next;
440 int flags;
441 int size = preg->regsize;
442 int *chain = NULL;
443 int min;
444 int max;
446 ret = regatom(preg, &flags);
447 if (ret == NULL)
448 return(NULL);
450 size = preg->regsize - size;
452 op = *preg->regparse;
453 if (!ISMULT(op)) {
454 *flagp = flags;
455 return(ret);
458 if (!(flags&HASWIDTH) && op != '?') {
459 preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
460 return NULL;
463 /* Handle braces (counted repetition) by expansion */
464 if (op == '{') {
465 char *end;
467 min = strtoul(preg->regparse + 1, &end, 10);
468 if (end == preg->regparse + 1) {
469 preg->err = REG_ERR_BAD_COUNT;
470 return NULL;
472 if (*end == '}') {
473 max = min;
475 else {
476 preg->regparse = end;
477 max = strtoul(preg->regparse + 1, &end, 10);
478 if (*end != '}') {
479 preg->err = REG_ERR_UNMATCHED_BRACES;
480 return NULL;
483 if (end == preg->regparse + 1) {
484 max = MAX_REP_COUNT;
486 else if (max < min || max >= 100) {
487 preg->err = REG_ERR_BAD_COUNT;
488 return NULL;
490 if (min >= 100) {
491 preg->err = REG_ERR_BAD_COUNT;
492 return NULL;
495 preg->regparse = strchr(preg->regparse, '}');
497 else {
498 min = (op == '+');
499 max = (op == '?' ? 1 : MAX_REP_COUNT);
502 if (preg->regparse[1] == '?') {
503 preg->regparse++;
504 next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
506 else {
507 next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
509 if (preg->regcode != &regdummy) {
510 ret[2] = max;
511 ret[3] = min;
512 ret[4] = 0;
515 *flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);
517 if (!(flags & SIMPLE)) {
518 int *back = regnode(preg, BACK);
519 regtail(preg, back, ret);
520 regtail(preg, next, back);
523 preg->regparse++;
524 if (ISMULT(*preg->regparse)) {
525 preg->err = REG_ERR_NESTED_COUNT;
526 return NULL;
529 return chain ? chain : ret;
533 * Add all characters in the inclusive range between lower and upper.
535 * Handles a swapped range (upper < lower).
537 static void reg_addrange(regex_t *preg, int lower, int upper)
539 if (lower > upper) {
540 reg_addrange(preg, upper, lower);
542 /* Add a range as length, start */
543 regc(preg, upper - lower + 1);
544 regc(preg, lower);
548 * Add a null-terminated literal string as a set of ranges.
550 static void reg_addrange_str(regex_t *preg, const char *str)
552 while (*str) {
553 reg_addrange(preg, *str, *str);
554 str++;
559 * Extracts the next unicode char from utf8.
561 * If 'upper' is set, converts the char to uppercase.
563 static int reg_utf8_tounicode_case(const char *s, int *uc, int upper)
565 int l = utf8_tounicode(s, uc);
566 if (upper) {
567 *uc = utf8_upper(*uc);
569 return l;
573 * Converts a hex digit to decimal.
575 * Returns -1 for an invalid hex digit.
577 static int hexdigitval(int c)
579 if (c >= '0' && c <= '9')
580 return c - '0';
581 if (c >= 'a' && c <= 'f')
582 return c - 'a' + 10;
583 if (c >= 'A' && c <= 'F')
584 return c - 'A' + 10;
585 return -1;
589 * Parses up to 'n' hex digits at 's' and stores the result in *uc.
591 * Returns the number of hex digits parsed.
592 * If there are no hex digits, returns 0 and stores nothing.
594 static int parse_hex(const char *s, int n, int *uc)
596 int val = 0;
597 int k;
599 for (k = 0; k < n; k++) {
600 int c = hexdigitval(*s++);
601 if (c == -1) {
602 break;
604 val = (val << 4) | c;
606 if (k) {
607 *uc = val;
609 return k;
613 * Call for chars after a backlash to decode the escape sequence.
615 * Stores the result in *ch.
617 * Returns the number of bytes consumed.
619 static int reg_decode_escape(const char *s, int *ch)
621 int n;
622 const char *s0 = s;
624 *ch = *s++;
626 switch (*ch) {
627 case 'b': *ch = '\b'; break;
628 case 'e': *ch = 27; break;
629 case 'f': *ch = '\f'; break;
630 case 'n': *ch = '\n'; break;
631 case 'r': *ch = '\r'; break;
632 case 't': *ch = '\t'; break;
633 case 'v': *ch = '\v'; break;
634 case 'u':
635 if ((n = parse_hex(s, 4, ch)) > 0) {
636 s += n;
638 break;
639 case 'x':
640 if ((n = parse_hex(s, 2, ch)) > 0) {
641 s += n;
643 break;
644 case '\0':
645 s--;
646 *ch = '\\';
647 break;
649 return s - s0;
653 - regatom - the lowest level
655 * Optimization: gobbles an entire sequence of ordinary characters so that
656 * it can turn them into a single node, which is smaller to store and
657 * faster to run. Backslashed characters are exceptions, each becoming a
658 * separate node; the code is simpler that way and it's not worth fixing.
660 static int *regatom(regex_t *preg, int *flagp)
662 int *ret;
663 int flags;
664 int nocase = (preg->cflags & REG_ICASE);
666 int ch;
667 int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);
669 *flagp = WORST; /* Tentatively. */
671 preg->regparse += n;
672 switch (ch) {
673 /* FIXME: these chars only have meaning at beg/end of pat? */
674 case '^':
675 ret = regnode(preg, BOL);
676 break;
677 case '$':
678 ret = regnode(preg, EOL);
679 break;
680 case '.':
681 ret = regnode(preg, ANY);
682 *flagp |= HASWIDTH|SIMPLE;
683 break;
684 case '[': {
685 const char *pattern = preg->regparse;
687 if (*pattern == '^') { /* Complement of range. */
688 ret = regnode(preg, ANYBUT);
689 pattern++;
690 } else
691 ret = regnode(preg, ANYOF);
693 /* Special case. If the first char is ']' or '-', it is part of the set */
694 if (*pattern == ']' || *pattern == '-') {
695 reg_addrange(preg, *pattern, *pattern);
696 pattern++;
699 while (*pattern && *pattern != ']') {
700 /* Is this a range? a-z */
701 int start;
702 int end;
704 pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
705 if (start == '\\') {
706 pattern += reg_decode_escape(pattern, &start);
707 if (start == 0) {
708 preg->err = REG_ERR_NULL_CHAR;
709 return NULL;
712 if (pattern[0] == '-' && pattern[1]) {
713 /* skip '-' */
714 pattern += utf8_tounicode(pattern, &end);
715 pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
716 if (end == '\\') {
717 pattern += reg_decode_escape(pattern, &end);
718 if (end == 0) {
719 preg->err = REG_ERR_NULL_CHAR;
720 return NULL;
724 reg_addrange(preg, start, end);
725 continue;
727 if (start == '[') {
728 if (strncmp(pattern, ":alpha:]", 8) == 0) {
729 if ((preg->cflags & REG_ICASE) == 0) {
730 reg_addrange(preg, 'a', 'z');
732 reg_addrange(preg, 'A', 'Z');
733 pattern += 8;
734 continue;
736 if (strncmp(pattern, ":alnum:]", 8) == 0) {
737 if ((preg->cflags & REG_ICASE) == 0) {
738 reg_addrange(preg, 'a', 'z');
740 reg_addrange(preg, 'A', 'Z');
741 reg_addrange(preg, '0', '9');
742 pattern += 8;
743 continue;
745 if (strncmp(pattern, ":space:]", 8) == 0) {
746 reg_addrange_str(preg, " \t\r\n\f\v");
747 pattern += 8;
748 continue;
751 /* Not a range, so just add the char */
752 reg_addrange(preg, start, start);
754 regc(preg, '\0');
756 if (*pattern) {
757 pattern++;
759 preg->regparse = pattern;
761 *flagp |= HASWIDTH|SIMPLE;
763 break;
764 case '(':
765 ret = reg(preg, 1, &flags);
766 if (ret == NULL)
767 return(NULL);
768 *flagp |= flags&(HASWIDTH|SPSTART);
769 break;
770 case '\0':
771 case '|':
772 case ')':
773 preg->err = REG_ERR_INTERNAL;
774 return NULL; /* Supposed to be caught earlier. */
775 case '?':
776 case '+':
777 case '*':
778 case '{':
779 preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
780 return NULL;
781 case '\\':
782 switch (*preg->regparse++) {
783 case '\0':
784 preg->err = REG_ERR_TRAILING_BACKSLASH;
785 return NULL;
786 break;
787 case '<':
788 case 'm':
789 ret = regnode(preg, WORDA);
790 break;
791 case '>':
792 case 'M':
793 ret = regnode(preg, WORDZ);
794 break;
795 case 'd':
796 ret = regnode(preg, ANYOF);
797 reg_addrange(preg, '0', '9');
798 regc(preg, '\0');
799 *flagp |= HASWIDTH|SIMPLE;
800 break;
801 case 'w':
802 ret = regnode(preg, ANYOF);
803 if ((preg->cflags & REG_ICASE) == 0) {
804 reg_addrange(preg, 'a', 'z');
806 reg_addrange(preg, 'A', 'Z');
807 reg_addrange(preg, '0', '9');
808 reg_addrange(preg, '_', '_');
809 regc(preg, '\0');
810 *flagp |= HASWIDTH|SIMPLE;
811 break;
812 case 's':
813 ret = regnode(preg, ANYOF);
814 reg_addrange_str(preg," \t\r\n\f\v");
815 regc(preg, '\0');
816 *flagp |= HASWIDTH|SIMPLE;
817 break;
818 /* FIXME: Someday handle \1, \2, ... */
819 default:
820 /* Handle general quoted chars in exact-match routine */
821 /* Back up to include the backslash */
822 preg->regparse--;
823 goto de_fault;
825 break;
826 de_fault:
827 default: {
829 * Encode a string of characters to be matched exactly.
831 int added = 0;
833 /* Back up to pick up the first char of interest */
834 preg->regparse -= n;
836 ret = regnode(preg, EXACTLY);
838 /* Note that a META operator such as ? or * consumes the
839 * preceding char.
840 * Thus we must be careful to look ahead by 2 and add the
841 * last char as it's own EXACTLY if necessary
844 /* Until end of string or a META char is reached */
845 while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
846 n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
847 if (ch == '\\' && preg->regparse[n]) {
848 /* Non-trailing backslash.
849 * Is this a special escape, or a regular escape?
851 if (strchr("<>mMwds", preg->regparse[n])) {
852 /* A special escape. All done with EXACTLY */
853 break;
855 /* Decode it. Note that we add the length for the escape
856 * sequence to the length for the backlash so we can skip
857 * the entire sequence, or not as required.
859 n += reg_decode_escape(preg->regparse + n, &ch);
860 if (ch == 0) {
861 preg->err = REG_ERR_NULL_CHAR;
862 return NULL;
866 /* Now we have one char 'ch' of length 'n'.
867 * Check to see if the following char is a MULT
870 if (ISMULT(preg->regparse[n])) {
871 /* Yes. But do we already have some EXACTLY chars? */
872 if (added) {
873 /* Yes, so return what we have and pick up the current char next time around */
874 break;
876 /* No, so add this single char and finish */
877 regc(preg, ch);
878 added++;
879 preg->regparse += n;
880 break;
883 /* No, so just add this char normally */
884 regc(preg, ch);
885 added++;
886 preg->regparse += n;
888 regc(preg, '\0');
890 *flagp |= HASWIDTH;
891 if (added == 1)
892 *flagp |= SIMPLE;
893 break;
895 break;
898 return(ret);
902 - regnode - emit a node
904 /* Location. */
905 static int *regnode(regex_t *preg, int op)
907 int *ret;
908 int *ptr;
910 preg->regsize += 2;
911 ret = preg->regcode;
912 if (ret == &regdummy) {
913 return(ret);
916 ptr = ret;
917 *ptr++ = op;
918 *ptr++ = 0; /* Null "next" pointer. */
919 preg->regcode = ptr;
921 return(ret);
925 - regc - emit (if appropriate) a byte of code
927 static void regc(regex_t *preg, int b )
929 preg->regsize++;
930 if (preg->regcode != &regdummy)
931 *preg->regcode++ = b;
935 - reginsert - insert an operator in front of already-emitted operand
937 * Means relocating the operand.
938 * Returns the new location of the original operand.
940 static int *reginsert(regex_t *preg, int op, int size, int *opnd )
942 int *src;
943 int *dst;
944 int *place;
946 preg->regsize += size;
948 if (preg->regcode == &regdummy) {
949 return opnd;
952 src = preg->regcode;
953 preg->regcode += size;
954 dst = preg->regcode;
955 while (src > opnd)
956 *--dst = *--src;
958 place = opnd; /* Op node, where operand used to be. */
959 *place++ = op;
960 while (--size) {
961 *place++ = 0;
964 return place;
968 - regtail - set the next-pointer at the end of a node chain
970 static void regtail(regex_t *preg, int *p, const int *val )
972 int *scan;
973 int *temp;
974 int offset;
976 if (p == &regdummy)
977 return;
979 /* Find last node. */
980 scan = p;
981 for (;;) {
982 temp = (int *)regnext(preg, scan);
983 if (temp == NULL)
984 break;
985 scan = temp;
988 if (OP(scan) == BACK)
989 offset = scan - val;
990 else
991 offset = val - scan;
993 scan[1] = offset;
997 - regoptail - regtail on operand of first argument; nop if operandless
1000 static void regoptail(regex_t *preg, int *p, const int *val )
1002 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
1003 if (p == NULL || p == &regdummy || OP(p) != BRANCH)
1004 return;
1005 regtail(preg, OPERAND(p), val);
1009 * regexec and friends
1013 * Forwards.
1015 static int regtry(regex_t *preg, const char *string );
1016 static int regmatch(regex_t *preg, const int *prog);
1017 static int regrepeat(regex_t *preg, const int *p, int max);
1020 - regexec - match a regexp against a string
1022 int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
1024 const char *s;
1025 const int *scan;
1027 /* Be paranoid... */
1028 if (preg == NULL || preg->program == NULL || string == NULL) {
1029 return REG_ERR_NULL_ARGUMENT;
1032 /* Check validity of program. */
1033 if (*preg->program != REG_MAGIC) {
1034 return REG_ERR_CORRUPTED;
1037 #ifdef DEBUG
1038 fprintf(stderr, "regexec: %s\n", string);
1039 regdump(preg);
1040 #endif
1042 preg->eflags = eflags;
1043 preg->pmatch = pmatch;
1044 preg->nmatch = nmatch;
1045 preg->start = string; /* All offsets are computed from here */
1047 /* Must clear out the embedded repeat counts */
1048 for (scan = OPERAND(preg->program + 1); scan != NULL; scan = regnext(preg, scan)) {
1049 switch (OP(scan)) {
1050 case REP:
1051 case REPMIN:
1052 case REPX:
1053 case REPXMIN:
1054 *(int *)(scan + 4) = 0;
1055 break;
1059 /* If there is a "must appear" string, look for it. */
1060 if (preg->regmust != NULL) {
1061 s = string;
1062 while ((s = str_find(s, preg->regmust[0], preg->cflags & REG_ICASE)) != NULL) {
1063 if (prefix_cmp(preg->regmust, preg->regmlen, s, preg->cflags & REG_ICASE) >= 0) {
1064 break;
1066 s++;
1068 if (s == NULL) /* Not present. */
1069 return REG_NOMATCH;
1072 /* Mark beginning of line for ^ . */
1073 preg->regbol = string;
1075 /* Simplest case: anchored match need be tried only once (maybe per line). */
1076 if (preg->reganch) {
1077 if (eflags & REG_NOTBOL) {
1078 /* This is an anchored search, but not an BOL, so possibly skip to the next line */
1079 goto nextline;
1081 while (1) {
1082 int ret = regtry(preg, string);
1083 if (ret) {
1084 return REG_NOERROR;
1086 if (*string) {
1087 nextline:
1088 if (preg->cflags & REG_NEWLINE) {
1089 /* Try the next anchor? */
1090 string = strchr(string, '\n');
1091 if (string) {
1092 preg->regbol = ++string;
1093 continue;
1097 return REG_NOMATCH;
1101 /* Messy cases: unanchored match. */
1102 s = string;
1103 if (preg->regstart != '\0') {
1104 /* We know what char it must start with. */
1105 while ((s = str_find(s, preg->regstart, preg->cflags & REG_ICASE)) != NULL) {
1106 if (regtry(preg, s))
1107 return REG_NOERROR;
1108 s++;
1111 else
1112 /* We don't -- general case. */
1113 while (1) {
1114 if (regtry(preg, s))
1115 return REG_NOERROR;
1116 if (*s == '\0') {
1117 break;
1119 s += utf8_charlen(*s);
1122 /* Failure. */
1123 return REG_NOMATCH;
1127 - regtry - try match at specific point
1129 /* 0 failure, 1 success */
1130 static int regtry( regex_t *preg, const char *string )
1132 int i;
1134 preg->reginput = string;
1136 for (i = 0; i < preg->nmatch; i++) {
1137 preg->pmatch[i].rm_so = -1;
1138 preg->pmatch[i].rm_eo = -1;
1140 if (regmatch(preg, preg->program + 1)) {
1141 preg->pmatch[0].rm_so = string - preg->start;
1142 preg->pmatch[0].rm_eo = preg->reginput - preg->start;
1143 return(1);
1144 } else
1145 return(0);
1149 * Returns bytes matched if 'pattern' is a prefix of 'string'.
1151 * If 'nocase' is non-zero, does a case-insensitive match.
1153 * Returns -1 on not found.
1155 static int prefix_cmp(const int *prog, int proglen, const char *string, int nocase)
1157 const char *s = string;
1158 while (proglen && *s) {
1159 int ch;
1160 int n = reg_utf8_tounicode_case(s, &ch, nocase);
1161 if (ch != *prog) {
1162 return -1;
1164 prog++;
1165 s += n;
1166 proglen--;
1168 if (proglen == 0) {
1169 return s - string;
1171 return -1;
1175 * Searchs for 'c' in the range 'range'.
1177 * Returns 1 if found, or 0 if not.
1179 static int reg_range_find(const int *range, int c)
1181 while (*range) {
1182 /*printf("Checking %d in range [%d,%d]\n", c, range[1], (range[0] + range[1] - 1));*/
1183 if (c >= range[1] && c <= (range[0] + range[1] - 1)) {
1184 return 1;
1186 range += 2;
1188 return 0;
1192 * Search for the character 'c' in the utf-8 string 'string'.
1194 * If 'nocase' is set, the 'string' is assumed to be uppercase
1195 * and 'c' is converted to uppercase before matching.
1197 * Returns the byte position in the string where the 'c' was found, or
1198 * NULL if not found.
1200 static const char *str_find(const char *string, int c, int nocase)
1202 if (nocase) {
1203 /* The "string" should already be converted to uppercase */
1204 c = utf8_upper(c);
1206 while (*string) {
1207 int ch;
1208 int n = reg_utf8_tounicode_case(string, &ch, nocase);
1209 if (c == ch) {
1210 return string;
1212 string += n;
1214 return NULL;
1218 * Returns true if 'ch' is an end-of-line char.
1220 * In REG_NEWLINE mode, \n is considered EOL in
1221 * addition to \0
1223 static int reg_iseol(regex_t *preg, int ch)
1225 if (preg->cflags & REG_NEWLINE) {
1226 return ch == '\0' || ch == '\n';
1228 else {
1229 return ch == '\0';
1233 static int regmatchsimplerepeat(regex_t *preg, const int *scan, int matchmin)
1235 int nextch = '\0';
1236 const char *save;
1237 int no;
1238 int c;
1240 int max = scan[2];
1241 int min = scan[3];
1242 const int *next = regnext(preg, scan);
1245 * Lookahead to avoid useless match attempts
1246 * when we know what character comes next.
1248 if (OP(next) == EXACTLY) {
1249 nextch = *OPERAND(next);
1251 save = preg->reginput;
1252 no = regrepeat(preg, scan + 5, max);
1253 if (no < min) {
1254 return 0;
1256 if (matchmin) {
1257 /* from min up to no */
1258 max = no;
1259 no = min;
1261 /* else from no down to min */
1262 while (1) {
1263 if (matchmin) {
1264 if (no > max) {
1265 break;
1268 else {
1269 if (no < min) {
1270 break;
1273 preg->reginput = save + utf8_index(save, no);
1274 reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));
1275 /* If it could work, try it. */
1276 if (reg_iseol(preg, nextch) || c == nextch) {
1277 if (regmatch(preg, next)) {
1278 return(1);
1281 if (matchmin) {
1282 /* Couldn't or didn't, add one more */
1283 no++;
1285 else {
1286 /* Couldn't or didn't -- back up. */
1287 no--;
1290 return(0);
1293 static int regmatchrepeat(regex_t *preg, int *scan, int matchmin)
1295 const char *save;
1297 int max = scan[2];
1298 int min = scan[3];
1300 save = preg->reginput;
1302 /* Have we reached min? */
1303 if (scan[4] < min) {
1304 /* No, so get another one */
1305 scan[4]++;
1306 if (regmatch(preg, scan + 5)) {
1307 return 1;
1309 scan[4]--;
1310 return 0;
1312 if (scan[4] > max) {
1313 return 0;
1316 if (matchmin) {
1317 /* minimal, so try other branch first */
1318 if (regmatch(preg, regnext(preg, scan))) {
1319 return 1;
1321 /* No, so try one more */
1322 scan[4]++;
1323 if (regmatch(preg, scan + 5)) {
1324 return 1;
1326 scan[4]--;
1327 return 0;
1329 /* maximal, so try this branch again */
1330 save = preg->reginput;
1331 if (scan[4] < max) {
1332 scan[4]++;
1333 if (regmatch(preg, scan + 5)) {
1334 return 1;
1336 scan[4]--;
1338 /* At this point we are at max with no match. Back up by one and try the other branch */
1339 preg->reginput = save;
1340 int ret = regmatch(preg, regnext(preg, scan));
1341 return ret;
1345 - regmatch - main matching routine
1347 * Conceptually the strategy is simple: check to see whether the current
1348 * node matches, call self recursively to see whether the rest matches,
1349 * and then act accordingly. In practice we make some effort to avoid
1350 * recursion, in particular by going through "ordinary" nodes (that don't
1351 * need to know whether the rest of the match failed) by a loop instead of
1352 * by recursion.
1354 /* 0 failure, 1 success */
1355 static int regmatch(regex_t *preg, const int *prog)
1357 const int *scan; /* Current node. */
1358 const int *next; /* Next node. */
1360 scan = prog;
1362 #ifdef DEBUG
1363 if (scan != NULL && regnarrate)
1364 fprintf(stderr, "%s(\n", regprop(scan));
1365 #endif
1366 while (scan != NULL) {
1367 int n;
1368 int c;
1369 #ifdef DEBUG
1370 if (regnarrate) {
1371 //fprintf(stderr, "%s...\n", regprop(scan));
1372 int op = OP(scan);
1373 fprintf(stderr, "%2d{%02x}%s...\n", (int)(scan-preg->program), op, regprop(scan)); /* Where, what. */
1375 #endif
1376 next = regnext(preg, scan);
1377 n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));
1379 switch (OP(scan)) {
1380 case BOL:
1381 if (preg->reginput != preg->regbol)
1382 return(0);
1383 break;
1384 case EOL:
1385 if (!reg_iseol(preg, c)) {
1386 return(0);
1388 break;
1389 case WORDA:
1390 /* Must be looking at a letter, digit, or _ */
1391 if ((!isalnum(UCHAR(c))) && c != '_')
1392 return(0);
1393 /* Prev must be BOL or nonword */
1394 if (preg->reginput > preg->regbol &&
1395 (isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_'))
1396 return(0);
1397 break;
1398 case WORDZ:
1399 /* Can't match at BOL */
1400 if (preg->reginput > preg->regbol) {
1401 /* Current must be EOL or nonword */
1402 if (reg_iseol(preg, c) || !isalnum(UCHAR(c)) || c != '_') {
1403 c = preg->reginput[-1];
1404 /* Previous must be word */
1405 if (isalnum(UCHAR(c)) || c == '_') {
1406 break;
1410 /* No */
1411 return(0);
1413 case ANY:
1414 if (reg_iseol(preg, c))
1415 return 0;
1416 preg->reginput += n;
1417 break;
1418 case EXACTLY: {
1419 const int *opnd;
1420 int len;
1421 int slen;
1423 opnd = OPERAND(scan);
1424 len = str_int_len(opnd);
1426 slen = prefix_cmp(opnd, len, preg->reginput, preg->cflags & REG_ICASE);
1427 if (slen < 0) {
1428 return(0);
1430 preg->reginput += slen;
1432 break;
1433 case ANYOF:
1434 if (reg_iseol(preg, c) || reg_range_find(OPERAND(scan), c) == 0) {
1435 return(0);
1437 preg->reginput += n;
1438 break;
1439 case ANYBUT:
1440 if (reg_iseol(preg, c) || reg_range_find(OPERAND(scan), c) != 0) {
1441 return(0);
1443 preg->reginput += n;
1444 break;
1445 case NOTHING:
1446 break;
1447 case BACK:
1448 break;
1449 case BRANCH: {
1450 const char *save;
1452 if (OP(next) != BRANCH) /* No choice. */
1453 next = OPERAND(scan); /* Avoid recursion. */
1454 else {
1455 do {
1456 save = preg->reginput;
1457 if (regmatch(preg, OPERAND(scan))) {
1458 return(1);
1460 preg->reginput = save;
1461 scan = regnext(preg, scan);
1462 } while (scan != NULL && OP(scan) == BRANCH);
1463 return(0);
1464 /* NOTREACHED */
1467 break;
1468 case REP:
1469 case REPMIN:
1470 return regmatchsimplerepeat(preg, scan, OP(scan) == REPMIN);
1472 case REPX:
1473 case REPXMIN:
1474 return regmatchrepeat(preg, (int *)scan, OP(scan) == REPXMIN);
1476 case END:
1477 return(1); /* Success! */
1478 break;
1479 default:
1480 if (OP(scan) >= OPEN+1 && OP(scan) < CLOSE_END) {
1481 const char *save;
1483 save = preg->reginput;
1485 if (regmatch(preg, next)) {
1486 int no;
1488 * Don't set startp if some later
1489 * invocation of the same parentheses
1490 * already has.
1492 if (OP(scan) < CLOSE) {
1493 no = OP(scan) - OPEN;
1494 if (no < preg->nmatch && preg->pmatch[no].rm_so == -1) {
1495 preg->pmatch[no].rm_so = save - preg->start;
1498 else {
1499 no = OP(scan) - CLOSE;
1500 if (no < preg->nmatch && preg->pmatch[no].rm_eo == -1) {
1501 preg->pmatch[no].rm_eo = save - preg->start;
1504 return(1);
1505 } else
1506 return(0);
1508 return REG_ERR_INTERNAL;
1511 scan = next;
1515 * We get here only if there's trouble -- normally "case END" is
1516 * the terminating point.
1518 return REG_ERR_INTERNAL;
1522 - regrepeat - repeatedly match something simple, report how many
1524 static int regrepeat(regex_t *preg, const int *p, int max)
1526 int count = 0;
1527 const char *scan;
1528 const int *opnd;
1529 int ch;
1530 int n;
1532 scan = preg->reginput;
1533 opnd = OPERAND(p);
1534 switch (OP(p)) {
1535 case ANY:
1536 /* No need to handle utf8 specially here */
1537 while (!reg_iseol(preg, *scan) && count < max) {
1538 count++;
1539 scan++;
1541 break;
1542 case EXACTLY:
1543 while (count < max) {
1544 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1545 if (*opnd != ch) {
1546 break;
1548 count++;
1549 scan += n;
1551 break;
1552 case ANYOF:
1553 while (count < max) {
1554 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1555 if (reg_iseol(preg, ch) || reg_range_find(opnd, ch) == 0) {
1556 break;
1558 count++;
1559 scan += n;
1561 break;
1562 case ANYBUT:
1563 while (count < max) {
1564 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1565 if (reg_iseol(preg, ch) || reg_range_find(opnd, ch) != 0) {
1566 break;
1568 count++;
1569 scan += n;
1571 break;
1572 default: /* Oh dear. Called inappropriately. */
1573 preg->err = REG_ERR_INTERNAL;
1574 count = 0; /* Best compromise. */
1575 break;
1577 preg->reginput = scan;
1579 return(count);
1583 - regnext - dig the "next" pointer out of a node
1585 static const int *regnext(regex_t *preg, const int *p )
1587 int offset;
1589 if (p == &regdummy)
1590 return(NULL);
1592 offset = NEXT(p);
1594 if (offset == 0)
1595 return(NULL);
1597 if (OP(p) == BACK)
1598 return(p-offset);
1599 else
1600 return(p+offset);
1603 #ifdef DEBUG
1606 - regdump - dump a regexp onto stdout in vaguely comprehensible form
1608 static void regdump(regex_t *preg)
1610 const int *s;
1611 char op = EXACTLY; /* Arbitrary non-END op. */
1612 const int *next;
1613 char buf[4];
1615 if (preg->regcode == &regdummy)
1616 return;
1618 s = preg->program + 1;
1619 while (op != END && s < preg->regcode) { /* While that wasn't END last time... */
1620 op = OP(s);
1621 printf("%2d{%02x}%s", (int)(s-preg->program), op, regprop(s)); /* Where, what. */
1622 next = regnext(preg, s);
1623 if (next == NULL) /* Next ptr. */
1624 printf("(0)");
1625 else
1626 printf("(%d)", (int)((s-preg->program)+(next-s)));
1627 s += 2;
1628 if (op == REP || op == REPMIN || op == REPX || op == REPXMIN) {
1629 int max = s[0];
1630 int min = s[1];
1631 if (max == 65535) {
1632 printf("{%d,*}", min);
1634 else {
1635 printf("{%d,%d}", min, max);
1637 printf(" %d", s[2]);
1638 s += 3;
1640 else if (op == ANYOF || op == ANYBUT) {
1641 /* set of ranges */
1643 while (*s) {
1644 int len = *s++;
1645 int first = *s++;
1646 buf[utf8_fromunicode(buf, first)] = 0;
1647 printf("%s", buf);
1648 if (len > 1) {
1649 buf[utf8_fromunicode(buf, first + len - 1)] = 0;
1650 printf("-%s", buf);
1653 s++;
1655 else if (op == EXACTLY) {
1656 /* Literal string, where present. */
1658 while (*s) {
1659 buf[utf8_fromunicode(buf, *s)] = 0;
1660 printf("%s", buf);
1661 s++;
1663 s++;
1665 putchar('\n');
1668 if (op == END) {
1669 /* Header fields of interest. */
1670 if (preg->regstart) {
1671 buf[utf8_fromunicode(buf, preg->regstart)] = 0;
1672 printf("start '%s' ", buf);
1674 if (preg->reganch)
1675 printf("anchored ");
1676 if (preg->regmust != NULL) {
1677 int i;
1678 printf("must have:");
1679 for (i = 0; i < preg->regmlen; i++) {
1680 putchar(preg->regmust[i]);
1682 putchar('\n');
1685 printf("\n");
1689 - regprop - printable representation of opcode
1691 static const char *regprop( const int *op )
1693 char *p;
1694 static char buf[50];
1696 (void) strcpy(buf, ":");
1698 switch (OP(op)) {
1699 case BOL:
1700 p = "BOL";
1701 break;
1702 case EOL:
1703 p = "EOL";
1704 break;
1705 case ANY:
1706 p = "ANY";
1707 break;
1708 case ANYOF:
1709 p = "ANYOF";
1710 break;
1711 case ANYBUT:
1712 p = "ANYBUT";
1713 break;
1714 case BRANCH:
1715 p = "BRANCH";
1716 break;
1717 case EXACTLY:
1718 p = "EXACTLY";
1719 break;
1720 case NOTHING:
1721 p = "NOTHING";
1722 break;
1723 case BACK:
1724 p = "BACK";
1725 break;
1726 case END:
1727 p = "END";
1728 break;
1729 case REP:
1730 p = "REP";
1731 break;
1732 case REPMIN:
1733 p = "REPMIN";
1734 break;
1735 case REPX:
1736 p = "REPX";
1737 break;
1738 case REPXMIN:
1739 p = "REPXMIN";
1740 break;
1741 case WORDA:
1742 p = "WORDA";
1743 break;
1744 case WORDZ:
1745 p = "WORDZ";
1746 break;
1747 default:
1748 if (OP(op) >= OPEN && OP(op) < CLOSE) {
1749 sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
1751 else if (OP(op) >= CLOSE && OP(op) < CLOSE_END) {
1752 sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
1754 else {
1755 abort();
1757 p = NULL;
1758 break;
1760 if (p != NULL)
1761 (void) strcat(buf, p);
1762 return(buf);
1764 #endif
1766 size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
1768 static const char *error_strings[] = {
1769 "success",
1770 "no match",
1771 "bad pattern",
1772 "null argument",
1773 "unknown error",
1774 "too big",
1775 "out of memory",
1776 "too many ()",
1777 "parentheses () not balanced",
1778 "braces {} not balanced",
1779 "invalid repetition count(s)",
1780 "extra characters",
1781 "*+ of empty atom",
1782 "nested count",
1783 "internal error",
1784 "count follows nothing",
1785 "trailing backslash",
1786 "corrupted program",
1787 "contains null char",
1789 const char *err;
1791 if (errcode < 0 || errcode >= REG_ERR_NUM) {
1792 err = "Bad error code";
1794 else {
1795 err = error_strings[errcode];
1798 return snprintf(errbuf, errbuf_size, "%s", err);
1801 void regfree(regex_t *preg)
1803 free(preg->program);
1806 #endif