usbmodeswitch: Updated to v.1.2.6 from shibby's branch.
[tomato.git] / release / src / router / usbmodeswitch / jim / jimregexp.c
blobc652ad4d32ec73c26ace20ead2a10a0344a36365
1 /*
2 * regcomp and regexec -- regsub and regerror are elsewhere
4 * Copyright (c) 1986 by University of Toronto.
5 * Written by Henry Spencer. Not derived from licensed software.
7 * Permission is granted to anyone to use this software for any
8 * purpose on any computer system, and to redistribute it freely,
9 * subject to the following restrictions:
11 * 1. The author is not responsible for the consequences of use of
12 * this software, no matter how awful, even if they arise
13 * from defects in it.
15 * 2. The origin of this software must not be misrepresented, either
16 * by explicit claim or by omission.
18 * 3. Altered versions must be plainly marked as such, and must not
19 * be misrepresented as being the original software.
20 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
21 *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to |
22 *** to assist in implementing egrep.
23 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
24 *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
25 *** as in BSD grep and ex.
26 *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
27 *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
28 *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods,
29 *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
30 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
31 *** seiwald@vix.com, on 28 August 1993, for use in jam. Regmagic.h
32 *** was moved into regexp.h, and the include of regexp.h now uses "'s
33 *** to avoid conflicting with the system regexp.h. Const, bless its
34 *** soul, was removed so it can compile everywhere. The declaration
35 *** of strchr() was in conflict on AIX, so it was removed (as it is
36 *** happily defined in string.h).
37 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
38 *** seiwald@perforce.com, on 20 January 2000, to use function prototypes.
39 *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
40 *** seiwald@perforce.com, on 05 November 2002, to const string literals.
42 * THIS IS AN ALTERED VERSION. It was altered by Steve Bennett <steveb@workware.net.au>
43 * on 16 October 2010, to remove static state and add better Tcl ARE compatibility.
44 * This includes counted repetitions, UTF-8 support, character classes,
45 * shorthand character classes, increased number of parentheses to 100,
46 * backslash escape sequences. It also removes \n as an alternative to |.
48 * Beware that some of this code is subtly aware of the way operator
49 * precedence is structured in regular expressions. Serious changes in
50 * regular-expression syntax might require a total rethink.
52 #include <stdio.h>
53 #include <ctype.h>
54 #include <stdlib.h>
55 #include <string.h>
57 #include "jim.h"
58 #include "jimautoconf.h"
59 #include "jimregexp.h"
60 #include "utf8.h"
62 #if !defined(HAVE_REGCOMP) || defined(JIM_REGEXP)
/*
 * Structure for regexp "program".  This is essentially a linear encoding
 * of a nondeterministic finite-state machine (aka syntax charts or
 * "railroad normal form" in parsing technology).  Each node is an opcode
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH implement concatenation; a "next" pointer with
 * a BRANCH on both ends of it is connecting two alternatives.  (Here we
 * have one of the subtle syntax dependencies:  an individual BRANCH (as
 * opposed to a collection of them) is never concatenated with anything
 * because of operator precedence.)  The operand of some types of node is
 * a literal string; for others, it is a node leading into a sub-FSM.  In
 * particular, the operand of a BRANCH node is the first node of the branch.
 * (NB this is *not* a tree structure:  the tail of the branch connects
 * to the thing following the set of BRANCHes.)  The opcodes are:
 */

/* This *MUST* be less than (255-20)/2=117 */
#define REG_MAX_PAREN 100

/* definition   number  opnd?     meaning */
#define END      0   /* no        End of program. */
#define BOL      1   /* no        Match "" at beginning of line. */
#define EOL      2   /* no        Match "" at end of line. */
#define ANY      3   /* no        Match any one character. */
#define ANYOF    4   /* str       Match any character in this string. */
#define ANYBUT   5   /* str       Match any character not in this string. */
#define BRANCH   6   /* node      Match this alternative, or the next... */
#define BACK     7   /* no        Match "", "next" ptr points backward. */
#define EXACTLY  8   /* str       Match this string. */
#define NOTHING  9   /* no        Match empty string. */
#define REP      10  /* max,min   Match this (simple) thing [min,max] times. */
#define REPMIN   11  /* max,min   Match this (simple) thing [min,max] times, minimal match. */
#define REPX     12  /* max,min   Match this (complex) thing [min,max] times. */
#define REPXMIN  13  /* max,min   Match this (complex) thing [min,max] times, minimal match. */

#define WORDA    15  /* no        Match "" at wordchar, where prev is nonword */
#define WORDZ    16  /* no        Match "" at nonwordchar, where prev is word */
#define OPEN     20  /* no        Mark this point in input as start of #n. */
                     /*           OPEN+1 is number 1, etc. */
#define CLOSE      (OPEN+REG_MAX_PAREN)   /* no  Analogous to OPEN. */
#define CLOSE_END  (CLOSE+REG_MAX_PAREN)
/*
 * The first cell of the regexp internal "program" is actually this magic
 * number; the start node begins in the second cell (offset 1).
 */
#define REG_MAGIC 0xFADED00D
/*
 * Opcode notes:
 *
 * BRANCH	The set of branches constituting a single choice are hooked
 *		together with their "next" pointers, since precedence prevents
 *		anything being concatenated to any individual branch.  The
 *		"next" pointer of the last BRANCH in a choice points to the
 *		thing following the whole choice.  This is also where the
 *		final "next" pointer of each individual branch points; each
 *		branch starts with the operand node of a BRANCH node.
 *
 * BACK		Normal "next" pointers all implicitly point forward; BACK
 *		exists to make loop structures possible.
 *
 * REP,REPMIN,REPX,REPXMIN
 *		'*', '+', '?' and '{m,n}' are emitted by regpiece() as a
 *		five-cell node: opcode, "next", max, min and a running count.
 *		REP/REPMIN are used when the operand is flagged SIMPLE;
 *		otherwise REPX/REPXMIN are used and a BACK node pointing at
 *		the repeat node is appended after the operand.
 *
 * OPEN,CLOSE	...are numbered at compile time.
 */

/*
 * A node is one program cell (an int) of opcode followed by one cell of
 * "next" pointer.  The stored value is the distance between the node and
 * its successor: regtail_() stores (target - node) for ordinary nodes and
 * (node - target) for BACK.  regnode() initialises the cell to 0.
 * An operand, if any, simply follows the node.  (Note that much of the
 * code generation knows about this implicit relationship.)
 *
 * The macro arguments are fully parenthesized so that expressions can be
 * passed safely for both the regex pointer and the offset.
 */
#define OP(preg, p)	((preg)->program[(p)])
#define NEXT(preg, p)	((preg)->program[(p) + 1])
#define OPERAND(p)	((p) + 2)

/*
 * See regmagic.h for one further detail of program structure.
 */
/*
 * Utility definitions.
 */

/*
 * Record error code M in regex R and return M from the calling function.
 * Wrapped in do/while(0) so that "if (x) FAIL(r, m); else ..." parses as a
 * single statement.
 */
#define	FAIL(R,M)	do { (R)->err = (M); return (M); } while (0)
/* True if c starts a repetition operator. */
#define	ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?' || (c) == '{')
/* Characters that end a run of literal characters in regatom(). */
#define	META		"^$.[()|?{+*"

/*
 * Flags to be passed up and down.
 */
#define	HASWIDTH	01	/* Known never to match null string. */
#define	SIMPLE		02	/* Simple enough to be a REP/REPMIN operand. */
#define	SPSTART		04	/* Starts with * or +. */
#define	WORST		0	/* Worst case. */

/* Upper bound stored for '*', '+' and '{n,}' (no explicit maximum). */
#define MAX_REP_COUNT 1000000
172 * Forward declarations for regcomp()'s friends.
174 static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp );
175 static int regpiece(regex_t *preg, int *flagp );
176 static int regbranch(regex_t *preg, int *flagp );
177 static int regatom(regex_t *preg, int *flagp );
178 static int regnode(regex_t *preg, int op );
179 static int regnext(regex_t *preg, int p );
180 static void regc(regex_t *preg, int b );
181 static int reginsert(regex_t *preg, int op, int size, int opnd );
182 static void regtail_(regex_t *preg, int p, int val, int line );
183 static void regoptail(regex_t *preg, int p, int val );
184 #define regtail(PREG, P, VAL) regtail_(PREG, P, VAL, __LINE__)
186 static int reg_range_find(const int *string, int c);
187 static const char *str_find(const char *string, int c, int nocase);
188 static int prefix_cmp(const int *prog, int proglen, const char *string, int nocase);
190 /*#define DEBUG*/
191 #ifdef DEBUG
192 int regnarrate = 0;
193 static void regdump(regex_t *preg);
194 static const char *regprop( int op );
195 #endif
/**
 * Counts the elements of a zero-terminated sequence of ints.
 *
 * The terminating 0 is not included in the count.
 */
static int str_int_len(const int *seq)
{
	const int *cursor = seq;

	while (*cursor != 0) {
		cursor++;
	}
	return (int)(cursor - seq);
}
211 - regcomp - compile a regular expression into internal code
213 * We can't allocate space until we know how big the compiled form will be,
214 * but we can't compile it (and thus know how big it is) until we've got a
215 * place to put the code. So we cheat: we compile it twice, once with code
216 * generation turned off and size counting turned on, and once "for real".
217 * This also means that we don't allocate space until we are sure that the
218 * thing really will compile successfully, and we never have to move the
219 * code and thus invalidate pointers into it. (Note that it has to be in
220 * one piece because free() must be able to free it all.)
222 * Beware that the optimization-preparation code in here knows about some
223 * of the structure of the compiled regexp.
225 int regcomp(regex_t *preg, const char *exp, int cflags)
227 int scan;
228 int longest;
229 unsigned len;
230 int flags;
232 #ifdef DEBUG
233 fprintf(stderr, "Compiling: '%s'\n", exp);
234 #endif
235 memset(preg, 0, sizeof(*preg));
237 if (exp == NULL)
238 FAIL(preg, REG_ERR_NULL_ARGUMENT);
240 /* First pass: determine size, legality. */
241 preg->cflags = cflags;
242 preg->regparse = exp;
243 /* XXX: For now, start unallocated */
244 preg->program = NULL;
245 preg->proglen = 0;
247 #if 1
248 /* Allocate space. */
249 preg->proglen = (strlen(exp) + 1) * 5;
250 preg->program = malloc(preg->proglen * sizeof(int));
251 if (preg->program == NULL)
252 FAIL(preg, REG_ERR_NOMEM);
253 #endif
255 /* Note that since we store a magic value as the first item in the program,
256 * program offsets will never be 0
258 regc(preg, REG_MAGIC);
259 if (reg(preg, 0, &flags) == 0) {
260 return preg->err;
263 /* Small enough for pointer-storage convention? */
264 if (preg->re_nsub >= REG_MAX_PAREN) /* Probably could be 65535L. */
265 FAIL(preg,REG_ERR_TOO_BIG);
267 /* Dig out information for optimizations. */
268 preg->regstart = 0; /* Worst-case defaults. */
269 preg->reganch = 0;
270 preg->regmust = 0;
271 preg->regmlen = 0;
272 scan = 1; /* First BRANCH. */
273 if (OP(preg, regnext(preg, scan)) == END) { /* Only one top-level choice. */
274 scan = OPERAND(scan);
276 /* Starting-point info. */
277 if (OP(preg, scan) == EXACTLY) {
278 preg->regstart = preg->program[OPERAND(scan)];
280 else if (OP(preg, scan) == BOL)
281 preg->reganch++;
284 * If there's something expensive in the r.e., find the
285 * longest literal string that must appear and make it the
286 * regmust. Resolve ties in favor of later strings, since
287 * the regstart check works with the beginning of the r.e.
288 * and avoiding duplication strengthens checking. Not a
289 * strong reason, but sufficient in the absence of others.
291 if (flags&SPSTART) {
292 longest = 0;
293 len = 0;
294 for (; scan != 0; scan = regnext(preg, scan)) {
295 if (OP(preg, scan) == EXACTLY) {
296 int plen = str_int_len(preg->program + OPERAND(scan));
297 if (plen >= len) {
298 longest = OPERAND(scan);
299 len = plen;
303 preg->regmust = longest;
304 preg->regmlen = len;
308 #ifdef DEBUG
309 regdump(preg);
310 #endif
312 return 0;
316 - reg - regular expression, i.e. main body or parenthesized thing
318 * Caller must absorb opening parenthesis.
320 * Combining parenthesis handling with the base level of regular expression
321 * is a trifle forced, but the need to tie the tails of the branches to what
322 * follows makes it hard to avoid.
324 static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp )
326 int ret;
327 int br;
328 int ender;
329 int parno = 0;
330 int flags;
332 *flagp = HASWIDTH; /* Tentatively. */
334 /* Make an OPEN node, if parenthesized. */
335 if (paren) {
336 parno = ++preg->re_nsub;
337 ret = regnode(preg, OPEN+parno);
338 } else
339 ret = 0;
341 /* Pick up the branches, linking them together. */
342 br = regbranch(preg, &flags);
343 if (br == 0)
344 return 0;
345 if (ret != 0)
346 regtail(preg, ret, br); /* OPEN -> first. */
347 else
348 ret = br;
349 if (!(flags&HASWIDTH))
350 *flagp &= ~HASWIDTH;
351 *flagp |= flags&SPSTART;
352 while (*preg->regparse == '|') {
353 preg->regparse++;
354 br = regbranch(preg, &flags);
355 if (br == 0)
356 return 0;
357 regtail(preg, ret, br); /* BRANCH -> BRANCH. */
358 if (!(flags&HASWIDTH))
359 *flagp &= ~HASWIDTH;
360 *flagp |= flags&SPSTART;
363 /* Make a closing node, and hook it on the end. */
364 ender = regnode(preg, (paren) ? CLOSE+parno : END);
365 regtail(preg, ret, ender);
367 /* Hook the tails of the branches to the closing node. */
368 for (br = ret; br != 0; br = regnext(preg, br))
369 regoptail(preg, br, ender);
371 /* Check for proper termination. */
372 if (paren && *preg->regparse++ != ')') {
373 preg->err = REG_ERR_UNMATCHED_PAREN;
374 return 0;
375 } else if (!paren && *preg->regparse != '\0') {
376 if (*preg->regparse == ')') {
377 preg->err = REG_ERR_UNMATCHED_PAREN;
378 return 0;
379 } else {
380 preg->err = REG_ERR_JUNK_ON_END;
381 return 0;
385 return(ret);
389 - regbranch - one alternative of an | operator
391 * Implements the concatenation operator.
393 static int regbranch(regex_t *preg, int *flagp )
395 int ret;
396 int chain;
397 int latest;
398 int flags;
400 *flagp = WORST; /* Tentatively. */
402 ret = regnode(preg, BRANCH);
403 chain = 0;
404 while (*preg->regparse != '\0' && *preg->regparse != ')' &&
405 *preg->regparse != '|') {
406 latest = regpiece(preg, &flags);
407 if (latest == 0)
408 return 0;
409 *flagp |= flags&HASWIDTH;
410 if (chain == 0) {/* First piece. */
411 *flagp |= flags&SPSTART;
413 else {
414 regtail(preg, chain, latest);
416 chain = latest;
418 if (chain == 0) /* Loop ran zero times. */
419 (void) regnode(preg, NOTHING);
421 return(ret);
425 - regpiece - something followed by possible [*+?]
427 * Note that the branching code sequences used for ? and the general cases
428 * of * and + are somewhat optimized: they use the same NOTHING node as
429 * both the endmarker for their branch list and the body of the last branch.
430 * It might seem that this node could be dispensed with entirely, but the
431 * endmarker role is not redundant.
433 static int regpiece(regex_t *preg, int *flagp)
435 int ret;
436 char op;
437 int next;
438 int flags;
439 int chain = 0;
440 int min;
441 int max;
443 ret = regatom(preg, &flags);
444 if (ret == 0)
445 return 0;
447 op = *preg->regparse;
448 if (!ISMULT(op)) {
449 *flagp = flags;
450 return(ret);
453 if (!(flags&HASWIDTH) && op != '?') {
454 preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
455 return 0;
458 /* Handle braces (counted repetition) by expansion */
459 if (op == '{') {
460 char *end;
462 min = strtoul(preg->regparse + 1, &end, 10);
463 if (end == preg->regparse + 1) {
464 preg->err = REG_ERR_BAD_COUNT;
465 return 0;
467 if (*end == '}') {
468 max = min;
470 else {
471 preg->regparse = end;
472 max = strtoul(preg->regparse + 1, &end, 10);
473 if (*end != '}') {
474 preg->err = REG_ERR_UNMATCHED_BRACES;
475 return 0;
478 if (end == preg->regparse + 1) {
479 max = MAX_REP_COUNT;
481 else if (max < min || max >= 100) {
482 preg->err = REG_ERR_BAD_COUNT;
483 return 0;
485 if (min >= 100) {
486 preg->err = REG_ERR_BAD_COUNT;
487 return 0;
490 preg->regparse = strchr(preg->regparse, '}');
492 else {
493 min = (op == '+');
494 max = (op == '?' ? 1 : MAX_REP_COUNT);
497 if (preg->regparse[1] == '?') {
498 preg->regparse++;
499 next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret);
501 else {
502 next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret);
504 preg->program[ret + 2] = max;
505 preg->program[ret + 3] = min;
506 preg->program[ret + 4] = 0;
508 *flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART);
510 if (!(flags & SIMPLE)) {
511 int back = regnode(preg, BACK);
512 regtail(preg, back, ret);
513 regtail(preg, next, back);
516 preg->regparse++;
517 if (ISMULT(*preg->regparse)) {
518 preg->err = REG_ERR_NESTED_COUNT;
519 return 0;
522 return chain ? chain : ret;
526 * Add all characters in the inclusive range between lower and upper.
528 * Handles a swapped range (upper < lower).
530 static void reg_addrange(regex_t *preg, int lower, int upper)
532 if (lower > upper) {
533 reg_addrange(preg, upper, lower);
535 /* Add a range as length, start */
536 regc(preg, upper - lower + 1);
537 regc(preg, lower);
541 * Add a null-terminated literal string as a set of ranges.
543 static void reg_addrange_str(regex_t *preg, const char *str)
545 while (*str) {
546 reg_addrange(preg, *str, *str);
547 str++;
/**
 * Decodes the next character at 's' with utf8_tounicode() and stores it
 * in *uc.
 *
 * If 'upper' is non-zero, the stored character is passed through
 * utf8_upper() first.
 *
 * Returns the value returned by utf8_tounicode().
 */
static int reg_utf8_tounicode_case(const char *s, int *uc, int upper)
{
	const int consumed = utf8_tounicode(s, uc);

	if (upper) {
		*uc = utf8_upper(*uc);
	}
	return consumed;
}
/**
 * Converts a hex digit character to its numeric value (0-15).
 *
 * Both upper and lower case letters are accepted.
 * Returns -1 for anything that is not a hex digit.
 */
static int hexdigitval(int c)
{
	if ('0' <= c && c <= '9') {
		return c - '0';
	}
	switch (c) {
		case 'a': case 'A': return 10;
		case 'b': case 'B': return 11;
		case 'c': case 'C': return 12;
		case 'd': case 'D': return 13;
		case 'e': case 'E': return 14;
		case 'f': case 'F': return 15;
		default:
			return -1;
	}
}
/**
 * Parses up to 'n' hex digits at 's' and stores the result in *uc.
 *
 * Parsing stops at the first character that is not a hex digit.
 *
 * Returns the number of hex digits parsed.
 * If there are no hex digits, returns 0 and leaves *uc untouched.
 */
static int parse_hex(const char *s, int n, int *uc)
{
	int acc = 0;
	int count = 0;

	while (count < n) {
		const int digit = hexdigitval(s[count]);

		if (digit == -1) {
			break;
		}
		acc = (acc << 4) | digit;
		count++;
	}
	if (count != 0) {
		*uc = acc;
	}
	return count;
}
/**
 * Call for chars after a backslash to decode the escape sequence.
 *
 * Recognises \b \e \f \n \r \t \v, \uXXXX (up to 4 hex digits) and
 * \xXX (up to 2 hex digits).  A 'u' or 'x' with no hex digits, and any
 * other character, decodes to itself.  If 's' is at the end of the
 * string, nothing is consumed and the result is a backslash.
 *
 * Stores the result in *ch.
 *
 * Returns the number of bytes consumed.
 */
static int reg_decode_escape(const char *s, int *ch)
{
	const char *p = s;
	int c = *p++;

	switch (c) {
		case 'b': c = '\b'; break;
		case 'e': c = 27; break;
		case 'f': c = '\f'; break;
		case 'n': c = '\n'; break;
		case 'r': c = '\r'; break;
		case 't': c = '\t'; break;
		case 'v': c = '\v'; break;
		case 'u':
			/* parse_hex() leaves c alone when it finds no digits */
			p += parse_hex(p, 4, &c);
			break;
		case 'x':
			p += parse_hex(p, 2, &c);
			break;
		case '\0':
			/* Nothing follows the backslash: un-consume the terminator */
			p--;
			c = '\\';
			break;
		default:
			break;
	}
	*ch = c;
	return (int)(p - s);
}
646 - regatom - the lowest level
648 * Optimization: gobbles an entire sequence of ordinary characters so that
649 * it can turn them into a single node, which is smaller to store and
650 * faster to run. Backslashed characters are exceptions, each becoming a
651 * separate node; the code is simpler that way and it's not worth fixing.
653 static int regatom(regex_t *preg, int *flagp)
655 int ret;
656 int flags;
657 int nocase = (preg->cflags & REG_ICASE);
659 int ch;
660 int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);
662 *flagp = WORST; /* Tentatively. */
664 preg->regparse += n;
665 switch (ch) {
666 /* FIXME: these chars only have meaning at beg/end of pat? */
667 case '^':
668 ret = regnode(preg, BOL);
669 break;
670 case '$':
671 ret = regnode(preg, EOL);
672 break;
673 case '.':
674 ret = regnode(preg, ANY);
675 *flagp |= HASWIDTH|SIMPLE;
676 break;
677 case '[': {
678 const char *pattern = preg->regparse;
680 if (*pattern == '^') { /* Complement of range. */
681 ret = regnode(preg, ANYBUT);
682 pattern++;
683 } else
684 ret = regnode(preg, ANYOF);
686 /* Special case. If the first char is ']' or '-', it is part of the set */
687 if (*pattern == ']' || *pattern == '-') {
688 reg_addrange(preg, *pattern, *pattern);
689 pattern++;
692 while (*pattern && *pattern != ']') {
693 /* Is this a range? a-z */
694 int start;
695 int end;
697 pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
698 if (start == '\\') {
699 pattern += reg_decode_escape(pattern, &start);
700 if (start == 0) {
701 preg->err = REG_ERR_NULL_CHAR;
702 return 0;
705 if (pattern[0] == '-' && pattern[1]) {
706 /* skip '-' */
707 pattern += utf8_tounicode(pattern, &end);
708 pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
709 if (end == '\\') {
710 pattern += reg_decode_escape(pattern, &end);
711 if (end == 0) {
712 preg->err = REG_ERR_NULL_CHAR;
713 return 0;
717 reg_addrange(preg, start, end);
718 continue;
720 if (start == '[') {
721 if (strncmp(pattern, ":alpha:]", 8) == 0) {
722 if ((preg->cflags & REG_ICASE) == 0) {
723 reg_addrange(preg, 'a', 'z');
725 reg_addrange(preg, 'A', 'Z');
726 pattern += 8;
727 continue;
729 if (strncmp(pattern, ":alnum:]", 8) == 0) {
730 if ((preg->cflags & REG_ICASE) == 0) {
731 reg_addrange(preg, 'a', 'z');
733 reg_addrange(preg, 'A', 'Z');
734 reg_addrange(preg, '0', '9');
735 pattern += 8;
736 continue;
738 if (strncmp(pattern, ":space:]", 8) == 0) {
739 reg_addrange_str(preg, " \t\r\n\f\v");
740 pattern += 8;
741 continue;
744 /* Not a range, so just add the char */
745 reg_addrange(preg, start, start);
747 regc(preg, '\0');
749 if (*pattern) {
750 pattern++;
752 preg->regparse = pattern;
754 *flagp |= HASWIDTH|SIMPLE;
756 break;
757 case '(':
758 ret = reg(preg, 1, &flags);
759 if (ret == 0)
760 return 0;
761 *flagp |= flags&(HASWIDTH|SPSTART);
762 break;
763 case '\0':
764 case '|':
765 case ')':
766 preg->err = REG_ERR_INTERNAL;
767 return 0; /* Supposed to be caught earlier. */
768 case '?':
769 case '+':
770 case '*':
771 case '{':
772 preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
773 return 0;
774 case '\\':
775 switch (*preg->regparse++) {
776 case '\0':
777 preg->err = REG_ERR_TRAILING_BACKSLASH;
778 return 0;
779 case '<':
780 case 'm':
781 ret = regnode(preg, WORDA);
782 break;
783 case '>':
784 case 'M':
785 ret = regnode(preg, WORDZ);
786 break;
787 case 'd':
788 ret = regnode(preg, ANYOF);
789 reg_addrange(preg, '0', '9');
790 regc(preg, '\0');
791 *flagp |= HASWIDTH|SIMPLE;
792 break;
793 case 'w':
794 ret = regnode(preg, ANYOF);
795 if ((preg->cflags & REG_ICASE) == 0) {
796 reg_addrange(preg, 'a', 'z');
798 reg_addrange(preg, 'A', 'Z');
799 reg_addrange(preg, '0', '9');
800 reg_addrange(preg, '_', '_');
801 regc(preg, '\0');
802 *flagp |= HASWIDTH|SIMPLE;
803 break;
804 case 's':
805 ret = regnode(preg, ANYOF);
806 reg_addrange_str(preg," \t\r\n\f\v");
807 regc(preg, '\0');
808 *flagp |= HASWIDTH|SIMPLE;
809 break;
810 /* FIXME: Someday handle \1, \2, ... */
811 default:
812 /* Handle general quoted chars in exact-match routine */
813 /* Back up to include the backslash */
814 preg->regparse--;
815 goto de_fault;
817 break;
818 de_fault:
819 default: {
821 * Encode a string of characters to be matched exactly.
823 int added = 0;
825 /* Back up to pick up the first char of interest */
826 preg->regparse -= n;
828 ret = regnode(preg, EXACTLY);
830 /* Note that a META operator such as ? or * consumes the
831 * preceding char.
832 * Thus we must be careful to look ahead by 2 and add the
833 * last char as it's own EXACTLY if necessary
836 /* Until end of string or a META char is reached */
837 while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
838 n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
839 if (ch == '\\' && preg->regparse[n]) {
840 /* Non-trailing backslash.
841 * Is this a special escape, or a regular escape?
843 if (strchr("<>mMwds", preg->regparse[n])) {
844 /* A special escape. All done with EXACTLY */
845 break;
847 /* Decode it. Note that we add the length for the escape
848 * sequence to the length for the backlash so we can skip
849 * the entire sequence, or not as required.
851 n += reg_decode_escape(preg->regparse + n, &ch);
852 if (ch == 0) {
853 preg->err = REG_ERR_NULL_CHAR;
854 return 0;
858 /* Now we have one char 'ch' of length 'n'.
859 * Check to see if the following char is a MULT
862 if (ISMULT(preg->regparse[n])) {
863 /* Yes. But do we already have some EXACTLY chars? */
864 if (added) {
865 /* Yes, so return what we have and pick up the current char next time around */
866 break;
868 /* No, so add this single char and finish */
869 regc(preg, ch);
870 added++;
871 preg->regparse += n;
872 break;
875 /* No, so just add this char normally */
876 regc(preg, ch);
877 added++;
878 preg->regparse += n;
880 regc(preg, '\0');
882 *flagp |= HASWIDTH;
883 if (added == 1)
884 *flagp |= SIMPLE;
885 break;
887 break;
890 return(ret);
893 static void reg_grow(regex_t *preg, int n)
895 if (preg->p + n >= preg->proglen) {
896 preg->proglen = (preg->p + n) * 2;
897 preg->program = realloc(preg->program, preg->proglen * sizeof(int));
902 - regnode - emit a node
904 /* Location. */
905 static int regnode(regex_t *preg, int op)
907 reg_grow(preg, 2);
909 preg->program[preg->p++] = op;
910 preg->program[preg->p++] = 0;
912 /* Return the start of the node */
913 return preg->p - 2;
917 - regc - emit (if appropriate) a byte of code
919 static void regc(regex_t *preg, int b )
921 reg_grow(preg, 1);
922 preg->program[preg->p++] = b;
926 - reginsert - insert an operator in front of already-emitted operand
928 * Means relocating the operand.
929 * Returns the new location of the original operand.
931 static int reginsert(regex_t *preg, int op, int size, int opnd )
933 reg_grow(preg, size);
935 /* Move everything from opnd up */
936 memmove(preg->program + opnd + size, preg->program + opnd, sizeof(int) * (preg->p - opnd));
937 /* Zero out the new space */
938 memset(preg->program + opnd, 0, sizeof(int) * size);
940 preg->program[opnd] = op;
942 preg->p += size;
944 return opnd + size;
948 - regtail - set the next-pointer at the end of a node chain
950 static void regtail_(regex_t *preg, int p, int val, int line )
952 int scan;
953 int temp;
954 int offset;
956 /* Find last node. */
957 scan = p;
958 for (;;) {
959 temp = regnext(preg, scan);
960 if (temp == 0)
961 break;
962 scan = temp;
965 if (OP(preg, scan) == BACK)
966 offset = scan - val;
967 else
968 offset = val - scan;
970 preg->program[scan + 1] = offset;
974 - regoptail - regtail on operand of first argument; nop if operandless
977 static void regoptail(regex_t *preg, int p, int val )
979 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
980 if (p != 0 && OP(preg, p) == BRANCH) {
981 regtail(preg, OPERAND(p), val);
986 * regexec and friends
990 * Forwards.
992 static int regtry(regex_t *preg, const char *string );
993 static int regmatch(regex_t *preg, int prog);
994 static int regrepeat(regex_t *preg, int p, int max);
997 - regexec - match a regexp against a string
999 int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
1001 const char *s;
1002 int scan;
1004 /* Be paranoid... */
1005 if (preg == NULL || preg->program == NULL || string == NULL) {
1006 return REG_ERR_NULL_ARGUMENT;
1009 /* Check validity of program. */
1010 if (*preg->program != REG_MAGIC) {
1011 return REG_ERR_CORRUPTED;
1014 #ifdef DEBUG
1015 fprintf(stderr, "regexec: %s\n", string);
1016 regdump(preg);
1017 #endif
1019 preg->eflags = eflags;
1020 preg->pmatch = pmatch;
1021 preg->nmatch = nmatch;
1022 preg->start = string; /* All offsets are computed from here */
1024 /* Must clear out the embedded repeat counts */
1025 for (scan = OPERAND(1); scan != 0; scan = regnext(preg, scan)) {
1026 switch (OP(preg, scan)) {
1027 case REP:
1028 case REPMIN:
1029 case REPX:
1030 case REPXMIN:
1031 preg->program[scan + 4] = 0;
1032 break;
1036 /* If there is a "must appear" string, look for it. */
1037 if (preg->regmust != 0) {
1038 s = string;
1039 while ((s = str_find(s, preg->program[preg->regmust], preg->cflags & REG_ICASE)) != NULL) {
1040 if (prefix_cmp(preg->program + preg->regmust, preg->regmlen, s, preg->cflags & REG_ICASE) >= 0) {
1041 break;
1043 s++;
1045 if (s == NULL) /* Not present. */
1046 return REG_NOMATCH;
1049 /* Mark beginning of line for ^ . */
1050 preg->regbol = string;
1052 /* Simplest case: anchored match need be tried only once (maybe per line). */
1053 if (preg->reganch) {
1054 if (eflags & REG_NOTBOL) {
1055 /* This is an anchored search, but not an BOL, so possibly skip to the next line */
1056 goto nextline;
1058 while (1) {
1059 int ret = regtry(preg, string);
1060 if (ret) {
1061 return REG_NOERROR;
1063 if (*string) {
1064 nextline:
1065 if (preg->cflags & REG_NEWLINE) {
1066 /* Try the next anchor? */
1067 string = strchr(string, '\n');
1068 if (string) {
1069 preg->regbol = ++string;
1070 continue;
1074 return REG_NOMATCH;
1078 /* Messy cases: unanchored match. */
1079 s = string;
1080 if (preg->regstart != '\0') {
1081 /* We know what char it must start with. */
1082 while ((s = str_find(s, preg->regstart, preg->cflags & REG_ICASE)) != NULL) {
1083 if (regtry(preg, s))
1084 return REG_NOERROR;
1085 s++;
1088 else
1089 /* We don't -- general case. */
1090 while (1) {
1091 if (regtry(preg, s))
1092 return REG_NOERROR;
1093 if (*s == '\0') {
1094 break;
1096 s += utf8_charlen(*s);
1099 /* Failure. */
1100 return REG_NOMATCH;
1104 - regtry - try match at specific point
1106 /* 0 failure, 1 success */
1107 static int regtry( regex_t *preg, const char *string )
1109 int i;
1111 preg->reginput = string;
1113 for (i = 0; i < preg->nmatch; i++) {
1114 preg->pmatch[i].rm_so = -1;
1115 preg->pmatch[i].rm_eo = -1;
1117 if (regmatch(preg, 1)) {
1118 preg->pmatch[0].rm_so = string - preg->start;
1119 preg->pmatch[0].rm_eo = preg->reginput - preg->start;
1120 return(1);
1121 } else
1122 return(0);
/**
 * Checks whether the 'proglen' characters at 'prog' are a prefix of
 * 'string'.
 *
 * If 'nocase' is non-zero, each character decoded from 'string' is
 * converted to upper case before it is compared.
 *
 * Returns the number of bytes of 'string' that were matched, or -1 if the
 * prefix does not match (including when 'string' is too short).
 */
static int prefix_cmp(const int *prog, int proglen, const char *string, int nocase)
{
	const char *cursor = string;
	int remaining = proglen;

	while (remaining != 0 && *cursor != '\0') {
		int ch;
		const int nbytes = reg_utf8_tounicode_case(cursor, &ch, nocase);

		if (ch != *prog) {
			return -1;
		}
		prog++;
		cursor += nbytes;
		remaining--;
	}
	return (remaining == 0) ? (int)(cursor - string) : -1;
}
/**
 * Searches for 'c' in the set 'range'.
 *
 * The set is a sequence of (length, start) pairs terminated by a 0 in the
 * length position; each pair covers start .. start + length - 1.
 *
 * Returns 1 if found, or 0 if not.
 */
static int reg_range_find(const int *range, int c)
{
	const int *pair;

	for (pair = range; pair[0] != 0; pair += 2) {
		const int first = pair[1];
		const int last = pair[0] + pair[1] - 1;

		if (first <= c && c <= last) {
			return 1;
		}
	}
	return 0;
}
/**
 * Search for the character 'c' in the utf-8 string 'string'.
 *
 * If 'nocase' is set, both 'c' and each character decoded from 'string'
 * are converted to upper case before they are compared.
 *
 * Returns a pointer to the position in the string where 'c' was found, or
 * NULL if not found.
 */
static const char *str_find(const char *string, int c, int nocase)
{
	const char *p;
	int ch;

	if (nocase) {
		c = utf8_upper(c);
	}
	for (p = string; *p != '\0'; p += reg_utf8_tounicode_case(p, &ch, nocase)) {
		/* Peek at the character here; the loop step decodes it again to advance */
		reg_utf8_tounicode_case(p, &ch, nocase);
		if (ch == c) {
			return p;
		}
	}
	return NULL;
}
1195 * Returns true if 'ch' is an end-of-line char.
1197 * In REG_NEWLINE mode, \n is considered EOL in
1198 * addition to \0
1200 static int reg_iseol(regex_t *preg, int ch)
1202 if (preg->cflags & REG_NEWLINE) {
1203 return ch == '\0' || ch == '\n';
1205 else {
1206 return ch == '\0';
1210 static int regmatchsimplerepeat(regex_t *preg, int scan, int matchmin)
1212 int nextch = '\0';
1213 const char *save;
1214 int no;
1215 int c;
1217 int max = preg->program[scan + 2];
1218 int min = preg->program[scan + 3];
1219 int next = regnext(preg, scan);
1222 * Lookahead to avoid useless match attempts
1223 * when we know what character comes next.
1225 if (OP(preg, next) == EXACTLY) {
1226 nextch = preg->program[OPERAND(next)];
1228 save = preg->reginput;
1229 no = regrepeat(preg, scan + 5, max);
1230 if (no < min) {
1231 return 0;
1233 if (matchmin) {
1234 /* from min up to no */
1235 max = no;
1236 no = min;
1238 /* else from no down to min */
1239 while (1) {
1240 if (matchmin) {
1241 if (no > max) {
1242 break;
1245 else {
1246 if (no < min) {
1247 break;
1250 preg->reginput = save + utf8_index(save, no);
1251 reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));
1252 /* If it could work, try it. */
1253 if (reg_iseol(preg, nextch) || c == nextch) {
1254 if (regmatch(preg, next)) {
1255 return(1);
1258 if (matchmin) {
1259 /* Couldn't or didn't, add one more */
1260 no++;
1262 else {
1263 /* Couldn't or didn't -- back up. */
1264 no--;
1267 return(0);
1270 static int regmatchrepeat(regex_t *preg, int scan, int matchmin)
1272 int *scanpt = preg->program + scan;
1274 int max = scanpt[2];
1275 int min = scanpt[3];
1277 /* Have we reached min? */
1278 if (scanpt[4] < min) {
1279 /* No, so get another one */
1280 scanpt[4]++;
1281 if (regmatch(preg, scan + 5)) {
1282 return 1;
1284 scanpt[4]--;
1285 return 0;
1287 if (scanpt[4] > max) {
1288 return 0;
1291 if (matchmin) {
1292 /* minimal, so try other branch first */
1293 if (regmatch(preg, regnext(preg, scan))) {
1294 return 1;
1296 /* No, so try one more */
1297 scanpt[4]++;
1298 if (regmatch(preg, scan + 5)) {
1299 return 1;
1301 scanpt[4]--;
1302 return 0;
1304 /* maximal, so try this branch again */
1305 if (scanpt[4] < max) {
1306 scanpt[4]++;
1307 if (regmatch(preg, scan + 5)) {
1308 return 1;
1310 scanpt[4]--;
1312 /* At this point we are at max with no match. Try the other branch */
1313 return regmatch(preg, regnext(preg, scan));
1317 - regmatch - main matching routine
1319 * Conceptually the strategy is simple: check to see whether the current
1320 * node matches, call self recursively to see whether the rest matches,
1321 * and then act accordingly. In practice we make some effort to avoid
1322 * recursion, in particular by going through "ordinary" nodes (that don't
1323 * need to know whether the rest of the match failed) by a loop instead of
1324 * by recursion.
1326 /* 0 failure, 1 success */
1327 static int regmatch(regex_t *preg, int prog)
1329 int scan; /* Current node. */
1330 int next; /* Next node. */
1332 scan = prog;
1334 #ifdef DEBUG
1335 if (scan != 0 && regnarrate)
1336 fprintf(stderr, "%s(\n", regprop(scan));
1337 #endif
1338 while (scan != 0) {
1339 int n;
1340 int c;
1341 #ifdef DEBUG
1342 if (regnarrate) {
1343 fprintf(stderr, "%3d: %s...\n", scan, regprop(OP(preg, scan))); /* Where, what. */
1345 #endif
1346 next = regnext(preg, scan);
1347 n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));
1349 switch (OP(preg, scan)) {
1350 case BOL:
1351 if (preg->reginput != preg->regbol)
1352 return(0);
1353 break;
1354 case EOL:
1355 if (!reg_iseol(preg, c)) {
1356 return(0);
1358 break;
1359 case WORDA:
1360 /* Must be looking at a letter, digit, or _ */
1361 if ((!isalnum(UCHAR(c))) && c != '_')
1362 return(0);
1363 /* Prev must be BOL or nonword */
1364 if (preg->reginput > preg->regbol &&
1365 (isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_'))
1366 return(0);
1367 break;
1368 case WORDZ:
1369 /* Can't match at BOL */
1370 if (preg->reginput > preg->regbol) {
1371 /* Current must be EOL or nonword */
1372 if (reg_iseol(preg, c) || !isalnum(UCHAR(c)) || c != '_') {
1373 c = preg->reginput[-1];
1374 /* Previous must be word */
1375 if (isalnum(UCHAR(c)) || c == '_') {
1376 break;
1380 /* No */
1381 return(0);
1383 case ANY:
1384 if (reg_iseol(preg, c))
1385 return 0;
1386 preg->reginput += n;
1387 break;
1388 case EXACTLY: {
1389 int opnd;
1390 int len;
1391 int slen;
1393 opnd = OPERAND(scan);
1394 len = str_int_len(preg->program + opnd);
1396 slen = prefix_cmp(preg->program + opnd, len, preg->reginput, preg->cflags & REG_ICASE);
1397 if (slen < 0) {
1398 return(0);
1400 preg->reginput += slen;
1402 break;
1403 case ANYOF:
1404 if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) == 0) {
1405 return(0);
1407 preg->reginput += n;
1408 break;
1409 case ANYBUT:
1410 if (reg_iseol(preg, c) || reg_range_find(preg->program + OPERAND(scan), c) != 0) {
1411 return(0);
1413 preg->reginput += n;
1414 break;
1415 case NOTHING:
1416 break;
1417 case BACK:
1418 break;
1419 case BRANCH: {
1420 const char *save;
1422 if (OP(preg, next) != BRANCH) /* No choice. */
1423 next = OPERAND(scan); /* Avoid recursion. */
1424 else {
1425 do {
1426 save = preg->reginput;
1427 if (regmatch(preg, OPERAND(scan))) {
1428 return(1);
1430 preg->reginput = save;
1431 scan = regnext(preg, scan);
1432 } while (scan != 0 && OP(preg, scan) == BRANCH);
1433 return(0);
1434 /* NOTREACHED */
1437 break;
1438 case REP:
1439 case REPMIN:
1440 return regmatchsimplerepeat(preg, scan, OP(preg, scan) == REPMIN);
1442 case REPX:
1443 case REPXMIN:
1444 return regmatchrepeat(preg, scan, OP(preg, scan) == REPXMIN);
1446 case END:
1447 return(1); /* Success! */
1448 break;
1449 default:
1450 if (OP(preg, scan) >= OPEN+1 && OP(preg, scan) < CLOSE_END) {
1451 const char *save;
1453 save = preg->reginput;
1455 if (regmatch(preg, next)) {
1456 int no;
1458 * Don't set startp if some later
1459 * invocation of the same parentheses
1460 * already has.
1462 if (OP(preg, scan) < CLOSE) {
1463 no = OP(preg, scan) - OPEN;
1464 if (no < preg->nmatch && preg->pmatch[no].rm_so == -1) {
1465 preg->pmatch[no].rm_so = save - preg->start;
1468 else {
1469 no = OP(preg, scan) - CLOSE;
1470 if (no < preg->nmatch && preg->pmatch[no].rm_eo == -1) {
1471 preg->pmatch[no].rm_eo = save - preg->start;
1474 return(1);
1475 } else
1476 return(0);
1478 return REG_ERR_INTERNAL;
1481 scan = next;
1485 * We get here only if there's trouble -- normally "case END" is
1486 * the terminating point.
1488 return REG_ERR_INTERNAL;
1492 - regrepeat - repeatedly match something simple, report how many
1494 static int regrepeat(regex_t *preg, int p, int max)
1496 int count = 0;
1497 const char *scan;
1498 int opnd;
1499 int ch;
1500 int n;
1502 scan = preg->reginput;
1503 opnd = OPERAND(p);
1504 switch (OP(preg, p)) {
1505 case ANY:
1506 /* No need to handle utf8 specially here */
1507 while (!reg_iseol(preg, *scan) && count < max) {
1508 count++;
1509 scan++;
1511 break;
1512 case EXACTLY:
1513 while (count < max) {
1514 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1515 if (preg->program[opnd] != ch) {
1516 break;
1518 count++;
1519 scan += n;
1521 break;
1522 case ANYOF:
1523 while (count < max) {
1524 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1525 if (reg_iseol(preg, ch) || reg_range_find(preg->program + opnd, ch) == 0) {
1526 break;
1528 count++;
1529 scan += n;
1531 break;
1532 case ANYBUT:
1533 while (count < max) {
1534 n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
1535 if (reg_iseol(preg, ch) || reg_range_find(preg->program + opnd, ch) != 0) {
1536 break;
1538 count++;
1539 scan += n;
1541 break;
1542 default: /* Oh dear. Called inappropriately. */
1543 preg->err = REG_ERR_INTERNAL;
1544 count = 0; /* Best compromise. */
1545 break;
1547 preg->reginput = scan;
1549 return(count);
1553 - regnext - dig the "next" pointer out of a node
1555 static int regnext(regex_t *preg, int p )
1557 int offset;
1559 offset = NEXT(preg, p);
1561 if (offset == 0)
1562 return 0;
1564 if (OP(preg, p) == BACK)
1565 return(p-offset);
1566 else
1567 return(p+offset);
1570 #if defined(DEBUG) && !defined(JIM_BOOTSTRAP)
1573 - regdump - dump a regexp onto stdout in vaguely comprehensible form
1575 static void regdump(regex_t *preg)
1577 int s;
1578 int op = EXACTLY; /* Arbitrary non-END op. */
1579 int next;
1580 char buf[4];
1582 int i;
1583 for (i = 1; i < preg->p; i++) {
1584 printf("%02x ", preg->program[i]);
1585 if (i % 16 == 15) {
1586 printf("\n");
1589 printf("\n");
1591 s = 1;
1592 while (op != END && s < preg->p) { /* While that wasn't END last time... */
1593 op = OP(preg, s);
1594 printf("%3d: %s", s, regprop(op)); /* Where, what. */
1595 next = regnext(preg, s);
1596 if (next == 0) /* Next ptr. */
1597 printf("(0)");
1598 else
1599 printf("(%d)", next);
1600 s += 2;
1601 if (op == REP || op == REPMIN || op == REPX || op == REPXMIN) {
1602 int max = preg->program[s];
1603 int min = preg->program[s + 1];
1604 if (max == 65535) {
1605 printf("{%d,*}", min);
1607 else {
1608 printf("{%d,%d}", min, max);
1610 printf(" %d", preg->program[s + 2]);
1611 s += 3;
1613 else if (op == ANYOF || op == ANYBUT) {
1614 /* set of ranges */
1616 while (preg->program[s]) {
1617 int len = preg->program[s++];
1618 int first = preg->program[s++];
1619 buf[utf8_fromunicode(buf, first)] = 0;
1620 printf("%s", buf);
1621 if (len > 1) {
1622 buf[utf8_fromunicode(buf, first + len - 1)] = 0;
1623 printf("-%s", buf);
1626 s++;
1628 else if (op == EXACTLY) {
1629 /* Literal string, where present. */
1631 while (preg->program[s]) {
1632 buf[utf8_fromunicode(buf, preg->program[s])] = 0;
1633 printf("%s", buf);
1634 s++;
1636 s++;
1638 putchar('\n');
1641 if (op == END) {
1642 /* Header fields of interest. */
1643 if (preg->regstart) {
1644 buf[utf8_fromunicode(buf, preg->regstart)] = 0;
1645 printf("start '%s' ", buf);
1647 if (preg->reganch)
1648 printf("anchored ");
1649 if (preg->regmust != 0) {
1650 int i;
1651 printf("must have:");
1652 for (i = 0; i < preg->regmlen; i++) {
1653 putchar(preg->program[preg->regmust + i]);
1655 putchar('\n');
1658 printf("\n");
1662 - regprop - printable representation of opcode
1664 static const char *regprop( int op )
1666 static char buf[50];
1668 switch (op) {
1669 case BOL:
1670 return "BOL";
1671 case EOL:
1672 return "EOL";
1673 case ANY:
1674 return "ANY";
1675 case ANYOF:
1676 return "ANYOF";
1677 case ANYBUT:
1678 return "ANYBUT";
1679 case BRANCH:
1680 return "BRANCH";
1681 case EXACTLY:
1682 return "EXACTLY";
1683 case NOTHING:
1684 return "NOTHING";
1685 case BACK:
1686 return "BACK";
1687 case END:
1688 return "END";
1689 case REP:
1690 return "REP";
1691 case REPMIN:
1692 return "REPMIN";
1693 case REPX:
1694 return "REPX";
1695 case REPXMIN:
1696 return "REPXMIN";
1697 case WORDA:
1698 return "WORDA";
1699 case WORDZ:
1700 return "WORDZ";
1701 default:
1702 if (op >= OPEN && op < CLOSE) {
1703 snprintf(buf, sizeof(buf), "OPEN%d", op-OPEN);
1705 else if (op >= CLOSE && op < CLOSE_END) {
1706 snprintf(buf, sizeof(buf), "CLOSE%d", op-CLOSE);
1708 else {
1709 snprintf(buf, sizeof(buf), "?%d?\n", op);
1711 return(buf);
1714 #endif /* JIM_BOOTSTRAP */
1716 size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
1718 static const char *error_strings[] = {
1719 "success",
1720 "no match",
1721 "bad pattern",
1722 "null argument",
1723 "unknown error",
1724 "too big",
1725 "out of memory",
1726 "too many ()",
1727 "parentheses () not balanced",
1728 "braces {} not balanced",
1729 "invalid repetition count(s)",
1730 "extra characters",
1731 "*+ of empty atom",
1732 "nested count",
1733 "internal error",
1734 "count follows nothing",
1735 "trailing backslash",
1736 "corrupted program",
1737 "contains null char",
1739 const char *err;
1741 if (errcode < 0 || errcode >= REG_ERR_NUM) {
1742 err = "Bad error code";
1744 else {
1745 err = error_strings[errcode];
1748 return snprintf(errbuf, errbuf_size, "%s", err);
1751 void regfree(regex_t *preg)
1753 free(preg->program);
1756 #endif