2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved.
35 * @(#)regexp.c 8.1 (Berkeley) 6/6/93
37 * $DragonFly: src/usr.bin/vgrind/regexp.c,v 1.3 2003/10/04 20:36:54 hmp Exp $
49 static void expconv(void);
51 boolean _escaped
; /* true if we are currently _escaped */
52 char *s_start
; /* start of string */
53 boolean l_onecase
; /* true if upper and lower equivalent */
55 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
57 /* STRNCMP - like strncmp except that we convert the
58 * first string to lower case before comparing
59 * if l_onecase is set.
63 STRNCMP(register char *s1
, register char *s2
, register int len
)
67 if (*s2
- makelower(*s1
))
68 return (*s2
- makelower(*s1
));
87 /* The following routine converts an irregular expression to
90 * Either meta symbols (\a \d or \p) or character strings or
91 * operations ( alternation or perenthesizing ) can be
92 * specified. Each starts with a descriptor byte. The descriptor
93 * byte has STR set for strings, META set for meta symbols
94 * and OPER set for operations.
95 * The descriptor byte can also have the OPT bit set if the object
96 * defined is optional. Also ALT can be set to indicate an alternation.
98 * For metasymbols the byte following the descriptor byte identities
99 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
100 * strings the byte after the descriptor is a character count for
103 * meta symbols := descriptor
106 * strings := descriptor
110 * operatins := descriptor
116 * handy macros for accessing parts of match blocks
118 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
119 #define MNEXT(A) (A+2) /* character following a metasymbol block */
121 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
122 #define OCNT(A) (*(A+2)) /* character count */
123 #define ONEXT(A) (A+3) /* next character after the operation */
124 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
126 #define SCNT(A) (*(A+1)) /* byte count of a string */
127 #define SSTR(A) (A+2) /* address of the string */
128 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
131 * bit flags in the descriptor
139 static char *ccre
; /* pointer to current position in converted exp*/
140 static char *ure
; /* pointer current position in unconverted exp */
142 /* re: unconverted irregular expression */
146 register char *cre
; /* pointer to converted regular expression */
148 /* allocate room for the converted expression */
153 cre
= malloc (4 * strlen(re
) + 3);
157 /* start the conversion with a \a */
162 /* start the conversion (its recursive) */
171 register char *cs
; /* pointer to current symbol in converted exp */
172 register char c
; /* character being processed */
173 register char *acs
; /* pinter to last alternate */
176 /* let the conversion begin */
179 while (*ure
!= NIL
) {
180 switch (c
= *ure
++) {
183 switch (c
= *ure
++) {
185 /* escaped characters are just characters */
187 if (cs
== NIL
|| (*cs
& STR
) == 0) {
197 /* normal(?) metacharacters */
202 if (acs
!= NIL
&& acs
!= cs
) {
205 OCNT(acs
) = ccre
- acs
;
218 /* just put the symbol in */
221 if (acs
!= NIL
&& acs
!= cs
) {
224 OCNT(acs
) = ccre
- acs
;
235 /* mark the last match sequence as optional */
241 /* recurse and define a subexpression */
243 if (acs
!= NIL
&& acs
!= cs
) {
246 OCNT(acs
) = ccre
- acs
;
256 OCNT(cs
) = ccre
- cs
; /* offset to next symbol */
259 /* return from a recursion */
264 OCNT(acs
) = ccre
- acs
;
275 /* mark the last match sequence as having an alternate */
276 /* the third byte will contain an offset to jump over the */
277 /* alternate match in case the first did not fail */
279 if (acs
!= NIL
&& acs
!= cs
)
280 OCNT(ccre
) = ccre
- acs
; /* make a back pointer */
288 acs
= cs
; /* remember that the pointer is to be filles */
291 /* if its not a metasymbol just build a scharacter string */
293 if (cs
== NIL
|| (*cs
& STR
) == 0) {
307 OCNT(acs
) = ccre
- acs
;
314 /* end of convertre */
318 * The following routine recognises an irregular expresion
319 * with the following special characters:
321 * \? - means last match was optional
322 * \a - matches any number of characters
323 * \d - matches any number of spaces and tabs
324 * \p - matches any number of alphanumeric
326 * characters matched will be copied into
327 * the area pointed to by 'name'.
329 * \( \) - grouping used mostly for alternation and
332 * The irregular expression must be translated to internal form
333 * prior to calling this routine
335 * The value returned is the pointer to the first non \a
340 s: string to check for a match in
341 re: a converted irregular expression
342 mstring: where to put whatever matches a \p
345 expmatch(register char *s
, register char *re
, register char *mstring
)
347 register char *cs
; /* the current symbol */
348 register char *ptr
,*s1
; /* temporary pointer */
349 boolean matched
; /* a temporary boolean */
351 /* initial conditions */
357 /* loop till expression string is exhausted (or at least pretty tired) */
359 switch (*cs
& (OPER
| STR
| META
)) {
361 /* try to match a string */
363 matched
= !STRNCMP (s
, SSTR(cs
), SCNT(cs
));
366 /* hoorah it matches */
369 } else if (*cs
& ALT
) {
371 /* alternation, skip to next expression */
373 } else if (*cs
& OPT
) {
375 /* the match is optional */
377 matched
= 1; /* indicate a successful match */
380 /* no match, error return */
385 /* an operator, do something fancy */
389 /* this is an alternation */
393 /* last thing in the alternation was a match, skip ahead */
397 /* no match, keep trying */
401 /* this is a grouping, recurse */
403 ptr
= expmatch (s
, ONEXT(cs
), mstring
);
406 /* the subexpression matched */
409 } else if (*cs
& ALT
) {
411 /* alternation, skip to next expression */
413 } else if (*cs
& OPT
) {
415 /* the match is optional */
416 matched
= 1; /* indicate a successful match */
419 /* no match, error return */
427 /* try to match a metasymbol */
431 /* try to match anything and remember what was matched */
434 * This is really the same as trying the match the
435 * remaining parts of the expression to any subset
440 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
441 if (ptr
!= NIL
&& s1
!= s
) {
443 /* we have a match, remember the match */
444 strncpy (mstring
, s
, s1
- s
);
445 mstring
[s1
- s
] = '\0';
447 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
449 /* it was aoptional so no match is ok */
451 } else if (ptr
!= NIL
) {
453 /* not optional and we still matched */
456 if (!(isalnum(*s1
) || *s1
== '_' ||
459 /* C++ scope operator */
460 (strlen(s1
) > 1 && *s1
== ':' && s1
[1] == ':' &&
464 _escaped
= _escaped
? FALSE
: TRUE
;
470 /* try to match anything */
473 * This is really the same as trying the match the
474 * remaining parts of the expression to any subset
479 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
480 if (ptr
!= NIL
&& s1
!= s
) {
482 /* we have a match */
484 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
486 /* it was aoptional so no match is ok */
488 } else if (ptr
!= NIL
) {
490 /* not optional and we still matched */
494 _escaped
= _escaped
? FALSE
: TRUE
;
500 /* fail if we are currently _escaped */
507 /* match any number of tabs and spaces */
510 while (*s
== ' ' || *s
== '\t')
512 if (s
!= ptr
|| s
== s_start
) {
514 /* match, be happy */
517 } else if (*s
== '\n' || *s
== '\0') {
519 /* match, be happy */
522 } else if (*cs
& ALT
) {
524 /* try the next part */
527 } else if (*cs
& OPT
) {
534 /* no match, error return */
538 /* check for end of line */
540 if (*s
== '\0' || *s
== '\n') {
542 /* match, be happy */
546 } else if (*cs
& ALT
) {
548 /* try the next part */
551 } else if (*cs
& OPT
) {
558 /* no match, error return */
562 /* check for start of line */
566 /* match, be happy */
569 } else if (*cs
& ALT
) {
571 /* try the next part */
574 } else if (*cs
& OPT
) {
581 /* no match, error return */
585 /* end of a subexpression, return success */