2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved.
31 * @(#)regexp.c 8.1 (Berkeley) 6/6/93
33 * $DragonFly: src/usr.bin/vgrind/regexp.c,v 1.4 2008/10/16 01:52:34 swildner Exp $
45 static void expconv(void);
47 boolean _escaped
; /* true if we are currently _escaped */
48 char *s_start
; /* start of string */
49 boolean l_onecase
; /* true if upper and lower equivalent */
51 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
53 /* STRNCMP - like strncmp except that we convert the
54 * first string to lower case before comparing
55 * if l_onecase is set.
59 STRNCMP(char *s1
, char *s2
, int len
)
63 if (*s2
- makelower(*s1
))
64 return (*s2
- makelower(*s1
));
83 /* The following routine converts an irregular expression to
86 * Either meta symbols (\a \d or \p) or character strings or
87 * operations ( alternation or perenthesizing ) can be
88 * specified. Each starts with a descriptor byte. The descriptor
89 * byte has STR set for strings, META set for meta symbols
90 * and OPER set for operations.
91 * The descriptor byte can also have the OPT bit set if the object
92 * defined is optional. Also ALT can be set to indicate an alternation.
94 * For metasymbols the byte following the descriptor byte identities
95 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
96 * strings the byte after the descriptor is a character count for
99 * meta symbols := descriptor
102 * strings := descriptor
106 * operatins := descriptor
112 * handy macros for accessing parts of match blocks
114 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
115 #define MNEXT(A) (A+2) /* character following a metasymbol block */
117 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
118 #define OCNT(A) (*(A+2)) /* character count */
119 #define ONEXT(A) (A+3) /* next character after the operation */
120 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
122 #define SCNT(A) (*(A+1)) /* byte count of a string */
123 #define SSTR(A) (A+2) /* address of the string */
124 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
127 * bit flags in the descriptor
135 static char *ccre
; /* pointer to current position in converted exp*/
136 static char *ure
; /* pointer current position in unconverted exp */
138 /* re: unconverted irregular expression */
142 char *cre
; /* pointer to converted regular expression */
144 /* allocate room for the converted expression */
149 cre
= malloc (4 * strlen(re
) + 3);
153 /* start the conversion with a \a */
158 /* start the conversion (its recursive) */
167 char *cs
; /* pointer to current symbol in converted exp */
168 char c
; /* character being processed */
169 char *acs
; /* pinter to last alternate */
172 /* let the conversion begin */
175 while (*ure
!= NIL
) {
176 switch (c
= *ure
++) {
179 switch (c
= *ure
++) {
181 /* escaped characters are just characters */
183 if (cs
== NIL
|| (*cs
& STR
) == 0) {
193 /* normal(?) metacharacters */
198 if (acs
!= NIL
&& acs
!= cs
) {
201 OCNT(acs
) = ccre
- acs
;
214 /* just put the symbol in */
217 if (acs
!= NIL
&& acs
!= cs
) {
220 OCNT(acs
) = ccre
- acs
;
231 /* mark the last match sequence as optional */
237 /* recurse and define a subexpression */
239 if (acs
!= NIL
&& acs
!= cs
) {
242 OCNT(acs
) = ccre
- acs
;
252 OCNT(cs
) = ccre
- cs
; /* offset to next symbol */
255 /* return from a recursion */
260 OCNT(acs
) = ccre
- acs
;
271 /* mark the last match sequence as having an alternate */
272 /* the third byte will contain an offset to jump over the */
273 /* alternate match in case the first did not fail */
275 if (acs
!= NIL
&& acs
!= cs
)
276 OCNT(ccre
) = ccre
- acs
; /* make a back pointer */
284 acs
= cs
; /* remember that the pointer is to be filles */
287 /* if its not a metasymbol just build a scharacter string */
289 if (cs
== NIL
|| (*cs
& STR
) == 0) {
303 OCNT(acs
) = ccre
- acs
;
310 /* end of convertre */
314 * The following routine recognises an irregular expresion
315 * with the following special characters:
317 * \? - means last match was optional
318 * \a - matches any number of characters
319 * \d - matches any number of spaces and tabs
320 * \p - matches any number of alphanumeric
322 * characters matched will be copied into
323 * the area pointed to by 'name'.
325 * \( \) - grouping used mostly for alternation and
328 * The irregular expression must be translated to internal form
329 * prior to calling this routine
331 * The value returned is the pointer to the first non \a
336 s: string to check for a match in
337 re: a converted irregular expression
338 mstring: where to put whatever matches a \p
341 expmatch(char *s
, char *re
, char *mstring
)
343 char *cs
; /* the current symbol */
344 char *ptr
,*s1
; /* temporary pointer */
345 boolean matched
; /* a temporary boolean */
347 /* initial conditions */
353 /* loop till expression string is exhausted (or at least pretty tired) */
355 switch (*cs
& (OPER
| STR
| META
)) {
357 /* try to match a string */
359 matched
= !STRNCMP (s
, SSTR(cs
), SCNT(cs
));
362 /* hoorah it matches */
365 } else if (*cs
& ALT
) {
367 /* alternation, skip to next expression */
369 } else if (*cs
& OPT
) {
371 /* the match is optional */
373 matched
= 1; /* indicate a successful match */
376 /* no match, error return */
381 /* an operator, do something fancy */
385 /* this is an alternation */
389 /* last thing in the alternation was a match, skip ahead */
393 /* no match, keep trying */
397 /* this is a grouping, recurse */
399 ptr
= expmatch (s
, ONEXT(cs
), mstring
);
402 /* the subexpression matched */
405 } else if (*cs
& ALT
) {
407 /* alternation, skip to next expression */
409 } else if (*cs
& OPT
) {
411 /* the match is optional */
412 matched
= 1; /* indicate a successful match */
415 /* no match, error return */
423 /* try to match a metasymbol */
427 /* try to match anything and remember what was matched */
430 * This is really the same as trying the match the
431 * remaining parts of the expression to any subset
436 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
437 if (ptr
!= NIL
&& s1
!= s
) {
439 /* we have a match, remember the match */
440 strncpy (mstring
, s
, s1
- s
);
441 mstring
[s1
- s
] = '\0';
443 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
445 /* it was aoptional so no match is ok */
447 } else if (ptr
!= NIL
) {
449 /* not optional and we still matched */
452 if (!(isalnum(*s1
) || *s1
== '_' ||
455 /* C++ scope operator */
456 (strlen(s1
) > 1 && *s1
== ':' && s1
[1] == ':' &&
460 _escaped
= _escaped
? FALSE
: TRUE
;
466 /* try to match anything */
469 * This is really the same as trying the match the
470 * remaining parts of the expression to any subset
475 ptr
= expmatch (s1
, MNEXT(cs
), mstring
);
476 if (ptr
!= NIL
&& s1
!= s
) {
478 /* we have a match */
480 } else if (ptr
!= NIL
&& (*cs
& OPT
)) {
482 /* it was aoptional so no match is ok */
484 } else if (ptr
!= NIL
) {
486 /* not optional and we still matched */
490 _escaped
= _escaped
? FALSE
: TRUE
;
496 /* fail if we are currently _escaped */
503 /* match any number of tabs and spaces */
506 while (*s
== ' ' || *s
== '\t')
508 if (s
!= ptr
|| s
== s_start
) {
510 /* match, be happy */
513 } else if (*s
== '\n' || *s
== '\0') {
515 /* match, be happy */
518 } else if (*cs
& ALT
) {
520 /* try the next part */
523 } else if (*cs
& OPT
) {
530 /* no match, error return */
534 /* check for end of line */
536 if (*s
== '\0' || *s
== '\n') {
538 /* match, be happy */
542 } else if (*cs
& ALT
) {
544 /* try the next part */
547 } else if (*cs
& OPT
) {
554 /* no match, error return */
558 /* check for start of line */
562 /* match, be happy */
565 } else if (*cs
& ALT
) {
567 /* try the next part */
570 } else if (*cs
& OPT
) {
577 /* no match, error return */
581 /* end of a subexpression, return success */