MFC:
[dragonfly.git] / usr.bin / vgrind / regexp.c
blobff4b06c318c446f224a44ab646da077c5e3d1e0c
1 /*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved.
35 * @(#)regexp.c 8.1 (Berkeley) 6/6/93
37 * $DragonFly: src/usr.bin/vgrind/regexp.c,v 1.3 2003/10/04 20:36:54 hmp Exp $
40 #include <ctype.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include "extern.h"
45 #define FALSE 0
46 #define TRUE !(FALSE)
47 #define NIL 0
49 static void expconv(void);
51 boolean _escaped; /* true if we are currently _escaped */
52 char *s_start; /* start of string */
53 boolean l_onecase; /* true if upper and lower equivalent */
55 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
57 /* STRNCMP - like strncmp except that we convert the
58 * first string to lower case before comparing
59 * if l_onecase is set.
62 int
63 STRNCMP(register char *s1, register char *s2, register int len)
65 if (l_onecase) {
67 if (*s2 - makelower(*s1))
68 return (*s2 - makelower(*s1));
69 else {
70 s2++;
71 s1++;
73 while (--len);
74 } else {
76 if (*s2 - *s1)
77 return (*s2 - *s1);
78 else {
79 s2++;
80 s1++;
82 while (--len);
84 return(0);
87 /* The following routine converts an irregular expression to
88 * internal format.
90 * Either meta symbols (\a \d or \p) or character strings or
91 * operations ( alternation or perenthesizing ) can be
92 * specified. Each starts with a descriptor byte. The descriptor
93 * byte has STR set for strings, META set for meta symbols
94 * and OPER set for operations.
95 * The descriptor byte can also have the OPT bit set if the object
96 * defined is optional. Also ALT can be set to indicate an alternation.
98 * For metasymbols the byte following the descriptor byte identities
99 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
100 * strings the byte after the descriptor is a character count for
101 * the string:
103 * meta symbols := descriptor
104 * symbol
106 * strings := descriptor
107 * character count
108 * the string
110 * operatins := descriptor
111 * symbol
112 * character count
116 * handy macros for accessing parts of match blocks
118 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
119 #define MNEXT(A) (A+2) /* character following a metasymbol block */
121 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
122 #define OCNT(A) (*(A+2)) /* character count */
123 #define ONEXT(A) (A+3) /* next character after the operation */
124 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
126 #define SCNT(A) (*(A+1)) /* byte count of a string */
127 #define SSTR(A) (A+2) /* address of the string */
128 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
131 * bit flags in the descriptor
133 #define OPT 1
134 #define STR 2
135 #define META 4
136 #define ALT 8
137 #define OPER 16
139 static char *ccre; /* pointer to current position in converted exp*/
140 static char *ure; /* pointer current position in unconverted exp */
142 /* re: unconverted irregular expression */
143 char *
144 convexp(char *re)
146 register char *cre; /* pointer to converted regular expression */
148 /* allocate room for the converted expression */
149 if (re == NIL)
150 return (NIL);
151 if (*re == '\0')
152 return (NIL);
153 cre = malloc (4 * strlen(re) + 3);
154 ccre = cre;
155 ure = re;
157 /* start the conversion with a \a */
158 *cre = META | OPT;
159 MSYM(cre) = 'a';
160 ccre = MNEXT(cre);
162 /* start the conversion (its recursive) */
163 expconv ();
164 *ccre = 0;
165 return (cre);
168 static void
169 expconv(void)
171 register char *cs; /* pointer to current symbol in converted exp */
172 register char c; /* character being processed */
173 register char *acs; /* pinter to last alternate */
174 register int temp;
176 /* let the conversion begin */
177 acs = NIL;
178 cs = NIL;
179 while (*ure != NIL) {
180 switch (c = *ure++) {
182 case '\\':
183 switch (c = *ure++) {
185 /* escaped characters are just characters */
186 default:
187 if (cs == NIL || (*cs & STR) == 0) {
188 cs = ccre;
189 *cs = STR;
190 SCNT(cs) = 1;
191 ccre += 2;
192 } else
193 SCNT(cs)++;
194 *ccre++ = c;
195 break;
197 /* normal(?) metacharacters */
198 case 'a':
199 case 'd':
200 case 'e':
201 case 'p':
202 if (acs != NIL && acs != cs) {
203 do {
204 temp = OCNT(acs);
205 OCNT(acs) = ccre - acs;
206 acs -= temp;
207 } while (temp != 0);
208 acs = NIL;
210 cs = ccre;
211 *cs = META;
212 MSYM(cs) = c;
213 ccre = MNEXT(cs);
214 break;
216 break;
218 /* just put the symbol in */
219 case '^':
220 case '$':
221 if (acs != NIL && acs != cs) {
222 do {
223 temp = OCNT(acs);
224 OCNT(acs) = ccre - acs;
225 acs -= temp;
226 } while (temp != 0);
227 acs = NIL;
229 cs = ccre;
230 *cs = META;
231 MSYM(cs) = c;
232 ccre = MNEXT(cs);
233 break;
235 /* mark the last match sequence as optional */
236 case '?':
237 if (cs)
238 *cs = *cs | OPT;
239 break;
241 /* recurse and define a subexpression */
242 case '(':
243 if (acs != NIL && acs != cs) {
244 do {
245 temp = OCNT(acs);
246 OCNT(acs) = ccre - acs;
247 acs -= temp;
248 } while (temp != 0);
249 acs = NIL;
251 cs = ccre;
252 *cs = OPER;
253 OSYM(cs) = '(';
254 ccre = ONEXT(cs);
255 expconv ();
256 OCNT(cs) = ccre - cs; /* offset to next symbol */
257 break;
259 /* return from a recursion */
260 case ')':
261 if (acs != NIL) {
262 do {
263 temp = OCNT(acs);
264 OCNT(acs) = ccre - acs;
265 acs -= temp;
266 } while (temp != 0);
267 acs = NIL;
269 cs = ccre;
270 *cs = META;
271 MSYM(cs) = c;
272 ccre = MNEXT(cs);
273 return;
275 /* mark the last match sequence as having an alternate */
276 /* the third byte will contain an offset to jump over the */
277 /* alternate match in case the first did not fail */
278 case '|':
279 if (acs != NIL && acs != cs)
280 OCNT(ccre) = ccre - acs; /* make a back pointer */
281 else
282 OCNT(ccre) = 0;
283 *cs |= ALT;
284 cs = ccre;
285 *cs = OPER;
286 OSYM(cs) = '|';
287 ccre = ONEXT(cs);
288 acs = cs; /* remember that the pointer is to be filles */
289 break;
291 /* if its not a metasymbol just build a scharacter string */
292 default:
293 if (cs == NIL || (*cs & STR) == 0) {
294 cs = ccre;
295 *cs = STR;
296 SCNT(cs) = 1;
297 ccre = SSTR(cs);
298 } else
299 SCNT(cs)++;
300 *ccre++ = c;
301 break;
304 if (acs != NIL) {
305 do {
306 temp = OCNT(acs);
307 OCNT(acs) = ccre - acs;
308 acs -= temp;
309 } while (temp != 0);
310 acs = NIL;
312 return;
314 /* end of convertre */
318 * The following routine recognises an irregular expresion
319 * with the following special characters:
321 * \? - means last match was optional
322 * \a - matches any number of characters
323 * \d - matches any number of spaces and tabs
324 * \p - matches any number of alphanumeric
325 * characters. The
326 * characters matched will be copied into
327 * the area pointed to by 'name'.
328 * \| - alternation
329 * \( \) - grouping used mostly for alternation and
330 * optionality
332 * The irregular expression must be translated to internal form
333 * prior to calling this routine
335 * The value returned is the pointer to the first non \a
336 * character matched.
340 s: string to check for a match in
341 re: a converted irregular expression
342 mstring: where to put whatever matches a \p
344 char *
345 expmatch(register char *s, register char *re, register char *mstring)
347 register char *cs; /* the current symbol */
348 register char *ptr,*s1; /* temporary pointer */
349 boolean matched; /* a temporary boolean */
351 /* initial conditions */
352 if (re == NIL)
353 return (NIL);
354 cs = re;
355 matched = FALSE;
357 /* loop till expression string is exhausted (or at least pretty tired) */
358 while (*cs) {
359 switch (*cs & (OPER | STR | META)) {
361 /* try to match a string */
362 case STR:
363 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
364 if (matched) {
366 /* hoorah it matches */
367 s += SCNT(cs);
368 cs = SNEXT(cs);
369 } else if (*cs & ALT) {
371 /* alternation, skip to next expression */
372 cs = SNEXT(cs);
373 } else if (*cs & OPT) {
375 /* the match is optional */
376 cs = SNEXT(cs);
377 matched = 1; /* indicate a successful match */
378 } else {
380 /* no match, error return */
381 return (NIL);
383 break;
385 /* an operator, do something fancy */
386 case OPER:
387 switch (OSYM(cs)) {
389 /* this is an alternation */
390 case '|':
391 if (matched)
393 /* last thing in the alternation was a match, skip ahead */
394 cs = OPTR(cs);
395 else
397 /* no match, keep trying */
398 cs = ONEXT(cs);
399 break;
401 /* this is a grouping, recurse */
402 case '(':
403 ptr = expmatch (s, ONEXT(cs), mstring);
404 if (ptr != NIL) {
406 /* the subexpression matched */
407 matched = 1;
408 s = ptr;
409 } else if (*cs & ALT) {
411 /* alternation, skip to next expression */
412 matched = 0;
413 } else if (*cs & OPT) {
415 /* the match is optional */
416 matched = 1; /* indicate a successful match */
417 } else {
419 /* no match, error return */
420 return (NIL);
422 cs = OPTR(cs);
423 break;
425 break;
427 /* try to match a metasymbol */
428 case META:
429 switch (MSYM(cs)) {
431 /* try to match anything and remember what was matched */
432 case 'p':
434 * This is really the same as trying the match the
435 * remaining parts of the expression to any subset
436 * of the string.
438 s1 = s;
439 do {
440 ptr = expmatch (s1, MNEXT(cs), mstring);
441 if (ptr != NIL && s1 != s) {
443 /* we have a match, remember the match */
444 strncpy (mstring, s, s1 - s);
445 mstring[s1 - s] = '\0';
446 return (ptr);
447 } else if (ptr != NIL && (*cs & OPT)) {
449 /* it was aoptional so no match is ok */
450 return (ptr);
451 } else if (ptr != NIL) {
453 /* not optional and we still matched */
454 return (NIL);
456 if (!(isalnum(*s1) || *s1 == '_' ||
457 /* C++ destructor */
458 *s1 == '~' ||
459 /* C++ scope operator */
460 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
461 (s1++, TRUE))))
462 return (NIL);
463 if (*s1 == '\\')
464 _escaped = _escaped ? FALSE : TRUE;
465 else
466 _escaped = FALSE;
467 } while (*s1++);
468 return (NIL);
470 /* try to match anything */
471 case 'a':
473 * This is really the same as trying the match the
474 * remaining parts of the expression to any subset
475 * of the string.
477 s1 = s;
478 do {
479 ptr = expmatch (s1, MNEXT(cs), mstring);
480 if (ptr != NIL && s1 != s) {
482 /* we have a match */
483 return (ptr);
484 } else if (ptr != NIL && (*cs & OPT)) {
486 /* it was aoptional so no match is ok */
487 return (ptr);
488 } else if (ptr != NIL) {
490 /* not optional and we still matched */
491 return (NIL);
493 if (*s1 == '\\')
494 _escaped = _escaped ? FALSE : TRUE;
495 else
496 _escaped = FALSE;
497 } while (*s1++);
498 return (NIL);
500 /* fail if we are currently _escaped */
501 case 'e':
502 if (_escaped)
503 return(NIL);
504 cs = MNEXT(cs);
505 break;
507 /* match any number of tabs and spaces */
508 case 'd':
509 ptr = s;
510 while (*s == ' ' || *s == '\t')
511 s++;
512 if (s != ptr || s == s_start) {
514 /* match, be happy */
515 matched = 1;
516 cs = MNEXT(cs);
517 } else if (*s == '\n' || *s == '\0') {
519 /* match, be happy */
520 matched = 1;
521 cs = MNEXT(cs);
522 } else if (*cs & ALT) {
524 /* try the next part */
525 matched = 0;
526 cs = MNEXT(cs);
527 } else if (*cs & OPT) {
529 /* doesn't matter */
530 matched = 1;
531 cs = MNEXT(cs);
532 } else
534 /* no match, error return */
535 return (NIL);
536 break;
538 /* check for end of line */
539 case '$':
540 if (*s == '\0' || *s == '\n') {
542 /* match, be happy */
543 s++;
544 matched = 1;
545 cs = MNEXT(cs);
546 } else if (*cs & ALT) {
548 /* try the next part */
549 matched = 0;
550 cs = MNEXT(cs);
551 } else if (*cs & OPT) {
553 /* doesn't matter */
554 matched = 1;
555 cs = MNEXT(cs);
556 } else
558 /* no match, error return */
559 return (NIL);
560 break;
562 /* check for start of line */
563 case '^':
564 if (s == s_start) {
566 /* match, be happy */
567 matched = 1;
568 cs = MNEXT(cs);
569 } else if (*cs & ALT) {
571 /* try the next part */
572 matched = 0;
573 cs = MNEXT(cs);
574 } else if (*cs & OPT) {
576 /* doesn't matter */
577 matched = 1;
578 cs = MNEXT(cs);
579 } else
581 /* no match, error return */
582 return (NIL);
583 break;
585 /* end of a subexpression, return success */
586 case ')':
587 return (s);
589 break;
592 return (s);