sbin/hammer: Split alloc_bigblock() into two functions [1/2]
[dragonfly.git] / usr.bin / vgrind / regexp.c
blobf1c4f6807ffa3a360566a12852ae56b160bbf567
1 /*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
30 * @(#) Copyright (c) 1980, 1993 The Regents of the University of California. All rights reserved.
31 * @(#)regexp.c 8.1 (Berkeley) 6/6/93
33 * $DragonFly: src/usr.bin/vgrind/regexp.c,v 1.4 2008/10/16 01:52:34 swildner Exp $
36 #include <ctype.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include "extern.h"
41 #define FALSE 0
42 #define TRUE !(FALSE)
43 #define NIL 0
45 static void expconv(void);
47 boolean _escaped; /* true if we are currently _escaped */
48 char *s_start; /* start of string */
49 boolean l_onecase; /* true if upper and lower equivalent */
51 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
53 /* STRNCMP - like strncmp except that we convert the
54 * first string to lower case before comparing
55 * if l_onecase is set.
58 int
59 STRNCMP(char *s1, char *s2, int len)
61 if (l_onecase) {
63 if (*s2 - makelower(*s1))
64 return (*s2 - makelower(*s1));
65 else {
66 s2++;
67 s1++;
69 while (--len);
70 } else {
72 if (*s2 - *s1)
73 return (*s2 - *s1);
74 else {
75 s2++;
76 s1++;
78 while (--len);
80 return(0);
83 /* The following routine converts an irregular expression to
84 * internal format.
86 * Either meta symbols (\a \d or \p) or character strings or
87 * operations ( alternation or perenthesizing ) can be
88 * specified. Each starts with a descriptor byte. The descriptor
89 * byte has STR set for strings, META set for meta symbols
90 * and OPER set for operations.
91 * The descriptor byte can also have the OPT bit set if the object
92 * defined is optional. Also ALT can be set to indicate an alternation.
94 * For metasymbols the byte following the descriptor byte identities
95 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
96 * strings the byte after the descriptor is a character count for
97 * the string:
99 * meta symbols := descriptor
100 * symbol
102 * strings := descriptor
103 * character count
104 * the string
106 * operatins := descriptor
107 * symbol
108 * character count
112 * handy macros for accessing parts of match blocks
114 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
115 #define MNEXT(A) (A+2) /* character following a metasymbol block */
117 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
118 #define OCNT(A) (*(A+2)) /* character count */
119 #define ONEXT(A) (A+3) /* next character after the operation */
120 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
122 #define SCNT(A) (*(A+1)) /* byte count of a string */
123 #define SSTR(A) (A+2) /* address of the string */
124 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
127 * bit flags in the descriptor
129 #define OPT 1
130 #define STR 2
131 #define META 4
132 #define ALT 8
133 #define OPER 16
135 static char *ccre; /* pointer to current position in converted exp*/
136 static char *ure; /* pointer current position in unconverted exp */
138 /* re: unconverted irregular expression */
139 char *
140 convexp(char *re)
142 char *cre; /* pointer to converted regular expression */
144 /* allocate room for the converted expression */
145 if (re == NIL)
146 return (NIL);
147 if (*re == '\0')
148 return (NIL);
149 cre = malloc (4 * strlen(re) + 3);
150 ccre = cre;
151 ure = re;
153 /* start the conversion with a \a */
154 *cre = META | OPT;
155 MSYM(cre) = 'a';
156 ccre = MNEXT(cre);
158 /* start the conversion (its recursive) */
159 expconv ();
160 *ccre = 0;
161 return (cre);
164 static void
165 expconv(void)
167 char *cs; /* pointer to current symbol in converted exp */
168 char c; /* character being processed */
169 char *acs; /* pinter to last alternate */
170 int temp;
172 /* let the conversion begin */
173 acs = NIL;
174 cs = NIL;
175 while (*ure != NIL) {
176 switch (c = *ure++) {
178 case '\\':
179 switch (c = *ure++) {
181 /* escaped characters are just characters */
182 default:
183 if (cs == NIL || (*cs & STR) == 0) {
184 cs = ccre;
185 *cs = STR;
186 SCNT(cs) = 1;
187 ccre += 2;
188 } else
189 SCNT(cs)++;
190 *ccre++ = c;
191 break;
193 /* normal(?) metacharacters */
194 case 'a':
195 case 'd':
196 case 'e':
197 case 'p':
198 if (acs != NIL && acs != cs) {
199 do {
200 temp = OCNT(acs);
201 OCNT(acs) = ccre - acs;
202 acs -= temp;
203 } while (temp != 0);
204 acs = NIL;
206 cs = ccre;
207 *cs = META;
208 MSYM(cs) = c;
209 ccre = MNEXT(cs);
210 break;
212 break;
214 /* just put the symbol in */
215 case '^':
216 case '$':
217 if (acs != NIL && acs != cs) {
218 do {
219 temp = OCNT(acs);
220 OCNT(acs) = ccre - acs;
221 acs -= temp;
222 } while (temp != 0);
223 acs = NIL;
225 cs = ccre;
226 *cs = META;
227 MSYM(cs) = c;
228 ccre = MNEXT(cs);
229 break;
231 /* mark the last match sequence as optional */
232 case '?':
233 if (cs)
234 *cs = *cs | OPT;
235 break;
237 /* recurse and define a subexpression */
238 case '(':
239 if (acs != NIL && acs != cs) {
240 do {
241 temp = OCNT(acs);
242 OCNT(acs) = ccre - acs;
243 acs -= temp;
244 } while (temp != 0);
245 acs = NIL;
247 cs = ccre;
248 *cs = OPER;
249 OSYM(cs) = '(';
250 ccre = ONEXT(cs);
251 expconv ();
252 OCNT(cs) = ccre - cs; /* offset to next symbol */
253 break;
255 /* return from a recursion */
256 case ')':
257 if (acs != NIL) {
258 do {
259 temp = OCNT(acs);
260 OCNT(acs) = ccre - acs;
261 acs -= temp;
262 } while (temp != 0);
263 acs = NIL;
265 cs = ccre;
266 *cs = META;
267 MSYM(cs) = c;
268 ccre = MNEXT(cs);
269 return;
271 /* mark the last match sequence as having an alternate */
272 /* the third byte will contain an offset to jump over the */
273 /* alternate match in case the first did not fail */
274 case '|':
275 if (acs != NIL && acs != cs)
276 OCNT(ccre) = ccre - acs; /* make a back pointer */
277 else
278 OCNT(ccre) = 0;
279 *cs |= ALT;
280 cs = ccre;
281 *cs = OPER;
282 OSYM(cs) = '|';
283 ccre = ONEXT(cs);
284 acs = cs; /* remember that the pointer is to be filles */
285 break;
287 /* if its not a metasymbol just build a scharacter string */
288 default:
289 if (cs == NIL || (*cs & STR) == 0) {
290 cs = ccre;
291 *cs = STR;
292 SCNT(cs) = 1;
293 ccre = SSTR(cs);
294 } else
295 SCNT(cs)++;
296 *ccre++ = c;
297 break;
300 if (acs != NIL) {
301 do {
302 temp = OCNT(acs);
303 OCNT(acs) = ccre - acs;
304 acs -= temp;
305 } while (temp != 0);
306 acs = NIL;
308 return;
310 /* end of convertre */
314 * The following routine recognises an irregular expresion
315 * with the following special characters:
317 * \? - means last match was optional
318 * \a - matches any number of characters
319 * \d - matches any number of spaces and tabs
320 * \p - matches any number of alphanumeric
321 * characters. The
322 * characters matched will be copied into
323 * the area pointed to by 'name'.
324 * \| - alternation
325 * \( \) - grouping used mostly for alternation and
326 * optionality
328 * The irregular expression must be translated to internal form
329 * prior to calling this routine
331 * The value returned is the pointer to the first non \a
332 * character matched.
336 s: string to check for a match in
337 re: a converted irregular expression
338 mstring: where to put whatever matches a \p
340 char *
341 expmatch(char *s, char *re, char *mstring)
343 char *cs; /* the current symbol */
344 char *ptr,*s1; /* temporary pointer */
345 boolean matched; /* a temporary boolean */
347 /* initial conditions */
348 if (re == NIL)
349 return (NIL);
350 cs = re;
351 matched = FALSE;
353 /* loop till expression string is exhausted (or at least pretty tired) */
354 while (*cs) {
355 switch (*cs & (OPER | STR | META)) {
357 /* try to match a string */
358 case STR:
359 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
360 if (matched) {
362 /* hoorah it matches */
363 s += SCNT(cs);
364 cs = SNEXT(cs);
365 } else if (*cs & ALT) {
367 /* alternation, skip to next expression */
368 cs = SNEXT(cs);
369 } else if (*cs & OPT) {
371 /* the match is optional */
372 cs = SNEXT(cs);
373 matched = 1; /* indicate a successful match */
374 } else {
376 /* no match, error return */
377 return (NIL);
379 break;
381 /* an operator, do something fancy */
382 case OPER:
383 switch (OSYM(cs)) {
385 /* this is an alternation */
386 case '|':
387 if (matched)
389 /* last thing in the alternation was a match, skip ahead */
390 cs = OPTR(cs);
391 else
393 /* no match, keep trying */
394 cs = ONEXT(cs);
395 break;
397 /* this is a grouping, recurse */
398 case '(':
399 ptr = expmatch (s, ONEXT(cs), mstring);
400 if (ptr != NIL) {
402 /* the subexpression matched */
403 matched = 1;
404 s = ptr;
405 } else if (*cs & ALT) {
407 /* alternation, skip to next expression */
408 matched = 0;
409 } else if (*cs & OPT) {
411 /* the match is optional */
412 matched = 1; /* indicate a successful match */
413 } else {
415 /* no match, error return */
416 return (NIL);
418 cs = OPTR(cs);
419 break;
421 break;
423 /* try to match a metasymbol */
424 case META:
425 switch (MSYM(cs)) {
427 /* try to match anything and remember what was matched */
428 case 'p':
430 * This is really the same as trying the match the
431 * remaining parts of the expression to any subset
432 * of the string.
434 s1 = s;
435 do {
436 ptr = expmatch (s1, MNEXT(cs), mstring);
437 if (ptr != NIL && s1 != s) {
439 /* we have a match, remember the match */
440 strncpy (mstring, s, s1 - s);
441 mstring[s1 - s] = '\0';
442 return (ptr);
443 } else if (ptr != NIL && (*cs & OPT)) {
445 /* it was aoptional so no match is ok */
446 return (ptr);
447 } else if (ptr != NIL) {
449 /* not optional and we still matched */
450 return (NIL);
452 if (!(isalnum(*s1) || *s1 == '_' ||
453 /* C++ destructor */
454 *s1 == '~' ||
455 /* C++ scope operator */
456 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
457 (s1++, TRUE))))
458 return (NIL);
459 if (*s1 == '\\')
460 _escaped = _escaped ? FALSE : TRUE;
461 else
462 _escaped = FALSE;
463 } while (*s1++);
464 return (NIL);
466 /* try to match anything */
467 case 'a':
469 * This is really the same as trying the match the
470 * remaining parts of the expression to any subset
471 * of the string.
473 s1 = s;
474 do {
475 ptr = expmatch (s1, MNEXT(cs), mstring);
476 if (ptr != NIL && s1 != s) {
478 /* we have a match */
479 return (ptr);
480 } else if (ptr != NIL && (*cs & OPT)) {
482 /* it was aoptional so no match is ok */
483 return (ptr);
484 } else if (ptr != NIL) {
486 /* not optional and we still matched */
487 return (NIL);
489 if (*s1 == '\\')
490 _escaped = _escaped ? FALSE : TRUE;
491 else
492 _escaped = FALSE;
493 } while (*s1++);
494 return (NIL);
496 /* fail if we are currently _escaped */
497 case 'e':
498 if (_escaped)
499 return(NIL);
500 cs = MNEXT(cs);
501 break;
503 /* match any number of tabs and spaces */
504 case 'd':
505 ptr = s;
506 while (*s == ' ' || *s == '\t')
507 s++;
508 if (s != ptr || s == s_start) {
510 /* match, be happy */
511 matched = 1;
512 cs = MNEXT(cs);
513 } else if (*s == '\n' || *s == '\0') {
515 /* match, be happy */
516 matched = 1;
517 cs = MNEXT(cs);
518 } else if (*cs & ALT) {
520 /* try the next part */
521 matched = 0;
522 cs = MNEXT(cs);
523 } else if (*cs & OPT) {
525 /* doesn't matter */
526 matched = 1;
527 cs = MNEXT(cs);
528 } else
530 /* no match, error return */
531 return (NIL);
532 break;
534 /* check for end of line */
535 case '$':
536 if (*s == '\0' || *s == '\n') {
538 /* match, be happy */
539 s++;
540 matched = 1;
541 cs = MNEXT(cs);
542 } else if (*cs & ALT) {
544 /* try the next part */
545 matched = 0;
546 cs = MNEXT(cs);
547 } else if (*cs & OPT) {
549 /* doesn't matter */
550 matched = 1;
551 cs = MNEXT(cs);
552 } else
554 /* no match, error return */
555 return (NIL);
556 break;
558 /* check for start of line */
559 case '^':
560 if (s == s_start) {
562 /* match, be happy */
563 matched = 1;
564 cs = MNEXT(cs);
565 } else if (*cs & ALT) {
567 /* try the next part */
568 matched = 0;
569 cs = MNEXT(cs);
570 } else if (*cs & OPT) {
572 /* doesn't matter */
573 matched = 1;
574 cs = MNEXT(cs);
575 } else
577 /* no match, error return */
578 return (NIL);
579 break;
581 /* end of a subexpression, return success */
582 case ')':
583 return (s);
585 break;
588 return (s);