4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the GNU regex package with regard to processing the
12 * regex option and checking for regex matches was adapted from routines in
19 #include "general.h" /* must always come first */
27 # ifdef HAVE_SYS_TYPES_H
28 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
33 # include "gnuregex.h"
48 /* Back-references \0 through \9 */
49 #define BACK_REFERENCE_COUNT 10
51 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
55 #define REGEX_NAME "Regex"
60 #if defined (POSIX_REGEX)
/* Says how a matched pattern is acted upon: PTRN_TAG entries carry tag
 * information (name pattern and kind), PTRN_CALLBACK entries carry a function
 * to invoke. */
enum pType {
	PTRN_TAG,
	PTRN_CALLBACK
};
80 regexCallback function
;
88 regexPattern
*patterns
;
96 static boolean regexBroken
= FALSE
;
98 /* Array of pattern sets, indexed by language */
99 static patternSet
* Sets
= NULL
;
100 static int SetUpper
= -1; /* upper language index in list */
103 * FUNCTION DEFINITIONS
106 static void clearPatternSet (const langType language
)
108 if (language
<= SetUpper
)
110 patternSet
* const set
= Sets
+ language
;
112 for (i
= 0 ; i
< set
->count
; ++i
)
114 regexPattern
*p
= &set
->patterns
[i
];
115 #if defined (POSIX_REGEX)
116 regfree (p
->pattern
);
121 if (p
->type
== PTRN_TAG
)
123 eFree (p
->u
.tag
.name_pattern
);
124 p
->u
.tag
.name_pattern
= NULL
;
125 eFree (p
->u
.tag
.kind
.name
);
126 p
->u
.tag
.kind
.name
= NULL
;
127 if (p
->u
.tag
.kind
.description
!= NULL
)
129 eFree (p
->u
.tag
.kind
.description
);
130 p
->u
.tag
.kind
.description
= NULL
;
134 if (set
->patterns
!= NULL
)
135 eFree (set
->patterns
);
136 set
->patterns
= NULL
;
142 * Regex pseudo-parser
/*
 * Prepares a tag entry for `name', taking the kind letter and the kind name
 * from `kind'.  Both arguments must be non-NULL and `name' must be non-empty;
 * this is enforced with Assert.
 * NOTE(review): the declaration of `e' and whatever consumes the populated
 * entry are not visible in this excerpt -- confirm against the full file.
 */
145 static void makeRegexTag (
146 const vString
* const name
, const struct sKind
* const kind
)
151 Assert (name
!= NULL
&& vStringLength (name
) > 0);
152 Assert (kind
!= NULL
);
153 initTagEntry (&e
, vStringValue (name
));
154 e
.kind
= kind
->letter
;
155 e
.kindName
= kind
->name
;
161 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also turns "\t" into a Tab character.
 * Returns pointer to terminating separator.  Works in place.  Null
 * terminates name string.
 *
 * The separator is whatever character `name' starts with.  A backslash
 * followed by the separator yields the separator, a backslash followed by
 * 't' yields a tab, and any other escaped character is kept together with
 * its backslash.  A backslash that ends the string is dropped.  When no
 * closing separator exists the returned pointer addresses the terminating
 * NUL of the input.  An empty string is returned untouched.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Without a leading separator there is nothing to scan, and stepping
	 * over it would leave the buffer. */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			const char next = name [1];

			if (next == '\0')
				continue;	/* dangling backslash: the loop ends next round */
			++name;
			if (next == sep)
				*copyto++ = sep;
			else if (next == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = next;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	/* The write position always trails the read position, so this never
	 * clobbers the separator that is being returned. */
	*copyto = '\0';
	return name;
}
206 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
207 * character is whatever the first character of `regexp' is), by breaking it
208 * up into null terminated strings, removing the separators, and expanding
209 * '\t' into tabs. When complete, `regexp' points to the line matching
210 * pattern, a pointer to the name matching pattern is written to `name', a
211 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
212 * to the trailing flags is written to `flags'. If the pattern is not in the
213 * correct format, a false value is returned.
215 static boolean
parseTagRegex (
216 char* const regexp
, char** const name
,
217 char** const kinds
, char** const flags
)
219 boolean result
= FALSE
;
220 const int separator
= (unsigned char) regexp
[0];
222 *name
= scanSeparators (regexp
);
224 printf ("regex: empty regexp\n");
225 else if (**name
!= separator
)
226 printf ("regex: %s: incomplete regexp\n", regexp
);
229 char* const third
= scanSeparators (*name
);
231 printf ("regex: %s: regexp missing name pattern\n", regexp
);
232 if ((*name
) [strlen (*name
) - 1] == '\\')
233 printf ("regex: error in name pattern: \"%s\"\n", *name
);
234 if (*third
!= separator
)
235 printf ("regex: %s: regexp missing final separator\n", regexp
);
238 char* const fourth
= scanSeparators (third
);
239 if (*fourth
== separator
)
242 scanSeparators (fourth
);
256 static void addCompiledTagPattern (
257 const langType language
, regex_t
* const pattern
,
258 char* const name
, const char kind
, char* const kindName
,
259 char *const description
)
263 if (language
> SetUpper
)
266 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
267 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
269 Sets
[i
].patterns
= NULL
;
274 set
= Sets
+ language
;
275 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
276 ptrn
= &set
->patterns
[set
->count
];
279 ptrn
->pattern
= pattern
;
280 ptrn
->type
= PTRN_TAG
;
281 ptrn
->u
.tag
.name_pattern
= name
;
282 ptrn
->u
.tag
.kind
.enabled
= TRUE
;
283 ptrn
->u
.tag
.kind
.letter
= kind
;
284 ptrn
->u
.tag
.kind
.name
= kindName
;
285 ptrn
->u
.tag
.kind
.description
= description
;
288 static void addCompiledCallbackPattern (
289 const langType language
, regex_t
* const pattern
,
290 const regexCallback callback
)
294 if (language
> SetUpper
)
297 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
298 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
300 Sets
[i
].patterns
= NULL
;
305 set
= Sets
+ language
;
306 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
307 ptrn
= &set
->patterns
[set
->count
];
310 ptrn
->pattern
= pattern
;
311 ptrn
->type
= PTRN_CALLBACK
;
312 ptrn
->u
.callback
.function
= callback
;
315 #if defined (POSIX_REGEX)
317 static regex_t
* compileRegex (const char* const regexp
, const char* const flags
)
319 int cflags
= REG_EXTENDED
| REG_NEWLINE
;
320 regex_t
*result
= NULL
;
323 for (i
= 0 ; flags
!= NULL
&& flags
[i
] != '\0' ; ++i
)
325 switch ((int) flags
[i
])
327 case 'b': cflags
&= ~REG_EXTENDED
; break;
328 case 'e': cflags
|= REG_EXTENDED
; break;
329 case 'i': cflags
|= REG_ICASE
; break;
330 default: printf ("regex: unknown regex flag: '%c'\n", *flags
); break;
333 result
= xMalloc (1, regex_t
);
334 errcode
= regcomp (result
, regexp
, cflags
);
338 regerror (errcode
, result
, errmsg
, 256);
339 printf ("regex: regcomp %s: %s\n", regexp
, errmsg
);
/*
 * Splits the kind specification `kinds' into the pieces written through
 * `kind', `kindName' and `description'.
 *
 * Visible behaviour: when `kinds' is NULL or empty the kind name becomes a
 * heap copy of "regex".  Otherwise the text is examined through `k'; a
 * leading character followed by ',' or by the end of the string is tested
 * for, the kind name is either a copy of "regex", a copy of the whole of `k',
 * or a copy of the text before the first ',', and the description is a copy
 * of `k'.  Every string handed back is allocated with eStrdup/eMalloc.
 * NOTE(review): the end of the parameter list, the assignments to `*kind',
 * and the conditions selecting between the kind-name/description branches
 * are not visible in this excerpt -- confirm against the full file.
 */
349 static void parseKinds (
350 const char* const kinds
, char* const kind
, char** const kindName
,
356 if (kinds
== NULL
|| kinds
[0] == '\0')
359 *kindName
= eStrdup ("regex");
361 else if (kinds
[0] != '\0')
363 const char* k
= kinds
;
364 if (k
[0] != ',' && (k
[1] == ',' || k
[1] == '\0'))
371 *kindName
= eStrdup ("regex");
374 const char *const comma
= strchr (k
, ',');
376 *kindName
= eStrdup (k
);
379 *kindName
= (char*) eMalloc (comma
- k
+ 1);
380 strncpy (*kindName
, k
, comma
- k
);
381 (*kindName
) [comma
- k
] = '\0';
384 *description
= eStrdup (k
);
390 static void printRegexKind (const regexPattern
*pat
, unsigned int i
, boolean indent
)
392 const struct sKind
*const kind
= &pat
[i
].u
.tag
.kind
;
393 const char *const indentation
= indent
? " " : "";
394 Assert (pat
[i
].type
== PTRN_TAG
);
395 printf ("%s%c %s %s\n", indentation
,
396 kind
->letter
!= '\0' ? kind
->letter
: '?',
397 kind
->description
!= NULL
? kind
->description
: kind
->name
,
398 kind
->enabled
? "" : " [off]");
401 static void processLanguageRegex (const langType language
,
402 const char* const parameter
)
404 if (parameter
== NULL
|| parameter
[0] == '\0')
405 clearPatternSet (language
);
406 else if (parameter
[0] != '@')
407 addLanguageRegex (language
, parameter
);
408 else if (! doesFileExist (parameter
+ 1))
409 printf ("regex: cannot open regex file\n");
412 const char* regexfile
= parameter
+ 1;
413 FILE* const fp
= fopen (regexfile
, "r");
415 printf ("regex: %s\n", regexfile
);
418 vString
* const regex
= vStringNew ();
419 while (readLine (regex
, fp
))
420 addLanguageRegex (language
, vStringValue (regex
));
422 vStringDelete (regex
);
428 * Regex pattern matching
431 #if defined (POSIX_REGEX)
433 static vString
* substitute (
434 const char* const in
, const char* out
,
435 const int nmatch
, const regmatch_t
* const pmatch
)
437 vString
* result
= vStringNew ();
439 for (p
= out
; *p
!= '\0' ; p
++)
441 if (*p
== '\\' && isdigit ((int) *++p
))
443 const int dig
= *p
- '0';
444 if (0 < dig
&& dig
< nmatch
&& pmatch
[dig
].rm_so
!= -1)
446 const int diglen
= pmatch
[dig
].rm_eo
- pmatch
[dig
].rm_so
;
447 vStringNCatS (result
, in
+ pmatch
[dig
].rm_so
, diglen
);
450 else if (*p
!= '\n' && *p
!= '\r')
451 vStringPut (result
, *p
);
453 vStringTerminate (result
);
457 static void matchTagPattern (const vString
* const line
,
458 const regexPattern
* const patbuf
,
459 const regmatch_t
* const pmatch
)
461 vString
*const name
= substitute (vStringValue (line
),
462 patbuf
->u
.tag
.name_pattern
, BACK_REFERENCE_COUNT
, pmatch
);
463 vStringStripLeading (name
);
464 vStringStripTrailing (name
);
465 if (vStringLength (name
) > 0)
466 makeRegexTag (name
, &patbuf
->u
.tag
.kind
);
468 error (WARNING
, "%s:%ld: null expansion of name pattern \"%s\"",
469 getInputFileName (), getInputLineNumber (),
470 patbuf
->u
.tag
.name_pattern
);
471 vStringDelete (name
);
474 static void matchCallbackPattern (
475 const vString
* const line
, const regexPattern
* const patbuf
,
476 const regmatch_t
* const pmatch
)
478 regexMatch matches
[BACK_REFERENCE_COUNT
];
479 unsigned int count
= 0;
481 for (i
= 0 ; i
< BACK_REFERENCE_COUNT
&& pmatch
[i
].rm_so
!= -1 ; ++i
)
483 matches
[i
].start
= pmatch
[i
].rm_so
;
484 matches
[i
].length
= pmatch
[i
].rm_eo
- pmatch
[i
].rm_so
;
487 patbuf
->u
.callback
.function (vStringValue (line
), matches
, count
);
490 static boolean
matchRegexPattern (const vString
* const line
,
491 const regexPattern
* const patbuf
)
493 boolean result
= FALSE
;
494 regmatch_t pmatch
[BACK_REFERENCE_COUNT
];
495 const int match
= regexec (patbuf
->pattern
, vStringValue (line
),
496 BACK_REFERENCE_COUNT
, pmatch
, 0);
500 if (patbuf
->type
== PTRN_TAG
)
501 matchTagPattern (line
, patbuf
, pmatch
);
502 else if (patbuf
->type
== PTRN_CALLBACK
)
503 matchCallbackPattern (line
, patbuf
, pmatch
);
506 Assert ("invalid pattern type" == NULL
);
515 /* PUBLIC INTERFACE */
517 /* Match against all patterns for specified language. Returns true if at least
518 * on pattern matched.
520 extern boolean
matchRegex (const vString
* const line
, const langType language
)
522 boolean result
= FALSE
;
523 if (language
!= LANG_IGNORE
&& language
<= SetUpper
&&
524 Sets
[language
].count
> 0)
526 const patternSet
* const set
= Sets
+ language
;
528 for (i
= 0 ; i
< set
->count
; ++i
)
529 if (matchRegexPattern (line
, set
->patterns
+ i
))
/*
 * Consumes the current input by calling fileReadLine() until it returns
 * NULL.  The lines themselves are not examined here.
 */
extern void findRegexTags (void)
{
	/* merely read all lines of the file */
	for (;;)
	{
		if (fileReadLine () == NULL)
			break;
	}
}
542 #endif /* HAVE_REGEX */
544 extern void addTagRegex (
545 const langType language __unused__
,
546 const char* const regex __unused__
,
547 const char* const name __unused__
,
548 const char* const kinds __unused__
,
549 const char* const flags __unused__
)
552 Assert (regex
!= NULL
);
553 Assert (name
!= NULL
);
556 regex_t
* const cp
= compileRegex (regex
, flags
);
562 parseKinds (kinds
, &kind
, &kindName
, &description
);
563 addCompiledTagPattern (language
, cp
, eStrdup (name
),
564 kind
, kindName
, description
);
570 extern void addCallbackRegex (
571 const langType language __unused__
,
572 const char* const regex __unused__
,
573 const char* const flags __unused__
,
574 const regexCallback callback __unused__
)
577 Assert (regex
!= NULL
);
580 regex_t
* const cp
= compileRegex (regex
, flags
);
582 addCompiledCallbackPattern (language
, cp
, callback
);
587 extern void addLanguageRegex (
588 const langType language __unused__
, const char* const regex __unused__
)
593 char *const regex_pat
= eStrdup (regex
);
594 char *name
, *kinds
, *flags
;
595 if (parseTagRegex (regex_pat
, &name
, &kinds
, &flags
))
597 addTagRegex (language
, regex_pat
, name
, kinds
, flags
);
605 * Regex option parsing
608 extern boolean
processRegexOption (const char *const option
,
609 const char *const parameter __unused__
)
611 boolean handled
= FALSE
;
612 const char* const dash
= strchr (option
, '-');
613 if (dash
!= NULL
&& strncmp (option
, "regex", dash
- option
) == 0)
617 language
= getNamedLanguage (dash
+ 1);
618 if (language
== LANG_IGNORE
)
619 printf ("regex: unknown language \"%s\" in --%s option\n", (dash
+ 1), option
);
621 processLanguageRegex (language
, parameter
);
623 printf ("regex: regex support not available; required for --%s option\n",
631 extern void disableRegexKinds (const langType language __unused__
)
634 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
636 patternSet
* const set
= Sets
+ language
;
638 for (i
= 0 ; i
< set
->count
; ++i
)
639 if (set
->patterns
[i
].type
== PTRN_TAG
)
640 set
->patterns
[i
].u
.tag
.kind
.enabled
= FALSE
;
645 extern boolean
enableRegexKind (
646 const langType language __unused__
,
647 const int kind __unused__
, const boolean mode __unused__
)
649 boolean result
= FALSE
;
651 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
653 patternSet
* const set
= Sets
+ language
;
655 for (i
= 0 ; i
< set
->count
; ++i
)
656 if (set
->patterns
[i
].type
== PTRN_TAG
&&
657 set
->patterns
[i
].u
.tag
.kind
.letter
== kind
)
659 set
->patterns
[i
].u
.tag
.kind
.enabled
= mode
;
667 extern void printRegexKinds (const langType language __unused__
, boolean indent __unused__
)
670 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
672 patternSet
* const set
= Sets
+ language
;
674 for (i
= 0 ; i
< set
->count
; ++i
)
675 if (set
->patterns
[i
].type
== PTRN_TAG
)
676 printRegexKind (set
->patterns
, i
, indent
);
/*
 * Walks every language index from 0 up to and including SetUpper.
 * NOTE(review): the declaration of `i', the loop body and any cleanup that
 * follows the loop are not visible in this excerpt -- confirm against the
 * full file before relying on what is released here.
 */
681 extern void freeRegexResources (void)
685 for (i
= 0 ; i
<= SetUpper
; ++i
)
/* Check for broken regcomp() on Cygwin
 *
 * When both HAVE_REGEX and CHECK_REGCOMP are defined, a trivial expression
 * is compiled as a probe.  If that fails a warning is issued and regex
 * support is marked broken.  Otherwise this function does nothing.
 */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;

	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* the probe compiled; give back what regcomp() allocated */
		regfree (&patbuf);
	}
#endif
}
708 /* vi:set tabstop=4 shiftwidth=4: */