3 * Copyright (c) 2000-2001, Darren Hiebert
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License.
8 * This module contains functions for applying regular expression matching.
10 * The code for utilizing the GNU regex package with regard to processing the
11 * regex option and checking for regex matches was adapted from routines in
18 #include "general.h" /* must always come first */
22 #if defined (HAVE_REGCOMP) || defined (HAVE_RE_COMPILE_PATTERN)
25 # ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
43 /* Back-references \0 through \9 */
44 #define BACK_REFERENCE_COUNT 10
46 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
50 #define REGEX_NAME "Regex"
55 #if defined (POSIX_REGEX)
/* Discriminates the two forms a regexPattern can take: PTRN_TAG patterns
 * carry a name pattern and kind used to emit a tag, PTRN_CALLBACK patterns
 * carry a function to invoke on a match. */
63 enum pType
{ PTRN_TAG
, PTRN_CALLBACK
};
74 regexCallback function
;
82 regexPattern
*patterns
;
/* NOTE(review): presumably set when checkRegex() decides regcomp() is
 * unusable; the assignment is not visible in this view -- confirm. */
90 static boolean regexBroken
= FALSE
;
92 /* Array of pattern sets, indexed by language */
93 static patternSet
* Sets
= NULL
;
/* Highest language index for which Sets has an entry; -1 while Sets is
 * still empty. Valid indices are therefore 0 .. SetUpper inclusive. */
94 static int SetUpper
= -1; /* upper language index in list */
97 * FUNCTION DEFINITIONS
100 static void clearPatternSet (const langType language
)
102 if (language
< SetUpper
)
104 patternSet
* const set
= Sets
+ language
;
106 for (i
= 0 ; i
< set
->count
; ++i
)
108 #if defined (POSIX_REGEX)
109 regfree (set
->patterns
[i
].pattern
);
111 eFree (set
->patterns
[i
].pattern
);
112 set
->patterns
[i
].pattern
= NULL
;
114 if (set
->patterns
[i
].type
== PTRN_TAG
)
116 eFree (set
->patterns
[i
].u
.tag
.name_pattern
);
117 set
->patterns
[i
].u
.tag
.name_pattern
= NULL
;
120 if (set
->patterns
!= NULL
)
121 eFree (set
->patterns
);
122 set
->patterns
= NULL
;
128 * Regex pseudo-parser
/* Prepares a tag entry for `name': the entry is initialised from the
 * string value of `name' and then labelled with the letter and name of
 * `kind'. Both arguments are asserted to be non-NULL and `name' to be
 * non-empty. */
131 static void makeRegexTag (const vString
* const name
,
132 const struct sKind
* const kind
)
137 Assert (name
!= NULL
&& vStringLength (name
) > 0);
138 Assert (kind
!= NULL
);
139 initTagEntry (&e
, vStringValue (name
));
140 e
.kind
= kind
->letter
;
141 e
.kindName
= kind
->name
;
147 * Regex pattern definition
150 /* Take a string like "/blah/" and turn it into "blah", making sure
151 * that the first and last characters are the same, and handling
152 * quoted separator characters. Actually, stops on the occurrence of
153 * an unquoted separator. Also turns "\t" into a Tab character.
154 * Returns pointer to terminating separator. Works in place. Null
155 * terminates name string.
157 static char* scanSeparators (char* name
)
/* TRUE while the previously scanned character was an unquoted backslash */
161 boolean quoted
= FALSE
;
/* The scan starts at the second character; the first one is skipped. */
163 for (++name
; *name
!= '\0' ; ++name
)
/* a quoted 't' is the "\t" escape described in the header comment */
169 else if (*name
== 't')
173 /* Something else is quoted, so preserve the quote. */
/* an unquoted backslash begins a quoted pair */
179 else if (*name
== '\\')
/* an unquoted separator ends the scan */
181 else if (*name
== sep
)
192 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
193 * character is whatever the first character of `regexp' is), by breaking it
194 * up into null terminated strings, removing the separators, and expanding
195 * '\t' into tabs. When complete, `regexp' points to the line matching
196 * pattern, a pointer to the name matching pattern is written to `name', a
197 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
198 * to the trailing flags is written to `flags'. If the pattern is not in the
199 * correct format, a false value is returned.
201 static boolean
parseTagRegex (char* const regexp
, char** const name
,
202 char** const kinds
, char** const flags
)
204 boolean result
= FALSE
;
205 const int separator
= (unsigned char) regexp
[0];
207 *name
= scanSeparators (regexp
);
209 error (WARNING
, "empty regexp");
210 else if (**name
!= separator
)
211 error (WARNING
, "%s: incomplete regexp", regexp
);
214 char* const third
= scanSeparators (*name
);
216 error (WARNING
, "%s: regexp missing name pattern", regexp
);
217 if ((*name
) [strlen (*name
) - 1] == '\\')
218 error (WARNING
, "error in name pattern: \"%s\"", *name
);
219 if (*third
!= separator
)
220 error (WARNING
, "%s: regexp missing final separator", regexp
);
223 char* const fourth
= scanSeparators (third
);
224 if (*fourth
== separator
)
227 scanSeparators (fourth
);
241 static void addCompiledTagPattern (const langType language
,
242 regex_t
* const pattern
, char* const name
,
243 const char kind
, char* const kindName
)
247 if (language
> SetUpper
)
250 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
251 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
253 Sets
[i
].patterns
= NULL
;
258 set
= Sets
+ language
;
259 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
260 ptrn
= &set
->patterns
[set
->count
];
263 ptrn
->pattern
= pattern
;
264 ptrn
->type
= PTRN_TAG
;
265 ptrn
->u
.tag
.name_pattern
= name
;
266 ptrn
->u
.tag
.kind
.enabled
= TRUE
;
267 ptrn
->u
.tag
.kind
.letter
= kind
;
268 ptrn
->u
.tag
.kind
.name
= kindName
;
271 static void addCompiledCallbackPattern (const langType language
,
272 regex_t
* const pattern
,
273 const regexCallback callback
)
277 if (language
> SetUpper
)
280 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
281 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
283 Sets
[i
].patterns
= NULL
;
288 set
= Sets
+ language
;
289 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
290 ptrn
= &set
->patterns
[set
->count
];
293 ptrn
->pattern
= pattern
;
294 ptrn
->type
= PTRN_CALLBACK
;
295 ptrn
->u
.callback
.function
= callback
;
298 #if defined (POSIX_REGEX)
300 static regex_t
* compileRegex (const char* const regexp
, const char* const flags
)
302 int cflags
= REG_EXTENDED
| REG_NEWLINE
;
303 regex_t
*result
= NULL
;
306 for (i
= 0 ; flags
!= NULL
&& flags
[i
] != '\0' ; ++i
)
308 switch ((int) flags
[i
])
310 case 'b': cflags
&= ~REG_EXTENDED
; break;
311 case 'e': cflags
|= REG_EXTENDED
; break;
312 case 'i': cflags
|= REG_ICASE
; break;
313 default: error (WARNING
, "unknown regex flag: '%c'", *flags
); break;
316 result
= xMalloc (1, regex_t
);
317 errcode
= regcomp (result
, regexp
, cflags
);
321 regerror (errcode
, result
, errmsg
, 256);
322 error (WARNING
, "%s", errmsg
);
/* Derives the one-letter kind and the kind name for a pattern from the
 * optional `kinds' field and stores them through `kind' and `kindName'.
 * Every string stored in `*kindName' here comes from eStrdup(). */
332 static void parseKinds (const char* const kinds
,
333 char* const kind
, char** const kindName
)
/* "regex" is the name used on this branch (its guarding condition is not
 * visible in this view) */
340 *kindName
= eStrdup ("regex");
342 else if (kinds
[0] != '\0')
344 const char* k
= kinds
;
/* a single character followed by ',' or by the end of the string */
345 if (k
[1] == ',' || k
[1] == '\0')
350 *kindName
= eStrdup (k
);
354 static void printRegexKindOption (const regexPattern
*pat
, unsigned int i
)
356 const struct sKind
*const kind
= &pat
[i
].u
.tag
.kind
;
357 Assert (pat
[i
].type
== PTRN_TAG
);
358 printf (" %c %s (regex %d)%s\n",
359 kind
->letter
!= '\0' ? kind
->letter
: '?',
360 kind
->name
!= NULL
? kind
->name
: "Regex pattern",
361 i
+ 1, kind
->enabled
? "" : " [off]");
364 static void processLanguageRegex (const langType language
,
365 const char* const parameter
)
367 if (parameter
== NULL
|| parameter
[0] == '\0')
368 clearPatternSet (language
);
369 else if (parameter
[0] != '@')
370 addLanguageRegex (language
, parameter
);
371 else if (! doesFileExist (parameter
+ 1))
372 error (WARNING
, "cannot open regex file");
375 const char* regexfile
= parameter
+ 1;
376 FILE* const fp
= fopen (regexfile
, "r");
378 error (WARNING
| PERROR
, regexfile
);
381 vString
* const regex
= vStringNew ();
382 while (readLine (regex
, fp
))
383 addLanguageRegex (language
, vStringValue (regex
));
385 vStringDelete (regex
);
391 * Regex pattern matching
394 #if defined (POSIX_REGEX)
396 static vString
* substitute (const char* const in
, const char* out
,
397 const int nmatch
, const regmatch_t
* const pmatch
)
399 vString
* result
= vStringNew ();
401 for (p
= out
; *p
!= '\0' ; p
++)
403 if (*p
== '\\' && isdigit ((int) *++p
))
405 const int dig
= *p
- '0';
406 if (0 < dig
&& dig
< nmatch
&& pmatch
[dig
].rm_so
!= -1)
408 const int diglen
= pmatch
[dig
].rm_eo
- pmatch
[dig
].rm_so
;
409 vStringNCatS (result
, in
+ pmatch
[dig
].rm_so
, diglen
);
412 else if (*p
!= '\n' && *p
!= '\r')
413 vStringPut (result
, *p
);
415 vStringTerminate (result
);
/* Handles a match of a tag-generating pattern: the pattern's name pattern
 * is expanded against `line' with substitute(), the result is passed
 * through vStringStripLeading() and vStringStripTrailing(), and a tag is
 * made from it when it is non-empty. Otherwise a "null expansion" warning
 * naming the input file and line number is issued. The expanded string is
 * deleted before returning. */
419 static void matchTagPattern (const vString
* const line
,
420 const regexPattern
* const patbuf
,
421 const regmatch_t
* const pmatch
)
423 vString
*const name
= substitute (vStringValue (line
),
424 patbuf
->u
.tag
.name_pattern
, BACK_REFERENCE_COUNT
, pmatch
);
425 vStringStripLeading (name
);
426 vStringStripTrailing (name
);
427 if (vStringLength (name
) > 0)
428 makeRegexTag (name
, &patbuf
->u
.tag
.kind
);
430 error (WARNING
, "%s:%ld: null expansion of name pattern \"%s\"",
431 getInputFileName (), getInputLineNumber (),
432 patbuf
->u
.tag
.name_pattern
);
433 vStringDelete (name
);
/* Handles a match of a callback pattern: the start offset and length of
 * each matched sub-expression are copied into a regexMatch array, stopping
 * at BACK_REFERENCE_COUNT entries or at the first entry whose rm_so is -1,
 * and the pattern's callback is then invoked with the line text, that
 * array and `count'. */
436 static void matchCallbackPattern (const vString
* const line
,
437 const regexPattern
* const patbuf
,
438 const regmatch_t
* const pmatch
)
440 regexMatch matches
[BACK_REFERENCE_COUNT
];
441 unsigned int count
= 0;
443 for (i
= 0 ; i
< BACK_REFERENCE_COUNT
&& pmatch
[i
].rm_so
!= -1 ; ++i
)
445 matches
[i
].start
= pmatch
[i
].rm_so
;
/* length is the end offset minus the start offset */
446 matches
[i
].length
= pmatch
[i
].rm_eo
- pmatch
[i
].rm_so
;
449 patbuf
->u
.callback
.function (vStringValue (line
), matches
, count
);
/* Runs the compiled expression of `patbuf' against `line' with regexec(),
 * collecting up to BACK_REFERENCE_COUNT sub-matches, and dispatches on the
 * pattern type to matchTagPattern() or matchCallbackPattern().
 * NOTE(review): the test of `match' that guards the dispatch is not visible
 * in this view. */
452 static void matchRegexPattern (const vString
* const line
,
453 const regexPattern
* const patbuf
)
455 regmatch_t pmatch
[BACK_REFERENCE_COUNT
];
456 const int match
= regexec (patbuf
->pattern
, vStringValue (line
),
457 BACK_REFERENCE_COUNT
, pmatch
, 0);
460 if (patbuf
->type
== PTRN_TAG
)
461 matchTagPattern (line
, patbuf
, pmatch
);
462 else if (patbuf
->type
== PTRN_CALLBACK
)
463 matchCallbackPattern (line
, patbuf
, pmatch
);
469 /* PUBLIC INTERFACE */
471 /* Match against all patterns for specified language. */
472 extern void matchRegex (const vString
* const line
, const langType language
)
/* Nothing is done for LANG_IGNORE, for a language index above SetUpper,
 * or for a language whose pattern set is empty. */
474 if (language
!= LANG_IGNORE
&& language
<= SetUpper
&&
475 Sets
[language
].count
> 0)
477 const patternSet
* const set
= Sets
+ language
;
/* try each of the language's patterns against the line, in order */
479 for (i
= 0 ; i
< set
->count
; ++i
)
480 matchRegexPattern (line
, set
->patterns
+ i
);
/* Reads the current input file line by line until fileReadLine() returns
 * NULL. */
484 extern void findRegexTags (void)
486 /* merely read all lines of the file */
487 while (fileReadLine () != NULL
)
491 #endif /* HAVE_REGEX */
/* Compiles `regex' with `flags' and registers it for `language' as a
 * tag-generating pattern. The kind letter and kind name are derived from
 * `kinds' by parseKinds(), and a copy of `name' (eStrdup) is handed to
 * addCompiledTagPattern(). `regex' and `name' are asserted non-NULL.
 * The parameters are marked __unused__, presumably for builds without
 * regex support -- confirm. */
493 extern void addTagRegex (const langType __unused__ language
,
494 const char* const __unused__ regex
,
495 const char* const __unused__ name
,
496 const char* const __unused__ kinds
,
497 const char* const __unused__ flags
)
500 Assert (regex
!= NULL
);
501 Assert (name
!= NULL
);
504 regex_t
* const cp
= compileRegex (regex
, flags
);
509 parseKinds (kinds
, &kind
, &kindName
);
510 addCompiledTagPattern (language
, cp
, eStrdup (name
),
/* Compiles `regex' with `flags' and registers it for `language' as a
 * pattern whose matches are passed to `callback'. `regex' is asserted
 * non-NULL. */
517 extern void addCallbackRegex (const langType __unused__ language
,
518 const char* const __unused__ regex
,
519 const char* const __unused__ flags
,
520 const regexCallback __unused__ callback
)
523 Assert (regex
!= NULL
);
526 regex_t
* const cp
= compileRegex (regex
, flags
);
528 addCompiledCallbackPattern (language
, cp
, callback
);
/* Adds a pattern given in its textual option form: a private copy of
 * `regex' is split into its fields by parseTagRegex() and, if that
 * succeeds, the fields are passed to addTagRegex(). */
533 extern void addLanguageRegex (const langType __unused__ language
,
534 const char* const __unused__ regex
)
/* work on a copy, since parseTagRegex() takes a writable buffer */
539 char *const regex_pat
= eStrdup (regex
);
540 char *name
, *kinds
, *flags
;
541 if (parseTagRegex (regex_pat
, &name
, &kinds
, &flags
))
543 addTagRegex (language
, regex_pat
, name
, kinds
, flags
);
551 * Regex option parsing
554 extern boolean
processRegexOption (const char *const option
,
555 const char *const __unused__ parameter
)
557 boolean handled
= FALSE
;
558 const char* const dash
= strchr (option
, '-');
559 if (dash
!= NULL
&& strncmp (option
, "regex", dash
- option
) == 0)
563 language
= getNamedLanguage (dash
+ 1);
564 if (language
== LANG_IGNORE
)
565 error (WARNING
, "unknown language in --%s option", option
);
567 processLanguageRegex (language
, parameter
);
569 error (WARNING
, "regex support not available; required for --%s option",
/* Marks the kind of every tag-generating pattern of `language' as
 * disabled. Callback patterns are left untouched; a language with no
 * patterns is ignored. */
577 extern void disableRegexKinds (const langType __unused__ language
)
580 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
582 patternSet
* const set
= Sets
+ language
;
584 for (i
= 0 ; i
< set
->count
; ++i
)
585 if (set
->patterns
[i
].type
== PTRN_TAG
)
586 set
->patterns
[i
].u
.tag
.kind
.enabled
= FALSE
;
/* Sets the enabled state of every tag-generating pattern of `language'
 * whose kind letter equals `kind' to `mode'.
 * NOTE(review): `result' starts out FALSE; the statements that update and
 * return it are not visible in this view. */
591 extern boolean
enableRegexKind (const langType __unused__ language
,
592 const int __unused__ kind
,
593 const boolean __unused__ mode
)
595 boolean result
= FALSE
;
597 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
599 patternSet
* const set
= Sets
+ language
;
601 for (i
= 0 ; i
< set
->count
; ++i
)
/* only tag patterns carry a kind; match on its letter */
602 if (set
->patterns
[i
].type
== PTRN_TAG
&&
603 set
->patterns
[i
].u
.tag
.kind
.letter
== kind
)
605 set
->patterns
[i
].u
.tag
.kind
.enabled
= mode
;
/* Prints, through printRegexKindOption(), one line for each
 * tag-generating pattern registered for `language'. Callback patterns are
 * skipped; a language with no patterns prints nothing. */
613 extern void printRegexKindOptions (const langType __unused__ language
)
616 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
618 patternSet
* const set
= Sets
+ language
;
620 for (i
= 0 ; i
< set
->count
; ++i
)
621 if (set
->patterns
[i
].type
== PTRN_TAG
)
622 printRegexKindOption (set
->patterns
, i
);
/* Walks every language index from 0 through SetUpper inclusive.
 * NOTE(review): the loop body is not visible in this view; presumably it
 * releases each language's pattern set -- confirm. */
627 extern void freeRegexResources (void)
631 for (i
= 0 ; i
<= SetUpper
; ++i
)
640 /* Check for broken regcomp() on Cygwin */
/* When built with both HAVE_REGEX and CHECK_REGCOMP, compiles the probe
 * expression "/hello/" with regcomp() and warns "Disabling broken regex"
 * if that fails. */
641 extern void checkRegex (void)
643 #if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
/* a non-zero return from regcomp() means the probe failed to compile */
646 if (regcomp (&patbuf
, "/hello/", 0) != 0)
648 error (WARNING
, "Disabling broken regex");
654 /* vi:set tabstop=8 shiftwidth=4: */