2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for applying regular expression matching.
9 * The code for utilizing the GNU regex package with regards to processing the
10 * regex option and checking for regex matches was adapted from routines in
17 #include "general.h" /* must always come first */
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
42 /* Back-references \0 through \9 */
43 #define BACK_REFERENCE_COUNT 10
45 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
49 #define REGEX_NAME "Regex"
54 #if defined (POSIX_REGEX)
/* Discriminates how a registered pattern reacts to a match. */
enum pType {
	PTRN_TAG,      /* pattern carries tag data (name pattern and kind) */
	PTRN_CALLBACK  /* pattern carries a callback function to invoke */
};
74 regexCallback function
;
82 regexPattern
*patterns
;
90 static boolean regexBroken
= FALSE
;
92 /* Array of pattern sets, indexed by language */
93 static patternSet
* Sets
= NULL
;
94 static int SetUpper
= -1; /* upper language index in list */
97 * FUNCTION DEFINITIONS
100 static void clearPatternSet (const langType language
)
102 if (language
<= SetUpper
)
104 patternSet
* const set
= Sets
+ language
;
106 for (i
= 0 ; i
< set
->count
; ++i
)
108 regexPattern
*p
= &set
->patterns
[i
];
109 g_regex_unref(p
->pattern
);
112 if (p
->type
== PTRN_TAG
)
114 eFree (p
->u
.tag
.name_pattern
);
115 p
->u
.tag
.name_pattern
= NULL
;
116 eFree (p
->u
.tag
.kind
.name
);
117 p
->u
.tag
.kind
.name
= NULL
;
118 if (p
->u
.tag
.kind
.description
!= NULL
)
120 eFree (p
->u
.tag
.kind
.description
);
121 p
->u
.tag
.kind
.description
= NULL
;
125 if (set
->patterns
!= NULL
)
126 eFree (set
->patterns
);
127 set
->patterns
= NULL
;
133 * Regex pseudo-parser
136 static void makeRegexTag (
137 const vString
* const name
, const struct sKind
* const kind
)
139 Assert (kind
!= NULL
);
143 Assert (name
!= NULL
&& vStringLength (name
) > 0);
144 initTagEntry (&e
, vStringValue (name
));
145 e
.kind
= kind
->letter
;
146 e
.kindName
= kind
->name
;
152 * Regex pattern definition
155 /* Take a string like "/blah/" and turn it into "blah", making sure
156 * that the first and last characters are the same, and handling
157 * quoted separator characters. Actually, stops on the occurrence of
158 * an unquoted separator. Also turns "\t" into a Tab character.
159 * Returns pointer to terminating separator. Works in place. Null
160 * terminates name string.
162 static char* scanSeparators (char* name
)
166 boolean quoted
= FALSE
;
168 for (++name
; *name
!= '\0' ; ++name
)
174 else if (*name
== 't')
178 /* Something else is quoted, so preserve the quote. */
184 else if (*name
== '\\')
186 else if (*name
== sep
)
197 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
198 * character is whatever the first character of `regexp' is), by breaking it
199 * up into null terminated strings, removing the separators, and expanding
200 * '\t' into tabs. When complete, `regexp' points to the line matching
201 * pattern, a pointer to the name matching pattern is written to `name', a
202 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
203 * to the trailing flags is written to `flags'. If the pattern is not in the
204 * correct format, a false value is returned.
206 static boolean
parseTagRegex (
207 char* const regexp
, char** const name
,
208 char** const kinds
, char** const flags
)
210 boolean result
= FALSE
;
211 const int separator
= (unsigned char) regexp
[0];
213 *name
= scanSeparators (regexp
);
215 printf ("regex: empty regexp\n");
216 else if (**name
!= separator
)
217 printf ("regex: %s: incomplete regexp\n", regexp
);
220 char* const third
= scanSeparators (*name
);
222 printf ("regex: %s: regexp missing name pattern\n", regexp
);
223 if ((*name
) [strlen (*name
) - 1] == '\\')
224 printf ("regex: error in name pattern: \"%s\"\n", *name
);
225 if (*third
!= separator
)
226 printf ("regex: %s: regexp missing final separator\n", regexp
);
229 char* const fourth
= scanSeparators (third
);
230 if (*fourth
== separator
)
233 scanSeparators (fourth
);
247 static void addCompiledTagPattern (
248 const langType language
, GRegex
* const pattern
,
249 char* const name
, const char kind
, char* const kindName
,
250 char *const description
)
254 if (language
> SetUpper
)
257 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
258 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
260 Sets
[i
].patterns
= NULL
;
265 set
= Sets
+ language
;
266 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
267 ptrn
= &set
->patterns
[set
->count
];
270 ptrn
->pattern
= pattern
;
271 ptrn
->type
= PTRN_TAG
;
272 ptrn
->u
.tag
.name_pattern
= name
;
273 ptrn
->u
.tag
.kind
.enabled
= TRUE
;
274 ptrn
->u
.tag
.kind
.letter
= kind
;
275 ptrn
->u
.tag
.kind
.name
= kindName
;
276 ptrn
->u
.tag
.kind
.description
= description
;
279 static void addCompiledCallbackPattern (
280 const langType language
, GRegex
* const pattern
,
281 const regexCallback callback
)
285 if (language
> SetUpper
)
288 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
289 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
291 Sets
[i
].patterns
= NULL
;
296 set
= Sets
+ language
;
297 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
298 ptrn
= &set
->patterns
[set
->count
];
301 ptrn
->pattern
= pattern
;
302 ptrn
->type
= PTRN_CALLBACK
;
303 ptrn
->u
.callback
.function
= callback
;
306 #if defined (POSIX_REGEX)
308 static GRegex
* compileRegex (const char* const regexp
, const char* const flags
)
310 int cflags
= G_REGEX_MULTILINE
;
311 GRegex
*result
= NULL
;
312 GError
*error
= NULL
;
314 for (i
= 0 ; flags
!= NULL
&& flags
[i
] != '\0' ; ++i
)
316 switch ((int) flags
[i
])
318 case 'b': g_warning("CTags 'b' flag not supported by Geany!"); break;
320 case 'i': cflags
|= G_REGEX_CASELESS
; break;
321 default: printf ("regex: unknown regex flag: '%c'\n", *flags
); break;
324 result
= g_regex_new(regexp
, cflags
, 0, &error
);
327 printf ("regex: regcomp %s: %s\n", regexp
, error
->message
);
335 static void parseKinds (
336 const char* const kinds
, char* const kind
, char** const kindName
,
342 if (kinds
== NULL
|| kinds
[0] == '\0')
345 *kindName
= eStrdup ("regex");
347 else if (kinds
[0] != '\0')
349 const char* k
= kinds
;
350 if (k
[0] != ',' && (k
[1] == ',' || k
[1] == '\0'))
357 *kindName
= eStrdup ("regex");
360 const char *const comma
= strchr (k
, ',');
362 *kindName
= eStrdup (k
);
365 *kindName
= (char*) eMalloc (comma
- k
+ 1);
366 strncpy (*kindName
, k
, comma
- k
);
367 (*kindName
) [comma
- k
] = '\0';
370 *description
= eStrdup (k
);
376 static void printRegexKind (const regexPattern
*pat
, unsigned int i
, boolean indent
)
378 const struct sKind
*const kind
= &pat
[i
].u
.tag
.kind
;
379 const char *const indentation
= indent
? " " : "";
380 Assert (pat
[i
].type
== PTRN_TAG
);
381 printf ("%s%c %s %s\n", indentation
,
382 kind
->letter
!= '\0' ? kind
->letter
: '?',
383 kind
->description
!= NULL
? kind
->description
: kind
->name
,
384 kind
->enabled
? "" : " [off]");
387 static void processLanguageRegex (const langType language
,
388 const char* const parameter
)
390 if (parameter
== NULL
|| parameter
[0] == '\0')
391 clearPatternSet (language
);
392 else if (parameter
[0] != '@')
393 addLanguageRegex (language
, parameter
);
394 else if (! doesFileExist (parameter
+ 1))
395 printf ("regex: cannot open regex file\n");
398 const char* regexfile
= parameter
+ 1;
399 MIO
* const mio
= mio_new_file (regexfile
, "r");
401 printf ("regex: %s\n", regexfile
);
404 vString
* const regex
= vStringNew ();
405 while (readLine (regex
, mio
))
406 addLanguageRegex (language
, vStringValue (regex
));
408 vStringDelete (regex
);
414 * Regex pattern matching
417 #if defined (POSIX_REGEX)
419 static vString
* substitute (
420 const char* const in
, const char* out
,
421 const int nmatch
, const GMatchInfo
* const minfo
)
423 vString
* result
= vStringNew ();
425 for (p
= out
; *p
!= '\0' ; p
++)
427 if (*p
== '\\' && isdigit ((int) *++p
))
429 const int dig
= *p
- '0';
431 if (0 < dig
&& dig
< nmatch
&&
432 g_match_info_fetch_pos(minfo
, dig
, &so
, &eo
) && so
!= -1)
434 const int diglen
= eo
- so
;
435 vStringNCatS (result
, in
+ so
, diglen
);
438 else if (*p
!= '\n' && *p
!= '\r')
439 vStringPut (result
, *p
);
441 vStringTerminate (result
);
445 static void matchTagPattern (const vString
* const line
,
446 const regexPattern
* const patbuf
,
447 const GMatchInfo
* const minfo
)
449 vString
*const name
= substitute (vStringValue (line
),
450 patbuf
->u
.tag
.name_pattern
, BACK_REFERENCE_COUNT
, minfo
);
451 vStringStripLeading (name
);
452 vStringStripTrailing (name
);
453 if (vStringLength (name
) > 0)
454 makeRegexTag (name
, &patbuf
->u
.tag
.kind
);
456 error (WARNING
, "%s:%ld: null expansion of name pattern \"%s\"",
457 getInputFileName (), getInputLineNumber (),
458 patbuf
->u
.tag
.name_pattern
);
459 vStringDelete (name
);
462 static void matchCallbackPattern (
463 const vString
* const line
, const regexPattern
* const patbuf
,
464 const GMatchInfo
* const minfo
)
466 regexMatch matches
[BACK_REFERENCE_COUNT
];
467 unsigned int count
= 0;
469 for (i
= 0 ; i
< BACK_REFERENCE_COUNT
; ++i
)
471 int so
= -1, eo
= -1;
472 /* with GRegex we could get the real match count, but that might
473 * cause incompatibilities with CTags */
474 g_match_info_fetch_pos(minfo
, i
, &so
, &eo
);
475 matches
[i
].start
= so
;
476 matches
[i
].length
= eo
- so
;
477 /* a valid match may have both offsets == -1,
478 * e.g. (foo)*(bar) matching "bar" - see CTags bug 2970274.
479 * As POSIX regex doesn't seem to have a way to count matches,
480 * we return the count up to the last non-empty match. */
484 patbuf
->u
.callback
.function (vStringValue (line
), matches
, count
);
487 static boolean
matchRegexPattern (const vString
* const line
,
488 const regexPattern
* const patbuf
)
490 boolean result
= FALSE
;
492 if (g_regex_match(patbuf
->pattern
, vStringValue(line
), 0, &minfo
))
495 if (patbuf
->type
== PTRN_TAG
)
496 matchTagPattern (line
, patbuf
, minfo
);
497 else if (patbuf
->type
== PTRN_CALLBACK
)
498 matchCallbackPattern (line
, patbuf
, minfo
);
501 Assert ("invalid pattern type" == NULL
);
505 g_match_info_free(minfo
);
511 /* PUBLIC INTERFACE */
513 /* Match against all patterns for specified language. Returns true if at least
514 * one pattern matched.
516 extern boolean
matchRegex (const vString
* const line
, const langType language
)
518 boolean result
= FALSE
;
519 if (language
!= LANG_IGNORE
&& language
<= SetUpper
&&
520 Sets
[language
].count
> 0)
522 const patternSet
* const set
= Sets
+ language
;
524 for (i
= 0 ; i
< set
->count
; ++i
)
525 if (matchRegexPattern (line
, set
->patterns
+ i
))
531 extern void findRegexTags (void)
533 /* merely read all lines of the file */
534 while (fileReadLine () != NULL
)
538 #endif /* HAVE_REGEX */
540 extern void addTagRegex (
541 const langType language __unused__
,
542 const char* const regex __unused__
,
543 const char* const name __unused__
,
544 const char* const kinds __unused__
,
545 const char* const flags __unused__
)
548 Assert (regex
!= NULL
);
549 Assert (name
!= NULL
);
552 GRegex
* const cp
= compileRegex (regex
, flags
);
558 parseKinds (kinds
, &kind
, &kindName
, &description
);
559 addCompiledTagPattern (language
, cp
, eStrdup (name
),
560 kind
, kindName
, description
);
566 extern void addCallbackRegex (
567 const langType language __unused__
,
568 const char* const regex __unused__
,
569 const char* const flags __unused__
,
570 const regexCallback callback __unused__
)
573 Assert (regex
!= NULL
);
576 GRegex
* const cp
= compileRegex (regex
, flags
);
578 addCompiledCallbackPattern (language
, cp
, callback
);
583 extern void addLanguageRegex (
584 const langType language __unused__
, const char* const regex __unused__
)
589 char *const regex_pat
= eStrdup (regex
);
590 char *name
, *kinds
, *flags
;
591 if (parseTagRegex (regex_pat
, &name
, &kinds
, &flags
))
593 addTagRegex (language
, regex_pat
, name
, kinds
, flags
);
601 * Regex option parsing
604 extern boolean
processRegexOption (const char *const option
,
605 const char *const parameter __unused__
)
607 boolean handled
= FALSE
;
608 const char* const dash
= strchr (option
, '-');
609 if (dash
!= NULL
&& strncmp (option
, "regex", dash
- option
) == 0)
613 language
= getNamedLanguage (dash
+ 1);
614 if (language
== LANG_IGNORE
)
615 printf ("regex: unknown language \"%s\" in --%s option\n", (dash
+ 1), option
);
617 processLanguageRegex (language
, parameter
);
619 printf ("regex: regex support not available; required for --%s option\n",
627 extern void disableRegexKinds (const langType language __unused__
)
630 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
632 patternSet
* const set
= Sets
+ language
;
634 for (i
= 0 ; i
< set
->count
; ++i
)
635 if (set
->patterns
[i
].type
== PTRN_TAG
)
636 set
->patterns
[i
].u
.tag
.kind
.enabled
= FALSE
;
641 extern boolean
enableRegexKind (
642 const langType language __unused__
,
643 const int kind __unused__
, const boolean mode __unused__
)
645 boolean result
= FALSE
;
647 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
649 patternSet
* const set
= Sets
+ language
;
651 for (i
= 0 ; i
< set
->count
; ++i
)
652 if (set
->patterns
[i
].type
== PTRN_TAG
&&
653 set
->patterns
[i
].u
.tag
.kind
.letter
== kind
)
655 set
->patterns
[i
].u
.tag
.kind
.enabled
= mode
;
663 extern void printRegexKinds (const langType language __unused__
, boolean indent __unused__
)
666 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
668 patternSet
* const set
= Sets
+ language
;
670 for (i
= 0 ; i
< set
->count
; ++i
)
671 if (set
->patterns
[i
].type
== PTRN_TAG
)
672 printRegexKind (set
->patterns
, i
, indent
);
677 extern void freeRegexResources (void)
681 for (i
= 0 ; i
<= SetUpper
; ++i
)
690 /* Check for broken regcomp() on Cygwin */
691 extern void checkRegex (void)
693 /* not needed now we have GRegex */
696 /* vi:set tabstop=4 shiftwidth=4: */