Version bump.
[geany-mirror.git] / tagmanager / lregex.c
blob0810b86130552c734efc349df4d7738ef94b56ca
1 /*
2 * $Id$
4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the Gnu regex package with regards to processing the
12 * regex option and checking for regex matches was adapted from routines in
13 * Gnu etags.
17 * INCLUDE FILES
19 #include "general.h" /* must always come first */
21 #include <string.h>
22 #include <glib.h>
24 #ifdef HAVE_REGCOMP
25 # include <ctype.h>
26 # include <stddef.h>
27 # ifdef HAVE_SYS_TYPES_H
28 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
29 # endif
30 # ifdef HAVE_REGEX_H
31 # include <regex.h>
32 # else
33 # include "gnuregex.h"
34 # endif
35 #endif
37 #include "main.h"
38 #include "entry.h"
39 #include "parse.h"
40 #include "read.h"
42 #ifdef HAVE_REGEX
45 * MACROS
/* Number of match slots requested from the regex engine: \0 through \9. */
#define BACK_REFERENCE_COUNT 10

/* The POSIX regex code paths are compiled only when regcomp() is available
 * and has not been flagged through REGCOMP_BROKEN.
 */
#if defined (HAVE_REGCOMP) && ! defined (REGCOMP_BROKEN)
# define POSIX_REGEX
#endif

#define REGEX_NAME "Regex"
58 * DATA DECLARATIONS
60 #if defined (POSIX_REGEX)
62 struct sKind {
63 boolean enabled;
64 char letter;
65 char* name;
66 char* description;
69 enum pType { PTRN_TAG, PTRN_CALLBACK };
71 typedef struct {
72 regex_t *pattern;
73 enum pType type;
74 union {
75 struct {
76 char *name_pattern;
77 struct sKind kind;
78 } tag;
79 struct {
80 regexCallback function;
81 } callback;
82 } u;
83 } regexPattern;
85 #endif
87 typedef struct {
88 regexPattern *patterns;
89 unsigned int count;
90 } patternSet;
93 * DATA DEFINITIONS
96 static boolean regexBroken = FALSE;
98 /* Array of pattern sets, indexed by language */
99 static patternSet* Sets = NULL;
100 static int SetUpper = -1; /* upper language index in list */
103 * FUNCTION DEFINITIONS
106 static void clearPatternSet (const langType language)
108 if (language <= SetUpper)
110 patternSet* const set = Sets + language;
111 unsigned int i;
112 for (i = 0 ; i < set->count ; ++i)
114 regexPattern *p = &set->patterns [i];
115 #if defined (POSIX_REGEX)
116 regfree (p->pattern);
117 #endif
118 eFree (p->pattern);
119 p->pattern = NULL;
121 if (p->type == PTRN_TAG)
123 eFree (p->u.tag.name_pattern);
124 p->u.tag.name_pattern = NULL;
125 eFree (p->u.tag.kind.name);
126 p->u.tag.kind.name = NULL;
127 if (p->u.tag.kind.description != NULL)
129 eFree (p->u.tag.kind.description);
130 p->u.tag.kind.description = NULL;
134 if (set->patterns != NULL)
135 eFree (set->patterns);
136 set->patterns = NULL;
137 set->count = 0;
142 * Regex pseudo-parser
145 static void makeRegexTag (
146 const vString* const name, const struct sKind* const kind)
148 if (kind->enabled)
150 tagEntryInfo e;
151 Assert (name != NULL && vStringLength (name) > 0);
152 Assert (kind != NULL);
153 initTagEntry (&e, vStringValue (name));
154 e.kind = kind->letter;
155 e.kindName = kind->name;
156 makeTagEntry (&e);
161 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters. Actually, stops on the occurrence of
 * an unquoted separator. Also turns "\t" into a Tab character.
 * Returns pointer to terminating separator, or to the terminating null
 * character when no unquoted separator is found. Works in place. Null
 * terminates name string.
 *
 * A backslash in front of any other character is kept together with that
 * character; a lone backslash at the very end of the string is dropped.
 * An empty string is returned untouched.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Without a leading separator there is nothing to scan, and stepping
	 * over the first character would run past the end of the string.
	 */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			if (name [1] == '\0')
				continue;  /* trailing backslash: the loop ends on the null */
			++name;
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
206 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
207 * character is whatever the first character of `regexp' is), by breaking it
208 * up into null terminated strings, removing the separators, and expanding
209 * '\t' into tabs. When complete, `regexp' points to the line matching
210 * pattern, a pointer to the name matching pattern is written to `name', a
211 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
212 * to the trailing flags is written to `flags'. If the pattern is not in the
213 * correct format, a false value is returned.
215 static boolean parseTagRegex (
216 char* const regexp, char** const name,
217 char** const kinds, char** const flags)
219 boolean result = FALSE;
220 const int separator = (unsigned char) regexp [0];
222 *name = scanSeparators (regexp);
223 if (*regexp == '\0')
224 printf ("regex: empty regexp\n");
225 else if (**name != separator)
226 printf ("regex: %s: incomplete regexp\n", regexp);
227 else
229 char* const third = scanSeparators (*name);
230 if (**name == '\0')
231 printf ("regex: %s: regexp missing name pattern\n", regexp);
232 if ((*name) [strlen (*name) - 1] == '\\')
233 printf ("regex: error in name pattern: \"%s\"\n", *name);
234 if (*third != separator)
235 printf ("regex: %s: regexp missing final separator\n", regexp);
236 else
238 char* const fourth = scanSeparators (third);
239 if (*fourth == separator)
241 *kinds = third;
242 scanSeparators (fourth);
243 *flags = fourth;
245 else
247 *flags = third;
248 *kinds = NULL;
250 result = TRUE;
253 return result;
256 static void addCompiledTagPattern (
257 const langType language, regex_t* const pattern,
258 char* const name, const char kind, char* const kindName,
259 char *const description)
261 patternSet* set;
262 regexPattern *ptrn;
263 if (language > SetUpper)
265 int i;
266 Sets = xRealloc (Sets, (language + 1), patternSet);
267 for (i = SetUpper + 1 ; i <= language ; ++i)
269 Sets [i].patterns = NULL;
270 Sets [i].count = 0;
272 SetUpper = language;
274 set = Sets + language;
275 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
276 ptrn = &set->patterns [set->count];
277 set->count += 1;
279 ptrn->pattern = pattern;
280 ptrn->type = PTRN_TAG;
281 ptrn->u.tag.name_pattern = name;
282 ptrn->u.tag.kind.enabled = TRUE;
283 ptrn->u.tag.kind.letter = kind;
284 ptrn->u.tag.kind.name = kindName;
285 ptrn->u.tag.kind.description = description;
288 static void addCompiledCallbackPattern (
289 const langType language, regex_t* const pattern,
290 const regexCallback callback)
292 patternSet* set;
293 regexPattern *ptrn;
294 if (language > SetUpper)
296 int i;
297 Sets = xRealloc (Sets, (language + 1), patternSet);
298 for (i = SetUpper + 1 ; i <= language ; ++i)
300 Sets [i].patterns = NULL;
301 Sets [i].count = 0;
303 SetUpper = language;
305 set = Sets + language;
306 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
307 ptrn = &set->patterns [set->count];
308 set->count += 1;
310 ptrn->pattern = pattern;
311 ptrn->type = PTRN_CALLBACK;
312 ptrn->u.callback.function = callback;
315 #if defined (POSIX_REGEX)
317 static regex_t* compileRegex (const char* const regexp, const char* const flags)
319 int cflags = REG_EXTENDED | REG_NEWLINE;
320 regex_t *result = NULL;
321 int errcode;
322 int i;
323 for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
325 switch ((int) flags [i])
327 case 'b': cflags &= ~REG_EXTENDED; break;
328 case 'e': cflags |= REG_EXTENDED; break;
329 case 'i': cflags |= REG_ICASE; break;
330 default: printf ("regex: unknown regex flag: '%c'\n", *flags); break;
333 result = xMalloc (1, regex_t);
334 errcode = regcomp (result, regexp, cflags);
335 if (errcode != 0)
337 char errmsg[256];
338 regerror (errcode, result, errmsg, 256);
339 printf ("regex: regcomp %s: %s\n", regexp, errmsg);
340 regfree (result);
341 eFree (result);
342 result = NULL;
344 return result;
347 #endif
/* Splits a kind specification of the form "k,name,description" into its
 * parts. The letter is taken only when the first field is exactly one
 * character long; otherwise it is 'r'. A missing name becomes "regex" and a
 * missing description stays NULL. The strings written to `kindName' and
 * `description' are newly allocated.
 */
static void parseKinds (
		const char* const kinds, char* const kind, char** const kindName,
		char **description)
{
	const char* rest;
	const char* comma;
	size_t nameLength;

	*kind = '\0';
	*kindName = NULL;
	*description = NULL;

	/* No specification at all: fall back to the defaults. */
	if (kinds == NULL || kinds [0] == '\0')
	{
		*kind = 'r';
		*kindName = eStrdup ("regex");
		return;
	}

	rest = kinds;
	if (rest [0] != ',' && (rest [1] == ',' || rest [1] == '\0'))
		*kind = *rest++;
	else
		*kind = 'r';
	if (*rest == ',')
		++rest;

	if (rest [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	comma = strchr (rest, ',');
	if (comma == NULL)
	{
		*kindName = eStrdup (rest);
		return;
	}

	/* The name ends at the comma; anything after it is the description. */
	nameLength = comma - rest;
	*kindName = (char*) eMalloc (nameLength + 1);
	strncpy (*kindName, rest, nameLength);
	(*kindName) [nameLength] = '\0';
	rest = comma + 1;
	if (rest [0] != '\0')
		*description = eStrdup (rest);
}
390 static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
392 const struct sKind *const kind = &pat [i].u.tag.kind;
393 const char *const indentation = indent ? " " : "";
394 Assert (pat [i].type == PTRN_TAG);
395 printf ("%s%c %s %s\n", indentation,
396 kind->letter != '\0' ? kind->letter : '?',
397 kind->description != NULL ? kind->description : kind->name,
398 kind->enabled ? "" : " [off]");
401 static void processLanguageRegex (const langType language,
402 const char* const parameter)
404 if (parameter == NULL || parameter [0] == '\0')
405 clearPatternSet (language);
406 else if (parameter [0] != '@')
407 addLanguageRegex (language, parameter);
408 else if (! doesFileExist (parameter + 1))
409 printf ("regex: cannot open regex file\n");
410 else
412 const char* regexfile = parameter + 1;
413 FILE* const fp = fopen (regexfile, "r");
414 if (fp == NULL)
415 printf ("regex: %s\n", regexfile);
416 else
418 vString* const regex = vStringNew ();
419 while (readLine (regex, fp))
420 addLanguageRegex (language, vStringValue (regex));
421 fclose (fp);
422 vStringDelete (regex);
428 * Regex pattern matching
431 #if defined (POSIX_REGEX)
433 static vString* substitute (
434 const char* const in, const char* out,
435 const int nmatch, const regmatch_t* const pmatch)
437 vString* result = vStringNew ();
438 const char* p;
439 for (p = out ; *p != '\0' ; p++)
441 if (*p == '\\' && isdigit ((int) *++p))
443 const int dig = *p - '0';
444 if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
446 const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
447 vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
450 else if (*p != '\n' && *p != '\r')
451 vStringPut (result, *p);
453 vStringTerminate (result);
454 return result;
457 static void matchTagPattern (const vString* const line,
458 const regexPattern* const patbuf,
459 const regmatch_t* const pmatch)
461 vString *const name = substitute (vStringValue (line),
462 patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
463 vStringStripLeading (name);
464 vStringStripTrailing (name);
465 if (vStringLength (name) > 0)
466 makeRegexTag (name, &patbuf->u.tag.kind);
467 else
468 error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
469 getInputFileName (), getInputLineNumber (),
470 patbuf->u.tag.name_pattern);
471 vStringDelete (name);
474 static void matchCallbackPattern (
475 const vString* const line, const regexPattern* const patbuf,
476 const regmatch_t* const pmatch)
478 regexMatch matches [BACK_REFERENCE_COUNT];
479 unsigned int count = 0;
480 int i;
481 for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
483 matches [i].start = pmatch [i].rm_so;
484 matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
485 ++count;
487 patbuf->u.callback.function (vStringValue (line), matches, count);
490 static boolean matchRegexPattern (const vString* const line,
491 const regexPattern* const patbuf)
493 boolean result = FALSE;
494 regmatch_t pmatch [BACK_REFERENCE_COUNT];
495 const int match = regexec (patbuf->pattern, vStringValue (line),
496 BACK_REFERENCE_COUNT, pmatch, 0);
497 if (match == 0)
499 result = TRUE;
500 if (patbuf->type == PTRN_TAG)
501 matchTagPattern (line, patbuf, pmatch);
502 else if (patbuf->type == PTRN_CALLBACK)
503 matchCallbackPattern (line, patbuf, pmatch);
504 else
506 Assert ("invalid pattern type" == NULL);
507 result = FALSE;
510 return result;
513 #endif
515 /* PUBLIC INTERFACE */
517 /* Match against all patterns for specified language. Returns true if at least
518 * on pattern matched.
520 extern boolean matchRegex (const vString* const line, const langType language)
522 boolean result = FALSE;
523 if (language != LANG_IGNORE && language <= SetUpper &&
524 Sets [language].count > 0)
526 const patternSet* const set = Sets + language;
527 unsigned int i;
528 for (i = 0 ; i < set->count ; ++i)
529 if (matchRegexPattern (line, set->patterns + i))
530 result = TRUE;
532 return result;
535 extern void findRegexTags (void)
537 /* merely read all lines of the file */
538 while (fileReadLine () != NULL)
542 #endif /* HAVE_REGEX */
544 extern void addTagRegex (
545 const langType language __unused__,
546 const char* const regex __unused__,
547 const char* const name __unused__,
548 const char* const kinds __unused__,
549 const char* const flags __unused__)
551 #ifdef HAVE_REGEX
552 Assert (regex != NULL);
553 Assert (name != NULL);
554 if (! regexBroken)
556 regex_t* const cp = compileRegex (regex, flags);
557 if (cp != NULL)
559 char kind;
560 char* kindName;
561 char* description;
562 parseKinds (kinds, &kind, &kindName, &description);
563 addCompiledTagPattern (language, cp, eStrdup (name),
564 kind, kindName, description);
567 #endif
570 extern void addCallbackRegex (
571 const langType language __unused__,
572 const char* const regex __unused__,
573 const char* const flags __unused__,
574 const regexCallback callback __unused__)
576 #ifdef HAVE_REGEX
577 Assert (regex != NULL);
578 if (! regexBroken)
580 regex_t* const cp = compileRegex (regex, flags);
581 if (cp != NULL)
582 addCompiledCallbackPattern (language, cp, callback);
584 #endif
587 extern void addLanguageRegex (
588 const langType language __unused__, const char* const regex __unused__)
590 #ifdef HAVE_REGEX
591 if (! regexBroken)
593 char *const regex_pat = eStrdup (regex);
594 char *name, *kinds, *flags;
595 if (parseTagRegex (regex_pat, &name, &kinds, &flags))
597 addTagRegex (language, regex_pat, name, kinds, flags);
598 eFree (regex_pat);
601 #endif
605 * Regex option parsing
608 extern boolean processRegexOption (const char *const option,
609 const char *const parameter __unused__)
611 boolean handled = FALSE;
612 const char* const dash = strchr (option, '-');
613 if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
615 #ifdef HAVE_REGEX
616 langType language;
617 language = getNamedLanguage (dash + 1);
618 if (language == LANG_IGNORE)
619 printf ("regex: unknown language \"%s\" in --%s option\n", (dash + 1), option);
620 else
621 processLanguageRegex (language, parameter);
622 #else
623 printf ("regex: regex support not available; required for --%s option\n",
624 option);
625 #endif
626 handled = TRUE;
628 return handled;
631 extern void disableRegexKinds (const langType language __unused__)
633 #ifdef HAVE_REGEX
634 if (language <= SetUpper && Sets [language].count > 0)
636 patternSet* const set = Sets + language;
637 unsigned int i;
638 for (i = 0 ; i < set->count ; ++i)
639 if (set->patterns [i].type == PTRN_TAG)
640 set->patterns [i].u.tag.kind.enabled = FALSE;
642 #endif
645 extern boolean enableRegexKind (
646 const langType language __unused__,
647 const int kind __unused__, const boolean mode __unused__)
649 boolean result = FALSE;
650 #ifdef HAVE_REGEX
651 if (language <= SetUpper && Sets [language].count > 0)
653 patternSet* const set = Sets + language;
654 unsigned int i;
655 for (i = 0 ; i < set->count ; ++i)
656 if (set->patterns [i].type == PTRN_TAG &&
657 set->patterns [i].u.tag.kind.letter == kind)
659 set->patterns [i].u.tag.kind.enabled = mode;
660 result = TRUE;
663 #endif
664 return result;
667 extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
669 #ifdef HAVE_REGEX
670 if (language <= SetUpper && Sets [language].count > 0)
672 patternSet* const set = Sets + language;
673 unsigned int i;
674 for (i = 0 ; i < set->count ; ++i)
675 if (set->patterns [i].type == PTRN_TAG)
676 printRegexKind (set->patterns, i, indent);
678 #endif
/* Releases the patterns of every language along with the array of pattern
 * sets, returning the module to its initial, empty state.
 */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int lang = 0;
	while (lang <= SetUpper)
	{
		clearPatternSet (lang);
		++lang;
	}
	if (Sets != NULL)
		eFree (Sets);
	Sets = NULL;
	SetUpper = -1;
#endif
}
/* Check for broken regcomp() on Cygwin.
 *
 * When the check is compiled in, a trivial pattern is compiled as a probe;
 * if that fails, a warning is issued and regex support is disabled for the
 * rest of the run.
 */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The probe pattern is not needed any further. */
		regfree (&patbuf);
	}
#endif
}
708 /* vi:set tabstop=4 shiftwidth=4: */