Updated Spanish translation
[anjuta-git-plugin.git] / tagmanager / lregex.c
blobe55753d772659449bc926d19a49cdd72a9089c5e
1 /*
2 * $Id$
4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the Gnu regex package with regards to processing the
12 * regex option and checking for regex matches was adapted from routines in
13 * Gnu etags.
17 * INCLUDE FILES
19 #include "general.h" /* must always come first */
21 #include <string.h>
23 #ifdef HAVE_REGCOMP
24 # include <ctype.h>
25 # include <stddef.h>
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
28 # endif
29 # include "regex.h"
30 #endif
32 #include "debug.h"
33 #include "entry.h"
34 #include "parse.h"
35 #include "read.h"
36 #include "routines.h"
38 #ifdef HAVE_REGEX
41 * MACROS
/* Number of back-reference slots (\0 through \9) kept for each match */
#define BACK_REFERENCE_COUNT 10

/* POSIX regex support is selected only when regcomp() is available and has
 * not been flagged as broken by the build configuration.
 */
#ifdef HAVE_REGCOMP
# ifndef REGCOMP_BROKEN
#  define POSIX_REGEX
# endif
#endif

#define REGEX_NAME "Regex"
54 * DATA DECLARATIONS
56 #if defined (POSIX_REGEX)
58 struct sKind {
59 boolean enabled;
60 char letter;
61 char* name;
62 char* description;
/* Action taken when a pattern matches: make a tag or invoke a callback. */
enum pType {
	PTRN_TAG,
	PTRN_CALLBACK
};
67 typedef struct {
68 regex_t *pattern;
69 enum pType type;
70 union {
71 struct {
72 char *name_pattern;
73 struct sKind kind;
74 } tag;
75 struct {
76 regexCallback function;
77 } callback;
78 } u;
79 } regexPattern;
81 #endif
83 typedef struct {
84 regexPattern *patterns;
85 unsigned int count;
86 } patternSet;
89 * DATA DEFINITIONS
92 static boolean regexBroken = FALSE;
94 /* Array of pattern sets, indexed by language */
95 static patternSet* Sets = NULL;
96 static int SetUpper = -1; /* upper language index in list */
99 * FUNCTION DEFINITIONS
102 static void clearPatternSet (const langType language)
104 if (language < SetUpper)
106 patternSet* const set = Sets + language;
107 unsigned int i;
108 for (i = 0 ; i < set->count ; ++i)
110 #if defined (POSIX_REGEX)
111 regfree (set->patterns [i].pattern);
112 #endif
113 eFree (set->patterns [i].pattern);
114 set->patterns [i].pattern = NULL;
116 if (set->patterns [i].type == PTRN_TAG)
118 eFree (set->patterns [i].u.tag.name_pattern);
119 set->patterns [i].u.tag.name_pattern = NULL;
122 if (set->patterns != NULL)
123 eFree (set->patterns);
124 set->patterns = NULL;
125 set->count = 0;
130 * Regex pseudo-parser
133 static void makeRegexTag (
134 const vString* const name, const struct sKind* const kind)
136 if (kind->enabled)
138 tagEntryInfo e;
139 Assert (name != NULL && vStringLength (name) > 0);
140 Assert (kind != NULL);
141 initTagEntry (&e, vStringValue (name));
142 e.kind = kind->letter;
143 e.kindName = kind->name;
144 makeTagEntry (&e);
149 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also turns "\t" into a Tab character.
 * Any other quoted character keeps its backslash; a backslash that is
 * the very last character of the string is dropped.
 * Returns pointer to terminating separator (or to the terminating null
 * if no unquoted separator was found).  Works in place.  Null
 * terminates name string.
 * An empty input string is returned unchanged.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Nothing to scan: stepping over the first character would move past
	 * the terminator.
	 */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			/* Read the quoted character before anything is written. */
			const char next = name [1];
			++name;
			if (next == '\0')
				break;
			if (next == sep)
				*copyto++ = sep;
			else if (next == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = next;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
194 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
195 * character is whatever the first character of `regexp' is), by breaking it
196 * up into null terminated strings, removing the separators, and expanding
197 * '\t' into tabs. When complete, `regexp' points to the line matching
198 * pattern, a pointer to the name matching pattern is written to `name', a
199 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
200 * to the trailing flags is written to `flags'. If the pattern is not in the
201 * correct format, a false value is returned.
203 static boolean parseTagRegex (
204 char* const regexp, char** const name,
205 char** const kinds, char** const flags)
207 boolean result = FALSE;
208 const int separator = (unsigned char) regexp [0];
210 *name = scanSeparators (regexp);
211 if (*regexp == '\0')
212 error (WARNING, "empty regexp");
213 else if (**name != separator)
214 error (WARNING, "%s: incomplete regexp", regexp);
215 else
217 char* const third = scanSeparators (*name);
218 if (**name == '\0')
219 error (WARNING, "%s: regexp missing name pattern", regexp);
220 if ((*name) [strlen (*name) - 1] == '\\')
221 error (WARNING, "error in name pattern: \"%s\"", *name);
222 if (*third != separator)
223 error (WARNING, "%s: regexp missing final separator", regexp);
224 else
226 char* const fourth = scanSeparators (third);
227 if (*fourth == separator)
229 *kinds = third;
230 scanSeparators (fourth);
231 *flags = fourth;
233 else
235 *flags = third;
236 *kinds = NULL;
238 result = TRUE;
241 return result;
244 static void addCompiledTagPattern (
245 const langType language, regex_t* const pattern,
246 char* const name, const char kind, char* const kindName,
247 char *const description)
249 patternSet* set;
250 regexPattern *ptrn;
251 if (language > SetUpper)
253 int i;
254 Sets = xRealloc (Sets, (language + 1), patternSet);
255 for (i = SetUpper + 1 ; i <= language ; ++i)
257 Sets [i].patterns = NULL;
258 Sets [i].count = 0;
260 SetUpper = language;
262 set = Sets + language;
263 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
264 ptrn = &set->patterns [set->count];
265 set->count += 1;
267 ptrn->pattern = pattern;
268 ptrn->type = PTRN_TAG;
269 ptrn->u.tag.name_pattern = name;
270 ptrn->u.tag.kind.enabled = TRUE;
271 ptrn->u.tag.kind.letter = kind;
272 ptrn->u.tag.kind.name = kindName;
273 ptrn->u.tag.kind.description = description;
276 static void addCompiledCallbackPattern (
277 const langType language, regex_t* const pattern,
278 const regexCallback callback)
280 patternSet* set;
281 regexPattern *ptrn;
282 if (language > SetUpper)
284 int i;
285 Sets = xRealloc (Sets, (language + 1), patternSet);
286 for (i = SetUpper + 1 ; i <= language ; ++i)
288 Sets [i].patterns = NULL;
289 Sets [i].count = 0;
291 SetUpper = language;
293 set = Sets + language;
294 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
295 ptrn = &set->patterns [set->count];
296 set->count += 1;
298 ptrn->pattern = pattern;
299 ptrn->type = PTRN_CALLBACK;
300 ptrn->u.callback.function = callback;
303 #if defined (POSIX_REGEX)
305 static regex_t* compileRegex (const char* const regexp, const char* const flags)
307 int cflags = REG_EXTENDED | REG_NEWLINE;
308 regex_t *result = NULL;
309 int errcode;
310 int i;
311 for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
313 switch ((int) flags [i])
315 case 'b': cflags &= ~REG_EXTENDED; break;
316 case 'e': cflags |= REG_EXTENDED; break;
317 case 'i': cflags |= REG_ICASE; break;
318 default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
321 result = xMalloc (1, regex_t);
322 errcode = regcomp (result, regexp, cflags);
323 if (errcode != 0)
325 char errmsg[256];
326 regerror (errcode, result, errmsg, 256);
327 error (WARNING, "%s", errmsg);
328 regfree (result);
329 eFree (result);
330 result = NULL;
332 return result;
335 #endif
/*
 * Splits a kind specification of the form "k", "name", "k,name" or
 * "k,name,description" into its parts.
 *
 * `*kind' receives the kind letter ('r' when none is given), `*kindName'
 * a newly allocated kind name ("regex" when none is given), and
 * `*description' a newly allocated description or NULL.  `kinds' may be
 * NULL or empty, which selects the defaults.
 */
static void parseKinds (
		const char* const kinds, char* const kind, char** const kindName,
		char **description)
{
	const char *cursor;
	const char *comma;

	*description = NULL;

	if (kinds == NULL || kinds [0] == '\0')
	{
		*kind = 'r';
		*kindName = eStrdup ("regex");
		return;
	}

	/* A leading single character followed by ',' or the end of the
	 * string is the kind letter.
	 */
	cursor = kinds;
	if (cursor [0] != ',' && (cursor [1] == ',' || cursor [1] == '\0'))
		*kind = *cursor++;
	else
		*kind = 'r';
	if (*cursor == ',')
		++cursor;

	if (cursor [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	comma = strchr (cursor, ',');
	if (comma == NULL)
	{
		*kindName = eStrdup (cursor);
		return;
	}

	/* The name runs up to the comma; anything after it is the description. */
	{
		const size_t nameLength = comma - cursor;
		*kindName = (char*) eMalloc (nameLength + 1);
		memcpy (*kindName, cursor, nameLength);
		(*kindName) [nameLength] = '\0';
	}
	if (comma [1] != '\0')
		*description = eStrdup (comma + 1);
}
378 static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
380 const struct sKind *const kind = &pat [i].u.tag.kind;
381 const char *const indentation = indent ? " " : "";
382 Assert (pat [i].type == PTRN_TAG);
383 printf ("%s%c %s %s\n", indentation,
384 kind->letter != '\0' ? kind->letter : '?',
385 kind->description != NULL ? kind->description : kind->name,
386 kind->enabled ? "" : " [off]");
389 static void processLanguageRegex (const langType language,
390 const char* const parameter)
392 if (parameter == NULL || parameter [0] == '\0')
393 clearPatternSet (language);
394 else if (parameter [0] != '@')
395 addLanguageRegex (language, parameter);
396 else if (! doesFileExist (parameter + 1))
397 error (WARNING, "cannot open regex file");
398 else
400 const char* regexfile = parameter + 1;
401 FILE* const fp = fopen (regexfile, "r");
402 if (fp == NULL)
403 error (WARNING | PERROR, regexfile);
404 else
406 vString* const regex = vStringNew ();
407 while (readLine (regex, fp))
408 addLanguageRegex (language, vStringValue (regex));
409 fclose (fp);
410 vStringDelete (regex);
416 * Regex pattern matching
419 #if defined (POSIX_REGEX)
421 static vString* substitute (
422 const char* const in, const char* out,
423 const int nmatch, const regmatch_t* const pmatch)
425 vString* result = vStringNew ();
426 const char* p;
427 for (p = out ; *p != '\0' ; p++)
429 if (*p == '\\' && isdigit ((int) *++p))
431 const int dig = *p - '0';
432 if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
434 const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
435 vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
438 else if (*p != '\n' && *p != '\r')
439 vStringPut (result, *p);
441 vStringTerminate (result);
442 return result;
445 static void matchTagPattern (const vString* const line,
446 const regexPattern* const patbuf,
447 const regmatch_t* const pmatch)
449 vString *const name = substitute (vStringValue (line),
450 patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
451 vStringStripLeading (name);
452 vStringStripTrailing (name);
453 if (vStringLength (name) > 0)
454 makeRegexTag (name, &patbuf->u.tag.kind);
455 else
456 error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
457 getInputFileName (), getInputLineNumber (),
458 patbuf->u.tag.name_pattern);
459 vStringDelete (name);
462 static void matchCallbackPattern (
463 const vString* const line, const regexPattern* const patbuf,
464 const regmatch_t* const pmatch)
466 regexMatch matches [BACK_REFERENCE_COUNT];
467 unsigned int count = 0;
468 int i;
469 for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
471 matches [i].start = pmatch [i].rm_so;
472 matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
473 ++count;
475 patbuf->u.callback.function (vStringValue (line), matches, count);
478 static boolean matchRegexPattern (const vString* const line,
479 const regexPattern* const patbuf)
481 boolean result = FALSE;
482 regmatch_t pmatch [BACK_REFERENCE_COUNT];
483 const int match = regexec (patbuf->pattern, vStringValue (line),
484 BACK_REFERENCE_COUNT, pmatch, 0);
485 if (match == 0)
487 result = TRUE;
488 if (patbuf->type == PTRN_TAG)
489 matchTagPattern (line, patbuf, pmatch);
490 else if (patbuf->type == PTRN_CALLBACK)
491 matchCallbackPattern (line, patbuf, pmatch);
492 else
494 Assert ("invalid pattern type" == NULL);
495 result = FALSE;
498 return result;
501 #endif
503 /* PUBLIC INTERFACE */
505 /* Match against all patterns for specified language. Returns true if at least
506 * on pattern matched.
508 extern boolean matchRegex (const vString* const line, const langType language)
510 boolean result = FALSE;
511 if (language != LANG_IGNORE && language <= SetUpper &&
512 Sets [language].count > 0)
514 const patternSet* const set = Sets + language;
515 unsigned int i;
516 for (i = 0 ; i < set->count ; ++i)
517 if (matchRegexPattern (line, set->patterns + i))
518 result = TRUE;
520 return result;
523 extern void findRegexTags (void)
525 /* merely read all lines of the file */
526 while (fileReadLine () != NULL)
530 #endif /* HAVE_REGEX */
532 extern void addTagRegex (
533 const langType language __unused__,
534 const char* const regex __unused__,
535 const char* const name __unused__,
536 const char* const kinds __unused__,
537 const char* const flags __unused__)
539 #ifdef HAVE_REGEX
540 Assert (regex != NULL);
541 Assert (name != NULL);
542 if (! regexBroken)
544 regex_t* const cp = compileRegex (regex, flags);
545 if (cp != NULL)
547 char kind;
548 char* kindName;
549 char* description;
550 parseKinds (kinds, &kind, &kindName, &description);
551 addCompiledTagPattern (language, cp, eStrdup (name),
552 kind, kindName, description);
555 #endif
558 extern void addCallbackRegex (
559 const langType language __unused__,
560 const char* const regex __unused__,
561 const char* const flags __unused__,
562 const regexCallback callback __unused__)
564 #ifdef HAVE_REGEX
565 Assert (regex != NULL);
566 if (! regexBroken)
568 regex_t* const cp = compileRegex (regex, flags);
569 if (cp != NULL)
570 addCompiledCallbackPattern (language, cp, callback);
572 #endif
575 extern void addLanguageRegex (
576 const langType language __unused__, const char* const regex __unused__)
578 #ifdef HAVE_REGEX
579 if (! regexBroken)
581 char *const regex_pat = eStrdup (regex);
582 char *name, *kinds, *flags;
583 if (parseTagRegex (regex_pat, &name, &kinds, &flags))
585 addTagRegex (language, regex_pat, name, kinds, flags);
586 eFree (regex_pat);
589 #endif
593 * Regex option parsing
596 extern boolean processRegexOption (const char *const option,
597 const char *const parameter __unused__)
599 boolean handled = FALSE;
600 const char* const dash = strchr (option, '-');
601 if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
603 #ifdef HAVE_REGEX
604 langType language;
605 language = getNamedLanguage (dash + 1);
606 if (language == LANG_IGNORE)
607 error (WARNING, "unknown language in --%s option", option);
608 else
609 processLanguageRegex (language, parameter);
610 #else
611 error (WARNING, "regex support not available; required for --%s option",
612 option);
613 #endif
614 handled = TRUE;
616 return handled;
619 extern void disableRegexKinds (const langType language __unused__)
621 #ifdef HAVE_REGEX
622 if (language <= SetUpper && Sets [language].count > 0)
624 patternSet* const set = Sets + language;
625 unsigned int i;
626 for (i = 0 ; i < set->count ; ++i)
627 if (set->patterns [i].type == PTRN_TAG)
628 set->patterns [i].u.tag.kind.enabled = FALSE;
630 #endif
633 extern boolean enableRegexKind (
634 const langType language __unused__,
635 const int kind __unused__, const boolean mode __unused__)
637 boolean result = FALSE;
638 #ifdef HAVE_REGEX
639 if (language <= SetUpper && Sets [language].count > 0)
641 patternSet* const set = Sets + language;
642 unsigned int i;
643 for (i = 0 ; i < set->count ; ++i)
644 if (set->patterns [i].type == PTRN_TAG &&
645 set->patterns [i].u.tag.kind.letter == kind)
647 set->patterns [i].u.tag.kind.enabled = mode;
648 result = TRUE;
651 #endif
652 return result;
655 extern void printRegexKinds (const langType language __unused__, boolean indent)
657 #ifdef HAVE_REGEX
658 if (language <= SetUpper && Sets [language].count > 0)
660 patternSet* const set = Sets + language;
661 unsigned int i;
662 for (i = 0 ; i < set->count ; ++i)
663 if (set->patterns [i].type == PTRN_TAG)
664 printRegexKind (set->patterns, i, indent);
666 #endif
/* Calls clearPatternSet() for each language index from 0 through SetUpper,
 * then releases the table of pattern sets and resets it to its initial,
 * empty state.
 */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int lang = 0;

	while (lang <= SetUpper)
	{
		clearPatternSet (lang);
		++lang;
	}
	if (Sets != NULL)
		eFree (Sets);
	SetUpper = -1;
	Sets = NULL;
#endif
}
/* Check for broken regcomp() on Cygwin.  When the trial compilation fails,
 * a warning is issued and regexBroken is set, which makes the add*Regex()
 * functions register nothing.  The check is compiled in only when both
 * HAVE_REGEX and CHECK_REGCOMP are defined.
 */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The trial pattern is not needed any further. */
		regfree (&patbuf);
	}
#endif
}
696 /* vi:set tabstop=4 shiftwidth=4: */