Implement add support
[anjuta-git-plugin.git] / tagmanager / regex.c
blobe9da7176b88861dabfad1cc58b710953032460f9
1 /*
3 * Copyright (c) 2000-2001, Darren Hiebert
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License.
8 * This module contains functions for applying regular expression matching.
10 * The code for utilizing the Gnu regex package with regards to processing the
11 * regex option and checking for regex matches was adapted from routines in
12 * Gnu etags.
16 * INCLUDE FILES
18 #include "general.h" /* must always come first */
20 #include <string.h>
22 #if defined (HAVE_REGCOMP) || defined (HAVE_RE_COMPILE_PATTERN)
23 # include <ctype.h>
24 # include <stddef.h>
25 # ifdef HAVE_SYS_TYPES_H
26 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
27 # endif
28 # include "regex.h"
29 #endif
31 #include "debug.h"
32 #include "entry.h"
33 #include "main.h"
34 #include "parse.h"
35 #include "read.h"
37 #ifdef HAVE_REGEX
40 * MACROS
43 /* Back-references \0 through \9 */
44 #define BACK_REFERENCE_COUNT 10
46 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
47 # define POSIX_REGEX
48 #endif
50 #define REGEX_NAME "Regex"
53 * DATA DECLARATIONS
55 #if defined (POSIX_REGEX)
57 struct sKind {
58 boolean enabled;
59 char letter;
60 char* name;
/* Tells which member of the union inside a pattern entry is in use. */
enum pType {
	PTRN_TAG,	/* a match produces a tag built from a name pattern */
	PTRN_CALLBACK	/* a match is passed on to a callback function */
};
65 typedef struct {
66 regex_t *pattern;
67 enum pType type;
68 union {
69 struct {
70 char *name_pattern;
71 struct sKind kind;
72 } tag;
73 struct {
74 regexCallback function;
75 } callback;
76 } u;
77 } regexPattern;
79 #endif
81 typedef struct {
82 regexPattern *patterns;
83 unsigned int count;
84 } patternSet;
87 * DATA DEFINITIONS
90 static boolean regexBroken = FALSE;
92 /* Array of pattern sets, indexed by language */
93 static patternSet* Sets = NULL;
94 static int SetUpper = -1; /* upper language index in list */
97 * FUNCTION DEFINITIONS
100 static void clearPatternSet (const langType language)
102 if (language < SetUpper)
104 patternSet* const set = Sets + language;
105 unsigned int i;
106 for (i = 0 ; i < set->count ; ++i)
108 #if defined (POSIX_REGEX)
109 regfree (set->patterns [i].pattern);
110 #endif
111 eFree (set->patterns [i].pattern);
112 set->patterns [i].pattern = NULL;
114 if (set->patterns [i].type == PTRN_TAG)
116 eFree (set->patterns [i].u.tag.name_pattern);
117 set->patterns [i].u.tag.name_pattern = NULL;
120 if (set->patterns != NULL)
121 eFree (set->patterns);
122 set->patterns = NULL;
123 set->count = 0;
128 * Regex pseudo-parser
131 static void makeRegexTag (const vString* const name,
132 const struct sKind* const kind)
134 if (kind->enabled)
136 tagEntryInfo e;
137 Assert (name != NULL && vStringLength (name) > 0);
138 Assert (kind != NULL);
139 initTagEntry (&e, vStringValue (name));
140 e.kind = kind->letter;
141 e.kindName = kind->name;
142 makeTagEntry (&e);
147 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", handling quoted
 * separator characters. The first character of `name' is the separator;
 * scanning stops at the first unquoted occurrence of it or at the end of
 * the string. A quoted separator becomes the separator itself, "\t" becomes
 * a Tab character, and any other quoted character keeps its backslash. A
 * backslash that ends the string is dropped.
 *
 * Works in place: the unquoted text is written starting at `name' and is
 * null terminated. Returns a pointer to the terminating separator, or to
 * the end of the string when no terminating separator was found. An empty
 * string is returned unchanged.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Without this check the loop would start one past the terminator. */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			++name;			/* move on to the quoted character */
			if (*name == '\0')
				break;		/* dangling backslash */
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
192 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
193 * character is whatever the first character of `regexp' is), by breaking it
194 * up into null terminated strings, removing the separators, and expanding
195 * '\t' into tabs. When complete, `regexp' points to the line matching
196 * pattern, a pointer to the name matching pattern is written to `name', a
197 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
198 * to the trailing flags is written to `flags'. If the pattern is not in the
199 * correct format, a false value is returned.
201 static boolean parseTagRegex (char* const regexp, char** const name,
202 char** const kinds, char** const flags)
204 boolean result = FALSE;
205 const int separator = (unsigned char) regexp [0];
207 *name = scanSeparators (regexp);
208 if (*regexp == '\0')
209 error (WARNING, "empty regexp");
210 else if (**name != separator)
211 error (WARNING, "%s: incomplete regexp", regexp);
212 else
214 char* const third = scanSeparators (*name);
215 if (**name == '\0')
216 error (WARNING, "%s: regexp missing name pattern", regexp);
217 if ((*name) [strlen (*name) - 1] == '\\')
218 error (WARNING, "error in name pattern: \"%s\"", *name);
219 if (*third != separator)
220 error (WARNING, "%s: regexp missing final separator", regexp);
221 else
223 char* const fourth = scanSeparators (third);
224 if (*fourth == separator)
226 *kinds = third;
227 scanSeparators (fourth);
228 *flags = fourth;
230 else
232 *flags = third;
233 *kinds = NULL;
235 result = TRUE;
238 return result;
241 static void addCompiledTagPattern (const langType language,
242 regex_t* const pattern, char* const name,
243 const char kind, char* const kindName)
245 patternSet* set;
246 regexPattern *ptrn;
247 if (language > SetUpper)
249 int i;
250 Sets = xRealloc (Sets, (language + 1), patternSet);
251 for (i = SetUpper + 1 ; i <= language ; ++i)
253 Sets [i].patterns = NULL;
254 Sets [i].count = 0;
256 SetUpper = language;
258 set = Sets + language;
259 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
260 ptrn = &set->patterns [set->count];
261 set->count += 1;
263 ptrn->pattern = pattern;
264 ptrn->type = PTRN_TAG;
265 ptrn->u.tag.name_pattern = name;
266 ptrn->u.tag.kind.enabled = TRUE;
267 ptrn->u.tag.kind.letter = kind;
268 ptrn->u.tag.kind.name = kindName;
271 static void addCompiledCallbackPattern (const langType language,
272 regex_t* const pattern,
273 const regexCallback callback)
275 patternSet* set;
276 regexPattern *ptrn;
277 if (language > SetUpper)
279 int i;
280 Sets = xRealloc (Sets, (language + 1), patternSet);
281 for (i = SetUpper + 1 ; i <= language ; ++i)
283 Sets [i].patterns = NULL;
284 Sets [i].count = 0;
286 SetUpper = language;
288 set = Sets + language;
289 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
290 ptrn = &set->patterns [set->count];
291 set->count += 1;
293 ptrn->pattern = pattern;
294 ptrn->type = PTRN_CALLBACK;
295 ptrn->u.callback.function = callback;
298 #if defined (POSIX_REGEX)
300 static regex_t* compileRegex (const char* const regexp, const char* const flags)
302 int cflags = REG_EXTENDED | REG_NEWLINE;
303 regex_t *result = NULL;
304 int errcode;
305 int i;
306 for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
308 switch ((int) flags [i])
310 case 'b': cflags &= ~REG_EXTENDED; break;
311 case 'e': cflags |= REG_EXTENDED; break;
312 case 'i': cflags |= REG_ICASE; break;
313 default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
316 result = xMalloc (1, regex_t);
317 errcode = regcomp (result, regexp, cflags);
318 if (errcode != 0)
320 char errmsg[256];
321 regerror (errcode, result, errmsg, 256);
322 error (WARNING, "%s", errmsg);
323 regfree (result);
324 eFree (result);
325 result = NULL;
327 return result;
330 #endif
332 static void parseKinds (const char* const kinds,
333 char* const kind, char** const kindName)
335 *kind = '\0';
336 *kindName = NULL;
337 if (kinds == NULL)
339 *kind = 'r';
340 *kindName = eStrdup ("regex");
342 else if (kinds [0] != '\0')
344 const char* k = kinds;
345 if (k [1] == ',' || k [1] == '\0')
346 *kind = *k++;
347 if (*k == ',')
348 ++k;
349 if (*k != '\0')
350 *kindName = eStrdup (k);
354 static void printRegexKindOption (const regexPattern *pat, unsigned int i)
356 const struct sKind *const kind = &pat [i].u.tag.kind;
357 Assert (pat [i].type == PTRN_TAG);
358 printf (" %c %s (regex %d)%s\n",
359 kind->letter != '\0' ? kind->letter : '?',
360 kind->name != NULL ? kind->name : "Regex pattern",
361 i + 1, kind->enabled ? "" : " [off]");
364 static void processLanguageRegex (const langType language,
365 const char* const parameter)
367 if (parameter == NULL || parameter [0] == '\0')
368 clearPatternSet (language);
369 else if (parameter [0] != '@')
370 addLanguageRegex (language, parameter);
371 else if (! doesFileExist (parameter + 1))
372 error (WARNING, "cannot open regex file");
373 else
375 const char* regexfile = parameter + 1;
376 FILE* const fp = fopen (regexfile, "r");
377 if (fp == NULL)
378 error (WARNING | PERROR, regexfile);
379 else
381 vString* const regex = vStringNew ();
382 while (readLine (regex, fp))
383 addLanguageRegex (language, vStringValue (regex));
384 fclose (fp);
385 vStringDelete (regex);
391 * Regex pattern matching
394 #if defined (POSIX_REGEX)
396 static vString* substitute (const char* const in, const char* out,
397 const int nmatch, const regmatch_t* const pmatch)
399 vString* result = vStringNew ();
400 const char* p;
401 for (p = out ; *p != '\0' ; p++)
403 if (*p == '\\' && isdigit ((int) *++p))
405 const int dig = *p - '0';
406 if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
408 const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
409 vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
412 else if (*p != '\n' && *p != '\r')
413 vStringPut (result, *p);
415 vStringTerminate (result);
416 return result;
419 static void matchTagPattern (const vString* const line,
420 const regexPattern* const patbuf,
421 const regmatch_t* const pmatch)
423 vString *const name = substitute (vStringValue (line),
424 patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
425 vStringStripLeading (name);
426 vStringStripTrailing (name);
427 if (vStringLength (name) > 0)
428 makeRegexTag (name, &patbuf->u.tag.kind);
429 else
430 error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
431 getInputFileName (), getInputLineNumber (),
432 patbuf->u.tag.name_pattern);
433 vStringDelete (name);
436 static void matchCallbackPattern (const vString* const line,
437 const regexPattern* const patbuf,
438 const regmatch_t* const pmatch)
440 regexMatch matches [BACK_REFERENCE_COUNT];
441 unsigned int count = 0;
442 int i;
443 for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
445 matches [i].start = pmatch [i].rm_so;
446 matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
447 ++count;
449 patbuf->u.callback.function (vStringValue (line), matches, count);
452 static void matchRegexPattern (const vString* const line,
453 const regexPattern* const patbuf)
455 regmatch_t pmatch [BACK_REFERENCE_COUNT];
456 const int match = regexec (patbuf->pattern, vStringValue (line),
457 BACK_REFERENCE_COUNT, pmatch, 0);
458 if (match == 0)
460 if (patbuf->type == PTRN_TAG)
461 matchTagPattern (line, patbuf, pmatch);
462 else if (patbuf->type == PTRN_CALLBACK)
463 matchCallbackPattern (line, patbuf, pmatch);
467 #endif
469 /* PUBLIC INTERFACE */
471 /* Match against all patterns for specified language. */
472 extern void matchRegex (const vString* const line, const langType language)
474 if (language != LANG_IGNORE && language <= SetUpper &&
475 Sets [language].count > 0)
477 const patternSet* const set = Sets + language;
478 unsigned int i;
479 for (i = 0 ; i < set->count ; ++i)
480 matchRegexPattern (line, set->patterns + i);
484 extern void findRegexTags (void)
486 /* merely read all lines of the file */
487 while (fileReadLine () != NULL)
491 #endif /* HAVE_REGEX */
493 extern void addTagRegex (const langType __unused__ language,
494 const char* const __unused__ regex,
495 const char* const __unused__ name,
496 const char* const __unused__ kinds,
497 const char* const __unused__ flags)
499 #ifdef HAVE_REGEX
500 Assert (regex != NULL);
501 Assert (name != NULL);
502 if (! regexBroken)
504 regex_t* const cp = compileRegex (regex, flags);
505 if (cp != NULL)
507 char kind;
508 char* kindName;
509 parseKinds (kinds, &kind, &kindName);
510 addCompiledTagPattern (language, cp, eStrdup (name),
511 kind, kindName);
514 #endif
517 extern void addCallbackRegex (const langType __unused__ language,
518 const char* const __unused__ regex,
519 const char* const __unused__ flags,
520 const regexCallback __unused__ callback)
522 #ifdef HAVE_REGEX
523 Assert (regex != NULL);
524 if (! regexBroken)
526 regex_t* const cp = compileRegex (regex, flags);
527 if (cp != NULL)
528 addCompiledCallbackPattern (language, cp, callback);
530 #endif
533 extern void addLanguageRegex (const langType __unused__ language,
534 const char* const __unused__ regex)
536 #ifdef HAVE_REGEX
537 if (! regexBroken)
539 char *const regex_pat = eStrdup (regex);
540 char *name, *kinds, *flags;
541 if (parseTagRegex (regex_pat, &name, &kinds, &flags))
543 addTagRegex (language, regex_pat, name, kinds, flags);
544 eFree (regex_pat);
547 #endif
551 * Regex option parsing
554 extern boolean processRegexOption (const char *const option,
555 const char *const __unused__ parameter)
557 boolean handled = FALSE;
558 const char* const dash = strchr (option, '-');
559 if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
561 #ifdef HAVE_REGEX
562 langType language;
563 language = getNamedLanguage (dash + 1);
564 if (language == LANG_IGNORE)
565 error (WARNING, "unknown language in --%s option", option);
566 else
567 processLanguageRegex (language, parameter);
568 #else
569 error (WARNING, "regex support not available; required for --%s option",
570 option);
571 #endif
572 handled = TRUE;
574 return handled;
577 extern void disableRegexKinds (const langType __unused__ language)
579 #ifdef HAVE_REGEX
580 if (language <= SetUpper && Sets [language].count > 0)
582 patternSet* const set = Sets + language;
583 unsigned int i;
584 for (i = 0 ; i < set->count ; ++i)
585 if (set->patterns [i].type == PTRN_TAG)
586 set->patterns [i].u.tag.kind.enabled = FALSE;
588 #endif
591 extern boolean enableRegexKind (const langType __unused__ language,
592 const int __unused__ kind,
593 const boolean __unused__ mode)
595 boolean result = FALSE;
596 #ifdef HAVE_REGEX
597 if (language <= SetUpper && Sets [language].count > 0)
599 patternSet* const set = Sets + language;
600 unsigned int i;
601 for (i = 0 ; i < set->count ; ++i)
602 if (set->patterns [i].type == PTRN_TAG &&
603 set->patterns [i].u.tag.kind.letter == kind)
605 set->patterns [i].u.tag.kind.enabled = mode;
606 result = TRUE;
609 #endif
610 return result;
613 extern void printRegexKindOptions (const langType __unused__ language)
615 #ifdef HAVE_REGEX
616 if (language <= SetUpper && Sets [language].count > 0)
618 patternSet* const set = Sets + language;
619 unsigned int i;
620 for (i = 0 ; i < set->count ; ++i)
621 if (set->patterns [i].type == PTRN_TAG)
622 printRegexKindOption (set->patterns, i);
624 #endif
/* Clears the pattern set of every language up to the upper index, frees
 * the Sets array and resets the module to its initial, empty state.
 * Without HAVE_REGEX this is a no-op.
 */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int language = 0;

	while (language <= SetUpper)
	{
		clearPatternSet (language);
		++language;
	}
	if (Sets != NULL)
	{
		eFree (Sets);
		Sets = NULL;
	}
	SetUpper = -1;
#endif
}
/* Check for broken regcomp() on Cygwin: compiles a trivial probe pattern
 * and, if that fails, issues a warning and flags regex support as broken.
 * Only active when both HAVE_REGEX and CHECK_REGCOMP are defined.
 */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The probe pattern is of no further use; release it. */
		regfree (&patbuf);
	}
#endif
}
654 /* vi:set tabstop=8 shiftwidth=4: */