Update HACKING for changed doc generation instructions
[geany-mirror.git] / tagmanager / ctags / lregex.c
blob780e81e814b276ef2ecc7ad1ba539f05a78b6189
1 /*
2 * Copyright (c) 2000-2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for applying regular expression matching.
9 * The code for utilizing the Gnu regex package with regards to processing the
10 * regex option and checking for regex matches was adapted from routines in
11 * Gnu etags.
15 * INCLUDE FILES
17 #include "general.h" /* must always come first */
19 #include <string.h>
20 #include <glib.h>
21 #include <mio/mio.h>
23 #ifdef HAVE_REGCOMP
24 # include <ctype.h>
25 # include <stddef.h>
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
28 # endif
29 #endif
31 #include "main.h"
32 #include "entry.h"
33 #include "parse.h"
34 #include "read.h"
36 #ifdef HAVE_REGEX
39 * MACROS
42 /* Back-references \0 through \9 */
43 #define BACK_REFERENCE_COUNT 10
45 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
46 # define POSIX_REGEX
47 #endif
49 #define REGEX_NAME "Regex"
52 * DATA DECLARATIONS
54 #if defined (POSIX_REGEX)
56 struct sKind {
57 boolean enabled;
58 char letter;
59 char* name;
60 char* description;
63 enum pType { PTRN_TAG, PTRN_CALLBACK };
65 typedef struct {
66 GRegex *pattern;
67 enum pType type;
68 union {
69 struct {
70 char *name_pattern;
71 struct sKind kind;
72 } tag;
73 struct {
74 regexCallback function;
75 } callback;
76 } u;
77 } regexPattern;
79 #endif
81 typedef struct {
82 regexPattern *patterns;
83 unsigned int count;
84 } patternSet;
87 * DATA DEFINITIONS
90 static boolean regexBroken = FALSE;
92 /* Array of pattern sets, indexed by language */
93 static patternSet* Sets = NULL;
94 static int SetUpper = -1; /* upper language index in list */
97 * FUNCTION DEFINITIONS
100 static void clearPatternSet (const langType language)
102 if (language <= SetUpper)
104 patternSet* const set = Sets + language;
105 unsigned int i;
106 for (i = 0 ; i < set->count ; ++i)
108 regexPattern *p = &set->patterns [i];
109 g_regex_unref(p->pattern);
110 p->pattern = NULL;
112 if (p->type == PTRN_TAG)
114 eFree (p->u.tag.name_pattern);
115 p->u.tag.name_pattern = NULL;
116 eFree (p->u.tag.kind.name);
117 p->u.tag.kind.name = NULL;
118 if (p->u.tag.kind.description != NULL)
120 eFree (p->u.tag.kind.description);
121 p->u.tag.kind.description = NULL;
125 if (set->patterns != NULL)
126 eFree (set->patterns);
127 set->patterns = NULL;
128 set->count = 0;
133 * Regex pseudo-parser
136 static void makeRegexTag (
137 const vString* const name, const struct sKind* const kind)
139 Assert (kind != NULL);
140 if (kind->enabled)
142 tagEntryInfo e;
143 Assert (name != NULL && vStringLength (name) > 0);
144 initTagEntry (&e, vStringValue (name));
145 e.kind = kind->letter;
146 e.kindName = kind->name;
147 makeTagEntry (&e);
152 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", treating the first
 * character as the separator and handling quoted separator characters.
 * Scanning stops at the first unquoted separator (or at the end of the
 * string). Also turns "\t" into a Tab character; any other quoted character
 * keeps its backslash. Works in place and null terminates the result, which
 * starts at `name'.
 *
 * Returns a pointer to the terminating separator, or to the string's null
 * terminator when no closing separator was found. An empty string is
 * returned untouched, because there is no separator to scan for and nothing
 * may be read past the terminator.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			/* look at the quoted character */
			++name;
			if (*name == '\0')
				break;  /* dangling backslash at end of input: dropped */
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
197 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
198 * character is whatever the first character of `regexp' is), by breaking it
199 * up into null terminated strings, removing the separators, and expanding
200 * '\t' into tabs. When complete, `regexp' points to the line matching
201 * pattern, a pointer to the name matching pattern is written to `name', a
202 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
203 * to the trailing flags is written to `flags'. If the pattern is not in the
204 * correct format, a false value is returned.
206 static boolean parseTagRegex (
207 char* const regexp, char** const name,
208 char** const kinds, char** const flags)
210 boolean result = FALSE;
211 const int separator = (unsigned char) regexp [0];
213 *name = scanSeparators (regexp);
214 if (*regexp == '\0')
215 printf ("regex: empty regexp\n");
216 else if (**name != separator)
217 printf ("regex: %s: incomplete regexp\n", regexp);
218 else
220 char* const third = scanSeparators (*name);
221 if (**name == '\0')
222 printf ("regex: %s: regexp missing name pattern\n", regexp);
223 if ((*name) [strlen (*name) - 1] == '\\')
224 printf ("regex: error in name pattern: \"%s\"\n", *name);
225 if (*third != separator)
226 printf ("regex: %s: regexp missing final separator\n", regexp);
227 else
229 char* const fourth = scanSeparators (third);
230 if (*fourth == separator)
232 *kinds = third;
233 scanSeparators (fourth);
234 *flags = fourth;
236 else
238 *flags = third;
239 *kinds = NULL;
241 result = TRUE;
244 return result;
247 static void addCompiledTagPattern (
248 const langType language, GRegex* const pattern,
249 char* const name, const char kind, char* const kindName,
250 char *const description)
252 patternSet* set;
253 regexPattern *ptrn;
254 if (language > SetUpper)
256 int i;
257 Sets = xRealloc (Sets, (language + 1), patternSet);
258 for (i = SetUpper + 1 ; i <= language ; ++i)
260 Sets [i].patterns = NULL;
261 Sets [i].count = 0;
263 SetUpper = language;
265 set = Sets + language;
266 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
267 ptrn = &set->patterns [set->count];
268 set->count += 1;
270 ptrn->pattern = pattern;
271 ptrn->type = PTRN_TAG;
272 ptrn->u.tag.name_pattern = name;
273 ptrn->u.tag.kind.enabled = TRUE;
274 ptrn->u.tag.kind.letter = kind;
275 ptrn->u.tag.kind.name = kindName;
276 ptrn->u.tag.kind.description = description;
279 static void addCompiledCallbackPattern (
280 const langType language, GRegex* const pattern,
281 const regexCallback callback)
283 patternSet* set;
284 regexPattern *ptrn;
285 if (language > SetUpper)
287 int i;
288 Sets = xRealloc (Sets, (language + 1), patternSet);
289 for (i = SetUpper + 1 ; i <= language ; ++i)
291 Sets [i].patterns = NULL;
292 Sets [i].count = 0;
294 SetUpper = language;
296 set = Sets + language;
297 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
298 ptrn = &set->patterns [set->count];
299 set->count += 1;
301 ptrn->pattern = pattern;
302 ptrn->type = PTRN_CALLBACK;
303 ptrn->u.callback.function = callback;
306 #if defined (POSIX_REGEX)
308 static GRegex* compileRegex (const char* const regexp, const char* const flags)
310 int cflags = G_REGEX_MULTILINE;
311 GRegex *result = NULL;
312 GError *error = NULL;
313 int i;
314 for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
316 switch ((int) flags [i])
318 case 'b': g_warning("CTags 'b' flag not supported by Geany!"); break;
319 case 'e': break;
320 case 'i': cflags |= G_REGEX_CASELESS; break;
321 default: printf ("regex: unknown regex flag: '%c'\n", *flags); break;
324 result = g_regex_new(regexp, cflags, 0, &error);
325 if (error)
327 printf ("regex: regcomp %s: %s\n", regexp, error->message);
328 g_error_free(error);
330 return result;
333 #endif
/*
 * Splits a kind specification of the form "k,name,description" into its
 * parts. Missing parts get defaults: letter 'r' and name "regex"; the
 * description stays NULL. The strings written to `kindName' and
 * `description' are newly allocated.
 */
static void parseKinds (
		const char* const kinds, char* const kind, char** const kindName,
		char **description)
{
	const char* cursor = kinds;
	const char* comma;

	*kind = 'r';
	*kindName = NULL;
	*description = NULL;

	if (cursor == NULL  ||  cursor [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	/* a single leading character followed by ',' or end is the kind letter */
	if (cursor [0] != ','  &&  (cursor [1] == ','  ||  cursor [1] == '\0'))
		*kind = *cursor++;
	if (*cursor == ',')
		++cursor;

	if (cursor [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	comma = strchr (cursor, ',');
	if (comma == NULL)
	{
		*kindName = eStrdup (cursor);
		return;
	}

	{
		const size_t nameLength = (size_t) (comma - cursor);
		*kindName = (char*) eMalloc (nameLength + 1);
		memcpy (*kindName, cursor, nameLength);
		(*kindName) [nameLength] = '\0';
	}
	/* whatever follows the second comma is the description */
	if (comma [1] != '\0')
		*description = eStrdup (comma + 1);
}
376 static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
378 const struct sKind *const kind = &pat [i].u.tag.kind;
379 const char *const indentation = indent ? " " : "";
380 Assert (pat [i].type == PTRN_TAG);
381 printf ("%s%c %s %s\n", indentation,
382 kind->letter != '\0' ? kind->letter : '?',
383 kind->description != NULL ? kind->description : kind->name,
384 kind->enabled ? "" : " [off]");
387 static void processLanguageRegex (const langType language,
388 const char* const parameter)
390 if (parameter == NULL || parameter [0] == '\0')
391 clearPatternSet (language);
392 else if (parameter [0] != '@')
393 addLanguageRegex (language, parameter);
394 else if (! doesFileExist (parameter + 1))
395 printf ("regex: cannot open regex file\n");
396 else
398 const char* regexfile = parameter + 1;
399 MIO* const mio = mio_new_file (regexfile, "r");
400 if (mio == NULL)
401 printf ("regex: %s\n", regexfile);
402 else
404 vString* const regex = vStringNew ();
405 while (readLine (regex, mio))
406 addLanguageRegex (language, vStringValue (regex));
407 mio_free (mio);
408 vStringDelete (regex);
414 * Regex pattern matching
417 #if defined (POSIX_REGEX)
419 static vString* substitute (
420 const char* const in, const char* out,
421 const int nmatch, const GMatchInfo* const minfo)
423 vString* result = vStringNew ();
424 const char* p;
425 for (p = out ; *p != '\0' ; p++)
427 if (*p == '\\' && isdigit ((int) *++p))
429 const int dig = *p - '0';
430 int so, eo;
431 if (0 < dig && dig < nmatch &&
432 g_match_info_fetch_pos(minfo, dig, &so, &eo) && so != -1)
434 const int diglen = eo - so;
435 vStringNCatS (result, in + so, diglen);
438 else if (*p != '\n' && *p != '\r')
439 vStringPut (result, *p);
441 vStringTerminate (result);
442 return result;
445 static void matchTagPattern (const vString* const line,
446 const regexPattern* const patbuf,
447 const GMatchInfo* const minfo)
449 vString *const name = substitute (vStringValue (line),
450 patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, minfo);
451 vStringStripLeading (name);
452 vStringStripTrailing (name);
453 if (vStringLength (name) > 0)
454 makeRegexTag (name, &patbuf->u.tag.kind);
455 else
456 error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
457 getInputFileName (), getInputLineNumber (),
458 patbuf->u.tag.name_pattern);
459 vStringDelete (name);
462 static void matchCallbackPattern (
463 const vString* const line, const regexPattern* const patbuf,
464 const GMatchInfo* const minfo)
466 regexMatch matches [BACK_REFERENCE_COUNT];
467 unsigned int count = 0;
468 int i;
469 for (i = 0 ; i < BACK_REFERENCE_COUNT ; ++i)
471 int so = -1, eo = -1;
472 /* with GRegex we could get the real match count, but that might
473 * cause incompatibilities with CTags */
474 g_match_info_fetch_pos(minfo, i, &so, &eo);
475 matches [i].start = so;
476 matches [i].length = eo - so;
477 /* a valid match may have both offsets == -1,
478 * e.g. (foo)*(bar) matching "bar" - see CTags bug 2970274.
479 * As POSIX regex doesn't seem to have a way to count matches,
480 * we return the count up to the last non-empty match. */
481 if (so != -1)
482 count = i + 1;
484 patbuf->u.callback.function (vStringValue (line), matches, count);
487 static boolean matchRegexPattern (const vString* const line,
488 const regexPattern* const patbuf)
490 boolean result = FALSE;
491 GMatchInfo *minfo;
492 if (g_regex_match(patbuf->pattern, vStringValue(line), 0, &minfo))
494 result = TRUE;
495 if (patbuf->type == PTRN_TAG)
496 matchTagPattern (line, patbuf, minfo);
497 else if (patbuf->type == PTRN_CALLBACK)
498 matchCallbackPattern (line, patbuf, minfo);
499 else
501 Assert ("invalid pattern type" == NULL);
502 result = FALSE;
505 g_match_info_free(minfo);
506 return result;
509 #endif
511 /* PUBLIC INTERFACE */
513 /* Match against all patterns for specified language. Returns true if at least
514 * on pattern matched.
516 extern boolean matchRegex (const vString* const line, const langType language)
518 boolean result = FALSE;
519 if (language != LANG_IGNORE && language <= SetUpper &&
520 Sets [language].count > 0)
522 const patternSet* const set = Sets + language;
523 unsigned int i;
524 for (i = 0 ; i < set->count ; ++i)
525 if (matchRegexPattern (line, set->patterns + i))
526 result = TRUE;
528 return result;
531 extern void findRegexTags (void)
533 /* merely read all lines of the file */
534 while (fileReadLine () != NULL)
538 #endif /* HAVE_REGEX */
540 extern void addTagRegex (
541 const langType language UNUSED,
542 const char* const regex UNUSED,
543 const char* const name UNUSED,
544 const char* const kinds UNUSED,
545 const char* const flags UNUSED)
547 #ifdef HAVE_REGEX
548 Assert (regex != NULL);
549 Assert (name != NULL);
550 if (! regexBroken)
552 GRegex* const cp = compileRegex (regex, flags);
553 if (cp != NULL)
555 char kind;
556 char* kindName;
557 char* description;
558 parseKinds (kinds, &kind, &kindName, &description);
559 addCompiledTagPattern (language, cp, eStrdup (name),
560 kind, kindName, description);
563 #endif
566 extern void addCallbackRegex (
567 const langType language UNUSED,
568 const char* const regex UNUSED,
569 const char* const flags UNUSED,
570 const regexCallback callback UNUSED)
572 #ifdef HAVE_REGEX
573 Assert (regex != NULL);
574 if (! regexBroken)
576 GRegex* const cp = compileRegex (regex, flags);
577 if (cp != NULL)
578 addCompiledCallbackPattern (language, cp, callback);
580 #endif
583 extern void addLanguageRegex (
584 const langType language UNUSED, const char* const regex UNUSED)
586 #ifdef HAVE_REGEX
587 if (! regexBroken)
589 char *const regex_pat = eStrdup (regex);
590 char *name, *kinds, *flags;
591 if (parseTagRegex (regex_pat, &name, &kinds, &flags))
593 addTagRegex (language, regex_pat, name, kinds, flags);
594 eFree (regex_pat);
597 #endif
601 * Regex option parsing
604 extern boolean processRegexOption (const char *const option,
605 const char *const parameter UNUSED)
607 boolean handled = FALSE;
608 const char* const dash = strchr (option, '-');
609 if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
611 #ifdef HAVE_REGEX
612 langType language;
613 language = getNamedLanguage (dash + 1);
614 if (language == LANG_IGNORE)
615 printf ("regex: unknown language \"%s\" in --%s option\n", (dash + 1), option);
616 else
617 processLanguageRegex (language, parameter);
618 #else
619 printf ("regex: regex support not available; required for --%s option\n",
620 option);
621 #endif
622 handled = TRUE;
624 return handled;
627 extern void disableRegexKinds (const langType language UNUSED)
629 #ifdef HAVE_REGEX
630 if (language <= SetUpper && Sets [language].count > 0)
632 patternSet* const set = Sets + language;
633 unsigned int i;
634 for (i = 0 ; i < set->count ; ++i)
635 if (set->patterns [i].type == PTRN_TAG)
636 set->patterns [i].u.tag.kind.enabled = FALSE;
638 #endif
641 extern boolean enableRegexKind (
642 const langType language UNUSED,
643 const int kind UNUSED, const boolean mode UNUSED)
645 boolean result = FALSE;
646 #ifdef HAVE_REGEX
647 if (language <= SetUpper && Sets [language].count > 0)
649 patternSet* const set = Sets + language;
650 unsigned int i;
651 for (i = 0 ; i < set->count ; ++i)
652 if (set->patterns [i].type == PTRN_TAG &&
653 set->patterns [i].u.tag.kind.letter == kind)
655 set->patterns [i].u.tag.kind.enabled = mode;
656 result = TRUE;
659 #endif
660 return result;
663 extern void printRegexKinds (const langType language UNUSED, boolean indent UNUSED)
665 #ifdef HAVE_REGEX
666 if (language <= SetUpper && Sets [language].count > 0)
668 patternSet* const set = Sets + language;
669 unsigned int i;
670 for (i = 0 ; i < set->count ; ++i)
671 if (set->patterns [i].type == PTRN_TAG)
672 printRegexKind (set->patterns, i, indent);
674 #endif
/* Releases the patterns of all languages and the per-language table itself. */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int lang = 0;

	while (lang <= SetUpper)
	{
		clearPatternSet (lang);
		++lang;
	}
	if (Sets != NULL)
		eFree (Sets);
	Sets = NULL;
	SetUpper = -1;
#endif
}
/* Formerly checked for a broken regcomp() on Cygwin; intentionally does
 * nothing now. */
extern void checkRegex (void)
{
	/* not needed now we have GRegex */
}
696 /* vi:set tabstop=4 shiftwidth=4: */