Merge pull request #11 from esorton/bugfix/add-constexpr-keyword-to-arduino-ctags
[arduino-ctags.git] / lregex.c
blob37d7ea0cc72248d56d534b12a9beb6aa8267d40a
1 /*
2 * $Id: lregex.c 747 2009-11-06 02:33:37Z dhiebert $
4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the Gnu regex package with regards to processing the
12 * regex option and checking for regex matches was adapted from routines in
13 * Gnu etags.
17 * INCLUDE FILES
19 #include "general.h" /* must always come first */
21 #include <string.h>
23 #ifdef HAVE_REGCOMP
24 # include <ctype.h>
25 # include <stddef.h>
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
28 # endif
29 # include <regex.h>
30 #endif
32 #include "debug.h"
33 #include "entry.h"
34 #include "parse.h"
35 #include "read.h"
36 #include "routines.h"
38 #ifdef HAVE_REGEX
41 * MACROS
/* Number of sub-match slots handed to regexec(): back-references \0 through
 * \9.
 */
#define BACK_REFERENCE_COUNT 10

/* The POSIX regex API is used only when regcomp() is available and has not
 * been flagged as broken for this build.
 */
#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
# define POSIX_REGEX
#endif

/* Display name of the regex facility. */
#define REGEX_NAME "Regex"
54 * DATA DECLARATIONS
56 #if defined (POSIX_REGEX)
58 struct sKind {
59 boolean enabled;
60 char letter;
61 char* name;
62 char* description;
/* Selects what happens when a pattern matches a line. */
enum pType {
	PTRN_TAG,        /* build a tag from the name pattern */
	PTRN_CALLBACK    /* pass the match to a callback function */
};
67 typedef struct {
68 regex_t *pattern;
69 enum pType type;
70 union {
71 struct {
72 char *name_pattern;
73 struct sKind kind;
74 } tag;
75 struct {
76 regexCallback function;
77 } callback;
78 } u;
79 } regexPattern;
81 #endif
83 typedef struct {
84 regexPattern *patterns;
85 unsigned int count;
86 } patternSet;
89 * DATA DEFINITIONS
92 static boolean regexBroken = FALSE;
94 /* Array of pattern sets, indexed by language */
95 static patternSet* Sets = NULL;
96 static int SetUpper = -1; /* upper language index in list */
99 * FUNCTION DEFINITIONS
102 static void clearPatternSet (const langType language)
104 if (language <= SetUpper)
106 patternSet* const set = Sets + language;
107 unsigned int i;
108 for (i = 0 ; i < set->count ; ++i)
110 regexPattern *p = &set->patterns [i];
111 #if defined (POSIX_REGEX)
112 regfree (p->pattern);
113 #endif
114 eFree (p->pattern);
115 p->pattern = NULL;
117 if (p->type == PTRN_TAG)
119 eFree (p->u.tag.name_pattern);
120 p->u.tag.name_pattern = NULL;
121 eFree (p->u.tag.kind.name);
122 p->u.tag.kind.name = NULL;
123 if (p->u.tag.kind.description != NULL)
125 eFree (p->u.tag.kind.description);
126 p->u.tag.kind.description = NULL;
130 if (set->patterns != NULL)
131 eFree (set->patterns);
132 set->patterns = NULL;
133 set->count = 0;
138 * Regex pseudo-parser
141 static void makeRegexTag (
142 const vString* const name, const struct sKind* const kind)
144 if (kind->enabled)
146 tagEntryInfo e;
147 Assert (name != NULL && vStringLength (name) > 0);
148 Assert (kind != NULL);
149 initTagEntry (&e, vStringValue (name));
150 e.kind = kind->letter;
151 e.kindName = kind->name;
152 makeTagEntry (&e);
157 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also turns "\t" into a Tab character.
 * Returns pointer to terminating separator (or to the terminating null
 * character when no closing separator is found).  Works in place.  Null
 * terminates name string.
 *
 * A backslash followed by anything other than the separator or 't' is kept
 * together with that character; a lone backslash at the very end is dropped.
 * An empty input string is returned untouched.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Without a leading separator there is nothing to scan; stepping over
	 * name[0] here would run past the end of the string.
	 */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			++name;
			if (*name == '\0')
				break;  /* dangling backslash: drop it */
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
202 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
203 * character is whatever the first character of `regexp' is), by breaking it
204 * up into null terminated strings, removing the separators, and expanding
205 * '\t' into tabs. When complete, `regexp' points to the line matching
206 * pattern, a pointer to the name matching pattern is written to `name', a
207 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
208 * to the trailing flags is written to `flags'. If the pattern is not in the
209 * correct format, a false value is returned.
211 static boolean parseTagRegex (
212 char* const regexp, char** const name,
213 char** const kinds, char** const flags)
215 boolean result = FALSE;
216 const int separator = (unsigned char) regexp [0];
218 *name = scanSeparators (regexp);
219 if (*regexp == '\0')
220 error (WARNING, "empty regexp");
221 else if (**name != separator)
222 error (WARNING, "%s: incomplete regexp", regexp);
223 else
225 char* const third = scanSeparators (*name);
226 if (**name == '\0')
227 error (WARNING, "%s: regexp missing name pattern", regexp);
228 if ((*name) [strlen (*name) - 1] == '\\')
229 error (WARNING, "error in name pattern: \"%s\"", *name);
230 if (*third != separator)
231 error (WARNING, "%s: regexp missing final separator", regexp);
232 else
234 char* const fourth = scanSeparators (third);
235 if (*fourth == separator)
237 *kinds = third;
238 scanSeparators (fourth);
239 *flags = fourth;
241 else
243 *flags = third;
244 *kinds = NULL;
246 result = TRUE;
249 return result;
252 static void addCompiledTagPattern (
253 const langType language, regex_t* const pattern,
254 char* const name, const char kind, char* const kindName,
255 char *const description)
257 patternSet* set;
258 regexPattern *ptrn;
259 if (language > SetUpper)
261 int i;
262 Sets = xRealloc (Sets, (language + 1), patternSet);
263 for (i = SetUpper + 1 ; i <= language ; ++i)
265 Sets [i].patterns = NULL;
266 Sets [i].count = 0;
268 SetUpper = language;
270 set = Sets + language;
271 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
272 ptrn = &set->patterns [set->count];
273 set->count += 1;
275 ptrn->pattern = pattern;
276 ptrn->type = PTRN_TAG;
277 ptrn->u.tag.name_pattern = name;
278 ptrn->u.tag.kind.enabled = TRUE;
279 ptrn->u.tag.kind.letter = kind;
280 ptrn->u.tag.kind.name = kindName;
281 ptrn->u.tag.kind.description = description;
284 static void addCompiledCallbackPattern (
285 const langType language, regex_t* const pattern,
286 const regexCallback callback)
288 patternSet* set;
289 regexPattern *ptrn;
290 if (language > SetUpper)
292 int i;
293 Sets = xRealloc (Sets, (language + 1), patternSet);
294 for (i = SetUpper + 1 ; i <= language ; ++i)
296 Sets [i].patterns = NULL;
297 Sets [i].count = 0;
299 SetUpper = language;
301 set = Sets + language;
302 set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
303 ptrn = &set->patterns [set->count];
304 set->count += 1;
306 ptrn->pattern = pattern;
307 ptrn->type = PTRN_CALLBACK;
308 ptrn->u.callback.function = callback;
311 #if defined (POSIX_REGEX)
313 static regex_t* compileRegex (const char* const regexp, const char* const flags)
315 int cflags = REG_EXTENDED | REG_NEWLINE;
316 regex_t *result = NULL;
317 int errcode;
318 int i;
319 for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
321 switch ((int) flags [i])
323 case 'b': cflags &= ~REG_EXTENDED; break;
324 case 'e': cflags |= REG_EXTENDED; break;
325 case 'i': cflags |= REG_ICASE; break;
326 default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
329 result = xMalloc (1, regex_t);
330 errcode = regcomp (result, regexp, cflags);
331 if (errcode != 0)
333 char errmsg[256];
334 regerror (errcode, result, errmsg, 256);
335 error (WARNING, "regcomp %s: %s", regexp, errmsg);
336 regfree (result);
337 eFree (result);
338 result = NULL;
340 return result;
343 #endif
/* Splits a kind specification of the form "k,name,description" into its
 * parts.  The letter is taken from the first character when the string is
 * a single character or that character is directly followed by a comma;
 * otherwise it is 'r'.  A missing name becomes "regex".  `*kindName' always
 * receives a newly allocated string; `*description' receives one only when
 * text follows the second comma and is NULL otherwise.
 */
static void parseKinds (
		const char* const kinds, char* const kind, char** const kindName,
		char **description)
{
	const char *cursor;
	const char *comma;
	size_t nameLength;

	*kind = '\0';
	*kindName = NULL;
	*description = NULL;

	/* No specification at all: fall back to the defaults. */
	if (kinds == NULL || kinds [0] == '\0')
	{
		*kind = 'r';
		*kindName = eStrdup ("regex");
		return;
	}

	cursor = kinds;
	if (cursor [0] != ',' && (cursor [1] == ',' || cursor [1] == '\0'))
		*kind = *cursor++;
	else
		*kind = 'r';
	if (*cursor == ',')
		++cursor;

	if (cursor [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	comma = strchr (cursor, ',');
	if (comma == NULL)
	{
		*kindName = eStrdup (cursor);
		return;
	}

	/* The name ends at the comma; whatever follows is the description. */
	nameLength = (size_t) (comma - cursor);
	*kindName = (char*) eMalloc (nameLength + 1);
	memcpy (*kindName, cursor, nameLength);
	(*kindName) [nameLength] = '\0';
	if (comma [1] != '\0')
		*description = eStrdup (comma + 1);
}
386 static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
388 const struct sKind *const kind = &pat [i].u.tag.kind;
389 const char *const indentation = indent ? " " : "";
390 Assert (pat [i].type == PTRN_TAG);
391 printf ("%s%c %s %s\n", indentation,
392 kind->letter != '\0' ? kind->letter : '?',
393 kind->description != NULL ? kind->description : kind->name,
394 kind->enabled ? "" : " [off]");
397 static void processLanguageRegex (const langType language,
398 const char* const parameter)
400 if (parameter == NULL || parameter [0] == '\0')
401 clearPatternSet (language);
402 else if (parameter [0] != '@')
403 addLanguageRegex (language, parameter);
404 else if (! doesFileExist (parameter + 1))
405 error (WARNING, "cannot open regex file");
406 else
408 const char* regexfile = parameter + 1;
409 FILE* const fp = fopen (regexfile, "r");
410 if (fp == NULL)
411 error (WARNING | PERROR, "%s", regexfile);
412 else
414 vString* const regex = vStringNew ();
415 while (readLine (regex, fp))
416 addLanguageRegex (language, vStringValue (regex));
417 fclose (fp);
418 vStringDelete (regex);
424 * Regex pattern matching
427 #if defined (POSIX_REGEX)
429 static vString* substitute (
430 const char* const in, const char* out,
431 const int nmatch, const regmatch_t* const pmatch)
433 vString* result = vStringNew ();
434 const char* p;
435 for (p = out ; *p != '\0' ; p++)
437 if (*p == '\\' && isdigit ((int) *++p))
439 const int dig = *p - '0';
440 if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
442 const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
443 vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
446 else if (*p != '\n' && *p != '\r')
447 vStringPut (result, *p);
449 vStringTerminate (result);
450 return result;
453 static void matchTagPattern (const vString* const line,
454 const regexPattern* const patbuf,
455 const regmatch_t* const pmatch)
457 vString *const name = substitute (vStringValue (line),
458 patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
459 vStringStripLeading (name);
460 vStringStripTrailing (name);
461 if (vStringLength (name) > 0)
462 makeRegexTag (name, &patbuf->u.tag.kind);
463 else
464 error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
465 getInputFileName (), getInputLineNumber (),
466 patbuf->u.tag.name_pattern);
467 vStringDelete (name);
470 static void matchCallbackPattern (
471 const vString* const line, const regexPattern* const patbuf,
472 const regmatch_t* const pmatch)
474 regexMatch matches [BACK_REFERENCE_COUNT];
475 unsigned int count = 0;
476 int i;
477 for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
479 matches [i].start = pmatch [i].rm_so;
480 matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
481 ++count;
483 patbuf->u.callback.function (vStringValue (line), matches, count);
486 static boolean matchRegexPattern (const vString* const line,
487 const regexPattern* const patbuf)
489 boolean result = FALSE;
490 regmatch_t pmatch [BACK_REFERENCE_COUNT];
491 const int match = regexec (patbuf->pattern, vStringValue (line),
492 BACK_REFERENCE_COUNT, pmatch, 0);
493 if (match == 0)
495 result = TRUE;
496 if (patbuf->type == PTRN_TAG)
497 matchTagPattern (line, patbuf, pmatch);
498 else if (patbuf->type == PTRN_CALLBACK)
499 matchCallbackPattern (line, patbuf, pmatch);
500 else
502 Assert ("invalid pattern type" == NULL);
503 result = FALSE;
506 return result;
509 #endif
511 /* PUBLIC INTERFACE */
513 /* Match against all patterns for specified language. Returns true if at least
514 * on pattern matched.
516 extern boolean matchRegex (const vString* const line, const langType language)
518 boolean result = FALSE;
519 if (language != LANG_IGNORE && language <= SetUpper &&
520 Sets [language].count > 0)
522 const patternSet* const set = Sets + language;
523 unsigned int i;
524 for (i = 0 ; i < set->count ; ++i)
525 if (matchRegexPattern (line, set->patterns + i))
526 result = TRUE;
528 return result;
531 extern void findRegexTags (void)
533 /* merely read all lines of the file */
534 while (fileReadLine () != NULL)
538 #endif /* HAVE_REGEX */
540 extern void addTagRegex (
541 const langType language __unused__,
542 const char* const regex __unused__,
543 const char* const name __unused__,
544 const char* const kinds __unused__,
545 const char* const flags __unused__)
547 #ifdef HAVE_REGEX
548 Assert (regex != NULL);
549 Assert (name != NULL);
550 if (! regexBroken)
552 regex_t* const cp = compileRegex (regex, flags);
553 if (cp != NULL)
555 char kind;
556 char* kindName;
557 char* description;
558 parseKinds (kinds, &kind, &kindName, &description);
559 addCompiledTagPattern (language, cp, eStrdup (name),
560 kind, kindName, description);
563 #endif
566 extern void addCallbackRegex (
567 const langType language __unused__,
568 const char* const regex __unused__,
569 const char* const flags __unused__,
570 const regexCallback callback __unused__)
572 #ifdef HAVE_REGEX
573 Assert (regex != NULL);
574 if (! regexBroken)
576 regex_t* const cp = compileRegex (regex, flags);
577 if (cp != NULL)
578 addCompiledCallbackPattern (language, cp, callback);
580 #endif
583 extern void addLanguageRegex (
584 const langType language __unused__, const char* const regex __unused__)
586 #ifdef HAVE_REGEX
587 if (! regexBroken)
589 char *const regex_pat = eStrdup (regex);
590 char *name, *kinds, *flags;
591 if (parseTagRegex (regex_pat, &name, &kinds, &flags))
593 addTagRegex (language, regex_pat, name, kinds, flags);
594 eFree (regex_pat);
597 #endif
601 * Regex option parsing
604 extern boolean processRegexOption (const char *const option,
605 const char *const parameter __unused__)
607 boolean handled = FALSE;
608 const char* const dash = strchr (option, '-');
609 if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
611 #ifdef HAVE_REGEX
612 langType language;
613 language = getNamedLanguage (dash + 1);
614 if (language == LANG_IGNORE)
615 error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
616 else
617 processLanguageRegex (language, parameter);
618 #else
619 error (WARNING, "regex support not available; required for --%s option",
620 option);
621 #endif
622 handled = TRUE;
624 return handled;
627 extern void disableRegexKinds (const langType language __unused__)
629 #ifdef HAVE_REGEX
630 if (language <= SetUpper && Sets [language].count > 0)
632 patternSet* const set = Sets + language;
633 unsigned int i;
634 for (i = 0 ; i < set->count ; ++i)
635 if (set->patterns [i].type == PTRN_TAG)
636 set->patterns [i].u.tag.kind.enabled = FALSE;
638 #endif
641 extern boolean enableRegexKind (
642 const langType language __unused__,
643 const int kind __unused__, const boolean mode __unused__)
645 boolean result = FALSE;
646 #ifdef HAVE_REGEX
647 if (language <= SetUpper && Sets [language].count > 0)
649 patternSet* const set = Sets + language;
650 unsigned int i;
651 for (i = 0 ; i < set->count ; ++i)
652 if (set->patterns [i].type == PTRN_TAG &&
653 set->patterns [i].u.tag.kind.letter == kind)
655 set->patterns [i].u.tag.kind.enabled = mode;
656 result = TRUE;
659 #endif
660 return result;
663 extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
665 #ifdef HAVE_REGEX
666 if (language <= SetUpper && Sets [language].count > 0)
668 patternSet* const set = Sets + language;
669 unsigned int i;
670 for (i = 0 ; i < set->count ; ++i)
671 if (set->patterns [i].type == PTRN_TAG)
672 printRegexKind (set->patterns, i, indent);
674 #endif
/* Releases the patterns of every language and the table of sets itself,
 * returning the module to its initial, empty state.  Without HAVE_REGEX
 * this function does nothing.
 */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int language = 0;
	while (language <= SetUpper)
	{
		clearPatternSet (language);
		++language;
	}
	if (Sets != NULL)
		eFree (Sets);
	Sets = NULL;
	SetUpper = -1;
#endif
}
/* Check for broken regcomp() on Cygwin: compiles a trivial expression and,
 * if that fails, issues a warning and marks regex support as broken so that
 * no further patterns are compiled.  Only active when both HAVE_REGEX and
 * CHECK_REGCOMP are defined.
 */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The probe pattern is not needed afterwards; release it. */
		regfree (&patbuf);
	}
#endif
}
704 /* vi:set tabstop=4 shiftwidth=4: */