Update Scintilla keywords and highlighting when changing document tabs
[geany-mirror.git] / tagmanager / get.c
blob1b447d74f65983cd92977c2e7b9aa03c78756d0d
/*
*   Copyright (c) 1996-2002, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains the high level source read functions (preprocessor
*   directives are handled within this level).
*/
/*
*   INCLUDE FILES
*/
#include "general.h"  /* must always come first */

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>

#include "entry.h"
#include "get.h"
#include "options.h"
#include "read.h"
#include "vstring.h"
/*
*   MACROS
*/
/* Non-zero when the two NUL-terminated strings compare equal via strcmp(). */
#define stringMatch(s1,s2)		(strcmp ((s1), (s2)) == 0)
/* Non-zero when character c equals SPACE or TAB. */
#define isspacetab(c)			((c) == SPACE || (c) == TAB)
/*
*   DATA DECLARATIONS
*/
/* Kind of comment opener recognised after a '/' character. */
typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS, COMMENT_D } Comment;

/* Fixed capacities used by the preprocessor state below. */
enum eCppLimits {
	MaxCppNestingLevel = 20,  /* entries in the conditional stack */
	MaxDirectiveName   = 10   /* buffer size for a directive keyword */
};
41 /* Defines the one nesting level of a preprocessor conditional.
43 typedef struct sConditionalInfo {
44 boolean ignoreAllBranches; /* ignoring parent conditional branch */
45 boolean singleBranch; /* choose only one branch */
46 boolean branchChosen; /* branch already selected */
47 boolean ignoring; /* current ignore state */
48 } conditionalInfo;
/* Which directive, if any, is currently being processed. */
enum eState {
	DRCTV_NONE,    /* no known directive - ignore to end of line */
	DRCTV_DEFINE,  /* "#define" encountered */
	DRCTV_HASH,    /* initial '#' read; determine directive */
	DRCTV_IF,      /* "#if" or "#ifdef" encountered */
	DRCTV_PRAGMA,  /* #pragma encountered */
	DRCTV_UNDEF    /* "#undef" encountered */
};
59 /* Defines the current state of the pre-processor.
61 typedef struct sCppState {
62 int ungetch, ungetch2; /* ungotten characters, if any */
63 boolean resolveRequired; /* must resolve if/else/elif/endif branch */
64 boolean hasAtLiteralStrings; /* supports @"c:\" strings */
65 struct sDirective {
66 enum eState state; /* current directive being processed */
67 boolean accept; /* is a directive syntactically permitted? */
68 vString * name; /* macro name */
69 unsigned int nestLevel; /* level 0 is not used */
70 conditionalInfo ifdef [MaxCppNestingLevel];
71 } directive;
72 } cppState;
75 * DATA DEFINITIONS
78 /* Use brace formatting to detect end of block.
80 static boolean BraceFormat = FALSE;
82 static cppState Cpp = {
83 '\0', '\0', /* ungetch characters */
84 FALSE, /* resolveRequired */
85 FALSE, /* hasAtLiteralStrings */
87 DRCTV_NONE, /* state */
88 FALSE, /* accept */
89 NULL, /* tag name */
90 0, /* nestLevel */
91 { {FALSE,FALSE,FALSE,FALSE} } /* ifdef array */
92 } /* directive */
96 * FUNCTION DEFINITIONS
99 extern boolean isBraceFormat (void)
101 return BraceFormat;
104 extern unsigned int getDirectiveNestLevel (void)
106 return Cpp.directive.nestLevel;
109 extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
111 BraceFormat = state;
113 Cpp.ungetch = '\0';
114 Cpp.ungetch2 = '\0';
115 Cpp.resolveRequired = FALSE;
116 Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
118 Cpp.directive.state = DRCTV_NONE;
119 Cpp.directive.accept = TRUE;
120 Cpp.directive.nestLevel = 0;
122 Cpp.directive.ifdef [0].ignoreAllBranches = FALSE;
123 Cpp.directive.ifdef [0].singleBranch = FALSE;
124 Cpp.directive.ifdef [0].branchChosen = FALSE;
125 Cpp.directive.ifdef [0].ignoring = FALSE;
127 if (Cpp.directive.name == NULL)
128 Cpp.directive.name = vStringNew ();
129 else
130 vStringClear (Cpp.directive.name);
133 extern void cppTerminate (void)
135 if (Cpp.directive.name != NULL)
137 vStringDelete (Cpp.directive.name);
138 Cpp.directive.name = NULL;
142 extern void cppBeginStatement (void)
144 Cpp.resolveRequired = TRUE;
147 extern void cppEndStatement (void)
149 Cpp.resolveRequired = FALSE;
153 * Scanning functions
155 * This section handles preprocessor directives. It strips out all
156 * directives and may emit a tag for #define directives.
159 /* This puts a character back into the input queue for the source File.
160 * Up to two characters may be ungotten.
162 extern void cppUngetc (const int c)
164 Assert (Cpp.ungetch2 == '\0');
165 Cpp.ungetch2 = Cpp.ungetch;
166 Cpp.ungetch = c;
169 /* Reads a directive, whose first character is given by "c", into "name".
171 static boolean readDirective (int c, char *const name, unsigned int maxLength)
173 unsigned int i;
175 for (i = 0 ; i < maxLength - 1 ; ++i)
177 if (i > 0)
179 c = fileGetc ();
180 if (c == EOF || ! isalpha (c))
182 fileUngetc (c);
183 break;
186 name [i] = c;
188 name [i] = '\0'; /* null terminate */
190 return (boolean) isspacetab (c);
193 /* Reads an identifier, whose first character is given by "c", into "tag",
194 * together with the file location and corresponding line number.
196 static void readIdentifier (int c, vString *const name)
198 vStringClear (name);
201 vStringPut (name, c);
202 } while (c = fileGetc (), (c != EOF && isident (c)));
203 fileUngetc (c);
204 vStringTerminate (name);
207 static conditionalInfo *currentConditional (void)
209 return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
212 static boolean isIgnore (void)
214 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
217 static boolean setIgnore (const boolean ignore)
219 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
222 static boolean isIgnoreBranch (void)
224 conditionalInfo *const ifdef = currentConditional ();
226 /* Force a single branch if an incomplete statement is discovered
227 * en route. This may have allowed earlier branches containing complete
228 * statements to be followed, but we must follow no further branches.
230 if (Cpp.resolveRequired && ! BraceFormat)
231 ifdef->singleBranch = TRUE;
233 /* We will ignore this branch in the following cases:
235 * 1. We are ignoring all branches (conditional was within an ignored
236 * branch of the parent conditional)
237 * 2. A branch has already been chosen and either of:
238 * a. A statement was incomplete upon entering the conditional
239 * b. A statement is incomplete upon encountering a branch
241 return (boolean) (ifdef->ignoreAllBranches ||
242 (ifdef->branchChosen && ifdef->singleBranch));
245 static void chooseBranch (void)
247 if (! BraceFormat)
249 conditionalInfo *const ifdef = currentConditional ();
251 ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
252 Cpp.resolveRequired);
256 /* Pushes one nesting level for an #if directive, indicating whether or not
257 * the branch should be ignored and whether a branch has already been chosen.
259 static boolean pushConditional (const boolean firstBranchChosen)
261 const boolean ignoreAllBranches = isIgnore (); /* current ignore */
262 boolean ignoreBranch = FALSE;
264 if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
266 conditionalInfo *ifdef;
268 ++Cpp.directive.nestLevel;
269 ifdef = currentConditional ();
271 /* We take a snapshot of whether there is an incomplete statement in
272 * progress upon encountering the preprocessor conditional. If so,
273 * then we will flag that only a single branch of the conditional
274 * should be followed.
276 ifdef->ignoreAllBranches = ignoreAllBranches;
277 ifdef->singleBranch = Cpp.resolveRequired;
278 ifdef->branchChosen = firstBranchChosen;
279 ifdef->ignoring = (boolean) (ignoreAllBranches || (
280 ! firstBranchChosen && ! BraceFormat &&
281 (ifdef->singleBranch || !Option.if0)));
282 ignoreBranch = ifdef->ignoring;
284 return ignoreBranch;
287 /* Pops one nesting level for an #endif directive.
289 static boolean popConditional (void)
291 if (Cpp.directive.nestLevel > 0)
292 --Cpp.directive.nestLevel;
294 return isIgnore ();
297 static void makeDefineTag (const char *const name, boolean parameterized)
299 const boolean isFileScope = (boolean) (! isHeaderFile ());
301 if (includingDefineTags () &&
302 (! isFileScope || Option.include.fileScope))
304 tagEntryInfo e;
306 initTagEntry (&e, name);
308 e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
309 e.isFileScope = isFileScope;
310 e.truncateLine = TRUE;
311 e.kindName = "macro";
312 e.kind = 'd';
313 if (parameterized)
315 e.extensionFields.arglist = getArglistFromFilePos(getInputFilePosition()
316 , e.name);
318 makeTagEntry (&e);
319 if (parameterized)
320 free((char *) e.extensionFields.arglist);
324 static void directiveDefine (const int c)
326 boolean parameterized;
327 int nc;
329 if (isident1 (c))
331 readIdentifier (c, Cpp.directive.name);
332 nc = fileGetc ();
333 fileUngetc (nc);
334 parameterized = (boolean) (nc == '(');
335 if (! isIgnore ())
336 makeDefineTag (vStringValue (Cpp.directive.name), parameterized);
338 Cpp.directive.state = DRCTV_NONE;
341 static void directivePragma (int c)
343 if (isident1 (c))
345 readIdentifier (c, Cpp.directive.name);
346 if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
348 /* generate macro tag for weak name */
351 c = fileGetc ();
352 } while (c == SPACE);
353 if (isident1 (c))
355 readIdentifier (c, Cpp.directive.name);
356 makeDefineTag (vStringValue (Cpp.directive.name), FALSE);
360 Cpp.directive.state = DRCTV_NONE;
363 static boolean directiveIf (const int c)
365 const boolean ignore = pushConditional ((boolean) (c != '0'));
367 Cpp.directive.state = DRCTV_NONE;
369 return ignore;
372 static boolean directiveHash (const int c)
374 boolean ignore = FALSE;
375 char directive [MaxDirectiveName];
376 DebugStatement ( const boolean ignore0 = isIgnore (); )
378 readDirective (c, directive, MaxDirectiveName);
379 if (stringMatch (directive, "define"))
380 Cpp.directive.state = DRCTV_DEFINE;
381 else if (stringMatch (directive, "undef"))
382 Cpp.directive.state = DRCTV_UNDEF;
383 else if (strncmp (directive, "if", (size_t) 2) == 0)
384 Cpp.directive.state = DRCTV_IF;
385 else if (stringMatch (directive, "elif") ||
386 stringMatch (directive, "else"))
388 ignore = setIgnore (isIgnoreBranch ());
389 if (! ignore && stringMatch (directive, "else"))
390 chooseBranch ();
391 Cpp.directive.state = DRCTV_NONE;
392 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
394 else if (stringMatch (directive, "endif"))
396 DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
397 ignore = popConditional ();
398 Cpp.directive.state = DRCTV_NONE;
399 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
401 else if (stringMatch (directive, "pragma"))
402 Cpp.directive.state = DRCTV_PRAGMA;
403 else
404 Cpp.directive.state = DRCTV_NONE;
406 return ignore;
409 /* Handles a pre-processor directive whose first character is given by "c".
411 static boolean handleDirective (const int c)
413 boolean ignore = isIgnore ();
415 switch (Cpp.directive.state)
417 case DRCTV_NONE: ignore = isIgnore (); break;
418 case DRCTV_DEFINE: directiveDefine (c); break;
419 case DRCTV_HASH: ignore = directiveHash (c); break;
420 case DRCTV_IF: ignore = directiveIf (c); break;
421 case DRCTV_PRAGMA: directivePragma (c); break;
422 case DRCTV_UNDEF: directiveDefine (c); break;
424 return ignore;
427 /* Called upon reading of a slash ('/') characters, determines whether a
428 * comment is encountered, and its type.
430 static Comment isComment (void)
432 Comment comment;
433 const int next = fileGetc ();
435 if (next == '*')
436 comment = COMMENT_C;
437 else if (next == '/')
438 comment = COMMENT_CPLUS;
439 else if (next == '+')
440 comment = COMMENT_D;
441 else
443 fileUngetc (next);
444 comment = COMMENT_NONE;
446 return comment;
449 /* Skips over a C style comment. According to ANSI specification a comment
450 * is treated as white space, so we perform this substitution.
452 int skipOverCComment (void)
454 int c = fileGetc ();
456 while (c != EOF)
458 if (c != '*')
459 c = fileGetc ();
460 else
462 const int next = fileGetc ();
464 if (next != '/')
465 c = next;
466 else
468 c = SPACE; /* replace comment with space */
469 break;
473 return c;
476 /* Skips over a C++ style comment.
478 static int skipOverCplusComment (void)
480 int c;
482 while ((c = fileGetc ()) != EOF)
484 if (c == BACKSLASH)
485 fileGetc (); /* throw away next character, too */
486 else if (c == NEWLINE)
487 break;
489 return c;
492 /* Skips over a D style comment.
493 * Really we should match nested /+ comments. At least they're less common.
495 static int skipOverDComment (void)
497 int c = fileGetc ();
499 while (c != EOF)
501 if (c != '+')
502 c = fileGetc ();
503 else
505 const int next = fileGetc ();
507 if (next != '/')
508 c = next;
509 else
511 c = SPACE; /* replace comment with space */
512 break;
516 return c;
519 /* Skips to the end of a string, returning a special character to
520 * symbolically represent a generic string.
522 static int skipToEndOfString (boolean ignoreBackslash)
524 int c;
526 while ((c = fileGetc ()) != EOF)
528 if (c == BACKSLASH && ! ignoreBackslash)
529 fileGetc (); /* throw away next character, too */
530 else if (c == DOUBLE_QUOTE)
531 break;
533 return STRING_SYMBOL; /* symbolic representation of string */
536 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
537 * special character to symbolically represent a generic character.
538 * Also detects Vera numbers that include a base specifier (ie. 'b1010).
540 static int skipToEndOfChar (void)
542 int c;
543 int count = 0, veraBase = '\0';
545 while ((c = fileGetc ()) != EOF)
547 ++count;
548 if (c == BACKSLASH)
549 fileGetc (); /* throw away next character, too */
550 else if (c == SINGLE_QUOTE)
551 break;
552 else if (c == NEWLINE)
554 fileUngetc (c);
555 break;
557 else if (count == 1 && strchr ("DHOB", toupper (c)) != NULL)
558 veraBase = c;
559 else if (veraBase != '\0' && ! isalnum (c))
561 fileUngetc (c);
562 break;
565 return CHAR_SYMBOL; /* symbolic representation of character */
568 /* This function returns the next character, stripping out comments,
569 * C pre-processor directives, and the contents of single and double
570 * quoted strings. In short, strip anything which places a burden upon
571 * the tokenizer.
573 extern int cppGetc (void)
575 boolean directive = FALSE;
576 boolean ignore = FALSE;
577 int c;
579 if (Cpp.ungetch != '\0')
581 c = Cpp.ungetch;
582 Cpp.ungetch = Cpp.ungetch2;
583 Cpp.ungetch2 = '\0';
584 return c; /* return here to avoid re-calling debugPutc () */
586 else do
588 c = fileGetc ();
589 process:
590 switch (c)
592 case EOF:
593 ignore = FALSE;
594 directive = FALSE;
595 break;
597 case TAB:
598 case SPACE:
599 break; /* ignore most white space */
601 case NEWLINE:
602 if (directive && ! ignore)
603 directive = FALSE;
604 Cpp.directive.accept = TRUE;
605 break;
607 case DOUBLE_QUOTE:
608 Cpp.directive.accept = FALSE;
609 c = skipToEndOfString (FALSE);
610 break;
612 case '#':
613 if (Cpp.directive.accept)
615 directive = TRUE;
616 Cpp.directive.state = DRCTV_HASH;
617 Cpp.directive.accept = FALSE;
619 break;
621 case SINGLE_QUOTE:
622 Cpp.directive.accept = FALSE;
623 c = skipToEndOfChar ();
624 break;
626 case '/':
628 const Comment comment = isComment ();
630 if (comment == COMMENT_C)
631 c = skipOverCComment ();
632 else if (comment == COMMENT_CPLUS)
634 c = skipOverCplusComment ();
635 if (c == NEWLINE)
636 fileUngetc (c);
638 else if (comment == COMMENT_D)
639 c = skipOverDComment ();
640 else
641 Cpp.directive.accept = FALSE;
642 break;
645 case BACKSLASH:
647 int next = fileGetc ();
649 if (next == NEWLINE)
650 continue;
651 else if (next == '?')
652 cppUngetc (next);
653 else
654 fileUngetc (next);
655 break;
658 case '?':
660 int next = fileGetc ();
661 if (next != '?')
662 fileUngetc (next);
663 else
665 next = fileGetc ();
666 switch (next)
668 case '(': c = '['; break;
669 case ')': c = ']'; break;
670 case '<': c = '{'; break;
671 case '>': c = '}'; break;
672 case '/': c = BACKSLASH; goto process;
673 case '!': c = '|'; break;
674 case SINGLE_QUOTE: c = '^'; break;
675 case '-': c = '~'; break;
676 case '=': c = '#'; goto process;
677 default:
678 fileUngetc (next);
679 cppUngetc ('?');
680 break;
683 } break;
685 default:
686 if (c == '@' && Cpp.hasAtLiteralStrings)
688 int next = fileGetc ();
689 if (next == DOUBLE_QUOTE)
691 Cpp.directive.accept = FALSE;
692 c = skipToEndOfString (TRUE);
693 break;
696 Cpp.directive.accept = FALSE;
697 if (directive)
698 ignore = handleDirective (c);
699 break;
701 } while (directive || ignore);
703 DebugStatement ( debugPutc (DEBUG_CPP, c); )
704 DebugStatement ( if (c == NEWLINE)
705 debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
707 return c;
710 extern char *getArglistFromFilePos(MIOPos startPosition, const char *tokenName)
712 MIOPos originalPosition;
713 char *result = NULL;
714 char *arglist = NULL;
715 long pos1, pos2;
717 pos2 = mio_tell(File.mio);
719 mio_getpos(File.mio, &originalPosition);
720 mio_setpos(File.mio, &startPosition);
721 pos1 = mio_tell(File.mio);
723 if (pos2 > pos1)
725 result = (char *) g_malloc(sizeof(char ) * (pos2 - pos1 + 2));
726 if (result != NULL && mio_read(File.mio, result, sizeof(char), pos2 - pos1 + 1) > 0)
728 result[pos2-pos1+1] = '\0';
729 arglist = getArglistFromStr(result, tokenName);
731 g_free(result);
733 mio_setpos(File.mio, &originalPosition);
734 return arglist;
/* Lexical context tracked by stripCodeBuffer (). */
typedef enum
{
	st_none_t,
	st_escape_t,
	st_c_comment_t,
	st_cpp_comment_t,
	st_double_quote_t,
	st_single_quote_t
} ParseState;

/*  Strips "buf" in place: comments, string literals and character literals
 *  (including their quotes) are removed, a backslash and the character
 *  after it are dropped, and every run of white space (or a removed
 *  comment) collapses to a single space.  Leading white space is dropped.
 *  The result is never longer than the input.
 */
static void stripCodeBuffer(char *buf)
{
	size_t i = 0, pos = 0;
	size_t comment_start = 0;  /* index of the '/' opening the current C comment */
	ParseState state = st_none_t, prev_state = st_none_t;

	while (buf[i] != '\0')
	{
		/* An escaped character is consumed whatever it is.  This has to be
		 * decided before the switch, otherwise an escaped '/', '"' or '\''
		 * would never leave the escape state. */
		if (st_escape_t == state)
		{
			state = prev_state;
			prev_state = st_none_t;
			++i;
			continue;
		}

		switch(buf[i])
		{
			case '/':
				if (st_none_t == state)
				{
					/* Check if this is the start of a comment */
					if (buf[i+1] == '*') /* C comment */
					{
						state = st_c_comment_t;
						comment_start = i;
					}
					else if (buf[i+1] == '/') /* C++ comment */
						state = st_cpp_comment_t;
					else /* Normal character */
						buf[pos++] = '/';
				}
				else if (st_c_comment_t == state)
				{
					/* Check if this is the end of a C comment.  The '*' must
					 * not be the one belonging to the opening delimiter, so
					 * that the sequence slash-star-slash does not count as a
					 * complete comment. */
					if ((i >= comment_start + 3) && (buf[i-1] == '*'))
					{
						if ((pos > 0) && (buf[pos-1] != ' '))
							buf[pos++] = ' ';
						state = st_none_t;
					}
				}
				break;
			case '"':
				if (st_none_t == state)
					state = st_double_quote_t;
				else if (st_double_quote_t == state)
					state = st_none_t;
				break;
			case '\'':
				if (st_none_t == state)
					state = st_single_quote_t;
				else if (st_single_quote_t == state)
					state = st_none_t;
				break;
			case '\\':
				prev_state = state;
				state = st_escape_t;
				break;
			default:
				if ((buf[i] == '\n') && (st_cpp_comment_t == state))
				{
					if ((pos > 0) && (buf[pos-1] != ' '))
						buf[pos++] = ' ';
					state = st_none_t;
				}
				else if (st_none_t == state)
				{
					/* <ctype.h> functions need an unsigned char value */
					if (isspace((unsigned char) buf[i]))
					{
						if ((pos > 0) && (buf[pos-1] != ' '))
							buf[pos++] = ' ';
					}
					else
						buf[pos++] = buf[i];
				}
				break;
		}
		++i;
	}
	buf[pos] = '\0';
	return;
}
/*  Returns a newly allocated copy of the parenthesised argument list that
 *  follows the first occurrence of "name" in "buf", or NULL when "buf" or
 *  "name" is NULL, "name" is empty, or no such list is found.
 *
 *  "buf" is modified: it is first stripped by stripCodeBuffer () and then
 *  cut off after the argument list.  The result comes from strdup ().
 */
extern char *getArglistFromStr(char *buf, const char *name)
{
	char *start, *end;
	int level;
	if ((NULL == buf) || (NULL == name) || ('\0' == name[0]))
		return NULL;
	stripCodeBuffer(buf);
	if (NULL == (start = strstr(buf, name)))
		return NULL;
	if (NULL == (start = strchr(start, '(')))
		return NULL;
	/* Walk to the parenthesis matching the opening one; an unbalanced list
	 * simply runs to the end of the buffer. */
	for (level = 1, end = start + 1; level > 0; ++end)
	{
		if ('\0' == *end)
			break;
		else if ('(' == *end)
			++ level;
		else if (')' == *end)
			-- level;
	}
	*end = '\0';  /* "end" is one past the closing ')' or at the terminator */
	return strdup(start);
}
/* vi:set tabstop=4 shiftwidth=4: */