Version bump.
[geany-mirror.git] / tagmanager / get.c
blobe2d79aa31ee6b64dbdc19de4e69e8eb720f508dd
/*
*   $Id$
*
*   Copyright (c) 1996-2002, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains the high level source read functions (preprocessor
*   directives are handled within this level).
*/
/*
*   INCLUDE FILES
*/
16 #include "general.h" /* must always come first */
18 #include <string.h>
19 #include <glib.h>
21 #include "entry.h"
22 #include "get.h"
23 #include "options.h"
24 #include "read.h"
25 #include "vstring.h"
/*
*   MACROS
*/
30 #define stringMatch(s1,s2) (strcmp (s1,s2) == 0)
31 #define isspacetab(c) ((c) == SPACE || (c) == TAB)
/*
*   DATA DECLARATIONS
*/
/* Kind of comment found after reading a '/' character (see isComment ()). */
typedef enum {
	COMMENT_NONE,	/* the '/' does not start a comment */
	COMMENT_C,		/* '/' followed by '*' */
	COMMENT_CPLUS	/* '/' followed by '/' */
} Comment;
/* Fixed limits used by the preprocessor handling in this file. */
enum eCppLimits {
	MaxCppNestingLevel = 20,	/* entries in the conditional nesting array */
	MaxDirectiveName   = 10		/* size of the buffer holding a directive name */
};
43 /* Defines the one nesting level of a preprocessor conditional.
45 typedef struct sConditionalInfo {
46 boolean ignoreAllBranches; /* ignoring parent conditional branch */
47 boolean singleBranch; /* choose only one branch */
48 boolean branchChosen; /* branch already selected */
49 boolean ignoring; /* current ignore state */
50 } conditionalInfo;
/* Directive currently being processed; consumed by handleDirective (). */
enum eState {
	DRCTV_NONE,		/* no known directive - ignore to end of line */
	DRCTV_DEFINE,	/* "#define" encountered */
	DRCTV_HASH,		/* initial '#' read; determine directive */
	DRCTV_IF,		/* "#if" or "#ifdef" encountered */
	DRCTV_PRAGMA,	/* #pragma encountered */
	DRCTV_UNDEF		/* "#undef" encountered */
};
61 /* Defines the current state of the pre-processor.
63 typedef struct sCppState {
64 int ungetch, ungetch2; /* ungotten characters, if any */
65 boolean resolveRequired; /* must resolve if/else/elif/endif branch */
66 boolean hasAtLiteralStrings; /* supports @"c:\" strings */
67 struct sDirective {
68 enum eState state; /* current directive being processed */
69 boolean accept; /* is a directive syntactically permitted? */
70 vString * name; /* macro name */
71 unsigned int nestLevel; /* level 0 is not used */
72 conditionalInfo ifdef [MaxCppNestingLevel];
73 } directive;
74 } cppState;
/*
*   DATA DEFINITIONS
*/
80 /* Use brace formatting to detect end of block.
82 static boolean BraceFormat = FALSE;
84 static cppState Cpp = {
85 '\0', '\0', /* ungetch characters */
86 FALSE, /* resolveRequired */
87 FALSE, /* hasAtLiteralStrings */
89 DRCTV_NONE, /* state */
90 FALSE, /* accept */
91 NULL, /* tag name */
92 0, /* nestLevel */
93 { {FALSE,FALSE,FALSE,FALSE} } /* ifdef array */
94 } /* directive */
/*
*   FUNCTION DEFINITIONS
*/
101 extern boolean isBraceFormat (void)
103 return BraceFormat;
106 extern unsigned int getDirectiveNestLevel (void)
108 return Cpp.directive.nestLevel;
111 extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
113 BraceFormat = state;
115 Cpp.ungetch = '\0';
116 Cpp.ungetch2 = '\0';
117 Cpp.resolveRequired = FALSE;
118 Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
120 Cpp.directive.state = DRCTV_NONE;
121 Cpp.directive.accept = TRUE;
122 Cpp.directive.nestLevel = 0;
124 Cpp.directive.ifdef [0].ignoreAllBranches = FALSE;
125 Cpp.directive.ifdef [0].singleBranch = FALSE;
126 Cpp.directive.ifdef [0].branchChosen = FALSE;
127 Cpp.directive.ifdef [0].ignoring = FALSE;
129 if (Cpp.directive.name == NULL)
130 Cpp.directive.name = vStringNew ();
131 else
132 vStringClear (Cpp.directive.name);
135 extern void cppTerminate (void)
137 if (Cpp.directive.name != NULL)
139 vStringDelete (Cpp.directive.name);
140 Cpp.directive.name = NULL;
144 extern void cppBeginStatement (void)
146 Cpp.resolveRequired = TRUE;
149 extern void cppEndStatement (void)
151 Cpp.resolveRequired = FALSE;
/*
*   Scanning functions
*
*   This section handles preprocessor directives.  It strips out all
*   directives and may emit a tag for #define directives.
*/
161 /* This puts a character back into the input queue for the source File.
162 * Up to two characters may be ungotten.
164 extern void cppUngetc (const int c)
166 Assert (Cpp.ungetch2 == '\0');
167 Cpp.ungetch2 = Cpp.ungetch;
168 Cpp.ungetch = c;
171 /* Reads a directive, whose first character is given by "c", into "name".
173 static boolean readDirective (int c, char *const name, unsigned int maxLength)
175 unsigned int i;
177 for (i = 0 ; i < maxLength - 1 ; ++i)
179 if (i > 0)
181 c = fileGetc ();
182 if (c == EOF || ! isalpha (c))
184 fileUngetc (c);
185 break;
188 name [i] = c;
190 name [i] = '\0'; /* null terminate */
192 return (boolean) isspacetab (c);
195 /* Reads an identifier, whose first character is given by "c", into "tag",
196 * together with the file location and corresponding line number.
198 static void readIdentifier (int c, vString *const name)
200 vStringClear (name);
203 vStringPut (name, c);
204 } while (c = fileGetc (), (c != EOF && isident (c)));
205 fileUngetc (c);
206 vStringTerminate (name);
209 static conditionalInfo *currentConditional (void)
211 return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
214 static boolean isIgnore (void)
216 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
219 static boolean setIgnore (const boolean ignore)
221 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
224 static boolean isIgnoreBranch (void)
226 conditionalInfo *const ifdef = currentConditional ();
228 /* Force a single branch if an incomplete statement is discovered
229 * en route. This may have allowed earlier branches containing complete
230 * statements to be followed, but we must follow no further branches.
232 if (Cpp.resolveRequired && ! BraceFormat)
233 ifdef->singleBranch = TRUE;
235 /* We will ignore this branch in the following cases:
237 * 1. We are ignoring all branches (conditional was within an ignored
238 * branch of the parent conditional)
239 * 2. A branch has already been chosen and either of:
240 * a. A statement was incomplete upon entering the conditional
241 * b. A statement is incomplete upon encountering a branch
243 return (boolean) (ifdef->ignoreAllBranches ||
244 (ifdef->branchChosen && ifdef->singleBranch));
247 static void chooseBranch (void)
249 if (! BraceFormat)
251 conditionalInfo *const ifdef = currentConditional ();
253 ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
254 Cpp.resolveRequired);
258 /* Pushes one nesting level for an #if directive, indicating whether or not
259 * the branch should be ignored and whether a branch has already been chosen.
261 static boolean pushConditional (const boolean firstBranchChosen)
263 const boolean ignoreAllBranches = isIgnore (); /* current ignore */
264 boolean ignoreBranch = FALSE;
266 if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
268 conditionalInfo *ifdef;
270 ++Cpp.directive.nestLevel;
271 ifdef = currentConditional ();
273 /* We take a snapshot of whether there is an incomplete statement in
274 * progress upon encountering the preprocessor conditional. If so,
275 * then we will flag that only a single branch of the conditional
276 * should be followed.
278 ifdef->ignoreAllBranches = ignoreAllBranches;
279 ifdef->singleBranch = Cpp.resolveRequired;
280 ifdef->branchChosen = firstBranchChosen;
281 ifdef->ignoring = (boolean) (ignoreAllBranches || (
282 ! firstBranchChosen && ! BraceFormat &&
283 (ifdef->singleBranch || !Option.if0)));
284 ignoreBranch = ifdef->ignoring;
286 return ignoreBranch;
289 /* Pops one nesting level for an #endif directive.
291 static boolean popConditional (void)
293 if (Cpp.directive.nestLevel > 0)
294 --Cpp.directive.nestLevel;
296 return isIgnore ();
299 static void makeDefineTag (const char *const name, boolean parameterized)
301 const boolean isFileScope = (boolean) (! isHeaderFile ());
303 if (includingDefineTags () &&
304 (! isFileScope || Option.include.fileScope))
306 tagEntryInfo e;
308 initTagEntry (&e, name);
310 e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
311 e.isFileScope = isFileScope;
312 e.truncateLine = TRUE;
313 e.kindName = "macro";
314 e.kind = 'd';
315 if (parameterized)
317 if (useFile()) {
318 e.extensionFields.arglist = getArglistFromFilePos(getInputFilePosition()
319 , e.name);
321 else {
322 e.extensionFields.arglist = getArglistFromBufferPos(getInputBufferPosition()
323 , e.name);
326 makeTagEntry (&e);
327 if (parameterized)
328 free((char *) e.extensionFields.arglist);
332 static void directiveDefine (const int c)
334 boolean parameterized;
335 int nc;
337 if (isident1 (c))
339 readIdentifier (c, Cpp.directive.name);
340 nc = fileGetc ();
341 fileUngetc (nc);
342 parameterized = (boolean) (nc == '(');
343 if (! isIgnore ())
344 makeDefineTag (vStringValue (Cpp.directive.name), parameterized);
346 Cpp.directive.state = DRCTV_NONE;
349 static void directivePragma (int c)
351 if (isident1 (c))
353 readIdentifier (c, Cpp.directive.name);
354 if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
356 /* generate macro tag for weak name */
359 c = fileGetc ();
360 } while (c == SPACE);
361 if (isident1 (c))
363 readIdentifier (c, Cpp.directive.name);
364 makeDefineTag (vStringValue (Cpp.directive.name), FALSE);
368 Cpp.directive.state = DRCTV_NONE;
371 static boolean directiveIf (const int c)
373 const boolean ignore = pushConditional ((boolean) (c != '0'));
375 Cpp.directive.state = DRCTV_NONE;
377 return ignore;
380 static boolean directiveHash (const int c)
382 boolean ignore = FALSE;
383 char directive [MaxDirectiveName];
384 DebugStatement ( const boolean ignore0 = isIgnore (); )
386 readDirective (c, directive, MaxDirectiveName);
387 if (stringMatch (directive, "define"))
388 Cpp.directive.state = DRCTV_DEFINE;
389 else if (stringMatch (directive, "undef"))
390 Cpp.directive.state = DRCTV_UNDEF;
391 else if (strncmp (directive, "if", (size_t) 2) == 0)
392 Cpp.directive.state = DRCTV_IF;
393 else if (stringMatch (directive, "elif") ||
394 stringMatch (directive, "else"))
396 ignore = setIgnore (isIgnoreBranch ());
397 if (! ignore && stringMatch (directive, "else"))
398 chooseBranch ();
399 Cpp.directive.state = DRCTV_NONE;
400 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
402 else if (stringMatch (directive, "endif"))
404 DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
405 ignore = popConditional ();
406 Cpp.directive.state = DRCTV_NONE;
407 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
409 else if (stringMatch (directive, "pragma"))
410 Cpp.directive.state = DRCTV_PRAGMA;
411 else
412 Cpp.directive.state = DRCTV_NONE;
414 return ignore;
417 /* Handles a pre-processor directive whose first character is given by "c".
419 static boolean handleDirective (const int c)
421 boolean ignore = isIgnore ();
423 switch (Cpp.directive.state)
425 case DRCTV_NONE: ignore = isIgnore (); break;
426 case DRCTV_DEFINE: directiveDefine (c); break;
427 case DRCTV_HASH: ignore = directiveHash (c); break;
428 case DRCTV_IF: ignore = directiveIf (c); break;
429 case DRCTV_PRAGMA: directivePragma (c); break;
430 case DRCTV_UNDEF: directiveDefine (c); break;
432 return ignore;
435 /* Called upon reading of a slash ('/') characters, determines whether a
436 * comment is encountered, and its type.
438 static Comment isComment (void)
440 Comment comment;
441 const int next = fileGetc ();
443 if (next == '*')
444 comment = COMMENT_C;
445 else if (next == '/')
446 comment = COMMENT_CPLUS;
447 else
449 fileUngetc (next);
450 comment = COMMENT_NONE;
452 return comment;
455 /* Skips over a C style comment. According to ANSI specification a comment
456 * is treated as white space, so we perform this substitution.
458 int skipOverCComment (void)
460 int c = fileGetc ();
462 while (c != EOF)
464 if (c != '*')
465 c = fileGetc ();
466 else
468 const int next = fileGetc ();
470 if (next != '/')
471 c = next;
472 else
474 c = SPACE; /* replace comment with space */
475 break;
479 return c;
482 /* Skips over a C++ style comment.
484 static int skipOverCplusComment (void)
486 int c;
488 while ((c = fileGetc ()) != EOF)
490 if (c == BACKSLASH)
491 fileGetc (); /* throw away next character, too */
492 else if (c == NEWLINE)
493 break;
495 return c;
498 /* Skips to the end of a string, returning a special character to
499 * symbolically represent a generic string.
501 static int skipToEndOfString (boolean ignoreBackslash)
503 int c;
505 while ((c = fileGetc ()) != EOF)
507 if (c == BACKSLASH && ! ignoreBackslash)
508 fileGetc (); /* throw away next character, too */
509 else if (c == DOUBLE_QUOTE)
510 break;
512 return STRING_SYMBOL; /* symbolic representation of string */
515 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
516 * special character to symbolically represent a generic character.
517 * Also detects Vera numbers that include a base specifier (ie. 'b1010).
519 static int skipToEndOfChar (void)
521 int c;
522 int count = 0, veraBase = '\0';
524 while ((c = fileGetc ()) != EOF)
526 ++count;
527 if (c == BACKSLASH)
528 fileGetc (); /* throw away next character, too */
529 else if (c == SINGLE_QUOTE)
530 break;
531 else if (c == NEWLINE)
533 fileUngetc (c);
534 break;
536 else if (count == 1 && strchr ("DHOB", toupper (c)) != NULL)
537 veraBase = c;
538 else if (veraBase != '\0' && ! isalnum (c))
540 fileUngetc (c);
541 break;
544 return CHAR_SYMBOL; /* symbolic representation of character */
547 /* This function returns the next character, stripping out comments,
548 * C pre-processor directives, and the contents of single and double
549 * quoted strings. In short, strip anything which places a burden upon
550 * the tokenizer.
552 extern int cppGetc (void)
554 boolean directive = FALSE;
555 boolean ignore = FALSE;
556 int c;
558 if (Cpp.ungetch != '\0')
560 c = Cpp.ungetch;
561 Cpp.ungetch = Cpp.ungetch2;
562 Cpp.ungetch2 = '\0';
563 return c; /* return here to avoid re-calling debugPutc () */
565 else do
567 c = fileGetc ();
568 process:
569 switch (c)
571 case EOF:
572 ignore = FALSE;
573 directive = FALSE;
574 break;
576 case TAB:
577 case SPACE:
578 break; /* ignore most white space */
580 case NEWLINE:
581 if (directive && ! ignore)
582 directive = FALSE;
583 Cpp.directive.accept = TRUE;
584 break;
586 case DOUBLE_QUOTE:
587 Cpp.directive.accept = FALSE;
588 c = skipToEndOfString (FALSE);
589 break;
591 case '#':
592 if (Cpp.directive.accept)
594 directive = TRUE;
595 Cpp.directive.state = DRCTV_HASH;
596 Cpp.directive.accept = FALSE;
598 break;
600 case SINGLE_QUOTE:
601 Cpp.directive.accept = FALSE;
602 c = skipToEndOfChar ();
603 break;
605 case '/':
607 const Comment comment = isComment ();
609 if (comment == COMMENT_C)
610 c = skipOverCComment ();
611 else if (comment == COMMENT_CPLUS)
613 c = skipOverCplusComment ();
614 if (c == NEWLINE)
615 fileUngetc (c);
617 else
618 Cpp.directive.accept = FALSE;
619 break;
622 case BACKSLASH:
624 int next = fileGetc ();
626 if (next == NEWLINE)
627 continue;
628 else if (next == '?')
629 cppUngetc (next);
630 else
631 fileUngetc (next);
632 break;
635 case '?':
637 int next = fileGetc ();
638 if (next != '?')
639 fileUngetc (next);
640 else
642 next = fileGetc ();
643 switch (next)
645 case '(': c = '['; break;
646 case ')': c = ']'; break;
647 case '<': c = '{'; break;
648 case '>': c = '}'; break;
649 case '/': c = BACKSLASH; goto process;
650 case '!': c = '|'; break;
651 case SINGLE_QUOTE: c = '^'; break;
652 case '-': c = '~'; break;
653 case '=': c = '#'; goto process;
654 default:
655 fileUngetc (next);
656 cppUngetc ('?');
657 break;
660 } break;
662 default:
663 if (c == '@' && Cpp.hasAtLiteralStrings)
665 int next = fileGetc ();
666 if (next == DOUBLE_QUOTE)
668 Cpp.directive.accept = FALSE;
669 c = skipToEndOfString (TRUE);
670 break;
673 Cpp.directive.accept = FALSE;
674 if (directive)
675 ignore = handleDirective (c);
676 break;
678 } while (directive || ignore);
680 DebugStatement ( debugPutc (DEBUG_CPP, c); )
681 DebugStatement ( if (c == NEWLINE)
682 debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
684 return c;
687 extern char *getArglistFromBufferPos(int startPosition, const char *tokenName)
689 int bufferOriginalPosition;
690 char *result = NULL;
691 char *arglist = NULL;
692 long pos1, pos2;
694 /* FIXME startPosition as well as getBufPos() are mostly wrong here */
695 pos2 = getBufPos();
697 if (!useFile()) {
698 bufferOriginalPosition = getBufPos ();
699 setBufPos(startPosition);
700 pos1 = File.fpBufferPosition;
702 else
703 return NULL;
705 if (pos2 > pos1)
707 result = (char *) g_malloc(sizeof(char ) * (pos2 - pos1 + 2));
708 if (result != NULL)
710 memcpy(result, &File.fpBuffer[getBufPos()], pos2 - pos1 + 1);
711 result[pos2-pos1+1] = '\0';
712 arglist = getArglistFromStr(result, tokenName);
713 g_free(result);
716 setBufPos (bufferOriginalPosition);
717 return arglist;
720 extern char *getArglistFromFilePos(fpos_t startPosition, const char *tokenName)
722 fpos_t originalPosition;
723 char *result = NULL;
724 char *arglist = NULL;
725 long pos1, pos2;
727 pos2 = ftell(File.fp);
729 fgetpos(File.fp, &originalPosition);
730 fsetpos(File.fp, &startPosition);
731 pos1 = ftell(File.fp);
733 if (pos2 > pos1)
735 result = (char *) g_malloc(sizeof(char ) * (pos2 - pos1 + 2));
736 if (result != NULL && fread(result, sizeof(char), pos2 - pos1 + 1, File.fp) > 0)
738 result[pos2-pos1+1] = '\0';
739 arglist = getArglistFromStr(result, tokenName);
741 g_free(result);
743 fsetpos(File.fp, &originalPosition);
744 return arglist;
/* Lexical context tracked by stripCodeBuffer () while scanning. */
typedef enum
{
	st_none_t,			/* ordinary code */
	st_escape_t,		/* the previous character was a backslash */
	st_c_comment_t,		/* inside a C style comment */
	st_cpp_comment_t,	/* inside a C++ style comment */
	st_double_quote_t,	/* inside a double quoted string */
	st_single_quote_t	/* inside a single quoted character constant */
} ParseState;

/*  Strips "buf" in place: comments, quoted strings and character constants
 *  (including their quotes) and backslash escapes are removed, and every run
 *  of white space is collapsed into a single space.  Leading white space is
 *  dropped.  The result is never longer than the input.
 */
static void stripCodeBuffer(char *buf)
{
	size_t i, pos = 0;
	size_t comment_body = 0;	/* index just past the opener of a C comment */
	ParseState state = st_none_t, prev_state = st_none_t;

	for (i = 0; buf[i] != '\0'; ++i)
	{
		if (st_escape_t == state)
		{
			/*  The escaped character is consumed whatever it is.  A quote
			 *  or a slash must not be interpreted here, nor may it leave
			 *  the escape state pending.
			 */
			state = prev_state;
			prev_state = st_none_t;
			continue;
		}

		switch (buf[i])
		{
			case '\\':
				prev_state = state;
				state = st_escape_t;
				break;
			case '/':
				if (st_none_t == state)
				{
					/* Check if this is the start of a comment */
					if (buf[i+1] == '*') /* C comment */
					{
						state = st_c_comment_t;
						comment_body = i + 2;
					}
					else if (buf[i+1] == '/') /* C++ comment */
						state = st_cpp_comment_t;
					else /* Normal character */
						buf[pos++] = '/';
				}
				else if ((st_c_comment_t == state) && (i > comment_body) &&
						 (buf[i-1] == '*'))
				{
					/*  End of a C comment.  The star must lie inside the
					 *  comment body, so that the star of the opener in
					 *  a slash-star-slash sequence does not count.
					 */
					if ((pos > 0) && (buf[pos-1] != ' '))
						buf[pos++] = ' ';
					state = st_none_t;
				}
				break;
			case '"':
				if (st_none_t == state)
					state = st_double_quote_t;
				else if (st_double_quote_t == state)
					state = st_none_t;
				break;
			case '\'':
				if (st_none_t == state)
					state = st_single_quote_t;
				else if (st_single_quote_t == state)
					state = st_none_t;
				break;
			default:
				if ((buf[i] == '\n') && (st_cpp_comment_t == state))
				{
					/* a newline ends a C++ comment and counts as white space */
					if ((pos > 0) && (buf[pos-1] != ' '))
						buf[pos++] = ' ';
					state = st_none_t;
				}
				else if (st_none_t == state)
				{
					/* the cast keeps isspace () defined for negative chars */
					if (isspace((unsigned char) buf[i]))
					{
						if ((pos > 0) && (buf[pos-1] != ' '))
							buf[pos++] = ' ';
					}
					else
						buf[pos++] = buf[i];
				}
				break;
		}
	}
	buf[pos] = '\0';
}
/*  Returns a newly allocated copy of the parenthesized argument list that
 *  follows the first occurrence of "name" in "buf", or NULL when "buf" or
 *  "name" is missing, "name" is empty, or no such list is found.
 *
 *  "buf" is modified: it is stripped with stripCodeBuffer () and cut off
 *  right behind the argument list.  The copy runs from the opening
 *  parenthesis to its matching closing one, or to the end of the text when
 *  the parentheses are unbalanced.  The caller releases it with free ().
 */
extern char *getArglistFromStr(char *buf, const char *name)
{
	char *open, *cursor;
	int depth = 1;

	if (buf == NULL || name == NULL || name[0] == '\0')
		return NULL;

	stripCodeBuffer(buf);

	open = strstr(buf, name);
	if (open == NULL)
		return NULL;

	open = strchr(open, '(');
	if (open == NULL)
		return NULL;

	/* walk to just behind the parenthesis that balances the opening one */
	cursor = open + 1;
	while (depth > 0 && *cursor != '\0')
	{
		if (*cursor == '(')
			++depth;
		else if (*cursor == ')')
			--depth;
		++cursor;
	}
	*cursor = '\0';

	return strdup(open);
}
/* vi:set tabstop=4 shiftwidth=4: */