Updated Spanish translation
[anjuta-git-plugin.git] / tagmanager / get.c
blob98bab019b4b2ca6a4c2a667ec69720b4062b0930
1 /*
2 * $Id$
4 * Copyright (c) 1996-2002, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains the high level source read functions (preprocessor
10 * directives are handled within this level).
14 * INCLUDE FILES
16 #include "general.h" /* must always come first */
18 #include <string.h>
20 #include "debug.h"
21 #include "entry.h"
22 #include "get.h"
23 #include "options.h"
24 #include "read.h"
25 #include "vstring.h"
/*
*   MACROS
*/

/*  Nonzero when the two NUL-terminated strings s1 and s2 compare equal.
 */
#define stringMatch(s1,s2)	(strcmp ((s1), (s2)) == 0)

/*  Nonzero when c is SPACE or TAB. The argument is evaluated twice, so it
 *  must not have side effects.
 */
#define isspacetab(c)		(((c) == SPACE) || ((c) == TAB))
/*
*   DATA DECLARATIONS
*/

/*  Result of inspecting the character that follows a '/'.
 */
typedef enum {
	COMMENT_NONE,	/* the slash does not start a comment */
	COMMENT_C,		/* slash followed by '*' */
	COMMENT_CPLUS	/* slash followed by another slash */
} Comment;
/*  Fixed sizes used while processing preprocessor directives.
 */
enum eCppLimits {
	MaxCppNestingLevel = 20,	/* entries in the conditional nesting array */
	MaxDirectiveName   = 10		/* size of the directive name buffer */
};
43 /* Defines the one nesting level of a preprocessor conditional.
45 typedef struct sConditionalInfo {
46 boolean ignoreAllBranches; /* ignoring parent conditional branch */
47 boolean singleBranch; /* choose only one branch */
48 boolean branchChosen; /* branch already selected */
49 boolean ignoring; /* current ignore state */
50 } conditionalInfo;
/*  Directive currently being processed by the preprocessor scanner.
 */
enum eState {
	DRCTV_NONE,		/* no known directive - ignore to end of line */
	DRCTV_DEFINE,	/* "#define" encountered */
	DRCTV_HASH,		/* initial '#' read; determine directive */
	DRCTV_IF,		/* "#if" or "#ifdef" encountered */
	DRCTV_PRAGMA,	/* "#pragma" encountered */
	DRCTV_UNDEF		/* "#undef" encountered */
};
61 /* Defines the current state of the pre-processor.
63 typedef struct sCppState {
64 int ungetch, ungetch2; /* ungotten characters, if any */
65 boolean resolveRequired; /* must resolve if/else/elif/endif branch */
66 struct sDirective {
67 enum eState state; /* current directive being processed */
68 boolean accept; /* is a directive syntatically permitted? */
69 vString * name; /* macro name */
70 unsigned int nestLevel; /* level 0 is not used */
71 conditionalInfo ifdef [MaxCppNestingLevel];
72 } directive;
73 } cppState;
76 * DATA DEFINITIONS
79 /* Use brace formatting to detect end of block.
81 static boolean BraceFormat = FALSE;
83 static cppState Cpp = {
84 '\0', '\0', /* ungetch characters */
85 FALSE, /* resolveRequired */
87 DRCTV_NONE, /* state */
88 FALSE, /* accept */
89 NULL, /* tag name */
90 0, /* nestLevel */
91 { {FALSE,FALSE,FALSE,FALSE} } /* ifdef array */
92 } /* directive */
96 * FUNCTION DEFINITIONS
99 extern boolean isBraceFormat (void)
101 return BraceFormat;
104 extern unsigned int getDirectiveNestLevel (void)
106 return Cpp.directive.nestLevel;
109 extern void cppInit (const boolean state)
111 BraceFormat = state;
113 Cpp.ungetch = '\0';
114 Cpp.ungetch2 = '\0';
115 Cpp.resolveRequired = FALSE;
117 Cpp.directive.state = DRCTV_NONE;
118 Cpp.directive.accept = TRUE;
119 Cpp.directive.nestLevel = 0;
121 Cpp.directive.ifdef [0].ignoreAllBranches = FALSE;
122 Cpp.directive.ifdef [0].singleBranch = FALSE;
123 Cpp.directive.ifdef [0].branchChosen = FALSE;
124 Cpp.directive.ifdef [0].ignoring = FALSE;
126 if (Cpp.directive.name == NULL)
127 Cpp.directive.name = vStringNew ();
128 else
129 vStringClear (Cpp.directive.name);
132 extern void cppTerminate (void)
134 if (Cpp.directive.name != NULL)
136 vStringDelete (Cpp.directive.name);
137 Cpp.directive.name = NULL;
141 extern void cppBeginStatement (void)
143 Cpp.resolveRequired = TRUE;
146 extern void cppEndStatement (void)
148 Cpp.resolveRequired = FALSE;
152 * Scanning functions
154 * This section handles preprocessor directives. It strips out all
155 * directives and may emit a tag for #define directives.
158 /* This puts a character back into the input queue for the source File.
159 * Up to two characters may be ungotten.
161 extern void cppUngetc (const int c)
163 Assert (Cpp.ungetch2 == '\0');
164 Cpp.ungetch2 = Cpp.ungetch;
165 Cpp.ungetch = c;
168 /* Reads a directive, whose first character is given by "c", into "name".
170 static boolean readDirective (int c, char *const name, unsigned int maxLength)
172 unsigned int i;
174 for (i = 0 ; i < maxLength - 1 ; ++i)
176 if (i > 0)
178 c = fileGetc ();
179 if (c == EOF || ! isalpha (c))
181 fileUngetc (c);
182 break;
185 name [i] = c;
187 name [i] = '\0'; /* null terminate */
189 return (boolean) isspacetab (c);
192 /* Reads an identifier, whose first character is given by "c", into "tag",
193 * together with the file location and corresponding line number.
195 static void readIdentifier (int c, vString *const name)
197 vStringClear (name);
200 vStringPut (name, c);
201 } while (c = fileGetc (), (c != EOF && isident (c)));
202 fileUngetc (c);
203 vStringTerminate (name);
206 static conditionalInfo *currentConditional (void)
208 return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
211 static boolean isIgnore (void)
213 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
216 static boolean setIgnore (const boolean ignore)
218 return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
221 static boolean isIgnoreBranch (void)
223 conditionalInfo *const ifdef = currentConditional ();
225 /* Force a single branch if an incomplete statement is discovered
226 * en route. This may have allowed earlier branches containing complete
227 * statements to be followed, but we must follow no further branches.
229 if (Cpp.resolveRequired && ! BraceFormat)
230 ifdef->singleBranch = TRUE;
232 /* We will ignore this branch in the following cases:
234 * 1. We are ignoring all branches (conditional was within an ignored
235 * branch of the parent conditional)
236 * 2. A branch has already been chosen and either of:
237 * a. A statement was incomplete upon entering the conditional
238 * b. A statement is incomplete upon encountering a branch
240 return (boolean) (ifdef->ignoreAllBranches ||
241 (ifdef->branchChosen && ifdef->singleBranch));
244 static void chooseBranch (void)
246 if (! BraceFormat)
248 conditionalInfo *const ifdef = currentConditional ();
250 ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
251 Cpp.resolveRequired);
255 /* Pushes one nesting level for an #if directive, indicating whether or not
256 * the branch should be ignored and whether a branch has already been chosen.
258 static boolean pushConditional (const boolean firstBranchChosen)
260 const boolean ignoreAllBranches = isIgnore (); /* current ignore */
261 boolean ignoreBranch = FALSE;
263 if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
265 conditionalInfo *ifdef;
267 ++Cpp.directive.nestLevel;
268 ifdef = currentConditional ();
270 /* We take a snapshot of whether there is an incomplete statement in
271 * progress upon encountering the preprocessor conditional. If so,
272 * then we will flag that only a single branch of the conditional
273 * should be followed.
275 ifdef->ignoreAllBranches = ignoreAllBranches;
276 ifdef->singleBranch = Cpp.resolveRequired;
277 ifdef->branchChosen = firstBranchChosen;
278 ifdef->ignoring = (boolean) (ignoreAllBranches || (
279 ! firstBranchChosen && ! BraceFormat &&
280 (ifdef->singleBranch || !Option.if0)));
281 ignoreBranch = ifdef->ignoring;
283 return ignoreBranch;
286 /* Pops one nesting level for an #endif directive.
288 static boolean popConditional (void)
290 if (Cpp.directive.nestLevel > 0)
291 --Cpp.directive.nestLevel;
293 return isIgnore ();
296 static void makeDefineTag (const char *const name)
298 const boolean isFileScope = (boolean) (! isHeaderFile ());
300 if (includingDefineTags () &&
301 (! isFileScope || Option.include.fileScope))
303 tagEntryInfo e;
304 initTagEntry (&e, name);
305 e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
306 e.isFileScope = isFileScope;
307 e.truncateLine = TRUE;
308 e.kindName = "macro";
309 e.kind = 'd';
310 makeTagEntry (&e);
314 static void directiveDefine (const int c)
316 if (isident1 (c))
318 readIdentifier (c, Cpp.directive.name);
319 if (! isIgnore ())
320 makeDefineTag (vStringValue (Cpp.directive.name));
322 Cpp.directive.state = DRCTV_NONE;
325 static void directivePragma (int c)
327 if (isident1 (c))
329 readIdentifier (c, Cpp.directive.name);
330 if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
332 /* generate macro tag for weak name */
335 c = fileGetc ();
336 } while (c == SPACE);
337 if (isident1 (c))
339 readIdentifier (c, Cpp.directive.name);
340 makeDefineTag (vStringValue (Cpp.directive.name));
344 Cpp.directive.state = DRCTV_NONE;
347 static boolean directiveIf (const int c)
349 DebugStatement ( const boolean ignore0 = isIgnore (); )
350 const boolean ignore = pushConditional ((boolean) (c != '0'));
352 Cpp.directive.state = DRCTV_NONE;
353 DebugStatement ( debugCppNest (TRUE, Cpp.directive.nestLevel);
354 if (ignore != ignore0) debugCppIgnore (ignore); )
356 return ignore;
359 static boolean directiveHash (const int c)
361 boolean ignore = FALSE;
362 char directive [MaxDirectiveName];
363 DebugStatement ( const boolean ignore0 = isIgnore (); )
365 readDirective (c, directive, MaxDirectiveName);
366 if (stringMatch (directive, "define"))
367 Cpp.directive.state = DRCTV_DEFINE;
368 else if (stringMatch (directive, "undef"))
369 Cpp.directive.state = DRCTV_UNDEF;
370 else if (strncmp (directive, "if", (size_t) 2) == 0)
371 Cpp.directive.state = DRCTV_IF;
372 else if (stringMatch (directive, "elif") ||
373 stringMatch (directive, "else"))
375 ignore = setIgnore (isIgnoreBranch ());
376 if (! ignore && stringMatch (directive, "else"))
377 chooseBranch ();
378 Cpp.directive.state = DRCTV_NONE;
379 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
381 else if (stringMatch (directive, "endif"))
383 DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
384 ignore = popConditional ();
385 Cpp.directive.state = DRCTV_NONE;
386 DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
388 else if (stringMatch (directive, "pragma"))
389 Cpp.directive.state = DRCTV_PRAGMA;
390 else
391 Cpp.directive.state = DRCTV_NONE;
393 return ignore;
396 /* Handles a pre-processor directive whose first character is given by "c".
398 static boolean handleDirective (const int c)
400 boolean ignore = isIgnore ();
402 switch (Cpp.directive.state)
404 case DRCTV_NONE: ignore = isIgnore (); break;
405 case DRCTV_DEFINE: directiveDefine (c); break;
406 case DRCTV_HASH: ignore = directiveHash (c); break;
407 case DRCTV_IF: ignore = directiveIf (c); break;
408 case DRCTV_PRAGMA: directivePragma (c); break;
409 case DRCTV_UNDEF: directiveDefine (c); break;
411 return ignore;
414 /* Called upon reading of a slash ('/') characters, determines whether a
415 * comment is encountered, and its type.
417 static Comment isComment (void)
419 Comment comment;
420 const int next = fileGetc ();
422 if (next == '*')
423 comment = COMMENT_C;
424 else if (next == '/')
425 comment = COMMENT_CPLUS;
426 else
428 fileUngetc (next);
429 comment = COMMENT_NONE;
431 return comment;
434 /* Skips over a C style comment. According to ANSI specification a comment
435 * is treated as white space, so we perform this subsitution.
437 static int skipOverCComment (void)
439 int c = fileGetc ();
441 while (c != EOF)
443 if (c != '*')
444 c = fileGetc ();
445 else
447 const int next = fileGetc ();
449 if (next != '/')
450 c = next;
451 else
453 c = SPACE; /* replace comment with space */
454 break;
458 return c;
461 /* Skips over a C++ style comment.
463 static int skipOverCplusComment (void)
465 int c;
467 while ((c = fileGetc ()) != EOF)
469 if (c == BACKSLASH)
470 fileGetc (); /* throw away next character, too */
471 else if (c == NEWLINE)
472 break;
474 return c;
477 /* Skips to the end of a string, returning a special character to
478 * symbolically represent a generic string.
480 static int skipToEndOfString (void)
482 int c;
484 while ((c = fileGetc ()) != EOF)
486 if (c == BACKSLASH)
487 fileGetc (); /* throw away next character, too */
488 else if (c == DOUBLE_QUOTE)
489 break;
491 return STRING_SYMBOL; /* symbolic representation of string */
494 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
495 * special character to symbolically represent a generic character.
496 * Also detects Vera numbers that include a base specifier (ie. 'b1010).
498 static int skipToEndOfChar (void)
500 int c;
501 int count = 0, veraBase = '\0';
503 while ((c = fileGetc ()) != EOF)
505 ++count;
506 if (c == BACKSLASH)
507 fileGetc (); /* throw away next character, too */
508 else if (c == SINGLE_QUOTE)
509 break;
510 else if (c == NEWLINE)
512 fileUngetc (c);
513 break;
515 else if (count == 1 && strchr ("DHOB", toupper (c)) != NULL)
516 veraBase = c;
517 else if (veraBase != '\0' && ! isalnum (c))
519 fileUngetc (c);
520 break;
523 return CHAR_SYMBOL; /* symbolic representation of character */
526 /* This function returns the next character, stripping out comments,
527 * C pre-processor directives, and the contents of single and double
528 * quoted strings. In short, strip anything which places a burden upon
529 * the tokenizer.
531 extern int cppGetc (void)
533 boolean directive = FALSE;
534 boolean ignore = FALSE;
535 int c;
537 if (Cpp.ungetch != '\0')
539 c = Cpp.ungetch;
540 Cpp.ungetch = Cpp.ungetch2;
541 Cpp.ungetch2 = '\0';
542 return c; /* return here to avoid re-calling debugPutc () */
544 else do
546 c = fileGetc ();
547 process:
548 switch (c)
550 case EOF:
551 ignore = FALSE;
552 directive = FALSE;
553 break;
555 case TAB:
556 case SPACE:
557 break; /* ignore most white space */
559 case NEWLINE:
560 if (directive && ! ignore)
561 directive = FALSE;
562 Cpp.directive.accept = TRUE;
563 break;
565 case DOUBLE_QUOTE:
566 Cpp.directive.accept = FALSE;
567 c = skipToEndOfString ();
568 break;
570 case '#':
571 if (Cpp.directive.accept)
573 directive = TRUE;
574 Cpp.directive.state = DRCTV_HASH;
575 Cpp.directive.accept = FALSE;
577 break;
579 case SINGLE_QUOTE:
580 Cpp.directive.accept = FALSE;
581 c = skipToEndOfChar ();
582 break;
584 case '/':
586 const Comment comment = isComment ();
588 if (comment == COMMENT_C)
589 c = skipOverCComment ();
590 else if (comment == COMMENT_CPLUS)
592 c = skipOverCplusComment ();
593 if (c == NEWLINE)
594 fileUngetc (c);
596 else
597 Cpp.directive.accept = FALSE;
598 break;
601 case BACKSLASH:
603 int next = fileGetc ();
605 if (next == NEWLINE)
606 continue;
607 else if (next == '?')
608 cppUngetc (next);
609 else
610 fileUngetc (next);
611 break;
614 case '?':
616 int next = fileGetc ();
617 if (next != '?')
618 fileUngetc (next);
619 else
621 next = fileGetc ();
622 switch (next)
624 case '(': c = '['; break;
625 case ')': c = ']'; break;
626 case '<': c = '{'; break;
627 case '>': c = '}'; break;
628 case '/': c = BACKSLASH; goto process;
629 case '!': c = '|'; break;
630 case SINGLE_QUOTE: c = '^'; break;
631 case '-': c = '~'; break;
632 case '=': c = '#'; goto process;
633 default:
634 fileUngetc (next);
635 cppUngetc ('?');
636 break;
639 } break;
641 default:
642 Cpp.directive.accept = FALSE;
643 if (directive)
644 ignore = handleDirective (c);
645 break;
647 } while (directive || ignore);
649 DebugStatement ( debugPutc (DEBUG_CPP, c); )
650 DebugStatement ( if (c == NEWLINE)
651 debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
653 return c;
656 extern char *getArglistFromBufferPos(int startPosition, const char *tokenName)
658 int bufferOriginalPosition;
659 char *result = NULL;
660 char *arglist = NULL;
661 long pos1, pos2;
663 pos2 = getBufPos();
665 if (!useFile()) {
666 bufferOriginalPosition = getBufPos ();
667 setBufPos(startPosition);
668 pos1 = File.fpBufferPosition;
670 else
671 return NULL;
673 if (pos2 > pos1)
675 result = (char *) malloc(sizeof(char ) * (pos2 - pos1 + 2));
676 if (result != NULL)
678 memcpy(result, &File.fpBuffer[getBufPos()], pos2 - pos1 + 1);
679 result[pos2-pos1+1] = '\0';
680 arglist = getArglistFromStr(result, tokenName);
681 free(result);
684 setBufPos (bufferOriginalPosition);
685 return arglist;
688 extern char *getArglistFromFilePos(fpos_t startPosition, const char *tokenName)
690 fpos_t originalPosition;
691 char *result = NULL;
692 char *arglist = NULL;
693 long pos1, pos2;
695 pos2 = ftell(File.fp);
697 fgetpos(File.fp, &originalPosition);
698 fsetpos(File.fp, &startPosition);
699 pos1 = ftell(File.fp);
701 if (pos2 > pos1)
703 result = (char *) malloc(sizeof(char ) * (pos2 - pos1 + 2));
704 if (result != NULL)
706 fread(result, sizeof(char), pos2 - pos1 + 1, File.fp);
707 result[pos2-pos1+1] = '\0';
708 arglist = getArglistFromStr(result, tokenName);
709 free(result);
712 fsetpos(File.fp, &originalPosition);
713 return arglist;
/*  Scanner states used by stripCodeBuffer ().
 */
typedef enum
{
	st_none_t,			/* ordinary code */
	st_escape_t,		/* the previous character was a backslash */
	st_c_comment_t,		/* inside a slash-star comment */
	st_cpp_comment_t,	/* inside a double-slash comment */
	st_double_quote_t,	/* inside a string literal */
	st_single_quote_t	/* inside a character literal */
} ParseState;

/*  Rewrites "buf" in place, keeping only plain code:
 *
 *  - comments are removed and replaced by a single space;
 *  - string and character literals are removed, quotes included;
 *  - a backslash and the character following it are removed, in any state;
 *  - runs of white space collapse to one space, and leading white space is
 *    dropped.
 *
 *  The result is never longer than the input and is NUL terminated.
 */
static void stripCodeBuffer(char *buf)
{
	size_t in, out = 0;
	size_t commentBody = 0;		/* index of the first character after the opener */
	ParseState state = st_none_t, resume = st_none_t;

	for (in = 0; buf[in] != '\0'; ++in)
	{
		const char ch = buf[in];

		/* The escaped character is consumed whatever it is. Handling this
		 * before anything else keeps an escaped quote or slash from being
		 * mistaken for a delimiter. */
		if (st_escape_t == state)
		{
			state = resume;
			resume = st_none_t;
			continue;
		}
		if (ch == '\\')
		{
			resume = state;
			state = st_escape_t;
			continue;
		}

		switch (state)
		{
			case st_none_t:
				if (ch == '/' && buf[in + 1] == '*')
				{
					state = st_c_comment_t;
					commentBody = in + 2;
					++in;		/* the opening '*' cannot also close */
				}
				else if (ch == '/' && buf[in + 1] == '/')
				{
					state = st_cpp_comment_t;
					++in;
				}
				else if (ch == '"')
					state = st_double_quote_t;
				else if (ch == '\'')
					state = st_single_quote_t;
				else if (isspace((unsigned char) ch))
				{
					if ((out > 0) && (buf[out - 1] != ' '))
						buf[out++] = ' ';
				}
				else
					buf[out++] = ch;
				break;

			case st_c_comment_t:
				/* closes only on a '*' that belongs to the comment body */
				if (ch == '/' && in > commentBody && buf[in - 1] == '*')
				{
					if ((out > 0) && (buf[out - 1] != ' '))
						buf[out++] = ' ';
					state = st_none_t;
				}
				break;

			case st_cpp_comment_t:
				if (ch == '\n')
				{
					if ((out > 0) && (buf[out - 1] != ' '))
						buf[out++] = ' ';
					state = st_none_t;
				}
				break;

			case st_double_quote_t:
				if (ch == '"')
					state = st_none_t;
				break;

			case st_single_quote_t:
				if (ch == '\'')
					state = st_none_t;
				break;

			default:	/* st_escape_t is handled before the switch */
				break;
		}
	}
	buf[out] = '\0';
}
/*  Returns a newly allocated copy of the parenthesised argument list that
 *  follows the first occurrence of "name" in "buf", from the opening '(' up
 *  to and including its matching ')'. If the parentheses are unbalanced the
 *  copy runs to the end of the text.
 *
 *  "buf" is modified: it is passed through stripCodeBuffer () and then
 *  truncated after the argument list. The result comes from strdup (), so
 *  the caller is responsible for freeing it. NULL is returned when an
 *  argument is NULL, "name" is empty, or no '(' follows "name".
 */
extern char *getArglistFromStr(char *buf, const char *name)
{
	char *open, *cursor;
	int depth = 1;

	if (buf == NULL || name == NULL || name[0] == '\0')
		return NULL;

	stripCodeBuffer(buf);

	open = strstr(buf, name);
	if (open == NULL)
		return NULL;

	open = strchr(open, '(');
	if (open == NULL)
		return NULL;

	/* advance until the opening parenthesis is balanced or the text ends */
	cursor = open + 1;
	while (depth > 0 && *cursor != '\0')
	{
		if (*cursor == '(')
			++depth;
		else if (*cursor == ')')
			--depth;
		++cursor;
	}
	*cursor = '\0';

	return strdup(open);
}
829 /* vi:set tabstop=4 shiftwidth=4: */