2 * Copyright (c) 1996-2002, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains the high level source read functions (preprocessor
8 * directives are handled within this level).
14 #include "general.h" /* must always come first */
/* Evaluates to non-zero when the two NUL-terminated strings compare equal. */
#define stringMatch(s1,s2)		(strcmp (s1,s2) == 0)
/* Evaluates to non-zero when c equals the SPACE or the TAB constant. */
#define isspacetab(c)			((c) == SPACE || (c) == TAB)
/*  Classifies what follows a '/' character in the input: COMMENT_NONE when
 *  no comment starts there, otherwise the style of comment that begins.
 */
typedef enum {
	COMMENT_NONE,
	COMMENT_C,
	COMMENT_CPLUS,
	COMMENT_D
} Comment;
37 MaxCppNestingLevel
= 20,
/*  Defines one nesting level of a preprocessor conditional.
 */
43 typedef struct sConditionalInfo
{
44 boolean ignoreAllBranches
; /* ignoring parent conditional branch */
45 boolean singleBranch
; /* choose only one branch */
46 boolean branchChosen
; /* branch already selected */
47 boolean ignoring
; /* current ignore state */
51 DRCTV_NONE
, /* no known directive - ignore to end of line */
52 DRCTV_DEFINE
, /* "#define" encountered */
53 DRCTV_HASH
, /* initial '#' read; determine directive */
54 DRCTV_IF
, /* "#if" or "#ifdef" encountered */
55 DRCTV_PRAGMA
, /* #pragma encountered */
56 DRCTV_UNDEF
/* "#undef" encountered */
59 /* Defines the current state of the pre-processor.
61 typedef struct sCppState
{
62 int ungetch
, ungetch2
; /* ungotten characters, if any */
63 boolean resolveRequired
; /* must resolve if/else/elif/endif branch */
64 boolean hasAtLiteralStrings
; /* supports @"c:\" strings */
66 enum eState state
; /* current directive being processed */
67 boolean accept
; /* is a directive syntactically permitted? */
68 vString
* name
; /* macro name */
69 unsigned int nestLevel
; /* level 0 is not used */
70 conditionalInfo ifdef
[MaxCppNestingLevel
];
78 /* Use brace formatting to detect end of block.
80 static boolean BraceFormat
= FALSE
;
82 static cppState Cpp
= {
83 '\0', '\0', /* ungetch characters */
84 FALSE
, /* resolveRequired */
85 FALSE
, /* hasAtLiteralStrings */
87 DRCTV_NONE
, /* state */
91 { {FALSE
,FALSE
,FALSE
,FALSE
} } /* ifdef array */
96 * FUNCTION DEFINITIONS
99 extern boolean
isBraceFormat (void)
104 extern unsigned int getDirectiveNestLevel (void)
106 return Cpp
.directive
.nestLevel
;
109 extern void cppInit (const boolean state
, const boolean hasAtLiteralStrings
)
115 Cpp
.resolveRequired
= FALSE
;
116 Cpp
.hasAtLiteralStrings
= hasAtLiteralStrings
;
118 Cpp
.directive
.state
= DRCTV_NONE
;
119 Cpp
.directive
.accept
= TRUE
;
120 Cpp
.directive
.nestLevel
= 0;
122 Cpp
.directive
.ifdef
[0].ignoreAllBranches
= FALSE
;
123 Cpp
.directive
.ifdef
[0].singleBranch
= FALSE
;
124 Cpp
.directive
.ifdef
[0].branchChosen
= FALSE
;
125 Cpp
.directive
.ifdef
[0].ignoring
= FALSE
;
127 if (Cpp
.directive
.name
== NULL
)
128 Cpp
.directive
.name
= vStringNew ();
130 vStringClear (Cpp
.directive
.name
);
133 extern void cppTerminate (void)
135 if (Cpp
.directive
.name
!= NULL
)
137 vStringDelete (Cpp
.directive
.name
);
138 Cpp
.directive
.name
= NULL
;
142 extern void cppBeginStatement (void)
144 Cpp
.resolveRequired
= TRUE
;
147 extern void cppEndStatement (void)
149 Cpp
.resolveRequired
= FALSE
;
155 * This section handles preprocessor directives. It strips out all
156 * directives and may emit a tag for #define directives.
159 /* This puts a character back into the input queue for the source File.
160 * Up to two characters may be ungotten.
162 extern void cppUngetc (const int c
)
164 Assert (Cpp
.ungetch2
== '\0');
165 Cpp
.ungetch2
= Cpp
.ungetch
;
169 /* Reads a directive, whose first character is given by "c", into "name".
171 static boolean
readDirective (int c
, char *const name
, unsigned int maxLength
)
175 for (i
= 0 ; i
< maxLength
- 1 ; ++i
)
180 if (c
== EOF
|| ! isalpha (c
))
188 name
[i
] = '\0'; /* null terminate */
190 return (boolean
) isspacetab (c
);
/*  Reads an identifier, whose first character is given by "c", into "name",
 *  together with the file location and corresponding line number.
 */
196 static void readIdentifier (int c
, vString
*const name
)
201 vStringPut (name
, c
);
202 } while (c
= fileGetc (), (c
!= EOF
&& isident (c
)));
204 vStringTerminate (name
);
207 static conditionalInfo
*currentConditional (void)
209 return &Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
];
212 static boolean
isIgnore (void)
214 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
;
217 static boolean
setIgnore (const boolean ignore
)
219 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
= ignore
;
222 static boolean
isIgnoreBranch (void)
224 conditionalInfo
*const ifdef
= currentConditional ();
226 /* Force a single branch if an incomplete statement is discovered
227 * en route. This may have allowed earlier branches containing complete
228 * statements to be followed, but we must follow no further branches.
230 if (Cpp
.resolveRequired
&& ! BraceFormat
)
231 ifdef
->singleBranch
= TRUE
;
233 /* We will ignore this branch in the following cases:
235 * 1. We are ignoring all branches (conditional was within an ignored
236 * branch of the parent conditional)
237 * 2. A branch has already been chosen and either of:
238 * a. A statement was incomplete upon entering the conditional
239 * b. A statement is incomplete upon encountering a branch
241 return (boolean
) (ifdef
->ignoreAllBranches
||
242 (ifdef
->branchChosen
&& ifdef
->singleBranch
));
245 static void chooseBranch (void)
249 conditionalInfo
*const ifdef
= currentConditional ();
251 ifdef
->branchChosen
= (boolean
) (ifdef
->singleBranch
||
252 Cpp
.resolveRequired
);
256 /* Pushes one nesting level for an #if directive, indicating whether or not
257 * the branch should be ignored and whether a branch has already been chosen.
259 static boolean
pushConditional (const boolean firstBranchChosen
)
261 const boolean ignoreAllBranches
= isIgnore (); /* current ignore */
262 boolean ignoreBranch
= FALSE
;
264 if (Cpp
.directive
.nestLevel
< (unsigned int) MaxCppNestingLevel
- 1)
266 conditionalInfo
*ifdef
;
268 ++Cpp
.directive
.nestLevel
;
269 ifdef
= currentConditional ();
271 /* We take a snapshot of whether there is an incomplete statement in
272 * progress upon encountering the preprocessor conditional. If so,
273 * then we will flag that only a single branch of the conditional
274 * should be followed.
276 ifdef
->ignoreAllBranches
= ignoreAllBranches
;
277 ifdef
->singleBranch
= Cpp
.resolveRequired
;
278 ifdef
->branchChosen
= firstBranchChosen
;
279 ifdef
->ignoring
= (boolean
) (ignoreAllBranches
|| (
280 ! firstBranchChosen
&& ! BraceFormat
&&
281 (ifdef
->singleBranch
|| !Option
.if0
)));
282 ignoreBranch
= ifdef
->ignoring
;
287 /* Pops one nesting level for an #endif directive.
289 static boolean
popConditional (void)
291 if (Cpp
.directive
.nestLevel
> 0)
292 --Cpp
.directive
.nestLevel
;
297 static void makeDefineTag (const char *const name
, boolean parameterized
)
299 const boolean isFileScope
= (boolean
) (! isHeaderFile ());
301 if (includingDefineTags () &&
302 (! isFileScope
|| Option
.include
.fileScope
))
306 initTagEntry (&e
, name
);
308 e
.lineNumberEntry
= (boolean
) (Option
.locate
!= EX_PATTERN
);
309 e
.isFileScope
= isFileScope
;
310 e
.truncateLine
= TRUE
;
311 e
.kindName
= "macro";
315 e
.extensionFields
.arglist
= getArglistFromFilePos(getInputFilePosition()
320 free((char *) e
.extensionFields
.arglist
);
324 static void directiveDefine (const int c
)
326 boolean parameterized
;
331 readIdentifier (c
, Cpp
.directive
.name
);
334 parameterized
= (boolean
) (nc
== '(');
336 makeDefineTag (vStringValue (Cpp
.directive
.name
), parameterized
);
338 Cpp
.directive
.state
= DRCTV_NONE
;
341 static void directivePragma (int c
)
345 readIdentifier (c
, Cpp
.directive
.name
);
346 if (stringMatch (vStringValue (Cpp
.directive
.name
), "weak"))
348 /* generate macro tag for weak name */
352 } while (c
== SPACE
);
355 readIdentifier (c
, Cpp
.directive
.name
);
356 makeDefineTag (vStringValue (Cpp
.directive
.name
), FALSE
);
360 Cpp
.directive
.state
= DRCTV_NONE
;
363 static boolean
directiveIf (const int c
)
365 const boolean ignore
= pushConditional ((boolean
) (c
!= '0'));
367 Cpp
.directive
.state
= DRCTV_NONE
;
372 static boolean
directiveHash (const int c
)
374 boolean ignore
= FALSE
;
375 char directive
[MaxDirectiveName
];
376 DebugStatement ( const boolean ignore0
= isIgnore (); )
378 readDirective (c
, directive
, MaxDirectiveName
);
379 if (stringMatch (directive
, "define"))
380 Cpp
.directive
.state
= DRCTV_DEFINE
;
381 else if (stringMatch (directive
, "undef"))
382 Cpp
.directive
.state
= DRCTV_UNDEF
;
383 else if (strncmp (directive
, "if", (size_t) 2) == 0)
384 Cpp
.directive
.state
= DRCTV_IF
;
385 else if (stringMatch (directive
, "elif") ||
386 stringMatch (directive
, "else"))
388 ignore
= setIgnore (isIgnoreBranch ());
389 if (! ignore
&& stringMatch (directive
, "else"))
391 Cpp
.directive
.state
= DRCTV_NONE
;
392 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
394 else if (stringMatch (directive
, "endif"))
396 DebugStatement ( debugCppNest (FALSE
, Cpp
.directive
.nestLevel
); )
397 ignore
= popConditional ();
398 Cpp
.directive
.state
= DRCTV_NONE
;
399 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
401 else if (stringMatch (directive
, "pragma"))
402 Cpp
.directive
.state
= DRCTV_PRAGMA
;
404 Cpp
.directive
.state
= DRCTV_NONE
;
409 /* Handles a pre-processor directive whose first character is given by "c".
411 static boolean
handleDirective (const int c
)
413 boolean ignore
= isIgnore ();
415 switch (Cpp
.directive
.state
)
417 case DRCTV_NONE
: ignore
= isIgnore (); break;
418 case DRCTV_DEFINE
: directiveDefine (c
); break;
419 case DRCTV_HASH
: ignore
= directiveHash (c
); break;
420 case DRCTV_IF
: ignore
= directiveIf (c
); break;
421 case DRCTV_PRAGMA
: directivePragma (c
); break;
422 case DRCTV_UNDEF
: directiveDefine (c
); break;
/*  Called upon reading a slash ('/') character; determines whether a
 *  comment is encountered, and its type.
 */
430 static Comment
isComment (void)
433 const int next
= fileGetc ();
437 else if (next
== '/')
438 comment
= COMMENT_CPLUS
;
439 else if (next
== '+')
444 comment
= COMMENT_NONE
;
449 /* Skips over a C style comment. According to ANSI specification a comment
450 * is treated as white space, so we perform this substitution.
452 int skipOverCComment (void)
462 const int next
= fileGetc ();
468 c
= SPACE
; /* replace comment with space */
476 /* Skips over a C++ style comment.
478 static int skipOverCplusComment (void)
482 while ((c
= fileGetc ()) != EOF
)
485 fileGetc (); /* throw away next character, too */
486 else if (c
== NEWLINE
)
492 /* Skips over a D style comment.
493 * Really we should match nested /+ comments. At least they're less common.
495 static int skipOverDComment (void)
505 const int next
= fileGetc ();
511 c
= SPACE
; /* replace comment with space */
519 /* Skips to the end of a string, returning a special character to
520 * symbolically represent a generic string.
522 static int skipToEndOfString (boolean ignoreBackslash
)
526 while ((c
= fileGetc ()) != EOF
)
528 if (c
== BACKSLASH
&& ! ignoreBackslash
)
529 fileGetc (); /* throw away next character, too */
530 else if (c
== DOUBLE_QUOTE
)
533 return STRING_SYMBOL
; /* symbolic representation of string */
/*  Skips to the end of the three (possibly four) 'c' sequence, returning a
 *  special character to symbolically represent a generic character.
 *  Also detects Vera numbers that include a base specifier (e.g. 'b1010).
 */
540 static int skipToEndOfChar (void)
543 int count
= 0, veraBase
= '\0';
545 while ((c
= fileGetc ()) != EOF
)
549 fileGetc (); /* throw away next character, too */
550 else if (c
== SINGLE_QUOTE
)
552 else if (c
== NEWLINE
)
557 else if (count
== 1 && strchr ("DHOB", toupper (c
)) != NULL
)
559 else if (veraBase
!= '\0' && ! isalnum (c
))
565 return CHAR_SYMBOL
; /* symbolic representation of character */
/*  This function returns the next character, stripping out comments,
 *  C pre-processor directives, and the contents of single and double
 *  quoted strings. In short, strip anything which places a burden upon
 *  the tokenizer.
 */
573 extern int cppGetc (void)
575 boolean directive
= FALSE
;
576 boolean ignore
= FALSE
;
579 if (Cpp
.ungetch
!= '\0')
582 Cpp
.ungetch
= Cpp
.ungetch2
;
584 return c
; /* return here to avoid re-calling debugPutc () */
599 break; /* ignore most white space */
602 if (directive
&& ! ignore
)
604 Cpp
.directive
.accept
= TRUE
;
608 Cpp
.directive
.accept
= FALSE
;
609 c
= skipToEndOfString (FALSE
);
613 if (Cpp
.directive
.accept
)
616 Cpp
.directive
.state
= DRCTV_HASH
;
617 Cpp
.directive
.accept
= FALSE
;
622 Cpp
.directive
.accept
= FALSE
;
623 c
= skipToEndOfChar ();
628 const Comment comment
= isComment ();
630 if (comment
== COMMENT_C
)
631 c
= skipOverCComment ();
632 else if (comment
== COMMENT_CPLUS
)
634 c
= skipOverCplusComment ();
638 else if (comment
== COMMENT_D
)
639 c
= skipOverDComment ();
641 Cpp
.directive
.accept
= FALSE
;
647 int next
= fileGetc ();
651 else if (next
== '?')
660 int next
= fileGetc ();
668 case '(': c
= '['; break;
669 case ')': c
= ']'; break;
670 case '<': c
= '{'; break;
671 case '>': c
= '}'; break;
672 case '/': c
= BACKSLASH
; goto process
;
673 case '!': c
= '|'; break;
674 case SINGLE_QUOTE
: c
= '^'; break;
675 case '-': c
= '~'; break;
676 case '=': c
= '#'; goto process
;
686 if (c
== '@' && Cpp
.hasAtLiteralStrings
)
688 int next
= fileGetc ();
689 if (next
== DOUBLE_QUOTE
)
691 Cpp
.directive
.accept
= FALSE
;
692 c
= skipToEndOfString (TRUE
);
696 Cpp
.directive
.accept
= FALSE
;
698 ignore
= handleDirective (c
);
701 } while (directive
|| ignore
);
703 DebugStatement ( debugPutc (DEBUG_CPP
, c
); )
704 DebugStatement ( if (c
== NEWLINE
)
705 debugPrintf (DEBUG_CPP
, "%6ld: ", getInputLineNumber () + 1); )
710 extern char *getArglistFromFilePos(MIOPos startPosition
, const char *tokenName
)
712 MIOPos originalPosition
;
714 char *arglist
= NULL
;
717 pos2
= mio_tell(File
.mio
);
719 mio_getpos(File
.mio
, &originalPosition
);
720 mio_setpos(File
.mio
, &startPosition
);
721 pos1
= mio_tell(File
.mio
);
725 result
= (char *) g_malloc(sizeof(char ) * (pos2
- pos1
+ 2));
726 if (result
!= NULL
&& mio_read(File
.mio
, result
, sizeof(char), pos2
- pos1
+ 1) > 0)
728 result
[pos2
-pos1
+1] = '\0';
729 arglist
= getArglistFromStr(result
, tokenName
);
733 mio_setpos(File
.mio
, &originalPosition
);
747 static void stripCodeBuffer(char *buf
)
750 ParseState state
= st_none_t
, prev_state
= st_none_t
;
752 while (buf
[i
] != '\0')
757 if (st_none_t
== state
)
759 /* Check if this is the start of a comment */
760 if (buf
[i
+1] == '*') /* C comment */
761 state
= st_c_comment_t
;
762 else if (buf
[i
+1] == '/') /* C++ comment */
763 state
= st_cpp_comment_t
;
764 else /* Normal character */
767 else if (st_c_comment_t
== state
)
769 /* Check if this is the end of a C comment */
772 if ((pos
> 0) && (buf
[pos
-1] != ' '))
779 if (st_none_t
== state
)
780 state
= st_double_quote_t
;
781 else if (st_double_quote_t
== state
)
785 if (st_none_t
== state
)
786 state
= st_single_quote_t
;
787 else if (st_single_quote_t
== state
)
791 if ((buf
[i
] == '\\') && (st_escape_t
!= state
))
796 else if (st_escape_t
== state
)
799 prev_state
= st_none_t
;
801 else if ((buf
[i
] == '\n') && (st_cpp_comment_t
== state
))
803 if ((pos
> 0) && (buf
[pos
-1] != ' '))
807 else if (st_none_t
== state
)
811 if ((pos
> 0) && (buf
[pos
-1] != ' '))
825 extern char *getArglistFromStr(char *buf
, const char *name
)
829 if ((NULL
== buf
) || (NULL
== name
) || ('\0' == name
[0]))
831 stripCodeBuffer(buf
);
832 if (NULL
== (start
= strstr(buf
, name
)))
834 if (NULL
== (start
= strchr(start
, '(')))
836 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
840 else if ('(' == *end
)
842 else if (')' == *end
)
846 return strdup(start
);
849 /* vi:set tabstop=4 shiftwidth=4: */