4 * Copyright (c) 1996-2002, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains the high level source read functions (preprocessor
10 * directives are handled within this level).
16 #include "general.h" /* must always come first */
/*  Evaluates to non-zero when the C strings s1 and s2 compare equal with
 *  strcmp(). Requires <string.h>.
 */
#define stringMatch(s1,s2)	(strcmp ((s1), (s2)) == 0)

/*  Evaluates to non-zero when c equals SPACE or TAB (both are defined
 *  outside this block). The argument is expanded twice, so it must not have
 *  side effects.
 */
#define isspacetab(c)		((c) == SPACE || (c) == TAB)
/*  Classification returned by isComment() for the text following a '/':
 *  not a comment, a C comment, or a C++ comment.
 */
typedef enum {
	COMMENT_NONE,
	COMMENT_C,
	COMMENT_CPLUS
} Comment;
/*  Upper bound on preprocessor conditional nesting: it dimensions the ifdef
 *  array of the preprocessor state and is the limit tested before a new
 *  level is pushed.
 *  NOTE(review): the header of the enclosing enum is not visible in this
 *  listing.
 */
39 MaxCppNestingLevel
= 20,
43 /* Defines one nesting level of a preprocessor conditional. */
/*  Flags kept for one level of conditional nesting; one entry per level is
 *  held in the ifdef array of the preprocessor state.
 *  NOTE(review): the closing brace and the typedef name are missing from
 *  this listing; the rest of the file uses the type as "conditionalInfo".
 */
45 typedef struct sConditionalInfo
{
46 boolean ignoreAllBranches
; /* ignoring parent conditional branch */
47 boolean singleBranch
; /* choose only one branch */
48 boolean branchChosen
; /* branch already selected */
49 boolean ignoring
; /* current ignore state */
/*  Directive-processing states. The current value is stored in the "state"
 *  field of the preprocessor state (declared as "enum eState") and is what
 *  handleDirective() switches on.
 *  NOTE(review): the enum header and its closing brace are missing from this
 *  listing.
 */
53 DRCTV_NONE
, /* no known directive - ignore to end of line */
54 DRCTV_DEFINE
, /* "#define" encountered */
55 DRCTV_HASH
, /* initial '#' read; determine directive */
56 DRCTV_IF
, /* "#if" or "#ifdef" encountered */
57 DRCTV_PRAGMA
, /* #pragma encountered */
58 DRCTV_UNDEF
/* "#undef" encountered */
61 /* Defines the current state of the pre-processor. */
/*  Whole state of the preprocessor layer: pushed-back characters, the
 *  statement/branch resolution flags, and the directive being processed
 *  together with the stack of conditional nesting levels.
 *  NOTE(review): the rest of this file reaches the fields from "state"
 *  through "ifdef" as Cpp.directive.<field>, so they presumably belong to a
 *  nested "directive" member whose opening and closing lines, like the end
 *  of this struct, are missing from the listing.
 */
63 typedef struct sCppState
{
64 int ungetch
, ungetch2
; /* ungotten characters, if any */
65 boolean resolveRequired
; /* must resolve if/else/elif/endif branch */
66 boolean hasAtLiteralStrings
; /* supports @"c:\" strings */
68 enum eState state
; /* current directive being processed */
69 boolean accept
; /* is a directive syntactically permitted? */
70 vString
* name
; /* macro name */
71 unsigned int nestLevel
; /* level 0 is not used */
72 conditionalInfo ifdef
[MaxCppNestingLevel
];
80 /* Use brace formatting to detect end of block. */
82 static boolean BraceFormat
= FALSE
;
/*  The single file-scope preprocessor state. The visible initialisers give
 *  it no pushed-back characters, both flags FALSE, directive state
 *  DRCTV_NONE and an all-FALSE first ifdef entry.
 *  NOTE(review): the initialisers for the remaining directive fields and the
 *  closing braces are missing from this listing.
 */
84 static cppState Cpp
= {
85 '\0', '\0', /* ungetch characters */
86 FALSE
, /* resolveRequired */
87 FALSE
, /* hasAtLiteralStrings */
89 DRCTV_NONE
, /* state */
93 { {FALSE
,FALSE
,FALSE
,FALSE
} } /* ifdef array */
98 * FUNCTION DEFINITIONS
/*  Query for the brace-format setting.
 *  NOTE(review): the body is not visible in this listing; presumably it
 *  returns BraceFormat - confirm against the complete source.
 */
101 extern boolean
isBraceFormat (void)
/*  Returns the current depth of preprocessor conditional nesting
 *  (Cpp.directive.nestLevel).
 */
106 extern unsigned int getDirectiveNestLevel (void)
108 return Cpp
.directive
.nestLevel
;
/*  Resets the preprocessor state: clears resolveRequired, records
 *  hasAtLiteralStrings, sets the directive state to DRCTV_NONE with
 *  directives accepted, resets nestLevel to 0 and clears every flag of
 *  ifdef[0], then makes sure the directive name buffer exists (allocating it
 *  on first use) and is empty.
 *  NOTE(review): several lines of the body are missing from this listing, so
 *  the use of the "state" parameter and any reset of the pushed-back
 *  characters cannot be seen here.
 */
111 extern void cppInit (const boolean state
, const boolean hasAtLiteralStrings
)
117 Cpp
.resolveRequired
= FALSE
;
118 Cpp
.hasAtLiteralStrings
= hasAtLiteralStrings
;
120 Cpp
.directive
.state
= DRCTV_NONE
;
121 Cpp
.directive
.accept
= TRUE
;
122 Cpp
.directive
.nestLevel
= 0;
124 Cpp
.directive
.ifdef
[0].ignoreAllBranches
= FALSE
;
125 Cpp
.directive
.ifdef
[0].singleBranch
= FALSE
;
126 Cpp
.directive
.ifdef
[0].branchChosen
= FALSE
;
127 Cpp
.directive
.ifdef
[0].ignoring
= FALSE
;
129 if (Cpp
.directive
.name
== NULL
)
130 Cpp
.directive
.name
= vStringNew ();
132 vStringClear (Cpp
.directive
.name
);
/*  Releases the directive name buffer if one is allocated and resets the
 *  pointer to NULL, so that a later cppInit() allocates a fresh buffer.
 */
135 extern void cppTerminate (void)
137 if (Cpp
.directive
.name
!= NULL
)
139 vStringDelete (Cpp
.directive
.name
);
140 Cpp
.directive
.name
= NULL
;
/*  Sets resolveRequired. pushConditional() and isIgnoreBranch() read this
 *  flag to restrict a conditional to a single branch.
 */
144 extern void cppBeginStatement (void)
146 Cpp
.resolveRequired
= TRUE
;
/*  Clears resolveRequired, undoing cppBeginStatement(). */
149 extern void cppEndStatement (void)
151 Cpp
.resolveRequired
= FALSE
;
157 * This section handles preprocessor directives. It strips out all
158 * directives and may emit a tag for #define directives.
161 /* This puts a character back into the input queue for the source File.
162 * Up to two characters may be ungotten. */
/*  Pushes back character c. The assertion requires the second push-back
 *  slot to be empty; the character already held in ungetch then moves into
 *  ungetch2. cppGetc() hands pushed-back characters out before reading new
 *  input.
 *  NOTE(review): the line that stores c (presumably into Cpp.ungetch) is
 *  missing from this listing.
 */
164 extern void cppUngetc (const int c
)
166 Assert (Cpp
.ungetch2
== '\0');
167 Cpp
.ungetch2
= Cpp
.ungetch
;
171 /* Reads a directive, whose first character is given by "c", into "name". */
/*  Collects a directive name into "name". The loop runs for at most
 *  maxLength - 1 positions and tests each character for EOF or a
 *  non-alphabetic value; the name is null terminated at the position
 *  reached. Returns TRUE when the last character examined is a space or tab.
 *  NOTE(review): the statements that store characters, fetch the next one
 *  and leave the loop are missing from this listing.
 */
173 static boolean
readDirective (int c
, char *const name
, unsigned int maxLength
)
177 for (i
= 0 ; i
< maxLength
- 1 ; ++i
)
182 if (c
== EOF
|| ! isalpha (c
))
190 name
[i
] = '\0'; /* null terminate */
192 return (boolean
) isspacetab (c
);
195 /* Reads an identifier, whose first character is given by "c", into "name",
196 * together with the file location and corresponding line number. */
/*  Appends c to name and keeps appending characters from fileGetc() for as
 *  long as they are not EOF and satisfy isident(); the string is terminated
 *  afterwards.
 *  NOTE(review): the start of the loop and the lines around it are missing
 *  from this listing, so whether name is cleared first and what happens to
 *  the character that ends the identifier cannot be seen here.
 */
198 static void readIdentifier (int c
, vString
*const name
)
203 vStringPut (name
, c
);
204 } while (c
= fileGetc (), (c
!= EOF
&& isident (c
)));
206 vStringTerminate (name
);
/*  Returns a pointer to the ifdef entry of the current nesting level. */
209 static conditionalInfo
*currentConditional (void)
211 return &Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
];
/*  Returns the "ignoring" flag of the current nesting level. */
214 static boolean
isIgnore (void)
216 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
;
/*  Stores "ignore" as the "ignoring" flag of the current nesting level and
 *  returns the value just stored.
 */
219 static boolean
setIgnore (const boolean ignore
)
221 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
= ignore
;
/*  Decides whether the branch now being entered (directiveHash() calls this
 *  for "elif" and "else") must be ignored. Side effect: when a statement is
 *  unresolved and brace format is off, the current conditional is marked
 *  singleBranch before the decision is taken.
 *  NOTE(review): the two block comments inside this body have lost their
 *  closing delimiters in this listing.
 */
224 static boolean
isIgnoreBranch (void)
226 conditionalInfo
*const ifdef
= currentConditional ();
228 /* Force a single branch if an incomplete statement is discovered
229 * en route. This may have allowed earlier branches containing complete
230 * statements to be followed, but we must follow no further branches.
232 if (Cpp
.resolveRequired
&& ! BraceFormat
)
233 ifdef
->singleBranch
= TRUE
;
235 /* We will ignore this branch in the following cases:
237 * 1. We are ignoring all branches (conditional was within an ignored
238 * branch of the parent conditional)
239 * 2. A branch has already been chosen and either of:
240 * a. A statement was incomplete upon entering the conditional
241 * b. A statement is incomplete upon encountering a branch
243 return (boolean
) (ifdef
->ignoreAllBranches
||
244 (ifdef
->branchChosen
&& ifdef
->singleBranch
));
/*  Records on the current conditional that a branch has been chosen when the
 *  conditional is limited to a single branch or a statement is unresolved.
 *  NOTE(review): lines between the signature and the declaration are missing
 *  from this listing, so the assignment may sit inside a guard that cannot
 *  be seen here.
 */
247 static void chooseBranch (void)
251 conditionalInfo
*const ifdef
= currentConditional ();
253 ifdef
->branchChosen
= (boolean
) (ifdef
->singleBranch
||
254 Cpp
.resolveRequired
);
258 /* Pushes one nesting level for an #if directive, indicating whether or not
259 * the branch should be ignored and whether a branch has already been chosen. */
/*  Enters a new nesting level while the depth is below
 *  MaxCppNestingLevel - 1. The new entry inherits the enclosing ignore state
 *  as ignoreAllBranches, snapshots resolveRequired as singleBranch, takes
 *  firstBranchChosen as branchChosen and computes "ignoring" from those plus
 *  BraceFormat and Option.if0; ignoreBranch receives the new "ignoring"
 *  value.
 *  NOTE(review): the return statement is missing from this listing
 *  (presumably it returns ignoreBranch, which stays FALSE when the limit is
 *  reached).
 *  NOTE(review): at the depth limit nothing is pushed, yet popConditional()
 *  still pops on the matching #endif, so the levels can get out of step for
 *  deeply nested input - confirm whether that is intended.
 *  NOTE(review): the block comment inside this body has lost its closing
 *  delimiter in this listing.
 */
261 static boolean
pushConditional (const boolean firstBranchChosen
)
263 const boolean ignoreAllBranches
= isIgnore (); /* current ignore */
264 boolean ignoreBranch
= FALSE
;
266 if (Cpp
.directive
.nestLevel
< (unsigned int) MaxCppNestingLevel
- 1)
268 conditionalInfo
*ifdef
;
270 ++Cpp
.directive
.nestLevel
;
271 ifdef
= currentConditional ();
273 /* We take a snapshot of whether there is an incomplete statement in
274 * progress upon encountering the preprocessor conditional. If so,
275 * then we will flag that only a single branch of the conditional
276 * should be followed.
278 ifdef
->ignoreAllBranches
= ignoreAllBranches
;
279 ifdef
->singleBranch
= Cpp
.resolveRequired
;
280 ifdef
->branchChosen
= firstBranchChosen
;
281 ifdef
->ignoring
= (boolean
) (ignoreAllBranches
|| (
282 ! firstBranchChosen
&& ! BraceFormat
&&
283 (ifdef
->singleBranch
|| !Option
.if0
)));
284 ignoreBranch
= ifdef
->ignoring
;
289 /* Pops one nesting level for an #endif directive. */
/*  Leaves the current nesting level; at level 0 the counter is left alone.
 *  NOTE(review): the return statement is missing from this listing;
 *  directiveHash() stores the result as its new ignore state.
 */
291 static boolean
popConditional (void)
293 if (Cpp
.directive
.nestLevel
> 0)
294 --Cpp
.directive
.nestLevel
;
/*  Builds a tag entry of kind "macro" for "name". It proceeds only when
 *  define tags are enabled and either the input is a header file or
 *  file-scope tags are included. The entry is flagged to truncate its line,
 *  and its arglist extension field is obtained from getArglistFromFilePos()
 *  or getArglistFromBufferPos() and freed afterwards.
 *  NOTE(review): the lines that choose between the two arglist sources, use
 *  "parameterized" and emit the entry are missing from this listing.
 */
299 static void makeDefineTag (const char *const name
, boolean parameterized
)
301 const boolean isFileScope
= (boolean
) (! isHeaderFile ());
303 if (includingDefineTags () &&
304 (! isFileScope
|| Option
.include
.fileScope
))
308 initTagEntry (&e
, name
);
310 e
.lineNumberEntry
= (boolean
) (Option
.locate
!= EX_PATTERN
);
311 e
.isFileScope
= isFileScope
;
312 e
.truncateLine
= TRUE
;
313 e
.kindName
= "macro";
318 e
.extensionFields
.arglist
= getArglistFromFilePos(getInputFilePosition()
322 e
.extensionFields
.arglist
= getArglistFromBufferPos(getInputBufferPosition()
328 free((char *) e
.extensionFields
.arglist
);
/*  Handles the text after "#define"; handleDirective() also routes "#undef"
 *  here. Reads the identifier starting with c into Cpp.directive.name,
 *  makes a macro tag for it (parameterized when nc is '(') and returns the
 *  directive state to DRCTV_NONE.
 *  NOTE(review): the declaration and assignment of "nc" and any guard around
 *  the identifier handling are missing from this listing.
 */
332 static void directiveDefine (const int c
)
334 boolean parameterized
;
339 readIdentifier (c
, Cpp
.directive
.name
);
342 parameterized
= (boolean
) (nc
== '(');
344 makeDefineTag (vStringValue (Cpp
.directive
.name
), parameterized
);
346 Cpp
.directive
.state
= DRCTV_NONE
;
/*  Handles the text after "#pragma". Reads the pragma keyword into
 *  Cpp.directive.name; for "weak" it skips spaces, reads the identifier that
 *  follows and makes a non-parameterized macro tag for it. The directive
 *  state returns to DRCTV_NONE.
 *  NOTE(review): the guard ahead of the first read and the body of the
 *  space-skipping loop are missing from this listing.
 */
349 static void directivePragma (int c
)
353 readIdentifier (c
, Cpp
.directive
.name
);
354 if (stringMatch (vStringValue (Cpp
.directive
.name
), "weak"))
356 /* generate macro tag for weak name */
360 } while (c
== SPACE
);
363 readIdentifier (c
, Cpp
.directive
.name
);
364 makeDefineTag (vStringValue (Cpp
.directive
.name
), FALSE
);
368 Cpp
.directive
.state
= DRCTV_NONE
;
/*  Handles the first character of the condition of an #if-family directive:
 *  pushes a new conditional level, treating the first branch as chosen
 *  unless that character is '0', and returns the directive state to
 *  DRCTV_NONE.
 *  NOTE(review): the return statement is missing from this listing
 *  (presumably it returns "ignore").
 */
371 static boolean
directiveIf (const int c
)
373 const boolean ignore
= pushConditional ((boolean
) (c
!= '0'));
375 Cpp
.directive
.state
= DRCTV_NONE
;
/*  Reads the directive name that follows '#' and acts on it:
 *    "define", "undef", "pragma" - select the matching directive state;
 *    any name starting with "if" - select DRCTV_IF (the comparison covers
 *      only the first two characters);
 *    "elif", "else" - recompute the ignore state through isIgnoreBranch();
 *    "endif" - pop one nesting level and use its result as the ignore state;
 *    the final assignment resets the state to DRCTV_NONE.
 *  NOTE(review): braces, the statement controlled by the "else" test, the
 *  introducer of the final assignment and the return statement are missing
 *  from this listing.
 */
380 static boolean
directiveHash (const int c
)
382 boolean ignore
= FALSE
;
383 char directive
[MaxDirectiveName
];
384 DebugStatement ( const boolean ignore0
= isIgnore (); )
386 readDirective (c
, directive
, MaxDirectiveName
);
387 if (stringMatch (directive
, "define"))
388 Cpp
.directive
.state
= DRCTV_DEFINE
;
389 else if (stringMatch (directive
, "undef"))
390 Cpp
.directive
.state
= DRCTV_UNDEF
;
391 else if (strncmp (directive
, "if", (size_t) 2) == 0)
392 Cpp
.directive
.state
= DRCTV_IF
;
393 else if (stringMatch (directive
, "elif") ||
394 stringMatch (directive
, "else"))
396 ignore
= setIgnore (isIgnoreBranch ());
397 if (! ignore
&& stringMatch (directive
, "else"))
399 Cpp
.directive
.state
= DRCTV_NONE
;
400 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
402 else if (stringMatch (directive
, "endif"))
404 DebugStatement ( debugCppNest (FALSE
, Cpp
.directive
.nestLevel
); )
405 ignore
= popConditional ();
406 Cpp
.directive
.state
= DRCTV_NONE
;
407 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
409 else if (stringMatch (directive
, "pragma"))
410 Cpp
.directive
.state
= DRCTV_PRAGMA
;
412 Cpp
.directive
.state
= DRCTV_NONE
;
417 /* Handles a pre-processor directive whose first character is given by "c". */
/*  Passes c to the handler for the current directive state. "ignore" starts
 *  as the current ignore state and is replaced by the result of
 *  directiveHash() or directiveIf(); the define, pragma and undef handlers
 *  leave it unchanged. DRCTV_UNDEF is handled by directiveDefine().
 *  NOTE(review): the return statement is missing from this listing
 *  (presumably it returns "ignore").
 */
419 static boolean
handleDirective (const int c
)
421 boolean ignore
= isIgnore ();
423 switch (Cpp
.directive
.state
)
425 case DRCTV_NONE
: ignore
= isIgnore (); break;
426 case DRCTV_DEFINE
: directiveDefine (c
); break;
427 case DRCTV_HASH
: ignore
= directiveHash (c
); break;
428 case DRCTV_IF
: ignore
= directiveIf (c
); break;
429 case DRCTV_PRAGMA
: directivePragma (c
); break;
430 case DRCTV_UNDEF
: directiveDefine (c
); break;
435 /* Called upon reading a slash ('/') character; determines whether a
436 * comment is encountered, and its type. */
/*  Reads one character after the '/' to classify what follows: a second '/'
 *  yields COMMENT_CPLUS and the fall-back result is COMMENT_NONE.
 *  NOTE(review): the first branch (presumably the COMMENT_C case), the
 *  handling of the look-ahead character in the fall-back case and the return
 *  statement are missing from this listing.
 */
438 static Comment
isComment (void)
441 const int next
= fileGetc ();
445 else if (next
== '/')
446 comment
= COMMENT_CPLUS
;
450 comment
= COMMENT_NONE
;
455 /* Skips over a C style comment. According to the ANSI specification, a
456 * comment is treated as white space, so we perform this substitution. */
/*  Consumes a C comment; the visible lines read a look-ahead character and
 *  set the result to SPACE.
 *  NOTE(review): the scanning loop and return statement are missing from
 *  this listing. Unlike the neighbouring skip functions, this one is
 *  declared without "static" - confirm whether external linkage is intended.
 */
458 int skipOverCComment (void)
468 const int next
= fileGetc ();
474 c
= SPACE
; /* replace comment with space */
482 /* Skips over a C++ style comment. */
/*  Reads characters until EOF, discarding the character after an escape and
 *  testing each remaining character for NEWLINE.
 *  NOTE(review): the escape test, the action taken on NEWLINE and the return
 *  statement are missing from this listing.
 */
484 static int skipOverCplusComment (void)
488 while ((c
= fileGetc ()) != EOF
)
491 fileGetc (); /* throw away next character, too */
492 else if (c
== NEWLINE
)
498 /* Skips to the end of a string, returning a special character to
499 * symbolically represent a generic string. */
/*  Reads characters until EOF, looking for a double quote. A backslash
 *  causes the following character to be discarded unless ignoreBackslash is
 *  set. Returns STRING_SYMBOL in place of the string.
 *  NOTE(review): the action taken on the double quote is missing from this
 *  listing.
 */
501 static int skipToEndOfString (boolean ignoreBackslash
)
505 while ((c
= fileGetc ()) != EOF
)
507 if (c
== BACKSLASH
&& ! ignoreBackslash
)
508 fileGetc (); /* throw away next character, too */
509 else if (c
== DOUBLE_QUOTE
)
512 return STRING_SYMBOL
; /* symbolic representation of string */
515 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
516 * special character to symbolically represent a generic character.
517 * Also detects Vera numbers that include a base specifier (i.e. 'b1010). */
/*  Reads characters until EOF, testing each for a single quote, a NEWLINE, a
 *  base letter (D, H, O or B in either case) when count is 1, and a
 *  non-alphanumeric character once veraBase is set. Returns CHAR_SYMBOL in
 *  place of the literal.
 *  NOTE(review): the action of every branch, the updates of "count" and
 *  "veraBase" and the first test of the chain are missing from this listing.
 */
519 static int skipToEndOfChar (void)
522 int count
= 0, veraBase
= '\0';
524 while ((c
= fileGetc ()) != EOF
)
528 fileGetc (); /* throw away next character, too */
529 else if (c
== SINGLE_QUOTE
)
531 else if (c
== NEWLINE
)
536 else if (count
== 1 && strchr ("DHOB", toupper (c
)) != NULL
)
538 else if (veraBase
!= '\0' && ! isalnum (c
))
544 return CHAR_SYMBOL
; /* symbolic representation of character */
547 /* This function returns the next character, stripping out comments,
548 * C pre-processor directives, and the contents of single and double
549 * quoted strings. In short, strip anything which places a burden upon
549 * ... [the end of this sentence is cut off in this listing] */
/*  Returns the next character of preprocessed input. What the visible lines
 *  show:
 *    - a pushed-back character is returned first, with ungetch2 moving down
 *      into ungetch;
 *    - string and character literals are replaced by the results of
 *      skipToEndOfString() and skipToEndOfChar();
 *    - the result of isComment() selects skipOverCComment() or
 *      skipOverCplusComment();
 *    - while directives are accepted, the directive state becomes
 *      DRCTV_HASH;
 *    - after a '?' look-ahead, a character is mapped to one of
 *      [ ] { } \ | ^ ~ # (this looks like trigraph replacement - confirm);
 *    - '@' followed by a double quote is read as a string with backslashes
 *      ignored, when hasAtLiteralStrings is set;
 *    - handleDirective() supplies the ignore state, and the loop repeats
 *      while a directive is being processed or input is being ignored.
 *  NOTE(review): most of the switch skeleton, its case labels, the reads
 *  from the file and the return statement are missing from this listing.
 */
552 extern int cppGetc (void)
554 boolean directive
= FALSE
;
555 boolean ignore
= FALSE
;
558 if (Cpp
.ungetch
!= '\0')
561 Cpp
.ungetch
= Cpp
.ungetch2
;
563 return c
; /* return here to avoid re-calling debugPutc () */
578 break; /* ignore most white space */
581 if (directive
&& ! ignore
)
583 Cpp
.directive
.accept
= TRUE
;
587 Cpp
.directive
.accept
= FALSE
;
588 c
= skipToEndOfString (FALSE
);
592 if (Cpp
.directive
.accept
)
595 Cpp
.directive
.state
= DRCTV_HASH
;
596 Cpp
.directive
.accept
= FALSE
;
601 Cpp
.directive
.accept
= FALSE
;
602 c
= skipToEndOfChar ();
607 const Comment comment
= isComment ();
609 if (comment
== COMMENT_C
)
610 c
= skipOverCComment ();
611 else if (comment
== COMMENT_CPLUS
)
613 c
= skipOverCplusComment ();
618 Cpp
.directive
.accept
= FALSE
;
624 int next
= fileGetc ();
628 else if (next
== '?')
637 int next
= fileGetc ();
645 case '(': c
= '['; break;
646 case ')': c
= ']'; break;
647 case '<': c
= '{'; break;
648 case '>': c
= '}'; break;
649 case '/': c
= BACKSLASH
; goto process
;
650 case '!': c
= '|'; break;
651 case SINGLE_QUOTE
: c
= '^'; break;
652 case '-': c
= '~'; break;
653 case '=': c
= '#'; goto process
;
663 if (c
== '@' && Cpp
.hasAtLiteralStrings
)
665 int next
= fileGetc ();
666 if (next
== DOUBLE_QUOTE
)
668 Cpp
.directive
.accept
= FALSE
;
669 c
= skipToEndOfString (TRUE
);
673 Cpp
.directive
.accept
= FALSE
;
675 ignore
= handleDirective (c
);
678 } while (directive
|| ignore
);
680 DebugStatement ( debugPutc (DEBUG_CPP
, c
); )
681 DebugStatement ( if (c
== NEWLINE
)
682 debugPrintf (DEBUG_CPP
, "%6ld: ", getInputLineNumber () + 1); )
/*  Extracts the argument list for tokenName from the in-memory input
 *  buffer. It saves the current buffer position, moves to startPosition,
 *  copies pos2 - pos1 + 1 bytes of File.fpBuffer into a null-terminated
 *  scratch string, passes that to getArglistFromStr() and finally restores
 *  the saved position.
 *  NOTE(review): the assignment of pos2, any check that pos2 is greater than
 *  pos1, the release of the scratch string and the return statement are
 *  missing from this listing.
 */
687 extern char *getArglistFromBufferPos(int startPosition
, const char *tokenName
)
689 int bufferOriginalPosition
;
691 char *arglist
= NULL
;
694 /* FIXME startPosition as well as getBufPos() are mostly wrong here */
698 bufferOriginalPosition
= getBufPos ();
699 setBufPos(startPosition
);
700 pos1
= File
.fpBufferPosition
;
707 result
= (char *) g_malloc(sizeof(char ) * (pos2
- pos1
+ 2));
710 memcpy(result
, &File
.fpBuffer
[getBufPos()], pos2
- pos1
+ 1);
711 result
[pos2
-pos1
+1] = '\0';
712 arglist
= getArglistFromStr(result
, tokenName
);
716 setBufPos (bufferOriginalPosition
);
/*  Extracts the argument list for tokenName from the input file. It notes
 *  the current offset (pos2), saves the stream position, seeks to
 *  startPosition (pos1), reads pos2 - pos1 + 1 bytes into a scratch string,
 *  passes that to getArglistFromStr() and finally restores the saved stream
 *  position.
 *  NOTE(review): the declarations of "result", "pos1" and "pos2", any guard
 *  around the allocation, the release of the scratch string and the return
 *  statement are missing from this listing.
 */
720 extern char *getArglistFromFilePos(fpos_t startPosition
, const char *tokenName
)
722 fpos_t originalPosition
;
724 char *arglist
= NULL
;
/* NOTE(review): the results of ftell(), fgetpos() and fsetpos() are not
 * checked in the visible lines. */
727 pos2
= ftell(File
.fp
);
729 fgetpos(File
.fp
, &originalPosition
);
730 fsetpos(File
.fp
, &startPosition
);
731 pos1
= ftell(File
.fp
);
735 result
= (char *) g_malloc(sizeof(char ) * (pos2
- pos1
+ 2));
/* NOTE(review): the terminator below is written at the requested length, not
 * at the count fread() returned; after a short read the bytes in between are
 * uninitialised and still get parsed. Consider terminating at the fread()
 * count. */
736 if (result
!= NULL
&& fread(result
, sizeof(char), pos2
- pos1
+ 1, File
.fp
) > 0)
738 result
[pos2
-pos1
+1] = '\0';
739 arglist
= getArglistFromStr(result
, tokenName
);
743 fsetpos(File
.fp
, &originalPosition
);
/*  Walks buf up to its terminating NUL with a small state machine covering
 *  plain code, C comments, C++ comments, double-quoted and single-quoted
 *  text, and backslash escapes. Several branches test whether the character
 *  before the output position "pos" is already a space.
 *  NOTE(review): most statements of the loop body are missing from this
 *  listing. It appears to compact buf in place, dropping comments and
 *  collapsing white space; getArglistFromStr() searches buf right after
 *  calling this. Confirm against the complete source.
 */
757 static void stripCodeBuffer(char *buf
)
760 ParseState state
= st_none_t
, prev_state
= st_none_t
;
762 while (buf
[i
] != '\0')
767 if (st_none_t
== state
)
769 /* Check if this is the start of a comment */
770 if (buf
[i
+1] == '*') /* C comment */
771 state
= st_c_comment_t
;
772 else if (buf
[i
+1] == '/') /* C++ comment */
773 state
= st_cpp_comment_t
;
774 else /* Normal character */
777 else if (st_c_comment_t
== state
)
779 /* Check if this is the end of a C comment */
782 if ((pos
> 0) && (buf
[pos
-1] != ' '))
789 if (st_none_t
== state
)
790 state
= st_double_quote_t
;
791 else if (st_double_quote_t
== state
)
795 if (st_none_t
== state
)
796 state
= st_single_quote_t
;
797 else if (st_single_quote_t
== state
)
801 if ((buf
[i
] == '\\') && (st_escape_t
!= state
))
806 else if (st_escape_t
== state
)
809 prev_state
= st_none_t
;
811 else if ((buf
[i
] == '\n') && (st_cpp_comment_t
== state
))
813 if ((pos
> 0) && (buf
[pos
-1] != ' '))
817 else if (st_none_t
== state
)
821 if ((pos
> 0) && (buf
[pos
-1] != ' '))
/*  Returns a newly allocated copy (strdup) of the text starting at the first
 *  '(' that follows the first occurrence of "name" in "buf". The loop tracks
 *  the nesting of '(' and ')' from that point. buf is passed through
 *  stripCodeBuffer() first, so the caller's buffer is modified.
 *  makeDefineTag() releases the result with free().
 *  NOTE(review): the early-exit statements for a NULL or empty argument and
 *  for a failed search, the body of each loop branch and whatever happens
 *  between the loop and the strdup() call are missing from this listing.
 *  NOTE(review): strstr() also matches "name" inside a longer identifier, so
 *  the list found may belong to a different token.
 */
835 extern char *getArglistFromStr(char *buf
, const char *name
)
839 if ((NULL
== buf
) || (NULL
== name
) || ('\0' == name
[0]))
841 stripCodeBuffer(buf
);
842 if (NULL
== (start
= strstr(buf
, name
)))
844 if (NULL
== (start
= strchr(start
, '(')))
846 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
850 else if ('(' == *end
)
852 else if (')' == *end
)
856 return strdup(start
);
859 /* vi:set tabstop=4 shiftwidth=4: */