4 * Copyright (c) 1996-2002, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains the high level source read functions (preprocessor
10 * directives are handled within this level).
16 #include "general.h" /* must always come first */
30 #define stringMatch(s1,s2) (strcmp (s1,s2) == 0)
31 #define isspacetab(c) ((c) == SPACE || (c) == TAB)
36 typedef enum { COMMENT_NONE
, COMMENT_C
, COMMENT_CPLUS
} Comment
;
39 MaxCppNestingLevel
= 20,
43 /* Defines one nesting level of a preprocessor conditional.
45 typedef struct sConditionalInfo
{
46 boolean ignoreAllBranches
; /* ignoring parent conditional branch */
47 boolean singleBranch
; /* choose only one branch */
48 boolean branchChosen
; /* branch already selected */
49 boolean ignoring
; /* current ignore state */
53 DRCTV_NONE
, /* no known directive - ignore to end of line */
54 DRCTV_DEFINE
, /* "#define" encountered */
55 DRCTV_HASH
, /* initial '#' read; determine directive */
56 DRCTV_IF
, /* "#if" or "#ifdef" encountered */
57 DRCTV_PRAGMA
, /* #pragma encountered */
58 DRCTV_UNDEF
/* "#undef" encountered */
61 /* Defines the current state of the pre-processor.
63 typedef struct sCppState
{
64 int ungetch
, ungetch2
; /* ungotten characters, if any */
65 boolean resolveRequired
; /* must resolve if/else/elif/endif branch */
67 enum eState state
; /* current directive being processed */
68 boolean accept
; /* is a directive syntactically permitted? */
69 vString
* name
; /* macro name */
70 unsigned int nestLevel
; /* level 0 is not used */
71 conditionalInfo ifdef
[MaxCppNestingLevel
];
79 /* Use brace formatting to detect end of block.
81 static boolean BraceFormat
= FALSE
;
83 static cppState Cpp
= {
84 '\0', '\0', /* ungetch characters */
85 FALSE
, /* resolveRequired */
87 DRCTV_NONE
, /* state */
91 { {FALSE
,FALSE
,FALSE
,FALSE
} } /* ifdef array */
96 * FUNCTION DEFINITIONS
99 extern boolean
isBraceFormat (void)
104 extern unsigned int getDirectiveNestLevel (void)
106 return Cpp
.directive
.nestLevel
;
109 extern void cppInit (const boolean state
)
115 Cpp
.resolveRequired
= FALSE
;
117 Cpp
.directive
.state
= DRCTV_NONE
;
118 Cpp
.directive
.accept
= TRUE
;
119 Cpp
.directive
.nestLevel
= 0;
121 Cpp
.directive
.ifdef
[0].ignoreAllBranches
= FALSE
;
122 Cpp
.directive
.ifdef
[0].singleBranch
= FALSE
;
123 Cpp
.directive
.ifdef
[0].branchChosen
= FALSE
;
124 Cpp
.directive
.ifdef
[0].ignoring
= FALSE
;
126 if (Cpp
.directive
.name
== NULL
)
127 Cpp
.directive
.name
= vStringNew ();
129 vStringClear (Cpp
.directive
.name
);
132 extern void cppTerminate (void)
134 if (Cpp
.directive
.name
!= NULL
)
136 vStringDelete (Cpp
.directive
.name
);
137 Cpp
.directive
.name
= NULL
;
141 extern void cppBeginStatement (void)
143 Cpp
.resolveRequired
= TRUE
;
146 extern void cppEndStatement (void)
148 Cpp
.resolveRequired
= FALSE
;
154 * This section handles preprocessor directives. It strips out all
155 * directives and may emit a tag for #define directives.
158 /* This puts a character back into the input queue for the source File.
159 * Up to two characters may be ungotten.
161 extern void cppUngetc (const int c
)
163 Assert (Cpp
.ungetch2
== '\0');
164 Cpp
.ungetch2
= Cpp
.ungetch
;
168 /* Reads a directive, whose first character is given by "c", into "name".
170 static boolean
readDirective (int c
, char *const name
, unsigned int maxLength
)
174 for (i
= 0 ; i
< maxLength
- 1 ; ++i
)
179 if (c
== EOF
|| ! isalpha (c
))
187 name
[i
] = '\0'; /* null terminate */
189 return (boolean
) isspacetab (c
);
192 /* Reads an identifier, whose first character is given by "c", into "name",
193 * together with the file location and corresponding line number.
195 static void readIdentifier (int c
, vString
*const name
)
200 vStringPut (name
, c
);
201 } while (c
= fileGetc (), (c
!= EOF
&& isident (c
)));
203 vStringTerminate (name
);
206 static conditionalInfo
*currentConditional (void)
208 return &Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
];
211 static boolean
isIgnore (void)
213 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
;
216 static boolean
setIgnore (const boolean ignore
)
218 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
= ignore
;
221 static boolean
isIgnoreBranch (void)
223 conditionalInfo
*const ifdef
= currentConditional ();
225 /* Force a single branch if an incomplete statement is discovered
226 * en route. This may have allowed earlier branches containing complete
227 * statements to be followed, but we must follow no further branches.
229 if (Cpp
.resolveRequired
&& ! BraceFormat
)
230 ifdef
->singleBranch
= TRUE
;
232 /* We will ignore this branch in the following cases:
234 * 1. We are ignoring all branches (conditional was within an ignored
235 * branch of the parent conditional)
236 * 2. A branch has already been chosen and either of:
237 * a. A statement was incomplete upon entering the conditional
238 * b. A statement is incomplete upon encountering a branch
240 return (boolean
) (ifdef
->ignoreAllBranches
||
241 (ifdef
->branchChosen
&& ifdef
->singleBranch
));
244 static void chooseBranch (void)
248 conditionalInfo
*const ifdef
= currentConditional ();
250 ifdef
->branchChosen
= (boolean
) (ifdef
->singleBranch
||
251 Cpp
.resolveRequired
);
255 /* Pushes one nesting level for an #if directive, indicating whether or not
256 * the branch should be ignored and whether a branch has already been chosen.
258 static boolean
pushConditional (const boolean firstBranchChosen
)
260 const boolean ignoreAllBranches
= isIgnore (); /* current ignore */
261 boolean ignoreBranch
= FALSE
;
263 if (Cpp
.directive
.nestLevel
< (unsigned int) MaxCppNestingLevel
- 1)
265 conditionalInfo
*ifdef
;
267 ++Cpp
.directive
.nestLevel
;
268 ifdef
= currentConditional ();
270 /* We take a snapshot of whether there is an incomplete statement in
271 * progress upon encountering the preprocessor conditional. If so,
272 * then we will flag that only a single branch of the conditional
273 * should be followed.
275 ifdef
->ignoreAllBranches
= ignoreAllBranches
;
276 ifdef
->singleBranch
= Cpp
.resolveRequired
;
277 ifdef
->branchChosen
= firstBranchChosen
;
278 ifdef
->ignoring
= (boolean
) (ignoreAllBranches
|| (
279 ! firstBranchChosen
&& ! BraceFormat
&&
280 (ifdef
->singleBranch
|| !Option
.if0
)));
281 ignoreBranch
= ifdef
->ignoring
;
286 /* Pops one nesting level for an #endif directive.
288 static boolean
popConditional (void)
290 if (Cpp
.directive
.nestLevel
> 0)
291 --Cpp
.directive
.nestLevel
;
296 static void makeDefineTag (const char *const name
)
298 const boolean isFileScope
= (boolean
) (! isHeaderFile ());
300 if (includingDefineTags () &&
301 (! isFileScope
|| Option
.include
.fileScope
))
304 initTagEntry (&e
, name
);
305 e
.lineNumberEntry
= (boolean
) (Option
.locate
!= EX_PATTERN
);
306 e
.isFileScope
= isFileScope
;
307 e
.truncateLine
= TRUE
;
308 e
.kindName
= "macro";
314 static void directiveDefine (const int c
)
318 readIdentifier (c
, Cpp
.directive
.name
);
320 makeDefineTag (vStringValue (Cpp
.directive
.name
));
322 Cpp
.directive
.state
= DRCTV_NONE
;
325 static void directivePragma (int c
)
329 readIdentifier (c
, Cpp
.directive
.name
);
330 if (stringMatch (vStringValue (Cpp
.directive
.name
), "weak"))
332 /* generate macro tag for weak name */
336 } while (c
== SPACE
);
339 readIdentifier (c
, Cpp
.directive
.name
);
340 makeDefineTag (vStringValue (Cpp
.directive
.name
));
344 Cpp
.directive
.state
= DRCTV_NONE
;
347 static boolean
directiveIf (const int c
)
349 DebugStatement ( const boolean ignore0
= isIgnore (); )
350 const boolean ignore
= pushConditional ((boolean
) (c
!= '0'));
352 Cpp
.directive
.state
= DRCTV_NONE
;
353 DebugStatement ( debugCppNest (TRUE
, Cpp
.directive
.nestLevel
);
354 if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
359 static boolean
directiveHash (const int c
)
361 boolean ignore
= FALSE
;
362 char directive
[MaxDirectiveName
];
363 DebugStatement ( const boolean ignore0
= isIgnore (); )
365 readDirective (c
, directive
, MaxDirectiveName
);
366 if (stringMatch (directive
, "define"))
367 Cpp
.directive
.state
= DRCTV_DEFINE
;
368 else if (stringMatch (directive
, "undef"))
369 Cpp
.directive
.state
= DRCTV_UNDEF
;
370 else if (strncmp (directive
, "if", (size_t) 2) == 0)
371 Cpp
.directive
.state
= DRCTV_IF
;
372 else if (stringMatch (directive
, "elif") ||
373 stringMatch (directive
, "else"))
375 ignore
= setIgnore (isIgnoreBranch ());
376 if (! ignore
&& stringMatch (directive
, "else"))
378 Cpp
.directive
.state
= DRCTV_NONE
;
379 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
381 else if (stringMatch (directive
, "endif"))
383 DebugStatement ( debugCppNest (FALSE
, Cpp
.directive
.nestLevel
); )
384 ignore
= popConditional ();
385 Cpp
.directive
.state
= DRCTV_NONE
;
386 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
388 else if (stringMatch (directive
, "pragma"))
389 Cpp
.directive
.state
= DRCTV_PRAGMA
;
391 Cpp
.directive
.state
= DRCTV_NONE
;
396 /* Handles a pre-processor directive whose first character is given by "c".
398 static boolean
handleDirective (const int c
)
400 boolean ignore
= isIgnore ();
402 switch (Cpp
.directive
.state
)
404 case DRCTV_NONE
: ignore
= isIgnore (); break;
405 case DRCTV_DEFINE
: directiveDefine (c
); break;
406 case DRCTV_HASH
: ignore
= directiveHash (c
); break;
407 case DRCTV_IF
: ignore
= directiveIf (c
); break;
408 case DRCTV_PRAGMA
: directivePragma (c
); break;
409 case DRCTV_UNDEF
: directiveDefine (c
); break;
414 /* Called upon reading a slash ('/') character; determines whether a
415 * comment is encountered, and its type.
417 static Comment
isComment (void)
420 const int next
= fileGetc ();
424 else if (next
== '/')
425 comment
= COMMENT_CPLUS
;
429 comment
= COMMENT_NONE
;
434 /* Skips over a C style comment. According to ANSI specification a comment
435 * is treated as white space, so we perform this substitution.
437 static int skipOverCComment (void)
447 const int next
= fileGetc ();
453 c
= SPACE
; /* replace comment with space */
461 /* Skips over a C++ style comment.
463 static int skipOverCplusComment (void)
467 while ((c
= fileGetc ()) != EOF
)
470 fileGetc (); /* throw away next character, too */
471 else if (c
== NEWLINE
)
477 /* Skips to the end of a string, returning a special character to
478 * symbolically represent a generic string.
480 static int skipToEndOfString (void)
484 while ((c
= fileGetc ()) != EOF
)
487 fileGetc (); /* throw away next character, too */
488 else if (c
== DOUBLE_QUOTE
)
491 return STRING_SYMBOL
; /* symbolic representation of string */
494 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
495 * special character to symbolically represent a generic character.
496 * Also detects Vera numbers that include a base specifier (e.g. 'b1010).
498 static int skipToEndOfChar (void)
501 int count
= 0, veraBase
= '\0';
503 while ((c
= fileGetc ()) != EOF
)
507 fileGetc (); /* throw away next character, too */
508 else if (c
== SINGLE_QUOTE
)
510 else if (c
== NEWLINE
)
515 else if (count
== 1 && strchr ("DHOB", toupper (c
)) != NULL
)
517 else if (veraBase
!= '\0' && ! isalnum (c
))
523 return CHAR_SYMBOL
; /* symbolic representation of character */
526 /* This function returns the next character, stripping out comments,
527 * C pre-processor directives, and the contents of single and double
528 * quoted strings. In short, strip anything which places a burden upon
531 extern int cppGetc (void)
533 boolean directive
= FALSE
;
534 boolean ignore
= FALSE
;
537 if (Cpp
.ungetch
!= '\0')
540 Cpp
.ungetch
= Cpp
.ungetch2
;
542 return c
; /* return here to avoid re-calling debugPutc () */
557 break; /* ignore most white space */
560 if (directive
&& ! ignore
)
562 Cpp
.directive
.accept
= TRUE
;
566 Cpp
.directive
.accept
= FALSE
;
567 c
= skipToEndOfString ();
571 if (Cpp
.directive
.accept
)
574 Cpp
.directive
.state
= DRCTV_HASH
;
575 Cpp
.directive
.accept
= FALSE
;
580 Cpp
.directive
.accept
= FALSE
;
581 c
= skipToEndOfChar ();
586 const Comment comment
= isComment ();
588 if (comment
== COMMENT_C
)
589 c
= skipOverCComment ();
590 else if (comment
== COMMENT_CPLUS
)
592 c
= skipOverCplusComment ();
597 Cpp
.directive
.accept
= FALSE
;
603 int next
= fileGetc ();
607 else if (next
== '?')
616 int next
= fileGetc ();
624 case '(': c
= '['; break;
625 case ')': c
= ']'; break;
626 case '<': c
= '{'; break;
627 case '>': c
= '}'; break;
628 case '/': c
= BACKSLASH
; goto process
;
629 case '!': c
= '|'; break;
630 case SINGLE_QUOTE
: c
= '^'; break;
631 case '-': c
= '~'; break;
632 case '=': c
= '#'; goto process
;
642 Cpp
.directive
.accept
= FALSE
;
644 ignore
= handleDirective (c
);
647 } while (directive
|| ignore
);
649 DebugStatement ( debugPutc (DEBUG_CPP
, c
); )
650 DebugStatement ( if (c
== NEWLINE
)
651 debugPrintf (DEBUG_CPP
, "%6ld: ", getInputLineNumber () + 1); )
656 extern char *getArglistFromBufferPos(int startPosition
, const char *tokenName
)
658 int bufferOriginalPosition
;
660 char *arglist
= NULL
;
666 bufferOriginalPosition
= getBufPos ();
667 setBufPos(startPosition
);
668 pos1
= File
.fpBufferPosition
;
675 result
= (char *) malloc(sizeof(char ) * (pos2
- pos1
+ 2));
678 memcpy(result
, &File
.fpBuffer
[getBufPos()], pos2
- pos1
+ 1);
679 result
[pos2
-pos1
+1] = '\0';
680 arglist
= getArglistFromStr(result
, tokenName
);
684 setBufPos (bufferOriginalPosition
);
688 extern char *getArglistFromFilePos(fpos_t startPosition
, const char *tokenName
)
690 fpos_t originalPosition
;
692 char *arglist
= NULL
;
695 pos2
= ftell(File
.fp
);
697 fgetpos(File
.fp
, &originalPosition
);
698 fsetpos(File
.fp
, &startPosition
);
699 pos1
= ftell(File
.fp
);
703 result
= (char *) malloc(sizeof(char ) * (pos2
- pos1
+ 2));
706 fread(result
, sizeof(char), pos2
- pos1
+ 1, File
.fp
);
707 result
[pos2
-pos1
+1] = '\0';
708 arglist
= getArglistFromStr(result
, tokenName
);
712 fsetpos(File
.fp
, &originalPosition
);
726 static void stripCodeBuffer(char *buf
)
729 ParseState state
= st_none_t
, prev_state
= st_none_t
;
731 while (buf
[i
] != '\0')
736 if (st_none_t
== state
)
738 /* Check if this is the start of a comment */
739 if (buf
[i
+1] == '*') /* C comment */
740 state
= st_c_comment_t
;
741 else if (buf
[i
+1] == '/') /* C++ comment */
742 state
= st_cpp_comment_t
;
743 else /* Normal character */
746 else if (st_c_comment_t
== state
)
748 /* Check if this is the end of a C comment */
751 if ((pos
> 0) && (buf
[pos
-1] != ' '))
758 if (st_none_t
== state
)
759 state
= st_double_quote_t
;
760 else if (st_double_quote_t
== state
)
764 if (st_none_t
== state
)
765 state
= st_single_quote_t
;
766 else if (st_single_quote_t
== state
)
770 if ((buf
[i
] == '\\') && (st_escape_t
!= state
))
775 else if (st_escape_t
== state
)
778 prev_state
= st_none_t
;
780 else if ((buf
[i
] == '\n') && (st_cpp_comment_t
== state
))
782 if ((pos
> 0) && (buf
[pos
-1] != ' '))
786 else if (st_none_t
== state
)
790 if ((pos
> 0) && (buf
[pos
-1] != ' '))
804 extern char *getArglistFromStr(char *buf
, const char *name
)
808 if ((NULL
== buf
) || (NULL
== name
) || ('\0' == name
[0]))
810 stripCodeBuffer(buf
);
811 if (NULL
== (start
= strstr(buf
, name
)))
813 if (NULL
== (start
= strchr(start
, '(')))
815 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
819 else if ('(' == *end
)
821 else if (')' == *end
)
825 return strdup(start
);
829 /* vi:set tabstop=4 shiftwidth=4: */