2 * Copyright (c) 1996-2002, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains the high level source read functions (preprocessor
8 * directives are handled within this level).
14 #include "general.h" /* must always come first */
/* Evaluates to non-zero when the C strings s1 and s2 are identical according
 * to strcmp(), and to zero otherwise.
 */
#define stringMatch(s1,s2)	(! strcmp ((s1), (s2)))

/* Evaluates to non-zero when c is either SPACE or TAB, and to zero otherwise.
 * The argument may be evaluated twice, so it must not have side effects.
 */
#define isspacetab(c)		(! ((c) != SPACE && (c) != TAB))
/* Classification of what follows a '/' character. cppGetc() dispatches on
 * this value to select the matching comment-skipping routine.
 */
typedef enum {
	COMMENT_NONE,	/* the slash does not start a comment */
	COMMENT_C,		/* handled by skipOverCComment () */
	COMMENT_CPLUS,	/* handled by skipOverCplusComment () */
	COMMENT_D		/* handled by skipOverDComment () */
} Comment;
37 MaxCppNestingLevel
= 20,
41 /* Defines a single nesting level of a preprocessor conditional.
43 typedef struct sConditionalInfo
{
44 boolean ignoreAllBranches
; /* ignoring parent conditional branch */
45 boolean singleBranch
; /* choose only one branch */
46 boolean branchChosen
; /* branch already selected */
47 boolean ignoring
; /* current ignore state */
51 DRCTV_NONE
, /* no known directive - ignore to end of line */
52 DRCTV_DEFINE
, /* "#define" encountered */
53 DRCTV_HASH
, /* initial '#' read; determine directive */
54 DRCTV_IF
, /* "#if" or "#ifdef" encountered */
55 DRCTV_PRAGMA
, /* #pragma encountered */
56 DRCTV_UNDEF
/* "#undef" encountered */
59 /* Defines the current state of the pre-processor.
61 typedef struct sCppState
{
62 int ungetch
, ungetch2
; /* ungotten characters, if any */
63 boolean resolveRequired
; /* must resolve if/else/elif/endif branch */
64 boolean hasAtLiteralStrings
; /* supports @"c:\" strings */
65 boolean hasCxxRawLiteralStrings
; /* supports R"xxx(...)xxx" strings */
67 enum eState state
; /* current directive being processed */
68 boolean accept
; /* is a directive syntactically permitted? */
69 vString
* name
; /* macro name */
70 unsigned int nestLevel
; /* level 0 is not used */
71 conditionalInfo ifdef
[MaxCppNestingLevel
];
79 /* Use brace formatting to detect end of block.
81 static boolean BraceFormat
= FALSE
;
83 static cppState Cpp
= {
84 '\0', '\0', /* ungetch characters */
85 FALSE
, /* resolveRequired */
86 FALSE
, /* hasAtLiteralStrings */
87 FALSE
, /* hasCxxRawLiteralStrings */
89 DRCTV_NONE
, /* state */
93 { {FALSE
,FALSE
,FALSE
,FALSE
} } /* ifdef array */
98 * FUNCTION DEFINITIONS
101 extern boolean
isBraceFormat (void)
106 extern unsigned int getDirectiveNestLevel (void)
108 return Cpp
.directive
.nestLevel
;
111 extern void cppInit (const boolean state
, const boolean hasAtLiteralStrings
,
112 const boolean hasCxxRawLiteralStrings
)
118 Cpp
.resolveRequired
= FALSE
;
119 Cpp
.hasAtLiteralStrings
= hasAtLiteralStrings
;
120 Cpp
.hasCxxRawLiteralStrings
= hasCxxRawLiteralStrings
;
122 Cpp
.directive
.state
= DRCTV_NONE
;
123 Cpp
.directive
.accept
= TRUE
;
124 Cpp
.directive
.nestLevel
= 0;
126 Cpp
.directive
.ifdef
[0].ignoreAllBranches
= FALSE
;
127 Cpp
.directive
.ifdef
[0].singleBranch
= FALSE
;
128 Cpp
.directive
.ifdef
[0].branchChosen
= FALSE
;
129 Cpp
.directive
.ifdef
[0].ignoring
= FALSE
;
131 if (Cpp
.directive
.name
== NULL
)
132 Cpp
.directive
.name
= vStringNew ();
134 vStringClear (Cpp
.directive
.name
);
137 extern void cppTerminate (void)
139 if (Cpp
.directive
.name
!= NULL
)
141 vStringDelete (Cpp
.directive
.name
);
142 Cpp
.directive
.name
= NULL
;
146 extern void cppBeginStatement (void)
148 Cpp
.resolveRequired
= TRUE
;
151 extern void cppEndStatement (void)
153 Cpp
.resolveRequired
= FALSE
;
159 * This section handles preprocessor directives. It strips out all
160 * directives and may emit a tag for #define directives.
163 /* This puts a character back into the input queue for the source File.
164 * Up to two characters may be ungotten.
166 extern void cppUngetc (const int c
)
168 Assert (Cpp
.ungetch2
== '\0');
169 Cpp
.ungetch2
= Cpp
.ungetch
;
173 /* Reads a directive, whose first character is given by "c", into "name".
175 static boolean
readDirective (int c
, char *const name
, unsigned int maxLength
)
179 for (i
= 0 ; i
< maxLength
- 1 ; ++i
)
183 c
= getcFromInputFile ();
184 if (c
== EOF
|| ! isalpha (c
))
186 ungetcToInputFile (c
);
192 name
[i
] = '\0'; /* null terminate */
194 return (boolean
) isspacetab (c
);
197 /* Reads an identifier, whose first character is given by "c", into "name",
198 * together with the file location and corresponding line number.
200 static void readIdentifier (int c
, vString
*const name
)
205 vStringPut (name
, c
);
206 c
= getcFromInputFile ();
207 } while (c
!= EOF
&& isident (c
));
208 ungetcToInputFile (c
);
209 vStringTerminate (name
);
212 static conditionalInfo
*currentConditional (void)
214 return &Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
];
217 static boolean
isIgnore (void)
219 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
;
222 static boolean
setIgnore (const boolean ignore
)
224 return Cpp
.directive
.ifdef
[Cpp
.directive
.nestLevel
].ignoring
= ignore
;
227 static boolean
isIgnoreBranch (void)
229 conditionalInfo
*const ifdef
= currentConditional ();
231 /* Force a single branch if an incomplete statement is discovered
232 * en route. This may have allowed earlier branches containing complete
233 * statements to be followed, but we must follow no further branches.
235 if (Cpp
.resolveRequired
&& ! BraceFormat
)
236 ifdef
->singleBranch
= TRUE
;
238 /* We will ignore this branch in the following cases:
240 * 1. We are ignoring all branches (conditional was within an ignored
241 * branch of the parent conditional)
242 * 2. A branch has already been chosen and either of:
243 * a. A statement was incomplete upon entering the conditional
244 * b. A statement is incomplete upon encountering a branch
246 return (boolean
) (ifdef
->ignoreAllBranches
||
247 (ifdef
->branchChosen
&& ifdef
->singleBranch
));
250 static void chooseBranch (void)
254 conditionalInfo
*const ifdef
= currentConditional ();
256 ifdef
->branchChosen
= (boolean
) (ifdef
->singleBranch
||
257 Cpp
.resolveRequired
);
261 /* Pushes one nesting level for an #if directive, indicating whether or not
262 * the branch should be ignored and whether a branch has already been chosen.
264 static boolean
pushConditional (const boolean firstBranchChosen
)
266 const boolean ignoreAllBranches
= isIgnore (); /* current ignore */
267 boolean ignoreBranch
= FALSE
;
269 if (Cpp
.directive
.nestLevel
< (unsigned int) MaxCppNestingLevel
- 1)
271 conditionalInfo
*ifdef
;
273 ++Cpp
.directive
.nestLevel
;
274 ifdef
= currentConditional ();
276 /* We take a snapshot of whether there is an incomplete statement in
277 * progress upon encountering the preprocessor conditional. If so,
278 * then we will flag that only a single branch of the conditional
279 * should be followed.
281 ifdef
->ignoreAllBranches
= ignoreAllBranches
;
282 ifdef
->singleBranch
= Cpp
.resolveRequired
;
283 ifdef
->branchChosen
= firstBranchChosen
;
284 ifdef
->ignoring
= (boolean
) (ignoreAllBranches
|| (
285 ! firstBranchChosen
&& ! BraceFormat
&&
286 (ifdef
->singleBranch
|| !Option
.if0
)));
287 ignoreBranch
= ifdef
->ignoring
;
292 /* Pops one nesting level for an #endif directive.
294 static boolean
popConditional (void)
296 if (Cpp
.directive
.nestLevel
> 0)
297 --Cpp
.directive
.nestLevel
;
302 static void makeDefineTag (const char *const name
, boolean parameterized
)
304 const boolean isFileScope
= (boolean
) (! isHeaderFile ());
306 if (includingDefineTags () &&
307 (! isFileScope
|| Option
.include
.fileScope
))
311 initTagEntry (&e
, name
);
313 e
.lineNumberEntry
= (boolean
) (Option
.locate
!= EX_PATTERN
);
314 e
.isFileScope
= isFileScope
;
315 e
.truncateLine
= TRUE
;
316 e
.kindName
= "macro";
320 e
.extensionFields
.signature
= getArglistFromFilePos(getInputFilePosition()
325 free((char *) e
.extensionFields
.signature
);
329 static void directiveDefine (const int c
)
331 boolean parameterized
;
336 readIdentifier (c
, Cpp
.directive
.name
);
337 nc
= getcFromInputFile ();
338 ungetcToInputFile (nc
);
339 parameterized
= (boolean
) (nc
== '(');
341 makeDefineTag (vStringValue (Cpp
.directive
.name
), parameterized
);
343 Cpp
.directive
.state
= DRCTV_NONE
;
346 static void directivePragma (int c
)
350 readIdentifier (c
, Cpp
.directive
.name
);
351 if (stringMatch (vStringValue (Cpp
.directive
.name
), "weak"))
353 /* generate macro tag for weak name */
356 c
= getcFromInputFile ();
357 } while (c
== SPACE
);
360 readIdentifier (c
, Cpp
.directive
.name
);
361 makeDefineTag (vStringValue (Cpp
.directive
.name
), FALSE
);
365 Cpp
.directive
.state
= DRCTV_NONE
;
368 static boolean
directiveIf (const int c
)
370 const boolean ignore
= pushConditional ((boolean
) (c
!= '0'));
372 Cpp
.directive
.state
= DRCTV_NONE
;
377 static boolean
directiveHash (const int c
)
379 boolean ignore
= FALSE
;
380 char directive
[MaxDirectiveName
];
381 DebugStatement ( const boolean ignore0
= isIgnore (); )
383 readDirective (c
, directive
, MaxDirectiveName
);
384 if (stringMatch (directive
, "define"))
385 Cpp
.directive
.state
= DRCTV_DEFINE
;
386 else if (stringMatch (directive
, "undef"))
387 Cpp
.directive
.state
= DRCTV_UNDEF
;
388 else if (strncmp (directive
, "if", (size_t) 2) == 0)
389 Cpp
.directive
.state
= DRCTV_IF
;
390 else if (stringMatch (directive
, "elif") ||
391 stringMatch (directive
, "else"))
393 ignore
= setIgnore (isIgnoreBranch ());
394 if (! ignore
&& stringMatch (directive
, "else"))
396 Cpp
.directive
.state
= DRCTV_NONE
;
397 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
399 else if (stringMatch (directive
, "endif"))
401 DebugStatement ( debugCppNest (FALSE
, Cpp
.directive
.nestLevel
); )
402 ignore
= popConditional ();
403 Cpp
.directive
.state
= DRCTV_NONE
;
404 DebugStatement ( if (ignore
!= ignore0
) debugCppIgnore (ignore
); )
406 else if (stringMatch (directive
, "pragma"))
407 Cpp
.directive
.state
= DRCTV_PRAGMA
;
409 Cpp
.directive
.state
= DRCTV_NONE
;
414 /* Handles a pre-processor directive whose first character is given by "c".
416 static boolean
handleDirective (const int c
)
418 boolean ignore
= isIgnore ();
420 switch (Cpp
.directive
.state
)
422 case DRCTV_NONE
: ignore
= isIgnore (); break;
423 case DRCTV_DEFINE
: directiveDefine (c
); break;
424 case DRCTV_HASH
: ignore
= directiveHash (c
); break;
425 case DRCTV_IF
: ignore
= directiveIf (c
); break;
426 case DRCTV_PRAGMA
: directivePragma (c
); break;
427 case DRCTV_UNDEF
: directiveDefine (c
); break;
432 /* Called upon reading a slash ('/') character; determines whether a
433 * comment follows and, if so, its type.
435 static Comment
isComment (void)
438 const int next
= getcFromInputFile ();
442 else if (next
== '/')
443 comment
= COMMENT_CPLUS
;
444 else if (next
== '+')
448 ungetcToInputFile (next
);
449 comment
= COMMENT_NONE
;
454 /* Skips over a C style comment. According to ANSI specification a comment
455 * is treated as white space, so we perform this substitution.
457 int skipOverCComment (void)
459 int c
= getcFromInputFile ();
464 c
= getcFromInputFile ();
467 const int next
= getcFromInputFile ();
473 c
= SPACE
; /* replace comment with space */
481 /* Skips over a C++ style comment.
483 static int skipOverCplusComment (void)
487 while ((c
= getcFromInputFile ()) != EOF
)
490 getcFromInputFile (); /* throw away next character, too */
491 else if (c
== NEWLINE
)
497 /* Skips over a D style comment.
498 * Really we should match nested /+ comments. At least they're less common.
500 static int skipOverDComment (void)
502 int c
= getcFromInputFile ();
507 c
= getcFromInputFile ();
510 const int next
= getcFromInputFile ();
516 c
= SPACE
; /* replace comment with space */
524 /* Skips to the end of a string, returning a special character to
525 * symbolically represent a generic string.
527 static int skipToEndOfString (boolean ignoreBackslash
)
531 while ((c
= getcFromInputFile ()) != EOF
)
533 if (c
== BACKSLASH
&& ! ignoreBackslash
)
534 getcFromInputFile (); /* throw away next character, too */
535 else if (c
== DOUBLE_QUOTE
)
538 return STRING_SYMBOL
; /* symbolic representation of string */
/* Tells whether "c" may appear in the delimiter of a C++ raw string literal
 * (the "xxx" of R"xxx(...)xxx").
 *
 * Returns 0 for white space (space, form feed, newline, carriage return,
 * horizontal and vertical tab), parentheses and the backslash; returns 1 for
 * every other value.
 */
static int isCxxRawLiteralDelimiterChar (int c)
{
	switch (c)
	{
		/* white space */
		case ' ':
		case '\f':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		/* characters with a structural role in the literal */
		case '(':
		case ')':
		case '\\':
			return 0;

		default:
			return 1;
	}
}
547 static int skipToEndOfCxxRawLiteralString (void)
549 int c
= getcFromInputFile ();
551 if (c
!= '(' && ! isCxxRawLiteralDelimiterChar (c
))
553 ungetcToInputFile (c
);
554 c
= skipToEndOfString (FALSE
);
559 unsigned int delimLen
= 0;
560 boolean collectDelim
= TRUE
;
566 if (isCxxRawLiteralDelimiterChar (c
) &&
567 delimLen
< (sizeof delim
/ sizeof *delim
))
568 delim
[delimLen
++] = c
;
570 collectDelim
= FALSE
;
576 while ((c
= getcFromInputFile ()) != EOF
&& i
< delimLen
&& delim
[i
] == c
)
578 if (i
== delimLen
&& c
== DOUBLE_QUOTE
)
581 ungetcToInputFile (c
);
584 while ((c
= getcFromInputFile ()) != EOF
);
590 /* Skips to the end of the three (possibly four) 'c' sequence, returning a
591 * special character to symbolically represent a generic character.
592 * Also detects Vera numbers that include a base specifier (ie. 'b1010).
594 static int skipToEndOfChar (void)
597 int count
= 0, veraBase
= '\0';
599 while ((c
= getcFromInputFile ()) != EOF
)
603 getcFromInputFile (); /* throw away next character, too */
604 else if (c
== SINGLE_QUOTE
)
606 else if (c
== NEWLINE
)
608 ungetcToInputFile (c
);
611 else if (count
== 1 && strchr ("DHOB", toupper (c
)) != NULL
)
613 else if (veraBase
!= '\0' && ! isalnum (c
))
615 ungetcToInputFile (c
);
619 return CHAR_SYMBOL
; /* symbolic representation of character */
622 /* This function returns the next character, stripping out comments,
623 * C pre-processor directives, and the contents of single and double
624 * quoted strings. In short, strip anything which places a burden upon
627 extern int cppGetc (void)
629 boolean directive
= FALSE
;
630 boolean ignore
= FALSE
;
633 if (Cpp
.ungetch
!= '\0')
636 Cpp
.ungetch
= Cpp
.ungetch2
;
638 return c
; /* return here to avoid re-calling debugPutc () */
642 c
= getcFromInputFile ();
653 break; /* ignore most white space */
656 if (directive
&& ! ignore
)
658 Cpp
.directive
.accept
= TRUE
;
662 Cpp
.directive
.accept
= FALSE
;
663 c
= skipToEndOfString (FALSE
);
667 if (Cpp
.directive
.accept
)
670 Cpp
.directive
.state
= DRCTV_HASH
;
671 Cpp
.directive
.accept
= FALSE
;
676 Cpp
.directive
.accept
= FALSE
;
677 c
= skipToEndOfChar ();
682 const Comment comment
= isComment ();
684 if (comment
== COMMENT_C
)
685 c
= skipOverCComment ();
686 else if (comment
== COMMENT_CPLUS
)
688 c
= skipOverCplusComment ();
690 ungetcToInputFile (c
);
692 else if (comment
== COMMENT_D
)
693 c
= skipOverDComment ();
695 Cpp
.directive
.accept
= FALSE
;
701 int next
= getcFromInputFile ();
706 ungetcToInputFile (next
);
712 int next
= getcFromInputFile ();
714 ungetcToInputFile (next
);
717 next
= getcFromInputFile ();
720 case '(': c
= '['; break;
721 case ')': c
= ']'; break;
722 case '<': c
= '{'; break;
723 case '>': c
= '}'; break;
724 case '/': c
= BACKSLASH
; goto process
;
725 case '!': c
= '|'; break;
726 case SINGLE_QUOTE
: c
= '^'; break;
727 case '-': c
= '~'; break;
728 case '=': c
= '#'; goto process
;
730 ungetcToInputFile ('?');
731 ungetcToInputFile (next
);
738 * input: <: :> <% %> %: %:%:
739 * output: [ ] { } # ##
743 int next
= getcFromInputFile ();
746 case ':': c
= '['; break;
747 case '%': c
= '{'; break;
748 default: ungetcToInputFile (next
);
754 int next
= getcFromInputFile ();
758 ungetcToInputFile (next
);
763 int next
= getcFromInputFile ();
766 case '>': c
= '}'; break;
767 case ':': c
= '#'; goto process
;
768 default: ungetcToInputFile (next
);
774 if (c
== '@' && Cpp
.hasAtLiteralStrings
)
776 int next
= getcFromInputFile ();
777 if (next
== DOUBLE_QUOTE
)
779 Cpp
.directive
.accept
= FALSE
;
780 c
= skipToEndOfString (TRUE
);
784 ungetcToInputFile (next
);
786 else if (c
== 'R' && Cpp
.hasCxxRawLiteralStrings
)
788 /* OMG!11 HACK!!11 Get the previous character.
790 * We need to know whether the previous character was an identifier or not,
791 * because "R" has to be on its own, not part of an identifier. This allows
792 * for constructs like:
795 * const char *p = FOUR"5";
797 * which is not a raw literal, but a preprocessor concatenation.
804 * which is perfectly valid (yet probably very unlikely). */
805 int prev
= fileGetNthPrevC (1, '\0');
806 int prev2
= fileGetNthPrevC (2, '\0');
807 int prev3
= fileGetNthPrevC (3, '\0');
809 if (! isident (prev
) ||
810 (! isident (prev2
) && (prev
== 'L' || prev
== 'u' || prev
== 'U')) ||
811 (! isident (prev3
) && (prev2
== 'u' && prev
== '8')))
813 int next
= getcFromInputFile ();
814 if (next
!= DOUBLE_QUOTE
)
815 ungetcToInputFile (next
);
818 Cpp
.directive
.accept
= FALSE
;
819 c
= skipToEndOfCxxRawLiteralString ();
825 Cpp
.directive
.accept
= FALSE
;
827 ignore
= handleDirective (c
);
830 } while (directive
|| ignore
);
832 DebugStatement ( debugPutc (DEBUG_CPP
, c
); )
833 DebugStatement ( if (c
== NEWLINE
)
834 debugPrintf (DEBUG_CPP
, "%6ld: ", getInputLineNumber () + 1); )
839 extern char *getArglistFromFilePos(MIOPos startPosition
, const char *tokenName
)
841 MIOPos originalPosition
;
843 char *arglist
= NULL
;
846 pos2
= mio_tell(File
.fp
);
848 mio_getpos(File
.fp
, &originalPosition
);
849 mio_setpos(File
.fp
, &startPosition
);
850 pos1
= mio_tell(File
.fp
);
854 size_t len
= pos2
- pos1
;
856 result
= (char *) g_malloc(len
+ 1);
857 if (result
!= NULL
&& (len
= mio_read(File
.fp
, result
, 1, len
)) > 0)
860 arglist
= getArglistFromStr(result
, tokenName
);
864 mio_setpos(File
.fp
, &originalPosition
);
878 static void stripCodeBuffer(char *buf
)
881 ParseState state
= st_none_t
, prev_state
= st_none_t
;
883 while (buf
[i
] != '\0')
888 if (st_none_t
== state
)
890 /* Check if this is the start of a comment */
891 if (buf
[i
+1] == '*') /* C comment */
892 state
= st_c_comment_t
;
893 else if (buf
[i
+1] == '/') /* C++ comment */
894 state
= st_cpp_comment_t
;
895 else /* Normal character */
898 else if (st_c_comment_t
== state
)
900 /* Check if this is the end of a C comment */
903 if ((pos
> 0) && (buf
[pos
-1] != ' '))
910 if (st_none_t
== state
)
911 state
= st_double_quote_t
;
912 else if (st_double_quote_t
== state
)
916 if (st_none_t
== state
)
917 state
= st_single_quote_t
;
918 else if (st_single_quote_t
== state
)
922 if ((buf
[i
] == '\\') && (st_escape_t
!= state
))
927 else if (st_escape_t
== state
)
930 prev_state
= st_none_t
;
932 else if ((buf
[i
] == '\n') && (st_cpp_comment_t
== state
))
934 if ((pos
> 0) && (buf
[pos
-1] != ' '))
938 else if (st_none_t
== state
)
942 if ((pos
> 0) && (buf
[pos
-1] != ' '))
956 extern char *getArglistFromStr(char *buf
, const char *name
)
960 if ((NULL
== buf
) || (NULL
== name
) || ('\0' == name
[0]))
962 stripCodeBuffer(buf
);
963 if (NULL
== (start
= strstr(buf
, name
)))
965 if (NULL
== (start
= strchr(start
, '(')))
967 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
971 else if ('(' == *end
)
973 else if (')' == *end
)
977 return strdup(start
);
980 /* vi:set tabstop=4 shiftwidth=4: */