1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the Preprocessor interface.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
17 #include "clang/Lex/MacroInfo.h"
18 #include "clang/Lex/Lexer.h"
19 #include "clang/Lex/PTHLexer.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/TokenLexer.h"
22 #include "clang/Lex/PTHManager.h"
23 #include "clang/Basic/Builtins.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/IdentifierTable.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/OwningPtr.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Allocator.h"
36 class ExternalPreprocessorSource
;
38 class FileSystemOptions
;
41 class PragmaNamespace
;
47 class CodeCompletionHandler
;
48 class DirectoryLookup
;
49 class PreprocessingRecord
;
50 class PPMacroExpansionTrap
;
52 /// Preprocessor - This object engages in a tight little dance with the lexer to
53 /// efficiently preprocess tokens. Lexers know only about tokens within a
54 /// single source file, and don't know anything about preprocessor-level issues
55 /// like the #include stack, token expansion, etc.
60 const TargetInfo
&Target
;
62 const FileSystemOptions
&FileSystemOpts
;
63 SourceManager
&SourceMgr
;
64 ScratchBuffer
*ScratchBuf
;
65 HeaderSearch
&HeaderInfo
;
67 /// \brief External source of macros.
68 ExternalPreprocessorSource
*ExternalSource
;
70 /// PTH - An optional PTHManager object used for getting tokens from
71 /// a token cache rather than lexing the original source file.
72 llvm::OwningPtr
<PTHManager
> PTH
;
74 /// BP - A BumpPtrAllocator object used to quickly allocate and release
75 /// objects internal to the Preprocessor.
76 llvm::BumpPtrAllocator BP
;
78 /// Identifiers for builtin macros and other builtins.
79 IdentifierInfo
*Ident__LINE__
, *Ident__FILE__
; // __LINE__, __FILE__
80 IdentifierInfo
*Ident__DATE__
, *Ident__TIME__
; // __DATE__, __TIME__
81 IdentifierInfo
*Ident__INCLUDE_LEVEL__
; // __INCLUDE_LEVEL__
82 IdentifierInfo
*Ident__BASE_FILE__
; // __BASE_FILE__
83 IdentifierInfo
*Ident__TIMESTAMP__
; // __TIMESTAMP__
84 IdentifierInfo
*Ident__COUNTER__
; // __COUNTER__
85 IdentifierInfo
*Ident_Pragma
, *Ident__pragma
; // _Pragma, __pragma
86 IdentifierInfo
*Ident__VA_ARGS__
; // __VA_ARGS__
87 IdentifierInfo
*Ident__has_feature
; // __has_feature
88 IdentifierInfo
*Ident__has_builtin
; // __has_builtin
89 IdentifierInfo
*Ident__has_attribute
; // __has_attribute
90 IdentifierInfo
*Ident__has_include
; // __has_include
91 IdentifierInfo
*Ident__has_include_next
; // __has_include_next
93 SourceLocation DATELoc
, TIMELoc
;
94 unsigned CounterValue
; // Next __COUNTER__ value.
97 /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
98 MaxAllowedIncludeStackDepth
= 200
101 // State that is set before the preprocessor begins.
102 bool KeepComments
: 1;
103 bool KeepMacroComments
: 1;
105 // State that changes while the preprocessor runs:
106 bool InMacroArgs
: 1; // True if parsing fn macro invocation args.
108 /// Whether the preprocessor owns the header search object.
109 bool OwnsHeaderSearch
: 1;
111 /// DisableMacroExpansion - True if macro expansion is disabled.
112 bool DisableMacroExpansion
: 1;
114 /// \brief This is set to true when a macro is expanded.
115 /// Used by PPMacroExpansionTrap.
116 bool MacroExpansionFlag
: 1;
117 friend class PPMacroExpansionTrap
;
119 /// \brief Whether we have already loaded macros from the external source.
120 mutable bool ReadMacrosFromExternalSource
: 1;
122 /// Identifiers - This is mapping/lookup information for all identifiers in
123 /// the program, including program keywords.
124 mutable IdentifierTable Identifiers
;
126 /// Selectors - This table contains all the selectors in the program. Unlike
127 /// IdentifierTable above, this table *isn't* populated by the preprocessor.
128 /// It is declared/instantiated here because its role/lifetime is
129 /// conceptually similar to that of the IdentifierTable. In addition, the
130 /// current control flow (in clang::ParseAST()) makes it convenient to put here.
131 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
132 /// the lifetime of the preprocessor.
133 SelectorTable Selectors
;
135 /// BuiltinInfo - Information about builtins.
136 Builtin::Context BuiltinInfo
;
138 /// PragmaHandlers - This tracks all of the pragmas that the client registered
139 /// with this preprocessor.
140 PragmaNamespace
*PragmaHandlers
;
142 /// \brief Tracks all of the comment handlers that the client registered
143 /// with this preprocessor.
144 std::vector
<CommentHandler
*> CommentHandlers
;
146 /// \brief The code-completion handler.
147 CodeCompletionHandler
*CodeComplete
;
149 /// \brief The file that we're performing code-completion for, if any.
150 const FileEntry
*CodeCompletionFile
;
152 /// \brief The number of bytes that we will initially skip when entering the
153 /// main file, which is used when loading a precompiled preamble, along
154 /// with a flag that indicates whether skipping this number of bytes will
155 /// place the lexer at the start of a line.
156 std::pair
<unsigned, bool> SkipMainFilePreamble
;
158 /// CurLexer - This is the current top of the stack that we're lexing from if
159 /// not expanding a macro and we are lexing directly from source code.
160 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
161 llvm::OwningPtr
<Lexer
> CurLexer
;
163 /// CurPTHLexer - This is the current top of stack that we're lexing from if
164 /// not expanding from a macro and we are lexing from a PTH cache.
165 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
166 llvm::OwningPtr
<PTHLexer
> CurPTHLexer
;
168 /// CurPPLexer - This is the current top of the stack that we're lexing from
169 /// if not expanding a macro. This is an alias for either CurLexer or
171 PreprocessorLexer
*CurPPLexer
;
173 /// CurDirLookup - The DirectoryLookup structure used to find the current
174 /// FileEntry, if CurLexer is non-null and if applicable. This allows us to
175 /// implement #include_next and find directory-specific properties.
176 const DirectoryLookup
*CurDirLookup
;
178 /// CurTokenLexer - This is the current macro we are expanding, if we are
179 /// expanding a macro. One of CurLexer and CurTokenLexer must be null.
180 llvm::OwningPtr
<TokenLexer
> CurTokenLexer
;
182 /// IncludeMacroStack - This keeps track of the stack of files currently
183 /// #included, and macros currently being expanded from, not counting
184 /// CurLexer/CurTokenLexer.
185 struct IncludeStackInfo
{
187 PTHLexer
*ThePTHLexer
;
188 PreprocessorLexer
*ThePPLexer
;
189 TokenLexer
*TheTokenLexer
;
190 const DirectoryLookup
*TheDirLookup
;
192 IncludeStackInfo(Lexer
*L
, PTHLexer
* P
, PreprocessorLexer
* PPL
,
193 TokenLexer
* TL
, const DirectoryLookup
*D
)
194 : TheLexer(L
), ThePTHLexer(P
), ThePPLexer(PPL
), TheTokenLexer(TL
),
197 std::vector
<IncludeStackInfo
> IncludeMacroStack
;
199 /// Callbacks - These are actions invoked when some preprocessor activity is
200 /// encountered (e.g. a file is #included, etc).
201 PPCallbacks
*Callbacks
;
203 /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
204 /// to the actual definition of the macro.
205 llvm::DenseMap
<IdentifierInfo
*, MacroInfo
*> Macros
;
207 /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
208 /// reused for quick allocation.
209 MacroArgs
*MacroArgCache
;
210 friend class MacroArgs
;
212 /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
213 /// push_macro directive, we keep a MacroInfo stack used to restore
214 /// previous macro value.
215 llvm::DenseMap
<IdentifierInfo
*, std::vector
<MacroInfo
*> > PragmaPushMacroInfo
;
217 // Various statistics we track for performance analysis.
218 unsigned NumDirectives
, NumIncluded
, NumDefined
, NumUndefined
, NumPragma
;
219 unsigned NumIf
, NumElse
, NumEndif
;
220 unsigned NumEnteredSourceFiles
, MaxIncludeStackDepth
;
221 unsigned NumMacroExpanded
, NumFnMacroExpanded
, NumBuiltinMacroExpanded
;
222 unsigned NumFastMacroExpanded
, NumTokenPaste
, NumFastTokenPaste
;
225 /// Predefines - This string is the predefined macros that preprocessor
226 /// should use from the command line etc.
227 std::string Predefines
;
229 /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
230 enum { TokenLexerCacheSize
= 8 };
231 unsigned NumCachedTokenLexers
;
232 TokenLexer
*TokenLexerCache
[TokenLexerCacheSize
];
234 /// \brief A record of the macro definitions and instantiations that
235 /// occurred during preprocessing.
237 /// This is an optional side structure that can be enabled with
238 /// \c createPreprocessingRecord() prior to preprocessing.
239 PreprocessingRecord
*Record
;
241 private: // Cached tokens state.
242 typedef llvm::SmallVector
<Token
, 1> CachedTokensTy
;
244 /// CachedTokens - Cached tokens are stored here when we do backtracking or
245 /// lookahead. They are "lexed" by the CachingLex() method.
246 CachedTokensTy CachedTokens
;
248 /// CachedLexPos - The position of the cached token that CachingLex() should
249 /// "lex" next. If it points beyond the CachedTokens vector, it means that
250 /// a normal Lex() should be invoked.
251 CachedTokensTy::size_type CachedLexPos
;
253 /// BacktrackPositions - Stack of backtrack positions, allowing nested
254 /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
255 /// indicate where CachedLexPos should be set when the BackTrack() method is
256 /// invoked (at which point the last position is popped).
257 std::vector
<CachedTokensTy::size_type
> BacktrackPositions
;
259 struct MacroInfoChain
{
261 MacroInfoChain
*Next
;
262 MacroInfoChain
*Prev
;
265 /// MacroInfos are managed as a chain for easy disposal. This is the head
267 MacroInfoChain
*MIChainHead
;
269 /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
271 MacroInfoChain
*MICache
;
273 MacroInfo
*getInfoForMacro(IdentifierInfo
*II
) const;
/// Preprocessor - Constructor declaration; the definition is not in this
/// header.  The diagnostics object, language options, target info, source
/// manager and header search object are all passed by reference.
///
/// \param IILookup identifier lookup pointer; defaults to 0 (null).
/// \param OwnsHeaderSearch defaults to false.  NOTE(review): presumably
/// initializes the OwnsHeaderSearch flag ("whether the preprocessor owns the
/// header search object") -- confirm against the out-of-line definition.
276 Preprocessor(Diagnostic
&diags
, const LangOptions
&opts
,
277 const TargetInfo
&target
,
278 SourceManager
&SM
, HeaderSearch
&Headers
,
279 IdentifierInfoLookup
*IILookup
= 0,
280 bool OwnsHeaderSearch
= false);
284 Diagnostic
&getDiagnostics() const { return *Diags
; }
285 void setDiagnostics(Diagnostic
&D
) { Diags
= &D
; }
287 const LangOptions
&getLangOptions() const { return Features
; }
288 const TargetInfo
&getTargetInfo() const { return Target
; }
289 FileManager
&getFileManager() const { return FileMgr
; }
290 const FileSystemOptions
&getFileSystemOpts() const { return FileSystemOpts
; }
291 SourceManager
&getSourceManager() const { return SourceMgr
; }
292 HeaderSearch
&getHeaderSearchInfo() const { return HeaderInfo
; }
294 IdentifierTable
&getIdentifierTable() { return Identifiers
; }
295 SelectorTable
&getSelectorTable() { return Selectors
; }
296 Builtin::Context
&getBuiltinInfo() { return BuiltinInfo
; }
297 llvm::BumpPtrAllocator
&getPreprocessorAllocator() { return BP
; }
/// setPTHManager - Declaration only; the definition is not in this header.
/// NOTE(review): presumably installs \p pm as the PTH manager held by the
/// PTH owning pointer -- confirm ownership semantics against the definition.
299 void setPTHManager(PTHManager
* pm
);
301 PTHManager
*getPTHManager() { return PTH
.get(); }
303 void setExternalSource(ExternalPreprocessorSource
*Source
) {
304 ExternalSource
= Source
;
307 ExternalPreprocessorSource
*getExternalSource() const {
308 return ExternalSource
;
311 /// SetCommentRetentionState - Control whether or not the preprocessor retains
312 /// comments in output.
313 void SetCommentRetentionState(bool KeepComments
, bool KeepMacroComments
) {
314 this->KeepComments
= KeepComments
| KeepMacroComments
;
315 this->KeepMacroComments
= KeepMacroComments
;
318 bool getCommentRetentionState() const { return KeepComments
; }
320 /// isCurrentLexer - Return true if we are lexing directly from the specified
322 bool isCurrentLexer(const PreprocessorLexer
*L
) const {
323 return CurPPLexer
== L
;
326 /// getCurrentLexer - Return the current lexer being lexed from. Note
327 /// that this ignores any potentially active macro expansions and _Pragma
328 /// expansions going on at the time.
329 PreprocessorLexer
*getCurrentLexer() const { return CurPPLexer
; }
331 /// getCurrentFileLexer - Return the current file lexer being lexed from.
332 /// Note that this ignores any potentially active macro expansions and _Pragma
333 /// expansions going on at the time.
334 PreprocessorLexer
*getCurrentFileLexer() const;
336 /// getPPCallbacks/addPPCallbacks - Accessors for preprocessor callbacks.
337 /// Note that this class takes ownership of any PPCallbacks object given to
339 PPCallbacks
*getPPCallbacks() const { return Callbacks
; }
340 void addPPCallbacks(PPCallbacks
*C
) {
342 C
= new PPChainedCallbacks(C
, Callbacks
);
346 /// getMacroInfo - Given an identifier, return the MacroInfo it is #defined to
347 /// or null if it isn't #define'd.
348 MacroInfo
*getMacroInfo(IdentifierInfo
*II
) const {
349 if (!II
->hasMacroDefinition())
352 return getInfoForMacro(II
);
355 /// setMacroInfo - Specify a macro for this identifier.
357 void setMacroInfo(IdentifierInfo
*II
, MacroInfo
*MI
);
359 /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
360 /// state of the macro table. This visits every currently-defined macro.
361 typedef llvm::DenseMap
<IdentifierInfo
*,
362 MacroInfo
*>::const_iterator macro_iterator
;
363 macro_iterator
macro_begin(bool IncludeExternalMacros
= true) const;
364 macro_iterator
macro_end(bool IncludeExternalMacros
= true) const;
366 const std::string
&getPredefines() const { return Predefines
; }
367 /// setPredefines - Set the predefines for this Preprocessor. These
368 /// predefines are automatically injected when parsing the main file.
369 void setPredefines(const char *P
) { Predefines
= P
; }
370 void setPredefines(const std::string
&P
) { Predefines
= P
; }
372 /// getIdentifierInfo - Return information about the specified preprocessor
373 /// identifier token. The version of this method that takes two character
374 /// pointers is preferred unless the identifier is already available as a
375 /// string (this avoids allocation and copying of memory to construct an
377 IdentifierInfo
*getIdentifierInfo(llvm::StringRef Name
) const {
378 return &Identifiers
.get(Name
);
381 /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
382 /// If 'Namespace' is non-null, then it is a token required to exist on the
383 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
384 void AddPragmaHandler(llvm::StringRef Namespace
, PragmaHandler
*Handler
);
385 void AddPragmaHandler(PragmaHandler
*Handler
) {
386 AddPragmaHandler(llvm::StringRef(), Handler
);
389 /// RemovePragmaHandler - Remove the specific pragma handler from
390 /// the preprocessor. If \arg Namespace is non-null, then it should
391 /// be the namespace that \arg Handler was added to. It is an error
392 /// to remove a handler that has not been registered.
393 void RemovePragmaHandler(llvm::StringRef Namespace
, PragmaHandler
*Handler
);
394 void RemovePragmaHandler(PragmaHandler
*Handler
) {
395 RemovePragmaHandler(llvm::StringRef(), Handler
);
398 /// \brief Add the specified comment handler to the preprocessor.
399 void AddCommentHandler(CommentHandler
*Handler
);
401 /// \brief Remove the specified comment handler.
403 /// It is an error to remove a handler that has not been registered.
404 void RemoveCommentHandler(CommentHandler
*Handler
);
406 /// \brief Set the code completion handler to the given object.
407 void setCodeCompletionHandler(CodeCompletionHandler
&Handler
) {
408 CodeComplete
= &Handler
;
411 /// \brief Retrieve the current code-completion handler.
412 CodeCompletionHandler
*getCodeCompletionHandler() const {
416 /// \brief Clear out the code completion handler.
417 void clearCodeCompletionHandler() {
421 /// \brief Hook used by the lexer to invoke the "natural language" code
422 /// completion point.
423 void CodeCompleteNaturalLanguage();
425 /// \brief Retrieve the preprocessing record, or NULL if there is no
426 /// preprocessing record.
427 PreprocessingRecord
*getPreprocessingRecord() const { return Record
; }
429 /// \brief Create a new preprocessing record, which will keep track of
430 /// all macro expansions, macro definitions, etc.
431 void createPreprocessingRecord();
433 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
434 /// which implicitly adds the builtin defines etc.
435 void EnterMainSourceFile();
437 /// EndSourceFile - Inform the preprocessor callbacks that processing is
439 void EndSourceFile();
441 /// EnterSourceFile - Add a source file to the top of the include stack and
442 /// start lexing tokens from it instead of the current buffer. Emit an error
443 /// and don't enter the file on error.
444 void EnterSourceFile(FileID CurFileID
, const DirectoryLookup
*Dir
,
447 /// EnterMacro - Add a Macro to the top of the include stack and start lexing
448 /// tokens from it instead of the current buffer. Args specifies the
449 /// tokens input to a function-like macro.
451 /// ILEnd specifies the location of the ')' for a function-like macro or the
452 /// identifier for an object-like macro.
453 void EnterMacro(Token
&Identifier
, SourceLocation ILEnd
, MacroArgs
*Args
);
455 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
456 /// which will cause the lexer to start returning the specified tokens.
458 /// If DisableMacroExpansion is true, tokens lexed from the token stream will
459 /// not be subject to further macro expansion. Otherwise, these tokens will
460 /// be re-macro-expanded when/if expansion is enabled.
462 /// If OwnsTokens is false, this method assumes that the specified stream of
463 /// tokens has a permanent owner somewhere, so they do not need to be copied.
464 /// If it is true, it assumes the array of tokens is allocated with new[] and
467 void EnterTokenStream(const Token
*Toks
, unsigned NumToks
,
468 bool DisableMacroExpansion
, bool OwnsTokens
);
470 /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
471 /// lexer stack. This should only be used in situations where the current
472 /// state of the top-of-stack lexer is known.
473 void RemoveTopOfLexerStack();
475 /// EnableBacktrackAtThisPos - From the point that this method is called, and
476 /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
477 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
478 /// make the Preprocessor re-lex the same tokens.
480 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
481 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
482 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
484 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
485 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
486 /// tokens will continue indefinitely.
488 void EnableBacktrackAtThisPos();
490 /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
491 void CommitBacktrackedTokens();
493 /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
494 /// EnableBacktrackAtThisPos() was previously called.
497 /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
498 /// caching of tokens is on.
499 bool isBacktrackEnabled() const { return !BacktrackPositions
.empty(); }
501 /// Lex - To lex a token from the preprocessor, just pull a token from the
502 /// current lexer or macro object.
503 void Lex(Token
&Result
) {
505 CurLexer
->Lex(Result
);
506 else if (CurPTHLexer
)
507 CurPTHLexer
->Lex(Result
);
508 else if (CurTokenLexer
)
509 CurTokenLexer
->Lex(Result
);
514 /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
515 /// something not a comment. This is useful in -E -C mode where comments
516 /// would foul up preprocessor directive handling.
517 void LexNonComment(Token
&Result
) {
520 while (Result
.getKind() == tok::comment
);
523 /// LexUnexpandedToken - This is just like Lex, but this disables macro
524 /// expansion of identifier tokens.
525 void LexUnexpandedToken(Token
&Result
) {
526 // Disable macro expansion.
527 bool OldVal
= DisableMacroExpansion
;
528 DisableMacroExpansion
= true;
533 DisableMacroExpansion
= OldVal
;
536 /// LookAhead - This peeks ahead N tokens and returns that token without
537 /// consuming any tokens. LookAhead(0) returns the next token that would be
538 /// returned by Lex(), LookAhead(1) returns the token after it, etc. This
539 /// returns normal tokens after phase 5. As such, it is equivalent to using
540 /// 'Lex', not 'LexUnexpandedToken'.
541 const Token
&LookAhead(unsigned N
) {
542 if (CachedLexPos
+ N
< CachedTokens
.size())
543 return CachedTokens
[CachedLexPos
+N
];
545 return PeekAhead(N
+1);
548 /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
549 /// this allows reverting a specific number of tokens.
550 /// Note that the number of tokens being reverted should be up to the last
551 /// backtrack position, not more.
552 void RevertCachedTokens(unsigned N
) {
553 assert(isBacktrackEnabled() &&
554 "Should only be called when tokens are cached for backtracking");
555 assert(signed(CachedLexPos
) - signed(N
) >= signed(BacktrackPositions
.back())
556 && "Should revert tokens up to the last backtrack position, not more");
557 assert(signed(CachedLexPos
) - signed(N
) >= 0 &&
558 "Corrupted backtrack positions ?");
562 /// EnterToken - Enters a token in the token stream to be lexed next. If
563 /// BackTrack() is called afterwards, the token will remain at the insertion
565 void EnterToken(const Token
&Tok
) {
566 EnterCachingLexMode();
567 CachedTokens
.insert(CachedTokens
.begin()+CachedLexPos
, Tok
);
570 /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching
571 /// tokens (because backtrack is enabled) it should replace the most recent
572 /// cached tokens with the given annotation token. This function has no effect
573 /// if backtracking is not enabled.
575 /// Note that the use of this function is just for optimization; so that the
576 /// cached tokens don't get re-parsed and re-resolved after a backtrack is
578 void AnnotateCachedTokens(const Token
&Tok
) {
579 assert(Tok
.isAnnotation() && "Expected annotation token");
580 if (CachedLexPos
!= 0 && isBacktrackEnabled())
581 AnnotatePreviousCachedTokens(Tok
);
584 /// \brief Replace the last token with an annotation token.
586 /// Like AnnotateCachedTokens(), this routine replaces an
587 /// already-parsed (and resolved) token with an annotation
588 /// token. However, this routine only replaces the last token with
589 /// the annotation token; it does not affect any other cached
590 /// tokens. This function has no effect if backtracking is not
592 void ReplaceLastTokenWithAnnotation(const Token
&Tok
) {
593 assert(Tok
.isAnnotation() && "Expected annotation token");
594 if (CachedLexPos
!= 0 && isBacktrackEnabled())
595 CachedTokens
[CachedLexPos
-1] = Tok
;
598 /// \brief Specify the point at which code-completion will be performed.
600 /// \param File the file in which code completion should occur. If
601 /// this file is included multiple times, code-completion will
602 /// perform completion the first time it is included. If NULL, this
603 /// function clears out the code-completion point.
605 /// \param Line the line at which code completion should occur
608 /// \param Column the column at which code completion should occur
611 /// \returns true if an error occurred, false otherwise.
612 bool SetCodeCompletionPoint(const FileEntry
*File
,
613 unsigned Line
, unsigned Column
);
615 /// \brief Determine if this source location refers into the file
616 /// for which we are performing code completion.
617 bool isCodeCompletionFile(SourceLocation FileLoc
) const;
619 /// \brief Instruct the preprocessor to skip part of the main
620 /// source file.
622 /// \param Bytes The number of bytes in the preamble to skip.
624 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
626 void setSkipMainFilePreamble(unsigned Bytes
, bool StartOfLine
) {
627 SkipMainFilePreamble
.first
= Bytes
;
628 SkipMainFilePreamble
.second
= StartOfLine
;
631 /// Diag - Forwarding function for diagnostics. This emits a diagnostic at
632 /// the specified Token's location, translating the token's start
633 /// position in the current buffer into a SourcePosition object for rendering.
634 DiagnosticBuilder
Diag(SourceLocation Loc
, unsigned DiagID
) {
635 return Diags
->Report(Loc
, DiagID
);
638 DiagnosticBuilder
Diag(const Token
&Tok
, unsigned DiagID
) {
639 return Diags
->Report(Tok
.getLocation(), DiagID
);
642 /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a
643 /// token is the characters used to represent the token in the source file
644 /// after trigraph expansion and escaped-newline folding. In particular, this
645 /// wants to get the true, uncanonicalized, spelling of things like digraphs
648 /// \param Invalid If non-NULL, will be set \c true if an error occurs.
649 std::string
getSpelling(const Token
&Tok
, bool *Invalid
= 0) const {
650 return Lexer::getSpelling(Tok
, SourceMgr
, Features
, Invalid
);
653 /// getSpelling - This method is used to get the spelling of a token into a
654 /// preallocated buffer, instead of as an std::string. The caller is required
655 /// to allocate enough space for the token, which is guaranteed to be at least
656 /// Tok.getLength() bytes long. The length of the actual result is returned.
658 /// Note that this method may do two possible things: it may either fill in
659 /// the buffer specified with characters, or it may *change the input pointer*
660 /// to point to a constant buffer with the data already in it (avoiding a
661 /// copy). The caller is not allowed to modify the returned buffer pointer
662 /// if an internal buffer is returned.
663 unsigned getSpelling(const Token
&Tok
, const char *&Buffer
,
664 bool *Invalid
= 0) const {
665 return Lexer::getSpelling(Tok
, Buffer
, SourceMgr
, Features
, Invalid
);
668 /// getSpelling - This method is used to get the spelling of a token into a
669 /// SmallVector. Note that the returned StringRef may not point to the
670 /// supplied buffer if a copy can be avoided.
671 llvm::StringRef
getSpelling(const Token
&Tok
,
672 llvm::SmallVectorImpl
<char> &Buffer
,
673 bool *Invalid
= 0) const;
675 /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
676 /// with length 1, return the character.
677 char getSpellingOfSingleCharacterNumericConstant(const Token
&Tok
,
678 bool *Invalid
= 0) const {
679 assert(Tok
.is(tok::numeric_constant
) &&
680 Tok
.getLength() == 1 && "Called on unsupported token");
681 assert(!Tok
.needsCleaning() && "Token can't need cleaning with length 1");
683 // If the token is carrying a literal data pointer, just use it.
684 if (const char *D
= Tok
.getLiteralData())
687 // Otherwise, fall back on getCharacterData, which is slower, but always
689 return *SourceMgr
.getCharacterData(Tok
.getLocation(), Invalid
);
692 /// CreateString - Plop the specified string into a scratch buffer and set the
693 /// specified token's location and length to it. If specified, the source
694 /// location provides a location of the instantiation point of the token.
695 void CreateString(const char *Buf
, unsigned Len
,
696 Token
&Tok
, SourceLocation SourceLoc
= SourceLocation());
698 /// \brief Computes the source location just past the end of the
699 /// token at this source location.
701 /// This routine can be used to produce a source location that
702 /// points just past the end of the token referenced by \p Loc, and
703 /// is generally used when a diagnostic needs to point just after a
704 /// token where it expected something different than it received. If
705 /// the returned source location would not be meaningful (e.g., if
706 /// it points into a macro), this routine returns an invalid
709 /// \param Offset an offset from the end of the token, where the source
710 /// location should refer to. The default offset (0) produces a source
711 /// location pointing just past the end of the token; an offset of 1 produces
712 /// a source location pointing to the last character in the token, etc.
713 SourceLocation
getLocForEndOfToken(SourceLocation Loc
, unsigned Offset
= 0) {
714 return Lexer::getLocForEndOfToken(Loc
, Offset
, SourceMgr
, Features
);
717 /// DumpToken - Print the token to stderr, used for debugging.
719 void DumpToken(const Token
&Tok
, bool DumpFlags
= false) const;
720 void DumpLocation(SourceLocation Loc
) const;
721 void DumpMacro(const MacroInfo
&MI
) const;
723 /// AdvanceToTokenCharacter - Given a location that specifies the start of a
724 /// token, return a new location that specifies a character within the token.
725 SourceLocation
AdvanceToTokenCharacter(SourceLocation TokStart
,
726 unsigned Char
) const {
727 return Lexer::AdvanceToTokenCharacter(TokStart
, Char
, SourceMgr
, Features
);
730 /// IncrementPasteCounter - Increment the counters for the number of token
731 /// paste operations performed. If fast was specified, this is a 'fast paste'
734 void IncrementPasteCounter(bool isFast
) {
743 /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
744 /// comment (/##/) in microsoft mode, this method handles updating the current
745 /// state, returning the token on the next source line.
746 void HandleMicrosoftCommentPaste(Token
&Tok
);
748 //===--------------------------------------------------------------------===//
749 // Preprocessor callback methods. These are invoked by a lexer as various
750 // directives and events are found.
752 /// LookUpIdentifierInfo - Given a tok::identifier token, look up the
753 /// identifier information for the token and install it into the token.
754 IdentifierInfo
*LookUpIdentifierInfo(Token
&Identifier
,
755 const char *BufPtr
= 0) const;
757 /// HandleIdentifier - This callback is invoked when the lexer reads an
758 /// identifier and has filled in the tokens IdentifierInfo member. This
759 /// callback potentially macro expands it or turns it into a named token (like
761 void HandleIdentifier(Token
&Identifier
);
764 /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
765 /// the current file. This either returns the EOF token and returns true, or
766 /// pops a level off the include stack and returns false, at which point the
767 /// client should call lex again.
768 bool HandleEndOfFile(Token
&Result
, bool isEndOfMacro
= false);
770 /// HandleEndOfTokenLexer - This callback is invoked when the current
771 /// TokenLexer hits the end of its token stream.
772 bool HandleEndOfTokenLexer(Token
&Result
);
774 /// HandleDirective - This callback is invoked when the lexer sees a # token
775 /// at the start of a line. This consumes the directive, modifies the
776 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
777 /// read is the correct one.
778 void HandleDirective(Token
&Result
);
780 /// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If
781 /// not, emit a diagnostic and consume up until the eom. If EnableMacros is
782 /// true, then we consider macros that expand to zero tokens as being ok.
783 void CheckEndOfDirective(const char *Directive
, bool EnableMacros
= false);
785 /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
786 /// current line until the tok::eom token is found.
/// Takes no arguments and returns nothing, so the discarded tokens are not
/// made available to the caller.
787 void DiscardUntilEndOfDirective();
789 /// SawDateOrTime - This returns true if the preprocessor has seen a use of
790 /// __DATE__ or __TIME__ in the file so far.
/// A use counts as seen when DATELoc or TIMELoc differs from a
/// default-constructed SourceLocation.
791 bool SawDateOrTime() const {
792 return DATELoc
!= SourceLocation() || TIMELoc
!= SourceLocation();
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 792 to 794).
/// getCounterValue - Return the current value of the CounterValue member.
794 unsigned getCounterValue() const { return CounterValue
; }
/// setCounterValue - Overwrite the CounterValue member with \p V.
795 void setCounterValue(unsigned V
) { CounterValue
= V
; }
797 /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
/// SourceLocation.
/// \param L the SourceLocation handed to the new object.
/// \returns a pointer to the newly allocated MacroInfo.
799 MacroInfo
*AllocateMacroInfo(SourceLocation L
);
801 /// CloneMacroInfo - Allocate a new MacroInfo object which is a clone of MI.
/// \param MI the MacroInfo to clone; taken by const reference, so it is not
///        modified.
/// \returns a pointer to the newly allocated clone.
802 MacroInfo
*CloneMacroInfo(const MacroInfo
&MI
);
804 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
805 /// checked and spelled filename, e.g. as an operand of #include. This returns
806 /// true if the input filename was in <>'s or false if it was in ""'s. The
807 /// caller is expected to provide a buffer that is large enough to hold the
808 /// spelling of the filename, but is also expected to handle the case when
809 /// this method decides to use a different buffer.
/// NOTE(review): the text above talks about a lexer token and a
/// caller-provided buffer, but the signature below takes a SourceLocation and
/// an llvm::StringRef reference; the description looks stale and should be
/// checked against the definition.
/// \param Loc a source location. NOTE(review): presumably where diagnostics
///        are reported -- confirm.
/// \param Filename in/out reference to the filename spelling.
810 bool GetIncludeFilenameSpelling(SourceLocation Loc
,llvm::StringRef
&Filename
);
812 /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
813 /// return null on failure. isAngled indicates whether the file reference is
814 /// for system #include's or not (i.e. using <> instead of "").
/// \param Filename the name written in the reference, without its
///        delimiters (presumably -- confirm).
/// \param isAngled true for the <foo> form, false for the "foo" form.
/// \param FromDir NOTE(review): presumably the directory entry to start the
///        search from, e.g. for #include_next -- confirm.
/// \param CurDir reference to a DirectoryLookup pointer; presumably set to
///        the entry in which the file was found -- confirm.
/// \returns the FileEntry that was found, or null on failure.
815 const FileEntry
*LookupFile(llvm::StringRef Filename
,
816 bool isAngled
, const DirectoryLookup
*FromDir
,
817 const DirectoryLookup
*&CurDir
);
819 /// GetCurDirLookup - The DirectoryLookup structure used to find the current
820 /// FileEntry, if CurLexer is non-null and if applicable. This allows us to
821 /// implement #include_next and find directory-specific properties.
/// \returns the CurDirLookup member unchanged.
822 const DirectoryLookup
*GetCurDirLookup() { return CurDirLookup
; }
824 /// isInPrimaryFile - Return true if we're in the top-level file, not in a
/// file entered from it. NOTE(review): the original sentence is cut off
/// after "not in a" in this copy; presumably it ended with "#include".
826 bool isInPrimaryFile() const;
828 /// ConcatenateIncludeName - Handle cases where the #include name is expanded
829 /// from a macro as multiple tokens, which need to be glued together. This
830 /// occurs for code like:
831 /// #define FOO <a/b.h>
/// followed by an #include whose operand is FOO (the second example line is
/// missing from this copy of the file),
833 /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
835 /// This code concatenates and consumes tokens up to the '>' token. It
836 /// returns false if the > was found, otherwise it returns true if it finds
837 /// and consumes the EOM marker.
/// \param FilenameBuffer buffer passed by reference; presumably receives the
///        concatenated spelling -- confirm.
/// \param End SourceLocation passed by reference. NOTE(review): presumably
///        set to the location of the closing '>' -- confirm.
838 bool ConcatenateIncludeName(llvm::SmallString
<128> &FilenameBuffer
,
839 SourceLocation
&End
);
/// PushIncludeMacroStack - Append a new IncludeStackInfo entry to
/// IncludeMacroStack. The visible constructor arguments are the results of
/// CurLexer.take() and CurTokenLexer.take().
/// NOTE(review): this body is truncated in this copy of the file: the
/// embedded numbering jumps 844 -> 847 -> 852, so the remaining constructor
/// arguments, any following statements and the closing '}' are missing.
/// Restore them from the authoritative source before relying on this text.
843 void PushIncludeMacroStack() {
844 IncludeMacroStack
.push_back(IncludeStackInfo(CurLexer
.take(),
847 CurTokenLexer
.take(),
/// PopIncludeMacroStack - Reload the current lexer state from the last entry
/// of IncludeMacroStack and then remove that entry. CurLexer, CurPTHLexer
/// and CurTokenLexer are reset() to the entry's TheLexer, ThePTHLexer and
/// TheTokenLexer; CurPPLexer and CurDirLookup are assigned from ThePPLexer
/// and TheDirLookup.
/// NOTE(review): back() is called unconditionally, so the stack is presumably
/// required to be non-empty on entry -- confirm at the call sites.
852 void PopIncludeMacroStack() {
853 CurLexer
.reset(IncludeMacroStack
.back().TheLexer
);
854 CurPTHLexer
.reset(IncludeMacroStack
.back().ThePTHLexer
);
855 CurPPLexer
= IncludeMacroStack
.back().ThePPLexer
;
856 CurTokenLexer
.reset(IncludeMacroStack
.back().TheTokenLexer
);
857 CurDirLookup
= IncludeMacroStack
.back().TheDirLookup
;
// Remove the entry only after every field has been copied out of back().
858 IncludeMacroStack
.pop_back();
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 858 to 861).
861 /// AllocateMacroInfo - Allocate a new MacroInfo object.
/// This is the overload without a SourceLocation argument.
/// \returns a pointer to the newly allocated MacroInfo.
862 MacroInfo
*AllocateMacroInfo();
864 /// ReleaseMacroInfo - Release the specified MacroInfo. This memory will
865 /// be reused for allocating new MacroInfo objects.
/// \param MI the MacroInfo to release; because its memory is reused, the
///        caller should not touch the object afterwards.
866 void ReleaseMacroInfo(MacroInfo
* MI
);
868 /// ReadMacroName - Lex and validate a macro name, which occurs after a
869 /// #define or #undef. This emits a diagnostic, sets the token kind to eom,
870 /// and discards the rest of the macro line if the macro name is invalid.
/// \param MacroNameTok receives the lexed macro-name token; its kind is set
///        to eom when the name is invalid.
/// \param isDefineUndef char-typed flag, defaults to 0. NOTE(review): the
///        encoding of its values is not described in this header; document
///        it once confirmed from the definition.
871 void ReadMacroName(Token
&MacroNameTok
, char isDefineUndef
= 0);
873 /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
874 /// definition has just been read. Lex the rest of the arguments and the
875 /// closing ), updating MI with what we learn. Return true if an error occurs
876 /// parsing the arg list.
/// \param MI the MacroInfo being defined; updated with the parsed arguments.
/// \returns true on error, false if the argument list was parsed.
877 bool ReadMacroDefinitionArgList(MacroInfo
*MI
);
879 /// SkipExcludedConditionalBlock - We just read a #if or related directive and
880 /// decided that the subsequent tokens are in the #if'd out portion of the
881 /// file. Lex the rest of the file, until we see an #endif. If
882 /// FoundNonSkipPortion is true, then we have already emitted code for part of
883 /// this #if directive, so #else/#elif blocks should never be entered. If
884 /// FoundElse is false, then #else directives are ok, if not, then we have
885 /// already seen one so a #else directive is a duplicate. When this returns,
886 /// the caller can lex the first valid token.
/// \param IfTokenLoc a source location. NOTE(review): presumably that of the
///        #if-family directive being skipped -- confirm.
/// \param FoundNonSkipPortion true if part of this conditional has already
///        been emitted, so no #else/#elif block may be entered.
/// \param FoundElse true if a #else was already seen, making another one a
///        duplicate.
887 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc
,
888 bool FoundNonSkipPortion
, bool FoundElse
);
890 /// PTHSkipExcludedConditionalBlock - A fast PTH version of
891 /// SkipExcludedConditionalBlock.
/// Unlike that function it takes no arguments. NOTE(review): presumably it
/// works on the current PTH lexer -- confirm against the definition.
892 void PTHSkipExcludedConditionalBlock();
894 /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
895 /// may occur after a #if or #elif directive and return it as a bool. If the
896 /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
/// \param IfNDefMacro output: receives X when the expression is equivalent
///        to "!defined(X)".
/// \returns the value of the expression converted to bool.
897 bool EvaluateDirectiveExpression(IdentifierInfo
*&IfNDefMacro
);
899 /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
900 /// #pragma GCC poison/system_header/dependency and #pragma once.
/// NOTE(review): the list above may be incomplete; this header also declares
/// handlers for mark, comment, message, push_macro and pop_macro.
901 void RegisterBuiltinPragmas();
903 /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__, with the
904 /// identifier table.
/// Takes no arguments and returns nothing.
905 void RegisterBuiltinMacros();
907 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
908 /// be expanded as a macro, handle it and return the next token as 'Tok'. If
909 /// the macro should not be expanded return true, otherwise return false.
/// \param Tok on entry the identifier token; when the macro is expanded it
///        receives the next token.
/// \param MI NOTE(review): presumably the MacroInfo of the macro named by
///        \p Tok -- confirm.
/// \returns true if the macro should not be expanded, false otherwise.
910 bool HandleMacroExpandedIdentifier(Token
&Tok
, MacroInfo
*MI
);
912 /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
913 /// lexed is a '('. If so, consume the token and return true, if not, this
914 /// method should have no observable side-effect on the lexed tokens.
/// \returns true if the next token was '(' (and has been consumed), false
///          otherwise.
915 bool isNextPPTokenLParen();
917 /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
918 /// invoked to read all of the formal arguments specified for the macro
919 /// invocation. This returns null on error.
/// \param MacroName token passed by reference. NOTE(review): presumably the
///        token naming the macro being invoked -- confirm.
/// \param MI NOTE(review): presumably the definition of the macro being
///        invoked -- confirm.
/// \param InstantiationEnd output location. NOTE(review): presumably set to
///        the end of the invocation -- confirm.
/// \returns the parsed arguments, or null on error.
920 MacroArgs
*ReadFunctionLikeMacroArgs(Token
&MacroName
, MacroInfo
*MI
,
921 SourceLocation
&InstantiationEnd
);
923 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
924 /// as a builtin macro, handle it and return the next token as 'Tok'.
/// \param Tok on entry the identifier token of the builtin macro; on return
///        the next token.
925 void ExpandBuiltinMacro(Token
&Tok
);
927 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
928 /// return the first token after the directive. The _Pragma token has just
929 /// been read into 'Tok'.
/// \param Tok on entry the _Pragma token; on return the first token after
///        the directive.
930 void Handle_Pragma(Token
&Tok
);
932 /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
933 /// is not enclosed within a string literal.
/// \param Tok token passed by reference. NOTE(review): presumably used the
///        same way as in Handle_Pragma(Token &) -- confirm.
934 void HandleMicrosoft__pragma(Token
&Tok
);
/// Handle_Pragma - Overload that takes the pragma text and its locations
/// explicitly. NOTE(review): undocumented in the original; the parameter
/// notes below are inferred from names only and must be confirmed against
/// the definition.
/// \param Introducer presumably identifies which spelling introduced the
///        pragma.
/// \param StrVal presumably the text of the pragma.
/// \param PragmaLoc presumably the location of the introducing token.
/// \param RParenLoc presumably the location of the closing ')'.
936 void Handle_Pragma(unsigned Introducer
, const std::string
&StrVal
,
937 SourceLocation PragmaLoc
, SourceLocation RParenLoc
);
939 /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
940 /// start lexing tokens from it instead of the current buffer.
/// \param TheLexer the lexer to add. NOTE(review): whether ownership is
///        transferred is not stated in this header -- confirm.
/// \param Dir NOTE(review): presumably the DirectoryLookup through which the
///        file was found -- confirm.
941 void EnterSourceFileWithLexer(Lexer
*TheLexer
, const DirectoryLookup
*Dir
);
943 /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
944 /// start getting tokens from it using the PTH cache.
/// \param PL the PTH lexer to add. NOTE(review): whether ownership is
///        transferred is not stated in this header -- confirm.
/// \param Dir NOTE(review): presumably the DirectoryLookup through which the
///        file was found -- confirm.
945 void EnterSourceFileWithPTH(PTHLexer
*PL
, const DirectoryLookup
*Dir
);
947 /// IsFileLexer - Returns true if we are lexing from a file and not a
948 /// pragma or a macro.
/// When \p L is non-null the answer is the negation of L->isPragmaLexer();
/// when \p L is null the answer is whether \p P is non-null.
/// \param L lexer to test; may be null.
/// \param P preprocessor lexer consulted only when \p L is null.
949 static bool IsFileLexer(const Lexer
* L
, const PreprocessorLexer
* P
) {
950 return L
? !L
->isPragmaLexer() : P
!= 0;
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 950 to 953).
/// IsFileLexer - Overload for an include-stack entry: forwards the entry's
/// TheLexer and ThePPLexer members to the two-pointer overload.
/// \param I the IncludeStackInfo entry to test.
953 static bool IsFileLexer(const IncludeStackInfo
& I
) {
954 return IsFileLexer(I
.TheLexer
, I
.ThePPLexer
);
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 954 to 957).
/// IsFileLexer - Overload for the current state: forwards CurLexer.get() and
/// CurPPLexer to the two-pointer overload.
957 bool IsFileLexer() const {
958 return IsFileLexer(CurLexer
.get(), CurPPLexer
);
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 958 to 961).
961 //===--------------------------------------------------------------------===//
/// CachingLex - NOTE(review): undocumented in the original. Presumably the
/// Lex entry point used while in caching-lex mode, filling in \p Result --
/// confirm against the definition.
963 void CachingLex(Token
&Result
);
/// InCachingLexMode - Return true when CurPPLexer, CurTokenLexer and
/// CurPTHLexer all compare equal to 0 while IncludeMacroStack is not empty.
964 bool InCachingLexMode() const {
965 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
966 // that we are past EOF, not that we are in CachingLex mode.
967 return CurPPLexer
== 0 && CurTokenLexer
== 0 && CurPTHLexer
== 0 &&
968 !IncludeMacroStack
.empty();
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 968 to 970).
/// EnterCachingLexMode - NOTE(review): undocumented in the original.
/// Presumably switches the preprocessor into the state that
/// InCachingLexMode() reports as true -- confirm against the definition.
970 void EnterCachingLexMode();
/// ExitCachingLexMode - Call RemoveTopOfLexerStack() if, and only if,
/// InCachingLexMode() is currently true; otherwise do nothing.
971 void ExitCachingLexMode() {
972 if (InCachingLexMode())
973 RemoveTopOfLexerStack();
// NOTE(review): the closing '}' of this body is missing from this copy of the
// file (the embedded numbering jumps from 973 to 975).
/// PeekAhead - NOTE(review): undocumented in the original. Presumably
/// returns the token \p N positions ahead without consuming it -- confirm
/// the exact indexing and the lifetime of the returned reference.
975 const Token
&PeekAhead(unsigned N
);
/// AnnotatePreviousCachedTokens - NOTE(review): undocumented in the
/// original. Presumably replaces previously cached tokens with the
/// annotation token \p Tok -- confirm against the definition.
976 void AnnotatePreviousCachedTokens(const Token
&Tok
);
978 //===--------------------------------------------------------------------===//
979 /// Handle*Directive - implement the various preprocessor directives. These
980 /// should side-effect the current preprocessor object so that the next call
981 /// to Lex() will return the appropriate token next.
/// NOTE(review): the individual handlers carry no comments of their own; the
/// per-handler notes below are inferred from names and must be confirmed.
// Presumably handles #line.
982 void HandleLineDirective(Token
&Tok
);
// Presumably handles the "# <digits>" line-marker form.
983 void HandleDigitDirective(Token
&Tok
);
// Presumably handles #warning when isWarning is true and #error otherwise.
984 void HandleUserDiagnosticDirective(Token
&Tok
, bool isWarning
);
// Presumably handles #ident and #sccs.
985 void HandleIdentSCCSDirective(Token
&Tok
);
// File-inclusion directive handlers. Each takes the location of the
// introducing '#' as HashLoc.
// NOTE(review): a parameter line of HandleIncludeDirective is missing from
// this copy of the file (the embedded numbering jumps from 988 to 990). The
// sibling handlers below all take "Token &Tok" right after HashLoc, so that
// is presumably what was dropped -- restore it from the authoritative source.
// LookupFrom defaults to 0 and isImport defaults to false.
988 void HandleIncludeDirective(SourceLocation HashLoc
,
990 const DirectoryLookup
*LookupFrom
= 0,
991 bool isImport
= false);
// Presumably handles #include_next.
992 void HandleIncludeNextDirective(SourceLocation HashLoc
, Token
&Tok
);
// Presumably handles #__include_macros -- confirm the directive spelling.
993 void HandleIncludeMacrosDirective(SourceLocation HashLoc
, Token
&Tok
);
// Presumably handles #import.
994 void HandleImportDirective(SourceLocation HashLoc
, Token
&Tok
);
// Macro-definition directive handlers. NOTE(review): undocumented in the
// original; presumably these implement #define and #undef respectively.
997 void HandleDefineDirective(Token
&Tok
);
998 void HandleUndefDirective(Token
&Tok
);
1000 // Conditional Inclusion.
// NOTE(review): the handlers below are undocumented in the original; the
// notes are inferred from names and must be confirmed.
// Presumably handles #ifdef, or #ifndef when isIfndef is true.
// ReadAnyTokensBeforeDirective presumably reports whether any tokens were
// read before this directive.
1001 void HandleIfdefDirective(Token
&Tok
, bool isIfndef
,
1002 bool ReadAnyTokensBeforeDirective
);
// Presumably handles #if.
1003 void HandleIfDirective(Token
&Tok
, bool ReadAnyTokensBeforeDirective
);
// Presumably handles #endif.
1004 void HandleEndifDirective(Token
&Tok
);
// Presumably handles #else.
1005 void HandleElseDirective(Token
&Tok
);
// Presumably handles #elif.
1006 void HandleElifDirective(Token
&Tok
);
// Pragma handlers. NOTE(review): undocumented in the original; the mapping
// of each handler to a pragma is inferred from its name and must be
// confirmed against the definitions.
// Presumably the entry point for a pragma; Introducer presumably identifies
// which spelling introduced it.
1009 void HandlePragmaDirective(unsigned Introducer
);
// Presumably #pragma once.
1011 void HandlePragmaOnce(Token
&OnceTok
);
// Presumably #pragma mark.
1012 void HandlePragmaMark();
// Presumably #pragma GCC poison.
1013 void HandlePragmaPoison(Token
&PoisonTok
);
// Presumably #pragma GCC system_header.
1014 void HandlePragmaSystemHeader(Token
&SysHeaderTok
);
// Presumably #pragma GCC dependency.
1015 void HandlePragmaDependency(Token
&DependencyTok
);
// Presumably #pragma comment.
1016 void HandlePragmaComment(Token
&CommentTok
);
// Presumably #pragma message.
1017 void HandlePragmaMessage(Token
&MessageTok
);
// Presumably #pragma push_macro.
1018 void HandlePragmaPushMacro(Token
&Tok
);
// Presumably #pragma pop_macro.
1019 void HandlePragmaPopMacro(Token
&Tok
);
// Presumably the argument parser shared by the push_macro/pop_macro handlers;
// returns an IdentifierInfo pointer (null-on-error behavior to be confirmed).
1020 IdentifierInfo
*ParsePragmaPushOrPopMacro(Token
&Tok
);
1022 // Return true and store the first token only if any CommentHandler
1023 // has inserted some tokens and getCommentRetentionState() is false.
// Token receives that first token when true is returned; Comment is the
// source range of the comment. NOTE(review): presumably Comment is what gets
// passed to each CommentHandler -- confirm against the definition.
1024 bool HandleComment(Token
&Token
, SourceRange Comment
);
1027 /// \brief Abstract base class that describes a handler that will receive
1028 /// source ranges for each of the comments encountered in the source file.
/// NOTE(review): lines are missing from this class in this copy of the file
/// (the embedded numbering skips 1030, 1032 and 1036); presumably a "public:"
/// label and the closing "};" were dropped.
1029 class CommentHandler
{
// Virtual destructor, declared here and defined elsewhere.
1031 virtual ~CommentHandler();
1033 // The handler shall return true if it has pushed any tokens
1034 // to be read using e.g. EnterToken or EnterTokenStream.
// PP is the preprocessor passed to the handler; Comment is the source range
// of the comment. Pure virtual, so every concrete handler must implement it.
1035 virtual bool HandleComment(Preprocessor
&PP
, SourceRange Comment
) = 0;
1038 /// \brief RAII class that determines when any macro expansion has occurred
1039 /// between the time the instance was created and the time it was
/// queried. NOTE(review): the original sentence is cut off after "it was" in
/// this copy; "queried" is inferred from hasMacroExpansionOccured() below.
/// NOTE(review): the member declaration initialised by the constructor, any
/// access specifier and the closing "};" are also missing from this copy
/// (the embedded numbering skips 1040, 1042, 1043 and 1047).
1041 class PPMacroExpansionTrap
{
// Bind to PP and clear its MacroExpansionFlag via reset().
1044 PPMacroExpansionTrap(Preprocessor
&PP
) : PP(PP
) { reset(); }
// Return the current value of PP.MacroExpansionFlag. The misspelling
// "Occured" is part of the public name and is left unchanged for callers.
1045 bool hasMacroExpansionOccured() const { return PP
.MacroExpansionFlag
; }
// Set PP.MacroExpansionFlag back to false.
1046 void reset() { PP
.MacroExpansionFlag
= false; }
1049 } // end namespace clang