Don't warn for empty 'if' body if there is a macro that expands to nothing, e.g:
[clang.git] / include / clang / Lex / Preprocessor.h
blob2194d6fe620ac2a95b389c47e3ec169cd48766f3
1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the Preprocessor interface.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
17 #include "clang/Lex/MacroInfo.h"
18 #include "clang/Lex/Lexer.h"
19 #include "clang/Lex/PTHLexer.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/TokenLexer.h"
22 #include "clang/Lex/PTHManager.h"
23 #include "clang/Basic/Builtins.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/IdentifierTable.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/OwningPtr.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Allocator.h"
31 #include <vector>
33 namespace clang {
35 class SourceManager;
36 class ExternalPreprocessorSource;
37 class FileManager;
38 class FileSystemOptions;
39 class FileEntry;
40 class HeaderSearch;
41 class PragmaNamespace;
42 class PragmaHandler;
43 class CommentHandler;
44 class ScratchBuffer;
45 class TargetInfo;
46 class PPCallbacks;
47 class CodeCompletionHandler;
48 class DirectoryLookup;
49 class PreprocessingRecord;
50 class PPMacroExpansionTrap;
52 /// Preprocessor - This object engages in a tight little dance with the lexer to
53 /// efficiently preprocess tokens. Lexers know only about tokens within a
54 /// single source file, and don't know anything about preprocessor-level issues
55 /// like the #include stack, token expansion, etc.
56 ///
57 class Preprocessor {
58 Diagnostic *Diags;
59 LangOptions Features;
60 const TargetInfo &Target;
61 FileManager &FileMgr;
62 const FileSystemOptions &FileSystemOpts;
63 SourceManager &SourceMgr;
64 ScratchBuffer *ScratchBuf;
65 HeaderSearch &HeaderInfo;
67 /// \brief External source of macros.
68 ExternalPreprocessorSource *ExternalSource;
70 /// PTH - An optional PTHManager object used for getting tokens from
71 /// a token cache rather than lexing the original source file.
72 llvm::OwningPtr<PTHManager> PTH;
74 /// BP - A BumpPtrAllocator object used to quickly allocate and release
75 /// objects internal to the Preprocessor.
76 llvm::BumpPtrAllocator BP;
78 /// Identifiers for builtin macros and other builtins.
79 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
80 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
81 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
82 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
83 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
84 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
85 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
86 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
87 IdentifierInfo *Ident__has_feature; // __has_feature
88 IdentifierInfo *Ident__has_builtin; // __has_builtin
89 IdentifierInfo *Ident__has_attribute; // __has_attribute
90 IdentifierInfo *Ident__has_include; // __has_include
91 IdentifierInfo *Ident__has_include_next; // __has_include_next
93 SourceLocation DATELoc, TIMELoc;
94 unsigned CounterValue; // Next __COUNTER__ value.
96 enum {
97 /// MaxAllowedIncludeStackDepth - Maximum depth of #includes.
98 MaxAllowedIncludeStackDepth = 200
101 // State that is set before the preprocessor begins.
102 bool KeepComments : 1;
103 bool KeepMacroComments : 1;
105 // State that changes while the preprocessor runs:
106 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
108 /// Whether the preprocessor owns the header search object.
109 bool OwnsHeaderSearch : 1;
111 /// DisableMacroExpansion - True if macro expansion is disabled.
112 bool DisableMacroExpansion : 1;
114 /// \brief This is set to true when a macro is expanded.
115 /// Used by PPMacroExpansionTrap.
116 bool MacroExpansionFlag : 1;
117 friend class PPMacroExpansionTrap;
119 /// \brief Whether we have already loaded macros from the external source.
120 mutable bool ReadMacrosFromExternalSource : 1;
122 /// Identifiers - This is mapping/lookup information for all identifiers in
123 /// the program, including program keywords.
124 mutable IdentifierTable Identifiers;
126 /// Selectors - This table contains all the selectors in the program. Unlike
127 /// IdentifierTable above, this table *isn't* populated by the preprocessor.
128 /// It is declared/instantiated here because its role/lifetime is
129 /// conceptually similar to the IdentifierTable's. In addition, the current
130 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
131 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
132 /// the lifetime of the preprocessor.
133 SelectorTable Selectors;
135 /// BuiltinInfo - Information about builtins.
136 Builtin::Context BuiltinInfo;
138 /// PragmaHandlers - This tracks all of the pragmas that the client registered
139 /// with this preprocessor.
140 PragmaNamespace *PragmaHandlers;
142 /// \brief Tracks all of the comment handlers that the client registered
143 /// with this preprocessor.
144 std::vector<CommentHandler *> CommentHandlers;
146 /// \brief The code-completion handler.
147 CodeCompletionHandler *CodeComplete;
149 /// \brief The file that we're performing code-completion for, if any.
150 const FileEntry *CodeCompletionFile;
152 /// \brief The number of bytes that we will initially skip when entering the
153 /// main file, which is used when loading a precompiled preamble, along
154 /// with a flag that indicates whether skipping this number of bytes will
155 /// place the lexer at the start of a line.
156 std::pair<unsigned, bool> SkipMainFilePreamble;
158 /// CurLexer - This is the current top of the stack that we're lexing from if
159 /// not expanding a macro and we are lexing directly from source code.
160 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
161 llvm::OwningPtr<Lexer> CurLexer;
163 /// CurPTHLexer - This is the current top of stack that we're lexing from if
164 /// not expanding from a macro and we are lexing from a PTH cache.
165 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
166 llvm::OwningPtr<PTHLexer> CurPTHLexer;
168 /// CurPPLexer - This is the current top of the stack that we're lexing from
169 /// if not expanding a macro. This is an alias for either CurLexer or
170 /// CurPTHLexer.
171 PreprocessorLexer *CurPPLexer;
173 /// CurLookup - The DirectoryLookup structure used to find the current
174 /// FileEntry, if CurLexer is non-null and if applicable. This allows us to
175 /// implement #include_next and find directory-specific properties.
176 const DirectoryLookup *CurDirLookup;
178 /// CurTokenLexer - This is the current macro we are expanding, if we are
179 /// expanding a macro. One of CurLexer and CurTokenLexer must be null.
180 llvm::OwningPtr<TokenLexer> CurTokenLexer;
182 /// IncludeMacroStack - This keeps track of the stack of files currently
183 /// #included, and macros currently being expanded from, not counting
184 /// CurLexer/CurTokenLexer.
185 struct IncludeStackInfo {
186 Lexer *TheLexer;
187 PTHLexer *ThePTHLexer;
188 PreprocessorLexer *ThePPLexer;
189 TokenLexer *TheTokenLexer;
190 const DirectoryLookup *TheDirLookup;
192 IncludeStackInfo(Lexer *L, PTHLexer* P, PreprocessorLexer* PPL,
193 TokenLexer* TL, const DirectoryLookup *D)
194 : TheLexer(L), ThePTHLexer(P), ThePPLexer(PPL), TheTokenLexer(TL),
195 TheDirLookup(D) {}
197 std::vector<IncludeStackInfo> IncludeMacroStack;
199 /// Callbacks - These are actions invoked when some preprocessor activity is
200 /// encountered (e.g. a file is #included, etc).
201 PPCallbacks *Callbacks;
203 /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
204 /// to the actual definition of the macro.
205 llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
207 /// MacroArgCache - This is a "freelist" of MacroArgs objects that can be
208 /// reused for quick allocation.
209 MacroArgs *MacroArgCache;
210 friend class MacroArgs;
212 /// PragmaPushMacroInfo - For each IdentifierInfo used in a #pragma
213 /// push_macro directive, we keep a MacroInfo stack used to restore
214 /// previous macro value.
215 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
217 // Various statistics we track for performance analysis.
218 unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma;
219 unsigned NumIf, NumElse, NumEndif;
220 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
221 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
222 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
223 unsigned NumSkipped;
225 /// Predefines - This string is the predefined macros that preprocessor
226 /// should use from the command line etc.
227 std::string Predefines;
229 /// TokenLexerCache - Cache macro expanders to reduce malloc traffic.
230 enum { TokenLexerCacheSize = 8 };
231 unsigned NumCachedTokenLexers;
232 TokenLexer *TokenLexerCache[TokenLexerCacheSize];
234 /// \brief A record of the macro definitions and instantiations that
235 /// occurred during preprocessing.
237 /// This is an optional side structure that can be enabled with
238 /// \c createPreprocessingRecord() prior to preprocessing.
239 PreprocessingRecord *Record;
241 private: // Cached tokens state.
242 typedef llvm::SmallVector<Token, 1> CachedTokensTy;
244 /// CachedTokens - Cached tokens are stored here when we do backtracking or
245 /// lookahead. They are "lexed" by the CachingLex() method.
246 CachedTokensTy CachedTokens;
248 /// CachedLexPos - The position of the cached token that CachingLex() should
249 /// "lex" next. If it points beyond the CachedTokens vector, it means that
250 /// a normal Lex() should be invoked.
251 CachedTokensTy::size_type CachedLexPos;
253 /// BacktrackPositions - Stack of backtrack positions, allowing nested
254 /// backtracks. The EnableBacktrackAtThisPos() method pushes a position to
255 /// indicate where CachedLexPos should be set when the Backtrack() method is
256 /// invoked (at which point the last position is popped).
257 std::vector<CachedTokensTy::size_type> BacktrackPositions;
259 struct MacroInfoChain {
260 MacroInfo MI;
261 MacroInfoChain *Next;
262 MacroInfoChain *Prev;
265 /// MacroInfos are managed as a chain for easy disposal. This is the head
266 /// of that list.
267 MacroInfoChain *MIChainHead;
269 /// MICache - A "freelist" of MacroInfo objects that can be reused for quick
270 /// allocation.
271 MacroInfoChain *MICache;
273 MacroInfo *getInfoForMacro(IdentifierInfo *II) const;
275 public:
276 Preprocessor(Diagnostic &diags, const LangOptions &opts,
277 const TargetInfo &target,
278 SourceManager &SM, HeaderSearch &Headers,
279 IdentifierInfoLookup *IILookup = 0,
280 bool OwnsHeaderSearch = false);
282 ~Preprocessor();
284 Diagnostic &getDiagnostics() const { return *Diags; }
285 void setDiagnostics(Diagnostic &D) { Diags = &D; }
287 const LangOptions &getLangOptions() const { return Features; }
288 const TargetInfo &getTargetInfo() const { return Target; }
289 FileManager &getFileManager() const { return FileMgr; }
290 const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; }
291 SourceManager &getSourceManager() const { return SourceMgr; }
292 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
294 IdentifierTable &getIdentifierTable() { return Identifiers; }
295 SelectorTable &getSelectorTable() { return Selectors; }
296 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
297 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
299 void setPTHManager(PTHManager* pm);
301 PTHManager *getPTHManager() { return PTH.get(); }
303 void setExternalSource(ExternalPreprocessorSource *Source) {
304 ExternalSource = Source;
307 ExternalPreprocessorSource *getExternalSource() const {
308 return ExternalSource;
311 /// SetCommentRetentionState - Control whether or not the preprocessor retains
312 /// comments in output.
313 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
314 this->KeepComments = KeepComments | KeepMacroComments;
315 this->KeepMacroComments = KeepMacroComments;
318 bool getCommentRetentionState() const { return KeepComments; }
320 /// isCurrentLexer - Return true if we are lexing directly from the specified
321 /// lexer.
322 bool isCurrentLexer(const PreprocessorLexer *L) const {
323 return CurPPLexer == L;
326 /// getCurrentLexer - Return the current lexer being lexed from. Note
327 /// that this ignores any potentially active macro expansions and _Pragma
328 /// expansions going on at the time.
329 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
331 /// getCurrentFileLexer - Return the current file lexer being lexed from.
332 /// Note that this ignores any potentially active macro expansions and _Pragma
333 /// expansions going on at the time.
334 PreprocessorLexer *getCurrentFileLexer() const;
336 /// getPPCallbacks/addPPCallbacks - Accessors for preprocessor callbacks.
337 /// Note that this class takes ownership of any PPCallbacks object given to
338 /// it.
339 PPCallbacks *getPPCallbacks() const { return Callbacks; }
340 void addPPCallbacks(PPCallbacks *C) {
341 if (Callbacks)
342 C = new PPChainedCallbacks(C, Callbacks);
343 Callbacks = C;
346 /// getMacroInfo - Given an identifier, return the MacroInfo it is #defined to
347 /// or null if it isn't #define'd.
348 MacroInfo *getMacroInfo(IdentifierInfo *II) const {
349 if (!II->hasMacroDefinition())
350 return 0;
352 return getInfoForMacro(II);
355 /// setMacroInfo - Specify a macro for this identifier.
357 void setMacroInfo(IdentifierInfo *II, MacroInfo *MI);
359 /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
360 /// state of the macro table. This visits every currently-defined macro.
361 typedef llvm::DenseMap<IdentifierInfo*,
362 MacroInfo*>::const_iterator macro_iterator;
363 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
364 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
366 const std::string &getPredefines() const { return Predefines; }
367 /// setPredefines - Set the predefines for this Preprocessor. These
368 /// predefines are automatically injected when parsing the main file.
369 void setPredefines(const char *P) { Predefines = P; }
370 void setPredefines(const std::string &P) { Predefines = P; }
372 /// getIdentifierInfo - Return information about the specified preprocessor
373 /// identifier token. The version of this method that takes two character
374 /// pointers is preferred unless the identifier is already available as a
375 /// string (this avoids allocation and copying of memory to construct an
376 /// std::string).
377 IdentifierInfo *getIdentifierInfo(llvm::StringRef Name) const {
378 return &Identifiers.get(Name);
381 /// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
382 /// If 'Namespace' is non-null, then it is a token required to exist on the
383 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
384 void AddPragmaHandler(llvm::StringRef Namespace, PragmaHandler *Handler);
385 void AddPragmaHandler(PragmaHandler *Handler) {
386 AddPragmaHandler(llvm::StringRef(), Handler);
389 /// RemovePragmaHandler - Remove the specific pragma handler from
390 /// the preprocessor. If \arg Namespace is non-null, then it should
391 /// be the namespace that \arg Handler was added to. It is an error
392 /// to remove a handler that has not been registered.
393 void RemovePragmaHandler(llvm::StringRef Namespace, PragmaHandler *Handler);
394 void RemovePragmaHandler(PragmaHandler *Handler) {
395 RemovePragmaHandler(llvm::StringRef(), Handler);
398 /// \brief Add the specified comment handler to the preprocessor.
399 void AddCommentHandler(CommentHandler *Handler);
401 /// \brief Remove the specified comment handler.
403 /// It is an error to remove a handler that has not been registered.
404 void RemoveCommentHandler(CommentHandler *Handler);
406 /// \brief Set the code completion handler to the given object.
407 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
408 CodeComplete = &Handler;
411 /// \brief Retrieve the current code-completion handler.
412 CodeCompletionHandler *getCodeCompletionHandler() const {
413 return CodeComplete;
416 /// \brief Clear out the code completion handler.
417 void clearCodeCompletionHandler() {
418 CodeComplete = 0;
421 /// \brief Hook used by the lexer to invoke the "natural language" code
422 /// completion point.
423 void CodeCompleteNaturalLanguage();
425 /// \brief Retrieve the preprocessing record, or NULL if there is no
426 /// preprocessing record.
427 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
429 /// \brief Create a new preprocessing record, which will keep track of
430 /// all macro expansions, macro definitions, etc.
431 void createPreprocessingRecord();
433 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
434 /// which implicitly adds the builtin defines etc.
435 void EnterMainSourceFile();
437 /// EndSourceFile - Inform the preprocessor callbacks that processing is
438 /// complete.
439 void EndSourceFile();
441 /// EnterSourceFile - Add a source file to the top of the include stack and
442 /// start lexing tokens from it instead of the current buffer. Emit an error
443 /// and don't enter the file on error.
444 void EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
445 SourceLocation Loc);
447 /// EnterMacro - Add a Macro to the top of the include stack and start lexing
448 /// tokens from it instead of the current buffer. Args specifies the
449 /// tokens input to a function-like macro.
451 /// ILEnd specifies the location of the ')' for a function-like macro or the
452 /// identifier for an object-like macro.
453 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroArgs *Args);
455 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
456 /// which will cause the lexer to start returning the specified tokens.
458 /// If DisableMacroExpansion is true, tokens lexed from the token stream will
459 /// not be subject to further macro expansion. Otherwise, these tokens will
460 /// be re-macro-expanded when/if expansion is enabled.
462 /// If OwnsTokens is false, this method assumes that the specified stream of
463 /// tokens has a permanent owner somewhere, so they do not need to be copied.
464 /// If it is true, it assumes the array of tokens is allocated with new[] and
465 /// must be freed.
467 void EnterTokenStream(const Token *Toks, unsigned NumToks,
468 bool DisableMacroExpansion, bool OwnsTokens);
470 /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
471 /// lexer stack. This should only be used in situations where the current
472 /// state of the top-of-stack lexer is known.
473 void RemoveTopOfLexerStack();
475 /// EnableBacktrackAtThisPos - From the point that this method is called, and
476 /// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
477 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
478 /// make the Preprocessor re-lex the same tokens.
480 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
481 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
482 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
484 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
485 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
486 /// tokens will continue indefinitely.
488 void EnableBacktrackAtThisPos();
490 /// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
491 void CommitBacktrackedTokens();
493 /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
494 /// EnableBacktrackAtThisPos() was previously called.
495 void Backtrack();
497 /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
498 /// caching of tokens is on.
499 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
501 /// Lex - To lex a token from the preprocessor, just pull a token from the
502 /// current lexer or macro object.
503 void Lex(Token &Result) {
504 if (CurLexer)
505 CurLexer->Lex(Result);
506 else if (CurPTHLexer)
507 CurPTHLexer->Lex(Result);
508 else if (CurTokenLexer)
509 CurTokenLexer->Lex(Result);
510 else
511 CachingLex(Result);
514 /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
515 /// something not a comment. This is useful in -E -C mode where comments
516 /// would foul up preprocessor directive handling.
517 void LexNonComment(Token &Result) {
519 Lex(Result);
520 while (Result.getKind() == tok::comment);
523 /// LexUnexpandedToken - This is just like Lex, but this disables macro
524 /// expansion of identifier tokens.
525 void LexUnexpandedToken(Token &Result) {
526 // Disable macro expansion.
527 bool OldVal = DisableMacroExpansion;
528 DisableMacroExpansion = true;
529 // Lex the token.
530 Lex(Result);
532 // Reenable it.
533 DisableMacroExpansion = OldVal;
536 /// LookAhead - This peeks ahead N tokens and returns that token without
537 /// consuming any tokens. LookAhead(0) returns the next token that would be
538 /// returned by Lex(), LookAhead(1) returns the token after it, etc. This
539 /// returns normal tokens after phase 5. As such, it is equivalent to using
540 /// 'Lex', not 'LexUnexpandedToken'.
541 const Token &LookAhead(unsigned N) {
542 if (CachedLexPos + N < CachedTokens.size())
543 return CachedTokens[CachedLexPos+N];
544 else
545 return PeekAhead(N+1);
548 /// RevertCachedTokens - When backtracking is enabled and tokens are cached,
549 /// this allows reverting a specific number of tokens.
550 /// Note that the number of tokens being reverted should be up to the last
551 /// backtrack position, not more.
552 void RevertCachedTokens(unsigned N) {
553 assert(isBacktrackEnabled() &&
554 "Should only be called when tokens are cached for backtracking");
555 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
556 && "Should revert tokens up to the last backtrack position, not more");
557 assert(signed(CachedLexPos) - signed(N) >= 0 &&
558 "Corrupted backtrack positions ?");
559 CachedLexPos -= N;
562 /// EnterToken - Enters a token in the token stream to be lexed next. If
563 /// Backtrack() is called afterwards, the token will remain at the insertion
564 /// point.
565 void EnterToken(const Token &Tok) {
566 EnterCachingLexMode();
567 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
570 /// AnnotateCachedTokens - We notify the Preprocessor that if it is caching
571 /// tokens (because backtrack is enabled) it should replace the most recent
572 /// cached tokens with the given annotation token. This function has no effect
573 /// if backtracking is not enabled.
575 /// Note that the use of this function is just for optimization, so that the
576 /// cached tokens don't get re-parsed and re-resolved after a backtrack is
577 /// invoked.
578 void AnnotateCachedTokens(const Token &Tok) {
579 assert(Tok.isAnnotation() && "Expected annotation token");
580 if (CachedLexPos != 0 && isBacktrackEnabled())
581 AnnotatePreviousCachedTokens(Tok);
584 /// \brief Replace the last token with an annotation token.
586 /// Like AnnotateCachedTokens(), this routine replaces an
587 /// already-parsed (and resolved) token with an annotation
588 /// token. However, this routine only replaces the last token with
589 /// the annotation token; it does not affect any other cached
590 /// tokens. This function has no effect if backtracking is not
591 /// enabled.
592 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
593 assert(Tok.isAnnotation() && "Expected annotation token");
594 if (CachedLexPos != 0 && isBacktrackEnabled())
595 CachedTokens[CachedLexPos-1] = Tok;
598 /// \brief Specify the point at which code-completion will be performed.
600 /// \param File the file in which code completion should occur. If
601 /// this file is included multiple times, code-completion will
602 /// perform completion the first time it is included. If NULL, this
603 /// function clears out the code-completion point.
605 /// \param Line the line at which code completion should occur
606 /// (1-based).
608 /// \param Column the column at which code completion should occur
609 /// (1-based).
611 /// \returns true if an error occurred, false otherwise.
612 bool SetCodeCompletionPoint(const FileEntry *File,
613 unsigned Line, unsigned Column);
615 /// \brief Determine if this source location refers into the file
616 /// for which we are performing code completion.
617 bool isCodeCompletionFile(SourceLocation FileLoc) const;
619 /// \brief Instruct the preprocessor to skip part of
620 /// the main source file.
622 /// \param Bytes The number of bytes in the preamble to skip.
624 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
625 /// start of a line.
626 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
627 SkipMainFilePreamble.first = Bytes;
628 SkipMainFilePreamble.second = StartOfLine;
631 /// Diag - Forwarding function for diagnostics. This emits a diagnostic at
632 /// the specified Token's location, translating the token's start
633 /// position in the current buffer into a SourcePosition object for rendering.
634 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
635 return Diags->Report(Loc, DiagID);
638 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) {
639 return Diags->Report(Tok.getLocation(), DiagID);
642 /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a
643 /// token is the characters used to represent the token in the source file
644 /// after trigraph expansion and escaped-newline folding. In particular, this
645 /// wants to get the true, uncanonicalized, spelling of things like digraphs,
646 /// UCNs, etc.
648 /// \param Invalid If non-NULL, will be set \c true if an error occurs.
649 std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
650 return Lexer::getSpelling(Tok, SourceMgr, Features, Invalid);
653 /// getSpelling - This method is used to get the spelling of a token into a
654 /// preallocated buffer, instead of as an std::string. The caller is required
655 /// to allocate enough space for the token, which is guaranteed to be at least
656 /// Tok.getLength() bytes long. The length of the actual result is returned.
658 /// Note that this method may do two possible things: it may either fill in
659 /// the buffer specified with characters, or it may *change the input pointer*
660 /// to point to a constant buffer with the data already in it (avoiding a
661 /// copy). The caller is not allowed to modify the returned buffer pointer
662 /// if an internal buffer is returned.
663 unsigned getSpelling(const Token &Tok, const char *&Buffer,
664 bool *Invalid = 0) const {
665 return Lexer::getSpelling(Tok, Buffer, SourceMgr, Features, Invalid);
668 /// getSpelling - This method is used to get the spelling of a token into a
669 /// SmallVector. Note that the returned StringRef may not point to the
670 /// supplied buffer if a copy can be avoided.
671 llvm::StringRef getSpelling(const Token &Tok,
672 llvm::SmallVectorImpl<char> &Buffer,
673 bool *Invalid = 0) const;
675 /// getSpellingOfSingleCharacterNumericConstant - Tok is a numeric constant
676 /// with length 1, return the character.
677 char getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
678 bool *Invalid = 0) const {
679 assert(Tok.is(tok::numeric_constant) &&
680 Tok.getLength() == 1 && "Called on unsupported token");
681 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
683 // If the token is carrying a literal data pointer, just use it.
684 if (const char *D = Tok.getLiteralData())
685 return *D;
687 // Otherwise, fall back on getCharacterData, which is slower, but always
688 // works.
689 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
692 /// CreateString - Plop the specified string into a scratch buffer and set the
693 /// specified token's location and length to it. If specified, the source
694 /// location provides a location of the instantiation point of the token.
695 void CreateString(const char *Buf, unsigned Len,
696 Token &Tok, SourceLocation SourceLoc = SourceLocation());
698 /// \brief Computes the source location just past the end of the
699 /// token at this source location.
701 /// This routine can be used to produce a source location that
702 /// points just past the end of the token referenced by \p Loc, and
703 /// is generally used when a diagnostic needs to point just after a
704 /// token where it expected something different than it received. If
705 /// the returned source location would not be meaningful (e.g., if
706 /// it points into a macro), this routine returns an invalid
707 /// source location.
709 /// \param Offset an offset from the end of the token, where the source
710 /// location should refer to. The default offset (0) produces a source
711 /// location pointing just past the end of the token; an offset of 1 produces
712 /// a source location pointing to the last character in the token, etc.
713 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
714 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, Features);
717 /// DumpToken - Print the token to stderr, used for debugging.
719 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
720 void DumpLocation(SourceLocation Loc) const;
721 void DumpMacro(const MacroInfo &MI) const;
723 /// AdvanceToTokenCharacter - Given a location that specifies the start of a
724 /// token, return a new location that specifies a character within the token.
725 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
726 unsigned Char) const {
727 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, Features);
730 /// IncrementPasteCounter - Increment the counters for the number of token
731 /// paste operations performed. If fast was specified, this is a 'fast paste'
732 /// case we handled.
734 void IncrementPasteCounter(bool isFast) {
735 if (isFast)
736 ++NumFastTokenPaste;
737 else
738 ++NumTokenPaste;
741 void PrintStats();
743 /// HandleMicrosoftCommentPaste - When the macro expander pastes together a
744 /// comment (/##/) in microsoft mode, this method handles updating the current
745 /// state, returning the token on the next source line.
746 void HandleMicrosoftCommentPaste(Token &Tok);
748 //===--------------------------------------------------------------------===//
749 // Preprocessor callback methods. These are invoked by a lexer as various
750 // directives and events are found.
752 /// LookUpIdentifierInfo - Given a tok::identifier token, look up the
753 /// identifier information for the token and install it into the token.
754 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier,
755 const char *BufPtr = 0) const;
757 /// HandleIdentifier - This callback is invoked when the lexer reads an
758 /// identifier and has filled in the tokens IdentifierInfo member. This
759 /// callback potentially macro expands it or turns it into a named token (like
760 /// 'for').
761 void HandleIdentifier(Token &Identifier);
764 /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
765 /// the current file. This either returns the EOF token and returns true, or
766 /// pops a level off the include stack and returns false, at which point the
767 /// client should call lex again.
768 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
770 /// HandleEndOfTokenLexer - This callback is invoked when the current
771 /// TokenLexer hits the end of its token stream.
772 bool HandleEndOfTokenLexer(Token &Result);
774 /// HandleDirective - This callback is invoked when the lexer sees a # token
775 /// at the start of a line. This consumes the directive, modifies the
776 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
777 /// read is the correct one.
778 void HandleDirective(Token &Result);
780 /// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If
781 /// not, emit a diagnostic and consume up until the eom. If EnableMacros is
782 /// true, then we consider macros that expand to zero tokens as being ok.
783 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
785 /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
786 /// current line until the tok::eom token is found.
787 void DiscardUntilEndOfDirective();
789 /// SawDateOrTime - This returns true if the preprocessor has seen a use of
790 /// __DATE__ or __TIME__ in the file so far.
791 bool SawDateOrTime() const {
792 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
794 unsigned getCounterValue() const { return CounterValue; }
795 void setCounterValue(unsigned V) { CounterValue = V; }
797 /// AllocateMacroInfo - Allocate a new MacroInfo object with the provided
798 /// SourceLocation.
799 MacroInfo *AllocateMacroInfo(SourceLocation L);
801 /// CloneMacroInfo - Allocate a new MacroInfo object which is a clone of MI.
802 MacroInfo *CloneMacroInfo(const MacroInfo &MI);
804 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
805 /// checked and spelled filename, e.g. as an operand of #include. This returns
806 /// true if the input filename was in <>'s or false if it was in ""'s. The
807 /// caller is expected to provide a buffer that is large enough to hold the
808 /// spelling of the filename, but is also expected to handle the case when
809 /// this method decides to use a different buffer.
810 bool GetIncludeFilenameSpelling(SourceLocation Loc,llvm::StringRef &Filename);
812 /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
813 /// return null on failure. isAngled indicates whether the file reference is
814 /// for system #include's or not (i.e. using <> instead of "").
815 const FileEntry *LookupFile(llvm::StringRef Filename,
816 bool isAngled, const DirectoryLookup *FromDir,
817 const DirectoryLookup *&CurDir);
819 /// GetCurLookup - The DirectoryLookup structure used to find the current
820 /// FileEntry, if CurLexer is non-null and if applicable. This allows us to
821 /// implement #include_next and find directory-specific properties.
822 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
824 /// isInPrimaryFile - Return true if we're in the top-level file, not in a
825 /// #include.
826 bool isInPrimaryFile() const;
828 /// ConcatenateIncludeName - Handle cases where the #include name is expanded
829 /// from a macro as multiple tokens, which need to be glued together. This
830 /// occurs for code like:
831 /// #define FOO <a/b.h>
832 /// #include FOO
833 /// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
835 /// This code concatenates and consumes tokens up to the '>' token. It
836 /// returns false if the '>' was found; otherwise it returns true once it has
837 /// found and consumed the EOM marker.
838 bool ConcatenateIncludeName(llvm::SmallString<128> &FilenameBuffer,
839 SourceLocation &End);
841 private:
843 void PushIncludeMacroStack() {
844 IncludeMacroStack.push_back(IncludeStackInfo(CurLexer.take(),
845 CurPTHLexer.take(),
846 CurPPLexer,
847 CurTokenLexer.take(),
848 CurDirLookup));
849 CurPPLexer = 0;
852 void PopIncludeMacroStack() {
853 CurLexer.reset(IncludeMacroStack.back().TheLexer);
854 CurPTHLexer.reset(IncludeMacroStack.back().ThePTHLexer);
855 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
856 CurTokenLexer.reset(IncludeMacroStack.back().TheTokenLexer);
857 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
858 IncludeMacroStack.pop_back();
861 /// AllocateMacroInfo - Allocate a new MacroInfo object.
862 MacroInfo *AllocateMacroInfo();
864 /// ReleaseMacroInfo - Release the specified MacroInfo. This memory will
865 /// be reused for allocating new MacroInfo objects.
866 void ReleaseMacroInfo(MacroInfo* MI);
868 /// ReadMacroName - Lex and validate a macro name, which occurs after a
869 /// #define or #undef. This emits a diagnostic, sets the token kind to eom,
870 /// and discards the rest of the macro line if the macro name is invalid.
871 void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
873 /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
874 /// definition has just been read. Lex the rest of the arguments and the
875 /// closing ), updating MI with what we learn. Return true if an error occurs
876 /// parsing the arg list.
877 bool ReadMacroDefinitionArgList(MacroInfo *MI);
879 /// SkipExcludedConditionalBlock - We just read a #if or related directive and
880 /// decided that the subsequent tokens are in the #if'd out portion of the
881 /// file. Lex the rest of the file, until we see an #endif. If
882 /// FoundNonSkipPortion is true, then we have already emitted code for part of
883 /// this #if directive, so #else/#elif blocks should never be entered. If
884 /// FoundElse is false, then #else directives are ok, if not, then we have
885 /// already seen one so a #else directive is a duplicate. When this returns,
886 /// the caller can lex the first valid token.
887 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
888 bool FoundNonSkipPortion, bool FoundElse);
890 /// PTHSkipExcludedConditionalBlock - A fast PTH version of
891 /// SkipExcludedConditionalBlock.
892 void PTHSkipExcludedConditionalBlock();
894 /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
895 /// may occur after a #if or #elif directive and return it as a bool. If the
896 /// expression is equivalent to "!defined(X)" return X in IfNDefMacro.
897 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
899 /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
900 /// #pragma GCC poison/system_header/dependency and #pragma once.
901 void RegisterBuiltinPragmas();
903 /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
904 /// identifier table.
905 void RegisterBuiltinMacros();
907 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
908 /// be expanded as a macro, handle it and return the next token as 'Tok'. If
909 /// the macro should not be expanded return true, otherwise return false.
910 bool HandleMacroExpandedIdentifier(Token &Tok, MacroInfo *MI);
912 /// isNextPPTokenLParen - Determine whether the next preprocessor token to be
913 /// lexed is a '('. If so, consume the token and return true, if not, this
914 /// method should have no observable side-effect on the lexed tokens.
915 bool isNextPPTokenLParen();
917 /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
918 /// invoked to read all of the formal arguments specified for the macro
919 /// invocation. This returns null on error.
920 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
921 SourceLocation &InstantiationEnd);
923 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
924 /// as a builtin macro, handle it and return the next token as 'Tok'.
925 void ExpandBuiltinMacro(Token &Tok);
927 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
928 /// return the first token after the directive. The _Pragma token has just
929 /// been read into 'Tok'.
930 void Handle_Pragma(Token &Tok);
932 /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
933 /// is not enclosed within a string literal.
934 void HandleMicrosoft__pragma(Token &Tok);
936 void Handle_Pragma(unsigned Introducer, const std::string &StrVal,
937 SourceLocation PragmaLoc, SourceLocation RParenLoc);
939 /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and
940 /// start lexing tokens from it instead of the current buffer.
941 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
943 /// EnterSourceFileWithPTH - Add a lexer to the top of the include stack and
944 /// start getting tokens from it using the PTH cache.
945 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
947 /// IsFileLexer - Returns true if we are lexing from a file and not a
948 /// pragma or a macro.
949 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
950 return L ? !L->isPragmaLexer() : P != 0;
953 static bool IsFileLexer(const IncludeStackInfo& I) {
954 return IsFileLexer(I.TheLexer, I.ThePPLexer);
957 bool IsFileLexer() const {
958 return IsFileLexer(CurLexer.get(), CurPPLexer);
961 //===--------------------------------------------------------------------===//
962 // Caching stuff.
963 void CachingLex(Token &Result);
964 bool InCachingLexMode() const {
965 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
966 // that we are past EOF, not that we are in CachingLex mode.
967 return CurPPLexer == 0 && CurTokenLexer == 0 && CurPTHLexer == 0 &&
968 !IncludeMacroStack.empty();
970 void EnterCachingLexMode();
971 void ExitCachingLexMode() {
972 if (InCachingLexMode())
973 RemoveTopOfLexerStack();
975 const Token &PeekAhead(unsigned N);
976 void AnnotatePreviousCachedTokens(const Token &Tok);
978 //===--------------------------------------------------------------------===//
979 /// Handle*Directive - implement the various preprocessor directives. These
980 /// should side-effect the current preprocessor object so that the next call
981 /// to Lex() will return the appropriate token next.
982 void HandleLineDirective(Token &Tok);
983 void HandleDigitDirective(Token &Tok);
984 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
985 void HandleIdentSCCSDirective(Token &Tok);
987 // File inclusion.
988 void HandleIncludeDirective(SourceLocation HashLoc,
989 Token &Tok,
990 const DirectoryLookup *LookupFrom = 0,
991 bool isImport = false);
992 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
993 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
994 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
996 // Macro handling.
997 void HandleDefineDirective(Token &Tok);
998 void HandleUndefDirective(Token &Tok);
1000 // Conditional Inclusion.
1001 void HandleIfdefDirective(Token &Tok, bool isIfndef,
1002 bool ReadAnyTokensBeforeDirective);
1003 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1004 void HandleEndifDirective(Token &Tok);
1005 void HandleElseDirective(Token &Tok);
1006 void HandleElifDirective(Token &Tok);
1008 // Pragmas.
1009 void HandlePragmaDirective(unsigned Introducer);
1010 public:
1011 void HandlePragmaOnce(Token &OnceTok);
1012 void HandlePragmaMark();
1013 void HandlePragmaPoison(Token &PoisonTok);
1014 void HandlePragmaSystemHeader(Token &SysHeaderTok);
1015 void HandlePragmaDependency(Token &DependencyTok);
1016 void HandlePragmaComment(Token &CommentTok);
1017 void HandlePragmaMessage(Token &MessageTok);
1018 void HandlePragmaPushMacro(Token &Tok);
1019 void HandlePragmaPopMacro(Token &Tok);
1020 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1022 // Return true and store the first token only if any CommentHandler
1023 // has inserted some tokens and getCommentRetentionState() is false.
1024 bool HandleComment(Token &Token, SourceRange Comment);
1027 /// \brief Abstract base class that describes a handler that will receive
1028 /// source ranges for each of the comments encountered in the source file.
1029 class CommentHandler {
1030 public:
1031 virtual ~CommentHandler();
1033 // The handler shall return true if it has pushed any tokens
1034 // to be read using e.g. EnterToken or EnterTokenStream.
1035 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1038 /// \brief RAII class that determines when any macro expansion has occurred
1039 /// between the time the instance was created and the time it was
1040 /// queried.
1041 class PPMacroExpansionTrap {
1042 Preprocessor &PP;
1043 public:
1044 PPMacroExpansionTrap(Preprocessor &PP) : PP(PP) { reset(); }
1045 bool hasMacroExpansionOccured() const { return PP.MacroExpansionFlag; }
1046 void reset() { PP.MacroExpansionFlag = false; }
1049 } // end namespace clang
1051 #endif