1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "clang/AST/AST.h"
7 #include "clang/AST/ASTConsumer.h"
8 #include "clang/AST/ASTContext.h"
9 #include "clang/AST/Expr.h"
10 #include "clang/AST/ExprCXX.h"
11 #include "clang/AST/Mangle.h"
12 #include "clang/AST/RecordLayout.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/Version.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/FrontendPluginRegistry.h"
19 #include "clang/Lex/Lexer.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/Support/JSON.h"
24 #include "llvm/Support/raw_ostream.h"
33 #include <unordered_set>
38 #include "FileOperations.h"
39 #include "StringOperations.h"
40 #include "from-clangd/HeuristicResolver.h"
42 #if CLANG_VERSION_MAJOR < 8
43 // Starting with Clang 8.0 some basic functions have been renamed
44 #define getBeginLoc getLocStart
45 #define getEndLoc getLocEnd
47 // We want std::make_unique, but that's only available in c++14. In versions
48 // prior to that, we need to fall back to llvm's make_unique. It's also the
49 // case that we expect clang 10 to build with c++14 and clang 9 and earlier to
50 // build with c++11, at least as suggested by the llvm-config --cxxflags on
51 // non-windows platforms. mozilla-central seems to build with -std=c++17 on
52 // windows so we need to make this decision based on __cplusplus instead of
53 // the CLANG_VERSION_MAJOR.
54 #if __cplusplus < 201402L
55 using llvm::make_unique
;
57 using std::make_unique
;
60 using namespace clang
;
62 const std::string
GENERATED("__GENERATED__" PATHSEP_STRING
);
64 // Absolute path to directory containing source code.
67 // Absolute path to objdir (including generated code).
70 // Absolute path where analysis JSON output will be stored.
// The file was in neither the source tree nor the objdir. It might be a
// system include, for example.
77 // A file from the source tree.
79 // A file from the objdir.
83 // Takes an absolute path to a file, and returns the type of file it is. If
84 // it's a Source or Generated file, the provided inout path argument is modified
85 // in-place so that it is relative to the source dir or objdir, respectively.
86 FileType
relativizePath(std::string
& path
) {
87 if (path
.compare(0, Objdir
.length(), Objdir
) == 0) {
88 path
.replace(0, Objdir
.length(), GENERATED
);
89 return FileType::Generated
;
91 // Empty filenames can get turned into Srcdir when they are resolved as
92 // absolute paths, so we should exclude files that are exactly equal to
93 // Srcdir or anything outside Srcdir.
94 if (path
.length() > Srcdir
.length() && path
.compare(0, Srcdir
.length(), Srcdir
) == 0) {
95 // Remove the trailing `/' as well.
96 path
.erase(0, Srcdir
.length() + 1);
97 return FileType::Source
;
99 return FileType::Unknown
;
102 #if !defined(_WIN32) && !defined(_WIN64)
103 #include <sys/time.h>
105 static double time() {
107 gettimeofday(&Tv
, nullptr);
108 return double(Tv
.tv_sec
) + double(Tv
.tv_usec
) / 1000000.;
112 // Return true if |input| is a valid C++ identifier. We don't want to generate
113 // analysis information for operators, string literals, etc. by accident since
114 // it trips up consumers of the data.
static bool isValidIdentifier(std::string Input) {
  for (char C : Input) {
    // The <cctype> classifiers are only defined for EOF and values
    // representable as unsigned char. A plain `char` holding a non-ASCII byte
    // may be negative, so convert before classifying.
    const unsigned char Byte = static_cast<unsigned char>(C);
    if (!(isalpha(Byte) || isdigit(Byte) || C == '_')) {
      // Anything other than a letter, a digit or an underscore disqualifies
      // the whole string.
      return false;
    }
  }
  // Every character passed the check (vacuously so for an empty string).
  return true;
}
125 RAIITracer(const char *log
) : mLog(log
) {
126 printf("<%s>\n", mLog
);
130 printf("</%s>\n", mLog
);
136 #define TRACEFUNC RAIITracer tracer(__FUNCTION__);
140 // For each C++ file seen by the analysis (.cpp or .h), we track a
141 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
142 // it's in the source dir or the objdir). We also store the analysis output
145 FileInfo(std::string
&Rname
) : Realname(Rname
) {
146 switch (relativizePath(Realname
)) {
147 case FileType::Generated
:
151 case FileType::Source
:
155 case FileType::Unknown
:
161 std::string Realname
;
162 std::vector
<std::string
> Output
;
169 class PreprocessorHook
: public PPCallbacks
{
170 IndexConsumer
*Indexer
;
173 PreprocessorHook(IndexConsumer
*C
) : Indexer(C
) {}
175 virtual void FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
176 SrcMgr::CharacteristicKind FileType
,
177 FileID PrevFID
) override
;
179 virtual void InclusionDirective(SourceLocation HashLoc
,
180 const Token
&IncludeTok
,
183 CharSourceRange FileNameRange
,
184 #if CLANG_VERSION_MAJOR >= 16
185 OptionalFileEntryRef File
,
186 #elif CLANG_VERSION_MAJOR >= 15
187 Optional
<FileEntryRef
> File
,
189 const FileEntry
*File
,
191 StringRef SearchPath
,
192 StringRef RelativePath
,
193 const Module
*Imported
,
194 SrcMgr::CharacteristicKind FileType
) override
;
196 virtual void MacroDefined(const Token
&Tok
,
197 const MacroDirective
*Md
) override
;
199 virtual void MacroExpands(const Token
&Tok
, const MacroDefinition
&Md
,
200 SourceRange Range
, const MacroArgs
*Ma
) override
;
201 virtual void MacroUndefined(const Token
&Tok
, const MacroDefinition
&Md
,
202 const MacroDirective
*Undef
) override
;
203 virtual void Defined(const Token
&Tok
, const MacroDefinition
&Md
,
204 SourceRange Range
) override
;
205 virtual void Ifdef(SourceLocation Loc
, const Token
&Tok
,
206 const MacroDefinition
&Md
) override
;
207 virtual void Ifndef(SourceLocation Loc
, const Token
&Tok
,
208 const MacroDefinition
&Md
) override
;
211 class IndexConsumer
: public ASTConsumer
,
212 public RecursiveASTVisitor
<IndexConsumer
>,
213 public DiagnosticConsumer
{
215 CompilerInstance
&CI
;
218 std::map
<FileID
, std::unique_ptr
<FileInfo
>> FileMap
;
219 MangleContext
*CurMangleContext
;
220 ASTContext
*AstContext
;
221 std::unique_ptr
<clangd::HeuristicResolver
> Resolver
;
223 typedef RecursiveASTVisitor
<IndexConsumer
> Super
;
225 // Tracks the set of declarations that the current expression/statement is
227 struct AutoSetContext
{
228 AutoSetContext(IndexConsumer
*Self
, NamedDecl
*Context
, bool VisitImplicit
= false)
229 : Self(Self
), Prev(Self
->CurDeclContext
), Decl(Context
) {
230 this->VisitImplicit
= VisitImplicit
|| (Prev
? Prev
->VisitImplicit
: false);
231 Self
->CurDeclContext
= this;
234 ~AutoSetContext() { Self
->CurDeclContext
= Prev
; }
237 AutoSetContext
*Prev
;
241 AutoSetContext
*CurDeclContext
;
243 FileInfo
*getFileInfo(SourceLocation Loc
) {
244 FileID Id
= SM
.getFileID(Loc
);
246 std::map
<FileID
, std::unique_ptr
<FileInfo
>>::iterator It
;
247 It
= FileMap
.find(Id
);
248 if (It
== FileMap
.end()) {
249 // We haven't seen this file before. We need to make the FileInfo
250 // structure information ourselves
251 std::string Filename
= std::string(SM
.getFilename(Loc
));
252 std::string Absolute
;
// If Loc is a macro id rather than a file id, Filename might be
// empty. Also for some types of file locations that are clang-internal
// like "<scratch>" it can return an empty Filename. In these cases we
// want to leave Absolute as empty.
257 if (!Filename
.empty()) {
258 Absolute
= getAbsolutePath(Filename
);
259 if (Absolute
.empty()) {
263 std::unique_ptr
<FileInfo
> Info
= make_unique
<FileInfo
>(Absolute
);
264 It
= FileMap
.insert(std::make_pair(Id
, std::move(Info
))).first
;
266 return It
->second
.get();
269 // Helpers for processing declarations
270 // Should we ignore this location?
271 bool isInterestingLocation(SourceLocation Loc
) {
272 if (Loc
.isInvalid()) {
276 return getFileInfo(Loc
)->Interesting
;
279 // Convert location to "line:column" or "line:column-column" given length.
280 // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
281 // column is 0-based and unpadded.
282 std::string
locationToString(SourceLocation Loc
, size_t Length
= 0) {
283 std::pair
<FileID
, unsigned> Pair
= SM
.getDecomposedLoc(Loc
);
286 unsigned Line
= SM
.getLineNumber(Pair
.first
, Pair
.second
, &IsInvalid
);
290 unsigned Column
= SM
.getColumnNumber(Pair
.first
, Pair
.second
, &IsInvalid
);
296 return stringFormat("%05d:%d-%d", Line
, Column
- 1, Column
- 1 + Length
);
298 return stringFormat("%05d:%d", Line
, Column
- 1);
302 // Convert SourceRange to "line-line".
303 // In the resulting string rep, line is 1-based.
304 std::string
lineRangeToString(SourceRange Range
) {
305 std::pair
<FileID
, unsigned> Begin
= SM
.getDecomposedLoc(Range
.getBegin());
306 std::pair
<FileID
, unsigned> End
= SM
.getDecomposedLoc(Range
.getEnd());
309 unsigned Line1
= SM
.getLineNumber(Begin
.first
, Begin
.second
, &IsInvalid
);
313 unsigned Line2
= SM
.getLineNumber(End
.first
, End
.second
, &IsInvalid
);
318 return stringFormat("%d-%d", Line1
, Line2
);
321 // Convert SourceRange to "line:column-line:column".
322 // In the resulting string rep, line is 1-based, column is 0-based.
323 std::string
fullRangeToString(SourceRange Range
) {
324 std::pair
<FileID
, unsigned> Begin
= SM
.getDecomposedLoc(Range
.getBegin());
325 std::pair
<FileID
, unsigned> End
= SM
.getDecomposedLoc(Range
.getEnd());
328 unsigned Line1
= SM
.getLineNumber(Begin
.first
, Begin
.second
, &IsInvalid
);
332 unsigned Column1
= SM
.getColumnNumber(Begin
.first
, Begin
.second
, &IsInvalid
);
336 unsigned Line2
= SM
.getLineNumber(End
.first
, End
.second
, &IsInvalid
);
340 unsigned Column2
= SM
.getColumnNumber(End
.first
, End
.second
, &IsInvalid
);
345 return stringFormat("%d:%d-%d:%d", Line1
, Column1
- 1, Line2
, Column2
- 1);
348 // Returns the qualified name of `d` without considering template parameters.
349 std::string
getQualifiedName(const NamedDecl
*D
) {
350 const DeclContext
*Ctx
= D
->getDeclContext();
351 if (Ctx
->isFunctionOrMethod()) {
352 return D
->getQualifiedNameAsString();
355 std::vector
<const DeclContext
*> Contexts
;
358 while (Ctx
&& isa
<NamedDecl
>(Ctx
)) {
359 Contexts
.push_back(Ctx
);
360 Ctx
= Ctx
->getParent();
365 std::reverse(Contexts
.begin(), Contexts
.end());
367 for (const DeclContext
*DC
: Contexts
) {
368 if (const auto *Spec
= dyn_cast
<ClassTemplateSpecializationDecl
>(DC
)) {
369 Result
+= Spec
->getNameAsString();
371 if (Spec
->getSpecializationKind() == TSK_ExplicitSpecialization
) {
373 llvm::raw_string_ostream
Stream(Backing
);
374 const TemplateArgumentList
&TemplateArgs
= Spec
->getTemplateArgs();
375 printTemplateArgumentList(
376 Stream
, TemplateArgs
.asArray(), PrintingPolicy(CI
.getLangOpts()));
377 Result
+= Stream
.str();
379 } else if (const auto *Nd
= dyn_cast
<NamespaceDecl
>(DC
)) {
380 if (Nd
->isAnonymousNamespace() || Nd
->isInline()) {
383 Result
+= Nd
->getNameAsString();
384 } else if (const auto *Rd
= dyn_cast
<RecordDecl
>(DC
)) {
385 if (!Rd
->getIdentifier()) {
386 Result
+= "(anonymous)";
388 Result
+= Rd
->getNameAsString();
390 } else if (const auto *Fd
= dyn_cast
<FunctionDecl
>(DC
)) {
391 Result
+= Fd
->getNameAsString();
392 } else if (const auto *Ed
= dyn_cast
<EnumDecl
>(DC
)) {
393 // C++ [dcl.enum]p10: Each enum-name and each unscoped
394 // enumerator is declared in the scope that immediately contains
395 // the enum-specifier. Each scoped enumerator is declared in the
396 // scope of the enumeration.
397 if (Ed
->isScoped() || Ed
->getIdentifier())
398 Result
+= Ed
->getNameAsString();
402 Result
+= cast
<NamedDecl
>(DC
)->getNameAsString();
407 if (D
->getDeclName())
408 Result
+= D
->getNameAsString();
410 Result
+= "(anonymous)";
415 std::string
mangleLocation(SourceLocation Loc
,
416 std::string Backup
= std::string()) {
417 FileInfo
*F
= getFileInfo(Loc
);
418 std::string Filename
= F
->Realname
;
419 if (Filename
.length() == 0 && Backup
.length() != 0) {
423 // Since generated files may be different on different platforms,
424 // we need to include a platform-specific thing in the hash. Otherwise
425 // we can end up with hash collisions where different symbols from
426 // different platforms map to the same thing.
427 char* Platform
= getenv("MOZSEARCH_PLATFORM");
428 Filename
= std::string(Platform
? Platform
: "") + std::string("@") + Filename
;
430 return hash(Filename
+ std::string("@") + locationToString(Loc
));
// Returns true for the characters that may appear in a symbol unchanged:
// letters, digits, underscore and forward slash.
bool isAcceptableSymbolChar(char c) {
  // isalpha()/isdigit() are undefined for negative arguments other than EOF,
  // and plain `char` may be signed, so classify the unsigned byte value.
  const unsigned char Byte = static_cast<unsigned char>(c);
  return isalpha(Byte) || isdigit(Byte) || c == '_' || c == '/';
}
437 std::string
mangleFile(std::string Filename
, FileType Type
) {
438 // "Mangle" the file path, such that:
439 // 1. The majority of paths will still be mostly human-readable.
440 // 2. The sanitization algorithm doesn't produce collisions where two
441 // different unsanitized paths can result in the same sanitized paths.
442 // 3. The produced symbol doesn't cause problems with downstream consumers.
443 // In order to accomplish this, we keep alphanumeric chars, underscores,
444 // and slashes, and replace everything else with an "@xx" hex encoding.
445 // The majority of path characters are letters and slashes which don't get
446 // encoded, so that satisfies (1). Since "@" characters in the unsanitized
447 // path get encoded, there should be no "@" characters in the sanitized path
448 // that got preserved from the unsanitized input, so that should satisfy (2).
449 // And (3) was done by trial-and-error. Note in particular the dot (.)
450 // character needs to be encoded, or the symbol-search feature of mozsearch
451 // doesn't work correctly, as all dot characters in the symbol query get
453 for (size_t i
= 0; i
< Filename
.length(); i
++) {
454 char c
= Filename
[i
];
455 if (isAcceptableSymbolChar(c
)) {
459 sprintf(hex
, "@%02X", ((int)c
) & 0xFF);
460 Filename
.replace(i
, 1, hex
);
464 if (Type
== FileType::Generated
) {
465 // Since generated files may be different on different platforms,
466 // we need to include a platform-specific thing in the hash. Otherwise
467 // we can end up with hash collisions where different symbols from
468 // different platforms map to the same thing.
469 char* Platform
= getenv("MOZSEARCH_PLATFORM");
470 Filename
= std::string(Platform
? Platform
: "") + std::string("@") + Filename
;
// Turns a qualified name into symbol form by replacing every space with an
// underscore; all other characters are returned untouched.
std::string mangleQualifiedName(std::string Name) {
  for (char &Ch : Name) {
    if (Ch == ' ') {
      Ch = '_';
    }
  }
  return Name;
}
480 std::string
getMangledName(clang::MangleContext
*Ctx
,
481 const clang::NamedDecl
*Decl
) {
482 if (isa
<FunctionDecl
>(Decl
) && cast
<FunctionDecl
>(Decl
)->isExternC()) {
483 return cast
<FunctionDecl
>(Decl
)->getNameAsString();
486 if (isa
<FunctionDecl
>(Decl
) || isa
<VarDecl
>(Decl
)) {
487 const DeclContext
*DC
= Decl
->getDeclContext();
488 if (isa
<TranslationUnitDecl
>(DC
) || isa
<NamespaceDecl
>(DC
) ||
489 isa
<LinkageSpecDecl
>(DC
) ||
490 // isa<ExternCContextDecl>(DC) ||
492 llvm::SmallVector
<char, 512> Output
;
493 llvm::raw_svector_ostream
Out(Output
);
494 #if CLANG_VERSION_MAJOR >= 11
495 // This code changed upstream in version 11:
496 // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
498 if (const CXXConstructorDecl
*D
= dyn_cast
<CXXConstructorDecl
>(Decl
)) {
499 GD
= GlobalDecl(D
, Ctor_Complete
);
500 } else if (const CXXDestructorDecl
*D
=
501 dyn_cast
<CXXDestructorDecl
>(Decl
)) {
502 GD
= GlobalDecl(D
, Dtor_Complete
);
504 GD
= GlobalDecl(Decl
);
506 Ctx
->mangleName(GD
, Out
);
508 if (const CXXConstructorDecl
*D
= dyn_cast
<CXXConstructorDecl
>(Decl
)) {
509 Ctx
->mangleCXXCtor(D
, CXXCtorType::Ctor_Complete
, Out
);
510 } else if (const CXXDestructorDecl
*D
=
511 dyn_cast
<CXXDestructorDecl
>(Decl
)) {
512 Ctx
->mangleCXXDtor(D
, CXXDtorType::Dtor_Complete
, Out
);
514 Ctx
->mangleName(Decl
, Out
);
517 return Out
.str().str();
519 return std::string("V_") + mangleLocation(Decl
->getLocation()) +
520 std::string("_") + hash(std::string(Decl
->getName()));
522 } else if (isa
<TagDecl
>(Decl
) || isa
<TypedefNameDecl
>(Decl
) ||
523 isa
<ObjCInterfaceDecl
>(Decl
)) {
524 if (!Decl
->getIdentifier()) {
526 return std::string("T_") + mangleLocation(Decl
->getLocation());
529 return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl
));
530 } else if (isa
<NamespaceDecl
>(Decl
) || isa
<NamespaceAliasDecl
>(Decl
)) {
531 if (!Decl
->getIdentifier()) {
533 return std::string("NS_") + mangleLocation(Decl
->getLocation());
536 return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl
));
537 } else if (const ObjCIvarDecl
*D2
= dyn_cast
<ObjCIvarDecl
>(Decl
)) {
538 const ObjCInterfaceDecl
*Iface
= D2
->getContainingInterface();
539 return std::string("F_<") + getMangledName(Ctx
, Iface
) + ">_" +
540 D2
->getNameAsString();
541 } else if (const FieldDecl
*D2
= dyn_cast
<FieldDecl
>(Decl
)) {
542 const RecordDecl
*Record
= D2
->getParent();
543 return std::string("F_<") + getMangledName(Ctx
, Record
) + ">_" +
544 D2
->getNameAsString();
545 } else if (const EnumConstantDecl
*D2
= dyn_cast
<EnumConstantDecl
>(Decl
)) {
546 const DeclContext
*DC
= Decl
->getDeclContext();
547 if (const NamedDecl
*Named
= dyn_cast
<NamedDecl
>(DC
)) {
548 return std::string("E_<") + getMangledName(Ctx
, Named
) + ">_" +
549 D2
->getNameAsString();
554 return std::string("");
557 void debugLocation(SourceLocation Loc
) {
558 std::string S
= locationToString(Loc
);
559 StringRef Filename
= SM
.getFilename(Loc
);
560 printf("--> %s %s\n", std::string(Filename
).c_str(), S
.c_str());
563 void debugRange(SourceRange Range
) {
565 debugLocation(Range
.getBegin());
566 debugLocation(Range
.getEnd());
570 IndexConsumer(CompilerInstance
&CI
)
571 : CI(CI
), SM(CI
.getSourceManager()), LO(CI
.getLangOpts()), CurMangleContext(nullptr),
572 AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
573 CI
.getPreprocessor().addPPCallbacks(
574 make_unique
<PreprocessorHook
>(this));
577 virtual DiagnosticConsumer
*clone(DiagnosticsEngine
&Diags
) const {
578 return new IndexConsumer(CI
);
581 #if !defined(_WIN32) && !defined(_WIN64)
583 AutoTime(double *Counter
) : Counter(Counter
), Start(time()) {}
586 *Counter
+= time() - Start
;
590 *Counter
+= time() - Start
;
598 // All we need is to follow the final declaration.
599 virtual void HandleTranslationUnit(ASTContext
&Ctx
) {
601 clang::ItaniumMangleContext::create(Ctx
, CI
.getDiagnostics());
604 Resolver
= std::make_unique
<clangd::HeuristicResolver
>(Ctx
);
605 TraverseDecl(Ctx
.getTranslationUnitDecl());
607 // Emit the JSON data for all files now.
608 std::map
<FileID
, std::unique_ptr
<FileInfo
>>::iterator It
;
609 for (It
= FileMap
.begin(); It
!= FileMap
.end(); It
++) {
610 if (!It
->second
->Interesting
) {
614 FileInfo
&Info
= *It
->second
;
616 std::string Filename
= Outdir
+ Info
.Realname
;
617 std::string SrcFilename
= Info
.Generated
618 ? Objdir
+ Info
.Realname
.substr(GENERATED
.length())
619 : Srcdir
+ PATHSEP_STRING
+ Info
.Realname
;
621 ensurePath(Filename
);
623 // We lock the output file in case some other clang process is trying to
624 // write to it at the same time.
625 AutoLockFile
Lock(SrcFilename
, Filename
);
627 if (!Lock
.success()) {
628 fprintf(stderr
, "Unable to lock file %s\n", Filename
.c_str());
632 // Merge our results with the existing lines from the output file.
633 // This ensures that header files that are included multiple times
634 // in different ways are analyzed completely.
635 std::ifstream
Fin(Filename
.c_str(), std::ios::in
| std::ios::binary
);
636 FILE *OutFp
= Lock
.openTmp();
638 fprintf(stderr
, "Unable to open tmp out file for %s\n", Filename
.c_str());
642 // Sort our new results and get an iterator to them
643 std::sort(Info
.Output
.begin(), Info
.Output
.end());
644 std::vector
<std::string
>::const_iterator NewLinesIter
= Info
.Output
.begin();
645 std::string LastNewWritten
;
647 // Loop over the existing (sorted) lines in the analysis output file.
648 // (The good() check also handles the case where Fin did not exist when we
652 std::getline(Fin
, OldLine
);
654 if (OldLine
.length() == 0) {
657 // We need to put the newlines back that getline() eats.
658 OldLine
.push_back('\n');
660 // Write any results from Info.Output that are lexicographically
661 // smaller than OldLine (read from the existing file), but make sure
662 // to skip duplicates. Keep advancing NewLinesIter until we reach an
663 // entry that is lexicographically greater than OldLine.
664 for (; NewLinesIter
!= Info
.Output
.end(); NewLinesIter
++) {
665 if (*NewLinesIter
> OldLine
) {
668 if (*NewLinesIter
== OldLine
) {
671 if (*NewLinesIter
== LastNewWritten
) {
672 // dedupe the new entries being written
675 if (fwrite(NewLinesIter
->c_str(), NewLinesIter
->length(), 1, OutFp
) != 1) {
676 fprintf(stderr
, "Unable to write %zu bytes[1] to tmp output file for %s\n",
677 NewLinesIter
->length(), Filename
.c_str());
680 LastNewWritten
= *NewLinesIter
;
683 // Write the entry read from the existing file.
684 if (fwrite(OldLine
.c_str(), OldLine
.length(), 1, OutFp
) != 1) {
685 fprintf(stderr
, "Unable to write %zu bytes[2] to tmp output file for %s\n",
686 OldLine
.length(), Filename
.c_str());
691 // We finished reading from Fin
694 // Finish iterating our new results, discarding duplicates
695 for (; NewLinesIter
!= Info
.Output
.end(); NewLinesIter
++) {
696 if (*NewLinesIter
== LastNewWritten
) {
699 if (fwrite(NewLinesIter
->c_str(), NewLinesIter
->length(), 1, OutFp
) != 1) {
700 fprintf(stderr
, "Unable to write %zu bytes[3] to tmp output file for %s\n",
701 NewLinesIter
->length(), Filename
.c_str());
704 LastNewWritten
= *NewLinesIter
;
707 // Done writing all the things, close it and replace the old output file
710 if (!Lock
.moveTmp()) {
711 fprintf(stderr
, "Unable to move tmp output file into place for %s (err %d)\n", Filename
.c_str(), errno
);
717 // Unfortunately, we have to override all these methods in order to track the
718 // context we're inside.
720 bool TraverseEnumDecl(EnumDecl
*D
) {
721 AutoSetContext
Asc(this, D
);
722 return Super::TraverseEnumDecl(D
);
724 bool TraverseRecordDecl(RecordDecl
*D
) {
725 AutoSetContext
Asc(this, D
);
726 return Super::TraverseRecordDecl(D
);
728 bool TraverseCXXRecordDecl(CXXRecordDecl
*D
) {
729 AutoSetContext
Asc(this, D
);
730 return Super::TraverseCXXRecordDecl(D
);
732 bool TraverseFunctionDecl(FunctionDecl
*D
) {
733 AutoSetContext
Asc(this, D
);
734 const FunctionDecl
*Def
;
735 // (See the larger AutoTemplateContext comment for more information.) If a
736 // method on a templated class is declared out-of-line, we need to analyze
737 // the definition inside the scope of the template or else we won't properly
738 // handle member access on the templated type.
739 if (TemplateStack
&& D
->isDefined(Def
) && Def
&& D
!= Def
) {
740 TraverseFunctionDecl(const_cast<FunctionDecl
*>(Def
));
742 return Super::TraverseFunctionDecl(D
);
744 bool TraverseCXXMethodDecl(CXXMethodDecl
*D
) {
745 AutoSetContext
Asc(this, D
);
746 const FunctionDecl
*Def
;
747 // See TraverseFunctionDecl.
748 if (TemplateStack
&& D
->isDefined(Def
) && Def
&& D
!= Def
) {
749 TraverseFunctionDecl(const_cast<FunctionDecl
*>(Def
));
751 return Super::TraverseCXXMethodDecl(D
);
753 bool TraverseCXXConstructorDecl(CXXConstructorDecl
*D
) {
754 AutoSetContext
Asc(this, D
, /*VisitImplicit=*/true);
755 const FunctionDecl
*Def
;
756 // See TraverseFunctionDecl.
757 if (TemplateStack
&& D
->isDefined(Def
) && Def
&& D
!= Def
) {
758 TraverseFunctionDecl(const_cast<FunctionDecl
*>(Def
));
760 return Super::TraverseCXXConstructorDecl(D
);
762 bool TraverseCXXConversionDecl(CXXConversionDecl
*D
) {
763 AutoSetContext
Asc(this, D
);
764 const FunctionDecl
*Def
;
765 // See TraverseFunctionDecl.
766 if (TemplateStack
&& D
->isDefined(Def
) && Def
&& D
!= Def
) {
767 TraverseFunctionDecl(const_cast<FunctionDecl
*>(Def
));
769 return Super::TraverseCXXConversionDecl(D
);
771 bool TraverseCXXDestructorDecl(CXXDestructorDecl
*D
) {
772 AutoSetContext
Asc(this, D
);
773 const FunctionDecl
*Def
;
774 // See TraverseFunctionDecl.
775 if (TemplateStack
&& D
->isDefined(Def
) && Def
&& D
!= Def
) {
776 TraverseFunctionDecl(const_cast<FunctionDecl
*>(Def
));
778 return Super::TraverseCXXDestructorDecl(D
);
781 // Used to keep track of the context in which a token appears.
783 // Ultimately this becomes the "context" JSON property.
786 // Ultimately this becomes the "contextsym" JSON property.
790 Context(std::string Name
, std::string Symbol
)
791 : Name(Name
), Symbol(Symbol
) {}
794 Context
translateContext(NamedDecl
*D
) {
795 const FunctionDecl
*F
= dyn_cast
<FunctionDecl
>(D
);
796 if (F
&& F
->isTemplateInstantiation()) {
797 D
= F
->getTemplateInstantiationPattern();
800 return Context(D
->getQualifiedNameAsString(), getMangledName(CurMangleContext
, D
));
803 Context
getContext(SourceLocation Loc
) {
804 if (SM
.isMacroBodyExpansion(Loc
)) {
805 // If we're inside a macro definition, we don't return any context. It
806 // will probably not be what the user expects if we do.
810 if (CurDeclContext
) {
811 return translateContext(CurDeclContext
->Decl
);
816 // Similar to GetContext(SourceLocation), but it skips the declaration passed
817 // in. This is useful if we want the context of a declaration that's already
819 Context
getContext(Decl
*D
) {
820 if (SM
.isMacroBodyExpansion(D
->getLocation())) {
821 // If we're inside a macro definition, we don't return any context. It
822 // will probably not be what the user expects if we do.
826 AutoSetContext
*Ctxt
= CurDeclContext
;
828 if (Ctxt
->Decl
!= D
) {
829 return translateContext(Ctxt
->Decl
);
836 // Analyzing template code is tricky. Suppose we have this code:
839 // bool Foo(T* ptr) { return T::StaticMethod(ptr); }
841 // If we analyze the body of Foo without knowing the type T, then we will not
842 // be able to generate any information for StaticMethod. However, analyzing
843 // Foo for every possible instantiation is inefficient and it also generates
844 // too much data in some cases. For example, the following code would generate
845 // one definition of Baz for every instantiation, which is undesirable:
848 // class Bar { struct Baz { ... }; };
850 // To solve this problem, we analyze templates only once. We do so in a
851 // GatherDependent mode where we look for "dependent scoped member
852 // expressions" (i.e., things like StaticMethod). We keep track of the
853 // locations of these expressions. If we find one or more of them, we analyze
854 // the template for each instantiation, in an AnalyzeDependent mode. This mode
855 // ignores all source locations except for the ones where we found dependent
856 // scoped member expressions before. For these locations, we generate a
857 // separate JSON result for each instantiation.
// We inherit our parent's mode if it exists. This is because if our
// parent is in analyze mode, it means we've already lived a full life in
// gather mode and we must not restart in gather mode or we'll cause the
// indexer to visit EVERY identifier, which is way too much data.
863 struct AutoTemplateContext
{
864 AutoTemplateContext(IndexConsumer
*Self
)
866 , CurMode(Self
->TemplateStack
? Self
->TemplateStack
->CurMode
: Mode::GatherDependent
)
867 , Parent(Self
->TemplateStack
) {
868 Self
->TemplateStack
= this;
871 ~AutoTemplateContext() { Self
->TemplateStack
= Parent
; }
873 // We traverse templates in two modes:
875 // Gather mode does not traverse into specializations. It looks for
876 // locations where it would help to have more info from template
880 // Analyze mode traverses into template specializations and records
881 // information about token locations saved in gather mode.
885 // We found a dependent scoped member expression! Keep track of it for
887 void visitDependent(SourceLocation Loc
) {
888 if (CurMode
== Mode::AnalyzeDependent
) {
892 DependentLocations
.insert(Loc
.getRawEncoding());
894 Parent
->visitDependent(Loc
);
898 bool inGatherMode() {
899 return CurMode
== Mode::GatherDependent
;
902 // Do we need to perform the extra AnalyzeDependent passes (one per
904 bool needsAnalysis() const {
905 if (!DependentLocations
.empty()) {
909 return Parent
->needsAnalysis();
914 void switchMode() { CurMode
= Mode::AnalyzeDependent
; }
916 // Do we want to analyze each template instantiation separately?
917 bool shouldVisitTemplateInstantiations() const {
918 if (CurMode
== Mode::AnalyzeDependent
) {
922 return Parent
->shouldVisitTemplateInstantiations();
927 // For a given expression/statement, should we emit JSON data for it?
928 bool shouldVisit(SourceLocation Loc
) {
929 if (CurMode
== Mode::GatherDependent
) {
932 if (DependentLocations
.find(Loc
.getRawEncoding()) !=
933 DependentLocations
.end()) {
937 return Parent
->shouldVisit(Loc
);
945 std::unordered_set
<unsigned> DependentLocations
;
946 AutoTemplateContext
*Parent
;
949 AutoTemplateContext
*TemplateStack
;
951 bool shouldVisitTemplateInstantiations() const {
953 return TemplateStack
->shouldVisitTemplateInstantiations();
958 bool shouldVisitImplicitCode() const {
959 return CurDeclContext
&& CurDeclContext
->VisitImplicit
;
962 bool TraverseClassTemplateDecl(ClassTemplateDecl
*D
) {
963 AutoTemplateContext
Atc(this);
964 Super::TraverseClassTemplateDecl(D
);
966 if (!Atc
.needsAnalysis()) {
972 if (D
!= D
->getCanonicalDecl()) {
976 for (auto *Spec
: D
->specializations()) {
977 for (auto *Rd
: Spec
->redecls()) {
978 // We don't want to visit injected-class-names in this traversal.
979 if (cast
<CXXRecordDecl
>(Rd
)->isInjectedClassName())
989 bool TraverseFunctionTemplateDecl(FunctionTemplateDecl
*D
) {
990 AutoTemplateContext
Atc(this);
991 if (Atc
.inGatherMode()) {
992 Super::TraverseFunctionTemplateDecl(D
);
995 if (!Atc
.needsAnalysis()) {
1001 if (D
!= D
->getCanonicalDecl()) {
1005 for (auto *Spec
: D
->specializations()) {
1006 for (auto *Rd
: Spec
->redecls()) {
1014 bool shouldVisit(SourceLocation Loc
) {
1015 if (TemplateStack
) {
1016 return TemplateStack
->shouldVisit(Loc
);
1022 // Flag to omit the identifier from being cross-referenced across files.
1023 // This is usually desired for local variables.
1024 NoCrossref
= 1 << 0,
1025 // Flag to indicate the token with analysis data is not an identifier. Indicates
1026 // we want to skip the check that tries to ensure a sane identifier token.
1027 NotIdentifierToken
= 1 << 1,
1028 // This indicates that the end of the provided SourceRange is valid and
1029 // should be respected. If this flag is not set, the visitIdentifier
1030 // function should use only the start of the SourceRange and auto-detect
1031 // the end based on whatever token is found at the start.
1032 LocRangeEndValid
= 1 << 2
1035 void emitStructuredInfo(SourceLocation Loc
, const RecordDecl
*decl
) {
1036 std::string json_str
;
1037 llvm::raw_string_ostream
ros(json_str
);
1038 llvm::json::OStream
J(ros
);
1039 // Start the top-level object.
1042 unsigned StartOffset
= SM
.getFileOffset(Loc
);
1043 unsigned EndOffset
=
1044 StartOffset
+ Lexer::MeasureTokenLength(Loc
, SM
, CI
.getLangOpts());
1045 J
.attribute("loc", locationToString(Loc
, EndOffset
- StartOffset
));
1046 J
.attribute("structured", 1);
1047 J
.attribute("pretty", getQualifiedName(decl
));
1048 J
.attribute("sym", getMangledName(CurMangleContext
, decl
));
1050 J
.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl
->getTagKind()));
1052 const ASTContext
&C
= *AstContext
;
1053 const ASTRecordLayout
&Layout
= C
.getASTRecordLayout(decl
);
1055 J
.attribute("sizeBytes", Layout
.getSize().getQuantity());
1057 auto cxxDecl
= dyn_cast
<CXXRecordDecl
>(decl
);
1060 J
.attributeBegin("supers");
1062 for (const CXXBaseSpecifier
&Base
: cxxDecl
->bases()) {
1063 const CXXRecordDecl
*BaseDecl
= Base
.getType()->getAsCXXRecordDecl();
1067 J
.attribute("pretty", getQualifiedName(BaseDecl
));
1068 J
.attribute("sym", getMangledName(CurMangleContext
, BaseDecl
));
1070 J
.attributeBegin("props");
1072 if (Base
.isVirtual()) {
1083 J
.attributeBegin("methods");
1085 for (const CXXMethodDecl
*MethodDecl
: cxxDecl
->methods()) {
1088 J
.attribute("pretty", getQualifiedName(MethodDecl
));
1089 J
.attribute("sym", getMangledName(CurMangleContext
, MethodDecl
));
1091 // TODO: Better figure out what to do for non-isUserProvided methods
1092 // which means there's potentially semantic data that doesn't correspond
1093 // to a source location in the source. Should we be emitting
1094 // structured info for those when we're processing the class here?
1096 J
.attributeBegin("props");
1098 if (MethodDecl
->isStatic()) {
1101 if (MethodDecl
->isInstance()) {
1102 J
.value("instance");
1104 if (MethodDecl
->isVirtual()) {
1107 if (MethodDecl
->isUserProvided()) {
1110 if (MethodDecl
->isDefaulted()) {
1111 J
.value("defaulted");
1113 if (MethodDecl
->isDeleted()) {
1116 if (MethodDecl
->isConstexpr()) {
1117 J
.value("constexpr");
1128 J
.attributeBegin("fields");
1130 uint64_t iField
= 0;
1131 for (RecordDecl::field_iterator It
= decl
->field_begin(),
1132 End
= decl
->field_end(); It
!= End
; ++It
, ++iField
) {
1133 const FieldDecl
&Field
= **It
;
1134 uint64_t localOffsetBits
= Layout
.getFieldOffset(iField
);
1135 CharUnits localOffsetBytes
= C
.toCharUnitsFromBits(localOffsetBits
);
1138 J
.attribute("pretty", getQualifiedName(&Field
));
1139 J
.attribute("sym", getMangledName(CurMangleContext
, &Field
));
1140 QualType FieldType
= Field
.getType();
1141 J
.attribute("type", FieldType
.getAsString());
1142 QualType CanonicalFieldType
= FieldType
.getCanonicalType();
1143 const TagDecl
*tagDecl
= CanonicalFieldType
->getAsTagDecl();
1145 J
.attribute("typesym", getMangledName(CurMangleContext
, tagDecl
));
1147 J
.attribute("offsetBytes", localOffsetBytes
.getQuantity());
1148 if (Field
.isBitField()) {
1149 J
.attributeBegin("bitPositions");
1152 J
.attribute("begin", unsigned(localOffsetBits
- C
.toBits(localOffsetBytes
)));
1153 J
.attribute("width", Field
.getBitWidthValue(C
));
1158 // Try and get the field as a record itself so we can know its size, but
1159 // we don't actually want to recurse into it.
1160 if (auto FieldRec
= Field
.getType()->getAs
<RecordType
>()) {
1161 auto const &FieldLayout
= C
.getASTRecordLayout(FieldRec
->getDecl());
1162 J
.attribute("sizeBytes", FieldLayout
.getSize().getQuantity());
1164 // We were unable to get it as a record, which suggests it's a normal
1165 // type, in which case let's just ask for the type size. (Maybe this
1166 // would also work for the above case too?)
1167 uint64_t typeSizeBits
= C
.getTypeSize(Field
.getType());
1168 CharUnits typeSizeBytes
= C
.toCharUnitsFromBits(typeSizeBits
);
1169 J
.attribute("sizeBytes", typeSizeBytes
.getQuantity());
1177 // End the top-level object.
1180 FileInfo
*F
= getFileInfo(Loc
);
1181 // we want a newline.
1183 F
->Output
.push_back(std::move(ros
.str()));
1186 void emitStructuredInfo(SourceLocation Loc
, const FunctionDecl
*decl
) {
1187 std::string json_str
;
1188 llvm::raw_string_ostream
ros(json_str
);
1189 llvm::json::OStream
J(ros
);
1190 // Start the top-level object.
1193 unsigned StartOffset
= SM
.getFileOffset(Loc
);
1194 unsigned EndOffset
=
1195 StartOffset
+ Lexer::MeasureTokenLength(Loc
, SM
, CI
.getLangOpts());
1196 J
.attribute("loc", locationToString(Loc
, EndOffset
- StartOffset
));
1197 J
.attribute("structured", 1);
1198 J
.attribute("pretty", getQualifiedName(decl
));
1199 J
.attribute("sym", getMangledName(CurMangleContext
, decl
));
1201 auto cxxDecl
= dyn_cast
<CXXMethodDecl
>(decl
);
1204 J
.attribute("kind", "method");
1205 if (auto parentDecl
= cxxDecl
->getParent()) {
1206 J
.attribute("parentsym", getMangledName(CurMangleContext
, parentDecl
));
1209 J
.attributeBegin("overrides");
1211 for (const CXXMethodDecl
*MethodDecl
: cxxDecl
->overridden_methods()) {
1214 // TODO: Make sure we're doing template traversals appropriately...
1215 // findOverriddenMethods (now removed) liked to do:
1216 // if (Decl->isTemplateInstantiation()) {
1217 // Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
1219 // I think our pre-emptive dereferencing/avoidance of templates may
1220 // protect us from this, but it needs more investigation.
1222 J
.attribute("pretty", getQualifiedName(MethodDecl
));
1223 J
.attribute("sym", getMangledName(CurMangleContext
, MethodDecl
));
1231 J
.attribute("kind", "function");
1235 J
.attributeBegin("props");
1237 // some of these are only possible on a CXXMethodDecl, but we want them all
1238 // in the same array, so condition these first ones.
1240 if (cxxDecl
->isStatic()) {
1243 if (cxxDecl
->isInstance()) {
1244 J
.value("instance");
1246 if (cxxDecl
->isVirtual()) {
1249 if (cxxDecl
->isUserProvided()) {
1253 if (decl
->isDefaulted()) {
1254 J
.value("defaulted");
1256 if (decl
->isDeleted()) {
1259 if (decl
->isConstexpr()) {
1260 J
.value("constexpr");
1265 // End the top-level object.
1268 FileInfo
*F
= getFileInfo(Loc
);
1269 // we want a newline.
1271 F
->Output
.push_back(std::move(ros
.str()));
1275 * Emit structured info for a field. Right now the intent is for this to just
1276 * be a pointer to its parent's structured info with this method entirely
1277 * avoiding getting the ASTRecordLayout.
1279 * TODO: Give more thought on where to locate the canonical info on fields and
1280 * how to normalize their exposure over the web. We could relink the info
1281 * both at cross-reference time and web-server lookup time. This is also
1282 * called out in `analysis.md`.
1284 void emitStructuredInfo(SourceLocation Loc
, const FieldDecl
*decl
) {
1285 // XXX the call to decl::getParent will assert below for ObjCIvarDecl
1286 // instances because their DecContext is not a RecordDecl. So just bail
1288 // TODO: better support ObjC.
1289 if (const ObjCIvarDecl
*D2
= dyn_cast
<ObjCIvarDecl
>(decl
)) {
1293 std::string json_str
;
1294 llvm::raw_string_ostream
ros(json_str
);
1295 llvm::json::OStream
J(ros
);
1296 // Start the top-level object.
1299 unsigned StartOffset
= SM
.getFileOffset(Loc
);
1300 unsigned EndOffset
=
1301 StartOffset
+ Lexer::MeasureTokenLength(Loc
, SM
, CI
.getLangOpts());
1302 J
.attribute("loc", locationToString(Loc
, EndOffset
- StartOffset
));
1303 J
.attribute("structured", 1);
1304 J
.attribute("pretty", getQualifiedName(decl
));
1305 J
.attribute("sym", getMangledName(CurMangleContext
, decl
));
1306 J
.attribute("kind", "field");
1308 if (auto parentDecl
= decl
->getParent()) {
1309 J
.attribute("parentsym", getMangledName(CurMangleContext
, parentDecl
));
1312 // End the top-level object.
1315 FileInfo
*F
= getFileInfo(Loc
);
1316 // we want a newline.
1318 F
->Output
.push_back(std::move(ros
.str()));
1321 // XXX Type annotating.
1322 // QualType is the type class. It has helpers like TagDecl via getAsTagDecl.
1323 // ValueDecl exposes a getType() method.
1325 // Arguably it makes sense to only expose types that Searchfox has definitions
1326 // for as first-class. Probably the way to go is like context/contextsym.
1327 // We expose a "type" which is just a human-readable string which has no
1328 // semantic purposes and is just a display string, plus then a "typesym" which
1329 // we expose if we were able to map the type.
1331 // Other meta-info: field offsets. Ancestor types.
1333 // This is the only function that emits analysis JSON data. It should be
1334 // called for each identifier that corresponds to a symbol.
1335 void visitIdentifier(const char *Kind
, const char *SyntaxKind
,
1336 llvm::StringRef QualName
, SourceRange LocRange
,
1338 QualType MaybeType
= QualType(),
1339 Context TokenContext
= Context(), int Flags
= 0,
1340 SourceRange PeekRange
= SourceRange(),
1341 SourceRange NestingRange
= SourceRange()) {
1342 SourceLocation Loc
= LocRange
.getBegin();
1343 if (!shouldVisit(Loc
)) {
1347 // Find the file positions corresponding to the token.
1348 unsigned StartOffset
= SM
.getFileOffset(Loc
);
1349 unsigned EndOffset
= (Flags
& LocRangeEndValid
)
1350 ? SM
.getFileOffset(LocRange
.getEnd())
1351 : StartOffset
+ Lexer::MeasureTokenLength(Loc
, SM
, CI
.getLangOpts());
1353 std::string LocStr
= locationToString(Loc
, EndOffset
- StartOffset
);
1354 std::string RangeStr
= locationToString(Loc
, EndOffset
- StartOffset
);
1355 std::string PeekRangeStr
;
1357 if (!(Flags
& NotIdentifierToken
)) {
1358 // Get the token's characters so we can make sure it's a valid token.
1359 const char *StartChars
= SM
.getCharacterData(Loc
);
1360 std::string
Text(StartChars
, EndOffset
- StartOffset
);
1361 if (!isValidIdentifier(Text
)) {
1366 FileInfo
*F
= getFileInfo(Loc
);
1368 if (!(Flags
& NoCrossref
)) {
1369 std::string json_str
;
1370 llvm::raw_string_ostream
ros(json_str
);
1371 llvm::json::OStream
J(ros
);
1372 // Start the top-level object.
1375 J
.attribute("loc", LocStr
);
1376 J
.attribute("target", 1);
1377 J
.attribute("kind", Kind
);
1378 J
.attribute("pretty", QualName
.data());
1379 J
.attribute("sym", Symbol
);
1380 if (!TokenContext
.Name
.empty()) {
1381 J
.attribute("context", TokenContext
.Name
);
1383 if (!TokenContext
.Symbol
.empty()) {
1384 J
.attribute("contextsym", TokenContext
.Symbol
);
1386 if (PeekRange
.isValid()) {
1387 PeekRangeStr
= lineRangeToString(PeekRange
);
1388 if (!PeekRangeStr
.empty()) {
1389 J
.attribute("peekRange", PeekRangeStr
);
1393 // End the top-level object.
1395 // we want a newline.
1397 F
->Output
.push_back(std::move(ros
.str()));
1400 // Generate a single "source":1 for all the symbols. If we search from here,
1401 // we want to union the results for every symbol in `symbols`.
1402 std::string json_str
;
1403 llvm::raw_string_ostream
ros(json_str
);
1404 llvm::json::OStream
J(ros
);
1405 // Start the top-level object.
1408 J
.attribute("loc", RangeStr
);
1409 J
.attribute("source", 1);
1411 if (NestingRange
.isValid()) {
1412 std::string NestingRangeStr
= fullRangeToString(NestingRange
);
1413 if (!NestingRangeStr
.empty()) {
1414 J
.attribute("nestingRange", NestingRangeStr
);
1419 if (Flags
& NoCrossref
) {
1420 J
.attribute("syntax", "");
1423 Syntax
.push_back(',');
1424 Syntax
.append(SyntaxKind
);
1425 J
.attribute("syntax", Syntax
);
1428 if (!MaybeType
.isNull()) {
1429 J
.attribute("type", MaybeType
.getAsString());
1430 QualType canonical
= MaybeType
.getCanonicalType();
1431 const TagDecl
*decl
= canonical
->getAsTagDecl();
1433 std::string Mangled
= getMangledName(CurMangleContext
, decl
);
1434 J
.attribute("typesym", Mangled
);
1438 std::string
Pretty(SyntaxKind
);
1439 Pretty
.push_back(' ');
1440 Pretty
.append(QualName
.data());
1441 J
.attribute("pretty", Pretty
);
1443 J
.attribute("sym", Symbol
);
1445 if (Flags
& NoCrossref
) {
1446 J
.attribute("no_crossref", 1);
1449 // End the top-level object.
1452 // we want a newline.
1454 F
->Output
.push_back(std::move(ros
.str()));
1457 void normalizeLocation(SourceLocation
*Loc
) {
1458 *Loc
= SM
.getSpellingLoc(*Loc
);
1461 // For cases where the left-brace is not directly accessible from the AST,
1462 // helper to use the lexer to find the brace. Make sure you're picking the
1463 // start location appropriately!
1464 SourceLocation
findLeftBraceFromLoc(SourceLocation Loc
) {
1465 return Lexer::findLocationAfterToken(Loc
, tok::l_brace
, SM
, LO
, false);
1468 // If the provided statement is compound, return its range.
1469 SourceRange
getCompoundStmtRange(Stmt
* D
) {
1471 return SourceRange();
1474 CompoundStmt
*D2
= dyn_cast
<CompoundStmt
>(D
);
1476 return D2
->getSourceRange();
1479 return SourceRange();
1482 SourceRange
getFunctionPeekRange(FunctionDecl
* D
) {
1483 // We always start at the start of the function decl, which may include the
1484 // return type on a separate line.
1485 SourceLocation Start
= D
->getBeginLoc();
1487 // By default, we end at the line containing the function's name.
1488 SourceLocation End
= D
->getLocation();
1490 std::pair
<FileID
, unsigned> FuncLoc
= SM
.getDecomposedLoc(End
);
1492 // But if there are parameters, we want to include those as well.
1493 for (ParmVarDecl
* Param
: D
->parameters()) {
1494 std::pair
<FileID
, unsigned> ParamLoc
= SM
.getDecomposedLoc(Param
->getLocation());
1496 // It's possible there are macros involved or something. We don't include
1497 // the parameters in that case.
1498 if (ParamLoc
.first
== FuncLoc
.first
) {
1499 // Assume parameters are in order, so we always take the last one.
1500 End
= Param
->getEndLoc();
1504 return SourceRange(Start
, End
);
1507 SourceRange
getTagPeekRange(TagDecl
* D
) {
1508 SourceLocation Start
= D
->getBeginLoc();
1510 // By default, we end at the line containing the name.
1511 SourceLocation End
= D
->getLocation();
1513 std::pair
<FileID
, unsigned> FuncLoc
= SM
.getDecomposedLoc(End
);
1515 if (CXXRecordDecl
* D2
= dyn_cast
<CXXRecordDecl
>(D
)) {
1516 // But if there are parameters, we want to include those as well.
1517 for (CXXBaseSpecifier
& Base
: D2
->bases()) {
1518 std::pair
<FileID
, unsigned> Loc
= SM
.getDecomposedLoc(Base
.getEndLoc());
1520 // It's possible there are macros involved or something. We don't include
1521 // the parameters in that case.
1522 if (Loc
.first
== FuncLoc
.first
) {
1523 // Assume parameters are in order, so we always take the last one.
1524 End
= Base
.getEndLoc();
1529 return SourceRange(Start
, End
);
1532 SourceRange
getCommentRange(NamedDecl
* D
) {
1533 const RawComment
* RC
=
1534 AstContext
->getRawCommentForDeclNoCache(D
);
1536 return SourceRange();
1539 return RC
->getSourceRange();
1542 // Sanity checks that all ranges are in the same file, returning the first if
1543 // they're in different files. Unions the ranges based on which is first.
1544 SourceRange
combineRanges(SourceRange Range1
, SourceRange Range2
) {
1545 if (Range1
.isInvalid()) {
1548 if (Range2
.isInvalid()) {
1552 std::pair
<FileID
, unsigned> Begin1
= SM
.getDecomposedLoc(Range1
.getBegin());
1553 std::pair
<FileID
, unsigned> End1
= SM
.getDecomposedLoc(Range1
.getEnd());
1554 std::pair
<FileID
, unsigned> Begin2
= SM
.getDecomposedLoc(Range2
.getBegin());
1555 std::pair
<FileID
, unsigned> End2
= SM
.getDecomposedLoc(Range2
.getEnd());
1557 if (End1
.first
!= Begin2
.first
) {
1558 // Something weird is probably happening with the preprocessor. Just
1559 // return the first range.
1563 // See which range comes first.
1564 if (Begin1
.second
<= End2
.second
) {
1565 return SourceRange(Range1
.getBegin(), Range2
.getEnd());
1567 return SourceRange(Range2
.getBegin(), Range1
.getEnd());
1571 // Given a location and a range, returns the range if:
1572 // - The location and the range live in the same file.
1573 // - The range is well ordered (end is not before begin).
1574 // Returns an empty range otherwise.
1575 SourceRange
validateRange(SourceLocation Loc
, SourceRange Range
) {
1576 std::pair
<FileID
, unsigned> Decomposed
= SM
.getDecomposedLoc(Loc
);
1577 std::pair
<FileID
, unsigned> Begin
= SM
.getDecomposedLoc(Range
.getBegin());
1578 std::pair
<FileID
, unsigned> End
= SM
.getDecomposedLoc(Range
.getEnd());
1580 if (Begin
.first
!= Decomposed
.first
|| End
.first
!= Decomposed
.first
) {
1581 return SourceRange();
1584 if (Begin
.second
>= End
.second
) {
1585 return SourceRange();
1591 bool VisitNamedDecl(NamedDecl
*D
) {
1592 SourceLocation Loc
= D
->getLocation();
1594 // If the token is from a macro expansion and the expansion location
1595 // is interesting, use that instead as it tends to be more useful.
1596 SourceLocation expandedLoc
= Loc
;
1597 if (SM
.isMacroBodyExpansion(Loc
)) {
1598 Loc
= SM
.getFileLoc(Loc
);
1601 normalizeLocation(&Loc
);
1602 if (!isInterestingLocation(Loc
)) {
1606 if (isa
<ParmVarDecl
>(D
) && !D
->getDeclName().getAsIdentifierInfo()) {
1607 // Unnamed parameter in function proto.
1612 const char *Kind
= "def";
1613 const char *PrettyKind
= "?";
1614 bool wasTemplate
= false;
1615 SourceRange
PeekRange(D
->getBeginLoc(), D
->getEndLoc());
1616 // The nesting range identifies the left brace and right brace, which
1617 // heavily depends on the AST node type.
1618 SourceRange NestingRange
;
1619 if (FunctionDecl
*D2
= dyn_cast
<FunctionDecl
>(D
)) {
1620 if (D2
->isTemplateInstantiation()) {
1622 D
= D2
->getTemplateInstantiationPattern();
1624 // We treat pure virtual declarations as definitions.
1625 Kind
= (D2
->isThisDeclarationADefinition() || D2
->isPure()) ? "def" : "decl";
1626 PrettyKind
= "function";
1627 PeekRange
= getFunctionPeekRange(D2
);
1629 // Only emit the nesting range if:
1630 // - This is a definition AND
1631 // - This isn't a template instantiation. Function templates'
1632 // instantiations can end up as a definition with a Loc at their point
1633 // of declaration but with the CompoundStmt of the template's
1634 // point of definition. This really messes up the nesting range logic.
1635 // At the time of writing this, the test repo's `big_header.h`'s
1636 // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
1637 // instantiated by `big_cpp.cpp` triggers this phenomenon.
1639 // Note: As covered elsewhere, template processing is tricky and it's
1640 // conceivable that we may change traversal patterns in the future,
1641 // mooting this guard.
1642 if (D2
->isThisDeclarationADefinition() &&
1643 !D2
->isTemplateInstantiation()) {
1644 // The CompoundStmt range is the brace range.
1645 NestingRange
= getCompoundStmtRange(D2
->getBody());
1647 } else if (TagDecl
*D2
= dyn_cast
<TagDecl
>(D
)) {
1648 Kind
= D2
->isThisDeclarationADefinition() ? "def" : "forward";
1649 PrettyKind
= "type";
1651 if (D2
->isThisDeclarationADefinition() && D2
->getDefinition() == D2
) {
1652 PeekRange
= getTagPeekRange(D2
);
1653 NestingRange
= D2
->getBraceRange();
1655 PeekRange
= SourceRange();
1657 } else if (isa
<TypedefNameDecl
>(D
)) {
1659 PrettyKind
= "type";
1660 PeekRange
= SourceRange(Loc
, Loc
);
1661 } else if (VarDecl
*D2
= dyn_cast
<VarDecl
>(D
)) {
1662 if (D2
->isLocalVarDeclOrParm()) {
1666 Kind
= D2
->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
1669 PrettyKind
= "variable";
1670 } else if (isa
<NamespaceDecl
>(D
) || isa
<NamespaceAliasDecl
>(D
)) {
1672 PrettyKind
= "namespace";
1673 PeekRange
= SourceRange(Loc
, Loc
);
1674 NamespaceDecl
*D2
= dyn_cast
<NamespaceDecl
>(D
);
1676 // There's no exposure of the left brace so we have to find it.
1677 NestingRange
= SourceRange(
1678 findLeftBraceFromLoc(D2
->isAnonymousNamespace() ? D2
->getBeginLoc() : Loc
),
1679 D2
->getRBraceLoc());
1681 } else if (isa
<FieldDecl
>(D
)) {
1683 PrettyKind
= "field";
1684 } else if (isa
<EnumConstantDecl
>(D
)) {
1686 PrettyKind
= "enum constant";
1691 QualType qtype
= QualType();
1692 if (ValueDecl
*D2
= dyn_cast
<ValueDecl
>(D
)) {
1693 qtype
= D2
->getType();
1696 SourceRange CommentRange
= getCommentRange(D
);
1697 PeekRange
= combineRanges(PeekRange
, CommentRange
);
1698 PeekRange
= validateRange(Loc
, PeekRange
);
1699 NestingRange
= validateRange(Loc
, NestingRange
);
1701 std::string Symbol
= getMangledName(CurMangleContext
, D
);
1703 // In the case of destructors, Loc might point to the ~ character. In that
1704 // case we want to skip to the name of the class. However, Loc might also
1705 // point to other places that generate destructors, such as the use site of
1706 // a macro that expands to generate a destructor, or a lambda (apparently
1707 // clang 8 creates a destructor declaration for at least some lambdas). In
1708 // the former case we'll use the macro use site as the location, and in the
1709 // latter we'll just drop the declaration.
1710 if (isa
<CXXDestructorDecl
>(D
)) {
1711 PrettyKind
= "destructor";
1712 const char *P
= SM
.getCharacterData(Loc
);
1714 // Advance Loc to the class name
1717 unsigned Skipped
= 1;
1718 while (*P
== ' ' || *P
== '\t' || *P
== '\r' || *P
== '\n') {
1723 Loc
= Loc
.getLocWithOffset(Skipped
);
1725 // See if the destructor is coming from a macro expansion
1726 P
= SM
.getCharacterData(expandedLoc
);
1731 // It is, so just use Loc as-is
1735 visitIdentifier(Kind
, PrettyKind
, getQualifiedName(D
), SourceRange(Loc
), Symbol
,
1737 getContext(D
), Flags
, PeekRange
, NestingRange
);
1739 // In-progress structured info emission.
1740 if (RecordDecl
*D2
= dyn_cast
<RecordDecl
>(D
)) {
1741 if (D2
->isThisDeclarationADefinition() &&
1742 // XXX getASTRecordLayout doesn't work for dependent types, so we
1743 // avoid calling into emitStructuredInfo for now if there's a
1744 // dependent type or if we're in any kind of template context. This
1745 // should be re-evaluated once this is working for normal classes and
1746 // we can better evaluate what is useful.
1747 !D2
->isDependentType() &&
1749 emitStructuredInfo(Loc
, D2
);
1752 if (FunctionDecl
*D2
= dyn_cast
<FunctionDecl
>(D
)) {
1753 if ((D2
->isThisDeclarationADefinition() || D2
->isPure()) &&
1754 // a clause at the top should have generalized and set wasTemplate so
1755 // it shouldn't be the case that isTemplateInstantiation() is true.
1756 !D2
->isTemplateInstantiation() &&
1758 !D2
->isFunctionTemplateSpecialization() &&
1760 emitStructuredInfo(Loc
, D2
);
1763 if (FieldDecl
*D2
= dyn_cast
<FieldDecl
>(D
)) {
1764 if (!D2
->isTemplated() &&
1766 emitStructuredInfo(Loc
, D2
);
1773 bool VisitCXXConstructExpr(CXXConstructExpr
*E
) {
1774 SourceLocation Loc
= E
->getBeginLoc();
1775 normalizeLocation(&Loc
);
1776 if (!isInterestingLocation(Loc
)) {
1780 FunctionDecl
*Ctor
= E
->getConstructor();
1781 if (Ctor
->isTemplateInstantiation()) {
1782 Ctor
= Ctor
->getTemplateInstantiationPattern();
1784 std::string Mangled
= getMangledName(CurMangleContext
, Ctor
);
1786 // FIXME: Need to do something different for list initialization.
1788 visitIdentifier("use", "constructor", getQualifiedName(Ctor
), Loc
, Mangled
,
1789 QualType(), getContext(Loc
));
1794 bool VisitCallExpr(CallExpr
*E
) {
1795 Decl
*Callee
= E
->getCalleeDecl();
1796 if (!Callee
|| !FunctionDecl::classof(Callee
)) {
1800 const NamedDecl
*NamedCallee
= dyn_cast
<NamedDecl
>(Callee
);
1804 const FunctionDecl
*F
= dyn_cast
<FunctionDecl
>(NamedCallee
);
1805 if (F
->isTemplateInstantiation()) {
1806 NamedCallee
= F
->getTemplateInstantiationPattern();
1809 std::string Mangled
= getMangledName(CurMangleContext
, NamedCallee
);
1812 Expr
*CalleeExpr
= E
->getCallee()->IgnoreParenImpCasts();
1814 if (CXXOperatorCallExpr::classof(E
)) {
1815 // Just take the first token.
1816 CXXOperatorCallExpr
*Op
= dyn_cast
<CXXOperatorCallExpr
>(E
);
1817 Loc
= Op
->getOperatorLoc();
1818 Flags
|= NotIdentifierToken
;
1819 } else if (MemberExpr::classof(CalleeExpr
)) {
1820 MemberExpr
*Member
= dyn_cast
<MemberExpr
>(CalleeExpr
);
1821 Loc
= Member
->getMemberLoc();
1822 } else if (DeclRefExpr::classof(CalleeExpr
)) {
1823 // We handle this in VisitDeclRefExpr.
1829 normalizeLocation(&Loc
);
1831 if (!isInterestingLocation(Loc
)) {
1835 visitIdentifier("use", "function", getQualifiedName(NamedCallee
), Loc
, Mangled
,
1836 E
->getCallReturnType(*AstContext
), getContext(Loc
), Flags
);
1841 bool VisitTagTypeLoc(TagTypeLoc L
) {
1842 SourceLocation Loc
= L
.getBeginLoc();
1843 normalizeLocation(&Loc
);
1844 if (!isInterestingLocation(Loc
)) {
1848 TagDecl
*Decl
= L
.getDecl();
1849 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1850 visitIdentifier("use", "type", getQualifiedName(Decl
), Loc
, Mangled
,
1851 L
.getType(), getContext(Loc
));
1855 bool VisitTypedefTypeLoc(TypedefTypeLoc L
) {
1856 SourceLocation Loc
= L
.getBeginLoc();
1857 normalizeLocation(&Loc
);
1858 if (!isInterestingLocation(Loc
)) {
1862 NamedDecl
*Decl
= L
.getTypedefNameDecl();
1863 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1864 visitIdentifier("use", "type", getQualifiedName(Decl
), Loc
, Mangled
,
1865 L
.getType(), getContext(Loc
));
1869 bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L
) {
1870 SourceLocation Loc
= L
.getBeginLoc();
1871 normalizeLocation(&Loc
);
1872 if (!isInterestingLocation(Loc
)) {
1876 NamedDecl
*Decl
= L
.getDecl();
1877 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1878 visitIdentifier("use", "type", getQualifiedName(Decl
), Loc
, Mangled
,
1879 L
.getType(), getContext(Loc
));
1883 bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L
) {
1884 SourceLocation Loc
= L
.getBeginLoc();
1885 normalizeLocation(&Loc
);
1886 if (!isInterestingLocation(Loc
)) {
1890 TemplateDecl
*Td
= L
.getTypePtr()->getTemplateName().getAsTemplateDecl();
1891 if (ClassTemplateDecl
*D
= dyn_cast
<ClassTemplateDecl
>(Td
)) {
1892 NamedDecl
*Decl
= D
->getTemplatedDecl();
1893 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1894 visitIdentifier("use", "type", getQualifiedName(Decl
), Loc
, Mangled
,
1895 QualType(), getContext(Loc
));
1896 } else if (TypeAliasTemplateDecl
*D
= dyn_cast
<TypeAliasTemplateDecl
>(Td
)) {
1897 NamedDecl
*Decl
= D
->getTemplatedDecl();
1898 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1899 visitIdentifier("use", "type", getQualifiedName(Decl
), Loc
, Mangled
,
1900 QualType(), getContext(Loc
));
1906 bool VisitDependentNameTypeLoc(DependentNameTypeLoc L
) {
1907 SourceLocation Loc
= L
.getNameLoc();
1908 normalizeLocation(&Loc
);
1909 if (!isInterestingLocation(Loc
)) {
1913 for (const NamedDecl
*D
:
1914 Resolver
->resolveDependentNameType(L
.getTypePtr())) {
1915 visitHeuristicResult(Loc
, D
);
1920 bool VisitDeclRefExpr(DeclRefExpr
*E
) {
1921 SourceLocation Loc
= E
->getExprLoc();
1922 normalizeLocation(&Loc
);
1923 if (!isInterestingLocation(Loc
)) {
1927 if (E
->hasQualifier()) {
1928 Loc
= E
->getNameInfo().getLoc();
1929 normalizeLocation(&Loc
);
1932 NamedDecl
*Decl
= E
->getDecl();
1933 if (const VarDecl
*D2
= dyn_cast
<VarDecl
>(Decl
)) {
1935 if (D2
->isLocalVarDeclOrParm()) {
1938 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1939 visitIdentifier("use", "variable", getQualifiedName(Decl
), Loc
, Mangled
,
1940 D2
->getType(), getContext(Loc
), Flags
);
1941 } else if (isa
<FunctionDecl
>(Decl
)) {
1942 const FunctionDecl
*F
= dyn_cast
<FunctionDecl
>(Decl
);
1943 if (F
->isTemplateInstantiation()) {
1944 Decl
= F
->getTemplateInstantiationPattern();
1947 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1948 visitIdentifier("use", "function", getQualifiedName(Decl
), Loc
, Mangled
,
1949 E
->getType(), getContext(Loc
));
1950 } else if (isa
<EnumConstantDecl
>(Decl
)) {
1951 std::string Mangled
= getMangledName(CurMangleContext
, Decl
);
1952 visitIdentifier("use", "enum", getQualifiedName(Decl
), Loc
, Mangled
,
1953 E
->getType(), getContext(Loc
));
1959 bool VisitCXXConstructorDecl(CXXConstructorDecl
*D
) {
1960 if (!isInterestingLocation(D
->getLocation())) {
1964 for (CXXConstructorDecl::init_const_iterator It
= D
->init_begin();
1965 It
!= D
->init_end(); ++It
) {
1966 const CXXCtorInitializer
*Ci
= *It
;
1967 if (!Ci
->getMember() || !Ci
->isWritten()) {
1971 SourceLocation Loc
= Ci
->getMemberLocation();
1972 normalizeLocation(&Loc
);
1973 if (!isInterestingLocation(Loc
)) {
1977 FieldDecl
*Member
= Ci
->getMember();
1978 std::string Mangled
= getMangledName(CurMangleContext
, Member
);
1979 visitIdentifier("use", "field", getQualifiedName(Member
), Loc
, Mangled
,
1980 Member
->getType(), getContext(D
));
1986 bool VisitMemberExpr(MemberExpr
*E
) {
1987 SourceLocation Loc
= E
->getExprLoc();
1988 normalizeLocation(&Loc
);
1989 if (!isInterestingLocation(Loc
)) {
1993 ValueDecl
*Decl
= E
->getMemberDecl();
1994 if (FieldDecl
*Field
= dyn_cast
<FieldDecl
>(Decl
)) {
1995 std::string Mangled
= getMangledName(CurMangleContext
, Field
);
1996 visitIdentifier("use", "field", getQualifiedName(Field
), Loc
, Mangled
,
1997 Field
->getType(), getContext(Loc
));
2002 // Helper function for producing heuristic results for usages in dependent
2003 // code. These should be distinguished from concrete results (obtained for
2004 // dependent code using the AutoTemplateContext machinery) once bug 1833552 is
2006 // We don't expect this method to be intentionally called multiple times for
2007 // a given (Loc, NamedDecl) pair because our callers should be mutually
2008 // exclusive AST node types. However, it's fine if this method is called
2009 // multiple time for a given pair because we explicitly de-duplicate records
2010 // with an identical string representation (which is a good reason to have
2011 // this helper, as it ensures identical representations).
2012 void visitHeuristicResult(SourceLocation Loc
, const NamedDecl
*ND
) {
2013 if (const TemplateDecl
*TD
= dyn_cast
<TemplateDecl
>(ND
)) {
2014 ND
= TD
->getTemplatedDecl();
2017 const char *SyntaxKind
= nullptr;
2018 if (const FunctionDecl
*F
= dyn_cast
<FunctionDecl
>(ND
)) {
2019 MaybeType
= F
->getType();
2020 SyntaxKind
= "function";
2021 } else if (const FieldDecl
*F
= dyn_cast
<FieldDecl
>(ND
)) {
2022 MaybeType
= F
->getType();
2023 SyntaxKind
= "field";
2024 } else if (const EnumConstantDecl
*E
= dyn_cast
<EnumConstantDecl
>(ND
)) {
2025 MaybeType
= E
->getType();
2026 SyntaxKind
= "enum";
2027 } else if (const TypedefNameDecl
*T
= dyn_cast
<TypedefNameDecl
>(ND
)) {
2028 MaybeType
= T
->getUnderlyingType();
2029 SyntaxKind
= "type";
2032 std::string Mangled
= getMangledName(CurMangleContext
, ND
);
2033 visitIdentifier("use", SyntaxKind
, getQualifiedName(ND
), Loc
, Mangled
,
2034 MaybeType
, getContext(Loc
));
2038 bool VisitOverloadExpr(OverloadExpr
*E
) {
2039 SourceLocation Loc
= E
->getExprLoc();
2040 normalizeLocation(&Loc
);
2041 if (!isInterestingLocation(Loc
)) {
2045 for (auto *Candidate
: E
->decls()) {
2046 visitHeuristicResult(Loc
, Candidate
);
2051 bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr
*E
) {
2052 SourceLocation Loc
= E
->getMemberLoc();
2053 normalizeLocation(&Loc
);
2054 if (!isInterestingLocation(Loc
)) {
2058 // If possible, provide a heuristic result without instantiation.
2059 for (const NamedDecl
*D
: Resolver
->resolveMemberExpr(E
)) {
2060 visitHeuristicResult(Loc
, D
);
2063 // Also record this location so that if we have instantiations, we can
2064 // gather more accurate results from them.
2065 if (TemplateStack
) {
2066 TemplateStack
->visitDependent(Loc
);
2071 bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr
*E
) {
2072 SourceLocation Loc
= E
->getLocation();
2073 normalizeLocation(&Loc
);
2074 if (!isInterestingLocation(Loc
)) {
2078 for (const NamedDecl
*D
: Resolver
->resolveDeclRefExpr(E
)) {
2079 visitHeuristicResult(Loc
, D
);
2084 void enterSourceFile(SourceLocation Loc
) {
2085 normalizeLocation(&Loc
);
2086 FileInfo
* newFile
= getFileInfo(Loc
);
2087 if (!newFile
->Interesting
) {
2090 FileType type
= newFile
->Generated
? FileType::Generated
: FileType::Source
;
2091 std::string symbol
=
2092 std::string("FILE_") + mangleFile(newFile
->Realname
, type
);
2094 // We use an explicit zero-length source range at the start of the file. If we
2095 // don't set the LocRangeEndValid flag, the visitIdentifier code will use the
2096 // entire first token, which could be e.g. a long multiline-comment.
2097 visitIdentifier("def", "file", newFile
->Realname
, SourceRange(Loc
),
2098 symbol
, QualType(), Context(),
2099 NotIdentifierToken
| LocRangeEndValid
);
2102 void inclusionDirective(SourceRange FileNameRange
, const FileEntry
* File
) {
2103 std::string
includedFile(File
->tryGetRealPathName());
2104 FileType type
= relativizePath(includedFile
);
2105 if (type
== FileType::Unknown
) {
2108 std::string symbol
=
2109 std::string("FILE_") + mangleFile(includedFile
, type
);
2111 visitIdentifier("use", "file", includedFile
, FileNameRange
, symbol
,
2112 QualType(), Context(),
2113 NotIdentifierToken
| LocRangeEndValid
);
// Emits a "def"/"macro" record for a macro definition.
//
// Tok is the token naming the macro and Macro is its directive. The record
// is placed at Tok's normalized location, and its symbol is "M_" followed by
// mangleLocation() of that location and the macro's name.
//
// NOTE(review): the bodies of the isBuiltinMacro() and
// !isInterestingLocation() branches are not visible in this excerpt;
// presumably both return early so builtin macros and uninteresting locations
// are skipped - confirm against the full file.
2116 void macroDefined(const Token
&Tok
, const MacroDirective
*Macro
) {
2117 if (Macro
->getMacroInfo()->isBuiltinMacro()) {
2120 SourceLocation Loc
= Tok
.getLocation();
2121 normalizeLocation(&Loc
);
2122 if (!isInterestingLocation(Loc
)) {
2126 IdentifierInfo
*Ident
= Tok
.getIdentifierInfo();
// The symbol is keyed on the definition site plus the macro name.
2128 std::string Mangled
=
2129 std::string("M_") + mangleLocation(Loc
, std::string(Ident
->getName()));
2130 visitIdentifier("def", "macro", Ident
->getName(), Loc
, Mangled
);
// Emits a "use"/"macro" record for a reference to a macro.
//
// Tok is the token naming the macro at the point of use; Macro is the
// MacroInfo of the macro it refers to. The record is placed at Tok's
// normalized location, while the symbol is built by mangleLocation() from
// Macro->getDefinitionLoc() and the macro's name, i.e. from where the macro
// was defined rather than where it is used.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the statements before the isBuiltinMacro() check, the bodies of
// the early-exit branches, and part of the Mangled initializer). Presumably a
// null Macro is rejected first and the symbol carries the same "M_" prefix
// that macroDefined() uses - confirm against the full file.
2134 void macroUsed(const Token
&Tok
, const MacroInfo
*Macro
) {
2138 if (Macro
->isBuiltinMacro()) {
2141 SourceLocation Loc
= Tok
.getLocation();
2142 normalizeLocation(&Loc
);
2143 if (!isInterestingLocation(Loc
)) {
2147 IdentifierInfo
*Ident
= Tok
.getIdentifierInfo();
2149 std::string Mangled
=
2151 mangleLocation(Macro
->getDefinitionLoc(), std::string(Ident
->getName()));
2152 visitIdentifier("use", "macro", Ident
->getName(), Loc
, Mangled
);
// Preprocessor callback invoked when the current file changes.
//
// Only PPCallbacks::EnterFile is acted upon: it forwards Loc to
// Indexer->enterSourceFile(). RenameFile, SystemHeaderPragma and ExitFile are
// listed explicitly but carry only comments in the visible lines. The
// FileType and PrevFID parameters are not referenced in the visible lines.
//
// NOTE(review): the statement that opens the dispatch over Reason and the
// break/closing lines are not visible in this excerpt.
2157 void PreprocessorHook::FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
2158 SrcMgr::CharacteristicKind FileType
,
2159 FileID PrevFID
= FileID()) {
2161 case PPCallbacks::RenameFile
:
2162 case PPCallbacks::SystemHeaderPragma
:
2163 // Don't care about these, since we want the actual on-disk filenames
2165 case PPCallbacks::EnterFile
:
2166 Indexer
->enterSourceFile(Loc
);
2168 case PPCallbacks::ExitFile
:
2169 // Don't care about exiting files
// Preprocessor callback for an inclusion directive; forwards the file-name
// range and the included file's entry to Indexer->inclusionDirective().
//
// The type of the File parameter depends on the Clang version being built
// against: OptionalFileEntryRef for Clang >= 16, Optional<FileEntryRef> for
// Clang 15, and a plain const FileEntry* in the remaining branch. The other
// parameters named in the visible signature (HashLoc, IncludeTok, SearchPath,
// RelativePath, Imported, FileType) are not referenced in the visible lines.
//
// NOTE(review): some parameter lines and the preprocessor
// #else/#endif lines are not visible in this excerpt. The lines between the
// ">= 15" guard and the forwarding call are also missing; presumably they
// check that File holds a value before it is dereferenced. The pointer
// branch forwards File unchecked, and Indexer->inclusionDirective()
// dereferences it - confirm that a null File cannot reach that call.
2174 void PreprocessorHook::InclusionDirective(SourceLocation HashLoc
,
2175 const Token
&IncludeTok
,
2178 CharSourceRange FileNameRange
,
2179 #if CLANG_VERSION_MAJOR >= 16
2180 OptionalFileEntryRef File
,
2181 #elif CLANG_VERSION_MAJOR >= 15
2182 Optional
<FileEntryRef
> File
,
2184 const FileEntry
*File
,
2186 StringRef SearchPath
,
2187 StringRef RelativePath
,
2188 const Module
*Imported
,
2189 SrcMgr::CharacteristicKind FileType
) {
2190 #if CLANG_VERSION_MAJOR >= 15
// With the optional/reference-style File types, the underlying FileEntry is
// obtained via getFileEntry() and passed by address.
2194 Indexer
->inclusionDirective(FileNameRange
.getAsRange(), &File
->getFileEntry());
// Otherwise File is already a FileEntry pointer and is passed through as is.
2196 Indexer
->inclusionDirective(FileNameRange
.getAsRange(), File
);
// Preprocessor callback for a macro definition; forwards the macro's name
// token and directive unchanged to Indexer->macroDefined().
2200 void PreprocessorHook::MacroDefined(const Token
&Tok
,
2201 const MacroDirective
*Md
) {
2202 Indexer
->macroDefined(Tok
, Md
);
// Preprocessor callback for a macro expansion; reports it to the indexer as a
// macro use by passing the token and Md.getMacroInfo() to
// Indexer->macroUsed(). Range and Ma are not referenced in the visible lines.
2205 void PreprocessorHook::MacroExpands(const Token
&Tok
, const MacroDefinition
&Md
,
2206 SourceRange Range
, const MacroArgs
*Ma
) {
2207 Indexer
->macroUsed(Tok
, Md
.getMacroInfo());
// Preprocessor callback for a macro being undefined; it is reported to the
// indexer as a macro use via Indexer->macroUsed(), using the MacroInfo from
// Md. Undef is not referenced in the visible lines.
2210 void PreprocessorHook::MacroUndefined(const Token
&Tok
,
2211 const MacroDefinition
&Md
,
2212 const MacroDirective
*Undef
)
2214 Indexer
->macroUsed(Tok
, Md
.getMacroInfo());
// Preprocessor callback named Defined (presumably fired for the defined()
// operator in a conditional - confirm against PPCallbacks); reports the named
// macro as a use via Indexer->macroUsed(). Range is not referenced in the
// visible lines.
2217 void PreprocessorHook::Defined(const Token
&Tok
, const MacroDefinition
&Md
,
2218 SourceRange Range
) {
2219 Indexer
->macroUsed(Tok
, Md
.getMacroInfo());
// Preprocessor callback for an #ifdef directive; reports the tested macro as
// a use via Indexer->macroUsed(). Loc is not referenced in the visible lines.
2222 void PreprocessorHook::Ifdef(SourceLocation Loc
, const Token
&Tok
,
2223 const MacroDefinition
&Md
) {
2224 Indexer
->macroUsed(Tok
, Md
.getMacroInfo());
// Preprocessor callback for an #ifndef directive; reports the tested macro as
// a use via Indexer->macroUsed(). Loc is not referenced in the visible lines.
2227 void PreprocessorHook::Ifndef(SourceLocation Loc
, const Token
&Tok
,
2228 const MacroDefinition
&Md
) {
2229 Indexer
->macroUsed(Tok
, Md
.getMacroInfo());
// Plugin action for the mozsearch indexer. It supplies the AST consumer that
// does the indexing and parses the plugin's command-line arguments into the
// Srcdir, Outdir and Objdir path variables.
//
// NOTE(review): access specifiers, the failure-path return statements and
// the closing lines of the class are not visible in this excerpt.
2232 class IndexAction
: public PluginASTAction
{
// Creates the IndexConsumer for the given compiler instance. The input file
// name F is not referenced in the visible lines.
2234 std::unique_ptr
<ASTConsumer
> CreateASTConsumer(CompilerInstance
&CI
,
2235 llvm::StringRef F
) {
2236 return make_unique
<IndexConsumer
>(CI
);
// Parses the plugin arguments. Exactly three are required, in this order:
// source directory, output directory, object directory. A wrong argument
// count, or a source/object directory for which getAbsolutePath() returns an
// empty string, produces an Error-level custom diagnostic.
// NOTE(review): the return statements are not visible; presumably the error
// paths return false and the success path returns true - confirm.
2239 bool ParseArgs(const CompilerInstance
&CI
,
2240 const std::vector
<std::string
> &Args
) {
2241 if (Args
.size() != 3) {
2242 DiagnosticsEngine
&D
= CI
.getDiagnostics();
2243 unsigned DiagID
= D
.getCustomDiagID(
2244 DiagnosticsEngine::Error
,
2245 "Need arguments for the source, output, and object directories");
2250 // Load our directories
2251 Srcdir
= getAbsolutePath(Args
[0]);
2252 if (Srcdir
.empty()) {
2253 DiagnosticsEngine
&D
= CI
.getDiagnostics();
2254 unsigned DiagID
= D
.getCustomDiagID(
2255 DiagnosticsEngine::Error
, "Source directory '%0' does not exist");
2256 D
.Report(DiagID
) << Args
[0];
// Unlike the source and object directories, the output directory is passed
// to ensurePath() before being resolved (presumably creating it if missing -
// confirm), and its resolved path is not checked for emptiness. Outdir is
// stored with a trailing path separator.
2260 ensurePath(Args
[1] + PATHSEP_STRING
);
2261 Outdir
= getAbsolutePath(Args
[1]);
2262 Outdir
+= PATHSEP_STRING
;
2264 Objdir
= getAbsolutePath(Args
[2]);
2265 if (Objdir
.empty()) {
2266 DiagnosticsEngine
&D
= CI
.getDiagnostics();
2267 unsigned DiagID
= D
.getCustomDiagID(DiagnosticsEngine::Error
,
2268 "Objdir '%0' does not exist");
2269 D
.Report(DiagID
) << Args
[2];
// Objdir is likewise stored with a trailing path separator.
2272 Objdir
+= PATHSEP_STRING
;
// Print the resolved directories to stdout on a "MOZSEARCH:" line.
2274 printf("MOZSEARCH: %s %s %s\n", Srcdir
.c_str(), Outdir
.c_str(),
// Writes the plugin's (placeholder) help text to the given stream.
2280 void printHelp(llvm::raw_ostream
&Ros
) {
2281 Ros
<< "Help for mozsearch plugin goes here\n";
// Static registration object: adds IndexAction to the frontend plugin
// registry under the name "mozsearch-index" with the given description.
2285 static FrontendPluginRegistry::Add
<IndexAction
>
2286 Y("mozsearch-index", "create the mozsearch index database");