Bug 1814798 - pt 1. Add bool to enable/disable PHC at runtime r=glandium
[gecko.git] / build / clang-plugin / mozsearch-plugin / MozsearchIndexer.cpp
blob1866c9438593b6e317a32169bff74fbf4bc722e9
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "clang/AST/AST.h"
7 #include "clang/AST/ASTConsumer.h"
8 #include "clang/AST/ASTContext.h"
9 #include "clang/AST/Expr.h"
10 #include "clang/AST/ExprCXX.h"
11 #include "clang/AST/Mangle.h"
12 #include "clang/AST/RecordLayout.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/Version.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/FrontendPluginRegistry.h"
19 #include "clang/Lex/Lexer.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/Support/JSON.h"
24 #include "llvm/Support/raw_ostream.h"
26 #include <fstream>
27 #include <iostream>
28 #include <map>
29 #include <memory>
30 #include <sstream>
31 #include <string>
32 #include <tuple>
33 #include <unordered_set>
35 #include <stdio.h>
36 #include <stdlib.h>
38 #include "FileOperations.h"
39 #include "StringOperations.h"
40 #include "from-clangd/HeuristicResolver.h"
42 #if CLANG_VERSION_MAJOR < 8
43 // Starting with Clang 8.0 some basic functions have been renamed
44 #define getBeginLoc getLocStart
45 #define getEndLoc getLocEnd
46 #endif
47 // We want std::make_unique, but that's only available in c++14. In versions
48 // prior to that, we need to fall back to llvm's make_unique. It's also the
49 // case that we expect clang 10 to build with c++14 and clang 9 and earlier to
50 // build with c++11, at least as suggested by the llvm-config --cxxflags on
51 // non-windows platforms. mozilla-central seems to build with -std=c++17 on
52 // windows so we need to make this decision based on __cplusplus instead of
53 // the CLANG_VERSION_MAJOR.
54 #if __cplusplus < 201402L
55 using llvm::make_unique;
56 #else
57 using std::make_unique;
58 #endif
60 using namespace clang;
62 const std::string GENERATED("__GENERATED__" PATHSEP_STRING);
64 // Absolute path to directory containing source code.
65 std::string Srcdir;
67 // Absolute path to objdir (including generated code).
68 std::string Objdir;
70 // Absolute path where analysis JSON output will be stored.
71 std::string Outdir;
// Where a file lives relative to the build directories.
enum class FileType {
  // The file was in neither the source tree nor the objdir. It might be a
  // system include, for example.
  Unknown,
  // A file from the source tree.
  Source,
  // A file from the objdir.
  Generated,
};
83 // Takes an absolute path to a file, and returns the type of file it is. If
84 // it's a Source or Generated file, the provided inout path argument is modified
85 // in-place so that it is relative to the source dir or objdir, respectively.
86 FileType relativizePath(std::string& path) {
87 if (path.compare(0, Objdir.length(), Objdir) == 0) {
88 path.replace(0, Objdir.length(), GENERATED);
89 return FileType::Generated;
91 // Empty filenames can get turned into Srcdir when they are resolved as
92 // absolute paths, so we should exclude files that are exactly equal to
93 // Srcdir or anything outside Srcdir.
94 if (path.length() > Srcdir.length() && path.compare(0, Srcdir.length(), Srcdir) == 0) {
95 // Remove the trailing `/' as well.
96 path.erase(0, Srcdir.length() + 1);
97 return FileType::Source;
99 return FileType::Unknown;
#if !defined(_WIN32) && !defined(_WIN64)
#include <sys/time.h>

// Returns the current time reported by gettimeofday() as fractional seconds.
static double time() {
  struct timeval Now;
  gettimeofday(&Now, nullptr);
  const double WholeSeconds = double(Now.tv_sec);
  const double Microseconds = double(Now.tv_usec);
  return WholeSeconds + Microseconds / 1000000.;
}
#endif
// Return true if every character of |Input| is a letter, a digit or an
// underscore. We don't want to generate analysis information for operators,
// string literals, etc. by accident since it trips up consumers of the data.
//
// Note that this is a character-class check only: the empty string and names
// that start with a digit are accepted.
static bool isValidIdentifier(const std::string &Input) {
  for (char C : Input) {
    // The <ctype.h> classifiers require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behaviour, so
    // convert first.
    const unsigned char U = static_cast<unsigned char>(C);
    if (!(isalpha(U) || isdigit(U) || U == '_')) {
      return false;
    }
  }
  return true;
}
// Debugging aid that prints "<log>" when constructed and "</log>" when
// destroyed. Only the pointer is stored; the string is not copied.
struct RAIITracer {
  RAIITracer(const char *log) : mLog(log) { printf("<%s>\n", mLog); }

  ~RAIITracer() { printf("</%s>\n", mLog); }

  const char* mLog;
};

// Declares a RAIITracer named `tracer` labelled with the current function.
#define TRACEFUNC RAIITracer tracer(__FUNCTION__);
138 class IndexConsumer;
140 // For each C++ file seen by the analysis (.cpp or .h), we track a
141 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
142 // it's in the source dir or the objdir). We also store the analysis output
143 // here.
144 struct FileInfo {
145 FileInfo(std::string &Rname) : Realname(Rname) {
146 switch (relativizePath(Realname)) {
147 case FileType::Generated:
148 Interesting = true;
149 Generated = true;
150 break;
151 case FileType::Source:
152 Interesting = true;
153 Generated = false;
154 break;
155 case FileType::Unknown:
156 Interesting = false;
157 Generated = false;
158 break;
161 std::string Realname;
162 std::vector<std::string> Output;
163 bool Interesting;
164 bool Generated;
167 class IndexConsumer;
169 class PreprocessorHook : public PPCallbacks {
170 IndexConsumer *Indexer;
172 public:
173 PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
175 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
176 SrcMgr::CharacteristicKind FileType,
177 FileID PrevFID) override;
179 virtual void InclusionDirective(SourceLocation HashLoc,
180 const Token &IncludeTok,
181 StringRef FileName,
182 bool IsAngled,
183 CharSourceRange FileNameRange,
184 #if CLANG_VERSION_MAJOR >= 16
185 OptionalFileEntryRef File,
186 #elif CLANG_VERSION_MAJOR >= 15
187 Optional<FileEntryRef> File,
188 #else
189 const FileEntry *File,
190 #endif
191 StringRef SearchPath,
192 StringRef RelativePath,
193 const Module *Imported,
194 SrcMgr::CharacteristicKind FileType) override;
196 virtual void MacroDefined(const Token &Tok,
197 const MacroDirective *Md) override;
199 virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
200 SourceRange Range, const MacroArgs *Ma) override;
201 virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
202 const MacroDirective *Undef) override;
203 virtual void Defined(const Token &Tok, const MacroDefinition &Md,
204 SourceRange Range) override;
205 virtual void Ifdef(SourceLocation Loc, const Token &Tok,
206 const MacroDefinition &Md) override;
207 virtual void Ifndef(SourceLocation Loc, const Token &Tok,
208 const MacroDefinition &Md) override;
211 class IndexConsumer : public ASTConsumer,
212 public RecursiveASTVisitor<IndexConsumer>,
213 public DiagnosticConsumer {
214 private:
215 CompilerInstance &CI;
216 SourceManager &SM;
217 LangOptions &LO;
218 std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
219 MangleContext *CurMangleContext;
220 ASTContext *AstContext;
221 std::unique_ptr<clangd::HeuristicResolver> Resolver;
223 typedef RecursiveASTVisitor<IndexConsumer> Super;
225 // Tracks the set of declarations that the current expression/statement is
226 // nested inside of.
227 struct AutoSetContext {
228 AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false)
229 : Self(Self), Prev(Self->CurDeclContext), Decl(Context) {
230 this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false);
231 Self->CurDeclContext = this;
234 ~AutoSetContext() { Self->CurDeclContext = Prev; }
236 IndexConsumer *Self;
237 AutoSetContext *Prev;
238 NamedDecl *Decl;
239 bool VisitImplicit;
241 AutoSetContext *CurDeclContext;
243 FileInfo *getFileInfo(SourceLocation Loc) {
244 FileID Id = SM.getFileID(Loc);
246 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
247 It = FileMap.find(Id);
248 if (It == FileMap.end()) {
249 // We haven't seen this file before. We need to make the FileInfo
250 // structure information ourselves
251 std::string Filename = std::string(SM.getFilename(Loc));
252 std::string Absolute;
253 // If Loc is a macro id rather than a file id, it Filename might be
254 // empty. Also for some types of file locations that are clang-internal
255 // like "<scratch>" it can return an empty Filename. In these cases we
256 // want to leave Absolute as empty.
257 if (!Filename.empty()) {
258 Absolute = getAbsolutePath(Filename);
259 if (Absolute.empty()) {
260 Absolute = Filename;
263 std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute);
264 It = FileMap.insert(std::make_pair(Id, std::move(Info))).first;
266 return It->second.get();
269 // Helpers for processing declarations
270 // Should we ignore this location?
271 bool isInterestingLocation(SourceLocation Loc) {
272 if (Loc.isInvalid()) {
273 return false;
276 return getFileInfo(Loc)->Interesting;
279 // Convert location to "line:column" or "line:column-column" given length.
280 // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
281 // column is 0-based and unpadded.
282 std::string locationToString(SourceLocation Loc, size_t Length = 0) {
283 std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
285 bool IsInvalid;
286 unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
287 if (IsInvalid) {
288 return "";
290 unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
291 if (IsInvalid) {
292 return "";
295 if (Length) {
296 return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length);
297 } else {
298 return stringFormat("%05d:%d", Line, Column - 1);
302 // Convert SourceRange to "line-line".
303 // In the resulting string rep, line is 1-based.
304 std::string lineRangeToString(SourceRange Range) {
305 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
306 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
308 bool IsInvalid;
309 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
310 if (IsInvalid) {
311 return "";
313 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
314 if (IsInvalid) {
315 return "";
318 return stringFormat("%d-%d", Line1, Line2);
321 // Convert SourceRange to "line:column-line:column".
322 // In the resulting string rep, line is 1-based, column is 0-based.
323 std::string fullRangeToString(SourceRange Range) {
324 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
325 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
327 bool IsInvalid;
328 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
329 if (IsInvalid) {
330 return "";
332 unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid);
333 if (IsInvalid) {
334 return "";
336 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
337 if (IsInvalid) {
338 return "";
340 unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid);
341 if (IsInvalid) {
342 return "";
345 return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1);
348 // Returns the qualified name of `d` without considering template parameters.
349 std::string getQualifiedName(const NamedDecl *D) {
350 const DeclContext *Ctx = D->getDeclContext();
351 if (Ctx->isFunctionOrMethod()) {
352 return D->getQualifiedNameAsString();
355 std::vector<const DeclContext *> Contexts;
357 // Collect contexts.
358 while (Ctx && isa<NamedDecl>(Ctx)) {
359 Contexts.push_back(Ctx);
360 Ctx = Ctx->getParent();
363 std::string Result;
365 std::reverse(Contexts.begin(), Contexts.end());
367 for (const DeclContext *DC : Contexts) {
368 if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
369 Result += Spec->getNameAsString();
371 if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
372 std::string Backing;
373 llvm::raw_string_ostream Stream(Backing);
374 const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
375 printTemplateArgumentList(
376 Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts()));
377 Result += Stream.str();
379 } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
380 if (Nd->isAnonymousNamespace() || Nd->isInline()) {
381 continue;
383 Result += Nd->getNameAsString();
384 } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
385 if (!Rd->getIdentifier()) {
386 Result += "(anonymous)";
387 } else {
388 Result += Rd->getNameAsString();
390 } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
391 Result += Fd->getNameAsString();
392 } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
393 // C++ [dcl.enum]p10: Each enum-name and each unscoped
394 // enumerator is declared in the scope that immediately contains
395 // the enum-specifier. Each scoped enumerator is declared in the
396 // scope of the enumeration.
397 if (Ed->isScoped() || Ed->getIdentifier())
398 Result += Ed->getNameAsString();
399 else
400 continue;
401 } else {
402 Result += cast<NamedDecl>(DC)->getNameAsString();
404 Result += "::";
407 if (D->getDeclName())
408 Result += D->getNameAsString();
409 else
410 Result += "(anonymous)";
412 return Result;
415 std::string mangleLocation(SourceLocation Loc,
416 std::string Backup = std::string()) {
417 FileInfo *F = getFileInfo(Loc);
418 std::string Filename = F->Realname;
419 if (Filename.length() == 0 && Backup.length() != 0) {
420 return Backup;
422 if (F->Generated) {
423 // Since generated files may be different on different platforms,
424 // we need to include a platform-specific thing in the hash. Otherwise
425 // we can end up with hash collisions where different symbols from
426 // different platforms map to the same thing.
427 char* Platform = getenv("MOZSEARCH_PLATFORM");
428 Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
430 return hash(Filename + std::string("@") + locationToString(Loc));
// Returns true for the characters that may appear unescaped in a symbol:
// letters, digits, underscore and forward slash.
bool isAcceptableSymbolChar(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); a negative plain char would be undefined behaviour.
  const unsigned char U = static_cast<unsigned char>(c);
  return isalpha(U) || isdigit(U) || c == '_' || c == '/';
}
437 std::string mangleFile(std::string Filename, FileType Type) {
438 // "Mangle" the file path, such that:
439 // 1. The majority of paths will still be mostly human-readable.
440 // 2. The sanitization algorithm doesn't produce collisions where two
441 // different unsanitized paths can result in the same sanitized paths.
442 // 3. The produced symbol doesn't cause problems with downstream consumers.
443 // In order to accomplish this, we keep alphanumeric chars, underscores,
444 // and slashes, and replace everything else with an "@xx" hex encoding.
445 // The majority of path characters are letters and slashes which don't get
446 // encoded, so that satisfies (1). Since "@" characters in the unsanitized
447 // path get encoded, there should be no "@" characters in the sanitized path
448 // that got preserved from the unsanitized input, so that should satisfy (2).
449 // And (3) was done by trial-and-error. Note in particular the dot (.)
450 // character needs to be encoded, or the symbol-search feature of mozsearch
451 // doesn't work correctly, as all dot characters in the symbol query get
452 // replaced by #.
453 for (size_t i = 0; i < Filename.length(); i++) {
454 char c = Filename[i];
455 if (isAcceptableSymbolChar(c)) {
456 continue;
458 char hex[4];
459 sprintf(hex, "@%02X", ((int)c) & 0xFF);
460 Filename.replace(i, 1, hex);
461 i += 2;
464 if (Type == FileType::Generated) {
465 // Since generated files may be different on different platforms,
466 // we need to include a platform-specific thing in the hash. Otherwise
467 // we can end up with hash collisions where different symbols from
468 // different platforms map to the same thing.
469 char* Platform = getenv("MOZSEARCH_PLATFORM");
470 Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
472 return Filename;
// Returns Name with every space character replaced by an underscore.
std::string mangleQualifiedName(std::string Name) {
  for (char &Ch : Name) {
    if (Ch == ' ') {
      Ch = '_';
    }
  }
  return Name;
}
480 std::string getMangledName(clang::MangleContext *Ctx,
481 const clang::NamedDecl *Decl) {
482 if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
483 return cast<FunctionDecl>(Decl)->getNameAsString();
486 if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
487 const DeclContext *DC = Decl->getDeclContext();
488 if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
489 isa<LinkageSpecDecl>(DC) ||
490 // isa<ExternCContextDecl>(DC) ||
491 isa<TagDecl>(DC)) {
492 llvm::SmallVector<char, 512> Output;
493 llvm::raw_svector_ostream Out(Output);
494 #if CLANG_VERSION_MAJOR >= 11
495 // This code changed upstream in version 11:
496 // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
497 GlobalDecl GD;
498 if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
499 GD = GlobalDecl(D, Ctor_Complete);
500 } else if (const CXXDestructorDecl *D =
501 dyn_cast<CXXDestructorDecl>(Decl)) {
502 GD = GlobalDecl(D, Dtor_Complete);
503 } else {
504 GD = GlobalDecl(Decl);
506 Ctx->mangleName(GD, Out);
507 #else
508 if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
509 Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
510 } else if (const CXXDestructorDecl *D =
511 dyn_cast<CXXDestructorDecl>(Decl)) {
512 Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
513 } else {
514 Ctx->mangleName(Decl, Out);
516 #endif
517 return Out.str().str();
518 } else {
519 return std::string("V_") + mangleLocation(Decl->getLocation()) +
520 std::string("_") + hash(std::string(Decl->getName()));
522 } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||
523 isa<ObjCInterfaceDecl>(Decl)) {
524 if (!Decl->getIdentifier()) {
525 // Anonymous.
526 return std::string("T_") + mangleLocation(Decl->getLocation());
529 return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
530 } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
531 if (!Decl->getIdentifier()) {
532 // Anonymous.
533 return std::string("NS_") + mangleLocation(Decl->getLocation());
536 return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
537 } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
538 const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
539 return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
540 D2->getNameAsString();
541 } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
542 const RecordDecl *Record = D2->getParent();
543 return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
544 D2->getNameAsString();
545 } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
546 const DeclContext *DC = Decl->getDeclContext();
547 if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
548 return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
549 D2->getNameAsString();
553 assert(false);
554 return std::string("");
557 void debugLocation(SourceLocation Loc) {
558 std::string S = locationToString(Loc);
559 StringRef Filename = SM.getFilename(Loc);
560 printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str());
563 void debugRange(SourceRange Range) {
564 printf("Range\n");
565 debugLocation(Range.getBegin());
566 debugLocation(Range.getEnd());
569 public:
570 IndexConsumer(CompilerInstance &CI)
571 : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
572 AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
573 CI.getPreprocessor().addPPCallbacks(
574 make_unique<PreprocessorHook>(this));
577 virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
578 return new IndexConsumer(CI);
581 #if !defined(_WIN32) && !defined(_WIN64)
582 struct AutoTime {
583 AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
584 ~AutoTime() {
585 if (Start) {
586 *Counter += time() - Start;
589 void stop() {
590 *Counter += time() - Start;
591 Start = 0;
593 double *Counter;
594 double Start;
596 #endif
598 // All we need is to follow the final declaration.
599 virtual void HandleTranslationUnit(ASTContext &Ctx) {
600 CurMangleContext =
601 clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
603 AstContext = &Ctx;
604 Resolver = std::make_unique<clangd::HeuristicResolver>(Ctx);
605 TraverseDecl(Ctx.getTranslationUnitDecl());
607 // Emit the JSON data for all files now.
608 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
609 for (It = FileMap.begin(); It != FileMap.end(); It++) {
610 if (!It->second->Interesting) {
611 continue;
614 FileInfo &Info = *It->second;
616 std::string Filename = Outdir + Info.Realname;
617 std::string SrcFilename = Info.Generated
618 ? Objdir + Info.Realname.substr(GENERATED.length())
619 : Srcdir + PATHSEP_STRING + Info.Realname;
621 ensurePath(Filename);
623 // We lock the output file in case some other clang process is trying to
624 // write to it at the same time.
625 AutoLockFile Lock(SrcFilename, Filename);
627 if (!Lock.success()) {
628 fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
629 exit(1);
632 // Merge our results with the existing lines from the output file.
633 // This ensures that header files that are included multiple times
634 // in different ways are analyzed completely.
635 std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary);
636 FILE *OutFp = Lock.openTmp();
637 if (!OutFp) {
638 fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
639 exit(1);
642 // Sort our new results and get an iterator to them
643 std::sort(Info.Output.begin(), Info.Output.end());
644 std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
645 std::string LastNewWritten;
647 // Loop over the existing (sorted) lines in the analysis output file.
648 // (The good() check also handles the case where Fin did not exist when we
649 // went to open it.)
650 while(Fin.good()) {
651 std::string OldLine;
652 std::getline(Fin, OldLine);
653 // Skip blank lines.
654 if (OldLine.length() == 0) {
655 continue;
657 // We need to put the newlines back that getline() eats.
658 OldLine.push_back('\n');
660 // Write any results from Info.Output that are lexicographically
661 // smaller than OldLine (read from the existing file), but make sure
662 // to skip duplicates. Keep advancing NewLinesIter until we reach an
663 // entry that is lexicographically greater than OldLine.
664 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
665 if (*NewLinesIter > OldLine) {
666 break;
668 if (*NewLinesIter == OldLine) {
669 continue;
671 if (*NewLinesIter == LastNewWritten) {
672 // dedupe the new entries being written
673 continue;
675 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
676 fprintf(stderr, "Unable to write %zu bytes[1] to tmp output file for %s\n",
677 NewLinesIter->length(), Filename.c_str());
678 exit(1);
680 LastNewWritten = *NewLinesIter;
683 // Write the entry read from the existing file.
684 if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
685 fprintf(stderr, "Unable to write %zu bytes[2] to tmp output file for %s\n",
686 OldLine.length(), Filename.c_str());
687 exit(1);
691 // We finished reading from Fin
692 Fin.close();
694 // Finish iterating our new results, discarding duplicates
695 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
696 if (*NewLinesIter == LastNewWritten) {
697 continue;
699 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
700 fprintf(stderr, "Unable to write %zu bytes[3] to tmp output file for %s\n",
701 NewLinesIter->length(), Filename.c_str());
702 exit(1);
704 LastNewWritten = *NewLinesIter;
707 // Done writing all the things, close it and replace the old output file
708 // with the new one.
709 fclose(OutFp);
710 if (!Lock.moveTmp()) {
711 fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
712 exit(1);
717 // Unfortunately, we have to override all these methods in order to track the
718 // context we're inside.
720 bool TraverseEnumDecl(EnumDecl *D) {
721 AutoSetContext Asc(this, D);
722 return Super::TraverseEnumDecl(D);
724 bool TraverseRecordDecl(RecordDecl *D) {
725 AutoSetContext Asc(this, D);
726 return Super::TraverseRecordDecl(D);
728 bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
729 AutoSetContext Asc(this, D);
730 return Super::TraverseCXXRecordDecl(D);
732 bool TraverseFunctionDecl(FunctionDecl *D) {
733 AutoSetContext Asc(this, D);
734 const FunctionDecl *Def;
735 // (See the larger AutoTemplateContext comment for more information.) If a
736 // method on a templated class is declared out-of-line, we need to analyze
737 // the definition inside the scope of the template or else we won't properly
738 // handle member access on the templated type.
739 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
740 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
742 return Super::TraverseFunctionDecl(D);
744 bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
745 AutoSetContext Asc(this, D);
746 const FunctionDecl *Def;
747 // See TraverseFunctionDecl.
748 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
749 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
751 return Super::TraverseCXXMethodDecl(D);
753 bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
754 AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
755 const FunctionDecl *Def;
756 // See TraverseFunctionDecl.
757 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
758 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
760 return Super::TraverseCXXConstructorDecl(D);
762 bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
763 AutoSetContext Asc(this, D);
764 const FunctionDecl *Def;
765 // See TraverseFunctionDecl.
766 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
767 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
769 return Super::TraverseCXXConversionDecl(D);
771 bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
772 AutoSetContext Asc(this, D);
773 const FunctionDecl *Def;
774 // See TraverseFunctionDecl.
775 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
776 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
778 return Super::TraverseCXXDestructorDecl(D);
// Describes the context in which a token appears. A default-constructed
// Context has an empty name and an empty symbol.
struct Context {
  // Ultimately this becomes the "context" JSON property.
  std::string Name;

  // Ultimately this becomes the "contextsym" JSON property.
  std::string Symbol;

  Context() : Name(), Symbol() {}

  Context(std::string Name, std::string Symbol) {
    this->Name = Name;
    this->Symbol = Symbol;
  }
};
794 Context translateContext(NamedDecl *D) {
795 const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
796 if (F && F->isTemplateInstantiation()) {
797 D = F->getTemplateInstantiationPattern();
800 return Context(D->getQualifiedNameAsString(), getMangledName(CurMangleContext, D));
803 Context getContext(SourceLocation Loc) {
804 if (SM.isMacroBodyExpansion(Loc)) {
805 // If we're inside a macro definition, we don't return any context. It
806 // will probably not be what the user expects if we do.
807 return Context();
810 if (CurDeclContext) {
811 return translateContext(CurDeclContext->Decl);
813 return Context();
816 // Similar to GetContext(SourceLocation), but it skips the declaration passed
817 // in. This is useful if we want the context of a declaration that's already
818 // on the stack.
819 Context getContext(Decl *D) {
820 if (SM.isMacroBodyExpansion(D->getLocation())) {
821 // If we're inside a macro definition, we don't return any context. It
822 // will probably not be what the user expects if we do.
823 return Context();
826 AutoSetContext *Ctxt = CurDeclContext;
827 while (Ctxt) {
828 if (Ctxt->Decl != D) {
829 return translateContext(Ctxt->Decl);
831 Ctxt = Ctxt->Prev;
833 return Context();
836 // Analyzing template code is tricky. Suppose we have this code:
838 // template<class T>
839 // bool Foo(T* ptr) { return T::StaticMethod(ptr); }
841 // If we analyze the body of Foo without knowing the type T, then we will not
842 // be able to generate any information for StaticMethod. However, analyzing
843 // Foo for every possible instantiation is inefficient and it also generates
844 // too much data in some cases. For example, the following code would generate
845 // one definition of Baz for every instantiation, which is undesirable:
847 // template<class T>
848 // class Bar { struct Baz { ... }; };
850 // To solve this problem, we analyze templates only once. We do so in a
851 // GatherDependent mode where we look for "dependent scoped member
852 // expressions" (i.e., things like StaticMethod). We keep track of the
853 // locations of these expressions. If we find one or more of them, we analyze
854 // the template for each instantiation, in an AnalyzeDependent mode. This mode
855 // ignores all source locations except for the ones where we found dependent
856 // scoped member expressions before. For these locations, we generate a
857 // separate JSON result for each instantiation.
859 // We inherit our parent's mode if it exists. This is because if our
860 // parent is in analyze mode, it means we've already lived a full life in
861 // gather mode and we must not restart in gather mode or we'll cause the
862 // indexer to visit EVERY identifier, which is way too much data.
863 struct AutoTemplateContext {
864 AutoTemplateContext(IndexConsumer *Self)
865 : Self(Self)
866 , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
867 , Parent(Self->TemplateStack) {
868 Self->TemplateStack = this;
871 ~AutoTemplateContext() { Self->TemplateStack = Parent; }
873 // We traverse templates in two modes:
874 enum class Mode {
875 // Gather mode does not traverse into specializations. It looks for
876 // locations where it would help to have more info from template
877 // specializations.
878 GatherDependent,
880 // Analyze mode traverses into template specializations and records
881 // information about token locations saved in gather mode.
882 AnalyzeDependent,
885 // We found a dependent scoped member expression! Keep track of it for
886 // later.
887 void visitDependent(SourceLocation Loc) {
888 if (CurMode == Mode::AnalyzeDependent) {
889 return;
892 DependentLocations.insert(Loc.getRawEncoding());
893 if (Parent) {
894 Parent->visitDependent(Loc);
898 bool inGatherMode() {
899 return CurMode == Mode::GatherDependent;
902 // Do we need to perform the extra AnalyzeDependent passes (one per
903 // instantiation)?
904 bool needsAnalysis() const {
905 if (!DependentLocations.empty()) {
906 return true;
908 if (Parent) {
909 return Parent->needsAnalysis();
911 return false;
914 void switchMode() { CurMode = Mode::AnalyzeDependent; }
916 // Do we want to analyze each template instantiation separately?
917 bool shouldVisitTemplateInstantiations() const {
918 if (CurMode == Mode::AnalyzeDependent) {
919 return true;
921 if (Parent) {
922 return Parent->shouldVisitTemplateInstantiations();
924 return false;
927 // For a given expression/statement, should we emit JSON data for it?
928 bool shouldVisit(SourceLocation Loc) {
929 if (CurMode == Mode::GatherDependent) {
930 return true;
932 if (DependentLocations.find(Loc.getRawEncoding()) !=
933 DependentLocations.end()) {
934 return true;
936 if (Parent) {
937 return Parent->shouldVisit(Loc);
939 return false;
942 private:
943 IndexConsumer *Self;
944 Mode CurMode;
945 std::unordered_set<unsigned> DependentLocations;
946 AutoTemplateContext *Parent;
949 AutoTemplateContext *TemplateStack;
951 bool shouldVisitTemplateInstantiations() const {
952 if (TemplateStack) {
953 return TemplateStack->shouldVisitTemplateInstantiations();
955 return false;
958 bool shouldVisitImplicitCode() const {
959 return CurDeclContext && CurDeclContext->VisitImplicit;
962 bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
963 AutoTemplateContext Atc(this);
964 Super::TraverseClassTemplateDecl(D);
966 if (!Atc.needsAnalysis()) {
967 return true;
970 Atc.switchMode();
972 if (D != D->getCanonicalDecl()) {
973 return true;
976 for (auto *Spec : D->specializations()) {
977 for (auto *Rd : Spec->redecls()) {
978 // We don't want to visit injected-class-names in this traversal.
979 if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
980 continue;
982 TraverseDecl(Rd);
986 return true;
989 bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
990 AutoTemplateContext Atc(this);
991 if (Atc.inGatherMode()) {
992 Super::TraverseFunctionTemplateDecl(D);
995 if (!Atc.needsAnalysis()) {
996 return true;
999 Atc.switchMode();
1001 if (D != D->getCanonicalDecl()) {
1002 return true;
1005 for (auto *Spec : D->specializations()) {
1006 for (auto *Rd : Spec->redecls()) {
1007 TraverseDecl(Rd);
1011 return true;
1014 bool shouldVisit(SourceLocation Loc) {
1015 if (TemplateStack) {
1016 return TemplateStack->shouldVisit(Loc);
1018 return true;
// Bit flags accepted by visitIdentifier() through its `Flags` argument.
enum {
  // Omit the identifier from being cross-referenced across files. This is
  // usually desired for local variables.
  NoCrossref = 1 << 0,

  // The token carrying the analysis data is not an identifier, so the check
  // that tries to ensure a sane identifier token must be skipped.
  NotIdentifierToken = 1 << 1,

  // The end of the provided SourceRange is valid and should be respected.
  // Without this flag, visitIdentifier() uses only the start of the
  // SourceRange and auto-detects the end from whatever token is found there.
  LocRangeEndValid = 1 << 2
};
1035 void emitStructuredInfo(SourceLocation Loc, const RecordDecl *decl) {
1036 std::string json_str;
1037 llvm::raw_string_ostream ros(json_str);
1038 llvm::json::OStream J(ros);
1039 // Start the top-level object.
1040 J.objectBegin();
1042 unsigned StartOffset = SM.getFileOffset(Loc);
1043 unsigned EndOffset =
1044 StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
1045 J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
1046 J.attribute("structured", 1);
1047 J.attribute("pretty", getQualifiedName(decl));
1048 J.attribute("sym", getMangledName(CurMangleContext, decl));
1050 J.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl->getTagKind()));
1052 const ASTContext &C = *AstContext;
1053 const ASTRecordLayout &Layout = C.getASTRecordLayout(decl);
1055 J.attribute("sizeBytes", Layout.getSize().getQuantity());
1057 auto cxxDecl = dyn_cast<CXXRecordDecl>(decl);
1059 if (cxxDecl) {
1060 J.attributeBegin("supers");
1061 J.arrayBegin();
1062 for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
1063 const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
1065 J.objectBegin();
1067 J.attribute("pretty", getQualifiedName(BaseDecl));
1068 J.attribute("sym", getMangledName(CurMangleContext, BaseDecl));
1070 J.attributeBegin("props");
1071 J.arrayBegin();
1072 if (Base.isVirtual()) {
1073 J.value("virtual");
1075 J.arrayEnd();
1076 J.attributeEnd();
1078 J.objectEnd();
1080 J.arrayEnd();
1081 J.attributeEnd();
1083 J.attributeBegin("methods");
1084 J.arrayBegin();
1085 for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) {
1086 J.objectBegin();
1088 J.attribute("pretty", getQualifiedName(MethodDecl));
1089 J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
1091 // TODO: Better figure out what to do for non-isUserProvided methods
1092 // which means there's potentially semantic data that doesn't correspond
1093 // to a source location in the source. Should we be emitting
1094 // structured info for those when we're processing the class here?
1096 J.attributeBegin("props");
1097 J.arrayBegin();
1098 if (MethodDecl->isStatic()) {
1099 J.value("static");
1101 if (MethodDecl->isInstance()) {
1102 J.value("instance");
1104 if (MethodDecl->isVirtual()) {
1105 J.value("virtual");
1107 if (MethodDecl->isUserProvided()) {
1108 J.value("user");
1110 if (MethodDecl->isDefaulted()) {
1111 J.value("defaulted");
1113 if (MethodDecl->isDeleted()) {
1114 J.value("deleted");
1116 if (MethodDecl->isConstexpr()) {
1117 J.value("constexpr");
1119 J.arrayEnd();
1120 J.attributeEnd();
1122 J.objectEnd();
1124 J.arrayEnd();
1125 J.attributeEnd();
1128 J.attributeBegin("fields");
1129 J.arrayBegin();
1130 uint64_t iField = 0;
1131 for (RecordDecl::field_iterator It = decl->field_begin(),
1132 End = decl->field_end(); It != End; ++It, ++iField) {
1133 const FieldDecl &Field = **It;
1134 uint64_t localOffsetBits = Layout.getFieldOffset(iField);
1135 CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits);
1137 J.objectBegin();
1138 J.attribute("pretty", getQualifiedName(&Field));
1139 J.attribute("sym", getMangledName(CurMangleContext, &Field));
1140 QualType FieldType = Field.getType();
1141 J.attribute("type", FieldType.getAsString());
1142 QualType CanonicalFieldType = FieldType.getCanonicalType();
1143 const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl();
1144 if (tagDecl) {
1145 J.attribute("typesym", getMangledName(CurMangleContext, tagDecl));
1147 J.attribute("offsetBytes", localOffsetBytes.getQuantity());
1148 if (Field.isBitField()) {
1149 J.attributeBegin("bitPositions");
1150 J.objectBegin();
1152 J.attribute("begin", unsigned(localOffsetBits - C.toBits(localOffsetBytes)));
1153 J.attribute("width", Field.getBitWidthValue(C));
1155 J.objectEnd();
1156 J.attributeEnd();
1157 } else {
1158 // Try and get the field as a record itself so we can know its size, but
1159 // we don't actually want to recurse into it.
1160 if (auto FieldRec = Field.getType()->getAs<RecordType>()) {
1161 auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl());
1162 J.attribute("sizeBytes", FieldLayout.getSize().getQuantity());
1163 } else {
1164 // We were unable to get it as a record, which suggests it's a normal
1165 // type, in which case let's just ask for the type size. (Maybe this
1166 // would also work for the above case too?)
1167 uint64_t typeSizeBits = C.getTypeSize(Field.getType());
1168 CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits);
1169 J.attribute("sizeBytes", typeSizeBytes.getQuantity());
1172 J.objectEnd();
1174 J.arrayEnd();
1175 J.attributeEnd();
1177 // End the top-level object.
1178 J.objectEnd();
1180 FileInfo *F = getFileInfo(Loc);
1181 // we want a newline.
1182 ros << '\n';
1183 F->Output.push_back(std::move(ros.str()));
1186 void emitStructuredInfo(SourceLocation Loc, const FunctionDecl *decl) {
1187 std::string json_str;
1188 llvm::raw_string_ostream ros(json_str);
1189 llvm::json::OStream J(ros);
1190 // Start the top-level object.
1191 J.objectBegin();
1193 unsigned StartOffset = SM.getFileOffset(Loc);
1194 unsigned EndOffset =
1195 StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
1196 J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
1197 J.attribute("structured", 1);
1198 J.attribute("pretty", getQualifiedName(decl));
1199 J.attribute("sym", getMangledName(CurMangleContext, decl));
1201 auto cxxDecl = dyn_cast<CXXMethodDecl>(decl);
1203 if (cxxDecl) {
1204 J.attribute("kind", "method");
1205 if (auto parentDecl = cxxDecl->getParent()) {
1206 J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
1209 J.attributeBegin("overrides");
1210 J.arrayBegin();
1211 for (const CXXMethodDecl *MethodDecl : cxxDecl->overridden_methods()) {
1212 J.objectBegin();
1214 // TODO: Make sure we're doing template traversals appropriately...
1215 // findOverriddenMethods (now removed) liked to do:
1216 // if (Decl->isTemplateInstantiation()) {
1217 // Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
1218 // }
1219 // I think our pre-emptive dereferencing/avoidance of templates may
1220 // protect us from this, but it needs more investigation.
1222 J.attribute("pretty", getQualifiedName(MethodDecl));
1223 J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
1225 J.objectEnd();
1227 J.arrayEnd();
1228 J.attributeEnd();
1230 } else {
1231 J.attribute("kind", "function");
1234 // ## Props
1235 J.attributeBegin("props");
1236 J.arrayBegin();
1237 // some of these are only possible on a CXXMethodDecl, but we want them all
1238 // in the same array, so condition these first ones.
1239 if (cxxDecl) {
1240 if (cxxDecl->isStatic()) {
1241 J.value("static");
1243 if (cxxDecl->isInstance()) {
1244 J.value("instance");
1246 if (cxxDecl->isVirtual()) {
1247 J.value("virtual");
1249 if (cxxDecl->isUserProvided()) {
1250 J.value("user");
1253 if (decl->isDefaulted()) {
1254 J.value("defaulted");
1256 if (decl->isDeleted()) {
1257 J.value("deleted");
1259 if (decl->isConstexpr()) {
1260 J.value("constexpr");
1262 J.arrayEnd();
1263 J.attributeEnd();
1265 // End the top-level object.
1266 J.objectEnd();
1268 FileInfo *F = getFileInfo(Loc);
1269 // we want a newline.
1270 ros << '\n';
1271 F->Output.push_back(std::move(ros.str()));
1275 * Emit structured info for a field. Right now the intent is for this to just
1276 * be a pointer to its parent's structured info with this method entirely
1277 * avoiding getting the ASTRecordLayout.
1279 * TODO: Give more thought on where to locate the canonical info on fields and
1280 * how to normalize their exposure over the web. We could relink the info
1281 * both at cross-reference time and web-server lookup time. This is also
1282 * called out in `analysis.md`.
1284 void emitStructuredInfo(SourceLocation Loc, const FieldDecl *decl) {
1285 // XXX the call to decl::getParent will assert below for ObjCIvarDecl
1286 // instances because their DecContext is not a RecordDecl. So just bail
1287 // for now.
1288 // TODO: better support ObjC.
1289 if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(decl)) {
1290 return;
1293 std::string json_str;
1294 llvm::raw_string_ostream ros(json_str);
1295 llvm::json::OStream J(ros);
1296 // Start the top-level object.
1297 J.objectBegin();
1299 unsigned StartOffset = SM.getFileOffset(Loc);
1300 unsigned EndOffset =
1301 StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
1302 J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
1303 J.attribute("structured", 1);
1304 J.attribute("pretty", getQualifiedName(decl));
1305 J.attribute("sym", getMangledName(CurMangleContext, decl));
1306 J.attribute("kind", "field");
1308 if (auto parentDecl = decl->getParent()) {
1309 J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
1312 // End the top-level object.
1313 J.objectEnd();
1315 FileInfo *F = getFileInfo(Loc);
1316 // we want a newline.
1317 ros << '\n';
1318 F->Output.push_back(std::move(ros.str()));
1321 // XXX Type annotating.
1322 // QualType is the type class. It has helpers like TagDecl via getAsTagDecl.
1323 // ValueDecl exposes a getType() method.
1325 // Arguably it makes sense to only expose types that Searchfox has definitions
1326 // for as first-class. Probably the way to go is like context/contextsym.
1327 // We expose a "type" which is just a human-readable string which has no
1328 // semantic purposes and is just a display string, plus then a "typesym" which
1329 // we expose if we were able to map the type.
1331 // Other meta-info: field offsets. Ancestor types.
1333 // This is the only function that emits analysis JSON data. It should be
1334 // called for each identifier that corresponds to a symbol.
1335 void visitIdentifier(const char *Kind, const char *SyntaxKind,
1336 llvm::StringRef QualName, SourceRange LocRange,
1337 std::string Symbol,
1338 QualType MaybeType = QualType(),
1339 Context TokenContext = Context(), int Flags = 0,
1340 SourceRange PeekRange = SourceRange(),
1341 SourceRange NestingRange = SourceRange()) {
1342 SourceLocation Loc = LocRange.getBegin();
1343 if (!shouldVisit(Loc)) {
1344 return;
1347 // Find the file positions corresponding to the token.
1348 unsigned StartOffset = SM.getFileOffset(Loc);
1349 unsigned EndOffset = (Flags & LocRangeEndValid)
1350 ? SM.getFileOffset(LocRange.getEnd())
1351 : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
1353 std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
1354 std::string RangeStr = locationToString(Loc, EndOffset - StartOffset);
1355 std::string PeekRangeStr;
1357 if (!(Flags & NotIdentifierToken)) {
1358 // Get the token's characters so we can make sure it's a valid token.
1359 const char *StartChars = SM.getCharacterData(Loc);
1360 std::string Text(StartChars, EndOffset - StartOffset);
1361 if (!isValidIdentifier(Text)) {
1362 return;
1366 FileInfo *F = getFileInfo(Loc);
1368 if (!(Flags & NoCrossref)) {
1369 std::string json_str;
1370 llvm::raw_string_ostream ros(json_str);
1371 llvm::json::OStream J(ros);
1372 // Start the top-level object.
1373 J.objectBegin();
1375 J.attribute("loc", LocStr);
1376 J.attribute("target", 1);
1377 J.attribute("kind", Kind);
1378 J.attribute("pretty", QualName.data());
1379 J.attribute("sym", Symbol);
1380 if (!TokenContext.Name.empty()) {
1381 J.attribute("context", TokenContext.Name);
1383 if (!TokenContext.Symbol.empty()) {
1384 J.attribute("contextsym", TokenContext.Symbol);
1386 if (PeekRange.isValid()) {
1387 PeekRangeStr = lineRangeToString(PeekRange);
1388 if (!PeekRangeStr.empty()) {
1389 J.attribute("peekRange", PeekRangeStr);
1393 // End the top-level object.
1394 J.objectEnd();
1395 // we want a newline.
1396 ros << '\n';
1397 F->Output.push_back(std::move(ros.str()));
1400 // Generate a single "source":1 for all the symbols. If we search from here,
1401 // we want to union the results for every symbol in `symbols`.
1402 std::string json_str;
1403 llvm::raw_string_ostream ros(json_str);
1404 llvm::json::OStream J(ros);
1405 // Start the top-level object.
1406 J.objectBegin();
1408 J.attribute("loc", RangeStr);
1409 J.attribute("source", 1);
1411 if (NestingRange.isValid()) {
1412 std::string NestingRangeStr = fullRangeToString(NestingRange);
1413 if (!NestingRangeStr.empty()) {
1414 J.attribute("nestingRange", NestingRangeStr);
1418 std::string Syntax;
1419 if (Flags & NoCrossref) {
1420 J.attribute("syntax", "");
1421 } else {
1422 Syntax = Kind;
1423 Syntax.push_back(',');
1424 Syntax.append(SyntaxKind);
1425 J.attribute("syntax", Syntax);
1428 if (!MaybeType.isNull()) {
1429 J.attribute("type", MaybeType.getAsString());
1430 QualType canonical = MaybeType.getCanonicalType();
1431 const TagDecl *decl = canonical->getAsTagDecl();
1432 if (decl) {
1433 std::string Mangled = getMangledName(CurMangleContext, decl);
1434 J.attribute("typesym", Mangled);
1438 std::string Pretty(SyntaxKind);
1439 Pretty.push_back(' ');
1440 Pretty.append(QualName.data());
1441 J.attribute("pretty", Pretty);
1443 J.attribute("sym", Symbol);
1445 if (Flags & NoCrossref) {
1446 J.attribute("no_crossref", 1);
1449 // End the top-level object.
1450 J.objectEnd();
1452 // we want a newline.
1453 ros << '\n';
1454 F->Output.push_back(std::move(ros.str()));
1457 void normalizeLocation(SourceLocation *Loc) {
1458 *Loc = SM.getSpellingLoc(*Loc);
1461 // For cases where the left-brace is not directly accessible from the AST,
1462 // helper to use the lexer to find the brace. Make sure you're picking the
1463 // start location appropriately!
1464 SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
1465 return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
1468 // If the provided statement is compound, return its range.
1469 SourceRange getCompoundStmtRange(Stmt* D) {
1470 if (!D) {
1471 return SourceRange();
1474 CompoundStmt *D2 = dyn_cast<CompoundStmt>(D);
1475 if (D2) {
1476 return D2->getSourceRange();
1479 return SourceRange();
1482 SourceRange getFunctionPeekRange(FunctionDecl* D) {
1483 // We always start at the start of the function decl, which may include the
1484 // return type on a separate line.
1485 SourceLocation Start = D->getBeginLoc();
1487 // By default, we end at the line containing the function's name.
1488 SourceLocation End = D->getLocation();
1490 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1492 // But if there are parameters, we want to include those as well.
1493 for (ParmVarDecl* Param : D->parameters()) {
1494 std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation());
1496 // It's possible there are macros involved or something. We don't include
1497 // the parameters in that case.
1498 if (ParamLoc.first == FuncLoc.first) {
1499 // Assume parameters are in order, so we always take the last one.
1500 End = Param->getEndLoc();
1504 return SourceRange(Start, End);
1507 SourceRange getTagPeekRange(TagDecl* D) {
1508 SourceLocation Start = D->getBeginLoc();
1510 // By default, we end at the line containing the name.
1511 SourceLocation End = D->getLocation();
1513 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1515 if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) {
1516 // But if there are parameters, we want to include those as well.
1517 for (CXXBaseSpecifier& Base : D2->bases()) {
1518 std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc());
1520 // It's possible there are macros involved or something. We don't include
1521 // the parameters in that case.
1522 if (Loc.first == FuncLoc.first) {
1523 // Assume parameters are in order, so we always take the last one.
1524 End = Base.getEndLoc();
1529 return SourceRange(Start, End);
1532 SourceRange getCommentRange(NamedDecl* D) {
1533 const RawComment* RC =
1534 AstContext->getRawCommentForDeclNoCache(D);
1535 if (!RC) {
1536 return SourceRange();
1539 return RC->getSourceRange();
1542 // Sanity checks that all ranges are in the same file, returning the first if
1543 // they're in different files. Unions the ranges based on which is first.
1544 SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
1545 if (Range1.isInvalid()) {
1546 return Range2;
1548 if (Range2.isInvalid()) {
1549 return Range1;
1552 std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
1553 std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
1554 std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
1555 std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
1557 if (End1.first != Begin2.first) {
1558 // Something weird is probably happening with the preprocessor. Just
1559 // return the first range.
1560 return Range1;
1563 // See which range comes first.
1564 if (Begin1.second <= End2.second) {
1565 return SourceRange(Range1.getBegin(), Range2.getEnd());
1566 } else {
1567 return SourceRange(Range2.getBegin(), Range1.getEnd());
1571 // Given a location and a range, returns the range if:
1572 // - The location and the range live in the same file.
1573 // - The range is well ordered (end is not before begin).
1574 // Returns an empty range otherwise.
1575 SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
1576 std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
1577 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
1578 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
1580 if (Begin.first != Decomposed.first || End.first != Decomposed.first) {
1581 return SourceRange();
1584 if (Begin.second >= End.second) {
1585 return SourceRange();
1588 return Range;
1591 bool VisitNamedDecl(NamedDecl *D) {
1592 SourceLocation Loc = D->getLocation();
1594 // If the token is from a macro expansion and the expansion location
1595 // is interesting, use that instead as it tends to be more useful.
1596 SourceLocation expandedLoc = Loc;
1597 if (SM.isMacroBodyExpansion(Loc)) {
1598 Loc = SM.getFileLoc(Loc);
1601 normalizeLocation(&Loc);
1602 if (!isInterestingLocation(Loc)) {
1603 return true;
1606 if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
1607 // Unnamed parameter in function proto.
1608 return true;
1611 int Flags = 0;
1612 const char *Kind = "def";
1613 const char *PrettyKind = "?";
1614 bool wasTemplate = false;
1615 SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
1616 // The nesting range identifies the left brace and right brace, which
1617 // heavily depends on the AST node type.
1618 SourceRange NestingRange;
1619 if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
1620 if (D2->isTemplateInstantiation()) {
1621 wasTemplate = true;
1622 D = D2->getTemplateInstantiationPattern();
1624 // We treat pure virtual declarations as definitions.
1625 Kind = (D2->isThisDeclarationADefinition() || D2->isPure()) ? "def" : "decl";
1626 PrettyKind = "function";
1627 PeekRange = getFunctionPeekRange(D2);
1629 // Only emit the nesting range if:
1630 // - This is a definition AND
1631 // - This isn't a template instantiation. Function templates'
1632 // instantiations can end up as a definition with a Loc at their point
1633 // of declaration but with the CompoundStmt of the template's
1634 // point of definition. This really messes up the nesting range logic.
1635 // At the time of writing this, the test repo's `big_header.h`'s
1636 // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
1637 // instantiated by `big_cpp.cpp` triggers this phenomenon.
1639 // Note: As covered elsewhere, template processing is tricky and it's
1640 // conceivable that we may change traversal patterns in the future,
1641 // mooting this guard.
1642 if (D2->isThisDeclarationADefinition() &&
1643 !D2->isTemplateInstantiation()) {
1644 // The CompoundStmt range is the brace range.
1645 NestingRange = getCompoundStmtRange(D2->getBody());
1647 } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
1648 Kind = D2->isThisDeclarationADefinition() ? "def" : "forward";
1649 PrettyKind = "type";
1651 if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
1652 PeekRange = getTagPeekRange(D2);
1653 NestingRange = D2->getBraceRange();
1654 } else {
1655 PeekRange = SourceRange();
1657 } else if (isa<TypedefNameDecl>(D)) {
1658 Kind = "def";
1659 PrettyKind = "type";
1660 PeekRange = SourceRange(Loc, Loc);
1661 } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
1662 if (D2->isLocalVarDeclOrParm()) {
1663 Flags = NoCrossref;
1666 Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
1667 ? "decl"
1668 : "def";
1669 PrettyKind = "variable";
1670 } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
1671 Kind = "def";
1672 PrettyKind = "namespace";
1673 PeekRange = SourceRange(Loc, Loc);
1674 NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
1675 if (D2) {
1676 // There's no exposure of the left brace so we have to find it.
1677 NestingRange = SourceRange(
1678 findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
1679 D2->getRBraceLoc());
1681 } else if (isa<FieldDecl>(D)) {
1682 Kind = "def";
1683 PrettyKind = "field";
1684 } else if (isa<EnumConstantDecl>(D)) {
1685 Kind = "def";
1686 PrettyKind = "enum constant";
1687 } else {
1688 return true;
1691 QualType qtype = QualType();
1692 if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) {
1693 qtype = D2->getType();
1696 SourceRange CommentRange = getCommentRange(D);
1697 PeekRange = combineRanges(PeekRange, CommentRange);
1698 PeekRange = validateRange(Loc, PeekRange);
1699 NestingRange = validateRange(Loc, NestingRange);
1701 std::string Symbol = getMangledName(CurMangleContext, D);
1703 // In the case of destructors, Loc might point to the ~ character. In that
1704 // case we want to skip to the name of the class. However, Loc might also
1705 // point to other places that generate destructors, such as the use site of
1706 // a macro that expands to generate a destructor, or a lambda (apparently
1707 // clang 8 creates a destructor declaration for at least some lambdas). In
1708 // the former case we'll use the macro use site as the location, and in the
1709 // latter we'll just drop the declaration.
1710 if (isa<CXXDestructorDecl>(D)) {
1711 PrettyKind = "destructor";
1712 const char *P = SM.getCharacterData(Loc);
1713 if (*P == '~') {
1714 // Advance Loc to the class name
1715 P++;
1717 unsigned Skipped = 1;
1718 while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
1719 P++;
1720 Skipped++;
1723 Loc = Loc.getLocWithOffset(Skipped);
1724 } else {
1725 // See if the destructor is coming from a macro expansion
1726 P = SM.getCharacterData(expandedLoc);
1727 if (*P != '~') {
1728 // It's not
1729 return true;
1731 // It is, so just use Loc as-is
1735 visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbol,
1736 qtype,
1737 getContext(D), Flags, PeekRange, NestingRange);
1739 // In-progress structured info emission.
1740 if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) {
1741 if (D2->isThisDeclarationADefinition() &&
1742 // XXX getASTRecordLayout doesn't work for dependent types, so we
1743 // avoid calling into emitStructuredInfo for now if there's a
1744 // dependent type or if we're in any kind of template context. This
1745 // should be re-evaluated once this is working for normal classes and
1746 // we can better evaluate what is useful.
1747 !D2->isDependentType() &&
1748 !TemplateStack) {
1749 emitStructuredInfo(Loc, D2);
1752 if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
1753 if ((D2->isThisDeclarationADefinition() || D2->isPure()) &&
1754 // a clause at the top should have generalized and set wasTemplate so
1755 // it shouldn't be the case that isTemplateInstantiation() is true.
1756 !D2->isTemplateInstantiation() &&
1757 !wasTemplate &&
1758 !D2->isFunctionTemplateSpecialization() &&
1759 !TemplateStack) {
1760 emitStructuredInfo(Loc, D2);
1763 if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) {
1764 if (!D2->isTemplated() &&
1765 !TemplateStack) {
1766 emitStructuredInfo(Loc, D2);
1770 return true;
1773 bool VisitCXXConstructExpr(CXXConstructExpr *E) {
1774 SourceLocation Loc = E->getBeginLoc();
1775 normalizeLocation(&Loc);
1776 if (!isInterestingLocation(Loc)) {
1777 return true;
1780 FunctionDecl *Ctor = E->getConstructor();
1781 if (Ctor->isTemplateInstantiation()) {
1782 Ctor = Ctor->getTemplateInstantiationPattern();
1784 std::string Mangled = getMangledName(CurMangleContext, Ctor);
1786 // FIXME: Need to do something different for list initialization.
1788 visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled,
1789 QualType(), getContext(Loc));
1791 return true;
1794 bool VisitCallExpr(CallExpr *E) {
1795 Decl *Callee = E->getCalleeDecl();
1796 if (!Callee || !FunctionDecl::classof(Callee)) {
1797 return true;
1800 const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee);
1802 SourceLocation Loc;
1804 const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee);
1805 if (F->isTemplateInstantiation()) {
1806 NamedCallee = F->getTemplateInstantiationPattern();
1809 std::string Mangled = getMangledName(CurMangleContext, NamedCallee);
1810 int Flags = 0;
1812 Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
1814 if (CXXOperatorCallExpr::classof(E)) {
1815 // Just take the first token.
1816 CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E);
1817 Loc = Op->getOperatorLoc();
1818 Flags |= NotIdentifierToken;
1819 } else if (MemberExpr::classof(CalleeExpr)) {
1820 MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr);
1821 Loc = Member->getMemberLoc();
1822 } else if (DeclRefExpr::classof(CalleeExpr)) {
1823 // We handle this in VisitDeclRefExpr.
1824 return true;
1825 } else {
1826 return true;
1829 normalizeLocation(&Loc);
1831 if (!isInterestingLocation(Loc)) {
1832 return true;
1835 visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled,
1836 E->getCallReturnType(*AstContext), getContext(Loc), Flags);
1838 return true;
1841 bool VisitTagTypeLoc(TagTypeLoc L) {
1842 SourceLocation Loc = L.getBeginLoc();
1843 normalizeLocation(&Loc);
1844 if (!isInterestingLocation(Loc)) {
1845 return true;
1848 TagDecl *Decl = L.getDecl();
1849 std::string Mangled = getMangledName(CurMangleContext, Decl);
1850 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1851 L.getType(), getContext(Loc));
1852 return true;
1855 bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
1856 SourceLocation Loc = L.getBeginLoc();
1857 normalizeLocation(&Loc);
1858 if (!isInterestingLocation(Loc)) {
1859 return true;
1862 NamedDecl *Decl = L.getTypedefNameDecl();
1863 std::string Mangled = getMangledName(CurMangleContext, Decl);
1864 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1865 L.getType(), getContext(Loc));
1866 return true;
1869 bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
1870 SourceLocation Loc = L.getBeginLoc();
1871 normalizeLocation(&Loc);
1872 if (!isInterestingLocation(Loc)) {
1873 return true;
1876 NamedDecl *Decl = L.getDecl();
1877 std::string Mangled = getMangledName(CurMangleContext, Decl);
1878 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1879 L.getType(), getContext(Loc));
1880 return true;
1883 bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
1884 SourceLocation Loc = L.getBeginLoc();
1885 normalizeLocation(&Loc);
1886 if (!isInterestingLocation(Loc)) {
1887 return true;
1890 TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
1891 if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
1892 NamedDecl *Decl = D->getTemplatedDecl();
1893 std::string Mangled = getMangledName(CurMangleContext, Decl);
1894 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1895 QualType(), getContext(Loc));
1896 } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
1897 NamedDecl *Decl = D->getTemplatedDecl();
1898 std::string Mangled = getMangledName(CurMangleContext, Decl);
1899 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1900 QualType(), getContext(Loc));
1903 return true;
1906 bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) {
1907 SourceLocation Loc = L.getNameLoc();
1908 normalizeLocation(&Loc);
1909 if (!isInterestingLocation(Loc)) {
1910 return true;
1913 for (const NamedDecl *D :
1914 Resolver->resolveDependentNameType(L.getTypePtr())) {
1915 visitHeuristicResult(Loc, D);
1917 return true;
1920 bool VisitDeclRefExpr(DeclRefExpr *E) {
1921 SourceLocation Loc = E->getExprLoc();
1922 normalizeLocation(&Loc);
1923 if (!isInterestingLocation(Loc)) {
1924 return true;
1927 if (E->hasQualifier()) {
1928 Loc = E->getNameInfo().getLoc();
1929 normalizeLocation(&Loc);
1932 NamedDecl *Decl = E->getDecl();
1933 if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) {
1934 int Flags = 0;
1935 if (D2->isLocalVarDeclOrParm()) {
1936 Flags = NoCrossref;
1938 std::string Mangled = getMangledName(CurMangleContext, Decl);
1939 visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled,
1940 D2->getType(), getContext(Loc), Flags);
1941 } else if (isa<FunctionDecl>(Decl)) {
1942 const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl);
1943 if (F->isTemplateInstantiation()) {
1944 Decl = F->getTemplateInstantiationPattern();
1947 std::string Mangled = getMangledName(CurMangleContext, Decl);
1948 visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled,
1949 E->getType(), getContext(Loc));
1950 } else if (isa<EnumConstantDecl>(Decl)) {
1951 std::string Mangled = getMangledName(CurMangleContext, Decl);
1952 visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled,
1953 E->getType(), getContext(Loc));
1956 return true;
1959 bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
1960 if (!isInterestingLocation(D->getLocation())) {
1961 return true;
1964 for (CXXConstructorDecl::init_const_iterator It = D->init_begin();
1965 It != D->init_end(); ++It) {
1966 const CXXCtorInitializer *Ci = *It;
1967 if (!Ci->getMember() || !Ci->isWritten()) {
1968 continue;
1971 SourceLocation Loc = Ci->getMemberLocation();
1972 normalizeLocation(&Loc);
1973 if (!isInterestingLocation(Loc)) {
1974 continue;
1977 FieldDecl *Member = Ci->getMember();
1978 std::string Mangled = getMangledName(CurMangleContext, Member);
1979 visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled,
1980 Member->getType(), getContext(D));
1983 return true;
1986 bool VisitMemberExpr(MemberExpr *E) {
1987 SourceLocation Loc = E->getExprLoc();
1988 normalizeLocation(&Loc);
1989 if (!isInterestingLocation(Loc)) {
1990 return true;
1993 ValueDecl *Decl = E->getMemberDecl();
1994 if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) {
1995 std::string Mangled = getMangledName(CurMangleContext, Field);
1996 visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled,
1997 Field->getType(), getContext(Loc));
1999 return true;
2002 // Helper function for producing heuristic results for usages in dependent
2003 // code. These should be distinguished from concrete results (obtained for
2004 // dependent code using the AutoTemplateContext machinery) once bug 1833552 is
2005 // fixed.
2006 // We don't expect this method to be intentionally called multiple times for
2007 // a given (Loc, NamedDecl) pair because our callers should be mutually
2008 // exclusive AST node types. However, it's fine if this method is called
2009 // multiple time for a given pair because we explicitly de-duplicate records
2010 // with an identical string representation (which is a good reason to have
2011 // this helper, as it ensures identical representations).
2012 void visitHeuristicResult(SourceLocation Loc, const NamedDecl *ND) {
2013 if (const TemplateDecl *TD = dyn_cast<TemplateDecl>(ND)) {
2014 ND = TD->getTemplatedDecl();
2016 QualType MaybeType;
2017 const char *SyntaxKind = nullptr;
2018 if (const FunctionDecl *F = dyn_cast<FunctionDecl>(ND)) {
2019 MaybeType = F->getType();
2020 SyntaxKind = "function";
2021 } else if (const FieldDecl *F = dyn_cast<FieldDecl>(ND)) {
2022 MaybeType = F->getType();
2023 SyntaxKind = "field";
2024 } else if (const EnumConstantDecl *E = dyn_cast<EnumConstantDecl>(ND)) {
2025 MaybeType = E->getType();
2026 SyntaxKind = "enum";
2027 } else if (const TypedefNameDecl *T = dyn_cast<TypedefNameDecl>(ND)) {
2028 MaybeType = T->getUnderlyingType();
2029 SyntaxKind = "type";
2031 if (SyntaxKind) {
2032 std::string Mangled = getMangledName(CurMangleContext, ND);
2033 visitIdentifier("use", SyntaxKind, getQualifiedName(ND), Loc, Mangled,
2034 MaybeType, getContext(Loc));
2038 bool VisitOverloadExpr(OverloadExpr *E) {
2039 SourceLocation Loc = E->getExprLoc();
2040 normalizeLocation(&Loc);
2041 if (!isInterestingLocation(Loc)) {
2042 return true;
2045 for (auto *Candidate : E->decls()) {
2046 visitHeuristicResult(Loc, Candidate);
2048 return true;
2051 bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
2052 SourceLocation Loc = E->getMemberLoc();
2053 normalizeLocation(&Loc);
2054 if (!isInterestingLocation(Loc)) {
2055 return true;
2058 // If possible, provide a heuristic result without instantiation.
2059 for (const NamedDecl *D : Resolver->resolveMemberExpr(E)) {
2060 visitHeuristicResult(Loc, D);
2063 // Also record this location so that if we have instantiations, we can
2064 // gather more accurate results from them.
2065 if (TemplateStack) {
2066 TemplateStack->visitDependent(Loc);
2068 return true;
2071 bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
2072 SourceLocation Loc = E->getLocation();
2073 normalizeLocation(&Loc);
2074 if (!isInterestingLocation(Loc)) {
2075 return true;
2078 for (const NamedDecl *D : Resolver->resolveDeclRefExpr(E)) {
2079 visitHeuristicResult(Loc, D);
2081 return true;
2084 void enterSourceFile(SourceLocation Loc) {
2085 normalizeLocation(&Loc);
2086 FileInfo* newFile = getFileInfo(Loc);
2087 if (!newFile->Interesting) {
2088 return;
2090 FileType type = newFile->Generated ? FileType::Generated : FileType::Source;
2091 std::string symbol =
2092 std::string("FILE_") + mangleFile(newFile->Realname, type);
2094 // We use an explicit zero-length source range at the start of the file. If we
2095 // don't set the LocRangeEndValid flag, the visitIdentifier code will use the
2096 // entire first token, which could be e.g. a long multiline-comment.
2097 visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc),
2098 symbol, QualType(), Context(),
2099 NotIdentifierToken | LocRangeEndValid);
2102 void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) {
2103 std::string includedFile(File->tryGetRealPathName());
2104 FileType type = relativizePath(includedFile);
2105 if (type == FileType::Unknown) {
2106 return;
2108 std::string symbol =
2109 std::string("FILE_") + mangleFile(includedFile, type);
2111 visitIdentifier("use", "file", includedFile, FileNameRange, symbol,
2112 QualType(), Context(),
2113 NotIdentifierToken | LocRangeEndValid);
2116 void macroDefined(const Token &Tok, const MacroDirective *Macro) {
2117 if (Macro->getMacroInfo()->isBuiltinMacro()) {
2118 return;
2120 SourceLocation Loc = Tok.getLocation();
2121 normalizeLocation(&Loc);
2122 if (!isInterestingLocation(Loc)) {
2123 return;
2126 IdentifierInfo *Ident = Tok.getIdentifierInfo();
2127 if (Ident) {
2128 std::string Mangled =
2129 std::string("M_") + mangleLocation(Loc, std::string(Ident->getName()));
2130 visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled);
2134 void macroUsed(const Token &Tok, const MacroInfo *Macro) {
2135 if (!Macro) {
2136 return;
2138 if (Macro->isBuiltinMacro()) {
2139 return;
2141 SourceLocation Loc = Tok.getLocation();
2142 normalizeLocation(&Loc);
2143 if (!isInterestingLocation(Loc)) {
2144 return;
2147 IdentifierInfo *Ident = Tok.getIdentifierInfo();
2148 if (Ident) {
2149 std::string Mangled =
2150 std::string("M_") +
2151 mangleLocation(Macro->getDefinitionLoc(), std::string(Ident->getName()));
2152 visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled);
2157 void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason,
2158 SrcMgr::CharacteristicKind FileType,
2159 FileID PrevFID = FileID()) {
2160 switch (Reason) {
2161 case PPCallbacks::RenameFile:
2162 case PPCallbacks::SystemHeaderPragma:
2163 // Don't care about these, since we want the actual on-disk filenames
2164 break;
2165 case PPCallbacks::EnterFile:
2166 Indexer->enterSourceFile(Loc);
2167 break;
2168 case PPCallbacks::ExitFile:
2169 // Don't care about exiting files
2170 break;
2174 void PreprocessorHook::InclusionDirective(SourceLocation HashLoc,
2175 const Token &IncludeTok,
2176 StringRef FileName,
2177 bool IsAngled,
2178 CharSourceRange FileNameRange,
2179 #if CLANG_VERSION_MAJOR >= 16
2180 OptionalFileEntryRef File,
2181 #elif CLANG_VERSION_MAJOR >= 15
2182 Optional<FileEntryRef> File,
2183 #else
2184 const FileEntry *File,
2185 #endif
2186 StringRef SearchPath,
2187 StringRef RelativePath,
2188 const Module *Imported,
2189 SrcMgr::CharacteristicKind FileType) {
2190 #if CLANG_VERSION_MAJOR >= 15
2191 if (!File) {
2192 return;
2194 Indexer->inclusionDirective(FileNameRange.getAsRange(), &File->getFileEntry());
2195 #else
2196 Indexer->inclusionDirective(FileNameRange.getAsRange(), File);
2197 #endif
2200 void PreprocessorHook::MacroDefined(const Token &Tok,
2201 const MacroDirective *Md) {
2202 Indexer->macroDefined(Tok, Md);
2205 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
2206 SourceRange Range, const MacroArgs *Ma) {
2207 Indexer->macroUsed(Tok, Md.getMacroInfo());
2210 void PreprocessorHook::MacroUndefined(const Token &Tok,
2211 const MacroDefinition &Md,
2212 const MacroDirective *Undef)
2214 Indexer->macroUsed(Tok, Md.getMacroInfo());
2217 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
2218 SourceRange Range) {
2219 Indexer->macroUsed(Tok, Md.getMacroInfo());
2222 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
2223 const MacroDefinition &Md) {
2224 Indexer->macroUsed(Tok, Md.getMacroInfo());
2227 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
2228 const MacroDefinition &Md) {
2229 Indexer->macroUsed(Tok, Md.getMacroInfo());
2232 class IndexAction : public PluginASTAction {
2233 protected:
2234 std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
2235 llvm::StringRef F) {
2236 return make_unique<IndexConsumer>(CI);
2239 bool ParseArgs(const CompilerInstance &CI,
2240 const std::vector<std::string> &Args) {
2241 if (Args.size() != 3) {
2242 DiagnosticsEngine &D = CI.getDiagnostics();
2243 unsigned DiagID = D.getCustomDiagID(
2244 DiagnosticsEngine::Error,
2245 "Need arguments for the source, output, and object directories");
2246 D.Report(DiagID);
2247 return false;
2250 // Load our directories
2251 Srcdir = getAbsolutePath(Args[0]);
2252 if (Srcdir.empty()) {
2253 DiagnosticsEngine &D = CI.getDiagnostics();
2254 unsigned DiagID = D.getCustomDiagID(
2255 DiagnosticsEngine::Error, "Source directory '%0' does not exist");
2256 D.Report(DiagID) << Args[0];
2257 return false;
2260 ensurePath(Args[1] + PATHSEP_STRING);
2261 Outdir = getAbsolutePath(Args[1]);
2262 Outdir += PATHSEP_STRING;
2264 Objdir = getAbsolutePath(Args[2]);
2265 if (Objdir.empty()) {
2266 DiagnosticsEngine &D = CI.getDiagnostics();
2267 unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
2268 "Objdir '%0' does not exist");
2269 D.Report(DiagID) << Args[2];
2270 return false;
2272 Objdir += PATHSEP_STRING;
2274 printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
2275 Objdir.c_str());
2277 return true;
2280 void printHelp(llvm::raw_ostream &Ros) {
2281 Ros << "Help for mozsearch plugin goes here\n";
2285 static FrontendPluginRegistry::Add<IndexAction>
2286 Y("mozsearch-index", "create the mozsearch index database");