//===--- FindSymbols.cpp ------------------------------------*- C++-*------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "FindSymbols.h" #include "AST.h" #include "ClangdUnit.h" #include "FuzzyMatch.h" #include "Logger.h" #include "Quality.h" #include "SourceCode.h" #include "index/Index.h" #include "clang/AST/DeclTemplate.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/IndexingAction.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #define DEBUG_TYPE "FindSymbols" namespace clang { namespace clangd { namespace { // Convert a index::SymbolKind to clangd::SymbolKind (LSP) // Note, some are not perfect matches and should be improved when this LSP // issue is addressed: // https://github.com/Microsoft/language-server-protocol/issues/344 SymbolKind indexSymbolKindToSymbolKind(index::SymbolKind Kind) { switch (Kind) { case index::SymbolKind::Unknown: return SymbolKind::Variable; case index::SymbolKind::Module: return SymbolKind::Module; case index::SymbolKind::Namespace: return SymbolKind::Namespace; case index::SymbolKind::NamespaceAlias: return SymbolKind::Namespace; case index::SymbolKind::Macro: return SymbolKind::String; case index::SymbolKind::Enum: return SymbolKind::Enum; case index::SymbolKind::Struct: return SymbolKind::Struct; case index::SymbolKind::Class: return SymbolKind::Class; case index::SymbolKind::Protocol: return SymbolKind::Interface; case index::SymbolKind::Extension: return SymbolKind::Interface; case index::SymbolKind::Union: return SymbolKind::Class; case index::SymbolKind::TypeAlias: return SymbolKind::Class; case index::SymbolKind::Function: return SymbolKind::Function; case index::SymbolKind::Variable: return SymbolKind::Variable; case index::SymbolKind::Field: return SymbolKind::Field; case index::SymbolKind::EnumConstant: return SymbolKind::EnumMember; case index::SymbolKind::InstanceMethod: case index::SymbolKind::ClassMethod: case index::SymbolKind::StaticMethod: return SymbolKind::Method; case index::SymbolKind::InstanceProperty: case index::SymbolKind::ClassProperty: case index::SymbolKind::StaticProperty: return SymbolKind::Property; case index::SymbolKind::Constructor: case index::SymbolKind::Destructor: return SymbolKind::Method; case index::SymbolKind::ConversionFunction: return SymbolKind::Function; case index::SymbolKind::Parameter: return SymbolKind::Variable; case index::SymbolKind::Using: return SymbolKind::Namespace; } llvm_unreachable("invalid symbol kind"); } using ScoredSymbolInfo = std::pair; struct ScoredSymbolGreater { bool operator()(const ScoredSymbolInfo &L, const ScoredSymbolInfo &R) { if (L.first != R.first) return L.first > R.first; return L.second.name < R.second.name; // Earlier name is better. } }; } // namespace llvm::Expected> getWorkspaceSymbols(llvm::StringRef Query, int Limit, const SymbolIndex *const Index, llvm::StringRef HintPath) { std::vector Result; if (Query.empty() || !Index) return Result; auto Names = splitQualifiedName(Query); FuzzyFindRequest Req; Req.Query = Names.second; // FuzzyFind doesn't want leading :: qualifier bool IsGlobalQuery = Names.first.consume_front("::"); // Restrict results to the scope in the query string if present (global or // not). if (IsGlobalQuery || !Names.first.empty()) Req.Scopes = {Names.first}; else Req.AnyScope = true; if (Limit) Req.Limit = Limit; TopN Top( Req.Limit ? *Req.Limit : std::numeric_limits::max()); FuzzyMatcher Filter(Req.Query); Index->fuzzyFind(Req, [HintPath, &Top, &Filter](const Symbol &Sym) { // Prefer the definition over e.g. a function declaration in a header auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration; auto Uri = URI::parse(CD.FileURI); if (!Uri) { log("Workspace symbol: Could not parse URI '{0}' for symbol '{1}'.", CD.FileURI, Sym.Name); return; } auto Path = URI::resolve(*Uri, HintPath); if (!Path) { log("Workspace symbol: Could not resolve path for URI '{0}' for symbol " "'{1}'.", Uri->toString(), Sym.Name); return; } Location L; // Use HintPath as TUPath since there is no TU associated with this // request. L.uri = URIForFile::canonicalize(*Path, HintPath); Position Start, End; Start.line = CD.Start.line(); Start.character = CD.Start.column(); End.line = CD.End.line(); End.character = CD.End.column(); L.range = {Start, End}; SymbolKind SK = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind); std::string Scope = Sym.Scope; llvm::StringRef ScopeRef = Scope; ScopeRef.consume_back("::"); SymbolInformation Info = {Sym.Name, SK, L, ScopeRef}; SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; Relevance.Query = SymbolRelevanceSignals::Generic; if (auto NameMatch = Filter.match(Sym.Name)) Relevance.NameMatch = *NameMatch; else { log("Workspace symbol: {0} didn't match query {1}", Sym.Name, Filter.pattern()); return; } Relevance.merge(Sym); auto Score = evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate()); dlog("FindSymbols: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, Score, Quality, Relevance); Top.push({Score, std::move(Info)}); }); for (auto &R : std::move(Top).items()) Result.push_back(std::move(R.second)); return Result; } namespace { llvm::Optional declToSym(ASTContext &Ctx, const NamedDecl &ND) { auto &SM = Ctx.getSourceManager(); SourceLocation NameLoc = findNameLoc(&ND); // getFileLoc is a good choice for us, but we also need to make sure // sourceLocToPosition won't switch files, so we call getSpellingLoc on top of // that to make sure it does not switch files. // FIXME: sourceLocToPosition should not switch files! SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc())); SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc())); if (NameLoc.isInvalid() || BeginLoc.isInvalid() || EndLoc.isInvalid()) return llvm::None; if (!SM.isWrittenInMainFile(NameLoc) || !SM.isWrittenInMainFile(BeginLoc) || !SM.isWrittenInMainFile(EndLoc)) return llvm::None; Position NameBegin = sourceLocToPosition(SM, NameLoc); Position NameEnd = sourceLocToPosition( SM, Lexer::getLocForEndOfToken(NameLoc, 0, SM, Ctx.getLangOpts())); index::SymbolInfo SymInfo = index::getSymbolInfo(&ND); // FIXME: this is not classifying constructors, destructors and operators // correctly (they're all "methods"). SymbolKind SK = indexSymbolKindToSymbolKind(SymInfo.Kind); DocumentSymbol SI; SI.name = printName(Ctx, ND); SI.kind = SK; SI.deprecated = ND.isDeprecated(); SI.range = Range{sourceLocToPosition(SM, BeginLoc), sourceLocToPosition(SM, EndLoc)}; SI.selectionRange = Range{NameBegin, NameEnd}; if (!SI.range.contains(SI.selectionRange)) { // 'selectionRange' must be contained in 'range', so in cases where clang // reports unrelated ranges we need to reconcile somehow. SI.range = SI.selectionRange; } return SI; } /// A helper class to build an outline for the parse AST. It traverse the AST /// directly instead of using RecursiveASTVisitor (RAV) for three main reasons: /// - there is no way to keep RAV from traversing subtrees we're not /// interested in. E.g. not traversing function locals or implicit template /// instantiations. /// - it's easier to combine results of recursive passes, e.g. /// - visiting decls is actually simple, so we don't hit the complicated /// cases that RAV mostly helps with (types and expressions, etc.) class DocumentOutline { public: DocumentOutline(ParsedAST &AST) : AST(AST) {} /// Builds the document outline for the generated AST. std::vector build() { std::vector Results; for (auto &TopLevel : AST.getLocalTopLevelDecls()) traverseDecl(TopLevel, Results); return Results; } private: enum class VisitKind { No, OnlyDecl, DeclAndChildren }; void traverseDecl(Decl *D, std::vector &Results) { if (auto *Templ = llvm::dyn_cast(D)) D = Templ->getTemplatedDecl(); auto *ND = llvm::dyn_cast(D); if (!ND) return; VisitKind Visit = shouldVisit(ND); if (Visit == VisitKind::No) return; llvm::Optional Sym = declToSym(AST.getASTContext(), *ND); if (!Sym) return; if (Visit == VisitKind::DeclAndChildren) traverseChildren(D, Sym->children); Results.push_back(std::move(*Sym)); } void traverseChildren(Decl *D, std::vector &Results) { auto *Scope = llvm::dyn_cast(D); if (!Scope) return; for (auto *C : Scope->decls()) traverseDecl(C, Results); } VisitKind shouldVisit(NamedDecl *D) { if (D->isImplicit()) return VisitKind::No; if (auto Func = llvm::dyn_cast(D)) { // Some functions are implicit template instantiations, those should be // ignored. if (auto *Info = Func->getTemplateSpecializationInfo()) { if (!Info->isExplicitInstantiationOrSpecialization()) return VisitKind::No; } // Only visit the function itself, do not visit the children (i.e. // function parameters, etc.) return VisitKind::OnlyDecl; } // Handle template instantiations. We have three cases to consider: // - explicit instantiations, e.g. 'template class std::vector;' // Visit the decl itself (it's present in the code), but not the // children. // - implicit instantiations, i.e. not written by the user. // Do not visit at all, they are not present in the code. // - explicit specialization, e.g. 'template <> class vector {};' // Visit both the decl and its children, both are written in the code. if (auto *TemplSpec = llvm::dyn_cast(D)) { if (TemplSpec->isExplicitInstantiationOrSpecialization()) return TemplSpec->isExplicitSpecialization() ? VisitKind::DeclAndChildren : VisitKind::OnlyDecl; return VisitKind::No; } if (auto *TemplSpec = llvm::dyn_cast(D)) { if (TemplSpec->isExplicitInstantiationOrSpecialization()) return TemplSpec->isExplicitSpecialization() ? VisitKind::DeclAndChildren : VisitKind::OnlyDecl; return VisitKind::No; } // For all other cases, visit both the children and the decl. return VisitKind::DeclAndChildren; } ParsedAST &AST; }; std::vector collectDocSymbols(ParsedAST &AST) { return DocumentOutline(AST).build(); } } // namespace llvm::Expected> getDocumentSymbols(ParsedAST &AST) { return collectDocSymbols(AST); } } // namespace clangd } // namespace clang