Add file attributes to symbol map
[hiphop-php.git] / hphp / runtime / ext / facts / fact-extractor.cpp
blob60b5644e85166ebd00e874ca71f4d632b6cf85a3
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source path is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the path LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include <algorithm>
18 #include <memory>
19 #include <string>
21 #include <folly/Optional.h>
22 #include <folly/dynamic.h>
23 #include <folly/executors/CPUThreadPoolExecutor.h>
24 #include <folly/futures/Future.h>
26 #include "hphp/runtime/base/program-functions.h"
27 #include "hphp/runtime/base/runtime-option.h"
28 #include "hphp/runtime/ext/facts/exception.h"
29 #include "hphp/runtime/ext/facts/fact-extractor.h"
30 #include "hphp/runtime/ext/facts/thread-factory.h"
31 #include "hphp/runtime/vm/extern-compiler.h"
32 #include "hphp/util/logger.h"
33 #include "hphp/util/match.h"
34 #include "hphp/util/text-util.h"
35 #include "hphp/util/trace.h"
37 TRACE_SET_MOD(facts);
39 namespace HPHP {
40 namespace Facts {
42 namespace {
44 TypeKind fromString(const std::string& str) {
45 if (str == "class") {
46 return TypeKind::Class;
47 } else if (str == "interface") {
48 return TypeKind::Interface;
49 } else if (str == "enum") {
50 return TypeKind::Enum;
51 } else if (str == "trait") {
52 return TypeKind::Trait;
53 } else if (str == "typeAlias") {
54 return TypeKind::TypeAlias;
56 return TypeKind::Unknown;
59 std::vector<std::string> move_str_vec(folly::dynamic* stringList) {
60 if (stringList == nullptr) {
61 return {};
63 std::vector<std::string> ret;
64 ret.reserve(stringList->size());
65 for (auto& item : *stringList) {
66 ret.push_back(std::move(item).getString());
68 return ret;
71 std::vector<Attribute> move_attr_vec(folly::dynamic* attrList) {
72 if (attrList == nullptr) {
73 return {};
76 std::vector<Attribute> ret;
77 for (auto& item : attrList->items()) {
78 Attribute attr;
79 attr.m_name = item.first.getString();
80 for (auto& arg : std::move(item.second)) {
81 attr.m_args.push_back(std::move(arg));
83 ret.push_back(std::move(attr));
85 return ret;
88 std::vector<MethodDetails> move_method_vec(folly::dynamic* methodList) {
89 if (!methodList) {
90 return {};
92 std::vector<MethodDetails> ret;
93 ret.reserve(methodList->size());
94 for (auto& [method, details] : methodList->items()) {
95 ret.push_back(MethodDetails{
96 .m_name = method.getString(),
97 .m_attributes = move_attr_vec(details.get_ptr("attributes"))});
99 return ret;
102 std::vector<TypeDetails> move_type_vec(folly::dynamic* types) {
103 if (types == nullptr) {
104 return {};
106 std::vector<TypeDetails> ret;
107 for (auto& type : *types) {
108 auto typeKind = fromString(std::move(type.at("kindOf")).getString());
109 ret.push_back(TypeDetails{
110 .m_name = std::move(type.at("name")).getString(),
111 .m_kind = typeKind,
112 .m_flags = static_cast<int>(std::move(type.at("flags")).getInt()),
113 .m_baseTypes = move_str_vec(type.get_ptr("baseTypes")),
114 .m_attributes = move_attr_vec(type.get_ptr("attributes")),
115 .m_requireExtends = move_str_vec(type.get_ptr("requireExtends")),
116 .m_requireImplements = move_str_vec(type.get_ptr("requireImplements")),
117 .m_methods = move_method_vec(type.get_ptr("methods"))});
119 return ret;
122 FileFacts make_file_facts(folly::dynamic facts) {
123 try {
124 return {
125 .m_types = move_type_vec(facts.get_ptr("types")),
126 .m_functions = move_str_vec(facts.get_ptr("functions")),
127 .m_constants = move_str_vec(facts.get_ptr("constants")),
128 .m_attributes = move_attr_vec(facts.get_ptr("fileAttributes")),
129 .m_sha1hex = std::move(facts.at("sha1sum")).getString()};
130 } catch (const folly::TypeError& e) {
131 throw FactsExtractionExc{e.what()};
135 // Given a string like "foo bla bla bla ... bla bar", returns a
136 // printable string like "foo [1234 bytes omitted] bar", where the
137 // length of the prefix and suffix taken from the string are specified
138 // by `excerpt_len`. Note that the actual output might be a bit
139 // longer, due to escaping (e.g., if the string starts with nulls).
140 std::string summarized_string(std::string_view s, int excerpt_len) {
141 std::string to_encode;
142 // The 20 bytes of slack is to avoid silly things like:
143 // [...2 bytes omitted...]
144 // where we might as well just print them.
145 if (s.size() < 2 * excerpt_len + 20) {
146 to_encode = s;
147 } else {
148 to_encode = folly::sformat(
149 "{} [...{} bytes omitted...] {}",
150 s.substr(0, excerpt_len),
151 s.size() - 2 * excerpt_len,
152 s.substr(s.size() - excerpt_len));
154 return ::HPHP::escapeStringForCPP(to_encode);
157 folly::dynamic parse_json(const std::string& json) {
158 try {
159 return folly::parseJson(json);
160 } catch (const folly::json::parse_error& e) {
161 throw FactsExtractionExc{folly::sformat(
162 "{} - JSON is \"{}\"", e.what(), summarized_string(json, 80))};
166 ExtractorFactory s_extractorFactory = nullptr;
168 struct SimpleExtractor final : public Extractor {
169 explicit SimpleExtractor(folly::Executor& exec) : Extractor{exec} {
172 ~SimpleExtractor() override = default;
174 folly::SemiFuture<std::string> get(const PathAndHash& key) override {
175 return folly::via(
176 &m_exec, [path = key.m_path]() { return facts_json_from_path(path); });
180 } // namespace
182 std::string facts_json_from_path(const folly::fs::path& path) {
183 assertx(path.is_absolute());
184 auto parser = acquire_facts_parser();
186 auto const result = extract_facts(
187 *parser, path.native(), "", 0, RepoOptions::forFile(path.c_str()));
188 return match<std::string>(
189 result,
190 [&](const FactsJSONString& r) { return r.value; },
191 [&](const std::string& err) -> std::string {
192 throw FactsExtractionExc{err};
196 void setExtractorFactory(ExtractorFactory factory) {
197 s_extractorFactory = factory;
200 std::vector<folly::Try<FileFacts>> facts_from_paths(
201 const folly::fs::path& root,
202 const std::vector<PathAndHash>& pathsAndHashes) {
204 folly::CPUThreadPoolExecutor exec{
205 std::min(
206 RuntimeOption::EvalHackCompilerWorkers,
207 static_cast<uint64_t>(pathsAndHashes.size())),
208 make_thread_factory("FactExtractor")};
210 // If we defined a fancy memcache Extractor in closed-source code, use that.
211 // Otherwise use the SimpleExtractor.
212 auto extractor = [&]() -> std::unique_ptr<Extractor> {
213 if (s_extractorFactory) {
214 FTRACE(3, "Creating a custom HPHP::Facts::Extractor.\n");
215 return s_extractorFactory(exec);
216 } else {
217 FTRACE(3, "Creating a HPHP::Facts::SimpleExtractor.\n");
218 return std::make_unique<SimpleExtractor>(exec);
220 }();
222 std::vector<folly::SemiFuture<FileFacts>> factsFutures;
223 factsFutures.reserve(pathsAndHashes.size());
224 for (auto const& pathAndHash : pathsAndHashes) {
225 assertx(pathAndHash.m_path.is_relative());
226 PathAndHash absPathAndHash{root / pathAndHash.m_path, pathAndHash.m_hash};
227 auto factsFromCacheFuture =
228 [&exec, &extractor, absPathAndHash]() -> folly::Future<folly::dynamic> {
229 if (UNLIKELY(!absPathAndHash.m_hash)) {
230 // We don't know the file's hash yet, so we don't know which key to use
231 // to query memcache. We'll try to extract facts from disk instead.
232 throw FactsExtractionExc{"No hash provided"};
234 return extractor->get(absPathAndHash)
235 .via(&exec)
236 .thenValue(
237 [absPathAndHash](std::string&& factsJson) -> folly::dynamic {
238 auto facts = parse_json(factsJson);
239 auto const& hash = *absPathAndHash.m_hash;
240 if (UNLIKELY(facts.at("sha1sum").getString() != hash)) {
241 // The hash we got out of memcache doesn't match the hash
242 // we expected. We'll try to extract facts from disk
243 // instead.
244 throw FactsExtractionExc{folly::sformat(
245 "Error extracting {} from memcache: hash '{}' != '{}'",
246 absPathAndHash.m_path.native(),
247 facts.at("sha1sum").getString(),
248 hash)};
250 return facts;
252 }();
253 factsFutures.emplace_back(
254 std::move(factsFromCacheFuture)
255 .thenTry([absPathAndHash = std::move(absPathAndHash)](
256 folly::Try<folly::dynamic>&& facts) {
257 if (facts.hasValue()) {
258 return *std::move(facts);
259 } else {
260 Logger::Info(
261 "Error extracting %s: %s\n",
262 absPathAndHash.m_path.native().c_str(),
263 facts.exception().what().c_str());
264 return parse_json(facts_json_from_path(absPathAndHash.m_path));
267 .thenValue([](folly::dynamic&& facts) {
268 return make_file_facts(std::move(facts));
269 }));
272 return folly::collectAll(factsFutures).wait().get();
275 } // namespace Facts
276 } // namespace HPHP