Added in some missing fields (columns) to watchman_events tables
[hiphop-php.git] / hphp / runtime / ext / facts / fact-extractor.cpp
blob278f7169d12a9473d1cf18031860a580b5f15b35
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source path is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the path LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include <algorithm>
18 #include <memory>
19 #include <string>
21 #include <folly/executors/CPUThreadPoolExecutor.h>
22 #include <folly/futures/Future.h>
23 #include <folly/json/dynamic.h>
24 #include <folly/logging/xlog.h>
26 #include "hphp/runtime/base/program-functions.h"
27 #include "hphp/runtime/base/runtime-option.h"
28 #include "hphp/runtime/ext/facts/exception.h"
29 #include "hphp/runtime/ext/facts/fact-extractor.h"
30 #include "hphp/runtime/ext/facts/thread-factory.h"
31 #include "hphp/runtime/vm/unit-parser.h"
32 #include "hphp/util/configs/autoload.h" // @manual=//hphp/util/configs:autoload
33 #include "hphp/util/logger.h"
34 #include "hphp/util/match.h"
35 #include "hphp/util/text-util.h"
37 namespace HPHP {
38 namespace Facts {
40 namespace {
42 // Given a string like "foo bla bla bla ... bla bar", returns a
43 // printable string like "foo [1234 bytes omitted] bar", where the
44 // length of the prefix and suffix taken from the string are specified
45 // by `excerpt_len`. Note that the actual output might be a bit
46 // longer, due to escaping (e.g., if the string starts with nulls).
47 std::string summarized_string(std::string_view blob, int excerpt_len) {
48 auto s = folly::hexlify(blob);
49 std::string to_encode;
50 // The 20 bytes of slack is to avoid silly things like:
51 // [...2 bytes omitted...]
52 // where we might as well just print them.
53 if (s.size() < 2 * excerpt_len + 20) {
54 to_encode = s;
55 } else {
56 to_encode = folly::sformat(
57 "{} [...{} bytes omitted...] {}",
58 s.substr(0, excerpt_len),
59 s.size() - 2 * excerpt_len,
60 s.substr(s.size() - excerpt_len));
62 return ::HPHP::escapeStringForCPP(to_encode);
65 hackc::FileFacts decode_facts(const std::string& blob) {
66 try {
67 return hackc::binary_to_facts(blob);
68 } catch (const std::exception& e) {
69 throw FactsExtractionExc{folly::sformat(
70 "{} - blob is \"{}\"", e.what(), summarized_string(blob, 80))};
74 ExtractorFactory* s_extractorFactory = nullptr;
76 struct SimpleExtractor final : Extractor {
77 explicit SimpleExtractor(folly::Executor::KeepAlive<folly::Executor> token)
78 : Extractor{token} {}
80 ~SimpleExtractor() override = default;
82 folly::SemiFuture<std::string> get(const PathAndOptionalHash& key) override {
83 return folly::via(m_token, [key]() { return facts_binary_from_path(key); });
87 } // namespace
89 std::string facts_binary_from_path(const PathAndOptionalHash& path) {
90 assertx(path.m_path.is_absolute());
92 auto const result = extract_facts(
93 path.m_path.native(),
94 RepoOptions::forFile(path.m_path.c_str()).flags(),
95 path.m_hash ? *path.m_hash : "");
96 return match<std::string>(
97 result,
98 [&](const FactsBinaryString& r) { return r.value; },
99 [&](const std::string& err) -> std::string {
100 throw FactsExtractionExc{err};
104 void setExtractorFactory(ExtractorFactory* factory) {
105 s_extractorFactory = factory;
108 std::unique_ptr<Extractor> makeExtractor(
109 folly::Executor::KeepAlive<folly::Executor> token) {
110 // If we defined an external Extractor in closed-source code, use that.
111 // Otherwise use the SimpleExtractor.
112 if (s_extractorFactory && Cfg::Autoload::EnableExternFactExtractor) {
113 XLOG(INFO) << "Creating a external HPHP::Facts::Extractor.";
114 return s_extractorFactory->make(token);
116 XLOG(INFO) << "Creating an internal HPHP::Facts::SimpleExtractor.";
117 return std::make_unique<SimpleExtractor>(token);
120 std::vector<folly::Try<FileFacts>> facts_from_paths(
121 const std::filesystem::path& root,
122 const std::vector<PathAndOptionalHash>& pathsAndHashes) {
123 folly::CPUThreadPoolExecutor exec{
124 std::min(
125 RuntimeOption::EvalFactsWorkers,
126 static_cast<uint64_t>(pathsAndHashes.size())),
127 make_thread_factory("FactExtractor")};
129 // If we defined an external Extractor in closed-source code, use that.
130 // Otherwise use the SimpleExtractor.
131 auto extractor = makeExtractor(folly::getKeepAliveToken(exec));
133 std::atomic<int> completed_tasks = 0;
134 std::vector<folly::SemiFuture<FileFacts>> factsFutures;
135 factsFutures.reserve(pathsAndHashes.size());
137 XLOGF(INFO, "Extracting facts for {} files.", pathsAndHashes.size());
138 for (int i = 0; i < pathsAndHashes.size(); ++i) {
139 auto const& pathAndHash = pathsAndHashes.at(i);
140 XLOG_EVERY_N(INFO, 50000) << "Enqueued " << i << " out of "
141 << pathsAndHashes.size() << " updates.";
143 assertx(pathAndHash.m_path.is_relative());
144 PathAndOptionalHash absPathAndHash{
145 root / pathAndHash.m_path, pathAndHash.m_hash};
146 factsFutures.push_back(
147 folly::via(
148 &exec,
149 [&extractor, absPathAndHash]() {
150 if (UNLIKELY(!absPathAndHash.m_hash)) {
151 // We don't know the file's hash yet, so we don't know
152 // which key to use to query memcache. We'll try to extract
153 // facts from disk instead.
154 throw FactsExtractionExc{"No hash provided"};
156 return extractor->get(absPathAndHash);
158 .thenValue(
159 [absPathAndHash](
160 std::string&& factsBinary) -> hackc::FileFacts {
161 auto facts = decode_facts(factsBinary);
162 auto const& hash = *absPathAndHash.m_hash;
163 if (UNLIKELY(facts.sha1sum != hash)) {
164 // The hash we got out of memcache doesn't match the hash
165 // we expected. We'll try to extract facts from disk
166 // instead.
167 throw FactsExtractionExc{folly::sformat(
168 "Error extracting {} from memcache: hash '{}' != '{}'",
169 absPathAndHash.m_path.native(),
170 std::string{facts.sha1sum},
171 hash)};
173 return facts;
175 .thenTry([absPathAndHash](folly::Try<hackc::FileFacts>&& facts) {
176 if (facts.hasValue()) {
177 return *std::move(facts);
178 } else {
179 XLOGF(
180 WARN,
181 "Error extracting {}: {}",
182 absPathAndHash.m_path.native().c_str(),
183 facts.exception().what().c_str());
184 // There might have been a SHA1 mismatch due to a filesystem
185 // race. Try again without an expected hash.
186 PathAndOptionalHash withoutHash{absPathAndHash.m_path, {}};
187 return decode_facts(facts_binary_from_path(withoutHash));
190 .thenTry([&completed_tasks,
191 &pathsAndHashes](folly::Try<FileFacts>&& facts) {
192 int completed = ++completed_tasks;
193 XLOG_EVERY_N(INFO, 50000)
194 << "Finished " << completed << " out of "
195 << pathsAndHashes.size() << " updates.";
196 return std::move(facts);
197 }));
200 XLOG(INFO) << "Done spawning facts_from_paths futures.";
201 return folly::collectAll(factsFutures).wait().get();
204 void prefetchDb(const std::filesystem::path& root, const SQLiteKey& dbKey) {
205 XLOG(INFO) << "::prefetchDb " << root << " " << dbKey.toString();
206 if (s_extractorFactory && Cfg::Autoload::EnableExternFactExtractor) {
207 s_extractorFactory->prefetchDb(root, dbKey);
211 } // namespace Facts
212 } // namespace HPHP