2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source path is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the path LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
21 #include <folly/executors/CPUThreadPoolExecutor.h>
22 #include <folly/futures/Future.h>
23 #include <folly/json/dynamic.h>
24 #include <folly/logging/xlog.h>
26 #include "hphp/runtime/base/program-functions.h"
27 #include "hphp/runtime/base/runtime-option.h"
28 #include "hphp/runtime/ext/facts/exception.h"
29 #include "hphp/runtime/ext/facts/fact-extractor.h"
30 #include "hphp/runtime/ext/facts/thread-factory.h"
31 #include "hphp/runtime/vm/unit-parser.h"
32 #include "hphp/util/configs/autoload.h" // @manual=//hphp/util/configs:autoload
33 #include "hphp/util/logger.h"
34 #include "hphp/util/match.h"
35 #include "hphp/util/text-util.h"
42 // Given a string like "foo bla bla bla ... bla bar", returns a
43 // printable string like "foo [1234 bytes omitted] bar", where the
44 // length of the prefix and suffix taken from the string are specified
45 // by `excerpt_len`. Note that the actual output might be a bit
46 // longer, due to escaping (e.g., if the string starts with nulls).
47 std::string
summarized_string(std::string_view blob
, int excerpt_len
) {
48 auto s
= folly::hexlify(blob
);
49 std::string to_encode
;
50 // The 20 bytes of slack is to avoid silly things like:
51 // [...2 bytes omitted...]
52 // where we might as well just print them.
53 if (s
.size() < 2 * excerpt_len
+ 20) {
56 to_encode
= folly::sformat(
57 "{} [...{} bytes omitted...] {}",
58 s
.substr(0, excerpt_len
),
59 s
.size() - 2 * excerpt_len
,
60 s
.substr(s
.size() - excerpt_len
));
62 return ::HPHP::escapeStringForCPP(to_encode
);
65 hackc::FileFacts
decode_facts(const std::string
& blob
) {
67 return hackc::binary_to_facts(blob
);
68 } catch (const std::exception
& e
) {
69 throw FactsExtractionExc
{folly::sformat(
70 "{} - blob is \"{}\"", e
.what(), summarized_string(blob
, 80))};
74 ExtractorFactory
* s_extractorFactory
= nullptr;
// Extractor implementation whose get() schedules facts_binary_from_path(key)
// through folly::via on m_token and hands the result back as a
// folly::SemiFuture<std::string>.
// NOTE(review): m_token is not declared in the lines shown here; presumably it
// is a member initialised from the constructor's `token` -- confirm.
76 struct SimpleExtractor final
: Extractor
{
77 explicit SimpleExtractor(folly::Executor::KeepAlive
<folly::Executor
> token
)
80 ~SimpleExtractor() override
= default;
// `key` is captured by value, so the scheduled task works on its own copy
// rather than on the caller's argument.
82 folly::SemiFuture
<std::string
> get(const PathAndOptionalHash
& key
) override
{
83 return folly::via(m_token
, [key
]() { return facts_binary_from_path(key
); });
// Runs extract_facts for `path` and returns the facts payload as a string.
//
// `path.m_path` is asserted to be absolute. The RepoOptions flags looked up
// for that file are forwarded to extract_facts, together with the hash from
// `path.m_hash`, or an empty string when no hash is set.
//
// The match handlers return `value` for a FactsBinaryString and throw
// FactsExtractionExc, built from the error text, for a std::string.
89 std::string
facts_binary_from_path(const PathAndOptionalHash
& path
) {
90 assertx(path
.m_path
.is_absolute());
92 auto const result
= extract_facts(
94 RepoOptions::forFile(path
.m_path
.c_str()).flags(),
95 path
.m_hash
? *path
.m_hash
: "");
96 return match
<std::string
>(
98 [&](const FactsBinaryString
& r
) { return r
.value
; },
// The explicit std::string return type keeps both handlers' result types
// aligned even though this one only throws.
99 [&](const std::string
& err
) -> std::string
{
100 throw FactsExtractionExc
{err
};
104 void setExtractorFactory(ExtractorFactory
* factory
) {
105 s_extractorFactory
= factory
;
108 std::unique_ptr
<Extractor
> makeExtractor(
109 folly::Executor::KeepAlive
<folly::Executor
> token
) {
110 // If we defined an external Extractor in closed-source code, use that.
111 // Otherwise use the SimpleExtractor.
112 if (s_extractorFactory
&& Cfg::Autoload::EnableExternFactExtractor
) {
113 XLOG(INFO
) << "Creating a external HPHP::Facts::Extractor.";
114 return s_extractorFactory
->make(token
);
116 XLOG(INFO
) << "Creating an internal HPHP::Facts::SimpleExtractor.";
117 return std::make_unique
<SimpleExtractor
>(token
);
// Extracts facts for every entry of `pathsAndHashes`. Each entry's path is
// asserted to be relative and is resolved against `root`.
//
// Work is scheduled on a local folly::CPUThreadPoolExecutor whose threads come
// from make_thread_factory("FactExtractor"). Its size is derived from
// RuntimeOption::EvalFactsWorkers and the number of inputs.
// NOTE(review): the call combining those two values is not visible here --
// presumably the smaller of the two; confirm.
//
// Per file, the visible continuation chain:
//   1. throws FactsExtractionExc{"No hash provided"} when no hash was given,
//      otherwise asks the extractor for the facts blob;
//   2. decodes the blob and throws FactsExtractionExc when the decoded
//      sha1sum differs from the given hash;
//   3. if the Try holds no value, retries via facts_binary_from_path() with
//      no hash and decodes that result;
//   4. increments the completed-task counter and logs progress.
//
// Returns the result of folly::collectAll(factsFutures).wait().get(), i.e. a
// folly::Try<FileFacts> for each queued future.
120 std::vector
<folly::Try
<FileFacts
>> facts_from_paths(
121 const std::filesystem::path
& root
,
122 const std::vector
<PathAndOptionalHash
>& pathsAndHashes
) {
123 folly::CPUThreadPoolExecutor exec
{
125 RuntimeOption::EvalFactsWorkers
,
126 static_cast<uint64_t>(pathsAndHashes
.size())),
127 make_thread_factory("FactExtractor")};
129 // If we defined an external Extractor in closed-source code, use that.
130 // Otherwise use the SimpleExtractor.
131 auto extractor
= makeExtractor(folly::getKeepAliveToken(exec
));
// Atomic because it is incremented from the continuations below.
133 std::atomic
<int> completed_tasks
= 0;
134 std::vector
<folly::SemiFuture
<FileFacts
>> factsFutures
;
135 factsFutures
.reserve(pathsAndHashes
.size());
137 XLOGF(INFO
, "Extracting facts for {} files.", pathsAndHashes
.size());
138 for (int i
= 0; i
< pathsAndHashes
.size(); ++i
) {
139 auto const& pathAndHash
= pathsAndHashes
.at(i
);
140 XLOG_EVERY_N(INFO
, 50000) << "Enqueued " << i
<< " out of "
141 << pathsAndHashes
.size() << " updates.";
143 assertx(pathAndHash
.m_path
.is_relative());
// Absolute path under `root`, carrying over the optional hash unchanged.
144 PathAndOptionalHash absPathAndHash
{
145 root
/ pathAndHash
.m_path
, pathAndHash
.m_hash
};
146 factsFutures
.push_back(
// Stage 1: `extractor` is captured by reference, `absPathAndHash` by value.
149 [&extractor
, absPathAndHash
]() {
150 if (UNLIKELY(!absPathAndHash
.m_hash
)) {
151 // We don't know the file's hash yet, so we don't know
152 // which key to use to query memcache. We'll try to extract
153 // facts from disk instead.
154 throw FactsExtractionExc
{"No hash provided"};
156 return extractor
->get(absPathAndHash
);
// Stage 2: decode the blob and verify it against the hash we asked for.
160 std::string
&& factsBinary
) -> hackc::FileFacts
{
161 auto facts
= decode_facts(factsBinary
);
162 auto const& hash
= *absPathAndHash
.m_hash
;
163 if (UNLIKELY(facts
.sha1sum
!= hash
)) {
164 // The hash we got out of memcache doesn't match the hash
165 // we expected. We'll try to extract facts from disk instead.
167 throw FactsExtractionExc
{folly::sformat(
168 "Error extracting {} from memcache: hash '{}' != '{}'",
169 absPathAndHash
.m_path
.native(),
170 std::string
{facts
.sha1sum
},
// Stage 3: pass a successful result through; otherwise report the error and
// fall back to facts_binary_from_path() without a hash.
175 .thenTry([absPathAndHash
](folly::Try
<hackc::FileFacts
>&& facts
) {
176 if (facts
.hasValue()) {
177 return *std::move(facts
);
181 "Error extracting {}: {}",
182 absPathAndHash
.m_path
.native().c_str(),
183 facts
.exception().what().c_str());
184 // There might have been a SHA1 mismatch due to a filesystem
185 // race. Try again without an expected hash.
186 PathAndOptionalHash withoutHash
{absPathAndHash
.m_path
, {}};
187 return decode_facts(facts_binary_from_path(withoutHash
));
// Stage 4: progress accounting only; the Try is forwarded unchanged.
190 .thenTry([&completed_tasks
,
191 &pathsAndHashes
](folly::Try
<FileFacts
>&& facts
) {
192 int completed
= ++completed_tasks
;
193 XLOG_EVERY_N(INFO
, 50000)
194 << "Finished " << completed
<< " out of "
195 << pathsAndHashes
.size() << " updates.";
196 return std::move(facts
);
200 XLOG(INFO
) << "Done spawning facts_from_paths futures.";
// Waits on the collected futures before returning.
// NOTE(review): presumably this wait is what keeps the by-reference captures
// above (extractor, completed_tasks, pathsAndHashes) valid -- confirm.
201 return folly::collectAll(factsFutures
).wait().get();
// Logs the request (root and dbKey.toString()) at INFO level. Then, if a
// factory is registered in s_extractorFactory and
// Cfg::Autoload::EnableExternFactExtractor is set, forwards `root` and `dbKey`
// to that factory's prefetchDb().
204 void prefetchDb(const std::filesystem::path
& root
, const SQLiteKey
& dbKey
) {
205 XLOG(INFO
) << "::prefetchDb " << root
<< " " << dbKey
.toString();
206 if (s_extractorFactory
&& Cfg::Autoload::EnableExternFactExtractor
) {
207 s_extractorFactory
->prefetchDb(root
, dbKey
);