Restructure build time bytecode caching:
[hiphop-php.git] / hphp / compiler / package.cpp
blobcc4f1245a01c85bd6f6a26200aad0f7e72f0a10b
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/compiler/package.h"
19 #include <fstream>
20 #include <map>
21 #include <memory>
22 #include <set>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <utility>
26 #include <vector>
28 #include <folly/String.h>
29 #include <folly/portability/Dirent.h>
30 #include <folly/portability/Unistd.h>
32 #include "hphp/compiler/analysis/analysis_result.h"
33 #include "hphp/compiler/json.h"
34 #include "hphp/compiler/option.h"
35 #include "hphp/runtime/base/execution-context.h"
36 #include "hphp/runtime/base/file-util-defs.h"
37 #include "hphp/runtime/base/file-util.h"
38 #include "hphp/runtime/base/program-functions.h"
39 #include "hphp/runtime/vm/as.h"
40 #include "hphp/runtime/vm/extern-compiler.h"
41 #include "hphp/runtime/vm/repo.h"
42 #include "hphp/runtime/vm/unit-emitter.h"
43 #include "hphp/util/exception.h"
44 #include "hphp/util/job-queue.h"
45 #include "hphp/util/logger.h"
46 #include "hphp/util/process.h"
47 #include "hphp/zend/zend-string.h"
49 using namespace HPHP;
50 using std::set;
52 ///////////////////////////////////////////////////////////////////////////////
54 Package::Package(const char* root, bool /*bShortTags*/ /* = true */)
55 : m_dispatcher(nullptr), m_lineCount(0), m_charCount(0) {
56 m_root = FileUtil::normalizeDir(root);
57 m_ar = std::make_shared<AnalysisResult>();
58 m_fileCache = std::make_shared<FileCache>();
61 void Package::addAllFiles(bool force) {
62 if (Option::PackageDirectories.empty() && Option::PackageFiles.empty()) {
63 addDirectory("/", force);
64 } else {
65 for (auto const& dir : Option::PackageDirectories) {
66 addDirectory(dir, force);
68 for (auto const& file : Option::PackageFiles) {
69 addSourceFile(file);
74 void Package::addInputList(const std::string& listFileName) {
75 assert(!listFileName.empty());
76 auto const f = fopen(listFileName.c_str(), "r");
77 if (f == nullptr) {
78 throw Exception("Unable to open %s: %s", listFileName.c_str(),
79 folly::errnoStr(errno).c_str());
81 char fileName[PATH_MAX];
82 while (fgets(fileName, sizeof(fileName), f)) {
83 int len = strlen(fileName);
84 if (fileName[len - 1] == '\n') fileName[len - 1] = '\0';
85 len = strlen(fileName);
86 if (len) {
87 if (FileUtil::isDirSeparator(fileName[len - 1])) {
88 addDirectory(fileName, false);
89 } else {
90 addSourceFile(fileName);
94 fclose(f);
97 void Package::addStaticFile(const std::string& fileName) {
98 assert(!fileName.empty());
99 m_extraStaticFiles.insert(fileName);
102 void Package::addStaticDirectory(const std::string& path) {
103 m_staticDirectories.insert(path);
106 void Package::addDirectory(const std::string &path, bool force) {
107 m_directories[path] |= force;
110 std::shared_ptr<FileCache> Package::getFileCache() {
111 for (auto const& dir : m_directories) {
112 std::vector<std::string> files;
113 FileUtil::find(files, m_root, dir.first, /* php */ false,
114 &Option::PackageExcludeStaticDirs,
115 &Option::PackageExcludeStaticFiles);
116 Option::FilterFiles(files, Option::PackageExcludeStaticPatterns);
117 for (auto& file : files) {
118 auto const rpath = file.substr(m_root.size());
119 if (!m_fileCache->fileExists(rpath.c_str())) {
120 Logger::Verbose("saving %s", file.c_str());
121 m_fileCache->write(rpath.c_str(), file.c_str());
125 for (auto const& dir : m_staticDirectories) {
126 std::vector<std::string> files;
127 FileUtil::find(files, m_root, dir, /* php */ false);
128 for (auto& file : files) {
129 auto const rpath = file.substr(m_root.size());
130 if (!m_fileCache->fileExists(rpath.c_str())) {
131 Logger::Verbose("saving %s", file.c_str());
132 m_fileCache->write(rpath.c_str(), file.c_str());
136 for (auto const& file : m_extraStaticFiles) {
137 if (!m_fileCache->fileExists(file.c_str())) {
138 auto const fullpath = m_root + file;
139 Logger::Verbose("saving %s", fullpath.c_str());
140 m_fileCache->write(file.c_str(), fullpath.c_str());
144 for (auto const& pair : m_discoveredStaticFiles) {
145 auto const file = pair.first.c_str();
146 if (!m_fileCache->fileExists(file)) {
147 const char *fullpath = pair.second.c_str();
148 Logger::Verbose("saving %s", fullpath[0] ? fullpath : file);
149 if (fullpath[0]) {
150 m_fileCache->write(file, fullpath);
151 } else {
152 m_fileCache->write(file);
157 return m_fileCache;
160 ///////////////////////////////////////////////////////////////////////////////
162 namespace {
164 struct ParseItem {
165 ParseItem() : fileName(nullptr), check(false), force(false) {}
166 ParseItem(const std::string* file, bool check) :
167 fileName(file),
168 check(check),
169 force(false)
171 ParseItem(const std::string& dir, bool force) :
172 dirName(dir),
173 fileName(nullptr),
174 check(false),
175 force(force)
177 std::string dirName;
178 const std::string* fileName;
179 bool check; // whether its an error if the file isn't found
180 bool force; // true to skip filters
183 struct ParserWorker
184 : JobQueueWorker<ParseItem, Package*, true, true>
186 bool m_ret{true};
187 void doJob(JobType job) override {
188 auto const ret = [&] {
189 try {
190 if (job.fileName) {
191 return m_context->parseImpl(job.fileName);
193 m_context->addSourceDirectory(job.dirName, job.force);
194 return true;
195 } catch (Exception& e) {
196 Logger::Error(e.getMessage());
197 return false;
198 } catch (...) {
199 Logger::Error("Fatal: An unexpected exception was thrown");
200 return false;
202 }();
203 if (!ret && job.check) {
204 Logger::Error("Fatal: Unable to stat/parse %s", job.fileName->c_str());
205 m_ret = false;
209 void onThreadEnter() override {
210 g_context.getCheck();
212 void onThreadExit() override {
213 hphp_memory_cleanup();
217 using ParserDispatcher = JobQueueDispatcher<ParserWorker>;
221 ///////////////////////////////////////////////////////////////////////////////
223 void Package::addSourceFile(const std::string& fileName,
224 bool check /* = false */) {
225 if (!fileName.empty()) {
226 auto canonFileName =
227 FileUtil::canonicalize(String(fileName)).toCppString();
228 auto const file = [&] {
229 Lock lock(m_mutex);
230 auto const info = m_filesToParse.insert(canonFileName);
231 return info.second && m_dispatcher ? &*info.first : nullptr;
232 }();
234 if (file) {
235 static_cast<ParserDispatcher*>(m_dispatcher)->enqueue({file, check});
240 void Package::addSourceDirectory(const std::string& path,
241 bool force) {
242 FileUtil::find(
243 m_root, path, /* php */ true,
244 [&] (const std::string& name, bool dir) {
245 if (!dir) {
246 if (!force) {
247 if (Option::PackageExcludeFiles.count(name) ||
248 Option::IsFileExcluded(name, Option::PackageExcludePatterns)) {
249 return false;
252 addSourceFile(name, true);
253 return true;
255 if (!force && Option::PackageExcludeDirs.count(name)) {
256 return false;
258 if (path == name ||
259 (name.size() == path.size() + 1 &&
260 name.back() == FileUtil::getDirSeparator() &&
261 name.compare(0, path.size(), path) == 0)) {
262 // find immediately calls us back with a canonicalized version
263 // of path; we want to ignore that, and let it proceed to
264 // iterate the directory.
265 return true;
267 // Process the directory as a new job
268 static_cast<ParserDispatcher*>(m_dispatcher)->enqueue({name, force});
269 // Don't iterate the directory in this job.
270 return false;
274 bool Package::parse(bool check, std::thread& unit_emitter_thread) {
275 if (m_filesToParse.empty() && m_directories.empty()) {
276 return true;
279 auto const threadCount = Option::ParserThreadCount <= 0 ?
280 1 : Option::ParserThreadCount;
282 // process system lib files which were deferred during process-init
283 // (if necessary).
284 auto syslib_ues = m_ar->getHhasFiles();
285 if (RuntimeOption::RepoCommit &&
286 RuntimeOption::RepoLocalPath.size() &&
287 RuntimeOption::RepoLocalMode == "rw") {
288 m_ueq.emplace();
289 // note useHHBBC is needed because when program is set, m_ar might
290 // be cleared before the thread finishes running, so we would
291 // segfault trying to check it. Note that when program is *not*
292 // set, we wait for the thread to finish before clearing m_ar (so
293 // the guarded addHhasFile is safe).
294 unit_emitter_thread = std::thread {
295 [&, useHHBBC{m_ar->program().get() != nullptr}] {
296 HphpSessionAndThread _(Treadmill::SessionKind::CompilerEmit);
297 static const unsigned kBatchSize = 8;
298 std::vector<std::unique_ptr<UnitEmitter>> batched_ues;
299 folly::Optional<Timer> timer;
301 auto commitSome = [&] {
302 batchCommit(batched_ues);
303 if (!useHHBBC) {
304 for (auto& ue : batched_ues) {
305 m_ar->addHhasFile(std::move(ue));
308 batched_ues.clear();
311 while (auto ue = m_ueq->pop()) {
312 if (!timer) timer.emplace(Timer::WallTime, "Caching parsed units...");
313 batched_ues.push_back(std::move(ue));
314 if (batched_ues.size() == kBatchSize) {
315 commitSome();
318 if (batched_ues.size()) commitSome();
323 if (RuntimeOption::RepoLocalPath.size() &&
324 RuntimeOption::RepoLocalMode != "--") {
325 auto units = Repo::get().enumerateUnits(RepoIdLocal, false);
326 for (auto& elm : units) {
327 m_locally_cached_bytecode.insert(elm.first);
331 HphpSession _(Treadmill::SessionKind::CompilerEmit);
333 // If we're using the hack compiler, make sure it agrees on the thread count.
334 RuntimeOption::EvalHackCompilerWorkers = threadCount;
335 ParserDispatcher dispatcher { threadCount, threadCount, 0, false, this };
337 m_dispatcher = &dispatcher;
339 auto const files = std::move(m_filesToParse);
341 dispatcher.start();
342 for (auto const& file : files) {
343 addSourceFile(file, check);
345 for (auto const& dir : m_directories) {
346 addSourceDirectory(dir.first, dir.second);
349 for (auto& ue : syslib_ues) {
350 addUnitEmitter(std::move(ue));
352 syslib_ues.clear();
354 dispatcher.waitEmpty();
356 if (m_ueq) {
357 m_ueq->push(nullptr);
358 if (!m_ar->program().get()) {
359 unit_emitter_thread.join();
363 m_dispatcher = nullptr;
365 auto workers = dispatcher.getWorkers();
366 for (unsigned int i = 0; i < workers.size(); i++) {
367 ParserWorker *worker = workers[i];
368 if (!worker->m_ret) return false;
371 return true;
374 void Package::addUnitEmitter(std::unique_ptr<UnitEmitter> ue) {
375 for (auto& ent : ue->m_symbol_refs) {
376 m_ar->parseOnDemandBy(ent.first, ent.second);
378 if (m_ar->program().get()) {
379 HHBBC::add_unit_to_program(ue.get(), *m_ar->program());
381 // m_repoId != -1 means it was read from the local repo - so there's
382 // no need to write it back.
383 if (m_ueq && ue->m_repoId == -1) {
384 m_ueq->push(std::move(ue));
385 } else if (!m_ar->program().get()) {
386 m_ar->addHhasFile(std::move(ue));
391 * Note that the string pointed to by fileName must live until the
392 * Package is destroyed. Its expected to be an element of
393 * m_filesToParse.
395 bool Package::parseImpl(const std::string* fileName) {
396 if (fileName->empty()) return false;
398 std::string fullPath;
399 if (FileUtil::isDirSeparator(fileName->front())) {
400 fullPath = *fileName;
401 } else {
402 fullPath = m_root + *fileName;
405 struct stat sb;
406 if (stat(fullPath.c_str(), &sb)) {
407 if (fullPath.find(' ') == std::string::npos) {
408 Logger::Error("Unable to stat file %s", fullPath.c_str());
410 return false;
412 if ((sb.st_mode & S_IFMT) == S_IFDIR) {
413 Logger::Error("Unable to parse directory: %s", fullPath.c_str());
414 return false;
417 if (RuntimeOption::EvalAllowHhas) {
418 if (fileName->size() > 5 &&
419 !fileName->compare(fileName->size() - 5, std::string::npos, ".hhas")) {
420 std::ifstream s(*fileName);
421 std::string content {
422 std::istreambuf_iterator<char>(s), std::istreambuf_iterator<char>()
424 SHA1 sha1{string_sha1(content)};
426 std::unique_ptr<UnitEmitter> ue{
427 assemble_string(content.data(), content.size(), fileName->c_str(), sha1,
428 Native::s_noNativeFuncs)
430 addUnitEmitter(std::move(ue));
431 return true;
435 auto report = [&] (int lines) {
436 struct stat fst;
437 // @lint-ignore HOWTOEVEN1
438 stat(fullPath.c_str(), &fst);
440 Lock lock(m_mutex);
441 m_lineCount += lines;
442 m_charCount += fst.st_size;
443 if (!m_extraStaticFiles.count(*fileName) &&
444 !m_discoveredStaticFiles.count(*fileName)) {
445 if (Option::CachePHPFile) {
446 m_discoveredStaticFiles[*fileName] = fullPath;
447 } else {
448 m_discoveredStaticFiles[*fileName] = "";
453 std::ifstream s(fullPath);
454 std::string content {
455 std::istreambuf_iterator<char>(s), std::istreambuf_iterator<char>() };
457 auto const& options = RepoOptions::forFile(fullPath.data());
458 auto const sha1 = SHA1{mangleUnitSha1(string_sha1(content),
459 *fileName,
460 options)};
461 if (RuntimeOption::RepoLocalPath.size() &&
462 RuntimeOption::RepoLocalMode != "--" &&
463 m_locally_cached_bytecode.count(*fileName)) {
464 // Try the repo; if it's not already there, invoke the compiler.
465 if (auto ue = Repo::get().urp().loadEmitter(
466 *fileName, sha1, Native::s_noNativeFuncs
467 )) {
468 addUnitEmitter(std::move(ue));
469 return true;
473 // Invoke external compiler. If it fails to compile the file we log an
474 // error and and skip it.
475 auto uc = UnitCompiler::create(
476 content.data(), content.size(), fileName->c_str(), sha1,
477 Native::s_noNativeFuncs, false, options);
478 assertx(uc);
479 try {
480 auto ue = uc->compile(true);
481 if (ue && !ue->m_ICE) {
482 addUnitEmitter(std::move(ue));
483 report(0);
484 return true;
485 } else {
486 Logger::Error(
487 "Unable to compile using %s compiler: %s",
488 uc->getName(),
489 fullPath.c_str());
490 return false;
492 } catch (const BadCompilerException& exc) {
493 Logger::Error("Bad external compiler: %s", exc.what());
494 return false;
498 ///////////////////////////////////////////////////////////////////////////////
500 void Package::saveStatsToFile(const char *filename, int totalSeconds) const {
501 std::ofstream f(filename);
502 if (f) {
503 JSON::CodeError::OutputStream o(f);
504 JSON::CodeError::MapStream ms(o);
506 ms.add("FileCount", getFileCount())
507 .add("LineCount", getLineCount())
508 .add("CharCount", getCharCount())
509 .add("TotalTime", totalSeconds);
511 if (getLineCount()) {
512 ms.add("AvgCharPerLine", getCharCount() / getLineCount());
515 ms.done();
516 f.close();