Updating submodules
[hiphop-php.git] / hphp / compiler / compiler.cpp
blobde6fb90cba119dd58ed86a0857cf629f92d30783
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/compiler/compiler.h"
19 #include "hphp/compiler/option.h"
20 #include "hphp/compiler/package.h"
22 #include "hphp/hack/src/hackc/ffi_bridge/compiler_ffi.rs.h"
24 #include "hphp/hhbbc/hhbbc.h"
25 #include "hphp/hhbbc/misc.h"
26 #include "hphp/hhbbc/options.h"
28 #include "hphp/runtime/base/config.h"
29 #include "hphp/runtime/base/configs/configs.h"
30 #include "hphp/runtime/base/configs/repo-global-data-generated.h"
31 #include "hphp/runtime/base/file-util.h"
32 #include "hphp/runtime/base/ini-setting.h"
33 #include "hphp/runtime/base/preg.h"
34 #include "hphp/runtime/base/program-functions.h"
35 #include "hphp/runtime/base/variable-serializer.h"
36 #include "hphp/runtime/version.h"
38 #include "hphp/runtime/vm/builtin-symbol-map.h"
39 #include "hphp/runtime/vm/disas.h"
40 #include "hphp/runtime/vm/preclass-emitter.h"
41 #include "hphp/runtime/vm/repo-autoload-map-builder.h"
42 #include "hphp/runtime/vm/repo-global-data.h"
43 #include "hphp/runtime/vm/type-alias-emitter.h"
44 #include "hphp/runtime/vm/unit-emitter.h"
46 #include "hphp/util/async-func.h"
47 #include "hphp/util/build-info.h"
48 #include "hphp/util/configs/php7.h"
49 #include "hphp/util/current-executable.h"
50 #include "hphp/util/exception.h"
51 #include "hphp/util/hdf.h"
52 #include "hphp/util/job-queue.h"
53 #include "hphp/util/logger.h"
54 #include "hphp/util/process.h"
55 #include "hphp/util/process-exec.h"
56 #include "hphp/util/rds-local.h"
57 #include "hphp/util/text-util.h"
58 #include "hphp/util/timer.h"
59 #ifndef _MSC_VER
60 #include "hphp/util/light-process.h"
61 #endif
63 #include "hphp/hhvm/process-init.h"
65 #include <sys/types.h>
66 #ifndef _MSC_VER
67 #include <sys/wait.h>
68 #include <dlfcn.h>
69 #endif
71 #include <boost/algorithm/string/replace.hpp>
72 #include <boost/program_options/options_description.hpp>
73 #include <boost/program_options/positional_options.hpp>
74 #include <boost/program_options/variables_map.hpp>
75 #include <boost/program_options/parsers.hpp>
77 #include <cerrno>
78 #include <exception>
79 #include <filesystem>
80 #include <fstream>
82 #include <folly/portability/SysStat.h>
84 using namespace boost::program_options;
86 namespace coro = folly::coro;
88 namespace HPHP {
90 using namespace extern_worker;
92 ///////////////////////////////////////////////////////////////////////////////
94 namespace {
96 ///////////////////////////////////////////////////////////////////////////////
// Settings for a single repo build. Most fields are bound to command-line
// options in prepareOptions(); push_phases and matched_overrides are filled
// in by applyBuildOverrides().
struct CompilerOptions {
  std::string outputDir;
  std::vector<std::string> config;
  std::vector<std::string> confStrings;
  std::vector<std::string> iniStrings;
  std::string repoOptionsDir;
  std::string inputDir;
  std::vector<std::string> inputs;
  std::string inputList;
  std::vector<std::string> dirs;
  std::vector<std::string> excludeDirs;
  std::vector<std::string> excludeFiles;
  std::vector<std::string> excludePatterns;
  std::vector<std::string> excludeStaticDirs;
  std::vector<std::string> excludeStaticFiles;
  std::vector<std::string> excludeStaticPatterns;
  std::vector<std::string> cfiles;
  std::vector<std::string> cdirs;
  std::string push_phases;
  std::string matched_overrides;
  // Scalars get explicit defaults (the same values the option parser uses)
  // so the struct is never observed with indeterminate values, e.g. when
  // command-line parsing fails before the options are stored.
  int logLevel{-1};
  std::string filecache;
  bool coredump{false};
  std::string ondemandEdgesPath;
};
124 ///////////////////////////////////////////////////////////////////////////////
126 void applyBuildOverrides(IniSetting::Map& ini,
127 Hdf& config,
128 CompilerOptions& po) {
129 std::string push_phases = Config::GetString(ini, config, "Build.PushPhases");
130 po.push_phases = push_phases;
131 // convert push phases to newline-separated, to make matching them less
132 // error-prone.
133 replaceAll(push_phases, ",", "\n");
134 bool loggedOnce = false;
136 for (Hdf hdf = config["Overrides"].firstChild();
137 hdf.exists();
138 hdf = hdf.next()) {
139 if (!loggedOnce) {
140 Logger::Info(folly::sformat(
141 "Matching build overrides using: push_phases='{}'",
142 po.push_phases));
143 loggedOnce = true;
145 if (Config::matchHdfPattern(push_phases, ini, hdf, "push_phase" , "m")) {
146 Logger::Info(folly::sformat("Matched override: {}", hdf.getName()));
147 folly::format(
148 &po.matched_overrides,
149 "{}{}",
150 po.matched_overrides.empty() ? "" : ",",
151 hdf.getName()
154 if (hdf.exists("clear")) {
155 std::vector<std::string> list;
156 hdf["clear"].configGet(list);
157 for (auto const& s : list) {
158 config.remove(s);
161 config.copy(hdf["overwrite"]);
162 // no break here, so we can continue to match more overrides
164 hdf["overwrite"].setVisited(); // avoid lint complaining
165 if (hdf.exists("clear")) {
166 // when the tier does not match, "clear" is not accessed
167 // mark it visited, so the linter does not complain
168 hdf["clear"].setVisited();
173 // Parse queryStr as a JSON-encoded watchman query expression, adding the the
174 // directories specified in the query to package. Only supports 'expression'
175 // queries and the 'dirname' term.
176 bool addAutoloadQueryToPackage(Package& package, const std::string& queryStr) {
177 try {
178 auto query = folly::parseJson(queryStr);
179 if (!query.isObject()) {
180 Logger::FError("Autoload.Query is not a JSON Object");
181 return false;
183 auto expr = query["expression"];
184 for (auto& term : expr) {
185 if (term.isArray() && term[0] == "dirname") {
186 Logger::FInfo("adding autoload dir {}", term[1].asString());
187 package.addDirectory(term[1].asString());
190 return true;
191 } catch (const folly::json::parse_error& e) {
192 Logger::FError("Error JSON-parsing Autoload.Query = \"{}\": {}",
193 queryStr, e.what());
194 return false;
198 void addListToPackage(Package& package, const std::vector<std::string>& dirs,
199 const CompilerOptions& po) {
200 namespace fs = std::filesystem;
201 std::string prefix{""};
202 if (po.repoOptionsDir != po.inputDir) {
203 auto const input = fs::path(po.inputDir);
204 auto const rdr = fs::path(po.repoOptionsDir);
205 prefix = fs::relative(po.repoOptionsDir, po.inputDir).native();
206 if (!prefix.empty() && prefix.back() != '/') prefix += '/';
208 for (auto const& dir : dirs) {
209 Logger::FInfo("adding autoload dir {}", dir);
210 package.addDirectory(prefix + dir);
214 void addInputsToPackage(Package& package, const CompilerOptions& po) {
215 if (po.dirs.empty() && po.inputs.empty() && po.inputList.empty()) {
216 package.addDirectory("/");
217 } else {
218 for (auto const& dir : po.dirs) {
219 package.addDirectory(dir);
221 for (auto const& cdir : po.cdirs) {
222 package.addStaticDirectory(cdir);
224 for (auto const& cfile : po.cfiles) {
225 package.addStaticFile(cfile);
227 for (auto const& input : po.inputs) {
228 package.addSourceFile(input);
230 if (!po.inputList.empty()) {
231 package.addInputList(po.inputList);
236 void genText(const UnitEmitter& ue, const std::string& outputPath) {
237 assertx(Option::GenerateTextHHBC || Option::GenerateHhasHHBC);
239 auto const unit = ue.create();
241 auto const basePath = [&] {
242 auto fullPath = outputPath;
243 if (!fullPath.empty() &&
244 !FileUtil::isDirSeparator(fullPath[fullPath.size() - 1])) {
245 fullPath += FileUtil::getDirSeparator();
248 auto const fileName = "php/" + unit->filepath()->toCppString();
249 if (fileName.size() > 4 &&
250 fileName.substr(fileName.length() - 4) == ".php") {
251 fullPath += fileName.substr(0, fileName.length() - 4);
252 } else {
253 fullPath += fileName;
256 for (auto pos = outputPath.size(); pos < fullPath.size(); pos++) {
257 if (FileUtil::isDirSeparator(fullPath[pos])) {
258 mkdir(fullPath.substr(0, pos).c_str(), 0777);
261 return fullPath;
262 }();
264 if (Option::GenerateTextHHBC) {
265 auto const fullPath = basePath + ".hhbc.txt";
266 std::ofstream f(fullPath.c_str());
267 if (!f) {
268 Logger::Error("Unable to open %s for write", fullPath.c_str());
269 } else {
270 f << "Hash: " << ue.sha1().toString() << std::endl;
271 f << unit->toString();
272 f.close();
276 if (Option::GenerateHhasHHBC) {
277 auto const fullPath = basePath + ".hhas";
278 std::ofstream f(fullPath.c_str());
279 if (!f) {
280 Logger::Error("Unable to open %s for write", fullPath.c_str());
281 } else {
282 f << disassemble(unit.get());
283 f.close();
289 * It's an invariant that symbols in the repo must be Unique and
290 * Persistent. Verify all relevant symbols are unique and set the
291 * appropriate Attrs.
293 struct SymbolSets {
294 SymbolSets() {
295 // These aren't stored in the repo, but we still need to check for
296 // collisions against them, so put them in the maps.
297 for (auto const& kv : Native::getConstants()) {
298 assertx(kv.second.m_type != KindOfUninit);
299 add(constants, kv.first, nullptr, "constant");
303 // For local parses, where we have an UnitEmitter
304 void add(UnitEmitter& ue) {
305 // Verify uniqueness of symbols and set Attrs appropriately.
306 auto const path = ue.m_filepath;
308 add(units, path, path, "unit");
310 for (auto const pce : ue.preclasses()) {
311 pce->setAttrs(pce->attrs() | AttrPersistent);
312 if (pce->attrs() & AttrEnum) add(enums, pce->name(), path, "enum");
313 add(classes, pce->name(), path, "class", typeAliases);
315 for (auto& fe : ue.fevec()) {
316 if (fe->attrs & AttrIsMethCaller) {
317 if (addNoFail(funcs, fe->name, path, "function")) {
318 fe->attrs |= AttrPersistent;
320 } else {
321 fe->attrs |= AttrPersistent;
322 add(funcs, fe->name, path, "function");
325 for (auto& te : ue.typeAliases()) {
326 te->setAttrs(te->attrs() | AttrPersistent);
327 add(typeAliases, te->name(), path, "type alias", classes);
329 for (auto& c : ue.constants()) {
330 c.attrs |= AttrPersistent;
331 add(constants, c.name, path, "constant");
333 for (auto& m : ue.modules()) {
334 m.attrs |= AttrPersistent;
335 add(modules, m.name, path, "module");
339 // For remote parses, where we don't have an UnitEmitter
340 void add(const Package::ParseMeta::Definitions& d, const StringData* path) {
341 add(units, path, path, "unit");
343 for (auto const& c : d.m_classes) {
344 add(classes, c, path, "class", typeAliases);
346 for (auto const& e : d.m_enums) {
347 add(enums, e, path, "enum");
348 add(classes, e, path, "class", typeAliases);
350 for (auto const& f : d.m_funcs) {
351 add(funcs, f, path, "function");
353 for (auto const& m : d.m_methCallers) {
354 addNoFail(funcs, m, path, "function");
356 for (auto const& a : d.m_typeAliases) {
357 add(typeAliases, a, path, "type alias", classes);
359 for (auto const& c : d.m_constants) {
360 add(constants, c, path, "constant");
362 for (auto const& m : d.m_modules) {
363 add(modules, m, path, "module");
367 struct NonUnique : std::runtime_error {
368 using std::runtime_error::runtime_error;
371 private:
372 template <typename T>
373 void add(T& map,
374 const StringData* name,
375 const StringData* unit,
376 const char* type) {
377 assertx(name->isStatic());
378 assertx(!unit || unit->isStatic());
379 auto const ret = map.emplace(name, unit);
380 if (!ret.second) return fail(name, unit, ret.first->second, type);
383 template <typename T>
384 bool addNoFail(T& map,
385 const StringData* name,
386 const StringData* unit,
387 const char* type) {
388 assertx(name->isStatic());
389 assertx(!unit || unit->isStatic());
390 return map.emplace(name, unit).second;
393 template <typename T, typename E>
394 void add(T& map,
395 const StringData* name,
396 const StringData* unit,
397 const char* type,
398 const E& other) {
399 assertx(name->isStatic());
400 assertx(!unit || unit->isStatic());
401 auto const it = other.find(name);
402 if (it != other.end()) return fail(name, unit, it->second, "symbol");
403 add(map, name, unit, type);
406 [[noreturn]]
407 void fail(const StringData* name,
408 const StringData* unit1,
409 const StringData* unit2,
410 const char* type) {
411 auto const filename = [] (const StringData* u) {
412 if (!u) return "BUILTIN";
413 return u->data();
416 throw NonUnique{
417 folly::sformat(
418 "More than one {} with the name {}. In {} and {}",
419 type,
420 name,
421 filename(unit1),
422 filename(unit2)
427 using TMap = folly_concurrent_hash_map_simd<
428 const StringData*,
429 const StringData*,
430 string_data_hash,
431 string_data_tsame
433 using FMap = folly_concurrent_hash_map_simd<
434 const StringData*,
435 const StringData*,
436 string_data_hash,
437 string_data_fsame
439 using Map = folly_concurrent_hash_map_simd<
440 const StringData*,
441 const StringData*,
442 string_data_hash,
443 string_data_same
446 TMap enums;
447 TMap classes;
448 FMap funcs;
449 TMap typeAliases;
450 Map constants;
451 Map modules;
452 Map units;
455 RepoGlobalData getGlobalData() {
456 auto const now = std::chrono::high_resolution_clock::now();
457 auto const nanos =
458 std::chrono::duration_cast<std::chrono::nanoseconds>(
459 now.time_since_epoch()
462 auto gd = RepoGlobalData{};
463 gd.Signature = nanos.count();
465 Cfg::StoreToGlobalData(gd);
467 gd.EnableArgsInBacktraces = RuntimeOption::EnableArgsInBacktraces;
468 gd.AbortBuildOnVerifyError = RuntimeOption::EvalAbortBuildOnVerifyError;
469 gd.EvalCoeffectEnforcementLevels = RO::EvalCoeffectEnforcementLevels;
471 if (Option::ConstFoldFileBC) {
472 gd.SourceRootForFileBC.emplace(Cfg::Server::SourceRoot);
475 for (auto const& elm : RuntimeOption::ConstantFunctions) {
476 auto const s = internal_serialize(tvAsCVarRef(elm.second));
477 gd.ConstantFunctions.emplace_back(elm.first, s.toCppString());
479 std::sort(gd.ConstantFunctions.begin(), gd.ConstantFunctions.end());
481 return gd;
484 void setCoredumps(CompilerOptions& po) {
485 struct rlimit64 rl;
486 getrlimit64(RLIMIT_CORE, &rl);
487 if (!po.coredump) {
488 po.coredump = rl.rlim_cur > 0;
489 return;
491 rl.rlim_cur = 8000000000LL;
492 if (rl.rlim_max < rl.rlim_cur) {
493 rl.rlim_max = rl.rlim_cur;
495 setrlimit64(RLIMIT_CORE, &rl);
498 int prepareOptions(CompilerOptions &po, int argc, char **argv) {
499 options_description desc("HipHop Compiler for PHP Usage:\n\n"
500 "\thphp <options> <inputs>\n\n"
501 "Options");
503 std::vector<std::string> formats;
505 desc.add_options()
506 ("help", "display this message")
507 ("version", "display version number")
508 ("format,f", value<std::vector<std::string>>(&formats)->composing(),
509 "HHBC Output format: binary (default) | hhas | text | none")
510 ("repo-options-dir", value<std::string>(&po.repoOptionsDir),
511 "repo options directory")
512 ("input-dir", value<std::string>(&po.inputDir), "input directory")
513 ("inputs,i", value<std::vector<std::string>>(&po.inputs)->composing(),
514 "input file names")
515 ("input-list", value<std::string>(&po.inputList),
516 "file containing list of file names, one per line")
517 ("dir", value<std::vector<std::string>>(&po.dirs)->composing(),
518 "directories containing all input files")
519 ("exclude-dir",
520 value<std::vector<std::string>>(&po.excludeDirs)->composing(),
521 "directories to exclude from the input")
522 ("exclude-file",
523 value<std::vector<std::string>>(&po.excludeFiles)->composing(),
524 "files to exclude from the input, even if referenced by included files")
525 ("exclude-pattern",
526 value<std::vector<std::string>>(&po.excludePatterns)->composing(),
527 "regex (in 'find' command's regex command line option format) of files "
528 "or directories to exclude from the input, even if referenced by "
529 "included files")
530 ("exclude-static-pattern",
531 value<std::vector<std::string>>(&po.excludeStaticPatterns)->composing(),
532 "regex (in 'find' command's regex command line option format) of files "
533 "or directories to exclude from static content cache")
534 ("exclude-static-dir",
535 value<std::vector<std::string>>(&po.excludeStaticDirs)->composing(),
536 "directories to exclude from static content cache")
537 ("exclude-static-file",
538 value<std::vector<std::string>>(&po.excludeStaticFiles)->composing(),
539 "files to exclude from static content cache")
540 ("cfile", value<std::vector<std::string>>(&po.cfiles)->composing(),
541 "extra static files forced to include without exclusion checking")
542 ("cdir", value<std::vector<std::string>>(&po.cdirs)->composing(),
543 "extra directories for static files without exclusion checking")
544 ("output-dir,o", value<std::string>(&po.outputDir), "output directory")
545 ("config,c", value<std::vector<std::string>>(&po.config)->composing(),
546 "config file name")
547 ("config-value,v",
548 value<std::vector<std::string>>(&po.confStrings)->composing(),
549 "individual configuration string in a format of name=value, where "
550 "name can be any valid configuration for a config file")
551 ("define,d", value<std::vector<std::string>>(&po.iniStrings)->composing(),
552 "define an ini setting in the same format ( foo[=bar] ) as provided in a "
553 ".ini file")
554 ("log,l",
555 value<int>(&po.logLevel)->default_value(-1),
556 "-1: (default); 0: no logging; 1: errors only; 2: warnings and errors; "
557 "3: informational as well; 4: really verbose.")
558 ("file-cache",
559 value<std::string>(&po.filecache),
560 "if specified, generate a static file cache with this file name")
561 ("coredump",
562 value<bool>(&po.coredump)->default_value(false),
563 "turn on coredump")
564 ("compiler-id", "display the git hash for the compiler id")
565 ("repo-schema", "display the repo schema id used by this app")
566 ("report-ondemand-edges",
567 value<std::string>(&po.ondemandEdgesPath),
568 "Write parse-on-demand dependency edges to the specified file")
571 positional_options_description p;
572 p.add("inputs", -1);
573 variables_map vm;
574 try {
575 auto opts = command_line_parser(argc, argv).options(desc)
576 .positional(p).run();
577 try {
578 store(opts, vm);
579 notify(vm);
580 #if defined(BOOST_VERSION) && BOOST_VERSION >= 105000 && BOOST_VERSION <= 105400
581 } catch (const error_with_option_name &e) {
582 std::string wrong_name = e.get_option_name();
583 std::string right_name = get_right_option_name(opts, wrong_name);
584 std::string message = e.what();
585 if (right_name != "") {
586 boost::replace_all(message, wrong_name, right_name);
588 Logger::Error("Error in command line: %s", message.c_str());
589 std::cout << desc << "\n";
590 return -1;
591 #endif
592 } catch (const error& e) {
593 Logger::Error("Error in command line: %s", e.what());
594 std::cout << desc << "\n";
595 return -1;
597 } catch (const unknown_option& e) {
598 Logger::Error("Error in command line: %s", e.what());
599 std::cout << desc << "\n";
600 return -1;
601 } catch (const error& e) {
602 Logger::Error("Error in command line: %s", e.what());
603 std::cout << desc << "\n";
604 return -1;
605 } catch (...) {
606 Logger::Error("Error in command line parsing.");
607 std::cout << desc << "\n";
608 return -1;
610 if (argc <= 1 || vm.count("help")) {
611 std::cout << desc << "\n";
612 return 1;
614 if (vm.count("version")) {
615 std::cout << "HipHop Repo Compiler";
616 std::cout << " " << HHVM_VERSION;
617 std::cout << " (" << (debug ? "dbg" : "rel") << ")\n";
618 std::cout << "Compiler: " << compilerId() << "\n";
619 std::cout << "Repo schema: " << repoSchemaId() << "\n";
620 return 1;
623 if (vm.count("compiler-id")) {
624 std::cout << compilerId() << "\n";
625 return 1;
628 if (vm.count("repo-schema")) {
629 std::cout << repoSchemaId() << "\n";
630 return 1;
633 if (po.outputDir.empty()) {
634 Logger::Error("Error in command line: output-dir must be provided.");
635 std::cout << desc << "\n";
636 return -1;
639 // log level
640 if (po.logLevel != -1) {
641 Logger::LogLevel = (Logger::LogLevelType)po.logLevel;
642 } else {
643 Logger::LogLevel = Logger::LogInfo;
645 Logger::Escape = false;
646 Logger::AlwaysEscapeLog = false;
648 if (!formats.empty()) {
649 for (auto const& format : formats) {
650 if (format == "text") {
651 Option::GenerateTextHHBC = true;
652 } else if (format == "hhas") {
653 Option::GenerateHhasHHBC = true;
654 } else if (format == "binary") {
655 Option::GenerateBinaryHHBC = true;
656 } else if (format == "none") {
657 if (formats.size() > 1) {
658 Logger::Error("Cannot specify 'none' with other formats");
659 return -1;
661 Option::NoOutputHHBC = true;
662 } else {
663 Logger::Error("Unknown format for HHBC target: %s", format.c_str());
664 std::cout << desc << "\n";
665 return -1;
668 } else {
669 Option::GenerateBinaryHHBC = true;
672 tl_heap.getCheck();
673 IniSetting::Map ini = IniSetting::Map::object;
674 Hdf config;
675 for (auto const& file : po.config) {
676 Config::ParseConfigFile(file, ini, config);
678 for (auto const& iniString : po.iniStrings) {
679 Config::ParseIniString(iniString, ini);
681 for (auto const& confString : po.confStrings) {
682 Config::ParseHdfString(confString, config);
684 applyBuildOverrides(ini, config, po);
685 Hdf runtime = config["Runtime"];
686 // The configuration command line strings were already processed above
687 // Don't process them again.
689 // Note that some options depends on RepoAuthoritative, we thus
690 // set/unset them here. We restore it to false since we need
691 // compile_systemlib_string to actually parse the file instead of
692 // trying to load it from repo (which is the case when
693 // RepoAuthoritative is true).
694 RuntimeOption::RepoAuthoritative = true;
695 // Set RepoPath to satisfy assertions (we need a path set in
696 // RepoAuthoritative). It will never actually be used.
697 RuntimeOption::RepoPath = "/tmp/dummy.hhbc";
698 // We don't want debug info in repo builds, since we don't support attaching
699 // a debugger in repo authoritative mode, but we want the default for debug
700 // info to be true so that it's present in sandboxes. Override that default
701 // here, since we only get here when building for repo authoritative mode.
702 RuntimeOption::RepoDebugInfo = false;
703 RuntimeOption::Load(ini, runtime);
704 Option::Load(ini, config);
705 RuntimeOption::RepoAuthoritative = false;
706 RuntimeOption::RepoPath = "";
707 Cfg::Jit::Enabled = false;
708 RuntimeOption::EvalLowStaticArrays = false;
710 std::vector<std::string> badnodes;
711 config.lint(badnodes);
712 for (auto const& badnode : badnodes) {
713 Logger::Error("Possible bad config node: %s", badnode.c_str());
716 // we need to initialize pcre cache table very early
717 pcre_init();
719 if (po.inputDir.empty()) po.inputDir = '.';
720 po.inputDir = FileUtil::normalizeDir(po.inputDir);
722 if (po.repoOptionsDir.empty()) {
723 po.repoOptionsDir = po.inputDir;
724 } else {
725 po.repoOptionsDir = FileUtil::normalizeDir(po.repoOptionsDir);
728 for (auto const& dir : po.excludeDirs) {
729 Option::PackageExcludeDirs.insert(FileUtil::normalizeDir(dir));
731 for (auto const& file : po.excludeFiles) {
732 Option::PackageExcludeFiles.insert(file);
734 for (auto const& pattern : po.excludePatterns) {
735 Option::PackageExcludePatterns.insert(
736 format_pattern(pattern, true /* prefixSlash */));
738 for (auto const& dir : po.excludeStaticDirs) {
739 Option::PackageExcludeStaticDirs.insert(FileUtil::normalizeDir(dir));
741 for (auto const& file : po.excludeStaticFiles) {
742 Option::PackageExcludeStaticFiles.insert(file);
744 for (auto const& pattern : po.excludeStaticPatterns) {
745 Option::PackageExcludeStaticPatterns.insert(
746 format_pattern(pattern, true /* prefixSlash */));
749 return 0;
752 ///////////////////////////////////////////////////////////////////////////////
754 Options makeExternWorkerOptions(const CompilerOptions& po) {
755 Options options;
756 options
757 .setUseCase(Option::ExternWorkerUseCase)
758 .setFeaturesFile(Option::ExternWorkerFeaturesFile)
759 .setWorkerPath(Option::ExternWorkerPath)
760 .setUseSubprocess(Option::ExternWorkerUseCase.empty()
761 ? Options::UseSubprocess::Always
762 : Options::UseSubprocess::Never)
763 .setCacheExecs(Option::ExternWorkerUseExecCache)
764 .setCleanup(Option::ExternWorkerCleanup)
765 .setUseEdenFS(RO::EvalUseEdenFS)
766 .setUseRichClient(Option::ExternWorkerUseRichClient)
767 .setUseZippyRichClient(Option::ExternWorkerUseZippyRichClient)
768 .setUseP2P(Option::ExternWorkerUseP2P)
769 .setCasConnectionCount(Option::ExternWorkerCasConnectionCount)
770 .setEngineConnectionCount(Option::ExternWorkerEngineConnectionCount)
771 .setAcConnectionCount(Option::ExternWorkerAcConnectionCount)
772 .setVerboseLogging(Option::ExternWorkerVerboseLogging);
773 if (Option::ExternWorkerTimeoutSecs > 0) {
774 options.setTimeout(std::chrono::seconds{Option::ExternWorkerTimeoutSecs});
776 if (!Option::ExternWorkerWorkingDir.empty()) {
777 options.setWorkingDir(Option::ExternWorkerWorkingDir);
778 } else {
779 options.setWorkingDir(po.outputDir);
781 if (Option::ExternWorkerThrottleRetries >= 0) {
782 options.setThrottleRetries(Option::ExternWorkerThrottleRetries);
784 if (Option::ExternWorkerThrottleBaseWaitMSecs >= 0) {
785 options.setThrottleBaseWait(
786 std::chrono::milliseconds{Option::ExternWorkerThrottleBaseWaitMSecs}
789 return options;
792 void logPhaseStats(const std::string& phase, const Package& package,
793 extern_worker::Client& client, StructuredLogEntry& sample, int64_t micros)
795 auto const& stats = client.getStats();
796 Logger::FInfo(
797 "{}",
798 stats.toString(
799 phase,
800 folly::sformat("total package files {:,}", package.getTotalFiles())
804 sample.setInt(phase + "_total_files", package.getTotalFiles());
806 sample.setInt(phase + "_micros", micros);
807 if (auto const t = package.inputsTime()) {
808 sample.setInt(
809 phase + "_input_micros",
810 std::chrono::duration_cast<std::chrono::microseconds>(*t).count()
813 if (auto const t = package.ondemandTime()) {
814 sample.setInt(
815 phase + "_ondemand_micros",
816 std::chrono::duration_cast<std::chrono::microseconds>(*t).count()
820 stats.logSample(phase, sample);
823 namespace {
824 // Upload all builtin decls, and pass their IndexMeta summary and
825 // Ref<UnitDecls> to callback() to include in the overall UnitIndex. This
826 // makes systemlib decls visible to files being compiled as part of the
827 // full repo build, but does not make repo decls available to systemlib.
828 coro::Task<bool> indexBuiltinSymbolDecls(
829 const Package::IndexCallback& callback,
830 TicketExecutor& executor,
831 extern_worker::Client& client
833 std::vector<coro::TaskWithExecutor<void>> tasks;
834 auto const declCallback = [&](auto const* d) -> coro::Task<void> {
835 auto const symbols = hackc::decls_to_symbols(*d->decls);
836 auto summary = summary_of_symbols(symbols);
837 callback(
839 summary,
840 co_await client.store(Package::UnitDecls{
841 summary,
842 std::string{d->serialized.begin(), d->serialized.end()}
845 co_return;
847 for (auto const& d: Native::getAllBuiltinDecls()) {
848 tasks.emplace_back(declCallback(d).scheduleOn(executor.sticky()));
850 co_await coro::collectAllRange(std::move(tasks));
851 co_return true;
855 // Compute a UnitIndex by parsing decls for all autoload-eligible files.
856 // If no Autoload.Query is specified by RepoOptions, this just indexes
857 // the input files.
858 std::unique_ptr<UnitIndex> computeIndex(
859 const CompilerOptions& po,
860 StructuredLogEntry& sample,
861 TicketExecutor& executor,
862 extern_worker::Client& client
864 auto index = std::make_unique<UnitIndex>();
865 auto const indexUnit = [&] (
866 std::string&& rpath,
867 Package::IndexMeta&& meta,
868 Ref<Package::UnitDecls>&& declsRef
870 auto locations = std::make_shared<UnitIndex::Locations>(
871 std::move(rpath), std::move(declsRef)
873 auto insert = [&](auto const& names, auto& map, const char* kind) {
874 for (auto name : names) {
875 auto const ret = map.emplace(name, locations);
876 if (!ret.second) {
877 Logger::FWarning("Duplicate {} {} in {} and {}",
878 kind, name, ret.first->first, locations->rpath
883 insert(meta.types, index->types, "type");
884 insert(meta.funcs, index->funcs, "function");
885 insert(meta.constants, index->constants, "constant");
886 insert(meta.modules, index->modules, "module");
889 Package indexPackage{po.inputDir, executor, client, po.coredump};
890 Timer indexTimer(Timer::WallTime, "indexing");
892 auto const& repoFlags = RepoOptions::forFile(po.repoOptionsDir).flags();
893 auto const& dirs = repoFlags.autoloadRepoBuildSearchDirs();
894 auto const queryStr = repoFlags.autoloadQuery();
895 if (!dirs.empty()) {
896 addListToPackage(indexPackage, dirs, po);
897 } else if (!queryStr.empty()) {
898 // Index the files specified by Autoload.Query
899 if (!addAutoloadQueryToPackage(indexPackage, queryStr)) return nullptr;
900 } else {
901 // index just the input files
902 addInputsToPackage(indexPackage, po);
904 // Here, we are doing the following in parallel:
905 // * Indexing the build package
906 // * Indexing builtin decls to be used by decl driven bytecode compilation
907 // If DDB is not enabled, we will return early from the second task.
908 auto const [indexingRepoOK, indexingSystemlibDeclsOK] =
909 coro::blockingWait(coro::collectAll(
910 indexPackage.index(indexUnit),
911 coro::co_invoke([&]() -> coro::Task<bool> {
912 if (RO::EvalEnableDecl) {
913 co_return co_await
914 indexBuiltinSymbolDecls(indexUnit, executor, client);
916 co_return true;
920 if (!indexingRepoOK || !indexingSystemlibDeclsOK) return nullptr;
922 logPhaseStats("index", indexPackage, client, sample,
923 indexTimer.getMicroSeconds());
924 Logger::FInfo("index size: types={:,} funcs={:,} constants={:,} modules={:,}",
925 index->types.size(),
926 index->funcs.size(),
927 index->constants.size(),
928 index->modules.size()
930 client.resetStats();
932 return index;
935 ///////////////////////////////////////////////////////////////////////////////
937 // Parses a file and produces an UnitEmitter. Used when we're not
938 // going to run HHBBC.
939 struct ParseJob {
940 static std::string name() { return "hphpc-parse"; }
942 static void init(const Package::Config& config,
943 Package::FileMetaVec meta) {
944 Package::parseInit(config, std::move(meta));
946 static Package::ParseMetaVec fini() {
947 return Package::parseFini();
950 static UnitEmitterSerdeWrapper run(const std::string& contents,
951 const RepoOptionsFlags& flags,
952 Variadic<Package::UnitDecls> decls) {
953 return Package::parseRun(contents, flags, std::move(decls.vals));
957 using WPI = HHBBC::WholeProgramInput;
959 // Parses a file (as ParseJob does), but then hands the UnitEmitter
960 // off to HHBBC to produce a WholeProgramInput key and value. This is
961 // for when we are going to run HHBBC.
962 struct ParseForHHBBCJob {
963 static std::string name() { return "hphpc-parse-for-hhbbc"; }
965 static void init(const Package::Config& config,
966 const HHBBC::Config& hhbbcConfig,
967 Package::FileMetaVec meta) {
968 Package::parseInit(config, std::move(meta));
969 HHBBC::options = hhbbcConfig.o;
970 hhbbcConfig.gd.load(true);
972 static std::tuple<Package::ParseMetaVec, std::vector<WPI::Key>> fini() {
973 return std::make_tuple(Package::parseFini(), std::move(s_inputKeys));
976 static Variadic<WPI::Value> run(const std::string& contents,
977 const RepoOptionsFlags& flags,
978 Variadic<Package::UnitDecls> decls) {
979 auto wrapper = Package::parseRun(contents, flags, std::move(decls.vals));
980 if (!wrapper.m_ue) return {};
982 std::vector<WPI::Value> values;
983 for (auto& [key, value] : WPI::make(std::move(wrapper.m_ue))) {
984 s_inputKeys.emplace_back(std::move(key));
985 values.emplace_back(std::move(value));
987 return Variadic<WPI::Value>{std::move(values)};
990 static std::vector<WPI::Key> s_inputKeys;
993 std::vector<WPI::Key> ParseForHHBBCJob::s_inputKeys;
995 Job<ParseJob> s_parseJob;
996 Job<ParseForHHBBCJob> s_parseForHHBBCJob;
998 // A ParsedFile owns all the HHBC state associated with a parsed source
999 // file before we have decided to add it to the Program.
1000 struct ParsedFile {
1001 explicit ParsedFile(Package::ParseMeta m)
1002 : parseMeta(std::move(m))
1004 ParsedFile(Package::ParseMeta m, Ref<UnitEmitterSerdeWrapper> w)
1005 : parseMeta(std::move(m)), ueRef(std::move(w))
1008 Package::ParseMeta parseMeta;
1009 Optional<Ref<UnitEmitterSerdeWrapper>> ueRef;
1010 std::vector<std::pair<WPI::Key, Ref<WPI::Value>>> hhbbcInputs;
1013 using ParsedFiles = folly_concurrent_hash_map_simd<
1014 std::string,
1015 std::unique_ptr<ParsedFile>
1018 ///////////////////////////////////////////////////////////////////////////////
1020 bool process(CompilerOptions &po) {
1021 #ifndef _MSC_VER
1022 LightProcess::Initialize(Cfg::Server::LightProcessFilePrefix,
1023 Cfg::Server::LightProcessCount,
1024 RuntimeOption::EvalRecordSubprocessTimes,
1025 {});
1026 #endif
1028 setCoredumps(po);
1030 register_process_init();
1032 StructuredLogEntry sample;
1033 sample.setStr("debug", debug ? "true" : "false");
1034 sample.setStr("use_case", Option::ExternWorkerUseCase);
1035 sample.setStr("features_file", Option::ExternWorkerFeaturesFile);
1036 sample.setStr("worker_path", Option::ExternWorkerPath);
1037 sample.setInt("use_rich_client", Option::ExternWorkerUseRichClient);
1038 sample.setInt("use_zippy_rich_client",
1039 Option::ExternWorkerUseZippyRichClient);
1040 sample.setInt("use_p2p", Option::ExternWorkerUseP2P);
1041 sample.setInt("cas_connection_count", Option::ExternWorkerCasConnectionCount);
1042 sample.setInt("engine_connection_count", Option::ExternWorkerEngineConnectionCount);
1043 sample.setInt("ac_connection_count", Option::ExternWorkerAcConnectionCount);
1044 sample.setInt("use_exec_cache", Option::ExternWorkerUseExecCache);
1045 sample.setInt("timeout_secs", Option::ExternWorkerTimeoutSecs);
1046 sample.setInt("cleanup", Option::ExternWorkerCleanup);
1047 sample.setInt("throttle_retries", Option::ExternWorkerThrottleRetries);
1048 sample.setInt("throttle_base_wait_ms",
1049 Option::ExternWorkerThrottleBaseWaitMSecs);
1050 sample.setStr("working_dir", Option::ExternWorkerWorkingDir);
1051 sample.setInt("parser_group_size", Option::ParserGroupSize);
1052 sample.setInt("parser_dir_group_size_limit", Option::ParserDirGroupSizeLimit);
1053 sample.setInt("parser_thread_count", Option::ParserThreadCount);
1054 sample.setInt("parser_optimistic_store", Option::ParserOptimisticStore);
1055 sample.setInt("parser_async_cleanup", Option::ParserAsyncCleanup);
1056 sample.setStr("push_phases", po.push_phases);
1057 sample.setStr("matched_overrides", po.matched_overrides);
1058 sample.setStr("use_hphpc", "true");
1059 sample.setStr("use_hhbbc", RO::EvalUseHHBBC ? "true" : "false");
1061 // Track the unit-emitters created for system during
1062 // hphp_process_init().
1063 SystemLib::keepRegisteredUnitEmitters(true);
1064 hphp_process_init();
1065 SCOPE_EXIT { hphp_process_exit(); };
1066 SystemLib::keepRegisteredUnitEmitters(false);
1068 auto const outputFile = po.outputDir + "/hhvm.hhbc";
1069 unlink(outputFile.c_str());
1071 auto executor = std::make_unique<TicketExecutor>(
1072 "HPHPcWorker",
1074 size_t(Option::ParserThreadCount <= 0 ? 1 : Option::ParserThreadCount),
1075 [] {
1076 hphp_thread_init();
1077 hphp_session_init(Treadmill::SessionKind::CompilerEmit);
1079 [] {
1080 hphp_context_exit();
1081 hphp_session_exit();
1082 hphp_thread_exit();
1084 std::chrono::minutes{15}
1086 auto client =
1087 std::make_unique<Client>(executor->sticky(), makeExternWorkerOptions(po));
1089 sample.setStr("extern_worker_impl", client->implName());
1090 sample.setStr("extern_worker_session", client->session());
1092 auto index = computeIndex(po, sample, *executor, *client);
1093 if (!index) return false;
1095 // Always used, but we can clear it early to save memory.
1096 Optional<SymbolSets> unique;
1097 unique.emplace();
1099 // HHBBC specific state (if we're going to run it).
1100 Optional<WPI> hhbbcInputs;
1101 Optional<CoroAsyncValue<Ref<HHBBC::Config>>> hhbbcConfig;
1102 if (RO::EvalUseHHBBC) {
1103 hhbbcInputs.emplace();
1104 // We want to do this as early as possible
1105 hhbbcConfig.emplace(
1106 [&client] () {
1107 return client->store(HHBBC::Config::get(getGlobalData()));
1109 executor->sticky()
1113 hphp_fast_set<const StringData*> moduleInDeployment;
1114 if (!Cfg::Eval::ActiveDeployment.empty()) {
1115 // Many files will be in the same module, so it is better to precompute
1116 // a mapping of whether a given module is in the current deployment
1117 auto const& packageInfo =
1118 RepoOptions::forFile(po.repoOptionsDir).packageInfo();
1119 auto const it = packageInfo.deployments().find(Cfg::Eval::ActiveDeployment);
1120 if (it == end(packageInfo.deployments())) {
1121 Logger::FError("The active deployment is set to {}; "
1122 "however, it is not defined in the {}/{} file",
1123 Cfg::Eval::ActiveDeployment,
1124 po.repoOptionsDir,
1125 kPackagesToml);
1126 return false;
1129 moduleInDeployment.reserve(index->modules.size());
1130 for (auto const& [module, _] : index->modules) {
1131 assertx(!moduleInDeployment.contains(module));
1132 if (packageInfo.moduleInDeployment(module,
1133 it->second,
1134 DeployKind::HardOrSoft)) {
1135 moduleInDeployment.insert(module);
1138 // Check for the default module separately since there is no module
1139 // declaration for the default module.
1140 static auto const defaultModule = makeStaticString(Module::DEFAULT);
1141 if (packageInfo.moduleInDeployment(defaultModule,
1142 it->second,
1143 DeployKind::HardOrSoft)) {
1144 moduleInDeployment.insert(defaultModule);
1148 Optional<RepoAutoloadMapBuilder> autoload;
1149 Optional<RepoFileBuilder> repo;
1150 std::atomic<uint32_t> nextSn{0};
1151 std::atomic<size_t> numUnits{0};
1152 std::mutex repoLock;
1154 // Emit a fully processed unit (either processed by HHBBC or not).
1155 auto const emitUnit = [&] (std::unique_ptr<UnitEmitter> ue) {
1156 assertx(ue);
1157 if (Option::NoOutputHHBC) return;
1159 assertx(Option::GenerateBinaryHHBC ||
1160 Option::GenerateTextHHBC ||
1161 Option::GenerateHhasHHBC);
1163 if (Option::GenerateTextHHBC || Option::GenerateHhasHHBC) {
1164 auto old_repo_auth = RuntimeOption::RepoAuthoritative;
1165 RuntimeOption::RepoAuthoritative = RuntimeOption::EvalUseHHBBC;
1166 SCOPE_EXIT { RuntimeOption::RepoAuthoritative = old_repo_auth; };
1167 genText(*ue, po.outputDir);
1170 if (!Option::GenerateBinaryHHBC) return;
1172 ++numUnits;
1174 if (!RO::EvalUseHHBBC) {
1175 // HHBBC assigns m_sn and the SHA1, but we have to do it ourself
1176 // if we're not running it.
1177 auto const sn = nextSn++;
1178 ue->m_symbol_refs.clear();
1179 ue->m_sn = sn;
1180 ue->setSha1(SHA1 { sn });
1181 unique->add(*ue);
1184 autoload->addUnit(*ue);
1185 RepoFileBuilder::EncodedUE encoded{*ue};
1186 std::scoped_lock<std::mutex> _{repoLock};
1187 repo->add(encoded);
1190 // This will contain all files eligible to be in the program: input files
1191 // and all ondemand-eligible files, except files excluded by CLI options.
1192 auto parsedFiles = std::make_unique<ParsedFiles>();
1194 // Process unit-emitters produced locally (usually systemlib stuff).
1195 auto const emitLocalUnit = [&] (Package::UEVec ues) -> coro::Task<void> {
1196 if (RO::EvalUseHHBBC) {
1197 // If we're using HHBBC, turn them into WholeProgramInput
1198 // key/values (after checking uniqueness), upload the values,
1199 // and store them in the WholeProgramInput.
1200 std::vector<WPI::Key> keys;
1201 std::vector<WPI::Value> values;
1203 for (auto& ue : ues) {
1204 unique->add(*ue);
1205 for (auto& [key, value] : WPI::make(std::move(ue))) {
1206 keys.emplace_back(std::move(key));
1207 values.emplace_back(std::move(value));
1211 if (keys.empty()) co_return;
1212 auto valueRefs = co_await client->storeMulti(std::move(values));
1214 auto const numKeys = keys.size();
1215 assertx(valueRefs.size() == numKeys);
1217 for (size_t i = 0; i < numKeys; ++i) {
1218 hhbbcInputs->add(std::move(keys[i]), std::move(valueRefs[i]));
1220 co_return;
1223 // Otherwise just emit it
1224 for (auto& ue : ues) emitUnit(std::move(ue));
1225 co_return;
1228 // Parse a group of files remotely
1229 auto const parseRemoteUnit = [&] (const Ref<Package::Config>& config,
1230 Ref<Package::FileMetaVec> fileMetas,
1231 std::vector<Package::FileData> files,
1232 Client::ExecMetadata metadata)
1233 -> coro::Task<Package::ParseMetaVec> {
1234 if (RO::EvalUseHHBBC) {
1235 // Run the HHBBC parse job, which produces WholeProgramInput
1236 // key/values.
1237 auto hhbbcConfigRef = co_await hhbbcConfig->getCopy();
1238 auto [inputValueRefs, metaRefs] =
1239 co_await client->exec(
1240 s_parseForHHBBCJob,
1241 std::make_tuple(
1242 config,
1243 std::move(hhbbcConfigRef),
1244 std::move(fileMetas)
1246 std::move(files),
1247 std::move(metadata)
1250 // The parse metadata and the keys are loaded, but the values
1251 // are kept as Refs.
1252 auto [parseMetas, inputKeys] = co_await client->load(std::move(metaRefs));
1254 // Stop now if the index contains any missing decls.
1255 // parseRun() will retry this job with additional inputs.
1256 if (index->containsAnyMissing(parseMetas)) {
1257 co_return parseMetas;
1260 always_assert(parseMetas.size() == inputValueRefs.size());
1261 auto const numKeys = inputKeys.size();
1262 size_t keyIdx = 0;
1263 for (size_t i = 0, n = parseMetas.size(); i < n; i++) {
1264 auto& p = parseMetas[i];
1265 p.m_missing = Package::DeclNames{}; // done with this list now.
1266 if (!p.m_filepath) continue;
1267 auto& valueRefs = inputValueRefs[i];
1268 auto filename = p.m_filepath->toCppString();
1269 auto pf = std::make_unique<ParsedFile>(std::move(p));
1270 pf->hhbbcInputs.reserve(valueRefs.size());
1271 for (auto& r : valueRefs) {
1272 always_assert(keyIdx < numKeys);
1273 pf->hhbbcInputs.emplace_back(
1274 std::move(inputKeys[keyIdx]), std::move(r)
1276 ++keyIdx;
1278 parsedFiles->emplace(filename, std::move(pf));
1281 // Indicate we're done by returning an empty vec.
1282 co_return Package::ParseMetaVec{};
1285 // Otherwise, do a "normal" (non-HHBBC parse job), load the
1286 // unit-emitters and parse metadata, and emit the unit-emitters.
1287 auto [ueRefs, metaRefs] =
1288 co_await client->exec(
1289 s_parseJob,
1290 std::make_tuple(config, std::move(fileMetas)),
1291 std::move(files),
1292 std::move(metadata)
1295 auto parseMetas = co_await client->load(std::move(metaRefs));
1297 // Stop now if the index contains any missing decls.
1298 // parseRun() will retry this job with additional inputs.
1299 if (index->containsAnyMissing(parseMetas)) {
1300 co_return parseMetas;
1303 always_assert(parseMetas.size() == ueRefs.size());
1304 for (size_t i = 0, n = parseMetas.size(); i < n; i++) {
1305 auto& p = parseMetas[i];
1306 p.m_missing = Package::DeclNames{}; // done with this list now.
1307 if (!p.m_filepath) continue;
1308 auto filename = p.m_filepath->toCppString();
1309 auto pf = std::make_unique<ParsedFile>(
1310 std::move(p), std::move(ueRefs[i])
1312 parsedFiles->emplace(filename, std::move(pf));
1315 // Indicate we're done by returning an empty vec.
1316 co_return Package::ParseMetaVec{};
1319 // Emit a group of files that were parsed remotely
1320 auto const emitRemoteUnit = [&] (
1321 const std::vector<std::filesystem::path>& rpaths
1322 ) -> coro::Task<Package::EmitCallBackResult> {
1323 Package::ParseMetaVec parseMetas;
1324 Package::ParseMetaItemsToSkipSet itemsToSkip;
1326 auto const shouldIncludeInBuild = [&] (const Package::ParseMeta& p) {
1327 if (Cfg::Eval::ActiveDeployment.empty()) return true;
1328 // If the unit defines any modules, then it is always included
1329 if (!p.m_definitions.m_modules.empty()) return true;
1330 return p.m_module_use && moduleInDeployment.contains(p.m_module_use);
1333 if (RO::EvalUseHHBBC) {
1334 // Retrieve HHBBC WPI (Key, Ref<Value>) pairs that were already parsed.
1335 // No Async I/O is necessary in this case.
1336 for (size_t i = 0, n = rpaths.size(); i < n; ++i) {
1337 auto& rpath = rpaths[i];
1338 auto it = parsedFiles->find(rpath.native());
1339 if (it == parsedFiles->end()) {
1340 // If you see this error in a test case, add a line to to test.php.hphp_opts:
1341 // --inputs=hphp/path/to/file.inc
1342 Package::ParseMeta bad;
1343 bad.m_abort = folly::sformat("Unknown include file: {}\n", rpath.native());
1344 parseMetas.emplace_back(std::move(bad));
1345 continue;
1347 auto& pf = it->second;
1348 parseMetas.emplace_back(std::move(pf->parseMeta));
1349 auto& p = parseMetas.back();
1350 if (!p.m_filepath) continue;
1351 if (!shouldIncludeInBuild(p)) {
1352 Logger::FVerbose("Dropping {} from the repo build because module {} is "
1353 "not part of {} deployment",
1354 p.m_filepath,
1355 p.m_module_use ? p.m_module_use->data() : "top-level",
1356 Cfg::Eval::ActiveDeployment);
1357 itemsToSkip.insert(i);
1358 continue;
1360 // We don't have unit-emitters to do uniqueness checking, but
1361 // the parse metadata has the definitions we can use instead.
1362 unique->add(p.m_definitions, p.m_filepath);
1363 auto inputs = std::move(pf->hhbbcInputs);
1364 for (auto& e : inputs) {
1365 hhbbcInputs->add(std::move(e.first), std::move(e.second));
1368 co_return std::make_pair(std::move(parseMetas),
1369 std::move(itemsToSkip));
1372 // Otherwise, retrieve ParseMeta and load unit-emitters from a normal
1373 // ParseJob, then emit the unit-emitters.
1374 std::vector<Ref<UnitEmitterSerdeWrapper>> ueRefs;
1375 ueRefs.reserve(rpaths.size());
1376 for (size_t i = 0, n = rpaths.size(); i < n; ++i) {
1377 auto& rpath = rpaths[i];
1378 auto it = parsedFiles->find(rpath);
1379 if (it == parsedFiles->end()) {
1380 // If you see this error in a test case, add a line to to test.php.hphp_opts:
1381 // --inputs=hphp/path/to/file.inc
1382 Package::ParseMeta bad;
1383 bad.m_abort = folly::sformat("Unknown include file: {}", rpath.native());
1384 parseMetas.emplace_back(std::move(bad));
1385 continue;
1387 auto& pf = it->second;
1388 auto& p = pf->parseMeta;
1389 if (!shouldIncludeInBuild(p)) {
1390 Logger::FVerbose("Dropping {} from the repo build because module {} is "
1391 "not part of {} deployment",
1392 p.m_filepath,
1393 p.m_module_use ? p.m_module_use->data() : "top-level",
1394 Cfg::Eval::ActiveDeployment);
1395 itemsToSkip.insert(i);
1396 continue;
1398 parseMetas.emplace_back(std::move(pf->parseMeta));
1399 ueRefs.emplace_back(std::move(*pf->ueRef));
1402 always_assert(parseMetas.size() == ueRefs.size());
1403 auto ueWrappers = co_await client->load(std::move(ueRefs));
1405 for (auto& wrapper : ueWrappers) {
1406 if (!wrapper.m_ue) continue;
1407 emitUnit(std::move(wrapper.m_ue));
1409 co_return std::make_pair(std::move(parseMetas),
1410 std::move(itemsToSkip));
1414 // Parsing phase: compile all input files and autoload files to bytecode.
1415 // Deferring emit reduces wall time by parsing all files in parallel in
1416 // one pass, then computing the full transitive closure of ondemand files
1417 // in one go while emitting. Unreferenced ondemand files are discarded.
1418 auto parsePackage = std::make_unique<Package>(
1419 po.inputDir,
1420 *executor,
1421 *client,
1422 po.coredump
1424 Timer parseTimer(Timer::WallTime, "parsing");
1426 // Parse the input files specified on the command line
1427 addInputsToPackage(*parsePackage, po);
1428 auto const& repoFlags = RepoOptions::forFile(po.repoOptionsDir).flags();
1429 auto const& dirs = repoFlags.autoloadRepoBuildSearchDirs();
1430 auto const queryStr = repoFlags.autoloadQuery();
1431 if (!dirs.empty()) {
1432 addListToPackage(*parsePackage, dirs, po);
1433 } else if (!queryStr.empty()) {
1434 // Parse all the files specified by Autoload.Query
1435 if (!addAutoloadQueryToPackage(*parsePackage, queryStr)) return false;
1438 if (!coro::blockingWait(parsePackage->parse(*index,
1439 parseRemoteUnit))) {
1440 return false;
1443 logPhaseStats("parse", *parsePackage, *client, sample,
1444 parseTimer.getMicroSeconds());
1445 client->resetStats();
1448 auto package = std::make_unique<Package>(
1449 po.inputDir,
1450 *executor,
1451 *client,
1452 po.coredump
1456 // Emit phase: emit systemlib units, all input files, and the transitive
1457 // closure of files referenced by symbolRefs.
1458 Timer emitTimer(Timer::WallTime, "emit");
1459 addInputsToPackage(*package, po);
1461 if (!RO::EvalUseHHBBC && Option::GenerateBinaryHHBC) {
1462 // Initialize autoload and repo for emitUnit() to populate
1463 autoload.emplace();
1464 repo.emplace(outputFile);
1467 if (!coro::blockingWait(package->emit(*index, emitRemoteUnit, emitLocalUnit,
1468 po.ondemandEdgesPath))) {
1469 return false;
1472 // We didn't run any extern worker jobs, and in HHBBC mode we
1473 // also didn't load anything. Most of these stats are zero but a
1474 // few are still interesting.
1475 logPhaseStats("emit", *package, *client, sample,
1476 emitTimer.getMicroSeconds());
1479 std::thread fileCache{
1480 [&, package = std::move(package), parsedFiles = std::move(parsedFiles),
1481 index = std::move(index)] () mutable {
1483 Timer t{Timer::WallTime, "dropping unused files"};
1484 parsedFiles.reset();
1487 Timer t{Timer::WallTime, "dropping index"};
1488 index.reset();
1490 SCOPE_EXIT { package.reset(); };
1491 if (po.filecache.empty()) return;
1492 Timer _{Timer::WallTime, "saving file cache..."};
1493 HphpSessionAndThread session{Treadmill::SessionKind::CompilerEmit};
1494 package->writeVirtualFileSystem(po.filecache.c_str());
1495 struct stat sb;
1496 stat(po.filecache.c_str(), &sb);
1497 Logger::Info("%" PRId64" MB %s saved",
1498 (int64_t)sb.st_size/(1024*1024), po.filecache.c_str());
1501 SCOPE_EXIT { fileCache.join(); };
1503 std::thread asyncDispose;
1504 SCOPE_EXIT { if (asyncDispose.joinable()) asyncDispose.join(); };
1505 auto const dispose = [&] (std::unique_ptr<TicketExecutor> e,
1506 std::unique_ptr<Client> c) {
1507 if (!Option::ParserAsyncCleanup) {
1508 // If we don't want to cleanup asynchronously, do so now.
1509 c.reset();
1510 e.reset();
1511 return;
1513 // All the thread does is reset the unique_ptr to run the dtor.
1514 asyncDispose = std::thread{
1515 [e = std::move(e), c = std::move(c)] () mutable {
1516 c.reset();
1517 e.reset();
1522 auto const logSample = [&] {
1523 // Only log big builds.
1524 if (numUnits >= RO::EvalHHBBCMinUnitsToLog) {
1525 sample.force_init = true;
1526 StructuredLog::log("hhvm_whole_program", sample);
1528 return true;
1531 auto const finish = [&] {
1532 if (!Option::GenerateBinaryHHBC) return true;
1533 Timer _{Timer::WallTime, "finalizing repo"};
1534 auto const& packageInfo =
1535 RepoOptions::forFile(po.repoOptionsDir).packageInfo();
1536 repo->finish(getGlobalData(), *autoload, packageInfo);
1537 return true;
1539 if (!RO::EvalUseHHBBC) {
1540 logSample();
1541 dispose(std::move(executor), std::move(client));
1542 return finish();
1545 // We don't need these anymore, and since they can consume a lot of
1546 // memory, free them before doing anything else.
1547 unique.reset();
1548 hhbbcConfig.reset();
1550 assertx(!autoload.has_value());
1551 assertx(!repo.has_value());
1552 if (Option::GenerateBinaryHHBC) {
1553 autoload.emplace();
1554 repo.emplace(outputFile);
1557 if (Option::ConstFoldFileBC) {
1558 HHBBC::options.SourceRootForFileBC = Cfg::Server::SourceRoot;
1560 HHBBC::options.CoreDump = po.coredump;
1562 Timer timer{Timer::WallTime, "running HHBBC"};
1563 HphpSession session{Treadmill::SessionKind::HHBBC};
1565 client->resetStats();
1566 HHBBC::trace_time::register_client_stats(client->getStatsPtr());
1568 HHBBC::whole_program(
1569 std::move(*hhbbcInputs),
1570 HHBBC::Config::get(getGlobalData()),
1571 std::move(executor),
1572 std::move(client),
1573 emitUnit,
1574 dispose,
1575 &sample,
1576 Option::ParserThreadCount > 0 ? Option::ParserThreadCount : 0
1579 finish();
1580 sample.setInt("hhbbc_micros", timer.getMicroSeconds());
1581 logSample();
1582 return true;
1585 ///////////////////////////////////////////////////////////////////////////////
1589 ///////////////////////////////////////////////////////////////////////////////
1591 int compiler_main(int argc, char **argv) {
1592 try {
1593 rds::local::init();
1594 SCOPE_EXIT { rds::local::fini(); };
1596 CompilerOptions po;
1597 auto const ret = prepareOptions(po, argc, argv);
1598 if (ret == 1) return 0; // --help
1599 if (ret != 0) return ret; // command line error
1601 Timer totalTimer(Timer::WallTime, "running hphp");
1602 always_assert_flog(
1603 mkdir(po.outputDir.c_str(), 0777) == 0 || errno == EEXIST,
1604 "Unable to mkdir({}): {}",
1605 po.outputDir.c_str(),
1606 folly::errnoStr(errno)
1608 if (!process(po)) {
1609 Logger::Error("hphp failed");
1610 return -1;
1611 } else {
1612 Logger::Info("all files saved in %s ...", po.outputDir.c_str());
1613 return 0;
1615 } catch (const Exception& e) {
1616 Logger::Error("Exception: %s", e.getMessage().c_str());
1617 } catch (const std::exception& e) {
1618 Logger::Error("std::exception: %s", e.what());
1619 } catch (...) {
1620 Logger::Error("(non-standard exception \"%s\" was thrown)",
1621 current_exception_name().c_str());
1623 return -1;
1626 ///////////////////////////////////////////////////////////////////////////////