Re-sync with internal repository
[hiphop-php.git] / third-party / watchman / src / watchman / watcher / eden.cpp
blobfae141255bc047d5513949b7ab0f2d488b325c8f
1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
8 #include <cpptoml.h>
9 #include <folly/String.h>
10 #include <folly/futures/Future.h>
11 #include <folly/io/async/AsyncSocket.h>
12 #include <folly/io/async/EventBase.h>
13 #include <folly/io/async/EventBaseManager.h>
14 #include <folly/logging/xlog.h>
15 #include <thrift/lib/cpp2/async/HeaderClientChannel.h>
16 #include <thrift/lib/cpp2/async/PooledRequestChannel.h>
17 #include <thrift/lib/cpp2/async/ReconnectingRequestChannel.h>
18 #include <thrift/lib/cpp2/async/RetryingRequestChannel.h>
19 #include <thrift/lib/cpp2/async/RocketClientChannel.h>
20 #include <algorithm>
21 #include <chrono>
22 #include <iterator>
23 #include <thread>
24 #include "eden/fs/service/gen-cpp2/StreamingEdenService.h"
25 #include "watchman/ChildProcess.h"
26 #include "watchman/Errors.h"
27 #include "watchman/LRUCache.h"
28 #include "watchman/QueryableView.h"
29 #include "watchman/ThreadPool.h"
30 #include "watchman/fs/FSDetect.h"
31 #include "watchman/fs/FileDescriptor.h"
32 #include "watchman/query/GlobTree.h"
33 #include "watchman/query/Query.h"
34 #include "watchman/query/QueryContext.h"
35 #include "watchman/query/eval.h"
36 #include "watchman/root/Root.h"
37 #include "watchman/scm/SCM.h"
38 #include "watchman/thirdparty/wildmatch/wildmatch.h"
39 #include "watchman/watcher/Watcher.h"
40 #include "watchman/watcher/WatcherRegistry.h"
42 using apache::thrift::TApplicationException;
43 using namespace facebook::eden;
44 using folly::AsyncSocket;
45 using folly::to;
46 using std::make_unique;
48 namespace {
49 using EdenDtype = facebook::eden::Dtype;
50 using watchman::DType;
52 DType getDTypeFromEden(EdenDtype dtype) {
53 // TODO: Eden guarantees that dtypes have consistent values on all platforms,
54 // including Windows. If we made Watchman guarantee that too, this could be
55 // replaced with a static_cast.
57 switch (dtype) {
58 case EdenDtype::UNKNOWN:
59 return DType::Unknown;
60 case EdenDtype::FIFO:
61 return DType::Fifo;
62 case EdenDtype::CHAR:
63 return DType::Char;
64 case EdenDtype::DIR:
65 return DType::Dir;
66 case EdenDtype::BLOCK:
67 return DType::Block;
68 case EdenDtype::REGULAR:
69 return DType::Regular;
70 case EdenDtype::LINK:
71 return DType::Symlink;
72 case EdenDtype::SOCKET:
73 return DType::Socket;
74 case EdenDtype::WHITEOUT:
75 return DType::Whiteout;
77 return DType::Unknown;
80 SyncBehavior getSyncBehavior() {
81 // Use a no-sync behavior as syncToNow will be called if a synchronization is
82 // necessary which will do the proper synchronization.
83 auto sync = SyncBehavior{};
84 sync.syncTimeoutSeconds() = 0;
85 return sync;
88 } // namespace
90 namespace watchman {
91 namespace {
92 struct NameAndDType {
93 std::string name;
94 DType dtype;
96 explicit NameAndDType(const std::string& name, DType dtype = DType::Unknown)
97 : name(name), dtype(dtype) {}
100 /** This is a helper for settling out subscription events.
101 * We have a single instance of the callback object that we schedule
102 * each time we get an update from the eden server. If we are already
103 * scheduled we will cancel it and reschedule it.
105 class SettleCallback : public folly::HHWheelTimer::Callback {
106 public:
107 SettleCallback(folly::EventBase* eventBase, std::shared_ptr<Root> root)
108 : eventBase_(eventBase), root_(std::move(root)) {}
110 void timeoutExpired() noexcept override {
111 try {
112 auto settledPayload = json_object({{"settled", json_true()}});
113 root_->unilateralResponses->enqueue(std::move(settledPayload));
114 } catch (const std::exception& exc) {
115 log(ERR,
116 "error while dispatching settle payload; cancel watch: ",
117 exc.what(),
118 "\n");
119 eventBase_->terminateLoopSoon();
123 void callbackCanceled() noexcept override {
124 // We must override this because the default is to call timeoutExpired().
125 // We don't want that to happen because we're only canceled in the case
126 // where we want to delay the timeoutExpired() callback.
129 private:
130 folly::EventBase* eventBase_;
131 std::shared_ptr<Root> root_;
134 // Resolve the eden socket; On POSIX systems we use the .eden dir that is
135 // present in every dir of an eden mount to locate the symlink to the socket.
136 // On Windows systems, .eden is only present in the repo root and contains
137 // the toml config file with the path to the socket.
138 std::string resolveSocketPath(w_string_piece rootPath) {
139 #ifdef _WIN32
140 auto configPath = to<std::string>(rootPath.view(), "/.eden/config");
141 auto config = cpptoml::parse_file(configPath);
143 return *config->get_qualified_as<std::string>("Config.socket");
144 #else
145 auto path = to<std::string>(rootPath.view(), "/.eden/socket");
146 // It is important to resolve the link because the path in the eden mount
147 // may exceed the maximum permitted unix domain socket path length.
148 // This is actually how things our in our integration test environment.
149 return readSymbolicLink(path.c_str()).string();
150 #endif
153 folly::SocketAddress getEdenSocketAddress(w_string_piece rootPath) {
154 folly::SocketAddress addr;
156 auto socketPath = resolveSocketPath(rootPath);
157 addr.setFromPath(to<std::string>(socketPath));
158 return addr;
161 /** Create a thrift client that will connect to the eden server associated
162 * with the current user. */
163 std::unique_ptr<StreamingEdenServiceAsyncClient> getEdenClient(
164 std::shared_ptr<apache::thrift::RequestChannel> channel) {
165 return make_unique<StreamingEdenServiceAsyncClient>(std::move(channel));
168 class GetJournalPositionCallback : public folly::HHWheelTimer::Callback {
169 public:
170 GetJournalPositionCallback(
171 folly::EventBase* eventBase,
172 std::shared_ptr<apache::thrift::RequestChannel> thriftChannel,
173 std::string mountPoint)
174 : eventBase_{eventBase},
175 thriftChannel_{std::move(thriftChannel)},
176 mountPoint_{std::move(mountPoint)} {}
178 void timeoutExpired() noexcept override {
179 try {
180 auto edenClient = getEdenClient(thriftChannel_);
182 // Calling getCurrentJournalPosition will allow EdenFS to send new
183 // notification about files changed.
184 JournalPosition journal;
185 edenClient->sync_getCurrentJournalPosition(journal, mountPoint_);
186 } catch (const std::exception& exc) {
187 log(ERR,
188 "error while getting EdenFS's journal position; cancel watch: ",
189 exc.what(),
190 "\n");
191 eventBase_->terminateLoopSoon();
195 private:
196 folly::EventBase* eventBase_;
197 std::shared_ptr<apache::thrift::RequestChannel> thriftChannel_;
198 std::string mountPoint_;
201 class EdenFileResult : public FileResult {
202 public:
203 EdenFileResult(
204 const w_string& rootPath,
205 std::shared_ptr<apache::thrift::RequestChannel> thriftChannel,
206 const w_string& fullName,
207 ClockTicks* ticks = nullptr,
208 bool isNew = false,
209 DType dtype = DType::Unknown)
210 : rootPath_(rootPath),
211 thriftChannel_{std::move(thriftChannel)},
212 fullName_(fullName),
213 dtype_(dtype) {
214 otime_.ticks = ctime_.ticks = 0;
215 otime_.timestamp = ctime_.timestamp = 0;
216 if (ticks) {
217 otime_.ticks = *ticks;
218 if (isNew) {
219 // the "ctime" in the context of FileResult represents the point
220 // in time that we saw the file transition !exists -> exists.
221 // We don't strictly know the point at which that happened for results
222 // returned from eden, but it will tell us whether that happened in
223 // a given since query window by listing the file in the created files
224 // set. We set the isNew flag in this case. The goal here is to
225 // ensure that the code in query/eval.cpp considers us to be new too,
226 // and that works because we set the created time ticks == the last
227 // change tick. The logic in query/eval.cpp will consider this to
228 // be new because the ctime > lower bound in the since query.
229 // When isNew is not set our ctime tick value is initialized to
230 // zero which always fails that is_new check.
231 ctime_.ticks = otime_.ticks;
236 std::optional<FileInformation> stat() override {
237 if (!stat_.has_value()) {
238 accessorNeedsProperties(FileResult::Property::FullFileInformation);
239 return std::nullopt;
241 return stat_;
244 std::optional<DType> dtype() override {
245 // We're using Unknown as the default value to avoid also wrapping
246 // this value up in an Optional in our internal storage.
247 // In theory this is ambiguous, but in practice Eden will never
248 // return Unknown for dtype values so this is safe to use with
249 // impunity.
250 if (dtype_ != DType::Unknown) {
251 return dtype_;
253 if (stat_.has_value()) {
254 return stat_->dtype();
256 accessorNeedsProperties(FileResult::Property::FileDType);
257 return std::nullopt;
260 std::optional<size_t> size() override {
261 if (!stat_.has_value()) {
262 accessorNeedsProperties(FileResult::Property::Size);
263 return std::nullopt;
265 return stat_->size;
268 std::optional<struct timespec> accessedTime() override {
269 if (!stat_.has_value()) {
270 accessorNeedsProperties(FileResult::Property::StatTimeStamps);
271 return std::nullopt;
273 return stat_->atime;
276 std::optional<struct timespec> modifiedTime() override {
277 if (!stat_.has_value()) {
278 accessorNeedsProperties(FileResult::Property::StatTimeStamps);
279 return std::nullopt;
281 return stat_->mtime;
284 std::optional<struct timespec> changedTime() override {
285 if (!stat_.has_value()) {
286 accessorNeedsProperties(FileResult::Property::StatTimeStamps);
287 return std::nullopt;
289 return stat_->ctime;
292 w_string_piece baseName() override {
293 return fullName_.piece().baseName();
296 w_string_piece dirName() override {
297 return fullName_.piece().dirName();
300 void setExists(bool exists) noexcept {
301 exists_ = exists;
302 if (!exists) {
303 stat_ = FileInformation::makeDeletedFileInformation();
307 std::optional<bool> exists() override {
308 if (!exists_.has_value()) {
309 accessorNeedsProperties(FileResult::Property::Exists);
310 return std::nullopt;
312 return exists_;
315 std::optional<w_string> readLink() override {
316 if (symlinkTarget_.has_value()) {
317 return symlinkTarget_;
319 accessorNeedsProperties(FileResult::Property::SymlinkTarget);
320 return std::nullopt;
323 std::optional<ClockStamp> ctime() override {
324 return ctime_;
327 std::optional<ClockStamp> otime() override {
328 return otime_;
331 std::optional<FileResult::ContentHash> getContentSha1() override {
332 if (!sha1_.has_value()) {
333 accessorNeedsProperties(FileResult::Property::ContentSha1);
334 return std::nullopt;
336 switch (sha1_->getType()) {
337 // Copy thrift SHA1Result aka (std::string) into
338 // watchman FileResult::ContentHash aka (std::array<uint8_t, 20>)
339 case SHA1Result::Type::sha1: {
340 auto& hash = sha1_->get_sha1();
341 FileResult::ContentHash result;
342 std::copy(hash.begin(), hash.end(), result.begin());
344 return result;
347 // Thrift error occured
348 case SHA1Result::Type::error: {
349 auto& err = sha1_->get_error();
350 XCHECK(err.errorCode());
351 throw std::system_error(
352 *err.errorCode(), std::generic_category(), *err.message());
355 // Something is wrong with type union
356 default:
357 throw std::runtime_error(
358 "Unknown thrift data for EdenFileResult::getContentSha1");
362 void batchFetchProperties(
363 const std::vector<std::unique_ptr<FileResult>>& files) override {
364 std::vector<EdenFileResult*> getFileInformationFiles;
365 std::vector<std::string> getFileInformationNames;
366 // If only dtype and exists are needed, Eden has a cheaper API for
367 // retrieving them.
368 bool onlyEntryInfoNeeded = true;
370 std::vector<EdenFileResult*> getShaFiles;
371 std::vector<std::string> getShaNames;
373 std::vector<EdenFileResult*> getSymlinkFiles;
375 for (auto& f : files) {
376 auto& edenFile = dynamic_cast<EdenFileResult&>(*f);
378 auto relName = edenFile.fullName_.piece();
380 if (rootPath_ == edenFile.fullName_) {
381 // The root tree inode has changed
382 relName = "";
383 } else {
384 // Strip off the mount point prefix for the names we're going
385 // to pass to eden. The +1 is its trailing slash.
386 relName.advance(rootPath_.size() + 1);
389 if (edenFile.neededProperties() & FileResult::Property::SymlinkTarget) {
390 // We need to know if the node is a symlink
391 edenFile.accessorNeedsProperties(FileResult::Property::FileDType);
393 getSymlinkFiles.emplace_back(&edenFile);
396 if (edenFile.neededProperties() &
397 (FileResult::Property::FileDType | FileResult::Property::CTime |
398 FileResult::Property::OTime | FileResult::Property::Exists |
399 FileResult::Property::Size | FileResult::Property::StatTimeStamps |
400 FileResult::Property::FullFileInformation)) {
401 getFileInformationFiles.emplace_back(&edenFile);
402 getFileInformationNames.emplace_back(relName.data(), relName.size());
404 if (edenFile.neededProperties() &
405 ~(FileResult::Property::FileDType | FileResult::Property::Exists)) {
406 // We could maintain two lists and call both getFileInformation and
407 // getEntryInformation in parallel, but in practice the set of
408 // properties should usually be the same across all files.
409 onlyEntryInfoNeeded = false;
413 if (edenFile.neededProperties() & FileResult::Property::ContentSha1) {
414 getShaFiles.emplace_back(&edenFile);
415 getShaNames.emplace_back(relName.data(), relName.size());
418 // If we were to throw later in this method, we will have forgotten
419 // the input set of properties, but it is ok: if we do decide to
420 // re-evaluate after throwing, the accessors will set the mask up
421 // accordingly and we'll end up calling back in here if needed.
422 edenFile.clearNeededProperties();
425 auto client = getEdenClient(thriftChannel_);
426 loadFileInformation(
427 client.get(),
428 rootPath_,
429 getFileInformationNames,
430 getFileInformationFiles,
431 onlyEntryInfoNeeded);
433 // TODO: add eden bulk readlink call
434 loadSymlinkTargets(client.get(), getSymlinkFiles);
436 if (!getShaFiles.empty()) {
437 std::vector<SHA1Result> sha1s;
438 client->sync_getSHA1(
439 sha1s, std::string{rootPath_.view()}, getShaNames, getSyncBehavior());
441 if (sha1s.size() != getShaFiles.size()) {
442 log(ERR,
443 "Requested SHA-1 of ",
444 getShaFiles.size(),
445 " but Eden returned ",
446 sha1s.size(),
447 " results -- ignoring");
448 } else {
449 auto sha1Iter = sha1s.begin();
450 for (auto& edenFile : getShaFiles) {
451 edenFile->sha1_ = *sha1Iter++;
457 private:
458 w_string rootPath_;
459 std::shared_ptr<apache::thrift::RequestChannel> thriftChannel_;
460 w_string fullName_;
461 std::optional<FileInformation> stat_;
462 std::optional<bool> exists_;
463 ClockStamp ctime_;
464 ClockStamp otime_;
465 std::optional<SHA1Result> sha1_;
466 std::optional<w_string> symlinkTarget_;
467 DType dtype_{DType::Unknown};
469 // Read the symlink targets for each of the provided `files`. The files
470 // had SymlinkTarget set in neededProperties prior to clearing it in
471 // the batchFetchProperties() method that calls us, so we know that
472 // we unconditionally need to read these links.
473 static void loadSymlinkTargets(
474 StreamingEdenServiceAsyncClient*,
475 const std::vector<EdenFileResult*>& files) {
476 for (auto& edenFile : files) {
477 if (!edenFile->stat_->isSymlink()) {
478 // If this file is not a symlink then we immediately yield
479 // a nullptr w_string instance rather than propagating an error.
480 // This behavior is relied upon by the field rendering code and
481 // checked in test_symlink.py.
482 edenFile->symlinkTarget_ = w_string();
483 continue;
485 edenFile->symlinkTarget_ = readSymbolicLink(edenFile->fullName_.c_str());
489 static void loadFileInformation(
490 StreamingEdenServiceAsyncClient* client,
491 const w_string& rootPath,
492 const std::vector<std::string>& names,
493 const std::vector<EdenFileResult*>& outFiles,
494 bool onlyEntryInfoNeeded) {
495 w_assert(
496 names.size() == outFiles.size(), "names.size must == outFiles.size");
497 if (names.empty()) {
498 return;
501 auto applyResults = [&](const auto& edenInfo) {
502 if (names.size() != edenInfo.size()) {
503 log(ERR,
504 "Requested file information of ",
505 names.size(),
506 " files but Eden returned information for ",
507 edenInfo.size(),
508 " files. Treating missing entries as missing files.");
511 auto infoIter = edenInfo.begin();
512 for (auto& edenFileResult : outFiles) {
513 if (infoIter == edenInfo.end()) {
514 edenFileResult->setExists(false);
515 } else {
516 edenFileResult->applyInformationOrError(*infoIter);
517 ++infoIter;
522 if (onlyEntryInfoNeeded) {
523 std::vector<EntryInformationOrError> info;
524 try {
525 client->sync_getEntryInformation(
526 info, std::string{rootPath.view()}, names, getSyncBehavior());
527 applyResults(info);
528 return;
529 } catch (const TApplicationException& ex) {
530 if (TApplicationException::UNKNOWN_METHOD != ex.getType()) {
531 throw;
533 // getEntryInformation is not available in this version of
534 // Eden. Fall back to the older, more expensive
535 // getFileInformation below.
539 std::vector<FileInformationOrError> info;
540 client->sync_getFileInformation(
541 info, std::string{rootPath.view()}, names, getSyncBehavior());
542 applyResults(info);
545 void applyInformationOrError(const EntryInformationOrError& infoOrErr) {
546 if (infoOrErr.getType() == EntryInformationOrError::Type::info) {
547 dtype_ = getDTypeFromEden(*infoOrErr.get_info().dtype());
548 setExists(true);
549 } else {
550 setExists(false);
554 void applyInformationOrError(const FileInformationOrError& infoOrErr) {
555 if (infoOrErr.getType() == FileInformationOrError::Type::info) {
556 FileInformation stat;
558 stat.size = *infoOrErr.get_info().size();
559 stat.mode = *infoOrErr.get_info().mode();
560 stat.mtime.tv_sec = *infoOrErr.get_info().mtime()->seconds();
561 stat.mtime.tv_nsec = *infoOrErr.get_info().mtime()->nanoSeconds();
563 stat_ = std::move(stat);
564 setExists(true);
565 } else {
566 setExists(false);
571 static std::string escapeGlobSpecialChars(w_string_piece str) {
572 std::string result;
574 for (size_t i = 0; i < str.size(); ++i) {
575 auto c = str[i];
576 switch (c) {
577 case '*':
578 case '?':
579 case '[':
580 case ']':
581 case '\\':
582 result.append("\\");
583 break;
585 result.append(&c, 1);
588 return result;
591 /** filter out paths that are ignored or that are not part of the
592 * relative_root restriction in a query.
593 * Ideally we'd pass this information into eden so that it doesn't
594 * have to walk those paths and return the data to us, but for the
595 * moment we have to filter it out of the results.
596 * We need to respect the ignore_dirs configuration setting and
597 * also remove anything that doesn't match the relative_root constraint
598 * in the query. */
599 void filterOutPaths(std::vector<NameAndDType>& files, QueryContext* ctx) {
600 files.erase(
601 std::remove_if(
602 files.begin(),
603 files.end(),
604 [ctx](const NameAndDType& item) {
605 auto full = w_string::pathCat({ctx->root->root_path, item.name});
607 if (!ctx->fileMatchesRelativeRoot(full)) {
608 // Not in the desired area, so filter it out
609 return true;
612 return ctx->root->ignore.isIgnored(full.data(), full.size());
614 files.end());
617 void appendGlobResultToNameAndDTypeVec(
618 std::vector<NameAndDType>& results,
619 Glob&& glob) {
620 size_t i = 0;
621 size_t numDTypes = glob.get_dtypes().size();
623 for (auto& name : glob.get_matchingFiles()) {
624 // The server may not support dtypes, so this list may be empty.
625 // This cast is OK because eden returns the system dependent bits to us, and
626 // our DType enum is declared in terms of those bits
627 auto dtype = i < numDTypes ? static_cast<DType>(glob.get_dtypes()[i])
628 : DType::Unknown;
629 results.emplace_back(name, dtype);
630 ++i;
634 /** Returns the files that match the glob. */
635 std::vector<NameAndDType> globNameAndDType(
636 StreamingEdenServiceAsyncClient* client,
637 const std::string& mountPoint,
638 const std::vector<std::string>& globPatterns,
639 bool includeDotfiles,
640 bool splitGlobPattern = false) {
641 // TODO(xavierd): Once the config: "eden_split_glob_pattern" is rolled out
642 // everywhere, remove this code.
643 if (splitGlobPattern && globPatterns.size() > 1) {
644 folly::DrivableExecutor* executor =
645 folly::EventBaseManager::get()->getEventBase();
647 std::vector<folly::Future<Glob>> globFutures;
648 globFutures.reserve(globPatterns.size());
649 for (const std::string& globPattern : globPatterns) {
650 GlobParams params;
651 params.mountPoint() = mountPoint;
652 params.globs() = std::vector<std::string>{globPattern};
653 params.includeDotfiles() = includeDotfiles;
654 params.wantDtype() = true;
655 params.sync() = getSyncBehavior();
657 globFutures.emplace_back(
658 client->semifuture_globFiles(params).via(executor));
661 std::vector<NameAndDType> allResults;
662 for (folly::Future<Glob>& globFuture : globFutures) {
663 appendGlobResultToNameAndDTypeVec(
664 allResults, std::move(globFuture).getVia(executor));
666 return allResults;
667 } else {
668 GlobParams params;
669 params.mountPoint() = mountPoint;
670 params.globs() = globPatterns;
671 params.includeDotfiles() = includeDotfiles;
672 params.wantDtype() = true;
673 params.sync() = getSyncBehavior();
675 Glob glob;
676 client->sync_globFiles(glob, params);
677 std::vector<NameAndDType> result;
678 appendGlobResultToNameAndDTypeVec(result, std::move(glob));
679 return result;
683 namespace {
686 * Construct a pooled Thrift channel that will automatically reconnect to
687 * EdenFS on error.
689 std::shared_ptr<apache::thrift::RequestChannel> makeThriftChannel(
690 w_string rootPath,
691 int numRetries) {
692 auto channel = apache::thrift::PooledRequestChannel::newChannel(
693 folly::EventBaseManager::get()->getEventBase(),
694 folly::getUnsafeMutableGlobalIOExecutor(),
695 [numRetries, rootPath = std::move(rootPath)](folly::EventBase& eb) {
696 return apache::thrift::RetryingRequestChannel::newChannel(
698 numRetries,
699 apache::thrift::ReconnectingRequestChannel::newChannel(
700 eb, [rootPath](folly::EventBase& eb) {
701 return apache::thrift::RocketClientChannel::newChannel(
702 AsyncSocket::newSocket(
703 &eb, getEdenSocketAddress(rootPath)));
704 }));
706 return channel;
709 } // namespace
711 class EdenView final : public QueryableView {
712 public:
713 explicit EdenView(const w_string& root_path, const Configuration& config)
714 : QueryableView{root_path, /*requiresCrawl=*/false},
715 rootPath_(root_path),
716 thriftChannel_(makeThriftChannel(
717 rootPath_,
718 config.getInt("eden_retry_connection_count", 3))),
719 mountPoint_(root_path.string()),
720 splitGlobPattern_(config.getBool("eden_split_glob_pattern", false)),
721 thresholdForFreshInstance_(config.getInt(
722 "eden_file_count_threshold_for_fresh_instance",
723 10000)) {}
725 void timeGenerator(const Query* /*query*/, QueryContext* ctx) const override {
726 ctx->generationStarted();
728 if (ctx->since.is_timestamp()) {
729 throw QueryExecError(
730 "timestamp based since queries are not supported with eden");
733 auto allFilesResult = getAllChangesSince(ctx);
734 auto resultTicks = allFilesResult.ticks;
735 auto& fileInfo = allFilesResult.fileInfo;
736 // We use the list of created files to synthesize the "new" field
737 // in the file results
738 auto& createdFileNames = allFilesResult.createdFileNames;
740 // Filter out any ignored files
741 filterOutPaths(fileInfo, ctx);
743 auto isFreshInstance = ctx->since.is_fresh_instance();
744 for (auto& item : fileInfo) {
745 // a file is considered new if it was present in the created files
746 // set returned from eden.
747 bool isNew = createdFileNames.find(item.name) != createdFileNames.end();
749 auto file = make_unique<EdenFileResult>(
750 rootPath_,
751 thriftChannel_,
752 w_string::pathCat({mountPoint_, item.name}),
753 &resultTicks,
754 isNew,
755 item.dtype);
757 if (isFreshInstance) {
758 // Fresh instance queries only return data about files
759 // that currently exist, and we know this to be true
760 // here because our list of files comes from evaluating
761 // a glob.
762 file->setExists(true);
765 w_query_process_file(ctx->query, ctx, std::move(file));
768 ctx->bumpNumWalked(fileInfo.size());
771 folly::SemiFuture<folly::Unit> waitForSettle(
772 std::chrono::milliseconds /*settle_period*/) override {
773 // We could implement this feature for EdenFS, but since the
774 // Watchman-EdenFS integration is correct and waitForSettle is a workaround
775 // for broken filesystem notification APIs, do nothing for now.
776 return folly::unit;
779 CookieSync::SyncResult syncToNow(
780 const std::shared_ptr<Root>& root,
781 std::chrono::milliseconds timeout) override {
782 try {
783 return sync(root).get(timeout);
784 } catch (const folly::FutureTimeout& ex) {
785 throw std::system_error(ETIMEDOUT, std::generic_category(), ex.what());
787 return {};
790 folly::SemiFuture<CookieSync::SyncResult> sync(
791 const std::shared_ptr<Root>&) override {
792 return folly::makeSemiFutureWith([this]() {
793 // Set an unlimited timeout. The caller is responsible for using a
794 // timeout to bound the time spent in this method.
795 facebook::eden::SyncBehavior sync;
796 sync.syncTimeoutSeconds() = -1;
798 facebook::eden::SynchronizeWorkingCopyParams params;
799 params.sync() = sync;
801 auto client = getEdenClient(thriftChannel_);
802 return client->semifuture_synchronizeWorkingCopy(
803 mountPoint_, params);
805 .defer([](folly::Try<folly::Unit> try_) {
806 if (try_.hasException()) {
807 if (auto* exc =
808 try_.tryGetExceptionObject<TApplicationException>()) {
809 if (exc->getType() == TApplicationException::UNKNOWN_METHOD) {
810 return folly::Try{CookieSync::SyncResult{}};
813 return folly::Try<CookieSync::SyncResult>{
814 std::move(try_.exception())};
816 return folly::Try{CookieSync::SyncResult{}};
820 void executeGlobBasedQuery(
821 const std::vector<std::string>& globStrings,
822 const Query* query,
823 QueryContext* ctx,
824 bool includeDir = true) const {
825 auto client = getEdenClient(thriftChannel_);
827 auto includeDotfiles = (query->glob_flags & WM_PERIOD) == 0;
828 auto fileInfo = globNameAndDType(
829 client.get(),
830 mountPoint_,
831 globStrings,
832 includeDotfiles,
833 splitGlobPattern_);
835 // Filter out any ignored files
836 filterOutPaths(fileInfo, ctx);
838 for (auto& item : fileInfo) {
839 auto file = make_unique<EdenFileResult>(
840 rootPath_,
841 thriftChannel_,
842 w_string::pathCat({mountPoint_, item.name}),
843 /*ticks=*/nullptr,
844 /*isNew=*/false,
845 item.dtype);
847 // The results of a glob are known to exist
848 file->setExists(true);
850 // Skip processing directories
851 if (!includeDir && item.dtype == DType::Dir) {
852 continue;
855 w_query_process_file(ctx->query, ctx, std::move(file));
858 ctx->bumpNumWalked(fileInfo.size());
861 // Helper for computing a relative path prefix piece.
862 // The returned piece is owned by the supplied context object!
863 w_string_piece computeRelativePathPiece(QueryContext* ctx) const {
864 w_string_piece rel;
865 if (ctx->query->relative_root) {
866 rel = ctx->query->relative_root;
867 rel.advance(ctx->root->root_path.size() + 1);
869 return rel;
872 /** Walks files that match the supplied set of paths */
873 void pathGenerator(const Query* query, QueryContext* ctx) const override {
874 ctx->generationStarted();
875 // If the query is anchored to a relative_root, use that that
876 // avoid sucking down a massive list of files from eden
877 auto rel = computeRelativePathPiece(ctx);
879 std::vector<std::string> globStrings;
880 // Translate the path list into a list of globs
881 for (auto& path : *query->paths) {
882 if (path.depth > 0) {
883 // We don't have an easy way to express depth constraints
884 // in the existing glob API, so we just punt for the moment.
885 // I believe that this sort of query is quite rare anyway.
886 throw QueryExecError(
887 "the eden watcher only supports depth 0 or depth -1");
889 // -1 depth is infinite which we can translate to a recursive
890 // glob. 0 depth is direct descendant which we can translate
891 // to a simple * wildcard.
892 auto glob = path.depth == -1 ? "**/*" : "*";
894 globStrings.emplace_back(std::string{
895 w_string::pathCat({rel, escapeGlobSpecialChars(path.name), glob})
896 .view()});
898 executeGlobBasedQuery(globStrings, query, ctx);
900 // We send another round of glob queries to query about the information
901 // about the path themselves since we want to include the paths if they are
902 // files.
903 // TODO(zeyi): replace this with builtin path generator inside EdenFS
904 globStrings.clear();
905 for (auto& path : *query->paths) {
906 globStrings.emplace_back(std::string{
907 w_string::pathCat({rel, escapeGlobSpecialChars(path.name)}).view()});
910 executeGlobBasedQuery(globStrings, query, ctx, false);
913 void globGenerator(const Query* query, QueryContext* ctx) const override {
914 if (!query->glob_tree) {
915 // If we are called via the codepath in the query evaluator that
916 // just speculatively executes queries then `glob` may not be
917 // present; short-circuit in that case.
918 return;
921 ctx->generationStarted();
922 // If the query is anchored to a relative_root, use that that
923 // avoid sucking down a massive list of files from eden
924 auto rel = computeRelativePathPiece(ctx);
926 std::vector<std::string> globStrings;
927 for (auto& glob : query->glob_tree->unparse()) {
928 globStrings.emplace_back(
929 std::string{w_string::pathCat({rel, glob}).view()});
932 // More glob flags/functionality:
933 auto noescape = bool(query->glob_flags & WM_NOESCAPE);
934 if (noescape) {
935 throw QueryExecError(
936 "glob_noescape is not supported for the eden watcher");
938 executeGlobBasedQuery(globStrings, query, ctx);
941 void allFilesGenerator(const Query* query, QueryContext* ctx) const override {
942 ctx->generationStarted();
943 // If the query is anchored to a relative_root, use that that
944 // avoid sucking down a massive list of files from eden
945 std::string globPattern;
946 auto rel = computeRelativePathPiece(ctx);
947 if (rel.size() > 0) {
948 globPattern.append(rel.data(), rel.size());
949 globPattern.append("/");
951 globPattern.append("**");
952 executeGlobBasedQuery(std::vector<std::string>{globPattern}, query, ctx);
955 ClockPosition getMostRecentRootNumberAndTickValue() const override {
956 auto client = getEdenClient(thriftChannel_);
957 JournalPosition position;
958 client->sync_getCurrentJournalPosition(position, mountPoint_);
959 return ClockPosition(
960 *position.mountGeneration(), *position.sequenceNumber());
963 w_string getCurrentClockString() const override {
964 return getMostRecentRootNumberAndTickValue().toClockString();
967 bool doAnyOfTheseFilesExist(
968 const std::vector<w_string>& /*fileNames*/) const override {
969 return false;
972 void startThreads(const std::shared_ptr<Root>& root) override {
973 auto self = shared_from_this();
974 std::thread thr([self, this, root]() { subscriberThread(root); });
975 thr.detach();
978 void stopThreads() override {
979 subscriberEventBase_.terminateLoopSoon();
982 json_ref getWatcherDebugInfo() const override {
983 return json_null();
986 void clearWatcherDebugInfo() override {}
988 using EdenFSSubcription =
989 apache::thrift::ClientBufferedStream<JournalPosition>::Subscription;
991 EdenFSSubcription rocketSubscribe(
992 std::shared_ptr<Root> root,
993 SettleCallback& settleCallback,
994 GetJournalPositionCallback& getJournalPositionCallback,
995 std::chrono::milliseconds settleTimeout) {
996 auto client = getEdenClient(thriftChannel_);
997 auto stream = client->sync_subscribeStreamTemporary(
998 std::string(root->root_path.data(), root->root_path.size()));
999 return std::move(stream).subscribeExTry(
1000 &subscriberEventBase_,
1001 [&settleCallback,
1002 &getJournalPositionCallback,
1003 this,
1004 root,
1005 settleTimeout](folly::Try<JournalPosition>&& t) {
1006 if (t.hasValue()) {
1007 try {
1008 log(DBG, "Got subscription push from eden\n");
1009 if (settleCallback.isScheduled()) {
1010 log(DBG, "reschedule settle timeout\n");
1011 settleCallback.cancelTimeout();
1013 subscriberEventBase_.timer().scheduleTimeout(
1014 &settleCallback, settleTimeout);
1016 // For bursty writes to the working copy, let's limit the
1017 // amount of notification that Watchman receives by
1018 // scheduling a getCurrentJournalPosition call in the future.
1020 // Thus, we're guarantee to only receive one notification per
1021 // settleTimeout/2 and no more, regardless of how much
1022 // writing is done in the repository.
1023 subscriberEventBase_.timer().scheduleTimeout(
1024 &getJournalPositionCallback, settleTimeout / 2);
1025 } catch (const std::exception& exc) {
1026 log(ERR,
1027 "Exception while processing eden subscription: ",
1028 exc.what(),
1029 ": cancel watch\n");
1030 subscriberEventBase_.terminateLoopSoon();
1032 } else {
1033 auto reason = t.hasException()
1034 ? folly::exceptionStr(std::move(t.exception()))
1035 : "controlled shutdown";
1036 log(ERR,
1037 "subscription stream ended: ",
1038 w_string_piece(reason.data(), reason.size()),
1039 ", cancel watch\n");
1040 // We won't be called again, but we terminate the loop
1041 // just to make sure.
1042 subscriberEventBase_.terminateLoopSoon();
1047 // This is the thread that we use to listen to the stream of
1048 // changes coming in from the EdenFS server.
1049 void subscriberThread(std::shared_ptr<Root> root) noexcept {
1050 SCOPE_EXIT {
1051 // ensure that the root gets torn down,
1052 // otherwise we'd leave it in a broken state.
1053 root->cancel();
1056 w_set_thread_name("edensub ", root->root_path.view());
1057 log(DBG, "Started subscription thread\n");
1059 std::optional<EdenFSSubcription> subscription;
1060 SCOPE_EXIT {
1061 if (subscription.has_value()) {
1062 subscription->cancel();
1063 std::move(*subscription).join();
1067 try {
1068 // Prepare the callback
1069 SettleCallback settleCallback{&subscriberEventBase_, root};
1070 GetJournalPositionCallback getJournalPositionCallback{
1071 &subscriberEventBase_, thriftChannel_, mountPoint_};
1072 // Figure out the correct value for settling
1073 std::chrono::milliseconds settleTimeout(root->trigger_settle);
1075 subscription = rocketSubscribe(
1076 root, settleCallback, getJournalPositionCallback, settleTimeout);
1078 // This will run until the stream ends
1079 log(DBG, "Started subscription thread loop\n");
1080 subscribeReadyPromise_.setValue();
1081 subscriberEventBase_.loop();
1083 } catch (const std::exception& exc) {
1084 log(ERR,
1085 "uncaught exception in subscription thread, cancel watch:",
1086 exc.what(),
1087 "\n");
1091 const w_string& getName() const override {
1092 static w_string name("eden");
1093 return name;
1096 folly::SemiFuture<folly::Unit> waitUntilReadyToQuery() override {
1097 return subscribeReadyPromise_.getSemiFuture();
1100 private:
1102 * Returns all the files in the watched directory for a fresh instance.
1104 * In the case where the query specifically ask for an empty file list on a
1105 * fresh instance, an empty vector will be returned.
1107 std::vector<NameAndDType> getAllFilesForFreshInstance(
1108 QueryContext* ctx) const {
1109 if (ctx->query->empty_on_fresh_instance) {
1110 // Avoid a full tree walk if we don't need it!
1111 return std::vector<NameAndDType>();
1114 std::string globPattern;
1115 if (ctx->query->relative_root) {
1116 w_string_piece rel(ctx->query->relative_root);
1117 rel.advance(ctx->root->root_path.size() + 1);
1118 globPattern.append(rel.data(), rel.size());
1119 globPattern.append("/");
1121 globPattern.append("**");
1123 auto includeDotfiles = (ctx->query->glob_flags & WM_PERIOD) == 0;
1125 auto client = getEdenClient(thriftChannel_);
1126 return globNameAndDType(
1127 client.get(),
1128 mountPoint_,
1129 std::vector<std::string>{std::move(globPattern)},
1130 includeDotfiles);
1133 struct GetAllChangesSinceResult {
1134 ClockTicks ticks;
1135 std::vector<NameAndDType> fileInfo;
1136 std::unordered_set<std::string> createdFileNames;
1140 * Build a GetAllChangesSinceResult for a fresh instance.
1142 GetAllChangesSinceResult makeFreshInstance(QueryContext* ctx) const {
1143 GetAllChangesSinceResult result;
1145 ctx->since.set_fresh_instance();
1146 result.ticks = ctx->clockAtStartOfQuery.position().ticks;
1147 result.fileInfo = getAllFilesForFreshInstance(ctx);
1149 return result;
1152 GetAllChangesSinceResult getAllChangesSinceStreaming(
1153 QueryContext* ctx) const {
1154 JournalPosition position;
1155 position.mountGeneration() = ctx->clockAtStartOfQuery.position().rootNumber;
1156 // dial back to the sequence number from the query
1157 position.sequenceNumber() =
1158 std::get<QuerySince::Clock>(ctx->since.since).ticks;
1160 StreamChangesSinceParams params;
1161 params.mountPoint() = mountPoint_;
1162 params.fromPosition() = position;
1164 auto client = getEdenClient(thriftChannel_);
1165 auto [resultChangesSince, stream] = client->sync_streamChangesSince(params);
1167 GetAllChangesSinceResult result;
1168 result.ticks = *resultChangesSince.toPosition()->sequenceNumber();
1170 // -1 = removed
1171 // 0 = changed
1172 // 1 = added
1173 std::unordered_map<std::string, int> byFile;
1174 std::unordered_map<std::string, EdenDtype> dtypes;
1176 std::move(stream).subscribeInline(
1177 [&](folly::Try<ChangedFileResult>&& changeTry) mutable {
1178 if (changeTry.hasException()) {
1179 log(ERR,
1180 "Error: ",
1181 folly::exceptionStr(changeTry.exception()),
1182 "\n");
1183 result = makeFreshInstance(ctx);
1184 return false;
1187 if (!changeTry.hasValue()) {
1188 // End of the stream.
1189 return false;
1192 const auto& change = changeTry.value();
1193 auto& name = *change.name();
1195 // Changes needs to be deduplicated so a file that was added and then
1196 // removed is reported as MODIFIED.
1197 switch (*change.status()) {
1198 case ScmFileStatus::ADDED:
1199 byFile[name] += 1;
1200 break;
1201 case ScmFileStatus::MODIFIED:
1202 byFile[name];
1203 break;
1204 case ScmFileStatus::REMOVED:
1205 byFile[name] -= 1;
1206 break;
1207 case ScmFileStatus::IGNORED:
1208 break;
1211 auto dtype = *change.dtype();
1212 auto [element, inserted] = dtypes.emplace(name, dtype);
1213 if (!inserted && element->second != dtype) {
1214 // Due to streamChangesSince not providing any ordering guarantee,
1215 // Watchman can't tell what DType a file has in the case where it
1216 // changed. Thus let's fallback to an UNKNOWN type, and Watchman
1217 // will later query the actual DType from EdenFS.
1218 element->second = EdenDtype::UNKNOWN;
1221 // Engineers usually don't work on a thousands of files, but on an
1222 // giant monorepo, the set of files changed in between 2 revisions
1223 // can be very large, and continuing down this route would force
1224 // Watchman to fetch metadata about a ton of files, causing delay in
1225 // answering the query and large amount of network traffic.
1227 // On these monorepos, tools also set the empty_on_fresh_instance
1228 // flag, thus we can simply pretend to return a fresh instance and an
1229 // empty fileInfo list.
1230 if (thresholdForFreshInstance_ != 0 &&
1231 byFile.size() > thresholdForFreshInstance_ &&
1232 ctx->query->empty_on_fresh_instance) {
1233 result = makeFreshInstance(ctx);
1234 return false;
1237 return true;
1240 for (auto& [name, count] : byFile) {
1241 result.fileInfo.emplace_back(name, getDTypeFromEden(dtypes[name]));
1242 if (count > 0) {
1243 result.createdFileNames.emplace(name);
1247 return result;
1251 * Compute and return all the changes that occured since the last call.
1253 * On error, or when thresholdForFreshInstance_ is exceeded, the clock will
1254 * be modified to indicate a fresh instance and an empty set of files will be
1255 * returned.
1257 GetAllChangesSinceResult getAllChangesSince(QueryContext* ctx) const {
1258 if (ctx->since.is_fresh_instance()) {
1259 // Earlier in the processing flow, we decided that the rootNumber
1260 // didn't match the current root which means that eden was restarted.
1261 // We need to translate this to a fresh instance result set and
1262 // return a list of all possible matching files.
1263 return makeFreshInstance(ctx);
1266 try {
1267 return getAllChangesSinceStreaming(ctx);
1268 } catch (const EdenError& err) {
1269 // ERANGE: mountGeneration differs
1270 // EDOM: journal was truncated.
1271 // For other situations we let the error propagate.
1272 XCHECK(err.errorCode());
1273 if (*err.errorCode() != ERANGE && *err.errorCode() != EDOM) {
1274 throw;
1276 // mountGeneration differs, or journal was truncated,
1277 // so treat this as equivalent to a fresh instance result
1278 return makeFreshInstance(ctx);
1279 } catch (const SCMError& err) {
1280 // Most likely this means a checkout occurred but we encountered
1281 // an error trying to get the list of files changed between the two
1282 // commits. Generate a fresh instance result since we were unable
1283 // to compute the list of files changed.
1284 log(ERR,
1285 "SCM error while processing EdenFS journal update: ",
1286 err.what(),
1287 "\n");
1288 return makeFreshInstance(ctx);
1292 w_string rootPath_;
1293 std::shared_ptr<apache::thrift::RequestChannel> thriftChannel_;
1294 folly::EventBase subscriberEventBase_;
1295 std::string mountPoint_;
1296 folly::SharedPromise<folly::Unit> subscribeReadyPromise_;
1297 bool splitGlobPattern_;
1298 unsigned int thresholdForFreshInstance_;
1301 #ifdef _WIN32
1302 // Test if EdenFS is stopped for the given path.
1303 bool isEdenStopped(w_string root) {
1304 static const w_string_piece kStar{"*"};
1305 static const w_string_piece kNonExistencePath{"EDEN_TEST_NON_EXISTENCE_PATH"};
1306 auto queryRaw = w_string::pathCat({root, kNonExistencePath, kStar});
1307 auto query = queryRaw.normalizeSeparators();
1308 std::wstring wquery = query.piece().asWideUNC();
1309 WIN32_FIND_DATAW ffd;
1311 auto find = FindFirstFileW(wquery.c_str(), &ffd);
1312 SCOPE_EXIT {
1313 if (find != INVALID_HANDLE_VALUE) {
1314 FindClose(find);
1318 auto lastError = GetLastError();
1320 // When EdenFS is not running, `FindFirstFile` will fail with this error
1321 // since it can't reach EdenFS to query directory information.
1322 if (find == INVALID_HANDLE_VALUE &&
1323 lastError == ERROR_FILE_SYSTEM_VIRTUALIZATION_UNAVAILABLE) {
1324 log(DBG, "edenfs is NOT RUNNING\n");
1325 return true;
1328 log(DBG, "edenfs is RUNNING\n");
1329 return false;
1332 bool isProjfs(const w_string& path) {
1333 try {
1334 auto fd =
1335 openFileHandle(path.c_str(), OpenFileHandleOptions::queryFileInfo());
1336 return fd.getReparseTag() == IO_REPARSE_TAG_PROJFS;
1337 } catch (const std::exception&) {
1338 return false;
1342 w_string findEdenFSRoot(w_string_piece root_path) {
1343 w_string path = root_path.asWString();
1344 w_string result = nullptr;
1345 while (true) {
1346 if (isProjfs(path)) {
1347 result = path;
1348 } else {
1349 break;
1352 auto next = path.dirName();
1353 if (next == path) {
1354 return "";
1357 path = next;
1360 return result;
1362 #endif
1364 std::shared_ptr<QueryableView> detectEden(
1365 const w_string& root_path,
1366 const w_string& fstype,
1367 const Configuration& config) {
1368 #ifdef _WIN32
1369 (void)fstype;
1370 auto edenRoot = findEdenFSRoot(root_path);
1371 log(DBG, "detected eden root: ", edenRoot, "\n");
1372 if (!edenRoot) {
1373 throw std::runtime_error(
1374 to<std::string>("Not an Eden clone: ", root_path.view()));
1377 if (isEdenStopped(root_path)) {
1378 throw TerminalWatcherError(to<std::string>(
1379 root_path.view(),
1380 " appears to be an offline EdenFS mount. "
1381 "Try running `edenfsctl start` to bring it back online and "
1382 "then retry your watch"));
1385 #else
1386 if (!is_edenfs_fs_type(fstype) && fstype != "fuse" &&
1387 fstype != "osxfuse_eden" && fstype != "macfuse_eden" &&
1388 fstype != "edenfs_eden") {
1389 // Not an active EdenFS mount. Perhaps it isn't mounted yet?
1390 auto readme = to<std::string>(root_path.view(), "/README_EDEN.txt");
1391 try {
1392 (void)getFileInformation(readme.c_str());
1393 } catch (const std::exception&) {
1394 // We don't really care if the readme doesn't exist or is inaccessible,
1395 // we just wanted to do a best effort check for the readme file.
1396 // If we can't access it, we're still not in a position to treat
1397 // this as an EdenFS mount so record the issue and allow falling
1398 // back to one of the other watchers.
1399 throw std::runtime_error(
1400 to<std::string>(fstype.view(), " is not a FUSE file system"));
1403 // If we get here, then the readme file/symlink exists.
1404 // If the readme exists then this is an offline eden mount.
1405 // We can't watch it using this watcher in its current state,
1406 // and we don't want to allow falling back to inotify as that
1407 // will be horribly slow.
1408 throw TerminalWatcherError(to<std::string>(
1409 root_path.view(),
1410 " appears to be an offline EdenFS mount. "
1411 "Try running `eden doctor` to bring it back online and "
1412 "then retry your watch"));
1415 // Given that the readlink() succeeded, assume this is an Eden mount.
1416 auto edenRoot = readSymbolicLink(
1417 to<std::string>(root_path.view(), "/.eden/root").c_str());
1419 #endif
1420 if (edenRoot != root_path) {
1421 // We aren't at the root of the eden mount.
1422 // Throw a TerminalWatcherError to indicate that the Eden watcher is the
1423 // correct watcher type for this directory (so don't try other watcher
1424 // types), but that it can't be used due to an error.
1425 throw TerminalWatcherError(to<std::string>(
1426 "you may only watch from the root of an eden mount point. "
1427 "Try again using ",
1428 edenRoot));
1431 try {
1432 return std::make_shared<EdenView>(root_path, config);
1433 } catch (const std::exception& exc) {
1434 throw TerminalWatcherError(to<std::string>(
1435 "Failed to initialize eden watcher, and since this is an Eden "
1436 "repo, will not allow falling back to another watcher. Error was: ",
1437 exc.what()));
1441 } // namespace
1443 static WatcherRegistry
1444 reg("eden", detectEden, 100 /* prefer eden above others */);
1445 } // namespace watchman