2 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
9 #include <folly/String.h>
10 #include <folly/futures/Future.h>
11 #include <folly/io/async/AsyncSocket.h>
12 #include <folly/io/async/EventBase.h>
13 #include <folly/io/async/EventBaseManager.h>
14 #include <folly/logging/xlog.h>
15 #include <thrift/lib/cpp2/async/HeaderClientChannel.h>
16 #include <thrift/lib/cpp2/async/PooledRequestChannel.h>
17 #include <thrift/lib/cpp2/async/ReconnectingRequestChannel.h>
18 #include <thrift/lib/cpp2/async/RetryingRequestChannel.h>
19 #include <thrift/lib/cpp2/async/RocketClientChannel.h>
24 #include "eden/fs/service/gen-cpp2/StreamingEdenService.h"
25 #include "watchman/ChildProcess.h"
26 #include "watchman/Errors.h"
27 #include "watchman/LRUCache.h"
28 #include "watchman/QueryableView.h"
29 #include "watchman/ThreadPool.h"
30 #include "watchman/fs/FSDetect.h"
31 #include "watchman/fs/FileDescriptor.h"
32 #include "watchman/query/GlobTree.h"
33 #include "watchman/query/Query.h"
34 #include "watchman/query/QueryContext.h"
35 #include "watchman/query/eval.h"
36 #include "watchman/root/Root.h"
37 #include "watchman/scm/SCM.h"
38 #include "watchman/thirdparty/wildmatch/wildmatch.h"
39 #include "watchman/watcher/Watcher.h"
40 #include "watchman/watcher/WatcherRegistry.h"
42 using apache::thrift::TApplicationException
;
43 using namespace facebook::eden
;
44 using folly::AsyncSocket
;
46 using std::make_unique
;
49 using EdenDtype
= facebook::eden::Dtype
;
50 using watchman::DType
;
52 DType
getDTypeFromEden(EdenDtype dtype
) {
53 // TODO: Eden guarantees that dtypes have consistent values on all platforms,
54 // including Windows. If we made Watchman guarantee that too, this could be
55 // replaced with a static_cast.
58 case EdenDtype::UNKNOWN
:
59 return DType::Unknown
;
66 case EdenDtype::BLOCK
:
68 case EdenDtype::REGULAR
:
69 return DType::Regular
;
71 return DType::Symlink
;
72 case EdenDtype::SOCKET
:
74 case EdenDtype::WHITEOUT
:
75 return DType::Whiteout
;
77 return DType::Unknown
;
80 SyncBehavior
getSyncBehavior() {
81 // Use a no-sync behavior as syncToNow will be called if a synchronization is
82 // necessary which will do the proper synchronization.
83 auto sync
= SyncBehavior
{};
84 sync
.syncTimeoutSeconds() = 0;
96 explicit NameAndDType(const std::string
& name
, DType dtype
= DType::Unknown
)
97 : name(name
), dtype(dtype
) {}
100 /** This is a helper for settling out subscription events.
101 * We have a single instance of the callback object that we schedule
102 * each time we get an update from the eden server. If we are already
103 * scheduled we will cancel it and reschedule it.
105 class SettleCallback
: public folly::HHWheelTimer::Callback
{
107 SettleCallback(folly::EventBase
* eventBase
, std::shared_ptr
<Root
> root
)
108 : eventBase_(eventBase
), root_(std::move(root
)) {}
110 void timeoutExpired() noexcept override
{
112 auto settledPayload
= json_object({{"settled", json_true()}});
113 root_
->unilateralResponses
->enqueue(std::move(settledPayload
));
114 } catch (const std::exception
& exc
) {
116 "error while dispatching settle payload; cancel watch: ",
119 eventBase_
->terminateLoopSoon();
123 void callbackCanceled() noexcept override
{
124 // We must override this because the default is to call timeoutExpired().
125 // We don't want that to happen because we're only canceled in the case
126 // where we want to delay the timeoutExpired() callback.
130 folly::EventBase
* eventBase_
;
131 std::shared_ptr
<Root
> root_
;
134 // Resolve the eden socket; On POSIX systems we use the .eden dir that is
135 // present in every dir of an eden mount to locate the symlink to the socket.
136 // On Windows systems, .eden is only present in the repo root and contains
137 // the toml config file with the path to the socket.
138 std::string
resolveSocketPath(w_string_piece rootPath
) {
140 auto configPath
= to
<std::string
>(rootPath
.view(), "/.eden/config");
141 auto config
= cpptoml::parse_file(configPath
);
143 return *config
->get_qualified_as
<std::string
>("Config.socket");
145 auto path
= to
<std::string
>(rootPath
.view(), "/.eden/socket");
146 // It is important to resolve the link because the path in the eden mount
147 // may exceed the maximum permitted unix domain socket path length.
148 // This is actually how things are in our integration test environment.
149 return readSymbolicLink(path
.c_str()).string();
153 folly::SocketAddress
getEdenSocketAddress(w_string_piece rootPath
) {
154 folly::SocketAddress addr
;
156 auto socketPath
= resolveSocketPath(rootPath
);
157 addr
.setFromPath(to
<std::string
>(socketPath
));
161 /** Create a thrift client that will connect to the eden server associated
162 * with the current user. */
163 std::unique_ptr
<StreamingEdenServiceAsyncClient
> getEdenClient(
164 std::shared_ptr
<apache::thrift::RequestChannel
> channel
) {
165 return make_unique
<StreamingEdenServiceAsyncClient
>(std::move(channel
));
168 class GetJournalPositionCallback
: public folly::HHWheelTimer::Callback
{
170 GetJournalPositionCallback(
171 folly::EventBase
* eventBase
,
172 std::shared_ptr
<apache::thrift::RequestChannel
> thriftChannel
,
173 std::string mountPoint
)
174 : eventBase_
{eventBase
},
175 thriftChannel_
{std::move(thriftChannel
)},
176 mountPoint_
{std::move(mountPoint
)} {}
178 void timeoutExpired() noexcept override
{
180 auto edenClient
= getEdenClient(thriftChannel_
);
182 // Calling getCurrentJournalPosition will allow EdenFS to send new
183 // notifications about changed files.
184 JournalPosition journal
;
185 edenClient
->sync_getCurrentJournalPosition(journal
, mountPoint_
);
186 } catch (const std::exception
& exc
) {
188 "error while getting EdenFS's journal position; cancel watch: ",
191 eventBase_
->terminateLoopSoon();
196 folly::EventBase
* eventBase_
;
197 std::shared_ptr
<apache::thrift::RequestChannel
> thriftChannel_
;
198 std::string mountPoint_
;
201 class EdenFileResult
: public FileResult
{
204 const w_string
& rootPath
,
205 std::shared_ptr
<apache::thrift::RequestChannel
> thriftChannel
,
206 const w_string
& fullName
,
207 ClockTicks
* ticks
= nullptr,
209 DType dtype
= DType::Unknown
)
210 : rootPath_(rootPath
),
211 thriftChannel_
{std::move(thriftChannel
)},
214 otime_
.ticks
= ctime_
.ticks
= 0;
215 otime_
.timestamp
= ctime_
.timestamp
= 0;
217 otime_
.ticks
= *ticks
;
219 // the "ctime" in the context of FileResult represents the point
220 // in time that we saw the file transition !exists -> exists.
221 // We don't strictly know the point at which that happened for results
222 // returned from eden, but it will tell us whether that happened in
223 // a given since query window by listing the file in the created files
224 // set. We set the isNew flag in this case. The goal here is to
225 // ensure that the code in query/eval.cpp considers us to be new too,
226 // and that works because we set the created time ticks == the last
227 // change tick. The logic in query/eval.cpp will consider this to
228 // be new because the ctime > lower bound in the since query.
229 // When isNew is not set our ctime tick value is initialized to
230 // zero which always fails that is_new check.
231 ctime_
.ticks
= otime_
.ticks
;
236 std::optional
<FileInformation
> stat() override
{
237 if (!stat_
.has_value()) {
238 accessorNeedsProperties(FileResult::Property::FullFileInformation
);
244 std::optional
<DType
> dtype() override
{
245 // We're using Unknown as the default value to avoid also wrapping
246 // this value up in an Optional in our internal storage.
247 // In theory this is ambiguous, but in practice Eden will never
248 // return Unknown for dtype values so this is safe to use with
250 if (dtype_
!= DType::Unknown
) {
253 if (stat_
.has_value()) {
254 return stat_
->dtype();
256 accessorNeedsProperties(FileResult::Property::FileDType
);
260 std::optional
<size_t> size() override
{
261 if (!stat_
.has_value()) {
262 accessorNeedsProperties(FileResult::Property::Size
);
268 std::optional
<struct timespec
> accessedTime() override
{
269 if (!stat_
.has_value()) {
270 accessorNeedsProperties(FileResult::Property::StatTimeStamps
);
276 std::optional
<struct timespec
> modifiedTime() override
{
277 if (!stat_
.has_value()) {
278 accessorNeedsProperties(FileResult::Property::StatTimeStamps
);
284 std::optional
<struct timespec
> changedTime() override
{
285 if (!stat_
.has_value()) {
286 accessorNeedsProperties(FileResult::Property::StatTimeStamps
);
292 w_string_piece
baseName() override
{
293 return fullName_
.piece().baseName();
296 w_string_piece
dirName() override
{
297 return fullName_
.piece().dirName();
300 void setExists(bool exists
) noexcept
{
303 stat_
= FileInformation::makeDeletedFileInformation();
307 std::optional
<bool> exists() override
{
308 if (!exists_
.has_value()) {
309 accessorNeedsProperties(FileResult::Property::Exists
);
315 std::optional
<w_string
> readLink() override
{
316 if (symlinkTarget_
.has_value()) {
317 return symlinkTarget_
;
319 accessorNeedsProperties(FileResult::Property::SymlinkTarget
);
323 std::optional
<ClockStamp
> ctime() override
{
327 std::optional
<ClockStamp
> otime() override
{
331 std::optional
<FileResult::ContentHash
> getContentSha1() override
{
332 if (!sha1_
.has_value()) {
333 accessorNeedsProperties(FileResult::Property::ContentSha1
);
336 switch (sha1_
->getType()) {
337 // Copy thrift SHA1Result aka (std::string) into
338 // watchman FileResult::ContentHash aka (std::array<uint8_t, 20>)
339 case SHA1Result::Type::sha1
: {
340 auto& hash
= sha1_
->get_sha1();
341 FileResult::ContentHash result
;
342 std::copy(hash
.begin(), hash
.end(), result
.begin());
347 // Thrift error occurred
348 case SHA1Result::Type::error
: {
349 auto& err
= sha1_
->get_error();
350 XCHECK(err
.errorCode());
351 throw std::system_error(
352 *err
.errorCode(), std::generic_category(), *err
.message());
355 // Something is wrong with type union
357 throw std::runtime_error(
358 "Unknown thrift data for EdenFileResult::getContentSha1");
362 void batchFetchProperties(
363 const std::vector
<std::unique_ptr
<FileResult
>>& files
) override
{
364 std::vector
<EdenFileResult
*> getFileInformationFiles
;
365 std::vector
<std::string
> getFileInformationNames
;
366 // If only dtype and exists are needed, Eden has a cheaper API for
368 bool onlyEntryInfoNeeded
= true;
370 std::vector
<EdenFileResult
*> getShaFiles
;
371 std::vector
<std::string
> getShaNames
;
373 std::vector
<EdenFileResult
*> getSymlinkFiles
;
375 for (auto& f
: files
) {
376 auto& edenFile
= dynamic_cast<EdenFileResult
&>(*f
);
378 auto relName
= edenFile
.fullName_
.piece();
380 if (rootPath_
== edenFile
.fullName_
) {
381 // The root tree inode has changed
384 // Strip off the mount point prefix for the names we're going
385 // to pass to eden. The +1 is its trailing slash.
386 relName
.advance(rootPath_
.size() + 1);
389 if (edenFile
.neededProperties() & FileResult::Property::SymlinkTarget
) {
390 // We need to know if the node is a symlink
391 edenFile
.accessorNeedsProperties(FileResult::Property::FileDType
);
393 getSymlinkFiles
.emplace_back(&edenFile
);
396 if (edenFile
.neededProperties() &
397 (FileResult::Property::FileDType
| FileResult::Property::CTime
|
398 FileResult::Property::OTime
| FileResult::Property::Exists
|
399 FileResult::Property::Size
| FileResult::Property::StatTimeStamps
|
400 FileResult::Property::FullFileInformation
)) {
401 getFileInformationFiles
.emplace_back(&edenFile
);
402 getFileInformationNames
.emplace_back(relName
.data(), relName
.size());
404 if (edenFile
.neededProperties() &
405 ~(FileResult::Property::FileDType
| FileResult::Property::Exists
)) {
406 // We could maintain two lists and call both getFileInformation and
407 // getEntryInformation in parallel, but in practice the set of
408 // properties should usually be the same across all files.
409 onlyEntryInfoNeeded
= false;
413 if (edenFile
.neededProperties() & FileResult::Property::ContentSha1
) {
414 getShaFiles
.emplace_back(&edenFile
);
415 getShaNames
.emplace_back(relName
.data(), relName
.size());
418 // If we were to throw later in this method, we will have forgotten
419 // the input set of properties, but it is ok: if we do decide to
420 // re-evaluate after throwing, the accessors will set the mask up
421 // accordingly and we'll end up calling back in here if needed.
422 edenFile
.clearNeededProperties();
425 auto client
= getEdenClient(thriftChannel_
);
429 getFileInformationNames
,
430 getFileInformationFiles
,
431 onlyEntryInfoNeeded
);
433 // TODO: add eden bulk readlink call
434 loadSymlinkTargets(client
.get(), getSymlinkFiles
);
436 if (!getShaFiles
.empty()) {
437 std::vector
<SHA1Result
> sha1s
;
438 client
->sync_getSHA1(
439 sha1s
, std::string
{rootPath_
.view()}, getShaNames
, getSyncBehavior());
441 if (sha1s
.size() != getShaFiles
.size()) {
443 "Requested SHA-1 of ",
445 " but Eden returned ",
447 " results -- ignoring");
449 auto sha1Iter
= sha1s
.begin();
450 for (auto& edenFile
: getShaFiles
) {
451 edenFile
->sha1_
= *sha1Iter
++;
459 std::shared_ptr
<apache::thrift::RequestChannel
> thriftChannel_
;
461 std::optional
<FileInformation
> stat_
;
462 std::optional
<bool> exists_
;
465 std::optional
<SHA1Result
> sha1_
;
466 std::optional
<w_string
> symlinkTarget_
;
467 DType dtype_
{DType::Unknown
};
469 // Read the symlink targets for each of the provided `files`. The files
470 // had SymlinkTarget set in neededProperties prior to clearing it in
471 // the batchFetchProperties() method that calls us, so we know that
472 // we unconditionally need to read these links.
473 static void loadSymlinkTargets(
474 StreamingEdenServiceAsyncClient
*,
475 const std::vector
<EdenFileResult
*>& files
) {
476 for (auto& edenFile
: files
) {
477 if (!edenFile
->stat_
->isSymlink()) {
478 // If this file is not a symlink then we immediately yield
479 // a nullptr w_string instance rather than propagating an error.
480 // This behavior is relied upon by the field rendering code and
481 // checked in test_symlink.py.
482 edenFile
->symlinkTarget_
= w_string();
485 edenFile
->symlinkTarget_
= readSymbolicLink(edenFile
->fullName_
.c_str());
489 static void loadFileInformation(
490 StreamingEdenServiceAsyncClient
* client
,
491 const w_string
& rootPath
,
492 const std::vector
<std::string
>& names
,
493 const std::vector
<EdenFileResult
*>& outFiles
,
494 bool onlyEntryInfoNeeded
) {
496 names
.size() == outFiles
.size(), "names.size must == outFiles.size");
501 auto applyResults
= [&](const auto& edenInfo
) {
502 if (names
.size() != edenInfo
.size()) {
504 "Requested file information of ",
506 " files but Eden returned information for ",
508 " files. Treating missing entries as missing files.");
511 auto infoIter
= edenInfo
.begin();
512 for (auto& edenFileResult
: outFiles
) {
513 if (infoIter
== edenInfo
.end()) {
514 edenFileResult
->setExists(false);
516 edenFileResult
->applyInformationOrError(*infoIter
);
522 if (onlyEntryInfoNeeded
) {
523 std::vector
<EntryInformationOrError
> info
;
525 client
->sync_getEntryInformation(
526 info
, std::string
{rootPath
.view()}, names
, getSyncBehavior());
529 } catch (const TApplicationException
& ex
) {
530 if (TApplicationException::UNKNOWN_METHOD
!= ex
.getType()) {
533 // getEntryInformation is not available in this version of
534 // Eden. Fall back to the older, more expensive
535 // getFileInformation below.
539 std::vector
<FileInformationOrError
> info
;
540 client
->sync_getFileInformation(
541 info
, std::string
{rootPath
.view()}, names
, getSyncBehavior());
545 void applyInformationOrError(const EntryInformationOrError
& infoOrErr
) {
546 if (infoOrErr
.getType() == EntryInformationOrError::Type::info
) {
547 dtype_
= getDTypeFromEden(*infoOrErr
.get_info().dtype());
554 void applyInformationOrError(const FileInformationOrError
& infoOrErr
) {
555 if (infoOrErr
.getType() == FileInformationOrError::Type::info
) {
556 FileInformation stat
;
558 stat
.size
= *infoOrErr
.get_info().size();
559 stat
.mode
= *infoOrErr
.get_info().mode();
560 stat
.mtime
.tv_sec
= *infoOrErr
.get_info().mtime()->seconds();
561 stat
.mtime
.tv_nsec
= *infoOrErr
.get_info().mtime()->nanoSeconds();
563 stat_
= std::move(stat
);
571 static std::string
escapeGlobSpecialChars(w_string_piece str
) {
574 for (size_t i
= 0; i
< str
.size(); ++i
) {
585 result
.append(&c
, 1);
591 /** filter out paths that are ignored or that are not part of the
592 * relative_root restriction in a query.
593 * Ideally we'd pass this information into eden so that it doesn't
594 * have to walk those paths and return the data to us, but for the
595 * moment we have to filter it out of the results.
596 * We need to respect the ignore_dirs configuration setting and
597 * also remove anything that doesn't match the relative_root constraint
599 void filterOutPaths(std::vector
<NameAndDType
>& files
, QueryContext
* ctx
) {
604 [ctx
](const NameAndDType
& item
) {
605 auto full
= w_string::pathCat({ctx
->root
->root_path
, item
.name
});
607 if (!ctx
->fileMatchesRelativeRoot(full
)) {
608 // Not in the desired area, so filter it out
612 return ctx
->root
->ignore
.isIgnored(full
.data(), full
.size());
617 void appendGlobResultToNameAndDTypeVec(
618 std::vector
<NameAndDType
>& results
,
621 size_t numDTypes
= glob
.get_dtypes().size();
623 for (auto& name
: glob
.get_matchingFiles()) {
624 // The server may not support dtypes, so this list may be empty.
625 // This cast is OK because eden returns the system dependent bits to us, and
626 // our DType enum is declared in terms of those bits
627 auto dtype
= i
< numDTypes
? static_cast<DType
>(glob
.get_dtypes()[i
])
629 results
.emplace_back(name
, dtype
);
634 /** Returns the files that match the glob. */
635 std::vector
<NameAndDType
> globNameAndDType(
636 StreamingEdenServiceAsyncClient
* client
,
637 const std::string
& mountPoint
,
638 const std::vector
<std::string
>& globPatterns
,
639 bool includeDotfiles
,
640 bool splitGlobPattern
= false) {
641 // TODO(xavierd): Once the config: "eden_split_glob_pattern" is rolled out
642 // everywhere, remove this code.
643 if (splitGlobPattern
&& globPatterns
.size() > 1) {
644 folly::DrivableExecutor
* executor
=
645 folly::EventBaseManager::get()->getEventBase();
647 std::vector
<folly::Future
<Glob
>> globFutures
;
648 globFutures
.reserve(globPatterns
.size());
649 for (const std::string
& globPattern
: globPatterns
) {
651 params
.mountPoint() = mountPoint
;
652 params
.globs() = std::vector
<std::string
>{globPattern
};
653 params
.includeDotfiles() = includeDotfiles
;
654 params
.wantDtype() = true;
655 params
.sync() = getSyncBehavior();
657 globFutures
.emplace_back(
658 client
->semifuture_globFiles(params
).via(executor
));
661 std::vector
<NameAndDType
> allResults
;
662 for (folly::Future
<Glob
>& globFuture
: globFutures
) {
663 appendGlobResultToNameAndDTypeVec(
664 allResults
, std::move(globFuture
).getVia(executor
));
669 params
.mountPoint() = mountPoint
;
670 params
.globs() = globPatterns
;
671 params
.includeDotfiles() = includeDotfiles
;
672 params
.wantDtype() = true;
673 params
.sync() = getSyncBehavior();
676 client
->sync_globFiles(glob
, params
);
677 std::vector
<NameAndDType
> result
;
678 appendGlobResultToNameAndDTypeVec(result
, std::move(glob
));
686 * Construct a pooled Thrift channel that will automatically reconnect to
689 std::shared_ptr
<apache::thrift::RequestChannel
> makeThriftChannel(
692 auto channel
= apache::thrift::PooledRequestChannel::newChannel(
693 folly::EventBaseManager::get()->getEventBase(),
694 folly::getUnsafeMutableGlobalIOExecutor(),
695 [numRetries
, rootPath
= std::move(rootPath
)](folly::EventBase
& eb
) {
696 return apache::thrift::RetryingRequestChannel::newChannel(
699 apache::thrift::ReconnectingRequestChannel::newChannel(
700 eb
, [rootPath
](folly::EventBase
& eb
) {
701 return apache::thrift::RocketClientChannel::newChannel(
702 AsyncSocket::newSocket(
703 &eb
, getEdenSocketAddress(rootPath
)));
711 class EdenView final
: public QueryableView
{
713 explicit EdenView(const w_string
& root_path
, const Configuration
& config
)
714 : QueryableView
{root_path
, /*requiresCrawl=*/false},
715 rootPath_(root_path
),
716 thriftChannel_(makeThriftChannel(
718 config
.getInt("eden_retry_connection_count", 3))),
719 mountPoint_(root_path
.string()),
720 splitGlobPattern_(config
.getBool("eden_split_glob_pattern", false)),
721 thresholdForFreshInstance_(config
.getInt(
722 "eden_file_count_threshold_for_fresh_instance",
725 void timeGenerator(const Query
* /*query*/, QueryContext
* ctx
) const override
{
726 ctx
->generationStarted();
728 if (ctx
->since
.is_timestamp()) {
729 throw QueryExecError(
730 "timestamp based since queries are not supported with eden");
733 auto allFilesResult
= getAllChangesSince(ctx
);
734 auto resultTicks
= allFilesResult
.ticks
;
735 auto& fileInfo
= allFilesResult
.fileInfo
;
736 // We use the list of created files to synthesize the "new" field
737 // in the file results
738 auto& createdFileNames
= allFilesResult
.createdFileNames
;
740 // Filter out any ignored files
741 filterOutPaths(fileInfo
, ctx
);
743 auto isFreshInstance
= ctx
->since
.is_fresh_instance();
744 for (auto& item
: fileInfo
) {
745 // a file is considered new if it was present in the created files
746 // set returned from eden.
747 bool isNew
= createdFileNames
.find(item
.name
) != createdFileNames
.end();
749 auto file
= make_unique
<EdenFileResult
>(
752 w_string::pathCat({mountPoint_
, item
.name
}),
757 if (isFreshInstance
) {
758 // Fresh instance queries only return data about files
759 // that currently exist, and we know this to be true
760 // here because our list of files comes from evaluating
762 file
->setExists(true);
765 w_query_process_file(ctx
->query
, ctx
, std::move(file
));
768 ctx
->bumpNumWalked(fileInfo
.size());
771 folly::SemiFuture
<folly::Unit
> waitForSettle(
772 std::chrono::milliseconds
/*settle_period*/) override
{
773 // We could implement this feature for EdenFS, but since the
774 // Watchman-EdenFS integration is correct and waitForSettle is a workaround
775 // for broken filesystem notification APIs, do nothing for now.
779 CookieSync::SyncResult
syncToNow(
780 const std::shared_ptr
<Root
>& root
,
781 std::chrono::milliseconds timeout
) override
{
783 return sync(root
).get(timeout
);
784 } catch (const folly::FutureTimeout
& ex
) {
785 throw std::system_error(ETIMEDOUT
, std::generic_category(), ex
.what());
790 folly::SemiFuture
<CookieSync::SyncResult
> sync(
791 const std::shared_ptr
<Root
>&) override
{
792 return folly::makeSemiFutureWith([this]() {
793 // Set an unlimited timeout. The caller is responsible for using a
794 // timeout to bound the time spent in this method.
795 facebook::eden::SyncBehavior sync
;
796 sync
.syncTimeoutSeconds() = -1;
798 facebook::eden::SynchronizeWorkingCopyParams params
;
799 params
.sync() = sync
;
801 auto client
= getEdenClient(thriftChannel_
);
802 return client
->semifuture_synchronizeWorkingCopy(
803 mountPoint_
, params
);
805 .defer([](folly::Try
<folly::Unit
> try_
) {
806 if (try_
.hasException()) {
808 try_
.tryGetExceptionObject
<TApplicationException
>()) {
809 if (exc
->getType() == TApplicationException::UNKNOWN_METHOD
) {
810 return folly::Try
{CookieSync::SyncResult
{}};
813 return folly::Try
<CookieSync::SyncResult
>{
814 std::move(try_
.exception())};
816 return folly::Try
{CookieSync::SyncResult
{}};
820 void executeGlobBasedQuery(
821 const std::vector
<std::string
>& globStrings
,
824 bool includeDir
= true) const {
825 auto client
= getEdenClient(thriftChannel_
);
827 auto includeDotfiles
= (query
->glob_flags
& WM_PERIOD
) == 0;
828 auto fileInfo
= globNameAndDType(
835 // Filter out any ignored files
836 filterOutPaths(fileInfo
, ctx
);
838 for (auto& item
: fileInfo
) {
839 auto file
= make_unique
<EdenFileResult
>(
842 w_string::pathCat({mountPoint_
, item
.name
}),
847 // The results of a glob are known to exist
848 file
->setExists(true);
850 // Skip processing directories
851 if (!includeDir
&& item
.dtype
== DType::Dir
) {
855 w_query_process_file(ctx
->query
, ctx
, std::move(file
));
858 ctx
->bumpNumWalked(fileInfo
.size());
861 // Helper for computing a relative path prefix piece.
862 // The returned piece is owned by the supplied context object!
863 w_string_piece
computeRelativePathPiece(QueryContext
* ctx
) const {
865 if (ctx
->query
->relative_root
) {
866 rel
= ctx
->query
->relative_root
;
867 rel
.advance(ctx
->root
->root_path
.size() + 1);
872 /** Walks files that match the supplied set of paths */
873 void pathGenerator(const Query
* query
, QueryContext
* ctx
) const override
{
874 ctx
->generationStarted();
875 // If the query is anchored to a relative_root, use that to
876 // avoid sucking down a massive list of files from eden
877 auto rel
= computeRelativePathPiece(ctx
);
879 std::vector
<std::string
> globStrings
;
880 // Translate the path list into a list of globs
881 for (auto& path
: *query
->paths
) {
882 if (path
.depth
> 0) {
883 // We don't have an easy way to express depth constraints
884 // in the existing glob API, so we just punt for the moment.
885 // I believe that this sort of query is quite rare anyway.
886 throw QueryExecError(
887 "the eden watcher only supports depth 0 or depth -1");
889 // -1 depth is infinite which we can translate to a recursive
890 // glob. 0 depth is direct descendant which we can translate
891 // to a simple * wildcard.
892 auto glob
= path
.depth
== -1 ? "**/*" : "*";
894 globStrings
.emplace_back(std::string
{
895 w_string::pathCat({rel
, escapeGlobSpecialChars(path
.name
), glob
})
898 executeGlobBasedQuery(globStrings
, query
, ctx
);
900 // We send another round of glob queries to query about the information
901 // about the paths themselves since we want to include the paths if they are
903 // TODO(zeyi): replace this with builtin path generator inside EdenFS
905 for (auto& path
: *query
->paths
) {
906 globStrings
.emplace_back(std::string
{
907 w_string::pathCat({rel
, escapeGlobSpecialChars(path
.name
)}).view()});
910 executeGlobBasedQuery(globStrings
, query
, ctx
, false);
913 void globGenerator(const Query
* query
, QueryContext
* ctx
) const override
{
914 if (!query
->glob_tree
) {
915 // If we are called via the codepath in the query evaluator that
916 // just speculatively executes queries then `glob` may not be
917 // present; short-circuit in that case.
921 ctx
->generationStarted();
922 // If the query is anchored to a relative_root, use that to
923 // avoid sucking down a massive list of files from eden
924 auto rel
= computeRelativePathPiece(ctx
);
926 std::vector
<std::string
> globStrings
;
927 for (auto& glob
: query
->glob_tree
->unparse()) {
928 globStrings
.emplace_back(
929 std::string
{w_string::pathCat({rel
, glob
}).view()});
932 // More glob flags/functionality:
933 auto noescape
= bool(query
->glob_flags
& WM_NOESCAPE
);
935 throw QueryExecError(
936 "glob_noescape is not supported for the eden watcher");
938 executeGlobBasedQuery(globStrings
, query
, ctx
);
941 void allFilesGenerator(const Query
* query
, QueryContext
* ctx
) const override
{
942 ctx
->generationStarted();
943 // If the query is anchored to a relative_root, use that to
944 // avoid sucking down a massive list of files from eden
945 std::string globPattern
;
946 auto rel
= computeRelativePathPiece(ctx
);
947 if (rel
.size() > 0) {
948 globPattern
.append(rel
.data(), rel
.size());
949 globPattern
.append("/");
951 globPattern
.append("**");
952 executeGlobBasedQuery(std::vector
<std::string
>{globPattern
}, query
, ctx
);
955 ClockPosition
getMostRecentRootNumberAndTickValue() const override
{
956 auto client
= getEdenClient(thriftChannel_
);
957 JournalPosition position
;
958 client
->sync_getCurrentJournalPosition(position
, mountPoint_
);
959 return ClockPosition(
960 *position
.mountGeneration(), *position
.sequenceNumber());
963 w_string
getCurrentClockString() const override
{
964 return getMostRecentRootNumberAndTickValue().toClockString();
967 bool doAnyOfTheseFilesExist(
968 const std::vector
<w_string
>& /*fileNames*/) const override
{
972 void startThreads(const std::shared_ptr
<Root
>& root
) override
{
973 auto self
= shared_from_this();
974 std::thread
thr([self
, this, root
]() { subscriberThread(root
); });
978 void stopThreads() override
{
979 subscriberEventBase_
.terminateLoopSoon();
982 json_ref
getWatcherDebugInfo() const override
{
986 void clearWatcherDebugInfo() override
{}
988 using EdenFSSubcription
=
989 apache::thrift::ClientBufferedStream
<JournalPosition
>::Subscription
;
991 EdenFSSubcription
rocketSubscribe(
992 std::shared_ptr
<Root
> root
,
993 SettleCallback
& settleCallback
,
994 GetJournalPositionCallback
& getJournalPositionCallback
,
995 std::chrono::milliseconds settleTimeout
) {
996 auto client
= getEdenClient(thriftChannel_
);
997 auto stream
= client
->sync_subscribeStreamTemporary(
998 std::string(root
->root_path
.data(), root
->root_path
.size()));
999 return std::move(stream
).subscribeExTry(
1000 &subscriberEventBase_
,
1002 &getJournalPositionCallback
,
1005 settleTimeout
](folly::Try
<JournalPosition
>&& t
) {
1008 log(DBG
, "Got subscription push from eden\n");
1009 if (settleCallback
.isScheduled()) {
1010 log(DBG
, "reschedule settle timeout\n");
1011 settleCallback
.cancelTimeout();
1013 subscriberEventBase_
.timer().scheduleTimeout(
1014 &settleCallback
, settleTimeout
);
1016 // For bursty writes to the working copy, let's limit the
1017 // number of notifications that Watchman receives by
1018 // scheduling a getCurrentJournalPosition call in the future.
1020 // Thus, we're guaranteed to only receive one notification per
1021 // settleTimeout/2 and no more, regardless of how much
1022 // writing is done in the repository.
1023 subscriberEventBase_
.timer().scheduleTimeout(
1024 &getJournalPositionCallback
, settleTimeout
/ 2);
1025 } catch (const std::exception
& exc
) {
1027 "Exception while processing eden subscription: ",
1029 ": cancel watch\n");
1030 subscriberEventBase_
.terminateLoopSoon();
1033 auto reason
= t
.hasException()
1034 ? folly::exceptionStr(std::move(t
.exception()))
1035 : "controlled shutdown";
1037 "subscription stream ended: ",
1038 w_string_piece(reason
.data(), reason
.size()),
1039 ", cancel watch\n");
1040 // We won't be called again, but we terminate the loop
1041 // just to make sure.
1042 subscriberEventBase_
.terminateLoopSoon();
1047 // This is the thread that we use to listen to the stream of
1048 // changes coming in from the EdenFS server.
1049 void subscriberThread(std::shared_ptr
<Root
> root
) noexcept
{
1051 // ensure that the root gets torn down,
1052 // otherwise we'd leave it in a broken state.
1056 w_set_thread_name("edensub ", root
->root_path
.view());
1057 log(DBG
, "Started subscription thread\n");
1059 std::optional
<EdenFSSubcription
> subscription
;
1061 if (subscription
.has_value()) {
1062 subscription
->cancel();
1063 std::move(*subscription
).join();
1068 // Prepare the callback
1069 SettleCallback settleCallback
{&subscriberEventBase_
, root
};
1070 GetJournalPositionCallback getJournalPositionCallback
{
1071 &subscriberEventBase_
, thriftChannel_
, mountPoint_
};
1072 // Figure out the correct value for settling
1073 std::chrono::milliseconds
settleTimeout(root
->trigger_settle
);
1075 subscription
= rocketSubscribe(
1076 root
, settleCallback
, getJournalPositionCallback
, settleTimeout
);
1078 // This will run until the stream ends
1079 log(DBG
, "Started subscription thread loop\n");
1080 subscribeReadyPromise_
.setValue();
1081 subscriberEventBase_
.loop();
1083 } catch (const std::exception
& exc
) {
1085 "uncaught exception in subscription thread, cancel watch:",
1091 const w_string
& getName() const override
{
1092 static w_string
name("eden");
// Returns a SemiFuture tied to subscribeReadyPromise_. That promise is
// fulfilled in subscriberThread() right before it enters the event base loop.
1096 folly::SemiFuture
<folly::Unit
> waitUntilReadyToQuery() override
{
1097 return subscribeReadyPromise_
.getSemiFuture();
1102 * Returns all the files in the watched directory for a fresh instance.
1104 * In the case where the query specifically asks for an empty file list on a
1105 * fresh instance, an empty vector will be returned.
// Lists the files to report for a fresh instance.
//
// ctx: the query being evaluated; reads ctx->query (empty_on_fresh_instance,
//      relative_root, glob_flags) and ctx->root->root_path.
// Returns an empty vector when the query sets empty_on_fresh_instance;
// otherwise the result of globNameAndDType() for a "**" pattern, prefixed
// with the relative root when one is set.
1107 std::vector
<NameAndDType
> getAllFilesForFreshInstance(
1108 QueryContext
* ctx
) const {
1109 if (ctx
->query
->empty_on_fresh_instance
) {
1110 // Avoid a full tree walk if we don't need it!
1111 return std::vector
<NameAndDType
>();
1114 std::string globPattern
;
1115 if (ctx
->query
->relative_root
) {
1116 w_string_piece
rel(ctx
->query
->relative_root
);
// Skip the leading root_path plus one more character (presumably the path
// separator) so that only the part below the root is left.
1117 rel
.advance(ctx
->root
->root_path
.size() + 1);
1118 globPattern
.append(rel
.data(), rel
.size());
1119 globPattern
.append("/");
// Glob for everything, below the relative root if one was appended above.
1121 globPattern
.append("**");
// Dotfiles are included only when WM_PERIOD is absent from glob_flags.
1123 auto includeDotfiles
= (ctx
->query
->glob_flags
& WM_PERIOD
) == 0;
1125 auto client
= getEdenClient(thriftChannel_
);
// NOTE(review): some of the arguments to globNameAndDType() are not visible
// in this excerpt.
1126 return globNameAndDType(
1129 std::vector
<std::string
>{std::move(globPattern
)},
// Result bundle produced by makeFreshInstance() and
// getAllChangesSinceStreaming().
// NOTE(review): those functions also assign `result.ticks`, whose declaration
// is not visible in this excerpt.
1133 struct GetAllChangesSinceResult
{
// Name and DType of each file to report.
1135 std::vector
<NameAndDType
> fileInfo
;
// File names recorded by getAllChangesSinceStreaming(); judging by the field
// name these are the files seen as newly created - confirm against that code.
1136 std::unordered_set
<std::string
> createdFileNames
;
1140 * Build a GetAllChangesSinceResult for a fresh instance.
// Builds the result used whenever incremental changes cannot be (or should
// not be) computed.
//
// Side effect: marks ctx->since as a fresh instance.
// The result's ticks come from ctx->clockAtStartOfQuery and its fileInfo from
// getAllFilesForFreshInstance(ctx).
1142 GetAllChangesSinceResult
makeFreshInstance(QueryContext
* ctx
) const {
1143 GetAllChangesSinceResult result
;
1145 ctx
->since
.set_fresh_instance();
1146 result
.ticks
= ctx
->clockAtStartOfQuery
.position().ticks
;
1147 result
.fileInfo
= getAllFilesForFreshInstance(ctx
);
// Computes the changes since the clock stored in ctx->since by calling
// EdenFS' streamChangesSince and folding the streamed per-file results into
// a GetAllChangesSinceResult.
//
// Falls back to makeFreshInstance(ctx) when the stream delivers an exception,
// or when more than thresholdForFreshInstance_ files changed and the query
// sets empty_on_fresh_instance.
// The synchronous thrift call is not wrapped in a try block here; the caller
// (getAllChangesSince) has catch handlers for EdenError and SCMError.
1152 GetAllChangesSinceResult
getAllChangesSinceStreaming(
1153 QueryContext
* ctx
) const {
1154 JournalPosition position
;
1155 position
.mountGeneration() = ctx
->clockAtStartOfQuery
.position().rootNumber
;
1156 // dial back to the sequence number from the query
// std::get<> assumes ctx->since.since currently holds a QuerySince::Clock.
1157 position
.sequenceNumber() =
1158 std::get
<QuerySince::Clock
>(ctx
->since
.since
).ticks
;
1160 StreamChangesSinceParams params
;
1161 params
.mountPoint() = mountPoint_
;
1162 params
.fromPosition() = position
;
1164 auto client
= getEdenClient(thriftChannel_
);
1165 auto [resultChangesSince
, stream
] = client
->sync_streamChangesSince(params
);
1167 GetAllChangesSinceResult result
;
// The reported clock is the journal position EdenFS says the stream runs to.
1168 result
.ticks
= *resultChangesSince
.toPosition()->sequenceNumber();
// byFile: one entry per changed file name, updated by the status switch
// below. dtypes: the dtype reported for each file name, downgraded to
// UNKNOWN when reports disagree.
1173 std::unordered_map
<std::string
, int> byFile
;
1174 std::unordered_map
<std::string
, EdenDtype
> dtypes
;
// The callback captures by reference and updates result, byFile and dtypes.
1176 std::move(stream
).subscribeInline(
1177 [&](folly::Try
<ChangedFileResult
>&& changeTry
) mutable {
// A stream error discards whatever was accumulated in favour of a fresh
// instance result.
1178 if (changeTry
.hasException()) {
1181 folly::exceptionStr(changeTry
.exception()),
1183 result
= makeFreshInstance(ctx
);
1187 if (!changeTry
.hasValue()) {
1188 // End of the stream.
1192 const auto& change
= changeTry
.value();
1193 auto& name
= *change
.name();
1195 // Changes need to be deduplicated so a file that was added and then
1196 // removed is reported as MODIFIED.
// NOTE(review): the bodies of the cases below are not visible in this
// excerpt.
1197 switch (*change
.status()) {
1198 case ScmFileStatus::ADDED
:
1201 case ScmFileStatus::MODIFIED
:
1204 case ScmFileStatus::REMOVED
:
1207 case ScmFileStatus::IGNORED
:
1211 auto dtype
= *change
.dtype();
// emplace keeps the first dtype seen for a name; `inserted` is false when the
// name was already present.
1212 auto [element
, inserted
] = dtypes
.emplace(name
, dtype
);
1213 if (!inserted
&& element
->second
!= dtype
) {
1214 // Due to streamChangesSince not providing any ordering guarantee,
1215 // Watchman can't tell what DType a file has in the case where it
1216 // changed. Thus let's fallback to an UNKNOWN type, and Watchman
1217 // will later query the actual DType from EdenFS.
1218 element
->second
= EdenDtype::UNKNOWN
;
1221 // Engineers usually don't work on thousands of files, but on a
1222 // giant monorepo, the set of files changed in between 2 revisions
1223 // can be very large, and continuing down this route would force
1224 // Watchman to fetch metadata about a ton of files, causing delay in
1225 // answering the query and large amount of network traffic.
1227 // On these monorepos, tools also set the empty_on_fresh_instance
1228 // flag, thus we can simply pretend to return a fresh instance and an
1229 // empty fileInfo list.
// A threshold of 0 disables this shortcut.
1230 if (thresholdForFreshInstance_
!= 0 &&
1231 byFile
.size() > thresholdForFreshInstance_
&&
1232 ctx
->query
->empty_on_fresh_instance
) {
1233 result
= makeFreshInstance(ctx
);
// Convert the accumulated maps into the result. dtypes[name] default-inserts
// an entry when the name is absent from dtypes.
1240 for (auto& [name
, count
] : byFile
) {
1241 result
.fileInfo
.emplace_back(name
, getDTypeFromEden(dtypes
[name
]));
// NOTE(review): the condition guarding this insertion is not visible in this
// excerpt; presumably it depends on `count`.
1243 result
.createdFileNames
.emplace(name
);
1251 * Compute and return all the changes that occurred since the last call.
1253 * On error, or when thresholdForFreshInstance_ is exceeded, the clock will
1254 * be modified to indicate a fresh instance and an empty set of files will be
// Entry point for computing changes since ctx->since.
//
// Returns makeFreshInstance(ctx) when the query is already flagged as a fresh
// instance, when EdenFS reports ERANGE or EDOM, or when an SCMError is
// caught; otherwise returns getAllChangesSinceStreaming(ctx).
1257 GetAllChangesSinceResult
getAllChangesSince(QueryContext
* ctx
) const {
1258 if (ctx
->since
.is_fresh_instance()) {
1259 // Earlier in the processing flow, we decided that the rootNumber
1260 // didn't match the current root which means that eden was restarted.
1261 // We need to translate this to a fresh instance result set and
1262 // return a list of all possible matching files.
1263 return makeFreshInstance(ctx
);
1267 return getAllChangesSinceStreaming(ctx
);
1268 } catch (const EdenError
& err
) {
1269 // ERANGE: mountGeneration differs
1270 // EDOM: journal was truncated.
1271 // For other situations we let the error propagate.
// Check that an error code is present before it is dereferenced below.
1272 XCHECK(err
.errorCode());
1273 if (*err
.errorCode() != ERANGE
&& *err
.errorCode() != EDOM
) {
1276 // mountGeneration differs, or journal was truncated,
1277 // so treat this as equivalent to a fresh instance result
1278 return makeFreshInstance(ctx
);
1279 } catch (const SCMError
& err
) {
1280 // Most likely this means a checkout occurred but we encountered
1281 // an error trying to get the list of files changed between the two
1282 // commits. Generate a fresh instance result since we were unable
1283 // to compute the list of files changed.
1285 "SCM error while processing EdenFS journal update: ",
1288 return makeFreshInstance(ctx
);
// Channel handed to getEdenClient() whenever a thrift client is needed.
1293 std::shared_ptr
<apache::thrift::RequestChannel
> thriftChannel_
;
// Event base whose loop is run by subscriberThread().
1294 folly::EventBase subscriberEventBase_
;
// Mount point sent to EdenFS in request parameters.
1295 std::string mountPoint_
;
// Fulfilled by subscriberThread(); observed via waitUntilReadyToQuery().
1296 folly::SharedPromise
<folly::Unit
> subscribeReadyPromise_
;
// NOTE(review): no use of this flag is visible in this excerpt; presumably it
// is passed on to the glob helpers - confirm.
1297 bool splitGlobPattern_
;
// Changed-file count above which getAllChangesSinceStreaming() may answer
// with a fresh instance; 0 disables the check.
1298 unsigned int thresholdForFreshInstance_
;
1302 // Test if EdenFS is stopped for the given path.
//
// Probes `<root>/EDEN_TEST_NON_EXISTENCE_PATH/*` with FindFirstFileW. A
// failure with ERROR_FILE_SYSTEM_VIRTUALIZATION_UNAVAILABLE is logged as
// "edenfs is NOT RUNNING"; anything else is logged as "edenfs is RUNNING".
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true for the NOT RUNNING case and false otherwise - confirm.
1303 bool isEdenStopped(w_string root
) {
1304 static const w_string_piece kStar
{"*"};
1305 static const w_string_piece kNonExistencePath
{"EDEN_TEST_NON_EXISTENCE_PATH"};
1306 auto queryRaw
= w_string::pathCat({root
, kNonExistencePath
, kStar
});
1307 auto query
= queryRaw
.normalizeSeparators();
// FindFirstFileW takes a wide-character path.
1308 std::wstring wquery
= query
.piece().asWideUNC();
1309 WIN32_FIND_DATAW ffd
;
1311 auto find
= FindFirstFileW(wquery
.c_str(), &ffd
);
// NOTE(review): the body of this branch is not visible here; presumably it
// closes the find handle - confirm.
1313 if (find
!= INVALID_HANDLE_VALUE
) {
1318 auto lastError
= GetLastError();
1320 // When EdenFS is not running, `FindFirstFile` will fail with this error
1321 // since it can't reach EdenFS to query directory information.
1322 if (find
== INVALID_HANDLE_VALUE
&&
1323 lastError
== ERROR_FILE_SYSTEM_VIRTUALIZATION_UNAVAILABLE
) {
1324 log(DBG
, "edenfs is NOT RUNNING\n");
1328 log(DBG
, "edenfs is RUNNING\n");
// Reports whether `path` carries the IO_REPARSE_TAG_PROJFS reparse tag.
// The handle is opened with OpenFileHandleOptions::queryFileInfo().
// NOTE(review): what the catch handler returns is not visible in this
// excerpt; presumably false - confirm.
1332 bool isProjfs(const w_string
& path
) {
1335 openFileHandle(path
.c_str(), OpenFileHandleOptions::queryFileInfo());
1336 return fd
.getReparseTag() == IO_REPARSE_TAG_PROJFS
;
1337 } catch (const std::exception
&) {
// Searches for the EdenFS root starting at root_path, using isProjfs() on the
// current path and path.dirName() to obtain the parent.
// `result` starts out null.
// NOTE(review): the loop structure, the termination condition and the return
// statement are not visible in this excerpt; presumably this walks upwards
// and returns the top-most ProjFS directory - confirm.
1342 w_string
findEdenFSRoot(w_string_piece root_path
) {
1343 w_string path
= root_path
.asWString();
1344 w_string result
= nullptr;
1346 if (isProjfs(path
)) {
1352 auto next
= path
.dirName();
// Watcher factory registered under the name "eden": decides whether root_path
// is an EdenFS mount and, if so, creates an EdenView for it.
//
// root_path: directory the caller wants to watch.
// fstype:    detected filesystem type name for root_path.
// config:    forwarded to the EdenView constructor.
//
// Throws std::runtime_error when the path does not look like an EdenFS
// mount; the existing comments describe this as allowing a fall back to
// another watcher. Throws TerminalWatcherError when the path is an EdenFS
// mount that cannot be watched: offline mount, not the mount root, or
// EdenView construction failed.
// NOTE(review): the preprocessor guards are not visible in this excerpt; the
// first section (findEdenFSRoot / isEdenStopped) presumably is the
// Windows-only branch and the fstype / README / .eden/root checks the
// non-Windows one - confirm.
1364 std::shared_ptr
<QueryableView
> detectEden(
1365 const w_string
& root_path
,
1366 const w_string
& fstype
,
1367 const Configuration
& config
) {
1370 auto edenRoot
= findEdenFSRoot(root_path
);
1371 log(DBG
, "detected eden root: ", edenRoot
, "\n");
// NOTE(review): the condition guarding this throw is not visible here.
1373 throw std::runtime_error(
1374 to
<std::string
>("Not an Eden clone: ", root_path
.view()));
1377 if (isEdenStopped(root_path
)) {
1378 throw TerminalWatcherError(to
<std::string
>(
1380 " appears to be an offline EdenFS mount. "
1381 "Try running `edenfsctl start` to bring it back online and "
1382 "then retry your watch"));
// Several filesystem type names are accepted here in addition to whatever
// is_edenfs_fs_type() recognises.
1386 if (!is_edenfs_fs_type(fstype
) && fstype
!= "fuse" &&
1387 fstype
!= "osxfuse_eden" && fstype
!= "macfuse_eden" &&
1388 fstype
!= "edenfs_eden") {
1389 // Not an active EdenFS mount. Perhaps it isn't mounted yet?
1390 auto readme
= to
<std::string
>(root_path
.view(), "/README_EDEN.txt");
// Only whether this call throws matters; its result is discarded.
1392 (void)getFileInformation(readme
.c_str());
1393 } catch (const std::exception
&) {
1394 // We don't really care if the readme doesn't exist or is inaccessible,
1395 // we just wanted to do a best effort check for the readme file.
1396 // If we can't access it, we're still not in a position to treat
1397 // this as an EdenFS mount so record the issue and allow falling
1398 // back to one of the other watchers.
1399 throw std::runtime_error(
1400 to
<std::string
>(fstype
.view(), " is not a FUSE file system"));
1403 // If we get here, then the readme file/symlink exists.
1404 // If the readme exists then this is an offline eden mount.
1405 // We can't watch it using this watcher in its current state,
1406 // and we don't want to allow falling back to inotify as that
1407 // will be horribly slow.
1408 throw TerminalWatcherError(to
<std::string
>(
1410 " appears to be an offline EdenFS mount. "
1411 "Try running `eden doctor` to bring it back online and "
1412 "then retry your watch"));
1415 // Given that the readlink() succeeded, assume this is an Eden mount.
1416 auto edenRoot
= readSymbolicLink(
1417 to
<std::string
>(root_path
.view(), "/.eden/root").c_str());
1420 if (edenRoot
!= root_path
) {
1421 // We aren't at the root of the eden mount.
1422 // Throw a TerminalWatcherError to indicate that the Eden watcher is the
1423 // correct watcher type for this directory (so don't try other watcher
1424 // types), but that it can't be used due to an error.
1425 throw TerminalWatcherError(to
<std::string
>(
1426 "you may only watch from the root of an eden mount point. "
1432 return std::make_shared
<EdenView
>(root_path
, config
);
// A std::exception caught here (the try presumably encloses the EdenView
// construction above) is rethrown as a TerminalWatcherError with the
// message below.
1433 } catch (const std::exception
& exc
) {
1434 throw TerminalWatcherError(to
<std::string
>(
1435 "Failed to initialize eden watcher, and since this is an Eden "
1436 "repo, will not allow falling back to another watcher. Error was: ",
// Registers detectEden as the factory for the "eden" watcher. The value 100
// is the preference passed to WatcherRegistry.
1443 static WatcherRegistry
1444 reg("eden", detectEden
, 100 /* prefer eden above others */);
1445 } // namespace watchman