builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "abspath.h"
   3 #include "config.h"
   4 #include "environment.h"
   5 #include "gettext.h"
   6 #include "parse-options.h"
   7 #include "fsmonitor-ll.h"
   8 #include "fsmonitor-ipc.h"
   9 #include "fsmonitor-path-utils.h"
  10 #include "fsmonitor-settings.h"
  11 #include "compat/fsmonitor/fsm-health.h"
  12 #include "compat/fsmonitor/fsm-listen.h"
  13 #include "fsmonitor--daemon.h"
  14 #include "simple-ipc.h"
  15 #include "khash.h"
  16 #include "pkt-line.h"
  17 #include "trace.h"
  18 #include "trace2.h"
  19
  20 static const char * const builtin_fsmonitor__daemon_usage[] = {
  21         N_("git fsmonitor--daemon start [<options>]"),
  22         N_("git fsmonitor--daemon run [<options>]"),
  23         "git fsmonitor--daemon stop",
  24         "git fsmonitor--daemon status",
  25         NULL
  26 };
  27
  28 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
  /*
   * Global state loaded from config by fsmonitor_config().  The
   * initializers below are the defaults used when a key is not set.
   */

  /* "fsmonitor.ipcthreads": values below 1 are rejected when parsed. */
  #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
  static int fsmonitor__ipc_threads = 8;

  /* "fsmonitor.starttimeout": negative values are rejected when parsed. */
  #define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
  static int fsmonitor__start_timeout_sec = 60;

  /* "fsmonitor.announcestartup": bool-or-int, zero unless configured. */
  #define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
  static int fsmonitor__announce_startup;
  40
  41 static int fsmonitor_config(const char *var, const char *value,
  42                             const struct config_context *ctx, void *cb)
  43 {
  44         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  45                 int i = git_config_int(var, value, ctx->kvi);
  46                 if (i < 1)
  47                         return error(_("value of '%s' out of range: %d"),
  48                                      FSMONITOR__IPC_THREADS, i);
  49                 fsmonitor__ipc_threads = i;
  50                 return 0;
  51         }
  52
  53         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  54                 int i = git_config_int(var, value, ctx->kvi);
  55                 if (i < 0)
  56                         return error(_("value of '%s' out of range: %d"),
  57                                      FSMONITOR__START_TIMEOUT, i);
  58                 fsmonitor__start_timeout_sec = i;
  59                 return 0;
  60         }
  61
  62         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  63                 int is_bool;
  64                 int i = git_config_bool_or_int(var, value, ctx->kvi, &is_bool);
  65                 if (i < 0)
  66                         return error(_("value of '%s' not bool or int: %d"),
  67                                      var, i);
  68                 fsmonitor__announce_startup = i;
  69                 return 0;
  70         }
  71
  72         return git_default_config(var, value, ctx, cb);
  73 }
  74
  75 /*
  76  * Acting as a CLIENT.
  77  *
  78  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  79  * and wait for it to shutdown.
  80  */
  81 static int do_as_client__send_stop(void)
  82 {
  83         struct strbuf answer = STRBUF_INIT;
  84         int ret;
  85
  86         ret = fsmonitor_ipc__send_command("quit", &answer);
  87
  88         /* The quit command does not return any response data. */
  89         strbuf_release(&answer);
  90
  91         if (ret)
  92                 return ret;
  93
  94         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  95         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  96                 sleep_millisec(50);
  97         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  98
  99         return 0;
 100 }
 101
 102 static int do_as_client__status(void)
 103 {
 104         enum ipc_active_state state = fsmonitor_ipc__get_state();
 105
 106         switch (state) {
 107         case IPC_STATE__LISTENING:
 108                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 109                        the_repository->worktree);
 110                 return 0;
 111
 112         default:
 113                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 114                        the_repository->worktree);
 115                 return 1;
 116         }
 117 }
 118
 /*
  * State of a cookie that a client thread is waiting on.  The waiter
  * starts in _INIT and blocks until some other value is stored.
  */
 enum fsmonitor_cookie_item_result {
         FCIR_ERROR = -1, /* could not create cookie file ? */
         FCIR_INIT = 0,   /* created; nobody has resolved it yet */
         FCIR_SEEN = 1,   /* the cookie was reported as seen */
         FCIR_ABORT = 2,  /* all pending cookies were aborted */
 };
 125
 126 struct fsmonitor_cookie_item {
 127         struct hashmap_entry entry;
 128         char *name;
 129         enum fsmonitor_cookie_item_result result;
 130 };
 131
 132 static int cookies_cmp(const void *data UNUSED,
 133                        const struct hashmap_entry *he1,
 134                        const struct hashmap_entry *he2, const void *keydata)
 135 {
 136         const struct fsmonitor_cookie_item *a =
 137                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 138         const struct fsmonitor_cookie_item *b =
 139                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 140
 141         return strcmp(a->name, keydata ? keydata : b->name);
 142 }
 143
 144 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 145         struct fsmonitor_daemon_state *state)
 146 {
 147         /* assert current thread holding state->main_lock */
 148
 149         int fd;
 150         struct fsmonitor_cookie_item *cookie;
 151         struct strbuf cookie_pathname = STRBUF_INIT;
 152         struct strbuf cookie_filename = STRBUF_INIT;
 153         enum fsmonitor_cookie_item_result result;
 154         int my_cookie_seq;
 155
 156         CALLOC_ARRAY(cookie, 1);
 157
 158         my_cookie_seq = state->cookie_seq++;
 159
 160         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 161
 162         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 163         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 164
 165         cookie->name = strbuf_detach(&cookie_filename, NULL);
 166         cookie->result = FCIR_INIT;
 167         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 168
 169         hashmap_add(&state->cookies, &cookie->entry);
 170
 171         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 172                          cookie->name, cookie_pathname.buf);
 173
 174         /*
 175          * Create the cookie file on disk and then wait for a notification
 176          * that the listener thread has seen it.
 177          */
 178         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 179         if (fd < 0) {
 180                 error_errno(_("could not create fsmonitor cookie '%s'"),
 181                             cookie->name);
 182
 183                 cookie->result = FCIR_ERROR;
 184                 goto done;
 185         }
 186
 187         /*
 188          * Technically, close() and unlink() can fail, but we don't
 189          * care here.  We only created the file to trigger a watch
 190          * event from the FS to know that when we're up to date.
 191          */
 192         close(fd);
 193         unlink(cookie_pathname.buf);
 194
 195         /*
 196          * Technically, this is an infinite wait (well, unless another
 197          * thread sends us an abort).  I'd like to change this to
 198          * use `pthread_cond_timedwait()` and return an error/timeout
 199          * and let the caller do the trivial response thing, but we
 200          * don't have that routine in our thread-utils.
 201          *
 202          * After extensive beta testing I'm not really worried about
 203          * this.  Also note that the above open() and unlink() calls
 204          * will cause at least two FS events on that path, so the odds
 205          * of getting stuck are pretty slim.
 206          */
 207         while (cookie->result == FCIR_INIT)
 208                 pthread_cond_wait(&state->cookies_cond,
 209                                   &state->main_lock);
 210
 211 done:
 212         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 213
 214         result = cookie->result;
 215
 216         free(cookie->name);
 217         free(cookie);
 218         strbuf_release(&cookie_pathname);
 219
 220         return result;
 221 }
 222
 223 /*
 224  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 225  */
 226 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 227                                          const struct string_list *cookie_names)
 228 {
 229         /* assert current thread holding state->main_lock */
 230
 231         int k;
 232         int nr_seen = 0;
 233
 234         for (k = 0; k < cookie_names->nr; k++) {
 235                 struct fsmonitor_cookie_item key;
 236                 struct fsmonitor_cookie_item *cookie;
 237
 238                 key.name = cookie_names->items[k].string;
 239                 hashmap_entry_init(&key.entry, strhash(key.name));
 240
 241                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 242                 if (cookie) {
 243                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 244                                          cookie->name);
 245                         cookie->result = FCIR_SEEN;
 246                         nr_seen++;
 247                 }
 248         }
 249
 250         if (nr_seen)
 251                 pthread_cond_broadcast(&state->cookies_cond);
 252 }
 253
 254 /*
 255  * Set _ABORT on all pending cookies and wake up all client threads.
 256  */
 257 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 258 {
 259         /* assert current thread holding state->main_lock */
 260
 261         struct hashmap_iter iter;
 262         struct fsmonitor_cookie_item *cookie;
 263         int nr_aborted = 0;
 264
 265         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 266                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 267                                  cookie->name);
 268                 cookie->result = FCIR_ABORT;
 269                 nr_aborted++;
 270         }
 271
 272         if (nr_aborted)
 273                 pthread_cond_broadcast(&state->cookies_cond);
 274 }
 275
 276 /*
 277  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 278  * "token" as a virtual timestamp.  Clients can request a summary of all
 279  * created/deleted/modified files relative to a token.  In the response,
 280  * clients receive a new token for the next (relative) request.
 281  *
 282  *
 283  * Token Format
 284  * ============
 285  *
 286  * The contents of the token are private and provider-specific.
 287  *
 288  * For the built-in fsmonitor--daemon, we define a token as follows:
 289  *
 290  *     "builtin" ":" <token_id> ":" <sequence_nr>
 291  *
 292  * The "builtin" prefix is used as a namespace to avoid conflicts
 293  * with other providers (such as Watchman).
 294  *
 295  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 296  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 297  * events that happened while the daemon was monitoring (and in-sync
 298  * with the filesystem).
 299  *
 300  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 301  *     and does not define less-than/greater-than relationships.
 302  *     (There are too many race conditions to rely on file system
 303  *     event timestamps.)
 304  *
 305  * The <sequence_nr> is a simple integer incremented whenever the
 306  * daemon needs to make its state public.  For example, if 1000 file
 307  * system events come in, but no clients have requested the data,
 308  * the daemon can continue to accumulate file changes in the same
 309  * bin and does not need to advance the sequence number.  However,
 310  * as soon as a client does arrive, the daemon needs to start a new
 311  * bin and increment the sequence number.
 312  *
 313  *     The sequence number serves as the boundary between 2 sets
 314  *     of bins -- the older ones that the client has already seen
 315  *     and the newer ones that it hasn't.
 316  *
 317  * When a new <token_id> is created, the <sequence_nr> is reset to
 318  * zero.
 319  *
 320  *
 321  * About Token Ids
 322  * ===============
 323  *
 324  * A new token_id is created:
 325  *
 326  * [1] each time the daemon is started.
 327  *
 328  * [2] any time that the daemon must re-sync with the filesystem
 329  *     (such as when the kernel drops or we miss events on a very
 330  *     active volume).
 331  *
 332  * [3] in response to a client "flush" command (for dropped event
 333  *     testing).
 334  *
 335  * When a new token_id is created, the daemon is free to discard all
 336  * cached filesystem events associated with any previous token_ids.
 337  * Events associated with a non-current token_id will never be sent
 338  * to a client.  A token_id change implicitly means that the daemon
 339  * has a gap in its event history.
 340  *
 341  * Therefore, clients that present a token with a stale (non-current)
 342  * token_id will always be given a trivial response.
 343  */
 344 struct fsmonitor_token_data {
 345         struct strbuf token_id;
 346         struct fsmonitor_batch *batch_head;
 347         struct fsmonitor_batch *batch_tail;
 348         uint64_t client_ref_count;
 349 };
 350
 /*
  * One bin of changed paths.  Batches are chained through `next`.
  */
 struct fsmonitor_batch {
         struct fsmonitor_batch *next; /* following batch in the list */
         uint64_t batch_seq_nr;        /* <sequence_nr> of this bin */
         const char **interned_paths;  /* owned array of interned strings */
         size_t nr, alloc;             /* used / allocated array slots */
         time_t pinned_time;           /* 0 until the batch is pinned */
 };
 358
 359 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 360 {
 361         static int test_env_value = -1;
 362         static uint64_t flush_count = 0;
 363         struct fsmonitor_token_data *token;
 364         struct fsmonitor_batch *batch;
 365
 366         CALLOC_ARRAY(token, 1);
 367         batch = fsmonitor_batch__new();
 368
 369         strbuf_init(&token->token_id, 0);
 370         token->batch_head = batch;
 371         token->batch_tail = batch;
 372         token->client_ref_count = 0;
 373
 374         if (test_env_value < 0)
 375                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 376
 377         if (!test_env_value) {
 378                 struct timeval tv;
 379                 struct tm tm;
 380                 time_t secs;
 381
 382                 gettimeofday(&tv, NULL);
 383                 secs = tv.tv_sec;
 384                 gmtime_r(&secs, &tm);
 385
 386                 strbuf_addf(&token->token_id,
 387                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 388                             flush_count++,
 389                             getpid(),
 390                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 391                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 392                             (long)tv.tv_usec);
 393         } else {
 394                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 395         }
 396
 397         /*
 398          * We created a new <token_id> and are starting a new series
 399          * of tokens with a zero <seq_nr>.
 400          *
 401          * Since clients cannot guess our new (non test) <token_id>
 402          * they will always receive a trivial response (because of the
 403          * mismatch on the <token_id>).  The trivial response will
 404          * tell them our new <token_id> so that subsequent requests
 405          * will be relative to our new series.  (And when sending that
 406          * response, we pin the current head of the batch list.)
 407          *
 408          * Even if the client correctly guesses the <token_id>, their
 409          * request of "builtin:<token_id>:0" asks for all changes MORE
 410          * RECENT than batch/bin 0.
 411          *
 412          * This implies that it is a waste to accumulate paths in the
 413          * initial batch/bin (because they will never be transmitted).
 414          *
 415          * So the daemon could be running for days and watching the
 416          * file system, but doesn't need to actually accumulate any
 417          * paths UNTIL we need to set a reference point for a later
 418          * relative request.
 419          *
 420          * However, it is very useful for testing to always have a
 421          * reference point set.  Pin batch 0 to force early file system
 422          * events to accumulate.
 423          */
 424         if (test_env_value)
 425                 batch->pinned_time = time(NULL);
 426
 427         return token;
 428 }
 429
 /*
  * Allocate a zero-filled batch.  The result can be released with
  * fsmonitor_batch__free_list().
  */
 struct fsmonitor_batch *fsmonitor_batch__new(void)
 {
         struct fsmonitor_batch *fresh;

         CALLOC_ARRAY(fresh, 1);
         return fresh;
 }
 438
 439 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 440 {
 441         while (batch) {
 442                 struct fsmonitor_batch *next = batch->next;
 443
 444                 /*
 445                  * The actual strings within the array of this batch
 446                  * are interned, so we don't own them.  We only own
 447                  * the array.
 448                  */
 449                 free(batch->interned_paths);
 450                 free(batch);
 451
 452                 batch = next;
 453         }
 454 }
 455
 456 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 457                                const char *path)
 458 {
 459         const char *interned_path = strintern(path);
 460
 461         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 462
 463         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 464         batch->interned_paths[batch->nr++] = interned_path;
 465 }
 466
 467 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 468                                      const struct fsmonitor_batch *batch_src)
 469 {
 470         size_t k;
 471
 472         ALLOC_GROW(batch_dest->interned_paths,
 473                    batch_dest->nr + batch_src->nr + 1,
 474                    batch_dest->alloc);
 475
 476         for (k = 0; k < batch_src->nr; k++)
 477                 batch_dest->interned_paths[batch_dest->nr++] =
 478                         batch_src->interned_paths[k];
 479 }
 480
 481 /*
 482  * To keep the batch list from growing unbounded in response to filesystem
 483  * activity, we try to truncate old batches from the end of the list as
 484  * they become irrelevant.
 485  *
 486  * We assume that the .git/index will be updated with the most recent token
 487  * any time the index is updated.  And future commands will only ask for
 488  * recent changes *since* that new token.  So as tokens advance into the
 489  * future, older batch items will never be requested/needed.  So we can
 490  * truncate them without loss of functionality.
 491  *
 492  * However, multiple commands may be talking to the daemon concurrently
 493  * or perform a slow command, so a little "token skew" is possible.
 494  * Therefore, we want this to be a little bit lazy and have a generous
 495  * delay.
 496  *
 497  * The current reader thread walked backwards in time from `token->batch_head`
 498  * back to `batch_marker` somewhere in the middle of the batch list.
 499  *
 500  * Let's walk backwards in time from that marker an arbitrary delay
 501  * and truncate the list there.  Note that these timestamps are completely
 502  * artificial (based on when we pinned the batch item) and not on any
 503  * filesystem activity.
 504  *
 505  * Return the obsolete portion of the list after we have removed it from
 506  * the official list so that the caller can free it after leaving the lock.
 507  */
 508 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 509
 510 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 511         struct fsmonitor_daemon_state *state,
 512         const struct fsmonitor_batch *batch_marker)
 513 {
 514         /* assert current thread holding state->main_lock */
 515
 516         const struct fsmonitor_batch *batch;
 517         struct fsmonitor_batch *remainder;
 518
 519         if (!batch_marker)
 520                 return NULL;
 521
 522         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 523                          batch_marker->batch_seq_nr,
 524                          (uint64_t)batch_marker->pinned_time);
 525
 526         for (batch = batch_marker; batch; batch = batch->next) {
 527                 time_t t;
 528
 529                 if (!batch->pinned_time) /* an overflow batch */
 530                         continue;
 531
 532                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 533                 if (t > batch_marker->pinned_time) /* too close to marker */
 534                         continue;
 535
 536                 goto truncate_past_here;
 537         }
 538
 539         return NULL;
 540
 541 truncate_past_here:
 542         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 543
 544         remainder = ((struct fsmonitor_batch *)batch)->next;
 545         ((struct fsmonitor_batch *)batch)->next = NULL;
 546
 547         return remainder;
 548 }
 549
 550 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 551 {
 552         if (!token)
 553                 return;
 554
 555         assert(token->client_ref_count == 0);
 556
 557         strbuf_release(&token->token_id);
 558
 559         fsmonitor_batch__free_list(token->batch_head);
 560
 561         free(token);
 562 }
 563
 564 /*
 565  * Flush all of our cached data about the filesystem.  Call this if we
 566  * lose sync with the filesystem and miss some notification events.
 567  *
 568  * [1] If we are missing events, then we no longer have a complete
 569  *     history of the directory (relative to our current start token).
 570  *     We should create a new token and start fresh (as if we just
 571  *     booted up).
 572  *
 573  * [2] Some of those lost events may have been for cookie files.  We
 574  *     should assume the worst and abort them rather letting them starve.
 575  *
 576  * If there are no concurrent threads reading the current token data
 577  * series, we can free it now.  Otherwise, let the last reader free
 578  * it.
 579  *
 580  * Either way, the old token data series is no longer associated with
 581  * our state data.
 582  */
 583 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 584 {
 585         /* assert current thread holding state->main_lock */
 586
 587         struct fsmonitor_token_data *free_me = NULL;
 588         struct fsmonitor_token_data *new_one = NULL;
 589
 590         new_one = fsmonitor_new_token_data();
 591
 592         if (state->current_token_data->client_ref_count == 0)
 593                 free_me = state->current_token_data;
 594         state->current_token_data = new_one;
 595
 596         fsmonitor_free_token_data(free_me);
 597
 598         with_lock__abort_all_cookies(state);
 599 }
 600
 601 void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 602 {
 603         pthread_mutex_lock(&state->main_lock);
 604         with_lock__do_force_resync(state);
 605         pthread_mutex_unlock(&state->main_lock);
 606 }
 607
 608 /*
 609  * Format an opaque token string to send to the client.
 610  */
 611 static void with_lock__format_response_token(
 612         struct strbuf *response_token,
 613         const struct strbuf *response_token_id,
 614         const struct fsmonitor_batch *batch)
 615 {
 616         /* assert current thread holding state->main_lock */
 617
 618         strbuf_reset(response_token);
 619         strbuf_addf(response_token, "builtin:%s:%"PRIu64,
 620                     response_token_id->buf, batch->batch_seq_nr);
 621 }
 622
 /*
  * Parse an opaque token from the client.
  *
  * The expected form is "builtin:<token_id>:<seq_nr>", where <seq_nr> is
  * a non-empty run of decimal digits that extends to the end of the
  * string.
  *
  * `requested_token_id` is reset and receives the <token_id> text.  It
  * may hold a partial value when parsing fails.  `*seq_nr` receives the
  * sequence number on success and is 0 on failure.
  *
  * Returns 0 on success and -1 on error.
  */
 static int fsmonitor_parse_client_token(const char *buf_token,
                                         struct strbuf *requested_token_id,
                                         uint64_t *seq_nr)
 {
         const char *p;
         char *p_end;
         uintmax_t parsed;

         strbuf_reset(requested_token_id);
         *seq_nr = 0;

         if (!skip_prefix(buf_token, "builtin:", &p))
                 return -1;

         /* Everything up to the next ':' is the token id. */
         while (*p && *p != ':')
                 strbuf_addch(requested_token_id, *p++);
         if (!*p++)
                 return -1; /* no ':' separator, so no <seq_nr> at all */

         /*
          * strtoumax() on its own accepts an empty string (yielding 0),
          * leading whitespace, and a '+' or '-' sign (where "-1" wraps
          * around to a huge value).  Tokens formatted by the daemon
          * always carry a plain decimal number, so insist on a leading
          * digit before converting.
          */
         if (*p < '0' || *p > '9')
                 return -1;

         parsed = strtoumax(p, &p_end, 10);
         if (*p_end)
                 return -1; /* trailing garbage after the number */

         *seq_nr = (uint64_t)parsed;
         return 0;
 }
 651
 652 KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 653
 654 static int do_handle_client(struct fsmonitor_daemon_state *state,
 655                             const char *command,
 656                             ipc_server_reply_cb *reply,
 657                             struct ipc_server_reply_data *reply_data)
 658 {
 659         struct fsmonitor_token_data *token_data = NULL;
 660         struct strbuf response_token = STRBUF_INIT;
 661         struct strbuf requested_token_id = STRBUF_INIT;
 662         struct strbuf payload = STRBUF_INIT;
 663         uint64_t requested_oldest_seq_nr = 0;
 664         uint64_t total_response_len = 0;
 665         const char *p;
 666         const struct fsmonitor_batch *batch_head;
 667         const struct fsmonitor_batch *batch;
 668         struct fsmonitor_batch *remainder = NULL;
 669         intmax_t count = 0, duplicates = 0;
 670         kh_str_t *shown;
 671         int hash_ret;
 672         int do_trivial = 0;
 673         int do_flush = 0;
 674         int do_cookie = 0;
 675         enum fsmonitor_cookie_item_result cookie_result;
 676
 677         /*
 678          * We expect `command` to be of the form:
 679          *
 680          * <command> := quit NUL
 681          *            | flush NUL
 682          *            | <V1-time-since-epoch-ns> NUL
 683          *            | <V2-opaque-fsmonitor-token> NUL
 684          */
 685
 686         if (!strcmp(command, "quit")) {
 687                 /*
 688                  * A client has requested over the socket/pipe that the
 689                  * daemon shutdown.
 690                  *
 691                  * Tell the IPC thread pool to shutdown (which completes
 692                  * the await in the main thread (which can stop the
 693                  * fsmonitor listener thread)).
 694                  *
 695                  * There is no reply to the client.
 696                  */
 697                 return SIMPLE_IPC_QUIT;
 698
 699         } else if (!strcmp(command, "flush")) {
 700                 /*
 701                  * Flush all of our cached data and generate a new token
 702                  * just like if we lost sync with the filesystem.
 703                  *
 704                  * Then send a trivial response using the new token.
 705                  */
 706                 do_flush = 1;
 707                 do_trivial = 1;
 708
 709         } else if (!skip_prefix(command, "builtin:", &p)) {
 710                 /* assume V1 timestamp or garbage */
 711
 712                 char *p_end;
 713
 714                 strtoumax(command, &p_end, 10);
 715                 trace_printf_key(&trace_fsmonitor,
 716                                  ((*p_end) ?
 717                                   "fsmonitor: invalid command line '%s'" :
 718                                   "fsmonitor: unsupported V1 protocol '%s'"),
 719                                  command);
 720                 do_trivial = 1;
 721                 do_cookie = 1;
 722
 723         } else {
 724                 /* We have "builtin:*" */
 725                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 726                                                  &requested_oldest_seq_nr)) {
 727                         trace_printf_key(&trace_fsmonitor,
 728                                          "fsmonitor: invalid V2 protocol token '%s'",
 729                                          command);
 730                         do_trivial = 1;
 731                         do_cookie = 1;
 732
 733                 } else {
 734                         /*
 735                          * We have a V2 valid token:
 736                          *     "builtin:<token_id>:<seq_nr>"
 737                          */
 738                         do_cookie = 1;
 739                 }
 740         }
 741
 742         pthread_mutex_lock(&state->main_lock);
 743
 744         if (!state->current_token_data)
 745                 BUG("fsmonitor state does not have a current token");
 746
 747         /*
 748          * Write a cookie file inside the directory being watched in
 749          * an effort to flush out existing filesystem events that we
 750          * actually care about.  Suspend this client thread until we
 751          * see the filesystem events for this cookie file.
 752          *
 753          * Creating the cookie lets us guarantee that our FS listener
 754          * thread has drained the kernel queue and we are caught up
 755          * with the kernel.
 756          *
 757          * If we cannot create the cookie (or otherwise guarantee that
 758          * we are caught up), we send a trivial response.  We have to
 759          * assume that there might be some very, very recent activity
 760          * on the FS still in flight.
 761          */
 762         if (do_cookie) {
 763                 cookie_result = with_lock__wait_for_cookie(state);
 764                 if (cookie_result != FCIR_SEEN) {
 765                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 766                               cookie_result);
 767                         do_trivial = 1;
 768                 }
 769         }
 770
 771         if (do_flush)
 772                 with_lock__do_force_resync(state);
 773
 774         /*
 775          * We mark the current head of the batch list as "pinned" so
 776          * that the listener thread will treat this item as read-only
 777          * (and prevent any more paths from being added to it) from
 778          * now on.
 779          */
 780         token_data = state->current_token_data;
 781         batch_head = token_data->batch_head;
 782         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 783
 784         /*
 785          * FSMonitor Protocol V2 requires that we send a response header
 786          * with a "new current token" and then all of the paths that changed
 787          * since the "requested token".  We send the seq_nr of the just-pinned
 788          * head batch so that future requests from a client will be relative
 789          * to it.
 790          */
 791         with_lock__format_response_token(&response_token,
 792                                          &token_data->token_id, batch_head);
 793
 794         reply(reply_data, response_token.buf, response_token.len + 1);
 795         total_response_len += response_token.len + 1;
 796
 797         trace2_data_string("fsmonitor", the_repository, "response/token",
 798                            response_token.buf);
 799         trace_printf_key(&trace_fsmonitor, "response token: %s",
 800                          response_token.buf);
 801
 802         if (!do_trivial) {
 803                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 804                         /*
 805                          * The client last spoke to a different daemon
 806                          * instance -OR- the daemon had to resync with
 807                          * the filesystem (and lost events), so reject.
 808                          */
 809                         trace2_data_string("fsmonitor", the_repository,
 810                                            "response/token", "different");
 811                         do_trivial = 1;
 812
 813                 } else if (requested_oldest_seq_nr <
 814                            token_data->batch_tail->batch_seq_nr) {
 815                         /*
 816                          * The client wants older events than we have for
 817                          * this token_id.  This means that the end of our
 818                          * batch list was truncated and we cannot give the
 819                          * client a complete snapshot relative to their
 820                          * request.
 821                          */
 822                         trace_printf_key(&trace_fsmonitor,
 823                                          "client requested truncated data");
 824                         do_trivial = 1;
 825                 }
 826         }
 827
 828         if (do_trivial) {
 829                 pthread_mutex_unlock(&state->main_lock);
 830
 831                 reply(reply_data, "/", 2);
 832
 833                 trace2_data_intmax("fsmonitor", the_repository,
 834                                    "response/trivial", 1);
 835
 836                 goto cleanup;
 837         }
 838
 839         /*
 840          * We're going to hold onto a pointer to the current
 841          * token-data while we walk the list of batches of files.
 842          * During this time, we will NOT be under the lock.
 843          * So we ref-count it.
 844          *
 845          * This allows the listener thread to continue prepending
 846          * new batches of items to the token-data (which we'll ignore).
 847          *
 848          * AND it allows the listener thread to do a token-reset
 849          * (and install a new `current_token_data`).
 850          */
 851         token_data->client_ref_count++;
 852
 853         pthread_mutex_unlock(&state->main_lock);
 854
 855         /*
 856          * The client request is relative to the token that they sent,
 857          * so walk the batch list backwards from the current head back
 858          * to the batch (sequence number) they named.
 859          *
 860          * We use khash to de-dup the list of pathnames.
 861          *
 862          * NEEDSWORK: each batch contains a list of interned strings,
 863          * so we only need to do pointer comparisons here to build the
 864          * hash table.  Currently, we're still comparing the string
 865          * values.
 866          */
 867         shown = kh_init_str();
 868         for (batch = batch_head;
 869              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 870              batch = batch->next) {
 871                 size_t k;
 872
 873                 for (k = 0; k < batch->nr; k++) {
 874                         const char *s = batch->interned_paths[k];
 875                         size_t s_len;
 876
 877                         if (kh_get_str(shown, s) != kh_end(shown))
 878                                 duplicates++;
 879                         else {
 880                                 kh_put_str(shown, s, &hash_ret);
 881
 882                                 trace_printf_key(&trace_fsmonitor,
 883                                                  "send[%"PRIuMAX"]: %s",
 884                                                  count, s);
 885
 886                                 /* Each path gets written with a trailing NUL */
 887                                 s_len = strlen(s) + 1;
 888
 889                                 if (payload.len + s_len >=
 890                                     LARGE_PACKET_DATA_MAX) {
 891                                         reply(reply_data, payload.buf,
 892                                               payload.len);
 893                                         total_response_len += payload.len;
 894                                         strbuf_reset(&payload);
 895                                 }
 896
 897                                 strbuf_add(&payload, s, s_len);
 898                                 count++;
 899                         }
 900                 }
 901         }
 902
 903         if (payload.len) {
 904                 reply(reply_data, payload.buf, payload.len);
 905                 total_response_len += payload.len;
 906         }
 907
 908         kh_release_str(shown);
 909
 910         pthread_mutex_lock(&state->main_lock);
 911
 912         if (token_data->client_ref_count > 0)
 913                 token_data->client_ref_count--;
 914
 915         if (token_data->client_ref_count == 0) {
 916                 if (token_data != state->current_token_data) {
 917                         /*
 918                          * The listener thread did a token-reset while we were
 919                          * walking the batch list.  Therefore, this token is
 920                          * stale and can be discarded completely.  If we are
 921                          * the last reader thread using this token, we own
 922                          * that work.
 923                          */
 924                         fsmonitor_free_token_data(token_data);
 925                 } else if (batch) {
 926                         /*
 927                          * We are holding the lock and are the only
 928                          * reader of the ref-counted portion of the
 929                          * list, so we get the honor of seeing if the
 930                          * list can be truncated to save memory.
 931                          *
 932                          * The main loop did not walk to the end of the
 933                          * list, so this batch is the first item in the
 934                          * batch-list that is older than the requested
 935                          * end-point sequence number.  See if the tail
 936                          * end of the list is obsolete.
 937                          */
 938                         remainder = with_lock__truncate_old_batches(state,
 939                                                                     batch);
 940                 }
 941         }
 942
 943         pthread_mutex_unlock(&state->main_lock);
 944
 945         if (remainder)
 946                 fsmonitor_batch__free_list(remainder);
 947
 948         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 949         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 950         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 951
 952 cleanup:
 953         strbuf_release(&response_token);
 954         strbuf_release(&requested_token_id);
 955         strbuf_release(&payload);
 956
 957         return 0;
 958 }
 959
 960 static ipc_server_application_cb handle_client;
 961
 962 static int handle_client(void *data,
 963                          const char *command, size_t command_len,
 964                          ipc_server_reply_cb *reply,
 965                          struct ipc_server_reply_data *reply_data)
 966 {
 967         struct fsmonitor_daemon_state *state = data;
 968         int result;
 969
 970         /*
 971          * The Simple IPC API now supports {char*, len} arguments, but
 972          * FSMonitor always uses proper null-terminated strings, so
 973          * we can ignore the command_len argument.  (Trust, but verify.)
 974          */
 975         if (command_len != strlen(command))
 976                 BUG("FSMonitor assumes text messages");
 977
 978         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 979
 980         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 981         trace2_data_string("fsmonitor", the_repository, "request", command);
 982
 983         result = do_handle_client(state, command, reply, reply_data);
 984
 985         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 986
 987         return result;
 988 }
 989
 990 #define FSMONITOR_DIR           "fsmonitor--daemon"
 991 #define FSMONITOR_COOKIE_DIR    "cookies"
 992 #define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 993
 994 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 995         const char *rel)
 996 {
 997         if (fspathncmp(rel, ".git", 4))
 998                 return IS_WORKDIR_PATH;
 999         rel += 4;
1000
1001         if (!*rel)
1002                 return IS_DOT_GIT;
1003         if (*rel != '/')
1004                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
1005         rel++;
1006
1007         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1008                         strlen(FSMONITOR_COOKIE_PREFIX)))
1009                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1010
1011         return IS_INSIDE_DOT_GIT;
1012 }
1013
1014 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1015         const char *rel)
1016 {
1017         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1018                         strlen(FSMONITOR_COOKIE_PREFIX)))
1019                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1020
1021         return IS_INSIDE_GITDIR;
1022 }
1023
1024 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1025         struct fsmonitor_daemon_state *state,
1026         const char *path)
1027 {
1028         const char *rel;
1029
1030         if (fspathncmp(path, state->path_worktree_watch.buf,
1031                        state->path_worktree_watch.len))
1032                 return IS_OUTSIDE_CONE;
1033
1034         rel = path + state->path_worktree_watch.len;
1035
1036         if (!*rel)
1037                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1038         if (*rel != '/')
1039                 return IS_OUTSIDE_CONE;
1040         rel++;
1041
1042         return fsmonitor_classify_path_workdir_relative(rel);
1043 }
1044
1045 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1046         struct fsmonitor_daemon_state *state,
1047         const char *path)
1048 {
1049         const char *rel;
1050         enum fsmonitor_path_type t;
1051
1052         t = try_classify_workdir_abs_path(state, path);
1053         if (state->nr_paths_watching == 1)
1054                 return t;
1055         if (t != IS_OUTSIDE_CONE)
1056                 return t;
1057
1058         if (fspathncmp(path, state->path_gitdir_watch.buf,
1059                        state->path_gitdir_watch.len))
1060                 return IS_OUTSIDE_CONE;
1061
1062         rel = path + state->path_gitdir_watch.len;
1063
1064         if (!*rel)
1065                 return IS_GITDIR; /* it is the <gitdir> exactly */
1066         if (*rel != '/')
1067                 return IS_OUTSIDE_CONE;
1068         rel++;
1069
1070         return fsmonitor_classify_path_gitdir_relative(rel);
1071 }
1072
1073 /*
1074  * We try to combine small batches at the front of the batch-list to avoid
1075  * having a long list.  This hopefully makes it a little easier when we want
1076  * to truncate and maintain the list.  However, we don't want the paths array
1077  * to just keep growing and growing with realloc, so we insert an arbitrary
1078  * limit.
1079  */
1080 #define MY_COMBINE_LIMIT (1024)
1081
1082 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1083                        struct fsmonitor_batch *batch,
1084                        const struct string_list *cookie_names)
1085 {
1086         if (!batch && !cookie_names->nr)
1087                 return;
1088
1089         pthread_mutex_lock(&state->main_lock);
1090
1091         if (batch) {
1092                 struct fsmonitor_batch *head;
1093
1094                 head = state->current_token_data->batch_head;
1095                 if (!head) {
1096                         BUG("token does not have batch");
1097                 } else if (head->pinned_time) {
1098                         /*
1099                          * We cannot alter the current batch list
1100                          * because:
1101                          *
1102                          * [a] it is being transmitted to at least one
1103                          * client and the handle_client() thread has a
1104                          * ref-count, but not a lock on the batch list
1105                          * starting with this item.
1106                          *
1107                          * [b] it has been transmitted in the past to
1108                          * at least one client such that future
1109                          * requests are relative to this head batch.
1110                          *
1111                          * So, we can only prepend a new batch onto
1112                          * the front of the list.
1113                          */
1114                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1115                         batch->next = head;
1116                         state->current_token_data->batch_head = batch;
1117                 } else if (!head->batch_seq_nr) {
1118                         /*
1119                          * Batch 0 is unpinned.  See the note in
1120                          * `fsmonitor_new_token_data()` about why we
1121                          * don't need to accumulate these paths.
1122                          */
1123                         fsmonitor_batch__free_list(batch);
1124                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1125                         /*
1126                          * The head batch in the list has never been
1127                          * transmitted to a client, but folding the
1128                          * contents of the new batch onto it would
1129                          * exceed our arbitrary limit, so just prepend
1130                          * the new batch onto the list.
1131                          */
1132                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1133                         batch->next = head;
1134                         state->current_token_data->batch_head = batch;
1135                 } else {
1136                         /*
1137                          * We are free to add the paths in the given
1138                          * batch onto the end of the current head batch.
1139                          */
1140                         fsmonitor_batch__combine(head, batch);
1141                         fsmonitor_batch__free_list(batch);
1142                 }
1143         }
1144
1145         if (cookie_names->nr)
1146                 with_lock__mark_cookies_seen(state, cookie_names);
1147
1148         pthread_mutex_unlock(&state->main_lock);
1149 }
1150
/*
 * Thread entry point for the health thread.
 *
 * `_state` is the `struct fsmonitor_daemon_state` shared by all of the
 * daemon's threads.  Runs fsm_health__loop() between the trace2
 * thread start/exit markers.  Always returns NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        struct fsmonitor_daemon_state *daemon_state = _state;

        trace2_thread_start("fsm-health");
        fsm_health__loop(daemon_state);
        trace2_thread_exit();

        return NULL;
}
1162
1163 static void *fsm_listen__thread_proc(void *_state)
1164 {
1165         struct fsmonitor_daemon_state *state = _state;
1166
1167         trace2_thread_start("fsm-listen");
1168
1169         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1170                          state->path_worktree_watch.buf);
1171         if (state->nr_paths_watching > 1)
1172                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1173                                  state->path_gitdir_watch.buf);
1174
1175         fsm_listen__loop(state);
1176
1177         pthread_mutex_lock(&state->main_lock);
1178         if (state->current_token_data &&
1179             state->current_token_data->client_ref_count == 0)
1180                 fsmonitor_free_token_data(state->current_token_data);
1181         state->current_token_data = NULL;
1182         pthread_mutex_unlock(&state->main_lock);
1183
1184         trace2_thread_exit();
1185         return NULL;
1186 }
1187
1188 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1189 {
1190         struct ipc_server_opts ipc_opts = {
1191                 .nr_threads = fsmonitor__ipc_threads,
1192
1193                 /*
1194                  * We know that there are no other active threads yet,
1195                  * so we can let the IPC layer temporarily chdir() if
1196                  * it needs to when creating the server side of the
1197                  * Unix domain socket.
1198                  */
1199                 .uds_disallow_chdir = 0
1200         };
1201         int health_started = 0;
1202         int listener_started = 0;
1203         int err = 0;
1204
1205         /*
1206          * Start the IPC thread pool before the we've started the file
1207          * system event listener thread so that we have the IPC handle
1208          * before we need it.
1209          */
1210         if (ipc_server_run_async(&state->ipc_server_data,
1211                                  state->path_ipc.buf, &ipc_opts,
1212                                  handle_client, state))
1213                 return error_errno(
1214                         _("could not start IPC thread pool on '%s'"),
1215                         state->path_ipc.buf);
1216
1217         /*
1218          * Start the fsmonitor listener thread to collect filesystem
1219          * events.
1220          */
1221         if (pthread_create(&state->listener_thread, NULL,
1222                            fsm_listen__thread_proc, state)) {
1223                 ipc_server_stop_async(state->ipc_server_data);
1224                 err = error(_("could not start fsmonitor listener thread"));
1225                 goto cleanup;
1226         }
1227         listener_started = 1;
1228
1229         /*
1230          * Start the health thread to watch over our process.
1231          */
1232         if (pthread_create(&state->health_thread, NULL,
1233                            fsm_health__thread_proc, state)) {
1234                 ipc_server_stop_async(state->ipc_server_data);
1235                 err = error(_("could not start fsmonitor health thread"));
1236                 goto cleanup;
1237         }
1238         health_started = 1;
1239
1240         /*
1241          * The daemon is now fully functional in background threads.
1242          * Our primary thread should now just wait while the threads
1243          * do all the work.
1244          */
1245 cleanup:
1246         /*
1247          * Wait for the IPC thread pool to shutdown (whether by client
1248          * request, from filesystem activity, or an error).
1249          */
1250         ipc_server_await(state->ipc_server_data);
1251
1252         /*
1253          * The fsmonitor listener thread may have received a shutdown
1254          * event from the IPC thread pool, but it doesn't hurt to tell
1255          * it again.  And wait for it to shutdown.
1256          */
1257         if (listener_started) {
1258                 fsm_listen__stop_async(state);
1259                 pthread_join(state->listener_thread, NULL);
1260         }
1261
1262         if (health_started) {
1263                 fsm_health__stop_async(state);
1264                 pthread_join(state->health_thread, NULL);
1265         }
1266
1267         if (err)
1268                 return err;
1269         if (state->listen_error_code)
1270                 return state->listen_error_code;
1271         if (state->health_error_code)
1272                 return state->health_error_code;
1273         return 0;
1274 }
1275
1276 static int fsmonitor_run_daemon(void)
1277 {
1278         struct fsmonitor_daemon_state state;
1279         const char *home;
1280         int err;
1281
1282         memset(&state, 0, sizeof(state));
1283
1284         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1285         pthread_mutex_init(&state.main_lock, NULL);
1286         pthread_cond_init(&state.cookies_cond, NULL);
1287         state.listen_error_code = 0;
1288         state.health_error_code = 0;
1289         state.current_token_data = fsmonitor_new_token_data();
1290
1291         /* Prepare to (recursively) watch the <worktree-root> directory. */
1292         strbuf_init(&state.path_worktree_watch, 0);
1293         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1294         state.nr_paths_watching = 1;
1295
1296         strbuf_init(&state.alias.alias, 0);
1297         strbuf_init(&state.alias.points_to, 0);
1298         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1299                 goto done;
1300
1301         /*
1302          * We create and delete cookie files somewhere inside the .git
1303          * directory to help us keep sync with the file system.  If
1304          * ".git" is not a directory, then <gitdir> is not inside the
1305          * cone of <worktree-root>, so set up a second watch to watch
1306          * the <gitdir> so that we get events for the cookie files.
1307          */
1308         strbuf_init(&state.path_gitdir_watch, 0);
1309         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1310         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1311         if (!is_directory(state.path_gitdir_watch.buf)) {
1312                 strbuf_reset(&state.path_gitdir_watch);
1313                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1314                 state.nr_paths_watching = 2;
1315         }
1316
1317         /*
1318          * We will write filesystem syncing cookie files into
1319          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1320          *
1321          * The extra layers of subdirectories here keep us from
1322          * changing the mtime on ".git/" or ".git/foo/" when we create
1323          * or delete cookie files.
1324          *
1325          * There have been problems with some IDEs that do a
1326          * non-recursive watch of the ".git/" directory and run a
1327          * series of commands any time something happens.
1328          *
1329          * For example, if we place our cookie files directly in
1330          * ".git/" or ".git/foo/" then a `git status` (or similar
1331          * command) from the IDE will cause a cookie file to be
1332          * created in one of those dirs.  This causes the mtime of
1333          * those dirs to change.  This triggers the IDE's watch
1334          * notification.  This triggers the IDE to run those commands
1335          * again.  And the process repeats and the machine never goes
1336          * idle.
1337          *
1338          * Adding the extra layers of subdirectories prevents the
1339          * mtime of ".git/" and ".git/foo" from changing when a
1340          * cookie file is created.
1341          */
1342         strbuf_init(&state.path_cookie_prefix, 0);
1343         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1344
1345         strbuf_addch(&state.path_cookie_prefix, '/');
1346         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1347         mkdir(state.path_cookie_prefix.buf, 0777);
1348
1349         strbuf_addch(&state.path_cookie_prefix, '/');
1350         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1351         mkdir(state.path_cookie_prefix.buf, 0777);
1352
1353         strbuf_addch(&state.path_cookie_prefix, '/');
1354
1355         /*
1356          * We create a named-pipe or unix domain socket inside of the
1357          * ".git" directory.  (Well, on Windows, we base our named
1358          * pipe in the NPFS on the absolute path of the git
1359          * directory.)
1360          */
1361         strbuf_init(&state.path_ipc, 0);
1362         strbuf_addstr(&state.path_ipc,
1363                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1364
1365         /*
1366          * Confirm that we can create platform-specific resources for the
1367          * filesystem listener before we bother starting all the threads.
1368          */
1369         if (fsm_listen__ctor(&state)) {
1370                 err = error(_("could not initialize listener thread"));
1371                 goto done;
1372         }
1373
1374         if (fsm_health__ctor(&state)) {
1375                 err = error(_("could not initialize health thread"));
1376                 goto done;
1377         }
1378
1379         /*
1380          * CD out of the worktree root directory.
1381          *
1382          * The common Git startup mechanism causes our CWD to be the
1383          * root of the worktree.  On Windows, this causes our process
1384          * to hold a locked handle on the CWD.  This prevents the
1385          * worktree from being moved or deleted while the daemon is
1386          * running.
1387          *
1388          * We assume that our FS and IPC listener threads have either
1389          * opened all of the handles that they need or will do
1390          * everything using absolute paths.
1391          */
1392         home = getenv("HOME");
1393         if (home && *home && chdir(home))
1394                 die_errno(_("could not cd home '%s'"), home);
1395
1396         err = fsmonitor_run_daemon_1(&state);
1397
1398 done:
1399         pthread_cond_destroy(&state.cookies_cond);
1400         pthread_mutex_destroy(&state.main_lock);
1401         fsm_listen__dtor(&state);
1402         fsm_health__dtor(&state);
1403
1404         ipc_server_free(state.ipc_server_data);
1405
1406         strbuf_release(&state.path_worktree_watch);
1407         strbuf_release(&state.path_gitdir_watch);
1408         strbuf_release(&state.path_cookie_prefix);
1409         strbuf_release(&state.path_ipc);
1410         strbuf_release(&state.alias.alias);
1411         strbuf_release(&state.alias.points_to);
1412
1413         return err;
1414 }
1415
1416 static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED)
1417 {
1418         /*
1419          * Technically, we don't need to probe for an existing daemon
1420          * process, since we could just call `fsmonitor_run_daemon()`
1421          * and let it fail if the pipe/socket is busy.
1422          *
1423          * However, this method gives us a nicer error message for a
1424          * common error case.
1425          */
1426         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1427                 die(_("fsmonitor--daemon is already running '%s'"),
1428                     the_repository->worktree);
1429
1430         if (fsmonitor__announce_startup) {
1431                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1432                         the_repository->worktree);
1433                 fflush(stderr);
1434         }
1435
1436 #ifdef GIT_WINDOWS_NATIVE
1437         if (detach_console)
1438                 FreeConsole();
1439 #endif
1440
1441         return !!fsmonitor_run_daemon();
1442 }
1443
1444 static start_bg_wait_cb bg_wait_cb;
1445
1446 static int bg_wait_cb(const struct child_process *cp UNUSED,
1447                       void *cb_data UNUSED)
1448 {
1449         enum ipc_active_state s = fsmonitor_ipc__get_state();
1450
1451         switch (s) {
1452         case IPC_STATE__LISTENING:
1453                 /* child is "ready" */
1454                 return 0;
1455
1456         case IPC_STATE__NOT_LISTENING:
1457         case IPC_STATE__PATH_NOT_FOUND:
1458                 /* give child more time */
1459                 return 1;
1460
1461         default:
1462         case IPC_STATE__INVALID_PATH:
1463         case IPC_STATE__OTHER_ERROR:
1464                 /* all the time in world won't help */
1465                 return -1;
1466         }
1467 }
1468
1469 static int try_to_start_background_daemon(void)
1470 {
1471         struct child_process cp = CHILD_PROCESS_INIT;
1472         enum start_bg_result sbgr;
1473
1474         /*
1475          * Before we try to create a background daemon process, see
1476          * if a daemon process is already listening.  This makes it
1477          * easier for us to report an already-listening error to the
1478          * console, since our spawn/daemon can only report the success
1479          * of creating the background process (and not whether it
1480          * immediately exited).
1481          */
1482         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1483                 die(_("fsmonitor--daemon is already running '%s'"),
1484                     the_repository->worktree);
1485
1486         if (fsmonitor__announce_startup) {
1487                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1488                         the_repository->worktree);
1489                 fflush(stderr);
1490         }
1491
1492         cp.git_cmd = 1;
1493
1494         strvec_push(&cp.args, "fsmonitor--daemon");
1495         strvec_push(&cp.args, "run");
1496         strvec_push(&cp.args, "--detach");
1497         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1498
1499         cp.no_stdin = 1;
1500         cp.no_stdout = 1;
1501         cp.no_stderr = 1;
1502
1503         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1504                                 fsmonitor__start_timeout_sec);
1505
1506         switch (sbgr) {
1507         case SBGR_READY:
1508                 return 0;
1509
1510         default:
1511         case SBGR_ERROR:
1512         case SBGR_CB_ERROR:
1513                 return error(_("daemon failed to start"));
1514
1515         case SBGR_TIMEOUT:
1516                 return error(_("daemon not online yet"));
1517
1518         case SBGR_DIED:
1519                 return error(_("daemon terminated"));
1520         }
1521 }
1522
1523 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1524 {
1525         const char *subcmd;
1526         enum fsmonitor_reason reason;
1527         int detach_console = 0;
1528
1529         struct option options[] = {
1530                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1531                 OPT_INTEGER(0, "ipc-threads",
1532                             &fsmonitor__ipc_threads,
1533                             N_("use <n> ipc worker threads")),
1534                 OPT_INTEGER(0, "start-timeout",
1535                             &fsmonitor__start_timeout_sec,
1536                             N_("max seconds to wait for background daemon startup")),
1537
1538                 OPT_END()
1539         };
1540
1541         git_config(fsmonitor_config, NULL);
1542
1543         argc = parse_options(argc, argv, prefix, options,
1544                              builtin_fsmonitor__daemon_usage, 0);
1545         if (argc != 1)
1546                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1547         subcmd = argv[0];
1548
1549         if (fsmonitor__ipc_threads < 1)
1550                 die(_("invalid 'ipc-threads' value (%d)"),
1551                     fsmonitor__ipc_threads);
1552
1553         prepare_repo_settings(the_repository);
1554         /*
1555          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1556          * (without bothering to load the `core.fsmonitor` config settings).
1557          *
1558          * If the repo is not compatible, the repo-settings will be set to
1559          * incompatible rather than IPC, so we can use one of the __get
1560          * routines to detect the discrepancy.
1561          */
1562         fsm_settings__set_ipc(the_repository);
1563
1564         reason = fsm_settings__get_reason(the_repository);
1565         if (reason > FSMONITOR_REASON_OK)
1566                 die("%s",
1567                     fsm_settings__get_incompatible_msg(the_repository,
1568                                                        reason));
1569
1570         if (!strcmp(subcmd, "start"))
1571                 return !!try_to_start_background_daemon();
1572
1573         if (!strcmp(subcmd, "run"))
1574                 return !!try_to_run_foreground_daemon(detach_console);
1575
1576         if (!strcmp(subcmd, "stop"))
1577                 return !!do_as_client__send_stop();
1578
1579         if (!strcmp(subcmd, "status"))
1580                 return !!do_as_client__status();
1581
1582         die(_("Unhandled subcommand '%s'"), subcmd);
1583 }
1584
1585 #else
1586 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix UNUSED)
1587 {
1588         struct option options[] = {
1589                 OPT_END()
1590         };
1591
1592         if (argc == 2 && !strcmp(argv[1], "-h"))
1593                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1594
1595         die(_("fsmonitor--daemon not supported on this platform"));
1596 }
1597 #endif