#include "builtin.h"
#include "config.h"
#include "parse-options.h"
#include "fsmonitor.h"
#include "fsmonitor-ipc.h"
#include "fsmonitor-path-utils.h"
#include "compat/fsmonitor/fsm-health.h"
#include "compat/fsmonitor/fsm-listen.h"
#include "fsmonitor--daemon.h"
#include "simple-ipc.h"
#include "khash.h"
#include "pkt-line.h"

static const char * const builtin_fsmonitor__daemon_usage[] = {
	N_("git fsmonitor--daemon start [<options>]"),
	N_("git fsmonitor--daemon run [<options>]"),
	"git fsmonitor--daemon stop",
	"git fsmonitor--daemon status",
	NULL
};
#ifdef HAVE_FSMONITOR_DAEMON_BACKEND
/*
 * Global state loaded from config.
 */
#define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
static int fsmonitor__ipc_threads = 8;

#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
static int fsmonitor__start_timeout_sec = 60;

#define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
static int fsmonitor__announce_startup = 0;
static int fsmonitor_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
		int i = git_config_int(var, value);
		if (i < 1)
			return error(_("value of '%s' out of range: %d"),
				     FSMONITOR__IPC_THREADS, i);
		fsmonitor__ipc_threads = i;
		return 0;
	}

	if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
		int i = git_config_int(var, value);
		if (i < 0)
			return error(_("value of '%s' out of range: %d"),
				     FSMONITOR__START_TIMEOUT, i);
		fsmonitor__start_timeout_sec = i;
		return 0;
	}

	if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
		int is_bool;
		int i = git_config_bool_or_int(var, value, &is_bool);
		if (i < 0)
			return error(_("value of '%s' not bool or int: %d"),
				     var, i);
		fsmonitor__announce_startup = i;
		return 0;
	}

	return git_default_config(var, value, cb);
}
/*
 * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
 * and wait for it to shutdown.
 */
static int do_as_client__send_stop(void)
{
	struct strbuf answer = STRBUF_INIT;
	int ret;

	ret = fsmonitor_ipc__send_command("quit", &answer);

	/* The quit command does not return any response data. */
	strbuf_release(&answer);

	if (ret)
		return ret;

	trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
	while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
		sleep_millisec(50);
	trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);

	return 0;
}
static int do_as_client__status(void)
{
	enum ipc_active_state state = fsmonitor_ipc__get_state();

	switch (state) {
	case IPC_STATE__LISTENING:
		printf(_("fsmonitor-daemon is watching '%s'\n"),
		       the_repository->worktree);
		return 0;

	default:
		printf(_("fsmonitor-daemon is not watching '%s'\n"),
		       the_repository->worktree);
		return 1;
	}
}
enum fsmonitor_cookie_item_result {
	FCIR_ERROR = -1, /* could not create cookie file ? */
	FCIR_INIT,
	FCIR_SEEN,
	FCIR_ABORT,
};

struct fsmonitor_cookie_item {
	struct hashmap_entry entry;
	char *name;
	enum fsmonitor_cookie_item_result result;
};
static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
		       const struct hashmap_entry *he2, const void *keydata)
{
	const struct fsmonitor_cookie_item *a =
		container_of(he1, const struct fsmonitor_cookie_item, entry);
	const struct fsmonitor_cookie_item *b =
		container_of(he2, const struct fsmonitor_cookie_item, entry);

	return strcmp(a->name, keydata ? keydata : b->name);
}
static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
	struct fsmonitor_daemon_state *state)
{
	/* assert current thread holding state->main_lock */

	int fd;
	struct fsmonitor_cookie_item *cookie;
	struct strbuf cookie_pathname = STRBUF_INIT;
	struct strbuf cookie_filename = STRBUF_INIT;
	enum fsmonitor_cookie_item_result result;
	int my_cookie_seq;

	CALLOC_ARRAY(cookie, 1);

	my_cookie_seq = state->cookie_seq++;

	strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);

	strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
	strbuf_addbuf(&cookie_pathname, &cookie_filename);

	cookie->name = strbuf_detach(&cookie_filename, NULL);
	cookie->result = FCIR_INIT;
	hashmap_entry_init(&cookie->entry, strhash(cookie->name));

	hashmap_add(&state->cookies, &cookie->entry);

	trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
			 cookie->name, cookie_pathname.buf);

	/*
	 * Create the cookie file on disk and then wait for a notification
	 * that the listener thread has seen it.
	 */
	fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (fd < 0) {
		error_errno(_("could not create fsmonitor cookie '%s'"),
			    cookie->name);

		cookie->result = FCIR_ERROR;
		goto done;
	}

	/*
	 * Technically, close() and unlink() can fail, but we don't
	 * care here.  We only created the file to trigger a watch
	 * event from the FS to know when we're up to date.
	 */
	close(fd);
	unlink(cookie_pathname.buf);

	/*
	 * Technically, this is an infinite wait (well, unless another
	 * thread sends us an abort).  I'd like to change this to
	 * use `pthread_cond_timedwait()` and return an error/timeout
	 * and let the caller do the trivial response thing, but we
	 * don't have that routine in our thread-utils.
	 *
	 * After extensive beta testing I'm not really worried about
	 * this.  Also note that the above open() and unlink() calls
	 * will cause at least two FS events on that path, so the odds
	 * of getting stuck are pretty slim.
	 */
	while (cookie->result == FCIR_INIT)
		pthread_cond_wait(&state->cookies_cond,
				  &state->main_lock);

done:
	hashmap_remove(&state->cookies, &cookie->entry, NULL);

	result = cookie->result;

	free(cookie->name);
	free(cookie);
	strbuf_release(&cookie_pathname);

	return result;
}
/*
 * Mark these cookies as _SEEN and wake up the corresponding client threads.
 */
static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
					 const struct string_list *cookie_names)
{
	/* assert current thread holding state->main_lock */

	int k;
	int nr_seen = 0;

	for (k = 0; k < cookie_names->nr; k++) {
		struct fsmonitor_cookie_item key;
		struct fsmonitor_cookie_item *cookie;

		key.name = cookie_names->items[k].string;
		hashmap_entry_init(&key.entry, strhash(key.name));

		cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
		if (cookie) {
			trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
					 cookie->name);
			cookie->result = FCIR_SEEN;
			nr_seen++;
		}
	}

	if (nr_seen)
		pthread_cond_broadcast(&state->cookies_cond);
}
/*
 * Set _ABORT on all pending cookies and wake up all client threads.
 */
static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
{
	/* assert current thread holding state->main_lock */

	struct hashmap_iter iter;
	struct fsmonitor_cookie_item *cookie;
	int nr_aborted = 0;

	hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
		trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
				 cookie->name);
		cookie->result = FCIR_ABORT;
		nr_aborted++;
	}

	if (nr_aborted)
		pthread_cond_broadcast(&state->cookies_cond);
}
/*
 * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 * "token" as a virtual timestamp.  Clients can request a summary of all
 * created/deleted/modified files relative to a token.  In the response,
 * clients receive a new token for the next (relative) request.
 *
 * The contents of the token are private and provider-specific.
 *
 * For the built-in fsmonitor--daemon, we define a token as follows:
 *
 *     "builtin" ":" <token_id> ":" <sequence_nr>
 *
 * The "builtin" prefix is used as a namespace to avoid conflicts
 * with other providers (such as Watchman).
 *
 * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 * UUID, or {timestamp,pid}.  It is used to group all filesystem
 * events that happened while the daemon was monitoring (and in-sync
 * with the filesystem).
 *
 * Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 * and does not define less-than/greater-than relationships.
 * (There are too many race conditions to rely on file system
 * event timestamps.)
 *
 * The <sequence_nr> is a simple integer incremented whenever the
 * daemon needs to make its state public.  For example, if 1000 file
 * system events come in, but no clients have requested the data,
 * the daemon can continue to accumulate file changes in the same
 * bin and does not need to advance the sequence number.  However,
 * as soon as a client does arrive, the daemon needs to start a new
 * bin and increment the sequence number.
 *
 * The sequence number serves as the boundary between 2 sets
 * of bins -- the older ones that the client has already seen
 * and the newer ones that it hasn't.
 *
 * When a new <token_id> is created, the <sequence_nr> is reset to
 * zero.
 *
 * A new token_id is created:
 *
 * [1] each time the daemon is started.
 *
 * [2] any time that the daemon must re-sync with the filesystem
 *     (such as when the kernel drops or we miss events on a very
 *     busy volume).
 *
 * [3] in response to a client "flush" command (for dropped event
 *     testing).
 *
 * When a new token_id is created, the daemon is free to discard all
 * cached filesystem events associated with any previous token_ids.
 * Events associated with a non-current token_id will never be sent
 * to a client.  A token_id change implicitly means that the daemon
 * has a gap in its event history.
 *
 * Therefore, clients that present a token with a stale (non-current)
 * token_id will always be given a trivial response.
 */
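/*
 * For illustration only (the concrete <token_id> layout is an internal
 * detail of fsmonitor_new_token_data() below, and the values here are
 * made up), a complete token might look something like:
 *
 *     "builtin:0.12345.20230502T144530.123456Z:7"
 */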
struct fsmonitor_token_data {
	struct strbuf token_id;
	struct fsmonitor_batch *batch_head;
	struct fsmonitor_batch *batch_tail;
	uint64_t client_ref_count;
};

struct fsmonitor_batch {
	struct fsmonitor_batch *next;
	uint64_t batch_seq_nr;
	const char **interned_paths;
	size_t nr, alloc;
	time_t pinned_time;
};
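/*
 * Note: batches are kept on a singly-linked list with the newest batch
 * at `batch_head`; `batch_seq_nr` increases toward the head, so walking
 * the `next` pointers walks backwards in time toward `batch_tail`.
 */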
static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
{
	static int test_env_value = -1;
	static uint64_t flush_count = 0;
	struct fsmonitor_token_data *token;
	struct fsmonitor_batch *batch;

	CALLOC_ARRAY(token, 1);
	batch = fsmonitor_batch__new();

	strbuf_init(&token->token_id, 0);
	token->batch_head = batch;
	token->batch_tail = batch;
	token->client_ref_count = 0;

	if (test_env_value < 0)
		test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);

	if (!test_env_value) {
		struct timeval tv;
		struct tm tm;
		time_t secs;

		gettimeofday(&tv, NULL);
		secs = tv.tv_sec;
		gmtime_r(&secs, &tm);

		strbuf_addf(&token->token_id,
			    "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
			    flush_count++,
			    getpid(),
			    tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (long)tv.tv_usec);
	} else {
		strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
	}

	/*
	 * We created a new <token_id> and are starting a new series
	 * of tokens with a zero <seq_nr>.
	 *
	 * Since clients cannot guess our new (non test) <token_id>
	 * they will always receive a trivial response (because of the
	 * mismatch on the <token_id>).  The trivial response will
	 * tell them our new <token_id> so that subsequent requests
	 * will be relative to our new series.  (And when sending that
	 * response, we pin the current head of the batch list.)
	 *
	 * Even if the client correctly guesses the <token_id>, their
	 * request of "builtin:<token_id>:0" asks for all changes MORE
	 * RECENT than batch/bin 0.
	 *
	 * This implies that it is a waste to accumulate paths in the
	 * initial batch/bin (because they will never be transmitted).
	 *
	 * So the daemon could be running for days and watching the
	 * file system, but doesn't need to actually accumulate any
	 * paths UNTIL we need to set a reference point for a later
	 * relative request.
	 *
	 * However, it is very useful for testing to always have a
	 * reference point set.  Pin batch 0 to force early file system
	 * events to accumulate.
	 */
	if (test_env_value)
		batch->pinned_time = time(NULL);

	return token;
}
struct fsmonitor_batch *fsmonitor_batch__new(void)
{
	struct fsmonitor_batch *batch;

	CALLOC_ARRAY(batch, 1);

	return batch;
}
void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
{
	while (batch) {
		struct fsmonitor_batch *next = batch->next;

		/*
		 * The actual strings within the array of this batch
		 * are interned, so we don't own them.  We only own
		 * the array.
		 */
		free(batch->interned_paths);
		free(batch);

		batch = next;
	}
}
void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
			       const char *path)
{
	const char *interned_path = strintern(path);

	trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);

	ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
	batch->interned_paths[batch->nr++] = interned_path;
}
static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
				     const struct fsmonitor_batch *batch_src)
{
	size_t k;

	ALLOC_GROW(batch_dest->interned_paths,
		   batch_dest->nr + batch_src->nr + 1,
		   batch_dest->alloc);

	for (k = 0; k < batch_src->nr; k++)
		batch_dest->interned_paths[batch_dest->nr++] =
			batch_src->interned_paths[k];
}
/*
 * To keep the batch list from growing unbounded in response to filesystem
 * activity, we try to truncate old batches from the end of the list as
 * they become irrelevant.
 *
 * We assume that the .git/index will be updated with the most recent token
 * any time the index is updated.  And future commands will only ask for
 * recent changes *since* that new token.  So as tokens advance into the
 * future, older batch items will never be requested/needed.  So we can
 * truncate them without loss of functionality.
 *
 * However, multiple commands may be talking to the daemon concurrently
 * or perform a slow command, so a little "token skew" is possible.
 * Therefore, we want this to be a little bit lazy and have a generous
 * delay.
 *
 * The current reader thread walked backwards in time from `token->batch_head`
 * back to `batch_marker` somewhere in the middle of the batch list.
 *
 * Let's walk backwards in time from that marker an arbitrary delay
 * and truncate the list there.  Note that these timestamps are completely
 * artificial (based on when we pinned the batch item) and not on any
 * filesystem activity.
 *
 * Return the obsolete portion of the list after we have removed it from
 * the official list so that the caller can free it after leaving the lock.
 */
#define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
static struct fsmonitor_batch *with_lock__truncate_old_batches(
	struct fsmonitor_daemon_state *state,
	const struct fsmonitor_batch *batch_marker)
{
	/* assert current thread holding state->main_lock */

	const struct fsmonitor_batch *batch;
	struct fsmonitor_batch *remainder;

	if (!batch_marker)
		return NULL;

	trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
			 batch_marker->batch_seq_nr,
			 (uint64_t)batch_marker->pinned_time);

	for (batch = batch_marker; batch; batch = batch->next) {
		time_t t;

		if (!batch->pinned_time) /* an overflow batch */
			continue;

		t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
		if (t > batch_marker->pinned_time) /* too close to marker */
			continue;

		goto truncate_past_here;
	}

	return NULL;

truncate_past_here:
	state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;

	remainder = ((struct fsmonitor_batch *)batch)->next;
	((struct fsmonitor_batch *)batch)->next = NULL;

	return remainder;
}
static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
{
	if (!token)
		return;

	assert(token->client_ref_count == 0);

	strbuf_release(&token->token_id);

	fsmonitor_batch__free_list(token->batch_head);

	free(token);
}
/*
 * Flush all of our cached data about the filesystem.  Call this if we
 * lose sync with the filesystem and miss some notification events.
 *
 * [1] If we are missing events, then we no longer have a complete
 *     history of the directory (relative to our current start token).
 *     We should create a new token and start fresh (as if we just
 *     booted up).
 *
 * [2] Some of those lost events may have been for cookie files.  We
 *     should assume the worst and abort them rather than letting them
 *     starve.
 *
 * If there are no concurrent threads reading the current token data
 * series, we can free it now.  Otherwise, let the last reader free
 * it.
 *
 * Either way, the old token data series is no longer associated with
 * our state data.
 */
static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
{
	/* assert current thread holding state->main_lock */

	struct fsmonitor_token_data *free_me = NULL;
	struct fsmonitor_token_data *new_one = NULL;

	new_one = fsmonitor_new_token_data();

	if (state->current_token_data->client_ref_count == 0)
		free_me = state->current_token_data;
	state->current_token_data = new_one;

	fsmonitor_free_token_data(free_me);

	with_lock__abort_all_cookies(state);
}
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
{
	pthread_mutex_lock(&state->main_lock);
	with_lock__do_force_resync(state);
	pthread_mutex_unlock(&state->main_lock);
}
/*
 * Format an opaque token string to send to the client.
 */
static void with_lock__format_response_token(
	struct strbuf *response_token,
	const struct strbuf *response_token_id,
	const struct fsmonitor_batch *batch)
{
	/* assert current thread holding state->main_lock */

	strbuf_reset(response_token);
	strbuf_addf(response_token, "builtin:%s:%"PRIu64,
		    response_token_id->buf, batch->batch_seq_nr);
}
/*
 * Parse an opaque token from the client.
 * Returns -1 on error.
 */
static int fsmonitor_parse_client_token(const char *buf_token,
					struct strbuf *requested_token_id,
					uint64_t *seq_nr)
{
	const char *p;
	char *p_end;

	strbuf_reset(requested_token_id);
	*seq_nr = 0;

	if (!skip_prefix(buf_token, "builtin:", &p))
		return -1; /* not a builtin token */

	while (*p && *p != ':')
		strbuf_addch(requested_token_id, *p++);
	if (!*p++)
		return -1; /* no sequence number */

	*seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
	if (*p_end)
		return -1; /* invalid sequence number */

	return 0;
}
KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
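/*
 * The "str" khash defined above is used as a simple set of interned
 * path strings so that do_handle_client() can de-dup the paths it
 * sends back to the client.
 */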
static int do_handle_client(struct fsmonitor_daemon_state *state,
			    const char *command,
			    ipc_server_reply_cb *reply,
			    struct ipc_server_reply_data *reply_data)
{
	struct fsmonitor_token_data *token_data = NULL;
	struct strbuf response_token = STRBUF_INIT;
	struct strbuf requested_token_id = STRBUF_INIT;
	struct strbuf payload = STRBUF_INIT;
	uint64_t requested_oldest_seq_nr = 0;
	uint64_t total_response_len = 0;
	const char *p;
	const struct fsmonitor_batch *batch_head;
	const struct fsmonitor_batch *batch;
	struct fsmonitor_batch *remainder = NULL;
	intmax_t count = 0, duplicates = 0;
	kh_str_t *shown;
	int hash_ret;
	int do_trivial = 0;
	int do_flush = 0;
	int do_cookie = 0;
	enum fsmonitor_cookie_item_result cookie_result;

	/*
	 * We expect `command` to be of the form:
	 *
	 * <command> := quit NUL
	 *            | flush NUL
	 *            | <V1-time-since-epoch-ns> NUL
	 *            | <V2-opaque-fsmonitor-token> NUL
	 */

	if (!strcmp(command, "quit")) {
		/*
		 * A client has requested over the socket/pipe that the
		 * daemon shutdown.
		 *
		 * Tell the IPC thread pool to shutdown (which completes
		 * the await in the main thread (which can stop the
		 * fsmonitor listener thread)).
		 *
		 * There is no reply to the client.
		 */
		return SIMPLE_IPC_QUIT;

	} else if (!strcmp(command, "flush")) {
		/*
		 * Flush all of our cached data and generate a new token
		 * just like if we lost sync with the filesystem.
		 *
		 * Then send a trivial response using the new token.
		 */
		do_flush = 1;
		do_trivial = 1;

	} else if (!skip_prefix(command, "builtin:", &p)) {
		/* assume V1 timestamp or garbage */

		char *p_end;

		strtoumax(command, &p_end, 10);
		trace_printf_key(&trace_fsmonitor,
				 ((*p_end) ?
				  "fsmonitor: invalid command line '%s'" :
				  "fsmonitor: unsupported V1 protocol '%s'"),
				 command);
		do_trivial = 1;
		do_flush = 1;

	} else {
		/* We have "builtin:*" */
		if (fsmonitor_parse_client_token(command, &requested_token_id,
						 &requested_oldest_seq_nr)) {
			trace_printf_key(&trace_fsmonitor,
					 "fsmonitor: invalid V2 protocol token '%s'",
					 command);
			do_trivial = 1;
			do_flush = 1;

		} else {
			/*
			 * We have a valid V2 token:
			 *     "builtin:<token_id>:<seq_nr>"
			 */
			do_cookie = 1;
		}
	}

	pthread_mutex_lock(&state->main_lock);

	if (!state->current_token_data)
		BUG("fsmonitor state does not have a current token");

	/*
	 * Write a cookie file inside the directory being watched in
	 * an effort to flush out existing filesystem events that we
	 * actually care about.  Suspend this client thread until we
	 * see the filesystem events for this cookie file.
	 *
	 * Creating the cookie lets us guarantee that our FS listener
	 * thread has drained the kernel queue and we are caught up
	 * with the kernel.
	 *
	 * If we cannot create the cookie (or otherwise guarantee that
	 * we are caught up), we send a trivial response.  We have to
	 * assume that there might be some very, very recent activity
	 * on the FS still in flight.
	 */
	if (do_cookie) {
		cookie_result = with_lock__wait_for_cookie(state);
		if (cookie_result != FCIR_SEEN) {
			error(_("fsmonitor: cookie_result '%d' != SEEN"),
			      cookie_result);
			do_trivial = 1;
		}
	}

	if (do_flush)
		with_lock__do_force_resync(state);

	/*
	 * We mark the current head of the batch list as "pinned" so
	 * that the listener thread will treat this item as read-only
	 * (and prevent any more paths from being added to it) from
	 * now on.
	 */
	token_data = state->current_token_data;
	batch_head = token_data->batch_head;
	((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);

	/*
	 * FSMonitor Protocol V2 requires that we send a response header
	 * with a "new current token" and then all of the paths that changed
	 * since the "requested token".  We send the seq_nr of the just-pinned
	 * head batch so that future requests from a client will be relative
	 * to it.
	 */
	with_lock__format_response_token(&response_token,
					 &token_data->token_id, batch_head);

	reply(reply_data, response_token.buf, response_token.len + 1);
	total_response_len += response_token.len + 1;

	trace2_data_string("fsmonitor", the_repository, "response/token",
			   response_token.buf);
	trace_printf_key(&trace_fsmonitor, "response token: %s",
			 response_token.buf);

	if (!do_trivial) {
		if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
			/*
			 * The client last spoke to a different daemon
			 * instance -OR- the daemon had to resync with
			 * the filesystem (and lost events), so reject.
			 */
			trace2_data_string("fsmonitor", the_repository,
					   "response/token", "different");
			do_trivial = 1;

		} else if (requested_oldest_seq_nr <
			   token_data->batch_tail->batch_seq_nr) {
			/*
			 * The client wants older events than we have for
			 * this token_id.  This means that the end of our
			 * batch list was truncated and we cannot give the
			 * client a complete snapshot relative to their
			 * request.
			 */
			trace_printf_key(&trace_fsmonitor,
					 "client requested truncated data");
			do_trivial = 1;
		}
	}

	if (do_trivial) {
		pthread_mutex_unlock(&state->main_lock);

		reply(reply_data, "/", 2);

		trace2_data_intmax("fsmonitor", the_repository,
				   "response/trivial", 1);

		goto cleanup;
	}

	/*
	 * We're going to hold onto a pointer to the current
	 * token-data while we walk the list of batches of files.
	 * During this time, we will NOT be under the lock.
	 * So we ref-count it.
	 *
	 * This allows the listener thread to continue prepending
	 * new batches of items to the token-data (which we'll ignore).
	 *
	 * AND it allows the listener thread to do a token-reset
	 * (and install a new `current_token_data`).
	 */
	token_data->client_ref_count++;

	pthread_mutex_unlock(&state->main_lock);

	/*
	 * The client request is relative to the token that they sent,
	 * so walk the batch list backwards from the current head back
	 * to the batch (sequence number) they named.
	 *
	 * We use khash to de-dup the list of pathnames.
	 *
	 * NEEDSWORK: each batch contains a list of interned strings,
	 * so we only need to do pointer comparisons here to build the
	 * hash table.  Currently, we're still comparing the string
	 * values.
	 */
	shown = kh_init_str();
	for (batch = batch_head;
	     batch && batch->batch_seq_nr > requested_oldest_seq_nr;
	     batch = batch->next) {
		size_t k;

		for (k = 0; k < batch->nr; k++) {
			const char *s = batch->interned_paths[k];
			size_t s_len;

			if (kh_get_str(shown, s) != kh_end(shown))
				duplicates++;
			else {
				kh_put_str(shown, s, &hash_ret);

				trace_printf_key(&trace_fsmonitor,
						 "send[%"PRIuMAX"]: %s",
						 count, s);

				/* Each path gets written with a trailing NUL */
				s_len = strlen(s) + 1;

				if (payload.len + s_len >=
				    LARGE_PACKET_DATA_MAX) {
					reply(reply_data, payload.buf,
					      payload.len);
					total_response_len += payload.len;
					strbuf_reset(&payload);
				}

				strbuf_add(&payload, s, s_len);
				count++;
			}
		}
	}

	if (payload.len) {
		reply(reply_data, payload.buf, payload.len);
		total_response_len += payload.len;
	}

	kh_release_str(shown);

	pthread_mutex_lock(&state->main_lock);

	if (token_data->client_ref_count > 0)
		token_data->client_ref_count--;

	if (token_data->client_ref_count == 0) {
		if (token_data != state->current_token_data) {
			/*
			 * The listener thread did a token-reset while we were
			 * walking the batch list.  Therefore, this token is
			 * stale and can be discarded completely.  If we are
			 * the last reader thread using this token, we own
			 * that work.
			 */
			fsmonitor_free_token_data(token_data);
		} else if (batch) {
			/*
			 * We are holding the lock and are the only
			 * reader of the ref-counted portion of the
			 * list, so we get the honor of seeing if the
			 * list can be truncated to save memory.
			 *
			 * The main loop did not walk to the end of the
			 * list, so this batch is the first item in the
			 * batch-list that is older than the requested
			 * end-point sequence number.  See if the tail
			 * end of the list is obsolete.
			 */
			remainder = with_lock__truncate_old_batches(state,
								    batch);
		}
	}

	pthread_mutex_unlock(&state->main_lock);

	if (remainder)
		fsmonitor_batch__free_list(remainder);

	trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
	trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
	trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);

cleanup:
	strbuf_release(&response_token);
	strbuf_release(&requested_token_id);
	strbuf_release(&payload);

	return 0;
}
static ipc_server_application_cb handle_client;

static int handle_client(void *data,
			 const char *command, size_t command_len,
			 ipc_server_reply_cb *reply,
			 struct ipc_server_reply_data *reply_data)
{
	struct fsmonitor_daemon_state *state = data;
	int result;

	/*
	 * The Simple IPC API now supports {char*, len} arguments, but
	 * FSMonitor always uses proper null-terminated strings, so
	 * we can ignore the command_len argument.  (Trust, but verify.)
	 */
	if (command_len != strlen(command))
		BUG("FSMonitor assumes text messages");

	trace_printf_key(&trace_fsmonitor, "requested token: %s", command);

	trace2_region_enter("fsmonitor", "handle_client", the_repository);
	trace2_data_string("fsmonitor", the_repository, "request", command);

	result = do_handle_client(state, command, reply, reply_data);

	trace2_region_leave("fsmonitor", "handle_client", the_repository);

	return result;
}
#define FSMONITOR_DIR           "fsmonitor--daemon"
#define FSMONITOR_COOKIE_DIR    "cookies"
#define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
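/*
 * Cookie files therefore live at
 * "<gitdir>/fsmonitor--daemon/cookies/<pid>-<seq>"; the directories are
 * created during setup in fsmonitor_run_daemon() below.
 */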
enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
	const char *rel)
{
	if (fspathncmp(rel, ".git", 4))
		return IS_WORKDIR_PATH;
	rel += 4;

	if (!*rel)
		return IS_DOT_GIT;
	if (*rel != '/')
		return IS_WORKDIR_PATH; /* e.g. .gitignore */
	rel++;

	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
			strlen(FSMONITOR_COOKIE_PREFIX)))
		return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;

	return IS_INSIDE_DOT_GIT;
}
enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
	const char *rel)
{
	if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
			strlen(FSMONITOR_COOKIE_PREFIX)))
		return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;

	return IS_INSIDE_GITDIR;
}
static enum fsmonitor_path_type try_classify_workdir_abs_path(
	struct fsmonitor_daemon_state *state,
	const char *path)
{
	const char *rel;

	if (fspathncmp(path, state->path_worktree_watch.buf,
		       state->path_worktree_watch.len))
		return IS_OUTSIDE_CONE;

	rel = path + state->path_worktree_watch.len;

	if (!*rel)
		return IS_WORKDIR_PATH; /* it is the root dir exactly */
	if (*rel != '/')
		return IS_OUTSIDE_CONE;
	rel++;

	return fsmonitor_classify_path_workdir_relative(rel);
}
enum fsmonitor_path_type fsmonitor_classify_path_absolute(
	struct fsmonitor_daemon_state *state,
	const char *path)
{
	const char *rel;
	enum fsmonitor_path_type t;

	t = try_classify_workdir_abs_path(state, path);
	if (state->nr_paths_watching == 1)
		return t;
	if (t != IS_OUTSIDE_CONE)
		return t;

	if (fspathncmp(path, state->path_gitdir_watch.buf,
		       state->path_gitdir_watch.len))
		return IS_OUTSIDE_CONE;

	rel = path + state->path_gitdir_watch.len;

	if (!*rel)
		return IS_GITDIR; /* it is the <gitdir> exactly */
	if (*rel != '/')
		return IS_OUTSIDE_CONE;
	rel++;

	return fsmonitor_classify_path_gitdir_relative(rel);
}
/*
 * We try to combine small batches at the front of the batch-list to avoid
 * having a long list.  This hopefully makes it a little easier when we want
 * to truncate and maintain the list.  However, we don't want the paths array
 * to just keep growing and growing with realloc, so we insert an arbitrary
 * limit.
 */
#define MY_COMBINE_LIMIT (1024)
void fsmonitor_publish(struct fsmonitor_daemon_state *state,
		       struct fsmonitor_batch *batch,
		       const struct string_list *cookie_names)
{
	if (!batch && !cookie_names->nr)
		return;

	pthread_mutex_lock(&state->main_lock);

	if (batch) {
		struct fsmonitor_batch *head;

		head = state->current_token_data->batch_head;
		if (!head) {
			BUG("token does not have batch");
		} else if (head->pinned_time) {
			/*
			 * We cannot alter the current batch list
			 * because:
			 *
			 * [a] it is being transmitted to at least one
			 * client and the handle_client() thread has a
			 * ref-count, but not a lock on the batch list
			 * starting with this item.
			 *
			 * [b] it has been transmitted in the past to
			 * at least one client such that future
			 * requests are relative to this head batch.
			 *
			 * So, we can only prepend a new batch onto
			 * the front of the list.
			 */
			batch->batch_seq_nr = head->batch_seq_nr + 1;
			batch->next = head;
			state->current_token_data->batch_head = batch;
		} else if (!head->batch_seq_nr) {
			/*
			 * Batch 0 is unpinned.  See the note in
			 * `fsmonitor_new_token_data()` about why we
			 * don't need to accumulate these paths.
			 */
			fsmonitor_batch__free_list(batch);
		} else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
			/*
			 * The head batch in the list has never been
			 * transmitted to a client, but folding the
			 * contents of the new batch onto it would
			 * exceed our arbitrary limit, so just prepend
			 * the new batch onto the list.
			 */
			batch->batch_seq_nr = head->batch_seq_nr + 1;
			batch->next = head;
			state->current_token_data->batch_head = batch;
		} else {
			/*
			 * We are free to add the paths in the given
			 * batch onto the end of the current head batch.
			 */
			fsmonitor_batch__combine(head, batch);
			fsmonitor_batch__free_list(batch);
		}
	}

	if (cookie_names->nr)
		with_lock__mark_cookies_seen(state, cookie_names);

	pthread_mutex_unlock(&state->main_lock);
}
static void *fsm_health__thread_proc(void *_state)
{
	struct fsmonitor_daemon_state *state = _state;

	trace2_thread_start("fsm-health");

	fsm_health__loop(state);

	trace2_thread_exit();
	return NULL;
}
static void *fsm_listen__thread_proc(void *_state)
{
	struct fsmonitor_daemon_state *state = _state;

	trace2_thread_start("fsm-listen");

	trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
			 state->path_worktree_watch.buf);
	if (state->nr_paths_watching > 1)
		trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
				 state->path_gitdir_watch.buf);

	fsm_listen__loop(state);

	pthread_mutex_lock(&state->main_lock);
	if (state->current_token_data &&
	    state->current_token_data->client_ref_count == 0)
		fsmonitor_free_token_data(state->current_token_data);
	state->current_token_data = NULL;
	pthread_mutex_unlock(&state->main_lock);

	trace2_thread_exit();
	return NULL;
}
static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
{
	struct ipc_server_opts ipc_opts = {
		.nr_threads = fsmonitor__ipc_threads,

		/*
		 * We know that there are no other active threads yet,
		 * so we can let the IPC layer temporarily chdir() if
		 * it needs to when creating the server side of the
		 * Unix domain socket.
		 */
		.uds_disallow_chdir = 0
	};
	int health_started = 0;
	int listener_started = 0;
	int err = 0;

	/*
	 * Start the IPC thread pool before we've started the file
	 * system event listener thread so that we have the IPC handle
	 * before we need it.
	 */
	if (ipc_server_run_async(&state->ipc_server_data,
				 state->path_ipc.buf, &ipc_opts,
				 handle_client, state))
		return error_errno(
			_("could not start IPC thread pool on '%s'"),
			state->path_ipc.buf);

	/*
	 * Start the fsmonitor listener thread to collect filesystem
	 * events.
	 */
	if (pthread_create(&state->listener_thread, NULL,
			   fsm_listen__thread_proc, state) < 0) {
		ipc_server_stop_async(state->ipc_server_data);
		err = error(_("could not start fsmonitor listener thread"));
		goto cleanup;
	}
	listener_started = 1;

	/*
	 * Start the health thread to watch over our process.
	 */
	if (pthread_create(&state->health_thread, NULL,
			   fsm_health__thread_proc, state) < 0) {
		ipc_server_stop_async(state->ipc_server_data);
		err = error(_("could not start fsmonitor health thread"));
		goto cleanup;
	}
	health_started = 1;

	/*
	 * The daemon is now fully functional in background threads.
	 * Our primary thread should now just wait while the threads
	 * do all the work.
	 */
cleanup:
	/*
	 * Wait for the IPC thread pool to shutdown (whether by client
	 * request, from filesystem activity, or an error).
	 */
	ipc_server_await(state->ipc_server_data);

	/*
	 * The fsmonitor listener thread may have received a shutdown
	 * event from the IPC thread pool, but it doesn't hurt to tell
	 * it again.  And wait for it to shutdown.
	 */
	if (listener_started) {
		fsm_listen__stop_async(state);
		pthread_join(state->listener_thread, NULL);
	}

	if (health_started) {
		fsm_health__stop_async(state);
		pthread_join(state->health_thread, NULL);
	}

	if (err)
		return err;
	if (state->listen_error_code)
		return state->listen_error_code;
	if (state->health_error_code)
		return state->health_error_code;
	return 0;
}
static int fsmonitor_run_daemon(void)
{
	struct fsmonitor_daemon_state state;
	const char *home;
	int err;

	memset(&state, 0, sizeof(state));

	hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
	pthread_mutex_init(&state.main_lock, NULL);
	pthread_cond_init(&state.cookies_cond, NULL);
	state.listen_error_code = 0;
	state.health_error_code = 0;
	state.current_token_data = fsmonitor_new_token_data();

	/* Prepare to (recursively) watch the <worktree-root> directory. */
	strbuf_init(&state.path_worktree_watch, 0);
	strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
	state.nr_paths_watching = 1;

	strbuf_init(&state.alias.alias, 0);
	strbuf_init(&state.alias.points_to, 0);
	if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
		goto done;

	/*
	 * We create and delete cookie files somewhere inside the .git
	 * directory to help us keep sync with the file system.  If
	 * ".git" is not a directory, then <gitdir> is not inside the
	 * cone of <worktree-root>, so set up a second watch to watch
	 * the <gitdir> so that we get events for the cookie files.
	 */
	strbuf_init(&state.path_gitdir_watch, 0);
	strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
	strbuf_addstr(&state.path_gitdir_watch, "/.git");
	if (!is_directory(state.path_gitdir_watch.buf)) {
		strbuf_reset(&state.path_gitdir_watch);
		strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
		state.nr_paths_watching = 2;
	}

	/*
	 * We will write filesystem syncing cookie files into
	 * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
	 *
	 * The extra layers of subdirectories here keep us from
	 * changing the mtime on ".git/" or ".git/foo/" when we create
	 * or delete cookie files.
	 *
	 * There have been problems with some IDEs that do a
	 * non-recursive watch of the ".git/" directory and run a
	 * series of commands any time something happens.
	 *
	 * For example, if we place our cookie files directly in
	 * ".git/" or ".git/foo/" then a `git status` (or similar
	 * command) from the IDE will cause a cookie file to be
	 * created in one of those dirs.  This causes the mtime of
	 * those dirs to change.  This triggers the IDE's watch
	 * notification.  This triggers the IDE to run those commands
	 * again.  And the process repeats and the machine never goes
	 * idle.
	 *
	 * Adding the extra layers of subdirectories prevents the
	 * mtime of ".git/" and ".git/foo" from changing when a
	 * cookie file is created.
	 */
	strbuf_init(&state.path_cookie_prefix, 0);
	strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);

	strbuf_addch(&state.path_cookie_prefix, '/');
	strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
	mkdir(state.path_cookie_prefix.buf, 0777);

	strbuf_addch(&state.path_cookie_prefix, '/');
	strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
	mkdir(state.path_cookie_prefix.buf, 0777);

	strbuf_addch(&state.path_cookie_prefix, '/');

	/*
	 * We create a named-pipe or unix domain socket inside of the
	 * ".git" directory.  (Well, on Windows, we base our named
	 * pipe in the NPFS on the absolute path of the git
	 * directory.)
	 */
	strbuf_init(&state.path_ipc, 0);
	strbuf_addstr(&state.path_ipc,
		      absolute_path(fsmonitor_ipc__get_path(the_repository)));

	/*
	 * Confirm that we can create platform-specific resources for the
	 * filesystem listener before we bother starting all the threads.
	 */
	if (fsm_listen__ctor(&state)) {
		err = error(_("could not initialize listener thread"));
		goto done;
	}

	if (fsm_health__ctor(&state)) {
		err = error(_("could not initialize health thread"));
		goto done;
	}

	/*
	 * CD out of the worktree root directory.
	 *
	 * The common Git startup mechanism causes our CWD to be the
	 * root of the worktree.  On Windows, this causes our process
	 * to hold a locked handle on the CWD.  This prevents the
	 * worktree from being moved or deleted while the daemon is
	 * running.
	 *
	 * We assume that our FS and IPC listener threads have either
	 * opened all of the handles that they need or will do
	 * everything using absolute paths.
	 */
	home = getenv("HOME");
	if (home && *home && chdir(home))
		die_errno(_("could not cd home '%s'"), home);

	err = fsmonitor_run_daemon_1(&state);

done:
	pthread_cond_destroy(&state.cookies_cond);
	pthread_mutex_destroy(&state.main_lock);
	fsm_listen__dtor(&state);
	fsm_health__dtor(&state);

	ipc_server_free(state.ipc_server_data);

	strbuf_release(&state.path_worktree_watch);
	strbuf_release(&state.path_gitdir_watch);
	strbuf_release(&state.path_cookie_prefix);
	strbuf_release(&state.path_ipc);
	strbuf_release(&state.alias.alias);
	strbuf_release(&state.alias.points_to);

	return err;
}
static int try_to_run_foreground_daemon(int detach_console)
{
	/*
	 * Technically, we don't need to probe for an existing daemon
	 * process, since we could just call `fsmonitor_run_daemon()`
	 * and let it fail if the pipe/socket is busy.
	 *
	 * However, this method gives us a nicer error message for a
	 * common error case.
	 */
	if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
		die(_("fsmonitor--daemon is already running '%s'"),
		    the_repository->worktree);

	if (fsmonitor__announce_startup) {
		fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
			the_repository->worktree);
		fflush(stderr);
	}

#ifdef GIT_WINDOWS_NATIVE
	if (detach_console)
		FreeConsole();
#endif

	return !!fsmonitor_run_daemon();
}
static start_bg_wait_cb bg_wait_cb;

static int bg_wait_cb(const struct child_process *cp, void *cb_data)
{
	enum ipc_active_state s = fsmonitor_ipc__get_state();

	switch (s) {
	case IPC_STATE__LISTENING:
		/* child is "ready" */
		return 0;

	case IPC_STATE__NOT_LISTENING:
	case IPC_STATE__PATH_NOT_FOUND:
		/* give child more time */
		return 1;

	default:
	case IPC_STATE__INVALID_PATH:
	case IPC_STATE__OTHER_ERROR:
		/* all the time in the world won't help */
		return -1;
	}
}
static int try_to_start_background_daemon(void)
{
	struct child_process cp = CHILD_PROCESS_INIT;
	enum start_bg_result sbgr;

	/*
	 * Before we try to create a background daemon process, see
	 * if a daemon process is already listening.  This makes it
	 * easier for us to report an already-listening error to the
	 * console, since our spawn/daemon can only report the success
	 * of creating the background process (and not whether it
	 * immediately exited).
	 */
	if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
		die(_("fsmonitor--daemon is already running '%s'"),
		    the_repository->worktree);

	if (fsmonitor__announce_startup) {
		fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
			the_repository->worktree);
		fflush(stderr);
	}

	cp.git_cmd = 1;

	strvec_push(&cp.args, "fsmonitor--daemon");
	strvec_push(&cp.args, "run");
	strvec_push(&cp.args, "--detach");
	strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);

	cp.no_stdin = 1;
	cp.no_stdout = 1;
	cp.no_stderr = 1;

	sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
				fsmonitor__start_timeout_sec);

	switch (sbgr) {
	case SBGR_READY:
		return 0;

	default:
	case SBGR_ERROR:
	case SBGR_CB_ERROR:
		return error(_("daemon failed to start"));

	case SBGR_TIMEOUT:
		return error(_("daemon not online yet"));

	case SBGR_DIED:
		return error(_("daemon terminated"));
	}
}
int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
{
	const char *subcmd;
	enum fsmonitor_reason reason;
	int detach_console = 0;

	struct option options[] = {
		OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
		OPT_INTEGER(0, "ipc-threads",
			    &fsmonitor__ipc_threads,
			    N_("use <n> ipc worker threads")),
		OPT_INTEGER(0, "start-timeout",
			    &fsmonitor__start_timeout_sec,
			    N_("max seconds to wait for background daemon startup")),

		OPT_END()
	};

	git_config(fsmonitor_config, NULL);

	argc = parse_options(argc, argv, prefix, options,
			     builtin_fsmonitor__daemon_usage, 0);
	if (argc != 1)
		usage_with_options(builtin_fsmonitor__daemon_usage, options);
	subcmd = argv[0];

	if (fsmonitor__ipc_threads < 1)
		die(_("invalid 'ipc-threads' value (%d)"),
		    fsmonitor__ipc_threads);

	prepare_repo_settings(the_repository);
	/*
	 * If the repo is fsmonitor-compatible, explicitly set IPC-mode
	 * (without bothering to load the `core.fsmonitor` config settings).
	 *
	 * If the repo is not compatible, the repo-settings will be set to
	 * incompatible rather than IPC, so we can use one of the __get
	 * routines to detect the discrepancy.
	 */
	fsm_settings__set_ipc(the_repository);

	reason = fsm_settings__get_reason(the_repository);
	if (reason > FSMONITOR_REASON_OK)
		die("%s",
		    fsm_settings__get_incompatible_msg(the_repository,
						       reason));

	if (!strcmp(subcmd, "start"))
		return !!try_to_start_background_daemon();

	if (!strcmp(subcmd, "run"))
		return !!try_to_run_foreground_daemon(detach_console);

	if (!strcmp(subcmd, "stop"))
		return !!do_as_client__send_stop();

	if (!strcmp(subcmd, "status"))
		return !!do_as_client__status();

	die(_("Unhandled subcommand '%s'"), subcmd);
}
#else
int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
{
	struct option options[] = {
		OPT_END()
	};

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage_with_options(builtin_fsmonitor__daemon_usage, options);

	die(_("fsmonitor--daemon not supported on this platform"));
}
#endif