builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "abspath.h"
   3 #include "config.h"
   4 #include "dir.h"
   5 #include "environment.h"
   6 #include "gettext.h"
   7 #include "parse-options.h"
   8 #include "fsmonitor-ll.h"
   9 #include "fsmonitor-ipc.h"
  10 #include "fsmonitor-settings.h"
  11 #include "compat/fsmonitor/fsm-health.h"
  12 #include "compat/fsmonitor/fsm-listen.h"
  13 #include "fsmonitor--daemon.h"
  14 #include "repository.h"
  15 #include "simple-ipc.h"
  16 #include "khash.h"
  17 #include "run-command.h"
  18 #include "trace.h"
  19 #include "trace2.h"
  20
/*
 * Usage lines for `git fsmonitor--daemon`, one per subcommand
 * (start, run, stop, status).  The array is NULL-terminated.
 *
 * The two entries that contain an "[<options>]" placeholder are wrapped
 * in N_(); the other two are plain string literals.
 */
static const char * const builtin_fsmonitor__daemon_usage[] = {
	N_("git fsmonitor--daemon start [<options>]"),
	N_("git fsmonitor--daemon run [<options>]"),
	"git fsmonitor--daemon stop",
	"git fsmonitor--daemon status",
	NULL
};
  28
  29 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
  30 /*
  31  * Global state loaded from config.
  32  */
  33 #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
  34 static int fsmonitor__ipc_threads = 8;
  35
  36 #define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
  37 static int fsmonitor__start_timeout_sec = 60;
  38
  39 #define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
  40 static int fsmonitor__announce_startup = 0;
  41
  42 static int fsmonitor_config(const char *var, const char *value,
  43                             const struct config_context *ctx, void *cb)
  44 {
  45         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  46                 int i = git_config_int(var, value, ctx->kvi);
  47                 if (i < 1)
  48                         return error(_("value of '%s' out of range: %d"),
  49                                      FSMONITOR__IPC_THREADS, i);
  50                 fsmonitor__ipc_threads = i;
  51                 return 0;
  52         }
  53
  54         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  55                 int i = git_config_int(var, value, ctx->kvi);
  56                 if (i < 0)
  57                         return error(_("value of '%s' out of range: %d"),
  58                                      FSMONITOR__START_TIMEOUT, i);
  59                 fsmonitor__start_timeout_sec = i;
  60                 return 0;
  61         }
  62
  63         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  64                 int is_bool;
  65                 int i = git_config_bool_or_int(var, value, ctx->kvi, &is_bool);
  66                 if (i < 0)
  67                         return error(_("value of '%s' not bool or int: %d"),
  68                                      var, i);
  69                 fsmonitor__announce_startup = i;
  70                 return 0;
  71         }
  72
  73         return git_default_config(var, value, ctx, cb);
  74 }
  75
  76 /*
  77  * Acting as a CLIENT.
  78  *
  79  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  80  * and wait for it to shutdown.
  81  */
  82 static int do_as_client__send_stop(void)
  83 {
  84         struct strbuf answer = STRBUF_INIT;
  85         int ret;
  86
  87         ret = fsmonitor_ipc__send_command("quit", &answer);
  88
  89         /* The quit command does not return any response data. */
  90         strbuf_release(&answer);
  91
  92         if (ret)
  93                 return ret;
  94
  95         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  96         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  97                 sleep_millisec(50);
  98         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  99
 100         return 0;
 101 }
 102
 103 static int do_as_client__status(void)
 104 {
 105         enum ipc_active_state state = fsmonitor_ipc__get_state();
 106
 107         switch (state) {
 108         case IPC_STATE__LISTENING:
 109                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 110                        the_repository->worktree);
 111                 return 0;
 112
 113         default:
 114                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 115                        the_repository->worktree);
 116                 return 1;
 117         }
 118 }
 119
 120 enum fsmonitor_cookie_item_result {
 121         FCIR_ERROR = -1, /* could not create cookie file ? */
 122         FCIR_INIT,
 123         FCIR_SEEN,
 124         FCIR_ABORT,
 125 };
 126
/*
 * One pending cookie, stored in the `state->cookies` hashmap while a
 * client thread waits for the cookie file's event to be observed.
 */
struct fsmonitor_cookie_item {
	struct hashmap_entry entry; /* hashmap linkage; hashed on `name` */
	char *name; /* heap-allocated cookie filename; freed by the waiter */
	enum fsmonitor_cookie_item_result result; /* FCIR_INIT until resolved */
};
 132
 133 static int cookies_cmp(const void *data UNUSED,
 134                        const struct hashmap_entry *he1,
 135                        const struct hashmap_entry *he2, const void *keydata)
 136 {
 137         const struct fsmonitor_cookie_item *a =
 138                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 139         const struct fsmonitor_cookie_item *b =
 140                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 141
 142         return strcmp(a->name, keydata ? keydata : b->name);
 143 }
 144
 145 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 146         struct fsmonitor_daemon_state *state)
 147 {
 148         /* assert current thread holding state->main_lock */
 149
 150         int fd;
 151         struct fsmonitor_cookie_item *cookie;
 152         struct strbuf cookie_pathname = STRBUF_INIT;
 153         struct strbuf cookie_filename = STRBUF_INIT;
 154         enum fsmonitor_cookie_item_result result;
 155         int my_cookie_seq;
 156
 157         CALLOC_ARRAY(cookie, 1);
 158
 159         my_cookie_seq = state->cookie_seq++;
 160
 161         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 162
 163         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 164         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 165
 166         cookie->name = strbuf_detach(&cookie_filename, NULL);
 167         cookie->result = FCIR_INIT;
 168         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 169
 170         hashmap_add(&state->cookies, &cookie->entry);
 171
 172         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 173                          cookie->name, cookie_pathname.buf);
 174
 175         /*
 176          * Create the cookie file on disk and then wait for a notification
 177          * that the listener thread has seen it.
 178          */
 179         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 180         if (fd < 0) {
 181                 error_errno(_("could not create fsmonitor cookie '%s'"),
 182                             cookie->name);
 183
 184                 cookie->result = FCIR_ERROR;
 185                 goto done;
 186         }
 187
 188         /*
 189          * Technically, close() and unlink() can fail, but we don't
 190          * care here.  We only created the file to trigger a watch
 191          * event from the FS to know that when we're up to date.
 192          */
 193         close(fd);
 194         unlink(cookie_pathname.buf);
 195
 196         /*
 197          * Technically, this is an infinite wait (well, unless another
 198          * thread sends us an abort).  I'd like to change this to
 199          * use `pthread_cond_timedwait()` and return an error/timeout
 200          * and let the caller do the trivial response thing, but we
 201          * don't have that routine in our thread-utils.
 202          *
 203          * After extensive beta testing I'm not really worried about
 204          * this.  Also note that the above open() and unlink() calls
 205          * will cause at least two FS events on that path, so the odds
 206          * of getting stuck are pretty slim.
 207          */
 208         while (cookie->result == FCIR_INIT)
 209                 pthread_cond_wait(&state->cookies_cond,
 210                                   &state->main_lock);
 211
 212 done:
 213         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 214
 215         result = cookie->result;
 216
 217         free(cookie->name);
 218         free(cookie);
 219         strbuf_release(&cookie_pathname);
 220
 221         return result;
 222 }
 223
 224 /*
 225  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 226  */
 227 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 228                                          const struct string_list *cookie_names)
 229 {
 230         /* assert current thread holding state->main_lock */
 231
 232         int k;
 233         int nr_seen = 0;
 234
 235         for (k = 0; k < cookie_names->nr; k++) {
 236                 struct fsmonitor_cookie_item key;
 237                 struct fsmonitor_cookie_item *cookie;
 238
 239                 key.name = cookie_names->items[k].string;
 240                 hashmap_entry_init(&key.entry, strhash(key.name));
 241
 242                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 243                 if (cookie) {
 244                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 245                                          cookie->name);
 246                         cookie->result = FCIR_SEEN;
 247                         nr_seen++;
 248                 }
 249         }
 250
 251         if (nr_seen)
 252                 pthread_cond_broadcast(&state->cookies_cond);
 253 }
 254
 255 /*
 256  * Set _ABORT on all pending cookies and wake up all client threads.
 257  */
 258 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 259 {
 260         /* assert current thread holding state->main_lock */
 261
 262         struct hashmap_iter iter;
 263         struct fsmonitor_cookie_item *cookie;
 264         int nr_aborted = 0;
 265
 266         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 267                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 268                                  cookie->name);
 269                 cookie->result = FCIR_ABORT;
 270                 nr_aborted++;
 271         }
 272
 273         if (nr_aborted)
 274                 pthread_cond_broadcast(&state->cookies_cond);
 275 }
 276
 277 /*
 278  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 279  * "token" as a virtual timestamp.  Clients can request a summary of all
 280  * created/deleted/modified files relative to a token.  In the response,
 281  * clients receive a new token for the next (relative) request.
 282  *
 283  *
 284  * Token Format
 285  * ============
 286  *
 287  * The contents of the token are private and provider-specific.
 288  *
 289  * For the built-in fsmonitor--daemon, we define a token as follows:
 290  *
 291  *     "builtin" ":" <token_id> ":" <sequence_nr>
 292  *
 293  * The "builtin" prefix is used as a namespace to avoid conflicts
 294  * with other providers (such as Watchman).
 295  *
 296  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 297  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 298  * events that happened while the daemon was monitoring (and in-sync
 299  * with the filesystem).
 300  *
 301  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 302  *     and does not define less-than/greater-than relationships.
 303  *     (There are too many race conditions to rely on file system
 304  *     event timestamps.)
 305  *
 306  * The <sequence_nr> is a simple integer incremented whenever the
 307  * daemon needs to make its state public.  For example, if 1000 file
 308  * system events come in, but no clients have requested the data,
 309  * the daemon can continue to accumulate file changes in the same
 310  * bin and does not need to advance the sequence number.  However,
 311  * as soon as a client does arrive, the daemon needs to start a new
 312  * bin and increment the sequence number.
 313  *
 314  *     The sequence number serves as the boundary between 2 sets
 315  *     of bins -- the older ones that the client has already seen
 316  *     and the newer ones that it hasn't.
 317  *
 318  * When a new <token_id> is created, the <sequence_nr> is reset to
 319  * zero.
 320  *
 321  *
 322  * About Token Ids
 323  * ===============
 324  *
 325  * A new token_id is created:
 326  *
 327  * [1] each time the daemon is started.
 328  *
 329  * [2] any time that the daemon must re-sync with the filesystem
 330  *     (such as when the kernel drops or we miss events on a very
 331  *     active volume).
 332  *
 333  * [3] in response to a client "flush" command (for dropped event
 334  *     testing).
 335  *
 336  * When a new token_id is created, the daemon is free to discard all
 337  * cached filesystem events associated with any previous token_ids.
 338  * Events associated with a non-current token_id will never be sent
 339  * to a client.  A token_id change implicitly means that the daemon
 * has a gap in its event history.
 341  *
 342  * Therefore, clients that present a token with a stale (non-current)
 343  * token_id will always be given a trivial response.
 344  */
/*
 * One token series: a <token_id> plus the list of batches collected
 * under it.
 */
struct fsmonitor_token_data {
	struct strbuf token_id; /* the <token_id> part of the token */
	struct fsmonitor_batch *batch_head; /* start of the list; readers walk back in time from here */
	struct fsmonitor_batch *batch_tail; /* last batch still kept; moved when old batches are truncated */
	uint64_t client_ref_count; /* must be zero before the series is freed */
};
 351
 352 struct fsmonitor_batch {
 353         struct fsmonitor_batch *next;
 354         uint64_t batch_seq_nr;
 355         const char **interned_paths;
 356         size_t nr, alloc;
 357         time_t pinned_time;
 358 };
 359
 360 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 361 {
 362         static int test_env_value = -1;
 363         static uint64_t flush_count = 0;
 364         struct fsmonitor_token_data *token;
 365         struct fsmonitor_batch *batch;
 366
 367         CALLOC_ARRAY(token, 1);
 368         batch = fsmonitor_batch__new();
 369
 370         strbuf_init(&token->token_id, 0);
 371         token->batch_head = batch;
 372         token->batch_tail = batch;
 373         token->client_ref_count = 0;
 374
 375         if (test_env_value < 0)
 376                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 377
 378         if (!test_env_value) {
 379                 struct timeval tv;
 380                 struct tm tm;
 381                 time_t secs;
 382
 383                 gettimeofday(&tv, NULL);
 384                 secs = tv.tv_sec;
 385                 gmtime_r(&secs, &tm);
 386
 387                 strbuf_addf(&token->token_id,
 388                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 389                             flush_count++,
 390                             getpid(),
 391                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 392                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 393                             (long)tv.tv_usec);
 394         } else {
 395                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 396         }
 397
 398         /*
 399          * We created a new <token_id> and are starting a new series
 400          * of tokens with a zero <seq_nr>.
 401          *
 402          * Since clients cannot guess our new (non test) <token_id>
 403          * they will always receive a trivial response (because of the
 404          * mismatch on the <token_id>).  The trivial response will
 405          * tell them our new <token_id> so that subsequent requests
 406          * will be relative to our new series.  (And when sending that
 407          * response, we pin the current head of the batch list.)
 408          *
 409          * Even if the client correctly guesses the <token_id>, their
 410          * request of "builtin:<token_id>:0" asks for all changes MORE
 411          * RECENT than batch/bin 0.
 412          *
 413          * This implies that it is a waste to accumulate paths in the
 414          * initial batch/bin (because they will never be transmitted).
 415          *
 416          * So the daemon could be running for days and watching the
 417          * file system, but doesn't need to actually accumulate any
 418          * paths UNTIL we need to set a reference point for a later
 419          * relative request.
 420          *
 421          * However, it is very useful for testing to always have a
 422          * reference point set.  Pin batch 0 to force early file system
 423          * events to accumulate.
 424          */
 425         if (test_env_value)
 426                 batch->pinned_time = time(NULL);
 427
 428         return token;
 429 }
 430
 431 struct fsmonitor_batch *fsmonitor_batch__new(void)
 432 {
 433         struct fsmonitor_batch *batch;
 434
 435         CALLOC_ARRAY(batch, 1);
 436
 437         return batch;
 438 }
 439
 440 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 441 {
 442         while (batch) {
 443                 struct fsmonitor_batch *next = batch->next;
 444
 445                 /*
 446                  * The actual strings within the array of this batch
 447                  * are interned, so we don't own them.  We only own
 448                  * the array.
 449                  */
 450                 free(batch->interned_paths);
 451                 free(batch);
 452
 453                 batch = next;
 454         }
 455 }
 456
 457 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 458                                const char *path)
 459 {
 460         const char *interned_path = strintern(path);
 461
 462         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 463
 464         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 465         batch->interned_paths[batch->nr++] = interned_path;
 466 }
 467
 468 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 469                                      const struct fsmonitor_batch *batch_src)
 470 {
 471         size_t k;
 472
 473         ALLOC_GROW(batch_dest->interned_paths,
 474                    batch_dest->nr + batch_src->nr + 1,
 475                    batch_dest->alloc);
 476
 477         for (k = 0; k < batch_src->nr; k++)
 478                 batch_dest->interned_paths[batch_dest->nr++] =
 479                         batch_src->interned_paths[k];
 480 }
 481
 482 /*
 483  * To keep the batch list from growing unbounded in response to filesystem
 484  * activity, we try to truncate old batches from the end of the list as
 485  * they become irrelevant.
 486  *
 487  * We assume that the .git/index will be updated with the most recent token
 488  * any time the index is updated.  And future commands will only ask for
 489  * recent changes *since* that new token.  So as tokens advance into the
 490  * future, older batch items will never be requested/needed.  So we can
 491  * truncate them without loss of functionality.
 492  *
 493  * However, multiple commands may be talking to the daemon concurrently
 494  * or perform a slow command, so a little "token skew" is possible.
 495  * Therefore, we want this to be a little bit lazy and have a generous
 496  * delay.
 497  *
 498  * The current reader thread walked backwards in time from `token->batch_head`
 499  * back to `batch_marker` somewhere in the middle of the batch list.
 500  *
 501  * Let's walk backwards in time from that marker an arbitrary delay
 502  * and truncate the list there.  Note that these timestamps are completely
 503  * artificial (based on when we pinned the batch item) and not on any
 504  * filesystem activity.
 505  *
 506  * Return the obsolete portion of the list after we have removed it from
 507  * the official list so that the caller can free it after leaving the lock.
 508  */
 509 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 510
 511 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 512         struct fsmonitor_daemon_state *state,
 513         const struct fsmonitor_batch *batch_marker)
 514 {
 515         /* assert current thread holding state->main_lock */
 516
 517         const struct fsmonitor_batch *batch;
 518         struct fsmonitor_batch *remainder;
 519
 520         if (!batch_marker)
 521                 return NULL;
 522
 523         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 524                          batch_marker->batch_seq_nr,
 525                          (uint64_t)batch_marker->pinned_time);
 526
 527         for (batch = batch_marker; batch; batch = batch->next) {
 528                 time_t t;
 529
 530                 if (!batch->pinned_time) /* an overflow batch */
 531                         continue;
 532
 533                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 534                 if (t > batch_marker->pinned_time) /* too close to marker */
 535                         continue;
 536
 537                 goto truncate_past_here;
 538         }
 539
 540         return NULL;
 541
 542 truncate_past_here:
 543         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 544
 545         remainder = ((struct fsmonitor_batch *)batch)->next;
 546         ((struct fsmonitor_batch *)batch)->next = NULL;
 547
 548         return remainder;
 549 }
 550
 551 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 552 {
 553         if (!token)
 554                 return;
 555
 556         assert(token->client_ref_count == 0);
 557
 558         strbuf_release(&token->token_id);
 559
 560         fsmonitor_batch__free_list(token->batch_head);
 561
 562         free(token);
 563 }
 564
 565 /*
 566  * Flush all of our cached data about the filesystem.  Call this if we
 567  * lose sync with the filesystem and miss some notification events.
 568  *
 569  * [1] If we are missing events, then we no longer have a complete
 570  *     history of the directory (relative to our current start token).
 571  *     We should create a new token and start fresh (as if we just
 572  *     booted up).
 573  *
 574  * [2] Some of those lost events may have been for cookie files.  We
 575  *     should assume the worst and abort them rather letting them starve.
 576  *
 577  * If there are no concurrent threads reading the current token data
 578  * series, we can free it now.  Otherwise, let the last reader free
 579  * it.
 580  *
 581  * Either way, the old token data series is no longer associated with
 582  * our state data.
 583  */
 584 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 585 {
 586         /* assert current thread holding state->main_lock */
 587
 588         struct fsmonitor_token_data *free_me = NULL;
 589         struct fsmonitor_token_data *new_one = NULL;
 590
 591         new_one = fsmonitor_new_token_data();
 592
 593         if (state->current_token_data->client_ref_count == 0)
 594                 free_me = state->current_token_data;
 595         state->current_token_data = new_one;
 596
 597         fsmonitor_free_token_data(free_me);
 598
 599         with_lock__abort_all_cookies(state);
 600 }
 601
/*
 * Locking wrapper around with_lock__do_force_resync(): takes
 * `state->main_lock`, performs the resync and releases the lock.
 * The caller must not already hold the lock.
 */
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
{
	pthread_mutex_lock(&state->main_lock);
	with_lock__do_force_resync(state);
	pthread_mutex_unlock(&state->main_lock);
}
 608
/*
 * Format an opaque token string to send to the client.
 *
 * Any previous contents of `response_token` are discarded and replaced
 * with "builtin:<token_id>:<seq_nr>", where <token_id> is the text of
 * `response_token_id` and <seq_nr> is `batch->batch_seq_nr`.
 */
static void with_lock__format_response_token(
	struct strbuf *response_token,
	const struct strbuf *response_token_id,
	const struct fsmonitor_batch *batch)
{
	/* assert current thread holding state->main_lock */

	strbuf_reset(response_token);
	strbuf_addf(response_token, "builtin:%s:%"PRIu64,
		    response_token_id->buf, batch->batch_seq_nr);
}
 623
 624 /*
 625  * Parse an opaque token from the client.
 626  * Returns -1 on error.
 627  */
 628 static int fsmonitor_parse_client_token(const char *buf_token,
 629                                         struct strbuf *requested_token_id,
 630                                         uint64_t *seq_nr)
 631 {
 632         const char *p;
 633         char *p_end;
 634
 635         strbuf_reset(requested_token_id);
 636         *seq_nr = 0;
 637
 638         if (!skip_prefix(buf_token, "builtin:", &p))
 639                 return -1;
 640
 641         while (*p && *p != ':')
 642                 strbuf_addch(requested_token_id, *p++);
 643         if (!*p++)
 644                 return -1;
 645
 646         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 647         if (*p_end)
 648                 return -1;
 649
 650         return 0;
 651 }
 652
/*
 * Instantiate the khash "str" table type (`kh_str_t`), keyed by
 * `const char *`; do_handle_client() declares its `shown` table with it.
 * NOTE(review): the `0` argument presumably selects khash's set (not
 * map) mode, leaving the `int` value type unused -- confirm in khash.h.
 */
KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 654
 655 static int do_handle_client(struct fsmonitor_daemon_state *state,
 656                             const char *command,
 657                             ipc_server_reply_cb *reply,
 658                             struct ipc_server_reply_data *reply_data)
 659 {
 660         struct fsmonitor_token_data *token_data = NULL;
 661         struct strbuf response_token = STRBUF_INIT;
 662         struct strbuf requested_token_id = STRBUF_INIT;
 663         struct strbuf payload = STRBUF_INIT;
 664         uint64_t requested_oldest_seq_nr = 0;
 665         uint64_t total_response_len = 0;
 666         const char *p;
 667         const struct fsmonitor_batch *batch_head;
 668         const struct fsmonitor_batch *batch;
 669         struct fsmonitor_batch *remainder = NULL;
 670         intmax_t count = 0, duplicates = 0;
 671         kh_str_t *shown;
 672         int hash_ret;
 673         int do_trivial = 0;
 674         int do_flush = 0;
 675         int do_cookie = 0;
 676         enum fsmonitor_cookie_item_result cookie_result;
 677
 678         /*
 679          * We expect `command` to be of the form:
 680          *
 681          * <command> := quit NUL
 682          *            | flush NUL
 683          *            | <V1-time-since-epoch-ns> NUL
 684          *            | <V2-opaque-fsmonitor-token> NUL
 685          */
 686
 687         if (!strcmp(command, "quit")) {
 688                 /*
 689                  * A client has requested over the socket/pipe that the
 690                  * daemon shutdown.
 691                  *
 692                  * Tell the IPC thread pool to shutdown (which completes
 693                  * the await in the main thread (which can stop the
 694                  * fsmonitor listener thread)).
 695                  *
 696                  * There is no reply to the client.
 697                  */
 698                 return SIMPLE_IPC_QUIT;
 699
 700         } else if (!strcmp(command, "flush")) {
 701                 /*
 702                  * Flush all of our cached data and generate a new token
 703                  * just like if we lost sync with the filesystem.
 704                  *
 705                  * Then send a trivial response using the new token.
 706                  */
 707                 do_flush = 1;
 708                 do_trivial = 1;
 709
 710         } else if (!skip_prefix(command, "builtin:", &p)) {
 711                 /* assume V1 timestamp or garbage */
 712
 713                 char *p_end;
 714
 715                 strtoumax(command, &p_end, 10);
 716                 trace_printf_key(&trace_fsmonitor,
 717                                  ((*p_end) ?
 718                                   "fsmonitor: invalid command line '%s'" :
 719                                   "fsmonitor: unsupported V1 protocol '%s'"),
 720                                  command);
 721                 do_trivial = 1;
 722                 do_cookie = 1;
 723
 724         } else {
 725                 /* We have "builtin:*" */
 726                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 727                                                  &requested_oldest_seq_nr)) {
 728                         trace_printf_key(&trace_fsmonitor,
 729                                          "fsmonitor: invalid V2 protocol token '%s'",
 730                                          command);
 731                         do_trivial = 1;
 732                         do_cookie = 1;
 733
 734                 } else {
 735                         /*
 736                          * We have a V2 valid token:
 737                          *     "builtin:<token_id>:<seq_nr>"
 738                          */
 739                         do_cookie = 1;
 740                 }
 741         }
 742
 743         pthread_mutex_lock(&state->main_lock);
 744
 745         if (!state->current_token_data)
 746                 BUG("fsmonitor state does not have a current token");
 747
 748         /*
 749          * Write a cookie file inside the directory being watched in
 750          * an effort to flush out existing filesystem events that we
 751          * actually care about.  Suspend this client thread until we
 752          * see the filesystem events for this cookie file.
 753          *
 754          * Creating the cookie lets us guarantee that our FS listener
 755          * thread has drained the kernel queue and we are caught up
 756          * with the kernel.
 757          *
 758          * If we cannot create the cookie (or otherwise guarantee that
 759          * we are caught up), we send a trivial response.  We have to
 760          * assume that there might be some very, very recent activity
 761          * on the FS still in flight.
 762          */
 763         if (do_cookie) {
 764                 cookie_result = with_lock__wait_for_cookie(state);
 765                 if (cookie_result != FCIR_SEEN) {
 766                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 767                               cookie_result);
 768                         do_trivial = 1;
 769                 }
 770         }
 771
 772         if (do_flush)
 773                 with_lock__do_force_resync(state);
 774
 775         /*
 776          * We mark the current head of the batch list as "pinned" so
 777          * that the listener thread will treat this item as read-only
 778          * (and prevent any more paths from being added to it) from
 779          * now on.
 780          */
 781         token_data = state->current_token_data;
 782         batch_head = token_data->batch_head;
 783         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 784
 785         /*
 786          * FSMonitor Protocol V2 requires that we send a response header
 787          * with a "new current token" and then all of the paths that changed
 788          * since the "requested token".  We send the seq_nr of the just-pinned
 789          * head batch so that future requests from a client will be relative
 790          * to it.
 791          */
 792         with_lock__format_response_token(&response_token,
 793                                          &token_data->token_id, batch_head);
 794
 795         reply(reply_data, response_token.buf, response_token.len + 1);
 796         total_response_len += response_token.len + 1;
 797
 798         trace2_data_string("fsmonitor", the_repository, "response/token",
 799                            response_token.buf);
 800         trace_printf_key(&trace_fsmonitor, "response token: %s",
 801                          response_token.buf);
 802
 803         if (!do_trivial) {
 804                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 805                         /*
 806                          * The client last spoke to a different daemon
 807                          * instance -OR- the daemon had to resync with
 808                          * the filesystem (and lost events), so reject.
 809                          */
 810                         trace2_data_string("fsmonitor", the_repository,
 811                                            "response/token", "different");
 812                         do_trivial = 1;
 813
 814                 } else if (requested_oldest_seq_nr <
 815                            token_data->batch_tail->batch_seq_nr) {
 816                         /*
 817                          * The client wants older events than we have for
 818                          * this token_id.  This means that the end of our
 819                          * batch list was truncated and we cannot give the
 820                          * client a complete snapshot relative to their
 821                          * request.
 822                          */
 823                         trace_printf_key(&trace_fsmonitor,
 824                                          "client requested truncated data");
 825                         do_trivial = 1;
 826                 }
 827         }
 828
 829         if (do_trivial) {
 830                 pthread_mutex_unlock(&state->main_lock);
 831
 832                 reply(reply_data, "/", 2);
 833
 834                 trace2_data_intmax("fsmonitor", the_repository,
 835                                    "response/trivial", 1);
 836
 837                 goto cleanup;
 838         }
 839
 840         /*
 841          * We're going to hold onto a pointer to the current
 842          * token-data while we walk the list of batches of files.
 843          * During this time, we will NOT be under the lock.
 844          * So we ref-count it.
 845          *
 846          * This allows the listener thread to continue prepending
 847          * new batches of items to the token-data (which we'll ignore).
 848          *
 849          * AND it allows the listener thread to do a token-reset
 850          * (and install a new `current_token_data`).
 851          */
 852         token_data->client_ref_count++;
 853
 854         pthread_mutex_unlock(&state->main_lock);
 855
 856         /*
 857          * The client request is relative to the token that they sent,
 858          * so walk the batch list backwards from the current head back
 859          * to the batch (sequence number) they named.
 860          *
 861          * We use khash to de-dup the list of pathnames.
 862          *
 863          * NEEDSWORK: each batch contains a list of interned strings,
 864          * so we only need to do pointer comparisons here to build the
 865          * hash table.  Currently, we're still comparing the string
 866          * values.
 867          */
 868         shown = kh_init_str();
 869         for (batch = batch_head;
 870              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 871              batch = batch->next) {
 872                 size_t k;
 873
 874                 for (k = 0; k < batch->nr; k++) {
 875                         const char *s = batch->interned_paths[k];
 876                         size_t s_len;
 877
 878                         if (kh_get_str(shown, s) != kh_end(shown))
 879                                 duplicates++;
 880                         else {
 881                                 kh_put_str(shown, s, &hash_ret);
 882
 883                                 trace_printf_key(&trace_fsmonitor,
 884                                                  "send[%"PRIuMAX"]: %s",
 885                                                  count, s);
 886
 887                                 /* Each path gets written with a trailing NUL */
 888                                 s_len = strlen(s) + 1;
 889
 890                                 if (payload.len + s_len >=
 891                                     LARGE_PACKET_DATA_MAX) {
 892                                         reply(reply_data, payload.buf,
 893                                               payload.len);
 894                                         total_response_len += payload.len;
 895                                         strbuf_reset(&payload);
 896                                 }
 897
 898                                 strbuf_add(&payload, s, s_len);
 899                                 count++;
 900                         }
 901                 }
 902         }
 903
 904         if (payload.len) {
 905                 reply(reply_data, payload.buf, payload.len);
 906                 total_response_len += payload.len;
 907         }
 908
 909         kh_release_str(shown);
 910
 911         pthread_mutex_lock(&state->main_lock);
 912
 913         if (token_data->client_ref_count > 0)
 914                 token_data->client_ref_count--;
 915
 916         if (token_data->client_ref_count == 0) {
 917                 if (token_data != state->current_token_data) {
 918                         /*
 919                          * The listener thread did a token-reset while we were
 920                          * walking the batch list.  Therefore, this token is
 921                          * stale and can be discarded completely.  If we are
 922                          * the last reader thread using this token, we own
 923                          * that work.
 924                          */
 925                         fsmonitor_free_token_data(token_data);
 926                 } else if (batch) {
 927                         /*
 928                          * We are holding the lock and are the only
 929                          * reader of the ref-counted portion of the
 930                          * list, so we get the honor of seeing if the
 931                          * list can be truncated to save memory.
 932                          *
 933                          * The main loop did not walk to the end of the
 934                          * list, so this batch is the first item in the
 935                          * batch-list that is older than the requested
 936                          * end-point sequence number.  See if the tail
 937                          * end of the list is obsolete.
 938                          */
 939                         remainder = with_lock__truncate_old_batches(state,
 940                                                                     batch);
 941                 }
 942         }
 943
 944         pthread_mutex_unlock(&state->main_lock);
 945
 946         if (remainder)
 947                 fsmonitor_batch__free_list(remainder);
 948
 949         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 950         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 951         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 952
 953 cleanup:
 954         strbuf_release(&response_token);
 955         strbuf_release(&requested_token_id);
 956         strbuf_release(&payload);
 957
 958         return 0;
 959 }
 960
 961 static ipc_server_application_cb handle_client;
 962
 963 static int handle_client(void *data,
 964                          const char *command, size_t command_len,
 965                          ipc_server_reply_cb *reply,
 966                          struct ipc_server_reply_data *reply_data)
 967 {
 968         struct fsmonitor_daemon_state *state = data;
 969         int result;
 970
 971         /*
 972          * The Simple IPC API now supports {char*, len} arguments, but
 973          * FSMonitor always uses proper null-terminated strings, so
 974          * we can ignore the command_len argument.  (Trust, but verify.)
 975          */
 976         if (command_len != strlen(command))
 977                 BUG("FSMonitor assumes text messages");
 978
 979         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 980
 981         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 982         trace2_data_string("fsmonitor", the_repository, "request", command);
 983
 984         result = do_handle_client(state, command, reply, reply_data);
 985
 986         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 987
 988         return result;
 989 }
 990
/*
 * Names of the nested directories in which the daemon keeps its cookie
 * files: fsmonitor_run_daemon() appends FSMONITOR_DIR and then
 * FSMONITOR_COOKIE_DIR to the watched <gitdir> path.
 *
 * FSMONITOR_COOKIE_PREFIX is the combined "fsmonitor--daemon/cookies/"
 * prefix that the classify routines compare relative paths against.
 */
#define FSMONITOR_DIR           "fsmonitor--daemon"
#define FSMONITOR_COOKIE_DIR    "cookies"
#define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 994
 995 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 996         const char *rel)
 997 {
 998         if (fspathncmp(rel, ".git", 4))
 999                 return IS_WORKDIR_PATH;
1000         rel += 4;
1001
1002         if (!*rel)
1003                 return IS_DOT_GIT;
1004         if (*rel != '/')
1005                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
1006         rel++;
1007
1008         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1009                         strlen(FSMONITOR_COOKIE_PREFIX)))
1010                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1011
1012         return IS_INSIDE_DOT_GIT;
1013 }
1014
1015 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1016         const char *rel)
1017 {
1018         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1019                         strlen(FSMONITOR_COOKIE_PREFIX)))
1020                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1021
1022         return IS_INSIDE_GITDIR;
1023 }
1024
1025 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1026         struct fsmonitor_daemon_state *state,
1027         const char *path)
1028 {
1029         const char *rel;
1030
1031         if (fspathncmp(path, state->path_worktree_watch.buf,
1032                        state->path_worktree_watch.len))
1033                 return IS_OUTSIDE_CONE;
1034
1035         rel = path + state->path_worktree_watch.len;
1036
1037         if (!*rel)
1038                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1039         if (*rel != '/')
1040                 return IS_OUTSIDE_CONE;
1041         rel++;
1042
1043         return fsmonitor_classify_path_workdir_relative(rel);
1044 }
1045
1046 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1047         struct fsmonitor_daemon_state *state,
1048         const char *path)
1049 {
1050         const char *rel;
1051         enum fsmonitor_path_type t;
1052
1053         t = try_classify_workdir_abs_path(state, path);
1054         if (state->nr_paths_watching == 1)
1055                 return t;
1056         if (t != IS_OUTSIDE_CONE)
1057                 return t;
1058
1059         if (fspathncmp(path, state->path_gitdir_watch.buf,
1060                        state->path_gitdir_watch.len))
1061                 return IS_OUTSIDE_CONE;
1062
1063         rel = path + state->path_gitdir_watch.len;
1064
1065         if (!*rel)
1066                 return IS_GITDIR; /* it is the <gitdir> exactly */
1067         if (*rel != '/')
1068                 return IS_OUTSIDE_CONE;
1069         rel++;
1070
1071         return fsmonitor_classify_path_gitdir_relative(rel);
1072 }
1073
/*
 * We try to combine small batches at the front of the batch-list to avoid
 * having a long list.  This hopefully makes it a little easier when we want
 * to truncate and maintain the list.  However, we don't want the paths array
 * to just keep growing and growing with realloc, so we insert an arbitrary
 * limit.
 *
 * fsmonitor_publish() compares this limit against the sum of the `nr`
 * fields of the current head batch and the incoming batch; when the sum
 * is larger, the incoming batch is prepended instead of being combined.
 */
#define MY_COMBINE_LIMIT (1024)
1082
1083 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1084                        struct fsmonitor_batch *batch,
1085                        const struct string_list *cookie_names)
1086 {
1087         if (!batch && !cookie_names->nr)
1088                 return;
1089
1090         pthread_mutex_lock(&state->main_lock);
1091
1092         if (batch) {
1093                 struct fsmonitor_batch *head;
1094
1095                 head = state->current_token_data->batch_head;
1096                 if (!head) {
1097                         BUG("token does not have batch");
1098                 } else if (head->pinned_time) {
1099                         /*
1100                          * We cannot alter the current batch list
1101                          * because:
1102                          *
1103                          * [a] it is being transmitted to at least one
1104                          * client and the handle_client() thread has a
1105                          * ref-count, but not a lock on the batch list
1106                          * starting with this item.
1107                          *
1108                          * [b] it has been transmitted in the past to
1109                          * at least one client such that future
1110                          * requests are relative to this head batch.
1111                          *
1112                          * So, we can only prepend a new batch onto
1113                          * the front of the list.
1114                          */
1115                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1116                         batch->next = head;
1117                         state->current_token_data->batch_head = batch;
1118                 } else if (!head->batch_seq_nr) {
1119                         /*
1120                          * Batch 0 is unpinned.  See the note in
1121                          * `fsmonitor_new_token_data()` about why we
1122                          * don't need to accumulate these paths.
1123                          */
1124                         fsmonitor_batch__free_list(batch);
1125                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1126                         /*
1127                          * The head batch in the list has never been
1128                          * transmitted to a client, but folding the
1129                          * contents of the new batch onto it would
1130                          * exceed our arbitrary limit, so just prepend
1131                          * the new batch onto the list.
1132                          */
1133                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1134                         batch->next = head;
1135                         state->current_token_data->batch_head = batch;
1136                 } else {
1137                         /*
1138                          * We are free to add the paths in the given
1139                          * batch onto the end of the current head batch.
1140                          */
1141                         fsmonitor_batch__combine(head, batch);
1142                         fsmonitor_batch__free_list(batch);
1143                 }
1144         }
1145
1146         if (cookie_names->nr)
1147                 with_lock__mark_cookies_seen(state, cookie_names);
1148
1149         pthread_mutex_unlock(&state->main_lock);
1150 }
1151
/*
 * pthread entry point for the health thread: runs fsm_health__loop()
 * on the daemon state passed in `_state`, bracketed by the trace2
 * thread start/exit calls.  Always returns NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        struct fsmonitor_daemon_state *daemon = _state;

        trace2_thread_start("fsm-health");
        fsm_health__loop(daemon);
        trace2_thread_exit();

        return NULL;
}
1163
1164 static void *fsm_listen__thread_proc(void *_state)
1165 {
1166         struct fsmonitor_daemon_state *state = _state;
1167
1168         trace2_thread_start("fsm-listen");
1169
1170         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1171                          state->path_worktree_watch.buf);
1172         if (state->nr_paths_watching > 1)
1173                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1174                                  state->path_gitdir_watch.buf);
1175
1176         fsm_listen__loop(state);
1177
1178         pthread_mutex_lock(&state->main_lock);
1179         if (state->current_token_data &&
1180             state->current_token_data->client_ref_count == 0)
1181                 fsmonitor_free_token_data(state->current_token_data);
1182         state->current_token_data = NULL;
1183         pthread_mutex_unlock(&state->main_lock);
1184
1185         trace2_thread_exit();
1186         return NULL;
1187 }
1188
1189 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1190 {
1191         struct ipc_server_opts ipc_opts = {
1192                 .nr_threads = fsmonitor__ipc_threads,
1193
1194                 /*
1195                  * We know that there are no other active threads yet,
1196                  * so we can let the IPC layer temporarily chdir() if
1197                  * it needs to when creating the server side of the
1198                  * Unix domain socket.
1199                  */
1200                 .uds_disallow_chdir = 0
1201         };
1202         int health_started = 0;
1203         int listener_started = 0;
1204         int err = 0;
1205
1206         /*
1207          * Start the IPC thread pool before the we've started the file
1208          * system event listener thread so that we have the IPC handle
1209          * before we need it.
1210          */
1211         if (ipc_server_run_async(&state->ipc_server_data,
1212                                  state->path_ipc.buf, &ipc_opts,
1213                                  handle_client, state))
1214                 return error_errno(
1215                         _("could not start IPC thread pool on '%s'"),
1216                         state->path_ipc.buf);
1217
1218         /*
1219          * Start the fsmonitor listener thread to collect filesystem
1220          * events.
1221          */
1222         if (pthread_create(&state->listener_thread, NULL,
1223                            fsm_listen__thread_proc, state)) {
1224                 ipc_server_stop_async(state->ipc_server_data);
1225                 err = error(_("could not start fsmonitor listener thread"));
1226                 goto cleanup;
1227         }
1228         listener_started = 1;
1229
1230         /*
1231          * Start the health thread to watch over our process.
1232          */
1233         if (pthread_create(&state->health_thread, NULL,
1234                            fsm_health__thread_proc, state)) {
1235                 ipc_server_stop_async(state->ipc_server_data);
1236                 err = error(_("could not start fsmonitor health thread"));
1237                 goto cleanup;
1238         }
1239         health_started = 1;
1240
1241         /*
1242          * The daemon is now fully functional in background threads.
1243          * Our primary thread should now just wait while the threads
1244          * do all the work.
1245          */
1246 cleanup:
1247         /*
1248          * Wait for the IPC thread pool to shutdown (whether by client
1249          * request, from filesystem activity, or an error).
1250          */
1251         ipc_server_await(state->ipc_server_data);
1252
1253         /*
1254          * The fsmonitor listener thread may have received a shutdown
1255          * event from the IPC thread pool, but it doesn't hurt to tell
1256          * it again.  And wait for it to shutdown.
1257          */
1258         if (listener_started) {
1259                 fsm_listen__stop_async(state);
1260                 pthread_join(state->listener_thread, NULL);
1261         }
1262
1263         if (health_started) {
1264                 fsm_health__stop_async(state);
1265                 pthread_join(state->health_thread, NULL);
1266         }
1267
1268         if (err)
1269                 return err;
1270         if (state->listen_error_code)
1271                 return state->listen_error_code;
1272         if (state->health_error_code)
1273                 return state->health_error_code;
1274         return 0;
1275 }
1276
1277 static int fsmonitor_run_daemon(void)
1278 {
1279         struct fsmonitor_daemon_state state;
1280         const char *home;
1281         int err;
1282
1283         memset(&state, 0, sizeof(state));
1284
1285         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1286         pthread_mutex_init(&state.main_lock, NULL);
1287         pthread_cond_init(&state.cookies_cond, NULL);
1288         state.listen_error_code = 0;
1289         state.health_error_code = 0;
1290         state.current_token_data = fsmonitor_new_token_data();
1291
1292         /* Prepare to (recursively) watch the <worktree-root> directory. */
1293         strbuf_init(&state.path_worktree_watch, 0);
1294         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1295         state.nr_paths_watching = 1;
1296
1297         strbuf_init(&state.alias.alias, 0);
1298         strbuf_init(&state.alias.points_to, 0);
1299         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1300                 goto done;
1301
1302         /*
1303          * We create and delete cookie files somewhere inside the .git
1304          * directory to help us keep sync with the file system.  If
1305          * ".git" is not a directory, then <gitdir> is not inside the
1306          * cone of <worktree-root>, so set up a second watch to watch
1307          * the <gitdir> so that we get events for the cookie files.
1308          */
1309         strbuf_init(&state.path_gitdir_watch, 0);
1310         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1311         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1312         if (!is_directory(state.path_gitdir_watch.buf)) {
1313                 strbuf_reset(&state.path_gitdir_watch);
1314                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1315                 state.nr_paths_watching = 2;
1316         }
1317
1318         /*
1319          * We will write filesystem syncing cookie files into
1320          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1321          *
1322          * The extra layers of subdirectories here keep us from
1323          * changing the mtime on ".git/" or ".git/foo/" when we create
1324          * or delete cookie files.
1325          *
1326          * There have been problems with some IDEs that do a
1327          * non-recursive watch of the ".git/" directory and run a
1328          * series of commands any time something happens.
1329          *
1330          * For example, if we place our cookie files directly in
1331          * ".git/" or ".git/foo/" then a `git status` (or similar
1332          * command) from the IDE will cause a cookie file to be
1333          * created in one of those dirs.  This causes the mtime of
1334          * those dirs to change.  This triggers the IDE's watch
1335          * notification.  This triggers the IDE to run those commands
1336          * again.  And the process repeats and the machine never goes
1337          * idle.
1338          *
1339          * Adding the extra layers of subdirectories prevents the
1340          * mtime of ".git/" and ".git/foo" from changing when a
1341          * cookie file is created.
1342          */
1343         strbuf_init(&state.path_cookie_prefix, 0);
1344         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1345
1346         strbuf_addch(&state.path_cookie_prefix, '/');
1347         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1348         mkdir(state.path_cookie_prefix.buf, 0777);
1349
1350         strbuf_addch(&state.path_cookie_prefix, '/');
1351         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1352         mkdir(state.path_cookie_prefix.buf, 0777);
1353
1354         strbuf_addch(&state.path_cookie_prefix, '/');
1355
1356         /*
1357          * We create a named-pipe or unix domain socket inside of the
1358          * ".git" directory.  (Well, on Windows, we base our named
1359          * pipe in the NPFS on the absolute path of the git
1360          * directory.)
1361          */
1362         strbuf_init(&state.path_ipc, 0);
1363         strbuf_addstr(&state.path_ipc,
1364                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1365
1366         /*
1367          * Confirm that we can create platform-specific resources for the
1368          * filesystem listener before we bother starting all the threads.
1369          */
1370         if (fsm_listen__ctor(&state)) {
1371                 err = error(_("could not initialize listener thread"));
1372                 goto done;
1373         }
1374
1375         if (fsm_health__ctor(&state)) {
1376                 err = error(_("could not initialize health thread"));
1377                 goto done;
1378         }
1379
1380         /*
1381          * CD out of the worktree root directory.
1382          *
1383          * The common Git startup mechanism causes our CWD to be the
1384          * root of the worktree.  On Windows, this causes our process
1385          * to hold a locked handle on the CWD.  This prevents the
1386          * worktree from being moved or deleted while the daemon is
1387          * running.
1388          *
1389          * We assume that our FS and IPC listener threads have either
1390          * opened all of the handles that they need or will do
1391          * everything using absolute paths.
1392          */
1393         home = getenv("HOME");
1394         if (home && *home && chdir(home))
1395                 die_errno(_("could not cd home '%s'"), home);
1396
1397         err = fsmonitor_run_daemon_1(&state);
1398
1399 done:
1400         pthread_cond_destroy(&state.cookies_cond);
1401         pthread_mutex_destroy(&state.main_lock);
1402         fsm_listen__dtor(&state);
1403         fsm_health__dtor(&state);
1404
1405         ipc_server_free(state.ipc_server_data);
1406
1407         strbuf_release(&state.path_worktree_watch);
1408         strbuf_release(&state.path_gitdir_watch);
1409         strbuf_release(&state.path_cookie_prefix);
1410         strbuf_release(&state.path_ipc);
1411         strbuf_release(&state.alias.alias);
1412         strbuf_release(&state.alias.points_to);
1413
1414         return err;
1415 }
1416
1417 static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED)
1418 {
1419         /*
1420          * Technically, we don't need to probe for an existing daemon
1421          * process, since we could just call `fsmonitor_run_daemon()`
1422          * and let it fail if the pipe/socket is busy.
1423          *
1424          * However, this method gives us a nicer error message for a
1425          * common error case.
1426          */
1427         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1428                 die(_("fsmonitor--daemon is already running '%s'"),
1429                     the_repository->worktree);
1430
1431         if (fsmonitor__announce_startup) {
1432                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1433                         the_repository->worktree);
1434                 fflush(stderr);
1435         }
1436
1437 #ifdef GIT_WINDOWS_NATIVE
1438         if (detach_console)
1439                 FreeConsole();
1440 #endif
1441
1442         return !!fsmonitor_run_daemon();
1443 }
1444
1445 static start_bg_wait_cb bg_wait_cb;
1446
1447 static int bg_wait_cb(const struct child_process *cp UNUSED,
1448                       void *cb_data UNUSED)
1449 {
1450         enum ipc_active_state s = fsmonitor_ipc__get_state();
1451
1452         switch (s) {
1453         case IPC_STATE__LISTENING:
1454                 /* child is "ready" */
1455                 return 0;
1456
1457         case IPC_STATE__NOT_LISTENING:
1458         case IPC_STATE__PATH_NOT_FOUND:
1459                 /* give child more time */
1460                 return 1;
1461
1462         default:
1463         case IPC_STATE__INVALID_PATH:
1464         case IPC_STATE__OTHER_ERROR:
1465                 /* all the time in world won't help */
1466                 return -1;
1467         }
1468 }
1469
1470 static int try_to_start_background_daemon(void)
1471 {
1472         struct child_process cp = CHILD_PROCESS_INIT;
1473         enum start_bg_result sbgr;
1474
1475         /*
1476          * Before we try to create a background daemon process, see
1477          * if a daemon process is already listening.  This makes it
1478          * easier for us to report an already-listening error to the
1479          * console, since our spawn/daemon can only report the success
1480          * of creating the background process (and not whether it
1481          * immediately exited).
1482          */
1483         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1484                 die(_("fsmonitor--daemon is already running '%s'"),
1485                     the_repository->worktree);
1486
1487         if (fsmonitor__announce_startup) {
1488                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1489                         the_repository->worktree);
1490                 fflush(stderr);
1491         }
1492
1493         cp.git_cmd = 1;
1494
1495         strvec_push(&cp.args, "fsmonitor--daemon");
1496         strvec_push(&cp.args, "run");
1497         strvec_push(&cp.args, "--detach");
1498         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1499
1500         cp.no_stdin = 1;
1501         cp.no_stdout = 1;
1502         cp.no_stderr = 1;
1503
1504         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1505                                 fsmonitor__start_timeout_sec);
1506
1507         switch (sbgr) {
1508         case SBGR_READY:
1509                 return 0;
1510
1511         default:
1512         case SBGR_ERROR:
1513         case SBGR_CB_ERROR:
1514                 return error(_("daemon failed to start"));
1515
1516         case SBGR_TIMEOUT:
1517                 return error(_("daemon not online yet"));
1518
1519         case SBGR_DIED:
1520                 return error(_("daemon terminated"));
1521         }
1522 }
1523
1524 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1525 {
1526         const char *subcmd;
1527         enum fsmonitor_reason reason;
1528         int detach_console = 0;
1529
1530         struct option options[] = {
1531                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1532                 OPT_INTEGER(0, "ipc-threads",
1533                             &fsmonitor__ipc_threads,
1534                             N_("use <n> ipc worker threads")),
1535                 OPT_INTEGER(0, "start-timeout",
1536                             &fsmonitor__start_timeout_sec,
1537                             N_("max seconds to wait for background daemon startup")),
1538
1539                 OPT_END()
1540         };
1541
1542         git_config(fsmonitor_config, NULL);
1543
1544         argc = parse_options(argc, argv, prefix, options,
1545                              builtin_fsmonitor__daemon_usage, 0);
1546         if (argc != 1)
1547                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1548         subcmd = argv[0];
1549
1550         if (fsmonitor__ipc_threads < 1)
1551                 die(_("invalid 'ipc-threads' value (%d)"),
1552                     fsmonitor__ipc_threads);
1553
1554         prepare_repo_settings(the_repository);
1555         /*
1556          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1557          * (without bothering to load the `core.fsmonitor` config settings).
1558          *
1559          * If the repo is not compatible, the repo-settings will be set to
1560          * incompatible rather than IPC, so we can use one of the __get
1561          * routines to detect the discrepancy.
1562          */
1563         fsm_settings__set_ipc(the_repository);
1564
1565         reason = fsm_settings__get_reason(the_repository);
1566         if (reason > FSMONITOR_REASON_OK)
1567                 die("%s",
1568                     fsm_settings__get_incompatible_msg(the_repository,
1569                                                        reason));
1570
1571         if (!strcmp(subcmd, "start"))
1572                 return !!try_to_start_background_daemon();
1573
1574         if (!strcmp(subcmd, "run"))
1575                 return !!try_to_run_foreground_daemon(detach_console);
1576
1577         if (!strcmp(subcmd, "stop"))
1578                 return !!do_as_client__send_stop();
1579
1580         if (!strcmp(subcmd, "status"))
1581                 return !!do_as_client__status();
1582
1583         die(_("Unhandled subcommand '%s'"), subcmd);
1584 }
1585
1586 #else
1587 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix UNUSED)
1588 {
1589         struct option options[] = {
1590                 OPT_END()
1591         };
1592
1593         if (argc == 2 && !strcmp(argv[1], "-h"))
1594                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1595
1596         die(_("fsmonitor--daemon not supported on this platform"));
1597 }
1598 #endif