builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "abspath.h"
   3 #include "config.h"
   4 #include "environment.h"
   5 #include "gettext.h"
   6 #include "parse-options.h"
   7 #include "fsmonitor-ll.h"
   8 #include "fsmonitor-ipc.h"
   9 #include "fsmonitor-path-utils.h"
  10 #include "fsmonitor-settings.h"
  11 #include "compat/fsmonitor/fsm-health.h"
  12 #include "compat/fsmonitor/fsm-listen.h"
  13 #include "fsmonitor--daemon.h"
  14 #include "simple-ipc.h"
  15 #include "khash.h"
  16 #include "pkt-line.h"
  17 #include "trace.h"
  18 #include "trace2.h"
  19
/*
 * Usage strings for the "git fsmonitor--daemon" subcommands.
 *
 * The array is NULL-terminated.  The two entries that carry an
 * "<options>" placeholder are wrapped in N_(); the "stop" and
 * "status" lines are plain string literals.
 */
static const char * const builtin_fsmonitor__daemon_usage[] = {
        N_("git fsmonitor--daemon start [<options>]"),
        N_("git fsmonitor--daemon run [<options>]"),
        "git fsmonitor--daemon stop",
        "git fsmonitor--daemon status",
        NULL
};
  27
  28 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
/*
 * Global state loaded from config.
 *
 * Each setting pairs the name of its config key with a file-scope
 * variable holding the built-in default.  fsmonitor_config() replaces
 * the default when it is handed the matching key.
 */

/* fsmonitor_config() rejects values below 1. */
#define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
static int fsmonitor__ipc_threads = 8;

/* In seconds (per the variable name); fsmonitor_config() rejects negative values. */
#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
static int fsmonitor__start_timeout_sec = 60;

/* Parsed as a bool or an int; fsmonitor_config() rejects negative values. */
#define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
static int fsmonitor__announce_startup = 0;
  40
  41 static int fsmonitor_config(const char *var, const char *value,
  42                             const struct config_context *ctx, void *cb)
  43 {
  44         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  45                 int i = git_config_int(var, value, ctx->kvi);
  46                 if (i < 1)
  47                         return error(_("value of '%s' out of range: %d"),
  48                                      FSMONITOR__IPC_THREADS, i);
  49                 fsmonitor__ipc_threads = i;
  50                 return 0;
  51         }
  52
  53         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  54                 int i = git_config_int(var, value, ctx->kvi);
  55                 if (i < 0)
  56                         return error(_("value of '%s' out of range: %d"),
  57                                      FSMONITOR__START_TIMEOUT, i);
  58                 fsmonitor__start_timeout_sec = i;
  59                 return 0;
  60         }
  61
  62         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  63                 int is_bool;
  64                 int i = git_config_bool_or_int(var, value, ctx->kvi, &is_bool);
  65                 if (i < 0)
  66                         return error(_("value of '%s' not bool or int: %d"),
  67                                      var, i);
  68                 fsmonitor__announce_startup = i;
  69                 return 0;
  70         }
  71
  72         return git_default_config(var, value, ctx, cb);
  73 }
  74
  75 /*
  76  * Acting as a CLIENT.
  77  *
  78  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  79  * and wait for it to shutdown.
  80  */
  81 static int do_as_client__send_stop(void)
  82 {
  83         struct strbuf answer = STRBUF_INIT;
  84         int ret;
  85
  86         ret = fsmonitor_ipc__send_command("quit", &answer);
  87
  88         /* The quit command does not return any response data. */
  89         strbuf_release(&answer);
  90
  91         if (ret)
  92                 return ret;
  93
  94         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  95         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  96                 sleep_millisec(50);
  97         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  98
  99         return 0;
 100 }
 101
 102 static int do_as_client__status(void)
 103 {
 104         enum ipc_active_state state = fsmonitor_ipc__get_state();
 105
 106         switch (state) {
 107         case IPC_STATE__LISTENING:
 108                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 109                        the_repository->worktree);
 110                 return 0;
 111
 112         default:
 113                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 114                        the_repository->worktree);
 115                 return 1;
 116         }
 117 }
 118
/*
 * Outcome of waiting for a cookie file in with_lock__wait_for_cookie().
 */
enum fsmonitor_cookie_item_result {
        FCIR_ERROR = -1, /* could not create the cookie file */
        FCIR_INIT, /* initial value; the waiter keeps waiting while this is set */
        FCIR_SEEN, /* stored by with_lock__mark_cookies_seen() */
        FCIR_ABORT, /* stored by with_lock__abort_all_cookies() */
};
 125
/*
 * One cookie, kept in state->cookies while with_lock__wait_for_cookie()
 * waits for it.  Also used on the stack as a lookup key by
 * with_lock__mark_cookies_seen().
 */
struct fsmonitor_cookie_item {
        /* Hashmap linkage; initialized with strhash(name). */
        struct hashmap_entry entry;
        /* Cookie file name; this is what cookies_cmp() compares. */
        char *name;
        /* Starts as FCIR_INIT; the waiter returns once it changes. */
        enum fsmonitor_cookie_item_result result;
};
 131
 132 static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
 133                      const struct hashmap_entry *he2, const void *keydata)
 134 {
 135         const struct fsmonitor_cookie_item *a =
 136                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 137         const struct fsmonitor_cookie_item *b =
 138                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 139
 140         return strcmp(a->name, keydata ? keydata : b->name);
 141 }
 142
/*
 * Create a uniquely named cookie file and block until the cookie's
 * result leaves FCIR_INIT.
 *
 * The cookie is named "<pid>-<seq>" (using state->cookie_seq) and the
 * file is created at state->path_cookie_prefix followed by that name.
 * The cookie is registered in state->cookies before the file is
 * created and is removed again before returning.
 *
 * The caller must hold state->main_lock; pthread_cond_wait() releases
 * it while this thread sleeps and re-acquires it before returning.
 *
 * Returns the final cookie result: FCIR_ERROR if the cookie file could
 * not be created, otherwise whatever value was stored in the cookie
 * while we waited (with_lock__mark_cookies_seen() stores FCIR_SEEN and
 * with_lock__abort_all_cookies() stores FCIR_ABORT).
 */
static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
        struct fsmonitor_daemon_state *state)
{
        /* assert current thread holding state->main_lock */

        int fd;
        struct fsmonitor_cookie_item *cookie;
        struct strbuf cookie_pathname = STRBUF_INIT;
        struct strbuf cookie_filename = STRBUF_INIT;
        enum fsmonitor_cookie_item_result result;
        int my_cookie_seq;

        CALLOC_ARRAY(cookie, 1);

        my_cookie_seq = state->cookie_seq++;

        strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);

        strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
        strbuf_addbuf(&cookie_pathname, &cookie_filename);

        /* The cookie takes over the file name buffer; it is freed at `done`. */
        cookie->name = strbuf_detach(&cookie_filename, NULL);
        cookie->result = FCIR_INIT;
        hashmap_entry_init(&cookie->entry, strhash(cookie->name));

        /*
         * Register the cookie before touching the disk.
         * NOTE(review): presumably so that the event for the file can
         * always be matched to a registered cookie -- confirm against
         * the listener code.
         */
        hashmap_add(&state->cookies, &cookie->entry);

        trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
                         cookie->name, cookie_pathname.buf);

        /*
         * Create the cookie file on disk and then wait for a notification
         * that the listener thread has seen it.
         */
        fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
        if (fd < 0) {
                error_errno(_("could not create fsmonitor cookie '%s'"),
                            cookie->name);

                cookie->result = FCIR_ERROR;
                goto done;
        }

        /*
         * Technically, close() and unlink() can fail, but we don't
         * care here.  We only created the file to trigger a watch
         * event from the FS so that we know when we're up to date.
         */
        close(fd);
        unlink(cookie_pathname.buf);

        /*
         * Technically, this is an infinite wait (well, unless another
         * thread sends us an abort).  I'd like to change this to
         * use `pthread_cond_timedwait()` and return an error/timeout
         * and let the caller do the trivial response thing, but we
         * don't have that routine in our thread-utils.
         *
         * After extensive beta testing I'm not really worried about
         * this.  Also note that the above open() and unlink() calls
         * will cause at least two FS events on that path, so the odds
         * of getting stuck are pretty slim.
         */
        while (cookie->result == FCIR_INIT)
                pthread_cond_wait(&state->cookies_cond,
                                  &state->main_lock);

done:
        hashmap_remove(&state->cookies, &cookie->entry, NULL);

        /* Copy the result out before the cookie is freed. */
        result = cookie->result;

        free(cookie->name);
        free(cookie);
        /* cookie_filename was detached above, so only the pathname is left. */
        strbuf_release(&cookie_pathname);

        return result;
}
 221
 222 /*
 223  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 224  */
 225 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 226                                          const struct string_list *cookie_names)
 227 {
 228         /* assert current thread holding state->main_lock */
 229
 230         int k;
 231         int nr_seen = 0;
 232
 233         for (k = 0; k < cookie_names->nr; k++) {
 234                 struct fsmonitor_cookie_item key;
 235                 struct fsmonitor_cookie_item *cookie;
 236
 237                 key.name = cookie_names->items[k].string;
 238                 hashmap_entry_init(&key.entry, strhash(key.name));
 239
 240                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 241                 if (cookie) {
 242                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 243                                          cookie->name);
 244                         cookie->result = FCIR_SEEN;
 245                         nr_seen++;
 246                 }
 247         }
 248
 249         if (nr_seen)
 250                 pthread_cond_broadcast(&state->cookies_cond);
 251 }
 252
 253 /*
 254  * Set _ABORT on all pending cookies and wake up all client threads.
 255  */
 256 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 257 {
 258         /* assert current thread holding state->main_lock */
 259
 260         struct hashmap_iter iter;
 261         struct fsmonitor_cookie_item *cookie;
 262         int nr_aborted = 0;
 263
 264         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 265                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 266                                  cookie->name);
 267                 cookie->result = FCIR_ABORT;
 268                 nr_aborted++;
 269         }
 270
 271         if (nr_aborted)
 272                 pthread_cond_broadcast(&state->cookies_cond);
 273 }
 274
 275 /*
 276  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 277  * "token" as a virtual timestamp.  Clients can request a summary of all
 278  * created/deleted/modified files relative to a token.  In the response,
 279  * clients receive a new token for the next (relative) request.
 280  *
 281  *
 282  * Token Format
 283  * ============
 284  *
 285  * The contents of the token are private and provider-specific.
 286  *
 287  * For the built-in fsmonitor--daemon, we define a token as follows:
 288  *
 289  *     "builtin" ":" <token_id> ":" <sequence_nr>
 290  *
 291  * The "builtin" prefix is used as a namespace to avoid conflicts
 292  * with other providers (such as Watchman).
 293  *
 294  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 295  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 296  * events that happened while the daemon was monitoring (and in-sync
 297  * with the filesystem).
 298  *
 299  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 300  *     and does not define less-than/greater-than relationships.
 301  *     (There are too many race conditions to rely on file system
 302  *     event timestamps.)
 303  *
 304  * The <sequence_nr> is a simple integer incremented whenever the
 305  * daemon needs to make its state public.  For example, if 1000 file
 306  * system events come in, but no clients have requested the data,
 307  * the daemon can continue to accumulate file changes in the same
 308  * bin and does not need to advance the sequence number.  However,
 309  * as soon as a client does arrive, the daemon needs to start a new
 310  * bin and increment the sequence number.
 311  *
 312  *     The sequence number serves as the boundary between 2 sets
 313  *     of bins -- the older ones that the client has already seen
 314  *     and the newer ones that it hasn't.
 315  *
 316  * When a new <token_id> is created, the <sequence_nr> is reset to
 317  * zero.
 318  *
 319  *
 320  * About Token Ids
 321  * ===============
 322  *
 323  * A new token_id is created:
 324  *
 325  * [1] each time the daemon is started.
 326  *
 327  * [2] any time that the daemon must re-sync with the filesystem
 328  *     (such as when the kernel drops or we miss events on a very
 329  *     active volume).
 330  *
 331  * [3] in response to a client "flush" command (for dropped event
 332  *     testing).
 333  *
 334  * When a new token_id is created, the daemon is free to discard all
 335  * cached filesystem events associated with any previous token_ids.
 336  * Events associated with a non-current token_id will never be sent
 * to a client.  A token_id change implicitly means that the daemon
 * has a gap in its event history.
 339  *
 340  * Therefore, clients that present a token with a stale (non-current)
 341  * token_id will always be given a trivial response.
 342  */
struct fsmonitor_token_data {
        /* The <token_id> part of "builtin:<token_id>:<sequence_nr>". */
        struct strbuf token_id;
        /* Newest batch; its seq_nr is what goes into the response token. */
        struct fsmonitor_batch *batch_head;
        /* Oldest batch still kept; moved forward when old batches are truncated. */
        struct fsmonitor_batch *batch_tail;
        /* Readers using this data outside the lock; must be 0 when freed. */
        uint64_t client_ref_count;
};
 349
/*
 * One batch ("bin") of changed paths, linked into a token's batch list.
 */
struct fsmonitor_batch {
        /* Next entry in the list; NULL ends the list. */
        struct fsmonitor_batch *next;
        /* The <sequence_nr> reported for this batch in a response token. */
        uint64_t batch_seq_nr;
        /* Array of interned strings; the array is owned, the strings are not. */
        const char **interned_paths;
        /* Used and allocated element counts of `interned_paths`. */
        size_t nr, alloc;
        /* time() at which the batch was pinned; 0 while it is not pinned. */
        time_t pinned_time;
};
 357
 358 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 359 {
 360         static int test_env_value = -1;
 361         static uint64_t flush_count = 0;
 362         struct fsmonitor_token_data *token;
 363         struct fsmonitor_batch *batch;
 364
 365         CALLOC_ARRAY(token, 1);
 366         batch = fsmonitor_batch__new();
 367
 368         strbuf_init(&token->token_id, 0);
 369         token->batch_head = batch;
 370         token->batch_tail = batch;
 371         token->client_ref_count = 0;
 372
 373         if (test_env_value < 0)
 374                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 375
 376         if (!test_env_value) {
 377                 struct timeval tv;
 378                 struct tm tm;
 379                 time_t secs;
 380
 381                 gettimeofday(&tv, NULL);
 382                 secs = tv.tv_sec;
 383                 gmtime_r(&secs, &tm);
 384
 385                 strbuf_addf(&token->token_id,
 386                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 387                             flush_count++,
 388                             getpid(),
 389                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 390                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 391                             (long)tv.tv_usec);
 392         } else {
 393                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 394         }
 395
 396         /*
 397          * We created a new <token_id> and are starting a new series
 398          * of tokens with a zero <seq_nr>.
 399          *
 400          * Since clients cannot guess our new (non test) <token_id>
 401          * they will always receive a trivial response (because of the
 402          * mismatch on the <token_id>).  The trivial response will
 403          * tell them our new <token_id> so that subsequent requests
 404          * will be relative to our new series.  (And when sending that
 405          * response, we pin the current head of the batch list.)
 406          *
 407          * Even if the client correctly guesses the <token_id>, their
 408          * request of "builtin:<token_id>:0" asks for all changes MORE
 409          * RECENT than batch/bin 0.
 410          *
 411          * This implies that it is a waste to accumulate paths in the
 412          * initial batch/bin (because they will never be transmitted).
 413          *
 414          * So the daemon could be running for days and watching the
 415          * file system, but doesn't need to actually accumulate any
 416          * paths UNTIL we need to set a reference point for a later
 417          * relative request.
 418          *
 419          * However, it is very useful for testing to always have a
 420          * reference point set.  Pin batch 0 to force early file system
 421          * events to accumulate.
 422          */
 423         if (test_env_value)
 424                 batch->pinned_time = time(NULL);
 425
 426         return token;
 427 }
 428
/*
 * Allocate a single batch with CALLOC_ARRAY() and return it.  The
 * caller owns the result; fsmonitor_batch__free_list() releases a
 * list of batches.
 */
struct fsmonitor_batch *fsmonitor_batch__new(void)
{
        struct fsmonitor_batch *batch;

        CALLOC_ARRAY(batch, 1);

        return batch;
}
 437
 438 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 439 {
 440         while (batch) {
 441                 struct fsmonitor_batch *next = batch->next;
 442
 443                 /*
 444                  * The actual strings within the array of this batch
 445                  * are interned, so we don't own them.  We only own
 446                  * the array.
 447                  */
 448                 free(batch->interned_paths);
 449                 free(batch);
 450
 451                 batch = next;
 452         }
 453 }
 454
 455 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 456                                const char *path)
 457 {
 458         const char *interned_path = strintern(path);
 459
 460         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 461
 462         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 463         batch->interned_paths[batch->nr++] = interned_path;
 464 }
 465
 466 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 467                                      const struct fsmonitor_batch *batch_src)
 468 {
 469         size_t k;
 470
 471         ALLOC_GROW(batch_dest->interned_paths,
 472                    batch_dest->nr + batch_src->nr + 1,
 473                    batch_dest->alloc);
 474
 475         for (k = 0; k < batch_src->nr; k++)
 476                 batch_dest->interned_paths[batch_dest->nr++] =
 477                         batch_src->interned_paths[k];
 478 }
 479
 480 /*
 481  * To keep the batch list from growing unbounded in response to filesystem
 482  * activity, we try to truncate old batches from the end of the list as
 483  * they become irrelevant.
 484  *
 485  * We assume that the .git/index will be updated with the most recent token
 486  * any time the index is updated.  And future commands will only ask for
 487  * recent changes *since* that new token.  So as tokens advance into the
 488  * future, older batch items will never be requested/needed.  So we can
 489  * truncate them without loss of functionality.
 490  *
 491  * However, multiple commands may be talking to the daemon concurrently
 492  * or perform a slow command, so a little "token skew" is possible.
 493  * Therefore, we want this to be a little bit lazy and have a generous
 494  * delay.
 495  *
 496  * The current reader thread walked backwards in time from `token->batch_head`
 497  * back to `batch_marker` somewhere in the middle of the batch list.
 498  *
 499  * Let's walk backwards in time from that marker an arbitrary delay
 500  * and truncate the list there.  Note that these timestamps are completely
 501  * artificial (based on when we pinned the batch item) and not on any
 502  * filesystem activity.
 503  *
 504  * Return the obsolete portion of the list after we have removed it from
 505  * the official list so that the caller can free it after leaving the lock.
 506  */
 507 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 508
 509 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 510         struct fsmonitor_daemon_state *state,
 511         const struct fsmonitor_batch *batch_marker)
 512 {
 513         /* assert current thread holding state->main_lock */
 514
 515         const struct fsmonitor_batch *batch;
 516         struct fsmonitor_batch *remainder;
 517
 518         if (!batch_marker)
 519                 return NULL;
 520
 521         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 522                          batch_marker->batch_seq_nr,
 523                          (uint64_t)batch_marker->pinned_time);
 524
 525         for (batch = batch_marker; batch; batch = batch->next) {
 526                 time_t t;
 527
 528                 if (!batch->pinned_time) /* an overflow batch */
 529                         continue;
 530
 531                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 532                 if (t > batch_marker->pinned_time) /* too close to marker */
 533                         continue;
 534
 535                 goto truncate_past_here;
 536         }
 537
 538         return NULL;
 539
 540 truncate_past_here:
 541         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 542
 543         remainder = ((struct fsmonitor_batch *)batch)->next;
 544         ((struct fsmonitor_batch *)batch)->next = NULL;
 545
 546         return remainder;
 547 }
 548
 549 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 550 {
 551         if (!token)
 552                 return;
 553
 554         assert(token->client_ref_count == 0);
 555
 556         strbuf_release(&token->token_id);
 557
 558         fsmonitor_batch__free_list(token->batch_head);
 559
 560         free(token);
 561 }
 562
 563 /*
 564  * Flush all of our cached data about the filesystem.  Call this if we
 565  * lose sync with the filesystem and miss some notification events.
 566  *
 567  * [1] If we are missing events, then we no longer have a complete
 568  *     history of the directory (relative to our current start token).
 569  *     We should create a new token and start fresh (as if we just
 570  *     booted up).
 571  *
 572  * [2] Some of those lost events may have been for cookie files.  We
 573  *     should assume the worst and abort them rather letting them starve.
 574  *
 575  * If there are no concurrent threads reading the current token data
 576  * series, we can free it now.  Otherwise, let the last reader free
 577  * it.
 578  *
 579  * Either way, the old token data series is no longer associated with
 580  * our state data.
 581  */
 582 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 583 {
 584         /* assert current thread holding state->main_lock */
 585
 586         struct fsmonitor_token_data *free_me = NULL;
 587         struct fsmonitor_token_data *new_one = NULL;
 588
 589         new_one = fsmonitor_new_token_data();
 590
 591         if (state->current_token_data->client_ref_count == 0)
 592                 free_me = state->current_token_data;
 593         state->current_token_data = new_one;
 594
 595         fsmonitor_free_token_data(free_me);
 596
 597         with_lock__abort_all_cookies(state);
 598 }
 599
/*
 * Locked entry point for a forced resync: take state->main_lock, run
 * with_lock__do_force_resync() and release the lock again.
 */
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
{
        pthread_mutex_lock(&state->main_lock);
        with_lock__do_force_resync(state);
        pthread_mutex_unlock(&state->main_lock);
}
 606
/*
 * Format an opaque token string to send to the client.
 *
 * Discards the previous contents of `response_token` and writes
 *
 *     "builtin:" <response_token_id> ":" <batch->batch_seq_nr>
 *
 * into it.
 */
static void with_lock__format_response_token(
        struct strbuf *response_token,
        const struct strbuf *response_token_id,
        const struct fsmonitor_batch *batch)
{
        /* assert current thread holding state->main_lock */

        strbuf_reset(response_token);
        strbuf_addf(response_token, "builtin:%s:%"PRIu64,
                    response_token_id->buf, batch->batch_seq_nr);
}
 621
 622 /*
 623  * Parse an opaque token from the client.
 624  * Returns -1 on error.
 625  */
 626 static int fsmonitor_parse_client_token(const char *buf_token,
 627                                         struct strbuf *requested_token_id,
 628                                         uint64_t *seq_nr)
 629 {
 630         const char *p;
 631         char *p_end;
 632
 633         strbuf_reset(requested_token_id);
 634         *seq_nr = 0;
 635
 636         if (!skip_prefix(buf_token, "builtin:", &p))
 637                 return -1;
 638
 639         while (*p && *p != ':')
 640                 strbuf_addch(requested_token_id, *p++);
 641         if (!*p++)
 642                 return -1;
 643
 644         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 645         if (*p_end)
 646                 return -1;
 647
 648         return 0;
 649 }
 650
/*
 * Instantiate the khash table type `kh_str_t` with `const char *`
 * keys, hashed and compared with kh_str_hash_func / kh_str_hash_equal.
 *
 * NOTE(review): the `0` argument looks like khash's "is_map" flag, which
 * would make this a set in which the `int` value type is unused --
 * confirm against khash.h.
 */
KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 652
 653 static int do_handle_client(struct fsmonitor_daemon_state *state,
 654                             const char *command,
 655                             ipc_server_reply_cb *reply,
 656                             struct ipc_server_reply_data *reply_data)
 657 {
 658         struct fsmonitor_token_data *token_data = NULL;
 659         struct strbuf response_token = STRBUF_INIT;
 660         struct strbuf requested_token_id = STRBUF_INIT;
 661         struct strbuf payload = STRBUF_INIT;
 662         uint64_t requested_oldest_seq_nr = 0;
 663         uint64_t total_response_len = 0;
 664         const char *p;
 665         const struct fsmonitor_batch *batch_head;
 666         const struct fsmonitor_batch *batch;
 667         struct fsmonitor_batch *remainder = NULL;
 668         intmax_t count = 0, duplicates = 0;
 669         kh_str_t *shown;
 670         int hash_ret;
 671         int do_trivial = 0;
 672         int do_flush = 0;
 673         int do_cookie = 0;
 674         enum fsmonitor_cookie_item_result cookie_result;
 675
 676         /*
 677          * We expect `command` to be of the form:
 678          *
 679          * <command> := quit NUL
 680          *            | flush NUL
 681          *            | <V1-time-since-epoch-ns> NUL
 682          *            | <V2-opaque-fsmonitor-token> NUL
 683          */
 684
 685         if (!strcmp(command, "quit")) {
 686                 /*
 687                  * A client has requested over the socket/pipe that the
 688                  * daemon shutdown.
 689                  *
 690                  * Tell the IPC thread pool to shutdown (which completes
 691                  * the await in the main thread (which can stop the
 692                  * fsmonitor listener thread)).
 693                  *
 694                  * There is no reply to the client.
 695                  */
 696                 return SIMPLE_IPC_QUIT;
 697
 698         } else if (!strcmp(command, "flush")) {
 699                 /*
 700                  * Flush all of our cached data and generate a new token
 701                  * just like if we lost sync with the filesystem.
 702                  *
 703                  * Then send a trivial response using the new token.
 704                  */
 705                 do_flush = 1;
 706                 do_trivial = 1;
 707
 708         } else if (!skip_prefix(command, "builtin:", &p)) {
 709                 /* assume V1 timestamp or garbage */
 710
 711                 char *p_end;
 712
 713                 strtoumax(command, &p_end, 10);
 714                 trace_printf_key(&trace_fsmonitor,
 715                                  ((*p_end) ?
 716                                   "fsmonitor: invalid command line '%s'" :
 717                                   "fsmonitor: unsupported V1 protocol '%s'"),
 718                                  command);
 719                 do_trivial = 1;
 720                 do_cookie = 1;
 721
 722         } else {
 723                 /* We have "builtin:*" */
 724                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 725                                                  &requested_oldest_seq_nr)) {
 726                         trace_printf_key(&trace_fsmonitor,
 727                                          "fsmonitor: invalid V2 protocol token '%s'",
 728                                          command);
 729                         do_trivial = 1;
 730                         do_cookie = 1;
 731
 732                 } else {
 733                         /*
 734                          * We have a V2 valid token:
 735                          *     "builtin:<token_id>:<seq_nr>"
 736                          */
 737                         do_cookie = 1;
 738                 }
 739         }
 740
 741         pthread_mutex_lock(&state->main_lock);
 742
 743         if (!state->current_token_data)
 744                 BUG("fsmonitor state does not have a current token");
 745
 746         /*
 747          * Write a cookie file inside the directory being watched in
 748          * an effort to flush out existing filesystem events that we
 749          * actually care about.  Suspend this client thread until we
 750          * see the filesystem events for this cookie file.
 751          *
 752          * Creating the cookie lets us guarantee that our FS listener
 753          * thread has drained the kernel queue and we are caught up
 754          * with the kernel.
 755          *
 756          * If we cannot create the cookie (or otherwise guarantee that
 757          * we are caught up), we send a trivial response.  We have to
 758          * assume that there might be some very, very recent activity
 759          * on the FS still in flight.
 760          */
 761         if (do_cookie) {
 762                 cookie_result = with_lock__wait_for_cookie(state);
 763                 if (cookie_result != FCIR_SEEN) {
 764                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 765                               cookie_result);
 766                         do_trivial = 1;
 767                 }
 768         }
 769
 770         if (do_flush)
 771                 with_lock__do_force_resync(state);
 772
 773         /*
 774          * We mark the current head of the batch list as "pinned" so
 775          * that the listener thread will treat this item as read-only
 776          * (and prevent any more paths from being added to it) from
 777          * now on.
 778          */
 779         token_data = state->current_token_data;
 780         batch_head = token_data->batch_head;
 781         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 782
 783         /*
 784          * FSMonitor Protocol V2 requires that we send a response header
 785          * with a "new current token" and then all of the paths that changed
 786          * since the "requested token".  We send the seq_nr of the just-pinned
 787          * head batch so that future requests from a client will be relative
 788          * to it.
 789          */
 790         with_lock__format_response_token(&response_token,
 791                                          &token_data->token_id, batch_head);
 792
 793         reply(reply_data, response_token.buf, response_token.len + 1);
 794         total_response_len += response_token.len + 1;
 795
 796         trace2_data_string("fsmonitor", the_repository, "response/token",
 797                            response_token.buf);
 798         trace_printf_key(&trace_fsmonitor, "response token: %s",
 799                          response_token.buf);
 800
 801         if (!do_trivial) {
 802                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 803                         /*
 804                          * The client last spoke to a different daemon
 805                          * instance -OR- the daemon had to resync with
 806                          * the filesystem (and lost events), so reject.
 807                          */
 808                         trace2_data_string("fsmonitor", the_repository,
 809                                            "response/token", "different");
 810                         do_trivial = 1;
 811
 812                 } else if (requested_oldest_seq_nr <
 813                            token_data->batch_tail->batch_seq_nr) {
 814                         /*
 815                          * The client wants older events than we have for
 816                          * this token_id.  This means that the end of our
 817                          * batch list was truncated and we cannot give the
 818                          * client a complete snapshot relative to their
 819                          * request.
 820                          */
 821                         trace_printf_key(&trace_fsmonitor,
 822                                          "client requested truncated data");
 823                         do_trivial = 1;
 824                 }
 825         }
 826
 827         if (do_trivial) {
 828                 pthread_mutex_unlock(&state->main_lock);
 829
 830                 reply(reply_data, "/", 2);
 831
 832                 trace2_data_intmax("fsmonitor", the_repository,
 833                                    "response/trivial", 1);
 834
 835                 goto cleanup;
 836         }
 837
 838         /*
 839          * We're going to hold onto a pointer to the current
 840          * token-data while we walk the list of batches of files.
 841          * During this time, we will NOT be under the lock.
 842          * So we ref-count it.
 843          *
 844          * This allows the listener thread to continue prepending
 845          * new batches of items to the token-data (which we'll ignore).
 846          *
 847          * AND it allows the listener thread to do a token-reset
 848          * (and install a new `current_token_data`).
 849          */
 850         token_data->client_ref_count++;
 851
 852         pthread_mutex_unlock(&state->main_lock);
 853
 854         /*
 855          * The client request is relative to the token that they sent,
 856          * so walk the batch list backwards from the current head back
 857          * to the batch (sequence number) they named.
 858          *
 859          * We use khash to de-dup the list of pathnames.
 860          *
 861          * NEEDSWORK: each batch contains a list of interned strings,
 862          * so we only need to do pointer comparisons here to build the
 863          * hash table.  Currently, we're still comparing the string
 864          * values.
 865          */
 866         shown = kh_init_str();
 867         for (batch = batch_head;
 868              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 869              batch = batch->next) {
 870                 size_t k;
 871
 872                 for (k = 0; k < batch->nr; k++) {
 873                         const char *s = batch->interned_paths[k];
 874                         size_t s_len;
 875
 876                         if (kh_get_str(shown, s) != kh_end(shown))
 877                                 duplicates++;
 878                         else {
 879                                 kh_put_str(shown, s, &hash_ret);
 880
 881                                 trace_printf_key(&trace_fsmonitor,
 882                                                  "send[%"PRIuMAX"]: %s",
 883                                                  count, s);
 884
 885                                 /* Each path gets written with a trailing NUL */
 886                                 s_len = strlen(s) + 1;
 887
 888                                 if (payload.len + s_len >=
 889                                     LARGE_PACKET_DATA_MAX) {
 890                                         reply(reply_data, payload.buf,
 891                                               payload.len);
 892                                         total_response_len += payload.len;
 893                                         strbuf_reset(&payload);
 894                                 }
 895
 896                                 strbuf_add(&payload, s, s_len);
 897                                 count++;
 898                         }
 899                 }
 900         }
 901
 902         if (payload.len) {
 903                 reply(reply_data, payload.buf, payload.len);
 904                 total_response_len += payload.len;
 905         }
 906
 907         kh_release_str(shown);
 908
 909         pthread_mutex_lock(&state->main_lock);
 910
 911         if (token_data->client_ref_count > 0)
 912                 token_data->client_ref_count--;
 913
 914         if (token_data->client_ref_count == 0) {
 915                 if (token_data != state->current_token_data) {
 916                         /*
 917                          * The listener thread did a token-reset while we were
 918                          * walking the batch list.  Therefore, this token is
 919                          * stale and can be discarded completely.  If we are
 920                          * the last reader thread using this token, we own
 921                          * that work.
 922                          */
 923                         fsmonitor_free_token_data(token_data);
 924                 } else if (batch) {
 925                         /*
 926                          * We are holding the lock and are the only
 927                          * reader of the ref-counted portion of the
 928                          * list, so we get the honor of seeing if the
 929                          * list can be truncated to save memory.
 930                          *
 931                          * The main loop did not walk to the end of the
 932                          * list, so this batch is the first item in the
 933                          * batch-list that is older than the requested
 934                          * end-point sequence number.  See if the tail
 935                          * end of the list is obsolete.
 936                          */
 937                         remainder = with_lock__truncate_old_batches(state,
 938                                                                     batch);
 939                 }
 940         }
 941
 942         pthread_mutex_unlock(&state->main_lock);
 943
 944         if (remainder)
 945                 fsmonitor_batch__free_list(remainder);
 946
 947         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 948         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 949         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 950
 951 cleanup:
 952         strbuf_release(&response_token);
 953         strbuf_release(&requested_token_id);
 954         strbuf_release(&payload);
 955
 956         return 0;
 957 }
 958
 959 static ipc_server_application_cb handle_client;
 960
 961 static int handle_client(void *data,
 962                          const char *command, size_t command_len,
 963                          ipc_server_reply_cb *reply,
 964                          struct ipc_server_reply_data *reply_data)
 965 {
 966         struct fsmonitor_daemon_state *state = data;
 967         int result;
 968
 969         /*
 970          * The Simple IPC API now supports {char*, len} arguments, but
 971          * FSMonitor always uses proper null-terminated strings, so
 972          * we can ignore the command_len argument.  (Trust, but verify.)
 973          */
 974         if (command_len != strlen(command))
 975                 BUG("FSMonitor assumes text messages");
 976
 977         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 978
 979         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 980         trace2_data_string("fsmonitor", the_repository, "request", command);
 981
 982         result = do_handle_client(state, command, reply, reply_data);
 983
 984         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 985
 986         return result;
 987 }
 988
/*
 * Names of the nested directories in which the daemon keeps its cookie
 * files.  FSMONITOR_COOKIE_PREFIX is the resulting relative path prefix,
 * "<fsmonitor-dir>/<cookie-dir>/", including the trailing slash.
 */
#define FSMONITOR_DIR           "fsmonitor--daemon"
#define FSMONITOR_COOKIE_DIR    "cookies"
#define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 992
 993 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 994         const char *rel)
 995 {
 996         if (fspathncmp(rel, ".git", 4))
 997                 return IS_WORKDIR_PATH;
 998         rel += 4;
 999
1000         if (!*rel)
1001                 return IS_DOT_GIT;
1002         if (*rel != '/')
1003                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
1004         rel++;
1005
1006         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1007                         strlen(FSMONITOR_COOKIE_PREFIX)))
1008                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1009
1010         return IS_INSIDE_DOT_GIT;
1011 }
1012
1013 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1014         const char *rel)
1015 {
1016         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1017                         strlen(FSMONITOR_COOKIE_PREFIX)))
1018                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1019
1020         return IS_INSIDE_GITDIR;
1021 }
1022
1023 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1024         struct fsmonitor_daemon_state *state,
1025         const char *path)
1026 {
1027         const char *rel;
1028
1029         if (fspathncmp(path, state->path_worktree_watch.buf,
1030                        state->path_worktree_watch.len))
1031                 return IS_OUTSIDE_CONE;
1032
1033         rel = path + state->path_worktree_watch.len;
1034
1035         if (!*rel)
1036                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1037         if (*rel != '/')
1038                 return IS_OUTSIDE_CONE;
1039         rel++;
1040
1041         return fsmonitor_classify_path_workdir_relative(rel);
1042 }
1043
1044 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1045         struct fsmonitor_daemon_state *state,
1046         const char *path)
1047 {
1048         const char *rel;
1049         enum fsmonitor_path_type t;
1050
1051         t = try_classify_workdir_abs_path(state, path);
1052         if (state->nr_paths_watching == 1)
1053                 return t;
1054         if (t != IS_OUTSIDE_CONE)
1055                 return t;
1056
1057         if (fspathncmp(path, state->path_gitdir_watch.buf,
1058                        state->path_gitdir_watch.len))
1059                 return IS_OUTSIDE_CONE;
1060
1061         rel = path + state->path_gitdir_watch.len;
1062
1063         if (!*rel)
1064                 return IS_GITDIR; /* it is the <gitdir> exactly */
1065         if (*rel != '/')
1066                 return IS_OUTSIDE_CONE;
1067         rel++;
1068
1069         return fsmonitor_classify_path_gitdir_relative(rel);
1070 }
1071
1072 /*
1073  * We try to combine small batches at the front of the batch-list to avoid
1074  * having a long list.  This hopefully makes it a little easier when we want
1075  * to truncate and maintain the list.  However, we don't want the paths array
1076  * to just keep growing and growing with realloc, so we insert an arbitrary
1077  * limit.
1078  */
1079 #define MY_COMBINE_LIMIT (1024)
1080
1081 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1082                        struct fsmonitor_batch *batch,
1083                        const struct string_list *cookie_names)
1084 {
1085         if (!batch && !cookie_names->nr)
1086                 return;
1087
1088         pthread_mutex_lock(&state->main_lock);
1089
1090         if (batch) {
1091                 struct fsmonitor_batch *head;
1092
1093                 head = state->current_token_data->batch_head;
1094                 if (!head) {
1095                         BUG("token does not have batch");
1096                 } else if (head->pinned_time) {
1097                         /*
1098                          * We cannot alter the current batch list
1099                          * because:
1100                          *
1101                          * [a] it is being transmitted to at least one
1102                          * client and the handle_client() thread has a
1103                          * ref-count, but not a lock on the batch list
1104                          * starting with this item.
1105                          *
1106                          * [b] it has been transmitted in the past to
1107                          * at least one client such that future
1108                          * requests are relative to this head batch.
1109                          *
1110                          * So, we can only prepend a new batch onto
1111                          * the front of the list.
1112                          */
1113                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1114                         batch->next = head;
1115                         state->current_token_data->batch_head = batch;
1116                 } else if (!head->batch_seq_nr) {
1117                         /*
1118                          * Batch 0 is unpinned.  See the note in
1119                          * `fsmonitor_new_token_data()` about why we
1120                          * don't need to accumulate these paths.
1121                          */
1122                         fsmonitor_batch__free_list(batch);
1123                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1124                         /*
1125                          * The head batch in the list has never been
1126                          * transmitted to a client, but folding the
1127                          * contents of the new batch onto it would
1128                          * exceed our arbitrary limit, so just prepend
1129                          * the new batch onto the list.
1130                          */
1131                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1132                         batch->next = head;
1133                         state->current_token_data->batch_head = batch;
1134                 } else {
1135                         /*
1136                          * We are free to add the paths in the given
1137                          * batch onto the end of the current head batch.
1138                          */
1139                         fsmonitor_batch__combine(head, batch);
1140                         fsmonitor_batch__free_list(batch);
1141                 }
1142         }
1143
1144         if (cookie_names->nr)
1145                 with_lock__mark_cookies_seen(state, cookie_names);
1146
1147         pthread_mutex_unlock(&state->main_lock);
1148 }
1149
/*
 * Thread entry point for the health thread.
 *
 * `_state` is the `struct fsmonitor_daemon_state` of the daemon.  The
 * thread registers itself with trace2 as "fsm-health", runs
 * fsm_health__loop() until it returns, and always returns NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        trace2_thread_start("fsm-health");
        fsm_health__loop(_state);
        trace2_thread_exit();

        return NULL;
}
1161
1162 static void *fsm_listen__thread_proc(void *_state)
1163 {
1164         struct fsmonitor_daemon_state *state = _state;
1165
1166         trace2_thread_start("fsm-listen");
1167
1168         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1169                          state->path_worktree_watch.buf);
1170         if (state->nr_paths_watching > 1)
1171                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1172                                  state->path_gitdir_watch.buf);
1173
1174         fsm_listen__loop(state);
1175
1176         pthread_mutex_lock(&state->main_lock);
1177         if (state->current_token_data &&
1178             state->current_token_data->client_ref_count == 0)
1179                 fsmonitor_free_token_data(state->current_token_data);
1180         state->current_token_data = NULL;
1181         pthread_mutex_unlock(&state->main_lock);
1182
1183         trace2_thread_exit();
1184         return NULL;
1185 }
1186
1187 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1188 {
1189         struct ipc_server_opts ipc_opts = {
1190                 .nr_threads = fsmonitor__ipc_threads,
1191
1192                 /*
1193                  * We know that there are no other active threads yet,
1194                  * so we can let the IPC layer temporarily chdir() if
1195                  * it needs to when creating the server side of the
1196                  * Unix domain socket.
1197                  */
1198                 .uds_disallow_chdir = 0
1199         };
1200         int health_started = 0;
1201         int listener_started = 0;
1202         int err = 0;
1203
1204         /*
1205          * Start the IPC thread pool before the we've started the file
1206          * system event listener thread so that we have the IPC handle
1207          * before we need it.
1208          */
1209         if (ipc_server_run_async(&state->ipc_server_data,
1210                                  state->path_ipc.buf, &ipc_opts,
1211                                  handle_client, state))
1212                 return error_errno(
1213                         _("could not start IPC thread pool on '%s'"),
1214                         state->path_ipc.buf);
1215
1216         /*
1217          * Start the fsmonitor listener thread to collect filesystem
1218          * events.
1219          */
1220         if (pthread_create(&state->listener_thread, NULL,
1221                            fsm_listen__thread_proc, state)) {
1222                 ipc_server_stop_async(state->ipc_server_data);
1223                 err = error(_("could not start fsmonitor listener thread"));
1224                 goto cleanup;
1225         }
1226         listener_started = 1;
1227
1228         /*
1229          * Start the health thread to watch over our process.
1230          */
1231         if (pthread_create(&state->health_thread, NULL,
1232                            fsm_health__thread_proc, state)) {
1233                 ipc_server_stop_async(state->ipc_server_data);
1234                 err = error(_("could not start fsmonitor health thread"));
1235                 goto cleanup;
1236         }
1237         health_started = 1;
1238
1239         /*
1240          * The daemon is now fully functional in background threads.
1241          * Our primary thread should now just wait while the threads
1242          * do all the work.
1243          */
1244 cleanup:
1245         /*
1246          * Wait for the IPC thread pool to shutdown (whether by client
1247          * request, from filesystem activity, or an error).
1248          */
1249         ipc_server_await(state->ipc_server_data);
1250
1251         /*
1252          * The fsmonitor listener thread may have received a shutdown
1253          * event from the IPC thread pool, but it doesn't hurt to tell
1254          * it again.  And wait for it to shutdown.
1255          */
1256         if (listener_started) {
1257                 fsm_listen__stop_async(state);
1258                 pthread_join(state->listener_thread, NULL);
1259         }
1260
1261         if (health_started) {
1262                 fsm_health__stop_async(state);
1263                 pthread_join(state->health_thread, NULL);
1264         }
1265
1266         if (err)
1267                 return err;
1268         if (state->listen_error_code)
1269                 return state->listen_error_code;
1270         if (state->health_error_code)
1271                 return state->health_error_code;
1272         return 0;
1273 }
1274
1275 static int fsmonitor_run_daemon(void)
1276 {
1277         struct fsmonitor_daemon_state state;
1278         const char *home;
1279         int err;
1280
1281         memset(&state, 0, sizeof(state));
1282
1283         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1284         pthread_mutex_init(&state.main_lock, NULL);
1285         pthread_cond_init(&state.cookies_cond, NULL);
1286         state.listen_error_code = 0;
1287         state.health_error_code = 0;
1288         state.current_token_data = fsmonitor_new_token_data();
1289
1290         /* Prepare to (recursively) watch the <worktree-root> directory. */
1291         strbuf_init(&state.path_worktree_watch, 0);
1292         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1293         state.nr_paths_watching = 1;
1294
1295         strbuf_init(&state.alias.alias, 0);
1296         strbuf_init(&state.alias.points_to, 0);
1297         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1298                 goto done;
1299
1300         /*
1301          * We create and delete cookie files somewhere inside the .git
1302          * directory to help us keep sync with the file system.  If
1303          * ".git" is not a directory, then <gitdir> is not inside the
1304          * cone of <worktree-root>, so set up a second watch to watch
1305          * the <gitdir> so that we get events for the cookie files.
1306          */
1307         strbuf_init(&state.path_gitdir_watch, 0);
1308         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1309         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1310         if (!is_directory(state.path_gitdir_watch.buf)) {
1311                 strbuf_reset(&state.path_gitdir_watch);
1312                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1313                 state.nr_paths_watching = 2;
1314         }
1315
1316         /*
1317          * We will write filesystem syncing cookie files into
1318          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1319          *
1320          * The extra layers of subdirectories here keep us from
1321          * changing the mtime on ".git/" or ".git/foo/" when we create
1322          * or delete cookie files.
1323          *
1324          * There have been problems with some IDEs that do a
1325          * non-recursive watch of the ".git/" directory and run a
1326          * series of commands any time something happens.
1327          *
1328          * For example, if we place our cookie files directly in
1329          * ".git/" or ".git/foo/" then a `git status` (or similar
1330          * command) from the IDE will cause a cookie file to be
1331          * created in one of those dirs.  This causes the mtime of
1332          * those dirs to change.  This triggers the IDE's watch
1333          * notification.  This triggers the IDE to run those commands
1334          * again.  And the process repeats and the machine never goes
1335          * idle.
1336          *
1337          * Adding the extra layers of subdirectories prevents the
1338          * mtime of ".git/" and ".git/foo" from changing when a
1339          * cookie file is created.
1340          */
1341         strbuf_init(&state.path_cookie_prefix, 0);
1342         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1343
1344         strbuf_addch(&state.path_cookie_prefix, '/');
1345         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1346         mkdir(state.path_cookie_prefix.buf, 0777);
1347
1348         strbuf_addch(&state.path_cookie_prefix, '/');
1349         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1350         mkdir(state.path_cookie_prefix.buf, 0777);
1351
1352         strbuf_addch(&state.path_cookie_prefix, '/');
1353
1354         /*
1355          * We create a named-pipe or unix domain socket inside of the
1356          * ".git" directory.  (Well, on Windows, we base our named
1357          * pipe in the NPFS on the absolute path of the git
1358          * directory.)
1359          */
1360         strbuf_init(&state.path_ipc, 0);
1361         strbuf_addstr(&state.path_ipc,
1362                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1363
1364         /*
1365          * Confirm that we can create platform-specific resources for the
1366          * filesystem listener before we bother starting all the threads.
1367          */
1368         if (fsm_listen__ctor(&state)) {
1369                 err = error(_("could not initialize listener thread"));
1370                 goto done;
1371         }
1372
1373         if (fsm_health__ctor(&state)) {
1374                 err = error(_("could not initialize health thread"));
1375                 goto done;
1376         }
1377
1378         /*
1379          * CD out of the worktree root directory.
1380          *
1381          * The common Git startup mechanism causes our CWD to be the
1382          * root of the worktree.  On Windows, this causes our process
1383          * to hold a locked handle on the CWD.  This prevents the
1384          * worktree from being moved or deleted while the daemon is
1385          * running.
1386          *
1387          * We assume that our FS and IPC listener threads have either
1388          * opened all of the handles that they need or will do
1389          * everything using absolute paths.
1390          */
1391         home = getenv("HOME");
1392         if (home && *home && chdir(home))
1393                 die_errno(_("could not cd home '%s'"), home);
1394
1395         err = fsmonitor_run_daemon_1(&state);
1396
1397 done:
1398         pthread_cond_destroy(&state.cookies_cond);
1399         pthread_mutex_destroy(&state.main_lock);
1400         fsm_listen__dtor(&state);
1401         fsm_health__dtor(&state);
1402
1403         ipc_server_free(state.ipc_server_data);
1404
1405         strbuf_release(&state.path_worktree_watch);
1406         strbuf_release(&state.path_gitdir_watch);
1407         strbuf_release(&state.path_cookie_prefix);
1408         strbuf_release(&state.path_ipc);
1409         strbuf_release(&state.alias.alias);
1410         strbuf_release(&state.alias.points_to);
1411
1412         return err;
1413 }
1414
1415 static int try_to_run_foreground_daemon(int detach_console)
1416 {
1417         /*
1418          * Technically, we don't need to probe for an existing daemon
1419          * process, since we could just call `fsmonitor_run_daemon()`
1420          * and let it fail if the pipe/socket is busy.
1421          *
1422          * However, this method gives us a nicer error message for a
1423          * common error case.
1424          */
1425         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1426                 die(_("fsmonitor--daemon is already running '%s'"),
1427                     the_repository->worktree);
1428
1429         if (fsmonitor__announce_startup) {
1430                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1431                         the_repository->worktree);
1432                 fflush(stderr);
1433         }
1434
1435 #ifdef GIT_WINDOWS_NATIVE
1436         if (detach_console)
1437                 FreeConsole();
1438 #endif
1439
1440         return !!fsmonitor_run_daemon();
1441 }
1442
1443 static start_bg_wait_cb bg_wait_cb;
1444
1445 static int bg_wait_cb(const struct child_process *cp, void *cb_data)
1446 {
1447         enum ipc_active_state s = fsmonitor_ipc__get_state();
1448
1449         switch (s) {
1450         case IPC_STATE__LISTENING:
1451                 /* child is "ready" */
1452                 return 0;
1453
1454         case IPC_STATE__NOT_LISTENING:
1455         case IPC_STATE__PATH_NOT_FOUND:
1456                 /* give child more time */
1457                 return 1;
1458
1459         default:
1460         case IPC_STATE__INVALID_PATH:
1461         case IPC_STATE__OTHER_ERROR:
1462                 /* all the time in world won't help */
1463                 return -1;
1464         }
1465 }
1466
1467 static int try_to_start_background_daemon(void)
1468 {
1469         struct child_process cp = CHILD_PROCESS_INIT;
1470         enum start_bg_result sbgr;
1471
1472         /*
1473          * Before we try to create a background daemon process, see
1474          * if a daemon process is already listening.  This makes it
1475          * easier for us to report an already-listening error to the
1476          * console, since our spawn/daemon can only report the success
1477          * of creating the background process (and not whether it
1478          * immediately exited).
1479          */
1480         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1481                 die(_("fsmonitor--daemon is already running '%s'"),
1482                     the_repository->worktree);
1483
1484         if (fsmonitor__announce_startup) {
1485                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1486                         the_repository->worktree);
1487                 fflush(stderr);
1488         }
1489
1490         cp.git_cmd = 1;
1491
1492         strvec_push(&cp.args, "fsmonitor--daemon");
1493         strvec_push(&cp.args, "run");
1494         strvec_push(&cp.args, "--detach");
1495         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1496
1497         cp.no_stdin = 1;
1498         cp.no_stdout = 1;
1499         cp.no_stderr = 1;
1500
1501         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1502                                 fsmonitor__start_timeout_sec);
1503
1504         switch (sbgr) {
1505         case SBGR_READY:
1506                 return 0;
1507
1508         default:
1509         case SBGR_ERROR:
1510         case SBGR_CB_ERROR:
1511                 return error(_("daemon failed to start"));
1512
1513         case SBGR_TIMEOUT:
1514                 return error(_("daemon not online yet"));
1515
1516         case SBGR_DIED:
1517                 return error(_("daemon terminated"));
1518         }
1519 }
1520
1521 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1522 {
1523         const char *subcmd;
1524         enum fsmonitor_reason reason;
1525         int detach_console = 0;
1526
1527         struct option options[] = {
1528                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1529                 OPT_INTEGER(0, "ipc-threads",
1530                             &fsmonitor__ipc_threads,
1531                             N_("use <n> ipc worker threads")),
1532                 OPT_INTEGER(0, "start-timeout",
1533                             &fsmonitor__start_timeout_sec,
1534                             N_("max seconds to wait for background daemon startup")),
1535
1536                 OPT_END()
1537         };
1538
1539         git_config(fsmonitor_config, NULL);
1540
1541         argc = parse_options(argc, argv, prefix, options,
1542                              builtin_fsmonitor__daemon_usage, 0);
1543         if (argc != 1)
1544                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1545         subcmd = argv[0];
1546
1547         if (fsmonitor__ipc_threads < 1)
1548                 die(_("invalid 'ipc-threads' value (%d)"),
1549                     fsmonitor__ipc_threads);
1550
1551         prepare_repo_settings(the_repository);
1552         /*
1553          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1554          * (without bothering to load the `core.fsmonitor` config settings).
1555          *
1556          * If the repo is not compatible, the repo-settings will be set to
1557          * incompatible rather than IPC, so we can use one of the __get
1558          * routines to detect the discrepancy.
1559          */
1560         fsm_settings__set_ipc(the_repository);
1561
1562         reason = fsm_settings__get_reason(the_repository);
1563         if (reason > FSMONITOR_REASON_OK)
1564                 die("%s",
1565                     fsm_settings__get_incompatible_msg(the_repository,
1566                                                        reason));
1567
1568         if (!strcmp(subcmd, "start"))
1569                 return !!try_to_start_background_daemon();
1570
1571         if (!strcmp(subcmd, "run"))
1572                 return !!try_to_run_foreground_daemon(detach_console);
1573
1574         if (!strcmp(subcmd, "stop"))
1575                 return !!do_as_client__send_stop();
1576
1577         if (!strcmp(subcmd, "status"))
1578                 return !!do_as_client__status();
1579
1580         die(_("Unhandled subcommand '%s'"), subcmd);
1581 }
1582
1583 #else
1584 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix UNUSED)
1585 {
1586         struct option options[] = {
1587                 OPT_END()
1588         };
1589
1590         if (argc == 2 && !strcmp(argv[1], "-h"))
1591                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1592
1593         die(_("fsmonitor--daemon not supported on this platform"));
1594 }
1595 #endif