builtin/fsmonitor--daemon.c

   1 #include "builtin.h"
   2 #include "abspath.h"
   3 #include "alloc.h"
   4 #include "config.h"
   5 #include "environment.h"
   6 #include "gettext.h"
   7 #include "parse-options.h"
   8 #include "fsmonitor.h"
   9 #include "fsmonitor-ipc.h"
  10 #include "fsmonitor-path-utils.h"
  11 #include "compat/fsmonitor/fsm-health.h"
  12 #include "compat/fsmonitor/fsm-listen.h"
  13 #include "fsmonitor--daemon.h"
  14 #include "simple-ipc.h"
  15 #include "khash.h"
  16 #include "pkt-line.h"
  17 #include "trace2.h"
  18
  19 static const char * const builtin_fsmonitor__daemon_usage[] = {
  20         N_("git fsmonitor--daemon start [<options>]"),
  21         N_("git fsmonitor--daemon run [<options>]"),
  22         "git fsmonitor--daemon stop",
  23         "git fsmonitor--daemon status",
  24         NULL
  25 };
  26
  27 #ifdef HAVE_FSMONITOR_DAEMON_BACKEND
  28 /*
  29  * Global state loaded from config.
  30  */
  31 #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads"
  32 static int fsmonitor__ipc_threads = 8;
  33
  34 #define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout"
  35 static int fsmonitor__start_timeout_sec = 60;
  36
  37 #define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup"
  38 static int fsmonitor__announce_startup = 0;
  39
  40 static int fsmonitor_config(const char *var, const char *value, void *cb)
  41 {
  42         if (!strcmp(var, FSMONITOR__IPC_THREADS)) {
  43                 int i = git_config_int(var, value);
  44                 if (i < 1)
  45                         return error(_("value of '%s' out of range: %d"),
  46                                      FSMONITOR__IPC_THREADS, i);
  47                 fsmonitor__ipc_threads = i;
  48                 return 0;
  49         }
  50
  51         if (!strcmp(var, FSMONITOR__START_TIMEOUT)) {
  52                 int i = git_config_int(var, value);
  53                 if (i < 0)
  54                         return error(_("value of '%s' out of range: %d"),
  55                                      FSMONITOR__START_TIMEOUT, i);
  56                 fsmonitor__start_timeout_sec = i;
  57                 return 0;
  58         }
  59
  60         if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) {
  61                 int is_bool;
  62                 int i = git_config_bool_or_int(var, value, &is_bool);
  63                 if (i < 0)
  64                         return error(_("value of '%s' not bool or int: %d"),
  65                                      var, i);
  66                 fsmonitor__announce_startup = i;
  67                 return 0;
  68         }
  69
  70         return git_default_config(var, value, cb);
  71 }
  72
  73 /*
  74  * Acting as a CLIENT.
  75  *
  76  * Send a "quit" command to the `git-fsmonitor--daemon` (if running)
  77  * and wait for it to shutdown.
  78  */
  79 static int do_as_client__send_stop(void)
  80 {
  81         struct strbuf answer = STRBUF_INIT;
  82         int ret;
  83
  84         ret = fsmonitor_ipc__send_command("quit", &answer);
  85
  86         /* The quit command does not return any response data. */
  87         strbuf_release(&answer);
  88
  89         if (ret)
  90                 return ret;
  91
  92         trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL);
  93         while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
  94                 sleep_millisec(50);
  95         trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL);
  96
  97         return 0;
  98 }
  99
 100 static int do_as_client__status(void)
 101 {
 102         enum ipc_active_state state = fsmonitor_ipc__get_state();
 103
 104         switch (state) {
 105         case IPC_STATE__LISTENING:
 106                 printf(_("fsmonitor-daemon is watching '%s'\n"),
 107                        the_repository->worktree);
 108                 return 0;
 109
 110         default:
 111                 printf(_("fsmonitor-daemon is not watching '%s'\n"),
 112                        the_repository->worktree);
 113                 return 1;
 114         }
 115 }
 116
 117 enum fsmonitor_cookie_item_result {
 118         FCIR_ERROR = -1, /* could not create cookie file ? */
 119         FCIR_INIT,
 120         FCIR_SEEN,
 121         FCIR_ABORT,
 122 };
 123
 124 struct fsmonitor_cookie_item {
 125         struct hashmap_entry entry;
 126         char *name;
 127         enum fsmonitor_cookie_item_result result;
 128 };
 129
 130 static int cookies_cmp(const void *data, const struct hashmap_entry *he1,
 131                      const struct hashmap_entry *he2, const void *keydata)
 132 {
 133         const struct fsmonitor_cookie_item *a =
 134                 container_of(he1, const struct fsmonitor_cookie_item, entry);
 135         const struct fsmonitor_cookie_item *b =
 136                 container_of(he2, const struct fsmonitor_cookie_item, entry);
 137
 138         return strcmp(a->name, keydata ? keydata : b->name);
 139 }
 140
 141 static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 142         struct fsmonitor_daemon_state *state)
 143 {
 144         /* assert current thread holding state->main_lock */
 145
 146         int fd;
 147         struct fsmonitor_cookie_item *cookie;
 148         struct strbuf cookie_pathname = STRBUF_INIT;
 149         struct strbuf cookie_filename = STRBUF_INIT;
 150         enum fsmonitor_cookie_item_result result;
 151         int my_cookie_seq;
 152
 153         CALLOC_ARRAY(cookie, 1);
 154
 155         my_cookie_seq = state->cookie_seq++;
 156
 157         strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq);
 158
 159         strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix);
 160         strbuf_addbuf(&cookie_pathname, &cookie_filename);
 161
 162         cookie->name = strbuf_detach(&cookie_filename, NULL);
 163         cookie->result = FCIR_INIT;
 164         hashmap_entry_init(&cookie->entry, strhash(cookie->name));
 165
 166         hashmap_add(&state->cookies, &cookie->entry);
 167
 168         trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 169                          cookie->name, cookie_pathname.buf);
 170
 171         /*
 172          * Create the cookie file on disk and then wait for a notification
 173          * that the listener thread has seen it.
 174          */
 175         fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600);
 176         if (fd < 0) {
 177                 error_errno(_("could not create fsmonitor cookie '%s'"),
 178                             cookie->name);
 179
 180                 cookie->result = FCIR_ERROR;
 181                 goto done;
 182         }
 183
 184         /*
 185          * Technically, close() and unlink() can fail, but we don't
 186          * care here.  We only created the file to trigger a watch
 187          * event from the FS to know that when we're up to date.
 188          */
 189         close(fd);
 190         unlink(cookie_pathname.buf);
 191
 192         /*
 193          * Technically, this is an infinite wait (well, unless another
 194          * thread sends us an abort).  I'd like to change this to
 195          * use `pthread_cond_timedwait()` and return an error/timeout
 196          * and let the caller do the trivial response thing, but we
 197          * don't have that routine in our thread-utils.
 198          *
 199          * After extensive beta testing I'm not really worried about
 200          * this.  Also note that the above open() and unlink() calls
 201          * will cause at least two FS events on that path, so the odds
 202          * of getting stuck are pretty slim.
 203          */
 204         while (cookie->result == FCIR_INIT)
 205                 pthread_cond_wait(&state->cookies_cond,
 206                                   &state->main_lock);
 207
 208 done:
 209         hashmap_remove(&state->cookies, &cookie->entry, NULL);
 210
 211         result = cookie->result;
 212
 213         free(cookie->name);
 214         free(cookie);
 215         strbuf_release(&cookie_pathname);
 216
 217         return result;
 218 }
 219
 220 /*
 221  * Mark these cookies as _SEEN and wake up the corresponding client threads.
 222  */
 223 static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state,
 224                                          const struct string_list *cookie_names)
 225 {
 226         /* assert current thread holding state->main_lock */
 227
 228         int k;
 229         int nr_seen = 0;
 230
 231         for (k = 0; k < cookie_names->nr; k++) {
 232                 struct fsmonitor_cookie_item key;
 233                 struct fsmonitor_cookie_item *cookie;
 234
 235                 key.name = cookie_names->items[k].string;
 236                 hashmap_entry_init(&key.entry, strhash(key.name));
 237
 238                 cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL);
 239                 if (cookie) {
 240                         trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'",
 241                                          cookie->name);
 242                         cookie->result = FCIR_SEEN;
 243                         nr_seen++;
 244                 }
 245         }
 246
 247         if (nr_seen)
 248                 pthread_cond_broadcast(&state->cookies_cond);
 249 }
 250
 251 /*
 252  * Set _ABORT on all pending cookies and wake up all client threads.
 253  */
 254 static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state)
 255 {
 256         /* assert current thread holding state->main_lock */
 257
 258         struct hashmap_iter iter;
 259         struct fsmonitor_cookie_item *cookie;
 260         int nr_aborted = 0;
 261
 262         hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) {
 263                 trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'",
 264                                  cookie->name);
 265                 cookie->result = FCIR_ABORT;
 266                 nr_aborted++;
 267         }
 268
 269         if (nr_aborted)
 270                 pthread_cond_broadcast(&state->cookies_cond);
 271 }
 272
 273 /*
 274  * Requests to and from a FSMonitor Protocol V2 provider use an opaque
 275  * "token" as a virtual timestamp.  Clients can request a summary of all
 276  * created/deleted/modified files relative to a token.  In the response,
 277  * clients receive a new token for the next (relative) request.
 278  *
 279  *
 280  * Token Format
 281  * ============
 282  *
 283  * The contents of the token are private and provider-specific.
 284  *
 285  * For the built-in fsmonitor--daemon, we define a token as follows:
 286  *
 287  *     "builtin" ":" <token_id> ":" <sequence_nr>
 288  *
 289  * The "builtin" prefix is used as a namespace to avoid conflicts
 290  * with other providers (such as Watchman).
 291  *
 292  * The <token_id> is an arbitrary OPAQUE string, such as a GUID,
 293  * UUID, or {timestamp,pid}.  It is used to group all filesystem
 294  * events that happened while the daemon was monitoring (and in-sync
 295  * with the filesystem).
 296  *
 297  *     Unlike FSMonitor Protocol V1, it is not defined as a timestamp
 298  *     and does not define less-than/greater-than relationships.
 299  *     (There are too many race conditions to rely on file system
 300  *     event timestamps.)
 301  *
 302  * The <sequence_nr> is a simple integer incremented whenever the
 303  * daemon needs to make its state public.  For example, if 1000 file
 304  * system events come in, but no clients have requested the data,
 305  * the daemon can continue to accumulate file changes in the same
 306  * bin and does not need to advance the sequence number.  However,
 307  * as soon as a client does arrive, the daemon needs to start a new
 308  * bin and increment the sequence number.
 309  *
 310  *     The sequence number serves as the boundary between 2 sets
 311  *     of bins -- the older ones that the client has already seen
 312  *     and the newer ones that it hasn't.
 313  *
 314  * When a new <token_id> is created, the <sequence_nr> is reset to
 315  * zero.
 316  *
 317  *
 318  * About Token Ids
 319  * ===============
 320  *
 321  * A new token_id is created:
 322  *
 323  * [1] each time the daemon is started.
 324  *
 325  * [2] any time that the daemon must re-sync with the filesystem
 326  *     (such as when the kernel drops or we miss events on a very
 327  *     active volume).
 328  *
 329  * [3] in response to a client "flush" command (for dropped event
 330  *     testing).
 331  *
 332  * When a new token_id is created, the daemon is free to discard all
 333  * cached filesystem events associated with any previous token_ids.
 334  * Events associated with a non-current token_id will never be sent
 335  * to a client.  A token_id change implicitly means that the daemon
  336  * has a gap in its event history.
 337  *
 338  * Therefore, clients that present a token with a stale (non-current)
 339  * token_id will always be given a trivial response.
 340  */
 341 struct fsmonitor_token_data {
 342         struct strbuf token_id;
 343         struct fsmonitor_batch *batch_head;
 344         struct fsmonitor_batch *batch_tail;
 345         uint64_t client_ref_count;
 346 };
 347
 348 struct fsmonitor_batch {
 349         struct fsmonitor_batch *next;
 350         uint64_t batch_seq_nr;
 351         const char **interned_paths;
 352         size_t nr, alloc;
 353         time_t pinned_time;
 354 };
 355
 356 static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
 357 {
 358         static int test_env_value = -1;
 359         static uint64_t flush_count = 0;
 360         struct fsmonitor_token_data *token;
 361         struct fsmonitor_batch *batch;
 362
 363         CALLOC_ARRAY(token, 1);
 364         batch = fsmonitor_batch__new();
 365
 366         strbuf_init(&token->token_id, 0);
 367         token->batch_head = batch;
 368         token->batch_tail = batch;
 369         token->client_ref_count = 0;
 370
 371         if (test_env_value < 0)
 372                 test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0);
 373
 374         if (!test_env_value) {
 375                 struct timeval tv;
 376                 struct tm tm;
 377                 time_t secs;
 378
 379                 gettimeofday(&tv, NULL);
 380                 secs = tv.tv_sec;
 381                 gmtime_r(&secs, &tm);
 382
 383                 strbuf_addf(&token->token_id,
 384                             "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ",
 385                             flush_count++,
 386                             getpid(),
 387                             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 388                             tm.tm_hour, tm.tm_min, tm.tm_sec,
 389                             (long)tv.tv_usec);
 390         } else {
 391                 strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
 392         }
 393
 394         /*
 395          * We created a new <token_id> and are starting a new series
 396          * of tokens with a zero <seq_nr>.
 397          *
 398          * Since clients cannot guess our new (non test) <token_id>
 399          * they will always receive a trivial response (because of the
 400          * mismatch on the <token_id>).  The trivial response will
 401          * tell them our new <token_id> so that subsequent requests
 402          * will be relative to our new series.  (And when sending that
 403          * response, we pin the current head of the batch list.)
 404          *
 405          * Even if the client correctly guesses the <token_id>, their
 406          * request of "builtin:<token_id>:0" asks for all changes MORE
 407          * RECENT than batch/bin 0.
 408          *
 409          * This implies that it is a waste to accumulate paths in the
 410          * initial batch/bin (because they will never be transmitted).
 411          *
 412          * So the daemon could be running for days and watching the
 413          * file system, but doesn't need to actually accumulate any
 414          * paths UNTIL we need to set a reference point for a later
 415          * relative request.
 416          *
 417          * However, it is very useful for testing to always have a
 418          * reference point set.  Pin batch 0 to force early file system
 419          * events to accumulate.
 420          */
 421         if (test_env_value)
 422                 batch->pinned_time = time(NULL);
 423
 424         return token;
 425 }
 426
 427 struct fsmonitor_batch *fsmonitor_batch__new(void)
 428 {
 429         struct fsmonitor_batch *batch;
 430
 431         CALLOC_ARRAY(batch, 1);
 432
 433         return batch;
 434 }
 435
 436 void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
 437 {
 438         while (batch) {
 439                 struct fsmonitor_batch *next = batch->next;
 440
 441                 /*
 442                  * The actual strings within the array of this batch
 443                  * are interned, so we don't own them.  We only own
 444                  * the array.
 445                  */
 446                 free(batch->interned_paths);
 447                 free(batch);
 448
 449                 batch = next;
 450         }
 451 }
 452
 453 void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
 454                                const char *path)
 455 {
 456         const char *interned_path = strintern(path);
 457
 458         trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
 459
 460         ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
 461         batch->interned_paths[batch->nr++] = interned_path;
 462 }
 463
 464 static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
 465                                      const struct fsmonitor_batch *batch_src)
 466 {
 467         size_t k;
 468
 469         ALLOC_GROW(batch_dest->interned_paths,
 470                    batch_dest->nr + batch_src->nr + 1,
 471                    batch_dest->alloc);
 472
 473         for (k = 0; k < batch_src->nr; k++)
 474                 batch_dest->interned_paths[batch_dest->nr++] =
 475                         batch_src->interned_paths[k];
 476 }
 477
 478 /*
 479  * To keep the batch list from growing unbounded in response to filesystem
 480  * activity, we try to truncate old batches from the end of the list as
 481  * they become irrelevant.
 482  *
 483  * We assume that the .git/index will be updated with the most recent token
 484  * any time the index is updated.  And future commands will only ask for
 485  * recent changes *since* that new token.  So as tokens advance into the
 486  * future, older batch items will never be requested/needed.  So we can
 487  * truncate them without loss of functionality.
 488  *
 489  * However, multiple commands may be talking to the daemon concurrently
 490  * or perform a slow command, so a little "token skew" is possible.
 491  * Therefore, we want this to be a little bit lazy and have a generous
 492  * delay.
 493  *
 494  * The current reader thread walked backwards in time from `token->batch_head`
 495  * back to `batch_marker` somewhere in the middle of the batch list.
 496  *
 497  * Let's walk backwards in time from that marker an arbitrary delay
 498  * and truncate the list there.  Note that these timestamps are completely
 499  * artificial (based on when we pinned the batch item) and not on any
 500  * filesystem activity.
 501  *
 502  * Return the obsolete portion of the list after we have removed it from
 503  * the official list so that the caller can free it after leaving the lock.
 504  */
 505 #define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */
 506
 507 static struct fsmonitor_batch *with_lock__truncate_old_batches(
 508         struct fsmonitor_daemon_state *state,
 509         const struct fsmonitor_batch *batch_marker)
 510 {
 511         /* assert current thread holding state->main_lock */
 512
 513         const struct fsmonitor_batch *batch;
 514         struct fsmonitor_batch *remainder;
 515
 516         if (!batch_marker)
 517                 return NULL;
 518
 519         trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")",
 520                          batch_marker->batch_seq_nr,
 521                          (uint64_t)batch_marker->pinned_time);
 522
 523         for (batch = batch_marker; batch; batch = batch->next) {
 524                 time_t t;
 525
 526                 if (!batch->pinned_time) /* an overflow batch */
 527                         continue;
 528
 529                 t = batch->pinned_time + MY_TIME_DELAY_SECONDS;
 530                 if (t > batch_marker->pinned_time) /* too close to marker */
 531                         continue;
 532
 533                 goto truncate_past_here;
 534         }
 535
 536         return NULL;
 537
 538 truncate_past_here:
 539         state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch;
 540
 541         remainder = ((struct fsmonitor_batch *)batch)->next;
 542         ((struct fsmonitor_batch *)batch)->next = NULL;
 543
 544         return remainder;
 545 }
 546
 547 static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
 548 {
 549         if (!token)
 550                 return;
 551
 552         assert(token->client_ref_count == 0);
 553
 554         strbuf_release(&token->token_id);
 555
 556         fsmonitor_batch__free_list(token->batch_head);
 557
 558         free(token);
 559 }
 560
 561 /*
 562  * Flush all of our cached data about the filesystem.  Call this if we
 563  * lose sync with the filesystem and miss some notification events.
 564  *
 565  * [1] If we are missing events, then we no longer have a complete
 566  *     history of the directory (relative to our current start token).
 567  *     We should create a new token and start fresh (as if we just
 568  *     booted up).
 569  *
 570  * [2] Some of those lost events may have been for cookie files.  We
  571  *     should assume the worst and abort them rather than letting them starve.
 572  *
 573  * If there are no concurrent threads reading the current token data
 574  * series, we can free it now.  Otherwise, let the last reader free
 575  * it.
 576  *
 577  * Either way, the old token data series is no longer associated with
 578  * our state data.
 579  */
 580 static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
 581 {
 582         /* assert current thread holding state->main_lock */
 583
 584         struct fsmonitor_token_data *free_me = NULL;
 585         struct fsmonitor_token_data *new_one = NULL;
 586
 587         new_one = fsmonitor_new_token_data();
 588
 589         if (state->current_token_data->client_ref_count == 0)
 590                 free_me = state->current_token_data;
 591         state->current_token_data = new_one;
 592
 593         fsmonitor_free_token_data(free_me);
 594
 595         with_lock__abort_all_cookies(state);
 596 }
 597
 598 void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 599 {
 600         pthread_mutex_lock(&state->main_lock);
 601         with_lock__do_force_resync(state);
 602         pthread_mutex_unlock(&state->main_lock);
 603 }
 604
 605 /*
 606  * Format an opaque token string to send to the client.
 607  */
 608 static void with_lock__format_response_token(
 609         struct strbuf *response_token,
 610         const struct strbuf *response_token_id,
 611         const struct fsmonitor_batch *batch)
 612 {
 613         /* assert current thread holding state->main_lock */
 614
 615         strbuf_reset(response_token);
 616         strbuf_addf(response_token, "builtin:%s:%"PRIu64,
 617                     response_token_id->buf, batch->batch_seq_nr);
 618 }
 619
 620 /*
 621  * Parse an opaque token from the client.
 622  * Returns -1 on error.
 623  */
 624 static int fsmonitor_parse_client_token(const char *buf_token,
 625                                         struct strbuf *requested_token_id,
 626                                         uint64_t *seq_nr)
 627 {
 628         const char *p;
 629         char *p_end;
 630
 631         strbuf_reset(requested_token_id);
 632         *seq_nr = 0;
 633
 634         if (!skip_prefix(buf_token, "builtin:", &p))
 635                 return -1;
 636
 637         while (*p && *p != ':')
 638                 strbuf_addch(requested_token_id, *p++);
 639         if (!*p++)
 640                 return -1;
 641
 642         *seq_nr = (uint64_t)strtoumax(p, &p_end, 10);
 643         if (*p_end)
 644                 return -1;
 645
 646         return 0;
 647 }
 648
 649 KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal)
 650
 651 static int do_handle_client(struct fsmonitor_daemon_state *state,
 652                             const char *command,
 653                             ipc_server_reply_cb *reply,
 654                             struct ipc_server_reply_data *reply_data)
 655 {
 656         struct fsmonitor_token_data *token_data = NULL;
 657         struct strbuf response_token = STRBUF_INIT;
 658         struct strbuf requested_token_id = STRBUF_INIT;
 659         struct strbuf payload = STRBUF_INIT;
 660         uint64_t requested_oldest_seq_nr = 0;
 661         uint64_t total_response_len = 0;
 662         const char *p;
 663         const struct fsmonitor_batch *batch_head;
 664         const struct fsmonitor_batch *batch;
 665         struct fsmonitor_batch *remainder = NULL;
 666         intmax_t count = 0, duplicates = 0;
 667         kh_str_t *shown;
 668         int hash_ret;
 669         int do_trivial = 0;
 670         int do_flush = 0;
 671         int do_cookie = 0;
 672         enum fsmonitor_cookie_item_result cookie_result;
 673
 674         /*
 675          * We expect `command` to be of the form:
 676          *
 677          * <command> := quit NUL
 678          *            | flush NUL
 679          *            | <V1-time-since-epoch-ns> NUL
 680          *            | <V2-opaque-fsmonitor-token> NUL
 681          */
 682
 683         if (!strcmp(command, "quit")) {
 684                 /*
 685                  * A client has requested over the socket/pipe that the
 686                  * daemon shutdown.
 687                  *
 688                  * Tell the IPC thread pool to shutdown (which completes
 689                  * the await in the main thread (which can stop the
 690                  * fsmonitor listener thread)).
 691                  *
 692                  * There is no reply to the client.
 693                  */
 694                 return SIMPLE_IPC_QUIT;
 695
 696         } else if (!strcmp(command, "flush")) {
 697                 /*
 698                  * Flush all of our cached data and generate a new token
 699                  * just like if we lost sync with the filesystem.
 700                  *
 701                  * Then send a trivial response using the new token.
 702                  */
 703                 do_flush = 1;
 704                 do_trivial = 1;
 705
 706         } else if (!skip_prefix(command, "builtin:", &p)) {
 707                 /* assume V1 timestamp or garbage */
 708
 709                 char *p_end;
 710
 711                 strtoumax(command, &p_end, 10);
 712                 trace_printf_key(&trace_fsmonitor,
 713                                  ((*p_end) ?
 714                                   "fsmonitor: invalid command line '%s'" :
 715                                   "fsmonitor: unsupported V1 protocol '%s'"),
 716                                  command);
 717                 do_trivial = 1;
 718                 do_cookie = 1;
 719
 720         } else {
 721                 /* We have "builtin:*" */
 722                 if (fsmonitor_parse_client_token(command, &requested_token_id,
 723                                                  &requested_oldest_seq_nr)) {
 724                         trace_printf_key(&trace_fsmonitor,
 725                                          "fsmonitor: invalid V2 protocol token '%s'",
 726                                          command);
 727                         do_trivial = 1;
 728                         do_cookie = 1;
 729
 730                 } else {
 731                         /*
 732                          * We have a V2 valid token:
 733                          *     "builtin:<token_id>:<seq_nr>"
 734                          */
 735                         do_cookie = 1;
 736                 }
 737         }
 738
 739         pthread_mutex_lock(&state->main_lock);
 740
 741         if (!state->current_token_data)
 742                 BUG("fsmonitor state does not have a current token");
 743
 744         /*
 745          * Write a cookie file inside the directory being watched in
 746          * an effort to flush out existing filesystem events that we
 747          * actually care about.  Suspend this client thread until we
 748          * see the filesystem events for this cookie file.
 749          *
 750          * Creating the cookie lets us guarantee that our FS listener
 751          * thread has drained the kernel queue and we are caught up
 752          * with the kernel.
 753          *
 754          * If we cannot create the cookie (or otherwise guarantee that
 755          * we are caught up), we send a trivial response.  We have to
 756          * assume that there might be some very, very recent activity
 757          * on the FS still in flight.
 758          */
 759         if (do_cookie) {
 760                 cookie_result = with_lock__wait_for_cookie(state);
 761                 if (cookie_result != FCIR_SEEN) {
 762                         error(_("fsmonitor: cookie_result '%d' != SEEN"),
 763                               cookie_result);
 764                         do_trivial = 1;
 765                 }
 766         }
 767
 768         if (do_flush)
 769                 with_lock__do_force_resync(state);
 770
 771         /*
 772          * We mark the current head of the batch list as "pinned" so
 773          * that the listener thread will treat this item as read-only
 774          * (and prevent any more paths from being added to it) from
 775          * now on.
 776          */
 777         token_data = state->current_token_data;
 778         batch_head = token_data->batch_head;
 779         ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL);
 780
 781         /*
 782          * FSMonitor Protocol V2 requires that we send a response header
 783          * with a "new current token" and then all of the paths that changed
 784          * since the "requested token".  We send the seq_nr of the just-pinned
 785          * head batch so that future requests from a client will be relative
 786          * to it.
 787          */
 788         with_lock__format_response_token(&response_token,
 789                                          &token_data->token_id, batch_head);
 790
 791         reply(reply_data, response_token.buf, response_token.len + 1);
 792         total_response_len += response_token.len + 1;
 793
 794         trace2_data_string("fsmonitor", the_repository, "response/token",
 795                            response_token.buf);
 796         trace_printf_key(&trace_fsmonitor, "response token: %s",
 797                          response_token.buf);
 798
 799         if (!do_trivial) {
 800                 if (strcmp(requested_token_id.buf, token_data->token_id.buf)) {
 801                         /*
 802                          * The client last spoke to a different daemon
 803                          * instance -OR- the daemon had to resync with
 804                          * the filesystem (and lost events), so reject.
 805                          */
 806                         trace2_data_string("fsmonitor", the_repository,
 807                                            "response/token", "different");
 808                         do_trivial = 1;
 809
 810                 } else if (requested_oldest_seq_nr <
 811                            token_data->batch_tail->batch_seq_nr) {
 812                         /*
 813                          * The client wants older events than we have for
 814                          * this token_id.  This means that the end of our
 815                          * batch list was truncated and we cannot give the
 816                          * client a complete snapshot relative to their
 817                          * request.
 818                          */
 819                         trace_printf_key(&trace_fsmonitor,
 820                                          "client requested truncated data");
 821                         do_trivial = 1;
 822                 }
 823         }
 824
 825         if (do_trivial) {
 826                 pthread_mutex_unlock(&state->main_lock);
 827
 828                 reply(reply_data, "/", 2);
 829
 830                 trace2_data_intmax("fsmonitor", the_repository,
 831                                    "response/trivial", 1);
 832
 833                 goto cleanup;
 834         }
 835
 836         /*
 837          * We're going to hold onto a pointer to the current
 838          * token-data while we walk the list of batches of files.
 839          * During this time, we will NOT be under the lock.
 840          * So we ref-count it.
 841          *
 842          * This allows the listener thread to continue prepending
 843          * new batches of items to the token-data (which we'll ignore).
 844          *
 845          * AND it allows the listener thread to do a token-reset
 846          * (and install a new `current_token_data`).
 847          */
 848         token_data->client_ref_count++;
 849
 850         pthread_mutex_unlock(&state->main_lock);
 851
 852         /*
 853          * The client request is relative to the token that they sent,
 854          * so walk the batch list backwards from the current head back
 855          * to the batch (sequence number) they named.
 856          *
 857          * We use khash to de-dup the list of pathnames.
 858          *
 859          * NEEDSWORK: each batch contains a list of interned strings,
 860          * so we only need to do pointer comparisons here to build the
 861          * hash table.  Currently, we're still comparing the string
 862          * values.
 863          */
 864         shown = kh_init_str();
 865         for (batch = batch_head;
 866              batch && batch->batch_seq_nr > requested_oldest_seq_nr;
 867              batch = batch->next) {
 868                 size_t k;
 869
 870                 for (k = 0; k < batch->nr; k++) {
 871                         const char *s = batch->interned_paths[k];
 872                         size_t s_len;
 873
 874                         if (kh_get_str(shown, s) != kh_end(shown))
 875                                 duplicates++;
 876                         else {
 877                                 kh_put_str(shown, s, &hash_ret);
 878
 879                                 trace_printf_key(&trace_fsmonitor,
 880                                                  "send[%"PRIuMAX"]: %s",
 881                                                  count, s);
 882
 883                                 /* Each path gets written with a trailing NUL */
 884                                 s_len = strlen(s) + 1;
 885
 886                                 if (payload.len + s_len >=
 887                                     LARGE_PACKET_DATA_MAX) {
 888                                         reply(reply_data, payload.buf,
 889                                               payload.len);
 890                                         total_response_len += payload.len;
 891                                         strbuf_reset(&payload);
 892                                 }
 893
 894                                 strbuf_add(&payload, s, s_len);
 895                                 count++;
 896                         }
 897                 }
 898         }
 899
 900         if (payload.len) {
 901                 reply(reply_data, payload.buf, payload.len);
 902                 total_response_len += payload.len;
 903         }
 904
 905         kh_release_str(shown);
 906
 907         pthread_mutex_lock(&state->main_lock);
 908
 909         if (token_data->client_ref_count > 0)
 910                 token_data->client_ref_count--;
 911
 912         if (token_data->client_ref_count == 0) {
 913                 if (token_data != state->current_token_data) {
 914                         /*
 915                          * The listener thread did a token-reset while we were
 916                          * walking the batch list.  Therefore, this token is
 917                          * stale and can be discarded completely.  If we are
 918                          * the last reader thread using this token, we own
 919                          * that work.
 920                          */
 921                         fsmonitor_free_token_data(token_data);
 922                 } else if (batch) {
 923                         /*
 924                          * We are holding the lock and are the only
 925                          * reader of the ref-counted portion of the
 926                          * list, so we get the honor of seeing if the
 927                          * list can be truncated to save memory.
 928                          *
 929                          * The main loop did not walk to the end of the
 930                          * list, so this batch is the first item in the
 931                          * batch-list that is older than the requested
 932                          * end-point sequence number.  See if the tail
 933                          * end of the list is obsolete.
 934                          */
 935                         remainder = with_lock__truncate_old_batches(state,
 936                                                                     batch);
 937                 }
 938         }
 939
 940         pthread_mutex_unlock(&state->main_lock);
 941
 942         if (remainder)
 943                 fsmonitor_batch__free_list(remainder);
 944
 945         trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len);
 946         trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count);
 947         trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates);
 948
 949 cleanup:
 950         strbuf_release(&response_token);
 951         strbuf_release(&requested_token_id);
 952         strbuf_release(&payload);
 953
 954         return 0;
 955 }
 956
 957 static ipc_server_application_cb handle_client;
 958
 959 static int handle_client(void *data,
 960                          const char *command, size_t command_len,
 961                          ipc_server_reply_cb *reply,
 962                          struct ipc_server_reply_data *reply_data)
 963 {
 964         struct fsmonitor_daemon_state *state = data;
 965         int result;
 966
 967         /*
 968          * The Simple IPC API now supports {char*, len} arguments, but
 969          * FSMonitor always uses proper null-terminated strings, so
 970          * we can ignore the command_len argument.  (Trust, but verify.)
 971          */
 972         if (command_len != strlen(command))
 973                 BUG("FSMonitor assumes text messages");
 974
 975         trace_printf_key(&trace_fsmonitor, "requested token: %s", command);
 976
 977         trace2_region_enter("fsmonitor", "handle_client", the_repository);
 978         trace2_data_string("fsmonitor", the_repository, "request", command);
 979
 980         result = do_handle_client(state, command, reply, reply_data);
 981
 982         trace2_region_leave("fsmonitor", "handle_client", the_repository);
 983
 984         return result;
 985 }
 986
/*
 * Directory names used for the daemon's cookie files.
 *
 * FSMONITOR_COOKIE_PREFIX is the string
 * "fsmonitor--daemon/cookies/" (note the trailing slash); the path
 * classifiers compare it against paths that are relative to the git
 * directory.
 */
#define FSMONITOR_DIR           "fsmonitor--daemon"
#define FSMONITOR_COOKIE_DIR    "cookies"
#define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/")
 990
 991 enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative(
 992         const char *rel)
 993 {
 994         if (fspathncmp(rel, ".git", 4))
 995                 return IS_WORKDIR_PATH;
 996         rel += 4;
 997
 998         if (!*rel)
 999                 return IS_DOT_GIT;
1000         if (*rel != '/')
1001                 return IS_WORKDIR_PATH; /* e.g. .gitignore */
1002         rel++;
1003
1004         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1005                         strlen(FSMONITOR_COOKIE_PREFIX)))
1006                 return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX;
1007
1008         return IS_INSIDE_DOT_GIT;
1009 }
1010
1011 enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative(
1012         const char *rel)
1013 {
1014         if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX,
1015                         strlen(FSMONITOR_COOKIE_PREFIX)))
1016                 return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX;
1017
1018         return IS_INSIDE_GITDIR;
1019 }
1020
1021 static enum fsmonitor_path_type try_classify_workdir_abs_path(
1022         struct fsmonitor_daemon_state *state,
1023         const char *path)
1024 {
1025         const char *rel;
1026
1027         if (fspathncmp(path, state->path_worktree_watch.buf,
1028                        state->path_worktree_watch.len))
1029                 return IS_OUTSIDE_CONE;
1030
1031         rel = path + state->path_worktree_watch.len;
1032
1033         if (!*rel)
1034                 return IS_WORKDIR_PATH; /* it is the root dir exactly */
1035         if (*rel != '/')
1036                 return IS_OUTSIDE_CONE;
1037         rel++;
1038
1039         return fsmonitor_classify_path_workdir_relative(rel);
1040 }
1041
1042 enum fsmonitor_path_type fsmonitor_classify_path_absolute(
1043         struct fsmonitor_daemon_state *state,
1044         const char *path)
1045 {
1046         const char *rel;
1047         enum fsmonitor_path_type t;
1048
1049         t = try_classify_workdir_abs_path(state, path);
1050         if (state->nr_paths_watching == 1)
1051                 return t;
1052         if (t != IS_OUTSIDE_CONE)
1053                 return t;
1054
1055         if (fspathncmp(path, state->path_gitdir_watch.buf,
1056                        state->path_gitdir_watch.len))
1057                 return IS_OUTSIDE_CONE;
1058
1059         rel = path + state->path_gitdir_watch.len;
1060
1061         if (!*rel)
1062                 return IS_GITDIR; /* it is the <gitdir> exactly */
1063         if (*rel != '/')
1064                 return IS_OUTSIDE_CONE;
1065         rel++;
1066
1067         return fsmonitor_classify_path_gitdir_relative(rel);
1068 }
1069
1070 /*
1071  * We try to combine small batches at the front of the batch-list to avoid
1072  * having a long list.  This hopefully makes it a little easier when we want
1073  * to truncate and maintain the list.  However, we don't want the paths array
1074  * to just keep growing and growing with realloc, so we insert an arbitrary
1075  * limit.
1076  */
1077 #define MY_COMBINE_LIMIT (1024)
1078
1079 void fsmonitor_publish(struct fsmonitor_daemon_state *state,
1080                        struct fsmonitor_batch *batch,
1081                        const struct string_list *cookie_names)
1082 {
1083         if (!batch && !cookie_names->nr)
1084                 return;
1085
1086         pthread_mutex_lock(&state->main_lock);
1087
1088         if (batch) {
1089                 struct fsmonitor_batch *head;
1090
1091                 head = state->current_token_data->batch_head;
1092                 if (!head) {
1093                         BUG("token does not have batch");
1094                 } else if (head->pinned_time) {
1095                         /*
1096                          * We cannot alter the current batch list
1097                          * because:
1098                          *
1099                          * [a] it is being transmitted to at least one
1100                          * client and the handle_client() thread has a
1101                          * ref-count, but not a lock on the batch list
1102                          * starting with this item.
1103                          *
1104                          * [b] it has been transmitted in the past to
1105                          * at least one client such that future
1106                          * requests are relative to this head batch.
1107                          *
1108                          * So, we can only prepend a new batch onto
1109                          * the front of the list.
1110                          */
1111                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1112                         batch->next = head;
1113                         state->current_token_data->batch_head = batch;
1114                 } else if (!head->batch_seq_nr) {
1115                         /*
1116                          * Batch 0 is unpinned.  See the note in
1117                          * `fsmonitor_new_token_data()` about why we
1118                          * don't need to accumulate these paths.
1119                          */
1120                         fsmonitor_batch__free_list(batch);
1121                 } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
1122                         /*
1123                          * The head batch in the list has never been
1124                          * transmitted to a client, but folding the
1125                          * contents of the new batch onto it would
1126                          * exceed our arbitrary limit, so just prepend
1127                          * the new batch onto the list.
1128                          */
1129                         batch->batch_seq_nr = head->batch_seq_nr + 1;
1130                         batch->next = head;
1131                         state->current_token_data->batch_head = batch;
1132                 } else {
1133                         /*
1134                          * We are free to add the paths in the given
1135                          * batch onto the end of the current head batch.
1136                          */
1137                         fsmonitor_batch__combine(head, batch);
1138                         fsmonitor_batch__free_list(batch);
1139                 }
1140         }
1141
1142         if (cookie_names->nr)
1143                 with_lock__mark_cookies_seen(state, cookie_names);
1144
1145         pthread_mutex_unlock(&state->main_lock);
1146 }
1147
/*
 * Entry point of the health thread: runs fsm_health__loop() on the
 * daemon state passed in as the thread argument, bracketed by the
 * trace2 thread start/exit events.  Always returns NULL.
 */
static void *fsm_health__thread_proc(void *_state)
{
        struct fsmonitor_daemon_state *daemon_state = _state;

        trace2_thread_start("fsm-health");
        fsm_health__loop(daemon_state);
        trace2_thread_exit();

        return NULL;
}
1159
1160 static void *fsm_listen__thread_proc(void *_state)
1161 {
1162         struct fsmonitor_daemon_state *state = _state;
1163
1164         trace2_thread_start("fsm-listen");
1165
1166         trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'",
1167                          state->path_worktree_watch.buf);
1168         if (state->nr_paths_watching > 1)
1169                 trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'",
1170                                  state->path_gitdir_watch.buf);
1171
1172         fsm_listen__loop(state);
1173
1174         pthread_mutex_lock(&state->main_lock);
1175         if (state->current_token_data &&
1176             state->current_token_data->client_ref_count == 0)
1177                 fsmonitor_free_token_data(state->current_token_data);
1178         state->current_token_data = NULL;
1179         pthread_mutex_unlock(&state->main_lock);
1180
1181         trace2_thread_exit();
1182         return NULL;
1183 }
1184
1185 static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state)
1186 {
1187         struct ipc_server_opts ipc_opts = {
1188                 .nr_threads = fsmonitor__ipc_threads,
1189
1190                 /*
1191                  * We know that there are no other active threads yet,
1192                  * so we can let the IPC layer temporarily chdir() if
1193                  * it needs to when creating the server side of the
1194                  * Unix domain socket.
1195                  */
1196                 .uds_disallow_chdir = 0
1197         };
1198         int health_started = 0;
1199         int listener_started = 0;
1200         int err = 0;
1201
1202         /*
1203          * Start the IPC thread pool before the we've started the file
1204          * system event listener thread so that we have the IPC handle
1205          * before we need it.
1206          */
1207         if (ipc_server_run_async(&state->ipc_server_data,
1208                                  state->path_ipc.buf, &ipc_opts,
1209                                  handle_client, state))
1210                 return error_errno(
1211                         _("could not start IPC thread pool on '%s'"),
1212                         state->path_ipc.buf);
1213
1214         /*
1215          * Start the fsmonitor listener thread to collect filesystem
1216          * events.
1217          */
1218         if (pthread_create(&state->listener_thread, NULL,
1219                            fsm_listen__thread_proc, state)) {
1220                 ipc_server_stop_async(state->ipc_server_data);
1221                 err = error(_("could not start fsmonitor listener thread"));
1222                 goto cleanup;
1223         }
1224         listener_started = 1;
1225
1226         /*
1227          * Start the health thread to watch over our process.
1228          */
1229         if (pthread_create(&state->health_thread, NULL,
1230                            fsm_health__thread_proc, state)) {
1231                 ipc_server_stop_async(state->ipc_server_data);
1232                 err = error(_("could not start fsmonitor health thread"));
1233                 goto cleanup;
1234         }
1235         health_started = 1;
1236
1237         /*
1238          * The daemon is now fully functional in background threads.
1239          * Our primary thread should now just wait while the threads
1240          * do all the work.
1241          */
1242 cleanup:
1243         /*
1244          * Wait for the IPC thread pool to shutdown (whether by client
1245          * request, from filesystem activity, or an error).
1246          */
1247         ipc_server_await(state->ipc_server_data);
1248
1249         /*
1250          * The fsmonitor listener thread may have received a shutdown
1251          * event from the IPC thread pool, but it doesn't hurt to tell
1252          * it again.  And wait for it to shutdown.
1253          */
1254         if (listener_started) {
1255                 fsm_listen__stop_async(state);
1256                 pthread_join(state->listener_thread, NULL);
1257         }
1258
1259         if (health_started) {
1260                 fsm_health__stop_async(state);
1261                 pthread_join(state->health_thread, NULL);
1262         }
1263
1264         if (err)
1265                 return err;
1266         if (state->listen_error_code)
1267                 return state->listen_error_code;
1268         if (state->health_error_code)
1269                 return state->health_error_code;
1270         return 0;
1271 }
1272
1273 static int fsmonitor_run_daemon(void)
1274 {
1275         struct fsmonitor_daemon_state state;
1276         const char *home;
1277         int err;
1278
1279         memset(&state, 0, sizeof(state));
1280
1281         hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
1282         pthread_mutex_init(&state.main_lock, NULL);
1283         pthread_cond_init(&state.cookies_cond, NULL);
1284         state.listen_error_code = 0;
1285         state.health_error_code = 0;
1286         state.current_token_data = fsmonitor_new_token_data();
1287
1288         /* Prepare to (recursively) watch the <worktree-root> directory. */
1289         strbuf_init(&state.path_worktree_watch, 0);
1290         strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree()));
1291         state.nr_paths_watching = 1;
1292
1293         strbuf_init(&state.alias.alias, 0);
1294         strbuf_init(&state.alias.points_to, 0);
1295         if ((err = fsmonitor__get_alias(state.path_worktree_watch.buf, &state.alias)))
1296                 goto done;
1297
1298         /*
1299          * We create and delete cookie files somewhere inside the .git
1300          * directory to help us keep sync with the file system.  If
1301          * ".git" is not a directory, then <gitdir> is not inside the
1302          * cone of <worktree-root>, so set up a second watch to watch
1303          * the <gitdir> so that we get events for the cookie files.
1304          */
1305         strbuf_init(&state.path_gitdir_watch, 0);
1306         strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch);
1307         strbuf_addstr(&state.path_gitdir_watch, "/.git");
1308         if (!is_directory(state.path_gitdir_watch.buf)) {
1309                 strbuf_reset(&state.path_gitdir_watch);
1310                 strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir()));
1311                 state.nr_paths_watching = 2;
1312         }
1313
1314         /*
1315          * We will write filesystem syncing cookie files into
1316          * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>.
1317          *
1318          * The extra layers of subdirectories here keep us from
1319          * changing the mtime on ".git/" or ".git/foo/" when we create
1320          * or delete cookie files.
1321          *
1322          * There have been problems with some IDEs that do a
1323          * non-recursive watch of the ".git/" directory and run a
1324          * series of commands any time something happens.
1325          *
1326          * For example, if we place our cookie files directly in
1327          * ".git/" or ".git/foo/" then a `git status` (or similar
1328          * command) from the IDE will cause a cookie file to be
1329          * created in one of those dirs.  This causes the mtime of
1330          * those dirs to change.  This triggers the IDE's watch
1331          * notification.  This triggers the IDE to run those commands
1332          * again.  And the process repeats and the machine never goes
1333          * idle.
1334          *
1335          * Adding the extra layers of subdirectories prevents the
1336          * mtime of ".git/" and ".git/foo" from changing when a
1337          * cookie file is created.
1338          */
1339         strbuf_init(&state.path_cookie_prefix, 0);
1340         strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch);
1341
1342         strbuf_addch(&state.path_cookie_prefix, '/');
1343         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR);
1344         mkdir(state.path_cookie_prefix.buf, 0777);
1345
1346         strbuf_addch(&state.path_cookie_prefix, '/');
1347         strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR);
1348         mkdir(state.path_cookie_prefix.buf, 0777);
1349
1350         strbuf_addch(&state.path_cookie_prefix, '/');
1351
1352         /*
1353          * We create a named-pipe or unix domain socket inside of the
1354          * ".git" directory.  (Well, on Windows, we base our named
1355          * pipe in the NPFS on the absolute path of the git
1356          * directory.)
1357          */
1358         strbuf_init(&state.path_ipc, 0);
1359         strbuf_addstr(&state.path_ipc,
1360                 absolute_path(fsmonitor_ipc__get_path(the_repository)));
1361
1362         /*
1363          * Confirm that we can create platform-specific resources for the
1364          * filesystem listener before we bother starting all the threads.
1365          */
1366         if (fsm_listen__ctor(&state)) {
1367                 err = error(_("could not initialize listener thread"));
1368                 goto done;
1369         }
1370
1371         if (fsm_health__ctor(&state)) {
1372                 err = error(_("could not initialize health thread"));
1373                 goto done;
1374         }
1375
1376         /*
1377          * CD out of the worktree root directory.
1378          *
1379          * The common Git startup mechanism causes our CWD to be the
1380          * root of the worktree.  On Windows, this causes our process
1381          * to hold a locked handle on the CWD.  This prevents the
1382          * worktree from being moved or deleted while the daemon is
1383          * running.
1384          *
1385          * We assume that our FS and IPC listener threads have either
1386          * opened all of the handles that they need or will do
1387          * everything using absolute paths.
1388          */
1389         home = getenv("HOME");
1390         if (home && *home && chdir(home))
1391                 die_errno(_("could not cd home '%s'"), home);
1392
1393         err = fsmonitor_run_daemon_1(&state);
1394
1395 done:
1396         pthread_cond_destroy(&state.cookies_cond);
1397         pthread_mutex_destroy(&state.main_lock);
1398         fsm_listen__dtor(&state);
1399         fsm_health__dtor(&state);
1400
1401         ipc_server_free(state.ipc_server_data);
1402
1403         strbuf_release(&state.path_worktree_watch);
1404         strbuf_release(&state.path_gitdir_watch);
1405         strbuf_release(&state.path_cookie_prefix);
1406         strbuf_release(&state.path_ipc);
1407         strbuf_release(&state.alias.alias);
1408         strbuf_release(&state.alias.points_to);
1409
1410         return err;
1411 }
1412
1413 static int try_to_run_foreground_daemon(int detach_console)
1414 {
1415         /*
1416          * Technically, we don't need to probe for an existing daemon
1417          * process, since we could just call `fsmonitor_run_daemon()`
1418          * and let it fail if the pipe/socket is busy.
1419          *
1420          * However, this method gives us a nicer error message for a
1421          * common error case.
1422          */
1423         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1424                 die(_("fsmonitor--daemon is already running '%s'"),
1425                     the_repository->worktree);
1426
1427         if (fsmonitor__announce_startup) {
1428                 fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"),
1429                         the_repository->worktree);
1430                 fflush(stderr);
1431         }
1432
1433 #ifdef GIT_WINDOWS_NATIVE
1434         if (detach_console)
1435                 FreeConsole();
1436 #endif
1437
1438         return !!fsmonitor_run_daemon();
1439 }
1440
1441 static start_bg_wait_cb bg_wait_cb;
1442
1443 static int bg_wait_cb(const struct child_process *cp, void *cb_data)
1444 {
1445         enum ipc_active_state s = fsmonitor_ipc__get_state();
1446
1447         switch (s) {
1448         case IPC_STATE__LISTENING:
1449                 /* child is "ready" */
1450                 return 0;
1451
1452         case IPC_STATE__NOT_LISTENING:
1453         case IPC_STATE__PATH_NOT_FOUND:
1454                 /* give child more time */
1455                 return 1;
1456
1457         default:
1458         case IPC_STATE__INVALID_PATH:
1459         case IPC_STATE__OTHER_ERROR:
1460                 /* all the time in world won't help */
1461                 return -1;
1462         }
1463 }
1464
1465 static int try_to_start_background_daemon(void)
1466 {
1467         struct child_process cp = CHILD_PROCESS_INIT;
1468         enum start_bg_result sbgr;
1469
1470         /*
1471          * Before we try to create a background daemon process, see
1472          * if a daemon process is already listening.  This makes it
1473          * easier for us to report an already-listening error to the
1474          * console, since our spawn/daemon can only report the success
1475          * of creating the background process (and not whether it
1476          * immediately exited).
1477          */
1478         if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING)
1479                 die(_("fsmonitor--daemon is already running '%s'"),
1480                     the_repository->worktree);
1481
1482         if (fsmonitor__announce_startup) {
1483                 fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"),
1484                         the_repository->worktree);
1485                 fflush(stderr);
1486         }
1487
1488         cp.git_cmd = 1;
1489
1490         strvec_push(&cp.args, "fsmonitor--daemon");
1491         strvec_push(&cp.args, "run");
1492         strvec_push(&cp.args, "--detach");
1493         strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads);
1494
1495         cp.no_stdin = 1;
1496         cp.no_stdout = 1;
1497         cp.no_stderr = 1;
1498
1499         sbgr = start_bg_command(&cp, bg_wait_cb, NULL,
1500                                 fsmonitor__start_timeout_sec);
1501
1502         switch (sbgr) {
1503         case SBGR_READY:
1504                 return 0;
1505
1506         default:
1507         case SBGR_ERROR:
1508         case SBGR_CB_ERROR:
1509                 return error(_("daemon failed to start"));
1510
1511         case SBGR_TIMEOUT:
1512                 return error(_("daemon not online yet"));
1513
1514         case SBGR_DIED:
1515                 return error(_("daemon terminated"));
1516         }
1517 }
1518
1519 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix)
1520 {
1521         const char *subcmd;
1522         enum fsmonitor_reason reason;
1523         int detach_console = 0;
1524
1525         struct option options[] = {
1526                 OPT_BOOL(0, "detach", &detach_console, N_("detach from console")),
1527                 OPT_INTEGER(0, "ipc-threads",
1528                             &fsmonitor__ipc_threads,
1529                             N_("use <n> ipc worker threads")),
1530                 OPT_INTEGER(0, "start-timeout",
1531                             &fsmonitor__start_timeout_sec,
1532                             N_("max seconds to wait for background daemon startup")),
1533
1534                 OPT_END()
1535         };
1536
1537         git_config(fsmonitor_config, NULL);
1538
1539         argc = parse_options(argc, argv, prefix, options,
1540                              builtin_fsmonitor__daemon_usage, 0);
1541         if (argc != 1)
1542                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1543         subcmd = argv[0];
1544
1545         if (fsmonitor__ipc_threads < 1)
1546                 die(_("invalid 'ipc-threads' value (%d)"),
1547                     fsmonitor__ipc_threads);
1548
1549         prepare_repo_settings(the_repository);
1550         /*
1551          * If the repo is fsmonitor-compatible, explicitly set IPC-mode
1552          * (without bothering to load the `core.fsmonitor` config settings).
1553          *
1554          * If the repo is not compatible, the repo-settings will be set to
1555          * incompatible rather than IPC, so we can use one of the __get
1556          * routines to detect the discrepancy.
1557          */
1558         fsm_settings__set_ipc(the_repository);
1559
1560         reason = fsm_settings__get_reason(the_repository);
1561         if (reason > FSMONITOR_REASON_OK)
1562                 die("%s",
1563                     fsm_settings__get_incompatible_msg(the_repository,
1564                                                        reason));
1565
1566         if (!strcmp(subcmd, "start"))
1567                 return !!try_to_start_background_daemon();
1568
1569         if (!strcmp(subcmd, "run"))
1570                 return !!try_to_run_foreground_daemon(detach_console);
1571
1572         if (!strcmp(subcmd, "stop"))
1573                 return !!do_as_client__send_stop();
1574
1575         if (!strcmp(subcmd, "status"))
1576                 return !!do_as_client__status();
1577
1578         die(_("Unhandled subcommand '%s'"), subcmd);
1579 }
1580
1581 #else
1582 int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix UNUSED)
1583 {
1584         struct option options[] = {
1585                 OPT_END()
1586         };
1587
1588         if (argc == 2 && !strcmp(argv[1], "-h"))
1589                 usage_with_options(builtin_fsmonitor__daemon_usage, options);
1590
1591         die(_("fsmonitor--daemon not supported on this platform"));
1592 }
1593 #endif