reftable: document reading and writing indices
[alt-git.git] / fsmonitor.c
blobf670c50937898342f693708c706a0db270be3a6d
1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "dir.h"
4 #include "environment.h"
5 #include "ewah/ewok.h"
6 #include "fsmonitor.h"
7 #include "fsmonitor-ipc.h"
8 #include "run-command.h"
9 #include "strbuf.h"
10 #include "trace2.h"
/*
 * Versions of the fsmonitor index extension payload.  In this file V1
 * is read as a 64-bit timestamp and V2 as a NUL-terminated token.
 */
#define INDEX_EXTENSION_VERSION1	(1)
#define INDEX_EXTENSION_VERSION2	(2)

/* Accepted values of the `core.fsmonitorhookversion` setting. */
#define HOOK_INTERFACE_VERSION1		(1)
#define HOOK_INTERFACE_VERSION2		(2)
17 struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR);
19 static void assert_index_minimum(struct index_state *istate, size_t pos)
21 if (pos > istate->cache_nr)
22 BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
23 (uintmax_t)pos, istate->cache_nr);
26 static void fsmonitor_ewah_callback(size_t pos, void *is)
28 struct index_state *istate = (struct index_state *)is;
29 struct cache_entry *ce;
31 assert_index_minimum(istate, pos + 1);
33 ce = istate->cache[pos];
34 ce->ce_flags &= ~CE_FSMONITOR_VALID;
37 static int fsmonitor_hook_version(void)
39 int hook_version;
41 if (git_config_get_int("core.fsmonitorhookversion", &hook_version))
42 return -1;
44 if (hook_version == HOOK_INTERFACE_VERSION1 ||
45 hook_version == HOOK_INTERFACE_VERSION2)
46 return hook_version;
48 warning("Invalid hook version '%i' in core.fsmonitorhookversion. "
49 "Must be 1 or 2.", hook_version);
50 return -1;
53 int read_fsmonitor_extension(struct index_state *istate, const void *data,
54 unsigned long sz)
56 const char *index = data;
57 uint32_t hdr_version;
58 uint32_t ewah_size;
59 struct ewah_bitmap *fsmonitor_dirty;
60 int ret;
61 uint64_t timestamp;
62 struct strbuf last_update = STRBUF_INIT;
64 if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t))
65 return error("corrupt fsmonitor extension (too short)");
67 hdr_version = get_be32(index);
68 index += sizeof(uint32_t);
69 if (hdr_version == INDEX_EXTENSION_VERSION1) {
70 timestamp = get_be64(index);
71 strbuf_addf(&last_update, "%"PRIu64"", timestamp);
72 index += sizeof(uint64_t);
73 } else if (hdr_version == INDEX_EXTENSION_VERSION2) {
74 strbuf_addstr(&last_update, index);
75 index += last_update.len + 1;
76 } else {
77 return error("bad fsmonitor version %d", hdr_version);
80 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
82 ewah_size = get_be32(index);
83 index += sizeof(uint32_t);
85 fsmonitor_dirty = ewah_new();
86 ret = ewah_read_mmap(fsmonitor_dirty, index, ewah_size);
87 if (ret != ewah_size) {
88 ewah_free(fsmonitor_dirty);
89 return error("failed to parse ewah bitmap reading fsmonitor index extension");
91 istate->fsmonitor_dirty = fsmonitor_dirty;
93 if (!istate->split_index)
94 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
96 trace2_data_string("index", NULL, "extension/fsmn/read/token",
97 istate->fsmonitor_last_update);
98 trace_printf_key(&trace_fsmonitor,
99 "read fsmonitor extension successful '%s'",
100 istate->fsmonitor_last_update);
101 return 0;
104 void fill_fsmonitor_bitmap(struct index_state *istate)
106 unsigned int i, skipped = 0;
107 istate->fsmonitor_dirty = ewah_new();
108 for (i = 0; i < istate->cache_nr; i++) {
109 if (istate->cache[i]->ce_flags & CE_REMOVE)
110 skipped++;
111 else if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID))
112 ewah_set(istate->fsmonitor_dirty, i - skipped);
116 void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
118 uint32_t hdr_version;
119 uint32_t ewah_start;
120 uint32_t ewah_size = 0;
121 int fixup = 0;
123 if (!istate->split_index)
124 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
126 put_be32(&hdr_version, INDEX_EXTENSION_VERSION2);
127 strbuf_add(sb, &hdr_version, sizeof(uint32_t));
129 strbuf_addstr(sb, istate->fsmonitor_last_update);
130 strbuf_addch(sb, 0); /* Want to keep a NUL */
132 fixup = sb->len;
133 strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */
135 ewah_start = sb->len;
136 ewah_serialize_strbuf(istate->fsmonitor_dirty, sb);
137 ewah_free(istate->fsmonitor_dirty);
138 istate->fsmonitor_dirty = NULL;
140 /* fix up size field */
141 put_be32(&ewah_size, sb->len - ewah_start);
142 memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
144 trace2_data_string("index", NULL, "extension/fsmn/write/token",
145 istate->fsmonitor_last_update);
146 trace_printf_key(&trace_fsmonitor,
147 "write fsmonitor extension successful '%s'",
148 istate->fsmonitor_last_update);
152 * Call the query-fsmonitor hook passing the last update token of the saved results.
154 static int query_fsmonitor_hook(struct repository *r,
155 int version,
156 const char *last_update,
157 struct strbuf *query_result)
159 struct child_process cp = CHILD_PROCESS_INIT;
160 int result;
162 if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
163 return -1;
165 strvec_push(&cp.args, fsm_settings__get_hook_path(r));
166 strvec_pushf(&cp.args, "%d", version);
167 strvec_pushf(&cp.args, "%s", last_update);
168 cp.use_shell = 1;
169 cp.dir = get_git_work_tree();
171 trace2_region_enter("fsm_hook", "query", NULL);
173 result = capture_command(&cp, query_result, 1024);
175 if (result)
176 trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
177 else
178 trace2_data_intmax("fsm_hook", NULL, "query/response-length",
179 query_result->len);
181 trace2_region_leave("fsm_hook", "query", NULL);
183 return result;
186 static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
188 int i, len = strlen(name);
189 int pos = index_name_pos(istate, name, len);
191 trace_printf_key(&trace_fsmonitor,
192 "fsmonitor_refresh_callback '%s' (pos %d)",
193 name, pos);
195 if (name[len - 1] == '/') {
197 * The daemon can decorate directory events, such as
198 * moves or renames, with a trailing slash if the OS
199 * FS Event contains sufficient information, such as
200 * MacOS.
202 * Use this to invalidate the entire cone under that
203 * directory.
205 * We do not expect an exact match because the index
206 * does not normally contain directory entries, so we
207 * start at the insertion point and scan.
209 if (pos < 0)
210 pos = -pos - 1;
212 /* Mark all entries for the folder invalid */
213 for (i = pos; i < istate->cache_nr; i++) {
214 if (!starts_with(istate->cache[i]->name, name))
215 break;
216 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
220 * We need to remove the traling "/" from the path
221 * for the untracked cache.
223 name[len - 1] = '\0';
224 } else if (pos >= 0) {
226 * We have an exact match for this path and can just
227 * invalidate it.
229 istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID;
230 } else {
232 * The path is not a tracked file -or- it is a
233 * directory event on a platform that cannot
234 * distinguish between file and directory events in
235 * the event handler, such as Windows.
237 * Scan as if it is a directory and invalidate the
238 * cone under it. (But remember to ignore items
239 * between "name" and "name/", such as "name-" and
240 * "name.".
242 pos = -pos - 1;
244 for (i = pos; i < istate->cache_nr; i++) {
245 if (!starts_with(istate->cache[i]->name, name))
246 break;
247 if ((unsigned char)istate->cache[i]->name[len] > '/')
248 break;
249 if (istate->cache[i]->name[len] == '/')
250 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
255 * Mark the untracked cache dirty even if it wasn't found in the index
256 * as it could be a new untracked file.
258 untracked_cache_invalidate_path(istate, name, 0);
/*
 * How many pathnames an FSMonitor response must contain before we
 * force the index to be written back to disk.
 *
 * Any single pathname in a response MAY flip a cache-entry or istate
 * flag bit and thereby cause the index to be rewritten.  But a response
 * can also consist largely of paths (ignored paths, for instance) that
 * touch no flag bits at all.  That is fine and normal; however, in that
 * case the index is not rewritten and the token stored in the FSMonitor
 * index extension is not advanced.  The next Git command then reads the
 * same token, issues the same token-relative request, and gets the
 * same response back (plus whatever changed since).  If that response
 * is large and keeps growing, performance can suffer.
 *
 * Example: a build writes 100K object files without modifying any
 * source file, so the index needs no update.  The FSMonitor response
 * relative to the pre-build token might be 5MB, and every later Git
 * command receives that same 100K/5MB response, and
 * `refresh_fsmonitor()` walks all 100K paths again, until something
 * finally causes the index to be rewritten.
 *
 * Forcing an index update after a very large response stores a newer
 * token in the FSMonitor index extension, so later commands should get
 * smaller, more current responses.
 *
 * The exact value is not important.  The index is rewritten anyway the
 * first time the user touches a tracked file and a command such as
 * `git status` refreshes an mtime and/or sets a flag bit.
 */
static int fsmonitor_force_update_threshold = 100;
298 void refresh_fsmonitor(struct index_state *istate)
300 static int warn_once = 0;
301 struct strbuf query_result = STRBUF_INIT;
302 int query_success = 0, hook_version = -1;
303 size_t bol = 0; /* beginning of line */
304 uint64_t last_update;
305 struct strbuf last_update_token = STRBUF_INIT;
306 char *buf;
307 unsigned int i;
308 int is_trivial = 0;
309 struct repository *r = istate->repo;
310 enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
311 enum fsmonitor_reason reason = fsm_settings__get_reason(r);
313 if (!warn_once && reason > FSMONITOR_REASON_OK) {
314 char *msg = fsm_settings__get_incompatible_msg(r, reason);
315 warn_once = 1;
316 warning("%s", msg);
317 free(msg);
320 if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
321 istate->fsmonitor_has_run_once)
322 return;
324 istate->fsmonitor_has_run_once = 1;
326 trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
328 if (fsm_mode == FSMONITOR_MODE_IPC) {
329 query_success = !fsmonitor_ipc__send_query(
330 istate->fsmonitor_last_update ?
331 istate->fsmonitor_last_update : "builtin:fake",
332 &query_result);
333 if (query_success) {
335 * The response contains a series of nul terminated
336 * strings. The first is the new token.
338 * Use `char *buf` as an interlude to trick the CI
339 * static analysis to let us use `strbuf_addstr()`
340 * here (and only copy the token) rather than
341 * `strbuf_addbuf()`.
343 buf = query_result.buf;
344 strbuf_addstr(&last_update_token, buf);
345 bol = last_update_token.len + 1;
346 is_trivial = query_result.buf[bol] == '/';
347 if (is_trivial)
348 trace2_data_intmax("fsm_client", NULL,
349 "query/trivial-response", 1);
350 } else {
352 * The builtin daemon is not available on this
353 * platform -OR- we failed to get a response.
355 * Generate a fake token (rather than a V1
356 * timestamp) for the index extension. (If
357 * they switch back to the hook API, we don't
358 * want ambiguous state.)
360 strbuf_addstr(&last_update_token, "builtin:fake");
363 goto apply_results;
366 assert(fsm_mode == FSMONITOR_MODE_HOOK);
368 hook_version = fsmonitor_hook_version();
371 * This could be racy so save the date/time now and query_fsmonitor_hook
372 * should be inclusive to ensure we don't miss potential changes.
374 last_update = getnanotime();
375 if (hook_version == HOOK_INTERFACE_VERSION1)
376 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
379 * If we have a last update token, call query_fsmonitor_hook for the set of
380 * changes since that token, else assume everything is possibly dirty
381 * and check it all.
383 if (istate->fsmonitor_last_update) {
384 if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
385 query_success = !query_fsmonitor_hook(
386 r, HOOK_INTERFACE_VERSION2,
387 istate->fsmonitor_last_update, &query_result);
389 if (query_success) {
390 if (hook_version < 0)
391 hook_version = HOOK_INTERFACE_VERSION2;
394 * First entry will be the last update token
395 * Need to use a char * variable because static
396 * analysis was suggesting to use strbuf_addbuf
397 * but we don't want to copy the entire strbuf
398 * only the chars up to the first NUL
400 buf = query_result.buf;
401 strbuf_addstr(&last_update_token, buf);
402 if (!last_update_token.len) {
403 warning("Empty last update token.");
404 query_success = 0;
405 } else {
406 bol = last_update_token.len + 1;
407 is_trivial = query_result.buf[bol] == '/';
409 } else if (hook_version < 0) {
410 hook_version = HOOK_INTERFACE_VERSION1;
411 if (!last_update_token.len)
412 strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
416 if (hook_version == HOOK_INTERFACE_VERSION1) {
417 query_success = !query_fsmonitor_hook(
418 r, HOOK_INTERFACE_VERSION1,
419 istate->fsmonitor_last_update, &query_result);
420 if (query_success)
421 is_trivial = query_result.buf[0] == '/';
424 if (is_trivial)
425 trace2_data_intmax("fsm_hook", NULL,
426 "query/trivial-response", 1);
428 trace_performance_since(last_update, "fsmonitor process '%s'",
429 fsm_settings__get_hook_path(r));
430 trace_printf_key(&trace_fsmonitor,
431 "fsmonitor process '%s' returned %s",
432 fsm_settings__get_hook_path(r),
433 query_success ? "success" : "failure");
436 apply_results:
438 * The response from FSMonitor (excluding the header token) is
439 * either:
441 * [a] a (possibly empty) list of NUL delimited relative
442 * pathnames of changed paths. This list can contain
443 * files and directories. Directories have a trailing
444 * slash.
446 * [b] a single '/' to indicate the provider had no
447 * information and that we should consider everything
448 * invalid. We call this a trivial response.
450 trace2_region_enter("fsmonitor", "apply_results", istate->repo);
452 if (query_success && !is_trivial) {
454 * Mark all pathnames returned by the monitor as dirty.
456 * This updates both the cache-entries and the untracked-cache.
458 int count = 0;
460 buf = query_result.buf;
461 for (i = bol; i < query_result.len; i++) {
462 if (buf[i] != '\0')
463 continue;
464 fsmonitor_refresh_callback(istate, buf + bol);
465 bol = i + 1;
466 count++;
468 if (bol < query_result.len) {
469 fsmonitor_refresh_callback(istate, buf + bol);
470 count++;
473 /* Now mark the untracked cache for fsmonitor usage */
474 if (istate->untracked)
475 istate->untracked->use_fsmonitor = 1;
477 if (count > fsmonitor_force_update_threshold)
478 istate->cache_changed |= FSMONITOR_CHANGED;
480 trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
481 count);
483 } else {
485 * We failed to get a response or received a trivial response,
486 * so invalidate everything.
488 * We only want to run the post index changed hook if
489 * we've actually changed entries, so keep track if we
490 * actually changed entries or not.
492 int is_cache_changed = 0;
494 for (i = 0; i < istate->cache_nr; i++) {
495 if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
496 is_cache_changed = 1;
497 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
502 * If we're going to check every file, ensure we save
503 * the results.
505 if (is_cache_changed)
506 istate->cache_changed |= FSMONITOR_CHANGED;
508 if (istate->untracked)
509 istate->untracked->use_fsmonitor = 0;
511 trace2_region_leave("fsmonitor", "apply_results", istate->repo);
513 strbuf_release(&query_result);
515 /* Now that we've updated istate, save the last_update_token */
516 FREE_AND_NULL(istate->fsmonitor_last_update);
517 istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);
521 * The caller wants to turn on FSMonitor. And when the caller writes
522 * the index to disk, a FSMonitor extension should be included. This
523 * requires that `istate->fsmonitor_last_update` not be NULL. But we
524 * have not actually talked to a FSMonitor process yet, so we don't
525 * have an initial value for this field.
527 * For a protocol V1 FSMonitor process, this field is a formatted
528 * "nanoseconds since epoch" field. However, for a protocol V2
529 * FSMonitor process, this field is an opaque token.
531 * Historically, `add_fsmonitor()` has initialized this field to the
532 * current time for protocol V1 processes. There are lots of race
533 * conditions here, but that code has shipped...
535 * The only true solution is to use a V2 FSMonitor and get a current
536 * or default token value (that it understands), but we cannot do that
537 * until we have actually talked to an instance of the FSMonitor process
538 * (but the protocol requires that we send a token first...).
540 * For simplicity, just initialize like we have a V1 process and require
541 * that V2 processes adapt.
543 static void initialize_fsmonitor_last_update(struct index_state *istate)
545 struct strbuf last_update = STRBUF_INIT;
547 strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
548 istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
551 void add_fsmonitor(struct index_state *istate)
553 unsigned int i;
555 if (!istate->fsmonitor_last_update) {
556 trace_printf_key(&trace_fsmonitor, "add fsmonitor");
557 istate->cache_changed |= FSMONITOR_CHANGED;
558 initialize_fsmonitor_last_update(istate);
560 /* reset the fsmonitor state */
561 for (i = 0; i < istate->cache_nr; i++)
562 istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
564 /* reset the untracked cache */
565 if (istate->untracked) {
566 add_untracked_cache(istate);
567 istate->untracked->use_fsmonitor = 1;
570 /* Update the fsmonitor state */
571 refresh_fsmonitor(istate);
575 void remove_fsmonitor(struct index_state *istate)
577 if (istate->fsmonitor_last_update) {
578 trace_printf_key(&trace_fsmonitor, "remove fsmonitor");
579 istate->cache_changed |= FSMONITOR_CHANGED;
580 FREE_AND_NULL(istate->fsmonitor_last_update);
584 void tweak_fsmonitor(struct index_state *istate)
586 unsigned int i;
587 int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
588 > FSMONITOR_MODE_DISABLED);
590 if (istate->fsmonitor_dirty) {
591 if (fsmonitor_enabled) {
592 /* Mark all entries valid */
593 for (i = 0; i < istate->cache_nr; i++) {
594 if (S_ISGITLINK(istate->cache[i]->ce_mode))
595 continue;
596 istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID;
599 /* Mark all previously saved entries as dirty */
600 assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
601 ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
603 refresh_fsmonitor(istate);
606 ewah_free(istate->fsmonitor_dirty);
607 istate->fsmonitor_dirty = NULL;
610 if (fsmonitor_enabled)
611 add_fsmonitor(istate);
612 else
613 remove_fsmonitor(istate);