Merge branch 'ds/midx-write-repack-fix'
[git.git] / reachable.c
blob46613a6bb6f536d0ac999ed0120011961f88dd7e
1 #define USE_THE_REPOSITORY_VARIABLE
3 #include "git-compat-util.h"
4 #include "gettext.h"
5 #include "hex.h"
6 #include "refs.h"
7 #include "commit.h"
8 #include "blob.h"
9 #include "diff.h"
10 #include "revision.h"
11 #include "reachable.h"
12 #include "cache-tree.h"
13 #include "progress.h"
14 #include "list-objects.h"
15 #include "packfile.h"
16 #include "worktree.h"
17 #include "object-store-ll.h"
18 #include "pack-bitmap.h"
19 #include "pack-mtimes.h"
20 #include "config.h"
21 #include "run-command.h"
22 #include "sequencer.h"
/*
 * Progress bookkeeping for a connectivity walk: the meter to update and
 * the number of objects counted so far.
 */
struct connectivity_progress {
	struct progress *progress;	/* meter passed to display_progress() */
	unsigned long count;		/* objects counted so far */
};
29 static void update_progress(struct connectivity_progress *cp)
31 cp->count++;
32 if ((cp->count & 1023) == 0)
33 display_progress(cp->progress, cp->count);
36 static void add_one_file(const char *path, struct rev_info *revs)
38 struct strbuf buf = STRBUF_INIT;
39 struct object_id oid;
40 struct object *object;
42 if (!read_oneliner(&buf, path, READ_ONELINER_SKIP_IF_EMPTY)) {
43 strbuf_release(&buf);
44 return;
46 strbuf_trim(&buf);
47 if (!get_oid_hex(buf.buf, &oid)) {
48 object = parse_object_or_die(&oid, buf.buf);
49 add_pending_object(revs, object, "");
51 strbuf_release(&buf);
54 /* Mark objects recorded in rebase state files as reachable. */
55 static void add_rebase_files(struct rev_info *revs)
57 struct strbuf buf = STRBUF_INIT;
58 size_t len;
59 const char *path[] = {
60 "rebase-apply/autostash",
61 "rebase-apply/orig-head",
62 "rebase-merge/autostash",
63 "rebase-merge/orig-head",
65 struct worktree **worktrees = get_worktrees();
67 for (struct worktree **wt = worktrees; *wt; wt++) {
68 strbuf_reset(&buf);
69 strbuf_addstr(&buf, get_worktree_git_dir(*wt));
70 strbuf_complete(&buf, '/');
71 len = buf.len;
72 for (size_t i = 0; i < ARRAY_SIZE(path); i++) {
73 strbuf_setlen(&buf, len);
74 strbuf_addstr(&buf, path[i]);
75 add_one_file(buf.buf, revs);
78 strbuf_release(&buf);
79 free_worktrees(worktrees);
82 static int add_one_ref(const char *path, const struct object_id *oid,
83 int flag, void *cb_data)
85 struct rev_info *revs = (struct rev_info *)cb_data;
86 struct object *object;
88 if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
89 warning("symbolic ref is dangling: %s", path);
90 return 0;
93 object = parse_object_or_die(oid, path);
94 add_pending_object(revs, object, "");
96 return 0;
100 * The traversal will have already marked us as SEEN, so we
101 * only need to handle any progress reporting here.
103 static void mark_object(struct object *obj UNUSED,
104 const char *name UNUSED,
105 void *data)
107 update_progress(data);
110 static void mark_commit(struct commit *c, void *data)
112 mark_object(&c->object, NULL, data);
115 struct recent_data {
116 struct rev_info *revs;
117 timestamp_t timestamp;
118 report_recent_object_fn *cb;
119 int ignore_in_core_kept_packs;
121 struct oidset extra_recent_oids;
122 int extra_recent_oids_loaded;
125 static int run_one_gc_recent_objects_hook(struct oidset *set,
126 const char *args)
128 struct child_process cmd = CHILD_PROCESS_INIT;
129 struct strbuf buf = STRBUF_INIT;
130 FILE *out;
131 int ret = 0;
133 cmd.use_shell = 1;
134 cmd.out = -1;
136 strvec_push(&cmd.args, args);
138 if (start_command(&cmd))
139 return -1;
141 out = xfdopen(cmd.out, "r");
142 while (strbuf_getline(&buf, out) != EOF) {
143 struct object_id oid;
144 const char *rest;
146 if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
147 ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
148 break;
151 oidset_insert(set, &oid);
154 fclose(out);
155 ret |= finish_command(&cmd);
157 strbuf_release(&buf);
158 return ret;
161 static void load_gc_recent_objects(struct recent_data *data)
163 const struct string_list *programs;
164 int ret = 0;
165 size_t i;
167 data->extra_recent_oids_loaded = 1;
169 if (git_config_get_string_multi("gc.recentobjectshook", &programs))
170 return;
172 for (i = 0; i < programs->nr; i++) {
173 ret = run_one_gc_recent_objects_hook(&data->extra_recent_oids,
174 programs->items[i].string);
175 if (ret)
176 die(_("unable to enumerate additional recent objects"));
180 static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
181 struct recent_data *data)
183 if (mtime > data->timestamp)
184 return 1;
186 if (!data->extra_recent_oids_loaded)
187 load_gc_recent_objects(data);
188 return oidset_contains(&data->extra_recent_oids, oid);
191 static void add_recent_object(const struct object_id *oid,
192 struct packed_git *pack,
193 off_t offset,
194 timestamp_t mtime,
195 struct recent_data *data)
197 struct object *obj;
198 enum object_type type;
200 if (!obj_is_recent(oid, mtime, data))
201 return;
204 * We do not want to call parse_object here, because
205 * inflating blobs and trees could be very expensive.
206 * However, we do need to know the correct type for
207 * later processing, and the revision machinery expects
208 * commits and tags to have been parsed.
210 type = oid_object_info(the_repository, oid, NULL);
211 if (type < 0)
212 die("unable to get object info for %s", oid_to_hex(oid));
214 switch (type) {
215 case OBJ_TAG:
216 case OBJ_COMMIT:
217 obj = parse_object_or_die(oid, NULL);
218 break;
219 case OBJ_TREE:
220 obj = (struct object *)lookup_tree(the_repository, oid);
221 break;
222 case OBJ_BLOB:
223 obj = (struct object *)lookup_blob(the_repository, oid);
224 break;
225 default:
226 die("unknown object type for %s: %s",
227 oid_to_hex(oid), type_name(type));
230 if (!obj)
231 die("unable to lookup %s", oid_to_hex(oid));
233 add_pending_object(data->revs, obj, "");
234 if (data->cb)
235 data->cb(obj, pack, offset, mtime);
238 static int want_recent_object(struct recent_data *data,
239 const struct object_id *oid)
241 if (data->ignore_in_core_kept_packs &&
242 has_object_kept_pack(oid, IN_CORE_KEEP_PACKS))
243 return 0;
244 return 1;
247 static int add_recent_loose(const struct object_id *oid,
248 const char *path, void *data)
250 struct stat st;
251 struct object *obj;
253 if (!want_recent_object(data, oid))
254 return 0;
256 obj = lookup_object(the_repository, oid);
258 if (obj && obj->flags & SEEN)
259 return 0;
261 if (stat(path, &st) < 0) {
263 * It's OK if an object went away during our iteration; this
264 * could be due to a simultaneous repack. But anything else
265 * we should abort, since we might then fail to mark objects
266 * which should not be pruned.
268 if (errno == ENOENT)
269 return 0;
270 return error_errno("unable to stat %s", oid_to_hex(oid));
273 add_recent_object(oid, NULL, 0, st.st_mtime, data);
274 return 0;
277 static int add_recent_packed(const struct object_id *oid,
278 struct packed_git *p,
279 uint32_t pos,
280 void *data)
282 struct object *obj;
283 timestamp_t mtime = p->mtime;
285 if (!want_recent_object(data, oid))
286 return 0;
288 obj = lookup_object(the_repository, oid);
290 if (obj && obj->flags & SEEN)
291 return 0;
292 if (p->is_cruft) {
293 if (load_pack_mtimes(p) < 0)
294 die(_("could not load cruft pack .mtimes"));
295 mtime = nth_packed_mtime(p, pos);
297 add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
298 return 0;
301 int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
302 timestamp_t timestamp,
303 report_recent_object_fn *cb,
304 int ignore_in_core_kept_packs)
306 struct recent_data data;
307 enum for_each_object_flags flags;
308 int r;
310 data.revs = revs;
311 data.timestamp = timestamp;
312 data.cb = cb;
313 data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
315 oidset_init(&data.extra_recent_oids, 0);
316 data.extra_recent_oids_loaded = 0;
318 r = for_each_loose_object(add_recent_loose, &data,
319 FOR_EACH_OBJECT_LOCAL_ONLY);
320 if (r)
321 goto done;
323 flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
324 if (ignore_in_core_kept_packs)
325 flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
327 r = for_each_packed_object(add_recent_packed, &data, flags);
329 done:
330 oidset_clear(&data.extra_recent_oids);
332 return r;
335 static int mark_object_seen(const struct object_id *oid,
336 enum object_type type,
337 int exclude UNUSED,
338 uint32_t name_hash UNUSED,
339 struct packed_git *found_pack UNUSED,
340 off_t found_offset UNUSED)
342 struct object *obj = lookup_object_by_type(the_repository, oid, type);
343 if (!obj)
344 die("unable to create object '%s'", oid_to_hex(oid));
346 obj->flags |= SEEN;
347 return 0;
350 void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
351 timestamp_t mark_recent, struct progress *progress)
353 struct connectivity_progress cp;
354 struct bitmap_index *bitmap_git;
357 * Set up revision parsing, and mark us as being interested
358 * in all object types, not just commits.
360 revs->tag_objects = 1;
361 revs->blob_objects = 1;
362 revs->tree_objects = 1;
364 /* Add all refs from the index file */
365 add_index_objects_to_pending(revs, 0);
367 /* Add all external refs */
368 refs_for_each_ref(get_main_ref_store(the_repository), add_one_ref,
369 revs);
371 /* detached HEAD is not included in the list above */
372 refs_head_ref(get_main_ref_store(the_repository), add_one_ref, revs);
373 other_head_refs(add_one_ref, revs);
375 /* rebase autostash and orig-head */
376 add_rebase_files(revs);
378 /* Add all reflog info */
379 if (mark_reflog)
380 add_reflogs_to_pending(revs, 0);
382 cp.progress = progress;
383 cp.count = 0;
385 bitmap_git = prepare_bitmap_walk(revs, 0);
386 if (bitmap_git) {
387 traverse_bitmap_commit_list(bitmap_git, revs, mark_object_seen);
388 free_bitmap_index(bitmap_git);
389 } else {
390 if (prepare_revision_walk(revs))
391 die("revision walk setup failed");
392 traverse_commit_list(revs, mark_commit, mark_object, &cp);
395 if (mark_recent) {
396 revs->ignore_missing_links = 1;
397 if (add_unseen_recent_objects_to_traversal(revs, mark_recent,
398 NULL, 0))
399 die("unable to mark recent objects");
400 if (prepare_revision_walk(revs))
401 die("revision walk setup failed");
402 traverse_commit_list(revs, mark_commit, mark_object, &cp);
405 display_progress(cp.progress, cp.count);