Merge branch 'kg/external-diff-save-env'
[git.git] / list-objects-filter.c
bloba62624a1ced704156395b3e30312f264aa46b862
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
/* Remember to update object flag allocation in object.h */
/*
 * FILTER_SHOWN_BUT_REVISIT -- this bit is set on tree objects that
 * have already been shown but must still be revisited whenever they
 * come up again in the traversal (until the tree is marked SEEN).
 * It lets us silently de-dup calls to show() in the caller.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "sha1-name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal and can OPTIONALLY collect the OIDs it omitted.
 */
struct filter_blobs_none_data {
	/* receives the OIDs of omitted blobs; NULL when not collecting */
	struct oidset *omits;
};
36 static enum list_objects_filter_result filter_blobs_none(
37 struct repository *r,
38 enum list_objects_filter_situation filter_situation,
39 struct object *obj,
40 const char *pathname,
41 const char *filename,
42 void *filter_data_)
44 struct filter_blobs_none_data *filter_data = filter_data_;
46 switch (filter_situation) {
47 default:
48 BUG("unknown filter_situation: %d", filter_situation);
50 case LOFS_BEGIN_TREE:
51 assert(obj->type == OBJ_TREE);
52 /* always include all tree objects */
53 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
55 case LOFS_END_TREE:
56 assert(obj->type == OBJ_TREE);
57 return LOFR_ZERO;
59 case LOFS_BLOB:
60 assert(obj->type == OBJ_BLOB);
61 assert((obj->flags & SEEN) == 0);
63 if (filter_data->omits)
64 oidset_insert(filter_data->omits, &obj->oid);
65 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
69 static void *filter_blobs_none__init(
70 struct oidset *omitted,
71 struct list_objects_filter_options *filter_options,
72 filter_object_fn *filter_fn,
73 filter_free_fn *filter_free_fn)
75 struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
76 d->omits = omitted;
78 *filter_fn = filter_blobs_none;
79 *filter_free_fn = free;
80 return d;
/*
 * State for the list-objects filter that omits ALL trees and blobs
 * from the traversal and can OPTIONALLY collect the omitted OIDs.
 */
struct filter_trees_none_data {
	/* receives the OIDs of omitted objects; NULL when not collecting */
	struct oidset *omits;
};
91 static enum list_objects_filter_result filter_trees_none(
92 struct repository *r,
93 enum list_objects_filter_situation filter_situation,
94 struct object *obj,
95 const char *pathname,
96 const char *filename,
97 void *filter_data_)
99 struct filter_trees_none_data *filter_data = filter_data_;
101 switch (filter_situation) {
102 default:
103 BUG("unknown filter_situation: %d", filter_situation);
105 case LOFS_BEGIN_TREE:
106 case LOFS_BLOB:
107 if (filter_data->omits) {
108 oidset_insert(filter_data->omits, &obj->oid);
109 /* _MARK_SEEN but not _DO_SHOW (hard omit) */
110 return LOFR_MARK_SEEN;
111 } else {
113 * Not collecting omits so no need to to traverse tree.
115 return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
118 case LOFS_END_TREE:
119 assert(obj->type == OBJ_TREE);
120 return LOFR_ZERO;
125 static void* filter_trees_none__init(
126 struct oidset *omitted,
127 struct list_objects_filter_options *filter_options,
128 filter_object_fn *filter_fn,
129 filter_free_fn *filter_free_fn)
131 struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
132 d->omits = omitted;
134 *filter_fn = filter_trees_none;
135 *filter_free_fn = free;
136 return d;
/*
 * State for the list-objects filter that omits large blobs and can
 * OPTIONALLY collect the OIDs it omitted.
 */
struct filter_blobs_limit_data {
	/* receives the OIDs of omitted blobs; NULL when not collecting */
	struct oidset *omits;
	/* blobs whose size is below this many bytes are kept */
	unsigned long max_bytes;
};
148 static enum list_objects_filter_result filter_blobs_limit(
149 struct repository *r,
150 enum list_objects_filter_situation filter_situation,
151 struct object *obj,
152 const char *pathname,
153 const char *filename,
154 void *filter_data_)
156 struct filter_blobs_limit_data *filter_data = filter_data_;
157 unsigned long object_length;
158 enum object_type t;
160 switch (filter_situation) {
161 default:
162 BUG("unknown filter_situation: %d", filter_situation);
164 case LOFS_BEGIN_TREE:
165 assert(obj->type == OBJ_TREE);
166 /* always include all tree objects */
167 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
169 case LOFS_END_TREE:
170 assert(obj->type == OBJ_TREE);
171 return LOFR_ZERO;
173 case LOFS_BLOB:
174 assert(obj->type == OBJ_BLOB);
175 assert((obj->flags & SEEN) == 0);
177 t = oid_object_info(r, &obj->oid, &object_length);
178 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
180 * We DO NOT have the blob locally, so we cannot
181 * apply the size filter criteria. Be conservative
182 * and force show it (and let the caller deal with
183 * the ambiguity).
185 goto include_it;
188 if (object_length < filter_data->max_bytes)
189 goto include_it;
191 if (filter_data->omits)
192 oidset_insert(filter_data->omits, &obj->oid);
193 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
196 include_it:
197 if (filter_data->omits)
198 oidset_remove(filter_data->omits, &obj->oid);
199 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
202 static void *filter_blobs_limit__init(
203 struct oidset *omitted,
204 struct list_objects_filter_options *filter_options,
205 filter_object_fn *filter_fn,
206 filter_free_fn *filter_free_fn)
208 struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
209 d->omits = omitted;
210 d->max_bytes = filter_options->blob_limit_value;
212 *filter_fn = filter_blobs_limit;
213 *filter_free_fn = free;
214 return d;
/*
 * A filter driven by a sparse-checkout specification to only
 * include blobs that a sparse checkout would populate.
 *
 * The sparse-checkout spec can be loaded from a blob with the
 * given OID or from a local pathname.  We allow an OID because
 * the repo may be bare or we may be doing the filtering on the
 * server.
 */
struct frame {
	/*
	 * The usual default include/exclude value that is inherited
	 * as we recurse into directories, based upon pattern matching
	 * of the directory itself or of a containing directory.
	 */
	int defval;

	/*
	 * 1 if the directory (recursively) contains any provisionally
	 * omitted objects.
	 *
	 * 0 if everything (recursively) contained in this directory
	 * has been explicitly included (SHOWN) in the result, so the
	 * directory may be short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
246 struct filter_sparse_data {
247 struct oidset *omits;
248 struct exclude_list el;
250 size_t nr, alloc;
251 struct frame *array_frame;
254 static enum list_objects_filter_result filter_sparse(
255 struct repository *r,
256 enum list_objects_filter_situation filter_situation,
257 struct object *obj,
258 const char *pathname,
259 const char *filename,
260 void *filter_data_)
262 struct filter_sparse_data *filter_data = filter_data_;
263 int val, dtype;
264 struct frame *frame;
266 switch (filter_situation) {
267 default:
268 BUG("unknown filter_situation: %d", filter_situation);
270 case LOFS_BEGIN_TREE:
271 assert(obj->type == OBJ_TREE);
272 dtype = DT_DIR;
273 val = is_excluded_from_list(pathname, strlen(pathname),
274 filename, &dtype, &filter_data->el,
275 r->index);
276 if (val < 0)
277 val = filter_data->array_frame[filter_data->nr].defval;
279 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
280 filter_data->alloc);
281 filter_data->nr++;
282 filter_data->array_frame[filter_data->nr].defval = val;
283 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
286 * A directory with this tree OID may appear in multiple
287 * places in the tree. (Think of a directory move or copy,
288 * with no other changes, so the OID is the same, but the
289 * full pathnames of objects within this directory are new
290 * and may match is_excluded() patterns differently.)
291 * So we cannot mark this directory as SEEN (yet), since
292 * that will prevent process_tree() from revisiting this
293 * tree object with other pathname prefixes.
295 * Only _DO_SHOW the tree object the first time we visit
296 * this tree object.
298 * We always show all tree objects. A future optimization
299 * may want to attempt to narrow this.
301 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
302 return LOFR_ZERO;
303 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
304 return LOFR_DO_SHOW;
306 case LOFS_END_TREE:
307 assert(obj->type == OBJ_TREE);
308 assert(filter_data->nr > 0);
310 frame = &filter_data->array_frame[filter_data->nr];
311 filter_data->nr--;
314 * Tell our parent directory if any of our children were
315 * provisionally omitted.
317 filter_data->array_frame[filter_data->nr].child_prov_omit |=
318 frame->child_prov_omit;
321 * If there are NO provisionally omitted child objects (ALL child
322 * objects in this folder were INCLUDED), then we can mark the
323 * folder as SEEN (so we will not have to revisit it again).
325 if (!frame->child_prov_omit)
326 return LOFR_MARK_SEEN;
327 return LOFR_ZERO;
329 case LOFS_BLOB:
330 assert(obj->type == OBJ_BLOB);
331 assert((obj->flags & SEEN) == 0);
333 frame = &filter_data->array_frame[filter_data->nr];
335 dtype = DT_REG;
336 val = is_excluded_from_list(pathname, strlen(pathname),
337 filename, &dtype, &filter_data->el,
338 r->index);
339 if (val < 0)
340 val = frame->defval;
341 if (val > 0) {
342 if (filter_data->omits)
343 oidset_remove(filter_data->omits, &obj->oid);
344 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
348 * Provisionally omit it. We've already established that
349 * this pathname is not in the sparse-checkout specification
350 * with the CURRENT pathname, so we *WANT* to omit this blob.
352 * However, a pathname elsewhere in the tree may also
353 * reference this same blob, so we cannot reject it yet.
354 * Leave the LOFR_ bits unset so that if the blob appears
355 * again in the traversal, we will be asked again.
357 if (filter_data->omits)
358 oidset_insert(filter_data->omits, &obj->oid);
361 * Remember that at least 1 blob in this tree was
362 * provisionally omitted. This prevents us from short
363 * cutting the tree in future iterations.
365 frame->child_prov_omit = 1;
366 return LOFR_ZERO;
371 static void filter_sparse_free(void *filter_data)
373 struct filter_sparse_data *d = filter_data;
374 /* TODO free contents of 'd' */
375 free(d);
378 static void *filter_sparse_oid__init(
379 struct oidset *omitted,
380 struct list_objects_filter_options *filter_options,
381 filter_object_fn *filter_fn,
382 filter_free_fn *filter_free_fn)
384 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
385 d->omits = omitted;
386 if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
387 NULL, 0, &d->el) < 0)
388 die("could not load filter specification");
390 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
391 d->array_frame[d->nr].defval = 0; /* default to include */
392 d->array_frame[d->nr].child_prov_omit = 0;
394 *filter_fn = filter_sparse;
395 *filter_free_fn = filter_sparse_free;
396 return d;
399 static void *filter_sparse_path__init(
400 struct oidset *omitted,
401 struct list_objects_filter_options *filter_options,
402 filter_object_fn *filter_fn,
403 filter_free_fn *filter_free_fn)
405 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
406 d->omits = omitted;
407 if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
408 NULL, 0, &d->el, NULL) < 0)
409 die("could not load filter specification");
411 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
412 d->array_frame[d->nr].defval = 0; /* default to include */
413 d->array_frame[d->nr].child_prov_omit = 0;
415 *filter_fn = filter_sparse;
416 *filter_free_fn = filter_sparse_free;
417 return d;
420 typedef void *(*filter_init_fn)(
421 struct oidset *omitted,
422 struct list_objects_filter_options *filter_options,
423 filter_object_fn *filter_fn,
424 filter_free_fn *filter_free_fn);
427 * Must match "enum list_objects_filter_choice".
429 static filter_init_fn s_filters[] = {
430 NULL,
431 filter_blobs_none__init,
432 filter_blobs_limit__init,
433 filter_trees_none__init,
434 filter_sparse_oid__init,
435 filter_sparse_path__init,
438 void *list_objects_filter__init(
439 struct oidset *omitted,
440 struct list_objects_filter_options *filter_options,
441 filter_object_fn *filter_fn,
442 filter_free_fn *filter_free_fn)
444 filter_init_fn init_fn;
446 assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
448 if (filter_options->choice >= LOFC__COUNT)
449 BUG("invalid list-objects filter choice: %d",
450 filter_options->choice);
452 init_fn = s_filters[filter_options->choice];
453 if (init_fn)
454 return init_fn(omitted, filter_options,
455 filter_fn, filter_free_fn);
456 *filter_fn = NULL;
457 *filter_free_fn = NULL;
458 return NULL;