10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
15 #include "object-store.h"
17 /* Remember to update object flag allocation in object.h */
19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
20 * that have been shown, but should be revisited if they appear
21 * in the traversal (until we mark it SEEN). This is a way to
22 * let us silently de-dup calls to show() in the caller. This
23 * is subtly different from the "revision.h:SHOWN" and the
24 * "sha1-name.c:ONELINE_SEEN" bits. And also different from
25 * the non-de-dup usage in pack-bitmap.c
27 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
30 struct filter
*filter
;
33 struct object_id skip_tree
;
34 unsigned is_skipping_tree
: 1;
38 enum list_objects_filter_result (*filter_object_fn
)(
40 enum list_objects_filter_situation filter_situation
,
48 * Optional. If this function is supplied and the filter needs
49 * to collect omits, then this function is called once before
52 * This is required because the following two conditions hold:
54 * a. A tree filter can add and remove objects as an object
56 * b. A combine filter's omit set is the union of all its
57 * subfilters, which may include tree: filters.
59 * As such, the omits sets must be separate sets, and can only
60 * be unioned after the traversal is completed.
62 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
64 void (*free_fn
)(void *filter_data
);
68 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
/*
 * Filter callback that hard-omits every blob.
 *
 * Tree objects are always kept: they are returned as
 * LOFR_MARK_SEEN | LOFR_DO_SHOW. A blob is expected not to carry the SEEN
 * flag yet; its OID is inserted into omits and it is returned as
 * LOFR_MARK_SEEN only, so it is never shown. An unrecognized
 * filter_situation is reported through BUG().
 *
 * NOTE(review): the case labels, the remaining parameters and any guard
 * around the oidset_insert() call are not visible in this listing; confirm
 * them against the full file.
 */
72 static enum list_objects_filter_result
filter_blobs_none(
74 enum list_objects_filter_situation filter_situation
,
81 switch (filter_situation
) {
83 BUG("unknown filter_situation: %d", filter_situation
);
86 assert(obj
->type
== OBJ_TREE
);
87 /* always include all tree objects */
88 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
91 assert(obj
->type
== OBJ_TREE
);
95 assert(obj
->type
== OBJ_BLOB
);
96 assert((obj
->flags
& SEEN
) == 0);
99 oidset_insert(omits
, &obj
->oid
);
100 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
/*
 * Initializer for the blobs-none filter.
 *
 * Installs filter_blobs_none as the per-object callback and the C library
 * free() as the destructor for filter_data. filter_options is not read by
 * the lines shown here.
 */
104 static void filter_blobs_none__init(
105 struct list_objects_filter_options
*filter_options
,
106 struct filter
*filter
)
108 filter
->filter_object_fn
= filter_blobs_none
;
109 filter
->free_fn
= free
;
113 * A filter for list-objects to omit trees and blobs found at or beyond a given depth (exclude_depth) from the traversal.
114 * Can OPTIONALLY collect a list of the omitted OIDs.
/* Per-traversal state of the tree-depth filter (see filter_trees_depth). */
116 struct filter_trees_depth_data
{
118 * Maps trees to the minimum depth at which they were seen. It is not
119 * necessary to re-traverse a tree at deeper or equal depths than it has
120 * already been traversed.
122 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
123 * it from being traversed at shallower depths.
125 struct oidmap seen_at_depth
;
/*
 * Depth limit: filter_trees_depth includes an object only while
 * current_depth < exclude_depth. Set from tree_exclude_depth at init.
 */
127 unsigned long exclude_depth
;
/*
 * Depth of the current position in the traversal; starts at 0 and is
 * incremented and decremented by filter_trees_depth.
 */
128 unsigned long current_depth
;
/*
 * Entry type stored in filter_trees_depth_data.seen_at_depth.
 *
 * base holds the map key: filter_trees_depth copies the tree OID into
 * base.oid before calling oidmap_put(). That function also reads and writes
 * a depth member whose declaration is not visible in this listing.
 */
131 struct seen_map_entry
{
132 struct oidmap_entry base
;
/*
 * Update the omits set for one object and report its previous membership.
 *
 * The object OID is either removed from omits (oidset_remove) or inserted
 * into it (oidset_insert), and the result of that call is returned.
 * Returns 1 if the oid was in the omits set before it was invoked.
 *
 * NOTE(review): callers pass include_it as the third argument; presumably it
 * selects removal when set and insertion otherwise. The branch condition and
 * any check for a NULL omits are not visible here; confirm.
 */
137 static int filter_trees_update_omits(
139 struct oidset
*omits
,
146 return oidset_remove(omits
, &obj
->oid
);
148 return oidset_insert(omits
, &obj
->oid
);
/*
 * Filter callback that limits the traversal by tree depth.
 *
 * include_it is computed once on entry as current_depth < exclude_depth.
 *
 * Visible behavior:
 *  - After asserting the object is a tree, current_depth is decremented
 *    (the tree is being left).
 *  - For another situation the omits set is updated through
 *    filter_trees_update_omits() and the object is returned as
 *    LOFR_MARK_SEEN | LOFR_DO_SHOW when include_it is set, LOFR_ZERO
 *    otherwise.
 *  - On LOFS_BEGIN_TREE the tree OID is looked up in seen_at_depth. A new
 *    entry recording current_depth can be allocated and stored. The current
 *    depth is compared with the recorded depth, the recorded depth may be
 *    lowered to current_depth, and the result becomes LOFR_SKIP_TREE,
 *    LOFR_DO_SHOW or LOFR_ZERO. Finally current_depth is incremented.
 *
 * NOTE(review): several case labels, conditions and the final return are
 * missing from this listing; the summary covers only the lines shown.
 */
151 static enum list_objects_filter_result
filter_trees_depth(
152 struct repository
*r
,
153 enum list_objects_filter_situation filter_situation
,
155 const char *pathname
,
156 const char *filename
,
157 struct oidset
*omits
,
160 struct filter_trees_depth_data
*filter_data
= filter_data_
;
161 struct seen_map_entry
*seen_info
;
162 int include_it
= filter_data
->current_depth
<
163 filter_data
->exclude_depth
;
168 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
169 * case we encounter a tree or blob again at a shallower depth.
172 switch (filter_situation
) {
174 BUG("unknown filter_situation: %d", filter_situation
);
177 assert(obj
->type
== OBJ_TREE
);
178 filter_data
->current_depth
--;
182 filter_trees_update_omits(obj
, omits
, include_it
);
183 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
185 case LOFS_BEGIN_TREE
:
186 seen_info
= oidmap_get(
187 &filter_data
->seen_at_depth
, &obj
->oid
);
/* Create and register an entry keyed by this tree OID at the current depth. */
189 seen_info
= xcalloc(1, sizeof(*seen_info
));
190 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
191 seen_info
->depth
= filter_data
->current_depth
;
192 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
/* True when this visit is not shallower than the depth already recorded. */
196 filter_data
->current_depth
>= seen_info
->depth
;
200 filter_res
= LOFR_SKIP_TREE
;
202 int been_omitted
= filter_trees_update_omits(
203 obj
, omits
, include_it
);
204 seen_info
->depth
= filter_data
->current_depth
;
207 filter_res
= LOFR_DO_SHOW
;
208 else if (omits
&& !been_omitted
)
210 * Must update omit information of children
211 * recursively; they have not been omitted yet.
213 filter_res
= LOFR_ZERO
;
215 filter_res
= LOFR_SKIP_TREE
;
/* Entering the tree: its children are one level deeper. */
218 filter_data
->current_depth
++;
/*
 * Destructor for the tree-depth filter state.
 *
 * Releases the seen_at_depth map through oidmap_free().
 *
 * NOTE(review): the second argument 1 presumably asks oidmap_free() to free
 * the stored entries as well; confirm against oidmap.h. Any NULL check and
 * the release of the structure itself are not visible in this listing.
 */
223 static void filter_trees_free(void *filter_data
) {
224 struct filter_trees_depth_data
*d
= filter_data
;
227 oidmap_free(&d
->seen_at_depth
, 1);
/*
 * Initializer for the tree-depth filter.
 *
 * Allocates a zeroed filter_trees_depth_data, initializes its seen_at_depth
 * map, copies the depth limit from filter_options->tree_exclude_depth and
 * starts current_depth at 0. The state is stored in filter->filter_data,
 * with filter_trees_depth as the callback and filter_trees_free as the
 * destructor.
 */
231 static void filter_trees_depth__init(
232 struct list_objects_filter_options
*filter_options
,
233 struct filter
*filter
)
235 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
236 oidmap_init(&d
->seen_at_depth
, 0);
237 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
238 d
->current_depth
= 0;
240 filter
->filter_data
= d
;
241 filter
->filter_object_fn
= filter_trees_depth
;
242 filter
->free_fn
= filter_trees_free
;
246 * A filter for list-objects to omit large blobs.
247 * And to OPTIONALLY collect a list of the omitted OIDs.
/*
 * State of the blob size-limit filter.
 *
 * max_bytes is copied from filter_options->blob_limit_value at init and is
 * compared against each blob length in filter_blobs_limit.
 */
249 struct filter_blobs_limit_data
{
250 unsigned long max_bytes
;
/*
 * Filter callback that omits blobs based on their size.
 *
 * Tree objects are always kept: LOFS_BEGIN_TREE returns
 * LOFR_MARK_SEEN | LOFR_DO_SHOW. For a blob (which must not carry SEEN yet)
 * the type and length are queried with oid_object_info() in repository r.
 * If the reported type is not OBJ_BLOB the blob is not available locally,
 * and per the comment below it is force-shown. Otherwise object_length is
 * compared with filter_data->max_bytes.
 *
 * An omitted blob has its OID inserted into omits and is returned as
 * LOFR_MARK_SEEN only. A shown blob has its OID removed from omits and is
 * returned as LOFR_MARK_SEEN | LOFR_DO_SHOW.
 *
 * NOTE(review): the statement controlled by the length comparison is not
 * visible; presumably blobs strictly smaller than max_bytes are shown.
 * Guards around the omits calls are likewise not visible; confirm.
 */
253 static enum list_objects_filter_result
filter_blobs_limit(
254 struct repository
*r
,
255 enum list_objects_filter_situation filter_situation
,
257 const char *pathname
,
258 const char *filename
,
259 struct oidset
*omits
,
262 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
263 unsigned long object_length
;
266 switch (filter_situation
) {
268 BUG("unknown filter_situation: %d", filter_situation
);
270 case LOFS_BEGIN_TREE
:
271 assert(obj
->type
== OBJ_TREE
);
272 /* always include all tree objects */
273 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
276 assert(obj
->type
== OBJ_TREE
);
280 assert(obj
->type
== OBJ_BLOB
);
281 assert((obj
->flags
& SEEN
) == 0);
283 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
284 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
286 * We DO NOT have the blob locally, so we cannot
287 * apply the size filter criteria. Be conservative
288 * and force show it (and let the caller deal with
294 if (object_length
< filter_data
->max_bytes
)
298 oidset_insert(omits
, &obj
->oid
);
299 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
304 oidset_remove(omits
, &obj
->oid
);
305 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
/*
 * Initializer for the blob size-limit filter.
 *
 * Allocates a zeroed filter_blobs_limit_data, copies the threshold from
 * filter_options->blob_limit_value into max_bytes, and stores the state in
 * filter->filter_data. filter_blobs_limit becomes the callback and the C
 * library free() the destructor.
 */
308 static void filter_blobs_limit__init(
309 struct list_objects_filter_options
*filter_options
,
310 struct filter
*filter
)
312 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
313 d
->max_bytes
= filter_options
->blob_limit_value
;
315 filter
->filter_data
= d
;
316 filter
->filter_object_fn
= filter_blobs_limit
;
317 filter
->free_fn
= free
;
321 * A filter driven by a sparse-checkout specification to only
322 * include blobs that a sparse checkout would populate.
324 * The sparse-checkout spec can be loaded from a blob with the
325 * given OID or from a local pathname. We allow an OID because
326 * the repo may be bare or we may be doing the filtering on the
331 * default_match is the usual default include/exclude value that
332 * should be inherited as we recurse into directories based
333 * upon pattern matching of the directory itself or of a
334 * containing directory.
336 enum pattern_match_result default_match
;
339 * 1 if the directory (recursively) contains any provisionally
342 * 0 if everything (recursively) contained in this directory
343 * has been explicitly included (SHOWN) in the result and
344 * the directory may be short-cut later in the traversal.
346 unsigned child_prov_omit
: 1;
/*
 * State of the sparse-specification filter.
 *
 * pl is the pattern list that paths are matched against; it is filled by
 * filter_sparse_oid__init. array_frame is a growable array of frames that
 * filter_sparse uses as a stack, indexed through the nr counter (the nr and
 * alloc members are used by the code but not visible in this listing).
 */
349 struct filter_sparse_data
{
350 struct pattern_list pl
;
353 struct frame
*array_frame
;
/*
 * Filter callback driven by a sparse-checkout style pattern list.
 *
 * LOFS_BEGIN_TREE: the directory path is matched against filter_data->pl.
 * An UNDECIDED result falls back to the default_match of the frame at index
 * nr - 1. The array is grown and the frame at index nr receives that match
 * with child_prov_omit cleared. FILTER_SHOWN_BUT_REVISIT is tested and then
 * set on the tree so that, per the comment below, the tree is shown only on
 * its first visit while remaining revisitable.
 *
 * Leaving a tree: more than one frame must be on the stack. The top frame is
 * popped (--nr), its child_prov_omit is OR-ed into the parent frame, and
 * LOFR_MARK_SEEN is returned when no child was provisionally omitted.
 *
 * Blob (must not carry SEEN yet): the path is matched against the pattern
 * list, falling back to the current frame default_match when UNDECIDED. A
 * MATCHED blob is removed from omits and returned as
 * LOFR_MARK_SEEN | LOFR_DO_SHOW. Any other blob is provisionally omitted:
 * its OID is inserted into omits and child_prov_omit is set on the current
 * frame.
 *
 * NOTE(review): case labels, the increment of nr, several return statements
 * and guards around the omits calls are missing from this listing; the
 * summary covers only the lines shown.
 */
356 static enum list_objects_filter_result
filter_sparse(
357 struct repository
*r
,
358 enum list_objects_filter_situation filter_situation
,
360 const char *pathname
,
361 const char *filename
,
362 struct oidset
*omits
,
365 struct filter_sparse_data
*filter_data
= filter_data_
;
368 enum pattern_match_result match
;
370 switch (filter_situation
) {
372 BUG("unknown filter_situation: %d", filter_situation
);
374 case LOFS_BEGIN_TREE
:
375 assert(obj
->type
== OBJ_TREE
);
377 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
378 filename
, &dtype
, &filter_data
->pl
,
380 if (match
== UNDECIDED
)
381 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
383 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
385 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
386 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
390 * A directory with this tree OID may appear in multiple
391 * places in the tree. (Think of a directory move or copy,
392 * with no other changes, so the OID is the same, but the
393 * full pathnames of objects within this directory are new
394 * and may match is_excluded() patterns differently.)
395 * So we cannot mark this directory as SEEN (yet), since
396 * that will prevent process_tree() from revisiting this
397 * tree object with other pathname prefixes.
399 * Only _DO_SHOW the tree object the first time we visit
402 * We always show all tree objects. A future optimization
403 * may want to attempt to narrow this.
405 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
407 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
411 assert(obj
->type
== OBJ_TREE
);
412 assert(filter_data
->nr
> 1);
414 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
417 * Tell our parent directory if any of our children were
418 * provisionally omitted.
420 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
421 frame
->child_prov_omit
;
424 * If there are NO provisionally omitted child objects (ALL child
425 * objects in this folder were INCLUDED), then we can mark the
426 * folder as SEEN (so we will not have to revisit it again).
428 if (!frame
->child_prov_omit
)
429 return LOFR_MARK_SEEN
;
433 assert(obj
->type
== OBJ_BLOB
);
434 assert((obj
->flags
& SEEN
) == 0);
436 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
439 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
440 filename
, &dtype
, &filter_data
->pl
,
442 if (match
== UNDECIDED
)
443 match
= frame
->default_match
;
444 if (match
== MATCHED
) {
446 oidset_remove(omits
, &obj
->oid
);
447 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
451 * Provisionally omit it. We've already established that
452 * this pathname is not in the sparse-checkout specification
453 * with the CURRENT pathname, so we *WANT* to omit this blob.
455 * However, a pathname elsewhere in the tree may also
456 * reference this same blob, so we cannot reject it yet.
457 * Leave the LOFR_ bits unset so that if the blob appears
458 * again in the traversal, we will be asked again.
461 oidset_insert(omits
, &obj
->oid
);
464 * Remember that at least 1 blob in this tree was
465 * provisionally omitted. This prevents us from short
466 * cutting the tree in future iterations.
468 frame
->child_prov_omit
= 1;
/*
 * Destructor for the sparse filter state.
 *
 * Frees the array_frame stack.
 *
 * NOTE(review): release of the pattern list and of the structure itself is
 * not visible in this listing; confirm against the full file.
 */
474 static void filter_sparse_free(void *filter_data
)
476 struct filter_sparse_data
*d
= filter_data
;
477 free(d
->array_frame
);
/*
 * Initializer for the sparse filter whose specification lives in a blob.
 *
 * Allocates a zeroed filter_sparse_data and loads patterns from the blob
 * named by filter_options->sparse_oid_value into d->pl. A negative return
 * from the loader terminates the program through die(). The frame at index
 * d->nr is then prepared as the root frame with default_match 0 (include by
 * default) and child_prov_omit 0. The state is stored in
 * filter->filter_data, with filter_sparse as the callback and
 * filter_sparse_free as the destructor.
 *
 * NOTE(review): the increment of d->nr after the root frame is set up is
 * not visible in this listing.
 */
481 static void filter_sparse_oid__init(
482 struct list_objects_filter_options
*filter_options
,
483 struct filter
*filter
)
485 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
486 if (add_patterns_from_blob_to_list(filter_options
->sparse_oid_value
,
487 NULL
, 0, &d
->pl
) < 0)
488 die("could not load filter specification");
490 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
491 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
492 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
495 filter
->filter_data
= d
;
496 filter
->filter_object_fn
= filter_sparse
;
497 filter
->free_fn
= filter_sparse_free
;
/*
 * A filter which only shows objects shown by all sub-filters.
 *
 * sub points to an array of subfilters allocated by filter_combine__init;
 * the element count is kept in an nr member that the code uses but that is
 * not visible in this listing.
 */
501 struct combine_filter_data
{
502 struct subfilter
*sub
;
/*
 * Run a single subfilter of a combine filter on one object.
 *
 * While sub->is_skipping_tree is set, the flag is cleared once
 * LOFS_END_TREE arrives for the tree whose OID was recorded in
 * sub->skip_tree. The seen set of the subfilter is then consulted before
 * the object is handed to list_objects_filter__filter_object() with
 * sub->filter.
 *
 * A result carrying LOFR_MARK_SEEN adds the OID to sub->seen. A result
 * carrying LOFR_SKIP_TREE sets is_skipping_tree and records the OID in
 * skip_tree so that the end of that tree can be recognized later.
 *
 * NOTE(review): the values returned on the early-exit paths (while skipping
 * and when the OID is already in sub->seen) and the final return are not
 * visible in this listing.
 */
506 static enum list_objects_filter_result
process_subfilter(
507 struct repository
*r
,
508 enum list_objects_filter_situation filter_situation
,
510 const char *pathname
,
511 const char *filename
,
512 struct subfilter
*sub
)
514 enum list_objects_filter_result result
;
517 * Check and update is_skipping_tree before oidset_contains so
518 * that is_skipping_tree gets unset even when the object is
519 * marked as seen. As of this writing, no filter uses
520 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
521 * ordering is only theoretically important. Be cautious if you
522 * change the order of the below checks and more filters have
525 if (sub
->is_skipping_tree
) {
526 if (filter_situation
== LOFS_END_TREE
&&
527 oideq(&obj
->oid
, &sub
->skip_tree
))
528 sub
->is_skipping_tree
= 0;
532 if (oidset_contains(&sub
->seen
, &obj
->oid
))
535 result
= list_objects_filter__filter_object(
536 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
538 if (result
& LOFR_MARK_SEEN
)
539 oidset_insert(&sub
->seen
, &obj
->oid
);
541 if (result
& LOFR_SKIP_TREE
) {
542 sub
->is_skipping_tree
= 1;
543 sub
->skip_tree
= obj
->oid
;
/*
 * Combine filter callback: intersect the verdicts of all subfilters.
 *
 * combined_result starts with LOFR_DO_SHOW, LOFR_MARK_SEEN and
 * LOFR_SKIP_TREE all set. Every subfilter is run through
 * process_subfilter(), and then:
 *  - LOFR_DO_SHOW is cleared if the subfilter result lacks it,
 *  - LOFR_MARK_SEEN is cleared if the subfilter result lacks it,
 *  - LOFR_SKIP_TREE is cleared if the subfilter is not currently skipping
 *    a tree.
 * Each bit therefore survives only when every subfilter agrees on it.
 *
 * Returns the combined result bits.
 */
549 static enum list_objects_filter_result
filter_combine(
550 struct repository
*r
,
551 enum list_objects_filter_situation filter_situation
,
553 const char *pathname
,
554 const char *filename
,
555 struct oidset
*omits
,
558 struct combine_filter_data
*d
= filter_data
;
559 enum list_objects_filter_result combined_result
=
560 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
563 for (sub
= 0; sub
< d
->nr
; sub
++) {
564 enum list_objects_filter_result sub_result
= process_subfilter(
565 r
, filter_situation
, obj
, pathname
, filename
,
567 if (!(sub_result
& LOFR_DO_SHOW
))
568 combined_result
&= ~LOFR_DO_SHOW
;
569 if (!(sub_result
& LOFR_MARK_SEEN
))
570 combined_result
&= ~LOFR_MARK_SEEN
;
/* The skip decision uses the persistent subfilter state, not sub_result. */
571 if (!d
->sub
[sub
].is_skipping_tree
)
572 combined_result
&= ~LOFR_SKIP_TREE
;
575 return combined_result
;
/*
 * Destructor for the combine filter state.
 *
 * For every subfilter: frees the nested filter through
 * list_objects_filter__free(), clears its seen set, and raises BUG() if its
 * omits set still holds entries. The omits sets are expected to be empty at
 * this point because filter_combine__finalize_omits clears them.
 *
 * NOTE(review): release of the sub array and of the structure itself is not
 * visible in this listing; confirm against the full file.
 */
578 static void filter_combine__free(void *filter_data
)
580 struct combine_filter_data
*d
= filter_data
;
582 for (sub
= 0; sub
< d
->nr
; sub
++) {
583 list_objects_filter__free(d
->sub
[sub
].filter
);
584 oidset_clear(&d
->sub
[sub
].seen
);
585 if (d
->sub
[sub
].omits
.set
.size
)
586 BUG("expected oidset to be cleared already");
/*
 * Insert every OID contained in src into dest.
 *
 * Iterates over src with an oidset iterator until oidset_iter_next()
 * returns NULL, calling oidset_insert() on dest for each element. The
 * function does not itself modify src.
 */
591 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
592 struct oidset_iter iter
;
593 struct object_id
*src_oid
;
595 oidset_iter_init(src
, &iter
);
596 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
597 oidset_insert(dest
, src_oid
);
/*
 * Merge the per-subfilter omits sets into the combined omits set.
 *
 * For each subfilter, all of its omitted OIDs are added to omits through
 * add_all(), after which the subfilter set is cleared. Installed as the
 * finalize_omits_fn of the combine filter.
 */
600 static void filter_combine__finalize_omits(
601 struct oidset
*omits
,
604 struct combine_filter_data
*d
= filter_data
;
607 for (sub
= 0; sub
< d
->nr
; sub
++) {
608 add_all(omits
, &d
->sub
[sub
].omits
);
609 oidset_clear(&d
->sub
[sub
].omits
);
/*
 * Initializer for the combine filter.
 *
 * Allocates a zeroed combine_filter_data with filter_options->sub_nr
 * subfilter slots. Each slot gets a nested filter created by
 * list_objects_filter__init() from the matching entry of
 * filter_options->sub. A nested filter is handed its own omits set only
 * when the combine filter itself collects omits (filter->omits is
 * non-NULL); otherwise it receives NULL.
 *
 * The state is stored in filter->filter_data, with filter_combine as the
 * callback, filter_combine__free as the destructor and
 * filter_combine__finalize_omits as the omits finalizer.
 */
613 static void filter_combine__init(
614 struct list_objects_filter_options
*filter_options
,
615 struct filter
* filter
)
617 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
620 d
->nr
= filter_options
->sub_nr
;
621 d
->sub
= xcalloc(d
->nr
, sizeof(*d
->sub
));
622 for (sub
= 0; sub
< d
->nr
; sub
++)
623 d
->sub
[sub
].filter
= list_objects_filter__init(
624 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
625 &filter_options
->sub
[sub
]);
627 filter
->filter_data
= d
;
628 filter
->filter_object_fn
= filter_combine
;
629 filter
->free_fn
= filter_combine__free
;
630 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
/*
 * Signature shared by the per-filter initializers stored in s_filters.
 * An initializer reads filter_options and fills in the fields of filter.
 */
633 typedef void (*filter_init_fn
)(
634 struct list_objects_filter_options
*filter_options
,
635 struct filter
*filter
);
/*
 * Table of filter initializers, indexed by filter_options->choice in
 * list_objects_filter__init, which also asserts that the table has exactly
 * LOFC__COUNT entries.
 *
 * NOTE(review): the entries visible in this listing may not be the complete
 * table; check the order against the enum before editing.
 */
638 * Must match "enum list_objects_filter_choice".
640 static filter_init_fn s_filters
[] = {
642 filter_blobs_none__init
,
643 filter_blobs_limit__init
,
644 filter_trees_depth__init
,
645 filter_sparse_oid__init
,
646 filter_combine__init
,
/*
 * Create a filter object for the choice recorded in filter_options.
 *
 * omitted: optional set that receives the OIDs of omitted objects; it is
 *          stored in filter->omits before the initializer runs.
 * filter_options: selects the filter type through its choice member and is
 *          passed on to the type-specific initializer.
 *
 * Asserts that s_filters has LOFC__COUNT entries and raises BUG() when
 * choice is out of range. A zeroed struct filter is allocated and then
 * filled in by the initializer picked from s_filters.
 *
 * NOTE(review): the return statement and any handling of a missing
 * initializer are not visible in this listing.
 */
649 struct filter
*list_objects_filter__init(
650 struct oidset
*omitted
,
651 struct list_objects_filter_options
*filter_options
)
653 struct filter
*filter
;
654 filter_init_fn init_fn
;
656 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
658 if (filter_options
->choice
>= LOFC__COUNT
)
659 BUG("invalid list-objects filter choice: %d",
660 filter_options
->choice
);
662 init_fn
= s_filters
[filter_options
->choice
];
666 filter
= xcalloc(1, sizeof(*filter
));
667 filter
->omits
= omitted
;
668 init_fn(filter_options
, filter
);
/*
 * Ask the filter what to do with one object.
 *
 * When a filter is present and the object carries the NOT_USER_GIVEN flag,
 * the decision is delegated to filter->filter_object_fn, which receives
 * filter->filter_data as its last argument. Otherwise (no filter, or the
 * object was given explicitly by the user) the object is always shown:
 * LOFR_MARK_SEEN | LOFR_DO_SHOW is returned, except on LOFS_END_TREE, which
 * takes a separate path because the tree was already shown at
 * LOFS_BEGIN_TREE.
 *
 * NOTE(review): the value returned on the LOFS_END_TREE path is not visible
 * in this listing.
 */
672 enum list_objects_filter_result
list_objects_filter__filter_object(
673 struct repository
*r
,
674 enum list_objects_filter_situation filter_situation
,
676 const char *pathname
,
677 const char *filename
,
678 struct filter
*filter
)
680 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
681 return filter
->filter_object_fn(r
, filter_situation
, obj
,
684 filter
->filter_data
);
686 * No filter is active or user gave object explicitly. In this case,
687 * always show the object (except when LOFS_END_TREE, since this tree
688 * had already been shown when LOFS_BEGIN_TREE).
690 if (filter_situation
== LOFS_END_TREE
)
692 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
695 void list_objects_filter__free(struct filter
*filter
)
699 if (filter
->finalize_omits_fn
&& filter
->omits
)
700 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
701 filter
->free_fn(filter
->filter_data
);