1 #include "git-compat-util.h"
8 #include "list-objects-filter.h"
9 #include "list-objects-filter-options.h"
12 #include "object-name.h"
13 #include "object-store-ll.h"
15 /* Remember to update object flag allocation in object.h */
17 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
18 * that have been shown, but should be revisited if they appear
19 * in the traversal (until we mark it SEEN). This is a way to
20 * let us silently de-dup calls to show() in the caller. This
21 * is subtly different from the "revision.h:SHOWN" and the
22 * "object-name.c:ONELINE_SEEN" bits. And also different from
23 * the non-de-dup usage in pack-bitmap.c
25 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
28 struct filter
*filter
;
31 struct object_id skip_tree
;
32 unsigned is_skipping_tree
: 1;
36 enum list_objects_filter_result (*filter_object_fn
)(
38 enum list_objects_filter_situation filter_situation
,
46 * Optional. If this function is supplied and the filter needs
47 * to collect omits, then this function is called once before
50 * This is required because the following two conditions hold:
52 * a. A tree filter can add and remove objects as an object
54 * b. A combine filter's omit set is the union of all its
55 * subfilters, which may include tree: filters.
57 * As such, the omit sets must be separate sets, and can only
58 * be unioned after the traversal is completed.
60 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
62 void (*free_fn
)(void *filter_data
);
66 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
70 static enum list_objects_filter_result
filter_blobs_none(
71 struct repository
*r UNUSED
,
72 enum list_objects_filter_situation filter_situation
,
74 const char *pathname UNUSED
,
75 const char *filename UNUSED
,
77 void *filter_data_ UNUSED
)
79 switch (filter_situation
) {
81 BUG("unknown filter_situation: %d", filter_situation
);
84 assert(obj
->type
== OBJ_TAG
);
85 /* always include all tag objects */
86 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
89 assert(obj
->type
== OBJ_COMMIT
);
90 /* always include all commit objects */
91 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
94 assert(obj
->type
== OBJ_TREE
);
95 /* always include all tree objects */
96 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
99 assert(obj
->type
== OBJ_TREE
);
103 assert(obj
->type
== OBJ_BLOB
);
104 assert((obj
->flags
& SEEN
) == 0);
107 oidset_insert(omits
, &obj
->oid
);
108 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
112 static void filter_blobs_none__init(
113 struct list_objects_filter_options
*filter_options UNUSED
,
114 struct filter
*filter
)
116 filter
->filter_object_fn
= filter_blobs_none
;
117 filter
->free_fn
= free
;
121 * A filter for list-objects to omit trees and blobs at or beyond a given depth.
122 * Can OPTIONALLY collect a list of the omitted OIDs.
124 struct filter_trees_depth_data
{
126 * Maps trees to the minimum depth at which they were seen. It is not
127 * necessary to re-traverse a tree at a depth equal to or deeper than one
128 * at which it has already been traversed.
130 * We can't use LOFR_MARK_SEEN for tree objects since this would prevent
131 * them from being traversed at shallower depths.
133 struct oidmap seen_at_depth
;
135 unsigned long exclude_depth
;
136 unsigned long current_depth
;
139 struct seen_map_entry
{
140 struct oidmap_entry base
;
144 /* Returns 1 if the oid was in the omits set before it was invoked. */
145 static int filter_trees_update_omits(
147 struct oidset
*omits
,
154 return oidset_remove(omits
, &obj
->oid
);
156 return oidset_insert(omits
, &obj
->oid
);
159 static enum list_objects_filter_result
filter_trees_depth(
160 struct repository
*r UNUSED
,
161 enum list_objects_filter_situation filter_situation
,
163 const char *pathname UNUSED
,
164 const char *filename UNUSED
,
165 struct oidset
*omits
,
168 struct filter_trees_depth_data
*filter_data
= filter_data_
;
169 struct seen_map_entry
*seen_info
;
170 int include_it
= filter_data
->current_depth
<
171 filter_data
->exclude_depth
;
176 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
177 * case we encounter a tree or blob again at a shallower depth.
180 switch (filter_situation
) {
182 BUG("unknown filter_situation: %d", filter_situation
);
185 assert(obj
->type
== OBJ_TAG
);
186 /* always include all tag objects */
187 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
190 assert(obj
->type
== OBJ_COMMIT
);
191 /* always include all commit objects */
192 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
195 assert(obj
->type
== OBJ_TREE
);
196 filter_data
->current_depth
--;
200 filter_trees_update_omits(obj
, omits
, include_it
);
201 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
203 case LOFS_BEGIN_TREE
:
204 seen_info
= oidmap_get(
205 &filter_data
->seen_at_depth
, &obj
->oid
);
207 CALLOC_ARRAY(seen_info
, 1);
208 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
209 seen_info
->depth
= filter_data
->current_depth
;
210 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
214 filter_data
->current_depth
>= seen_info
->depth
;
218 filter_res
= LOFR_SKIP_TREE
;
220 int been_omitted
= filter_trees_update_omits(
221 obj
, omits
, include_it
);
222 seen_info
->depth
= filter_data
->current_depth
;
225 filter_res
= LOFR_DO_SHOW
;
226 else if (omits
&& !been_omitted
)
228 * Must update omit information of children
229 * recursively; they have not been omitted yet.
231 filter_res
= LOFR_ZERO
;
233 filter_res
= LOFR_SKIP_TREE
;
236 filter_data
->current_depth
++;
241 static void filter_trees_free(void *filter_data
) {
242 struct filter_trees_depth_data
*d
= filter_data
;
245 oidmap_free(&d
->seen_at_depth
, 1);
249 static void filter_trees_depth__init(
250 struct list_objects_filter_options
*filter_options
,
251 struct filter
*filter
)
253 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
254 oidmap_init(&d
->seen_at_depth
, 0);
255 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
256 d
->current_depth
= 0;
258 filter
->filter_data
= d
;
259 filter
->filter_object_fn
= filter_trees_depth
;
260 filter
->free_fn
= filter_trees_free
;
264 * A filter for list-objects to omit large blobs.
265 * And to OPTIONALLY collect a list of the omitted OIDs.
267 struct filter_blobs_limit_data
{
268 unsigned long max_bytes
;
271 static enum list_objects_filter_result
filter_blobs_limit(
272 struct repository
*r
,
273 enum list_objects_filter_situation filter_situation
,
275 const char *pathname UNUSED
,
276 const char *filename UNUSED
,
277 struct oidset
*omits
,
280 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
281 unsigned long object_length
;
284 switch (filter_situation
) {
286 BUG("unknown filter_situation: %d", filter_situation
);
289 assert(obj
->type
== OBJ_TAG
);
290 /* always include all tag objects */
291 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
294 assert(obj
->type
== OBJ_COMMIT
);
295 /* always include all commit objects */
296 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
298 case LOFS_BEGIN_TREE
:
299 assert(obj
->type
== OBJ_TREE
);
300 /* always include all tree objects */
301 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
304 assert(obj
->type
== OBJ_TREE
);
308 assert(obj
->type
== OBJ_BLOB
);
309 assert((obj
->flags
& SEEN
) == 0);
311 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
312 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
314 * We DO NOT have the blob locally, so we cannot
315 * apply the size filter criteria. Be conservative
316 * and force show it (and let the caller deal with
322 if (object_length
< filter_data
->max_bytes
)
326 oidset_insert(omits
, &obj
->oid
);
327 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
332 oidset_remove(omits
, &obj
->oid
);
333 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
336 static void filter_blobs_limit__init(
337 struct list_objects_filter_options
*filter_options
,
338 struct filter
*filter
)
340 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
341 d
->max_bytes
= filter_options
->blob_limit_value
;
343 filter
->filter_data
= d
;
344 filter
->filter_object_fn
= filter_blobs_limit
;
345 filter
->free_fn
= free
;
349 * A filter driven by a sparse-checkout specification to only
350 * include blobs that a sparse checkout would populate.
352 * The sparse-checkout spec can be loaded from a blob with the
353 * given OID or from a local pathname. We allow an OID because
354 * the repo may be bare or we may be doing the filtering on the
359 * default_match is the usual default include/exclude value that
360 * should be inherited as we recurse into directories based
361 * upon pattern matching of the directory itself or of a
362 * containing directory.
364 enum pattern_match_result default_match
;
367 * 1 if the directory (recursively) contains any provisionally
370 * 0 if everything (recursively) contained in this directory
371 * has been explicitly included (SHOWN) in the result and
372 * the directory may be short-cut later in the traversal.
374 unsigned child_prov_omit
: 1;
377 struct filter_sparse_data
{
378 struct pattern_list pl
;
381 struct frame
*array_frame
;
384 static enum list_objects_filter_result
filter_sparse(
385 struct repository
*r
,
386 enum list_objects_filter_situation filter_situation
,
388 const char *pathname
,
389 const char *filename
,
390 struct oidset
*omits
,
393 struct filter_sparse_data
*filter_data
= filter_data_
;
396 enum pattern_match_result match
;
398 switch (filter_situation
) {
400 BUG("unknown filter_situation: %d", filter_situation
);
403 assert(obj
->type
== OBJ_TAG
);
404 /* always include all tag objects */
405 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
408 assert(obj
->type
== OBJ_COMMIT
);
409 /* always include all commit objects */
410 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
412 case LOFS_BEGIN_TREE
:
413 assert(obj
->type
== OBJ_TREE
);
415 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
416 filename
, &dtype
, &filter_data
->pl
,
418 if (match
== UNDECIDED
)
419 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
421 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
423 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
424 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
428 * A directory with this tree OID may appear in multiple
429 * places in the tree. (Think of a directory move or copy,
430 * with no other changes, so the OID is the same, but the
431 * full pathnames of objects within this directory are new
432 * and may match the sparse pattern list differently.)
433 * So we cannot mark this directory as SEEN (yet), since
434 * that will prevent process_tree() from revisiting this
435 * tree object with other pathname prefixes.
437 * Only _DO_SHOW the tree object the first time we visit
440 * We always show all tree objects. A future optimization
441 * may want to attempt to narrow this.
443 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
445 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
449 assert(obj
->type
== OBJ_TREE
);
450 assert(filter_data
->nr
> 1);
452 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
455 * Tell our parent directory if any of our children were
456 * provisionally omitted.
458 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
459 frame
->child_prov_omit
;
462 * If there are NO provisionally omitted child objects (ALL child
463 * objects in this folder were INCLUDED), then we can mark the
464 * folder as SEEN (so we will not have to revisit it again).
466 if (!frame
->child_prov_omit
)
467 return LOFR_MARK_SEEN
;
471 assert(obj
->type
== OBJ_BLOB
);
472 assert((obj
->flags
& SEEN
) == 0);
474 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
477 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
478 filename
, &dtype
, &filter_data
->pl
,
480 if (match
== UNDECIDED
)
481 match
= frame
->default_match
;
482 if (match
== MATCHED
) {
484 oidset_remove(omits
, &obj
->oid
);
485 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
489 * Provisionally omit it. We've already established that
490 * this blob is not in the sparse-checkout specification
491 * under the CURRENT pathname, so we *WANT* to omit this blob.
493 * However, a pathname elsewhere in the tree may also
494 * reference this same blob, so we cannot reject it yet.
495 * Leave the LOFR_ bits unset so that if the blob appears
496 * again in the traversal, we will be asked again.
499 oidset_insert(omits
, &obj
->oid
);
502 * Remember that at least 1 blob in this tree was
503 * provisionally omitted. This prevents us from short
504 * cutting the tree in future iterations.
506 frame
->child_prov_omit
= 1;
512 static void filter_sparse_free(void *filter_data
)
514 struct filter_sparse_data
*d
= filter_data
;
515 clear_pattern_list(&d
->pl
);
516 free(d
->array_frame
);
520 static void filter_sparse_oid__init(
521 struct list_objects_filter_options
*filter_options
,
522 struct filter
*filter
)
524 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
525 struct object_context oc
;
526 struct object_id sparse_oid
;
528 if (get_oid_with_context(the_repository
,
529 filter_options
->sparse_oid_name
,
530 GET_OID_BLOB
, &sparse_oid
, &oc
))
531 die(_("unable to access sparse blob in '%s'"),
532 filter_options
->sparse_oid_name
);
533 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
534 die(_("unable to parse sparse filter data in %s"),
535 oid_to_hex(&sparse_oid
));
537 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
538 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
539 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
542 filter
->filter_data
= d
;
543 filter
->filter_object_fn
= filter_sparse
;
544 filter
->free_fn
= filter_sparse_free
;
548 * A filter for list-objects to show only objects of a single given type.
549 * This filter does not record omitted OIDs (its omits argument is unused).
551 struct filter_object_type_data
{
552 enum object_type object_type
;
555 static enum list_objects_filter_result
filter_object_type(
556 struct repository
*r UNUSED
,
557 enum list_objects_filter_situation filter_situation
,
559 const char *pathname UNUSED
,
560 const char *filename UNUSED
,
561 struct oidset
*omits UNUSED
,
564 struct filter_object_type_data
*filter_data
= filter_data_
;
566 switch (filter_situation
) {
568 BUG("unknown filter_situation: %d", filter_situation
);
571 assert(obj
->type
== OBJ_TAG
);
572 if (filter_data
->object_type
== OBJ_TAG
)
573 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
574 return LOFR_MARK_SEEN
;
577 assert(obj
->type
== OBJ_COMMIT
);
578 if (filter_data
->object_type
== OBJ_COMMIT
)
579 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
580 return LOFR_MARK_SEEN
;
582 case LOFS_BEGIN_TREE
:
583 assert(obj
->type
== OBJ_TREE
);
586 * If we only want to show commits or tags, then there is no
587 * need to walk down trees.
589 if (filter_data
->object_type
== OBJ_COMMIT
||
590 filter_data
->object_type
== OBJ_TAG
)
591 return LOFR_SKIP_TREE
;
593 if (filter_data
->object_type
== OBJ_TREE
)
594 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
596 return LOFR_MARK_SEEN
;
599 assert(obj
->type
== OBJ_BLOB
);
601 if (filter_data
->object_type
== OBJ_BLOB
)
602 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
603 return LOFR_MARK_SEEN
;
610 static void filter_object_type__init(
611 struct list_objects_filter_options
*filter_options
,
612 struct filter
*filter
)
614 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
615 d
->object_type
= filter_options
->object_type
;
617 filter
->filter_data
= d
;
618 filter
->filter_object_fn
= filter_object_type
;
619 filter
->free_fn
= free
;
622 /* A filter which only shows objects shown by all sub-filters. */
623 struct combine_filter_data
{
624 struct subfilter
*sub
;
628 static enum list_objects_filter_result
process_subfilter(
629 struct repository
*r
,
630 enum list_objects_filter_situation filter_situation
,
632 const char *pathname
,
633 const char *filename
,
634 struct subfilter
*sub
)
636 enum list_objects_filter_result result
;
639 * Check and update is_skipping_tree before oidset_contains so
640 * that is_skipping_tree gets unset even when the object is
641 * marked as seen. As of this writing, no filter uses
642 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
643 * ordering is only theoretically important. Be cautious if you
644 * change the order of the below checks and more filters have
647 if (sub
->is_skipping_tree
) {
648 if (filter_situation
== LOFS_END_TREE
&&
649 oideq(&obj
->oid
, &sub
->skip_tree
))
650 sub
->is_skipping_tree
= 0;
654 if (oidset_contains(&sub
->seen
, &obj
->oid
))
657 result
= list_objects_filter__filter_object(
658 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
660 if (result
& LOFR_MARK_SEEN
)
661 oidset_insert(&sub
->seen
, &obj
->oid
);
663 if (result
& LOFR_SKIP_TREE
) {
664 sub
->is_skipping_tree
= 1;
665 sub
->skip_tree
= obj
->oid
;
671 static enum list_objects_filter_result
filter_combine(
672 struct repository
*r
,
673 enum list_objects_filter_situation filter_situation
,
675 const char *pathname
,
676 const char *filename
,
677 struct oidset
*omits UNUSED
,
680 struct combine_filter_data
*d
= filter_data
;
681 enum list_objects_filter_result combined_result
=
682 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
685 for (sub
= 0; sub
< d
->nr
; sub
++) {
686 enum list_objects_filter_result sub_result
= process_subfilter(
687 r
, filter_situation
, obj
, pathname
, filename
,
689 if (!(sub_result
& LOFR_DO_SHOW
))
690 combined_result
&= ~LOFR_DO_SHOW
;
691 if (!(sub_result
& LOFR_MARK_SEEN
))
692 combined_result
&= ~LOFR_MARK_SEEN
;
693 if (!d
->sub
[sub
].is_skipping_tree
)
694 combined_result
&= ~LOFR_SKIP_TREE
;
697 return combined_result
;
700 static void filter_combine__free(void *filter_data
)
702 struct combine_filter_data
*d
= filter_data
;
704 for (sub
= 0; sub
< d
->nr
; sub
++) {
705 list_objects_filter__free(d
->sub
[sub
].filter
);
706 oidset_clear(&d
->sub
[sub
].seen
);
707 if (d
->sub
[sub
].omits
.set
.size
)
708 BUG("expected oidset to be cleared already");
714 static void filter_combine__finalize_omits(
715 struct oidset
*omits
,
718 struct combine_filter_data
*d
= filter_data
;
721 for (sub
= 0; sub
< d
->nr
; sub
++) {
722 oidset_insert_from_set(omits
, &d
->sub
[sub
].omits
);
723 oidset_clear(&d
->sub
[sub
].omits
);
727 static void filter_combine__init(
728 struct list_objects_filter_options
*filter_options
,
729 struct filter
* filter
)
731 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
734 d
->nr
= filter_options
->sub_nr
;
735 CALLOC_ARRAY(d
->sub
, d
->nr
);
736 for (sub
= 0; sub
< d
->nr
; sub
++)
737 d
->sub
[sub
].filter
= list_objects_filter__init(
738 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
739 &filter_options
->sub
[sub
]);
741 filter
->filter_data
= d
;
742 filter
->filter_object_fn
= filter_combine
;
743 filter
->free_fn
= filter_combine__free
;
744 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
747 typedef void (*filter_init_fn
)(
748 struct list_objects_filter_options
*filter_options
,
749 struct filter
*filter
);
752 * Must match "enum list_objects_filter_choice".
754 static filter_init_fn s_filters
[] = {
756 filter_blobs_none__init
,
757 filter_blobs_limit__init
,
758 filter_trees_depth__init
,
759 filter_sparse_oid__init
,
760 filter_object_type__init
,
761 filter_combine__init
,
764 struct filter
*list_objects_filter__init(
765 struct oidset
*omitted
,
766 struct list_objects_filter_options
*filter_options
)
768 struct filter
*filter
;
769 filter_init_fn init_fn
;
771 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
776 if (filter_options
->choice
>= LOFC__COUNT
)
777 BUG("invalid list-objects filter choice: %d",
778 filter_options
->choice
);
780 init_fn
= s_filters
[filter_options
->choice
];
784 CALLOC_ARRAY(filter
, 1);
785 filter
->omits
= omitted
;
786 init_fn(filter_options
, filter
);
790 enum list_objects_filter_result
list_objects_filter__filter_object(
791 struct repository
*r
,
792 enum list_objects_filter_situation filter_situation
,
794 const char *pathname
,
795 const char *filename
,
796 struct filter
*filter
)
798 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
799 return filter
->filter_object_fn(r
, filter_situation
, obj
,
802 filter
->filter_data
);
804 * No filter is active or user gave object explicitly. In this case,
805 * always show the object (except when LOFS_END_TREE, since this tree
806 * had already been shown when LOFS_BEGIN_TREE).
808 if (filter_situation
== LOFS_END_TREE
)
810 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
813 void list_objects_filter__free(struct filter
*filter
)
817 if (filter
->finalize_omits_fn
&& filter
->omits
)
818 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
819 filter
->free_fn(filter
->filter_data
);