1 #include "git-compat-util.h"
10 #include "tree-walk.h"
12 #include "list-objects.h"
13 #include "list-objects-filter.h"
14 #include "list-objects-filter-options.h"
17 #include "object-name.h"
18 #include "object-store-ll.h"
20 /* Remember to update object flag allocation in object.h */
22 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
23 * that have been shown, but should be revisited if they appear
24 * in the traversal (until we mark it SEEN). This is a way to
25 * let us silently de-dup calls to show() in the caller. This
26 * is subtly different from the "revision.h:SHOWN" and the
27 * "object-name.c:ONELINE_SEEN" bits. And also different from
28 * the non-de-dup usage in pack-bitmap.c
30 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
33 struct filter
*filter
;
36 struct object_id skip_tree
;
37 unsigned is_skipping_tree
: 1;
41 enum list_objects_filter_result (*filter_object_fn
)(
43 enum list_objects_filter_situation filter_situation
,
51 * Optional. If this function is supplied and the filter needs
52 * to collect omits, then this function is called once before
55 * This is required because the following two conditions hold:
57 * a. A tree filter can add and remove objects as an object
59 * b. A combine filter's omit set is the union of all its
60 * subfilters, which may include tree: filters.
62 * As such, the omits sets must be separate sets, and can only
63 * be unioned after the traversal is completed.
65 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
67 void (*free_fn
)(void *filter_data
);
71 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
75 static enum list_objects_filter_result
filter_blobs_none(
76 struct repository
*r UNUSED
,
77 enum list_objects_filter_situation filter_situation
,
79 const char *pathname UNUSED
,
80 const char *filename UNUSED
,
82 void *filter_data_ UNUSED
)
84 switch (filter_situation
) {
86 BUG("unknown filter_situation: %d", filter_situation
);
89 assert(obj
->type
== OBJ_TAG
);
90 /* always include all tag objects */
91 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
94 assert(obj
->type
== OBJ_COMMIT
);
95 /* always include all commit objects */
96 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
99 assert(obj
->type
== OBJ_TREE
);
100 /* always include all tree objects */
101 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
104 assert(obj
->type
== OBJ_TREE
);
108 assert(obj
->type
== OBJ_BLOB
);
109 assert((obj
->flags
& SEEN
) == 0);
112 oidset_insert(omits
, &obj
->oid
);
113 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
117 static void filter_blobs_none__init(
118 struct list_objects_filter_options
*filter_options UNUSED
,
119 struct filter
*filter
)
121 filter
->filter_object_fn
= filter_blobs_none
;
122 filter
->free_fn
= free
;
126 * A filter for list-objects to omit trees and blobs at depth >= exclude_depth.
127 * Can OPTIONALLY collect a list of the omitted OIDs.
129 struct filter_trees_depth_data
{
131 * Maps trees to the minimum depth at which they were seen. It is not
132 * necessary to re-traverse a tree at deeper or equal depths than it has
133 * already been traversed.
135 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
136 * it from being traversed at shallower depths.
138 struct oidmap seen_at_depth
;
140 unsigned long exclude_depth
;
141 unsigned long current_depth
;
144 struct seen_map_entry
{
145 struct oidmap_entry base
;
149 /* Returns 1 if the oid was in the omits set before it was invoked. */
150 static int filter_trees_update_omits(
152 struct oidset
*omits
,
159 return oidset_remove(omits
, &obj
->oid
);
161 return oidset_insert(omits
, &obj
->oid
);
164 static enum list_objects_filter_result
filter_trees_depth(
165 struct repository
*r UNUSED
,
166 enum list_objects_filter_situation filter_situation
,
168 const char *pathname UNUSED
,
169 const char *filename UNUSED
,
170 struct oidset
*omits
,
173 struct filter_trees_depth_data
*filter_data
= filter_data_
;
174 struct seen_map_entry
*seen_info
;
175 int include_it
= filter_data
->current_depth
<
176 filter_data
->exclude_depth
;
181 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
182 * case we encounter a tree or blob again at a shallower depth.
185 switch (filter_situation
) {
187 BUG("unknown filter_situation: %d", filter_situation
);
190 assert(obj
->type
== OBJ_TAG
);
191 /* always include all tag objects */
192 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
195 assert(obj
->type
== OBJ_COMMIT
);
196 /* always include all commit objects */
197 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
200 assert(obj
->type
== OBJ_TREE
);
201 filter_data
->current_depth
--;
205 filter_trees_update_omits(obj
, omits
, include_it
);
206 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
208 case LOFS_BEGIN_TREE
:
209 seen_info
= oidmap_get(
210 &filter_data
->seen_at_depth
, &obj
->oid
);
212 CALLOC_ARRAY(seen_info
, 1);
213 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
214 seen_info
->depth
= filter_data
->current_depth
;
215 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
219 filter_data
->current_depth
>= seen_info
->depth
;
223 filter_res
= LOFR_SKIP_TREE
;
225 int been_omitted
= filter_trees_update_omits(
226 obj
, omits
, include_it
);
227 seen_info
->depth
= filter_data
->current_depth
;
230 filter_res
= LOFR_DO_SHOW
;
231 else if (omits
&& !been_omitted
)
233 * Must update omit information of children
234 * recursively; they have not been omitted yet.
236 filter_res
= LOFR_ZERO
;
238 filter_res
= LOFR_SKIP_TREE
;
241 filter_data
->current_depth
++;
246 static void filter_trees_free(void *filter_data
) {
247 struct filter_trees_depth_data
*d
= filter_data
;
250 oidmap_free(&d
->seen_at_depth
, 1);
254 static void filter_trees_depth__init(
255 struct list_objects_filter_options
*filter_options
,
256 struct filter
*filter
)
258 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
259 oidmap_init(&d
->seen_at_depth
, 0);
260 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
261 d
->current_depth
= 0;
263 filter
->filter_data
= d
;
264 filter
->filter_object_fn
= filter_trees_depth
;
265 filter
->free_fn
= filter_trees_free
;
269 * A filter for list-objects to omit large blobs.
270 * And to OPTIONALLY collect a list of the omitted OIDs.
272 struct filter_blobs_limit_data
{
273 unsigned long max_bytes
;
276 static enum list_objects_filter_result
filter_blobs_limit(
277 struct repository
*r
,
278 enum list_objects_filter_situation filter_situation
,
280 const char *pathname UNUSED
,
281 const char *filename UNUSED
,
282 struct oidset
*omits
,
285 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
286 unsigned long object_length
;
289 switch (filter_situation
) {
291 BUG("unknown filter_situation: %d", filter_situation
);
294 assert(obj
->type
== OBJ_TAG
);
295 /* always include all tag objects */
296 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
299 assert(obj
->type
== OBJ_COMMIT
);
300 /* always include all commit objects */
301 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
303 case LOFS_BEGIN_TREE
:
304 assert(obj
->type
== OBJ_TREE
);
305 /* always include all tree objects */
306 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
309 assert(obj
->type
== OBJ_TREE
);
313 assert(obj
->type
== OBJ_BLOB
);
314 assert((obj
->flags
& SEEN
) == 0);
316 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
317 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
319 * We DO NOT have the blob locally, so we cannot
320 * apply the size filter criteria. Be conservative
321 * and force show it (and let the caller deal with
327 if (object_length
< filter_data
->max_bytes
)
331 oidset_insert(omits
, &obj
->oid
);
332 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
337 oidset_remove(omits
, &obj
->oid
);
338 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
341 static void filter_blobs_limit__init(
342 struct list_objects_filter_options
*filter_options
,
343 struct filter
*filter
)
345 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
346 d
->max_bytes
= filter_options
->blob_limit_value
;
348 filter
->filter_data
= d
;
349 filter
->filter_object_fn
= filter_blobs_limit
;
350 filter
->free_fn
= free
;
354 * A filter driven by a sparse-checkout specification to only
355 * include blobs that a sparse checkout would populate.
357 * The sparse-checkout spec is loaded from a blob with the
358 * given OID (see filter_sparse_oid__init). We allow an OID because
359 * the repo may be bare or we may be doing the filtering on the
364 * default_match is the usual default include/exclude value that
365 * should be inherited as we recurse into directories based
366 * upon pattern matching of the directory itself or of a
367 * containing directory.
369 enum pattern_match_result default_match
;
372 * 1 if the directory (recursively) contains any provisionally
375 * 0 if everything (recursively) contained in this directory
376 * has been explicitly included (SHOWN) in the result and
377 * the directory may be short-cut later in the traversal.
379 unsigned child_prov_omit
: 1;
382 struct filter_sparse_data
{
383 struct pattern_list pl
;
386 struct frame
*array_frame
;
389 static enum list_objects_filter_result
filter_sparse(
390 struct repository
*r
,
391 enum list_objects_filter_situation filter_situation
,
393 const char *pathname
,
394 const char *filename
,
395 struct oidset
*omits
,
398 struct filter_sparse_data
*filter_data
= filter_data_
;
401 enum pattern_match_result match
;
403 switch (filter_situation
) {
405 BUG("unknown filter_situation: %d", filter_situation
);
408 assert(obj
->type
== OBJ_TAG
);
409 /* always include all tag objects */
410 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
413 assert(obj
->type
== OBJ_COMMIT
);
414 /* always include all commit objects */
415 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
417 case LOFS_BEGIN_TREE
:
418 assert(obj
->type
== OBJ_TREE
);
420 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
421 filename
, &dtype
, &filter_data
->pl
,
423 if (match
== UNDECIDED
)
424 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
426 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
428 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
429 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
433 * A directory with this tree OID may appear in multiple
434 * places in the tree. (Think of a directory move or copy,
435 * with no other changes, so the OID is the same, but the
436 * full pathnames of objects within this directory are new
437 * and may match the sparse patterns differently.)
438 * So we cannot mark this directory as SEEN (yet), since
439 * that will prevent process_tree() from revisiting this
440 * tree object with other pathname prefixes.
442 * Only _DO_SHOW the tree object the first time we visit
445 * We always show all tree objects. A future optimization
446 * may want to attempt to narrow this.
448 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
450 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
454 assert(obj
->type
== OBJ_TREE
);
455 assert(filter_data
->nr
> 1);
457 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
460 * Tell our parent directory if any of our children were
461 * provisionally omitted.
463 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
464 frame
->child_prov_omit
;
467 * If there are NO provisionally omitted child objects (ALL child
468 * objects in this folder were INCLUDED), then we can mark the
469 * folder as SEEN (so we will not have to revisit it again).
471 if (!frame
->child_prov_omit
)
472 return LOFR_MARK_SEEN
;
476 assert(obj
->type
== OBJ_BLOB
);
477 assert((obj
->flags
& SEEN
) == 0);
479 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
482 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
483 filename
, &dtype
, &filter_data
->pl
,
485 if (match
== UNDECIDED
)
486 match
= frame
->default_match
;
487 if (match
== MATCHED
) {
489 oidset_remove(omits
, &obj
->oid
);
490 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
494 * Provisionally omit it. We've already established that
495 * this pathname is not in the sparse-checkout specification
496 * with the CURRENT pathname, so we *WANT* to omit this blob.
498 * However, a pathname elsewhere in the tree may also
499 * reference this same blob, so we cannot reject it yet.
500 * Leave the LOFR_ bits unset so that if the blob appears
501 * again in the traversal, we will be asked again.
504 oidset_insert(omits
, &obj
->oid
);
507 * Remember that at least 1 blob in this tree was
508 * provisionally omitted. This prevents us from short
509 * cutting the tree in future iterations.
511 frame
->child_prov_omit
= 1;
517 static void filter_sparse_free(void *filter_data
)
519 struct filter_sparse_data
*d
= filter_data
;
520 clear_pattern_list(&d
->pl
);
521 free(d
->array_frame
);
525 static void filter_sparse_oid__init(
526 struct list_objects_filter_options
*filter_options
,
527 struct filter
*filter
)
529 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
530 struct object_context oc
;
531 struct object_id sparse_oid
;
533 if (get_oid_with_context(the_repository
,
534 filter_options
->sparse_oid_name
,
535 GET_OID_BLOB
, &sparse_oid
, &oc
))
536 die(_("unable to access sparse blob in '%s'"),
537 filter_options
->sparse_oid_name
);
538 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
539 die(_("unable to parse sparse filter data in %s"),
540 oid_to_hex(&sparse_oid
));
542 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
543 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
544 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
547 filter
->filter_data
= d
;
548 filter
->filter_object_fn
= filter_sparse
;
549 filter
->free_fn
= filter_sparse_free
;
553 * A filter for list-objects to only show objects of a single type.
554 * This filter does not collect a list of the omitted OIDs.
556 struct filter_object_type_data
{
557 enum object_type object_type
;
560 static enum list_objects_filter_result
filter_object_type(
561 struct repository
*r UNUSED
,
562 enum list_objects_filter_situation filter_situation
,
564 const char *pathname UNUSED
,
565 const char *filename UNUSED
,
566 struct oidset
*omits UNUSED
,
569 struct filter_object_type_data
*filter_data
= filter_data_
;
571 switch (filter_situation
) {
573 BUG("unknown filter_situation: %d", filter_situation
);
576 assert(obj
->type
== OBJ_TAG
);
577 if (filter_data
->object_type
== OBJ_TAG
)
578 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
579 return LOFR_MARK_SEEN
;
582 assert(obj
->type
== OBJ_COMMIT
);
583 if (filter_data
->object_type
== OBJ_COMMIT
)
584 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
585 return LOFR_MARK_SEEN
;
587 case LOFS_BEGIN_TREE
:
588 assert(obj
->type
== OBJ_TREE
);
591 * If we only want to show commits or tags, then there is no
592 * need to walk down trees.
594 if (filter_data
->object_type
== OBJ_COMMIT
||
595 filter_data
->object_type
== OBJ_TAG
)
596 return LOFR_SKIP_TREE
;
598 if (filter_data
->object_type
== OBJ_TREE
)
599 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
601 return LOFR_MARK_SEEN
;
604 assert(obj
->type
== OBJ_BLOB
);
606 if (filter_data
->object_type
== OBJ_BLOB
)
607 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
608 return LOFR_MARK_SEEN
;
615 static void filter_object_type__init(
616 struct list_objects_filter_options
*filter_options
,
617 struct filter
*filter
)
619 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
620 d
->object_type
= filter_options
->object_type
;
622 filter
->filter_data
= d
;
623 filter
->filter_object_fn
= filter_object_type
;
624 filter
->free_fn
= free
;
627 /* A filter which only shows objects shown by all sub-filters. */
628 struct combine_filter_data
{
629 struct subfilter
*sub
;
633 static enum list_objects_filter_result
process_subfilter(
634 struct repository
*r
,
635 enum list_objects_filter_situation filter_situation
,
637 const char *pathname
,
638 const char *filename
,
639 struct subfilter
*sub
)
641 enum list_objects_filter_result result
;
644 * Check and update is_skipping_tree before oidset_contains so
645 * that is_skipping_tree gets unset even when the object is
646 * marked as seen. As of this writing, no filter uses
647 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
648 * ordering is only theoretically important. Be cautious if you
649 * change the order of the below checks and more filters have
652 if (sub
->is_skipping_tree
) {
653 if (filter_situation
== LOFS_END_TREE
&&
654 oideq(&obj
->oid
, &sub
->skip_tree
))
655 sub
->is_skipping_tree
= 0;
659 if (oidset_contains(&sub
->seen
, &obj
->oid
))
662 result
= list_objects_filter__filter_object(
663 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
665 if (result
& LOFR_MARK_SEEN
)
666 oidset_insert(&sub
->seen
, &obj
->oid
);
668 if (result
& LOFR_SKIP_TREE
) {
669 sub
->is_skipping_tree
= 1;
670 sub
->skip_tree
= obj
->oid
;
676 static enum list_objects_filter_result
filter_combine(
677 struct repository
*r
,
678 enum list_objects_filter_situation filter_situation
,
680 const char *pathname
,
681 const char *filename
,
682 struct oidset
*omits UNUSED
,
685 struct combine_filter_data
*d
= filter_data
;
686 enum list_objects_filter_result combined_result
=
687 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
690 for (sub
= 0; sub
< d
->nr
; sub
++) {
691 enum list_objects_filter_result sub_result
= process_subfilter(
692 r
, filter_situation
, obj
, pathname
, filename
,
694 if (!(sub_result
& LOFR_DO_SHOW
))
695 combined_result
&= ~LOFR_DO_SHOW
;
696 if (!(sub_result
& LOFR_MARK_SEEN
))
697 combined_result
&= ~LOFR_MARK_SEEN
;
698 if (!d
->sub
[sub
].is_skipping_tree
)
699 combined_result
&= ~LOFR_SKIP_TREE
;
702 return combined_result
;
705 static void filter_combine__free(void *filter_data
)
707 struct combine_filter_data
*d
= filter_data
;
709 for (sub
= 0; sub
< d
->nr
; sub
++) {
710 list_objects_filter__free(d
->sub
[sub
].filter
);
711 oidset_clear(&d
->sub
[sub
].seen
);
712 if (d
->sub
[sub
].omits
.set
.size
)
713 BUG("expected oidset to be cleared already");
719 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
720 struct oidset_iter iter
;
721 struct object_id
*src_oid
;
723 oidset_iter_init(src
, &iter
);
724 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
725 oidset_insert(dest
, src_oid
);
728 static void filter_combine__finalize_omits(
729 struct oidset
*omits
,
732 struct combine_filter_data
*d
= filter_data
;
735 for (sub
= 0; sub
< d
->nr
; sub
++) {
736 add_all(omits
, &d
->sub
[sub
].omits
);
737 oidset_clear(&d
->sub
[sub
].omits
);
741 static void filter_combine__init(
742 struct list_objects_filter_options
*filter_options
,
743 struct filter
* filter
)
745 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
748 d
->nr
= filter_options
->sub_nr
;
749 CALLOC_ARRAY(d
->sub
, d
->nr
);
750 for (sub
= 0; sub
< d
->nr
; sub
++)
751 d
->sub
[sub
].filter
= list_objects_filter__init(
752 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
753 &filter_options
->sub
[sub
]);
755 filter
->filter_data
= d
;
756 filter
->filter_object_fn
= filter_combine
;
757 filter
->free_fn
= filter_combine__free
;
758 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
761 typedef void (*filter_init_fn
)(
762 struct list_objects_filter_options
*filter_options
,
763 struct filter
*filter
);
766 * Must match "enum list_objects_filter_choice".
768 static filter_init_fn s_filters
[] = {
770 filter_blobs_none__init
,
771 filter_blobs_limit__init
,
772 filter_trees_depth__init
,
773 filter_sparse_oid__init
,
774 filter_object_type__init
,
775 filter_combine__init
,
778 struct filter
*list_objects_filter__init(
779 struct oidset
*omitted
,
780 struct list_objects_filter_options
*filter_options
)
782 struct filter
*filter
;
783 filter_init_fn init_fn
;
785 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
790 if (filter_options
->choice
>= LOFC__COUNT
)
791 BUG("invalid list-objects filter choice: %d",
792 filter_options
->choice
);
794 init_fn
= s_filters
[filter_options
->choice
];
798 CALLOC_ARRAY(filter
, 1);
799 filter
->omits
= omitted
;
800 init_fn(filter_options
, filter
);
804 enum list_objects_filter_result
list_objects_filter__filter_object(
805 struct repository
*r
,
806 enum list_objects_filter_situation filter_situation
,
808 const char *pathname
,
809 const char *filename
,
810 struct filter
*filter
)
812 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
813 return filter
->filter_object_fn(r
, filter_situation
, obj
,
816 filter
->filter_data
);
818 * No filter is active or user gave object explicitly. In this case,
819 * always show the object (except when LOFS_END_TREE, since this tree
820 * had already been shown when LOFS_BEGIN_TREE).
822 if (filter_situation
== LOFS_END_TREE
)
824 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
827 void list_objects_filter__free(struct filter
*filter
)
831 if (filter
->finalize_omits_fn
&& filter
->omits
)
832 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
833 filter
->free_fn(filter
->filter_data
);