10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
15 #include "object-store.h"
17 /* Remember to update object flag allocation in object.h */
19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
20 * that have been shown, but should be revisited if they appear
21 * in the traversal (until we mark it SEEN). This is a way to
22 * let us silently de-dup calls to show() in the caller. This
23 * is subtly different from the "revision.h:SHOWN" and the
24 * "object-name.c:ONELINE_SEEN" bits. And also different from
25 * the non-de-dup usage in pack-bitmap.c
27 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
30 struct filter
*filter
;
33 struct object_id skip_tree
;
34 unsigned is_skipping_tree
: 1;
38 enum list_objects_filter_result (*filter_object_fn
)(
40 enum list_objects_filter_situation filter_situation
,
48 * Optional. If this function is supplied and the filter needs
49 * to collect omits, then it is called once, before free_fn is called.
52 * This is required because the following two conditions hold:
54 * a. A tree filter can add and remove omitted objects as the traversal proceeds.
56 * b. A combine filter's omit set is the union of all its
57 * subfilters, which may include tree: filters.
59 * As such, the omits sets must be separate sets, and can only
60 * be unioned after the traversal is completed.
62 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
64 void (*free_fn
)(void *filter_data
);
68 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
72 static enum list_objects_filter_result
filter_blobs_none(
73 struct repository
*r UNUSED
,
74 enum list_objects_filter_situation filter_situation
,
76 const char *pathname UNUSED
,
77 const char *filename UNUSED
,
79 void *filter_data_ UNUSED
)
81 switch (filter_situation
) {
83 BUG("unknown filter_situation: %d", filter_situation
);
86 assert(obj
->type
== OBJ_TAG
);
87 /* always include all tag objects */
88 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
91 assert(obj
->type
== OBJ_COMMIT
);
92 /* always include all commit objects */
93 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
96 assert(obj
->type
== OBJ_TREE
);
97 /* always include all tree objects */
98 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
101 assert(obj
->type
== OBJ_TREE
);
105 assert(obj
->type
== OBJ_BLOB
);
106 assert((obj
->flags
& SEEN
) == 0);
109 oidset_insert(omits
, &obj
->oid
);
110 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
114 static void filter_blobs_none__init(
115 struct list_objects_filter_options
*filter_options UNUSED
,
116 struct filter
*filter
)
118 filter
->filter_object_fn
= filter_blobs_none
;
119 filter
->free_fn
= free
;
123 * A filter for list-objects to omit trees and blobs at or beyond a given depth.
124 * Can OPTIONALLY collect a list of the omitted OIDs.
126 struct filter_trees_depth_data
{
128 * Maps trees to the minimum depth at which they were seen. It is not
129 * necessary to re-traverse a tree at deeper or equal depths than it has
130 * already been traversed.
132 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
133 * it from being traversed at shallower depths.
135 struct oidmap seen_at_depth
;
137 unsigned long exclude_depth
;
138 unsigned long current_depth
;
141 struct seen_map_entry
{
142 struct oidmap_entry base
;
146 /* Returns 1 if the oid was in the omits set before it was invoked. */
147 static int filter_trees_update_omits(
149 struct oidset
*omits
,
156 return oidset_remove(omits
, &obj
->oid
);
158 return oidset_insert(omits
, &obj
->oid
);
161 static enum list_objects_filter_result
filter_trees_depth(
162 struct repository
*r UNUSED
,
163 enum list_objects_filter_situation filter_situation
,
165 const char *pathname UNUSED
,
166 const char *filename UNUSED
,
167 struct oidset
*omits
,
170 struct filter_trees_depth_data
*filter_data
= filter_data_
;
171 struct seen_map_entry
*seen_info
;
172 int include_it
= filter_data
->current_depth
<
173 filter_data
->exclude_depth
;
178 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
179 * case we encounter a tree or blob again at a shallower depth.
182 switch (filter_situation
) {
184 BUG("unknown filter_situation: %d", filter_situation
);
187 assert(obj
->type
== OBJ_TAG
);
188 /* always include all tag objects */
189 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
192 assert(obj
->type
== OBJ_COMMIT
);
193 /* always include all commit objects */
194 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
197 assert(obj
->type
== OBJ_TREE
);
198 filter_data
->current_depth
--;
202 filter_trees_update_omits(obj
, omits
, include_it
);
203 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
205 case LOFS_BEGIN_TREE
:
206 seen_info
= oidmap_get(
207 &filter_data
->seen_at_depth
, &obj
->oid
);
209 CALLOC_ARRAY(seen_info
, 1);
210 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
211 seen_info
->depth
= filter_data
->current_depth
;
212 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
216 filter_data
->current_depth
>= seen_info
->depth
;
220 filter_res
= LOFR_SKIP_TREE
;
222 int been_omitted
= filter_trees_update_omits(
223 obj
, omits
, include_it
);
224 seen_info
->depth
= filter_data
->current_depth
;
227 filter_res
= LOFR_DO_SHOW
;
228 else if (omits
&& !been_omitted
)
230 * Must update omit information of children
231 * recursively; they have not been omitted yet.
233 filter_res
= LOFR_ZERO
;
235 filter_res
= LOFR_SKIP_TREE
;
238 filter_data
->current_depth
++;
243 static void filter_trees_free(void *filter_data
) {
244 struct filter_trees_depth_data
*d
= filter_data
;
247 oidmap_free(&d
->seen_at_depth
, 1);
251 static void filter_trees_depth__init(
252 struct list_objects_filter_options
*filter_options
,
253 struct filter
*filter
)
255 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
256 oidmap_init(&d
->seen_at_depth
, 0);
257 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
258 d
->current_depth
= 0;
260 filter
->filter_data
= d
;
261 filter
->filter_object_fn
= filter_trees_depth
;
262 filter
->free_fn
= filter_trees_free
;
266 * A filter for list-objects to omit large blobs.
267 * And to OPTIONALLY collect a list of the omitted OIDs.
269 struct filter_blobs_limit_data
{
270 unsigned long max_bytes
;
273 static enum list_objects_filter_result
filter_blobs_limit(
274 struct repository
*r
,
275 enum list_objects_filter_situation filter_situation
,
277 const char *pathname UNUSED
,
278 const char *filename UNUSED
,
279 struct oidset
*omits
,
282 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
283 unsigned long object_length
;
286 switch (filter_situation
) {
288 BUG("unknown filter_situation: %d", filter_situation
);
291 assert(obj
->type
== OBJ_TAG
);
292 /* always include all tag objects */
293 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
296 assert(obj
->type
== OBJ_COMMIT
);
297 /* always include all commit objects */
298 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
300 case LOFS_BEGIN_TREE
:
301 assert(obj
->type
== OBJ_TREE
);
302 /* always include all tree objects */
303 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
306 assert(obj
->type
== OBJ_TREE
);
310 assert(obj
->type
== OBJ_BLOB
);
311 assert((obj
->flags
& SEEN
) == 0);
313 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
314 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
316 * We DO NOT have the blob locally, so we cannot
317 * apply the size filter criteria. Be conservative
318 * and force show it (and let the caller deal with
324 if (object_length
< filter_data
->max_bytes
)
328 oidset_insert(omits
, &obj
->oid
);
329 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
334 oidset_remove(omits
, &obj
->oid
);
335 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
338 static void filter_blobs_limit__init(
339 struct list_objects_filter_options
*filter_options
,
340 struct filter
*filter
)
342 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
343 d
->max_bytes
= filter_options
->blob_limit_value
;
345 filter
->filter_data
= d
;
346 filter
->filter_object_fn
= filter_blobs_limit
;
347 filter
->free_fn
= free
;
351 * A filter driven by a sparse-checkout specification to only
352 * include blobs that a sparse checkout would populate.
354 * The sparse-checkout spec can be loaded from a blob with the
355 * given OID or from a local pathname. We allow an OID because
356 * the repo may be bare or we may be doing the filtering on the
361 * default_match is the usual default include/exclude value that
362 * should be inherited as we recurse into directories based
363 * upon pattern matching of the directory itself or of a
364 * containing directory.
366 enum pattern_match_result default_match
;
369 * 1 if the directory (recursively) contains any provisionally omitted objects.
372 * 0 if everything (recursively) contained in this directory
373 * has been explicitly included (SHOWN) in the result and
374 * the directory may be short-cut later in the traversal.
376 unsigned child_prov_omit
: 1;
379 struct filter_sparse_data
{
380 struct pattern_list pl
;
383 struct frame
*array_frame
;
386 static enum list_objects_filter_result
filter_sparse(
387 struct repository
*r
,
388 enum list_objects_filter_situation filter_situation
,
390 const char *pathname
,
391 const char *filename
,
392 struct oidset
*omits
,
395 struct filter_sparse_data
*filter_data
= filter_data_
;
398 enum pattern_match_result match
;
400 switch (filter_situation
) {
402 BUG("unknown filter_situation: %d", filter_situation
);
405 assert(obj
->type
== OBJ_TAG
);
406 /* always include all tag objects */
407 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
410 assert(obj
->type
== OBJ_COMMIT
);
411 /* always include all commit objects */
412 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
414 case LOFS_BEGIN_TREE
:
415 assert(obj
->type
== OBJ_TREE
);
417 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
418 filename
, &dtype
, &filter_data
->pl
,
420 if (match
== UNDECIDED
)
421 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
423 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
425 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
426 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
430 * A directory with this tree OID may appear in multiple
431 * places in the tree. (Think of a directory move or copy,
432 * with no other changes, so the OID is the same, but the
433 * full pathnames of objects within this directory are new
434 * and may match the sparse patterns differently.)
435 * So we cannot mark this directory as SEEN (yet), since
436 * that will prevent process_tree() from revisiting this
437 * tree object with other pathname prefixes.
439 * Only _DO_SHOW the tree object the first time we visit it.
442 * We always show all tree objects. A future optimization
443 * may want to attempt to narrow this.
445 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
447 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
451 assert(obj
->type
== OBJ_TREE
);
452 assert(filter_data
->nr
> 1);
454 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
457 * Tell our parent directory if any of our children were
458 * provisionally omitted.
460 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
461 frame
->child_prov_omit
;
464 * If there are NO provisionally omitted child objects (ALL child
465 * objects in this folder were INCLUDED), then we can mark the
466 * folder as SEEN (so we will not have to revisit it again).
468 if (!frame
->child_prov_omit
)
469 return LOFR_MARK_SEEN
;
473 assert(obj
->type
== OBJ_BLOB
);
474 assert((obj
->flags
& SEEN
) == 0);
476 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
479 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
480 filename
, &dtype
, &filter_data
->pl
,
482 if (match
== UNDECIDED
)
483 match
= frame
->default_match
;
484 if (match
== MATCHED
) {
486 oidset_remove(omits
, &obj
->oid
);
487 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
491 * Provisionally omit it. We've already established that
492 * this blob is not in the sparse-checkout specification
493 * under its CURRENT pathname, so we *WANT* to omit this blob.
495 * However, a pathname elsewhere in the tree may also
496 * reference this same blob, so we cannot reject it yet.
497 * Leave the LOFR_ bits unset so that if the blob appears
498 * again in the traversal, we will be asked again.
501 oidset_insert(omits
, &obj
->oid
);
504 * Remember that at least 1 blob in this tree was
505 * provisionally omitted. This prevents us from short
506 * cutting the tree in future iterations.
508 frame
->child_prov_omit
= 1;
514 static void filter_sparse_free(void *filter_data
)
516 struct filter_sparse_data
*d
= filter_data
;
517 clear_pattern_list(&d
->pl
);
518 free(d
->array_frame
);
522 static void filter_sparse_oid__init(
523 struct list_objects_filter_options
*filter_options
,
524 struct filter
*filter
)
526 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
527 struct object_context oc
;
528 struct object_id sparse_oid
;
530 if (get_oid_with_context(the_repository
,
531 filter_options
->sparse_oid_name
,
532 GET_OID_BLOB
, &sparse_oid
, &oc
))
533 die(_("unable to access sparse blob in '%s'"),
534 filter_options
->sparse_oid_name
);
535 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
536 die(_("unable to parse sparse filter data in %s"),
537 oid_to_hex(&sparse_oid
));
539 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
540 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
541 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
544 filter
->filter_data
= d
;
545 filter
->filter_object_fn
= filter_sparse
;
546 filter
->free_fn
= filter_sparse_free
;
550 * A filter for list-objects to show only objects of a single given type
551 * (tag, commit, tree or blob). It does not collect omitted OIDs.
553 struct filter_object_type_data
{
554 enum object_type object_type
;
557 static enum list_objects_filter_result
filter_object_type(
558 struct repository
*r UNUSED
,
559 enum list_objects_filter_situation filter_situation
,
561 const char *pathname UNUSED
,
562 const char *filename UNUSED
,
563 struct oidset
*omits UNUSED
,
566 struct filter_object_type_data
*filter_data
= filter_data_
;
568 switch (filter_situation
) {
570 BUG("unknown filter_situation: %d", filter_situation
);
573 assert(obj
->type
== OBJ_TAG
);
574 if (filter_data
->object_type
== OBJ_TAG
)
575 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
576 return LOFR_MARK_SEEN
;
579 assert(obj
->type
== OBJ_COMMIT
);
580 if (filter_data
->object_type
== OBJ_COMMIT
)
581 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
582 return LOFR_MARK_SEEN
;
584 case LOFS_BEGIN_TREE
:
585 assert(obj
->type
== OBJ_TREE
);
588 * If we only want to show commits or tags, then there is no
589 * need to walk down trees.
591 if (filter_data
->object_type
== OBJ_COMMIT
||
592 filter_data
->object_type
== OBJ_TAG
)
593 return LOFR_SKIP_TREE
;
595 if (filter_data
->object_type
== OBJ_TREE
)
596 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
598 return LOFR_MARK_SEEN
;
601 assert(obj
->type
== OBJ_BLOB
);
603 if (filter_data
->object_type
== OBJ_BLOB
)
604 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
605 return LOFR_MARK_SEEN
;
612 static void filter_object_type__init(
613 struct list_objects_filter_options
*filter_options
,
614 struct filter
*filter
)
616 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
617 d
->object_type
= filter_options
->object_type
;
619 filter
->filter_data
= d
;
620 filter
->filter_object_fn
= filter_object_type
;
621 filter
->free_fn
= free
;
624 /* A filter which only shows objects shown by all sub-filters. */
625 struct combine_filter_data
{
626 struct subfilter
*sub
;
630 static enum list_objects_filter_result
process_subfilter(
631 struct repository
*r
,
632 enum list_objects_filter_situation filter_situation
,
634 const char *pathname
,
635 const char *filename
,
636 struct subfilter
*sub
)
638 enum list_objects_filter_result result
;
641 * Check and update is_skipping_tree before oidset_contains so
642 * that is_skipping_tree gets unset even when the object is
643 * marked as seen. As of this writing, no filter uses
644 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
645 * ordering is only theoretically important. Be cautious if you
646 * change the order of the below checks once more filters exist.
649 if (sub
->is_skipping_tree
) {
650 if (filter_situation
== LOFS_END_TREE
&&
651 oideq(&obj
->oid
, &sub
->skip_tree
))
652 sub
->is_skipping_tree
= 0;
656 if (oidset_contains(&sub
->seen
, &obj
->oid
))
659 result
= list_objects_filter__filter_object(
660 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
662 if (result
& LOFR_MARK_SEEN
)
663 oidset_insert(&sub
->seen
, &obj
->oid
);
665 if (result
& LOFR_SKIP_TREE
) {
666 sub
->is_skipping_tree
= 1;
667 sub
->skip_tree
= obj
->oid
;
673 static enum list_objects_filter_result
filter_combine(
674 struct repository
*r
,
675 enum list_objects_filter_situation filter_situation
,
677 const char *pathname
,
678 const char *filename
,
679 struct oidset
*omits UNUSED
,
682 struct combine_filter_data
*d
= filter_data
;
683 enum list_objects_filter_result combined_result
=
684 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
687 for (sub
= 0; sub
< d
->nr
; sub
++) {
688 enum list_objects_filter_result sub_result
= process_subfilter(
689 r
, filter_situation
, obj
, pathname
, filename
,
691 if (!(sub_result
& LOFR_DO_SHOW
))
692 combined_result
&= ~LOFR_DO_SHOW
;
693 if (!(sub_result
& LOFR_MARK_SEEN
))
694 combined_result
&= ~LOFR_MARK_SEEN
;
695 if (!d
->sub
[sub
].is_skipping_tree
)
696 combined_result
&= ~LOFR_SKIP_TREE
;
699 return combined_result
;
702 static void filter_combine__free(void *filter_data
)
704 struct combine_filter_data
*d
= filter_data
;
706 for (sub
= 0; sub
< d
->nr
; sub
++) {
707 list_objects_filter__free(d
->sub
[sub
].filter
);
708 oidset_clear(&d
->sub
[sub
].seen
);
709 if (d
->sub
[sub
].omits
.set
.size
)
710 BUG("expected oidset to be cleared already");
716 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
717 struct oidset_iter iter
;
718 struct object_id
*src_oid
;
720 oidset_iter_init(src
, &iter
);
721 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
722 oidset_insert(dest
, src_oid
);
725 static void filter_combine__finalize_omits(
726 struct oidset
*omits
,
729 struct combine_filter_data
*d
= filter_data
;
732 for (sub
= 0; sub
< d
->nr
; sub
++) {
733 add_all(omits
, &d
->sub
[sub
].omits
);
734 oidset_clear(&d
->sub
[sub
].omits
);
738 static void filter_combine__init(
739 struct list_objects_filter_options
*filter_options
,
740 struct filter
* filter
)
742 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
745 d
->nr
= filter_options
->sub_nr
;
746 CALLOC_ARRAY(d
->sub
, d
->nr
);
747 for (sub
= 0; sub
< d
->nr
; sub
++)
748 d
->sub
[sub
].filter
= list_objects_filter__init(
749 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
750 &filter_options
->sub
[sub
]);
752 filter
->filter_data
= d
;
753 filter
->filter_object_fn
= filter_combine
;
754 filter
->free_fn
= filter_combine__free
;
755 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
758 typedef void (*filter_init_fn
)(
759 struct list_objects_filter_options
*filter_options
,
760 struct filter
*filter
);
763 * Must match "enum list_objects_filter_choice".
765 static filter_init_fn s_filters
[] = {
767 filter_blobs_none__init
,
768 filter_blobs_limit__init
,
769 filter_trees_depth__init
,
770 filter_sparse_oid__init
,
771 filter_object_type__init
,
772 filter_combine__init
,
775 struct filter
*list_objects_filter__init(
776 struct oidset
*omitted
,
777 struct list_objects_filter_options
*filter_options
)
779 struct filter
*filter
;
780 filter_init_fn init_fn
;
782 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
787 if (filter_options
->choice
>= LOFC__COUNT
)
788 BUG("invalid list-objects filter choice: %d",
789 filter_options
->choice
);
791 init_fn
= s_filters
[filter_options
->choice
];
795 CALLOC_ARRAY(filter
, 1);
796 filter
->omits
= omitted
;
797 init_fn(filter_options
, filter
);
801 enum list_objects_filter_result
list_objects_filter__filter_object(
802 struct repository
*r
,
803 enum list_objects_filter_situation filter_situation
,
805 const char *pathname
,
806 const char *filename
,
807 struct filter
*filter
)
809 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
810 return filter
->filter_object_fn(r
, filter_situation
, obj
,
813 filter
->filter_data
);
815 * No filter is active or user gave object explicitly. In this case,
816 * always show the object (except when LOFS_END_TREE, since this tree
817 * had already been shown when LOFS_BEGIN_TREE).
819 if (filter_situation
== LOFS_END_TREE
)
821 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
824 void list_objects_filter__free(struct filter
*filter
)
828 if (filter
->finalize_omits_fn
&& filter
->omits
)
829 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
830 filter
->free_fn(filter
->filter_data
);