1 #include "git-compat-util.h"
11 #include "tree-walk.h"
13 #include "list-objects.h"
14 #include "list-objects-filter.h"
15 #include "list-objects-filter-options.h"
18 #include "object-name.h"
19 #include "object-store.h"
21 /* Remember to update object flag allocation in object.h */
23 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
24 * that have been shown, but should be revisited if they appear
25 * in the traversal (until we mark it SEEN). This is a way to
26 * let us silently de-dup calls to show() in the caller. This
27 * is subtly different from the "revision.h:SHOWN" and the
28 * "object-name.c:ONELINE_SEEN" bits. And also different from
29 * the non-de-dup usage in pack-bitmap.c
31 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
34 struct filter
*filter
;
37 struct object_id skip_tree
;
38 unsigned is_skipping_tree
: 1;
42 enum list_objects_filter_result (*filter_object_fn
)(
44 enum list_objects_filter_situation filter_situation
,
52 * Optional. If this function is supplied and the filter needs
53 * to collect omits, then this function is called once before
56 * This is required because the following two conditions hold:
58 * a. A tree filter can add and remove objects as an object
60 * b. A combine filter's omit set is the union of all its
61 * subfilters, which may include tree: filters.
63 * As such, the omits sets must be separate sets, and can only
64 * be unioned after the traversal is completed.
66 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
68 void (*free_fn
)(void *filter_data
);
72 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
76 static enum list_objects_filter_result
filter_blobs_none(
77 struct repository
*r UNUSED
,
78 enum list_objects_filter_situation filter_situation
,
80 const char *pathname UNUSED
,
81 const char *filename UNUSED
,
83 void *filter_data_ UNUSED
)
85 switch (filter_situation
) {
87 BUG("unknown filter_situation: %d", filter_situation
);
90 assert(obj
->type
== OBJ_TAG
);
91 /* always include all tag objects */
92 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
95 assert(obj
->type
== OBJ_COMMIT
);
96 /* always include all commit objects */
97 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
100 assert(obj
->type
== OBJ_TREE
);
101 /* always include all tree objects */
102 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
105 assert(obj
->type
== OBJ_TREE
);
109 assert(obj
->type
== OBJ_BLOB
);
110 assert((obj
->flags
& SEEN
) == 0);
113 oidset_insert(omits
, &obj
->oid
);
114 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
118 static void filter_blobs_none__init(
119 struct list_objects_filter_options
*filter_options UNUSED
,
120 struct filter
*filter
)
122 filter
->filter_object_fn
= filter_blobs_none
;
123 filter
->free_fn
= free
;
127 * A filter for list-objects to omit trees and blobs at or beyond a given
128 * depth from the traversal. Can OPTIONALLY collect a list of the omitted OIDs.
130 struct filter_trees_depth_data
{
132 * Maps trees to the minimum depth at which they were seen. It is not
133 * necessary to re-traverse a tree at deeper or equal depths than it has
134 * already been traversed.
136 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
137 * it from being traversed at shallower depths.
139 struct oidmap seen_at_depth
;
141 unsigned long exclude_depth
;
142 unsigned long current_depth
;
145 struct seen_map_entry
{
146 struct oidmap_entry base
;
150 /* Returns 1 if the oid was in the omits set before it was invoked. */
151 static int filter_trees_update_omits(
153 struct oidset
*omits
,
160 return oidset_remove(omits
, &obj
->oid
);
162 return oidset_insert(omits
, &obj
->oid
);
165 static enum list_objects_filter_result
filter_trees_depth(
166 struct repository
*r UNUSED
,
167 enum list_objects_filter_situation filter_situation
,
169 const char *pathname UNUSED
,
170 const char *filename UNUSED
,
171 struct oidset
*omits
,
174 struct filter_trees_depth_data
*filter_data
= filter_data_
;
175 struct seen_map_entry
*seen_info
;
176 int include_it
= filter_data
->current_depth
<
177 filter_data
->exclude_depth
;
182 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
183 * case we encounter a tree or blob again at a shallower depth.
186 switch (filter_situation
) {
188 BUG("unknown filter_situation: %d", filter_situation
);
191 assert(obj
->type
== OBJ_TAG
);
192 /* always include all tag objects */
193 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
196 assert(obj
->type
== OBJ_COMMIT
);
197 /* always include all commit objects */
198 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
201 assert(obj
->type
== OBJ_TREE
);
202 filter_data
->current_depth
--;
206 filter_trees_update_omits(obj
, omits
, include_it
);
207 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
209 case LOFS_BEGIN_TREE
:
210 seen_info
= oidmap_get(
211 &filter_data
->seen_at_depth
, &obj
->oid
);
213 CALLOC_ARRAY(seen_info
, 1);
214 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
215 seen_info
->depth
= filter_data
->current_depth
;
216 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
220 filter_data
->current_depth
>= seen_info
->depth
;
224 filter_res
= LOFR_SKIP_TREE
;
226 int been_omitted
= filter_trees_update_omits(
227 obj
, omits
, include_it
);
228 seen_info
->depth
= filter_data
->current_depth
;
231 filter_res
= LOFR_DO_SHOW
;
232 else if (omits
&& !been_omitted
)
234 * Must update omit information of children
235 * recursively; they have not been omitted yet.
237 filter_res
= LOFR_ZERO
;
239 filter_res
= LOFR_SKIP_TREE
;
242 filter_data
->current_depth
++;
247 static void filter_trees_free(void *filter_data
) {
248 struct filter_trees_depth_data
*d
= filter_data
;
251 oidmap_free(&d
->seen_at_depth
, 1);
255 static void filter_trees_depth__init(
256 struct list_objects_filter_options
*filter_options
,
257 struct filter
*filter
)
259 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
260 oidmap_init(&d
->seen_at_depth
, 0);
261 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
262 d
->current_depth
= 0;
264 filter
->filter_data
= d
;
265 filter
->filter_object_fn
= filter_trees_depth
;
266 filter
->free_fn
= filter_trees_free
;
270 * A filter for list-objects to omit large blobs.
271 * And to OPTIONALLY collect a list of the omitted OIDs.
273 struct filter_blobs_limit_data
{
274 unsigned long max_bytes
;
277 static enum list_objects_filter_result
filter_blobs_limit(
278 struct repository
*r
,
279 enum list_objects_filter_situation filter_situation
,
281 const char *pathname UNUSED
,
282 const char *filename UNUSED
,
283 struct oidset
*omits
,
286 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
287 unsigned long object_length
;
290 switch (filter_situation
) {
292 BUG("unknown filter_situation: %d", filter_situation
);
295 assert(obj
->type
== OBJ_TAG
);
296 /* always include all tag objects */
297 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
300 assert(obj
->type
== OBJ_COMMIT
);
301 /* always include all commit objects */
302 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
304 case LOFS_BEGIN_TREE
:
305 assert(obj
->type
== OBJ_TREE
);
306 /* always include all tree objects */
307 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
310 assert(obj
->type
== OBJ_TREE
);
314 assert(obj
->type
== OBJ_BLOB
);
315 assert((obj
->flags
& SEEN
) == 0);
317 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
318 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
320 * We DO NOT have the blob locally, so we cannot
321 * apply the size filter criteria. Be conservative
322 * and force show it (and let the caller deal with
328 if (object_length
< filter_data
->max_bytes
)
332 oidset_insert(omits
, &obj
->oid
);
333 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
338 oidset_remove(omits
, &obj
->oid
);
339 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
342 static void filter_blobs_limit__init(
343 struct list_objects_filter_options
*filter_options
,
344 struct filter
*filter
)
346 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
347 d
->max_bytes
= filter_options
->blob_limit_value
;
349 filter
->filter_data
= d
;
350 filter
->filter_object_fn
= filter_blobs_limit
;
351 filter
->free_fn
= free
;
355 * A filter driven by a sparse-checkout specification to only
356 * include blobs that a sparse checkout would populate.
358 * The sparse-checkout spec can be loaded from a blob with the
359 * given OID or from a local pathname. We allow an OID because
360 * the repo may be bare or we may be doing the filtering on the
365 * default_match is the usual default include/exclude value that
366 * should be inherited as we recurse into directories based
367 * upon pattern matching of the directory itself or of a
368 * containing directory.
370 enum pattern_match_result default_match
;
373 * 1 if the directory (recursively) contains any provisionally
376 * 0 if everything (recursively) contained in this directory
377 * has been explicitly included (SHOWN) in the result and
378 * the directory may be short-cut later in the traversal.
380 unsigned child_prov_omit
: 1;
383 struct filter_sparse_data
{
384 struct pattern_list pl
;
387 struct frame
*array_frame
;
390 static enum list_objects_filter_result
filter_sparse(
391 struct repository
*r
,
392 enum list_objects_filter_situation filter_situation
,
394 const char *pathname
,
395 const char *filename
,
396 struct oidset
*omits
,
399 struct filter_sparse_data
*filter_data
= filter_data_
;
402 enum pattern_match_result match
;
404 switch (filter_situation
) {
406 BUG("unknown filter_situation: %d", filter_situation
);
409 assert(obj
->type
== OBJ_TAG
);
410 /* always include all tag objects */
411 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
414 assert(obj
->type
== OBJ_COMMIT
);
415 /* always include all commit objects */
416 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
418 case LOFS_BEGIN_TREE
:
419 assert(obj
->type
== OBJ_TREE
);
421 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
422 filename
, &dtype
, &filter_data
->pl
,
424 if (match
== UNDECIDED
)
425 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
427 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
429 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
430 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
434 * A directory with this tree OID may appear in multiple
435 * places in the tree. (Think of a directory move or copy,
436 * with no other changes, so the OID is the same, but the
437 * full pathnames of objects within this directory are new
438 * and may match is_excluded() patterns differently.)
439 * So we cannot mark this directory as SEEN (yet), since
440 * that will prevent process_tree() from revisiting this
441 * tree object with other pathname prefixes.
443 * Only _DO_SHOW the tree object the first time we visit
446 * We always show all tree objects. A future optimization
447 * may want to attempt to narrow this.
449 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
451 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
455 assert(obj
->type
== OBJ_TREE
);
456 assert(filter_data
->nr
> 1);
458 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
461 * Tell our parent directory if any of our children were
462 * provisionally omitted.
464 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
465 frame
->child_prov_omit
;
468 * If there are NO provisionally omitted child objects (ALL child
469 * objects in this folder were INCLUDED), then we can mark the
470 * folder as SEEN (so we will not have to revisit it again).
472 if (!frame
->child_prov_omit
)
473 return LOFR_MARK_SEEN
;
477 assert(obj
->type
== OBJ_BLOB
);
478 assert((obj
->flags
& SEEN
) == 0);
480 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
483 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
484 filename
, &dtype
, &filter_data
->pl
,
486 if (match
== UNDECIDED
)
487 match
= frame
->default_match
;
488 if (match
== MATCHED
) {
490 oidset_remove(omits
, &obj
->oid
);
491 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
495 * Provisionally omit it. We've already established that
496 * this pathname is not in the sparse-checkout specification
497 * with the CURRENT pathname, so we *WANT* to omit this blob.
499 * However, a pathname elsewhere in the tree may also
500 * reference this same blob, so we cannot reject it yet.
501 * Leave the LOFR_ bits unset so that if the blob appears
502 * again in the traversal, we will be asked again.
505 oidset_insert(omits
, &obj
->oid
);
508 * Remember that at least 1 blob in this tree was
509 * provisionally omitted. This prevents us from short
510 * cutting the tree in future iterations.
512 frame
->child_prov_omit
= 1;
518 static void filter_sparse_free(void *filter_data
)
520 struct filter_sparse_data
*d
= filter_data
;
521 clear_pattern_list(&d
->pl
);
522 free(d
->array_frame
);
526 static void filter_sparse_oid__init(
527 struct list_objects_filter_options
*filter_options
,
528 struct filter
*filter
)
530 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
531 struct object_context oc
;
532 struct object_id sparse_oid
;
534 if (get_oid_with_context(the_repository
,
535 filter_options
->sparse_oid_name
,
536 GET_OID_BLOB
, &sparse_oid
, &oc
))
537 die(_("unable to access sparse blob in '%s'"),
538 filter_options
->sparse_oid_name
);
539 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
540 die(_("unable to parse sparse filter data in %s"),
541 oid_to_hex(&sparse_oid
));
543 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
544 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
545 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
548 filter
->filter_data
= d
;
549 filter
->filter_object_fn
= filter_sparse
;
550 filter
->free_fn
= filter_sparse_free
;
554 * A filter for list-objects to show only objects of a single given type.
555 * This filter does not collect omitted OIDs (its omits argument is unused).
557 struct filter_object_type_data
{
558 enum object_type object_type
;
561 static enum list_objects_filter_result
filter_object_type(
562 struct repository
*r UNUSED
,
563 enum list_objects_filter_situation filter_situation
,
565 const char *pathname UNUSED
,
566 const char *filename UNUSED
,
567 struct oidset
*omits UNUSED
,
570 struct filter_object_type_data
*filter_data
= filter_data_
;
572 switch (filter_situation
) {
574 BUG("unknown filter_situation: %d", filter_situation
);
577 assert(obj
->type
== OBJ_TAG
);
578 if (filter_data
->object_type
== OBJ_TAG
)
579 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
580 return LOFR_MARK_SEEN
;
583 assert(obj
->type
== OBJ_COMMIT
);
584 if (filter_data
->object_type
== OBJ_COMMIT
)
585 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
586 return LOFR_MARK_SEEN
;
588 case LOFS_BEGIN_TREE
:
589 assert(obj
->type
== OBJ_TREE
);
592 * If we only want to show commits or tags, then there is no
593 * need to walk down trees.
595 if (filter_data
->object_type
== OBJ_COMMIT
||
596 filter_data
->object_type
== OBJ_TAG
)
597 return LOFR_SKIP_TREE
;
599 if (filter_data
->object_type
== OBJ_TREE
)
600 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
602 return LOFR_MARK_SEEN
;
605 assert(obj
->type
== OBJ_BLOB
);
607 if (filter_data
->object_type
== OBJ_BLOB
)
608 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
609 return LOFR_MARK_SEEN
;
616 static void filter_object_type__init(
617 struct list_objects_filter_options
*filter_options
,
618 struct filter
*filter
)
620 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
621 d
->object_type
= filter_options
->object_type
;
623 filter
->filter_data
= d
;
624 filter
->filter_object_fn
= filter_object_type
;
625 filter
->free_fn
= free
;
628 /* A filter which only shows objects shown by all sub-filters. */
629 struct combine_filter_data
{
630 struct subfilter
*sub
;
634 static enum list_objects_filter_result
process_subfilter(
635 struct repository
*r
,
636 enum list_objects_filter_situation filter_situation
,
638 const char *pathname
,
639 const char *filename
,
640 struct subfilter
*sub
)
642 enum list_objects_filter_result result
;
645 * Check and update is_skipping_tree before oidset_contains so
646 * that is_skipping_tree gets unset even when the object is
647 * marked as seen. As of this writing, no filter uses
648 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
649 * ordering is only theoretically important. Be cautious if you
650 * change the order of the below checks and more filters have
653 if (sub
->is_skipping_tree
) {
654 if (filter_situation
== LOFS_END_TREE
&&
655 oideq(&obj
->oid
, &sub
->skip_tree
))
656 sub
->is_skipping_tree
= 0;
660 if (oidset_contains(&sub
->seen
, &obj
->oid
))
663 result
= list_objects_filter__filter_object(
664 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
666 if (result
& LOFR_MARK_SEEN
)
667 oidset_insert(&sub
->seen
, &obj
->oid
);
669 if (result
& LOFR_SKIP_TREE
) {
670 sub
->is_skipping_tree
= 1;
671 sub
->skip_tree
= obj
->oid
;
677 static enum list_objects_filter_result
filter_combine(
678 struct repository
*r
,
679 enum list_objects_filter_situation filter_situation
,
681 const char *pathname
,
682 const char *filename
,
683 struct oidset
*omits UNUSED
,
686 struct combine_filter_data
*d
= filter_data
;
687 enum list_objects_filter_result combined_result
=
688 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
691 for (sub
= 0; sub
< d
->nr
; sub
++) {
692 enum list_objects_filter_result sub_result
= process_subfilter(
693 r
, filter_situation
, obj
, pathname
, filename
,
695 if (!(sub_result
& LOFR_DO_SHOW
))
696 combined_result
&= ~LOFR_DO_SHOW
;
697 if (!(sub_result
& LOFR_MARK_SEEN
))
698 combined_result
&= ~LOFR_MARK_SEEN
;
699 if (!d
->sub
[sub
].is_skipping_tree
)
700 combined_result
&= ~LOFR_SKIP_TREE
;
703 return combined_result
;
706 static void filter_combine__free(void *filter_data
)
708 struct combine_filter_data
*d
= filter_data
;
710 for (sub
= 0; sub
< d
->nr
; sub
++) {
711 list_objects_filter__free(d
->sub
[sub
].filter
);
712 oidset_clear(&d
->sub
[sub
].seen
);
713 if (d
->sub
[sub
].omits
.set
.size
)
714 BUG("expected oidset to be cleared already");
720 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
721 struct oidset_iter iter
;
722 struct object_id
*src_oid
;
724 oidset_iter_init(src
, &iter
);
725 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
726 oidset_insert(dest
, src_oid
);
729 static void filter_combine__finalize_omits(
730 struct oidset
*omits
,
733 struct combine_filter_data
*d
= filter_data
;
736 for (sub
= 0; sub
< d
->nr
; sub
++) {
737 add_all(omits
, &d
->sub
[sub
].omits
);
738 oidset_clear(&d
->sub
[sub
].omits
);
742 static void filter_combine__init(
743 struct list_objects_filter_options
*filter_options
,
744 struct filter
* filter
)
746 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
749 d
->nr
= filter_options
->sub_nr
;
750 CALLOC_ARRAY(d
->sub
, d
->nr
);
751 for (sub
= 0; sub
< d
->nr
; sub
++)
752 d
->sub
[sub
].filter
= list_objects_filter__init(
753 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
754 &filter_options
->sub
[sub
]);
756 filter
->filter_data
= d
;
757 filter
->filter_object_fn
= filter_combine
;
758 filter
->free_fn
= filter_combine__free
;
759 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
762 typedef void (*filter_init_fn
)(
763 struct list_objects_filter_options
*filter_options
,
764 struct filter
*filter
);
767 * Must match "enum list_objects_filter_choice".
769 static filter_init_fn s_filters
[] = {
771 filter_blobs_none__init
,
772 filter_blobs_limit__init
,
773 filter_trees_depth__init
,
774 filter_sparse_oid__init
,
775 filter_object_type__init
,
776 filter_combine__init
,
779 struct filter
*list_objects_filter__init(
780 struct oidset
*omitted
,
781 struct list_objects_filter_options
*filter_options
)
783 struct filter
*filter
;
784 filter_init_fn init_fn
;
786 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
791 if (filter_options
->choice
>= LOFC__COUNT
)
792 BUG("invalid list-objects filter choice: %d",
793 filter_options
->choice
);
795 init_fn
= s_filters
[filter_options
->choice
];
799 CALLOC_ARRAY(filter
, 1);
800 filter
->omits
= omitted
;
801 init_fn(filter_options
, filter
);
805 enum list_objects_filter_result
list_objects_filter__filter_object(
806 struct repository
*r
,
807 enum list_objects_filter_situation filter_situation
,
809 const char *pathname
,
810 const char *filename
,
811 struct filter
*filter
)
813 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
814 return filter
->filter_object_fn(r
, filter_situation
, obj
,
817 filter
->filter_data
);
819 * No filter is active or user gave object explicitly. In this case,
820 * always show the object (except when LOFS_END_TREE, since this tree
821 * had already been shown when LOFS_BEGIN_TREE).
823 if (filter_situation
== LOFS_END_TREE
)
825 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
828 void list_objects_filter__free(struct filter
*filter
)
832 if (filter
->finalize_omits_fn
&& filter
->omits
)
833 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
834 filter
->free_fn(filter
->filter_data
);